# Get the latest binary files for bowtie & bowtie2 and unzip.
# SourceForge ".../download" URLs are saved by wget as a file literally named
# "download", so -O pins the archive names that the unzip steps expect.
wget -O bowtie-1.2.3-linux-x86_64.zip https://sourceforge.net/projects/bowtie-bio/files/bowtie/1.2.3/bowtie-1.2.3-linux-x86_64.zip/download
wget -O bowtie2-2.3.5.1-linux-x86_64.zip https://sourceforge.net/projects/bowtie-bio/files/bowtie2/2.3.5.1/bowtie2-2.3.5.1-linux-x86_64.zip/download
unzip bowtie-1.2.3-linux-x86_64.zip
unzip bowtie2-2.3.5.1-linux-x86_64.zip
# Copy binary files to your $PATH (e.g. /usr/local/bin).
# Copying by path (instead of cd-ing into each folder) keeps the working
# directory unchanged, so reference.fa / INPUT.fastq below resolve as expected.
# NOTE(review): folder names assume each archive unpacks into a directory named
# after the zip file without ".zip" - confirm with `unzip -l <archive>`.
sudo cp bowtie-1.2.3-linux-x86_64/bowtie* /usr/local/bin/
sudo cp bowtie2-2.3.5.1-linux-x86_64/bowtie2* /usr/local/bin/
# Prepare bowtie and bowtie2 indexes (index basename = reference.fa):
bowtie-build reference.fa reference.fa
bowtie2-build reference.fa reference.fa
# The three bowtie commands below are alternatives: they write to the same SAM
# file, so run only the one that matches your fragment length.
# Bowtie mapping with 0 mismatch tolerance (preferred parameter for small RNA fragments between 15-25 nt)
bowtie -p [insert number of threads] -v 0 reference.fa INPUT.fastq -S INPUT_over_reference.sam
# Bowtie mapping with 1 mismatch tolerance (suitable for RNA fragments between 25-100 nt)
bowtie -p [insert number of threads] -v 1 reference.fa INPUT.fastq -S INPUT_over_reference.sam
# Bowtie mapping with 2 mismatches tolerance (suitable for RNA fragments > 100 nt)
bowtie -p [insert number of threads] -v 2 reference.fa INPUT.fastq -S INPUT_over_reference.sam
# Bowtie2 mapping with default mode (suitable for RNA fragments > 50 nt)
bowtie2 -q -p [insert number of threads] -x reference.fa -U INPUT.fastq -S INPUT_over_reference.sam
# Get the latest source files for BWA aligner and unpack.
# -O pins the archive name; without it wget stores the SourceForge
# ".../download" URL as a file named "download" and tar finds nothing.
wget -O bwa-0.7.17.tar.bz2 https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.17.tar.bz2/download
tar xvjf bwa-0.7.17.tar.bz2
# Compile binary files from source and copy to your $PATH (e.g. /usr/local/bin).
# make -C builds inside the source folder without changing the working
# directory, so the reference/read paths used below stay valid.
make -C bwa-0.7.17
sudo cp bwa-0.7.17/bwa /usr/local/bin/
# Generate bwa indexes:
bwa index reference.fa
# BWA mapping to a reference with mem option (most accurate):
bwa mem -t [insert number of threads] reference.fa INPUT.fastq > INPUT_over_reference.sam
# Get the latest version of HiSat2 aligner binary files and unzip:
wget ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/downloads/hisat2-2.1.0-Linux_x86_64.zip
unzip hisat2-2.1.0-Linux_x86_64.zip
# Copy binary files to your $PATH (e.g. /usr/local/bin).
# Copying by path keeps the working directory unchanged, so the index
# archives below are not downloaded into the HiSat2 install folder.
sudo cp hisat2-2.1.0/hisat2* /usr/local/bin/
# Download and unpack HiSat2 indexes for H. sapiens GRCh38.84 (you would need 200 Gb RAM to build them by yourself!):
## Genome_indexes
wget ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/data/grch38.tar.gz
tar -xzf grch38.tar.gz
## Genome_snp_indexes
wget ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/data/grch38_snp.tar.gz
tar -xzf grch38_snp.tar.gz
## Genome_tran_indexes
wget ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/data/grch38_tran.tar.gz
tar -xzf grch38_tran.tar.gz
# NOTE(review): make_grch38_tran.sh looks like the index build script shipped
# with the archive; if the archive already contains prebuilt indexes this step
# may be unnecessary (and RAM-hungry) - confirm before running.
# The subshell confines the directory change to this one step.
(
  cd grch38_tran || exit 1
  bash make_grch38_tran.sh
)
# Mapping to a genome reference (for transcriptome references use Bowtie or Bowtie2).
# -x takes the index basename inside the folder holding the Genome_tran indexes.
hisat2 -q -p [insert number of threads] --dta -x folder_with_Genome_tran_indexes/Homo_sapiens.GRCh38.dna.84.fa -U INPUT.fastq -S INPUT.sam
# Get the latest version of STAR aligner source code, unpack and compile.
# -O pins the archive name: wget would otherwise save the GitHub archive URL
# as "2.6.1a.tar.gz", which the tar step below would not find.
wget -O STAR-2.6.1a.tar.gz https://github.com/alexdobin/STAR/archive/2.6.1a.tar.gz
tar -xzf STAR-2.6.1a.tar.gz
# Compiling needs no root privileges; make -C builds inside source/ without
# changing the working directory.
make -C STAR-2.6.1a/source STAR
# Copy binary file to your $PATH (e.g. /usr/local/bin).
# `make STAR` leaves the executable in source/ itself, not in a bin/ subfolder.
sudo cp STAR-2.6.1a/source/STAR /usr/local/bin/
# Generate STAR indexes.
# The --genomeDir folders are created up front so genomeGenerate has an
# existing, writable output directory.
mkdir -p folder_with_STAR_indexes_no_gtf/ folder_with_STAR_indexes_with_gtf/ folder_with_STAR_indexes_for_transcriptome/
## STAR genome indexes without annotation GTF file
STAR --runThreadN [insert number of threads] --runMode genomeGenerate --genomeDir folder_with_STAR_indexes_no_gtf/ --genomeFastaFiles folder_with_reference_genome/genome.fa
## STAR genome indexes with annotation GTF file
STAR --runThreadN [insert number of threads] --runMode genomeGenerate --sjdbGTFfile folder_with_gtf_file/genome_annotation.gtf --genomeDir folder_with_STAR_indexes_with_gtf/ --genomeFastaFiles folder_with_reference_genome/genome.fa
## STAR transcriptome indexes
## (STAR options take space-separated values, not "--option=value")
STAR --runThreadN [insert number of threads] --limitGenomeGenerateRAM 60000000000 --runMode genomeGenerate --genomeDir folder_with_STAR_indexes_for_transcriptome/ --genomeFastaFiles folder_with_reference_transcriptome/transcriptome.fa
# STAR mapping to a genome reference | Default mode (output - SAM file)
STAR --runThreadN [insert number of threads] --genomeDir folder_with_STAR_indexes_no_gtf/ --sjdbGTFfile folder_with_gtf_file/genome_annotation.gtf --readFilesIn INPUT.fastq --outFileNamePrefix INPUT
# STAR mapping to a genome reference | BAM mode (output - BAM file)
STAR --runThreadN [insert number of threads] --genomeDir folder_with_STAR_indexes_no_gtf/ --sjdbGTFfile folder_with_gtf_file/genome_annotation.gtf --readFilesIn INPUT.fastq --outSAMtype BAM Unsorted --outFileNamePrefix INPUT
# STAR mapping to a genome reference | Sorted BAM mode (output - sorted by coordinate BAM file)
STAR --runThreadN [insert number of threads] --genomeDir folder_with_STAR_indexes_no_gtf/ --sjdbGTFfile folder_with_gtf_file/genome_annotation.gtf --readFilesIn INPUT.fastq --outSAMtype BAM SortedByCoordinate --outFileNamePrefix INPUT
# STAR mapping to a genome reference | Transcriptome mode
STAR --runThreadN [insert number of threads] --genomeDir folder_with_STAR_indexes_no_gtf/ --sjdbGTFfile folder_with_gtf_file/genome_annotation.gtf --readFilesIn INPUT.fastq --quantMode TranscriptomeSAM --outFileNamePrefix INPUT
# STAR mapping to a transcriptome reference:
STAR --runThreadN [insert number of threads] --genomeDir folder_with_STAR_indexes_for_transcriptome/ --readFilesIn INPUT.fastq --outFileNamePrefix INPUT
SciBerg e.Kfm
Legal form: Sole Proprietorship
James-Monroe-Ring 107, Mannheim 68309, Germany
Amtsgericht Mannheim HRA 707401
VAT identification number: DE 312303132