Advanced Bioinformatics Services

Reads alignment

With Bowtie & Bowtie2 aligners

# Get the binary releases of bowtie and bowtie2 and unzip them.
# -O names the saved archive explicitly: the SourceForge URLs end in
# "/download", so without it wget would store the files as "download" and
# "download.1" and the unzip commands below would not find them.

wget -O bowtie-1.2.3-linux-x86_64.zip https://sourceforge.net/projects/bowtie-bio/files/bowtie/1.2.3/bowtie-1.2.3-linux-x86_64.zip/download

wget -O bowtie2-2.3.5.1-linux-x86_64.zip https://sourceforge.net/projects/bowtie-bio/files/bowtie2/2.3.5.1/bowtie2-2.3.5.1-linux-x86_64.zip/download

unzip bowtie-1.2.3-linux-x86_64.zip

unzip bowtie2-2.3.5.1-linux-x86_64.zip

# Copy binary files to your $PATH (e.g. /usr/local/bin).
# The directory part is a glob so the copy works whether or not the unpacked
# directory name carries the platform suffix (e.g. bowtie-1.2.3-linux-x86_64).
# Copying by path instead of cd-ing keeps the shell in the working directory
# that holds reference.fa and the FASTQ files.

sudo cp bowtie-1.2.3*/bowtie* /usr/local/bin/

sudo cp bowtie2-2.3.5.1*/bowtie2* /usr/local/bin/

# Build the bowtie and bowtie2 indexes (index basename = reference.fa):

bowtie-build reference.fa reference.fa

bowtie2-build reference.fa reference.fa

# Bowtie mapping, 0 mismatches allowed
# (preferred parameter for small RNA fragments between 15-25 nt)

bowtie -p [insert number of threads] -v 0 \
  reference.fa INPUT.fastq \
  -S INPUT_over_reference.sam

# Bowtie mapping, 1 mismatch allowed
# (passes for RNA fragments between 25-100 nt)

bowtie -p [insert number of threads] -v 1 \
  reference.fa INPUT.fastq \
  -S INPUT_over_reference.sam

# Bowtie mapping, 2 mismatches allowed
# (passes for RNA fragments > 100 nt)

bowtie -p [insert number of threads] -v 2 \
  reference.fa INPUT.fastq \
  -S INPUT_over_reference.sam

# Bowtie2 mapping in default mode
# (passes for RNA fragments > 50 nt)

bowtie2 -q -p [insert number of threads] \
  -x reference.fa \
  -U INPUT.fastq \
  -S INPUT_over_reference.sam

With BWA aligner

# Get the source files for BWA aligner and unpack them.
# -O names the saved archive explicitly: the SourceForge URL ends in
# "/download", so without it the tarball would be stored as "download".

wget -O bwa-0.7.17.tar.bz2 https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.17.tar.bz2/download

tar xvjf bwa-0.7.17.tar.bz2

# Compile binary files from source and copy to your $PATH (e.g. /usr/local/bin).
# make -C builds inside the source tree without changing directory, so the
# commands below still run where reference.fa and INPUT.fastq live.

make -C bwa-0.7.17

sudo cp bwa-0.7.17/bwa /usr/local/bin/

# Generate bwa indexes:

bwa index reference.fa

# BWA mapping to a reference with mem option (most accurate):

bwa mem -t [insert number of threads] reference.fa INPUT.fastq > INPUT_over_reference.sam

With HiSat2 aligner

# Get the HiSat2 aligner binary release and unzip:

wget ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/downloads/hisat2-2.1.0-Linux_x86_64.zip

unzip hisat2-2.1.0-Linux_x86_64.zip

# Copy binary files to your $PATH (e.g. /usr/local/bin).
# Copying by path instead of cd-ing keeps the shell in the working directory,
# so later downloads do not end up inside the hisat2-2.1.0 folder.

sudo cp hisat2-2.1.0/hisat2* /usr/local/bin/

# Download and unzip HiSat2 indexes for H. sapiens GRCh38.84 (you would need 200 GB RAM to build them by yourself!):

## Genome_indexes

wget ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/data/grch38.tar.gz

tar -xzf grch38.tar.gz

## Genome_snp_indexes

wget ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/data/grch38_snp.tar.gz

tar -xzf grch38_snp.tar.gz

## Genome_tran_indexes

wget ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/data/grch38_tran.tar.gz

tar -xzf grch38_tran.tar.gz

cd grch38_tran

# NOTE(review): the archives above are described as ready-made indexes, and the
# RAM warning suggests building should be avoided - confirm whether running
# make_grch38_tran.sh is actually required or whether it rebuilds the index.

bash make_grch38_tran.sh

# Mapping to a genome reference (for transcriptome references use Bowtie or Bowtie2):
# NOTE(review): no `cd ..` follows the `cd grch38_tran` above, so adjust the
# index folder and INPUT.fastq paths to the directory you run this from.
# NOTE(review): confirm that the value given to -x matches the basename of the
# index files present in the index folder.

hisat2 -q -p [insert number of threads] --dta -x folder_with_Genome_tran_indexes/Homo_sapiens.GRCh38.dna.84.fa -U INPUT.fastq -S INPUT.sam

With STAR aligner

# Get the STAR aligner source code, unpack and compile.
# -O names the saved archive explicitly: wget would otherwise store the GitHub
# archive as 2.6.1a.tar.gz and the tar command below would not find it.

wget -O STAR-2.6.1a.tar.gz https://github.com/alexdobin/STAR/archive/2.6.1a.tar.gz

tar -xzf STAR-2.6.1a.tar.gz

# Build as a regular user (no sudo, so the build tree does not end up owned by
# root). make -C compiles inside source/ without changing directory.

make -C STAR-2.6.1a/source STAR

# Copy binary file to your $PATH (e.g. /usr/local/bin).
# The freshly compiled binary is written to the source/ directory.

sudo cp STAR-2.6.1a/source/STAR /usr/local/bin/

# Generate STAR indexes.
# The directory given to --genomeDir has to exist and be writable before STAR
# runs, so each recipe creates it first.

## STAR genome indexes without annotation GTF file

mkdir -p folder_with_STAR_indexes_no_gtf/

STAR --runThreadN [insert number of threads] \
  --runMode genomeGenerate \
  --genomeDir folder_with_STAR_indexes_no_gtf/ \
  --genomeFastaFiles folder_with_reference_genome/genome.fa

## STAR genome indexes with annotation GTF file

mkdir -p folder_with_STAR_indexes_with_gtf/

STAR --runThreadN [insert number of threads] \
  --runMode genomeGenerate \
  --sjdbGTFfile folder_with_gtf_file/genome_annotation.gtf \
  --genomeDir folder_with_STAR_indexes_with_gtf/ \
  --genomeFastaFiles folder_with_reference_genome/genome.fa

## STAR transcriptome indexes
## (--limitGenomeGenerateRAM is given in bytes; written in the same
## space-separated "--option value" form as every other STAR option here)

mkdir -p folder_with_STAR_indexes_for_transcriptome/

STAR --runThreadN [insert number of threads] \
  --limitGenomeGenerateRAM 60000000000 \
  --runMode genomeGenerate \
  --genomeDir folder_with_STAR_indexes_for_transcriptome/ \
  --genomeFastaFiles folder_with_reference_transcriptome/transcriptome.fa

# STAR mapping to a genome reference | Default mode (output - SAM file)

STAR --runThreadN [insert number of threads] \
  --genomeDir folder_with_STAR_indexes_no_gtf/ \
  --sjdbGTFfile folder_with_gtf_file/genome_annotation.gtf \
  --readFilesIn INPUT.fastq \
  --outFileNamePrefix INPUT

# STAR mapping to a genome reference | BAM mode (output - BAM file)

STAR --runThreadN [insert number of threads] \
  --genomeDir folder_with_STAR_indexes_no_gtf/ \
  --sjdbGTFfile folder_with_gtf_file/genome_annotation.gtf \
  --readFilesIn INPUT.fastq \
  --outSAMtype BAM Unsorted \
  --outFileNamePrefix INPUT

# STAR mapping to a genome reference | Sorted BAM mode (output - sorted by coordinate BAM file)

STAR --runThreadN [insert number of threads] \
  --genomeDir folder_with_STAR_indexes_no_gtf/ \
  --sjdbGTFfile folder_with_gtf_file/genome_annotation.gtf \
  --readFilesIn INPUT.fastq \
  --outSAMtype BAM SortedByCoordinate \
  --outFileNamePrefix INPUT

# STAR mapping to a genome reference | Transcriptome mode

STAR --runThreadN [insert number of threads] \
  --genomeDir folder_with_STAR_indexes_no_gtf/ \
  --sjdbGTFfile folder_with_gtf_file/genome_annotation.gtf \
  --readFilesIn INPUT.fastq \
  --quantMode TranscriptomeSAM \
  --outFileNamePrefix INPUT

# STAR mapping to a transcriptome reference:

STAR --runThreadN [insert number of threads] \
  --genomeDir folder_with_STAR_indexes_for_transcriptome/ \
  --readFilesIn INPUT.fastq \
  --outFileNamePrefix INPUT