# Counting number of reads in an uncompressed FASTQ file.
# Assumes the standard 4-line FASTQ record layout, so reads = lines / 4
# (integer division). wc reads the file directly (no cat) and the shell does
# the arithmetic, so bc is not needed.
n_lines=$(wc -l < fastq_file.fastq)
echo "$(( n_lines / 4 ))"
# Counting number of reads in gzip compressed FASTQ file.
# `gzip -dc` is used instead of zcat because on some systems (notably macOS)
# zcat only looks for .Z files. Assumes 4-line FASTQ records; the shell does
# the integer division, so bc is not needed.
n_lines=$(gzip -dc fastq_file.fastq.gz | wc -l)
echo "$(( n_lines / 4 ))"
# Examining a range of lines within the FASTQ file (e.g. lines 530 to 640) | way 1 (sed)
# -n turns off automatic printing; the first expression prints the range and
# the second quits on its last line so the rest of the file is not read.
sed -n -e '530,640p' -e '640q' fastq_file.fastq
# Examining a range of lines within the FASTQ file (e.g. lines 530 to 640) | way 2 (awk)
# The first rule stops awk as soon as the range has been passed, so the rest
# of the file is not scanned; the second rule prints every line from 530 on.
awk 'FNR > 640 { exit } FNR >= 530' fastq_file.fastq
# Converting FastQ to FastA | way 1 (awk)
# Line 1 of each 4-line record is the header: remember it without its first
# character. Line 2 is the sequence: emit ">header" followed by the sequence.
# Lines 3 and 4 of each record are not printed.
awk '
  NR % 4 == 1 { header = substr($0, 2); next }
  NR % 4 == 2 { printf ">%s\n%s\n", header, $0 }
' input.fastq > output.fa
# Converting FastQ to FastA | way 2 (sed)
# Records are processed by position (4 lines each) rather than by matching
# /^@/ on every line, because a quality string may itself begin with '@' and
# would otherwise be mistaken for a header.
#   s/^@/>/p  rewrite the header's leading '@' to '>' and print it
#   n;p       read the sequence line and print it
#   n;n       read the '+' and quality lines; -n keeps them from being printed
sed -n 's/^@/>/p;n;p;n;n' input.fastq > output.fa
# Extracting all reads containing the XbaI cleavage site (TCTAGA)
# Output is FASTA: for each 4-line record whose sequence line contains the
# site, print ">header" (header without its first character) and the sequence.
awk '
  NR % 4 == 1 { header = substr($0, 2); next }
  NR % 4 == 2 && index($1, "TCTAGA") { printf ">%s\n%s\n", header, $0 }
' fastq_file.fastq
# Counting number of sequences in a FASTA file:
# every record starts with a '>' header line, so count the lines that begin with '>'.
grep -c -e '^>' -- fasta_file.fa
# Extracting the FASTA headers (e.g. to obtain a table with genes/transcripts annotation from a given reference):
# The pattern is anchored to the start of the line so that only header lines
# are taken; a '>' appearing inside any other line is not treated as a header.
grep '^>' fasta.fa > fasta_header
# Cleaning up FASTA headers so that only the first column of each header remains.
# Two alternatives; both split on any whitespace (spaces or tabs) and leave
# non-header lines exactly as they are.
# Variant A: write the cleaned copy to a new file.
awk '/^>/ { print $1; next } { print }' fasta_file_input.fa > fasta_file_output.fa
# Variant B: edit the file in place. The match is anchored at the leading '>'
# and keeps everything up to the first whitespace character.
perl -p -i -e 's/^(>\S+).*/$1/' fasta_file.fa
# Converting a multi-line FASTA to a single-line FASTA:
# Sequence lines are written without a trailing newline so they join up.
# A header first terminates any sequence still open, then is printed on its
# own line. END closes the final sequence.
awk '
  /^>/ {
    if (open_seq) printf "\n"
    print
    open_seq = 0
    next
  }
  {
    printf "%s", $0
    open_seq = 1
  }
  END {
    if (open_seq) printf "\n"
  }
' multi_line.fa > single_line.fa
SciBerg e.Kfm
Legal form: Sole Proprietorship
James-Monroe-Ring 107, Mannheim 68309, Germany
Amtsgericht Mannheim HRA 707401
VAT identification number: DE 312303132