#!/usr/bin/shell #code gb2312 ##创建工作目录,存放SRA文件 mkdir rnaseq cd rnaseq/ mkdir data cd data/ mkdir sra // ##安装sratoolkit tar -zxvf sratoolkit.current-ubuntu64.tar.gz cd sratoolkit.2.8.2-1-ubuntu64/ vi .bashrc export PATH=$PATH:~/local/sratoolkit.2.8.2-1-ubuntu64/bin #将sratoolkit的bin目录添加到环境变量PATH source .bashrc // ##使用fastq-dump将SRA文件转为fastq格式 fastq-dump -h fastq-dump SRR3418005.sra & fastq-dump SRR3418006.sra & fastq-dump SRR3418019.sra & fastq-dump SRR3418020.sra & ll -h #查看文件大小 df -h #查看系统硬盘使用情况 rm *.sra // ##安装Java JDK tar -zxvf jdk-8u77-linux-x64.tar.gz vi .bashrc export PATH=$PATH:~/local/jdk1.8.0_77/bin #将java可执行文件目录添加至环境变量 // ##安装使用FastQC cd mkdir bin #创建用户可执行文件目录(存放其他可执行文件) vi .bashrc export PATH=$PATH:~/bin #将用户可执行文件目录添加至系统路径 unzip fastqc_v0.11.5.zip cd FastQC ln -s /home/ventson/local/FastQC/fastqc /home/ventson/bin/fastqc #建立软链接 fastqc -o ../fastqc_results -f fastq SRR3418005.fastq SRR3418006.fastq SRR3418019.fastq SRR3418020.fastq & // ##安装使用fastx_toolkit tar -jxvf fastx_toolkit_0.0.13_binaries_Linux_2.6_amd64.tar.bz2 vi .bashrc export PATH=$PATH:~/local/bin #将解压出来的bin目录添加至环境变量 source .bashrc fastx_trimmer -Q 33 -f 12 -i SRR3418005.fastq -o fastx_results/ SRR3418005_trimmed.fastq & fastx_trimmer -Q 33 -f 12 -i SRR3418006.fastq -o fastx_results/ SRR3418006_trimmed.fastq & fastx_trimmer -Q 33 -f 12 -i SRR3418019.fastq -o fastx_results/ SRR3418019_trimmed.fastq & fastx_trimmer -Q 33 -f 12 -i SRR3418020.fastq -o fastx_results/ SRR3418020_trimmed.fastq & fastq_quality_filter -Q 33 -q 20 -p 80 -i SRR3418005_trimmed.fastq -o SRR3418005_filtered.fastq & fastq_quality_filter -Q 33 -q 20 -p 80 -i SRR3418006_trimmed.fastq -o SRR3418006_filtered.fastq & fastq_quality_filter -Q 33 -q 20 -p 80 -i SRR3418019_trimmed.fastq -o SRR3418019_filtered.fastq & fastq_quality_filter -Q 33 -q 20 -p 80 -i SRR3418020_trimmed.fastq -o SRR3418020_filtered.fastq & // ##安装HISAT2和StringTie unzip hisat2-2.1.0-Linux_x86_64.zip cd hisat2-2.1.0 cp hisat2* ../bin cp *.py ../bin tar -zxvf stringtie-1.3.3b.Linux_x86_64.tar.gz cd stringtie-1.3.3b.Linux_x86_64 cp stringtie ../bin // ##使用HISAT2进行reads比对 hisat2-build ../genome/fasta/tair10.fasta tair10 #建立基因组索引库 hisat2 -p 2 --dta -x ../data/index/tair10 -U ../data/fastx_results/SRR3418005_filtered.fastq -S SRR3418005.sam & hisat2 -p 2 --dta -x ../data/index/tair10 -U ../data/fastx_results/SRR3418006_filtered.fastq -S SRR3418006.sam & hisat2 -p 2 --dta -x ../data/index/tair10 -U ../data/fastx_results/SRR3418019_filtered.fastq -S SRR3418019.sam & hisat2 -p 2 --dta -x ../data/index/tair10 -U ../data/fastx_results/SRR3418020_filtered.fastq -S SRR3418020.sam & // ##安装使用samtools tar -jxvf samtools-1.6.tar.bz2 cd samtools-1.6 ./configure make sudo make install samtools sort -@ 2 -m 200M -o SRR3418005.bam SRR3418005.sam & samtools sort -@ 2 -m 200M -o SRR3418006.bam SRR3418006.sam & samtools sort -@ 2 -m 200M -o SRR3418019.bam SRR3418019.sam & samtools sort -@ 2 -m 200M -o SRR3418020.bam SRR3418020.sam & samtools index SRR3418005.bam SRR3418005.bai & #为BAM文件建立索引用于IGV展示 // ##安装Cufflinks,使用其中的gffread将GFF文件转为GTF文件 tar -zxvf cufflinks-2.2.1.Linux_x86_64.tar.gz cd cufflinks-2.2.1.Linux_x86_64/ cp cuff* gffread gtf_to_sam ../bin/ gffread -T -o tair10.gtf TAIR10_GFF3_genes.gff & // ##转录本拼接 stringtie -p 2 -G ../data/genome/gff/tair10.gtf -o SRR3418005.gtf -l SRR3418005 ../alignment/SRR3418005.bam & stringtie -p 2 -G ../data/genome/gff/tair10.gtf -o SRR3418006.gtf -l SRR3418006 ../alignment/SRR3418006.bam & stringtie -p 2 -G ../data/genome/gff/tair10.gtf -o SRR3418019.gtf -l SRR3418019 ../alignment/SRR3418019.bam & stringtie -p 2 -G ../data/genome/gff/tair10.gtf -o SRR3418020.gtf -l SRR3418020 ../alignment/SRR3418020.bam & // ##转录本整合与比较 vi gtflist.txt #将gtf文件路径写入文件 stringtie --merge -p 2 -G ../data/genome/gff/tair10.gtf -o stringtie_merged.gtf gtflist.txt & #整合 tar -zxvf gffcompare-0.10.1.Linux_x86_64.tar.gz #安装gffcompare cd gffcompare-0.10.1.Linux_x86_64/ cp gffcompare ../bin/ gffcompare -r ../../data/genome/gff/tair10.gtf -G -o merged ../stringtie_merged.gtf & #比较鉴定新转录本 // ##计算FPKM stringtie -e -p 2 -G ../../assembly/stringtie_merged.gtf -A SRR3418005_genes.gtf -o SRR3418005_transcripts.gtf ../../alignment/SRR3418005.bam & stringtie -e -p 2 -G ../../assembly/stringtie_merged.gtf -A SRR3418006_genes.gtf -o SRR3418006_transcripts.gtf ../../alignment/SRR3418006.bam & stringtie -e -p 2 -G ../../assembly/stringtie_merged.gtf -A SRR3418019_genes.gtf -o SRR3418019_transcripts.gtf ../../alignment/SRR3418019.bam & stringtie -e -p 2 -G ../../assembly/stringtie_merged.gtf -A SRR3418020_genes.gtf -o SRR3418020_transcripts.gtf ../../alignment/SRR3418020.bam & sed -i '1d' *_genes.gtf #删除第一行标题,以下步骤将四个gtf文件整合为一个 sort SRR3418005_genes.gtf > merge/SRR3418005_genes.gtf #排序 sort SRR3418006_genes.gtf > merge/SRR3418006_genes.gtf sort SRR3418019_genes.gtf > merge/SRR3418019_genes.gtf sort SRR3418020_genes.gtf > merge/SRR3418020_genes.gtf cd merge join -t $'\t' SRR3418005_genes.gtf SRR3418006_genes.gtf | join - SRR3418019_genes.gtf | join - SRR3418020_genes.gtf > out_fpkm.gtf awk -F ' ' '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$5"\t"$6"\t"$7"\t"$8"\t"$16"\t"$24"\t"$32}' out_fpkm.gtf > fpkm.gtf #fpkm_gtf为最终的FPKM注释文件 // ##安装HTSeq,计算count数 sudo apt-get install python-pip #下载速度过慢可以更换Ubuntu源(观看视频或自行百度) sudo pip install HTSeq #下载速度过慢可以选择wheel文件安装 sudo pip install numpy-1.13.3-cp27-cp27mu-manylinux1_x86_64.whl #自行下载whl文件安装 sudo pip install pysam-0.12.0.1-cp27-cp27mu-manylinux1_x86_64.whl sudo pip install HTSeq-0.9.1-cp27-cp27mu-manylinux1_x86_64.whl#md5\=56b5981267fa8fddcf21ae0fa767f9b9.whl htseq-count -q -f bam -s no -i gene_name ../../alignment/SRR3418005.bam ../../data/genome/gff/tair10.gtf > SRR3418005.count & htseq-count -q -f bam -s no -i gene_name ../../alignment/SRR3418006.bam ../../data/genome/gff/tair10.gtf > SRR3418006.count & htseq-count -q -f bam -s no -i gene_name ../../alignment/SRR3418019.bam ../../data/genome/gff/tair10.gtf > SRR3418019.count & htseq-count -q -f bam -s no -i gene_name ../../alignment/SRR3418020.bam ../../data/genome/gff/tair10.gtf > SRR3418020.count & join SRR3418005.count SRR3418006.count | join - SRR3418019.count | join - SRR3418020.count > count.txt #count文件整合 sed -i 's/ /\t/g' count.txt //