#!/usr/bin/env bash
# Trout fin-specific splice sites, reference genome modifications and mapping
############################################################################
#
# Steps, in order:
#   1. Convert the Atlantic salmon annotation from GFF to GTF, extract splice
#      sites and exons, and build a HISAT2 index of the salmon genome.
#   2. Merge brown trout fin reads, align them to the salmon genome and
#      assemble transcripts with StringTie; extract splice sites from the
#      assembly.
#   3. Align the 3_Altja pool, call SNPs, write a reference carrying the
#      alternative alleles and index it with HISAT2.
#   4. Align QuantSeq 3' mRNA-Seq reads to the modified reference.
#
# Environment:
#   genomeDir      (required) directory holding the genome FASTA; the HISAT2
#                  indexes and the modified reference are written below it.
#   QuantSeqReads  (required for step 4) directory containing qual_cont/.
#   string_work2   (optional) StringTie working directory; defaults to
#                  ./string_work2.
#
# All other inputs and outputs are relative to the current working directory.

# Stop at the first failing step so later steps never consume partial output.
set -euo pipefail

: "${genomeDir:?genomeDir must be set to the reference genome directory}"
string_work2=${string_work2:-string_work2}

# Base name shared by the annotation, splice-site, exon and FASTA files.
readonly annot=GCF_000233375.1_ICSASG_v2_genomic
# The genome FASTA is read from genomeDir by every step that needs it.
readonly genome_fa="${genomeDir}/${annot}.fna"
readonly modified_dir="${genomeDir}/altja_reference"
readonly modified_fa="${modified_dir}/${annot}_3_Altja.fasta"

# Output directories used below; create them up front.
mkdir -p three_libMax_merged "${string_work2}" unalign "${modified_dir}"

# GFF to GTF conversion
gffread "${annot}.gff" -T -o "${annot}.gtf"

# Atlantic salmon splice sites and exons extraction.
# The exon file is named *.exons because that is the name both hisat2-build
# calls below read.
extract_splice_sites.py "${annot}.gtf" > "${annot}.ss"
extract_exons.py "${annot}.gtf" > "${annot}.exons"

# Hisat2 index
hisat2-build -p 12 \
  --ss "${annot}.ss" \
  --exon "${annot}.exons" \
  "${genome_fa}" \
  "${genomeDir}/salmon_genome2"

# Merging of 3' mRNA-Seq samples with the highest read depth
# (NCBI SRA sample information: 414: SRS4322894; 323: SRS4322925;
# 320: SRS4322922)
cat -- \
  BT_Altja_414_2_merged_trimmed2.fastq \
  BT_Altja_323_1_rep_merged_trimmed2.fastq \
  BT_Altja_320_3_merged_trimmed2.fastq \
  > three_libMax_merged/MaxReadsPerLib_merged.fastq

# Merging of paired-end RNA-Seq of PKD infected Salmo trutta (juvenile) fin
# pools (pooling of RNA extracts) (SRX9103572 and SRX9103573),
# quality controlled using Trimmomatic:
#   3_Altja_paired_read1.fq    3_Altja_paired_read2.fq
#   4_Altja_paired_read1.fq    4_Altja_paired_read2.fq
#   3_Altja_unpaired_read1.fq  3_Altja_unpaired_read2.fq
#   4_Altja_unpaired_read1.fq  4_Altja_unpaired_read2.fq
cat -- *_paired_read1.fq > altja_merged_read1.fq
cat -- *_paired_read2.fq > altja_merged_read2.fq
# The unpaired globs include "_Altja_" so that the merged outputs
# (altja_unpaired_read*.fq) are not picked up as inputs on a re-run.
cat -- *_Altja_unpaired_read1.fq > altja_unpaired_read1.fq
# Unpaired read2 is merged but excluded from the alignment below because of
# its low number of reads.
cat -- *_Altja_unpaired_read2.fq > altja_unpaired_read2.fq

# Hisat2 alignment of the merged fin reads against the salmon genome index,
# using the splice sites extracted from the salmon annotation.
hisat2 -p 16 -I 40 -X 700 \
  --score-min L,0,-0.4 \
  --rg-id Altja --rg SM:Altja --rg LB:RNASeq --rg PL:ILLUMINA \
  --rg DS:BtroutRNAseq \
  -x "${genomeDir}/salmon_genome2" \
  --known-splicesite-infile "${annot}.ss" \
  --dta -q \
  -1 altja_merged_read1.fq -2 altja_merged_read2.fq \
  -U altja_unpaired_read1.fq,three_libMax_merged/MaxReadsPerLib_merged.fastq \
  -S Altja_FIN_ALIGN.sam

samtools view -bh Altja_FIN_ALIGN.sam > Altja_FIN_ALIGN.bam
# Explicit -o/-T form; the positional "in.bam out_prefix" form is not accepted
# by current samtools releases.
samtools sort -@ 8 -T Altja_FIN_ALIGN_SORT.tmp \
  -o Altja_FIN_ALIGN_SORT.bam Altja_FIN_ALIGN.bam
samtools index Altja_FIN_ALIGN_SORT.bam

# StringTie run
##############
stringtie Altja_FIN_ALIGN_SORT.bam -p 8 -B \
  -C "${string_work2}/ref_cov_Altja_FIN.txt" \
  -A "${string_work2}/estm_abund_Altja_FIN.txt" \
  -G "${annot}.gff" \
  -l Strutta_Fin \
  -o "${string_work2}/Altja_FIN_ref_annot.gtf"

# Splice site information from StringTie assembled transcripts (GTF).
# Read and written in the StringTie working directory, which is where the
# final QuantSeq alignment looks for the .ss file.
extract_splice_sites.py "${string_work2}/Altja_FIN_ref_annot.gtf" \
  > "${string_work2}/Altja_FIN_ref_annot.ss"

# Align 3_Altja paired-end RNA-Seq of PKD infected Salmo trutta (juvenile)
# fin pool (SRX9103572) to the Atlantic salmon genome
# (quality controlled using Trimmomatic).
hisat2 -p 8 -I 40 -X 700 \
  --score-min L,0,-0.4 \
  --rg-id 3_Altja --rg SM:3_Altja --rg LB:RNASeq --rg PL:ILLUMINA \
  --rg DS:BtroutRNAseq \
  -x "${genomeDir}/salmon_genome2" \
  --dta \
  --un unalign/3_Altja --un-conc unalign/3_Altja \
  -q \
  -1 3_Altja_paired_read1.fq -2 3_Altja_paired_read2.fq \
  -U 3_Altja_unpaired_read1.fq,3_Altja_unpaired_read2.fq \
  -S 3_Altja_ALIGN.sam

samtools view -bh 3_Altja_ALIGN.sam > 3_Altja_ALIGN.bam
samtools sort -@ 8 -T 3_Altja_ALIGN_SORT.tmp \
  -o 3_Altja_ALIGN_SORT.bam 3_Altja_ALIGN.bam
samtools index 3_Altja_ALIGN_SORT.bam

# Call SNPs
# NOTE(review): the mpileup flags (-u, -g) and the -t tag list (DPR) look tied
# to an older samtools release - confirm the installed version accepts them.
samtools mpileup -uAIg -t DP,DV,DPR,INFO/DPR,DP4,SP -d 200000 \
  -f "${genome_fa}" \
  -o 3_Altja_ALIGN_SORT_mpileup_FILE.bcf \
  3_Altja_ALIGN_SORT.bam
bcftools call -mv -V indels -O v \
  -o 3_Altja_snps-m.vcf \
  3_Altja_ALIGN_SORT_mpileup_FILE.bcf

# vcf-consensus reads a bgzip-compressed, tabix-indexed VCF, so produce the
# compressed copy it is given below from the bcftools output.
bgzip -c 3_Altja_snps-m.vcf > 3_Altja_snps-m_copy.vcf.gz
tabix -f -p vcf 3_Altja_snps-m_copy.vcf.gz

# Modify the reference for the alternative allele
vcf-consensus 3_Altja_snps-m_copy.vcf.gz < "${genome_fa}" > "${modified_fa}"

# Indexing of the modified reference genome, reusing the salmon splice sites
# and exons extracted at the top of this script.
hisat2-build -p 16 \
  --ss "${annot}.ss" \
  --exon "${annot}.exons" \
  "${modified_fa}" \
  "${modified_dir}/salmon_genome"

# All quality-controlled reads from QuantSeq 3' mRNA-Seq from every sample are
# aligned to the modified reference genome in the following way
####################################################################
# Run separately for each library, lane and run. "SampleID" and the
# S165/L002 parts of the file name are written literally here.
# NOTE(review): this looks like a template to be filled in per sample -
# confirm before running it as-is.
: "${QuantSeqReads:?QuantSeqReads must be set to the QuantSeq reads directory}"
hisat2 -p 13 \
  --score-min L,-0.4,-0.4 \
  --rg-id SampleID --rg SM:SampleID --rg LB:QuantSeq_3prime_mRNA-Seq \
  --rg PL:ILLUMINA --rg DS:BtroutRNAseq \
  -x "${modified_dir}/salmon_genome" \
  --dta \
  --known-splicesite-infile "${string_work2}/Altja_FIN_ref_annot.ss" \
  -q \
  -U "${QuantSeqReads}/qual_cont/BT_Altja_SampleID_S165_L002_R1_001_trimmed2PA2.fastq.gz" \
  -S SampleID_SampleID_3_1_2_trimmedPA2_ALIGN.sam