#combination of the samples per population #initial .bam files are the results of the script named "reads_cleaning_mapping_filtering.txt" for sample in `ls *.bam` do #create mpileup files samtools mpileup -A -d 100000 -f Cotesia_sesamiae_bracovirus.fasta "$sample" > "$name".mpileup #create sync files java -jar -Xmx4g mpileup2sync.jar --input "$name".mpileup --output "$name".sync --fastq-type illumina #subsampling perl subsample-synchronized.pl --input "$name".sync --output "$name"_sub.sync --target-coverage 100 --max-coverage 100000 --method withreplace done #for each population, samples was combined as fallows (example with the population 3): cut -f 1-3 G7315_sub.sync > header.sync paste G7315_sub.sync G5783_sub.sync G7354_sub.sync G5781_sub.sync | awk '{print $4":"$8":"$12":"$16}' | awk -F ":" '{print $1+$7+$13+$19":"$2+$8+$14+$20":"$3+$9+$15+$21":"$4+$10+$16+$22":"$5+$11+$17+$23":"$6+$12+$18+$24}' > temp3.sync paste header.sync temp3.sync > POP3_sub.sync #extract gene regions (exons) for pop in `ls *_sub.sync` do name=`echo "$pop" |sed -e 's/_sub.sync//g'` perl create-genewise-sync.pl --input "$pop" --gtf Cotesia_sesamiae_bracovirus_genes.gtf --output "$name"_sub_genes.sync done #estimate population nucleotide diversity pi for sample in `ls *_sub_genes.sync` do name=`echo "$sample" |sed -e 's/_gene_all.sync//g'` while read gene do R --slave --vanilla --quiet --no-save --args < 0.02){ assign(paste("Pi", j, sep=""), (-3/4)*log(1-(4/3)*get(paste("Pip", j, sep="")))) listeFinale = get(paste("Pi", j, sep="")) } else{ listeFinale = get(paste("Pip", j, sep="")) } resultatFinal = c(paste("population", j),listeFinale) return(resultatFinal) } derp = mclapply(c(1:length(colnames(pop_data))), traitement) herp = c() for (i in 1:length(derp)){herp=cbind(herp, derp[[i]])} FINAL = rbind(as.numeric(herp[2,])) colnames(FINAL) = herp[1,] print(FINAL) write.table(FINAL, file = "div_$gene.$name.div",row.names=FALSE,col.names=FALSE) EEE done < Cotesia_sesamiae_bracovirus_genes_list.txt done #estimate population differentiation FST #Merge population files for i in `ls *_sub.sync` do awk '{print $4}' "$i" > "$i"_r done awk '{print $1"\t"$2"\t"$3}' POP1_sub.sync > HEADER_sub.sync paste HEADER_sub.sync POP1_sub.sync_r POP2_sub.sync_r POP3_sub.sync_r POP4_sub.sync_r > popALL_sub.sync #extract gene regions (exons) perl create-genewise-sync.pl --input popALL_sub.sync --gtf Cotesia_sesamiae_bracovirus_genes.gtf --output popALL_sub_genes.sync #estimate FST perl ./fst-sliding.pl --input popALL_gene.sync --output popALL_gene.fst --min-count 6 --min-coverage 20 --min-covered-fraction 0.0 --max-coverage 100000 --window-size 10000 --step-size 10000 --pool-size 6:9:4:3