#!/usr/bin/env bash
#
# ITS2 read-processing pipeline.
#
# Run from the directory that holds the forward-read FASTQ files (*_R1_001.fastq).
# Steps, in order:
#   1. filter each sample's forward reads with usearch8 and relabel them
#   2. pool all samples and append a ";barcodelabel=<sample>" annotation
#   3. classify the pooled reads directly with vsearch (97 % identity)
#   4. build zOTUs with usearch11 (unoise3), classify them against a cleaned
#      reference (direct hit at 97 %, then sintax for the zOTUs without a hit)
#   5. write taxonomy.data and the per-sample zOTU table (combined.out)
#
# External tools required: usearch8, usearch11, vsearch, python2.7 (with the
# helper scripts fasta_number.py and uc2otutab.py), perl + prinseq-lite.pl,
# SeqFilter. Adjust the PATH-TO placeholders below before running.

set -euo pipefail

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Setting global variables
RF='_R1_001.fastq'
# shellcheck disable=SC2034  # RR is not referenced in this script; kept as in the original.
RR='_R2_001.fastq'

# set path
data=$(pwd)
u08=~/PATH-TO/usearch8.0.1477_i86osx32
u11=~/PATH-TO/usearch11.0.667_i86osx32
p=~/PATH-TO/python_scripts
s=~/PATH-TO/SeqFilter
pr=~/PATH-TO/prinseq-lite.pl

# Reference database used for the usearch verification step.
# prinseq-lite appends ".fasta" to the -out_good prefix itself.
ref_raw=its2.penn-malus2018-6b.fasta
ref_clean_prefix=its2.penn-malus2018-6c
ref_clean="${ref_clean_prefix}.fasta"

## Filtering

# Derive sample names from the forward-read file names (basename minus $RF).
samples=()
for fastq in "$data"/*"$RF"; do
  # An unmatched glob stays literal, so a non-existent path means "no input".
  [[ -e "$fastq" ]] || die "no *${RF} files found in ${data}"
  base=${fastq##*/}
  sample=${base%"$RF"}
  samples+=("$sample")
done
printf '%s\n' "${samples[@]}" > samples.txt

for sample in "${samples[@]}"; do
  echo "Processing >>> $sample <<<"
  "$u08" -fastq_filter "$data/$sample$RF" -fastq_maxee 1 \
    -fastaout "filter.$sample.fasta" -fastq_minlen 150

  echo "..parse"
  python2.7 "$p/fasta_number.py" "filter.$sample.fasta" "$sample." \
    > "parsed1.$sample.fasta"
  # Drop the "_L001" lane tag and turn every "." into "_".
  sed -e 's/_L001//g' -e 's/\./_/g' "parsed1.$sample.fasta" \
    > "parsed2.$sample.fasta"
done

## Clean Up

# Pool all samples; the sed call appends ";barcodelabel=<prefix>" to each
# header, where <prefix> is everything before the last "_" of the read label.
cat parsed2.* > its2.all.fasta
sed -e 's/^>\([a-zA-Z0-9_-]*\)_\(.*$\)$/>\1_\2;barcodelabel=\1/' \
  its2.all.fasta > reads_bc.fa

### VSEARCH Classification

# direct classification
# The .uc file written here is the one uc2otutab.py reads on the next line
# (the original wrote it to its2.combined.vs.txt and then read a file that
# was never created).
vsearch --usearch_global reads_bc.fa --db ../its2.penn-malus2018-5.fasta \
  -id 0.97 --uc its2.vs.auto.uc
python2.7 "$p/uc2otutab.py" its2.vs.auto.uc > its2.combined.vs.txt

# formatting output
# NOTE(review): "\t" inside a sed pattern is a GNU sed extension; confirm it
# behaves as a tab with the sed on the target machine (BSD/macOS sed may not).
sed -e 's/:k:[^^\t]*\t/\t/' its2.combined.vs.txt > combined.vs.otu
cut -f 1 its2.combined.vs.txt \
  | sed -e 's/;tax=/,/g;s/:/_/g' -e 's/;tax=.*,//g;s/:/_/g' -e 's/,c[^,]*//' \
  | sed 's/OTUId/,Kingdom,Phylum,Order,Family,Genus,Species,SubSpecies/' \
  > combined.vs.tax

### USEARCH verification Classification

# clustering ASVs
"$u11" -fastx_uniques reads_bc.fa -sizeout -fastaout reads_bc.derep.fa \
  --minuniquesize 3
"$u11" -sortbysize reads_bc.derep.fa -fastaout reads_bc.derep.sort.fa
"$u11" -unoise3 reads_bc.derep.sort.fa -zotus reads_bc.zotus.fa \
  -tabbedout unoise3.txt

# cleaning up reference database
# Without -out_good, prinseq-lite writes to "<input>_prinseq_good_<random>.fasta"
# and nothing useful to stdout, so the output name changes on every run.
# Naming the output explicitly makes the following steps reproducible.
perl "$pr" -fasta "$ref_raw" -ns_max_n 10 \
  -out_good "$ref_clean_prefix" -out_bad null

# direct classification
"$u11" -usearch_global reads_bc.zotus.fa -db "$ref_clean" -id 0.97 \
  -uc reads_bc.zotus_manRefs.uc -strand both

# Summary on stdout: count of hits per value of the 2nd ";"-field in column 10.
cut -f 10 reads_bc.zotus_manRefs.uc | cut -f 2 -d ';' | sort | uniq -c | sort

# Collect the query labels (column 9) of the "N" records, i.e. zOTUs without hit.
# grep exits 1 when nothing matches; that is not an error here.
{ grep '^N' reads_bc.zotus_manRefs.uc || [[ $? -eq 1 ]]; } | cut -f 9 > nohits
"$s" reads_bc.zotus.fa --ids nohits -o reads_bc.zotus.nohits.fa

# hierarchical classification
"$u11" -sintax reads_bc.zotus.nohits.fa -db "$ref_clean" \
  -tabbedout reads_bc.zotus.sintax -strand plus -sintax_cutoff 0.8

# formatting output
cut -f 1,4 reads_bc.zotus.sintax > reads_bc.zotus.sintax.cut
sed -E -e 's/\_[0-9]+//g' -e 's/,s:.*$//' reads_bc.zotus.sintax.cut \
  > reads_bc.zotus.sintax.cutx

# Keep only records with a target (drop lines containing a literal "*"),
# then strip the reference accession in front of the taxonomy string.
cut -f 9,10 reads_bc.zotus_manRefs.uc \
  | { grep -vF '*' || [[ $? -eq 1 ]]; } \
  | sed 's/[A-Za-z0-9]*;tax=//' \
  > reads_bc.zotus.cutx

printf '%s\n' ',kingdom,phylum,order,family,genus,species' > header.cutx
cat header.cutx reads_bc.zotus.cutx reads_bc.zotus.sintax.cutx \
  | sed -E \
      -e 's/[[:space:]]/,/' \
      -e 's/,p:Streptophyta,c:[^,]*,/,p:Streptophyta,/' \
      -e 's/,c:undef:[0-9]*$//' \
      -e 's/;$//' \
  > taxonomy.data

# Map all reads back onto the zOTUs to build the per-sample count table.
"$u11" -usearch_global reads_bc.fa -strand both -db reads_bc.zotus.fa \
  -id 0.97 -uc sample_zotus.uc
python2.7 "$p/uc2otutab.py" sample_zotus.uc > combined.txt
sed -e 's/OTUId//' combined.txt > combined.out