#!user/local/perl #Created by M. R. Snyder 2017 use Cwd; use File::Basename; use List::Util "min"; use File::Slurp; use Math::Round; my $dir = getcwd; #print "Directory is $dir\n"; $dirname = basename($dir); open (INR, "INReads.txt") || die "Cannot open INReads.txt: $!"; my %Reads; while (){ chomp ($_); my @columns = split (/\t/, $_); $Reads{$columns[0]} = $columns[1]; } opendir (DIR, $dir) || die "Cannot open $dir: $!"; my @files = readdir DIR; closedir DIR; foreach (@files){ if (($_ =~ /Res.txt/) && ($_ !~ /^\./) && $_ !~ /Summary/){ push (@ResFiles, $_); } } print "Result files here are:\n"; foreach (@ResFiles){ print "$_\n"; push (@samples, substr($ResFiles[$_], 0, 6)); } $outfilename = $dir."/".$dirname."BlastSummary.txt"; $outfilename2 = $dir."/".$dirname."GLBlastSummary.txt"; open (SUMRES, ">$outfilename") || die "Cannot open BlastSummary.txt: $!"; print SUMRES "Sample\tSpecies\tN OTUs\treads\tproportion\n"; open (SUMRES2, ">$outfilename2") || die "Cannot open GLBlastSummary.txt: $!"; print SUMRES2 "Sample\tSpecies\tN OTUs\treads\tproportion\n"; my @Covers = (); my @Idents = (); for my $x (0..$#ResFiles){ my @SampleRes = (); my $c = 0; my $TotalReads = 0; open (RESFILE, $ResFiles[$x]); print "File $ResFiles[$x]:\n"; $sample = substr($ResFiles[$x], 0, 6); if (exists ($Reads{$sample})){ $TotalReads = $Reads{$sample}; } else { print "$sample not in INReads.txt!\n"; } my @lines = (); while (){ chomp($_); push (@lines, $_); #print ($_); } close RESFILE; my @OTU = (); my %BlastHash = (); my %OTUCount = (); for my $l (0..$#lines){ #print "$lines[$y]\n"; my @columns = split(/\t/, $lines[$l]); #print (@columns); push (@OTU, $columns[0]); } #foreach (@OTU){print "$_\n";} my @eval = (); my @names = (); my $NoHits = 0; my $BadHits = 0; for my $O (0..$#OTU){ if ($OTU[$O] == $OTU[$O+1]){ my @columns = split(/\t/, $lines[$O]); my @SpCol = split(' ', $columns[2]); if ($columns[1] =~ /KJ135626\.1|JQ231114\.1|JQ231114\.1|AB045119\.1|AF352278.1|JQ346141.1|BM028495.1/){ next; } if ($columns[2] =~ /Hypop.* x Hypop/){ print "Hybrid Hypop $OTU[$O]!\n"; next; } if ($SpCol[0] =~ /\:/){ $Sp = join (" ", $SpCol[1], $SpCol[2]); } elsif ($SpCol [1] =~ /cf/){ $Sp = join (" ", $SpCol[0], $SpCol[2]); } else { if ($SpCol[0] =~ /\(/){ $Sp = join (" ", substr($SpCol[0], 1), $SpCol[1]); } else { $Sp = join (" ", $SpCol[0], $SpCol[1]); } } if ($Sp =~ /chrysocephalus/){ $Sp = "Notemigonus crysoleucas"; } if ($Sp =~ /Apollonia/){ $Sp = "Neogobius melanostomus"; } if ($Sp =~ /melanostomus/){ #print "Goby found in $ResFiles[$x], $OTU[$O]!\n"; } if ($Sp =~ /Hypoph/){ #print "Carp found in $ResFiles[$x], $OTU[$O]!\n"; } if ($Sp =~ /vitreus/){ #print "Walleye found in $ResFiles[$x], $OTU[$O]!\n"; } if ($Sp =~ /Gambusia/){ #print "Mosquito Fish found in $ResFiles[$x], $OTU[$O]!\n"; } if ($Sp =~ /Perca/){ #print "Perch found in $ResFiles[$x], $OTU[$O]!\n"; } if ($Sp =~ /Natural gynogenetic/){ next #print "Perch found in $ResFiles[$x], $OTU[$O]!\n"; } push (@names, $Sp); push (@eval, $columns[6]); push (@PIdent, $columns[4]); push (@PCover, $columns[5]); } else { my @columns = split(/\t/, $lines[$O]); my @SpCol = split(' ', $columns[2]); my $SpCol1 = $SpCol[0]; if ($columns[1] =~ /KJ135626\.1|JQ231114\.1|JQ231114\.1|AB045119\.1|AF352278.1|JQ346141.1|BM028495.1/){ next; } if ($SpCol[0] =~ /\:/){ $Sp = join (" ", $SpCol[1], $SpCol[2]); } elsif ($SpCol [1] =~ /cf/){ $Sp = join (" ", $SpCol[0], $SpCol[2]); } else { if ($SpCol[0] =~ /\(/){ $Sp = join (" ", substr($SpCol[0], 1), $SpCol[1]); } else { $Sp = join (" ", $SpCol[0], $SpCol[1]); } } if ($Sp =~ /chrysocephalus/){ $Sp = "Notemigonus crysoleucas"; } if ($Sp =~ /Apollonia/){ $Sp = "Neogobius melanostomus"; } if ($Sp =~ /melanostomus/){ #print "Goby found in $ResFiles[$x], $OTU[$O]!\n"; } if ($Sp =~ /Hypoph/){ #print "Carp found in $ResFiles[$x], $OTU[$O]!\n"; } if ($Sp =~ /vitreus/){ #print "Walleye found in $ResFiles[$x], $OTU[$O]!\n"; } if ($Sp =~ /Gambusia/){ #print "Mosquito Fish found in $ResFiles[$x], $OTU[$O]!\n"; } if ($Sp =~ /Perca/){ #print "Perch found in $ResFiles[$x], $OTU[$O]!\n"; } if ($Sp =~ /Natural gynogenetic/){ next #print "Perch found in $ResFiles[$x], $OTU[$O]!\n"; } push (@names, $Sp); push (@eval, $columns[6]); push (@PIdent, $columns[4]); push (@PCover, $columns[5]); my @ReadsCol = split (/\|/, $columns[0]); #Check for missing OTUs @NextReadsCol = split (/\|/, $OTU[$O+1]); $NextOTUNumObs = $NextReadsCol[0]; my $NextOTUNumExp = $ReadsCol[0] + 1; if (($NextOTUNumObs =~ /[0-9]/) && ($NextOTUNumExp != $NextOTUNumObs)){ print "OTU #$NextOTUNumExp missing from results!\n"; } #Check for bad hits my $PMatch = $PIdent[0] + $PCover[0]; if (($PIdent[0] < 90) || ($PCover[0] < 90)) { print "Ident or Cover < 90 in $OTU[$O]\n"; $NoHits++; @eval = (); @names = (); @PIdent = (); @PCover =(); next; } else { push (@Covers, $PCover[0]); push (@Idents, $PIdent[0]); } #Do stuff to arrays my @columns = split(/\t/, $lines[$O]); my $reads = $ReadsCol[1]; my $spp = $names[0]; my $min = min @eval; for $Es (0..$#eval){ if ($eval[$Es] == $min){ if (($Es == ($#eval)) && ($eval[$Es] == $min) && ($spp !~ /Carassius auratus/)){ print "All E values of $OTU[$O] in $sample were the minimum\n"; } my $namecurr = $names[$Es]; if ($spp =~ /$namecurr/) { next; } else { $spp = $spp.", "."$names[$Es]"; } } } if ($sample =~ /BLE/){ $TotalReads += $reads; } #} @eval = (); @names = (); @PIdent = (); @PCover =(); $SampleRes[$c][0] = $spp; $SampleRes[$c][1] = $reads; $SampleRes[$c][2] = $OTU[$O]; if (exists($OTUCount{$spp})){ $OTUCount{$spp}++; } else { $OTUCount{$spp} = 1; } $c++; } } open (SUMRES, ">>$outfilename") || die "Cannot open BlastSummary.txt: $!"; open (SUMRES2, ">>$outfilename2") || die "Cannot open GLBlastSummary.txt: $!"; for my $y (0..$#SampleRes){ $prop = (0 >= $TotalReads) ? ($SampleRes[$y][1]/1):($SampleRes[$y][1]/$TotalReads); if ($ResFiles[$x] =~ /GLRes/){ print SUMRES2 "$sample\t$SampleRes[$y][0]\t$OTUCount{$SampleRes[$y][0]}\t$SampleRes[$y][1]\t$prop\t$SampleRes[$y][2]\n"; } else { print SUMRES "$sample\t$SampleRes[$y][0]\t$OTUCount{$SampleRes[$y][0]}\t$SampleRes[$y][1]\t$prop\t$SampleRes[$y][2]\n"; } } close SUMRES; close SUMRES2; } # print "@Covers\n\n@Idents\n";