# To remove the redundant positions and find the total coverage.
#
# Within the consensus file generated by sstacks there will be multiple
# entries for each locus if there are multiple genotypes called; this script
# removes the multiple calls and determines the total coverage at a position.
#
# Input file:  XXX.consensus.tsv
# Output file: XXX.compress.tsv, which contains the total coverage of all
#              genotypes at a position
#
# Before running this script the consensus reads need to be removed from the
# matches file, so run this command on unix/linux:
#
#     grep -v "consensus" XXX.matches.tsv > XXX.consensus.tsv
#
# Usage:
#     perl SCRIPT                        # uses the default paths set in main()
#     perl SCRIPT INPUT.tsv OUTPUT.tsv   # explicit input and output paths

use strict;
use warnings;

# Collapse consecutive input lines that share the same position into a single
# output line carrying the summed coverage.
#
# Parameters:
#   @records - raw input lines; each is split on tabs, the position is read
#              from the third field (index 2) and the coverage from the
#              seventh field (index 6).
#
# Returns:
#   A list of newline-terminated output lines, one per run of equal positions,
#   in input order.  A run of several lines is written as "POSITION TOTAL \n"
#   (with the trailing space) and a run of one line as "POSITION COVERAGE\n";
#   both layouts are kept exactly as the script has always written them so
#   that existing consumers of the output see the same bytes.
#
# Blank lines are ignored.  Lines with fewer than seven tab-separated fields
# are skipped with a warning instead of being processed as undefined values.
sub compress_records {
    my @records = @_;

    my $position_column = 2;
    my $coverage_column = 6;

    my @compressed;
    my ( $run_position, $run_total, $run_first_coverage );
    my $run_length = 0;

    # Emit the run collected so far (if any) and reset the run state.
    my $flush_run = sub {
        return if $run_length == 0;
        push @compressed,
            $run_length > 1
            ? "$run_position $run_total \n"
            : "$run_position $run_first_coverage\n";
        $run_length = 0;
        return;
    };

    RECORD:
    for my $record (@records) {

        # Strip the line ending up front so that a coverage value in the last
        # column never drags a stray newline into the output.
        ( my $line = $record ) =~ s/\r?\n\z//;
        next RECORD if $line eq '';

        my @fields = split /\t/, $line;
        if ( @fields <= $coverage_column ) {
            warn 'Skipping line with fewer than '
                . ( $coverage_column + 1 )
                . " tab-separated fields: $line\n";
            next RECORD;
        }
        my ( $position, $coverage )
            = @fields[ $position_column, $coverage_column ];

        # A change of position closes the current run.
        $flush_run->() if $run_length > 0 && $position ne $run_position;

        if ( $run_length == 0 ) {
            $run_position       = $position;
            $run_total          = 0;
            $run_first_coverage = $coverage;
        }
        $run_total += $coverage;
        $run_length++;
    }
    $flush_run->();

    return @compressed;
}

# Read the input file, compress it and write the output file.
#
# Parameters:
#   @args - either empty (the default paths below are used) or exactly two
#           elements: the input path followed by the output path.
#
# Dies when the argument count is wrong or when a file cannot be opened,
# written or closed.
sub main {
    my @args = @_;

    my $default_input
        = '/scratch2/graham9c/LWF-EcoRI-NheI-v2/stacksoutputgeno/D1-8/D1-96-8.consensus.tsv';
    my $default_output
        = '/scratch2/graham9c/LWF-EcoRI-NheI-v2/stacksoutputgeno/D1-8/D1-96-8.compress.tsv';

    if ( @args != 0 && @args != 2 ) {
        die "Usage: $0 [INPUT.consensus.tsv OUTPUT.compress.tsv]\n";
    }
    my ( $input_path, $output_path )
        = @args ? @args : ( $default_input, $default_output );

    open my $in, '<', $input_path
        or die "Cannot open '$input_path' for reading: $!";
    my @records = <$in>;
    close $in
        or die "Cannot close '$input_path': $!";

    open my $out, '>', $output_path
        or die "Cannot open '$output_path' for writing: $!";
    print {$out} compress_records(@records)
        or die "Cannot write to '$output_path': $!";

    # Buffered write errors only surface when the handle is closed.
    close $out
        or die "Cannot close '$output_path': $!";

    return;
}

# Run only when executed as a script, so the subs above can be loaded and
# exercised from another file without touching the default paths.
main(@ARGV) unless caller;