#!/usr/bin/perl
use strict;
#
# This program BLASTs all the sequences within a file to one another to ensure
# they are not duplicates of one another.
# Usage: After calling the program, type in the file that has queryIDs within
# it and the strange sequence(s) that was the best hit to this queryID. When I
# did this analysis this file ended in .weird. Because I need to actually blast
# sequences to one another and not just the IDs, you should then type in the
# file with all the original 454 reads. Then of course I need to know how stringent
# of an e-value you want to use when comparing these sequences to one another. The last
# thing I need is the name of the output file. I would recommend using something
# ending in .single.
my @inp = @ARGV; # filenames and e-value threshold should be in command line.
my $inp_file = $inp[0] or die "ERROR: Put .weird, database, thresholdE-valu
e and output file name at command line.\n\n";
my $data_file = $inp[1] or die "ERROR: Put .weird, database, thresholdE-valu
e and output file name at command line.\n\n";
my $e = $inp[2] or die "ERROR: Put .weird, database, thresholdE-valu
e and output file name at command line.\n\n";
my $output = $inp[3]; "ERROR: Put .weird, database, thresholdE-valu
e and output file name at command line.\n\n";
open (INPUT, "<$inp_file") or die "ERROR: $inp_file input file not found...\n\n"
;
open (DATABASE, "<$data_file") or die "ERROR: database file not found
...\n\n";
open (OUT, ">$output") or die "ERROR: $output would not open correctly\n\n";
print OUT "These sequences seem to align to another sequence that aligns to something strange\n";
#This forloop goes through the output from comparing the 454 reads to the entire
#NCBI database and isolates the unique ID number from each of the 454 reads.
#At the end of the forloop there is an array called @query housing all these unique IDs.
my @line_read = ;
my $i;
my $info;
my @summary;
my $ID1;
my $ID;
my @query;
for ($i=1; $i < $#line_read+1; $i++)
{
$info = $line_read[$i];
if ($info =~ /Query=/)
{
@summary = split(' ', $info);
$ID1 = $summary[1];
$ID = ">".$ID1;
push (@query, $ID);
}
}
#This forloop does a ton of stuff. It goes through each of the query IDs in @query
# and finds the sequence associated with that ID number. Then it prints that ID and sequence
#into a temporary file called TEMP. Then another forloop runs that isolates
#the next query ID and sequence in their own file called TEMP1. These two
#sequences are then BLASTed to one another and the output is analyzed. If the
#two sequences do NOT align to one another then the program prints "we're good"
#into the output. If the two sequences do overlap, it prints the two sequences and
#their alignment output into the output file. The forloop runs such that the first
#sequence within the @query array is compared to the second, then the third, fourth etc.
#Then the second sequence within @query is compared to the third, then fourth, etc.
#This scheme should be reflected in the output file.
my $j;
my $k;
my $l;
my $m;
my $ident1;
my @sequences = ;
my $seq;
my $seq1;
my @blasters;
my @blasters1;
my @blastResult;
print "Here are the number of ID sequences to get through: $#query+1\n";
for ($j=0; $j< $#query;$j++)
{
for ($k=0; $k < $#sequences; $k++)
{
if ($sequences[$k] =~ /$query[$j]/)
{
open (TEMP, ">seqOfInterest") or die "I couldn't open seqOfInterest\n";
$seq = $sequences[$k+1];
print TEMP "$query[$j]\n";
print TEMP "$seq";
}
}
close TEMP;
my @ridiculous;
my $evenMore;
for ($l = $j+1; $l < $#query+1; $l++)
{
for ($m = 0; $m<$#sequences; $m++)
{
$ident1 = $sequences[$m];
if ( $ident1 =~ /$query[$l]/)
{
open (TEMP1, ">others") or die "I can't seem to open TEMP1\n";
$seq1 = $sequences[$m+1];
print TEMP1 $ident1;
print TEMP1 $seq1;
#print "carriage return?";
#chomp $seq1;
#chomp $ident1;
}
}
close TEMP1;
system "blast -p blastn -i seqOfInterest -j others -o outputfile -e $e -m8";
open (BLASTOUT, "outputfile") or die "I can't seem to open the blast outputfile\n";
#my @coolStuff = ;
#my $neato;
#my $indicator = 0;
#foreach $neato (@coolStuff)
#{
#if ($neato =~ /No hits found/)
#{
#$indicator = 1;
#print "I found a No hits found!\n";
# last;
#}
#}
if (eof(BLASTOUT))
{
print "We're good at $l and $j!\n";
print OUT "We're good at $l and $j!\n";
}
else
{
print "I found two sequences that align to each other.\n";
print OUT "I found two sequences that align to each other.\n";
open (TEMP2, ";
print OUT @blasters;
close TEMP2;
open (TEMP3, ";
print OUT @blasters1;
close TEMP3;
@blastResult = ;
print OUT @blastResult;
close BLASTOUT;
}
}
}
close OUT;