README file for countffrs.pl
Usage: perl countffrs.pl -i -n -s -r -v -f -g -R
To generate .list files (two-column files where the first column lists the IDs of the sequences in the dataset and the second column gives the ID of the species to which each sequence was attributed; useful for calculating %oversplitting, %overlumping and %success):
Simcoal:
for i in `ls *fasta` ; do countffrs.pl -i $i -n 100 -s 1 -r 10 -g > $i.ffrs ; done
for i in `ls *ffrs` ; do csplit -f $i $i 1 {10} ; done ; rm *00; rm *11 ; sed -i 's/;/\n/g' *ffrs[0-9]*
for k in `ls *ffrs[0-9]*` ; do awk '{for (i=1; i<=NF; i++) {print $i"\t"NR; print $i"\t"NR}}' $k | sort -n > $k.list ; done
DendroPy 3 species:
for i in `ls *.fasta` ; do countffrs.pl -i $i -n 50 -s 3 -r 10 -g > $i.ffrs ; done
for i in `ls *ffrs` ; do csplit -f $i $i 1 {10} ; done ; rm *00; rm *11 ; sed -i 's/;/\n/g' *ffrs[0-9]*
for k in `ls *ffrs[0-9]*` ; do awk '{for (i=1; i<=NF; i++) {print $i"\t"NR; print $i"\t"NR}}' $k | sort -n > $k.list ; done
DendroPy 6 species:
for i in `ls *.fasta` ; do countffrs.pl -i $i -n 30 -s 6 -r 10 -g > $i.ffrs ; done
for i in `ls *ffrs` ; do csplit -f $i $i 1 {10} ; done ; rm *00; rm *11 ; sed -i 's/;/\n/g' *ffrs[0-9]*
for k in `ls *ffrs[0-9]*` ; do awk '{for (i=1; i<=NF; i++) {print $i"\t"NR; print $i"\t"NR}}' $k | sort -n > $k.list ; done