README file for countffrs.pl

Usage: perl countffrs.pl -i -n -s -r -v -f -g -R

To generate .list files (two-column files in which the first column lists the IDs of the sequences in the dataset and the second column gives the ID of the species to which each sequence was attributed; useful for calculating %oversplitting, %overlumping and %success):

Simcoal:
for i in `ls *fasta` ; do countffrs.pl -i $i -n 100 -s 1 -r 10 -g > $i.ffrs ; done
for i in `ls *ffrs` ; do csplit -f $i $i 1 {10} ; done ; rm *00; rm *11 ; sed -i 's/;/\n/g' *ffrs[0-9]*
for k in `ls *ffrs[0-9]*` ; do awk '{for (i=1; i<=NF; i++) {print $i"\t"NR; print $i"\t"NR}}' $k | sort -n > $k.list ; done

DendroPy 3 species:
for i in `ls *.fasta` ; do countffrs.pl -i $i -n 50 -s 3 -r 10 -g > $i.ffrs ; done
for i in `ls *ffrs` ; do csplit -f $i $i 1 {10} ; done ; rm *00; rm *11 ; sed -i 's/;/\n/g' *ffrs[0-9]*
for k in `ls *ffrs[0-9]*` ; do awk '{for (i=1; i<=NF; i++) {print $i"\t"NR; print $i"\t"NR}}' $k | sort -n > $k.list ; done

DendroPy 6 species:
for i in `ls *.fasta` ; do countffrs.pl -i $i -n 30 -s 6 -r 10 -g > $i.ffrs ; done
for i in `ls *ffrs` ; do csplit -f $i $i 1 {10} ; done ; rm *00; rm *11 ; sed -i 's/;/\n/g' *ffrs[0-9]*
for k in `ls *ffrs[0-9]*` ; do awk '{for (i=1; i<=NF; i++) {print $i"\t"NR; print $i"\t"NR}}' $k | sort -n > $k.list ; done