#by Paul Lewis, Ecology & Evolutionary Biology, UConn
"""Concatenate per-subset NEXUS datasets into one interleaved file per replicate.

For every replicate number r (1..nreps) this script:

* appends a ``begin paup; ... end;`` block to ``paupexport.nex`` containing
  ``exe out<r>.nex`` and ``export file=noninterleaved<r>.txt format=text``
  commands;
* writes ``out<r>.nex``, whose data matrix is built by reading the subset
  files ``1.txt`` .. ``<nsubsets>.txt`` in order and appending, from each,
  the ``Matrix ... ;`` text of the data block tagged ``[Dataset r]``.

All files are read from and written to the current working directory.
"""
import re # import regular expression module
import sys

nreps = 100
nsubsets = 82
seqlen = 31467


def extract_matrix(data, rep):
    """Return the matrix text of dataset number *rep* found in *data*.

    data -- full text of one subset file
    rep  -- 1-based dataset number, matched against a ``[Dataset <rep>]`` tag

    The returned string is everything between the word ``Matrix`` and the
    first following ``;`` inside the first ``Begin DATA; [Dataset <rep>] ...
    END;`` span.  Returns None if no such span exists or if it contains no
    ``Matrix ... ;`` section.
    """
    # Raw strings keep \s, \[ and \] as regex escapes rather than (invalid)
    # string escapes.  DOTALL lets the non-greedy '.+?' run across newlines.
    block = re.search(r'Begin DATA;\s+\[Dataset %d\](.+?)END;' % (rep,),
                      data, re.DOTALL | re.MULTILINE)
    if block is None:
        return None
    matrix = re.search(r'Matrix(.+?);', block.group(1),
                       re.DOTALL | re.MULTILINE)
    if matrix is None:
        return None
    return matrix.group(1)


def write_replicate(rep, outfname, subsets=nsubsets, length=seqlen):
    """Write the interleaved NEXUS file *outfname* for replicate *rep*.

    rep      -- 1-based replicate (dataset) number to pull from each subset
    outfname -- path of the NEXUS file to create (overwritten if present)
    subsets  -- number of subset files to read (``1.txt`` .. ``<subsets>.txt``)
    length   -- value written as NCHAR in the Dimensions line

    Raises IOError/OSError if a subset file cannot be read or the output
    cannot be written.  A subset file that lacks the requested dataset is
    skipped with a warning on stderr.
    """
    with open(outfname, 'w') as outf:
        outf.write('#nexus\n\n')
        outf.write('Begin data;\n')
        outf.write('Dimensions NTAX=60 NCHAR=%d;\n' % length)
        outf.write('Format MISSING=? GAP=- DATATYPE=DNA INTERLEAVE;\n')
        outf.write('Matrix\n')
        for subset in range(1, subsets + 1):
            # Single parenthesised argument: prints identically on Python 2 and 3.
            print(' Working on rep %d, subset %d...' % (rep, subset))
            subsetfname = '%d.txt' % (subset,)
            # Subset files are re-read for each replicate instead of being
            # cached, so only one file's text is held in memory at a time.
            with open(subsetfname, 'r') as inf:
                data = inf.read()
            matrix = extract_matrix(data, rep)
            if matrix is None:
                # Nothing is written for this subset, so the matrix will be
                # shorter than the NCHAR declared above; say so instead of
                # skipping silently.
                sys.stderr.write(
                    'Warning: no matrix for dataset %d found in %s; '
                    '%s will be incomplete\n' % (rep, subsetfname, outfname))
                continue
            outf.write(matrix)
            outf.write('\n')
        outf.write(';\n')
        outf.write('End;\n')


def main(reps=nreps, subsets=nsubsets, length=seqlen):
    """Write ``paupexport.nex`` and one ``out<r>.nex`` per replicate.

    reps    -- number of replicates to process (numbered from 1)
    subsets -- number of subset files per replicate
    length  -- NCHAR value written into every ``out<r>.nex``
    """
    with open('paupexport.nex', 'w') as paupf:
        paupf.write('#nexus\n\n')
        for rep in range(1, reps + 1):
            outfname = 'out%d.nex' % (rep,)
            exportedfname = 'noninterleaved%d.txt' % (rep,)
            paupf.write('begin paup;\n')
            paupf.write(' set warnreset=no;\n')
            paupf.write(' exe %s;\n' % outfname)
            paupf.write(' export file=%s format=text;\n' % exportedfname)
            paupf.write('end;\n\n')
            write_replicate(rep, outfname, subsets, length)


if __name__ == '__main__':
    main()