# Morphological (CHAR data below) ## morphological (concatenated - raxml) * build phylip file by hand * run raxml: ~/git/raxml/raxmlHPC-PTHREADS-SSE3 -m BINGAMMA -N 20 -p 659741671 -n best -s ostario_morph.phylip -o outgroup -T 12 ~/git/raxml/raxmlHPC-PTHREADS-SSE3 -m BINGAMMA -N autoMRE -p 659741671 -b 890834888 -n autoMRE -s ostario_morph.phylip -o outgroup -T 12 ~/git/raxml/raxmlHPC-SSE3 -m BINGAMMA -f b -t RAxML_bestTree.best -z RAxML_bootstrap.autoMRE -n final -o outgroup ## morphological (concatenated - mrbayes) * convert raxml phylip to nexus mkdir tmp cp ostario_morph.phylip tmp/ python ~/git/phyluce/bin/align/convert_one_align_to_another.py --alignments tmp --output tmp2 --input-format phylip-relaxed --output-format nexus cp tmp2 ./ rm -rf tmp tmp2 * edit by hand to add mrbayes block * run in supermike #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=1:00:00 #PBS -o mb_stdout #PBS -e mb_stderr #PBS -A hpc_bfstart01 #PBS -N mb2 cd /home/brant/work/prosanta/morphological mpirun -np 16 -machinefile $PBS_NODEFILE mb ostario_morph.nexus # Sequence * assemble with Trinity using kmer 2 * get coverage: python ~/git/phyluce/bin/assembly/get_trinity_coverage.py \ --assemblies assemblies \ --assemblo-config assembly.conf \ --subfolder split-adapter-quality-trimmed \ --cores 12 \ --bwa-mem * get assembly coverage: taxon,Total contigs (after trimming),Total contigs coverage (x),Total contigs mean length ameirus-natalis2,14678,12.5,266.3 apteronotus-albifrons,1330,30.8,583.3 arius-felis,3382,14.7,269.8 astynax-aeneus,1716,25.2,468.8 auchenoglanis-occidentalis,14533,11.8,281.7 brachyhypopomus-occidentalis3,15468,10.6,314.9 brycinus-macrolepidotus,2122,13.8,283.7 bryconaethiops-yseuxi,14664,11.7,285.7 chanos-chanos2,10796,11.4,244.8 chirocentrus-dorab2,7579,10.5,260.2 citharinus-gibbosus,2007,13.5,299.9 ctenolucius-beani,1381,38.4,501.6 cyrpinella-venusta,2074,29.4,506.3 distichodus-hypostomatus,6250,13.9,295.9 distichodus-maculatus,2214,11.9,262.0 
dorosoma-pentense,2150,25.8,482.9 gonorynchus-sp2,7962,11.0,271.1 gymnotus-carapo,1055,25.8,542.4 gymnotus-cylindricus,58399,9.7,277.9 hepsetus-odoe2,36141,11.1,290.2 hoplias-microlepis,865,18.0,506.4 hyostomus-panamensis,1066,23.3,491.4 malapterurus-stiassnyae,833,19.3,561.3 moxostoma-poecilurum,1772,32.2,460.6 nannaethiops-unitaeniatus,10486,11.4,252.5 pangasius-cfpangasius2,19458,9.9,269.6 parakneria-abbreviata,1105,22.9,600.7 phraectolaemus-ansorgii2,9873,10.8,267.2 plotosus-lineatus,3042,47.9,508.0 puntiopiles-falcifer2,2932,10.6,286.9 sternopygus-macrurus,1176,26.8,569.3 synodontis-filamentosus,15509,10.6,288.2 thryssa-hamiltonii2,17864,11.0,239.6 xenocharax-crassus,8705,12.8,223.3 * move from scratch to ./assemblies (in /nfs/data1/working/pchakrabarty-fish/trinity-kmer2) * get match counts: python ~/git/phyluce/bin/assembly/match_contigs_to_probes.py \ --contigs assemblies/contigs \ --probes /nfs/data1/uce-probe-sets/uce-fish-1k-probe-set/fish-uce-1k-probes.fasta \ --output lastz \ --dupefile /nfs/data1/uce-probe-sets/uce-fish-1k-probe-set/fish-uce-1k-probes.toself.lastz \ --log-path log 2014-12-19 08:50:43,249 - match_contigs_to_probes - INFO - ameirus_natalis2: 537 (0.93%) uniques of 57842 contigs, 0 dupe probe matches, 97 UCE loci removed for matching multiple contigs, 69 contigs removed for matching multiple UCE loci 2014-12-19 08:50:44,918 - match_contigs_to_probes - INFO - amia_calva: 363 (31.87%) uniques of 1139 contigs, 0 dupe probe matches, 14 UCE loci removed for matching multiple contigs, 20 contigs removed for matching multiple UCE loci 2014-12-19 08:50:47,530 - match_contigs_to_probes - INFO - apteronotus_albifrons: 538 (37.99%) uniques of 1416 contigs, 0 dupe probe matches, 55 UCE loci removed for matching multiple contigs, 85 contigs removed for matching multiple UCE loci 2014-12-19 08:50:50,403 - match_contigs_to_probes - INFO - arius_felis: 432 (3.18%) uniques of 13592 contigs, 0 dupe probe matches, 22 UCE loci removed for matching multiple 
contigs, 46 contigs removed for matching multiple UCE loci 2014-12-19 08:50:52,887 - match_contigs_to_probes - INFO - astynax_aeneus: 537 (29.38%) uniques of 1828 contigs, 0 dupe probe matches, 43 UCE loci removed for matching multiple contigs, 71 contigs removed for matching multiple UCE loci 2014-12-19 08:50:57,940 - match_contigs_to_probes - INFO - auchenoglanis_occidentalis: 427 (1.62%) uniques of 26284 contigs, 0 dupe probe matches, 128 UCE loci removed for matching multiple contigs, 63 contigs removed for matching multiple UCE loci 2014-12-19 08:51:07,505 - match_contigs_to_probes - INFO - brachyhypopomus_occidentalis3: 518 (0.77%) uniques of 66895 contigs, 0 dupe probe matches, 129 UCE loci removed for matching multiple contigs, 71 contigs removed for matching multiple UCE loci 2014-12-19 08:51:10,595 - match_contigs_to_probes - INFO - brycinus_macrolepidotus: 503 (4.00%) uniques of 12564 contigs, 0 dupe probe matches, 29 UCE loci removed for matching multiple contigs, 57 contigs removed for matching multiple UCE loci 2014-12-19 08:51:15,451 - match_contigs_to_probes - INFO - bryconaethiops_yseuxi: 470 (1.68%) uniques of 27985 contigs, 0 dupe probe matches, 83 UCE loci removed for matching multiple contigs, 54 contigs removed for matching multiple UCE loci 2014-12-19 08:51:22,939 - match_contigs_to_probes - INFO - chanos_chanos2: 597 (1.27%) uniques of 47113 contigs, 0 dupe probe matches, 142 UCE loci removed for matching multiple contigs, 68 contigs removed for matching multiple UCE loci 2014-12-19 08:51:28,972 - match_contigs_to_probes - INFO - chirocentrus_dorab2: 550 (1.58%) uniques of 34728 contigs, 0 dupe probe matches, 112 UCE loci removed for matching multiple contigs, 73 contigs removed for matching multiple UCE loci 2014-12-19 08:51:32,069 - match_contigs_to_probes - INFO - citharinus_gibbosus: 536 (4.57%) uniques of 11727 contigs, 0 dupe probe matches, 32 UCE loci removed for matching multiple contigs, 62 contigs removed for matching multiple UCE 
loci 2014-12-19 08:51:34,721 - match_contigs_to_probes - INFO - ctenolucius_beani: 575 (38.90%) uniques of 1478 contigs, 0 dupe probe matches, 46 UCE loci removed for matching multiple contigs, 71 contigs removed for matching multiple UCE loci 2014-12-19 08:51:37,553 - match_contigs_to_probes - INFO - cyrpinella_venusta: 558 (25.10%) uniques of 2223 contigs, 0 dupe probe matches, 75 UCE loci removed for matching multiple contigs, 80 contigs removed for matching multiple UCE loci 2014-12-19 08:51:40,025 - match_contigs_to_probes - INFO - diaphus_theta: 383 (7.86%) uniques of 4872 contigs, 0 dupe probe matches, 43 UCE loci removed for matching multiple contigs, 38 contigs removed for matching multiple UCE loci 2014-12-19 08:51:46,283 - match_contigs_to_probes - INFO - distichodus_hypostomatus: 580 (1.41%) uniques of 41119 contigs, 0 dupe probe matches, 65 UCE loci removed for matching multiple contigs, 70 contigs removed for matching multiple UCE loci 2014-12-19 08:51:49,007 - match_contigs_to_probes - INFO - distichodus_maculatus: 479 (5.27%) uniques of 9096 contigs, 0 dupe probe matches, 41 UCE loci removed for matching multiple contigs, 52 contigs removed for matching multiple UCE loci 2014-12-19 08:51:51,945 - match_contigs_to_probes - INFO - dorosoma_pentense: 573 (24.03%) uniques of 2385 contigs, 0 dupe probe matches, 69 UCE loci removed for matching multiple contigs, 79 contigs removed for matching multiple UCE loci 2014-12-19 08:51:58,031 - match_contigs_to_probes - INFO - gonorynchus_sp2: 557 (1.48%) uniques of 37759 contigs, 0 dupe probe matches, 126 UCE loci removed for matching multiple contigs, 72 contigs removed for matching multiple UCE loci 2014-12-19 08:52:00,432 - match_contigs_to_probes - INFO - gymnotus_carapo: 532 (48.10%) uniques of 1106 contigs, 0 dupe probe matches, 36 UCE loci removed for matching multiple contigs, 71 contigs removed for matching multiple UCE loci 2014-12-19 08:52:28,637 - match_contigs_to_probes - INFO - 
gymnotus_cylindricus: 579 (0.25%) uniques of 230510 contigs, 0 dupe probe matches, 59 UCE loci removed for matching multiple contigs, 75 contigs removed for matching multiple UCE loci 2014-12-19 08:52:42,575 - match_contigs_to_probes - INFO - hepsetus_odoe2: 572 (0.55%) uniques of 104767 contigs, 0 dupe probe matches, 130 UCE loci removed for matching multiple contigs, 77 contigs removed for matching multiple UCE loci 2014-12-19 08:52:44,756 - match_contigs_to_probes - INFO - hoplias_microlepis: 511 (56.65%) uniques of 902 contigs, 0 dupe probe matches, 22 UCE loci removed for matching multiple contigs, 57 contigs removed for matching multiple UCE loci 2014-12-19 08:52:46,764 - match_contigs_to_probes - INFO - hyostomus_panamensis: 439 (37.46%) uniques of 1172 contigs, 0 dupe probe matches, 26 UCE loci removed for matching multiple contigs, 54 contigs removed for matching multiple UCE loci 2014-12-19 08:52:48,890 - match_contigs_to_probes - INFO - malapterurus_stiassnyae: 475 (53.13%) uniques of 894 contigs, 0 dupe probe matches, 19 UCE loci removed for matching multiple contigs, 62 contigs removed for matching multiple UCE loci 2014-12-19 08:52:51,770 - match_contigs_to_probes - INFO - moxostoma_poecilurum: 358 (18.82%) uniques of 1902 contigs, 0 dupe probe matches, 174 UCE loci removed for matching multiple contigs, 86 contigs removed for matching multiple UCE loci 2014-12-19 08:53:00,715 - match_contigs_to_probes - INFO - nannaethiops_unitaeniatus: 603 (0.94%) uniques of 64020 contigs, 0 dupe probe matches, 91 UCE loci removed for matching multiple contigs, 75 contigs removed for matching multiple UCE loci 2014-12-19 08:53:09,892 - match_contigs_to_probes - INFO - pangasius_cfpangasius2: 481 (0.79%) uniques of 60695 contigs, 0 dupe probe matches, 200 UCE loci removed for matching multiple contigs, 86 contigs removed for matching multiple UCE loci 2014-12-19 08:53:12,918 - match_contigs_to_probes - INFO - parakneria_abbreviata: 586 (49.70%) uniques of 1179 
contigs, 0 dupe probe matches, 111 UCE loci removed for matching multiple contigs, 83 contigs removed for matching multiple UCE loci 2014-12-19 08:53:20,253 - match_contigs_to_probes - INFO - phraectolaemus_ansorgii2: 522 (1.05%) uniques of 49797 contigs, 0 dupe probe matches, 120 UCE loci removed for matching multiple contigs, 70 contigs removed for matching multiple UCE loci 2014-12-19 08:53:21,480 - match_contigs_to_probes - INFO - plotosus_lineatus: 122 (3.74%) uniques of 3262 contigs, 0 dupe probe matches, 1 UCE loci removed for matching multiple contigs, 22 contigs removed for matching multiple UCE loci 2014-12-19 08:53:25,800 - match_contigs_to_probes - INFO - puntiopiles_falcifer2: 454 (3.20%) uniques of 14173 contigs, 0 dupe probe matches, 245 UCE loci removed for matching multiple contigs, 94 contigs removed for matching multiple UCE loci 2014-12-19 08:53:28,421 - match_contigs_to_probes - INFO - sternopygus_macrurus: 551 (43.63%) uniques of 1263 contigs, 0 dupe probe matches, 53 UCE loci removed for matching multiple contigs, 77 contigs removed for matching multiple UCE loci * go ahead and get locus matches, danRer7*; dropping amia-calva, lepocu, umbra-limi, diaphus-theta, gadMor1, and orylat2: python ~/git/phyluce/bin/assembly/get_match_counts.py \ --locus-db lastz/probe.matches.sqlite \ --taxon-list-config taxon-set.conf \ --taxon-group 'closest-root' \ --incomplete-matrix \ --output incomplete-matrix/prosanta-INCOMPLETE-matrix.conf \ --extend /nfs/data1/uce-probe-sets/uce-fish-1k-probe-set/outgroup-loci/probe.matches.sqlite \ --log-path log 2014-12-19 09:04:11,056 - get_match_counts - INFO - There are 35 taxa in the taxon-group '[closest-root]' in the config file taxon-set.conf 2014-12-19 09:04:11,056 - get_match_counts - INFO - Getting UCE names from database 2014-12-19 09:04:11,070 - get_match_counts - INFO - There are 1314 total UCE loci in the database 2014-12-19 09:04:11,465 - get_match_counts - INFO - Getting UCE matches by organism to generate 
a INCOMPLETE matrix 2014-12-19 09:04:11,468 - get_match_counts - INFO - There are 1102 UCE loci in an INCOMPLETE matrix 2014-12-19 09:04:11,468 - get_match_counts - INFO - Writing the taxa and loci in the data matrix to /nfs/data1/working/pchakrabarty-fish/trinity-kmer2/incomplete-matrix/prosanta-INCOMPLETE-matrix.conf * get fastas:: python ~/git/phyluce/bin/assembly/get_fastas_from_match_counts.py \ --contigs ../assemblies/contigs \ --locus-db ../lastz/probe.matches.sqlite \ --match-count-output prosanta-INCOMPLETE-matrix.conf \ --output prosanta-INCOMPLETE-matrix.fasta \ --incomplete-matrix prosanta-INCOMPLETE-matrix.notstrict \ --extend-locus-db /nfs/data1/uce-probe-sets/uce-fish-1k-probe-set/outgroup-loci/probe.matches.sqlite \ --extend-locus-contigs /nfs/data1/uce-probe-sets/uce-fish-1k-probe-set/outgroup-fasta \ --log-path log * get UCE coverage: python ~/git/phyluce/bin/assembly/get_trinity_coverage_for_uce_loci.py \ --assemblies ../assemblies \ --match-count-output prosanta-INCOMPLETE-matrix.conf \ --type untrimmed \ --locus-db ../lastz/probe.matches.sqlite \ --output uce-coverage-info \ --log-path log \ --exclude umbra-limi amia-calva diaphus-theta * output coverage info: python ~/git/phyluce/bin/assembly/parse_trinity_coverage_for_uce_loci_log.py \ --log log/get_trinity_coverage_for_uce_loci.log taxon,UCE contigs,UCE contigs mean length,UCE contigs coverage (x),UCE contigs reads on target,UCE contigs unique reads aligned, ameirus-natalis2,537,556.5,31.6,11.4%,57.5% apteronotus-albifrons,538,780.6,42.4,62.3%,57.9% arius-felis,432,351.2,6.9,4.7%,59.4% astynax-aeneus,537,528.1,18.0,32.8%,22.4% auchenoglanis-occidentalis,427,484.1,21.2,7.9%,60.5% brachyhypopomus-occidentalis3,518,569.7,27.5,9.8%,48.4% brycinus-macrolepidotus,503,420.0,8.9,13.6%,44.1% bryconaethiops-yseuxi,470,461.5,17.4,6.4%,60.8% chanos-chanos2,597,468.6,30.1,14.2%,52.9% chirocentrus-dorab2,550,470.0,19.8,12.6%,50.3% citharinus-gibbosus,536,455.3,9.5,17.5%,44.8% 
ctenolucius-beani,575,667.1,52.0,65.3%,49.5% cyrpinella-venusta,558,639.3,21.3,33.1%,28.2% distichodus-hypostomatus,580,672.9,18.9,15.2%,51.4% distichodus-maculatus,479,385.0,9.6,15.9%,42.0% dorosoma-pentense,573,620.1,35.8,44.3%,26.6% gonorynchus-sp2,557,486.2,19.4,11.9%,46.5% gymnotus-carapo,532,665.3,30.6,66.3%,51.7% gymnotus-cylindricus,502,790.7,66.4,3.8%,33.0% hepsetus-odoe2,572,575.0,40.5,7.8%,61.9% hoplias-microlepis,511,581.0,18.5,68.5%,51.0% hyostomus-panamensis,439,518.8,13.9,30.4%,20.6% malapterurus-stiassnyae,475,656.6,20.3,66.5%,55.1% moxostoma-poecilurum,358,406.9,15.1,9.8%,20.9% nannaethiops-unitaeniatus,603,514.3,24.3,11.8%,55.6% pangasius-cfpangasius2,481,527.4,28.6,9.0%,55.5% parakneria-abbreviata,586,664.4,24.5,59.9%,60.5% phraectolaemus-ansorgii2,522,532.7,22.3,11.1%,51.1% plotosus-lineatus,122,271.6,7.1,0.4%,37.4% puntiopiles-falcifer2,454,444.6,11.7,15.2%,48.7% sternopygus-macrurus,551,717.9,33.1,64.4%,53.5% synodontis-filamentosus,475,516.1,18.3,7.4%,68.7% thryssa-hamiltonii2,573,386.6,17.9,5.8%,53.5% xenocharax-crassus,542,437.2,21.6,10.4%,51.3% * explode fasta files: python ~/git/phyluce/bin/assembly/explode_get_fastas_file.py \ --input prosanta-INCOMPLETE-matrix.fasta \ --output-dir exploded-fastas \ --by-taxon for i in exploded-fastas/*; do python ~/git/phyluce/bin/assembly/get_fasta_lengths.py $i --csv; done ameirus-natalis2.unaligned.fasta,537,298836,556.491620112,8.08039667542,201,1128,550.0,8 apteronotus-albifrons.unaligned.fasta,538,419987,780.644981413,13.2237296787,202,1553,770.5,151 arius-felis.unaligned.fasta,432,151722,351.208333333,5.0015021214,201,879,336.5,0 astynax-aeneus.unaligned.fasta,537,283587,528.094972067,8.22656008788,208,1184,514.0,5 auchenoglanis-occidentalis.unaligned.fasta,427,206724,484.131147541,8.11295633901,202,1278,466.0,2 brachyhypopomus-occidentalis3.unaligned.fasta,518,295098,569.687258687,8.53092924768,206,1164,575.0,9 
brycinus-macrolepidotus.unaligned.fasta,503,211273,420.02584493,6.53603093471,203,915,398.0,0 bryconaethiops-yseuxi.unaligned.fasta,470,216910,461.510638298,8.04346927797,201,1058,423.5,1 chanos-chanos2.unaligned.fasta,597,279773,468.631490787,6.33751443869,202,976,463.0,0 chirocentrus-dorab2.unaligned.fasta,550,258516,470.029090909,7.30741786149,203,1126,444.0,2 citharinus-gibbosus.unaligned.fasta,536,244054,455.324626866,6.88669977728,204,1036,442.0,2 ctenolucius-beani.unaligned.fasta,575,383584,667.102608696,9.56106597438,206,1237,684.0,34 cyrpinella-venusta.unaligned.fasta,558,356710,639.265232975,9.81844984282,202,1263,659.5,27 danRer7.unaligned.fasta,640,2447474,3824.178125,17.4872586423,1625,4200,3998.0,640 distichodus-hypostomatus.unaligned.fasta,580,390302,672.934482759,9.89320038931,206,1369,676.0,46 distichodus-maculatus.unaligned.fasta,479,184421,385.012526096,5.52221663382,201,852,366.0,0 dorosoma-pentense.unaligned.fasta,573,355323,620.109947644,10.7623845749,204,1496,582.0,54 gonorynchus-sp2.unaligned.fasta,557,270808,486.190305206,6.67301179433,204,968,471.0,0 gymnotus-carapo.unaligned.fasta,532,353955,665.328947368,10.7105583773,203,1325,661.5,48 gymnotus-cylindricus.unaligned.fasta,579,465122,803.319516408,13.867928841,201,1726,775.0,165 hepsetus-odoe2.unaligned.fasta,572,328899,574.998251748,7.89631654585,202,1076,568.5,8 hoplias-microlepis.unaligned.fasta,511,296892,581.001956947,10.2448540941,201,1274,556.0,24 hyostomus-panamensis.unaligned.fasta,439,227744,518.77904328,9.17448377923,201,1085,505.0,3 malapterurus-stiassnyae.unaligned.fasta,475,311867,656.562105263,12.1350372519,201,1450,656.0,52 moxostoma-poecilurum.unaligned.fasta,358,145687,406.946927374,7.46124885718,204,831,377.5,0 nannaethiops-unitaeniatus.unaligned.fasta,603,310094,514.252072968,7.58015113799,201,1078,511.0,1 pangasius-cfpangasius2.unaligned.fasta,481,253702,527.446985447,8.5952620397,202,1129,526.0,5 
parakneria-abbreviata.unaligned.fasta,586,389347,664.414675768,11.8470336981,201,1434,692.5,82 phraectolaemus-ansorgii2.unaligned.fasta,522,278059,532.680076628,7.18863156742,205,1186,534.5,4 plotosus-lineatus.unaligned.fasta,122,33136,271.606557377,5.37199779415,201,503,252.0,0 puntiopiles-falcifer2.unaligned.fasta,454,201869,444.645374449,6.57432519235,201,900,429.0,0 sternopygus-macrurus.unaligned.fasta,551,395580,717.931034483,11.5453950605,209,1440,715.0,88 synodontis-filamentosus.unaligned.fasta,475,245144,516.092631579,8.96595478523,204,1047,485.0,4 thryssa-hamiltonii2.unaligned.fasta,573,221533,386.619546248,5.87123415381,201,919,361.0,0 xenocharax-crassus.unaligned.fasta,542,236979,437.230627306,5.89360707892,201,838,432.5,0 * explode fasta files for sate: python ~/git/phyluce/bin/assembly/explode_get_fastas_file.py \ --input prosanta-INCOMPLETE-matrix.fasta \ --output-dir exploded-fastas-by-locus * get list of loci w/ fewer than 3 taxa, alignments which will fail in SATé: python /nfs/data1/tmp/output_list_of_taxon_counts.py \ --fastas exploded-fastas-by-locus > exclude.txt * filter those loci based on the exclude list: mkdir exploded-fastas-for-sate cd exploded-fastas-by-locus # copy only alignments not in exclude file find . -type f | grep -v -f ../exclude.txt | xargs cp -t ../exploded-fastas-for-sate * zip those up, copy to zcluster tar -czvf exploded-fastas-for-sate.tar.gz exploded-fastas-for-sate * run on zcluster: * copy zcluster alignments to their own folder: for i in sate-alignments/*; do cp $i/*.marker001.*.unaligned.aln sate-alignment-fastas/; done * some of these have zero file sizes for the alignment files (< 3 taxa), locate those * move those to "failures" find . 
-type d | grep -f failures.txt | xargs mv -t failures/ * that grepped some things that didn't fit, so move those back: cd failures mv uce-1013 uce-1015 uce-1016 uce-1017 uce-1018 uce-1110 uce-1112 uce-1113 uce-1115 uce-1117 uce-1118 uce-1119 uce-730 uce-731 uce-734 uce-736 uce-737 uce-739 uce-761 uce-762 uce-765 uce-766 uce-767 uce-768 uce-769 ../ * now, copy actual alignments to "alignments" for i in *; do cp $i/*.marker001.*.unaligned.aln alignments; done tar -cvf alignments.tar.gz alignments * download those to local. rename to sate-alignments. zip up all files on zcluster for archiving. * get archived files (sate-alignments.tar.gz) * rename files in sate-alignments folder autoload -U zmv zmv '(*).marker001.*.unaligned.aln' '$1.fasta' $ ls | wc -l 844 * trim with gblocks: python ~/git/phyluce/bin/align/get_gblocks_trimmed_alignments_from_untrimmed.py \ --alignments sate-alignments \ --output sate-alignments-gblocks \ --b1 0.5 \ --b4 8 \ --cores 12 \ --log log * strip locus names: python ~/git/phyluce/bin/align/remove_locus_name_from_nexus_lines.py \ --alignments sate-alignments-gblocks \ --output sate-alignments-gblocks-clean \ --cores 12 \ --log-path log * get stats: python ~/git/phyluce/bin/align/get_align_summary_data.py \ --alignments sate-alignments-gblocks-clean \ --cores 12 \ --log-path log 2014-12-19 12:01:21,129 - get_align_summary_data - INFO - ----------------------- Alignment summary ----------------------- 2014-12-19 12:01:21,129 - get_align_summary_data - INFO - [Alignments] loci: 844 2014-12-19 12:01:21,130 - get_align_summary_data - INFO - [Alignments] length: 212,546 2014-12-19 12:01:21,130 - get_align_summary_data - INFO - [Alignments] mean: 251.83 2014-12-19 12:01:21,130 - get_align_summary_data - INFO - [Alignments] 95% CI: 5.05 2014-12-19 12:01:21,130 - get_align_summary_data - INFO - [Alignments] min: 103 2014-12-19 12:01:21,130 - get_align_summary_data - INFO - [Alignments] max: 567 2014-12-19 12:01:21,131 - get_align_summary_data - INFO - 
------------------------- Taxon summary ------------------------- 2014-12-19 12:01:21,131 - get_align_summary_data - INFO - [Taxa] mean: 20.77 2014-12-19 12:01:21,132 - get_align_summary_data - INFO - [Taxa] 95% CI: 0.61 2014-12-19 12:01:21,132 - get_align_summary_data - INFO - [Taxa] min: 4 2014-12-19 12:01:21,132 - get_align_summary_data - INFO - [Taxa] max: 35 2014-12-19 12:01:21,132 - get_align_summary_data - INFO - ----------------- Missing data from trim summary ---------------- 2014-12-19 12:01:21,133 - get_align_summary_data - INFO - [Missing] mean: 0.00 2014-12-19 12:01:21,133 - get_align_summary_data - INFO - [Missing] 95% CI: 0.00 2014-12-19 12:01:21,133 - get_align_summary_data - INFO - [Missing] min: 0.00 2014-12-19 12:01:21,133 - get_align_summary_data - INFO - [Missing] max: 0.00 2014-12-19 12:01:21,143 - get_align_summary_data - INFO - -------------------- Character count summary -------------------- 2014-12-19 12:01:21,143 - get_align_summary_data - INFO - [All characters] 4,495,676 2014-12-19 12:01:21,143 - get_align_summary_data - INFO - [Nucleotides] 4,354,573 2014-12-19 12:01:21,145 - get_align_summary_data - INFO - ---------------- Data matrix completeness summary --------------- 2014-12-19 12:01:21,145 - get_align_summary_data - INFO - [Matrix 50%] 567 alignments 2014-12-19 12:01:21,145 - get_align_summary_data - INFO - [Matrix 55%] 530 alignments 2014-12-19 12:01:21,145 - get_align_summary_data - INFO - [Matrix 60%] 487 alignments 2014-12-19 12:01:21,145 - get_align_summary_data - INFO - [Matrix 65%] 464 alignments 2014-12-19 12:01:21,145 - get_align_summary_data - INFO - [Matrix 70%] 412 alignments 2014-12-19 12:01:21,146 - get_align_summary_data - INFO - [Matrix 75%] 353 alignments 2014-12-19 12:01:21,146 - get_align_summary_data - INFO - [Matrix 80%] 278 alignments 2014-12-19 12:01:21,146 - get_align_summary_data - INFO - [Matrix 85%] 222 alignments 2014-12-19 12:01:21,146 - get_align_summary_data - INFO - [Matrix 90%] 99 alignments 
2014-12-19 12:01:21,146 - get_align_summary_data - INFO - [Matrix 95%] 19 alignments 2014-12-19 12:01:21,146 - get_align_summary_data - INFO - ------------------------ Character counts ----------------------- 2014-12-19 12:01:21,146 - get_align_summary_data - INFO - [Characters] '-' is present 141,103 times 2014-12-19 12:01:21,147 - get_align_summary_data - INFO - [Characters] 'A' is present 1,189,974 times 2014-12-19 12:01:21,147 - get_align_summary_data - INFO - [Characters] 'C' is present 986,931 times 2014-12-19 12:01:21,147 - get_align_summary_data - INFO - [Characters] 'G' is present 972,763 times 2014-12-19 12:01:21,147 - get_align_summary_data - INFO - [Characters] 'T' is present 1,204,905 times 2014-12-19 12:01:21,147 - get_align_summary_data - INFO - ================ Completed get_align_summary_data =============== ## 75 % complete matrix * copy alignments (75% complete):: python ~/git/phyluce/bin/align/get_only_loci_with_min_taxa.py \ --alignments sate-alignments-gblocks-clean \ --taxa 35 \ --output sate-gblocks-clean-75p-complete \ --percent 0.75 \ --cores 12 \ --log log 2014-12-19 12:11:40,957 - get_only_loci_with_min_taxa - INFO - Copied 353 alignments of 844 total containing ≥ 0.75 proportion of taxa (n = 26) - get summary stats before adding missing data characters:: python ~/git/phyluce/bin/align/get_align_summary_data.py \ --alignments sate-gblocks-clean-75p-complete \ --cores 12 \ --log-path log 2014-12-19 12:12:05,327 - get_align_summary_data - INFO - ----------------------- Alignment summary ----------------------- 2014-12-19 12:12:05,327 - get_align_summary_data - INFO - [Alignments] loci: 353 2014-12-19 12:12:05,327 - get_align_summary_data - INFO - [Alignments] length: 95,274 2014-12-19 12:12:05,328 - get_align_summary_data - INFO - [Alignments] mean: 269.90 2014-12-19 12:12:05,328 - get_align_summary_data - INFO - [Alignments] 95% CI: 8.01 2014-12-19 12:12:05,328 - get_align_summary_data - INFO - [Alignments] min: 116 2014-12-19 
12:12:05,328 - get_align_summary_data - INFO - [Alignments] max: 563 2014-12-19 12:12:05,329 - get_align_summary_data - INFO - ------------------------- Taxon summary ------------------------- 2014-12-19 12:12:05,329 - get_align_summary_data - INFO - [Taxa] mean: 29.28 2014-12-19 12:12:05,329 - get_align_summary_data - INFO - [Taxa] 95% CI: 0.21 2014-12-19 12:12:05,329 - get_align_summary_data - INFO - [Taxa] min: 26 2014-12-19 12:12:05,329 - get_align_summary_data - INFO - [Taxa] max: 35 2014-12-19 12:12:05,330 - get_align_summary_data - INFO - ----------------- Missing data from trim summary ---------------- 2014-12-19 12:12:05,330 - get_align_summary_data - INFO - [Missing] mean: 0.00 2014-12-19 12:12:05,330 - get_align_summary_data - INFO - [Missing] 95% CI: 0.00 2014-12-19 12:12:05,330 - get_align_summary_data - INFO - [Missing] min: 0.00 2014-12-19 12:12:05,330 - get_align_summary_data - INFO - [Missing] max: 0.00 2014-12-19 12:12:05,335 - get_align_summary_data - INFO - -------------------- Character count summary -------------------- 2014-12-19 12:12:05,335 - get_align_summary_data - INFO - [All characters] 2,802,974 2014-12-19 12:12:05,335 - get_align_summary_data - INFO - [Nucleotides] 2,723,672 2014-12-19 12:12:05,336 - get_align_summary_data - INFO - ---------------- Data matrix completeness summary --------------- 2014-12-19 12:12:05,336 - get_align_summary_data - INFO - [Matrix 50%] 353 alignments 2014-12-19 12:12:05,336 - get_align_summary_data - INFO - [Matrix 55%] 353 alignments 2014-12-19 12:12:05,336 - get_align_summary_data - INFO - [Matrix 60%] 353 alignments 2014-12-19 12:12:05,336 - get_align_summary_data - INFO - [Matrix 65%] 353 alignments 2014-12-19 12:12:05,336 - get_align_summary_data - INFO - [Matrix 70%] 353 alignments 2014-12-19 12:12:05,336 - get_align_summary_data - INFO - [Matrix 75%] 353 alignments 2014-12-19 12:12:05,337 - get_align_summary_data - INFO - [Matrix 80%] 278 alignments 2014-12-19 12:12:05,337 - get_align_summary_data 
- INFO - [Matrix 85%] 222 alignments 2014-12-19 12:12:05,337 - get_align_summary_data - INFO - [Matrix 90%] 99 alignments 2014-12-19 12:12:05,337 - get_align_summary_data - INFO - [Matrix 95%] 19 alignments 2014-12-19 12:12:05,337 - get_align_summary_data - INFO - ------------------------ Character counts ----------------------- 2014-12-19 12:12:05,337 - get_align_summary_data - INFO - [Characters] '-' is present 79,302 times 2014-12-19 12:12:05,337 - get_align_summary_data - INFO - [Characters] 'A' is present 754,549 times 2014-12-19 12:12:05,338 - get_align_summary_data - INFO - [Characters] 'C' is present 609,070 times 2014-12-19 12:12:05,338 - get_align_summary_data - INFO - [Characters] 'G' is present 591,608 times 2014-12-19 12:12:05,338 - get_align_summary_data - INFO - [Characters] 'T' is present 768,445 times 2014-12-19 12:12:05,338 - get_align_summary_data - INFO - ================ Completed get_align_summary_data =============== * prep raxml file:: python ~/git/phyluce/bin/align/format_nexus_files_for_raxml.py \ --alignments sate-gblocks-clean-75p-complete \ --output sate-gblocks-clean-75p-complete-raxml \ --log-path log --charsets ### partitioning * prep partition finder config file by hand. use hcluster method and raxml, only, due to size. 
Then: python ~/bin/PartitionFinderV1.1.1/PartitionFinder.py --raxml --weights '1, 1, 1, 1' -p 12 partition_finder.cfg ### partitioned (raxml) * copy best_scheme raxml partitioned to part.txt * upload to supermike * best tree search: #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=72:00:00 #PBS -o raxml_stdout #PBS -e raxml_stderr #PBS -A hpc_bfstart01 #PBS -N raxml-best cd /home/brant/work/prosanta/raxml-uce raxmlHPC-PTHREADS-AVX -m GTRGAMMA -N 20 -p 680965947 -n best -q part.txt -s sate-gblocks-clean-75p-complete.phylip -o thryssa_hamiltonii2 -T 16 * bootreps: #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=72:00:00 #PBS -o raxml-boot_stdout #PBS -e raxml-boot_stderr #PBS -A hpc_bfstart01 #PBS -N raxml-boot cd /home/brant/work/prosanta/raxml-uce raxmlHPC-PTHREADS-AVX -m GTRGAMMA -N autoMRE -p 680965947 -b 969871906 -n bootrep -q part.txt -s sate-gblocks-clean-75p-complete.phylip -o thryssa_hamiltonii2 -T 16 * build consensus tree ~/git/raxml/raxmlHPC-SSE3 -m GTRGAMMA -f b -t RAxML_bestTree.best -z RAxML_bootstrap.bootrep -n final -o thryssa_hamiltonii2 ### unpartitioned (raxml) * best tree search: #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=72:00:00 #PBS -o raxml_stdout #PBS -e raxml_stderr #PBS -A hpc_bfstart01 #PBS -N raxml-best-nopart cd /home/brant/work/prosanta/raxml-uce-no-part raxmlHPC-PTHREADS-AVX -m GTRGAMMA -N 20 -p 785979080 -n best -s sate-gblocks-clean-75p-complete.phylip -o thryssa_hamiltonii2 -T 16 * bootreps: #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=72:00:00 #PBS -o raxml-boot_stdout #PBS -e raxml-boot_stderr #PBS -A hpc_bfstart01 #PBS -N raxml-boot-no-part cd /home/brant/work/prosanta/raxml-uce-no-part raxmlHPC-PTHREADS-AVX -m GTRGAMMA -N autoMRE -p 785979080 -b 611726682 -n bootrep -s sate-gblocks-clean-75p-complete.phylip -o thryssa_hamiltonii2 -T 16 * build consensus tree ~/git/raxml/raxmlHPC-SSE3 -m GTRGAMMA -f b -t RAxML_bestTree.best -z 
RAxML_bootstrap.bootrep -n final -o thryssa_hamiltonii2 ### partitioned (mrbayes) * convert partitioned file from phylip to nexus: python ~/git/phyluce/bin/align/convert_one_align_to_another.py \ --alignments sate-gblocks-clean-75p-complete-raxml \ --output sate-gblocks-clean-75p-complete-nexus \ --input-format phylip-relaxed --output-format nexus * add mrbayes block * setup queue file #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=72:00:00 #PBS -o mb_stdout #PBS -e mb_stderr #PBS -A hpc_bfstart01 #PBS -N mb cd /home/brant/work/prosanta/mrbayes-uce mpirun -np 16 -machinefile $PBS_NODEFILE mb sate-gblocks-clean-75p-complete.nexus ### with CHAR data * merge in character data by hand ### with CHAR data partitioned (raxml) * build part.txt using previous partitioned and also adding in BIN: DNA, p1 = 1-357, 1580-1866, 1867-2141, 2554-2745, 3675-3888, 4389-4579, 5149-5420, 6852-7235, 7988-8209, 8455-8797, 9186-9509, 10371-10581, 15998-16208, 16209-16327, 17297-17619, 18341-18629, 19489-19930, 21950-22212, 23835-24008, 24009-24385, 24386-24815, 25101-25359, 26866-27187, 28816-29038, 30183-30437, 33402-33681, 34038-34459, 37793-37982, 39036-39414, 40611-40913, 41224-41583, 42860-43303, 47007-47477, 47963-48282, 48486-48707, 48897-49326, 51171-51419, 51718-52013, 52014-52375, 52565-52897, 53674-54081, 54082-54341, 54342-54762, 56547-56831, 57529-57802, 57803-58155, 65521-65855, 67123-67438, 70608-70980, 71712-71900, 73732-73995, 74442-74635, 75889-76152, 77048-77297, 77949-78219, 78677-78922, 81290-81520, 81708-81983, 88910-89117, 89118-89351, 89864-90055, 90666-90876, 90877-91142, 91908-92191, 93625-93943, 94914-95242, 95537-95733, 95734-96144, 97009-97373, 97596-97846, 98176-98458, 98459-98726, 98727-99134, 101791-101992, 102659-102844, 103361-103571, 103572-103828, 104794-105164, 110111-110366, 113306-113703, 114199-114602, 117332-117546, 120174-120319, 121492-121641, 124504-124747, 124748-124892, 125124-125411, 125412-125660, 127388-127682, 
127683-127949, 127950-128175, 128362-128813, 128814-128978, 128979-129232, 132253-132476, 132477-132613, 132820-133002, 133356-133762, 136247-136566, 137756-138163, 140142-140511, 141031-141320, 143456-143664, 143665-144111, 144447-144645 DNA, p2 = 358-542 DNA, p3 = 543-836, 62904-63138, 84174-84358 DNA, p4 = 837-1031, 29689-29938, 34743-34991, 62627-62903, 108187-108411, 129530-129645, 145121-145442 DNA, p5 = 1032-1315, 1316-1579, 2142-2553, 2887-3200, 3889-4137, 4580-4809, 4810-5148, 5421-5685, 5909-6252, 7522-7720, 7721-7987, 8210-8454, 8978-9185, 9510-9830, 10021-10370, 10582-10751, 10904-11237, 11493-11726, 11727-11959, 11960-12219, 12962-13225, 13751-14091, 15294-15633, 16610-16789, 17124-17296, 17620-17910, 18813-19071, 19231-19488, 20360-20647, 21055-21291, 21292-21428, 21429-21669, 22538-22795, 22796-23035, 23233-23581, 23582-23834, 24816-25100, 25749-26188, 26526-26865, 27599-27780, 27781-28016, 29192-29536, 29939-30182, 31252-31525, 31526-31781, 31782-31972, 31973-32200, 32201-32441, 32442-32681, 35883-36240, 36470-36758, 37205-37437, 37587-37792, 37983-38228, 38819-39035, 39415-39706, 39707-40019, 41584-41796, 41998-42353, 42354-42670, 42671-42859, 43770-43984, 43985-44361, 46206-46540, 47478-47672, 48708-48896, 49327-49598, 49900-50238, 50239-50520, 50709-50892, 50893-51170, 52376-52564, 52898-53153, 53490-53673, 54763-55052, 55822-56006, 56832-57002, 57319-57528, 58156-58520, 60215-60616, 60617-60891, 61413-61613, 61866-62088, 62373-62626, 63566-63751, 63929-64129, 64130-64396, 64632-64825, 64962-65169, 66721-67122, 67439-67727, 67728-68018, 68019-68308, 68309-68603, 68604-68986, 68987-69327, 69766-70079, 70080-70377, 70378-70607, 71187-71474, 71475-71711, 71901-72179, 73013-73356, 73357-73731, 73996-74193, 74194-74441, 74636-74914, 74915-75199, 75200-75337, 75495-75672, 76153-76423, 76424-76821, 77605-77948, 78923-79406, 81521-81707, 81984-82200, 82351-82658, 83192-83464, 83961-84173, 84359-84588, 85135-85333, 85587-85823, 85824-86242, 86243-86585, 
86843-87127, 88199-88415, 90056-90314, 90315-90665, 91738-91907, 92403-92828, 92829-93178, 94307-94578, 96339-96530, 96531-96730, 97374-97595, 97847-98175, 99306-99629, 99874-100104, 100105-100359, 100814-101021, 101022-101329, 101330-101546, 101993-102234, 102480-102658, 102987-103360, 104470-104793, 105165-105370, 105371-105623, 106456-106600, 107128-107475, 107476-107665, 109259-109410, 109778-110110, 110367-110649, 111083-111369, 111896-112263, 112264-112488, 113943-114198, 114818-115004, 115334-115537, 115538-115770, 115980-116302, 116624-116984, 118555-118797, 119099-119284, 119285-119542, 119894-120173, 120320-120584, 120735-121002, 121003-121198, 121642-121947, 122768-122992, 123746-124045, 126588-126871, 126872-127080, 130052-130384, 130651-130843, 130844-131275, 131470-131716, 133003-133355, 133763-133870, 133871-134315, 134316-134526, 134527-134710, 135391-135773, 136002-136246, 136567-136894, 137444-137755, 138164-138309, 138310-138665, 139028-139223, 139224-139486, 139487-139771, 139772-139976, 140512-140840, 141321-141564, 141565-141777, 141778-141985, 142201-142375, 142847-143087, 144112-144446, 145706-145897 DNA, p6 = 2746-2886, 3423-3674, 15165-15293, 15634-15997, 16328-16609, 34460-34742, 36241-36469, 38229-38531, 45296-45561, 47673-47962, 57003-57318, 63334-63565, 64397-64631, 66531-66720, 78472-78676, 80051-80226, 87641-87875, 94116-94306, 117768-118038, 119543-119893, 124046-124178, 127081-127387, 129233-129529, 137151-137443, 144646-144870 DNA, p7 = 3201-3422, 20233-20359, 28017-28250, 70981-71186, 105933-106129, 121948-122179 DNA, p8 = 4138-4388, 17911-18136, 123594-123745 DNA, p9 = 5686-5908, 91341-91737 DNA, p10 = 6253-6516 DNA, p11 = 6517-6851, 29537-29688, 45562-45753, 63752-63928, 83465-83638, 93944-94115, 117547-117767, 123293-123395, 134711-134889 DNA, p12 = 7236-7521, 56007-56274, 64826-64961 DNA, p13 = 8798-8977, 14509-14793, 18137-18340, 29039-29191, 43465-43625, 46541-46798, 46799-47006, 53154-53489, 55425-55821, 59757-60214, 
72180-72414, 79782-80050, 80227-80489, 82823-83061, 91143-91340, 93363-93624, 108953-109108, 109411-109777, 110650-110880, 113704-113942, 118798-119098, 122180-122338, 124179-124503, 128176-128361, 132614-132819, 140841-141030 DNA, p14 = 9831-10020, 14794-14970, 35243-35437, 44564-44747, 92192-92402 DNA, p15 = 10752-10903, 18630-18812, 123396-123593 DNA, p16 = 11238-11492, 14092-14325, 16790-17123, 25360-25608, 34992-35242, 36978-37204, 48283-48485, 96145-96338, 104064-104469, 105624-105932, 111370-111895, 131276-131469, 131717-131978, 135774-136001 DNA, p17 = 12220-12441, 45754-45975, 66180-66382, 72844-73012, 79407-79575, 80680-80955, 86586-86842, 87419-87640, 99135-99305, 113004-113305, 124893-125123, 143088-143237, 144871-145120 DNA, p18 = 12442-12704, 13226-13478, 44748-44897, 82659-82822, 118283-118554, 129856-130051, 134890-135082, 136895-137150, 142658-142846 DNA, p19 = 12705-12961, 23036-23232 DNA, p20 = 13479-13750, 75338-75494, 108694-108952, 142376-142657 DNA, p21 = 14326-14508 DNA, p22 = 14971-15164, 59194-59756 DNA, p23 = 19072-19230, 116985-117154 DNA, p24 = 19931-20232, 28251-28415, 33682-34037, 44898-45074, 45976-46205, 63139-63333, 72415-72620, 122339-122552, 122993-123292 DNA, p25 = 20648-20842, 33113-33401, 44362-44563, 56275-56546, 58521-58681, 60892-61203, 69543-69765, 83639-83960, 84841-85134, 101547-101790, 102235-102479, 112489-112819 DNA, p26 = 20843-21054 DNA, p27 = 21670-21949, 43304-43464 DNA, p28 = 22213-22537 DNA, p29 = 25609-25748 DNA, p30 = 26189-26324 DNA, p31 = 26325-26525, 94579-94913, 126367-126587 DNA, p32 = 27188-27598, 41797-41997, 49599-49899, 61204-61412, 106786-107127, 115005-115333, 118039-118282 DNA, p33 = 28416-28632 DNA, p34 = 28633-28815 DNA, p35 = 30438-30696 DNA, p36 = 30697-30878, 72621-72843, 110881-111082 DNA, p37 = 30879-31039 DNA, p38 = 31040-31251 DNA, p39 = 32682-32889 DNA, p40 = 32890-33112, 107666-107962, 116303-116623, 117155-117331, 135083-135390 DNA, p41 = 35438-35737, 40263-40610, 79576-79781, 
93179-93362 DNA, p42 = 35738-35882 DNA, p43 = 36759-36977, 40020-40262, 40914-41223, 50521-50708, 51420-51717, 62089-62372, 66383-66530, 82201-82350, 83062-83191, 89713-89863, 96731-97008, 102845-102986, 103829-104063, 108412-108693, 126106-126366, 131979-132252, 138666-138859, 141986-142200, 143238-143455, 145443-145705 DNA, p44 = 37438-37586, 87128-87418, 99630-99873 DNA, p45 = 38532-38818, 61614-61865, 100619-100813, 138860-139027 DNA, p46 = 43626-43769 DNA, p47 = 45075-45295, 80490-80679 DNA, p48 = 55053-55424 DNA, p49 = 58682-59193, 65856-66179, 80956-81289, 95243-95536, 106601-106785 DNA, p50 = 65170-65520, 89352-89712 DNA, p51 = 69328-69542 DNA, p52 = 75673-75888, 78220-78471, 85334-85586, 100360-100618 DNA, p53 = 76822-77047 DNA, p54 = 77298-77604 DNA, p55 = 84589-84840 DNA, p56 = 87876-88198 DNA, p57 = 88416-88634 DNA, p58 = 88635-88909 DNA, p59 = 106130-106455 DNA, p60 = 107963-108186 DNA, p61 = 109109-109258 DNA, p62 = 112820-113003, 120585-120734 DNA, p63 = 114603-114817 DNA, p64 = 115771-115979 DNA, p65 = 121199-121491 DNA, p66 = 122553-122767, 130385-130650 DNA, p67 = 125661-125874 DNA, p68 = 125875-126105 DNA, p69 = 129646-129855 DNA, p70 = 139977-140141 BIN, p71 = 145898-146023 * best tree #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=4:00:00 #PBS -o raxml_stdout #PBS -e raxml_stderr #PBS -N pro_50p_part_best #PBS -A hpc_bfearly02 cd /home/brant/work/prosanta/50p/ml-part raxmlHPC-PTHREADS-AVX -m GTRGAMMA -N 20 -p 359299968 -n best -q part.txt -s sate-gblocks-clean-50p-complete-with-characters.phylip -o thryssa_hamiltonii2 -T 16 * bootreps: #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=6:00:00 #PBS -o raxml-boot_stdout #PBS -e raxml-boot_stderr #PBS -N pro_50p_part_boot #PBS -A hpc_bfearly02 cd /home/brant/work/prosanta/50p/ml-part raxmlHPC-PTHREADS-AVX -m GTRGAMMA -N autoMRE -p 359299968 -b 180750782 -n bootrep -q part.txt -s sate-gblocks-clean-50p-complete-with-characters.phylip -o thryssa_hamiltonii2 -T 
16 * build consensus tree raxmlHPC-SSE3 -m GTRGAMMA -f b -t RAxML_bestTree.best -z RAxML_bootstrap.bootrep -n final -o thryssa_hamiltonii2 ### with CHAR data unpartitioned (raxml) * part.txt is as follows DNA, p1 = 1-145897 BIN, p2 = 145898-146023 * best tree #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=4:00:00 #PBS -o raxml_stdout #PBS -e raxml_stderr #PBS -N pro_50p_unpart_best #PBS -A hpc_bfearly02 cd /home/brant/work/prosanta/50p/ml-unpart raxmlHPC-PTHREADS-AVX -m GTRGAMMA -N 20 -p 588729696 -n best -q part.txt -s sate-gblocks-clean-50p-complete-with-characters.phylip -o thryssa_hamiltonii2 -T 16 * bootreps: #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=6:00:00 #PBS -o raxml-boot_stdout #PBS -e raxml-boot_stderr #PBS -N pro_50p_unpart_boot #PBS -A hpc_bfearly02 cd /home/brant/work/prosanta/50p/ml-unpart raxmlHPC-PTHREADS-AVX -m GTRGAMMA -N autoMRE -p 588729696 -b 14008711 -n bootrep -q part.txt -s sate-gblocks-clean-50p-complete-with-characters.phylip -o thryssa_hamiltonii2 -T 16 ### with CHAR data unpartitioned (mrbayes) * convert to nexus: python ~/git/phyluce/bin/align/convert_one_align_to_another.py --alignments test --output test2 --input-format phylip-relaxed --output-format nexus * setup queue file #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=72:00:00 #PBS -o mb_stdout #PBS -e mb_stderr #PBS -A hpc_bfstart02 #PBS -N mb_50p cd /home/brant/work/prosanta/50p/bayes mpirun -np 16 -machinefile $PBS_NODEFILE mb sate-gblocks-clean-50p-complete-with-characters.nexus ## 50 % complete matrix * copy alignments (50% complete):: python ~/git/phyluce/bin/align/get_only_loci_with_min_taxa.py \ --alignments sate-alignments-gblocks-clean \ --taxa 35 \ --output sate-gblocks-clean-50p-complete \ --percent 0.5 \ --cores 12 \ --log log 2014-12-20 08:51:08,079 - get_only_loci_with_min_taxa - INFO - Copied 567 alignments of 844 total containing ≥ 0.5 proportion of taxa (n = 17) - get summary stats before adding 
missing data characters:: python ~/git/phyluce/bin/align/get_align_summary_data.py \ --alignments sate-gblocks-clean-50p-complete \ --cores 12 \ --log-path log 2014-12-20 08:51:33,814 - get_align_summary_data - INFO - ----------------------- Alignment summary ----------------------- 2014-12-20 08:51:33,815 - get_align_summary_data - INFO - [Alignments] loci: 567 2014-12-20 08:51:33,815 - get_align_summary_data - INFO - [Alignments] length: 145,897 2014-12-20 08:51:33,815 - get_align_summary_data - INFO - [Alignments] mean: 257.31 2014-12-20 08:51:33,815 - get_align_summary_data - INFO - [Alignments] 95% CI: 6.17 2014-12-20 08:51:33,815 - get_align_summary_data - INFO - [Alignments] min: 103 2014-12-20 08:51:33,815 - get_align_summary_data - INFO - [Alignments] max: 563 2014-12-20 08:51:33,816 - get_align_summary_data - INFO - ------------------------- Taxon summary ------------------------- 2014-12-20 08:51:33,816 - get_align_summary_data - INFO - [Taxa] mean: 26.32 2014-12-20 08:51:33,817 - get_align_summary_data - INFO - [Taxa] 95% CI: 0.36 2014-12-20 08:51:33,817 - get_align_summary_data - INFO - [Taxa] min: 17 2014-12-20 08:51:33,817 - get_align_summary_data - INFO - [Taxa] max: 35 2014-12-20 08:51:33,817 - get_align_summary_data - INFO - ----------------- Missing data from trim summary ---------------- 2014-12-20 08:51:33,817 - get_align_summary_data - INFO - [Missing] mean: 0.00 2014-12-20 08:51:33,818 - get_align_summary_data - INFO - [Missing] 95% CI: 0.00 2014-12-20 08:51:33,818 - get_align_summary_data - INFO - [Missing] min: 0.00 2014-12-20 08:51:33,818 - get_align_summary_data - INFO - [Missing] max: 0.00 2014-12-20 08:51:33,825 - get_align_summary_data - INFO - -------------------- Character count summary -------------------- 2014-12-20 08:51:33,825 - get_align_summary_data - INFO - [All characters] 3,889,236 2014-12-20 08:51:33,825 - get_align_summary_data - INFO - [Nucleotides] 3,773,263 2014-12-20 08:51:33,827 - get_align_summary_data - INFO - 
---------------- Data matrix completeness summary --------------- 2014-12-20 08:51:33,827 - get_align_summary_data - INFO - [Matrix 50%] 567 alignments 2014-12-20 08:51:33,827 - get_align_summary_data - INFO - [Matrix 55%] 530 alignments 2014-12-20 08:51:33,827 - get_align_summary_data - INFO - [Matrix 60%] 487 alignments 2014-12-20 08:51:33,827 - get_align_summary_data - INFO - [Matrix 65%] 464 alignments 2014-12-20 08:51:33,827 - get_align_summary_data - INFO - [Matrix 70%] 412 alignments 2014-12-20 08:51:33,828 - get_align_summary_data - INFO - [Matrix 75%] 353 alignments 2014-12-20 08:51:33,828 - get_align_summary_data - INFO - [Matrix 80%] 278 alignments 2014-12-20 08:51:33,828 - get_align_summary_data - INFO - [Matrix 85%] 222 alignments 2014-12-20 08:51:33,828 - get_align_summary_data - INFO - [Matrix 90%] 99 alignments 2014-12-20 08:51:33,828 - get_align_summary_data - INFO - [Matrix 95%] 19 alignments 2014-12-20 08:51:33,828 - get_align_summary_data - INFO - ------------------------ Character counts ----------------------- 2014-12-20 08:51:33,828 - get_align_summary_data - INFO - [Characters] '-' is present 115,973 times 2014-12-20 08:51:33,829 - get_align_summary_data - INFO - [Characters] 'A' is present 1,037,244 times 2014-12-20 08:51:33,829 - get_align_summary_data - INFO - [Characters] 'C' is present 849,259 times 2014-12-20 08:51:33,829 - get_align_summary_data - INFO - [Characters] 'G' is present 834,209 times 2014-12-20 08:51:33,829 - get_align_summary_data - INFO - [Characters] 'T' is present 1,052,551 times 2014-12-20 08:51:33,829 - get_align_summary_data - INFO - ================ Completed get_align_summary_data =============== * prep raxml file:: python ~/git/phyluce/bin/align/format_nexus_files_for_raxml.py \ --alignments sate-gblocks-clean-50p-complete \ --output sate-gblocks-clean-50p-complete-raxml \ --log-path log --charsets ### partitioning * prep the PartitionFinder config file by hand. Use only the hcluster search method and raxml, due to the size of the data.
Then: python ~/bin/PartitionFinderV1.1.1/PartitionFinder.py --raxml --weights '1, 1, 1, 1' -p 12 partition_finder.cfg ### partitioned (raxml) * best tree search: #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=72:00:00 #PBS -o raxml_stdout #PBS -e raxml_stderr #PBS -A hpc_bfstart01 #PBS -N raxml-best cd /home/brant/work/prosanta/raxml-uce/ raxmlHPC-PTHREADS-AVX -m GTRGAMMA -N 20 -p 649090132 -n best -q part.txt -s sate-gblocks-clean-50p-complete.phylip -o thryssa_hamiltonii2 -T 16 * bootreps: #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=72:00:00 #PBS -o raxml-boot_stdout #PBS -e raxml-boot_stderr #PBS -A hpc_bfstart01 #PBS -N raxml-boot cd /home/brant/work/prosanta/raxml-uce raxmlHPC-PTHREADS-AVX -m GTRGAMMA -N autoMRE -p 649090132 -b 763861176 -n bootrep -q part.txt -s sate-gblocks-clean-50p-complete.phylip -o thryssa_hamiltonii2 -T 16 * build consensus: ~/git/raxml/raxmlHPC-SSE3 -m GTRGAMMA -f b -t RAxML_bestTree.best -z RAxML_bootstrap.bootrep -n final -o thryssa_hamiltonii2 ### unpartitioned (raxml) * best: #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=72:00:00 #PBS -o raxml_stdout #PBS -e raxml_stderr #PBS -A hpc_bfstart01 #PBS -N raxml-best cd /home/brant/work/prosanta/raxml-uce-no-part/ raxmlHPC-PTHREADS-AVX -m GTRGAMMA -N 20 -p 997137450 -n best -s sate-gblocks-clean-50p-complete.phylip -o thryssa_hamiltonii2 -T 16 * bootreps: #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=72:00:00 #PBS -o raxml-boot_stdout #PBS -e raxml-boot_stderr #PBS -A hpc_bfstart01 #PBS -N raxml-boot cd /home/brant/work/prosanta/raxml-uce-no-part raxmlHPC-PTHREADS-AVX -m GTRGAMMA -N autoMRE -p 997137450 -b 330715379 -n bootrep -s sate-gblocks-clean-50p-complete.phylip -o thryssa_hamiltonii2 -T 16 * build consensus: ~/git/raxml/raxmlHPC-SSE3 -m GTRGAMMA -f b -t RAxML_bestTree.best -z RAxML_bootstrap.bootrep -n final -o thryssa_hamiltonii2 ### partitioned (mrbayes) * convert partitioned file from 
phylip to nexus: python ~/git/phyluce/bin/align/convert_one_align_to_another.py \ --alignments sate-gblocks-clean-50p-complete-raxml \ --output sate-gblocks-clean-50p-complete-nexus \ --input-format phylip-relaxed --output-format nexus * add mrbayes block * setup queue file #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=72:00:00 #PBS -o mb_stdout #PBS -e mb_stderr #PBS -A hpc_bfstart01 #PBS -N mb cd /home/brant/work/prosanta/mrbayes-uce mpirun -np 16 -machinefile $PBS_NODEFILE mb sate-gblocks-clean-50p-complete.nexus ### with CHAR data * merge in character data by hand ### with CHAR data partitioned (raxml) * build part.txt using previous partitioned and also adding in BIN: DNA, p1 = 1-357, 1583-1857, 3169-3382, 4930-5313, 6066-6287, 6811-7134, 7485-7695, 12125-12335, 13023-13248, 17548-17721, 23593-23872, 26452-26641, 32632-32951, 34955-35316, 36650-36909, 36910-37330, 38743-39027, 43902-44225, 50786-51035, 51380-51650, 51903-52148, 53071-53260, 53261-53594, 62454-62747, 67693-67894, 68382-68567, 69758-70128, 74392-74789, 75029-75432, 75852-76060, 77236-77450, 78481-78781, 82397-82610, 84090-84541 DNA, p2 = 358-542 DNA, p3 = 543-836, 837-1031, 1032-1295, 1296-1582, 1858-2269, 2270-2461, 2462-2602, 2603-2916, 2917-3168, 3383-3721, 3722-3986, 3987-4330, 5600-5798, 5799-6065, 6288-6630, 6631-6810, 7135-7484, 7696-8029, 8030-8263, 8264-8523, 8524-8745, 8746-9008, 9009-9265, 9266-9529, 9530-9782, 10055-10395, 10396-10629, 10813-11097, 11292-11420, 11421-11760, 12336-12515, 12516-12849, 12850-13022, 13249-13452, 13453-13741, 13925-14183, 14184-14441, 14442-14883, 15011-15298, 15299-15493, 15706-15942, 16509-16748, 16749-16945, 16946-17294, 17295-17547, 17722-18098, 18099-18528, 18529-18787, 19177-19616, 19617-19956, 19957-20367, 20368-20603, 20604-20768, 20769-20991, 21145-21489, 21490-21739, 21740-21998, 22181-22436, 22437-22627, 22628-22855, 22856-23095, 23304-23592, 23873-24228, 24229-24650, 24934-25182, 25483-25840, 25841-26069, 26070-26302, 
26642-26944, 26945-27161, 27162-27540, 27541-27832, 27833-28145, 28494-28803, 28804-29163, 29164-29376, 29578-29894, 29895-30338, 30500-30660, 30805-31019, 31020-31169, 31170-31346, 31569-31798, 31799-32133, 32134-32341, 32342-32631, 32952-33140, 33141-33570, 33571-33871, 33872-34210, 34211-34492, 34493-34676, 34677-34954, 35317-35649, 35650-35905, 35906-36241, 36242-36649, 37331-37620, 37621-38017, 38018-38202, 38203-38470, 38471-38742, 39028-39237, 39238-39511, 39512-39864, 39865-40229, 40954-41411, 41412-41686, 41687-41998, 42283-42559, 42560-42794, 43343-43550, 44226-44627, 44628-44943, 44944-45232, 45233-45522, 45523-45817, 45818-46158, 46159-46472, 46473-46770, 46771-47143, 47144-47431, 47432-47710, 47711-47945, 47946-48289, 48290-48664, 48665-48862, 48863-49110, 49111-49389, 49390-49674, 49675-49852, 49853-50116, 50117-50387, 50388-50785, 51036-51379, 52149-52632, 52802-53070, 53595-53825, 53826-54012, 54013-54229, 54230-54537, 54538-54701, 54702-54940, 55245-55566, 55567-55779, 55780-56073, 56074-56272, 56273-56615, 56616-56900, 57446-57662, 58394-58744, 58745-58955, 58956-59221, 59222-59419, 59817-60100, 60312-60737, 60738-61087, 61272-61533, 61534-61852, 61853-62124, 62125-62453, 62748-63158, 63631-63995, 63996-64217, 64218-64468, 64469-64797, 64798-65080, 65081-65488, 65660-65983, 66228-66458, 66459-66713, 67476-67692, 67895-68136, 68137-68381, 68568-68941, 68942-69198, 69434-69757, 70129-70381, 70905-71049, 71050-71397, 71398-71587, 71812-71967, 71968-72119, 72120-72486, 72487-72819, 72820-73075, 73076-73306, 73509-73876, 73877-74207, 74790-75028, 75648-75851, 76061-76383, 76384-76704, 76705-77065, 77966-78237, 78238-78480, 79133-79412, 79413-79558, 79559-79823, 79974-80241, 80242-80437, 80438-80596, 80812-81036, 81037-81336, 81535-81859, 81860-82147, 82148-82396, 82832-83115, 83116-83410, 83411-83677, 83678-83903, 84839-84954, 84955-85150, 85151-85483, 86182-86375, 86376-86622, 86885-87108, 87109-87245, 87246-87652, 88282-88460, 88769-88996, 
88997-89316, 89317-89644, 89645-89900, 89901-90193, 90194-90505, 90506-90913, 91082-91344, 91345-91629, 91630-91834, 92000-92369, 92370-92698, 92699-92888, 92889-93178, 93179-93391, 93392-93599, 93600-93774, 93993-94439, 94690-95011 DNA, p4 = 4331-4594 DNA, p5 = 4595-4929, 5314-5599, 10630-10812, 15943-16183, 20992-21144, 42795-42971, 51651-51902, 55071-55244, 66714-66972, 67168-67475, 83904-84089, 85750-86181, 87653-88097, 88098-88281 DNA, p6 = 9783-10054, 15494-15705, 60101-60311, 66973-67167, 90914-91081 DNA, p7 = 11098-11291, 18788-19036, 40391-40953, 63159-63352, 86623-86884 DNA, p8 = 11761-12124, 14884-15010, 24651-24933, 28146-28493, 41999-42282, 42972-43206, 54941-55070, 69199-69433, 70382-70578, 70579-70904, 77066-77235, 77451-77721, 78782-79132, 84542-84838, 94440-94689, 95012-95274 DNA, p9 = 13742-13924, 26303-26451, 81337-81534, 82611-82831 DNA, p10 = 16184-16508, 25183-25482, 61088-61271 DNA, p11 = 19037-19176 DNA, p12 = 21999-22180 DNA, p13 = 23096-23303 DNA, p14 = 29377-29577, 59420-59816, 77722-77965 DNA, p15 = 30339-30499 DNA, p16 = 30661-30804 DNA, p17 = 31347-31568, 40230-40390, 52633-52801, 56901-57122, 58243-58393, 63353-63630, 65489-65659, 80597-80811, 85484-85749, 88461-88768, 93775-93992 DNA, p18 = 43207-43342, 73307-73508 DNA, p19 = 43551-43901, 57882-58242 DNA, p20 = 57123-57445 DNA, p21 = 57663-57881 DNA, p22 = 65984-66227 DNA, p23 = 71588-71811 DNA, p24 = 74208-74391, 79824-79973 DNA, p25 = 75433-75647 DNA, p26 = 91835-91999 BIN, p27 = 95275-95400 * best tree #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=4:00:00 #PBS -o raxml_stdout #PBS -e raxml_stderr #PBS -N pro_75p_part_best #PBS -A hpc_bfearly02 cd /home/brant/work/prosanta/75p/ml-part raxmlHPC-PTHREADS-AVX -m GTRGAMMA -N 20 -p 967824310 -n best -q part.txt -s sate-gblocks-clean-75p-complete-with-characters.phylip -o thryssa_hamiltonii2 -T 16 * bootreps: #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=6:00:00 #PBS -o raxml-boot_stdout #PBS 
-e raxml-boot_stderr #PBS -N pro_75p_part_boot #PBS -A hpc_bfearly02 cd /home/brant/work/prosanta/75p/ml-part raxmlHPC-PTHREADS-AVX -m GTRGAMMA -N autoMRE -p 967824310 -b 453005531 -n bootrep -q part.txt -s sate-gblocks-clean-75p-complete-with-characters.phylip -o thryssa_hamiltonii2 -T 16 ### with CHAR data unpartitioned (raxml) * part.txt is as follows DNA, p1 = 1-95274 BIN, p2 = 95275-95400 * best tree #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=4:00:00 #PBS -o raxml_stdout #PBS -e raxml_stderr #PBS -N pro_75p_unpart_best #PBS -A hpc_bfearly02 cd /home/brant/work/prosanta/75p/ml-unpart raxmlHPC-PTHREADS-AVX -m GTRGAMMA -N 20 -p 710212176 -n best -q part.txt -s sate-gblocks-clean-75p-complete-with-characters.phylip -o thryssa_hamiltonii2 -T 16 * bootreps: #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=6:00:00 #PBS -o raxml-boot_stdout #PBS -e raxml-boot_stderr #PBS -N pro_75p_unpart_boot #PBS -A hpc_bfearly02 cd /home/brant/work/prosanta/75p/ml-unpart raxmlHPC-PTHREADS-AVX -m GTRGAMMA -N autoMRE -p 710212176 -b 205604419 -n bootrep -q part.txt -s sate-gblocks-clean-75p-complete-with-characters.phylip -o thryssa_hamiltonii2 -T 16 ### with CHAR data unpartitioned (mrbayes) * convert to nexus: python ~/git/phyluce/bin/align/convert_one_align_to_another.py --alignments test --output test2 --input-format phylip-relaxed --output-format nexus * setup queue file #!/bin/bash #PBS -q checkpt #PBS -l nodes=1:ppn=16 #PBS -l walltime=72:00:00 #PBS -o mb_stdout #PBS -e mb_stderr #PBS -A hpc_bfstart02 #PBS -N mb_75p cd /home/brant/work/prosanta/75p/bayes mpirun -np 16 -machinefile $PBS_NODEFILE mb sate-gblocks-clean-75p-complete-with-characters.nexus ### GTST * convert 50p alignments to phylip: python ~/git/phyluce/bin/align/convert_one_align_to_another.py --alignments sate-gblocks-clean-50p-complete --output sate-gblocks-clean-50p-complete-phylip --input-format nexus --output-format phylip-relaxed * convert file endings: zmv '*' 
'$f:gs/phylip-relaxed/phylip' * tar and gzip and upload to zcluster tar -czvf sate-gblocks-clean-50p-complete-phylip.tar.gz sate-gblocks-clean-50p-complete-phylip * unzip * run genetrees: python run_raxml_genetrees_on_zcluster.py sate-gblocks-clean-50p-complete-phylip --searches 20 --input-format phylip python run_raxml_bootreps_on_zcluster.py sate-gblocks-clean-50p-complete-phylip --bootreps 100 --input-format phylip * on local NFS: mkdir sate-gblocks-clean-50p-complete-phylip-gtst * rsync down the files from the run: rsync -avx brant@copy.rcc.uga.edu:/escratch4/brant/brant_Dec_19/prosanta/raxml-output ./ rsync -avx brant@copy.rcc.uga.edu:/escratch4/brant/brant_Dec_19/prosanta/bootrep-raxml-output ./ * create a directory containing the 75p complete loci mkdir sate-gblocks-clean-75p-complete-phylip-gtst ls ../sate-gblocks-clean-75p-complete > 75p-best-tree-list.txt ls ../sate-gblocks-clean-75p-complete > 75p-bootrep-list.txt # modify list in sublime text to add names mkdir ../../sate-gblocks-clean-75p-complete-phylip-gtst/raxml-output/ find . -type f | grep -f ../../sate-gblocks-clean-75p-complete-phylip-gtst/75p-best-tree-list.txt | xargs cp -t ../../sate-gblocks-clean-75p-complete-phylip-gtst/raxml-output/ mkdir ../../sate-gblocks-clean-75p-complete-phylip-gtst/bootrep-raxml-output/ find . 
-type f | grep -f ../../sate-gblocks-clean-75p-complete-phylip-gtst/75p-bootrep-list.txt | xargs cp -t ../../sate-gblocks-clean-75p-complete-phylip-gtst/bootrep-raxml-output/ ## 75 % complete matrix * get all genetrees in the same file: for i in raxml-output/*; do cat $i >> sate-gblocks-clean-75p-complete-phylip-ALL-genetrees.tre; done * create a file containing all bootstrap replicate names: ls -d -1 $PWD/bootrep-raxml-output/* >> sate-gblocks-clean-75p-complete-phylip-ALL-bootreps.txt * run astral: java -jar ~/bin/Astral/astral.4.4.4.jar -i sate-gblocks-clean-75p-complete-phylip-ALL-genetrees.tre -b sate-gblocks-clean-75p-complete-phylip-ALL-bootreps.txt -o sate-gblocks-clean-75p-complete-phylip-ASTRAL.tre ## 50 % complete matrix * get all genetrees in the same file: for i in raxml-output/RAxML_bestTree.*.best; do cat $i >> sate-gblocks-clean-50p-complete-phylip-ALL-genetrees.tre; done * create a file containing all bootstrap replicate names: ls -d -1 $PWD/bootrep-raxml-output/RAxML_bootstrap.*.bootrep >> sate-gblocks-clean-50p-complete-phylip-ALL-bootreps.txt * run astral: java -jar ~/bin/Astral/astral.4.4.4.jar -i sate-gblocks-clean-50p-complete-phylip-ALL-genetrees.tre -b sate-gblocks-clean-50p-complete-phylip-ALL-bootreps.txt -o sate-gblocks-clean-50p-complete-phylip-ASTRAL.tre # Final steps * rename tip labels according to tip-name-mappings.txt to fix some taxonomy issues: for i in ../trees/*.tre; do echo $i; python ~/git/phyluce/bin/genetrees/phyluce_genetrees_rename_tree_leaves --config ../tip-name-mappings.txt \ --section translate \ --order left:right \ --input-format newick \ --output-format newick \ --input $i \ --output ./$i:t:r.name-remapped.tre; done * Zip the blown-up contigs so that there are some handy FASTA files for them (and so that this includes danRer, which we won't repost to GenBank)