macro-; macro* 5 30000; macro [ 2000000; macro=; /** simnrind15c, replacing 1s (presences) with 0s (absences) for use with empirical datasets **/ report=; collapse 0; hold 10000; log $dataset<.simNRIND15c0.log; mult= tbr replic 100 hold 1000; unique; var-; report-; sil=all; macfloat 3; /** calculate average distance from root for each taxon for original MPTs (not including taxon 0)**/ var : nodesdownavgO[(ntax+1)] nodesdownthis nodesdownall x y ; set x (ntax+1); set y (ntrees+1); loop=taxonO 0 ntax set nodesdownall 0; loop=MPTO 0 ntrees travtree below #MPTO #taxonO -nodesdownthis set nodesdownall ++; endtrav stop set nodesdownavgO[#taxonO] ('nodesdownall'/(ntrees+1)); stop keep 0; /** work out number of 1s (presences) for each taxon **/ var: ones[(ntax+1)]; loop 0 ntax set ones[#1] 0; loop 0 nchar if ( (states[#2 #1] > 1) && (states[#2 #1] <65535) && (isinxgroup[1 #2]) ) set ones[#1] ++; end stop stop var ones; var : nodesrelativeavgOR[(ntax+1) 100] ; var : maxdistanceO y exclusions[ntax]; var : i j k l p q output[(ntax*8) 14] thisdisnode thisdislength thisdownnode thisupnode ; var : ma la ka na oa pa xa wa ta; /** Number of entries replaced for each taxon at set proportions of missing entries**/ var : propmd[8]; set propmd[0] 1; set propmd[1] 2; set propmd[2] 5; set propmd[3] 10; set propmd[4] 20; set propmd[5] 50; set propmd[6] 80; set propmd[7] 95; var : nummd[(ntax+1) 8]; loop=a 1 ntax loop=b 0 7 if ('ones[#a]'==0) set nummd[#a #b] 0; continue end if (#b == 0) set nummd[#a #b] 1; continue end if ((#b == 1) && ('ones[#a]'>1) ) set nummd[#a #b] 2; continue end if ('nummd[#a (#b-1)]'==0) set nummd[#a #b] 0; continue end set q 0; loop=c 0 7 if ( ((('propmd[#c]'/100)*'ones[#a]') > ('nummd[#a (#b-1)]' +0.001)) && ('q'==0) ) set nummd[#a #b] (('propmd[#c]'/100)*'ones[#a]'); set q 1; end stop stop stop if(nchar>100) loop=f 1 ntax if ('ones[#f]'>100) loop=g 0 7 set q 0; loop=h 0 7 if ( ((('propmd[#h]'/100)*'ones[#f]') > ('nummd[#f (#g-1)]' +0.001)) && ('q'==0) ) set nummd[#f #g] (('propmd[#h]'/100)*'ones[#f]'); set q 1; end stop stop end stop end loop=d 1 ntax loop=e 2 7 loop=f 0 nchar if ( ('nummd[#d #e]' > (#f-1)) && ('nummd[#d #e]' < #f)) set nummd[#d #e] #f; end stop stop stop set i 0; macfloat0; sil-all; var ones*; var propmd*; var nummd*; macfloat 3; var : wiltab [(ntax+1) 24]; /** create trees for random deletions for each taxon in turn, looped for proportions of missing data pmd**/ loop=pmd 0 7 sil=all; keep0; loop=taxonR 1 ntax progress #taxonR 'x' Progress for 'x' taxa with 'propmd[#pmd]' percent missing characters; rseed*; set j 0; loop=randomrep 0 99 if ('ones[#taxonR]'== 0) continue end if (('nummd[#pmd]'=='ones[#taxonR]') && (#randomrep>=1)) continue end if (('propmd[#pmd]'==1) && ('j'=='ones[#taxonR]')) continue end proc $dataset; rseed+1; /** make random missing data and search **/ if ('propmd[#pmd]'== 1) set k 0; set l 0; loop=psextincta 0 nchar if ((states[#psextincta #taxonR] >= 2) && ('l' != 1) && (states[#psextincta #taxonR] < 65535) && (isinxgroup[1 #psextincta]) ) set k ++; if ('k' == ('j'+1)) xread=!#psextincta #taxonR 0; set j++; set l 1; quote change to T#taxonR C#psextincta; end end stop else randtrees 'ones[#taxonR]'; tgroup = 3 (random) * 'nummd[#taxonR #pmd]'; set k 0; loop=psextinctb 0 nchar if ( (states[#psextinctb #taxonR] >= 2) && (states[#psextinctb #taxonR] < 65535) && (isinxgroup[1 #psextinctb]) ) if ( isintgroup[3 'k']) xread=!#psextinctb #taxonR 0; end set k ++; end stop end xread!; hold 10000; mult= tbr replic 100 hold 1000; unique; set y (ntrees+1); /** count nodesdown to root for MPTs**/ set nodesdownall 0; loop=treeR 0 ntrees travtree below #treeR #taxonR -nodesdownthis set nodesdownall ++; endtrav stop set nodesrelativeavgOR[#taxonR #randomrep] (('nodesdownall'/(ntrees+1))-'nodesdownavgO[#taxonR]'); stop progress/; stop sil-all; quote a; /**maximum distance from root to exclude taxa on extreme tips (down only)**/ set maxdistanceO 0; set y 1; set exclusions [0] 0; loop=taxC 1 ntax if ('nodesdownavgO[#taxC]' > 'maxdistanceO') set maxdistanceO 'nodesdownavgO[#taxC]'; end stop quote b; /** output for this proportion of missing data (pmd) **/ loop=outB 0 ntax if (#outB == 0) continue end set output ['i' 0] 'propmd[#pmd]'; set output ['i' 1] #outB; set output ['i' 2] ('nummd[#outB #pmd]'); if ('ones[#outB]'> 0) set output ['i' 3] ('nummd[#outB #pmd]'/'ones[#outB]'*100); else continue; end set output ['i' 4] 'nodesdownavgO[#outB]'; if ((#outB==0) ^ ('nodesdownavgO[#outB]' == 2) ^ ('nodesdownavgO[#outB]' == 'maxdistanceO')) set output ['i' 6] 1; else set output ['i' 6] 0; end set thisdisnode 0; set thisdislength 0; if ('nummd[#pmd]'=='ones[#outB]') set output ['i' 7] 'nodesrelativeavgOR[#outB 0]'; errmsg FFFFFLAAAAAPPP; set i++; continue end if ('propmd[#pmd]'==1) loop=repC 0 ('ones[#outB]'-1) set thisdisnode ('thisdisnode'+'nodesrelativeavgOR[#outB #repC]'); stop set output ['i' 7] ('thisdisnode'/'ones[#outB]'); else loop=repA 0 99 set thisdisnode ('thisdisnode'+'nodesrelativeavgOR[#outB #repA]'); stop set output ['i' 7] ('thisdisnode'/100); end set thisdownnode 0; set thisupnode 0; loop=repB 0 99 if ('nodesrelativeavgOR[#outB #repB]'< 0) set thisdownnode ++; end if ('nodesrelativeavgOR[#outB #repB]'> 0) set thisupnode ++; end stop set output['i' 9] 'thisdownnode'; set output['i' 10] 'thisupnode'; set i++; stop quote c; sil-all; /** do the wilcoxon signed rank test for this value of pmd test **/ macfloat 3; loop=wilxc 0 ntax set ma 0; set la (1/1000); set wa 0; set ka 0; set oa 0; set pa 0; set ta 0; set xa 0; set na 0; loop=wilxa 0 99 if ('ka'== 100) continue; end set ka 100; set na 0; loop=wilxb 0 99 set ta ( power[ ('nodesrelativeavgOR[#wilxc #wilxb]'*'nodesrelativeavgOR[#wilxc #wilxb]') (1/2) ] ); if ('ta' == 'ka') set na++; if ('nodesrelativeavgOR[#wilxc #wilxb]' < 0) set oa ++; end if ('nodesrelativeavgOR[#wilxc #wilxb]' > 0) set pa ++; end end if (('ta'< 'ka') && ('ta' > 'la') ) set ka 'ta'; set na 1; set oa 0; set pa 0; if ('nodesrelativeavgOR[#wilxc #wilxb]' < 0) set oa ++; end if ('nodesrelativeavgOR[#wilxc #wilxb]' > 0) set pa ++; end end stop if ('ka'==100) continue; end set la 'ka'; set xa ( ('na'+1)/2 + 'ma'); set wa ( 'wa' + ('pa'*'xa') - ('oa'*'xa')); set ma ('ma'+'na'); stop quote d#wilxc; set wiltab [#wilxc #pmd] 'wa'; set wiltab [#wilxc (#pmd+8)] 'ma'; stop var : wilxsig[100] np wp zp; set wilxsig[5] 0-15; set wilxsig[6] 0-17; set wilxsig[7] 0-22; set wilxsig[8] 0-26; set wilxsig[9] 0-29; macfloat 10; loop=wilxd 1 ntax loop=wilxe 0 7 set np 'wiltab[#wilxd (#wilxe+8)]'; set wp 'wiltab[#wilxd #wilxe]'; if ('wp'==0) continue end; if(('np'<10) && ('wp' < -14)) loop=wilxf 'np' 'np' if ('wp' <= 'wilxsig[#wilxf]') set wiltab[#wilxd (#wilxe+16)] 1; end stop end set zp ( ('wp'+(1/2)) / ( power [ (('np' * ('np'+1) * ( (2*'np')+1))/6) (1/2)]) ); if ( ('np'>9) && ('zp'< (0- (1645/1000)) ) ) set wiltab[#wilxd (#wilxe+16)] 1; end stop stop macfloat 3; quote RAW values for 'propmd[#pmd]'; quote Average number of nodes a taxon moves relative to original position for random missing data replications; var nodesrelativeavgOR*; stop /** binomial test **/ var : factorials[101] i; set factorials[0] 1; set factorials[1] 1; set factorials[2] 2; loop=facta 3 100; set i 2; loop=factb 3 #facta set i ('i' * #factb); stop set factorials[#facta] 'i'; stop var : kb nb pb jb lb binom; loop=binoma 0 ((ntax*8)-1) set kb 'output[#binoma 9]'; set nb ('output[#binoma 9]' + 'output[#binoma 10]'); set binom 0; set jb 0; loop=binomb 'nb' 'nb' loop=binomc 'kb' 'nb' loop=binomd (#binomb-#binomc) (#binomb-#binomc) set lb 'factorials[#binomd]'; stop set jb ( ('factorials[#binomb]'/('factorials[#binomc]'*'lb')) * (power [(1/2) 'kb']) * (power[(1/2) ('nb'-'kb')])); set binom ('jb'+'binom'); stop stop set output [#binoma 13] 'binom'; stop quote OUTPUT Nchar missing, Taxon, Number of characters deleted, percent characters deleted, Original height#nodes, Original height#steps, Exclusions, Avg displacement#nodes, Avg displacement#steps, count down nodes, count up nodes, count down length, count up length; var output*; macfloat 0; var wiltab*; macfloat 3; /** establish thresholds **/ var : thresh [(ntax+1) 8] sw tw; loop=thresha 0 ntax set thresh[#thresha 0] #thresha; set thresh[#thresha 1] 'ones[#thresha]'; set thresh[#thresha 2] 'nodesdownavgO[#thresha]'; if ((#thresha==0) ^ ('nodesdownavgO[#thresha]' == 2) ^ ('nodesdownavgO[#thresha]' == 'maxdistanceO') ^ ('ones[#thresha]' <= 1) ) set thresh[#thresha 3] 1; else set thresh[#thresha 3] 0; end set sw 0; loop=threshb 0 7 if ( ( 'wiltab[#thresha (#threshb+16)]' == 1 ) && ('sw'==0) && ('ones[#thresha]'>0) ) set thresh[#thresha 4] 'nummd[#thresha #threshb]'; set thresh[#thresha 5] (('nummd[#thresha #threshb]'/'ones[#thresha]')*100); set sw 1; else continue end stop set sw 0; set tw 0; if (#thresha==0) continue end loop=threshc 0 7 if ( ( 'output[((#thresha-1)+(#threshc*ntax)) 13]' <= 0.05) && ('sw'==0) && ('ones[#thresha]'>0) ) set thresh[#thresha 6] 'nummd[#thresha #threshc]'; set thresh[#thresha 7] (('nummd[#thresha #threshc]'/'ones[#thresha]')*100); set sw 1; else continue end stop stop quote OUTPUT for thresholds for taxon, number of 1s, height in tree (nodes), exclusions, absolute threshold wilcox, percent threshold wilcox, absolute threshold binomial, percent threshold binomial; quote where threshold is number or percent of 1 entries converted to 0s that cause significant shift of taxon down tree; var thresh*; report=; log/; proc/; /** SCRIPT FOR GENERATING RANDOM SIMULATED DATASETS **/ macro=; sil-all; sil=buffer; keep 0; rseed 0; var : char charx tax; set char 100; set charx 200; set tax 20; xread/'charx' 'tax' 2; rand 1; /** ttags*0; var : nodes; set nodes (2*ntax); loop 0 'nodes' ttags+#1 1; stop ttags); ttags; **/ xread+0 0 'charx' 2; xread!; var : i; set i 0; loop 0 nchar if ('i' == 100) ccode ] #1; continue end if ((isinfo[#1]) && (nstates[#1]>1)) set i ++; continue end ccode ]#1; stop log simtemp.txt; xread-; quote proc/; log/; proc simtemp.txt; mult=tbr replic 100 hold 1000; if (ntrees > 1) nelsen*; end tplot/; var : ci; set ci (minsteps/(length[ntrees])); quote ci 'ci'; proc/;