get_mrca<-function(phy,nodelabels){ node<-which(phy$tip.label %in% nodelabels) n<-node[1] while (sum(!node %in% get_all_kid(phy,n))>0){ n<-go_up_one_node(phy,n) if(n==length(phy$tip.label)+1){break;} } return(n) } get_all_clade_innode<-function(phy,node){ if(node<=length(phy$tip.label)){ return(node); }else{ l<-phy$edge[,1]== node m<-phy$edge[l,2] a<-c() for(mm in m){ a<-c(a,get_all_clade_innode(phy,mm)) } return(c(node,a)) } } get_all_kid<-function(phy,node){ if(node<=length(phy$tip.label)){ return(node); }else{ tip<-c(); innernode<-c(node); while (length(innernode)>0){ l<-phy$edge[,1] %in% innernode m<-phy$edge[l,2] tip<-c(tip,m[m<=length(phy$tip.label)+1]) innernode<-m[m>length(phy$tip.label)+1] } return(tip) } } go_up_one_node<-function(phy,node){ if(node==length(phy$tip.label)+1){ return(node); }else{ l<-phy$edge[,2] %in% node m<-phy$edge[l,1] return(m) } } get_support_midpoint<-function(phy,mcra){ rtree<-midpoint(phy) tree<-phy midbr<-rtree$edge[which(rtree$edge[,1]==length(rtree$tip.label)+1),2] if(mrca %in% get_all_clade_innode(tree,max(midbr))){ boot<-tree$node.label[mrca-length(tree$tip.label)] }else{ rootbr<-tree$edge[which(tree$edge[,1]==length(tree$tip.label)+1),2] for(rbr in rootbr){ if(min(midbr) %in% get_all_clade_innode(tree,rbr)){break;} } changingnode<-c(rbr,length(tree$tip.label)+1) startnode<-min(midbr) while (startnode !=rbr){ changingnode<-c(changingnode,startnode) startnode<-go_up_one_node(tree,startnode) } if(mrca %in% changingnode){ lowernode<-tree$edge[which(tree$edge[,1]==mrca),2]; for(ll in lowernode){ if(max(midbr) %in% get_all_clade_innode(tree,ll)){boot<-tree$node.label[ll-length(tree$tip.label)];break;} } }else{ boot<-tree$node.label[mrca-length(tree$tip.label)] } } return(boot) } postorder<-function(phy,node){ if(node<=length(phy$tip.label)){ return(node); }else{ l<-phy$edge[which(phy$edge[,1]==node),2]; size<-c(length(get_all_kid(phy,l[1])),length(get_all_kid(phy,l[2]))) l<-l[rev(order(size))] return(c(node,postorder(phy,l[2]),postorder(phy, l[1]))) } } mbranch<-function(phy){ #cat(phy$tip.label,"\n"); if(sum(table(phy$tip.label))==length(unique(phy$tip.label))){ return(phy) }else{ count<-table(phy$tip.label) spname<-names(count)[count>1] spname<-as.numeric(spname[1]) l<-which(phy$tip.label==spname) #cat(count,"\n",l,"\n") return(sapply(c(1:length(l)),function(ll){mbranch(drop.tip(phy,l[-ll]))})) } } library(ape) library(phangorn) library(stringr) library(plotrix) setwd("/Users/huatenghuang/Documents/Windows/Work/NextGenDesign/Missing") sptree<-read.nexus(file="speciestree.nex") setwd("/Users/huatenghuang/Documents/Windows/Work/NextGenDesign/Missing/Trees/") folderlist<-dir() result<-matrix(numeric(0), 0,20) for (folder in folderlist){ depth<-as.integer(str_extract(folder, "^\\d+")) filelist<-dir(path= folder,pattern="\\.tre") for (file in filelist){ sp<-as.integer(sub("sp","",str_extract(file, "sp\\d+"))) missingcut<-as.integer(sub("\\_","",str_extract(file, "\\_\\d+"))) missingtype<-sub("\\.","",sub("\\_","",str_extract(file, "\\_\\w\\."))) f<-paste(folder,'/',file,sep='') tree<-read.nexus(f) rtree<-midpoint(tree) l<-c(); for(tip in seq(from=1,to=96,by=12)){ spnode<-as.character(seq(from=tip,to=tip+11,by=1)) mono<-as.integer(is.monophyletic(rtree,spnode)) if(mono==1){ mrca<-get_mrca(rtree,spnode) boot<-get_support_midpoint(tree,mrca) }else{ boot<-NA; } #cat(tip,"\n") l<-c(l,mono,boot) } result<-rbind(result,c(sp,depth,missingcut,missingtype,l)) #break; cat(file) } cat(folder) #break; } colnames(result)<-c("sp","depth","missingcut","missingtype","sp1","sp1b","sp2","sp2b","sp3","sp3b","sp4","sp4b","sp5","sp5b","sp6","sp6b","sp7","sp7b","sp8","sp8b") result<-as.data.frame(result) write.table(result, file="mono_sp.txt",sep="\t",row.names=F,quote=F) #### plot figure 4 setwd("~/Documents/Windows/Work/NextGenDesign/Missing/Trees") file<-"mono_sp.txt" d<-read.table(file, sep="\t",header=T,stringsAsFactors=F) d$monocount<-apply(d[,c(5,7,9,11,13,15,17,19)],1,function(x){sum(x,na.rm=T)}) d<-d[order(d$depth,-d$missingcut,d$missingtype),] d$missingtype[is.na(d$missingtype)]<-"F" d$highmono<-sapply(c(1:dim(d)[1]),function(y){x<-d[y,];t<-x[c(5,7,9,11,13,15,17,19)]*x[c(6,8,10,12,14,16,18,20)];sum(t>=0.7,na.rm=T)}) depth<-as.integer(unique(d$depth)) missingcut<-as.integer(unique(d$missingcut)) missingtype<-as.character(unique(d$missingtype)) result<-matrix(0,0,7) colnames(result)<-c("depth","missingcut","missingtype","monocount","sdcount","highmono","sdhigh") for (dd in depth){ for (c in missingcut){ for (t in missingtype){ l<-d[(d$depth==dd)&(d$missingcut==c)&(d$missingtype==t),]; result<-rbind(result,c(dd,c,t,mean(l$monocount),sd(l$monocount),mean(l$highmono),sd(l$highmono))) } } } result<-as.data.frame(result) result$monocount<-as.numeric(as.character(result$monocount)) result$highmono<-as.numeric(as.character(result$highmono)) result$sdcount<-as.numeric(as.character(result$sdcount)) result$sdhigh<-as.numeric(as.character(result$sdhigh)) result$depth<-as.numeric(as.character(result$depth)) result$missingcut<-as.numeric(as.character(result$missingcut)) result$missingtype<-as.character(result$missingtype) result<-result[!is.na(result$monocount),] postscript(file="monosp.eps",onefile=F, paper="special",width=8,height=5,horizontal=F) par(mfrow=c(1,2),oma=c(1,1,1,1),bty='n') for(depth in c(2,20)){ subr<-result[result$depth==depth,] subr<-subr[rev(order(subr$missingcut,subr$missingtype)),] subr$plot<-c(5,4,3,2,1) if(depth==2){ xlim<-c(2,6) at=c(2,4,6) }else{ xlim<-c(5.5,8) at=c(6,7,8) } plot(1, 1, type="n", xlim=xlim,ylim=c(0.5,5.5),xaxt='n',yaxt='n',xlab="Number of monophyletic taxa",ylab="Tolerance of missing data (%)",main=paste(depth,"Ne",sep='')) axis(2, at=c(0.5,5.5), labels=c("",""), lwd.ticks=0,pos=min(xlim)) axis(side=2,at=c(5,4,3,2,1),labels=c("1","12.5","25","50","50"), lwd.ticks=1,lwd=0,pos=min(xlim),cex=0.8) axis(1, at=c(0.5,5.5), labels=c("",""), lwd.ticks=0,pos=0.5) axis(side=1,at=at,labels=at, lwd.ticks=1,lwd=0,pos=0.5) for (i in c(1:dim(subr)[1])){ rect(ybottom=subr$plot[i]-0.2,ytop=subr$plot[i]+0.2,xleft=min(xlim),xright=subr$monocount[i],lwd=2) #lines(x=c(subr$monocount[i]-subr$sdcount[i],subr$monocount[i]+subr$sdcount[i]),y=c(subr$plot[i],subr$plot[i])) rect(ybottom=subr$plot[i]-0.2,ytop=subr$plot[i]+0.2,xleft=min(xlim),xright=subr$highmono[i],lwd=0,col="black") #text(x=i,y=subr$monocount[i]+0.05,labels=subr$label[i],adj=c(0.5,0),cex=0.8) } } dev.off() #wilcox test x<-d[d$depth==2 &d$missingcut==95 & d$missingtype=="F",c("sp","monocount")] y<-d[d$depth==2 &d$missingcut==84 & d$missingtype=="L",c("sp","monocount")] x<-merge(x,y,by="sp") wilcox.test(x=x$monocount.x,y=x$monocount.y,paired=T) #not x<-d[d$depth==2 &d$missingcut==95 & d$missingtype=="F",c("sp","monocount")] y<-d[d$depth==2 &d$missingcut==72 & d$missingtype=="L",c("sp","monocount")] x<-merge(x,y,by="sp") wilcox.test(x=x$monocount.x,y=x$monocount.y,paired=T) #not x<-d[d$depth==2 &d$missingcut==95 & d$missingtype=="F",c("sp","monocount")] y<-d[d$depth==2 &d$missingcut==48 & d$missingtype=="L",c("sp","monocount")] x<-merge(x,y,by="sp") wilcox.test(x=x$monocount.x,y=x$monocount.y,paired=T) #p=0.04 x<-d[d$depth==2 &d$missingcut==95 & d$missingtype=="F",c("sp","monocount")] y<-d[d$depth==2 &d$missingcut==48 & d$missingtype=="F",c("sp","monocount")] x<-merge(x,y,by="sp") wilcox.test(x=x$monocount.x,y=x$monocount.y,paired=T) #p<0.001 x<-d[d$depth==20 &d$missingcut==95 & d$missingtype=="F",c("sp","monocount")] y<-d[d$depth==20 &d$missingcut==84 & d$missingtype=="L",c("sp","monocount")] x<-merge(x,y,by="sp") wilcox.test(x=x$monocount.x,y=x$monocount.y,paired=T) #not x<-d[d$depth==20 &d$missingcut==95 & d$missingtype=="F",c("sp","monocount")] y<-d[d$depth==20 &d$missingcut==72 & d$missingtype=="L",c("sp","monocount")] x<-merge(x,y,by="sp") wilcox.test(x=x$monocount.x,y=x$monocount.y,paired=T) #not x<-d[d$depth==20 &d$missingcut==95 & d$missingtype=="F",c("sp","monocount")] y<-d[d$depth==20 &d$missingcut==48 & d$missingtype=="L",c("sp","monocount")] x<-merge(x,y,by="sp") wilcox.test(x=x$monocount.x,y=x$monocount.y,paired=T) #p=0.08 x<-d[d$depth==20 &d$missingcut==95 & d$missingtype=="F",c("sp","monocount")] y<-d[d$depth==20 &d$missingcut==48 & d$missingtype=="F",c("sp","monocount")] x<-merge(x,y,by="sp") wilcox.test(x=x$monocount.x,y=x$monocount.y,paired=T) #p<0.001