#testing_gap_models_bifurc_09-21-12.R library(paleotree) getPs<-function(p,q,r){ #the probability of sampling at least once an extinct clade of unknown size res<-numeric() for(N in 1:1000){ res1<-((p^(N-1))*(q^N)*choose((2*N)-2,N-1))/(N*((p+q+r)^((2*N)-1))) if(is.na(res1)){break} if(res1==Inf){break} if(res1==0){break} res[N]<-res1 } res<-1-sum(res) return(res) } rootSplit<-function(tree){ #returns a list with the daughter taxa of the two clades at the root split tips<-lapply(tree$edge[tree$edge[,1]==(Ntip(tree)+1),2],function(zz) if(zz>Ntip(tree)){tree$tip.label[prop.part(tree)[[zz-Ntip(tree)]]] }else{tree$tip.label[zz]}) return(tips) } simPaleoTrees<-function(p,q,r,ntrees=1,all.extinct=FALSE,modern.samp.prob=1.0,mintime=1,maxtime=100, mintaxa=2,maxtaxa=500,prop.bifurc=0,drop.zlb=TRUE,print.runs=FALSE,plot=FALSE){ #this is a wrapper which will create many paleo trees with at least two observed tips #uses simFossilTaxa, sampRanges,taxa2phylo, etc #good if you want to simulate many many trees with extinct taxa #divergence times for nodes will be perfectly known #by default: (minimal conditioning) #no conditioning on the number of extant taxa #living taxa are sampled perfectly at the present #zero-length branches are dropped #simPaleoTrees(p=0.1,q=0.1,r=0.1,ntrees=10) require(ape) if(mintaxa<2){stop("Error: Need at least two taxa per tree; increase mintaxa")} if(ntrees<1){stop("Error: number of trees to simulate is <1")} res<-rmtree(ntrees,2) ntries<-0 for(i in 1:ntrees){ rerun<-TRUE while(rerun){ ntries<-ntries+1 taxa<-suppressMessages(simFossilTaxa(p=p,q=q,anag.rate=0,prop.bifurc=prop.bifurc,prop.cryptic=0,nruns=1,mintaxa=mintaxa, maxtaxa=maxtaxa,maxtime=maxtime,maxExtant=ifelse(all.extinct,0,maxtaxa),min.cond=FALSE,plot=plot)) ranges<-sampleRanges(taxa,r,min.taxa=0,modern.samp.prob=modern.samp.prob) if(sum(!is.na(ranges[,1]))>1){ tree<-taxa2phylo(taxa,obs_time=ranges[,2],plot=plot) if(drop.zlb){tree<-dropZLB(tree)} if(all(!is.na(tree))){ numext<-sum(ranges[,2]==0) minta<-Ntip(tree)>mintaxa minti<-(max(ranges,na.rm=TRUE)-min(ranges,na.rm=TRUE))>mintime if(minta & minti){ rerun<-FALSE } } } } tree$taxa<-taxa #attach original taxon matrix tree$ranges<-ranges #attach sampled ranges res[[i]]<-tree if(ntrees>10){if((i%%(ntrees/10))==0){message(cat(round(i*100/ntrees),"% ",sep=""))}} } if(print.runs){message(paste(ntrees," trees accepted from ",ntries," total runs (",signif(ntrees/ntries,2)," Acceptance Probability)",sep=""))} if(ntrees==1){res<-res[[1]]} return(res) } ############################ #SETUP p<-0.1 q<-0.1 r<-0.1 nruns<-1000 propBifurc<-1 ######################## set.seed(444) pq<-p+q Ps<-getPs(p,q,r) #condition on at least 2 sampled taxa (so there is a node) data<-simPaleoTrees(p=p,q=q,r=r,ntrees=nruns,all.extinct=FALSE, modern.samp.prob=1,mintaxa=2,maxtime=100,prop.bifurc=propBifurc,drop.zlb=FALSE) #okay, identify earliest appearance times of taxa in both clades FADs<-lapply(data,function(k) sapply(rootSplit(k),function(y) max(k$ranges[y,1]))) #crown root time is the time of first branching point where both daughters are observed crownTime<-sapply(data,function(x) x$root.time) #get fad of taxon 1 FAD1<-sapply(FADs,max) FAD2<-sapply(FADs,min) #max FAD in taxa is the stem age stemTime<-sapply(data,function(x) max(x$taxa[,3])) #07-29-12: sum of all 3 gaps... gapBall<-crownTime-FAD2 #stem to first event, either node or fad1 gapS1<-apply(cbind(stemTime-FAD1,stemTime-crownTime),1,min) gaps<-cbind(ifelse(FAD1>=crownTime,0,crownTime-FAD1),gapBall,gapS1) #gap<-c(gaps[gap[,1]!=0,1],gap[,2:3] gap<-apply(gaps,1,sum) #save data save.image("gaptest_data_bifurc_09-22-12.Rdata")