## Functions used to perform analysis of Hunt (2013), Methods in Ecology and Evolution, ## "Testing the link between phenotypic evolution and speciation: an integrated paleontological and phylogenetic analysis" ## Code by Gene Hunt ## required packages require(geiger) require(paleoTS) require(paleotree) require(phytools) ## optimize PE model, with approach assuming traits are in standardized units ## so, can optimize directly for Vg, Va, Vc, which are in units of Vp, Vp/time, Vp. # p is vs and g, y is paleoTS object, site is a vector of localities, one for each sample logL.URWgeo2<- function(p, y, site) { sn<- as.numeric(site) dg<- sign(abs(diff(sn))) # 1 if sample differs from site of previous, 0 if the same Va<- p[1] # step variance Vg<- p[2] # geogr component of variance dy<- diff(y$mm) dt<- diff(y$tt) nd<- length(dy) sv<- y$vv/y$nn svA<- sv[1:nd] svD<- sv[2:(nd+1)] svAD<- svA + svD S <- dnorm(x=dy, mean=rep(0,nd), sd=sqrt(Va*dt + Vg*dg + svAD), log=TRUE) return(sum(S)) } # p is c(vs, g); with new parameterization of Stasis, theta goes away: dX ~ N(0, 2*omega) logL.Stasisgeo2<- function(p, y, site) { sn<- as.numeric(site) dg<- sign(abs(diff(sn))) # 1 if sample differs from site of previous, 0 if the same Va<- p[1] # omega, really Vg<- p[2] # geogr component of variance dy<- diff(y$mm) dt<- diff(y$tt) # not used b/c stasis model nd<- length(dy) sv<- y$vv/y$nn svA<- sv[1:nd] svD<- sv[2:(nd+1)] svAD<- svA + svD # model: expected change within same site: N(0,2*omega); across sites: N(0, 2*omega + G) S <- dnorm(x=dy, mean=rep(0,nd), sd=sqrt(2*Va + Vg*dg + svAD), log=TRUE) return(sum(S)) } opt.URWgeo2<- function(y, site, pool=TRUE, cl=list(fnscale= -1), meth="L-BFGS-B", hess=FALSE, model="URW") { #v0<- mle.URW(y) #if(v0 <= 1e-7) v0<- 1e-7 v0<- 1; g0<- 1 # v0 p0<- c(v0, g0) # initial par estimates #if(model=="URW") cl$parscale<-c(1,1) else cl$parscale<- c(1,1) cl$ndeps<- c(1e-6, 1e-6) names(p0)<- c("Va", "Vg") if(pool) y<- pool.var(y, ret.paleoTS=TRUE) # more limited optim options than usual if(model=="URW") w<- optim(p0, fn=logL.URWgeo2, y=y, site=site, method="L-BFGS-B", control=cl, lower=c(1e-9, 1e-9), hessian=hess) else{ w<- optim(p0, fn=logL.Stasisgeo2, y=y, site=site, method="L-BFGS-B", control=cl, lower=c(1e-9, 1e-9), hessian=hess) } if(hess) w$se<- sqrt(diag(-1*solve(w$hessian))) else w$se<- NULL #print(w) wc<- as.paleoTSfit(logL=w$value, parameters=w$par, modelName="URWgeo", method="AD", K=2, n=length(y$mm)-1, se=w$se) return(wc) } logL.Multgeo2<- function(p, yl, sitelist, model="URW") { Smult <- 0 nseq<- length(yl) if(model=="URW") { for (i in 1:nseq) Smult<- Smult + logL.URWgeo2(p, yl[[i]], sitelist[[i]]) } if (model=="Stasis") { for (i in 1:nseq) Smult<- Smult + logL.Stasisgeo2(p, yl[[i]], sitelist[[i]]) } return(Smult) } opt.Multgeo2<- function (yl, sitelist, pool=TRUE, cl=list(fnscale=-1), hess=FALSE, model="URW") { # get initial guess nseq<- length(yl) v0<- array(dim=nseq) for (i in 1:nseq) v0[i]<- mle.URW(yl[[i]]) v00<- median(v0) g0<- v00/10 p0<- c(v00,g0) names(p0)<- c("Va", "Vg") # limited optim options w<- optim(p0, fn=logL.Multgeo2, method="L-BFGS-B", control=cl, lower=c(1e-6, 1e-6), hessian=hess, yl=yl, sitelist=sitelist, model=model) # save as paleoTSfit n<- 0 for (i in 1:nseq) n<- n + length(yl[[i]]$mm)-1 wc<- as.paleoTSfit(logL=w$value, parameters=w$par, modelName="URWgeoMult", method="AD", K=2, n=n, se=w$se) return(wc) } #### phylo/PE functions logL.phygeo2<- function(p, x, meserr, Ca, Cc) { # parameters nt<- length(x) Va<- p[1] Vg<- p[2] Vc<- p[3] # prepare phylo calculations VVa<- Ca*Va # Anagenetic: Ca is VCV of tree VVc<- Cc*Vc # Cladogenetic: Cc is VCV of kappa tree VVg<- diag(rep(Vg), nrow(Ca)) # Geographic VVtot<- VVa + VVc + VVg + diag(meserr)^2 mm<- geiger:::phylogMean(VVtot, x) # root state ll<- dmvnorm(x, mean=rep(mm, nt), sigma=VVtot, log=TRUE) return(ll) } logL.PEgeo2<- function(p, tsList, treeList, siteList, Ca, Cc, model) { # parameters ntr<- length(tsList) Va<- p[1] Vg<- p[2] Vc<- p[3] # logl over each trait logLv<- array(dim=ntr) for (j in 1:ntr) { logL.ts<- logL.Multgeo2(p=c(Va, Vg), tsList[[j]], siteList, model=model) logL.tree<- logL.phygeo2(p=c(Va,Vg,Vc), x=treeList[[j]][,1], meserr=treeList[[j]][,2], Ca, Cc) logLv[j]<- logL.ts+logL.tree } return(sum(logLv)) } # this is the main function to be called by the user; it uses above functions as needed opt.PEgeo2<- function(tsList, treeList, siteList, phy, phyFull, p0=NULL, model=c("URW","Stasis"), hess=FALSE, prof=NULL) { model<- match.arg(model) # make sure number of traits match ntr<- length(tsList) if (length(treeList) != ntr) stop("ntr do not match") # initial parameter estimates if(is.null(p0)) p0<- c(0.5, 0.5, 1) # guesses! names(p0)<- c("Va", "Vg", "Vc") lb<- 1e-9 # lower bound # compute Ca and Cc phyK<- kappaTree(phyFull, kappa=0) Cc<- vcv(phyK)[phy$tip.label, phy$tip.label] # claogenetic vcv - includes speciations in full tree! if (model=="URW") {Ca<- vcv(phy); ps<- c(0.1,1,1)} # anagenetic vcv if (model=="Stasis") {Ca<- Cc; ps<- c(1,1,1)} # if Stasis within lineages # optimize w<- optim(p0, fn=logL.PEgeo2, method="L-BFGS-B", lower=rep(lb,3), hessian=hess, control=list(fnscale=-1, parscale=ps), tsList=tsList, treeList=treeList, siteList=siteList, Ca=Ca, Cc=Cc, model=model) #w<- optim(p0, fn=logL.PEgeo2, method="Nelder", hessian=hess, control=list(fnscale=-1, parscale=c(1,1,10)), tsList=tsList, treeList=treeList, siteList=siteList, Ca=Ca, Cc=Cc, model=model) if(hess) w$se<- sqrt(diag(-1*solve(w$hessian))) else w$se<- NULL if(!is.null(prof)){ profCI<- dirtyprof(prof, np=30, tsList, treeList, siteList, Ca, Cc, model, lim=w$val-3.907) w$profCI<- profCI } return(w) } # compute quick and dirty profile CIs dirtyprof<- function(prange, np, tsList, treeList, siteList, Ca, Cc, model, lim) { vas<- seq(prange[1,1], prange[2,1], length.out=np) vgs<- seq(prange[1,2], prange[2,2], length.out=np) vcs<- seq(prange[1,3], prange[2,3], length.out=np) ll<- array(dim=c(np^3, 4)) cc<- 1 for (i in 1:np) for (j in 1:np) for (k in 1:np) { pp<- c(vas[i], vgs[j], vcs[k]) ss<- logL.PEgeo2(pp, tsList, treeList, siteList, Ca, Cc, model) ll[cc,]<- c(pp, ss) cc<- cc+1 } ok<- ll[,4]>=lim llok<- ll[ok,] ci<- apply(llok, 2, range) colnames(ci)<- c("Va", "Vg", "Vc", "ll") ptok<- rep(1, nrow(ll)) ptok[ok]<- 2 pairs(llok[,1:3], pch=21, bg="red") #print(ci[,1:3]) return(ll) } sim.PEgeo4<- function(phy, phyFull, tsList, siteList, va,vc, vg=1, vp=1, ntr=6, model=c("URW", "Stasis")) # same as 3, but accounts for full phylogenetic tree -- NOT DONE YET { ## generate ts data model<- match.arg(model) nAD<- length(siteList) ntr<- length(tsList) Xad<- list() kk<- list() for (i in 1:ntr) { for (j in 1:nAD) kk[[j]]<- sim.GRWgeo(ns=length(siteList[[j]]), ms=0, vs=va, vp=vp, nn=tsList[[i]][[j]]$nn, tt=tsList[[i]][[j]]$tt, site=siteList[[j]], vg=vg, model=model) Xad[[i]]<- kk } fooURW<- function(x,l, vc, va, vg) x + rnorm(n=1, mean=0, sd=sqrt(vc + va*l + vg)) fooStasis<- function(x,l, vc, va, vg) x + rnorm(n=1, mean=0, sd=sqrt(vc + va + vg)) Xp<- list() NN<- 20 # approx. n within terminal tips nt<- Ntip(phy) for (j in 1:ntr) { err<- rep(vp/NN, nt) if(model=="URW") y<- rTraitCont(phyFull, model=fooURW, vc=vc, va=va, vg=vg) else y<- rTraitCont(phyFull, model=fooStasis, vc=vc, va=va, vg=vg) y<- y + rnorm(n=length(y), m=0, sd=sqrt(err)) # add sampling error ys<- y[phy$tip.label] Xp[[j]]<- cbind(ys, sqrt(err)) } return(list(Xad=Xad, Xp=Xp, phy=phy, phyFull=phyFull)) } sim.GRWgeo<- function(ns=10, ms=0, vs=1, vg=1, vp=1, nn=rep(20,ns), tt=0:(ns-1), site=factor(rep(c("A","B"),ns/2)), model) { # set-up MM <- array(dim = ns) mm <- array(dim = ns) vv <- array(dim = ns) dt <- diff(tt) sn<- as.numeric(site) dg<- sign(abs(diff(sn))) # simulate if(model=="URW") inc <- rnorm(ns-1, ms*dt, sqrt(vs*dt)) else inc <- rnorm(ns-1, 0, sqrt(vs)) ginc<- rnorm(ns-1, 0, sqrt(vg)) if(model=="URW") MM <- cumsum(c(0, inc + ginc*dg)) else MM <- c(0, inc) + cumsum(c(0,ginc*dg)) mm <- MM + rnorm(ns, 0, sqrt(vp/nn)) vv <- rep(vp, ns) gp <- c(ms, vs, vg) if(model=="URW") names(gp) <- c("mstep", "vstep", "Vg") else names(gp)<- c("mstep", "omega", "Vg") res <- as.paleoTS(mm = mm, vv = vv, nn = nn, tt = tt, MM = MM, genpars = gp, label = "Created by sim.GRWgeo()") return(res) } ## version of logL/opt PE that takes Va and Vc as given, and only searches over VG ## useful for plotting logL surface wrt Va and Vc logL.PEgeo2.VG<- function(p, Va, Vc, tsList, treeList, siteList, Ca, Cc, model) { # parameters ntr<- length(tsList) Vg<- p # logl over each trait logLv<- array(dim=ntr) for (j in 1:ntr) { logL.ts<- logL.Multgeo2(p=c(Va, Vg), tsList[[j]], siteList, model=model) logL.tree<- logL.phygeo2(p=c(Va,Vg,Vc), x=treeList[[j]][,1], meserr=treeList[[j]][,2], Ca, Cc) logLv[j]<- logL.ts+logL.tree } return(sum(logLv)) } opt.PEgeo2.VG<- function(Va, Vc, tsList, treeList, siteList, phy, phyFull, p0=NULL, model=c("URW","Stasis"), hess=FALSE, prof=NULL) { model<- match.arg(model) # make sure number of traits match ntr<- length(tsList) if (length(treeList) != ntr) stop("ntr do not match") # initial parameter estimates if(is.null(p0)) p0<- c(0.5) # guesses! names(p0)<- c("Vg") lb<- 1e-9 # lower bound # compute Ca and Cc phyK<- kappaTree(phyFull, kappa=0) Cc<- vcv(phyK)[phy$tip.label, phy$tip.label] # claogenetic vcv - includes speciations in full tree! if (model=="URW") {Ca<- vcv(phy); ps<- c(0.1,1,1)} # anagenetic vcv if (model=="Stasis") {Ca<- Cc; ps<- c(1,1,1)} # if Stasis within lineages # optimize w<- optimize(f=logL.PEgeo2.VG, interval=c(0,8), maximum=TRUE, tsList=tsList, treeList=treeList, siteList=siteList, Ca=Ca, Cc=Cc, model=model, Va=Va, Vc=Vc) }