fitContinuousMCMC_fossil <- function(phy, d, priorframe, model = c("BM", "ACDC", "SSP", "OU", "TREND"), Ngens = 1000000, sampleFreq = 1000, printFreq = 1000, startValues = "ML", propwidth = c(1,1,1,1,0.1), startingRate = c("tree"), RootIs =c("tree", "FossilNorm", "FossilUnif"), RootRange = NA, acdc.prior = NULL, outputName = "MCMCoutput") { # phy - phylogenetic tree # d - named vector of data for tip species # priorframe - a dataframe with four columns 1) tip 1 and 2) tip 2 spanning the node of interest, and 3) the mean and 4) the standard deviation for the range of data values for the node if(model == "BM" | model == "ACDC" | model == "SSP") { k <- 3 } else if(model=="OU" | model == "TREND") { k <- 4 } if (is.null(priorframe)) warning("no fossil priors have been specified. a standard model will be fit instead."); if(!is.data.frame(priorframe)) stop("the prior data need to be in a data frame"); fossil.prop.width <- priorframe[,4] * propwidth[5]; ## uses a fraction of the SD for each fossil distribution as the proposal width. if(!RootIs == "tree") {if(length(RootRange) < 2) {stop(" if using a fossil prior on the root, you must specify distribution parameters using the RootRange argument") }}; ft <- makeFossilTree(phy, priorframe); # make the fossil tree C <- vcv.phylo(ft); # obtain the covariance matrix for the fossil tree ## ACDC can posses a parameter space that is difficult to effectively explore. constraining the prior ## ## bounds based on the height of the tree is sometimes useful: ## if(model == "ACDC") { if(is.null(acdc.prior)) { scaler.prior <- ACDC.prior(phy); } else { scaler.prior <- acdc.prior; } } else { scaler.prior <- NULL; } ## prepare starting parameters and generate output for the MCMC ## starting<- initiate(phy,d, C, priorframe, model,startingRate, RootIs, RootRange, startValues, outputName, scaler.prior); params <- starting$params; paramslogLk <- starting$paramslogLk; output <- starting$output; ## set up the acceptance tracker and start the mcmc ## accept <- rep(0, k); for(ngen in 1:Ngens) { # start the mcmc props <- generateProposal(ngen, params, propwidth, model, scaler.prior, fossil.prop.width); # generate proposals propfd <- c(d, props$fossil); # pull out the fossil data and add it to the tipdata ProplogLk <- getBrownianLk(model,propfd, C, props); # get the proposal likelihood h <- lkratio(ProplogLk, paramslogLk) * prRatio(props$fossil, params$fossil, props$root, params$root, priorframe, RootIs, RootRange); # compute the acceptance probability p <- runif(1); # draw a threshold for acceptance if(h == Inf | is.na(h)) h <- 0; ## this catches "bad" (i.e. infinite) likelihoods ## if(h >= p) { ## do we accept or not? params <- props; paramslogLk <- ProplogLk; accept <- updateAcceptance(k, accept, ngen); } if (ngen %% sampleFreq == 0) { ## if on a sampling generation, retain parameters and likelihoods. cat(ngen, paramslogLk, as.numeric(na.omit(as.numeric(unlist(params)))), file = output, sep = "\t","\n"); } if (ngen %% printFreq == 0) { ## if on a print generation, print to screen. cat("Generation:", ngen, " ", accept/(ngen/k), "\n\n"); } } # end ngens close(output) return("fossiltree" = ft); } # end function ############################################################# makeFossilTree <- function(phy, priorframe){ extant <- length(phy$tip.label); root <- length(phy$tip.label) + 1; nfossils <- length(priorframe[,1]); fossilnodes <- numeric(nfossils); fossiltips <- seq(extant + 1, extant + nfossils); for (n in 1:nfossils) { fossilnodes[n] <- mrca(phy, priorframe[n,1], priorframe[n, 2]); } if(root %in% fossilnodes) stop("the root node cannot be in the prior matrix: use the RootIs argument to specify the root condition"); edgemat <- cbind(phy$edge, phy$edge.length); fossilmat <- cbind(fossilnodes, fossiltips, rep(0, nfossils)); newedgemat <- rbind(edgemat, fossilmat); colnames(newedgemat) <- NULL; for (i in 1:length(newedgemat[,1])) { newedgemat[i,1] <- newedgemat[i,1] + nfossils; } for(i in 1:(length(newedgemat[,2]) - nfossils)) { if(newedgemat[i, 2] > extant) { newedgemat[i, 2] <- newedgemat[i, 2] + nfossils } } newedgemat <-(newedgemat)[order(newedgemat[,1]),]; phy$tip.label <- c(phy$tip.label, paste("fossil", seq(1, length(priorframe[,1])), sep = "")); phy$edge <- newedgemat[,-3]; phy$edge.length <- newedgemat[,3]; return(multi2di(phy)); } ####################################################################################### updateFossildata <- function(fossildata, propwidth) { fossilprop <- apply(as.matrix(fossildata), 1, getProposal, psi = propwidth) return(fossilprop); } ####################################################################################### getProposal<-function(currentState, psi, bound = FALSE, min = NA, max = NA, prop.type = "sw") { # This function returns a proposal using a sliding window # Arguments; # currentState state <- the currentState state of the parameter; # psi <- a tuning parameter - here, some proportion of the standard deviation from the calibration step; # min <- the lower bound for the prior distribution; # max <- the upper bound for the prior distribution; # Value; # prop <- the proposal; if(prop.type == "sw") { prop <- currentState + ((runif(1) - 0.5) * psi); } else if(prop.type == "mlt") { u <- runif(1); prop <- currentState * exp((u-0.5)*psi) } if(bound == TRUE) { if(is.na(min) & is.na(max)) stop("you must specify limits if using a bounded proposal!") if(!is.na(min) & is.na(max)) max <- Inf; if(is.na(min) & !is.na(max)) min <- -Inf; if (prop < min) { prop <- min + (min - prop) } # if lower than lower bound, bounce back; if (prop > max) { prop <- max - (prop - max) } # if higher than upper bound, bounce back; } if(prop.type == "sw") return(prop); if(prop.type == "mlt") return(list(prop = prop, hastings = u)) } ############################################## ahat <- function(C, d) { N <-length(d); ones <- rep(1, N); X <- as.numeric(d); invc <- solve(C) ahat <- solve(ones %*% invc %*% ones) * (ones %*% invc %*% X)[1,1]; return(ahat); } ############################################## MLsigmasq <- function(C, d, root) { N <-length(d); X <- as.numeric(d); EX <- root; return(((t(X-EX)) %*% solve(C) %*% (X - EX)) / N) } ############################################## makeOutput <- function(model, Ngens, sampleFreq, fossildat, outputName) { if(model == "BM") param <- 3 else param <- 4; output <- matrix(data =NA, nrow = (Ngens / sampleFreq), ncol = 1+ length(fossildat) + param); output <- file(outputName, "w"); if(model == "BM") cat("generation", "logLk","root","sigmasq", names(fossildat), file = outputName, sep = "\t","\n"); if(model == "OU") cat("generation", "logLk","root","sigmasq","alpha", names(fossildat), file = outputName, sep = "\t","\n"); if(model == "ACDC") cat("generation", "logLk","root","sigmasq","r", names(fossildat), file = outputName, sep = "\t","\n"); if(model == "TREND") cat("generation", "logLk","root","sigmasq","mu", names(fossildat), file = outputName, sep = "\t","\n"); } ############################################## getBrownianLk <- function(model, d, C, params) { N <- length(d); # the number of tips d <- d[match(rownames(C),names(d))]; X <- as.numeric(d) # just renaming the data rate <- params$sigma root <- params$root V <- getVCV(C, model, params) if(model == "TREND") { EX <- root + (params$scaler * diag(C)) } else if(model =="OU") { EX <- (root * exp(-params$scaler* diag(C))) + (params$theta*(1-exp(-params$scaler* diag(C)))) } else { EX <- matrix(rep(params$root,N), ncol = 1); # column vector of root states } logL <- -t(X-EX) %*% solve(V) %*% (X-EX) / 2-N*log(2*pi) / 2-determinant(V)$modulus[1]/2; return(as.numeric(logL)); } ################################################ generateStartingValues <- function(phy, d, C, priorframe, model, startingRate, RootIs, RootRange,startValues, scaler.prior) { fossildat <- priorframe[,3]; names(fossildat) <- paste("fossil", seq(1:length(fossildat)), sep = ""); if(startValues == "random") { if(model == "BM") { return(list("root" = runif(1, min = (0.5*min(d)),max= (2*max(d))), "sigma" = runif(1, min = 0, max = 1), "scaler" = NA, "fossil" = fossildat)) }else if(model == "OU") { return(list("root" = ahat(vcv.phylo(phy), d), "sigma" = runif(1, min = 0, max = 1), "scaler" = runif(1),"theta" =ahat(vcv.phylo(phy), d), "fossil" = fossildat)) } else if(model == "SSP") { return(list("root" = ahat(vcv.phylo(phy), d), "sigma" = runif(1, min = 0, max = 1), "scaler" = runif(1), "fossil" = fossildat)) } else if(model == "ACDC") { return(list("root" = ahat(vcv.phylo(phy), d), "sigma" = runif(1), "scaler" = 0,"fossil" = fossildat)) } else if(model == "TREND"){ return(list("root" = ahat(vcv.phylo(phy), d), "sigma" = runif(1, min = 0, max = 1), "scaler" = runif(1),"fossil" = fossildat)) } } if(startValues == "ML") { if(RootIs == "tree") root <- ahat(vcv.phylo(phy), d); if(RootIs == "FossilNorm") root <- rnorm(1, mean = RootRange[1], sd = RootRange[2]); if(RootIs == "FossilUnif") root <- runif(1, min = RootRange[1], max = RootRange[2]); if(startingRate == "tree") { sigma <- as.numeric(MLsigmasq(vcv.phylo(phy), d, root)); } else { sigma <- startingRate; } if(model == "BM") { return(list("root" = root, "sigma" = sigma, "fossil" = fossildat)) }else if(model == "OU") { return(list("root" = root, "sigma" = sigma, "scaler" = runif(1,0,0.5),"theta" =ahat(vcv.phylo(phy), d), "fossil" = fossildat)) } else if(model == "SSP") { return(list("root" = root, "sigma" = sigma, "scaler" = runif(1,0,0.5), "fossil" = fossildat)) } else if(model == "ACDC") { return(list("root" = root, "sigma" = sigma, "scaler" = 0,"fossil" = fossildat)) } else if(model == "TREND"){ return(list("root" = root, "sigma" = sigma, "scaler" = runif(1, -0.5,0.5),"fossil" = fossildat)) } } } ################################################ generateProposal <- function(ngen, params, propwidth, model, scaler.prior = NULL, fossil.prop.width) { ## draws rate parameters on the ln scale so range from -inf to inf. draws r and mu from an exp space so 0 to inf pwRoot <- propwidth[1]; pwRate <- propwidth[2]; pwScale <- propwidth[3]; pwTheta <- propwidth[4] pwFossil <- propwidth[5]; if(model == "BM"){ if(ngen %% 3 == 1) params$root <- getProposal(params$root, pwRoot); if(ngen %% 3 == 2) params$sigma <- exp(getProposal(log(params$sigma), pwRate, bound= T, min = -Inf, max = Inf)); if(ngen %% 3 == 0) { #params$fossil <- rnorm(length(params$fossil), params$fossil, fossil.prop.width); fu <- fossil.update(model, ngen, params$fossil); params$fossil[fu] <- rnorm(1, params$fossil[fu],fossil.prop.width[fu]); } } else if (model == "TREND") { if(ngen %% 4 == 1) params$root <- getProposal(params$root, pwRoot); if(ngen %% 4 == 2) params$sigma <- exp(getProposal(log(params$sigma), pwRate, bound= T, min = -Inf, max = Inf)); if(ngen %% 4 == 3) params$scaler <- log(getProposal(exp(params$scaler), pwScale, bound = T, min = 0, max = Inf)); if(ngen %% 4 == 0) { fu <- fossil.update(model, ngen, params$fossil); params$fossil[fu] <- rnorm(1, params$fossil[fu],fossil.prop.width[fu]); } } else if(model =="ACDC") { if(ngen %% 3 == 1) params$root <- getProposal(params$root, pwRoot); if(ngen %% 3 == 2) { params$sigma <- exp(getProposal(log(params$sigma), pwRate, bound= T, min = -Inf, max = Inf)); params$scaler <- log(getProposal(exp(params$scaler), pwScale, bound = T, min = exp(scaler.prior$min), max = exp(scaler.prior$max))); } if(ngen %% 3 == 0) { fu <- fossil.update(model, ngen, params$fossil); params$fossil[fu] <- rnorm(1, params$fossil[fu],fossil.prop.width[fu]); } } else if (model == "SSP") { if(ngen %% 4 == 1) params$root <- getProposal(params$root, pwRoot); if(ngen %% 4 == 2) params$sigma <- exp(getProposal(log(params$sigma), pwRate, bound= T, min = -Inf, max = Inf)); if(ngen %% 4 == 3) params$scaler <- exp(getProposal(log(params$scaler), pwScale, bound = T, min = log(10^-5), max = Inf)); if(ngen %% 4 == 0) { fu <- fossil.update(model, ngen, params$fossil); params$fossil[fu] <- rnorm(1, params$fossil[fu],fossil.prop.width[fu]); } } else if(model == "OU") { if(ngen %% 5 == 1) params$root <- getProposal(params$root, pwRoot); if(ngen %% 5 == 2) params$sigma <- exp(getProposal(log(params$sigma), pwRate, bound= T, min = -Inf, max = Inf)); if(ngen %% 5 == 3) params$scaler <- exp(getProposal(log(params$scaler), pwScale, bound = T, min = log(10^-5), max = Inf)); if(ngen %% 5 == 4) params$theta <- getProposal(params$theta, pwTheta); if(ngen %% 5 == 0){ fu <- fossil.update(model, ngen, params$fossil); params$fossil[fu] <- rnorm(1, params$fossil[fu],fossil.prop.width[fu]); } } return(params); } ################################################ lkratio <- function(ProplogLk, paramslogLk) { return(exp(ProplogLk - paramslogLk)); } ################################################ fossildensity <- function(fossilValues, priorframe) { pd <- numeric(length(fossilValues)) for(i in 1: length(pd)) { m <- as.numeric(priorframe[i,3]); s <- as.numeric(priorframe[i,4]); pd[i] <- dnorm(as.numeric(fossilValues)[i], mean = m, sd = s, log = T); } return(pd) } ################################################ rootdensity <- function(rootValue, RootIs, RootRange){ if(RootIs == "tree") rootden <- log(1); if(RootIs == "FossilNorm") rootden <- dnorm(rootValue, mean = RootRange[1], sd = RootRange[2], log = T); if(RootIs == "FossilUnif") rootden <- dunif(rootValue, min = RootRange[1], max = RootRange[2], log = T); return(rootden); } ################################################ prRatio <- function(fossilproposal, fossilcurrentState, rootproposal, rootcurrentState, priorframe, RootIs, RootRange) { return(exp(sum(fossildensity(fossilproposal, priorframe), rootdensity(rootproposal, RootIs, RootRange)) - sum(fossildensity(fossilcurrentState, priorframe), rootdensity(rootcurrentState, RootIs, RootRange)))); } ################################################ updateAcceptance <- function(k, acceptance, ngen) { if(k == 3) { if(ngen %% 3 == 1) acceptance[1] <- acceptance[1] + 1; if(ngen %% 3 == 2) acceptance[2] <- acceptance[2] + 1; if(ngen %% 3 == 0) acceptance[3] <- acceptance[3] + 1; } else if (k == 4) { if(ngen %% 4 == 1) acceptance[1] <- acceptance[1] + 1; if(ngen %% 4 == 2) acceptance[2] <- acceptance[2] + 1; if(ngen %% 4 == 3) acceptance[3] <- acceptance[3] + 1; if(ngen %% 4 == 0) acceptance[4] <- acceptance[4] + 1; } return(acceptance); } #################################################### getVCV <- function(C, model, params) { if(model == "BM" | model == "TREND") { V <- C * params$sigma; } if(model == "ACDC") { if(!params$scaler==0) { C <- (exp(params$scaler*C) - 1) / params$scaler } V <- C * params$sigma; } if(model == "OU"| model == "SSP" ) { if(params$scaler == 0) { V <- C * params$sigma; } else { V <- geiger:::ouMatrix(C, params$scaler) #V <- (params$sigma / 2*params$scaler) * V V <- params$sigma * V } } return(V); } #################################################### posteriors <- function(output, hpds = c(50, 95, 99)){ minhpd <- (100 - hpds) / 2; maxhpd <- hpds + minhpd; hpds <- paste(hpds, "%", sep = ""); n <- ncol(output); mode <- numeric(n); credible <- matrix(nrow = n, ncol = 2); convergFail <- 0; for(i in 1:n) { x <- getDensList(output[,i]) post <- hdr(den = x); mode[i] <- post$mode; cred <- post$hdr[which(rownames(post$hdr)==hpds), ]; if(length(cred)>2) cred <- c(NA, NA) credible[i, ] <- cred; } names(mode) <- colnames(output); rownames(credible) <- colnames(output); colnames(credible) <- c(paste(minhpd, "%HPD", sep = ""), paste(maxhpd, "%HPD", sep = "")); if(sum(is.na(credible)) > 0) warning("one or more of the posteriors appear to be multimodal.You may need to run your mcmc longer"); return(cbind(mode, credible)); } #################################################### getDensList <- function(dat, from = NA, to = NA, cut = F) { ## a function to give you a list of x and y components of the density from the dens function - it just saves time if you're doing this over and over again. # Arguments # dat - a vector of data you want to know the density of (e.g. output from some mcmc sampler) # from, to, cut - by default NA but use if you want to restrict your posterior density characteristics to the prior range # Values # a list containing x (values) and y (densities) for the sample if(cut == F) { den <- density(dat); dl <- list("x" = den$x, "y" = den$y); } if(cut == T){ den <- density(dat, from = from, to = to, cut = T); dl <- list("x" = den$x, "y" = den$y); } return(dl) } ################################ mrca <- function(phy, tip1, tip2) { if(is.null(tip2)) { bb<-which(phy$tip.label==tip1); mrca<-parent.node(phy, bb); } else { nn <- phy$Nnode; nt <- length(phy$tip.label); minLeaves <- nt; mrca <- NULL; for(i in 1:nn) { leaves <- node.leaves(phy, i + nt); if(tip1 %in% leaves & tip2 %in% leaves) { ll <- length(leaves); if(ll < minLeaves) { mrca <- i + nt; minLeaves <- ll } } } } return(mrca); } ################################ harmonic.mean <- function(x) {return(1/ (mean(1/x)))} ################################ remove.burnin <- function(output, burninProp = 0.1) { if(is.matrix(output)| is.data.frame(output)) { nsamples <- nrow(output); burnin <- nsamples * burninProp; return(output[-seq(1,burnin), ]); } if(is.numeric(output)) { nsamples <- length(output); burnin <- nsamples * burninProp; return(output[-seq(1,burnin)]); } } ################################ BayesFactorTest <- function(output1, output2, burninProp, BF = c("bayesfactor","log10BF", "lnBF")) { # output1 = model of interest; #output2 = null model; #burninProp = proportion of generations to discard as burnin model1 <- remove.burnin(output1, burninProp); model2 <- remove.burnin(output2, burninProp); ml1 <- harmonic.mean(model1$logLk); ml2 <- harmonic.mean(model2$logLk); bf <- ml1 - ml2; if(BF == "bayesfactor") return(list("marginal likelihood of model 1" = ml1,"marginal likelihood of model 2" = ml2, "Bayes Factor" = exp(bf))); if(BF == "log10BF") return(list("marginal likelihood of model 1" = ml1,"marginal likelihood of model 2" = ml2, "log10(Bayes Factor)" = log(exp(bf), base = 10))); if(BF == "lnBF") return(list("marginal likelihood of model 1" = ml1,"marginal likelihood of model 2" = ml2, "ln(Bayes Factor)" = bf)); } ########################################## initiate <- function(phy, d, C, priorframe, model,startingRate, RootIs, RootRange, startValues, outputName, scaler.prior) { params <- generateStartingValues(phy, d, C, priorframe, model, startingRate, RootIs, RootRange, startValues, scaler.prior)# generate starting values for the mcmc fd <- c(d, params$fossil); # adds the fossil data to the tip data ## Create the output file with appropriate column headers for the specified model ## output <- file(outputName, "w"); if(model == "BM") cat("generation", "logLk","root","sigmasq", names(params$fossil), file = output, sep = "\t","\n"); if(model == "OU") cat("generation", "logLk","root","sigmasq","alpha", "theta", names(params$fossil), file = output, sep = "\t","\n"); if(model == "SSP") cat("generation", "logLk","root","sigmasq","alpha", names(params$fossil), file = output, sep = "\t","\n"); if(model == "ACDC") cat("generation","logLk","root","sigmasq","r", names(params$fossil), file = output, sep = "\t","\n"); if(model == "TREND") cat("generation", "logLk","root","sigmasq","mu", names(params$fossil), file = output, sep = "\t","\n"); ## get the likelihood for the starting parameters and add them to the output ## paramslogLk <- getBrownianLk(model, fd, C, params); cat(0, paramslogLk, as.numeric(na.omit(as.numeric(unlist(params)))), file = output, sep = "\t","\n"); return(list(params = params, paramslogLk = paramslogLk, output = output)); } ########################################## ACDC.prior <- function(phy, decrease.max = 1.0e-5, increase.max = 1.0e+5) { if(is.ultrametric(phy)) { max.bt <- max(branching.times(phy)) } else { max.bt <- max(BranchingTimesFossil(phy)); } prior.min <- log(decrease.max) / max.bt; prior.max <- log(increase.max) / max.bt; return(list(min = prior.min, max = prior.max)); ############################################## accpt.prob <- function(ProplogLk, paramslogLk, props, params, priorframe, RootIs, RootRange) { lkrat <- lkratio(ProplogLk, paramslogLk) prrat <- prRatio(props$fossil, params$fossil, props$root, params$root, priorframe, RootIs, RootRange); if(is.null(props$hastings)) { h <- lkrat * prrat } else { h <- lkrat * prrat * props$hastings; } return(h) } } ############################################## fossil.update <- function(model, ngen, fossildata) { if(model == "SSP" | model == "TREND") { fossil.k <- 4 } else if(model == "OU"){ fossil.k <- 5 } else { fossil.k <- 3; } nfossil <- length(fossildata); fossil.gen <- (ngen/fossil.k); fossil.update <- fossil.gen %% nfossil; if(fossil.update == 0) { fossil.update <- nfossil} return(fossil.update); } ############################################################################## fitContinuousMCMC <- function(phy, d, model = c("BM", "ACDC", "SSP", "TREND"), Ngens = 1000000, sampleFreq = 1000, printFreq = 1000, startValues = "ML", startingRate = NULL, propwidth = c(0.1, 0.1, 0.1), RootIs =c("tree", "FossilNorm", "FossilUnif"), acdc.prior = NULL, RootRange = NA, hpdInterval = 95, outputName = "MCMCoutput", restart = 100) { ### Preliminaries if(model=="TREND") { if(is.ultrametric(phy)) print("cannot fit a trend model to an ultrametric tree: a standard BM model will be fit instead"); model <- "BM" } if(model == "BM") k <- 2 else k <- 3; if((!RootIs == "tree")) if(length(RootRange) < 2) stop(" if using a fossil prior on the root, you must specify distribution parameters using the RootRange argument"); d <- d[match(phy$tip.label, names(d))]; C <- vcv.phylo(phy); if(model == "ACDC"){ if(is.null(acdc.prior)){ scaler.prior <- ACDC.prior(phy); }else{ scaler.prior <- acdc.prior; } } else { scaler.prior <- NULL; } starting<- initiate_nofossil(phy, d, C, model,startingRate, RootIs, RootRange, startValues, outputName, scaler.prior); params <- starting$params; paramslogLk <- starting$paramslogLk; output <- starting$output; #pb <- txtProgressBar(min = 0, max = Ngens, char = "-", style = 3); accept <- rep(0, k); for (ngen in 1: Ngens) { props <- Proposal(k, ngen, params, propwidth, model, scaler.prior); ## needs to change if rate prior is specified as normal proplk <- getBrownianLk(model,d, C, props); if(proplk== - Inf) { lkRatio <- 0 ; } else { lkRatio <- lkratio(proplk, paramslogLk); } if(lkRatio >= 1) { params <- props paramslogLk <- proplk accept <- mcmcAcceptance(k, accept, ngen); } if (lkRatio < 1) { p <- runif(1); if (p <= lkRatio) { params <- props paramslogLk <- proplk accept <- mcmcAcceptance(k, accept, ngen); } } if (ngen%% sampleFreq == 0) { ## if on a sampling generation, log parameters and likelihoods. cat(ngen, paramslogLk, as.numeric(na.omit(as.numeric(unlist(params)))), file = output, sep = "\t","\n"); } if (ngen %% printFreq == 0) { ## if on a print generation, print to screen. cat("Generation:", ngen, " ", accept/(ngen/k), "\n\n"); } if (ngen %% restart == 0) { if (accept[1] == 0) { starting<- initiate_nofossil(d, C, model,startingRate, RootIs, RootRange, startValues, outputName); params <- starting$params; paramslogLk <- starting$paramslogLk; output <- starting$output; accept <- rep(0, k); } } #setTxtProgressBar(pb, ngen); } # end of loop through ngens #close(pb); }## end of mcmc function ###################################################### ## other functions ## ###################################################### acceptanceProb <- function(lkRatio, rootState, rootProp,rootPrior, rootPriorType) { if (rootPriorType == "uniform") { acceptanceProb <- lkRatio; } if (rootPriorType == "normal") { acceptanceProb <- lkRatio * dnorm(rootProp, mean = rootPrior[1], sd = rootPrior[2]) / dnorm(rootState, mean = rootPrior[1], sd = rootPrior[2]); } return(acceptanceProb); } ################################################## getProposal<-function(currentState, psi, bound = FALSE, min = NA, max = NA) { # This function returns a proposal using a sliding window # Arguments; # currentState state <- the currentState state of the parameter; # psi <- a tuning parameter - here, some proportion of the standard deviation from the calibration step; # min <- the lower bound for the prior distribution; # max <- the upper bound for the prior distribution; # Value; # prop <- the proposal; prop <- currentState + ((runif(1) - 0.5) * psi); if(bound == TRUE) { if(is.na(min) & is.na(max)) stop("you must specify limits if using a bounded proposal!") if(!is.na(min) & is.na(max)) max <- Inf; if(is.na(min) & !is.na(max)) min <- -Inf; if (prop < min) { prop <- min + (min - prop) } # if lower than lower bound, bounce back; if (prop > max) { prop <- max - (prop - max) } # if higher than upper bound, bounce back; } return(prop); } ################################################## ahat <- function(C, d) { N <-length(d); ones <- rep(1, N); X <- d; ahat <- solve(ones %*% solve(C) %*% ones) * (ones %*% solve(C) %*% X)[1,1]; return(ahat); } #################################################### discardBurnin <- function(data, frac) { if(!is.matrix(data)) { print( "error - data is not in correct format") } burnin <- length(data[ ,1]) * frac data <- data[-c(1: burnin), ] return(data) } ##################################################### mrca<-function(phy, tip1, tip2) { if(is.null(tip2)) { bb<-which(phy$tip.label==tip1) mrca<-parent.node(phy, bb) } else { nn<-phy$Nnode nt<-length(phy$tip.label) minLeaves<-nt mrca<-NULL for(i in 1:nn) { leaves<-node.leaves(phy, i+nt) if(tip1 %in% leaves & tip2 %in% leaves) { ll<-length(leaves) if(ll n) l<-c(l, allBranches(phy, j)); } return(l); } ##################################################### inspectPosteriors <- function(output) { L <- length(output[1, ]); par(mfrow = c(1, L-1)); for(i in 2: L) { plot(output[,1], output[,i], type = "l", xlab = "generation", ylab = colnames(output)[i]) } } ##################################################### getDensList <- function(dat, from = NA, to = NA, cut = F) { if(cut == F) { den <- density(dat); dl <- list("x" = den$x, "y" = den$y); } if(cut == T){ den <- density(dat, from = from, to = to, cut = T); dl <- list("x" = den$x, "y" = den$y); } return(dl) } MLsigmasq <- function(C, d, root) { N <-length(d); X <- as.numeric(d); EX <- root; return(((t(X-EX)) %*% solve(C) %*% (X - EX)) / N) } StartingValues_nofossils <- function(phy, d, C, model, startingRate, RootIs, RootRange,startValues, scaler.prior) { if(startValues == "random") { if(model == "SSP") { return(list("root" = ahat(vcv.phylo(phy), d), "sigma" = runif(1, min = 0, max = 1), "scaler" = runif(1))) } if(model == "ACDC") { return(list("root" = ahat(vcv.phylo(phy), d), "sigma" = runif(1, 0, 1), scaler = 0)) } if(model == "BM") { return(list("root" = runif(1, min = (0.5*min(d)), max = (2*max(d))), "sigma" = runif(1, min = 0, max = 1), "scaler" = NA)) } } if(startValues == "ML") { if(RootIs == "tree") root <- ahat(C, d); if(RootIs == "FossilNorm") root <- rnorm(1, mean = RootRange[1], sd = RootRange[2]); if(RootIs == "FossilUnif") root <- runif(1, min = RootRange[1], max = RootRange[2]); } if(startingRate == "tree") { sigma <- as.numeric(MLsigmasq(C, d, root)); } else { sigma <- startingRate; } if(!model == "BM") scaler = 0.1 else scaler = NA; return(list("root" = root, "sigma" = sigma, "scaler" = scaler)); } ################################################ Proposal <- function(k, ngen, params, propwidth, model, scaler.prior) { ## draws rate parameters on the ln scale so range from -inf to inf. draws r and mu from an exp space so 0 to inf pwRoot <- propwidth[1]; pwRate <- propwidth[2]; pwScale <- propwidth[3]; ## only works for pure BM right now if(model == "BM"){ if(ngen %% 2 == 1) params$root <- getProposal(params$root, pwRoot); if(ngen %% 2 == 0) params$sigma <- exp(getProposal(log(params$sigma), pwRate, bound= T, min = -Inf, max = Inf)); } else if (!model == "BM") { if(ngen %% 3 == 1) params$root <- getProposal(params$root, pwRoot); if(ngen %% 3 == 2) params$sigma <- exp(getProposal(log(params$sigma), pwRate, bound= T, min = -Inf, max = Inf)); if(ngen %% 3 == 0) { if(model == "SSP") { params$scaler <- getProposal(params$scaler, pwScale, bound = T, min = 0);; } else { params$scaler <- log(getProposal(exp(params$scaler), pwScale, bound = T, min = exp(scaler.prior$min), max = exp(scaler.prior$max))); } } } return(params); } ################################################ mcmcAcceptance <- function(k, acceptance, ngen) { if(k == 2) { if(ngen %% 2 == 1) acceptance[1] <- acceptance[1] + 1; if(ngen %% 2 == 0) acceptance[2] <- acceptance[2] + 1; } else if (k == 3) { if(ngen %% 3 == 1) acceptance[1] <- acceptance[1] + 1; if(ngen %% 3 == 2) acceptance[2] <- acceptance[2] + 1; if(ngen %% 3 == 0) acceptance[3] <- acceptance[3] + 1; } return(acceptance); } ################################################## initiate_nofossil <- function(phy, d, C, model, startingRate, RootIs, RootRange, startValues, outputName, scaler.prior) { output <- file(outputName, "w"); if(model == "BM") cat("generation", "logLk","root","sigmasq",file = output, sep = "\t","\n"); if(model == "SSP") cat("generation", "logLk","root","sigmasq","alpha", file = output, sep = "\t","\n"); if(model == "ACDC") cat("generation","logLk","root","sigmasq","r", file = output, sep = "\t","\n"); if(model == "TREND") cat("generation", "logLk","root","sigmasq","mu", file = output, sep = "\t","\n"); params <- StartingValues_nofossils(phy, d, C, model, startingRate, RootIs, RootRange, startValues, scaler.prior)# generate starting values for the mcmc paramslogLk <- getBrownianLk(model, d, C, params); cat(0, paramslogLk, as.numeric(na.omit(as.numeric(unlist(params)))), file = output, sep = "\t","\n"); return(list(params = params, paramslogLk = paramslogLk, output = output)); } ############################################# find.node.fossils <- function(phy, tol) { ## A function to take a tree with extinct branches and find the fossils associated with nodes in the extant tree. association is determined based on tol - a time tolerance within which the fossils may fall from the node. #phy is the tree with all the fossils if(sum(phy$edge.length == 0)>0) { phy <- drop.tip(phy, phy$tip.label[phy$edge[which(phy$edge.length==0)[1],2]]) ## remove one of the zero length tips } extant.phy <- prune.extinct.taxa(phy); # this is the extant taxon tree bt <- branching.times(extant.phy); # branching times for the extant tree tiptimes <- tip.ages(phy); # returns the ages of the fossil and extant (0) tips nodes <- names(bt) fossiltips <- names(tiptimes)[-match(extant.phy$tip.label, names(tiptimes))]; # names of the fossil taxa; ###### this loop gets all the fossil data ###### fossils.for.nodes <- list(); tol <- tol; for(i in 2:length(nodes)) { current.node <- as.numeric(nodes[i]) current.node.age <- as.numeric(bt[match(current.node, names(bt))]); tips <- node.leaves(extant.phy,current.node); # gets all the daughter species from a node in the extant tree fossilnode <- mrca(phy, tips[1], tips[length(tips)]); #gets the same node from the fossil tree fossilparent <- parent.node(phy, fossilnode); # gives the parent node of the node fossilparent.leaves <- node.leaves(phy, fossilparent); #get all the descendents; putative.fossils <- intersect(fossilparent.leaves, fossiltips); fossil.times <- tiptimes[match(putative.fossils, names(tiptimes))]; # ages of putative fossils; if(length(fossil.times) > 0) { fossil.keepers <- numeric(); for(f in 1:length(fossil.times)) { if(abs(current.node.age - fossil.times[f]) < tol) { fossil.keepers <- append(fossil.keepers,names(fossil.times)[f]); } } } else { fossil.keepers <- 0; } fossils.for.nodes[[i]] <- fossil.keepers; names(fossils.for.nodes)[[i]] <- current.node; } keepers <- numeric() for(i in 1:length(fossils.for.nodes)) { if (is.character(fossils.for.nodes[[i]])) { keepers <- c(keepers, i) } } nodes.for.use <- list(); for(i in 1:length(keepers)){ nodes.for.use[[i]] <- fossils.for.nodes[[keepers[i]]]; names(nodes.for.use)[[i]] <- names(fossils.for.nodes)[[keepers[i]]] } return(nodes.for.use); } ########################################################################## tip.ages <- function (phy, tol = .Machine$double.eps^0.5) { if (class(phy) != "phylo") stop("object \"phy\" is not of class \"phylo\"") phy2 <- phy phy <- new2old.phylo(phy) tmp <- as.numeric(phy$edge) nb.tip <- max(tmp) nb.node <- -min(tmp) xx <- as.numeric(rep(NA, nb.tip + nb.node)) names(xx) <- as.character(c(-(1:nb.node), 1:nb.tip)) xx["-1"] <- 0 for (i in 2:length(xx)) { nod <- names(xx[i]) ind <- which(as.numeric(phy$edge[, 2]) == nod) base <- phy$edge[ind, 1] xx[i] <- xx[base] + phy$edge.length[ind] } depth <- max(xx) offset <- depth - xx[names(xx) > 0] names(offset) <- phy$tip.label; for(i in 1:length(offset)) { if(offset[i]<.Machine$double.eps^0.5) offset[i] <- 0; } return(offset); } ############################################################ parent.node <- function(phy, tip){ ## a function to return the parent node of a given tip # Arguments # phy <- a phylogenetic tree # tip <- a tip name # Value # a node number return(phy$edge[which(phy$edge[ ,2] == tip), 1]); } ######################################### mrca <- function(phy, tip1, tip2) { if(is.null(tip2)) { bb<-which(phy$tip.label==tip1); mrca<-parent.node(phy, bb); } else { nn <- phy$Nnode; nt <- length(phy$tip.label); minLeaves <- nt; mrca <- NULL; for(i in 1:nn) { leaves <- node.leaves(phy, i + nt); if(tip1 %in% leaves & tip2 %in% leaves) { ll <- length(leaves); if(ll < minLeaves) { mrca <- i + nt; minLeaves <- ll } } } } return(mrca); } ######################################### make.fossil.datamatrix <- function(phy, nodefossils, d) { nodes <- names(nodefossils) # the names of the nodes identified above means <- numeric(length(nodes)) sds <- numeric(length(nodes)) for(i in 1:length(nodes)) { means[i] <- mean(d[match(nodefossils[[i]], names(d))]) sds[i] <- sd(d[match(nodefossils[[i]], names(d))]) } singlefossils <- which(is.na(sds)) if(length(singlefossils)>0) { sds[singlefossils] <- mean(sds, na.rm = T); } phy <- prune.extinct.taxa(phy); leaves <- lapply(nodes, node.leaves, phy = phy); foo1 <- function(x) { return(x[1]) } foo2 <- function(x) { return(x[length(x)]) } tip1 <- unlist(lapply(leaves, foo1)); tip2 <- unlist(lapply(leaves, foo2)); return(cbind(tip1, tip2, means, sds)); } ############################################# fossiltree.N.extant <- function(lambda, mu, ntipsreq) { while(1) { # this loop will simulate birth death trees using the geiger function and stop when a tree with the right number of extant tips has been found birthdeath.tree(b = lambda, d = mu, taxa.stop = ntipsreq+1) -> t t2 <- prune.extinct.taxa(t); if(sum(t2$edge.length==0)>0) { t2 <- drop.tip(t2, t2$tip.label[t2$edge[which(t2$edge.length==0),2][1]]); } if(length(t2$tip.label)==ntipsreq) break } return("phy" = drop.tip(t, t$tip.label[t$edge[which(t$edge.length==0),2][1]])); } ############################################# # Simulates BM evolution more quickly. # A trend can be simulated by mu!=0. # mu=0 is standard BM; mu<0 downward trend; mu>0 upward trend. # Bounds can be simulated by bounds=c(>-Inf, bounds[1]. Simulating without bounds.") bounds<-c(-Inf,Inf) } if(bounds[1]==-Inf&&bounds[2]==Inf) no.bounds=TRUE else no.bounds=FALSE if(abounds[2]){ warning("a must be bounds[1] 0. Setting sig2 to 1.0.") sig2=1.0 } # function for reflection off bounds reflect<-function(yy,bounds){ while(yybounds[2]){ if(yybounds[2]) yy<-2*bounds[2]-yy } return(yy) } # how many species? n<-length(tree$tip) # first simulate changes along each branch x<-matrix(data=rnorm(n=length(tree$edge.length)*nsim,mean=rep(mu*tree$edge.length,nsim),sd=rep(sqrt(sig2*tree$edge.length),nsim)),length(tree$edge.length),nsim) # now add them up y<-array(0,dim=c(nrow(tree$edge),ncol(tree$edge),nsim)) for(i in 1:nrow(x)){ if(tree$edge[i,1]==(n+1)) y[i,1,]<-a else y[i,1,]<-y[match(tree$edge[i,1],tree$edge[,2]),2,] y[i,2,]<-y[i,1,]+x[i,] if(!no.bounds) y[i,2,]<-apply(as.matrix(y[i,2,]),1,function(yy) reflect(yy,bounds)) } rm(x); x<-matrix(data=rbind(y[1,1,],as.matrix(y[,2,])),length(tree$edge.length)+1,nsim) rownames(x)<-c(n+1,tree$edge[,2]) x<-as.matrix(x[as.character(1:(n+tree$Nnode)),]) rownames(x)[1:n]<-tree$tip.label # return simulated data if(internal==TRUE) return(x[1:nrow(x),]) # include internal nodes else return(x[1:length(tree$tip.label),]) # tip nodes only } extantMRCA.State <- function(phy, d) { node.d <- d[-match(phy$tip.label, names(d))] d <- d[match(phy$tip.label, names(d))] extant.tips <- prune.extinct.taxa(phy)$tip.label extant.root <- match(mrca(phy, extant.tips[1], extant.tips[length(extant.tips)]), names(node.d)) return(as.numeric(node.d[extant.root])); } ########################################################################### ACDC.Sim <- function(tree, sigma, r, root, nsim = 1) { if(is.ultrametric(tree)) { bt <- branching.times(tree) } else if(!is.ultrametric(tree)) { bt <- btfossils(tree); bt <- bt[-match(tree$tip.label, names(bt))] } new.edge <- tree$edge.length names(bt) <- as.numeric(names(bt)) for (i in 1:length(tree$edge.length)) { bl <- tree$edge.length[i] age = bt[which(names(bt) == tree$edge[i, 1])] t1 = max(bt) - age t2 = t1 + bl new.edge[i] = (exp(r * t2) - exp(r * t1))/(r) } tree -> phy phy$edge.length <- new.edge d <- fastBM(phy, a = root, sig2 = sigma, internal = TRUE); tips <- d[match(tree$tip.label, names(d))]; nodes <- d[-match(tree$tip.label, names(d))] return(list(nodes = nodes, tips = tips)) } ############################################# fitContinuous_fossil <- function(phy, data, data.names = NULL, model = c("BM", "OU", "lambda", "kappa", "delta", "EB", "white", "trend"), bounds = NULL, meserr = NULL) { model <- match.arg(model) td <- treedata(phy, data, data.names, sort = T) ntax = length(td$phy$tip.label) if (is.null(meserr)) { me = td$data me[] = 0 meserr = me } else if (length(meserr) == 1) { me = td$data me[] = meserr meserr = me } else if (is.vector(meserr)) { if (!is.null(names(meserr))) { o <- match(rownames(td$data), names(meserr)) if (length(o) != ntax) stop("meserr is missing some taxa from the tree") meserr <- as.matrix(meserr[o, ]) } else { if (length(meserr) != ntax) stop("No taxon names in meserr, and the number of taxa does not match the tree") me <- td$data me[] = meserr meserr = me } } else { if (!is.null(rownames(meserr))) { o <- match(rownames(td$data), rownames(meserr)) meserr = meserr[o, ] } else { if (sum(dim(meserr) != dim(td$data)) != 0) stop("No taxon names in meserr, and the number of taxa does not match the tree") print("No names in meserr; assuming that taxa are in the same order as tree") } } ds <- list() ds$tree <- td$phy cat("Fitting ", model, "model:\n") bounds.default <- matrix(c(1e-08, 20, 1e-07, 1, 1e-06, 1, 1e-05, 2.999999, 1e-07, 50, -3, 0, 1e-10, 100, -100, 100), nrow = 8, ncol = 2, byrow = TRUE) rownames(bounds.default) <- c("beta", "lambda", "kappa", "delta", "alpha", "a", "nv", "mu") colnames(bounds.default) <- c("min", "max") if (is.null(bounds)) { bounds <- bounds.default } else { if (class(bounds) != "list") { stop("Please specify user defined parameter bounds as a list()") } else { specified <- !c(is.null(bounds$beta), is.null(bounds$lambda), is.null(bounds$kappa), is.null(bounds$delta), is.null(bounds$alpha), is.null(bounds$a), is.null(bounds$nv), is.null(bounds$mu)) bounds.user <- matrix(c(bounds$beta, bounds$lambda, bounds$kappa, bounds$delta, bounds$alpha, bounds$a, bounds$nv, bounds$mu), nrow = sum(specified), ncol = 2, byrow = TRUE) rownames(bounds.user) <- c("beta", "lambda", "kappa", "delta", "alpha", "a", "nv", "mu")[specified] colnames(bounds.user) <- c("min", "max") bounds <- bounds.default bounds[specified, ] <- bounds.user } } ds$bounds <- data.frame(t(bounds)) ds$model <- model result <- list() for (i in 1:ncol(td$data)) { ds$data = td$data[, i] ds$meserr = meserr[, i] result[[i]] <- fitContinuousModel_fossil(ds, print = print) if (!is.null(colnames(td$data))) names(result)[i] <- colnames(td$data)[i] else names(result)[i] <- paste("Trait", i, sep = "") } result } fitContinuousModel_fossil <- function (ds, print = TRUE) { bounds <- ds$bounds model <- ds$model n <- length(ds$data) beta.start <- var(ds$data)/max(branching.times(ds$tree)) out <- NULL y <- ds$data tree <- ds$tree meserr <- ds$meserr n <- length(y) if (model == "BM") { k <- 2 vcv <- vcv.phylo(tree) start = log(beta.start) lower = log(bounds[1, "beta"]) upper = log(bounds[2, "beta"]) foo <- function(x) { vv <- exp(x) * vcv diag(vv) <- diag(vv) + meserr^2 mu <- phylogMean(vv, y) mu <- rep(mu, n) -dmvnorm(y, mu, vv, log = T) } o <- optim(foo, p = start, lower = lower, upper = upper, method = "L") results <- list(lnl = -o$value, beta = exp(o$par)) } else if (model == "lambda") { k <- 3 start = log(c(beta.start, 0.5)) lower = log(bounds[1, c("beta", "lambda")]) upper = log(bounds[2, c("beta", "lambda")]) foo <- function(x) { vcv <- vcv.phylo(tree) index <- matrix(TRUE, n, n) diag(index) <- FALSE vcv[index] <- vcv[index] * exp(x[2]) vv <- exp(x[1]) * vcv diag(vv) <- diag(vv) + meserr^2 mu <- phylogMean(vv, y) mu <- rep(mu, n) -dmvnorm(y, mu, vv, log = T) } o <- optim(foo, p = start, lower = lower, upper = upper, method = "L") results <- list(lnl = -o$value, beta = exp(o$par[1]), lambda = exp(o$par[2])) } else if (model == "kappa") { k <- 3 start = log(c(beta.start, 0.5)) lower = log(bounds[1, c("beta", "kappa")]) upper = log(bounds[2, c("beta", "kappa")]) foo <- function(x) { t <- kappaTree(tree, kappa = exp(x[2])) vcv <- vcv.phylo(t) vv <- exp(x[1]) * vcv diag(vv) <- diag(vv) + meserr^2 mu <- phylogMean(vv, y) mu <- rep(mu, n) -dmvnorm(y, mu, vv, log = T) } o <- optim(foo, p = start, lower = lower, upper = upper, method = "L") results <- list(lnl = -o$value, beta = exp(o$par[1]), lambda = exp(o$par[2])) } else if (model == "delta") { k <- 3 start = log(c(beta.start, 0.5)) lower = log(bounds[1, c("beta", "delta")]) upper = log(bounds[2, c("beta", "delta")]) foo <- function(x) { t <- deltaTree(tree, delta = exp(x[2])) vcv <- vcv.phylo(t) vv <- exp(x[1]) * vcv diag(vv) <- diag(vv) + meserr^2 mu <- phylogMean(vv, y) mu <- rep(mu, n) -dmvnorm(y, mu, vv, log = T) } o <- optim(foo, p = start, lower = lower, upper = upper, method = "L") results <- list(lnl = -o$value, beta = exp(o$par[1]), delta = exp(o$par[2])) } else if (model == "white") { k <- 2 start = c(mean(y), log(var(y))) lower = c(-Inf, log(bounds[1, "nv"])) upper = c(Inf, log(bounds[2, "nv"])) lnl.noise <- function(p, x, se) { root <- p[1] vs <- exp(p[2]) n <- length(x) VV <- diag(vs, nrow = n) diag(VV) <- diag(VV) + se^2 M <- rep(root, n) -dmvnorm(x, M, VV, log = TRUE) } o <- optim(start, fn = lnl.noise, x = y, se = meserr, lower = lower, upper = upper, method = "L") results <- list(lnl = -o$value, mean = o$par[1], nv = exp(o$par[2])) } else if (model == "trend") { k <- 3 vcv <- vcv.phylo(tree) ww <- lm(y ~ diag(vcv)) p0 <- c(phylogMean(vcv, y), var(y)/max(branching.times(tree)), coef(ww)[2]) if (is.na(p0[3])) { p0[3] <- 0 if (is.ultrametric(tree)) cat("WARNING: Cannot estimate a trend with an ultrametric tree; lnl will be the same as the BM model") } lower = c(-Inf, log(bounds[1, "beta"]), bounds[1, "mu"]) upper = c(Inf, log(bounds[2, "beta"]), bounds[2, "mu"]) lnl.BMtrend <- function(p, vcv, x, se) { root <- p[1] vs <- exp(p[2]) ms <- p[3] VV <- vs * vcv diag(VV) <- diag(VV) + se^2 n <- length(x) M <- root + ms * diag(vcv) -dmvnorm(x, M, VV, log = TRUE) } o <- optim(p0, fn = lnl.BMtrend, vcv = vcv, x = y, se = meserr, lower = lower, upper = upper, method = "L") names(o$par) <- NULL results <- list(lnl = -o$value, mean = o$par[1], beta = exp(o$par[2]), mu = o$par[3]) } else if (model == "OU") { k <- 3 start = log(c(beta.start, 0.5)) lower = log(bounds[1, c("beta", "alpha")]) upper = log(bounds[2, c("beta", "alpha")]) vcvOrig <- vcv.phylo(tree) foo <- function(x) { vcv <- geiger:::ouMatrix(vcvOrig, exp(x[2])) vv <- exp(x[1]) * vcv diag(vv) <- diag(vv) + meserr^2 mu <- phylogMean(vv, y) mu <- rep(mu, n) -dmvnorm(y, mu, vv, log = T) } outTries <- list() start = c(log(beta.start), -50) outTries[[1]] <- optim(foo, p = start, lower = lower, upper = upper, method = "L") tv <- var(y) start = log(c(tv * 2000, 1000)) outTries[[2]] <- optim(foo, p = start, lower = lower, upper = upper, method = "L") for (i in 1:10) { while (1) { lower = c(runif(2, min = -20, max = -1)) upper = lower + runif(2, min = 0, max = 10) start = c(runif(1, min = lower[1], max = upper[1]), runif(1, min = lower[2], max = upper[2])) te <- try(outTries[[i + 2]] <- optim(foo, p = start, lower = lower, upper = upper, method = "L"), silent = T) if (class(te) != "try-error") break } } atry <- -5:4 stry <- log(tv * 2 * exp(atry)) for (i in 1:10) { while (1) { lower = c(-20, -20) upper = c(10, 10) start = c(stry[i], atry[i]) te <- try(outTries[[i + 12]] <- optim(foo, p = start, lower = lower, upper = upper, method = "L"), silent = T) if (class(te) != "try-error") break } } ntries <- 22 ltry <- numeric(ntries) lsol <- matrix(nrow = ntries, ncol = 2) for (j in 1:ntries) { ltry[j] <- outTries[[j]]$value lsol[j, ] <- exp(outTries[[j]]$par) } ltd <- ltry - min(ltry) b <- min(which(ltry == min(ltry))) gc <- which(ltd < 0.01) us <- lsol[gc, 1] usc <- sum((us - min(us)) > 0.01) out <- outTries[[b[1]]] if (usc > 1) { out$message = "Warning: likelihood surface is flat." } if (out$convergence != 0) { out$message = "Warning: may not have converged to a proper solution." } results <- list(lnl = -out$value, beta = exp(out$par[1]), alpha = exp(out$par[2]), convergence = out$convergence, message = out$message, k = k) } else if (model == "EB") { k <- 3 start = c(log(beta.start), 0.01) lower = c(log(bounds[1, "beta"]), bounds[1, "a"]) upper = c(log(bounds[2, "beta"]), bounds[2, "a"]) foo <- function(x) { t <- exponentialchangeTree_fossil(tree, a = x[2]) vcv <- vcv.phylo(t) vv <- exp(x[1]) * vcv diag(vv) <- diag(vv) + meserr^2 mu <- phylogMean(vv, y) mu <- rep(mu, n) -dmvnorm(y, mu, vv, log = T) } o <- optim(foo, p = start, lower = lower, upper = upper, method = "L") results <- list(lnl = -o$value, beta = exp(o$par[1]), a = o$par[2]) } results$aic <- 2 * k - 2 * results$lnl results$aicc <- 2 * k * (n - 1)/(n - k - 2) - 2 * results$lnl results$k <- k return(results) } ########################################################################## BranchingTimesFossil <- function (phy, tol = .Machine$double.eps^0.5) { if (class(phy) != "phylo") stop("object \"phy\" is not of class \"phylo\"") phy2 <- phy phy <- new2old.phylo(phy) tmp <- as.numeric(phy$edge) nb.tip <- max(tmp) nb.node <- -min(tmp) xx <- as.numeric(rep(NA, nb.tip + nb.node)) names(xx) <- as.character(c(-(1:nb.node), 1:nb.tip)) xx["-1"] <- 0 for (i in 2:length(xx)) { nod <- names(xx[i]) ind <- which(as.numeric(phy$edge[, 2]) == nod) base <- phy$edge[ind, 1] xx[i] <- xx[base] + phy$edge.length[ind] } bt <- abs(xx - max(xx)); for(i in 1:length(bt)) { if(bt[i]<.Machine$double.eps^0.5) bt[i] <- 0; } names(bt) <- c(seq(nb.tip+1, nb.tip+nb.node), phy$tip.label) return(bt[-match(phy$tip.label, names(bt))]); } ############################################################ exponentialchangeTree_fossil <- function (phy, endRate = NULL, a = NULL) { if (is.null(a) && is.null(endRate)) stop("Must supply either endRate or a") times <- BranchingTimesFossil(phy) d <- max(times) if (is.null(a)) a <- log(endRate)/d if (a == 0) return(phy) names(times) <- (as.numeric(names(times))) for (i in 1:length(phy$edge.length)) { bl <- phy$edge.length[i] age = times[which(names(times) == phy$edge[i, 1])] t1 = max(times) - age t2 = t1 + bl phy$edge.length[i] = (exp(a * t2) - exp(a * t1))/(a) } phy } phylogMean <- function (phyvcv, data) { o <- rep(1, length(data)) ci <- solve(phyvcv) m1 <- solve(t(o) %*% ci %*% o) m2 <- t(o) %*% ci %*% data return(m1 %*% m2) } ############################################################ ########################################################################### OU.Sim <- function(tree, sigma, alpha, root, optimum) { traits <- numeric(length(tree$edge.length)+1); TE <- tree$edge.length traits[1] <- root; names(traits) <- c(tree$edge[1,1], tree$edge[,2]); vars <- (1-exp(-2*alpha*tree$edge.length)) * (sigma/ (2*alpha)); for(i in 2:length(traits)) { parent <- tree$edge[as.numeric(i-1),][1] traits[i] <- rnorm(1, mean = (traits[which(names(traits)==parent)]* exp(-(alpha*TE[i-1]))) + ( optimum * (1-exp(-(alpha*TE[i-1])))), sd = sqrt(vars[i-1])); } nodes <- traits[match(names(branching.times(tree)), names(traits))]; tips <- traits[-match(names(branching.times(tree)), names(traits))]; names(tips) <- tree$tip.label; return(list(nodes = nodes, tips = tips)) }