###################### Fitting models of trait evolution of caniform ###################### ###################### carnivore body size with and without fossil info ###################### ###################### PRELIMINARIES ###################### ## make sure working directory is set to where scripts and data are placed. Output files will be written to that directory also. setwd("yourworkingdirectoryhere"); ## first, source the fitContinuousMCMC code. The version provided here is the working version used to conduct analyses done in the manuscript and should lead to comparable results. A refined version with more options and user control will be posted to the Harmon lab code page (http://www.webpages.uidaho.edu/~lukeh/software/index.html) and ultimately will be incorporated into geiger ## source("fitContinousMCMC.R"); phy <- read.tree("caniform.phy"); d <- read.csv("caniform_mass.csv", row.names = 1); priors <- read.csv("priors.csv"); # optionally plot phylogeny to take a look # #plot(ladderize(phy), cex = 0.5); axisPhylo(); ## ensure non-overlapping species are removed from both datasets (the body size data contains all carnivoran entries from panTheria) ## td <- treedata(phy, d); phy <- td$phy; d <- td$data; ## data need to be in named vector form ## extant.dat <- d[,1]; names(extant.dat) <- rownames(d); ## the priors are currently not in the correct format ## head(priors); ## for this code we require a dataframe with four columns, the first two defining the node to be targetted and the second two providing the mean and sd for the normal distribution. Note also that the root node prior is treated separately and should be removed ## root.prior <- c(priors[1,4], priors[1,5]) node.priors <- priors[-1,-c(1,6)]; root.prior node.priors ### this looks good. now we're ready to start analyses ## ###################### ANALYSES ###################### ## first we run analyses without fossil priors on any nodes. This is equivalent to a straight MCMC version of fitContinuous. Note that because no fossils are available and the tree is ultrametric, Trend cannot be fitted ## fitContinuousMCMC(phy, extant.dat, model = "BM", Ngens = 500000, sampleFreq = 100, printFreq=1000, outputName = "BMnofossils.txt", propwidth = c(2,1,1), RootIs = "tree", RootRange = NA, startValues = "ML", startingRate = "tree"); fitContinuousMCMC(phy, extant.dat, model = "ACDC", Ngens = 500000, sampleFreq = 100, printFreq=1000, outputName = "ACDCnofossils.txt", propwidth = c(2,1,0.1), RootIs = "tree", RootRange = NA, startValues = "ML", startingRate = "tree"); fitContinuousMCMC(phy, extant.dat, model = "SSP", Ngens = 500000, sampleFreq = 100, printFreq=1000, outputName = "SSPnofossils.txt", propwidth = c(2,1,0.1), RootIs = "tree", RootRange = NA, startValues = "ML", startingRate = "tree") ## now that the analyses are run, we we can read the output files back in and perform model selection. bm <- remove.burnin(read.table("BMnofossils.txt", header = T)); ssp <- remove.burnin(read.table("SSPnofossils.txt", header = T)); ac <- remove.burnin(read.table("ACnofossils.txt", header = T)); ## remove burn-in defaults at 10% - this seems fine ## ## we'll use the harmonic mean estimator of the marginal likelihood ## bm.lk <- harmonic.mean(bm$logLk); ssp.lk <- harmonic.mean(ssp$logLk); ac.lk <- harmonic.mean(ac$logLk); ## and Bayes factors can be computed, using BM as a null model. the other two models both hold BM as a special case ## bfssp <- BayesFactorTest(ssp, bm, 0, "log10BF")[[3]]; bfac <- BayesFactorTest(ac, bm, 0, "log10BF")[[3]]; ## table up the results ## res <- cbind(c(bm.lk, "-", ssp.lk, ac.lk), c("-", "-", 2*bfssp, 2*bfac)); colnames(res) <- c("marginal lk", "2*logBF"); rownames(res) <- c("BM", "Trend", "SSP", "ACDC"); pp <- c("-", "-", "-",length(which(ac$r>0)) / nrow(ac) ); ## this line computes the posterior probability that the non-BM parameter for ACDC is greater than 0 (i.e. = AC). The pp of DC is 1-ppAC. If BM is true, these should both be ~0.5 res <- cbind(res, pp); ## look at them and write them to a text file ## res; write.table(res, "results_nofossils.txt",quote = FALSE, sep = "\t"); ### Now we'll try using informative fossil priors on internal nodes in the phylogeny but leave an uniformative prior on the root. Trend can be added now #### fitContinuousMCMC_fossil(phy = phy, d = extant.dat, prior.frame = node.priors, model = "BM", Ngens = 500000, sampleFreq = 100, printFreq=1000, outputName = "BMnoroot.txt", propwidth = c(1,1,1,1,1), RootIs = "tree", RootRange = NA, startValues = "ML"); fitContinuousMCMC_fossil(phy, extant.dat, node.priors, model = "SSP", Ngens = 500000, sampleFreq = 100, printFreq=1000, outputName = "SSPnoroot.txt", propwidth = c(1,1,0.1,1,1), RootIs = "tree", RootRange = NA, startValues = "ML"); fitContinuousMCMC_fossil(phy, extant.dat, node.priors, model = "TREND",Ngens = 500000, sampleFreq = 100, printFreq=1000, outputName = "trendnoroot.txt", propwidth = c(1,1,0.1,1,1), RootIs = "tree", RootRange = NA, startValues = "ML"); fitContinuousMCMC_fossil(phy, extant.dat, node.priors, model = "ACDC", Ngens = 500000, sampleFreq = 100, printFreq=1000, outputName = "ACDCnoroot.txt", propwidth = c(1,1,0.05,1,1), RootIs = "tree", RootRange = NA, startValues = "ML") bm <- remove.burnin(read.table("BMnoroot.txt", header = T)); tr <- remove.burnin(read.table("trendnoroot.txt", header = T)); ssp <- remove.burnin (read.table("SSPnoroot.txt", header = T)); ac <- remove.burnin(read.table("ACDCnoroot.txt", header = T)); #ou <- read.table("OUroot.txt", header = T); ## as before, we can read the results back in, remove burnin and peform model selection with the marginal likelihoods ## bm.lk <- harmonic.mean(bm$logLk); tr.lk <- harmonic.mean(tr$logLk); ssp.lk <- harmonic.mean(ssp$logLk); ac.lk <- harmonic.mean(ac$logLk); bftr <- BayesFactorTest(tr, bm, 0, "log10BF")[[3]]; bfssp <- BayesFactorTest(ssp, bm, 0, "log10BF")[[3]]; bfac <- BayesFactorTest(ac, bm, 0, "log10BF")[[3]]; res <- cbind(c(bm.lk, tr.lk, ssp.lk, ac.lk), c("-", 2*bftr, 2*bfssp, 2*bfac)); colnames(res) <- c("marginal lk", "2*logBF"); rownames(res) <- c("BM", "Trend", "SSP", "ACDC"); pp <- c("-", length(which(tr$mu>0)) / nrow(tr), "-",length(which(ac$r>0)) / nrow(ac)); ## this line computes the pp for ACDC and Trend as above. res <- cbind(res, pp); res; write.table(res, "results_noRoot.txt",quote = FALSE, sep = "\t"); #### Finally , we repeat analyses with informative priors on internal nodes and on the root node ##### fitContinuousMCMC_fossil(phy, extant.dat, priors, model = "BM", Ngens = 500000, sampleFreq = 100, printFreq=1000, outputName = "BMroot.txt", propwidth = c(1,1,1,1,1), RootIs = "FossilNorm", RootRange = root.prior, startValues = "ML"); fitContinuousMCMC_fossil(phy, extant.dat, priors, model = "SSP", Ngens = 500000, sampleFreq = 100, printFreq=1000, outputName = "SSProot.txt", propwidth = c(1,1,0.1,1,1), RootIs = "FossilNorm", RootRange = root.prior, startValues = "ML"); fitContinuousMCMC_fossil(phy, extant.dat, priors, model = "TREND",Ngens = 500000, sampleFreq = 100, printFreq=1000, outputName = "trendroot.txt", propwidth = c(1,1,0.1,1,1), RootIs = "FossilNorm", RootRange = root.prior, startValues = "ML"); fitContinuousMCMC_fossil(phy, extant.dat, priors, model = "ACDC", Ngens = 500000, sampleFreq = 100, printFreq=1000, outputName = "ACDCroot.txt", propwidth = c(1,1,0.05,1,1), RootIs = "FossilNorm", RootRange = root.prior, startValues = "ML"); bm <- remove.burnin(read.table("BMroot.txt", header = T)); tr <- remove.burnin(read.table("trendroot.txt", header = T)); ssp <- remove.burnin(read.table("SSProot.txt", header = T)); ac <- remove.burnin(read.table("ACDCroot.txt", header = T)); bm.lk <- harmonic.mean(bm$logLk); tr.lk <- harmonic.mean(tr$logLk); ssp.lk <- harmonic.mean(ssp$logLk); ac.lk <- harmonic.mean(ac$logLk); bftr <- BayesFactorTest(tr, bm, 0, "log10BF")[[3]]; bfssp <- BayesFactorTest(ssp, bm, 0, "log10BF")[[3]]; bfac <- BayesFactorTest(ac, bm, 0, "log10BF")[[3]]; res <- cbind(c(bm.lk, tr.lk, ssp.lk, ac.lk), c("-", 2*bftr, 2*bfou, 2*bfac)); colnames(res) <- c("marginal lk", "2*logBF"); rownames(res) <- c("BM", "Trend", "SSP", "ACDC") pp <- c("-", length(which(tr$mu>0)) / nrow(tr), "-",length(which(ac$r>0)) / nrow(ac)); res <- cbind(res, pp); res; write.table(res, "results_withRoot.txt",quote = FALSE, sep = "\t"); ######## end of analyses conducted on caniform dataset ########