--- title: "BAMM_empirical" author: "David Cerny" date: "12/21/2019" output: html_document --- ## Data preparation First, read in the BEAST 2 time trees: ```{r} library(ape) dir <- "/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BEAST/" folder_names <- list.files(dir)[grepl("_", list.files(dir)) & !grepl("\\.", list.files(dir))] all_trees <- list() for(i in 1:length(folder_names)) { name_to_assign <- tolower(folder_names[i]) tr <- list.files(paste(dir, folder_names[i], sep = ""), pattern = "_MCC.tre", full.names = T) tree <- read.nexus(tr) assign(name_to_assign, tree) all_trees[[i]] <- tree names(all_trees)[[i]] <- name_to_assign } ``` Are all the trees binary, and do they all lack zero-length edges? ```{r} any(sapply(all_trees, function(x) is.binary(x)) == F) any(sapply(all_trees, function(x) min(x$edge.length) > 0) == F) ``` Now we have to identify the number of stratigraphically unique occurrences corresponding to the taxon sample of each tree: ```{r} orn_occ_new <- read.csv("/Users/David/Grive/Ornithischia_macroevolution/UPDATED/Occurrences/ornithischia_occurrences_FINAL.csv", stringsAsFactors = F) totTaxa <- length(unique(orn_occ_new$accepted_name)) # Identify those taxa that are in the trees but are not in the occurrence table. We can split # this step into 3 cases, since all of our trees are based on just 3 datasets. find.mismatch <- function(dataset) { tiplabs <- all_trees[[grep(dataset, names(all_trees))[1]]]$tip.label # Assumption: if more than half of the names contain underscores, then that means the names # are in the 'genus_species' format, and we only need to delete the underscore (plus possibly # any trailing underscores) to match them against orn_occ_new$accepted_name. If only a few # or no names contain underscores, they are probably genus names only, and have to be matched # against the genus part of orn_occ_new$accepted_name only. if (length(grep("_", tiplabs)) > 0.5*length(tiplabs)) { tiplabs <- gsub("_$", "", tiplabs) tiplabs <- sapply(strsplit(tiplabs, "_"), function(x) paste(x, collapse = " ")) compvect <- orn_occ_new$accepted_name match <- tiplabs[which(tiplabs %in% compvect)] } else { compvect <- sapply(strsplit(orn_occ_new$accepted_name, " "), function(x) x[1]) match <- unique(orn_occ_new$accepted_name[which(compvect %in% tiplabs)]) } mismatch <- tiplabs[which(!tiplabs %in% compvect)] return(list(match, mismatch)) } han_mismatch <- find.mismatch("han")[[2]] ``` Original string | Notes ------------------------|------------------------------------------------------------- Macrogrypho gondwanicus | Contraction of *Macrogryphosaurus gondwanicus*; replaceable. Mahuidacursor | No occurrences in PBDB. Minmi paravertebra | Contraction of *Minmi paravertebrata*; replaceable. Sektensaurus | No occurrences in PBDB. Talenkauen santa | Contraction of *Talenkauen santacrucensis*; replaceable. ```{r} herne_mismatch <- find.mismatch("herne")[[2]] ``` Original string | Notes --------------------|------------------------------------------------------------- Emausaurus ernstii | Misspelling of *Emausaurus ernsti*; replaceable. Fostoria dhimbang | Contraction of *Fostoria dhimbangunmal*; replaceable. Rhabdodon sp1 | Unnamed taxon: not findable in PBDB. Stegoceras validus | Misspelling of *Stegoceras validum*; replaceable. Vegagete ornithopod | Unnamed taxon: not findable in PBDB. ```{r} madzia_mismatch <- find.mismatch("madzia")[[2]] ``` Original string | Notes -----------------------|------------------------------------------------------------------------------------ M._suessi | Contraction of *Mochlodon suessi*; replaceable. M._vorosi | Contraction of *Mochlodon vorosi*; replaceable. Atlascopcosaurus | *Nomen dubium* according to PBDB, and so not included in the data downloaded. Eousdryosaurus | No occurrences in PBDB. Fulgurotherium | *Nomen dubium* according to PBDB, and so not included in the data downloaded. Kaiparowits_orodromine | Unnamed taxon: not findable in PBDB. Lioceratops | Misspelling of *Liaoceratops*; replaceable. NMV_P186047 | Unnamed taxon: not findable in PBDB. NewParksosaurus | Code for *Parksosaurus warreni*; replaceable. Rhabdodonp | Contraction of *Rhabdodon priscus*; replaceable. Tassiniboiensis | Contraction of *Thescelosaurus assiniboiensis*; replaceable. Tenontosaurusd | Contraction of *Tenontosaurus dossi*; replaceable. Tgarbanii | Contraction of *Thescelosaurus garbanii*; replaceable. Tneglectus | Contraction of *Thescelosaurus neglectus*; replaceable. Zalmoxesr | Contraction of *Zalmoxes robustus*; replaceable. Zalmoxess | Contraction of *Zalmoxes shqiperorum*; replaceable. We will determine the total number of stratigraphically unique occurrences as follows: if a species is in the PBDB-downloaded spreadsheet, get the number of rows that have that species in the "accepted name" column; else, arbitrarily assume each species is associated with exactly 1 occurrence. (`Occurrence_stats` show that this is a justifiable assumption, since the median of the per-species occurrence number distribution is 1.) ```{r} if(!require("mgsub")) {devtools::install_github("bmewing/mgsub")} library(mgsub) han_to_rplc <- c("Macrogrypho gondwanicus", "Minmi paravertebra", "Talenkauen santa") han_rplcmnt <- c("Macrogryphosaurus gondwanicus", "Minmi paravertebrata", "Talenkauen santacrucensis") herne_to_rplc <- c("Emausaurus ernstii", "Fostoria dhimbang", "Stegoceras validus") herne_rplcmnt <- c("Emausaurus ernsti", "Fostoria dhimbangunmal", "Stegoceras validum") madzia_to_rplc <- c("M._suessi", "M._vorosi", "Lioceratops", "NewParksosaurus", "Rhabdodonp", "Tassiniboiensis", "Tenontosaurusd", "Tgarbanii", "Tneglectus", "Zalmoxesr", "Zalmoxess") madzia_rplcmnt <- c("Mochlodon suessi", "Mochlodon vorosi", "Liaoceratops yanzigouensis", "Parksosaurus warreni", "Rhabdodon priscus", "Thescelosaurus assiniboiensis", "Tenontosaurus dossi", "Thescelosaurus garbanii", "Thescelosaurus neglectus", "Zalmoxes robustus", "Zalmoxes shqiperorum") occurrence.counter <- function(dataset) { vect1 <- mgsub(find.mismatch(dataset)[[2]], eval(parse(text = paste(dataset, "_to_rplc", sep = ""))), eval(parse(text = paste(dataset, "_rplcmnt", sep = "")))) lookup <- c(find.mismatch(dataset)[[1]], vect1) abbr_frame <- orn_occ_new[which(orn_occ_new$accepted_name %in% lookup),] occnum <- nrow(abbr_frame) + length(which(!vect1 %in% orn_occ_new$accepted_name)) return(occnum) } han_occ_num <- occurrence.counter("han"); han_occ_num herne_occ_num <- occurrence.counter("herne"); herne_occ_num madzia_occ_num <- occurrence.counter("madzia"); madzia_occ_num ``` ## Analysis set-up Compute priors for the control files: ```{r} library(stringr) library(BAMMtools) dir <- "/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/" ``` ```{r, eval = FALSE} for(i in 1:length(all_trees)) { name <- mgsub(names(all_trees)[i], c("unconstrained", "partitioned", "unpartitioned"), c("unconstr", "part", "unpart")) name2 <- str_to_title(name) setBAMMpriors(all_trees[[i]], total.taxa = totTaxa, traits = NULL, outfile = paste(dir, name2, "_const/myPriors.txt", sep = "")) setBAMMpriors(all_trees[[i]], total.taxa = totTaxa, traits = NULL, outfile = paste(dir, name2, "_var/myPriors.txt", sep = "")) } ``` Reprint the time trees into the appropriate BAMM directories, but without the BEAST annotations and in the Newick rather than Nexus format: ```{r, eval = FALSE} for(i in 1:length(all_trees)) { name1 <- str_to_title(names(all_trees)[i]) name2 <- mgsub(name1, c("unconstrained", "partitioned", "unpartitioned"), c("unconstr", "part", "unpart")) write.tree(all_trees[[i]], paste(dir, name2, "_const/", name1, "_rho_MCC.tre", sep = "")) write.tree(all_trees[[i]], paste(dir, name2, "_var/", name1, "_rho_MCC.tre", sep = "")) } ``` We will write all the configuration files in a single loop. This will require rather complicated code; see the interspersed comments for explanation. Note that we only wrap the loop inside a function to be able to vary `expectedNumberOfShifts`; no attempt was made to make the function pure. ```{r} library(phytools) bamm.control.file.printer <- function(cnfg, expNumShift, suffix = NULL) { folder_names <- list.files(dir)[grepl("_", list.files(dir)) & !grepl("\\.", list.files(dir))] for(i in 1:length(folder_names)) { if (i %% 2 == 0) { index <- i/2 } else { index <- (i + 1)/2 } # Get the number of stratigraphically unique occurrences: prefix <- strsplit(names(all_trees)[index], "_")[[1]][1] totOcc <- eval(parse(text = paste(prefix, "_occ_num", sep = ""))) name1 <- str_to_title(names(all_trees)[index]) # Read in the tree: tree <- read.nexus(paste("/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BEAST/", name1, "/", name1, "_rho_MCC.tre", sep = "")) # Read in the priors: priors <- readLines(paste("/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/", folder_names[i], "/myPriors.txt", sep = "")) # Read in the params table for zero-rho trees: if (!grepl("nonzero", folder_names[i])) { params <- read.table(paste("/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BEAST/", name1, "/", name1, "_rho_params.txt", sep = ""), header = T, stringsAsFactors = F) offset <- params$mean[params$statistic == "offset"] } else { offset <- NULL } scr <- vector() scr <- c(scr, cnfg[1:17]) scr <- c(scr, paste("treefile = ", name1, "_rho_MCC.tre", sep = "")) scr <- c(scr, cnfg[19:20]) if (is.null(suffix)) { scr <- c(scr, paste("runInfoFilename = ", folder_names[i], ".txt", sep = "")) } else { scr <- c(scr, paste("runInfoFilename = ", folder_names[i], "_", suffix, ".txt", sep = "")) } scr <- c(scr, cnfg[22:37]) scr <- c(scr, paste("globalSamplingFraction = ", round(Ntip(tree)/totTaxa, 3), sep = "")) scr <- c(scr, paste("#", Ntip(tree), "sampled species over", totTaxa, "total")) scr <- c(scr, cnfg[40:50]) scr <- c(scr, paste("expectedNumberOfShifts =", expNumShift)) scr <- c(scr, priors[6:12]) # The line below will read '1' (allowing for time-varying rates) for configuration files in # folders with the 'var' suffix and '0' (disallowing for time-varying rates) for those in # folders with the 'const' suffix: scr <- c(scr, paste("lambdaIsTimeVariablePrior =", as.numeric(grepl("var", folder_names[i])))) scr <- c(scr, cnfg[70:79]) if (is.null(suffix)) { scr <- c(scr, paste("mcmcOutfile = ", folder_names[i], "_out.txt", sep = "")) } else { scr <- c(scr, paste("mcmcOutfile = ", folder_names[i], "_", suffix, "_out.txt", sep = "")) } scr <- c(scr, cnfg[81:86]) if (is.null(suffix)) { scr <- c(scr, paste("eventDataOutfile = ", folder_names[i], "_event_data.txt", sep = "")) } else { scr <- c(scr, paste("eventDataOutfile = ", folder_names[i], "_", suffix, "_event_data.txt", sep = "")) } scr <- c(scr, cnfg[88:168]) scr <- c(scr, cnfg[171:173]) # Initial rate change parameter for speciation at the root: the root event is assumed to be # time-constant in configuration files in folders WITHOUT the 'var' suffix and time-varying # otherwise: if (grepl("var", folder_names[i])) { scr <- c(scr, paste("lambdaShift0 =", 0.01)) } else { scr <- c(scr, paste("lambdaShift0 =", 0)) } scr <- c(scr, cnfg[175:197]) if (is.null(suffix)) { scr <- c(scr, paste("chainSwapFileName = ", folder_names[i], "_chain_swap.txt", sep = "")) } else { scr <- c(scr, paste("chainSwapFileName = ", folder_names[i], "_", suffix, "_chain_swap.txt", sep = "")) } scr <- c(scr, cnfg[199:215]) # 'Observation time' is defined as the height of the tree minus the time by which we are # confident all members of the clade have gone extinct. This is equal simply to uncorrected # tree height for nonzero-rho trees, but an offset needs to be accounted for in the case of # zero-rho trees: if (is.null(offset)) { root_height <- max(nodeHeights(tree)) } else { root_height <- max(nodeHeights(tree)) + (offset - 66.0) } scr <- c(scr, paste("observationTime =", round(root_height, 4))) scr <- c(scr, cnfg[217:239]) scr <- c(scr, paste("numberOccurrences =", totOcc)) scr <- c(scr, cnfg[241:253]) if (is.null(suffix)) { write(scr, paste(dir, folder_names[i], "/", folder_names[i], "_control.txt", sep = "")) } else { write(scr, paste(dir, folder_names[i], "/", folder_names[i], "_", suffix, "_control.txt", sep = "")) } } } ``` ```{r, eval = FALSE} template <- readLines("/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/template_control.txt") bamm.control.file.printer(template, 1.0) bamm.control.file.printer(template, 0.1, "lowShiftProb") bamm.control.file.printer(template, 10.0, "highShiftProb") ``` Write a script to run all 32 analyses that share the same expected number of shifts at a time: ```{r} bamm.scripter <- function(suffix = NULL) { run_script <- rep(NA, 2*length(folder_names) + 1) run_script[1] <- "#!/bin/bash" for(i in 1:length(ctl_files)) { run_script[2*i] <- paste("cd ", dir, folder_names[i], sep = "") if (is.null(suffix)) { run_script[2*i + 1] <- paste("/Users/David/Grive/Alfaro_Lab/BAMM-fossil/build/bamm -c ", folder_names[i], "_control.txt", sep = "") } else { run_script[2*i + 1] <- paste("/Users/David/Grive/Alfaro_Lab/BAMM-fossil/build/bamm -c ", folder_names[i], "_", suffix, "_control.txt", sep = "") } } if (is.null(suffix)) { write(run_script, paste(dir, "run_bamm.sh", sep = "")) } else { write(run_script, paste(dir, "run_bamm_", suffix, ".sh", sep = "")) } } ``` ```{r, eval = FALSE} bamm.scripter("lowShiftProb") bamm.scripter("highShiftProb") ``` ## Convergence diagnostics Check the minimum effective sample size (ESS) for each run, allowing for a 10% burnin proportion: ```{r} library(mgsub) library(coda) library(kableExtra) name_formatter <- function(dir_name) { temp1 <- strsplit(dir_name, "/")[[1]][2] temp2 <- strsplit(temp1, "_")[[1]] temp3 <- paste(temp2[temp2 != "out.txt"], collapse = ", ") temp4 <- mgsub(temp3, c("const", "var", "unconstr", "part", "unpart", ", highShiftProb", ", lowShiftProb"), c("constant", "variable", "unconstr.", "part.", "unpart.", "", "")) name <- ifelse(grepl("highShiftProb", temp3), paste(temp4, ", 10 exp. shifts", sep = ""), ifelse(grepl("lowShiftProb", temp3), paste(temp4, ", 0.1 exp. shifts", sep = ""), paste(temp4, ", 1 exp. shift", sep = ""))) return(name) } get_bamm_ess <- function(dir_name) { name <- name_formatter(dir_name) output <- read.csv(paste("/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/", dir_name, sep = "")) no_burnin <- output[-c(1:(0.1*nrow(output))),] ess <- min(effectiveSize(no_burnin$N_shifts), effectiveSize(no_burnin$logLik), effectiveSize(no_burnin$eventRate), effectiveSize(no_burnin$preservationRate)) return(data.frame(Run = name, `min ESS` = ess, stringsAsFactors = F)) } bamm_dir <- list.files(dir, pattern = "_out.txt", recursive = T) bamm_conv <- do.call(rbind, Map(get_bamm_ess, bamm_dir)); rownames(bamm_conv) <- NULL bamm_conv %>% kable() %>% kable_styling() ``` We see that all values are substantially greater than 200, which we take as indicating that the chains have reached convergence. ```{r, results = 'hide'} get_mcmcOut <- function(dir_name) { dir <- "/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/" outframe <- read.csv(paste(dir, dir_name, sep = "")) return(outframe) } get_eventData <- function(dir_name) { dir <- "/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/" subdir <- strsplit(dir_name, "/")[[1]][1] treefile <- list.files(paste(dir, subdir, sep = ""), pattern = ".tre", full.names = T) tree <- read.tree(treefile) edfile <- paste(dir, strsplit(dir_name, "_out.txt")[[1]][1], "_event_data.txt", sep = "") ed <- getEventData(tree, eventdata = edfile, burnin = 0.1) return(ed) } get_credShiftSet <- function(eventdata, name) { exp_n_shifts <- ifelse(grepl("high", name), 10, ifelse(grepl("low", name), 0.1, 1.0)) css <- credibleShiftSet(eventdata, expectedNumberOfShifts = exp_n_shifts, threshold = 5, set.limit = 0.95) } outfiles <- Map(get_mcmcOut, bamm_dir) eventdata <- Map(get_eventData, bamm_dir) css <- Map(get_credShiftSet, eventdata, names(eventdata)) ``` In the next step, we produces composite plots comparing the prior and posterior distribution of rate shifts for each analysis: ```{r} PvsP_plot_generator <- function(out, name, tag = NULL) { n <- ifelse(is.null(tag), 1.0, ifelse(grepl("high", tag), 10, 0.1)) formatted_names <- Map(name_formatter, name) png(paste("/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/", tag, "posterior_vs_prior.png", sep = ""), width = 4200, height = 4800, pointsize = 48) layout(matrix(1:32, nrow = 8, ncol = 4, byrow = TRUE)) Map(function(x, y) plotPrior(x, expectedNumberOfShifts = n, main = y), out, formatted_names) dev.off() } low_out <- outfiles[grepl("low", names(outfiles)) == T] high_out <- outfiles[grepl("high", names(outfiles)) == T] mid_out <- outfiles[grepl("low", names(outfiles)) == F & grepl("high", names(outfiles)) == F] ``` ```{r, eval = FALSE} PvsP_plot_generator(low_out, names(low_out), "lowShiftProb_") PvsP_plot_generator(high_out, names(high_out), "highShiftProb_") PvsP_plot_generator(mid_out, names(mid_out)) ``` ![Posterior vs. prior comparison, expected number of shifts = 0.1](/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/lowShiftProb_posterior_vs_prior.png) ![Posterior vs. prior comparison, expected number of shifts = 1.0](/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/posterior_vs_prior.png) ![Posterior vs. prior comparison, expected number of shifts = 10](/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/highShiftProb_posterior_vs_prior.png) Now we want to compute the Bayes factors rather than posterior probabilities of individual models, since unlike the latter, the former do not depend on the prior placed on the number of shifts: ```{r} BF_plot_generator <- function(out, name, tag = NULL) { n <- ifelse(is.null(tag), 1.0, ifelse(grepl("high", tag), 10, 0.1)) formatted_names <- Map(name_formatter, name) bf <- Map(function(x) computeBayesFactors(x, expectedNumberOfShifts = n, burnin = 0.1), out) bf_mat <- Map(function(x) cbind(shifts = as.numeric(rownames(x)), BF = as.numeric(x[,1])), bf) png(paste("/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/", tag, "bayes_factors.png", sep = ""), width = 4200, height = 4800, pointsize = 48) layout(matrix(1:32, nrow = 8, ncol = 4, byrow = TRUE)) Map(function(x, y) {barplot(x[,2], names.arg = x[,1], col = 'cornflowerblue', xlab = 'Shifts', main = y, ylab = 'Bayes factor w.r.t. 0 shifts'); abline(h = 1/20)}, bf_mat, formatted_names) dev.off() } ``` ```{r, eval = FALSE} BF_plot_generator(low_out, names(low_out), "lowShiftProb_") BF_plot_generator(high_out, names(high_out), "highShiftProb_") BF_plot_generator(mid_out, names(mid_out)) ``` ![Bayes factor model comparison, expected number of shifts = 0.1](/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/lowShiftProb_bayes_factors.png) ![Bayes factor model comparison, expected number of shifts = 1.0](/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/bayes_factors.png) ![Bayes factor model comparison, expected number of shifts = 10](/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/highShiftProb_bayes_factors.png) Get the means and 95% credible intervals of mean tip rates and of root rates: ```{r} # Credit: http://stackoverflow.com/a/4692702 root_and_tip_rates <- function(ed, name, expr_txt) { rmat <- getRateThroughTimeMatrix(ed) temp <- eval(parse(text = expr_txt), rmat) means_thru_time <- colMeans(temp) CI_thru_time <- apply(temp, 2, quantile, c(0.025, 0.975)) output <- data.frame(Analysis = name_formatter(name), Root_mean = round(means_thru_time[1], 4), Root_min_95CI = round(CI_thru_time[1, 1], 4), Root_max_95CI = round(CI_thru_time[2, 1], 4), Tips_mean = round(means_thru_time[100], 4), Tips_min_95CI = round(CI_thru_time[1, 100], 4), Tips_max_95CI = round(CI_thru_time[2, 100], 4), stringsAsFactors = F) return(output) } ``` Speciation rates (those where there is no overlap between the 95% credible intervals for the root rates and the tip rates are highlighted): ```{r} sp_rates <- do.call(rbind, Map(function(x, y) root_and_tip_rates(x, y, "lambda"), eventdata, names(eventdata))) rownames(sp_rates) <- NULL sp_rates %>% kable() %>% kable_styling() %>% row_spec(which(sp_rates$Tips_max_95CI < sp_rates$Root_min_95CI), background = "coral") ``` Extinction rates: ```{r} ex_rates <- do.call(rbind, Map(function(x, y) root_and_tip_rates(x, y, "mu"), eventdata, names(eventdata))) rownames(ex_rates) <- NULL ex_rates %>% kable() %>% kable_styling() ``` Net diversification rates (those where there is no overlap between the 95% credible intervals for the root rates and the tip rates are highlighted): ```{r} nd_rates <- do.call(rbind, Map(function(x, y) root_and_tip_rates(x, y, "lambda - mu"), eventdata, names(eventdata))) rownames(nd_rates) <- NULL nd_rates %>% kable() %>% kable_styling() %>% row_spec(which(nd_rates$Tips_max_95CI < nd_rates$Root_min_95CI), background = "dodgerblue") ``` Plotting speciation, extinction, and net diversification rates through time from different analyses: ```{r} low_ed <- eventdata[grepl("low", names(eventdata)) == T] high_ed <- eventdata[grepl("high", names(eventdata)) == T] mid_ed <- eventdata[grepl("low", names(eventdata)) == F & grepl("high", names(eventdata)) == F] bgn <- c(100.5, 163.5, 201.3, 1e6) end <- c(66, 145, 174.1, 237) mycol1 <- rep(c("firebrick3", "coral1"), 16) mycol2 <- rep(c("limegreen", "green1"), 16) mycol3 <- rep(c("dodgerblue3", "dodgerblue"), 16) # Credit: http://stackoverflow.com/a/4692702 gen_cm <- function(rate_object, expr_txt) { temp <- eval(parse(text = expr_txt), rate_object) return(colMeans(temp)) } RTT_plotter <- function(rates, expr, index, ylabel, xupper, ylower, yupper, maxage, minage, cv) { par(mgp = c(5, 1, 0)) par(mar = c(7, 8, 2, 2) + 0.1) # There are twice as many BAMM analyses as there are trees (1 constant & 1 variable analysis # per each tree), hence we are multiplying the index by 2: plot(rates[[2*index]]$times + 66.0, rates[[2*index]]$times + 66.0, type = 'n', ylim = c(ylower, yupper), xlim = rev(c(66.0, xupper)), ylab = ylabel, xlab = 'Million years ago', main = '', cex.axis = 2.5, cex.lab = 2.5) rect(minage, -1e6, maxage, 1e6, col = 'gray93', border = NA) for(i in 1:length(rates)) { lines(rates[[i]]$times + 66.0, rev(gen_cm(rates[[i]], expr)), col = cv[i], lwd = 6) } legend("topright", legend = c("Time-constant BAMM", "Time-varying BAMM"), fill = c(cv[1:2]), bty = "n", cex = 2.25, y.intersp = 1) } RTT_plot_generator <- function(ed, trees, tag = NULL) { bamm_rates <- Map(function(x) getRateThroughTimeMatrix(x), ed) spyupper <- max(unlist(Map(function(x) max(colMeans(x$lambda)), bamm_rates))) exyupper <- max(unlist(Map(function(x) max(colMeans(x$mu)), bamm_rates))) ndylower <- min(unlist(Map(function(x) min(colMeans(x$lambda - x$mu)), bamm_rates))) ndyupper <- max(unlist(Map(function(x) max(colMeans(x$lambda - x$mu)), bamm_rates))) root_heights <- sapply(trees, function(x) max(nodeHeights(x))) max_age <- max(root_heights) ind <- which(root_heights == max_age) xup <- max_age + 66.0 png(paste("/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/", tag, "rates_through_time.png", sep = ""), width = 4200, height = 4800, pointsize = 48) layout(matrix(1:3, nrow = 3, ncol = 1)) RTT_plotter(bamm_rates, "lambda", ind, "Speciation rate", xup, 0, spyupper, bgn, end, mycol1) RTT_plotter(bamm_rates, "mu", ind, "Extinction rate", xup, 0, exyupper, bgn, end, mycol2) RTT_plotter(bamm_rates, "lambda - mu", ind, "Net diversification rate", xup, ndylower, ndyupper, bgn, end, mycol3) dev.off() } ``` ```{r, eval = FALSE} RTT_plot_generator(low_ed, all_trees, "lowShiftProb_") RTT_plot_generator(high_ed, all_trees, "highShiftProb_") RTT_plot_generator(mid_ed, all_trees) ``` ![Rates through time, expected number of shifts = 0.1](/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/lowShiftProb_rates_through_time.png) ![Rates through time, expected number of shifts = 1.0](/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/rates_through_time.png) ![Rates through time, expected number of shifts = 10](/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/highShiftProb_rates_through_time.png) Are there any 95% credible sets of rate shift configurations (= sets of rate shift configurations that account for 95% of the probability of the data) that contain more than 1 distinct shift configuration? ```{r} any(unlist(Map(function(x) x$number.distinct, css)) != 1) ``` We summarize the results in the table below. Note that the "probability" of the only distinct configuration falling within the 95% credible set that is reported here is not equivalent to true posterior probability of the zero shift configuration. This is because the `credibleShiftSet()` function only views as distinct those configurations that differ in the number and location of *core* shifts, where core shifts are those with a marginal odds ratio greater than 5 (i.e., those that are sampled more often than expected under the prior alone). Some of the nonzero-shift configurations that contribute to the posterior probability of a given number of shifts (as shown in the prior-posterior comparisons above) only contain non-core shifts, and thus do not contribute to the probability of the distinct configurations reported below (see http://groups.google.com/d/msg/bamm-project/s5FXtlp8i1g/6NZuf-VcAgAJ). ```{r} # The table that css[[i]] prints to the console is very nice, but it is hard to reassemble it # from the individual list objects. We will therefore recreate it from the captured output: cred_set_summary <- function(css_object, name) { a <- capture.output(css_object) # A bit opaque; what it does is extract some of the info printed to the console as a table: prcs <- as.numeric(unlist(Map(function(x) x[c(3,5)], strsplit(a[10:(length(a) - 1)], "\\s+")))) output <- data.frame(Analysis = name_formatter(name), Probability = prcs[seq(1, length(prcs), by = 2)], `Core shifts` = prcs[seq(2, length(prcs), by = 2)], stringsAsFactors = F) return(output) } get_dupl_rows <- function(df, col) { colvals <- df[colnames(df) == col] return(which(duplicated(colvals))) } shiftSets <- do.call(rbind, Map(cred_set_summary, css, names(css))); rownames(shiftSets) <- NULL shiftSets %>% kable() %>% kable_styling() %>% row_spec(get_dupl_rows(shiftSets, "Analysis"), background = "palegreen") ``` Finally, we will summarize marginal rate shift probabilities by scaling the length of each branch of our tree according to the probability that a rate shift occurred somewhere along it: ```{r} margProb_plotter <- function(msp_tree, name) { par(mar = c(1, 1, 1, 1) + 0.1) plot.phylo(msp_tree, edge.width = 2, cex = 0.5) title(name_formatter(name), outer = F) add.scale.bar(par("usr")[1] + 0.65*(par("usr")[2] - par("usr")[1]), par("usr")[3] + 0.9*(par("usr")[4] - par("usr")[3]), lwd = 2) } margProb_plot_generator <- function(mspt_list, tag = NULL) { png(paste("/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/", tag, "marg_prob_trees.png", sep = ""), width = 4800, height = 8000, pointsize = 48) layout(matrix(1:32, nrow = 8, ncol = 4, byrow = TRUE)) Map(margProb_plotter, mspt_list, names(mspt_list)) dev.off() } mspt <- Map(marginalShiftProbsTree, eventdata) low_mspt <- mspt[grepl("low", names(mspt)) == T] high_mspt <- mspt[grepl("high", names(mspt)) == T] mid_mspt <- mspt[grepl("low", names(mspt)) == F & grepl("high", names(mspt)) == F] ``` ```{r, eval = FALSE} margProb_plot_generator(low_mspt, tag = "lowShiftProb_") margProb_plot_generator(high_mspt, tag = "highShiftProb_") margProb_plot_generator(mid_mspt) ``` ![Branch-wise marginal rate shift probabilities, expected number of shifts = 0.1](/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/lowShiftProb_marg_prob_trees.png) ![Branch-wise marginal rate shift probabilities, expected number of shifts = 1.0](/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/marg_prob_trees.png) ![Branch-wise marginal rate shift probabilities, expected number of shifts = 10](/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/highShiftProb_marg_prob_trees.png) # For the manuscript We want to compute the mean, minimum, and maximum speciation, extinction, and net diversification rates separately for the time-constant and time-varying analyses. Furthermore, for the time-constant analyses, we want to average the rates at the root and at the youngest tips, while for the time-varying analyses, we want to compute the widths of the 95% credibility intervals around the root and youngest-tip rates: ```{r} sp_rates_const <- sp_rates[grepl("constant", sp_rates$Analysis), ] ex_rates_const <- ex_rates[grepl("constant", ex_rates$Analysis), ] nd_rates_const <- nd_rates[grepl("constant", nd_rates$Analysis), ] const_analyses <- list(sp_rates_const, ex_rates_const, nd_rates_const) sp_rates_var <- sp_rates[grepl("variable", sp_rates$Analysis), ] ex_rates_var <- ex_rates[grepl("variable", ex_rates$Analysis), ] nd_rates_var <- nd_rates[grepl("variable", nd_rates$Analysis), ] var_analyses <- list(sp_rates_var, ex_rates_var, nd_rates_var) const_summ <- function(const_frame) { tip_root_rates <- colMeans(rbind(const_frame$Root_mean, const_frame$Tips_mean)) output <- data.frame(Mean = mean(tip_root_rates), Min = min(tip_root_rates), Max = max(tip_root_rates), stringsAsFactors = F) return(output) } var_summ <- function(var_frame) { rootCIwidth <- var_frame$Root_max_95CI - var_frame$Root_min_95CI tipsCIwidth <- var_frame$Tips_max_95CI - var_frame$Tips_min_95CI output <- data.frame(Root_mean = mean(var_frame$Root_mean), Root_min = min(var_frame$Root_mean), Root_max = max(var_frame$Root_mean), Tips_mean = mean(var_frame$Tips_mean), Tips_min = min(var_frame$Tips_mean), Tips_max = max(var_frame$Tips_mean), Root_CI_mean_width = mean(rootCIwidth), Root_CI_min_width = min(rootCIwidth), Root_CI_max_width = max(rootCIwidth), Tips_CI_mean_width = mean(tipsCIwidth), Tips_CI_min_width = min(tipsCIwidth), Tips_CI_max_width = max(tipsCIwidth), stringsAsFactors = F) return(output) } const_summ_list <- Map(const_summ, const_analyses) const_summ_frame <- do.call(rbind, const_summ_list); rownames(const_summ_frame) <- NULL var_summ_list <- Map(var_summ, var_analyses) var_summ_frame <- do.call(rbind, var_summ_list); rownames(var_summ_frame) <- NULL var_summ_frame <- t(var_summ_frame) # for clearer arrangement const_summ_frame %>% kable() %>% kable_styling() var_summ_frame %>% kable() %>% kable_styling() ``` ## Figure 3 ```{r, eval = FALSE} low_rates <- Map(function(x) getRateThroughTimeMatrix(x), low_ed) mid_rates <- Map(function(x) getRateThroughTimeMatrix(x), mid_ed) high_rates <- Map(function(x) getRateThroughTimeMatrix(x), high_ed) maxheight <- max(sapply(all_trees, function(x) max(nodeHeights(x)))) + 66 # plotting boundary tscales.period <- function(top, bot, s.bot) { bases <- read.csv("~/Grive/Ornithischia_macroevolution/UPDATED/PyRate/ICS_colors.csv", header = T, stringsAsFactors = F) ages <- bases[bases$Type == "Age" & bases$Part_of %in% c("Triassic", "Jurassic", "Cretaceous"), ] ages <- head(ages, -3) # Drop everything after the Ladinian epochs <- bases[bases$Type == "Epoch" & bases$Part_of %in% c("Triassic", "Jurassic", "Cretaceous"), ] epochs <- head(epochs, -1) epochs[nrow(epochs), 1] <- 242.0 epochs[nrow(epochs), 4] <- 239.5 ac <- rgb(ages$Col_R, ages$Col_G, ages$Col_B, maxColorValue = 255) ec <- rgb(epochs$Col_R, epochs$Col_G, epochs$Col_B, maxColorValue = 255) age_labels <- ages$Abbrev age_labels[c(3:4, 10, 14, 16:19, 23)] <- "" epoch_labels <- paste(epochs$Name, epochs$Part_of) epoch_labels[4] <- "MJ" epoch_labels[7] <- "MT" age_bott_mar <- (bot + s.bot)/2 rect(xleft = ages[, 1], ybottom = rep(age_bott_mar, nrow(ages)), xright = ages[, 2], ytop = rep(bot, nrow(ages)), col = ac, border = NA) rect(xleft = ages[, 1], ybottom = rep(age_bott_mar, nrow(ages)), xright = ages[, 2], ytop = rep(bot, nrow(ages)), border = "black") rect(xleft = ages[, 1][seq(1, nrow(ages), by = 2)], ybottom = rep(bot, nrow(ages)), xright = ages[, 2][seq(1, nrow(ages), by = 2)], ytop = rep(top, nrow(ages)), col = alpha("gray75", 0.1), border = NA) rect(xleft = epochs[, 1], ybottom = rep(s.bot, nrow(epochs)), xright = epochs[, 2], ytop = rep(age_bott_mar, nrow(epochs)), col = ec, border = NA) rect(xleft = epochs[, 1], ybottom = rep(s.bot, nrow(epochs)), xright = epochs[, 2], ytop = rep(age_bott_mar, nrow(epochs)), border = "black") text(x = ages$Midpoint, y = (bot + age_bott_mar)/2, labels = age_labels, cex = 0.75) text(x = epochs$Midpoint, y = (s.bot + age_bott_mar)/2, labels = epoch_labels, cex = 0.75) } figure3_plotter <- function() { sp_cols <- rep(c("red3", "coral"), length(low_rates)/2) ex_cols <- rep(c("blue3", "deepskyblue"), length(low_rates)/2) nd_cols <- rep(c("green4", "limegreen"), length(low_rates)/2) layout(matrix(1:4, 4, 1), heights = c(0.31, 0.31, 0.31, 0.07)) par(mai = c(0.5, 1, 0.5, 0)) plot(low_rates[[1]]$times + 66, low_rates[[1]]$times + 66, type = 'n', ylim = c(-0.02, 0.1), xlim = rev(c(66, maxheight)), ylab = expression(Speciation ~ rate ~ (sp %.% Myr^-1)), xlab = 'Ma', axes = F, cex.lab = 1.4) tscales.period(0.1, 0, -0.02) axis(1, at = seq(70, 240, by = 20), cex.axis = 1.4) axis(2, at = seq(0, 0.1, by = 0.02), cex.axis = 1.4) for(i in 1:length(low_rates)) { lines(low_rates[[i]]$times + 66.0, rev(colMeans(low_rates[[i]]$lambda)), col = sp_cols[i], lwd = 1, lty = 'dotted') } for(i in 1:length(mid_rates)) { lines(mid_rates[[i]]$times + 66.0, rev(colMeans(mid_rates[[i]]$lambda)), col = sp_cols[i], lwd = 1, lty = 'dashed') } for(i in 1:length(high_rates)) { lines(high_rates[[i]]$times + 66.0, rev(colMeans(high_rates[[i]]$lambda)), col = sp_cols[i], lwd = 1, lty = 'solid') } legend(x = 140, y = 0.1, legend = c(expression(paste("Time-constant within-regime ", lambda)), expression(paste("Time-varying within-regime ", lambda))), fill = c(sp_cols[1:2]), bty = "n", cex = 1.4, y.intersp = 1) mtext(paste0("a)"), side = 3, adj = 0.05, line = 1.5, cex = 2) par(mai = c(0.5, 1, 0.5, 0)) plot(low_rates[[1]]$times + 66, low_rates[[1]]$times + 66, type = 'n', ylim = c(-0.013, 0.065), xlim = rev(c(66, maxheight)), ylab = expression(Extinction ~ rate ~ (sp %.% Myr^-1)), xlab = 'Ma', axes = F, cex.lab = 1.4) tscales.period(0.065, 0, -0.013) axis(1, at = seq(70, 240, by = 20), cex.axis = 1.4) axis(2, at = seq(0, 0.06, by = 0.01), cex.axis = 1.4) for(i in 1:length(low_rates)) { lines(low_rates[[i]]$times + 66.0, rev(colMeans(low_rates[[i]]$mu)), col = ex_cols[i], lwd = 1, lty = 'dotted') } for(i in 1:length(mid_rates)) { lines(mid_rates[[i]]$times + 66.0, rev(colMeans(mid_rates[[i]]$mu)), col = ex_cols[i], lwd = 1, lty = 'dashed') } for(i in 1:length(high_rates)) { lines(high_rates[[i]]$times + 66.0, rev(colMeans(high_rates[[i]]$mu)), col = ex_cols[i], lwd = 1, lty = 'solid') } legend(x = 140, y = 0.02, legend = c(expression(paste("Time-constant within-regime ", lambda)), expression(paste("Time-varying within-regime ", lambda))), fill = c(ex_cols[1:2]), bty = "n", cex = 1.4, y.intersp = 1) mtext(paste0("b)"), side = 3, adj = 0.05, line = 1.5, cex = 2) par(mai = c(0.5, 1, 0.5, 0)) plot(low_rates[[1]]$times + 66, low_rates[[1]]$times + 66, type = 'n', ylim = c(-0.04, 0.08), xlim = rev(c(66, maxheight)), xlab = 'Ma', axes = F, cex.lab = 1.4, ylab = expression(Net ~ diversification ~ rate ~ (sp %.% Myr^-1))) tscales.period(0.08, -0.02, -0.04) axis(1, at = seq(70, 240, by = 20), cex.axis = 1.4) axis(2, at = seq(-0.02, 0.08, by = 0.02), cex.axis = 1.4) for(i in 1:length(low_rates)) { lines(low_rates[[i]]$times + 66.0, rev(colMeans(low_rates[[i]]$lambda - low_rates[[i]]$mu)), col = nd_cols[i], lwd = 1, lty = 'dotted') } for(i in 1:length(mid_rates)) { lines(mid_rates[[i]]$times + 66.0, rev(colMeans(mid_rates[[i]]$lambda - mid_rates[[i]]$mu)), col = nd_cols[i], lwd = 1, lty = 'dashed') } for(i in 1:length(high_rates)) { lines(high_rates[[i]]$times + 66.0, rev(colMeans(high_rates[[i]]$lambda - high_rates[[i]]$mu)), col = nd_cols[i], lwd = 1, lty = 'solid') } legend(x = 140, y = 0.08, legend = c(expression(paste("Time-constant within-regime ", lambda)), expression(paste("Time-varying within-regime ", lambda))), fill = c(nd_cols[1:2]), bty = "n", cex = 1.4, y.intersp = 1) mtext(paste0("c)"), side = 3, adj = 0.05, line = 1.5, cex = 2) par(mar = c(0, 0, 2, 0)) plot(0, 0, type = 'l', bty = 'n', xaxt = 'n', yaxt = 'n') # Credit: Jota, http://stackoverflow.com/a/38234618 leg <- legend("top", legend = c("0.1 expected shifts", "1 expected shift", "10 expected shifts"), lwd = 3, lty = c("dotted", "dashed", "solid"), xpd = T, ncol = 3, seg.len = 4.4, cex = 1.5, bty = 'n', plot = F) x1 <- leg$rect$left - 0.04*leg$rect$w x2 <- leg$rect$left + 1.08*leg$rect$w y1 <- leg$rect$top y2 <- leg$rect$top - leg$rect$h legend(x = c(x1, x2), y = c(y1, y2), legend = c("0.1 expected shifts", "1 expected shift", "10 expected shifts"), lwd = 3, lty = c("dotted", "dashed", "solid"), xpd = T, ncol = 3, seg.len = 4.4, cex = 1.5, bty = 'n') } svg("/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/Figure_3.svg", width = 8, height = 12) figure3_plotter() dev.off() pdf("/Users/David/Grive/Ornithischia_macroevolution/UPDATED/BAMM/Figure_3.pdf", width = 8, height = 12) figure3_plotter() dev.off() ```