#################################################################################################### ########################## By Renske Onstein ####################################################### # When using this script, please CITE Onstein et al. "Frugivory-related traits promote speciation of tropical palms" Nature Ecology and Evolution # # R script to perform global MuSSE multitrait analysis for palms with small (< 4 cm) fruits and an understory growth form or oceanic island distribution # rm(list=ls()) # load R packages library(ape) library(geiger) library(diversitree) # import palm traits and phylogeny # please note: the relevant columns (e.g. fruits < 4 cm and understory) in the trait csv file should be selected and saved as a txt file, headers should be one capital letter (e.g. Species, F, U) # set working directory where the data is setwd("WD") MuSSE_traits<-read.table("revFruits_VolcanicIslands.txt", header=T, row.names=1) MuSSE_traits MuSSE_traits.v<-MuSSE_traits[,1] names(MuSSE_traits.v)<-row.names(MuSSE_traits) tree2<-read.nexus("TREE") # drop.tip species in tree missing data on traits x<-name.check(tree2, MuSSE_traits) tree<-drop.tip(tree2,c(x$tree_not_data)) ###################################################################################### ########################## Model selection ########################################### # the full model lik<-make.musse.multitrait(tree,MuSSE_traits,sampling.f=0.7,allow.multistep=TRUE) lik p<-starting.point.musse.multitrait(tree, lik) fit<-find.mle(lik,p,control=list(maxit=100000)) # including allow.multistep? Should transition rates be included that imply simultaneous changes in more than one trait? likm<-make.musse.multitrait(tree,MuSSE_traits,sampling.f=0.7) likm p<-starting.point.musse.multitrait(tree, likm) fitm<-find.mle(likm,p,control=list(maxit=100000)) # anova will perform a likelihood ratio test to compare nested models, and to evaluate whether a model including more parameters fits significantly better than the simple model (indicated with p < 0.05) anova(fit, fitm) ###################################################################################### # we will now fit a simpler model first, and add parameters to check the # improvement of model fit. # this model allows the forwards and backwards transition rates to vary, # but the speciation and extinction rates do not depend on the character state: lik0 <- make.musse.multitrait(tree,MuSSE_traits,sampling.f=0.7,depth=0) argnames(lik0) p <- starting.point.musse.multitrait(tree, lik0) fit0 <- find.mle(lik0, p) # now we will allow the speciation rates to vary additively with both # character states (extinction and character changes are left as in the # previous, simple model): lik1<-make.musse.multitrait(tree,MuSSE_traits,sampling.f=0.7, depth=c(1,0,0)) p <- starting.point.musse.multitrait(tree, lik1) fit1 <- find.mle(lik1 p, control=list(maxit=100000)) # --> does a model including trait affected speciation fit better than a model without? anova(fit1, fit0) # now we will allow the extinction rates to vary additively with both # character states (speciation and character changes are left as in the # simple model): lik2 <- make.musse.multitrait(tree,MuSSE_traits,sampling.f=0.7, depth=c(0, 1, 0)) p <- starting.point.musse.multitrait(tree, lik2) fit2 <- find.mle(lik2, p, control=list(maxit=100000)) # --> does a model including trait affected extinction fit better than model without? anova(fit0, fit2) # now we will allow the transition rates to vary additively with both # character states (speciation and extinction are left as in the # simple model): lik3 <- make.musse.multitrait(tree,MuSSE_traits,sampling.f=0.7, depth=c(0, 0, 1)) p <- starting.point.musse.multitrait(tree, lik3) fit3 <- find.mle(lik3, p, control=list(maxit=100000)) # --> does a model including trait affected transition fit better than model without? anova(fit0, fit3) # now we will allow the speciation and the extinction rates to vary additively with both # character states (character changes are left as in the simple model): lik4 <- make.musse.multitrait(tree,MuSSE_traits,sampling.f=0.7, depth=c(1, 1, 0)) p <- starting.point.musse.multitrait(tree, lik4) fit4 <- find.mle(lik4, p) #--> does a model including trait affected speciation and extinction fit better than model without? anova(fit1, fit4) anova(fit2, fit4) anova(fit0, fit4) # now we will allow the speciation, extinction and transition rates to vary additively with both # character states: lik5 <- make.musse.multitrait(tree,MuSSE_traits,sampling.f=0.7, depth=c(1, 1, 1)) p <- starting.point.musse.multitrait(tree, lik5) fit5 <- find.mle(lik5, p) #--> does a model including trait affected speciation, extinction and transition rates fit better than model without? anova(fit4, fit5) anova(fit3, fit5) ###################################################################################### # now, we will test the support for including an interaction term in the model. # Please note: In case in the previous step the models did not perform # significantly better than the simpler model, one should proceed the comparison with this simpler model below. # Models should therefore be adapted depending on the specific case (data) # we can fit an interaction for speciation rates only: lik6 <- make.musse.multitrait(tree,MuSSE_traits,sampling.f=0.7,depth=c(2, 1, 1)) p <- starting.point.musse.multitrait(tree, lik6) fit6 <- find.mle(lik6, p) # is there support for the interaction term for speciation rates? anova(fit5, fit6) # we can fit an interaction for extinction rates only: lik7 <- make.musse.multitrait(tree,MuSSE_traits,sampling.f=0.7,depth=c(1, 2, 1)) p <- starting.point.musse.multitrait(tree, lik7) fit7 <- find.mle(lik7, p) # is there support for the interaction term for extinction rates? anova(fit5, fit7) # we can fit an interaction for both speciation and extinction rates: lik8 <- make.musse.multitrait(tree,MuSSE_traits,sampling.f=0.7,depth=c(2, 2, 1)) p <- starting.point.musse.multitrait(tree, lik8) fit8 <- find.mle(lik8, p, control=list(maxit=100000)) # is there support for an interaction term for both speciation and extinction rates? anova(fit6, fit8) anova(fit7, fit8) # including allow.multistep? Should transition rates be included that imply simultaneous changes in more than one trait? lik9 <- make.musse.multitrait(tree,MuSSE_traits,sampling.f=0.7,depth=c(2, 2, 1), allow.multistep=TRUE) p <- starting.point.musse.multitrait(tree, lik9) fit9 <- find.mle(lik9, p, control=list(maxit=100000)) # is there support for allow.multistep? anova(fit8, fit9) # best model parameters round(coef(fit9), 3) # save pipeline and models tested save.image("NAME.RData") ###################################################################################### ########################## Bayesian run on MCC palm phylogenetic tree ################ # this is based on the best fitting model given the fewest number of parameters. As an # example we take a model that had a sigificant interaction term for speciation rates, # additive effects on extinction rates, and not support for transition rates to vary additively (2, 1, 0) # get ready lik <- make.musse.multitrait(tree,MuSSE_traits,sampling.f=0.7,depth=c(2, 1, 0)) p <- starting.point.musse.multitrait(tree, lik) fit <- find.mle(lik, p) prior <- make.prior.exponential(2*(log(1774)/100)) p <- starting.point.musse.multitrait(tree, lik) # optimize w mcmc.musse.multitrait<-mcmc(lik,p,nsteps=100,prior=prior,w=0.1) mcmc.musse.multitrait w=diff(sapply(mcmc.musse.multitrait[2:12],quantile,c(0.05,0.95))) # long run mcmc.musse.multitrait2<-mcmc(lik,p,nsteps=5000,w=w,prior=prior) # now we can examine the 95% credible intervals of the posterior samples for each parameter. If the intervals do not overlap, # then we have posterior Bayesian support for a difference in rates. sapply(mcmc.musse.multitrait2[,2:12],quantile,c(0.025,0.975)) # to plot this col <- c("#004165", "#eaab00", "brown2", "darkgreen", "darkorchid", "darkred", "yellow", "black", "green", "blue", "red", "purple", "pink") profiles.plot(mcmc.musse.multitrait2[c("lambdaS", "lambdaM", "lambda0")], col.line=col, las=1,xlab="Speciation rate", legend="topright") profiles.plot(mcmc.musse.multitrait2[c("muS", "muM", "mu0")], col.line=col, las=1,xlab="Extinction rate", legend="topright") profiles.plot(mcmc.musse2[c("q12", "q13", "q14", "q21", "q23", "q24", "q34", "q43", "q42", "q41", "q32", "q31", "q21")], col.line=col, las=1,xlab="Transition rate", legend="topright") # save output to file write.csv(mcmc.musse.multitrait2, file="NAME.csv") ###################################################################################### ########################## Loop on 100 palm phylogenetic trees ####################### # import file with 100 trees trees<-read.nexus("TREES.nex") # this is a posterior distribution of trees from the BEAST analysis # drop.tip of species in tree missing data on fruit length MuSSE_traits MuSSE_traits.v<-MuSSE_traits[,1] names(MuSSE_traits.v)<-row.names(MuSSE_traits) x<-name.check(trees[[1]],MuSSE_traits) allreps <- list() for(i in 1:100) { thisrep <- drop.tip(trees[[i]],x$tree_not_data) allreps[[i]] <- thisrep } trees2<-allreps ###################################################################################### # loop on 100 trees lik <- make.musse.multitrait(tree,MuSSE_traits,sampling.f=0.7,depth=c(2, 1, 0)) p <- starting.point.musse.multitrait(tree, lik) fit <- find.mle(lik, p) # get ready func <- make.musse.multitrait(tree,MuSSE_traits,sampling.f=0.7,depth=c(2, 1, 0)) start <- starting.point.musse.multitrait(tree, func) prior <- make.prior.exponential(2*(log(1774)/100)) # optimize w tmp4w <- mcmc(func, start[argnames(func)], prior=prior, nsteps=100, w=0.1) w <- diff(sapply(tmp4w[2:12], quantile, c(.05, .95))); w # on 100 trees MuSSE_traits.mcmc.musse <- list() for(i in seq(from=1, to=100, by=1)){ print(i) tree <- trees2[[i]] func <- make.musse.multitrait(tree,MuSSE_traits,sampling.f=0.7,depth=c(2, 1, 0)) prior <- make.prior.exponential(2*(log(1774)/100)) start <- starting.point.musse.multitrait(tree, func) # unconstraint thisRep <- mcmc(func, start[argnames(func)], prior=prior, nsteps=100, w=w) #220 so 10%=200 MuSSE_traits.mcmc.musse[[i]] <- thisRep save.image("NAME.RData") } ###################################################################################### # results unconstr <- list() for (i in seq(1,100,1)){ unconstr[[i]] <- MuSSE_traits.mcmc.musse[[i]] } summ_uc <- matrix(ncol=13, nrow=length(unconstr)*length(10:100)) colnames(summ_uc) <- names(unconstr[[1]]) for (par in 1:13){ allRep <- NULL for (rep in 1:length(unconstr)){ thisRep <- unconstr[[rep]][par][10:100,] allRep <- append(allRep, thisRep) } summ_uc[,par] <- allRep }; rm("par", "rep", "thisRep", "allRep") summ_uc <- data.frame(summ_uc) # save output to file write.csv(summ_uc, file="NAME.csv") ###################################################################################### # import results summ_uc<-read.csv("NAME.csv") summ_uc$lambda_additive_fruits<-summ_uc$lambda0+summ_uc$lambdaF summ_uc$lambda_additive_understory<-summ_uc$lambda0+summ_uc$lambdaU summ_uc$lambda_interaction<-summ_uc$lambda0+summ_uc$lambdaF+summ_uc$lambdaU+summ_uc$lambdaFU # to plot the speciation rate col <- c("grey", "#004165", "#eaab00", "red") profiles.plot(summ_uc[c("lambda0", "lambda_additive_fruits", "lambda_additive_understory", "lambda_interaction")], col.line=col, las=1,xlab="Speciation rate", legend="topright")