This script details how phylogenetic signal was estimated for discrete characters and how ancestral states were reconstructed using stochastic character mapping
library(ape)
library(geiger)
library(phytools)
tree<-read.nexus("~/r_scripts/data_files/beast_MCC.tre")
my.data<-read.csv("~/r_scripts/data_files/my_data.csv", header=T, row.names = 1)
tree<-drop.tip(tree, name.check(tree, my.data)$tree_not_data)
my.data<-my.data[tree$tip.label,]
all(rownames(my.data)==tree$tip)
## [1] TRUE
name.check(tree, my.data)
## [1] "OK"
plot(tree, cex=0.5)
rep.modes<-my.data$rep_mode; names(rep.modes)<-rownames(my.data)
rep.lamb<-fitDiscrete(tree, rep.modes, transform="lambda")
rep.lamb0<-fitDiscrete(tree,rep.modes, transform="white")
rep.lamb0$opt$aicc
## [1] 178.4174
rep.lamb$opt$aicc
## [1] 97.42607
(dAICc<-rep.lamb0$opt$aicc-rep.lamb$opt$aicc)
## [1] 80.99133
Ancestral state reconstruction was perfomed using Stochastic Character mapping. Due to the paraphyletic nature of African Bufonidae, this was done only on Clade A (see text for details).
reimport data and prune to include just Clade A. We will also create colour vectors for reproductive mode and habitat cluster (from the UPGMA)
my.data<-read.csv("~/r_scripts/data_files/my_data.csv", header=T, row.names = 1)
tree<-read.nexus("~/r_scripts/data_files/beast_MCC.tre")
tree<-extract.clade(tree, 131) #extract Clade A
my.data<-my.data[tree$tip,]
# colour vectors:
hab_cols<-c("#a595ed","#c4c0d3","#F56476","#BE3E82"); names(hab_cols)<-c(1,2,3,4)
rep_cols<-c("deepskyblue3","orange","#B404AE","gold","chartreuse3"); names(rep_cols)<-c("generalized","ephem","terr_eggs","steep_terr","viviparity")
plot(tree, cex=0.5, label.offset = 2)
tiplabels(pch=20, col=hab_cols[my.data$upgma_clusters], adj=1)
tiplabels(pch=20, col=rep_cols[as.character(my.data$rep_mode)], adj=2)
make a probability matrix of tip states
hab.mode.prob<-data.frame(model.matrix(~as.character(my.data$upgma_clusters)-1))
colnames(hab.mode.prob)<-c("1","2","3","4")
rownames(hab.mode.prob)<-tree$tip
hab.mode.prob<-hab.mode.prob[tree$tip,]
hab.mode.prob<-as.matrix(hab.mode.prob)
name.check(tree, hab.mode.prob)
## [1] "OK"
head(hab.mode.prob)
## 1 2 3 4
## Poyntonophrynus_kavangensis 0 0 1 0
## Mertensophryne_uzunguensis 0 1 0 0
## Nectophrynoides_wendyae 0 1 0 0
## Poyntonophrynus_damaranus 0 0 1 0
## Poyntonophrynus_dombensis 0 0 1 0
## Mertensophryne_lindneri 0 0 1 0
simulate stochastic character maps
sim.hab.trees <- make.simmap(tree, hab.mode.prob, model = "ER", nsim = 20, Q="mcmc", pi="estimated") #NOTE! the number of simulations has been reduced to 20 here for illustration purposes. Run code with nsim=999.
## Running MCMC burn-in. Please wait....
## Running 2000 generations of MCMC, sampling every 100 generations.
## Please wait....
##
## make.simmap is simulating with a sample of Q from
## the posterior distribution
##
## Mean Q from the posterior is
## Q =
## 1 2 3 4
## 1 -0.05831809 0.01943936 0.01943936 0.01943936
## 2 0.01943936 -0.05831809 0.01943936 0.01943936
## 3 0.01943936 0.01943936 -0.05831809 0.01943936
## 4 0.01943936 0.01943936 0.01943936 -0.05831809
## and (mean) root node prior probabilities
## pi =
## 1 2 3 4
## 0.25 0.25 0.25 0.25
hab_simmap <- describe.simmap(sim.hab.trees, plot=F)
plot summarized node states as pie charts on tree
par(mar=c(4,4,4,4))
plot(tree, show.tip.label=T, label.offset=1, cex=0.4, main="SCM of Habitat Preferences")
tiplabels(pch=20, col=hab_cols[my.data$upgma_clusters],cex=1, adj=1)
nodelabels(pie=hab_simmap$ace, piecol= hab_cols[colnames(hab_simmap$tips)], cex = 0.5)
legend("bottomleft", bty="n", pch=19, col=hab_cols, legend=names(hab_cols), title="Hab. pref.")
same code as above, but for reproductive modes:
make a probability matrix of tip states
rep.mode.prob<-data.frame(model.matrix(~my.data$rep_mode-1))
colnames(rep.mode.prob)<-levels(my.data$rep_mode)
rownames(rep.mode.prob)<-tree$tip
rep.mode.prob<-rep.mode.prob[tree$tip,]
rep.mode.prob<-as.matrix(rep.mode.prob)
name.check(tree, rep.mode.prob)
## [1] "OK"
head(rep.mode.prob)
## ephem generalized steep_terr terr_eggs
## Poyntonophrynus_kavangensis 0 1 0 0
## Mertensophryne_uzunguensis 0 1 0 0
## Nectophrynoides_wendyae 0 0 0 0
## Poyntonophrynus_damaranus 0 1 0 0
## Poyntonophrynus_dombensis 0 1 0 0
## Mertensophryne_lindneri 0 1 0 0
## viviparity
## Poyntonophrynus_kavangensis 0
## Mertensophryne_uzunguensis 0
## Nectophrynoides_wendyae 1
## Poyntonophrynus_damaranus 0
## Poyntonophrynus_dombensis 0
## Mertensophryne_lindneri 0
## NOTE: for species where reproductive mode is unknown/uncertain, we can include a degree of uncertainty in the tip state coding. For example, Churamiti maridadi is only inferred to be a generalized tadpole breeder, but in reality, we do not know. We could therefore assign euqal proabilities for each state like so:
#rep.mode.prob["Churamiti_maridadi",]<-c(0.2,0.2,0.2,0.2,0.2)
simulate stochastic character maps
sim.rep.trees <- make.simmap(tree, rep.mode.prob, model = "ER", nsim = 20, Q="mcmc", pi="estimated") #NOTE! the number of simulations has been reduced to 20 here for illustration purposes. Run code with nsim=999.
## Running MCMC burn-in. Please wait....
## Running 2000 generations of MCMC, sampling every 100 generations.
## Please wait....
##
## make.simmap is simulating with a sample of Q from
## the posterior distribution
##
## Mean Q from the posterior is
## Q =
## ephem generalized steep_terr terr_eggs
## ephem -0.012680834 0.003170209 0.003170209 0.003170209
## generalized 0.003170209 -0.012680834 0.003170209 0.003170209
## steep_terr 0.003170209 0.003170209 -0.012680834 0.003170209
## terr_eggs 0.003170209 0.003170209 0.003170209 -0.012680834
## viviparity 0.003170209 0.003170209 0.003170209 0.003170209
## viviparity
## ephem 0.003170209
## generalized 0.003170209
## steep_terr 0.003170209
## terr_eggs 0.003170209
## viviparity -0.012680834
## and (mean) root node prior probabilities
## pi =
## ephem generalized steep_terr terr_eggs viviparity
## 0.2 0.2 0.2 0.2 0.2
rep_simmap <- describe.simmap(sim.rep.trees, plot=F)
plot summarized node states as pie charts on tree
par(mar=c(4,4,4,4))
plot(tree, show.tip.label=T, label.offset=1, cex=0.4, main="SCM of Reproductive Mode")
tiplabels(pch=20, col=rep_cols[as.character(my.data$rep_mode)],cex=2, adj=1)
nodelabels(pie=rep_simmap$ace, piecol= rep_cols[colnames(rep_simmap$tips)], cex = 0.5)
legend("bottomleft", bty="n", pch=19, col=rep_cols, legend=names(rep_cols), title="Rep. modes")
the following code plots the SCM summaries as above, but colours any downstream branches (towards the tips) the most probably state of that lineage.
par(mfrow=c(1,2))
hab.tree<-tree
for(i in 1:nrow(hab_simmap$ace)){
hab.tree<-paintSubTree(hab.tree,node=as.numeric(rownames(hab_simmap$ace)[i]),state=names(which.max(hab_simmap$ace[i,])))
}
plotSimmap(hab.tree,hab_cols,lwd=3, fsize=0.5, offset=1)
tiplabels(pch=20, col=hab_cols[my.data$upgma_clusters],cex=2,adj = 2)
nodelabels(pie=hab_simmap$ace,piecol= hab_cols[colnames(hab_simmap$tips)], cex = 1)
legend("bottomleft", bty="n", pch=19, col=hab_cols, legend=names(hab_cols), title="Hab. pref.", pt.cex=2, cex=0.75)
rep.tree<-tree
for(i in 1:nrow(rep_simmap$ace)){
rep.tree<-paintSubTree(rep.tree,node=as.numeric(rownames(rep_simmap$ace)[i]),state=names(which.max(rep_simmap$ace[i,])))
}
plotSimmap(rep.tree,rep_cols,lwd=3, fsize = 0.5, offset=1) ### note, for some reason leftwards causes problems with plotting pies on nodes...
tiplabels(pch=20, col=rep_cols[as.character(my.data$rep_mode)],cex=2, adj=2)
nodelabels(pie=rep_simmap$ace,piecol= rep_cols[colnames(rep_simmap$tips)], cex = 1)
legend("bottomleft", bty="n", pch=20, col=rep_cols, legend=names(rep_cols), title="Rep modes", pt.cex = 2, cex=0.75)