This code shows how the habitat coding was derived. Median environmental variables per species were rotated with a PCA and the euclidean distances between species based on the first two components were used to perfom a UPGMA to derive 4 habitat preference groups.
packages
library(ape)
library(geiger)
library(vegan)
library(phytools)
data
tree<-read.nexus("~/r_scripts/data_files/beast_MCC.tre")
my.data<-read.csv("~/r_scripts/data_files/env_data.csv", header=T, row.names = 1)
match tree and data
tree<-drop.tip(tree, name.check(tree, my.data)$tree_not_data)
my.data<-my.data[tree$tip.label,]
all(rownames(my.data)==tree$tip)
## [1] TRUE
name.check(tree, my.data)
## [1] "OK"
plot(tree, cex=0.5)
create empty data frame and select the environmental variables of interest
hab.code<-data.frame(row.names=tree$tip.label)
par.of.intr<-c("forest","slope","Q","twi","bio8","bio16")
hab.pca<-prcomp(my.data[,par.of.intr], scale.=T, center = T)
biplot(hab.pca, pch=20, cex=0.25)
hab.pca
## Standard deviations:
## [1] 1.8404756 1.2085375 0.8274532 0.5801943 0.2706743 0.2398288
##
## Rotation:
## PC1 PC2 PC3 PC4 PC5
## forest 0.4016241 -0.1590306 0.6627476 -0.57977879 0.19501084
## slope 0.4415170 0.3223227 0.2207437 0.63675609 0.49652190
## Q 0.4375679 -0.3543055 -0.4505809 -0.06328954 0.15117422
## twi -0.4948772 -0.2757654 -0.1019860 -0.11286407 0.80559222
## bio8 -0.2231148 -0.6394740 0.4667750 0.48502596 -0.20690625
## bio16 0.3970032 -0.5102072 -0.2841384 0.08002616 -0.02817987
## PC6
## forest -0.0005402017
## slope 0.0212308322
## Q -0.6731415992
## twi 0.0832568594
## bio8 -0.2129635905
## bio16 0.7029538761
summary(hab.pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 1.8405 1.2085 0.8275 0.5802 0.27067 0.23983
## Proportion of Variance 0.5646 0.2434 0.1141 0.0561 0.01221 0.00959
## Cumulative Proportion 0.5646 0.8080 0.9221 0.9782 0.99041 1.00000
perform UPGMA to derive at clustering based on the euclidean distances of the frist two principal components
hab.pca.dist<-dist(hab.pca$x[,1:2])
hab.pca.upgma<-hclust(hab.pca.dist, method="average")
par(mar=c(4,4,4,0))
plot(hab.pca.upgma, hang=-1, main="UPGMA",cex=0.5,axes=F, ylab="", xlab="")
med.upgma.clusters<-cutree(hab.pca.upgma,k = 4) # make 4 groupings
head(med.upgma.clusters)
## Altiphrynoides_malcolmi Altiphrynoides_osgoodi Capensibufo_rosei
## 1 2 1
## Capensibufo_tradouwi Churamiti_maridadi Didynamipus_sjostedti
## 1 2 3
match order with the tip names in the tree and add the PCA components + upgma groups to the new dataframe
med.upgma.clusters<-med.upgma.clusters[tree$tip]
name.check(tree, med.upgma.clusters)
## [1] "OK"
hab.code$upgma_clusters<-med.upgma.clusters
hab.code<-cbind(hab.code, hab.pca$x)
this dataset now has the principal component scores and the UPGMA groups that were used for further analyses (e.g. the Ancestral State Reconstructions for habitat preferences)
Plot the PCA as before, but this time using a colourscheme that reflects reproductive mode. We will also overlay the UPGMA dendrogram and polygons to delimit the clusters.
convert the dendrogram into a phylo object and make a colour scheme vector
#make dendrogram
upgma.tree<-as.phylo(hab.pca.upgma)
upgma.tree$edge.length[upgma.tree$edge.length==0]<-0.00001 # eliminate any 0-length branches (creates problems for plotting with the phylomorphospace() function)
#make colour scheme
rep_cols<-c("deepskyblue3","orange","#B404AE","gold","chartreuse3"); names(rep_cols)<-c("generalized","ephem","terr_eggs","steep_terr","viviparity")
par(mfrow=c(1,2))
#empty plot:
par(mar=c(4,4,1,0))
plot(NA, xlim=c(-4,4), ylim=c(-4,4), bty="n",xlab="PC1", ylab="PC2", las=1)
abline(h=0,v=0, col="black", lty=3, lwd=0.5)
#add upgma grouping polygons
ordihull(ord=hab.code[,c("PC1", "PC2")], groups=hab.code$upgma_clusters, draw="polygon", col="grey", border=NA, label=F)
#add upgma dendrogram
phylomorphospace(tree=upgma.tree, X=hab.code[,c("PC1","PC2")], label="OFF", xlim=c(-4,4), ylim=c(-4,4), add=T)
#add colour points to reflect reproductive modes and a legend for them
points(hab.code[,c("PC1","PC2")],pch=21, bg=rep_cols[as.character(my.data$rep_mode)], cex=1.5, col="white")
legend("bottomright", cex=0.5,pt.cex=1, pch=20, col=rep_cols[as.character(unique(my.data$rep_mode))], legend=unique(my.data$rep_mode), bty="n", title="Rep. mode")
#plot phylogeny
par(mar=c(1,1,1,1))
plot(tree, label.offset = 1, cex=0.5, no.margin=T)
tiplabels(pch=21, cex=1,col="white", bg=rep_cols[as.character(my.data$rep_mode)], adj=1)