This code shows how the habitat coding was derived. Median environmental variables per species were rotated with a PCA and the euclidean distances between species based on the first two components were used to perfom a UPGMA to derive 4 habitat preference groups.

Import packages and data

packages

library(ape)
library(geiger)
library(vegan)
library(phytools)

data

tree<-read.nexus("~/r_scripts/data_files/beast_MCC.tre")
my.data<-read.csv("~/r_scripts/data_files/env_data.csv", header=T, row.names = 1)

Data prep

match tree and data

tree<-drop.tip(tree, name.check(tree, my.data)$tree_not_data)
my.data<-my.data[tree$tip.label,]
all(rownames(my.data)==tree$tip)
## [1] TRUE
name.check(tree, my.data)
## [1] "OK"
plot(tree, cex=0.5)

create empty data frame and select the environmental variables of interest

hab.code<-data.frame(row.names=tree$tip.label)
par.of.intr<-c("forest","slope","Q","twi","bio8","bio16")

PCA

hab.pca<-prcomp(my.data[,par.of.intr], scale.=T, center = T)
biplot(hab.pca, pch=20, cex=0.25)

hab.pca
## Standard deviations:
## [1] 1.8404756 1.2085375 0.8274532 0.5801943 0.2706743 0.2398288
## 
## Rotation:
##               PC1        PC2        PC3         PC4         PC5
## forest  0.4016241 -0.1590306  0.6627476 -0.57977879  0.19501084
## slope   0.4415170  0.3223227  0.2207437  0.63675609  0.49652190
## Q       0.4375679 -0.3543055 -0.4505809 -0.06328954  0.15117422
## twi    -0.4948772 -0.2757654 -0.1019860 -0.11286407  0.80559222
## bio8   -0.2231148 -0.6394740  0.4667750  0.48502596 -0.20690625
## bio16   0.3970032 -0.5102072 -0.2841384  0.08002616 -0.02817987
##                  PC6
## forest -0.0005402017
## slope   0.0212308322
## Q      -0.6731415992
## twi     0.0832568594
## bio8   -0.2129635905
## bio16   0.7029538761
summary(hab.pca)
## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5     PC6
## Standard deviation     1.8405 1.2085 0.8275 0.5802 0.27067 0.23983
## Proportion of Variance 0.5646 0.2434 0.1141 0.0561 0.01221 0.00959
## Cumulative Proportion  0.5646 0.8080 0.9221 0.9782 0.99041 1.00000

UPGMA

perform UPGMA to derive at clustering based on the euclidean distances of the frist two principal components

hab.pca.dist<-dist(hab.pca$x[,1:2])
hab.pca.upgma<-hclust(hab.pca.dist, method="average")
par(mar=c(4,4,4,0))
plot(hab.pca.upgma, hang=-1, main="UPGMA",cex=0.5,axes=F, ylab="", xlab="")

med.upgma.clusters<-cutree(hab.pca.upgma,k = 4) # make 4 groupings
head(med.upgma.clusters)
## Altiphrynoides_malcolmi  Altiphrynoides_osgoodi       Capensibufo_rosei 
##                       1                       2                       1 
##    Capensibufo_tradouwi      Churamiti_maridadi   Didynamipus_sjostedti 
##                       1                       2                       3

match order with the tip names in the tree and add the PCA components + upgma groups to the new dataframe

med.upgma.clusters<-med.upgma.clusters[tree$tip]
name.check(tree, med.upgma.clusters)
## [1] "OK"
hab.code$upgma_clusters<-med.upgma.clusters
hab.code<-cbind(hab.code, hab.pca$x)

this dataset now has the principal component scores and the UPGMA groups that were used for further analyses (e.g. the Ancestral State Reconstructions for habitat preferences)

Plots

Plot the PCA as before, but this time using a colourscheme that reflects reproductive mode. We will also overlay the UPGMA dendrogram and polygons to delimit the clusters.

convert the dendrogram into a phylo object and make a colour scheme vector

#make dendrogram
upgma.tree<-as.phylo(hab.pca.upgma)
upgma.tree$edge.length[upgma.tree$edge.length==0]<-0.00001 # eliminate any 0-length branches (creates problems for plotting with the phylomorphospace() function)
#make colour scheme
rep_cols<-c("deepskyblue3","orange","#B404AE","gold","chartreuse3"); names(rep_cols)<-c("generalized","ephem","terr_eggs","steep_terr","viviparity")
  1. make empty plot with polygons for habitat clusters
  2. add the UPGMA dendrogram
  3. add coloured points to indicate the reproductive modes
  4. plot phylogney with the same scheme for tip labels next to it
par(mfrow=c(1,2))
#empty plot:
par(mar=c(4,4,1,0))
plot(NA, xlim=c(-4,4), ylim=c(-4,4), bty="n",xlab="PC1", ylab="PC2", las=1)
abline(h=0,v=0, col="black", lty=3, lwd=0.5)
#add upgma grouping polygons
ordihull(ord=hab.code[,c("PC1", "PC2")], groups=hab.code$upgma_clusters, draw="polygon", col="grey", border=NA, label=F)
#add upgma dendrogram
phylomorphospace(tree=upgma.tree, X=hab.code[,c("PC1","PC2")], label="OFF", xlim=c(-4,4), ylim=c(-4,4), add=T)
#add colour points to reflect reproductive modes and a legend for them
points(hab.code[,c("PC1","PC2")],pch=21, bg=rep_cols[as.character(my.data$rep_mode)], cex=1.5, col="white")
legend("bottomright", cex=0.5,pt.cex=1, pch=20, col=rep_cols[as.character(unique(my.data$rep_mode))], legend=unique(my.data$rep_mode), bty="n", title="Rep. mode")
#plot phylogeny
par(mar=c(1,1,1,1))
plot(tree, label.offset = 1, cex=0.5, no.margin=T)
tiplabels(pch=21, cex=1,col="white", bg=rep_cols[as.character(my.data$rep_mode)], adj=1)