---
title: "cassowary analysis"
output: html_document
editor_options:
chunk_output_type: console
---
## Setup
```{r}
# Load packages
library(pavo)
library(ggrepel)
library(stringr)
library(dplyr)
library(ape)
library(geiger)
library(viridis)
library(visreg)
library(MCMCglmm)
library(treeplyr)
library(readxl)
library(ellipse)
library(ggplot2)
library(tidyr)
library(RColorBrewer)
library(bayou)
library(doParallel)
library(treeplyr)
library(phytools)
library(phylolm)
library(moments)
# Source R code
source("mycontmap.R")
source("gmaxtest.R")
source("shiftSums.R")
source("utility_functions.R")
.tipregime <- bayou:::.tipregime
# Load tree and data
tree <- read.tree("palaeognaths.tre")
morphdat <- read_excel("Dryad Dataset S1.xlsx", sheet=1)
# Rename some variables to work with R code below
names(morphdat) <- plyr::revalue(names(morphdat), c('Aspect.ratio'='ratio', 'Vane.width.mm'='vane_width', 'Rachis.width.mm'='rachis_width', 'Contrast.gloss'='gloss', 'Species'='spp', 'Diffuse.R'='diffuse', 'Specular.R'='specular', 'Density.um2'='density', 'Barbules'='barbules', 'Rel.rachis.width'='propwidth', 'Mass.g'='mass_g'))
```
## Bayou analysis (testing for exceptional glossiness)
### Rachis width
```{r}
td <- morphdat %>% group_by(spp) %>%
# filter(spp != "Taoniscus_nanus") %>%
summarize_if(is.numeric, mean, na.rm=T) %>%
treeplyr::make.treedata(tree=tree)
btree <- td$phy
bdat <- td[["rachis_width"]]
# phenogram(btree, bdat)
btree <- reorder(btree, "postorder")
# Only allow changes on non-terminal edges
bmax <- ifelse(btree$edge[,2] %in% 1:Ntip(btree), 0, 1)
# bmax <- rep(1, Nedge(btree))
plot(btree, edge.col=bmax+1)
# Make prior
priorOU <- make.prior(btree, dists=list(dalpha="dhalfcauchy", dsig2="dhalfcauchy", dk="cdpois", dtheta="dnorm"), param=list(dk=list(lambda=3, kmax=sum(bmax)/2), dsb=list(bmax=bmax, prob=1), dtheta=list(mean=mean(bdat), sd=1.5*sd(bdat))))
startpars <- priorSim(priorOU, btree, plot=TRUE)$pars[[1]]
# Make model
set.seed(1)
mcmcOU <- bayou.makeMCMC(btree, bdat, prior=priorOU, new.dir="output/runs", outname="modelOU_rachis", plot.freq=NULL)
# Run MCMC analysis (uncomment to run)
mcmcOU$run(1e6)
# Load
chainOU <- mcmcOU$load(saveRDS=TRUE)
# Evaluation:
plot(chainOU)
summary(chainOU)
# Analyze bayou output
pp <- 0.3
# Set the burnin proportion for plotting with coda to check the MCMC chains
chainOU <- set.burnin(chainOU, pp)
# Regime plots
plotSimmap.mcmc(chainOU, burnin=0.3, cex=0.5)
#Phenogram with branches with posterior probabilities > 0.3 colored
phenogram.density(btree, bdat, burnin=0.3, chainOU, pp.cutoff = pp)
# Plot parameter distributions for regimes
shiftsums <- shiftSummaries(chainOU, mcmcOU, pp.cutoff=pp)
# shift summaries with more detailed output (custom R script)
res <- shiftSums(chainOU, mcmcOU, pp.cutoff=pp)
# Make simmap tree
L <- Lposterior(chainOU, btree, burnin=attributes(chainOU)$burnin)
sb <- which(L[,1] > pp)
pars <- list(k=length(sb), ntheta=length(sb)+1, sb=unname(sb), t2=2:(length(sb)+1), loc=rep(0, length(sb)))
sm <- pars2simmap(pars = pars, tree = btree)
# New color palette
pal <- sm$col
pal <- setNames(c('gray', brewer.pal(length(pal), "Set1")[1:(length(pal)-1)]), names(pal))
# Get parameters
df <- as.data.frame(t(sapply(res$cladesummaries, function(x) {hpd(x$values[[1]])})))
df$regime <- as.character(1:nrow(df))
# ADD all edge probs for reviewer 2:
tmp <- shiftSummaries(chainOU, mcmcOU, pp.cutoff=0)
# reorder edges
id.postorder <- apply(sm$tree$edge, 1, paste0, collapse="-")
id.cladewise <- apply(reorder(sm$tree, "cladewise")$edge, 1, paste0, collapse="-")
el <- NA
el[as.numeric(rownames(tmp$regressions)[-1])] <- tmp$PP
el <- el[match(id.cladewise, id.postorder)]
# Make figure
pdf("bayou_rachis.pdf", width=5.5, height=4.5)
layout(cbind(c(1,3),c(2,2)), widths=c(.3, 1))
par(mar=c(1,3,1,0), mgp=c(1.5,.5,0), ps=8, mex=.75)
xx <- seq_along(df$regime)
plot(df$y, col=pal, pch=16, ylim=range(df[,1:3]), axes=F, ylab="Mean rachis diameter (um)", xlim = c(xx[1]-0.5, xx[length(xx)]+0.5))
segments(xx, df$ymin, xx, df$ymax, col=pal)
axis(side=2)
plotSimmap(sm$tree, pal)
edgelabels(round(el, 2), adj=c(0.3,-0.5), frame='none', cex=.6)
dev.off()
```
### Gloss: Are cassowaries exceptionally glossy?
```{r, eval=FALSE}
btree <- td$phy
bdat <- td[["gloss"]]
phenogram(btree, bdat)
btree <- reorder(btree, "postorder")
# Only allow changes on non-terminal edges
bmax <- ifelse(btree$edge[,2] %in% 1:Ntip(btree), 0, 1)
# bmax <- rep(1, Nedge(btree))
plot(btree, edge.col=bmax+1)
# Make prior
priorOU <- make.prior(btree, dists=list(dalpha="dhalfcauchy", dsig2="dhalfcauchy", dk="cdpois", dtheta="dnorm"), param=list(dk=list(lambda=3, kmax=sum(bmax)/2), dsb=list(bmax=bmax, prob=1), dtheta=list(mean=mean(bdat), sd=1.5*sd(bdat))))
startpars <- priorSim(priorOU, btree, plot=TRUE)$pars[[1]]
set.seed(1)
mcmcOU <- bayou.makeMCMC(btree, bdat, prior=priorOU, new.dir="output/runs", outname="modelOU_gloss", plot.freq=NULL)
# Run
# mcmcOU$run(1e6)
# save/Load
chainOU <- mcmcOU$load(saveRDS=TRUE)
plot(chainOU)
summary(chainOU)
# Set the burnin proportion for plotting with coda to check the MCMC chains
chainOU <- set.burnin(chainOU, 0.3)
# Regime plots
plotSimmap.mcmc(chainOU, burnin=0.3, cex=0.5)
# cutoff to use for shifts on branches
pp <- 0.5
#Phenogram with branches with posterior probabilities > 0.3 colored
phenogram.density(btree, bdat, burnin=0.3, chainOU, pp.cutoff=pp)
# Plot parameter distributions for regimes
shiftsums <- shiftSummaries(chainOU, mcmcOU, pp.cutoff=pp)
# shift summaries with more detailed output (custom R script)
res <- shiftSums(chainOU, mcmcOU, pp.cutoff=pp)
# Make simmap tree
L <- Lposterior(chainOU, btree, burnin = attributes(chainOU)$burnin)
sb <- which(L[,1] > pp)
pars <- list(k=length(sb), ntheta=length(sb)+1, sb=unname(sb), t2=2:(length(sb)+1), loc=rep(0, length(sb)))
sm <- pars2simmap(pars = pars, tree = btree)
# New color palette
pal <- sm$col
pal <- setNames(c('gray', brewer.pal(length(pal), "Set1")[1:(length(pal)-1)]), names(pal))
# Make data output
df <- as.data.frame(t(sapply(res$cladesummaries, function(x) {hpd(x$values[[1]])})))
df$regime <- as.character(1:nrow(df))
# All branch probs
tmp <- shiftSummaries(chainOU, mcmcOU, pp.cutoff=0)
el <- NA
el[as.numeric(rownames(tmp$regressions)[-1])] <- tmp$PP
id.postorder <- apply(sm$tree$edge, 1, paste0, collapse="-")
id.cladewise <- apply(reorder(sm$tree, "cladewise")$edge, 1, paste0, collapse="-")
el <- el[match(id.cladewise, id.postorder)]
# Generate figure for ms
pdf("figs/bayou_gloss.pdf", width=5.5, height=4.5)
layout(cbind(c(1,3),c(2,2)), widths=c(.3, 1))
par(mar=c(1,3,1,0), mgp=c(1.5,.5,0), ps=8, mex=.75)
xx <- seq_along(df$regime)
plot(df$y, col=pal, pch=16, ylim=range(df[,1:3]), axes=F, ylab="Mean contrast gloss", xlim = c(xx[1]-0.5, xx[length(xx)]+0.5))
segments(xx, df$ymin, xx, df$ymax, col=pal)
axis(side=2)
plotSimmap(sm$tree, pal)
# reorder edges
# add all edge probs
edgelabels(round(el, 2), adj=c(0.3,-0.5), frame='none', cex=.75)
dev.off()
```
## Comparative analysis of traits
```{r}
df2 <- morphdat %>% group_by(spp) %>%
select(gloss, specular, diffuse, ratio, barbules, rachis_width, vane_width, density, propwidth) %>%
summarize_all(funs(mean), na.rm=TRUE) %>%
as.data.frame
rownames(df2) <- df2$spp
td <- make.treedata(tree, as.matrix(df2[,-1]))
td2 <- filter(td, tip.label!="Eudromia_elegans")
pdf("figs/barbules.pdf", width=6, height=6)
par(ps=8, mar=c(0,0,0,0))
plot(td$phy)
trait <- ifelse(td[['barbules']]=="1", 1, 0)
anc <- ace(x=trait, phy=td$phy, type="discrete")
trait <- trait[td$phy$tip]
nodelabels(pie=anc$lik.anc, piecol = c('white','black'), cex=.5)
dev.off()
```
## Phylogenetic linear mixed models (PLMMs)
### Setup
```{r}
# Setup dataset
traits <- c('spp', 'specular', 'diffuse', 'gloss', 'ratio', 'density', 'barbules', 'propwidth', 'rachis_width', 'vane_width', 'mass_g', 'propwidth')
df2 <- morphdat %>% select(traits)
# rownames(df2) <- df2$spp
df2$clade <- factor(ifelse(grepl("Casuarius|Dromaius", df2$spp), "ce", "other"))
# Get Ainv like this:
Ainv_BM <- inverseA(tree, nodes="TIPS")$Ainv # no nodes, only tips
```
### Allometry data
```{r}
ngen <- 1e6
keep <- complete.cases(df2[c('rachis_width', 'vane_width', 'mass_g')])
sum(keep) # N = 27
dat <- df2[keep, ]
set.seed(1980)
allo.bm <- MCMCglmm(rachis_width ~ vane_width + log(mass_g),
data = dat,
family = 'gaussian',
random = ~spp,
ginverse = list(spp = Ainv_BM),
nitt=ngen, burnin=0.25*ngen)
set.seed(1980)
allo.wn <- MCMCglmm(rachis_width ~ vane_width + log(mass_g),
data = dat,
family = 'gaussian',
nitt=ngen, burnin=0.25*ngen)
aicw(c(BM=allo.bm$DIC, WN=allo.wn$DIC))
summary(allo.bm)
summary(allo.wn)
allo2.bm <- MCMCglmm(vane_width ~ rachis_width + log(mass_g),
data = dat,
family = 'gaussian',
random = ~spp,
ginverse = list(spp = Ainv_BM),
nitt=ngen, burnin=0.25*ngen)
allo2.wn <- MCMCglmm(vane_width ~ rachis_width + log(mass_g),
data = dat,
family = 'gaussian',
nitt=ngen, burnin=0.25*ngen)
aicw(c(BM=allo2.bm$DIC, WN=allo2.wn$DIC))
summary(allo2.bm)
summary(allo2.wn)
p1 <- ggplot(df2, aes(x=mass_g, y=rachis_width/1e3)) + geom_point() + scale_x_log10() + annotation_logticks(side='b') + stat_smooth(method="lm") + theme_minimal() + labs(x= "Body mass (g)", y = "Rachis diameter (mm)") + coord_cartesian(y=c(0, .3))
p2 <- ggplot(df2, aes(x=mass_g, y=vane_width/1e3)) + geom_point() + scale_x_log10() + annotation_logticks(side='b') + stat_smooth(method="lm") + theme_minimal() + labs(x= "Body mass (g)", y = "Vane width (mm)")
cowplot::plot_grid(p1, p2, labels=c('a', 'b'))
ggsave('figs/morphotraits.pdf', width=6.5, height=3.25)
```
### Gloss data
```{r}
# Specular - species with melanosome data
keep <- complete.cases(df2[c('gloss', 'barbules', 'rachis_width', 'ratio', 'density')])
sum(keep) # N = 30
ngen <- 1e6
set.seed(1980)
gloss.bm <- MCMCglmm(gloss ~ barbules + rachis_width + density + ratio,
data = df2[keep, ],
family = 'gaussian',
random = ~spp,
ginverse = list(spp = Ainv_BM),
nitt=ngen, burnin=0.25*ngen)
set.seed(1980)
gloss.wn <- MCMCglmm(gloss ~ barbules + rachis_width + density + ratio,
data = df2[keep, ],
family = 'gaussian',
random = ~spp,
nitt=ngen, burnin=0.25*ngen)
# Model weights
aicw(c(BM=gloss.bm$DIC, WN=gloss.wn$DIC))
summary(gloss.bm) # only rachis width is significant
summary(gloss.wn)
```
### Specular and diffuse reflectance data
```{r}
keep <- complete.cases(df2[c('specular', 'diffuse', 'barbules', 'ratio', 'density', 'propwidth')])
ngen <- 1e6
prior = list(R = list(V = diag(2), nu = 1.002), G = list(G1 = list(V = diag(2),nu = 1.002)))
sum(keep) # N = 31
dat <- df2[keep, ]
set.seet(1980)
spec.bm <- MCMCglmm(cbind(specular, diffuse) ~ trait - 1 + trait : (barbules + ratio + density + propwidth),
data = dat,
family = c('gaussian', 'gaussian'),
random = ~us(trait):spp,
ginverse = list(spp = Ainv_BM),
prior = prior,
rcov = ~us(trait):units,
nitt=ngen, burnin=0.25*ngen)
set.seed(1980)
spec.wn <- MCMCglmm(cbind(specular, diffuse) ~ trait - 1 + trait : (barbules + ratio + density + propwidth),
data = dat,
family = c('gaussian', 'gaussian'),
rcov = ~us(trait):units,
nitt = ngen, burnin = 0.25*ngen)
# Save
save(allo.bm, allo.wn, allo2.bm, allo2.wn, gloss.bm, gloss.wn, spec.bm, spec.wn, file="output/MCMC_fits.rda")
# Load
load(file="output/MCMC_fits.rda")
aicw(c(BM=spec.bm$DIC, WN=spec.wn$DIC))
aicw(c(BM=gloss.bm$DIC, WN=gloss.wn$DIC))
summary(spec.bm)
summary(gloss.bm)
slope <- mean(spec.bm$Sol[,'traitdiffuse:ratio'])
int <- mean(spec.bm$Sol[,'traitdiffuse'])
p1 <- ggplot(morphdat, aes(x=ratio, y=diffuse, fill=tolower(color))) +
geom_point(cex=2, pch=21) +
scale_fill_manual(values = colorpal) +
geom_abline(slope=slope, intercept=int, lty=2) +
theme_minimal() +
theme(legend.position="none") +
labs(x="Melanosome aspect ratio", y="Diffuse reflectance (%)")
morphdat$barbules <- factor(morphdat$barbules)
levels(morphdat$barbules) <- c('Absent', 'Present')
p2 <- ggplot(morphdat, aes(x=factor(barbules), y=specular)) + geom_boxplot() + theme_minimal() +labs(x="Distal barbules", y="Specular reflectance (%)")
cowplot::plot_grid(p1, p2, labels=c('a', 'b'))
ggsave("figs/pgls.pdf", width=6.5, height=3)
# Plumage is glossy because shape is so different compared to other palaeognaths (high AR, etc.), i.e. more rachis showing per unit area of plumage (I think so)
```
## Test if covariance structure differs between groups
```{r}
prior <- list(G = list(G1 = list(V = diag(2), nu = 0.002),
G2 = list(V = diag(2), nu = 0.002),
G3 = list(V = diag(2), nu = 0.002)),
R = list(V = diag(2), n = 0.002)) # relatively uninformative
# Uncomment to run-
set.seed(1980)
covar.bm <- MCMCglmm(log(cbind(diffuse, specular)) ~ trait - 1,
random = ~us(trait:at.level(clade,"ce")):units + us(trait:at.level(clade,"other")):units + us(trait):spp, # G in prior
rcov = ~us(trait):units, # R in prior
data = na.omit(df2),
nitt = 1e6,
burnin = 0.25 * 1e6,
thin = 1000,
prior = prior,
family = rep('gaussian', 2),
ginverse = list(spp=Ainv_BM))
plot(covar.bm)
summary(covar.bm)
# Make covariance matrices
X <- covar.bm$VCV[, grep("ce", colnames(covar.bm$VCV))]
mat1 <- lapply(1:nrow(X), function(x) { matrix(X[x, 1:4], ncol=2, nrow=2) })
Y <- covar.bm$VCV[, grep("other", colnames(covar.bm$VCV))]
mat2 <- lapply(1:nrow(Y), function(x) { matrix(Y[x, 1:4], ncol=2, nrow=2) })
# Subsample VCVs
set.seed(1980)
samp <- sample(1:length(mat1), 100, replace=FALSE)
# centers
scal <- c(attr(scale(df2$specular), "scaled:scale"), attr(scale(df2$diffuse), "scaled:scale"))
cent <- c(attr(scale(df2$specular), "scaled:center"), attr(scale(df2$diffuse), "scaled:center"))
# Plot
pdf("figs/specdiff_byclade.pdf", width=5, height=4)
par(mar=c(3,3,1,1), mgp=c(1.5,.5,0), ps=8, mex=.75)
plot(log(specular) ~ log(diffuse), df2, col=clade, xaxt='n', yaxt='n', ylab="Specular reflectance (%)", xlab="Diffuse reflectance (%)", type='n')
# axis(side=2, at=seq_range(scale(df2$diffuse), 5), labels = round(seq_range(scale(df2$diffuse), 5) * scal[2] + cent[2]))
axis(side=1, at=seq_range(log(df2$diffuse), 5), labels=round(seq_range(df2$diffuse, 5)))
axis(side=2, at=seq_range(log(df2$specular), 5), labels=round(seq_range(df2$specular, 5)))
# lines(ellipse(x=mat1[[1]]), type='l', col=alpha('red', .05), xlim=c(-10, 10), ylim=c(-10, 10), xlab="Specular", ylab="Diffuse")
cent <- c(mean(log(df2$diffuse[df2$clade=="ce"])), mean(log(df2$specular[df2$clade=="ce"])))
for (i in samp) {
lines(ellipse(x=mat1[[i]], centre=cent), col=alpha('red', .1))
}
cent <- c(mean(log(df2$diffuse[df2$clade=="other"])), mean(log(df2$specular[df2$clade=="other"])))
for (i in samp) {
lines(ellipse(x=mat2[[i]], centre=cent), col=alpha('black', .1))
}
points(log(specular) ~ log(diffuse), data =df2, col=c('red','black')[clade], pch=16)
legend("topright", legend=c('Cassowary + Emu', 'Background'), bty='n', col=c('red', 'black'), lty=1)
dev.off()
df2$iscasso <- ifelse(grepl("Cas", df2$spp), "casso", "not")
ggplot(df2, aes(x=diffuse, y=specular, color=clade)) + geom_point(cex=2, aes(shape=iscasso)) + scale_x_log10() + scale_y_log10() + stat_ellipse() + theme_minimal() + scale_color_manual(values=c('ce'='red', 'other'='black')) + scale_shape_manual(values=c('casso'=16, 'not'=1)) + theme(legend.position="none") +
labs(x="Diffuse reflectance (%)", y="Specular reflectance (%)")
ggsave("figs/specdiff_byclade.pdf", width=6.5, height=5)
# Test if significant divergence in major axes of traits:
gaxtest(x=mat1, y=mat2)
```
## Predict color in Lithornithidae
```{r}
source("runqda.R")
# mel <- read.csv("data/lithornithid melanosomes 12-8-15.csv")
mel <- read_excel("Dryad Dataset S1.xlsx", sheet=3)
# train1 = dataset from Hu et al. (2018) Nature Communications
# train2 = dataset from Norden et al. (2019) Evolution
# train1 <- read.csv("data/melanodata_hu.csv", row=1)
# train1$Length_CV <- as.numeric(as.character(train1$Length_CV))
# train1$Diameter_CV <- as.numeric(as.character(train1$Diameter_CV))
# train1$Color <- gsub("\\s+$", "", as.character(train1$Color))
# train2 <- read.csv("data/melanodata_norden.csv")
# train2 <- subset(train2, Data.source=="This study")
# names(train2) <- plyr::revalue(names(train2), c("Aspect.ratio"="Aspect", "Length.CV"="Length_CV", "Colour"="Color"))
# levels(train2$Color) <- c(levels(train2$Color), "platelet")
# train2$Color[train2$Flat=="flat"] <- "platelet"
keep <- c('Species','Color','Length','Length_CV','Diameter','Aspect')
train <- rbind(train1[keep], train2[keep])
train <- mutate(train, class = plyr::revalue(Color, c('peng' = 'black')))
# train <- mutate(train, class = plyr::revalue(Color, c('peng' = 'black', 'platelet'='irid')))
train <- subset(train, class %in% c('black', 'brown', 'grey', 'irid', 'platelet'))
table(train$class)
dim(train)
melsum <- mel %>% group_by(Specimen, Sample) %>% dplyr::select(leng=Length, diam=Diameter) %>%
summarise(Length_CV = 100 * sd(leng)/mean(leng)/sqrt(length(leng)),
Diameter_CV = 100 * sd(diam)/mean(diam)/sqrt(length(diam)),
Aspect = mean(leng/diam),
Aspect_skew = skewness(leng/diam),
Length = mean(leng),
Diameter = mean(diam))
# vars <- c('Length', 'Diameter', 'Aspect', 'Length_CV', 'Diameter_CV', 'Aspect_skew')
vars <- c('Length', 'Diameter', 'Aspect', 'Length_CV')
# qda1 <- runqda(train = train, test = test0, vars = c('leng', 'diam', 'ar'))
qda1 <- runqda(train = train, test = melsum, vars = vars)
summary.aov(manova(qda1$fit))
qda1$prob
table(qda1$predict)
# Save discriminant function analysis output as text file
out <- capture.output(qda1)
cat("My title", out, file="output/qdaout2.txt", sep="\n", append=FALSE)
```