--- title: "cassowary analysis" output: html_document editor_options: chunk_output_type: console --- ## Setup ```{r} # Load packages library(pavo) library(ggrepel) library(stringr) library(dplyr) library(ape) library(geiger) library(viridis) library(visreg) library(MCMCglmm) library(treeplyr) library(readxl) library(ellipse) library(ggplot2) library(tidyr) library(RColorBrewer) library(bayou) library(doParallel) library(treeplyr) library(phytools) library(phylolm) library(moments) # Source R code source("mycontmap.R") source("gmaxtest.R") source("shiftSums.R") source("utility_functions.R") .tipregime <- bayou:::.tipregime # Load tree and data tree <- read.tree("palaeognaths.tre") morphdat <- read_excel("Dryad Dataset S1.xlsx", sheet=1) # Rename some variables to work with R code below names(morphdat) <- plyr::revalue(names(morphdat), c('Aspect.ratio'='ratio', 'Vane.width.mm'='vane_width', 'Rachis.width.mm'='rachis_width', 'Contrast.gloss'='gloss', 'Species'='spp', 'Diffuse.R'='diffuse', 'Specular.R'='specular', 'Density.um2'='density', 'Barbules'='barbules', 'Rel.rachis.width'='propwidth', 'Mass.g'='mass_g')) ``` ## Bayou analysis (testing for exceptional glossiness) ### Rachis width ```{r} td <- morphdat %>% group_by(spp) %>% # filter(spp != "Taoniscus_nanus") %>% summarize_if(is.numeric, mean, na.rm=T) %>% treeplyr::make.treedata(tree=tree) btree <- td$phy bdat <- td[["rachis_width"]] # phenogram(btree, bdat) btree <- reorder(btree, "postorder") # Only allow changes on non-terminal edges bmax <- ifelse(btree$edge[,2] %in% 1:Ntip(btree), 0, 1) # bmax <- rep(1, Nedge(btree)) plot(btree, edge.col=bmax+1) # Make prior priorOU <- make.prior(btree, dists=list(dalpha="dhalfcauchy", dsig2="dhalfcauchy", dk="cdpois", dtheta="dnorm"), param=list(dk=list(lambda=3, kmax=sum(bmax)/2), dsb=list(bmax=bmax, prob=1), dtheta=list(mean=mean(bdat), sd=1.5*sd(bdat)))) startpars <- priorSim(priorOU, btree, plot=TRUE)$pars[[1]] # Make model set.seed(1) mcmcOU <- bayou.makeMCMC(btree, bdat, prior=priorOU, new.dir="output/runs", outname="modelOU_rachis", plot.freq=NULL) # Run MCMC analysis (uncomment to run) mcmcOU$run(1e6) # Load chainOU <- mcmcOU$load(saveRDS=TRUE) # Evaluation: plot(chainOU) summary(chainOU) # Analyze bayou output pp <- 0.3 # Set the burnin proportion for plotting with coda to check the MCMC chains chainOU <- set.burnin(chainOU, pp) # Regime plots plotSimmap.mcmc(chainOU, burnin=0.3, cex=0.5) #Phenogram with branches with posterior probabilities > 0.3 colored phenogram.density(btree, bdat, burnin=0.3, chainOU, pp.cutoff = pp) # Plot parameter distributions for regimes shiftsums <- shiftSummaries(chainOU, mcmcOU, pp.cutoff=pp) # shift summaries with more detailed output (custom R script) res <- shiftSums(chainOU, mcmcOU, pp.cutoff=pp) # Make simmap tree L <- Lposterior(chainOU, btree, burnin=attributes(chainOU)$burnin) sb <- which(L[,1] > pp) pars <- list(k=length(sb), ntheta=length(sb)+1, sb=unname(sb), t2=2:(length(sb)+1), loc=rep(0, length(sb))) sm <- pars2simmap(pars = pars, tree = btree) # New color palette pal <- sm$col pal <- setNames(c('gray', brewer.pal(length(pal), "Set1")[1:(length(pal)-1)]), names(pal)) # Get parameters df <- as.data.frame(t(sapply(res$cladesummaries, function(x) {hpd(x$values[[1]])}))) df$regime <- as.character(1:nrow(df)) # ADD all edge probs for reviewer 2: tmp <- shiftSummaries(chainOU, mcmcOU, pp.cutoff=0) # reorder edges id.postorder <- apply(sm$tree$edge, 1, paste0, collapse="-") id.cladewise <- apply(reorder(sm$tree, "cladewise")$edge, 1, paste0, collapse="-") el <- NA el[as.numeric(rownames(tmp$regressions)[-1])] <- tmp$PP el <- el[match(id.cladewise, id.postorder)] # Make figure pdf("bayou_rachis.pdf", width=5.5, height=4.5) layout(cbind(c(1,3),c(2,2)), widths=c(.3, 1)) par(mar=c(1,3,1,0), mgp=c(1.5,.5,0), ps=8, mex=.75) xx <- seq_along(df$regime) plot(df$y, col=pal, pch=16, ylim=range(df[,1:3]), axes=F, ylab="Mean rachis diameter (um)", xlim = c(xx[1]-0.5, xx[length(xx)]+0.5)) segments(xx, df$ymin, xx, df$ymax, col=pal) axis(side=2) plotSimmap(sm$tree, pal) edgelabels(round(el, 2), adj=c(0.3,-0.5), frame='none', cex=.6) dev.off() ``` ### Gloss: Are cassowaries exceptionally glossy? ```{r, eval=FALSE} btree <- td$phy bdat <- td[["gloss"]] phenogram(btree, bdat) btree <- reorder(btree, "postorder") # Only allow changes on non-terminal edges bmax <- ifelse(btree$edge[,2] %in% 1:Ntip(btree), 0, 1) # bmax <- rep(1, Nedge(btree)) plot(btree, edge.col=bmax+1) # Make prior priorOU <- make.prior(btree, dists=list(dalpha="dhalfcauchy", dsig2="dhalfcauchy", dk="cdpois", dtheta="dnorm"), param=list(dk=list(lambda=3, kmax=sum(bmax)/2), dsb=list(bmax=bmax, prob=1), dtheta=list(mean=mean(bdat), sd=1.5*sd(bdat)))) startpars <- priorSim(priorOU, btree, plot=TRUE)$pars[[1]] set.seed(1) mcmcOU <- bayou.makeMCMC(btree, bdat, prior=priorOU, new.dir="output/runs", outname="modelOU_gloss", plot.freq=NULL) # Run # mcmcOU$run(1e6) # save/Load chainOU <- mcmcOU$load(saveRDS=TRUE) plot(chainOU) summary(chainOU) # Set the burnin proportion for plotting with coda to check the MCMC chains chainOU <- set.burnin(chainOU, 0.3) # Regime plots plotSimmap.mcmc(chainOU, burnin=0.3, cex=0.5) # cutoff to use for shifts on branches pp <- 0.5 #Phenogram with branches with posterior probabilities > 0.3 colored phenogram.density(btree, bdat, burnin=0.3, chainOU, pp.cutoff=pp) # Plot parameter distributions for regimes shiftsums <- shiftSummaries(chainOU, mcmcOU, pp.cutoff=pp) # shift summaries with more detailed output (custom R script) res <- shiftSums(chainOU, mcmcOU, pp.cutoff=pp) # Make simmap tree L <- Lposterior(chainOU, btree, burnin = attributes(chainOU)$burnin) sb <- which(L[,1] > pp) pars <- list(k=length(sb), ntheta=length(sb)+1, sb=unname(sb), t2=2:(length(sb)+1), loc=rep(0, length(sb))) sm <- pars2simmap(pars = pars, tree = btree) # New color palette pal <- sm$col pal <- setNames(c('gray', brewer.pal(length(pal), "Set1")[1:(length(pal)-1)]), names(pal)) # Make data output df <- as.data.frame(t(sapply(res$cladesummaries, function(x) {hpd(x$values[[1]])}))) df$regime <- as.character(1:nrow(df)) # All branch probs tmp <- shiftSummaries(chainOU, mcmcOU, pp.cutoff=0) el <- NA el[as.numeric(rownames(tmp$regressions)[-1])] <- tmp$PP id.postorder <- apply(sm$tree$edge, 1, paste0, collapse="-") id.cladewise <- apply(reorder(sm$tree, "cladewise")$edge, 1, paste0, collapse="-") el <- el[match(id.cladewise, id.postorder)] # Generate figure for ms pdf("figs/bayou_gloss.pdf", width=5.5, height=4.5) layout(cbind(c(1,3),c(2,2)), widths=c(.3, 1)) par(mar=c(1,3,1,0), mgp=c(1.5,.5,0), ps=8, mex=.75) xx <- seq_along(df$regime) plot(df$y, col=pal, pch=16, ylim=range(df[,1:3]), axes=F, ylab="Mean contrast gloss", xlim = c(xx[1]-0.5, xx[length(xx)]+0.5)) segments(xx, df$ymin, xx, df$ymax, col=pal) axis(side=2) plotSimmap(sm$tree, pal) # reorder edges # add all edge probs edgelabels(round(el, 2), adj=c(0.3,-0.5), frame='none', cex=.75) dev.off() ``` ## Comparative analysis of traits ```{r} df2 <- morphdat %>% group_by(spp) %>% select(gloss, specular, diffuse, ratio, barbules, rachis_width, vane_width, density, propwidth) %>% summarize_all(funs(mean), na.rm=TRUE) %>% as.data.frame rownames(df2) <- df2$spp td <- make.treedata(tree, as.matrix(df2[,-1])) td2 <- filter(td, tip.label!="Eudromia_elegans") pdf("figs/barbules.pdf", width=6, height=6) par(ps=8, mar=c(0,0,0,0)) plot(td$phy) trait <- ifelse(td[['barbules']]=="1", 1, 0) anc <- ace(x=trait, phy=td$phy, type="discrete") trait <- trait[td$phy$tip] nodelabels(pie=anc$lik.anc, piecol = c('white','black'), cex=.5) dev.off() ``` ## Phylogenetic linear mixed models (PLMMs) ### Setup ```{r} # Setup dataset traits <- c('spp', 'specular', 'diffuse', 'gloss', 'ratio', 'density', 'barbules', 'propwidth', 'rachis_width', 'vane_width', 'mass_g', 'propwidth') df2 <- morphdat %>% select(traits) # rownames(df2) <- df2$spp df2$clade <- factor(ifelse(grepl("Casuarius|Dromaius", df2$spp), "ce", "other")) # Get Ainv like this: Ainv_BM <- inverseA(tree, nodes="TIPS")$Ainv # no nodes, only tips ``` ### Allometry data ```{r} ngen <- 1e6 keep <- complete.cases(df2[c('rachis_width', 'vane_width', 'mass_g')]) sum(keep) # N = 27 dat <- df2[keep, ] set.seed(1980) allo.bm <- MCMCglmm(rachis_width ~ vane_width + log(mass_g), data = dat, family = 'gaussian', random = ~spp, ginverse = list(spp = Ainv_BM), nitt=ngen, burnin=0.25*ngen) set.seed(1980) allo.wn <- MCMCglmm(rachis_width ~ vane_width + log(mass_g), data = dat, family = 'gaussian', nitt=ngen, burnin=0.25*ngen) aicw(c(BM=allo.bm$DIC, WN=allo.wn$DIC)) summary(allo.bm) summary(allo.wn) allo2.bm <- MCMCglmm(vane_width ~ rachis_width + log(mass_g), data = dat, family = 'gaussian', random = ~spp, ginverse = list(spp = Ainv_BM), nitt=ngen, burnin=0.25*ngen) allo2.wn <- MCMCglmm(vane_width ~ rachis_width + log(mass_g), data = dat, family = 'gaussian', nitt=ngen, burnin=0.25*ngen) aicw(c(BM=allo2.bm$DIC, WN=allo2.wn$DIC)) summary(allo2.bm) summary(allo2.wn) p1 <- ggplot(df2, aes(x=mass_g, y=rachis_width/1e3)) + geom_point() + scale_x_log10() + annotation_logticks(side='b') + stat_smooth(method="lm") + theme_minimal() + labs(x= "Body mass (g)", y = "Rachis diameter (mm)") + coord_cartesian(y=c(0, .3)) p2 <- ggplot(df2, aes(x=mass_g, y=vane_width/1e3)) + geom_point() + scale_x_log10() + annotation_logticks(side='b') + stat_smooth(method="lm") + theme_minimal() + labs(x= "Body mass (g)", y = "Vane width (mm)") cowplot::plot_grid(p1, p2, labels=c('a', 'b')) ggsave('figs/morphotraits.pdf', width=6.5, height=3.25) ``` ### Gloss data ```{r} # Specular - species with melanosome data keep <- complete.cases(df2[c('gloss', 'barbules', 'rachis_width', 'ratio', 'density')]) sum(keep) # N = 30 ngen <- 1e6 set.seed(1980) gloss.bm <- MCMCglmm(gloss ~ barbules + rachis_width + density + ratio, data = df2[keep, ], family = 'gaussian', random = ~spp, ginverse = list(spp = Ainv_BM), nitt=ngen, burnin=0.25*ngen) set.seed(1980) gloss.wn <- MCMCglmm(gloss ~ barbules + rachis_width + density + ratio, data = df2[keep, ], family = 'gaussian', random = ~spp, nitt=ngen, burnin=0.25*ngen) # Model weights aicw(c(BM=gloss.bm$DIC, WN=gloss.wn$DIC)) summary(gloss.bm) # only rachis width is significant summary(gloss.wn) ``` ### Specular and diffuse reflectance data ```{r} keep <- complete.cases(df2[c('specular', 'diffuse', 'barbules', 'ratio', 'density', 'propwidth')]) ngen <- 1e6 prior = list(R = list(V = diag(2), nu = 1.002), G = list(G1 = list(V = diag(2),nu = 1.002))) sum(keep) # N = 31 dat <- df2[keep, ] set.seet(1980) spec.bm <- MCMCglmm(cbind(specular, diffuse) ~ trait - 1 + trait : (barbules + ratio + density + propwidth), data = dat, family = c('gaussian', 'gaussian'), random = ~us(trait):spp, ginverse = list(spp = Ainv_BM), prior = prior, rcov = ~us(trait):units, nitt=ngen, burnin=0.25*ngen) set.seed(1980) spec.wn <- MCMCglmm(cbind(specular, diffuse) ~ trait - 1 + trait : (barbules + ratio + density + propwidth), data = dat, family = c('gaussian', 'gaussian'), rcov = ~us(trait):units, nitt = ngen, burnin = 0.25*ngen) # Save save(allo.bm, allo.wn, allo2.bm, allo2.wn, gloss.bm, gloss.wn, spec.bm, spec.wn, file="output/MCMC_fits.rda") # Load load(file="output/MCMC_fits.rda") aicw(c(BM=spec.bm$DIC, WN=spec.wn$DIC)) aicw(c(BM=gloss.bm$DIC, WN=gloss.wn$DIC)) summary(spec.bm) summary(gloss.bm) slope <- mean(spec.bm$Sol[,'traitdiffuse:ratio']) int <- mean(spec.bm$Sol[,'traitdiffuse']) p1 <- ggplot(morphdat, aes(x=ratio, y=diffuse, fill=tolower(color))) + geom_point(cex=2, pch=21) + scale_fill_manual(values = colorpal) + geom_abline(slope=slope, intercept=int, lty=2) + theme_minimal() + theme(legend.position="none") + labs(x="Melanosome aspect ratio", y="Diffuse reflectance (%)") morphdat$barbules <- factor(morphdat$barbules) levels(morphdat$barbules) <- c('Absent', 'Present') p2 <- ggplot(morphdat, aes(x=factor(barbules), y=specular)) + geom_boxplot() + theme_minimal() +labs(x="Distal barbules", y="Specular reflectance (%)") cowplot::plot_grid(p1, p2, labels=c('a', 'b')) ggsave("figs/pgls.pdf", width=6.5, height=3) # Plumage is glossy because shape is so different compared to other palaeognaths (high AR, etc.), i.e. more rachis showing per unit area of plumage (I think so) ``` ## Test if covariance structure differs between groups ```{r} prior <- list(G = list(G1 = list(V = diag(2), nu = 0.002), G2 = list(V = diag(2), nu = 0.002), G3 = list(V = diag(2), nu = 0.002)), R = list(V = diag(2), n = 0.002)) # relatively uninformative # Uncomment to run- set.seed(1980) covar.bm <- MCMCglmm(log(cbind(diffuse, specular)) ~ trait - 1, random = ~us(trait:at.level(clade,"ce")):units + us(trait:at.level(clade,"other")):units + us(trait):spp, # G in prior rcov = ~us(trait):units, # R in prior data = na.omit(df2), nitt = 1e6, burnin = 0.25 * 1e6, thin = 1000, prior = prior, family = rep('gaussian', 2), ginverse = list(spp=Ainv_BM)) plot(covar.bm) summary(covar.bm) # Make covariance matrices X <- covar.bm$VCV[, grep("ce", colnames(covar.bm$VCV))] mat1 <- lapply(1:nrow(X), function(x) { matrix(X[x, 1:4], ncol=2, nrow=2) }) Y <- covar.bm$VCV[, grep("other", colnames(covar.bm$VCV))] mat2 <- lapply(1:nrow(Y), function(x) { matrix(Y[x, 1:4], ncol=2, nrow=2) }) # Subsample VCVs set.seed(1980) samp <- sample(1:length(mat1), 100, replace=FALSE) # centers scal <- c(attr(scale(df2$specular), "scaled:scale"), attr(scale(df2$diffuse), "scaled:scale")) cent <- c(attr(scale(df2$specular), "scaled:center"), attr(scale(df2$diffuse), "scaled:center")) # Plot pdf("figs/specdiff_byclade.pdf", width=5, height=4) par(mar=c(3,3,1,1), mgp=c(1.5,.5,0), ps=8, mex=.75) plot(log(specular) ~ log(diffuse), df2, col=clade, xaxt='n', yaxt='n', ylab="Specular reflectance (%)", xlab="Diffuse reflectance (%)", type='n') # axis(side=2, at=seq_range(scale(df2$diffuse), 5), labels = round(seq_range(scale(df2$diffuse), 5) * scal[2] + cent[2])) axis(side=1, at=seq_range(log(df2$diffuse), 5), labels=round(seq_range(df2$diffuse, 5))) axis(side=2, at=seq_range(log(df2$specular), 5), labels=round(seq_range(df2$specular, 5))) # lines(ellipse(x=mat1[[1]]), type='l', col=alpha('red', .05), xlim=c(-10, 10), ylim=c(-10, 10), xlab="Specular", ylab="Diffuse") cent <- c(mean(log(df2$diffuse[df2$clade=="ce"])), mean(log(df2$specular[df2$clade=="ce"]))) for (i in samp) { lines(ellipse(x=mat1[[i]], centre=cent), col=alpha('red', .1)) } cent <- c(mean(log(df2$diffuse[df2$clade=="other"])), mean(log(df2$specular[df2$clade=="other"]))) for (i in samp) { lines(ellipse(x=mat2[[i]], centre=cent), col=alpha('black', .1)) } points(log(specular) ~ log(diffuse), data =df2, col=c('red','black')[clade], pch=16) legend("topright", legend=c('Cassowary + Emu', 'Background'), bty='n', col=c('red', 'black'), lty=1) dev.off() df2$iscasso <- ifelse(grepl("Cas", df2$spp), "casso", "not") ggplot(df2, aes(x=diffuse, y=specular, color=clade)) + geom_point(cex=2, aes(shape=iscasso)) + scale_x_log10() + scale_y_log10() + stat_ellipse() + theme_minimal() + scale_color_manual(values=c('ce'='red', 'other'='black')) + scale_shape_manual(values=c('casso'=16, 'not'=1)) + theme(legend.position="none") + labs(x="Diffuse reflectance (%)", y="Specular reflectance (%)") ggsave("figs/specdiff_byclade.pdf", width=6.5, height=5) # Test if significant divergence in major axes of traits: gaxtest(x=mat1, y=mat2) ``` ## Predict color in Lithornithidae ```{r} source("runqda.R") # mel <- read.csv("data/lithornithid melanosomes 12-8-15.csv") mel <- read_excel("Dryad Dataset S1.xlsx", sheet=3) # train1 = dataset from Hu et al. (2018) Nature Communications # train2 = dataset from Norden et al. (2019) Evolution # train1 <- read.csv("data/melanodata_hu.csv", row=1) # train1$Length_CV <- as.numeric(as.character(train1$Length_CV)) # train1$Diameter_CV <- as.numeric(as.character(train1$Diameter_CV)) # train1$Color <- gsub("\\s+$", "", as.character(train1$Color)) # train2 <- read.csv("data/melanodata_norden.csv") # train2 <- subset(train2, Data.source=="This study") # names(train2) <- plyr::revalue(names(train2), c("Aspect.ratio"="Aspect", "Length.CV"="Length_CV", "Colour"="Color")) # levels(train2$Color) <- c(levels(train2$Color), "platelet") # train2$Color[train2$Flat=="flat"] <- "platelet" keep <- c('Species','Color','Length','Length_CV','Diameter','Aspect') train <- rbind(train1[keep], train2[keep]) train <- mutate(train, class = plyr::revalue(Color, c('peng' = 'black'))) # train <- mutate(train, class = plyr::revalue(Color, c('peng' = 'black', 'platelet'='irid'))) train <- subset(train, class %in% c('black', 'brown', 'grey', 'irid', 'platelet')) table(train$class) dim(train) melsum <- mel %>% group_by(Specimen, Sample) %>% dplyr::select(leng=Length, diam=Diameter) %>% summarise(Length_CV = 100 * sd(leng)/mean(leng)/sqrt(length(leng)), Diameter_CV = 100 * sd(diam)/mean(diam)/sqrt(length(diam)), Aspect = mean(leng/diam), Aspect_skew = skewness(leng/diam), Length = mean(leng), Diameter = mean(diam)) # vars <- c('Length', 'Diameter', 'Aspect', 'Length_CV', 'Diameter_CV', 'Aspect_skew') vars <- c('Length', 'Diameter', 'Aspect', 'Length_CV') # qda1 <- runqda(train = train, test = test0, vars = c('leng', 'diam', 'ar')) qda1 <- runqda(train = train, test = melsum, vars = vars) summary.aov(manova(qda1$fit)) qda1$prob table(qda1$predict) # Save discriminant function analysis output as text file out <- capture.output(qda1) cat("My title", out, file="output/qdaout2.txt", sep="\n", append=FALSE) ```