##########################################PRIMATE DIETS#################################################### # Authors: Jun Ying Lim # Reference: Lim, J.Y., Wasserman, M.D., Veen, J., Despres-Einspenner, M.-L. & Kissling, W.D. (in review) Ecological and evolutionary significance of primates' most consumed plant families. rm(list=ls()) ## PACKAGES ===================== library(nlme) #to perform gls library(plyr); library(reshape2) # general data processing tools library(ape) #for reading in primate phylogeny library(phytools) # phylogenetic signals library(caper) # phylogenetic signal and pgls functions library(ggtree) # phylogeny plotting library(ggplot2); library(cowplot); library(wesanderson); library(ggrepel); library(RColorBrewer) # general plotting library(lavaan); library(semTools); library(semPlot); library(piecewiseSEM) # structural equation library(readxl) library(stringr) #BiocManager::install("ggtree") ## DIRECTORIES ===================== main.dir <- "~/Dropbox/projects/PrimateDiet/manuscript/r2/primate_diet_code_R2/" ## Change to your local directory here ## Change to your local directory here data.dir <- main.dir res.dir <- file.path(main.dir, "results") fig.dir <- file.path(main.dir, "figures") if(dir.exists(res.dir) == FALSE){ dir.create(res.dir) } if(dir.exists(fig.dir) == FALSE){ dir.create(fig.dir) } ## DATA IMPORT + PROCESSING ===================== # Import raw data =============== diet<- read_excel(file.path(data.dir, "Primate_Diet_v1-2_Data.xlsx")) #dietary data primates (N=120) study <- read_excel(file.path(data.dir,"Primate_Diet_v1-2_References.xlsx")) #reference data primates diet <- as.data.frame(diet) study <- as.data.frame(study) # Some basic error checks =============== test <- ddply(.variables = .(PlantGenusAcc), .data = subset(diet, PlantGenusAcc != "NA"), .fun = function(x){ fam = unique(x$PlantFamily) nfam = length(unique(x$PlantFamily)) data.frame(fam, nfam) }) subset(test, nfam > 1) # no genera with more than one family # Check that all percentages add up to 100% after rounding (to avoid floating point imprecision) sum(tapply(X = diet$Percentages_Recalculated, INDEX = diet$Study_ID, FUN = function(x){ round(sum(x), 6)}) == 100) # Clean up dataset =============== diet$PartPlant[diet$PartPlant == "NA"] <- NA diet$PartPlant2[diet$PartPlant2 == "NA"] <- NA diet$PartPlant3[diet$PartPlant3 == "NA"] <- NA diet$PartPlant4[diet$PartPlant4 == "NA"] <- NA diet$PartPlant5[diet$PartPlant5 == "NA"] <- NA diet$PartPlant6[diet$PartPlant6 == "NA"] <- NA diet$PartPlant7[diet$PartPlant7 == "NA"] <- NA # Assign different food types to part plant column for ease of processing diet$PartPlant[diet$TypeFood == "Unknown"] <- NA diet$PartPlant[diet$TypeFood == "Dung"] <- "dung" diet$PartPlant[diet$TypeFood == "Honey"] <- "honey" diet$PartPlant[diet$TypeFood == "Animal"] <- "animal" diet$PartPlant[diet$TypeFood == "Soil"] <- "soil" diet$PartPlant[diet$TypeFood == "Fungus"] <- "fungus" # If no identified plant parts are recorded , then all plant parts should be left as unknowns # If only SOME of the plant parts are unknown, then those should be listed as unknown (because our main aim is to assign them to dietary categories) cleanPlantPart <- function(x){ # If all columns are NAs, then first column should be "unknown" partplantlist <- c(x$PartPlant, x$PartPlant2, x$PartPlant3, x$PartPlant4, x$PartPlant5, x$PartPlant6, x$PartPlant7) if(sum(is.na(partplantlist)) == length(partplantlist)){ x$PartPlant <- "unknown" } return(x) } diet_clean <- ddply(.data = diet, .variable = .(RecordID), .fun = cleanPlantPart, .progress = "text") # Only include studies over 6 months in duration, and for which more than half of diet is identified study_id <- subset(study, Duration_months >= 6)$Study_ID diet_subset <- subset(diet_clean, Study_ID %in% study_id) diet_subset_final <- ddply(.data = diet_subset, .variables = .(Study_ID), .fun = function(x){ unknown <- sum(subset(x, TypeFood == "Unknown")$Percentages_Recalculated) plant <- sum(subset(x, TypeFood == "Plant")$Percentages_Recalculated) unknown_plant <- sum(subset(x, PlantFamily == "Unknown_plant")$Percentages_Recalculated) if( unknown <= 50 & (unknown_plant / plant) <= 50 ) { return(x) }}, .progress = "text") write.csv(diet_subset_final, file.path(res.dir, "diet_subset.csv"), row.names = F) # Basic summary statistics =============== nrow(diet) length(unique(diet$Ref_ID)) # references 153 -> 149 (some errors in names) length(unique(diet$Study_ID)) # studies 229 -> 232 length(unique(diet$PrimateSpecies)) # species 120 -> 119 length(unique(diet_subset_final$Ref_ID)) # references 140 -> 141 (added Waser 1975) length(unique(diet_subset_final$Study_ID)) # studies 210 -> 220 (many more meet criteria) length(unique(diet_subset_final$PrimateSpecies)) # species 109 -> 112 length(unique(as.vector(diet_subset_final$PlantFamily))) # 205 = 208 - 3 (Non-plant, Unknown, Unknown_plant) unique(diet_subset_final$PlantFamily)[!grepl(unique(diet_subset_final$PlantFamily), pattern = "ceae")] study_subset <- subset(study, Study_ID %in% unique(diet_subset_final$Study_ID)) mean(study_subset$Duration_months) # 14.2 months range(study_subset$Duration_months) # 6 - 66 months median(study_subset$Duration_months) # 12 months sd(study_subset$Duration_months) # 8.2 mean(table(as.vector(study_subset$Species))) # 1.9 median(table(as.vector(study_subset$Species))) # 1 range(table(as.vector(study_subset$Species))) # 1 - 8 sd(table(as.vector(study_subset$Species))) # 1.3 sum(table(as.vector(study_subset$Species)) == 1) # 60 species with 1 study # Generate study metadata table (Table S1) =============== study_subset$Authors <- gsub("[0-9].*","", x = study_subset$Citation) study_subset$Year <-gsub("\\D*(\\d+).*", "\\1", study_subset$Citation) authors <- gsub(study_subset$Authors, pattern= "[A-ZäëïöÄËÜÏÖ]\\.|\\,|\\&", replacement = "") authors2 <- str_trim(gsub(authors, pattern = "\\s+", replacement = " "), side = "both") authors_split <- str_split_fixed(authors2, pattern = " ", n = 3) nauthors <- apply(authors_split, MARGIN = 1, FUN = function(x){ sum(!x == "") } ) citation <- vector() for(i in 1:nrow(study_subset)){ if(nauthors[i] == 1) { citation[i] <- paste0(authors_split[i,1], " (", study_subset$Year[i], ")") } if(nauthors[i] == 2) { citation[i] <- paste0(authors_split[i,1], " & ", authors_split[i,2], " (", study_subset$Year[i], ")") } if(nauthors[i] == 3){ citation[i] <- paste0(authors_split[i,1], " et al.", " (", study_subset$Year[i], ")") } } study_subset$CitationClean <- citation study_subset_supptable <- study_subset[,c("Species","Study_ID", "Duration_months","SampleMethod","Group","Country","Latitude", "Longitude","CitationClean")] study_subset_supptable$Species <- gsub(study_subset_supptable$Species, pattern = "_", replacement = " ") study_subset_supptable$Latitude <- round(study_subset_supptable$Latitude, 3) study_subset_supptable$Longitude <- round(study_subset_supptable$Longitude, 3) study_subset_supptable <- study_subset_supptable[order(study_subset$Species),] write.csv(study_subset_supptable, file = file.path(res.dir, "study_subset_supptable.csv"), row.names= FALSE, na = "") ## PLANT PART OF ALL PLANTS ===================== nrow(diet_subset_final) # total no. of records in filtered dataset (8044 -> 8981) nrow(subset(diet_subset_final, !PlantFamily %in% c("Unknown", "Non-plant"))) # no of plant records (7797-> 8744) test <- subset(diet_subset_final, !PlantFamily %in% c("Unknown", "Non-plant")) # no. of plant records (identified or unidentified) with no plant part information sum(is.na(test$PartPlant) | test$PartPlant == "NA" | test$PartPlant == "unknown") # 1407 --> 1417 table(test$PartPlant) # proportion of plant records (identified or unidentified) with plant parts 1 - (1417 / 8744) # 84% with plant part information 8744 - 1417 # 7327 with plant part information # different unique plant part records (- 1 for unknown) sort(unique(unlist(test[c("PartPlant", "PartPlant2", "PartPlant3", "PartPlant4", "PartPlant5", "PartPlant6", "PartPlant7")]))) # Plot geographic distribution of studies (Fig S1) ===================== # Download map from Natural Earth country_poly <- rnaturalearth::ne_countries(scale = 110, type = "countries") country_poly_fort <- fortify(country_poly) study_plots <- ggplot() + geom_polygon(aes(y = lat, x= long, group = group),data= country_poly_fort, colour = "grey50", size = 0.1) + geom_point(aes(y = Latitude, x = Longitude), fill= "coral", data = study_subset, pch = 21, colour = "white") + coord_cartesian(ylim = c(-60,90), xlim = c(-180, 180), expand = 0) + #coord_fixed() + theme(panel.background= element_blank(), axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank()) ggsave(study_plots, filename = file.path(fig.dir, "study_plots.pdf"), width = 6, height = 3) ggsave(study_plots, filename = file.path(fig.dir, "study_plots.png"), width = 6, height = 3) ## RELATIVE IMPORTANCE OF PLANT FAMILIES ===================== # Sum total percentages for each plant family for each study study_family_prop <- ddply(.data = diet_subset_final, .variable = .(Study_ID, PlantFamily), summarize, FamilyPercent = sum(Percentages_Recalculated)) sum(round(with(data = study_family_prop, tapply(FamilyPercent, INDEX = Study_ID, FUN = sum)), 6) == 100) # Checking to see if they are all close to 100 (n = 220) # For each plant family, calculate the number of primates species that use it family_speciesprop <- ddply(.data = diet_subset_final, .variables = .(PlantFamily), summarize, nspecies = length(as.vector(unique(PrimateSpecies)))) family_speciesprop$propspecies <- family_speciesprop$nspecies / length(as.vector(unique(diet_subset_final$PrimateSpecies))) family_speciesprop$PlantFamily <- factor(family_speciesprop$PlantFamily, levels = as.vector(family_speciesprop$PlantFamily)[order(family_speciesprop$propspecies, decreasing = T)]) # Identify the top ten families that are used by the most species family_speciesprop_subset <- subset(family_speciesprop, !PlantFamily %in% c("Non-plant", "Unknown", "Unknown_plant")) toptenfam <- as.vector(family_speciesprop_subset$PlantFamily)[order(family_speciesprop_subset$propspecies, decreasing = T)][1:10] head(family_speciesprop_subset[order(family_speciesprop_subset$propspecies, decreasing = T),], n = 10) # Calculate the mean percentage in the diet for each family across species study_family_prop_all <- merge(study_family_prop, study) study_family_prop_mat <- acast(study_family_prop_all, formula = PlantFamily~Study_ID, value.var = "FamilyPercent", fill= 0) study_family_prop_all_withzeros <- melt(study_family_prop_mat, varnames = c("PlantFamily", "Study_ID"), value.name = "FamilyPercent") species_familyprop <- ddply(.data = merge(study_family_prop_all_withzeros, study[c("Study_ID", "Species")], all.x = TRUE, by = "Study_ID"), .variables = .(Species, PlantFamily), summarize, MeanPercentage = mean(FamilyPercent), #MedianPercentage = median(FamilyPercent), .progress= "text") # Check that the sums still sum to 100 length(unique(species_familyprop$Species)) # 112 species sum(round(with(data = species_familyprop, tapply(MeanPercentage, INDEX = as.vector(Species), FUN = sum)), 6) == 100) # 112 mean(species_familyprop$MeanPercentage[species_familyprop$PlantFamily == "Fabaceae"]) + mean(species_familyprop$MeanPercentage[species_familyprop$PlantFamily == "Moraceae"]) # Summary statistics of plant family consumption all_familyprop_matrix <- acast(formula = Species~PlantFamily, data = species_familyprop, value.var = "MeanPercentage") all_familyprop_matrix_clean <- round(all_familyprop_matrix, 2) write.csv(all_familyprop_matrix_clean, file = file.path(res.dir, "all_familyprop_matrix_clean.csv"), row.names = T) primate_familyprop <- ddply(.data = subset(species_familyprop, !PlantFamily %in% c("Unknown", "Unknown_plant", "Non-plant")), .variables = .(PlantFamily), summarize, nprimatespecies = sum(MeanPercentage > 0), diet_median = median(MeanPercentage), diet_mean = mean(MeanPercentage), diet_max = max(MeanPercentage), diet_min = min(MeanPercentage), diet_sd = sd(MeanPercentage)) write.csv(primate_familyprop, file = file.path(res.dir, "primate_familyprop.csv"), row.names = F) # Plot barplot of prevalence of plant families (Fig 1a ) =============== family_speciesprop_plot <- ggplot() + geom_bar(aes(y = propspecies, x = PlantFamily, fill = PlantFamily), stat = "identity", data = subset(family_speciesprop, !PlantFamily %in% c("Non-plant", "Unknown", "Unknown_plant") )) + labs(y = "Proportion of species", x = "Plant families\n(n = 205)") + scale_fill_manual(values = c(rep(wes_palette("Royal1")[2],10), rep("grey50", 196))) + scale_y_continuous(expand = c(0,0)) + theme(axis.ticks.x = element_blank(), axis.text.x = element_blank(), panel.background = element_blank(), legend.position = "none") ggsave(file.path(fig.dir, "family_speciespropr.pdf"), family_speciesprop_plot) topten_familyprop <- subset(species_familyprop, PlantFamily %in% toptenfam) write.csv(topten_familyprop, file = file.path(res.dir, "topten_familyprop.csv"), row.names = F) topten_familyprop_matrix <- acast(formula=Species~PlantFamily, data = topten_familyprop, value.var = "MeanPercentage") write.csv(topten_familyprop_matrix, file = file.path(res.dir, "topten_familyprop_matrix.csv"), row.names = T) topten_familyprop_matrix_clean <- ifelse(topten_familyprop_matrix > 0.01, round(topten_familyprop_matrix, digits = 1), "< 0.1") write.csv(topten_familyprop_matrix_clean, file = file.path(res.dir, "topten_familyprop_matrix_clean.csv"), row.names = T) # Boxplot of consumption for most widely consumed families (Fig 1b) =============== family_avg <- tapply(topten_familyprop$MeanPercentage, INDEX = as.vector(topten_familyprop$PlantFamily), FUN = median, na.rm = T) family_avg_decr_order <- names(family_avg[order(family_avg, decreasing = T)]) topten_familyprop$PlantFamily <- factor(topten_familyprop$PlantFamily, levels = toptenfam) #family_avg_decr_order) topten_speciesprop_plot <- ggplot() + geom_boxplot(aes(y = MeanPercentage, PlantFamily), data = subset(topten_familyprop, !PlantFamily %in% c("Non-plant", "Unknown", "Unknown_plant")), fill = "grey70") + labs(x = "Plant family", y = "Percentage") + scale_x_discrete(expand = c(0.1,0)) + scale_y_continuous(expand = c(0,0), limits = c(0, 90))+ theme(axis.text.x = element_text(angle = 45, hjust = 1), axis.title.x = element_blank(), panel.background = element_blank()) ggsave(file.path(fig.dir, "topten_speciesprop.pdf"), topten_speciesprop_plot) # Median average consumption across species for most impt families =============== topten_familyperc <- subset(topten_familyprop, !PlantFamily %in% c("Non-plant", "Unknown", "Unknown_plant")) topten_primatemedian <- tapply(X = topten_familyperc$MeanPercentage, INDEX = topten_familyperc$PlantFamily, FUN = median) topten_primatemedian_df <- data.frame(medianPerc = topten_primatemedian, PlantFamily = names(topten_primatemedian)) family_summary <- merge(x = topten_primatemedian_df, y = family_speciesprop, all.x = TRUE) family_summary[order(family_summary$propspecies, decreasing = T),] ## GENERA BREAKDOWN =============== species_list <- subset(diet_subset_final, PlantFamily %in% toptenfam)[c("PlantFamily", "PlantGenusAcc", "PlantSpeciesAcc")] species_list2 <- subset(species_list, !(PlantGenusAcc == "NA" | PlantSpeciesAcc == "NA")) species_list3 <- species_list2[!duplicated(species_list2),] genera_species_count <- ddply(species_list3, .variables = .(PlantFamily, PlantGenusAcc), summarize, Nspecies = length(PlantSpeciesAcc) ) genera_species_count$PlantGenusAcc <- factor(genera_species_count$PlantGenusAcc, levels = genera_species_count$PlantGenusAcc[order(genera_species_count$Nspecies,decreasing = T)]) head(genera_species_count[order(genera_species_count$Nspecies, decreasing = TRUE),]) # Plot species counts for each genus of most impt families (Fig S8) =============== genera_counts <- ggplot(genera_species_count) + geom_bar(aes(x = PlantGenusAcc, y= Nspecies), stat = "identity") + facet_wrap(PlantFamily~., scales = "free", nrow = 10, strip.position = "right") + scale_y_continuous(expand = c(0,0.1)) + scale_x_discrete(expand = c(0,0)) + labs(y= "Number of species recorded", x = "Genera") + theme(axis.text.y = element_text(size = 14), axis.text.x = element_text(angle = 45, hjust = 1, size = 14), axis.title = element_text(size = 25), panel.grid = element_blank(), panel.background = element_blank(), #strip.background = element_blank(), strip.text = element_text(size = 18)) ggsave(genera_counts,filename = file.path(fig.dir, "genera_counts.pdf"), width = 24, height = 30) ggsave(genera_counts,filename = file.path(fig.dir, "genera_counts.png"), width = 24, height = 30) # Calculate the mean number of genera per family species_list_all <- subset(diet_subset_final, !(PlantGenusAcc == "NA" | PlantSpeciesAcc == "NA"))[c("PlantFamily", "PlantGenusAcc", "PlantSpeciesAcc")] species_list_all <- species_list_all[!duplicated(species_list_all),] genera_species_count_all <- ddply(species_list_all, .variables = .(PlantFamily), summarize, Nspecies = length(unique(PlantSpeciesAcc)), Ngenera = length(unique(PlantGenusAcc))) head(genera_species_count_all[order(genera_species_count_all$Nspecies, decreasing = TRUE),], 20) median(genera_species_count_all$Ngenera) median(genera_species_count_all$Nspecies) ## PHYLOGENETIC SIGNAL ==================== # Import phylogeny ==== faurby_mcc <- read.nexus(file.path(data.dir, "faurby_mcc_meanheights.nex")) # Reconcile tip labels faurby_mcc$tip.label <- gsub("Callicebus_lugens", "Cheracebus_lugens", faurby_mcc$tip.label) faurby_mcc$tip.label <- gsub("Oreonax_flavicauda","Lagothrix_flavicauda", faurby_mcc$tip.label) faurby_mcc$tip.label <- gsub("Lagothrix_lagotricha", "Lagothrix_lagothricha", faurby_mcc$tip.label) faurby_mcc$tip.label <- gsub("Saguinus_fuscicollis", "Leontocebus_fuscicollis", faurby_mcc$tip.label) faurby_mcc$tip.label <- gsub("Callicebus_caquetensis", "Plecturocebus_caquetensis", faurby_mcc$tip.label) faurby_mcc$tip.label <- gsub("Callicebus_cupreus", "Plecturocebus_cupreus", faurby_mcc$tip.label) faurby_mcc$tip.label <- gsub("Trachypithecus_johnii", "Semnopithecus_johnii", faurby_mcc$tip.label) # Faurby consistent with IUCN, but name changed to reconcil with dataset faurby_mcc$tip.label <- gsub("Trachypithecus_vetulus", "Semnopithecus_vetulus", faurby_mcc$tip.label) # Faurby consistent with IUCN, but name changed to reconcil with dataset faurby_mcc$tip.label <- gsub("Trachypithecus_poliocephalus", "Trachypithecus_leucocephalus", faurby_mcc$tip.label) # Could not be matched with IUCN sum(duplicated(faurby_mcc$tip.label)) # no duplicate tips introduced # synonym_table$scientificName[match("Callicebus_lugens", synonym_table$synonym)] # Cheracebus_lugens (same as our dataset) # synonym_table$scientificName[match("Oreonax_flavicauda", synonym_table$synonym)] # Lagothrix_flavicauda (same as our dataset) # synonym_table$scientificName[match("Lagothrix_lagotricha", synonym_table$synonym)] # Lagothrix_lagothricha (same as our dataset) # synonym_table$scientificName[match("Saguinus_fuscicollis", synonym_table$synonym)] # Leontocebus_leucogenys (in our dataset as Leontocebus fuscicollis) # synonym_table$scientificName[match("Callicebus_caquetensis", synonym_table$synonym)] # Plecturocebus caquetensis (same as our dataset) # synonym_table$scientificName[match("Callicebus_cupreus", synonym_table$synonym)] # Plecturocebus ornatus (in our dataset as Plecturocebus cupresus) # synonym_table$scientificName[match("Trachypithecus_johnii", synonym_table$synonym)] # synonym_table$scientificName[match("Trachypithecus_vetulus", synonym_table$synonym)] # synonym_table$scientificName[match("Trachypithecus_poliocephalus", synonym_table$synonym)] # Check tip reconciliation # spList <- as.vector(unique(diet_subset_final$PrimateSpecies)) # length(spList) # notreelist <- spList[!spList %in% faurby_mcc$tip.label] # species that are not in the tree # nomatchlist %in% notreelist # species that are still not in the tree (and cannot be reconciled) # Cacajao melanocephalus (considered a synonym in Faurby & Svenning, 2015; but seperate species according to itis.gov) # Cercopithecus_doggetti # synonym C. mitis (Faurby & Svenning, 2015), seperate species (Galan-Acedo, 2019), subspecies C. mitis (itis.gov) # Chiropotes_sagulatus #new species (itis.gov), in 2008 considered to be C. chiropotes # Lophocebus_johnstoni #Not in tree, now subspecies of L. albigena (itis.gov) # Nomascus_annamensis #Not in tree, new species (itis.gov) # Trachypithecus_crepusculus #Not in tree, new species, previous subspecies of T. phayrei # Calculate phylogenetic signal ==== phyloSpList <- intersect(as.vector(unique(species_familyprop$Species)), faurby_mcc$tip.label) faurby_mcc_subset <- drop.tip(faurby_mcc, tip = faurby_mcc$tip.label[! faurby_mcc$tip.label %in% phyloSpList]) species_familyprop_subset<- subset(species_familyprop, Species %in% phyloSpList) length(unique(species_familyprop_subset$Species)) # 108 species length(phyloSpList) # only 104 -> 108 species fam_phylosig <- list() for(i in 1:length(toptenfam)){ temp <- subset(species_familyprop_subset, PlantFamily == toptenfam[i]) temp$DietBinary <- ifelse(temp$MeanPercentage > 0, 1, 0) rownames(temp) <- temp$Species family_percent <- temp$MeanPercentage names(family_percent) <- temp$Species family_percent <- family_percent[match(faurby_mcc_subset$tip.label, names(family_percent))] # Calculate Blomberg's K family_K <- phylosig(tree = faurby_mcc_subset, x = family_percent, method = "K", nsim = 1000, test = T) # Calculate Pagel's lambda family_lambda <- phylosig(tree = faurby_mcc_subset, x = family_percent, method = "lambda", nsim = 1000, test = T) # Calculate Fritz and Purvis's D family_D <- phylo.d(data = temp, names.col = Species, binvar = DietBinary, phy = faurby_mcc_subset, permut = 1000) fam_phylosig[[i]] <- data.frame(PlantFamily = toptenfam[i], K = family_K$K, K_p = family_K$P, lambda = family_lambda$lambda, lambda_p = family_lambda$P, D = family_D$DEstimate, D_p0 = family_D$Pval0, D_p1 = family_D$Pval1) } fam_phylosig_df <- do.call("rbind",fam_phylosig) fam_phylosig_df2 <- merge(x = fam_phylosig_df, y= family_summary, all.x = T, sort = F) # Multiple testing correction ==== fam_phylosig_df2$K_p_adj <-p.adjust(fam_phylosig_df2$K_p, method = "fdr") fam_phylosig_df2$lambda_p_adj <-p.adjust(fam_phylosig_df2$lambda_p, method = "fdr") fam_phylosig_df2$D_p0_adj <- p.adjust(fam_phylosig_df2$D_p0, method = "fdr") fam_phylosig_df2$D_p1_adj <- p.adjust(fam_phylosig_df2$D_p1, method = "fdr") # Cleaning up results ==== fam_phylosig_df2[,c("K", "K_p","K_p_adj", "lambda", "lambda_p","lambda_p_adj", "D", "D_p0","D_p0_adj", "D_p1","D_p1_adj")] <- round(fam_phylosig_df2[,c("K", "K_p","K_p_adj", "lambda", "lambda_p","lambda_p_adj", "D", "D_p0","D_p0_adj", "D_p1","D_p1_adj")], 2) fam_phylosig_df2[,"medianPerc"] <- round(fam_phylosig_df2[,"medianPerc"], 2) write.csv(fam_phylosig_df2[c("PlantFamily", "nspecies", "medianPerc", "K", "K_p","K_p_adj", "lambda", "lambda_p","lambda_p_adj", "D", "D_p0", "D_p0_adj", "D_p1", "D_p1_adj")], file.path(res.dir,"phylosig_res.csv"), row.names = F) # Map Faba and Mora consumption on tree =============== fab_mor_prop <- dcast(data = subset(topten_familyprop, PlantFamily %in% c("Fabaceae", "Moraceae")), formula = Species ~ PlantFamily, value.var = "MeanPercentage") rownames(fab_mor_prop) <- fab_mor_prop$Species qbreaks <- c(0, 2.5, 5, 7.5, 10, 20, 30, 40, 50, 100) qlabels <- c("0 - 2.5", "2.5 - 5", "5 - 7.5", "7.5 - 10", "10 - 20", "20 - 30", "30 - 40", "40 - 50", "> 50") fab_mor_prop$Moraceae_bin <- cut(fab_mor_prop$Moraceae, breaks = qbreaks, include.lowest = T, labels = qlabels) fab_mor_prop$Fabaceae_bin <- cut(fab_mor_prop$Fabaceae, breaks = qbreaks, include.lowest = T, labels = qlabels) tree_plot <- ggtree(faurby_mcc_subset, ladderize = T, layout = "circular") + scale_y_continuous(limits = c(0, Ntip(faurby_mcc_subset))) + geom_tiplab(size = 2, offset = 35) mor_fab_plot <- gheatmap(p = tree_plot, data = fab_mor_prop[,c("Moraceae_bin", "Fabaceae_bin")], width = 0.3, color = "transparent", low = "grey80", high = "black", colnames = F, offset = 0, colnames_position = "top", colnames_offset_y = 0, legend_title = "Percentage (%)") + scale_x_continuous(limits = c(0,170)) + scale_fill_manual(values = brewer.pal(name="YlOrRd", n= 9), breaks = levels(fab_mor_prop$Moraceae_bin), limits = levels(fab_mor_prop$Moraceae_bin)) + guides(fill = guide_legend(title = "Percentage"), fill = guide_colorbar()) + theme(legend.position = "right") ggsave(mor_fab_plot + theme(legend.position = "none"), filename = file.path(fig.dir, "mor_fab_percent.pdf"), height = 8, width = 8) ggsave(get_legend(mor_fab_plot), filename = file.path(fig.dir, "mor_fab_percent_legend.pdf"), height = 3, width = 2) ## Inner-ring = Moraceae, outer-ring = Fabaceae ## DETERMINE PLANT PART AND FAMILY ASSOCIATION ============================== # Get counts for associated tags tag_counts <- table(c(as.vector(diet_subset_final$PartPlant), as.vector(diet_subset_final$PartPlant2), as.vector(diet_subset_final$PartPlant3), as.vector(diet_subset_final$PartPlant4), as.vector(diet_subset_final$PartPlant5), as.vector(diet_subset_final$PartPlant6), as.vector(diet_subset_final$PartPlant7))) nrow(diet_subset_final) # 8044 --> 8981 diet records leaf_tags <- c("young_leaves", "leaves", "leaf_buds", "mature_leaves", "petioles") fruit_tags <- c("fruits", "unripe_fruits", "ripe_fruits", "pods") seed_tags <- c("seeds", "immature_seeds", "mature_seeds", "acorns") flower_tags <- c("strobili", "flower_pollen", "flower_buds", "flowers", "nectar") animal_tags <- c("animal") other_tags <- c("branch", "bark", "buds", "buds ", "bulb", "closed_shoots", "exudates", "galls", "gum", "latex", "mature_thorns", "moss", "mucilage", "open_shoots", "pith", "resin", "roots", "sap", "seedlings", "shoots", "soft_thorns", "stems", "thorns", "tubers", "twigs", "wood", "young_thorns") nonplant_tags <- c("dung", "fungus", "soil", "honey") unknown_tags <- c("unidentified", "unknown") tag_df <- data.frame(tag_counts = as.vector(tag_counts), tags = names(tag_counts)) # Check that all are in tags are accounted for sum(tag_df$tag_counts) # 9841 --> 11065 sum(c( sum(tag_counts[names(tag_counts) %in% leaf_tags]), sum(tag_counts[names(tag_counts) %in% fruit_tags]), sum(tag_counts[names(tag_counts) %in% seed_tags]), sum(tag_counts[names(tag_counts) %in% flower_tags]), sum(tag_counts[names(tag_counts) %in% other_tags]), sum(tag_counts[names(tag_counts) %in% nonplant_tags]), sum(tag_counts[names(tag_counts) %in% animal_tags]), sum(tag_counts[names(tag_counts) %in% unknown_tags]) )) # 11065 plant # all_tags <- c(leaf_tags, fruit_tags, seed_tags, flower_tags, animal_tags, other_tags, nonplant_tags, unknown_tags) # setdiff(names(tag_counts), all_tags) # Count the number of associated tags and assign diet class each dietary record diet_classify <- ddply(.data = diet_subset_final, .variables = .(RecordID), .progress = "text", .fun = function(x){ part_plant <- c(as.character(x$PartPlant), as.character(x$PartPlant2), as.character(x$PartPlant3), as.character(x$PartPlant4), as.character(x$PartPlant5), as.character(x$PartPlant6), as.character(x$PartPlant7)) # Count number of tags n_tags <- sum(!is.na(part_plant)) # only count non-NAs n_leaf_tags <- sum(part_plant %in% leaf_tags) # tags that are leaf n_fruit_tags <- sum(part_plant %in% fruit_tags) # tags that are fruit n_other_tags <- sum(part_plant %in% c(other_tags, nonplant_tags, flower_tags, seed_tags)) # tags that are non-NA but also neither leaf nor fruit n_animal_tags <- sum(part_plant %in% animal_tags) n_unknown_tags <- sum(part_plant %in% unknown_tags) # Quantify rank of tags (to assume exponential weighting based on order of appearance) denom <- 2^n_tags -1 fractions <- 2^(n_tags-1:n_tags)/ denom ranked_leaf <- sum(fractions[which(part_plant %in% leaf_tags)]) ranked_fruit <- sum(fractions[which(part_plant %in% fruit_tags)]) ranked_others <- sum(fractions[which(part_plant %in% c(other_tags, nonplant_tags, flower_tags, seed_tags))]) ranked_animals <- sum(fractions[which(part_plant %in% animal_tags)]) ranked_unknown <- sum(fractions[which(part_plant %in% unknown_tags)]) leaves_present <- ifelse(n_leaf_tags > 0, 1, 0) fruit_present <- ifelse(n_fruit_tags > 0, 1, 0) others_present <- ifelse(n_other_tags > 0, 1, 0) animal <- ifelse(n_animal_tags == 1, 1, 0) unknown <- ifelse(n_unknown_tags > 0, 1, 0) Percentage <- x$Percentages_Recalculated Food <- x$Food Study_ID <- x$Study_ID PlantFamily <- x$PlantFamily data.frame(Study_ID, Food, PlantFamily, Percentage, n_tags, n_leaf_tags, n_fruit_tags, n_animal_tags, n_other_tags, n_unknown_tags, ranked_leaf, ranked_fruit, ranked_others, ranked_animals, ranked_unknown, unknown, animal, leaves_present, fruit_present, others_present) }) ## QUANTIFY FOLIOVORY / FRUGIVORY ============================== # Approportion percentages into the different diet classes: fruit, leaves, others and unknown diet_classify_prop <- ddply(.data = diet_classify, .variables = .(RecordID), .fun = function(x){ n_dietclasses <- sum(c(x$leaves_present, x$fruit_present, x$others_present, x$animal, x$unknown)) # Unweighted proportion leaves_proportion_uw = (x$leaves_present / n_dietclasses) * x$Percentage fruit_proportion_uw = (x$fruit_present / n_dietclasses) * x$Percentage others_proportion_uw = (x$others_present / n_dietclasses) * x$Percentage unknown_proportion_uw = (x$unknown / n_dietclasses) * x$Percentage # Weighted proportion leaves_proportion_w = (x$n_leaf_tags / x$n_tags) * x$Percentage fruit_proportion_w = (x$n_fruit_tags / x$n_tags) * x$Percentage others_proportion_w = (x$n_other_tags / x$n_tags) * x$Percentage unknown_proportion_w = (x$n_unknown_tags / x$n_tags) * x$Percentage # Exponential proportion leaves_proportion_e = x$ranked_leaf * x$Percentage fruit_proportion_e = x$ranked_fruit * x$Percentage others_proportion_e = x$ranked_others * x$Percentage unknown_proportion_e = x$ranked_unknown * x$Percentage # Animal proportion animal_proportion = (x$animal / x$n_tags) * x$Percentage PlantFamily = x$PlantFamily Study_ID = x$Study_ID data.frame(Study_ID, PlantFamily, leaves_proportion_uw, fruit_proportion_uw, others_proportion_uw, unknown_proportion_uw, leaves_proportion_w, fruit_proportion_w, others_proportion_w, unknown_proportion_w, leaves_proportion_e, fruit_proportion_e, others_proportion_e, unknown_proportion_e, animal_proportion) }, .progress = "text") # Checking results for an arbitrary study (should sum to 100). unknown proportion should only be different for study 117 and 98 sum(c(sum(subset(diet_classify_prop, Study_ID == 117)$leaves_proportion_uw), sum(subset(diet_classify_prop, Study_ID == 117)$fruit_proportion_uw), sum(subset(diet_classify_prop, Study_ID == 117)$others_proportion_uw), sum(subset(diet_classify_prop, Study_ID == 117)$animal_proportion), sum(subset(diet_classify_prop, Study_ID == 117)$unknown_proportion_uw))) sum(c(sum(subset(diet_classify_prop, Study_ID == 117)$leaves_proportion_e), sum(subset(diet_classify_prop, Study_ID == 117)$fruit_proportion_e), sum(subset(diet_classify_prop, Study_ID == 117)$others_proportion_e), sum(subset(diet_classify_prop, Study_ID == 117)$animal_proportion_e), sum(subset(diet_classify_prop, Study_ID == 117)$unknown_proportion_e))) # Checking results for an arbitrary study (should sum to 100) sum(c(sum(subset(diet_classify_prop, Study_ID == 16)$leaves_proportion_uw), sum(subset(diet_classify_prop, Study_ID == 16)$fruit_proportion_uw), sum(subset(diet_classify_prop, Study_ID == 16)$others_proportion_uw), sum(subset(diet_classify_prop, Study_ID == 16)$animal_proportion), sum(subset(diet_classify_prop, Study_ID == 16)$unknown_proportion_uw))) sum(c(sum(subset(diet_classify_prop, Study_ID == 16)$leaves_proportion_e), sum(subset(diet_classify_prop, Study_ID == 16)$fruit_proportion_e), sum(subset(diet_classify_prop, Study_ID == 16)$others_proportion_e), sum(subset(diet_classify_prop, Study_ID == 16)$animal_proportion), sum(subset(diet_classify_prop, Study_ID == 16)$unknown_proportion_e))) # Identify studies where unknowns are greater than 50% lowqual <- ddply(.data = diet_classify_prop, .variables = .(Study_ID), .fun = function(x) {flag <- ifelse(sum(x$unknown_proportion_uw) >= 50 | sum(x$unknown_proportion_e) >= 50, 1, 0); data.frame(flag)} ) flagged_studyid <- subset(lowqual, flag == 1)$Study_ID sum(lowqual$flag) # 48 studies # Sum up diet class percentages for each plant family diet_classify_prop_summedfamily <- ddply(.data = subset(diet_classify_prop, !Study_ID %in% flagged_studyid), .variables = .(Study_ID, PlantFamily), .fun = summarize, total_percent_uw = sum(leaves_proportion_uw) + sum(fruit_proportion_uw) + sum(others_proportion_uw) + sum(unknown_proportion_uw) + sum(animal_proportion), total_percent_w = sum(leaves_proportion_w) + sum(fruit_proportion_w) + sum(others_proportion_w) + sum(unknown_proportion_w) + sum(animal_proportion), total_percent_e = sum(leaves_proportion_e) + sum(fruit_proportion_e) + sum(others_proportion_e) + sum(unknown_proportion_e) + sum(animal_proportion), total_leaves_prop_uw = sum(leaves_proportion_uw)/total_percent_uw, total_fruit_prop_uw = sum(fruit_proportion_uw)/total_percent_uw, total_others_prop_uw = sum(others_proportion_uw)/total_percent_uw, total_unknown_prop_uw = sum(unknown_proportion_uw) / total_percent_uw, total_leaves_prop_w = sum(leaves_proportion_w)/total_percent_w, total_fruit_prop_w = sum(fruit_proportion_w)/total_percent_w, total_others_prop_w = sum(others_proportion_w)/total_percent_w, total_unknown_prop_w = sum(unknown_proportion_w) / total_percent_w, total_leaves_prop_e = sum(leaves_proportion_e)/total_percent_e, total_fruit_prop_e = sum(fruit_proportion_e)/total_percent_e, total_others_prop_e = sum(others_proportion_e)/total_percent_e, total_unknown_prop_e = sum(unknown_proportion_e) / total_percent_e, total_animal_prop = sum(animal_proportion) / total_percent_uw, .progress = "text") # unweighted and weighted total sums should be the same; because only how it is proportioned is different sum(subset(diet_classify_prop_summedfamily, Study_ID == 117)$total_percent_w) # should sum up to 100 for each study sum(subset(diet_classify_prop_summedfamily, Study_ID == 117)$total_percent_uw) sum(subset(diet_classify_prop_summedfamily, Study_ID == 117)$total_percent_e) # Calculate the proportion of diet for each family that belongs in each diet class (standardized by total percentage for each plant family) diet_classify_prop_familymean <- ddply(.data = diet_classify_prop_summedfamily, .variables = .(PlantFamily), .fun = summarize, mean_leaves_prop_uw = mean(total_leaves_prop_uw), mean_fruit_prop_uw = mean(total_fruit_prop_uw), mean_others_prop_uw = mean(total_others_prop_uw), mean_unknown_uw = mean(total_unknown_prop_uw), mean_leaves_prop_w = mean(total_leaves_prop_w), mean_fruit_prop_w = mean(total_fruit_prop_w), mean_others_prop_w = mean(total_others_prop_w), mean_unknown_w = mean(total_unknown_prop_w), mean_leaves_prop_e = mean(total_leaves_prop_e), mean_fruit_prop_e = mean(total_fruit_prop_e), mean_others_prop_e = mean(total_others_prop_e), mean_unknown_e = mean(total_unknown_prop_e), mean_animal = mean(total_animal_prop)) write.csv(diet_classify_prop_familymean, file = file.path(res.dir, "plantfamily_dietcategories.csv"), row.names = FALSE) # Plot family-level weighted against unweighted estimates (Fig S2) =============== diet_classify_prop_familymean_subset <- subset(diet_classify_prop_familymean, PlantFamily %in% toptenfam) diet_classify_prop_familymean_subset$PlantFamily <- factor(diet_classify_prop_familymean_subset$PlantFamily, levels = toptenfam) custom_palette = c("#003262", "#3B7EA1", "#9BBEA9", "#00B0DA", "#00A598", brewer.pal(9, "BuGn")[8], "#CFDD45", "#859438", "#FDB515", brewer.pal(9, "YlOrRd")[5], "#ED4E33", "#C4820E", "#D9661F", "#6C3302") family_leafcorr <- cor.test(diet_classify_prop_familymean_subset$mean_leaves_prop_uw, diet_classify_prop_familymean_subset$mean_leaves_prop_e) family_leafcorr_plot <- ggplot(data = diet_classify_prop_familymean_subset, aes(y = mean_leaves_prop_uw*100, x= mean_leaves_prop_e*100)) + geom_abline(aes(intercept = 0, slope = 1), linetype = "dashed", colour = "grey70")+ geom_point(aes(fill = PlantFamily), pch = 21, colour = "white", size = 3) + annotate(geom = "text", x = 10, y = 43, label = paste0("Pearson's correlation = ", round(family_leafcorr$estimate,2), "\np < 0.001") , hjust = 0, vjust = 1) + labs(y = "Mean percentage in diet\n(leaf, unweighted)", x = "Mean percentage in diet\n(leaf, weighted)") + #geom_text_repel(aes(label = PlantFamily), # nudge_x=c(5, -5, -5, -5, 5, 5, 5, -5, 5, -5), # box.padding = 0.25, direction = "x", colour = "grey20") + scale_y_continuous(limits = c(10, 43)) + scale_x_continuous(limits = c(10, 43)) + scale_fill_manual(values = custom_palette[c(1,3,4,5,6,7,10,11,12,14)]) + theme(panel.background = element_blank(), legend.position = "none") family_fruitcorr <- cor.test(diet_classify_prop_familymean_subset$mean_fruit_prop_uw, diet_classify_prop_familymean_subset$mean_fruit_prop_e) family_fruitcorr_plot <- ggplot(data = diet_classify_prop_familymean_subset, aes(y = mean_fruit_prop_uw*100, x= mean_fruit_prop_e*100)) + geom_abline(aes(intercept = 0, slope = 1), linetype = "dashed", colour = "grey70")+ geom_point(aes(fill = PlantFamily), pch = 21, colour = "white", size = 3) + annotate(geom = "text", x = 15, y = 75, label = paste0("Pearson's correlation = ", round(family_fruitcorr$estimate,2), "\np < 0.001") , hjust = 0, vjust = 1) + labs(y = "Mean percentage in diet\n(fruit, unweighted)", x = "Mean percentage in diet\n(fruit, weighted)") + #geom_text_repel(aes(label = PlantFamily), # direction = "x", nudge_x = c(8,0,10,-5,0,-5,0,-5,0,0), # colour = "grey20") + scale_y_continuous(limits = c(15, 75)) + scale_x_continuous(limits = c(15, 75)) + scale_fill_manual(values = custom_palette[c(1,3,4,5,6,7,10,11,12,14)]) + theme(panel.background = element_blank(), legend.position = "none") family_otherscorr <- cor.test(diet_classify_prop_familymean_subset$mean_others_prop_uw, diet_classify_prop_familymean_subset$mean_others_prop_e) family_otherscorr_plot <- ggplot(data = diet_classify_prop_familymean_subset, aes(y = mean_others_prop_uw*100, x= mean_others_prop_e*100)) + geom_abline(aes(intercept = 0, slope = 1), linetype = "dashed", colour = "grey70")+ geom_point(aes(fill = PlantFamily), pch = 21, colour = "white", size = 3) + annotate(geom = "text", x = 5, y = 40, label = paste0("Pearson's correlation = ", round(family_otherscorr$estimate,2), "\np < 0.001") , hjust = 0, vjust = 1) + labs(y = "Mean percentage in diet\n(others, unweighted)", x = "Mean percentage in diet\n(others, weighted)") + #geom_text_repel(aes(label = PlantFamily), # nudge_x = c(-2,-5,-5,5,-8,0,10,5,0,5), # nudge_y = c(3,0,2,0,0,0,0,0,0,0), # direction = "x", colour = "grey20") + scale_y_continuous(limits = c(5, 40)) + scale_x_continuous(limits = c(5, 40)) + scale_fill_manual(values = custom_palette[c(1,3,4,5,6,7,10,11,12,14)]) + theme(panel.background = element_blank(), legend.position = "none") family_corr_comb <- plot_grid( plot_grid(family_leafcorr_plot, family_fruitcorr_plot, family_otherscorr_plot, labels = "auto", nrow = 3), get_legend(family_leafcorr_plot+ theme(legend.position = "right", legend.title = element_blank())), nrow = 1, rel_widths = c(1,0.4)) ggsave(family_corr_comb, filename = file.path(fig.dir, "corrplot_family.pdf"), height = 10, width = 5) ggsave(family_corr_comb, filename = file.path(fig.dir, "corrplot_family.png"), height = 10, width = 5) # Plot proportion of each diet categories across families as stacked bars (Fig 1c) =============== diet_classify_prop_familymean_unweighted <- melt(diet_classify_prop_familymean, id.vars = "PlantFamily", measure.vars = c("mean_leaves_prop_uw", "mean_fruit_prop_uw", "mean_others_prop_uw", "mean_unknown_uw")) diet_classify_prop_familymean_weighted <- melt(diet_classify_prop_familymean, id.vars = "PlantFamily", measure.vars = c("mean_leaves_prop_e", "mean_fruit_prop_e", "mean_others_prop_e", "mean_unknown_e")) diet_classes <- c("mean_leaves_prop_uw", "mean_fruit_prop_uw", "mean_others_prop_uw", "mean_unknown_uw", "mean_leaves_prop_e", "mean_fruit_prop_e", "mean_others_prop_e", "mean_unknown_e") diet_classes_labels <- c("Leaves", "Fruits", "Others", "Unknown","Leaves", "Fruits", "Others", "Unknown") diet_classify_prop_familymean_unweighted$variable <- factor(diet_classify_prop_familymean_unweighted$variable, levels = diet_classes, labels = diet_classes_labels) diet_classify_prop_familymean_weighted$variable <- factor(diet_classify_prop_familymean_weighted$variable, levels = diet_classes, labels = diet_classes_labels) uw <- subset(diet_classify_prop_familymean_unweighted, PlantFamily %in% toptenfam) uw$PlantFamily <- factor(uw$PlantFamily, levels = toptenfam) diet_class_family_unweighted <- ggplot(data = uw) + geom_bar(aes(y = value, x = PlantFamily, fill = variable), position = "stack", stat = "identity") + labs(y = "Mean proportion of diet") + scale_y_continuous(expand = c(0,0)) + theme(legend.position = "bottom", legend.title = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1), axis.title.x = element_blank(), panel.background = element_blank()) + scale_fill_manual(values = c("#859438", "#FDB515", "#003262", "grey")) ggsave(diet_class_family_unweighted, filename = file.path(fig.dir, "diet_class_family_unweighted.pdf"), width=6, height = 4) w <- subset(diet_classify_prop_familymean_weighted, PlantFamily %in% toptenfam) w$PlantFamily <- factor(uw$PlantFamily, levels = toptenfam) diet_class_family_weighted <- ggplot(data = w) + geom_bar(aes(y = value, x = PlantFamily, fill = variable), position = "stack", stat = "identity") + labs(y = "Mean proportion of diet") + scale_y_continuous(expand = c(0,0)) + theme(legend.position = "bottom", legend.title = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1), axis.title.x = element_blank(), panel.background = element_blank()) + scale_fill_manual(values = c("#859438", "#FDB515", "#003262", "grey")) ggsave(diet_class_family_weighted, filename = file.path(fig.dir, "diet_class_family_weighted.pdf"), width=6, height = 4) ## Combine panels (Fig 1) =============== rel_impo_fam <- plot_grid(plot_grid(family_speciesprop_plot, topten_speciesprop_plot, nrow = 1, labels = c("a", "b"), align = "h", axis = "bt"), diet_class_family_weighted + theme(legend.position = "top"), nrow = 2, labels = c("","c")) ggsave(file.path(fig.dir, "rel_impo_fam.pdf"), rel_impo_fam, width = 7.5, height = 6) ## Identify which families are consumed for which part =============== test <- subset(diet_classify_prop_familymean, PlantFamily %in% toptenfam) test$PlantFamily[(test$mean_fruit_prop_e / test$mean_leaves_prop_e) > 1.5] test$PlantFamily[(test$mean_leaves_prop_e / test$mean_fruit_prop_e) > 1.5] # Sum the diet class percentages for each study =============== diet_classify_summed <- ddply(.data = subset(diet_classify_prop, !Study_ID %in% flagged_studyid), .variables = .(Study_ID), .fun = function(x){ total_leaves_prop_uw = sum(x$leaves_proportion_uw) total_fruit_prop_uw = sum(x$fruit_proportion_uw) total_others_prop_uw = sum(x$others_proportion_uw) total_unknown_prop_uw = sum(x$unknown_proportion_uw) # unknown represents items for which plant part is not known or if item is unidentified total_leaves_prop_w = sum(x$leaves_proportion_w) total_fruit_prop_w = sum(x$fruit_proportion_w) total_others_prop_w = sum(x$others_proportion_w) total_unknown_prop_w = sum(x$unknown_proportion_w) total_leaves_prop_e = sum(x$leaves_proportion_e) total_fruit_prop_e = sum(x$fruit_proportion_e) total_others_prop_e = sum(x$others_proportion_e) total_unknown_prop_e = sum(x$unknown_proportion_e) total_animal = sum(x$animal_proportion) # Calculate proportion of the "identified" fruit diet that is Moraceae if("Moraceae" %in% x$PlantFamily & sum(x$fruit_proportion_e[!x$PlantFamily == "Unknown_plant"]) > 0){ total_moraceae_fruit_prop_e = sum(subset(x, PlantFamily == "Moraceae")$fruit_proportion_e) / sum(x$fruit_proportion_e[!x$PlantFamily == "Unknown_plant"]) } else { total_moraceae_fruit_prop_e = 0 } if("Moraceae" %in% x$PlantFamily & sum(x$fruit_proportion_uw[!x$PlantFamily == "Unknown_plant"]) > 0){ total_moraceae_fruit_prop_uw = sum(subset(x, PlantFamily == "Moraceae")$fruit_proportion_uw) / sum(x$fruit_proportion_uw[!x$PlantFamily == "Unknown_plant"]) } else { total_moraceae_fruit_prop_uw = 0 } # Calculate proportion of the "identified" leaf diet that is Fabaceae if("Fabaceae" %in% x$PlantFamily & sum(x$leaves_proportion_e[!x$PlantFamily == "Unknown_plant"]) > 0){ total_fabaceae_leaf_prop_e = sum(subset(x, PlantFamily == "Fabaceae")$leaves_proportion_e) / sum(x$leaves_proportion_e[!x$PlantFamily == "Unknown_plant"]) } else { total_fabaceae_leaf_prop_e = 0 } if("Fabaceae" %in% x$PlantFamily & sum(x$leaves_proportion_uw[!x$PlantFamily == "Unknown_plant"]) > 0){ total_fabaceae_leaf_prop_uw = sum(subset(x, PlantFamily == "Fabaceae")$leaves_proportion_uw) / sum(x$leaves_proportion_uw[!x$PlantFamily == "Unknown_plant"]) } else { total_fabaceae_leaf_prop_uw = 0 } data.frame(total_leaves_prop_w, total_fruit_prop_w, total_others_prop_w, total_unknown_prop_w, total_leaves_prop_uw, total_fruit_prop_uw, total_others_prop_uw, total_unknown_prop_uw, total_leaves_prop_e, total_fruit_prop_e, total_others_prop_e, total_unknown_prop_e, total_animal, total_moraceae_fruit_prop_uw, total_moraceae_fruit_prop_e, total_fabaceae_leaf_prop_uw, total_fabaceae_leaf_prop_e) }) # Quantify the average relative proportion of different diet categories for each primate species diet_classify_summed2 <- merge(x = diet_classify_summed, y = study[,c("Study_ID", "Species")], by = "Study_ID", all.x = TRUE) diet_classify_species <- ddply(.data = diet_classify_summed2, .variables = .(Species), .fun = summarize, avg_leaves_perc_uw = mean(total_leaves_prop_uw), avg_fruit_perc_uw = mean(total_fruit_prop_uw), avg_others_perc_uw = mean(total_others_prop_uw), avg_unknown_uw = mean(total_unknown_prop_uw), avg_mora_fruit_prop_uw = mean(total_moraceae_fruit_prop_uw), avg_fabaceae_leaf_prop_uw = mean(total_fabaceae_leaf_prop_uw), avg_leaves_perc_e = mean(total_leaves_prop_e), avg_fruit_perc_e = mean(total_fruit_prop_e), avg_others_perc_e = mean(total_others_prop_e), avg_unknown_e = mean(total_unknown_prop_e), avg_animal = mean(total_animal), avg_mora_fruit_prop_e = mean(total_moraceae_fruit_prop_e), avg_fabaceae_leaf_prop_e = mean(total_fabaceae_leaf_prop_e)) write.csv(diet_classify_species, file.path(res.dir, "primate_dietcategories.csv"), row.names = FALSE) # Barplot of diet categories across primate species (Fig S3a) ============ diet_classify_species_uw <- melt(diet_classify_species, id.vars = "Species", measure.vars = c("avg_leaves_perc_uw", "avg_fruit_perc_uw", "avg_others_perc_uw", "avg_unknown_uw", "avg_animal")) diet_classify_species_e <- melt(diet_classify_species, id.vars = "Species", measure.vars = c("avg_leaves_perc_e", "avg_fruit_perc_e", "avg_others_perc_e", "avg_unknown_e", "avg_animal")) diet_classify_species_uw$prop <- diet_classify_species_uw$value / 100 diet_classify_species_e$prop <- diet_classify_species_e$value / 100 diet_classes <- c("avg_leaves_perc_uw", "avg_fruit_perc_uw", "avg_others_perc_uw", "avg_animal", "avg_unknown_uw", "avg_leaves_perc_e", "avg_fruit_perc_e", "avg_others_perc_e","avg_unknown_e") diet_classes_labels <- c("Leaves", "Fruits", "Others", "Animal", "Unknown", "Leaves", "Fruits", "Others", "Unknown") diet_classify_species_uw$variable2 <- factor(diet_classify_species_uw$variable, levels = diet_classes, labels = diet_classes_labels) diet_classify_species_e$variable2 <- factor(diet_classify_species_e$variable, levels = diet_classes, labels = diet_classes_labels) diet_class_species_uw_plot <- ggplot(diet_classify_species_uw) + geom_bar(aes(y = prop, fill= variable2, x = Species), position = "stack", stat = "identity") + scale_y_continuous(expand = c(0,0)) + labs(y = "Mean proportion of diet") + theme(legend.position = "bottom", axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), axis.title.x = element_blank(), legend.title = element_blank()) + scale_fill_manual(values = c("#859438", "#FDB515", "#003262", "#6C3302","grey")) ggsave(diet_class_species_uw_plot, filename = file.path(fig.dir, "diet_class_species_uw.pdf"), width = 8, height = 4) diet_class_species_e_plot <- ggplot(diet_classify_species_e) + geom_bar(aes(y = prop, fill= variable2, x = Species), position = "stack", stat = "identity") + scale_y_continuous(expand = c(0,0)) + labs(y = "Mean proportion of diet") + theme(legend.position = "bottom", axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), axis.title.x = element_blank(), legend.title = element_blank()) + scale_fill_manual(values = c("#859438", "#FDB515", "#003262", "#6C3302", "grey")) ggsave(diet_class_species_e_plot, filename = file.path(fig.dir, "diet_class_species_e.pdf"), width = 8, height = 4) # Histogram of weighted estimates of plant part categories across primates (Fig S3b) ====== diet_class_histogram <- ggplot(data = subset(diet_classify_species_e, value > 0)) + geom_histogram(aes(x = value, fill = variable2), binwidth = 10) + facet_wrap(~variable2, scales = "free_x", nrow = 1) + labs(x = "Mean proportion of diet") + scale_fill_manual(values = c("#859438", "#FDB515", "#003262", "#6C3302", "grey")) + theme(legend.position = "none") diet_class_species_comb <- plot_grid(diet_class_species_e_plot, diet_class_histogram, nrow = 2, labels = "auto", rel_heights = c(1, 0.5) ) ggsave(diet_class_species_comb, filename = file.path(fig.dir, "diet_class_species_comb.pdf"), width = 12, height = 9) ggsave(diet_class_species_comb, filename = file.path(fig.dir, "diet_class_species_comb.png"), width = 12, height = 9) # Correlated weighted and unweighted estimates of plant part importance across primates (Fig S4) ====== leafcorr <- cor.test(diet_classify_species$avg_leaves_perc_uw, diet_classify_species$avg_leaves_perc_e) leafcorrplot <- ggplot(data = diet_classify_species) + geom_abline(intercept = 0, slope = 1, colour = "darkblue", size = 1) + geom_point(aes(y = avg_leaves_perc_uw, x = avg_leaves_perc_e), pch = 21, colour = "black", size = 3) + labs(y = "Mean percentage in diet\n(leaf, unweighted)", x = "Mean percentage in diet\n(leaf, weighted)") + annotate(geom = "text", x = 0, y = 100, label = paste0("Pearson's correlation =", round(leafcorr$estimate,2), "\np < 0.001") , hjust = 0, vjust = 1) + theme(panel.background = element_blank()) fruitcorr <- cor.test(diet_classify_species$avg_fruit_perc_uw, diet_classify_species$avg_fruit_perc_e) fruitcorrplot <- ggplot(data = diet_classify_species) + geom_abline(intercept = 0, slope = 1, colour = "darkblue", size = 1) + geom_point(aes(y = avg_fruit_perc_uw, x = avg_fruit_perc_e), pch = 21, colour = "black", size = 3) + labs(y = "Mean percentage in diet\n(fruit, unweighted)", x = "Mean percentage in diet\n(fruit, weighted)") + annotate(geom = "text", x = 0, y = 100, label = paste0("Pearson's correlation =", round(fruitcorr$estimate,2), "\np < 0.001") , hjust = 0, vjust = 1) + theme(panel.background = element_blank()) othercorr <- cor.test(diet_classify_species$avg_others_perc_uw, diet_classify_species$avg_others_perc_e) othercorrplot <- ggplot(data = diet_classify_species) + geom_abline(intercept = 0, slope = 1, colour = "darkblue", size = 1) + geom_point(aes(y = avg_others_perc_uw, x = avg_others_perc_e), pch = 21, colour = "black", size = 3) + labs(y = "Mean percentage in diet\n(others, unweighted)", x = "Mean percentage in diet\n(others, weighted)") + annotate(geom = "text", x = 0, y = 100, label = paste0("Pearson's correlation =", round(leafcorr$estimate,2), "\np < 0.001") , hjust = 0, vjust = 1) + theme(panel.background = element_blank()) corrplotcomb <- plot_grid(leafcorrplot, fruitcorrplot, othercorrplot, labels = "auto", nrow = 3) ggsave(corrplotcomb, filename = file.path(fig.dir, "corrplot_primate.pdf"), height = 12, width =4) ggsave(corrplotcomb, filename = file.path(fig.dir, "corrplot_primate.png"), height = 12, width =4) # Summary of folivory and frugivory across primates length(unique(diet_classify_species$Species)) # 88 -> 97 species mean(diet_classify_species$avg_fruit_perc_e) # 38.0 sd(diet_classify_species$avg_fruit_perc_e) # 25.3 median(diet_classify_species$avg_fruit_perc_e) # 36.0 range(diet_classify_species$avg_fruit_perc_e) # 0.2 - 92.3 mean(diet_classify_species$avg_leaves_perc_e) # 26.1 median(diet_classify_species$avg_leaves_perc_e) # 17.2 sd(diet_classify_species$avg_leaves_perc_e) # 26.5 range(diet_classify_species$avg_leaves_perc_e) # 0 - 91.8 mean(diet_classify_species$avg_others_perc_e) # 21.4 median(diet_classify_species$avg_others_perc_e) # 14.0 sd(diet_classify_species$avg_others_perc_e) # 18.6 range(diet_classify_species$avg_others_perc_e) # 0 - 88 mean(diet_classify_species$avg_animal) # 4.7 median(diet_classify_species$avg_animal) # 0 sd(diet_classify_species$avg_animal)# 10.2 range(diet_classify_species$avg_animal)# 0 - 59.1 mean(diet_classify_species$avg_unknown_e) # 12.2 median(diet_classify_species$avg_unknown_e) # 4.6 sd(diet_classify_species$avg_unknown_e) # 11.9 range(diet_classify_species$avg_unknown_e) # 0 - 49.5 # Cross-validate estimated foliovory and frugivory estimates ========== diet_classify_species$Species <- gsub(diet_classify_species$Species, pattern = "_", replacement = " ") elton_traits <- read.delim(file.path(data.dir, "EltonTraits_v1_MamFuncDat.txt")) elton_traits$Diet.Animal <- apply(elton_traits[c("Diet.Inv","Diet.Vend", "Diet.Vect", "Diet.Vfish", "Diet.Vunk")], MARGIN = 1, FUN = sum) sum(diet_classify_species$Species %in% elton_traits$Scientific) # overlap with 72 -> 82 species sort(setdiff(diet_classify_species$Species, elton_traits$Scientific)) mammaldiet <- read.delim(file.path(data.dir, "MammalDIET_v1.0.txt")) mammaldiet$Binomial <- paste(mammaldiet$Genus, mammaldiet$Species) sum(diet_classify_species$Species %in% mammaldiet$Binomial) # overlap with 73 -> 82 species sort(setdiff(diet_classify_species$Species, mammaldiet$Binomial)) diet_classify_species2 <- merge(x = diet_classify_species, y = elton_traits[,c("Scientific", "Diet.Fruit", "Diet.PlantO", "Diet.Animal")], by.x = "Species", by.y = "Scientific", all.x = TRUE) diet_classify_species3 <- merge(x = diet_classify_species2, y = mammaldiet[,c("Binomial", "Fruit", "Leaf", "Animal")], by.x = "Species", by.y = "Binomial", all.x = TRUE) diet_classify_species3$Leaf <- factor(diet_classify_species3$Leaf, levels = c("0", "3", "2", "1", "NA")) diet_classify_species3$Fruit <- factor(diet_classify_species3$Fruit, levels = c("0", "2", "1", "NA")) diet_classify_species3$Animal <- factor(diet_classify_species3$Animal, levels = c("0", "3","2", "1", "NA")) diet_classify_species3$OrderedLeaf <- as.numeric(diet_classify_species3$Leaf) diet_classify_species3$OrderedFruit <- as.numeric(diet_classify_species3$Fruit) diet_classify_species3$OrderedAnimal <- as.numeric(diet_classify_species3$Animal) # Plot correlation between EltonTraits / MammalDiet with estimates (Fig S5) ======= mammaldiet_foliov_validate_plot <- ggplot(data = subset(diet_classify_species3, !is.na(Leaf))) + geom_boxplot(aes(y = avg_leaves_perc_e, x= Leaf), outlier.shape = NA) + geom_point(aes(y = avg_leaves_perc_e, x= Leaf), pch = 21, colour = "black", size = 2) + labs(x = "MammalDiet folivory ranks", y = "Estimated percent leaves in diet") + theme(panel.background = element_blank()) mammaldiet_frug_validate_plot <- ggplot(data = subset(diet_classify_species3, !is.na(Fruit))) + geom_boxplot(aes(y = avg_fruit_perc_e, x = Fruit), outlier.shape = NA) + geom_point(aes(y = avg_fruit_perc_e, x = Fruit), pch = 21, colour = "black", size = 2) + labs(x = "MammalDiet frugivory ranks", y = "Estimated percent fruit in diet") + theme(panel.background = element_blank()) mammaldiet_animal_validate_plot <- ggplot(data = subset(diet_classify_species3, !is.na(Animal))) + geom_boxplot(aes(y = avg_animal, x = Animal), outlier.shape = NA) + geom_point(aes(y = avg_animal, x = Animal), pch = 21, colour = "black", size = 2) + labs(x = "MammalDiet animal ranks", y = "Estimated percent animal in diet") + theme(panel.background = element_blank()) elton_foliov_validate_plot <- ggplot(data = diet_classify_species3, aes(y = avg_leaves_perc_e, x = Diet.PlantO)) + geom_point(pch = 21, colour = "black", size = 2) + geom_smooth(method = "lm") + labs(x = "Elton Traits plant score", y = "Estimated percent leaves in diet") + theme(panel.background = element_blank()) elton_frug_validate_plot <- ggplot(data = diet_classify_species3, aes(y = avg_fruit_perc_e, x = Diet.Fruit)) + geom_point(pch = 21, colour = "black", size = 2) + geom_smooth(method = "lm") + labs(x = "Elton Traits fruit score", y = "Estimated percent fruit in diet") + theme(panel.background = element_blank()) elton_animal_validate_plot <- ggplot(data = diet_classify_species3, aes(y = avg_animal, x = Diet.Animal)) + geom_point(pch = 21, colour = "black", size = 2) + geom_smooth(method = "lm") + labs(x = "Elton Traits animal score", y = "Estimated percent animal in diet") + theme(panel.background = element_blank()) diet_validate_plots <- plot_grid(plotlist = list(mammaldiet_foliov_validate_plot, mammaldiet_frug_validate_plot, mammaldiet_animal_validate_plot, elton_foliov_validate_plot, elton_frug_validate_plot, elton_animal_validate_plot ), nrow = 2, labels = "auto") ggsave(diet_validate_plots, filename = file.path(fig.dir, "diet_validate.pdf"), width = 9, height = 6) ggsave(diet_validate_plots, filename = file.path(fig.dir, "diet_validate.png"), width = 9, height = 6) summary(lm(avg_leaves_perc_e ~ OrderedLeaf, data = subset(diet_classify_species3, !is.na(Leaf)))) summary(lm(avg_fruit_perc_e ~ OrderedFruit, data = subset(diet_classify_species3, !is.na(Fruit)))) summary(lm(avg_animal ~ OrderedAnimal, data = subset(diet_classify_species3, !is.na(Animal)))) cor.test(diet_classify_species3$avg_leaves_perc_e, diet_classify_species3$Diet.PlantO) cor.test(diet_classify_species3$avg_fruit_perc_e, diet_classify_species3$Diet.Fruit) cor.test(diet_classify_species3$avg_animal, diet_classify_species3$Diet.Animal) ## STRUCTURAL EQUATION MODELLING ============================== # Prepare data ==== traits <- read.csv(file.path(data.dir, "Primate_Traits.csv")) traits$Species <- gsub(traits$Species, pattern = "_", replacement = " ") diet_classify_species_final <- merge(diet_classify_species, traits, by = "Species", all.x = TRUE) diet_classify_species_final$LogBM <- log10(diet_classify_species_final$BodyMass_kg) diet_classify_species_final$LogRange <- log10(diet_classify_species_final$HomeRange_ha) target.cols <- c("Species", "Family", "avg_leaves_perc_e", "avg_leaves_perc_uw", "avg_fruit_perc_e", "avg_fruit_perc_uw", "avg_mora_fruit_prop_e", "avg_mora_fruit_prop_uw", "avg_fabaceae_leaf_prop_e", "avg_fabaceae_leaf_prop_uw", "LogBM", "BodyMass_kg", "LogRange", "HomeRange_ha") diet_classify_species_final2 <- diet_classify_species_final[,target.cols] diet_classify_species_final2 <- diet_classify_species_final2[complete.cases(diet_classify_species_final2),] nrow(diet_classify_species_final2) # 84 -> 92 species diet_classify_species_final2$Frug <- diet_classify_species_final2$avg_fruit_perc_e / 100 diet_classify_species_final2$Mora <- diet_classify_species_final2$avg_mora_fruit_prop_e diet_classify_species_final2$Foli <- diet_classify_species_final2$avg_leaves_perc_e / 100 diet_classify_species_final2$Faba <- diet_classify_species_final2$avg_fabaceae_leaf_prop_e # Basic SEMs ===== frugivory_model <- sem('LogRange ~ LogBM + Frug + Mora LogBM ~ Frug + Mora Mora ~ Frug', data=diet_classify_species_final2) standardizedSolution(frugivory_model, type="std.all") # coefficients modindices(frugivory_model, free.remove = FALSE) summary(frugivory_model, stand=T, rsq=T, fit.measures=T) #semPaths(frugivory_model, what = "stand", residuals = TRUE, intercepts = FALSE, nCharNodes = 0, rotation = 3, sizeMan =10) foliovory_model <- sem('LogRange ~ LogBM + Foli + Faba LogBM ~ Foli + Faba Faba ~ Foli', data=diet_classify_species_final2) summary(foliovory_model, stand=T, rsq=T, fit.measures=T) standardizedSolution(foliovory_model, type="std.all") # coefficients modindices(foliovory_model, free.remove = F) summary(foliovory_model, stand=T, rsq=T, fit.measures=T) #semPaths(foliovory_model, what = "stand", residuals = TRUE, intercepts = FALSE, nCharNodes = 0, rotation = 3, sizeMan =10) ## Phylogenetic SEMs ===== faurby_mcc_subset$tip.label <- gsub(faurby_mcc_subset$tip.label, pattern = "_", replacement = " ") sum(diet_classify_species_final2$Species %in% faurby_mcc_subset$tip.label) # 90 species (out of 92) diet_classify_species_final2$Species[!diet_classify_species_final2$Species %in% faurby_mcc_subset$tip.label] diet_classify_phylosem <- subset(diet_classify_species_final2, diet_classify_species_final2$Species %in% faurby_mcc_subset$tip.label ) faurby_sem <- drop.tip(faurby_mcc_subset, tip = setdiff(faurby_mcc_subset$tip.label, diet_classify_phylosem$Species)) diet_classify_phylosem$Species %in% faurby_sem$tip.label diet_classify_cdata <- comparative.data(data = diet_classify_phylosem, phy = faurby_sem, names.col = "Species") write.csv(diet_classify_cdata$data, file.path(res.dir, "phylo_sem_data.csv"), row.names = TRUE) phylocor <- corBrownian(1, phy = diet_classify_cdata$phy) frugivory_phylo_model <- psem(LogRange = gls(LogRange ~ LogBM + Frug + Mora, data = diet_classify_cdata$data, correlation = phylocor), LogBM = gls(LogBM ~ Frug + Mora, data = diet_classify_cdata$data, correlation = phylocor), Faba = gls(Mora ~ Frug, data = diet_classify_cdata$data, correlation = phylocor), data = diet_classify_cdata$data) summary(frugivory_phylo_model) folivory_phylo_model <- psem(LogRange = gls(LogRange ~ LogBM + Foli + Faba, data = diet_classify_cdata$data, correlation = phylocor), LogBM = gls(LogBM ~ Foli + Faba, data = diet_classify_cdata$data, correlation = phylocor), Faba = gls(Faba ~ Foli, data = diet_classify_cdata$data, correlation = phylocor), data = diet_classify_cdata$data) summary(folivory_phylo_model) ## Plot variables on phylogeny (Fig 3c) ====== primatefamily<- sort(as.vector(unique(diet_classify_phylosem$Family))) cols <- brewer.pal(n = 9, name = "Spectral") diet_classify_cdata$data$Family <- factor(as.vector(diet_classify_cdata$data$Family), levels = primatefamily) diet_classify_cdata$data$LogRange_scaled <- scale(diet_classify_cdata$data$LogRange) diet_classify_cdata$data$LogBM_scaled <- scale(diet_classify_cdata$data$LogBM) diet_classify_cdata$data$Frug_scaled <- scale(diet_classify_cdata$data$Frug) diet_classify_cdata$data$Mora_scaled <- scale(diet_classify_cdata$data$Mora) diet_classify_cdata$data$Foli_scaled <- scale(diet_classify_cdata$data$Foli) diet_classify_cdata$data$Faba_scaled <- scale(diet_classify_cdata$data$Faba) target.col <- c("LogBM_scaled", "LogRange_scaled", "Frug_scaled", "Mora_scaled", "Foli_scaled", "Faba_scaled") range(diet_classify_cdata$data[,target.col]) for(j in target.col){ diet_classify_cdata$data[,j] <- cut(diet_classify_cdata$data[,j], breaks = c(-3.5:-0.5, 0, 0.5:3.5)) } names(diet_classify_cdata$data[,target.col]) <- c("LogBM", "LogHR", "Frug", "Mora", "Foli", "Faba") heatmapcol <- brewer.pal(n = 8, name = "RdBu") test_tree <- ggtree(faurby_sem) +layout_dendrogram() + geom_tiplab(size = 2, angle = 270, offset = -15) + xlim(c(50,-100)) test_tree2 <- test_tree %<+% diet_classify_cdata$data primate_heatmap <- gheatmap(test_tree2, data = diet_classify_cdata$data[,target.col], offset = -1, colnames = FALSE, width = 0.2) + scale_fill_manual(limits = rev(c("(-3.5,-2.5]", "(-2.5,-1.5]", "(-1.5,-0.5]", "(-0.5,0]", "(0,0.5]", "(0.5,1.5]", "(1.5,2.5]", "(2.5,3.5]")), values = heatmapcol) + theme(plot.background = element_blank(), panel.background = element_blank()) ggsave(primate_heatmap + theme(legend.position = "none"), filename = file.path(fig.dir, "primate_heatmap.pdf"), height = 7, width = 10) ggsave(get_legend(primate_heatmap + theme(legend.text = element_blank(), legend.title = element_blank())), filename = file.path(fig.dir, "primate_heatmap_leg.pdf"), height = 3, width = 1) ## Scatter plots of all variables (Fig S6 and S7) ==== plotscatter <- function(x){ x + geom_point(pch = 21, colour = "black", size = 3) + #geom_smooth(method = "lm", se = F, show.legend = T) + guides(colour = FALSE, fill = guide_legend(override.aes = list(colour = NULL, linetype = 0))) + scale_colour_manual(values = cols, breaks = primatefamily, limits = primatefamily) + scale_fill_manual(values = cols, breaks = primatefamily, limits = primatefamily) } a <- plotscatter(ggplot(data = diet_classify_phylosem, aes(y = LogRange, x = LogBM, fill = Family))) b <- plotscatter(ggplot(data = diet_classify_phylosem,aes(y = LogRange, x = Frug, fill = Family))) c <- plotscatter(ggplot(data = diet_classify_phylosem, aes(y = LogRange, x = Mora, fill = Family))) d <- plotscatter(ggplot(data = diet_classify_phylosem, aes(y = LogBM, x = Frug, fill = Family))) e <- plotscatter(ggplot(data = diet_classify_phylosem, aes(y = LogBM, x = Mora, fill = Family))) f <- plotscatter(ggplot(data = diet_classify_phylosem, aes(y = Mora, x = Frug, fill = Family))) frug_sem <- plot_grid(a + theme(legend.position = "none"), b + theme(legend.position = "none"), c + theme(legend.position = "none"), d + theme(legend.position = "none"), e + theme(legend.position = "none"), f + theme(legend.position = "none"), nrow = 2, ncol = 3) frug_sem_wleg <- plot_grid(frug_sem, get_legend(a + theme(legend.position = "bottom")), nrow = 2, rel_heights = c(1, 0.2)) ggsave(frug_sem_wleg, filename = file.path(fig.dir, "frug_sem.pdf"), width= 8, height = 6) ggsave(frug_sem_wleg, filename = file.path(fig.dir, "frug_sem.png"), width= 8, height = 6) a <- plotscatter(ggplot(data = diet_classify_phylosem, aes(y = LogRange, x = LogBM, fill = Family))) b <- plotscatter(ggplot(data = diet_classify_phylosem, aes(y = LogRange, x = Foli, fill = Family))) c <- plotscatter(ggplot(data = diet_classify_phylosem, aes(y = LogRange, x = Faba, fill = Family))) d <- plotscatter(ggplot(data = diet_classify_phylosem, aes(y = LogBM, x = Foli, fill = Family))) e <- plotscatter(ggplot(data = diet_classify_phylosem, aes(y = LogBM, x = Faba, fill = Family))) f <- plotscatter(ggplot(data = diet_classify_phylosem, aes(y = Faba, x = Foli, fill = Family))) foli_sem <- plot_grid(a + theme(legend.position = "none"), b + theme(legend.position = "none"), c + theme(legend.position = "none"), d + theme(legend.position = "none"), e + theme(legend.position = "none"), f + theme(legend.position = "none"), nrow = 2, ncol = 3) foli_sem_wleg <- plot_grid(foli_sem, get_legend(a + theme(legend.position = "bottom")), nrow = 2, rel_heights = c(1, 0.2)) ggsave(foli_sem_wleg, filename = file.path(fig.dir, "foli_sem.pdf"), width= 8, height = 6) ggsave(foli_sem_wleg, filename = file.path(fig.dir, "foli_sem.png"), width= 8, height = 6)