#################################
###PIAZZA et al., PLOS ONE 2020###
#################################

### This script describes the procedures performed in the study presented in Piazza et al. 2020 (PLOS ONE):
### "Ocean warming affected faunal dynamics of benthic invertebrate assemblages across the Toarcian Oceanic Anoxic Event in the Iberian Basin (Spain)"

### N.B.! In the read.table() calls, the name given is that of the Excel sheet containing the dataset in the Supplementary Excel file
### (export each sheet as tab-delimited text before reading, or load the sheet directly; see the sketch below).
### The Excel file is also available from the Dryad Data Repository upon acceptance of the manuscript.

### Load required packages (references for the packages given, unless already provided in the main text)
library(dplyr)    # To be loaded before plyr to avoid conflicts; ref. [1]
library(plyr)     # ref. [2]
library(reshape)  # ref. [3]
library(vegan)
library(clustsig)
library(car)
library(nlme)
library(forecast)
library(ggplot2)  # ref. [4]
library(FSA)

# [1] Wickham H, François R, Henry L, Müller K. dplyr: A Grammar of Data Manipulation. R package version 0.8.3. 2019. Available from: https://CRAN.R-project.org/package=dplyr
# [2] Wickham H. The Split-Apply-Combine Strategy for Data Analysis. J Stat Softw. 2011; 40(1): 1-29. doi: 10.18637/jss.v040.i01
# [3] Wickham H. Reshaping data with the reshape package. J Stat Softw. 2007; 21(12): 1-20. doi: 10.18637/jss.v021.i12
# [4] Wickham H. ggplot2: Elegant Graphics for Data Analysis. 2nd ed. New York: Springer-Verlag; 2016.
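### Optional: the datasets can also be read straight from the Supplementary Excel file instead of a
### tab-delimited export. A minimal sketch using the readxl package; the workbook name
### "Supplementary_file.xlsx" is a placeholder for the actual file name:
# library(readxl)  # install.packages("readxl") if missing
# dat <- as.data.frame(read_excel("Supplementary_file.xlsx", sheet = "OCCURRENCES"))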
#################################################################################################

#######################
###TAXONOMY ANALYSES###
#######################

dat <- read.table("OCCURRENCES", sep = "\t", header = T)  # Load the occurrence dataset
dat[is.na(dat)] <- 0  # Replace the empty cells with 0s
dat <- dat[-which(dat$Tot_occs < 17),]  # Delete samples with < 17 occurrences

# Save the columns with levels and total occurrences as separate vectors and delete them from the dataset so that it is only composed of taxa
tot <- dat$Tot_occs
levs <- dat$Level_.m.  # N.B.: read.table() converts the column name "Level_[m]" to "Level_.m."
dat <- dat[, -c(1:2)]

# Calculate percentage values and round them to 2 digits
dat_perc <- dat/rowSums(dat)*100
dat_perc <- round(dat_perc, dig = 2)
rownames(dat_perc) <- levs  # Define levels as rownames
dat_perc <- as.matrix(dat_perc)  # Transform into a matrix for the analyses

######################################
######## Cluster Analysis ########

res <- simprof(data = dat_perc, method.distance = "braycurtis", method.cluster = "ward.D2")  # Use SIMPROF to highlight the associations
pl.color <- simprof.plot(res)  # Plot the clusters

############################
######## NMDS ########

dis <- vegdist(dat_perc)  # Create the distance matrix
mds3d <- metaMDS(dis, k = 3, try = 100)  # Perform the NMDS
mds3d$stress  # View the stress value

# Extract the NMDS scores and unify them into a single dataset
NMDS3d <- data.frame(NMDS1 = mds3d$points[,1], NMDS2 = mds3d$points[,2], NMDS3 = mds3d$points[,3])
NMDS3d$Level <- rownames(NMDS3d)  # Add levels as an additional column for future plotting

# Pre-TOAE, TOAE and post-TOAE intervals added as a new column
NMDS3d$Interval <- c(rep("a",10), rep("b",15), rep("c",21))

# Define convex hulls by interval for the final plot
find_hull3d <- function(NMDS3d) NMDS3d[chull(NMDS3d$NMDS1, NMDS3d$NMDS2),]
hulls3d <- ddply(NMDS3d, "Interval", find_hull3d)

# Plot the NMDS with hulls
ggplot(NMDS3d, aes(x = NMDS1, y = NMDS2, color = as.factor(Interval), fill = as.factor(Interval), shape = as.factor(Interval)))+
  geom_point(size = 4)+
  geom_polygon(data = hulls3d, alpha = 0.4)+
  ggtitle("NMDS taxonomy 3D")+
  theme(panel.background = element_rect(fill = "white"))+
  theme(panel.border = element_rect(fill = NA, color = "black"))+
  scale_fill_discrete(name = "Interval", breaks = c("a", "b", "c"), labels = c("Pre-TOAE", "TOAE", "Post-TOAE"))+
  scale_colour_discrete(name = "Interval", breaks = c("a", "b", "c"), labels = c("Pre-TOAE", "TOAE", "Post-TOAE"))+
  scale_shape_discrete(name = "Interval", breaks = c("a", "b", "c"), labels = c("Pre-TOAE", "TOAE", "Post-TOAE"))+
  annotate("label", x = -0.4, y = -0.7, label = "3D Stress = 0.131")

# Test with ANOSIM whether there is a significant difference between the intervals
tax.anosim <- anosim(dat_perc, NMDS3d$Interval)  # OK to use either the percentage dataset or the distance matrix
summary(tax.anosim)

############################################
######## Biodiversity curves ########

dat <- read.table("OCCURRENCES", sep = "\t", header = T)  # Load the occurrence dataset
dat[is.na(dat)] <- 0  # Replace the empty cells with 0s
dat <- dat[-which(dat$Tot_occs < 17),]  # Delete samples with < 17 occurrences

# Save the columns with levels and total occurrences as separate vectors and delete them from the dataset so that it is only composed of taxa
tot <- dat$Tot_occs
levs <- dat$Level_.m.  # See the note above on the converted column name
dat <- dat[, -c(1:2)]
rownames(dat) <- levs  # Define levels as rownames

# Calculate the biodiversity indices
# SQS (R script found at http://bio.mq.edu.au/~jalroy/SQS-3-3.R)
source("http://bio.mq.edu.au/~jalroy/SQS-3-3.R")  # Defines the sqs() function used below
dat.m <- as.matrix(dat)  # Transform into a matrix (required by SQS)
Mysqs <- apply(dat.m, 1, sqs, 0.6)  # Quorum set at 0.6 to cover as many samples as possible
Mysqs <- as.data.frame(Mysqs)  # Convert to data frame
Mysqs$Level <- levs  # Add the levels as a new column

# Simpson's evenness
sim <- diversity(dat, "simpson")
sim <- as.data.frame(sim)

# Raw species richness
rich <- specnumber(dat)

###################
###MOLs ANALYSES###
###################

dspa <- read.table("MOLs DATASET", sep = "\t", header = T)  # Load the ecology dataset
dspa$species <- paste(dspa$Genus, dspa$Subgenus, dspa$Qualifier_species, dspa$Species)  # Complete names of taxa in one column
dspa <- dspa[, -c(4:7)]  # Delete the columns that have now been unified into one in the line above
dspa <- dspa[-c(3669:3680),]  # Delete the empty rows at the end of the dataset
dspa$MOL <- paste(dspa$Motility, dspa$Tiering, dspa$Feeding)  # Create one MOL column, which combines the three studied ecological traits
colnames(dspa)[1] <- "Level"  # For simplicity, rename the column with sampling levels

# Count the number of species by MOL
# Delete levels with < 17 occurrences (to be consistent with the other analyses)
dspa2 <- dspa[!dspa$Level %in% c(10.4, 22.65, 23.45),]  # Levels known from before

pre <- dspa2[dspa2$Interval == "a",]
molspec <- count(pre, vars = c("species", "MOL"))
molspec <- molspec[order(molspec$MOL),]
table(molspec$MOL)

to <- dspa2[dspa2$Interval == "b",]
molspec2 <- count(to, vars = c("species", "MOL"))
molspec2 <- molspec2[order(molspec2$MOL),]
table(molspec2$MOL)

post <- dspa2[dspa2$Interval == "c",]
molspec3 <- count(post, vars = c("species", "MOL"))
molspec3 <- molspec3[order(molspec3$MOL),]
table(molspec3$MOL)

# Count of individuals for each MOL by interval
# Visualize the tables as data frames (personally clearer this way)
as.data.frame(table(pre$MOL))
as.data.frame(table(to$MOL))
as.data.frame(table(post$MOL))

# Create a dataframe with counts of MOL by level
mollev <- count(dspa, vars = c("Level", "MOL"))  # Counts of each MOL by level
sumlev <- count(mollev, vars = "Level", "freq")  # Sum of all MOL counts by level
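### Note: the per-level totals hard-coded into mollev$Tot below can also be computed programmatically,
### which is less error-prone if the dataset changes. A minimal equivalent sketch using base R's ave(),
### kept commented out so the original hard-coded vector remains the one that is executed:
# mollev$Tot <- ave(mollev$freq, mollev$Level, FUN = sum)  # Sum of counts within each level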
# Add the sum as a new column based on the above-created 'mollev' and 'sumlev'
table(mollev$Level)  # Know how many rows have to be added for each level
mollev$Tot <- c(rep(130,9), rep(84,8), rep(121,9), rep(82,4), rep(109,8), rep(138,8), rep(75,9), rep(122,9), rep(126,9), rep(163,11), rep(142,10), rep(134,8), rep(108,6), rep(55,9), rep(115,11), rep(37,7), rep(184,9), rep(159,8), rep(23,7), rep(21,8), rep(15,5), rep(26,6), rep(43,7), rep(39,4), rep(40,11), rep(58,8), rep(83,10), rep(134,13), rep(58,10), rep(60,10), rep(51,10), rep(94,9), rep(66,9), rep(56,8), rep(113,10), rep(76,10), rep(74,9), rep(44,8), rep(35,11), rep(39,10), rep(50,11), rep(26,7), rep(37,8), rep(16,5), rep(8,3), rep(72,8), rep(26,9), rep(70,9), rep(31,8))
mollev$Perc <- (mollev$freq/mollev$Tot)*100  # Calculate percentage values
mollev <- mollev[-which(mollev$Tot < 17),]  # Delete samples with < 17 occurrences

# The dataset is rearranged to have each MOL as a column and levels as rows
gui <- mollev[, -c(3:4)]  # Only the most relevant columns are kept (level, MOL and percentage)
mds <- melt(gui, id = c("Level", "MOL"))
gs <- cast(mds, Level ~ MOL + variable)
rownames(gs) <- gs$Level  # Define levels as rownames
guis <- as.data.frame(gs[, -1])  # Delete the column with levels
colnames(guis) <- c("1.1.1", "2.1.1", "2.2.1", "3.1.1", "4.1.1", "4.1.5", "5.1.1", "5.1.5", "5.3.1", "5.4.1", "6.1.1", "6.3.1", "7.1.4", "7.3.8")  # Rename the MOL columns
guis <- as.matrix(guis)  # Transform into a matrix for the analyses
guis[is.na(guis)] <- 0  # NAs replaced with zero
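### Note: the melt()/cast() rearrangement above can also be written in one step with base R's xtabs(),
### which cross-tabulates the percentages by level and MOL directly (empty combinations become 0
### instead of NA). A sketch for comparison only; the column names differ from the renamed ones above:
guis_alt <- as.data.frame.matrix(xtabs(Perc ~ Level + MOL, data = mollev))  # Levels as rows, MOLs as columns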
######################################
######## Cluster Analysis ########

res2 <- simprof(data = guis, method.distance = "braycurtis", method.cluster = "ward.D2")  # Use SIMPROF to highlight the associations
pl.color <- simprof.plot(res2)  # Plot the clusters

############################
######## NMDS ########

distg <- vegdist(guis)  # Create the distance matrix
mds3d <- metaMDS(guis, k = 3, try = 100)  # Perform the NMDS (run on the percentage matrix; metaMDS() computes the distance matrix internally, Bray-Curtis by default)
mds3d$stress  # View the stress value

# Extract the NMDS scores and unify them into a single dataset
NMDS3dg <- data.frame(NMDS1 = mds3d$points[,1], NMDS2 = mds3d$points[,2], NMDS3 = mds3d$points[,3])
NMDS3dg$Level <- rownames(NMDS3dg)  # Define levels as rownames

# Pre-TOAE, TOAE and post-TOAE intervals added as a new column
NMDS3dg$Interval <- c(rep("a",10), rep("b",15), rep("c",21))

# Define convex hulls by interval for the final plot
find_hull3dg <- function(NMDS3dg) NMDS3dg[chull(NMDS3dg$NMDS1, NMDS3dg$NMDS2),]
hulls3dg <- ddply(NMDS3dg, "Interval", find_hull3dg)

# Plot the NMDS with hulls
ggplot(NMDS3dg, aes(x = NMDS1, y = NMDS2, color = as.factor(Interval), fill = as.factor(Interval), shape = as.factor(Interval)))+
  geom_point(size = 4)+
  geom_polygon(data = hulls3dg, alpha = 0.4)+
  ggtitle("NMDS - guilds")+
  theme(panel.background = element_rect(fill = "white"))+
  theme(panel.border = element_rect(fill = NA, color = "black"))+
  scale_fill_discrete(name = "Interval", breaks = c("a", "b", "c"), labels = c("Pre-TOAE", "TOAE", "Post-TOAE"))+
  scale_colour_discrete(name = "Interval", breaks = c("a", "b", "c"), labels = c("Pre-TOAE", "TOAE", "Post-TOAE"))+
  scale_shape_discrete(name = "Interval", breaks = c("a", "b", "c"), labels = c("Pre-TOAE", "TOAE", "Post-TOAE"))+
  annotate("label", x = -0.4, y = -0.7, label = "3D Stress = 0.138")

# Test with ANOSIM whether there is a significant difference between the intervals
gui.anosim <- anosim(guis, NMDS3dg$Interval)  # OK to use either the distance matrix or the percentage dataset
summary(gui.anosim)

##################################################
######## Functional diversity curves ########

dspa <- read.table("MOLs DATASET", sep = "\t", header = T)  # Load the ecology dataset
dspa$species <- paste(dspa$Genus, dspa$Subgenus, dspa$Qualifier_species, dspa$Species)  # Complete names of the taxa in one column
dspa <- dspa[, -c(4:7)]  # Delete the columns that have now been unified into one in the line above
dspa$MOL <- paste(dspa$Motility, dspa$Tiering, dspa$Feeding)  # Create one MOL column, which combines the three studied ecological traits
colnames(dspa)[1] <- "Level"  # For simplicity, rename the column with sampling levels

# Create a dataframe with counts of MOL by level
mollev <- count(dspa, vars = c("Level", "MOL"))  # Counts of each MOL by level
sumlev <- count(mollev, vars = "Level", "freq")  # Sum of all MOL counts by level

# Add the sum as a new column based on the above-created 'mollev' and 'sumlev'
table(mollev$Level)  # Know how many rows have to be added for each level
mollev$Tot <- c(rep(130,9), rep(84,8), rep(121,9), rep(82,4), rep(109,8), rep(138,8), rep(75,9), rep(122,9), rep(126,9), rep(163,11), rep(142,10), rep(134,8), rep(108,6), rep(55,9), rep(115,11), rep(37,7), rep(184,9), rep(159,8), rep(23,7), rep(21,8), rep(15,5), rep(26,6), rep(43,7), rep(39,4), rep(40,11), rep(58,8), rep(83,10), rep(134,13), rep(58,10), rep(60,10), rep(51,10), rep(94,9), rep(66,9), rep(56,8), rep(113,10), rep(76,10), rep(74,9), rep(44,8), rep(35,11), rep(39,10), rep(50,11), rep(26,7), rep(37,8), rep(16,5), rep(8,3), rep(72,8), rep(26,9), rep(70,9), rep(31,8))
mollev <- mollev[-which(mollev$Tot < 17),]  # Delete samples with < 17 occurrences

# The dataset is rearranged to have each MOL as a column and levels as rows
spamol <- melt(mollev, id = c("Level", "MOL"))
counts.sp <- cast(spamol, Level ~ MOL + variable)
rownames(counts.sp) <- counts.sp$Level  # Define levels as rownames
fundiv.sp <- as.data.frame(counts.sp[,-1])  # Delete the column with levels
fundiv.sp2 <- fundiv.sp[, c(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27)]  # Keep only the columns with the abundances
colnames(fundiv.sp2) <- c("1.1.1", "2.1.1", "2.2.1", "3.1.1", "4.1.1", "4.1.5", "5.1.1", "5.1.5", "5.3.1", "5.4.1", "6.1.1", "6.3.1", "7.1.4", "7.3.8")  # Rename the MOL columns
fundiv.sp2[is.na(fundiv.sp2)] <- 0  # NAs replaced with zero

# Calculate the biodiversity indices
# SQS (R script found at http://bio.mq.edu.au/~jalroy/SQS-3-3.R; sqs() already sourced above)
fundiv.spm <- as.matrix(fundiv.sp2)  # Transform into a matrix (required by SQS)
sqsf <- apply(fundiv.spm, 1, sqs, 0.8)  # Quorum set at 0.8 to cover as many samples as possible
sqsf <- as.data.frame(sqsf)  # Convert to data frame
sqsf$Level <- rownames(sqsf)  # Add the levels as a new column

# Simpson's evenness
simg <- diversity(fundiv.sp2, "simpson")
simg <- as.data.frame(simg)

# Raw MOL richness
richg <- specnumber(fundiv.sp2)

##########################################################
######## Percentages plots of single traits ########

# We want to obtain percentage curves for the most important trait in each category:
# Suspension feeders (feeding)
# True epifauna (tiering)
# Passive (motility)
# For this purpose we use the simplified categories for motility and feeding

dspa <- read.table("MOLs DATASET", sep = "\t", header = T)  # Load the ecology dataset
dspa$species <- paste(dspa$Genus, dspa$Subgenus, dspa$Qualifier_species, dspa$Species)  # Complete names of the taxa in one column
dspa <- dspa[, -c(4:7)]  # Delete the columns that have now been unified into one in the line above
dspa <- dspa[-c(3669:3680),]  # Delete the empty rows at the end of the dataset
colnames(dspa)[1] <- "Level"  # For simplicity, rename the column with sampling levels
# MOTILITY
spamob <- count(dspa, vars = c("Level", "Motility_2"))  # Counts by level
# Add the sum as a new column based on the above-found counts by level
# The overall total number of individuals by level is already known
mob.lev <- count(spamob, vars = "Level", "freq")
table(spamob$Level)  # Know how many rows for each level
spamob$Tot <- c(rep(130,2), rep(84,2), rep(121,2), rep(82,2), rep(109,2), rep(138,2), rep(75,2), rep(122,2), rep(126,2), rep(163,2), rep(142,2), rep(134,2), rep(108,2), rep(55,2), rep(115,2), rep(37,2), rep(184,2), rep(159,2), rep(23,2), rep(21,2), rep(15,2), rep(26,2), rep(43,2), rep(39,2), rep(40,2), rep(58,2), rep(83,2), rep(134,2), rep(58,2), rep(60,2), rep(51,2), rep(94,2), rep(66,2), rep(56,2), rep(113,2), rep(76,2), rep(74,2), rep(44,2), rep(35,2), rep(39,2), rep(50,2), rep(26,2), rep(37,2), rep(16,2), rep(8,1), rep(72,2), rep(26,2), rep(70,2), rep(31,2))
spamob$Perc <- (spamob$freq/spamob$Tot)*100  # Calculate percentage values
spamob <- spamob[-which(spamob$Tot < 17),]  # Delete samples with < 17 occurrences

# The dataset is rearranged to have each trait as a column and levels as rows
spamob2 <- spamob[, -c(3:4)]  # Only the most relevant columns are kept (level, trait and percentage)
spamob2 <- melt(spamob2, id = c("Level", "Motility_2"))
spamob2 <- cast(spamob2, Level ~ Motility_2 + variable)
colnames(spamob2) <- c("Level", "Passive", "Active")  # Rename the trait columns
spamob2[is.na(spamob2)] <- 0  # NAs replaced with zero

# Count the number of species by trait for each interval
# We use the already defined subsets by interval from dspa2 (where levels with < 17 occurrences have been deleted)
pre <- dspa2[dspa2$Interval == "a",]
to <- dspa2[dspa2$Interval == "b",]
post <- dspa2[dspa2$Interval == "c",]

mob2spec <- count(pre, vars = c("species", "Motility_2"))
mob2spec <- mob2spec[order(mob2spec$Motility_2),]
table(mob2spec$Motility_2)

mob2spec <- count(to, vars = c("species", "Motility_2"))
mob2spec <- mob2spec[order(mob2spec$Motility_2),]
table(mob2spec$Motility_2)

mob2spec <- count(post, vars = c("species", "Motility_2"))
mob2spec <- mob2spec[order(mob2spec$Motility_2),]
table(mob2spec$Motility_2)

# Count the number of individuals by trait for each interval
as.data.frame(table(pre$Motility_2))
as.data.frame(table(to$Motility_2))
as.data.frame(table(post$Motility_2))

# FEEDING
spafed <- count(dspa, vars = c("Level", "Feeding_2"))  # Counts by level
# Add the sum as a new column based on the above-found counts by level
# The overall total number of individuals by level is already known
fed.lev <- count(spafed, vars = "Level", "freq")
table(spafed$Level)  # Know how many rows for each level
spafed$Tot <- c(rep(130,2), rep(84,2), rep(121,2), rep(82,1), rep(109,1), rep(138,1), rep(75,2), rep(122,1), rep(126,1), rep(163,2), rep(142,2), rep(134,1), rep(108,1), rep(55,2), rep(115,2), rep(37,2), rep(184,2), rep(159,2), rep(23,2), rep(21,1), rep(15,1), rep(26,2), rep(43,2), rep(39,2), rep(40,2), rep(58,2), rep(83,2), rep(134,2), rep(58,2), rep(60,2), rep(51,2), rep(94,2), rep(66,2), rep(56,2), rep(113,2), rep(76,2), rep(74,2), rep(44,2), rep(35,2), rep(39,2), rep(50,2), rep(26,2), rep(37,2), rep(16,2), rep(8,2), rep(72,2), rep(26,2), rep(70,2), rep(31,2))
spafed$Perc <- (spafed$freq/spafed$Tot)*100  # Calculate percentage values
spafed <- spafed[-which(spafed$Tot < 17),]  # Delete samples with < 17 occurrences
# The dataset is rearranged to have each trait as a column and levels as rows
spafed2 <- spafed[, -c(3:4)]  # Only the most relevant columns are kept (level, trait and percentage)
spafed2 <- melt(spafed2, id = c("Level", "Feeding_2"))
spafed2 <- cast(spafed2, Level ~ Feeding_2 + variable)
colnames(spafed2) <- c("Level", "Susp", "Other")  # Rename the trait columns
spafed2[is.na(spafed2)] <- 0

# Count the number of species by trait for each interval
fed2spec <- count(pre, vars = c("species", "Feeding_2"))
fed2spec <- fed2spec[order(fed2spec$Feeding_2),]
table(fed2spec$Feeding_2)

fed2spec <- count(to, vars = c("species", "Feeding_2"))
fed2spec <- fed2spec[order(fed2spec$Feeding_2),]
table(fed2spec$Feeding_2)

fed2spec <- count(post, vars = c("species", "Feeding_2"))
fed2spec <- fed2spec[order(fed2spec$Feeding_2),]
table(fed2spec$Feeding_2)

# Count the number of individuals by trait for each interval
as.data.frame(table(pre$Feeding_2))
as.data.frame(table(to$Feeding_2))
as.data.frame(table(post$Feeding_2))

# TIERING
spatie <- count(dspa, vars = c("Level", "Tiering"))  # Counts by level
# Add the sum as a new column based on the above-found counts by level
# The overall total number of individuals by level is already known
tie.lev <- count(spatie, vars = "Level", "freq")
table(spatie$Level)  # Know how many rows for each level
spatie$Tot <- c(rep(130,4), rep(84,3), rep(121,4), rep(82,2), rep(109,4), rep(138,4), rep(75,4), rep(122,4), rep(126,4), rep(163,4), rep(142,4), rep(134,4), rep(108,4), rep(55,3), rep(115,4), rep(37,4), rep(184,3), rep(159,3), rep(23,4), rep(21,4), rep(15,3), rep(26,3), rep(43,4), rep(39,1), rep(40,4), rep(58,4), rep(83,4), rep(134,4), rep(58,4), rep(60,4), rep(51,4), rep(94,4), rep(66,4), rep(56,3), rep(113,4), rep(76,4), rep(74,4), rep(44,3), rep(35,4), rep(39,3), rep(50,4), rep(26,4), rep(37,4), rep(16,3), rep(8,2), rep(72,4), rep(26,3), rep(70,4), rep(31,4))
spatie$Perc <- (spatie$freq/spatie$Tot)*100  # Calculate percentage values
spatie <- spatie[-which(spatie$Tot < 17),]  # Delete samples with < 17 occurrences

# The dataset is rearranged to have each trait as a column and levels as rows
spatie2 <- spatie[, -c(3:4)]  # Only the most relevant columns are kept (level, trait and percentage)
spatie2 <- melt(spatie2, id = c("Level", "Tiering"))
spatie2 <- cast(spatie2, Level ~ Tiering + variable)
colnames(spatie2) <- c("Level", "Epi", "Semi", "Shainf", "Dinf")  # Rename the trait columns
spatie2[is.na(spatie2)] <- 0

# Count the number of species by trait for each interval
tie2spec <- count(pre, vars = c("species", "Tiering"))
tie2spec <- tie2spec[order(tie2spec$Tiering),]
table(tie2spec$Tiering)

tie2spec <- count(to, vars = c("species", "Tiering"))
tie2spec <- tie2spec[order(tie2spec$Tiering),]
table(tie2spec$Tiering)

tie2spec <- count(post, vars = c("species", "Tiering"))
tie2spec <- tie2spec[order(tie2spec$Tiering),]
table(tie2spec$Tiering)

# Count the number of individuals by trait for each interval
as.data.frame(table(pre$Tiering))
as.data.frame(table(to$Tiering))
as.data.frame(table(post$Tiering))

# Plot the single traits
ggplot(spamob2, aes(x = Level, y = Passive)) +
  geom_point(size = 4, colour = "darkgrey")+
  geom_line()+
  xlab("Level in section [m]")+
  theme(axis.text.x = element_text(angle = 90), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black"))+
  scale_x_continuous(limits = c(0, 30), breaks = seq(0, 30, by = 1))+
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, by = 20))+
  ggtitle("Motility")+
  coord_flip()
ggplot(spafed2, aes(x = Level, y = Susp)) +
  geom_point(size = 4)+
  geom_line()+
  xlab("Level in section [m]")+
  theme(axis.text.x = element_text(angle = 90), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black"))+
  scale_x_continuous(limits = c(0, 30), breaks = seq(0, 30, by = 1))+
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, by = 20))+
  ggtitle("Feeding")+
  coord_flip()

ggplot(spatie2, aes(x = Level, y = Epi)) +
  geom_point(size = 4, colour = "grey")+
  geom_line()+
  xlab("Level in section [m]")+
  theme(axis.text.x = element_text(angle = 90), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black"))+
  scale_x_continuous(limits = c(0, 30), breaks = seq(0, 30, by = 1))+
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, by = 20))+
  ggtitle("Tiering")+
  coord_flip()

###########################################
###CORRELATIONS BY TAXONOMICAL DIVERSITY###
###########################################

# Create a database with all the biodiversity indices
div <- cbind(Mysqs, sim, rich)
colnames(div) <- c("sqs", "Level", "sim", "rich")
div$Level <- as.numeric(as.character(div$Level))  # Make sure the levels are numeric
div7 <- cbind(div, NMDS3d[, 1:2])  # Add the NMDS scores

# Load the isotope database
iso <- read.table("ISOTOPE DATABASE", header = T, sep = "\t", dec = ".")  # Isotope data from Ullmann et al. (2020) in Scientific Reports
colnames(iso) <- c("Level", "d13C", "sdC", "seC", "d18O", "sdO", "seO")  # Rename the columns
iso2 <- iso[-c(21, 44, 45),]  # Delete the rows for which there is no corresponding ecological value

# Database of taxonomical and isotope time series
divcorr <- cbind(div7, d18O = iso2$d18O, d13C = iso2$d13C)  # Add the isotopes to the biodiversity indices
divcorr2 <- divcorr[complete.cases(divcorr),]  # Delete the rows with NAs

# Start correlation procedure
# 1. Investigate the trend in time with OLS
m1 <- lm(sqs ~ Level, data = divcorr2)
summary(m1)
m2 <- lm(sim ~ Level, data = divcorr2)
summary(m2)
m3 <- lm(rich ~ Level, data = divcorr2)
summary(m3)
m4 <- lm(NMDS1 ~ Level, data = divcorr2)
summary(m4)
m5 <- lm(NMDS2 ~ Level, data = divcorr2)
summary(m5)
m6 <- lm(d18O ~ Level, data = divcorr2)
summary(m6)
m7 <- lm(d13C ~ Level, data = divcorr2)
summary(m7)

# 2. Do a regular OLS regression of the whole model and check for autocorrelation of the residuals
ols1 <- step(lm(formula = sqs ~ d18O + d13C, data = divcorr2))
summary(ols1)
durbinWatsonTest(ols1, max.lag = 5)
ols2 <- step(lm(formula = sim ~ d18O + d13C, data = divcorr2))
summary(ols2)
durbinWatsonTest(ols2, max.lag = 5)
ols3 <- step(lm(formula = rich ~ d18O + d13C, data = divcorr2))
summary(ols3)
durbinWatsonTest(ols3, max.lag = 5)
ols4 <- step(lm(formula = NMDS1 ~ d18O + d13C, data = divcorr2))
summary(ols4)
durbinWatsonTest(ols4, max.lag = 5)
ols5 <- step(lm(formula = NMDS2 ~ d18O + d13C, data = divcorr2))
summary(ols5)
durbinWatsonTest(ols5, max.lag = 5)

# 3. Differencing of the time series to solve non-stationarity/autocorrelation
source("http://www.graemetlloyd.com/pubdata/functions_2.r")  # Provides the gen.diff() function used below
dsqs <- gen.diff(divcorr2$sqs, divcorr2$Level)
dsim <- gen.diff(divcorr2$sim, divcorr2$Level)
drich <- gen.diff(divcorr2$rich, divcorr2$Level)
dnmds1 <- gen.diff(divcorr2$NMDS1, divcorr2$Level)
dnmds2 <- gen.diff(divcorr2$NMDS2, divcorr2$Level)
dd18O <- gen.diff(divcorr2$d18O, divcorr2$Level)
dd13C <- gen.diff(divcorr2$d13C, divcorr2$Level)
gd <- data.frame(dsqs, dsim, drich, dnmds1, dnmds2, dd18O, dd13C)  # New dataframe with the differenced time series
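### For intuition only: gen.diff() (from Lloyd's functions_2.r, sourced above) performs generalized
### differencing of each series against stratigraphic position. The simplest related transform is a
### plain first difference per metre of section; the sketch below illustrates the idea and is NOT
### equivalent to gen.diff():
d_simple <- diff(divcorr2$sqs) / diff(divcorr2$Level)  # Change in SQS diversity per metre between successive levels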
# 4. Check that the model assumptions of stationarity and non-autocorrelation are met
ols1b <- step(lm(formula = dsqs ~ dd18O + dd13C, data = gd))
summary(ols1b)
durbinWatsonTest(ols1b, max.lag = 5)
ols2b <- step(lm(formula = dsim ~ dd18O + dd13C, data = gd))
summary(ols2b)
durbinWatsonTest(ols2b, max.lag = 5)
ols3b <- step(lm(formula = drich ~ dd18O + dd13C, data = gd))
summary(ols3b)
durbinWatsonTest(ols3b, max.lag = 5)
ols4b <- step(lm(formula = dnmds1 ~ dd18O + dd13C, data = gd))
summary(ols4b)
durbinWatsonTest(ols4b, max.lag = 5)
ols5b <- step(lm(formula = dnmds2 ~ dd18O + dd13C, data = gd))
summary(ols5b)
durbinWatsonTest(ols5b, max.lag = 5)

# 5. Fit the residuals of the above OLS to an ARIMA process and check for stationarity
auto.arima(residuals(ols1b))
resar <- arima(residuals(ols1b), c(0,0,0))$residuals  # ARIMA parameters known from the code line above
ndiffs(resar, alpha = 0.05, test = c("adf"), max.d = 5)
ndiffs(resar, alpha = 0.05, test = c("kpss"), max.d = 5)
auto.arima(residuals(ols2b))
resar2 <- arima(residuals(ols2b), c(0,0,0))$residuals
ndiffs(resar2, alpha = 0.05, test = c("adf"), max.d = 5)
ndiffs(resar2, alpha = 0.05, test = c("kpss"), max.d = 5)
auto.arima(residuals(ols3b))
resar3 <- arima(residuals(ols3b), c(0,0,0))$residuals
ndiffs(resar3, alpha = 0.05, test = c("adf"), max.d = 5)
ndiffs(resar3, alpha = 0.05, test = c("kpss"), max.d = 5)
auto.arima(residuals(ols4b))
resar4 <- arima(residuals(ols4b), c(1,0,2))$residuals
ndiffs(resar4, alpha = 0.05, test = c("adf"), max.d = 5)
ndiffs(resar4, alpha = 0.05, test = c("kpss"), max.d = 5)
auto.arima(residuals(ols5b))
resar5 <- arima(residuals(ols5b), c(0,0,0))$residuals
ndiffs(resar5, alpha = 0.05, test = c("adf"), max.d = 5)
ndiffs(resar5, alpha = 0.05, test = c("kpss"), max.d = 5)

# 6. Run the GLS regressions
gls1 <- gls(dsqs ~ dd18O + dd13C, data = gd, correlation = NULL, method = 'ML')
summary(gls1)
gls2 <- gls(dsim ~ dd18O + dd13C, data = gd, correlation = NULL, method = 'ML')
summary(gls2)
gls3 <- gls(drich ~ dd18O + dd13C, data = gd, correlation = NULL, method = 'ML')
summary(gls3)
gls4 <- gls(dnmds1 ~ dd18O + dd13C, data = gd, correlation = corARMA(p = 1, q = 2), method = 'ML')
summary(gls4)
gls5 <- gls(dnmds2 ~ dd18O + dd13C, data = gd, correlation = NULL, method = 'ML')
summary(gls5)
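### Optional check (a sketch, not part of the original procedure): to verify that the corARMA
### structure in gls4 has absorbed the residual autocorrelation, inspect the autocorrelation
### function of the normalized GLS residuals. Spikes inside the confidence band suggest no
### remaining autocorrelation:
acf(residuals(gls4, type = "normalized"))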
##########################################
###CORRELATIONS BY FUNCTIONAL DIVERSITY###
##########################################

# Create a database of all ecological time series
funcdiv <- cbind(sqsf, simg, richg)
colnames(funcdiv) <- c("sqs", "Level", "Simp", "Rich")
funcdiv$Level <- as.numeric(as.character(funcdiv$Level))  # Make sure the levels are numeric
fdiv7 <- cbind(funcdiv, NMDS3dg[, 1:2])  # Add the NMDS scores

# Load the isotope database
iso <- read.table("ISOTOPE DATABASE", header = T, sep = "\t", dec = ".")  # Isotope data from Ullmann et al. (2020) in Scientific Reports
colnames(iso) <- c("Level", "d13C", "sdC", "seC", "d18O", "sdO", "seO")  # Rename the columns
iso2 <- iso[-c(21, 44, 45),]  # Delete the rows for which there is no corresponding ecological value

# Database of ecological and isotope time series
fdivcorr <- cbind(fdiv7, d18O = iso2$d18O, d13C = iso2$d13C)  # Add the isotopes to the ecological indices
fdivcorr2 <- fdivcorr[complete.cases(fdivcorr),]  # Delete the rows with NAs

# Start correlation procedure
# 1. Investigate the trend in time with OLS
m1g <- lm(sqs ~ Level, data = fdivcorr2)
summary(m1g)
m2g <- lm(Simp ~ Level, data = fdivcorr2)
summary(m2g)
m3g <- lm(Rich ~ Level, data = fdivcorr2)
summary(m3g)
m4g <- lm(NMDS1 ~ Level, data = fdivcorr2)
summary(m4g)
m5g <- lm(NMDS2 ~ Level, data = fdivcorr2)
summary(m5g)
m6g <- lm(d18O ~ Level, data = fdivcorr2)
summary(m6g)
m7g <- lm(d13C ~ Level, data = fdivcorr2)
summary(m7g)

# 2. Do a regular OLS regression of the whole model and check for autocorrelation of the residuals
ols1c <- step(lm(formula = sqs ~ d18O + d13C, data = fdivcorr2))
summary(ols1c)
durbinWatsonTest(ols1c, max.lag = 5)
ols2c <- step(lm(formula = Simp ~ d18O + d13C, data = fdivcorr2))
summary(ols2c)
durbinWatsonTest(ols2c, max.lag = 5)
ols3c <- step(lm(formula = Rich ~ d18O + d13C, data = fdivcorr2))
summary(ols3c)
durbinWatsonTest(ols3c, max.lag = 5)
ols4c <- step(lm(formula = NMDS1 ~ d18O + d13C, data = fdivcorr2))
summary(ols4c)
durbinWatsonTest(ols4c, max.lag = 5)
ols5c <- step(lm(formula = NMDS2 ~ d18O + d13C, data = fdivcorr2))
summary(ols5c)
durbinWatsonTest(ols5c, max.lag = 5)

# 3. Differencing of the time series to solve non-stationarity/autocorrelation
# gen.diff() from http://www.graemetlloyd.com/pubdata/functions_2.r, sourced above
dsqsg <- gen.diff(fdivcorr2$sqs, fdivcorr2$Level)
dsimg <- gen.diff(fdivcorr2$Simp, fdivcorr2$Level)
drichg <- gen.diff(fdivcorr2$Rich, fdivcorr2$Level)
dnmds1g <- gen.diff(fdivcorr2$NMDS1, fdivcorr2$Level)
dnmds2g <- gen.diff(fdivcorr2$NMDS2, fdivcorr2$Level)
dd18Og <- gen.diff(fdivcorr2$d18O, fdivcorr2$Level)
dd13Cg <- gen.diff(fdivcorr2$d13C, fdivcorr2$Level)
gdg <- data.frame(dsqsg, dsimg, drichg, dnmds1g, dnmds2g, dd18Og, dd13Cg)  # New dataframe with the differenced time series

# 4. OLS of the differenced time series
ols1d <- step(lm(formula = dsqsg ~ dd18Og + dd13Cg, data = gdg))
summary(ols1d)
durbinWatsonTest(ols1d, max.lag = 5)
ols2d <- step(lm(formula = dsimg ~ dd18Og + dd13Cg, data = gdg))
summary(ols2d)
durbinWatsonTest(ols2d, max.lag = 5)
ols3d <- step(lm(formula = drichg ~ dd18Og + dd13Cg, data = gdg))
summary(ols3d)
durbinWatsonTest(ols3d, max.lag = 5)
ols4d <- step(lm(formula = dnmds1g ~ dd18Og + dd13Cg, data = gdg))
summary(ols4d)
durbinWatsonTest(ols4d, max.lag = 5)
ols5d <- step(lm(formula = dnmds2g ~ dd18Og + dd13Cg, data = gdg))
summary(ols5d)
durbinWatsonTest(ols5d, max.lag = 5)
# 5. Fit the residuals of the previous OLS to an ARIMA process and check for stationarity
auto.arima(residuals(ols1d))
resarg <- arima(residuals(ols1d), c(0,0,0))$residuals  # ARIMA parameters known from the code line above
ndiffs(resarg, alpha = 0.05, test = c("adf"), max.d = 5)
ndiffs(resarg, alpha = 0.05, test = c("kpss"), max.d = 5)
auto.arima(residuals(ols2d))
resarg2 <- arima(residuals(ols2d), c(0,0,0))$residuals
ndiffs(resarg2, alpha = 0.05, test = c("adf"), max.d = 5)
ndiffs(resarg2, alpha = 0.05, test = c("kpss"), max.d = 5)
auto.arima(residuals(ols3d))
resarg3 <- arima(residuals(ols3d), c(0,0,0))$residuals
ndiffs(resarg3, alpha = 0.05, test = c("adf"), max.d = 5)
ndiffs(resarg3, alpha = 0.05, test = c("kpss"), max.d = 5)
auto.arima(residuals(ols4d))
resarg4 <- arima(residuals(ols4d), c(0,0,0))$residuals
ndiffs(resarg4, alpha = 0.05, test = c("adf"), max.d = 5)
ndiffs(resarg4, alpha = 0.05, test = c("kpss"), max.d = 5)
auto.arima(residuals(ols5d))
resarg5 <- arima(residuals(ols5d), c(0,0,0))$residuals
ndiffs(resarg5, alpha = 0.05, test = c("adf"), max.d = 5)
ndiffs(resarg5, alpha = 0.05, test = c("kpss"), max.d = 5)

# 6. Run the GLS regressions
gls1g <- gls(dsqsg ~ dd18Og + dd13Cg, data = gdg, correlation = NULL, method = 'ML')
summary(gls1g)
gls2g <- gls(dsimg ~ dd18Og + dd13Cg, data = gdg, correlation = NULL, method = 'ML')
summary(gls2g)
gls3g <- gls(drichg ~ dd18Og + dd13Cg, data = gdg, correlation = NULL, method = 'ML')
summary(gls3g)
gls4g <- gls(dnmds1g ~ dd18Og + dd13Cg, data = gdg, correlation = NULL, method = 'ML')
summary(gls4g)
gls5g <- gls(dnmds2g ~ dd18Og + dd13Cg, data = gdg, correlation = NULL, method = 'ML')
summary(gls5g)

############################################
###  CHANGE IN PERCENTAGES BY INTERVAL  ###
############################################

######################################
######## SINGLE MOLS ########

# guis is the starting dataset; it was converted to a matrix above, so turn it back into a data frame before adding the interval column
guis <- as.data.frame(guis)
guis$int <- c(rep("a",10), rep("b",15), rep("c",21))  # Add the interval column

# Statistical summary (the median is the important value) by interval
# N.B.: the MOL numbering in the manuscript does not correspond to the column order in the R dataset!
Summarize(guis[,1] ~ guis[,15], data = guis, digits = 3)   # Corresponds to MOL 2 as numbered in the manuscript
Summarize(guis[,2] ~ guis[,15], data = guis, digits = 3)   # Corresponds to MOL 4
Summarize(guis[,3] ~ guis[,15], data = guis, digits = 3)   # Corresponds to MOL 7
Summarize(guis[,4] ~ guis[,15], data = guis, digits = 3)   # Corresponds to MOL 8
Summarize(guis[,5] ~ guis[,15], data = guis, digits = 3)   # Corresponds to MOL 1
Summarize(guis[,6] ~ guis[,15], data = guis, digits = 3)   # Corresponds to MOL 12
Summarize(guis[,7] ~ guis[,15], data = guis, digits = 3)   # Corresponds to MOL 5
Summarize(guis[,8] ~ guis[,15], data = guis, digits = 3)   # Corresponds to MOL 11
Summarize(guis[,9] ~ guis[,15], data = guis, digits = 3)   # Corresponds to MOL 3
Summarize(guis[,10] ~ guis[,15], data = guis, digits = 3)  # Corresponds to MOL 6
Summarize(guis[,11] ~ guis[,15], data = guis, digits = 3)  # Corresponds to MOL 9
Summarize(guis[,12] ~ guis[,15], data = guis, digits = 3)  # Corresponds to MOL 14
Summarize(guis[,13] ~ guis[,15], data = guis, digits = 3)  # Corresponds to MOL 10
Summarize(guis[,14] ~ guis[,15], data = guis, digits = 3)  # Corresponds to MOL 13

# Calculate the percentage change pairwise by interval (median). Median values taken from the Summarize() output above.
# MOLs are ordered as in the manuscript from here on!
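### The medians hard-coded into the difference vectors below are taken from the Summarize() output
### above. They can also be obtained in a single call, e.g. with aggregate() (a convenience sketch;
### the columns follow the R dataset order, not the manuscript MOL numbering):
aggregate(guis[, 1:14], by = list(Interval = guis$int), FUN = median)  # Median percentage of each MOL per interval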
# Post-TOAE vs. Pre-TOAE
rk1 <- as.data.frame(matrix(, nrow = 14, ncol = 2))  # Create an empty dataframe
rk1[,1] <- c(1:14)  # One column for the MOL numbers as in the manuscript
rk1[,2] <- c((3.846-51.366),(38.462-9.521),(10.638-10.415),(2.857-6.290),(3.333-6.043),(2.857-3.399),(8.333-2.830),(0-1.01),(7.843-0.397),(6.383-0),(0-0),(0-0),(0-0),(0-0))  # One column for the difference of medians
colnames(rk1) <- c("rank", "change")  # Name the columns
cols1 <- c("grey", "white")[(rk1$change > 0) + 1]  # Define white columns for positive differences, and grey for negative differences
barplot(rk1$change, main = "% change MOLs pre- vs. post-TOAE", names.arg = rk1$rank, axis.lty = 1, yaxt = "n", col = cols1)  # Differences of medians plotted as barplots
axis(2, at = seq(-50, 30, 5), las = 1)  # Customize the vertical axis

# Post-TOAE vs. TOAE
rk2 <- as.data.frame(matrix(, nrow = 14, ncol = 2))
rk2[,1] <- c(1:14)
rk2[,2] <- c((3.846-0.870),(2.857-0.629),(10.638-1.852),(2.857-0.629),(3.333-2.985),(2.857-2.703),(8.333-4.225),(0-0),(7.843-0),(6.383-0.870),(0-0),(0-0),(0-0),(0-1.887))
colnames(rk2) <- c("rank", "change")
cols2 <- c("grey", "white")[(rk2$change > 0) + 1]
barplot(rk2$change, main = "% change MOLs TOAE vs. post-TOAE", names.arg = rk2$rank, axis.lty = 1, yaxt = "n", col = cols2)
axis(2, at = seq(-5, 10, 5), las = 1)

# TOAE vs. Pre-TOAE
rk3 <- as.data.frame(matrix(, nrow = 14, ncol = 2))
rk3[,1] <- c(1:14)
rk3[,2] <- c((0.870-51.366),(50.000-9.521),(1.852-10.415),(0.629-6.290),(2.985-6.043),(2.703-3.399),(4.225-2.830),(0-1.01),(0-0.397),(0.870-0),(0-0),(0-0),(0-0),(1.887-0))
colnames(rk3) <- c("rank", "change")
cols3 <- c("grey", "white")[(rk3$change > 0) + 1]
barplot(rk3$change, main = "% change MOLs pre- vs. TOAE", names.arg = rk3$rank, axis.lty = 1, yaxt = "n", col = cols3)
axis(2, at = seq(-55, 45, 5), las = 1)

##################################################
######## FEEDING - TIERING - MOBILITY ########

spamob2  # Starting dataset
spamob2$int <- c(rep("a",10), rep("b",15), rep("c",21))  # Add a column with the interval
spatie2
spatie2$int <- c(rep("a",10), rep("b",15), rep("c",21))
spafed2
spafed2$int <- c(rep("a",10), rep("b",15), rep("c",21))

# Statistical summary (the median is the important value) by interval
Summarize(Passive ~ int, data = spamob2, digits = 3)
Summarize(Epi ~ int, data = spatie2, digits = 3)
Summarize(Susp ~ int, data = spafed2, digits = 3)

# Calculate the percentage change pairwise by interval (median). Median values taken from the Summarize() output above.
# Post-TOAE vs. Pre-TOAE
db1 <- as.data.frame(matrix(, nrow = 3, ncol = 2))  # Create an empty dataframe
db1[,1] <- c(1:3)  # One column for each of the three traits
db1[,2] <- c((56.637-79.034),(80-80.548),(92.308-99.333))  # One column for the difference of medians
colnames(db1) <- c("rank", "change")  # Name the columns
cols1 <- c("grey", "white")[(db1$change > 0) + 1]  # Define white columns for positive differences, and grey for negative differences
barplot(db1$change, main = "% change traits pre- vs. post-TOAE", names.arg = db1$rank, axis.lty = 1, yaxt = "n", col = cols1)  # Differences of medians plotted as barplots
axis(2, at = seq(-25, 5, 5), las = 1)  # Customize the vertical axis

# Post-TOAE vs. TOAE
db2 <- as.data.frame(matrix(, nrow = 3, ncol = 2))
db2[,1] <- c(1:3)
db2[,2] <- c((56.637-71.127),(80-73.944),(92.308-96.196))
colnames(db2) <- c("rank", "change")
cols2 <- c("grey", "white")[(db2$change > 0) + 1]
barplot(db2$change, main = "% change trait TOAE vs. post-TOAE", names.arg = db2$rank, axis.lty = 1, yaxt = "n", col = cols2)
axis(2, at = seq(-15, 5, 5), las = 1)
# TOAE vs. Pre-TOAE
db3 <- as.data.frame(matrix(, nrow = 3, ncol = 2))
db3[,1] <- c(1:3)
db3[,2] <- c((71.127-79.034),(73.944-80.548),(96.196-99.333))
colnames(db3) <- c("rank", "change")
cols3 <- c("grey", "white")[(db3$change > 0) + 1]
barplot(db3$change, main = "% change traits pre- vs. TOAE", names.arg = db3$rank, axis.lty = 1, yaxt = "n", col = cols3)
axis(2, at = seq(-10, 0, 5), las = 1)

#################################################################################
###  K-W AND DUNN'S TEST FOR SIGNIFICANCE OF CHANGE BETWEEN INTERVALS  ###
#################################################################################

######################################
######## DETAILED MOLS ########

# MOLs 11, 12 and 13 are not included because the overall sample size is too small to test the significance of change.
kruskal.test(guis[,1] ~ guis[,15])  # MOL 2 in the manuscript
dunnTest(guis[,1] ~ guis[,15], data = guis, method = 'bonferroni')  # Bonferroni correction applied
kruskal.test(guis[,2] ~ guis[,15])  # MOL 4 in the manuscript
dunnTest(guis[,2] ~ guis[,15], data = guis, method = 'bonferroni')
kruskal.test(guis[,3] ~ guis[,15])  # MOL 7 in the manuscript
dunnTest(guis[,3] ~ guis[,15], data = guis, method = 'bonferroni')
kruskal.test(guis[,4] ~ guis[,15])  # MOL 8 in the manuscript; no significant result from K-W, so no Dunn's test applied
kruskal.test(guis[,5] ~ guis[,15])  # MOL 1 in the manuscript
dunnTest(guis[,5] ~ guis[,15], data = guis, method = 'bonferroni')
kruskal.test(guis[,7] ~ guis[,15])  # MOL 5 in the manuscript; no significant result from K-W, so no Dunn's test applied
kruskal.test(guis[,9] ~ guis[,15])  # MOL 3 in the manuscript; no significant result from K-W, so no Dunn's test applied
kruskal.test(guis[,10] ~ guis[,15])  # MOL 6 in the manuscript; no significant result from K-W, so no Dunn's test applied
kruskal.test(guis[,11] ~ guis[,15])  # MOL 9 in the manuscript
dunnTest(guis[,11] ~ guis[,15], data = guis, method = 'bonferroni')
kruskal.test(guis[,12] ~ guis[,15])  # MOL 14 in the manuscript
dunnTest(guis[,12] ~ guis[,15], data = guis, method = 'bonferroni')
kruskal.test(guis[,13] ~ guis[,15])  # MOL 10 in the manuscript
dunnTest(guis[,13] ~ guis[,15], data = guis, method = 'bonferroni')

##################################################
######## FEEDING - TIERING - MOBILITY ########

kruskal.test(spamob2$Passive ~ spamob2$int)  # No significant result from K-W, so no Dunn's test applied
kruskal.test(spatie2$Epi ~ spatie2$int)  # No significant result from K-W, so no Dunn's test applied
kruskal.test(spafed2$Susp ~ spafed2$int)
dunnTest(Susp ~ int, data = spafed2, method = 'bonferroni')  # Bonferroni correction applied