# This is an "R" script documenting work associated with the following
# publication:
# Fraser, L.H., Pither, J., Jentsch, A., Sternberg, M., Zobel, M., and the
# HerbDivNet global network. 2015. Worldwide evidence of a unimodal
# relationship between productivity and plant species richness. Science.
# P.I.: Lauchlan Fraser (Thompson Rivers University) (lfraser@tru.ca)
# Script author: Jason Pither (University of British Columbia, Okanagan campus)
# (jason.pither@ubc.ca)
# ---------------------------------------------------------
# THIS SCRIPT CONDUCTS THE MAIN GLM ANALYSES
# OF THE GLOBAL-EXTENT RICHNESS-BIOMASS ASSOCIATION
# OUTPUTS: TABLE 1 AND FIGURE S1
# ---------------------------------------------------------
# ---------------------------------------------------------
# REQUIRED FILES:
# fraser_plotdata.csv
# ---------------------------------------------------------
# ---------------------------------------------------------
# REQUIRED PACKAGES:
library(pscl)
library(MASS)
library(glmmADMB)
library(lme4)
library(quantreg)
# ---------------------------------------------------------
# ---------------------------------------------------------
# read in data, and do initial data cleaning
# ---------------------------------------------------------
alldata <- read.csv("fraser_plotdata.csv", header = TRUE, strip.white = TRUE)

# first 10 columns are summary data
alldata.summary <- alldata[, c("grid", "original.name", "community.type", "country",
                               "pi", "plot", "biomass", "litter", "tot.bio", "sr")]

# column headings are self-explanatory, but note that "pi" is what we equate
# with "site" throughout the paper

# exclude grids in which litter measurements were not conducted in any of the
# 64 quadrats
complete.grid.numbers <- setdiff(unique(alldata.summary$grid),
  names(table(alldata.summary[is.na(alldata.summary$litter), "grid"])[
    table(alldata.summary[is.na(alldata.summary$litter), "grid"]) == 64]))
num.complete.grids <- length(complete.grid.numbers)

# now we have 151 grids with complete or mostly complete measurements
# (10 have between 1 and 4 quadrats missing litter)

# now subset data to only those grids
alldata.litter <- alldata.summary[!is.na(match(alldata.summary$grid,
                                               as.numeric(complete.grid.numbers))), ]

# there are some quadrats with zero species, and two (7023, 7030)
# in grid 110 that have valid litter biomass (zero) but no live species;
# these remain as VALID data points
# put "NA" in all other zero-species quadrats
na.rownames <- row.names(alldata.litter[alldata.litter$sr == 0, ][
  setdiff(row.names(alldata.litter[alldata.litter$sr == 0, ]), c("7023", "7030")), ])

# remaining quadrats with neither biomass nor species can be "NA"
alldata.litter[na.rownames, "biomass"] <- NA
alldata.litter[na.rownames, "litter"] <- NA
alldata.litter[na.rownames, "tot.bio"] <- NA
alldata.litter[na.rownames, "sr"] <- NA

# create a log10 total biomass (+1) column
alldata.litter$log10.tot.bio <- log10(alldata.litter$tot.bio + 1)

# CHECK which, if any, quadrats have "NA" in total biomass:
alldata.litter[is.na(alldata.litter$tot.bio), ]
## These 33 quadrats must be excluded from analyses.
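
# Optional sanity check (supplementary; expected values assume the
# fraser_plotdata.csv file listed under REQUIRED FILES above):
# confirm the counts described in the comments before subsetting.
length(unique(alldata.litter$grid))   # expected: 151 retained grids
sum(is.na(alldata.litter$tot.bio))    # expected: 33 quadrats, excluded below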
good.data <- alldata.litter[!is.na(alldata.litter$tot.bio), ]

# there are 33 quadrats excluded, so the null DF for the global models is
# 9664 (num rows in alldata.litter) minus 33 = 9631 (num rows in good.data)

# treat "grid" as a factor for analyses
good.data$grid <- factor(good.data$grid)
# same with "pi", which we consider equivalent to "site"
good.data$pi <- as.character(good.data$pi)
good.data$pi <- as.factor(good.data$pi)
# ---------------------------------------------------------
# DONE data input / cleaning
# ---------------------------------------------------------

# -------------------
# function for calculating % deviance explained, from
# https://modtools.wordpress.com/2013/08/14/dsquared/
# -------------------
Dsquared <- function(obs = NULL, pred = NULL, model = NULL, adjust = FALSE) {
  # version 1.3 (3 Jan 2015)
  model.provided <- ifelse(is.null(model), FALSE, TRUE)
  if (model.provided) {
    if (!("glm" %in% class(model))) stop("'model' must be of class 'glm'.")
    if (!is.null(pred)) message("Argument 'pred' ignored in favour of 'model'.")
    if (!is.null(obs)) message("Argument 'obs' ignored in favour of 'model'.")
    obs <- model$y
    pred <- model$fitted.values
  } else {  # if model not provided
    if (is.null(obs) | is.null(pred)) stop("You must provide either 'obs' and 'pred', or a 'model' object of class 'glm'")
    if (length(obs) != length(pred)) stop("'obs' and 'pred' must be of the same length (and in the same order).")
    if (any(!(obs %in% c(0, 1))) | any(pred < 0) | any(pred > 1)) stop("Sorry, 'obs' and 'pred' options currently only implemented for binomial GLMs (binary response variable with values 0 or 1) with logit link.")
    logit <- log(pred / (1 - pred))
    model <- glm(obs ~ logit, family = "binomial")
  }
  D2 <- (model$null.deviance - model$deviance) / model$null.deviance
  if (adjust) {
    if (!model.provided) return(message("Adjusted D-squared not calculated, as it requires a model object (with its number of parameters) rather than just 'obs' and 'pred' values."))
    n <- length(model$fitted.values)
    # p <- length(model$coefficients)
    p <- attributes(logLik(model))$df
    D2 <- 1 - ((n - 1) / (n - p)) * (1 - D2)
  }  # end if adjust
  return(D2)
}
# -------------------
# END function
# -------------------

# -------------------
# Conduct negative binomial GLM
# -------------------
# The negative binomial allows AIC calculations and fits the assumptions well
# SEE: http://www.ats.ucla.edu/stat/r/dae/nbreg.htm
global.negbinom.log10.results <- glm.nb(sr ~ log10.tot.bio + I(log10.tot.bio^2), data = good.data)

# Test of the dispersion parameter using its asymptotic SE
pnorm(1 - summary(global.negbinom.log10.results)$theta /
        summary(global.negbinom.log10.results)$SE.theta, lower.tail = TRUE)

# test NB over Poisson (Poisson is nested in NB, so OK to use anova.glm)
global.poisson.log10.results <- glm(sr ~ log10.tot.bio + I(log10.tot.bio^2),
                                    data = good.data, family = "poisson")
anova(global.poisson.log10.results, global.negbinom.log10.results)

# use "odTest" from package pscl
odTest(global.negbinom.log10.results)
# -------------------
# Negative binomial provides a superior fit compared to the Poisson
# -------------------

# -------------------
# test significance of the quadratic term over the single-term model:
# -------------------
# intercept-only and single-term models
temp0 <- glm.nb(sr ~ 1, data = good.data)
temp1 <- glm.nb(sr ~ log10.tot.bio, data = good.data)
# include the quadratic term
global.negbinom.log10.results <- glm.nb(sr ~ log10.tot.bio + I(log10.tot.bio^2), data = good.data)
# test models
anova(global.negbinom.log10.results, temp1)
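
# Supplementary check of the same comparison via AIC: the intercept-only,
# linear, and quadratic negative binomial fits side by side
# (lower AIC indicates better fit).
AIC(temp0, temp1, global.negbinom.log10.results)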
# -------------------
# BELOW: Information for TABLE 1, line 1
# -------------------
anova(global.negbinom.log10.results, temp0)
summary(global.negbinom.log10.results)
# -------------------
# Finish info for TABLE 1, line 1
# -------------------
Dsquared(model = global.negbinom.log10.results)

# -------------------
# NOW, use GLMMs to account for the hierarchical sampling structure
# -------------------
# -------------------
# **** NOTE: because the GLMMs take a while to run,
# the two models below can be run as a batch job ****
# -------------------
# ensure "grid" and "pi" are treated as factors
# (repeated here in case the GLMMs are run separately from the code above)
good.data$grid <- factor(good.data$grid)
good.data$pi <- as.character(good.data$pi)
good.data$pi <- as.factor(good.data$pi)

# first the unconditional means (aka variance components) model:
null.nb.glmm <- glmmadmb(sr ~ 1 + (1 | pi) + (1 | pi:grid),
                         family = "nbinom", data = good.data)
# then the random intercepts model:
rand.intercept.nbinom <- glmmadmb(sr ~ log10.tot.bio + I(log10.tot.bio^2),
                                  random = ~ (1 | pi) + (1 | pi:grid),
                                  family = "nbinom", data = good.data)
# -------------------
# Begin info for TABLE 1, line 2
# -------------------
summary(rand.intercept.nbinom)
# -------------------
# TEST using the log-likelihood:
# -------------------
anova(null.nb.glmm, rand.intercept.nbinom)
# -------------------
# Finish info for TABLE 1, line 2
# -------------------

# -------------------
# START Figure S1 in article
# -------------------
par(mfrow = c(1, 1), mar = c(4, 5, 1, 1))
plot(good.data$tot.bio, good.data$sr, col = "lightgrey", log = "x", las = 1,
     ylab = expression("Number of species " ~ (m^{-2})),
     xlab = expression("Total biomass (log10) " ~ (g ~ m^{-2})), ylim = c(-2, 54))
# add GLM negbinom prediction:
lines(good.data$tot.bio[order(good.data$tot.bio)],
      predict(global.negbinom.log10.results, type = "response")[order(good.data$log10.tot.bio)],
      lty = 1, lwd = 2)
# add mixed model negbinom prediction:
lines(good.data$tot.bio[order(good.data$tot.bio)],
      predict(rand.intercept.nbinom, type = "response")[order(good.data$log10.tot.bio)],
      lty = 1, col = "red", lwd = 2)
# -------------------
# END Figure S1 in article
# -------------------

# -------------------
# Global extent analysis using quantile regression
# -------------------
global.quantreg.log10.results.95 <- rq(sr ~ log10.tot.bio + I(log10.tot.bio^2),
                                       data = good.data, tau = 0.95)
# -------------------
# Begin info for TABLE 1, line 3
# -------------------
summary(global.quantreg.log10.results.95)
# NOTE: summary() reports 13 non-positive fis; this is not an issue, as the
# number is small relative to the sample size

# calculation of the pseudo F statistic:
null.quant <- rq(sr ~ 1, data = good.data, tau = 0.95)
lin.quant <- rq(sr ~ log10.tot.bio, data = good.data, tau = 0.95)
quad.quant <- rq(sr ~ log10.tot.bio + I(log10.tot.bio^2), data = good.data, tau = 0.95)
anova(quad.quant, lin.quant, null.quant)  # Analysis of Deviance Table
# -------------------
# Finish info for TABLE 1, line 3
# -------------------

# -------------------
# Conduct global extent analysis on LIVE biomass only
# -------------------
# Build a model using all quadrats with live biomass
alldata.nona.biomass <- alldata.summary[!is.na(alldata.summary$biomass), ]
# log transform
alldata.nona.biomass$log10.live.biomass <- log10(alldata.nona.biomass$biomass + 1)
# Negative binomial GLM: null then full model
global.negbinom.log10.live.null <- glm.nb(sr ~ 1, data = alldata.nona.biomass)
global.negbinom.log10.live.results <- glm.nb(sr ~ log10.live.biomass + I(log10.live.biomass^2),
                                             data = alldata.nona.biomass)
anova(global.negbinom.log10.live.results, global.negbinom.log10.live.null)
# -------------------
# Begin info for TABLE 1, line 4
# -------------------
summary(global.negbinom.log10.live.results)
# -------------------
# End info for TABLE 1, line 4
# -------------------
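
# Supplementary: percent deviance explained for the live-biomass model,
# using the Dsquared() function defined above (parallels the Dsquared call
# made for the global total-biomass GLM).
Dsquared(model = global.negbinom.log10.live.results)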
# -------------------
# Now use the "good.data" dataset to enable direct comparison
# of the models with all biomass versus only live biomass
# (keeps N the same between each)
# -------------------
good.data$log10.live.biomass <- log10(good.data$biomass + 1)
# intercept model:
global.negbinom.log10.live.biomass.null <- glm.nb(sr ~ 1, data = good.data)
global.negbinom.log10.live.biomass.results <- glm.nb(sr ~ log10.live.biomass + I(log10.live.biomass^2),
                                                     data = good.data)
global.negbinom.log10.tot.biomass.results <- glm.nb(sr ~ log10.tot.bio + I(log10.tot.bio^2),
                                                    data = good.data)
# -------------------
# Use the Vuong test (package pscl) to test whether using total biomass
# provides a better fit than live biomass alone;
# appropriate for non-nested model comparison
# -------------------
vuong(global.negbinom.log10.live.biomass.results, global.negbinom.log10.tot.biomass.results)
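
# Supplementary illustration: for a quadratic fit on the log10 scale,
# sr ~ b0 + b1*x + b2*x^2, predicted richness peaks at x = -b1 / (2 * b2)
# (provided b2 < 0, i.e. a hump-shaped fit). Back-transforming from
# x = log10(biomass + 1) gives the biomass at the peak, compared here
# between the total- and live-biomass models fitted above.
cf.tot <- coef(global.negbinom.log10.tot.biomass.results)
cf.live <- coef(global.negbinom.log10.live.biomass.results)
10^(-cf.tot["log10.tot.bio"] / (2 * cf.tot["I(log10.tot.bio^2)"])) - 1               # g m^-2, total biomass model
10^(-cf.live["log10.live.biomass"] / (2 * cf.live["I(log10.live.biomass^2)"])) - 1   # g m^-2, live biomass model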