# This is an "R" script documenting work associated with the following # publication: # Fraser, L.H., Pither, J., Jentsch, A., Sternberg, M., Zobel, M., and the # HerbDivNet # global network. 2015. Worldwide evidence of a unimodal # relationship between productivity and plant species richness. Science. # P.I.: Lauchlan Fraser (Thompson Rivers University) (lfraser@tru.ca) # Script author: Jason Pither (University of British Columbia, Okanagan campus) # (jason.pither@ubc.ca) # --------------------------------------------------------- # THIS SCRIPT CONDUCTS REGRESSION ANALYSES ASSOCIATED WITH # THE VARIED SAMPLING GRAIN ANALYSES # OUTPUT: FIGURE S2 # --------------------------------------------------------- # --------------------------------------------------------- # REQUIRED FILES: # fraser_plotdata.csv # --------------------------------------------------------- # --------------------------------------------------------- # REQUIRED PACKAGES: library(AER) # --------------------------------------------------------- # --------------------------------------------------------- # read in data, and do initial data cleaning # --------------------------------------------------------- alldata <- read.csv("fraser_plotdata.csv", header = T, strip.white = T) # first 10 columns are summary data alldata.summary <- alldata[,c("grid","original.name","community.type","country","pi","plot","biomass","litter","tot.bio","sr")] # column headings are self-explanatory; but note that "pi" is what we equate with "site" throughout the paper # remaining columns are pres/abs data alldata.prab <- alldata[,(dim(alldata.summary)[[2]]+1):length(alldata)] # convert NA values that should be zeros: alldata.prab[is.na(alldata.prab)] <- 0 # exclude grids that had not litter measurements conducted in all 64 quadrats complete.grid.numbers <- setdiff(unique(alldata.summary$grid),names(table(alldata.summary[is.na(alldata.summary$litter),"grid"])[table(alldata.summary[is.na(alldata.summary$litter),"grid"]) == 64])) num.complete.grids <- length(complete.grid.numbers) # now we have 151 grids with complete or mostly complete measurements # (10 have between 1 and 4 quadrats missing litter) # now subset data to only those grids alldata.litter <- alldata.summary[!is.na(match(alldata.summary$grid,as.numeric(complete.grid.numbers))),] # there are some quadrats with zero species, and two (7023, 7030) # in grid 110 that have valid litter biomass (zero) but no live species; # these remain as VALID data points # put "NA" in all other quadrats na.rownames <- row.names(alldata.litter[alldata.litter$sr == 0,][setdiff(row.names(alldata.litter[alldata.litter$sr == 0,]),c("7023","7030")),]) # remaining quadrats with neither biomass nor species can be "NA" alldata.litter[na.rownames,"biomass"] <- NA alldata.litter[na.rownames,"litter"] <- NA alldata.litter[na.rownames,"tot.bio"] <- NA alldata.litter[na.rownames,"sr"] <- NA # create a log10 total biomass (+1) column alldata.litter$log10.tot.bio <- log10(alldata.litter$tot.bio+1) # --------------------------------------------------------- # DONE data input / cleaning # --------------------------------------------------------- # ------------------- # NOW CREATE AN ARRAY, 151 LAYERS DEEP, WITH EACH LAYER # BEING A SITE X SPECIES MATRIX (PRES/ABS) # presence / absence site x species array, for each grid, # dimensions 64 x 149 x 151: # THIS WILL BE USEFUL FOR ALL subsequent analyses # ------------------- numgrids <- length(unique(alldata.litter$grid)) # number of unique grids gridnumbers <- as.character(unique(alldata.litter$grid)) # vector of grid identifiers numspec <- length(alldata.prab) # number of unique species across all grids numgridcells <- 64 # number in 8 x 8 grid alldata.prab.litter <- alldata.prab[row.names(alldata.litter),] # create rownames according to grid cellnames: cellnames <- paste(expand.grid(1:8,c("A","B","C","D","E","F","G","H"))[,2],expand.grid(1:8,c("A","B","C","D","E","F","G","H"))[,1],sep = ""); presabs.array <- array(dim = c(numgridcells, numspec, numgrids),0); dimnames(presabs.array) <- list(cellnames,1:numspec,gridnumbers); for (i in 1:numgrids){ presabs.array[,,gridnumbers[i]] <- as.matrix(alldata.prab.litter[row.names(alldata.litter[alldata.litter$grid == gridnumbers[i],]),]) print(i); } scaling.gridnumbers <- as.character(unique(alldata.litter$grid)) # ------------------- # Create total biomass array # dimensions 8 x 8 x number of complete grids (141) # ------------------- tot.biomass.array <- array(dim = c(sqrt(numgridcells),sqrt(numgridcells),numgrids),0); dimnames(tot.biomass.array) <- list(1:8,c("A","B","C","D","E","F","G","H"),as.character(gridnumbers)); for (i in 1:numgrids){ tot.biomass.array[,,gridnumbers[i]] <- matrix(nrow = sqrt(numgridcells),ncol = sqrt(numgridcells),alldata.litter[alldata.litter$grid == gridnumbers[i],"tot.bio"]) print(i); } # ------------------- # Create logical (T,F) arrays showing sampling locations for scaling anlalyses # ------------------- sampling.array1 <- array(dim=c(8,8,8),F); for (i in 1:8){ sampling.array1[1:i,1:i,i] <- T } sampling.array2 <- array(dim=c(8,8,8),F); for (i in 1:8){ sampling.array2[,,i]<- sampling.array1[8:1,8:1,i] } ## third orientation sampling.array3 <- array(dim=c(8,8,8),F); for (i in 1:8){ sampling.array3[,,i]<- sampling.array1[8:1,,i] } sampling.array4 <- array(dim=c(8,8,8),F); for (i in 1:8){ sampling.array4[,,i]<- sampling.array1[,8:1,i] } # ------------------- # Create array to hold all the grid data, # from 4 different orientations and grains # ------------------- scaling.data.array <- array(dim=c(nrow(expand.grid(grain=1:8, grid=scaling.gridnumbers)),5,4),0) dimnames(scaling.data.array) <- list(1:nrow(expand.grid(grain=1:8, grid=scaling.gridnumbers)), c("grain","grid","tot.sr","tot.bio","log10.tot.bio"), c(paste("orient",1:4,sep="_"))); temp <- expand.grid(grain=1:8,grid=scaling.gridnumbers); for (i in 1:4){ scaling.data.array[,"grain",i] <- temp$grain; scaling.data.array[,"grid",i] <- as.numeric(as.character(temp$grid)); } # ------------------- # FILL the array with appropriate data # ------------------- for (k in 1:4){ ## 4 orientations for (i in 1:length(scaling.gridnumbers)){ ## good grid numbers for (j in 1:8){ # 8 grain sizes ## if this is the first grain size, then don't use "colSums", just use "sum" if (j==1) { scaling.data.array[,,k][scaling.data.array[,,k][,"grid"]==scaling.gridnumbers[i]&scaling.data.array[,,k][,"grain"]==j,"tot.sr"] <- ifelse(is.na(sum(presabs.array[get(paste("sampling.array",k,sep=""))[,,j],,scaling.gridnumbers[i]])),NA,sum(presabs.array[get(paste("sampling.array",k,sep=""))[,,j],,scaling.gridnumbers[i]])); scaling.data.array[,,k][scaling.data.array[,,k][,"grid"]==scaling.gridnumbers[i]&scaling.data.array[,,k][,"grain"]==j,"tot.bio"] <-ifelse(is.na(sum(tot.biomass.array[,,scaling.gridnumbers[i]][get(paste("sampling.array",k,sep=""))[,,j]])),NA,sum(tot.biomass.array[,,scaling.gridnumbers[i]][get(paste("sampling.array",k,sep=""))[,,j]])); } else { scaling.data.array[,,k][scaling.data.array[,,k][,"grid"]==scaling.gridnumbers[i]&scaling.data.array[,,k][,"grain"]==j,"tot.sr"] <- ifelse(is.na(sum(colSums(presabs.array[get(paste("sampling.array",k,sep=""))[,,j],,scaling.gridnumbers[i]])>0)),NA,sum(colSums(presabs.array[get(paste("sampling.array",k,sep=""))[,,j],,scaling.gridnumbers[i]])>0)); scaling.data.array[,,k][scaling.data.array[,,k][,"grid"]==scaling.gridnumbers[i]&scaling.data.array[,,k][,"grain"]==j,"tot.bio"] <-ifelse(is.na(sum(tot.biomass.array[,,scaling.gridnumbers[i]][get(paste("sampling.array",k,sep=""))[,,j]])),NA,sum(tot.biomass.array[,,scaling.gridnumbers[i]][get(paste("sampling.array",k,sep=""))[,,j]])); } print(j) } print(i) } print(k) } ## calculate log10 biomass for (i in 1:4){ scaling.data.array[,"log10.tot.bio",i] <- log10(scaling.data.array[,"tot.bio",i]+1) scaling.data.array[,"log10.tot.bio",i][scaling.data.array[,"log10.tot.bio",i]=="-Inf"] <- NA; } # ------------------- # DONE filling the array with appropriate data # ------------------- # ------------------- # Create array to hold all the scaling regression results # ------------------- # note because we don't use multiple plots from a grid # no need to use more conservative alpha level p.value <- 0.05; require(AER); ## set up output table for LM gauss.scaling.glm <- array(dim=c(8,13,4),NA) dimnames(gauss.scaling.glm) <- list(1:8,c("grain","intercept","intercept.se","intercept.pval","term1.coef","term1.se","term1.pval","term2.coef","term2.se","term2.pval","type","sampsize","adj.r.square"),1:4); gauss.scaling.glm[,1,] <- 1:8 # ------------------- # BEGIN LM analyses using each orientation of data and grain size # ------------------- for (k in 1:4){ #orientation for (i in 1:8){ # grain size ## run poisson models, first quad, then linear quad.pois.glm <- glm(tot.sr~log10.tot.bio+I(log10.tot.bio^2),as.data.frame(scaling.data.array[,,k][!is.na(scaling.data.array[,"tot.bio",k])&scaling.data.array[,"grain",k]==i,]),family=poisson()); lin.pois.glm <- glm(tot.sr~log10.tot.bio,as.data.frame(scaling.data.array[,,k][!is.na(scaling.data.array[,"tot.bio",k])&scaling.data.array[,"grain",k]==i,]),family=poisson()); null.pois.glm <- glm(tot.sr~1,as.data.frame(scaling.data.array[,,k][!is.na(scaling.data.array[,"tot.bio",k])&scaling.data.array[,"grain",k]==i,]),family=poisson()); ## check ## first try quadratic, and if P<=p.value for quadratic coefficient, then keep if significant ## run gaussian models, first quad, then linear quad.gauss.glm <- lm(log10(tot.sr+1)~log10.tot.bio+I(log10.tot.bio^2),as.data.frame(scaling.data.array[,,k][!is.na(scaling.data.array[,"tot.bio",k])&scaling.data.array[,"grain",k]==i,])); lin.gauss.glm <- lm(log10(tot.sr+1)~log10.tot.bio,as.data.frame(scaling.data.array[,,k][!is.na(scaling.data.array[,"tot.bio",k])&scaling.data.array[,"grain",k]==i,])); null.gauss.glm <- lm(log10(tot.sr+1)~1,as.data.frame(scaling.data.array[,,k][!is.na(scaling.data.array[,"tot.bio",k])&scaling.data.array[,"grain",k]==i,])); ## check ## first try quadratic, and if P<=p.value for quadratic coefficient, then keep if significant if(unlist(anova(lin.gauss.glm,quad.gauss.glm))[12] <= p.value) { gauss.scaling.glm[,,k][i,2:10] <- c(t(summary(quad.gauss.glm)$coef)[-3,]); ## r-square not relevant here gauss.scaling.glm[,,k][i,11] <- NA ## get sample size gauss.scaling.glm[,,k][i,12] <- sum(summary(quad.gauss.glm)$df[1:2]) ## get adjusted r-square (Gaussian lm only) gauss.scaling.glm[,,k][i,13] <- summary(quad.gauss.glm)$adj.r.square } ## check linear significance else if (unlist(anova(null.gauss.glm,lin.gauss.glm))[12] <= p.value) { gauss.scaling.glm[,,k][i,2:7] <- t(summary(lin.gauss.glm)$coef)[-3,]; ## sample size gauss.scaling.glm[,,k][i,12] <- sum(summary(lin.gauss.glm)$df[1:2]); ## get adjusted r-square (Gaussian lm only) gauss.scaling.glm[,,k][i,13] <- summary(lin.gauss.glm)$adj.r.square ## get dispersion parameter #gauss.scaling.glm[,,k][i,13] <- summary(lin.gauss.glm)$deviance/summary(lin.gauss.glm)$df.residual } else {} print(i) } print(k) } # ------------------- # FINISH LM analyses using each orientation of data and grain size # ------------------- # ------------------- # Pick one orientation for illustration of LM # ------------------- current.data <- scaling.data.array[,,1] quad.gauss.glm <- glm(log10(tot.sr+1)~log10.tot.bio+I(log10.tot.bio^2),as.data.frame(current.data[!is.na(current.data[,"tot.bio"])¤t.data[,"grain"]==i,]),family=gaussian()) # ------------------- # check residual plots as follows # ------------------- par(mfrow=c(4,2)); ## orient page portrait, and arrange plots in 4 rows by 2 columns for (i in 1:8){ ## plot quantile plots of residuals using sqrt-richness first qqnorm(rstandard(quad.gauss.glm),type="n",ylim=c(-3.2,3.2),xlim=c(-3.2,3.2),main=paste("Grain = ", i, sep=" ")); points(qqnorm(rstandard(quad.gauss.glm),plot.it=F),col="black") points(qqnorm(rstandard(quad.gauss.glm),plot.it=F),col="red") abline(0,1,lty=2,col="darkgrey") } # these plots show two values with standardized residuals that are # slightly off the line: "1120" and "1112"; # ** BUT, in all cases the log10-transformed data provides pretty good fit # to assumption of normality of residuals; and better than the GLM approaches # ------------------- # Calculate average regression coefficients and R-square for plotting # ------------------- ## average intercept value for each of 8 grains sizes: rowMeans(gauss.scaling.glm[,"intercept",],na.rm=T) # average term1 coeff rowMeans(gauss.scaling.glm[,"term1.coef",],na.rm=T) ## average term2 coefficient value for each of 8 grains sizes: rowMeans(gauss.scaling.glm[,"term2.coef",],na.rm=T) ## average R-square adjsted value for each of 8 grains sizes: rowMeans(gauss.scaling.glm[,"adj.r.square",],na.rm=T) # ------------------- # Now generate scatterplots using the "scaling.data" dataframe; # could use any one of the 4 "scaling.data" datasets # ------------------- ## **** updated this July 22, 2014. par(mfrow=c(4,2),mar=c(6,6,4,2)) ## select which orientation to plot (1 through 4) by changing the number in scaling.data.array[,,1] grain.sizes <- paste("Grain size: ",paste(paste(1:8,1:8,sep="m x "),"m",sep=""),sep="") ## orientation number to plot as example orient.num <- 3 scaling.data <- scaling.data.array[,,orient.num] title.texts <- rep("NO",8) for (i in 1:8){ title.texts[i] <- paste(paste("Adjusted R-square =",round(gauss.scaling.glm[i,"adj.r.square",orient.num],3),sep=" "), grain.sizes[i],sep=", ") } for (i in 1:8){ temp <- as.data.frame(na.omit(scaling.data[scaling.data[,"grain"]==i,])); plot(log10(tot.sr+1)~log10.tot.bio,temp, xlab=expression("Total biomass" ~ (g ~ m^{-2}) ~ (log[10])), ylab=expression("Total species richness" ~ (log[10])),las=1) ## add sample size text text(min(na.omit(temp$log10.tot.bio))+0.1,min(log10(temp$tot.sr+1))+.1,paste("N =",length(na.omit(temp$log10.tot.bio)),sep=" ")) ## add lines for average regressions (across all 4 orientations) using the mean coefficient values lines(sort(temp$log10.tot.bio),(gauss.scaling.glm[i,"intercept",orient.num] + (gauss.scaling.glm[i,"term1.coef",orient.num])*temp$log10.tot.bio+(gauss.scaling.glm[i,"term2.coef",orient.num]*temp$log10.tot.bio^2))[order(temp$log10.tot.bio)],lty=2,lwd=2); ## add informative title title(main=title.texts[i],cex=0.8) } # ------------------- # Finish example plots # ------------------- # ------------------- ## The preceding regressions are influenced by the monoculture high biomass ## grids numbers 90 and 92; (see bottom right points ## in the scatterplot figures; ## so, re-do the preceding analyses excluding those tw plots # ------------------- ## without grids # 90 & 92 gauss.scaling.subset.glm <- array(dim=c(8,14,4),NA) dimnames(gauss.scaling.subset.glm) <- list(1:8,c("grain","intercept","intercept.se","intercept.pval","term1.coef","term1.se","term1.pval","term2.coef","term2.se","term2.pval","type","sampsize","adj.r.square","AIC"),1:4); gauss.scaling.subset.glm[,1,] <- 1:8 # ------------------- # Filling the array with appropriate data # ------------------- for (k in 1:4){ #orientation for (i in 1:8){ # grain size tempdat <- as.data.frame(scaling.data.array[,,k][!is.na(scaling.data.array[,"tot.bio",k])&scaling.data.array[,"grain",k]==i,]); tempdat <- tempdat[tempdat$grid!="90"&tempdat$grid!="92",] quad.gauss.glm <- lm(log10(tot.sr+1)~log10.tot.bio+I(log10.tot.bio^2),tempdat); lin.gauss.glm <- lm(log10(tot.sr+1)~log10.tot.bio,tempdat); null.gauss.glm <- lm(log10(tot.sr+1)~1,tempdat); ## check ## first try quadratic, and if P<=p.value for quadratic coefficient, then keep if significant if(unlist(anova(lin.gauss.glm,quad.gauss.glm))[12] <= p.value) { gauss.scaling.subset.glm[,,k][i,2:10] <- c(t(summary(quad.gauss.glm)$coef)[-3,]); ## r-square not relevant here gauss.scaling.subset.glm[,,k][i,11] <- NA ## get sample size gauss.scaling.subset.glm[,,k][i,12] <- sum(summary(quad.gauss.glm)$df[1:2]) ## get adjusted r-square (Gaussian lm only) gauss.scaling.subset.glm[,,k][i,13] <- summary(quad.gauss.glm)$adj.r.square gauss.scaling.subset.glm[,,k][i,14] <- AIC(quad.gauss.glm) } ## check linear significance else if (unlist(anova(null.gauss.glm,lin.gauss.glm))[12] <= p.value) { gauss.scaling.subset.glm[,,k][i,2:7] <- t(summary(lin.gauss.glm)$coef)[-3,]; ## sample size gauss.scaling.subset.glm[,,k][i,12] <- sum(summary(lin.gauss.glm)$df[1:2]); ## get adjusted r-square (Gaussian lm only) gauss.scaling.subset.glm[,,k][i,13] <- summary(lin.gauss.glm)$adj.r.square gauss.scaling.subset.glm[,,k][i,14] <- AIC(lin.gauss.glm) ## get dispersion parameter #gauss.scaling.subset.glm[,,k][i,13] <- summary(lin.gauss.glm)$deviance/summary(lin.gauss.glm)$df.residual } else {} print(i) } print(k) } # ------------------- # DONE filling the array with appropriate data # ------------------- # ------------------- # BEGIN Figure S2 # ------------------- par(mfrow=c(4,2),mar=c(6,6,4,2)) ## select which orientation to plot (1 through 4) by changing the number in scaling.data.array[,,1] grain.sizes <- paste("Grain size: ",paste(paste(1:8,1:8,sep="m x "),"m",sep=""),sep="") ## orientation number to plot as example orient.num <- 3 scaling.data <- as.data.frame(scaling.data.array[,,orient.num]) scaling.data <- scaling.data[scaling.data$grid!="90"&scaling.data$grid!="92",] title.texts <- rep("NO",8) for (i in 1:8){ ## get average R-square title.texts[i] <- paste(paste("Adjusted R-square =",round(mean(gauss.scaling.subset.glm[i,"adj.r.square",]),3),sep=" "), grain.sizes[i],sep=", ") } for (i in 1:8){ temp <- as.data.frame(na.omit(scaling.data[scaling.data[,"grain"]==i,])); plot(log10(tot.sr+1)~log10.tot.bio,temp, xlab=expression("Total biomass" ~ (g ~ m^{-2}) ~ (log[10])), ylab=expression("Total species richness" ~ (log[10])),las=1) ## add sample size text text(min(na.omit(temp$log10.tot.bio))+0.1,min(log10(temp$tot.sr+1))+.1,paste("N =",length(na.omit(temp$log10.tot.bio)),sep=" ")) ## add lines for average regressions (across all 4 orientations) using the mean coefficient values lines(sort(temp$log10.tot.bio), c(mean(gauss.scaling.subset.glm[i,"intercept",]) + mean(gauss.scaling.subset.glm[i,"term1.coef",])*temp$log10.tot.bio+mean(gauss.scaling.subset.glm[i,"term2.coef",])*temp$log10.tot.bio^2)[order(temp$log10.tot.bio)],lty=2,lwd=2); ## add informative title title(main=title.texts[i],cex=0.8) } # ------------------- # END Figure S2 # -------------------