# Supplemental file for: [Article title goes here] # analysis code: # Chris MacQuarrie, Natural Resources Canada Canadian Forest Service # Great Lakes Forestry Centre, Sault Ste. Marie, ON. # # graphics code: # Amanda D Roe, Natural Resources Canada Canadian Forest Service # Laurentian Forestry Centre, Ste Foy, QC. # # cmacquar@nrcan.gc.ca # # R version 2.15.0 (2012-03-30) i386-pc-mingw32/i386 (32 bit) # #NOTES: # Restart R before beginning a major block of code (denoted by S1, S2, # etc...) as some libraries loaded in one block can have adverse behviour if # loaded with libraries called in another block (e.g., nlme & lme4). # This file is formatted for section highlighting as in RStudio. Divisions # between sections are indicated by === (major divisions) # and --- (minor divisions). # S1 Analysis of Poplar Biomass data =========================================== # # S1.1 Weights of vegetative structures ======================================== # Libraries -------------------------------------------------------------------- require(ggplot2) # for plotting require(nlme) # for model fitting require(multcomp) # for multiple post-hoc comparisons # Datasets --------------------------------------------------------------------- biomass <- read.csv( "Biomass.csv") # parse the biomass data to exclude class 'X' biomass <- biomass[ biomass$tree.class %in% c("B", "D", "N"),] biomass$tree.class <- droplevels(biomass$tree.class) # Checking data ================================================================ head( biomass) # OK str( biomass) # age.class and year are recognized as integers, should be factors # set age.class as a ordered factor and year as a factor biomass$age.class <- factor( biomass$age.class, levels = c(1,2,3,4) ) biomass$year <- as.factor( biomass$year) # Preliminary Analysis # look at the distribution of age classes among samples (note, is for individual # catkins are fewer actual trees) table(biomass$age.class, biomass$tree.class) # can't test interaction of age.class and tree.class, data are too un-balanced # will treat this as a seperate analysis within tree.class 'D'for each of the # biomass measures # look at the distribution of trees sampled in each year # four trees were sampled in both years table( biomass$tree.number, biomass$year) # 1 Effect of genotype on total biomass ======================================== # 1.1 Plot total catkin weight by tree class ----------------------------------- bt <- ggplot( biomass, aes( x = tree.class, y = total.weight)) biomass.total <- bt + geom_boxplot( outlier.size=0.5, aes( fill = year) ) + labs( y = " catkin weight (g)", x = "tree class") + labs( title = "Total biomass") biomass.total # 1.2 Evaluate correlations within years --------------------------------------- # create a subset of the biomass data with just the trees sampled twice, # and the biomass data biomassBothYears <- subset( biomass, sampledTwice == "yes", select = c("tree.number", "total.weight", "year") ) # plot the data boxplot( total.weight ~ year, data = biomassBothYears, xlab = "year", ylab = "total.weight (g)" ) # evaluate correlation # reasoning: if yields are not independant there should be a correlation # between yield in 2009 and yield in 2011 for the 4 double sampled trees: # plotting # aggregate the data and compute the means per year bBYMeans <- aggregate( total.weight ~ tree.number + year, data = biomassBothYears, FUN = mean ) plot( x = bBYMeans$total.weight[bBYMeans$year == "2009"], y = bBYMeans$total.weight[bBYMeans$year == "2011"], xlab = "2009 yield", ylab = "2011 yield") abline( coef = c(0,1) ) # compute Pearsons R-square for these trees # default is Pearsons cor.test( x = bBYMeans$total.weight[bBYMeans$year == "2009"], y = bBYMeans$total.weight[bBYMeans$year == "2011"] ) # t = 1.29, df = 2, p = 0.32 # = 0.67 # non-significant correlation # simple regression per tree # test for the effect of year on biomass for each of the 4 double sampled trees # create a lmList object, fitting the model to each individual seperately tw.subset.lmList1 <- lmList( total.weight ~ year | tree.number, data = biomassBothYears) # examine the output summary( tw.subset.lmList1) # all trees with a significant effect of year # results suggest their is some effect of year at best, but not that strong. Can # address full data set for these results. Will also do companion analysis # dropping the four double sampled trees. # create a modified tree.number field to seperate 2009 and 2011 samples biomass$treeNumberYear <- paste( biomass$tree.number, "_", biomass$year, sep = "") # 1.3 Genotype vs. biomass - full dataset -------------------------------------- # fit the model tw.full.lme1 <- lme( total.weight ~ tree.class + year, random = ~1 | treeNumberYear, data = biomass) tw.full.lme1 # examine intervals of random effects intervals( tw.full.lme1, which = "var-cov") # both random effect is bounded # above 0, indiciating it should # be kept in the model. # check the model assumptions # assumption 1 # residual box plot plot( tw.full.lme1, treeNumberYear~resid(.), abline = 0) # looks OK # standardized residuals by year plot( tw.full.lme1 , resid(., type = "p") ~ fitted(.) | year, abline = 0 ) # no problems # standardized residuals by tree class plot( tw.full.lme1 , resid(., type = "p") ~ fitted(.) | tree.class, abline = 0 ) # looks OK for most, but monotonic increase in class D # re-fit using 'varIdent' and check if improves the model tw.full.lme2 <- update( tw.full.lme1, weights = varIdent( form = ~ 1 | tree.class) ) anova( tw.full.lme1, tw.full.lme2) # varIdent does. plot( tw.full.lme2, resid(., type = "p") ~ fitted(.) | tree.class, abline = 0 ) # slight improvement # qq norm plot to check normality of both variables qqnorm( tw.full.lme2, ~resid(.) | year) # looks OK qqnorm( tw.full.lme2, ~resid(.) | tree.class) # looks OK # examine the parameters of the final model summary( tw.full.lme2) anova( tw.full.lme2) # significant effect of tree.class on total biomass, non-sig effect of year. # Drop year and see is if a better fit to the data tw.full.lme3 <- update( tw.full.lme2, fixed = ~ tree.class ) anova( tw.full.lme2, tw.full.lme3) # same but comparison not valid unless models are re-fit via ML) tw.full.lme2ML <- update( tw.full.lme2, method = "ML") tw.full.lme3ML <- update( tw.full.lme3, method = "ML") anova( tw.full.lme2ML, tw.full.lme3ML) # model without year no different # means no signficant effect of yar # perform Tukey test of differences among levels of tree.class on the simple # model summary( glht( tw.full.lme3, linfct = mcp( tree.class = "Tukey") ) ) # Significant difference between class N and class B # 1.4 Genotype vs. biomass - partial data set ---------------------------------- biomassTw <- subset( biomass, # select all that were not sampled once sampledTwice == "no", select = c("tree.number", "tree.class", "year", "total.weight") ) # Fit the full model with tree as a random effect, to test the main effects of # tree class and year. # # fit the model tw.lme1 <- lme( total.weight ~ tree.class + year, random = ~1 | tree.number, data = biomassTw) tw.lme1 # examine intervals of random effects intervals( tw.lme1, which = "var-cov") # both random effect is bounded # above 0, indiciating it should # be kept in the model. # check the model assumptions # assumption 1 # residual box plot plot( tw.lme1, tree.number~resid(.), abline = 0) # looks OK # standardized residuals by year plot( tw.lme1 , resid(., type = "p") ~ fitted(.) | year, abline = 0 ) # no problems # standardized residuals by tree class plot( tw.lme1 , resid(., type = "p") ~ fitted(.) | tree.class, abline = 0 ) # looks OK for most, but monotonic increase in class D # re-fit using 'varIdent' and check if improves the model tw.lme2 <- update( tw.lme1, weights = varIdent( form = ~ 1 | tree.class) ) anova( tw.lme1, tw.lme2) # varIdent does. plot( tw.lme2, resid(., type = "p") ~ fitted(.) | tree.class, abline = 0 ) # slight improvement # qq norm plot to check normality of both variables qqnorm( tw.lme2, ~resid(.) | year) # looks OK qqnorm( tw.lme2, ~resid(.) | tree.class) # looks OK # test assumption 2: random effects normal, mean = 0, covariance matrix is # independant # for 'individuals' qqnorm( tw.lme2, ~ranef(.) , id = 0.10 ) # one outlier BPSF-053. # examine the parameters of the final model summary( tw.lme2) anova( tw.lme2) # no effect of tree.class on total biomass, # no effect of year # full dataset shows significant effect of tree genotype, dropping those trees # removes the signficant effect. Suggests effect is being driven by response # of the four double sampled trees bt <- ggplot( biomass, aes( x = tree.class, y = total.weight)) biomass.total.smple <- bt + geom_boxplot( outlier.size=0.5, aes( fill = sampledTwice) ) + labs( y = " catkin weight (g)", x = "tree class") + opts( title = "Total biomass") biomass.total.smple # 2. Effect of genotype on seed weight ========================================= # 2.1 Plot seed weight versus tree class --------------------------------------- bts <- ggplot( biomass, aes( x = tree.class, y = seed.weight) ) biomass.total.seed <- bts + geom_boxplot( outlier.size = 0.5, aes( fill = year) ) + labs( y = "seed weight (g)", x = "tree class" ) + opts(title="Seed biomass") biomass.total.seed # 2.2 Genotype vs. seed weight - full data set --------------------------------- # fit the model seed.full.lme1 <- lme( seed.weight ~ tree.class + year, random = ~1 | treeNumberYear, data = biomass) seed.full.lme1 summary(seed.full.lme1) intervals( seed.full.lme1, which = "var-cov") # random effects bounded # above 0, indicating it should # be kept in the model. # check the model assumptions # assumption 1 # residual box plot plot( seed.full.lme1, treeNumberYear~resid(.), abline = 0) # quite a bit of variance # among individuals, but looks OK. # standardized residuals by year plot( seed.full.lme1, resid(., type = "p") ~ fitted(.) | year, abline = 0 ) # monotonic increases in 2011 # refit using a heteroscedastic model for year see if fit improves seed.full.lme2 <- update( seed.full.lme1, weights = varIdent( form = ~ 1 | year) ) anova( seed.full.lme1, seed.full.lme2) # homoscedastic is better # standardized residuals by tree class plot( seed.full.lme1, resid(., type = "p") ~ fitted(.) | tree.class, abline = 0 ) # monotonic increases in most classes # refit using a heteroscedastic model and examine to see if fit improves seed.full.lme3 <- update( seed.full.lme2, weights = varIdent( form = ~ 1 | tree.class) ) anova( seed.full.lme1, seed.full.lme2, seed.full.lme3) # homoscedastic is better # qq norm plot to check normality qqnorm( seed.full.lme1, ~resid(.) | year) # looks OK qqnorm( seed.full.lme1, ~resid(.) | tree.class) # looks OK # assumption 2 # for 'individuals' qqnorm( seed.full.lme1, ~ranef(.) , id = 0.10, adj = 1 ) # Two outliers, rest # look OK. # examine the parameters of the final model summary( seed.full.lme1) anova( seed.full.lme1) # weak effect of tree class on seed biomass # Drop year and see is if a better fit to the data seed.full.lme4 <- update( seed.full.lme1, fixed = ~ tree.class ) anova( seed.full.lme1, seed.full.lme4) # same but comparison # not valid unless models are re-fit via ML) seed.full.lme1ML <- update( seed.full.lme1, method = "ML") seed.full.lme4ML <- update( seed.full.lme4, method = "ML") anova( seed.full.lme1ML, seed.full.lme4ML) # model without year no better anova( seed.full.lme4) # weak effect of tree.class # perform Tukey test of differences among levels of tree.class on the simple # model seed.full.lme4Tukey <- summary( glht( seed.full.lme4, linfct = mcp( tree.class = "Tukey") ) ) seed.full.lme4Tukey # Significant difference between class N and class B # 2.3 Genotype vs. seed weight - partial data set ------------------------------ # create a subset of the biomass data biomassSeed <- subset( biomass, # select all trees that were not sampled twice sampledTwice == "no", select = c("tree.number", "tree.class", "year", "seed.weight") ) # fit the model seed.lme1 <- lme( seed.weight ~ tree.class + year, random = ~1 | tree.number, data = biomassSeed) seed.lme1 summary(seed.lme1) # examine intervals of random effects intervals( seed.lme1, which = "var-cov") # random effects bounded # above 0, indicating it should # be kept in the model. # check the model assumptions # assumption 1 # residual box plot plot( seed.lme1, tree.number~resid(.), abline = 0) # quite a bit of variance # among individuals, but looks OK. # standardized residuals by year plot( seed.lme1, resid(., type = "p") ~ fitted(.) | year, abline = 0 ) # monotonic increases in 2011 # refit using a heteroscedastic model for year see if fit improves seed.lme2 <- update( seed.lme1, weights = varIdent( form = ~ 1 | year) ) anova( seed.lme1, seed.lme2) # no effect of transformation # standardized residuals by tree class plot( seed.lme1, resid(., type = "p") ~ fitted(.) | tree.class, abline = 0 ) # monotonic increases in most classes # refit using a heteroscedastic model and examine to see if fit improves seed.lme3 <- update( seed.lme1, weights = varIdent( form = ~ 1 | tree.class) ) anova( seed.lme1, seed.lme2, seed.lme3) # homoscedastic is better # qq norm plot to check normality qqnorm( seed.lme1, ~resid(.) | year) # looks OK qqnorm( seed.lme1, ~resid(.) | tree.class) # looks OK # assumption 2 # for 'individuals' qqnorm( seed.lme1, ~ranef(.) , id = 0.10, adj = 1 ) # is a single # outlier BPSF-004. Rest look OK # examine the parameters of the final model summary( seed.lme1) anova( seed.lme1) # no effect of any variables on seed biomass # same effect as before where pattern being driven by 4 double-sampled trees # 3.0 Effect of genotype on capsule weight ===================================== # 3.1 Plot capsule weight versus tree class ------------------------------------ bcap <- ggplot( biomass, aes( x = tree.class, y = capsule.weight) ) biomass.capsule <- bcap + geom_boxplot( outlier.size = 0.5, aes( fill = year) ) + labs( y = "capsule weight (g)", x = "tree class" ) + opts(title="capsule biomass") biomass.capsule # 3.2 Genotype vs. capsule weight - full data set ------------------------------ # fit the model cap.full.lme1 <- lme( capsule.weight ~ tree.class + year, random = ~1 | treeNumberYear, data = biomass) cap.full.lme1 summary(cap.full.lme1) intervals( cap.full.lme1, which = "var-cov") # random effects bounded # above 0, indicating it should # be kept in the model. # check the model assumptions # assumption 1 # residual box plot plot( cap.full.lme1, treeNumberYear~resid(.), abline = 0) # quite a bit of variance # among individuals, but looks OK. # standardized residuals by year plot( cap.full.lme1, resid(., type = "p") ~ fitted(.) | year, abline = 0 ) #looks OK # standardized residuals by tree class plot( cap.full.lme1, resid(., type = "p") ~ fitted(.) | tree.class, abline = 0 ) # looks OK # qq norm plot to check normality qqnorm( cap.full.lme1, ~resid(.) | year) # looks OK qqnorm( cap.full.lme1, ~resid(.) | tree.class) # looks OK # assumption 2 # for 'individuals' qqnorm( cap.full.lme1, ~ranef(.) , id = 0.10, adj = 1 ) # Two outliers, rest # look OK. # examine the parameters of the final model summary( cap.full.lme1) anova( cap.full.lme1) # effect of tree class on capsule biomass # Drop year and see is if a better fit to the data cap.full.lme2 <- update( cap.full.lme1, fixed = ~ tree.class ) anova( cap.full.lme1, cap.full.lme2) # same but comparison not meaningfull cap.full.lme1ML <- update( cap.full.lme1, method = "ML") cap.full.lme2ML <- update( cap.full.lme2, method = "ML") anova( cap.full.lme1ML , cap.full.lme2ML) # model's are the same anova( cap.full.lme2) # signifciant effect of tree.class # perform Tukey test of differences among levels of tree.class on the simple # model cap.full.lme2Tukey <- summary( glht( cap.full.lme2, linfct = mcp( tree.class = "Tukey") ) ) # 3.3 Genotype vs. capsule weight - partial data set --------------------------- # create a subset of the biomass data biomassCapsule <- subset( biomass, # select all trees from 2009, and all trees from # 2011 that were not sampled twice sampledTwice == "no", select = c("tree.number", "tree.class", "year", "capsule.weight") ) # fit the model cap.lme1 <- lme( capsule.weight ~ tree.class + year, random = ~1 | tree.number, data = biomassCapsule) cap.lme1 summary(cap.lme1) # examine the intervals of the random effects intervals( cap.lme1, which = "var-cov") # random effect needed # check the model assumptions # assumption 1 # residual box plot plot( cap.lme1, tree.number~resid(.), abline = 0) # quite a bit of variance # among individuals, are mostly centered at 0 # standardized residuals by year plot( cap.lme1, resid(., type = "p") ~ fitted(.) | year, abline = 0 ) # most look homocedastic # standardized residuals by tree class plot( cap.lme1, resid(., type = "p") ~ fitted(.) | tree.class, abline = 0 ) # some heteroscedascity # fit the heteroscedastic model and compare cap.lme2 <- update( cap.lme1, weights = varIdent( form = ~1 | tree.class) ) anova(cap.lme1, cap.lme2) # homoscedastic model is better # qq norm plot to check normality qqnorm( cap.lme1, ~resid(.) | year) # looks OK qqnorm( cap.lme1, ~resid(.) | tree.class) # looks OK # assumption 2 # for 'individuals' qqnorm( cap.lme1, ~ranef(.) , id = 0.10, adj = -1 ) # is a single # outlier BPSF-053. Rest look OK # examine the parameters of the final model summary( cap.lme1) anova( cap.lme1) # effect of tree.class on capsule weight # drop year and re-fit the model cap.lme3 <- update( cap.lme1, fixed = ~ tree.class ) # check is if a better fit to the data anova( cap.lme1, cap.lme3) # no difference among models, # logLik values are not valid unless models # are re-fit via ML cap.lme1ML <- update( cap.lme1, method = "ML") cap.lme3ML <- update( cap.lme3, method = "ML") anova( cap.lme1ML, cap.lme3ML) # no difference among models # perform Tukey test of differences among levels of tree.class in the simpler # model cap.lme3Tukey <- summary( glht( cap.lme3, linfct = mcp( tree.class = "Tukey") ) ) cap.lme3Tukey # Significant differences between class D & class B # N & B, # 4.0 Effect of genotype on stem weight ======================================== # 4.1 Plot stem weight versus tree class --------------------------------------- bstem <- ggplot( biomass, aes( x = tree.class, y = stem.weight) ) biomass.stem <- bstem + geom_boxplot( outlier.size = 0.5, aes( fill = year) ) + labs( y = "stem weight (g)", x = "tree class" ) + opts(title="stem biomass") biomass.stem # 4.2 Genotype vs. stem weight - full data set --------------------------------- # fit the model stem.full.lme1 <- lme( stem.weight ~ tree.class + year, random = ~1 | treeNumberYear, data = biomass, na.action = na.omit) # are 4 missing values in the data # this tells lme to ignore those records stem.full.lme1 summary(stem.full.lme1) intervals( stem.full.lme1, which = "var-cov") # random effects bounded # above 0, indicating it should # be kept in the model. # check the model assumptions # assumption 1 # residual box plot plot( stem.full.lme1, treeNumberYear~resid(.), abline = 0) # quite a bit of # variance among individuals, but looks OK. # standardized residuals by year plot( stem.full.lme1, resid(., type = "p") ~ fitted(.) | year, abline = 0 ) #looks OK # standardized residuals by tree class plot( stem.full.lme1, resid(., type = "p") ~ fitted(.) | tree.class, abline = 0 ) # some heteroscedascity # fit a heteroscedastic model and compare: stem.full.lme2 <- update( stem.full.lme1, weights = varIdent( form = ~ 1 | tree.class) ) anova( stem.full.lme1, stem.full.lme2) # homoscedastic is better # qq norm plot to check normality qqnorm( stem.full.lme1, ~resid(.) | year) # looks OK qqnorm( stem.full.lme1, ~resid(.) | tree.class) # looks OK # assumption 2 # for 'individuals' qqnorm( stem.full.lme1, ~ranef(.) , id = 0.10, adj = 1 ) # Two outliers, # plot is a little bent. But not bad # examine the parameters of the final model summary( stem.full.lme1) anova( stem.full.lme1) # effect of tree.class on capsule weight # drop year and re-fit the model stem.full.lme3 <- update( stem.full.lme1, fixed = ~ tree.class ) # check is if a better fit to the data anova( stem.full.lme1, stem.full.lme3) # no difference among models, # logLik values are not valid unless models # are re-fit via ML stem.full.lme1ML <- update( stem.full.lme1, method = "ML") stem.full.lme3ML <- update( stem.full.lme3, method = "ML") anova( stem.full.lme1ML, stem.full.lme3ML) # no difference among models # perform Tukey test of differences among levels of tree.class in the simpler # model stem.full.lme3Tukey <- summary( glht( stem.full.lme3, linfct = mcp( tree.class = "Tukey") ) ) stem.full.lme3Tukey # Significant differences between class D & class B # N & B, # 4.3 Genotype vs. stem weight - partial data set ------------------------------ # create a subset of the biomass data biomassStem <- subset( biomass, # select all trees that were not sampled twice sampledTwice == "no", select = c("tree.number", "tree.class", "year", "stem.weight") ) # fit the model stem.lme1 <- lme( stem.weight ~ tree.class + year, random = ~1 | tree.number, data = biomassStem, na.action = na.omit) # are 4 missing values in the data # this tells lme to ignore those records stem.lme1 summary(stem.lme1) # examine the intervals of the random effects intervals( stem.lme1, which = "var-cov") # random effect needed # check the model assumptions # assumption 1 # residual box plot plot( stem.lme1, tree.number~resid(.), abline = 0) # quite a bit of variance # among individuals, are still centered at 0 # standardized residuals by year plot( stem.lme1, resid(., type = "p") ~ fitted(.) | year, abline = 0 ) # some heteroscedascity # fit the heteroscedastic model and see if fit improves stem.lme2 <- update( stem.lme1, weights = varIdent( form = ~1 | year) ) anova( stem.lme1, stem.lme2) # no difference among models # standardized residuals by tree class plot( stem.lme1, resid(., type = "p") ~ fitted(.) | tree.class, abline = 0 ) # some heteroscedascity # fit the heteroscedastic model and compare stem.lme3 <- update( stem.lme1, weights = varIdent( form = ~1 | tree.class) ) anova(stem.lme1, stem.lme3) # homoscedastic model is better # qq norm plot to check normality qqnorm( stem.lme1, ~resid(.) | year) # looks OK qqnorm( stem.lme1, ~resid(.) | tree.class) # looks OK # assumption 2 # for 'individuals' qqnorm( stem.lme1, ~ranef(.) , id = 0.10, adj = -1 ) # looks OK # examine the parameters of the final model summary( stem.lme1) anova( stem.lme1) # significant effect of tree class # drop year and re-fit the model stem.lme4 <- update( stem.lme1, fixed = ~ tree.class ) # check is if a better fit to the data anova( stem.lme1, stem.lme4) # model with year is better fit but, # logLik values are not valid unless models # are re-fit via ML stem.lme1ML <- update( stem.lme1, method = "ML") stem.lme4ML <- update( stem.lme4, method = "ML") anova( stem.lme1ML, stem.lme4ML) # no difference among models anova( stem.lme4) # perform Tukey test of differences among levels of tree.class in the simpler # model stem.lme4Tukey <- summary( glht( stem.lme4, linfct = mcp( tree.class = "Tukey") ) ) stem.lme4Tukey # signifcant difference between class D and class B, # X & D and X & N # 5.0 Effect of tree genotype on cotton weight ================================= # 5.1 Plot cotton weight versus tree class ------------------------------------- bcot <- ggplot( biomass, aes( x = tree.class, y = cotton.weight) ) biomass.cotton <- bcot + geom_boxplot( outlier.size = 0.5, aes( fill = year) ) + labs( y = "cotton weight (g)", x = "tree class" ) + opts(title = "cotton biomass") biomass.cotton # 5.2 Genotype vs. cotton weight - complete data set --------------------------- # fit the model cot.full.lme1 <- lme( cotton.weight ~ tree.class + year, random = ~1 | treeNumberYear, data = biomass ) cot.full.lme1 summary(cot.full.lme1) # examine the intervals of the random effects intervals( cot.full.lme1, which = "var-cov") # random effect needed # check the model assumptions # assumption 1 # residual box plot plot( cot.full.lme1, treeNumberYear~resid(.), abline = 0) # quite a bit of # variance among individuals, all but one centred at 0, one large # outlier # standardized residuals by year plot( cot.full.lme1, resid(., type = "p") ~ fitted(.) | year, abline = 0 ) # homocedastic # standardized residuals by tree class plot( cot.full.lme1, resid(., type = "p") ~ fitted(.) | tree.class, abline = 0 ) # most look homocedastic # qq norm plot to check normality qqnorm( cot.full.lme1, ~resid(.) | year) # looks OK qqnorm( cot.full.lme1, ~resid(.) | tree.class) # looks OK # assumption 2 # for 'individuals' qqnorm( cot.full.lme1, ~ranef(.) , id = 0.10, adj = -1 ) # one outlier, rest OK # examine the parameters of the final model summary( cot.full.lme1) anova( cot.full.lme1) # effect of year on capsule weight # drop tree.class and re-fit the model cot.full.lme2 <- update( cot.full.lme1, fixed = ~ year ) # check is if a better fit to the data anova( cot.full.lme1, cot.full.lme2) # model with year is better fit but, # logLik values are not valid unless models # are re-fit via ML cot.full.lme1ML <- update( cot.full.lme1, method = "ML") cot.full.lme2ML <- update( cot.full.lme2, method = "ML") anova( cot.full.lme1ML, cot.full.lme2ML) # simpler model better anova(cot.full.lme2) #marginal effect of year on cotton weight (not. sig # at 0.05 level). Means tree class is accounting # for some of the variability. # perform Tukey test of differences among levels of year in the simpler # model cot.full.lme2Tukey <- summary( glht( cot.full.lme2, linfct = mcp( year = "Tukey") ) ) cot.full.lme2Tukey # very small difference between class 2009 and 2011 # likely not biologically significant # 5.3 Genotype vs. cotton weight - partial data set ---------------------------- # create a subset of the biomass data biomassCotton <- subset( biomass, # select all trees that were not sampled twice sampledTwice == "no", select = c("tree.number", "tree.class", "year", "cotton.weight") ) # fit the model cot.lme1 <- lme( cotton.weight ~ tree.class + year, random = ~1 | tree.number, data = biomassCotton ) cot.lme1 summary(cot.lme1) # examine the intervals of the random effects intervals( cot.lme1, which = "var-cov") # random effect needed # check the model assumptions # assumption 1 # residual box plot plot( cot.lme1, tree.number~resid(.), abline = 0) # quite a bit of variance # among individuals, all but one centred at 0 # standardized residuals by year plot( cot.lme1, resid(., type = "p") ~ fitted(.) | year, abline = 0 ) # most look homocedastic # standardized residuals by tree class plot( cot.lme1, resid(., type = "p") ~ fitted(.) | tree.class, abline = 0 ) # most look homocedastic # qq norm plot to check normality qqnorm( cot.lme1, ~resid(.) | year) # looks OK qqnorm( cot.lme1, ~resid(.) | tree.class) # looks OK # assumption 2 # for 'individuals' qqnorm( cot.lme1, ~ranef(.) , id = 0.10, adj = -1 ) # looks OK # examine the parameters of the final model summary( cot.lme1) anova( cot.lme1) # no effect of either factor on capsule weight # 6.0 Effect of genotype on 100 seed weight ==================================== # 6.1 Plot 100 Seed weight versus tree class ----------------------------------- bhsw <- ggplot( biomass, aes( x = tree.class, y = X100.seed.weight) ) biomass.hsw <- bhsw + geom_boxplot( outlier.size = 0.5, aes( fill = year) ) + labs( y = "weight of 100 seeds (g)", x = "tree class" ) + opts(title = "100 seed biomass") biomass.hsw # 6.2 Genotype vs. 100 seed weight - complete data set ------------------------- # fit the model hsw.full.lme1 <- lme( X100.seed.weight ~ tree.class + year, random = ~1 | treeNumberYear, data = biomass ) hsw.full.lme1 summary(hsw.full.lme1) # examine the intervals of the random effects intervals( hsw.full.lme1, which = "var-cov") # random effect needed # check the model assumptions # assumption 1 # residual box plot plot( hsw.full.lme1, treeNumberYear~resid(.), abline = 0) # OK # standardized residuals by year plot( hsw.full.lme1, resid(., type = "p") ~ fitted(.) | year, abline = 0 ) # most look homocedastic # standardized residuals by tree class plot( hsw.full.lme1, resid(., type = "p") ~ fitted(.) | tree.class, abline = 0 ) # most look homocedastic # qq norm plot to check normality qqnorm( hsw.full.lme1, ~resid(.) | year) # looks OK qqnorm( hsw.full.lme1, ~resid(.) | tree.class) # looks OK # assumption 2 # for 'individuals' qqnorm( hsw.full.lme1, ~ranef(.) , id = 0.10, adj = -1 ) # looks OK # examine the parameters of the final model summary( hsw.full.lme1) anova( hsw.full.lme1) # effect of tree on 100 seed weight, no effect of year # drop year and re-fit the model hsw.full.lme2 <- update( hsw.full.lme1, fixed = ~ tree.class ) # check is if a better fit to the data anova( hsw.full.lme1, hsw.full.lme2) # model with year is better fit but, # logLik values are not valid unless models # are re-fit via ML hsw.full.lme1ML <- update( hsw.full.lme1, method = "ML") hsw.full.lme2ML <- update( hsw.full.lme2, method = "ML") anova( hsw.full.lme1ML, hsw.full.lme2ML) # simpler model no different anova(hsw.full.lme2) # perform Tukey test of differences among levels of tree class in the simpler # model hsw.full.lme2Tukey <- summary( glht( hsw.full.lme2, linfct = mcp( tree.class = "Tukey") ) ) hsw.full.lme2Tukey # signifcant difference between N&D # 6.3 Genotype vs. 100 seed weight - partial data set -------------------------- # create a subset of the biomass data biomassHsw <- subset( biomass, # select all trees that were not sampled twice sampledTwice == "no", select = c("tree.number", "tree.class", "year", "X100.seed.weight") ) # fit the model hsw.lme1 <- lme( X100.seed.weight ~ tree.class + year, random = ~1 | tree.number, data = biomassHsw ) hsw.lme1 summary(hsw.lme1) # examine the intervals of the random effects intervals( hsw.lme1, which = "var-cov") # random effect needed # check the model assumptions # assumption 1 # residual box plot plot( hsw.lme1, tree.number~resid(.), abline = 0) # OK # standardized residuals by year plot( hsw.lme1, resid(., type = "p") ~ fitted(.) | year, abline = 0 ) # most look homocedastic # standardized residuals by tree class plot( hsw.lme1, resid(., type = "p") ~ fitted(.) | tree.class, abline = 0 ) # most look homocedastic # qq norm plot to check normality qqnorm( hsw.lme1, ~resid(.) | year) # looks OK qqnorm( hsw.lme1, ~resid(.) | tree.class) # looks OK # assumption 2 # for 'individuals' qqnorm( hsw.lme1, ~ranef(.) , id = 0.10, adj = -1 ) # looks OK # examine the parameters of the final model summary( hsw.lme1) anova( hsw.lme1) # effect of tree on 100 seed weight, no effect of year # drop tree.class and re-fit the model hsw.lme2 <- update( hsw.lme1, fixed = ~ tree.class ) # check is if a better fit to the data anova( hsw.lme1, hsw.lme2) # model with year is better fit but, # logLik values are not valid unless models # are re-fit via ML hsw.lme1ML <- update( hsw.lme1, method = "ML") hsw.lme2ML <- update( hsw.lme2, method = "ML") anova( hsw.lme1ML, hsw.lme2ML) # simpler model no different anova(hsw.lme2) # perform Tukey test of differences among levels of tree class in the simpler # model hsw.lme2Tukey <- summary( glht( hsw.lme2, linfct = mcp( tree.class = "Tukey") ) ) hsw.lme2Tukey # signifcant difference between D & B and N&D # S1.2 Yields of reproductive structures ======================================= # Libraries -------------------------------------------------------------------- library(ggplot2) # for plotting library(lme4) # for model fitting library(multcomp) # for multiple post-hoc comparisons library(coefplot2) # for plotting model co-efficients (visual test if # different from 0) # Note not on CRAN, can get it here (note dependencies): # install.packages("coefplot2", # repos="http://www.math.mcmaster.ca/bolker/R", # type="source") # Functions -------------------------------------------------------------------- calculateDispersion <- function( model, data) { # follows approach in Bolker's worked examples # on http://glmm.wikidot.com) # residual deviance rdev <- sum( residuals( model)^2) # model degrees of freedom mdf <- length( fixef( model) ) # residual degrees of freedom rdf <- nrow( data) - mdf # calculate dispersion dispersion <- rdev/rdf # do the chi-square test for overdispersion prob.disp <- pchisq( rdev, rdf, lower.tail = FALSE, log.p = TRUE ) results <- list( dispersion = dispersion, dispersionTest = prob.disp) return(results) } merResidualPlot <- function( model) { # calculate some needed values fits <- fitted( model) resids <- residuals( model) lengthFits <- length( fitted( model) ) plot( fits, resids, # do the plot xlab = "fitted values", ylab = "residauls" ) rvec <- seq( 0, max(fits), # vector for loess length = lengthFits) lines( rvec, predict( loess( resids ~ fits), newdata = rvec), col = 2, lwd = 2) abline( h = 0, col = "grey") } # Datasets --------------------------------------------------------------------- biomass <- read.csv( "Biomass.csv") # parse the biomass data to exclude class 'X' biomass <- biomass[ biomass$tree.class %in% c("B", "D", "N"),] # Checking data ================================================================ head( biomass) # OK str( biomass) # age.class and year are recognized as integers, should be factors # set age.class as a ordered factor and year as a factor biomass$age.class <- factor( biomass$age.class, levels = c(1,2,3,4) ) biomass$year <- as.factor( biomass$year) # Some of the trees in this dataset were sampled twice, once in 2009 and again # in 2011. table( biomass$tree.number, biomass$year) # four trees were sampled in both years # following procedure in biomass analysis, analyze all data together, # then do a sub-analysis dropping the double sampled trees. # create a modified tree.number field to seperate 2009 and 2011 samples biomass$treeNumberYear <- paste( biomass$tree.number, "_", biomass$year, sep = "") # 1 Effect of genotype and year on number of capsules ========================== # 1.1 Plot capsule number by tree class ---------------------------------------- cc <- ggplot( biomass, aes( x = tree.class, y = capsule.number) ) plotCountCaps <- cc + geom_boxplot( outlier.size = 0.5, aes( fill = year) ) + labs( y = "number of capsules", x = "tree class" ) + labs(title="number of capsules") plotCountCaps # data look to be unbalanced with respect to year, and otherwise # unremarkable # 1.2 Genotype vs. capsule number - full model --------------------------------- # fit a lmList model (lme4 version!) to see if random effect terms are needed # for each individual caps.full.lmlist1 <- lmList( capsule.number ~ 1 | tree.number, biomass, family = poisson) caps.full.lmlist1 # all intercepts are > 0, suggests a random effect term may # be justified # fit the full random effects model, # note model is additive. Not interested in changes of tree class within each # year. Only want to account for year to year variation caps.full.lmer1 <- lmer( capsule.number ~ tree.class + year + (1|treeNumberYear), data = biomass, family = poisson ) # data are counts caps.full.lmer1 # examine the model caps.full.dispersion <- calculateDispersion( caps.full.lmer1, biomass) caps.full.dispersion # dispersion is 0.55 and prob is -5.3e-0.8 # values >4 suggest Poisson is a poor fit. values # approaching 1 are good. This value is acceptable # probability of dispersal is also pretty good. # plot the residuals merResidualPlot(caps.full.lmer1) # looks OK # conditional means of random effects # random effects for each tree # for tree number plot( ranef(caps.full.lmer1), aspect = 1, type = c("g","p"))[[1]] # tree.number are not great, but not bad. qqmath( ranef( caps.full.lmer1, post = TRUE) )$treeNumberYear # structure not bad, and lack of 0 intercept suggests all are needed # examine the model summary( caps.full.lmer1) coefplot2( caps.full.lmer1) # test to see if there is a signifcant effect of year, tree.class caps.full.lmer1NoClass <- lmer( capsule.number ~ year + (1|treeNumberYear), data = biomass, family = poisson ) caps.full.lmer1NoYear <- lmer( capsule.number ~ tree.class + (1|treeNumberYear), data = biomass, family = poisson ) caps.full.lmer1Degen <- lmer( capsule.number ~ (1|treeNumberYear), data = biomass, family = poisson ) anova( caps.full.lmer1Degen, caps.full.lmer1NoYear, caps.full.lmer1NoClass, caps.full.lmer1) # significant effect of class, no effect of year summary( caps.full.lmer1NoYear) # post-hoc test on the model with only 'class' effects caps.full.lmer1NoYearTukey <- summary( glht( caps.full.lmer1NoYear, linfct = mcp( tree.class = "Tukey") ) ) caps.full.lmer1NoYearTukey # 1.3 Genotype vs. capsule number - partial model ------------------------------ # create a subset of the data countCapsules <- subset( biomass, sampledTwice == "no", select = c("tree.number", "tree.class", "year", "capsule.number") ) # drop records with no capsule data countCapsules <- countCapsules[ !is.na( countCapsules$capsule.number), ] # fit a lmList model (lme4 version!) to see if random effect terms are needed # for each individual caps.lmlist1 <- lmList( capsule.number ~ 1 | tree.number, countCapsules, family = poisson) caps.lmlist1 # all intercepts are > 0, suggests a random effect term may # be justified # fit the full random effects model, # note model is additive. Not interested in changes of tree class within each # year. Only want to account for year to year variation caps.lmer1 <- lmer( capsule.number ~ tree.class + year + (1|tree.number), data = countCapsules, family = poisson ) # data are counts caps.lmer1 # examine the model # check for overdispersion (Chi-square method) (caps.dispersion <- calculateDispersion( caps.lmer1, countCapsules)) # 0.66, test statistic is -0.002. Slight under-dispersion but # not bad.values >4 suggest Poisson is a poor fit. values # approaching 1 are good. This value is acceptable # plot the residuals merResidualPlot(caps.lmer1) # looks OK # conditional means of random effects # random effects for each tree plot( ranef(caps.lmer1), aspect = 1, type = c("g","p"))[[1]] # tree.number # are not great, but not bad. qqmath( ranef( caps.lmer1, post = TRUE) )$tree.number # a few extreme intercepts >0, but difficult to approximate with only 3 or 4 # measures per individual # examine the model summary( caps.lmer1) coefplot2( caps.lmer1) # test to see if there is a signifcant effect of year, tree.class caps.lmer1NoClass <- lmer( capsule.number ~ year + (1|tree.number), data = countCapsules, family = poisson ) caps.lmer1NoYear <- lmer( capsule.number ~ tree.class + (1|tree.number), data = countCapsules, family = poisson ) caps.lmer1Degen <- lmer( capsule.number ~ (1|tree.number), data = countCapsules, family = poisson ) anova( caps.lmer1, caps.lmer1NoClass, caps.lmer1NoYear, caps.lmer1Degen ) # significant effect of tree class, not of year # post-hoc test on the model with only 'class' effects caps.lmer1NoYearTukey <- summary( glht( caps.lmer1NoYear, linfct = mcp( tree.class = "Tukey") ) ) # 2 Effect of genotype and year on number of seeds ============================= # 2.1 Plot of capsule number by tree class ------------------------------------- cs <- ggplot( biomass, aes( x = tree.class, y = seeds.per.catkin ) ) plotCountSeeds <- cs + geom_boxplot( outlier.size = 0.5, aes( fill = year) ) + labs( y = "number of seeds", x = "tree class" ) + opts(title="number of seeds") plotCountSeeds # 2.2 Genotype vs. number of seeds - full data set ----------------------------- # fit a lmList model (lme4 version!) to see if random effect terms are needed # for each individual seeds.full.lmlist1 <- lmList( seeds.per.catkin ~ 1 | tree.number, biomass, family = poisson) seeds.full.lmlist1 # all intercepts are > 0, suggests a random effect term may # be justified # fit the full random effects model, # Note the model is additive. Not interested in changes of tree class within # year. Only want to account for year to year variation seeds.full.lmer1 <- lmer( seeds.per.catkin ~ tree.class + year + (1|treeNumberYear), data = biomass, family = poisson ) # data are counts seeds.full.lmer1 # examine the model # check for overdispersion (Chi-square method) (seeds.full.lmer1Disp <- calculateDispersion( seeds.full.lmer1, biomass) ) # dispersion is 17.9, test stat is -1376 # Suggests poisson is a bad fit to the data # try fitting the log-poisson model to see if improves the fit biomass$obs <- 1:nrow(biomass) seeds.full.lmer2 <- lmer( seeds.per.catkin ~ tree.class + year + (1|treeNumberYear) + (1|obs), data = biomass, family = poisson ) anova( seeds.full.lmer1, seeds.full.lmer2) # yes, a lot! # check dispersion in the new model (seeds.full.lmer2Disp <- calculateDispersion( seeds.full.lmer2, biomass) ) # dispersion is now 0.08, test stat is -5.39. However this method of examining # dispersion might not be appropirate for the log-poisson model. # continuing, # plot the residuals merResidualPlot( seeds.full.lmer2) # some odd-ness to this structure, slightly cone-shaped # conditional means of random effects # random effects for each tree plot( ranef(seeds.full.lmer2), aspect = 1, type = c("g","p"))[[1]] # tree.number # are not great, but not bad. # these look OK, but not looking for all to intercept 0, is normal for # the distribution being investigated. qqmath( ranef( seeds.full.lmer2, post = TRUE) )$treeNumberYear qqmath( ranef( seeds.full.lmer2, post = TRUE) )$obs # test to see if there is a signifcant effect of year, tree.class seeds.full.lmer2NoClass <- lmer( seeds.per.catkin ~ year + (1|treeNumberYear) + (1|obs), data = biomass, family = poisson ) seeds.full.lmer2NoYear <- lmer( seeds.per.catkin ~ tree.class + (1|treeNumberYear) + (1|obs), data = biomass, family = poisson ) # the degeerate model (random effects only) seeds.full.lmer2Degen <- lmer( seeds.per.catkin ~ (1|treeNumberYear) + (1|obs), data = biomass, family = poisson ) # no effect of any variable. Models basically the same as the degenerate. anova( seeds.full.lmer2, seeds.full.lmer2NoClass, seeds.full.lmer2NoYear, seeds.full.lmer2Degen) # 2.3 Genotype vs. number of seeds - partial data set -------------------------- # create a subset of the data countSeeds <- subset( biomass, # select all trees from 2009, and all trees from # 2011 that were not sampled twice sampledTwice == "no", select = c("tree.number", "tree.class", "year", "seeds.per.catkin") ) # fit a lmList model (lme4 version!) to see if random effect terms are needed # for each individual seeds.lmlist1 <- lmList( seeds.per.catkin ~ 1 | tree.number, countSeeds, family = poisson) seeds.lmlist1 # all intercepts are > 0, suggests a random effect term may # be justified # fit the full random effects model, # Note the model is additive. Not interested in changes of tree class within # year. Only want to account for year to year variation seeds.lmer1 <- lmer( seeds.per.catkin ~ tree.class + year + (1|tree.number), data = countSeeds, family = poisson ) # data are counts seeds.lmer1 # examine the model # check for overdispersion (Chi-square method) (seeds.lmer1Disp <- calculateDispersion( seeds.lmer1, countSeeds) ) # 17.4, test stat is -864. suggest model may be a poor fit # try fitting the log-poisson model to see if improves the fit countSeeds$obs <- 1:nrow(countSeeds) seeds.lmer2 <- lmer( seeds.per.catkin ~ tree.class + year + (1|tree.number) + (1|obs), data = countSeeds, family = poisson ) anova( seeds.lmer1, seeds.lmer2) # yes, a lot! # check dispersion in the new model (seeds.lmer2Disp <- calculateDispersion( seeds.lmer2, countSeeds) ) # 0.08, test stat is -7.51e-45, now underdispered. Underdispersed means model # estimates will be conservative (SE's smaller than they should be. # these alternate versions of the model do improve the fit but dont' deal well # with overdispersion. Will use log poission model just have to deal # with reduced estimates. Less of an issue here as not attmepting prediction # just seeing if there is an efffect. # working with seeds.lmer2: # test to see if there is a signifcant effect of year, tree.class seeds.lmer2NoClass <- lmer( seeds.per.catkin ~ year + (1|tree.number) + (1|obs), data = countSeeds, family = poisson ) seeds.lmer2NoYear <- lmer( seeds.per.catkin ~ tree.class + (1|tree.number) + (1|obs), data = countSeeds, family = poisson ) # the degeerate model (random effects only) seeds.lmer2Degen <- lmer( seeds.per.catkin ~ (1|tree.number) + (1|obs), data = countSeeds, family = poisson ) # compare the drop-term models to the additive and interaction model anova( seeds.lmer2, seeds.lmer2NoClass, seeds.lmer2NoYear, seeds.lmer2Degen) # no effect of either term # this analysis is weird, no significant effect of either factor, unless you # fit an interaction term, but then only slightly better. Data seems to suggest # a trend towards difference in seed number in 2011, but not in 2009. # I'm guessing the difference in the trend among years is driving the # but it's not powerfull enough to be meaningfull. Suggests potential # for lots of variation among years? # ============================================================================== # 3 Effect of genotype and year on number of seeds per capsule ================= # 3.1 Plot of capsule number by tree class ------------------------------------- spy <- ggplot( biomass, aes( x = tree.class, y = seeds.per.capsule ) ) plotCountSeedsPerCap <- spy + geom_boxplot( outlier.size = 0.5, aes( fill = year) ) + labs( y = "number of seeds per capsule", x = "tree class" ) + labs(title="number of seeds per capsule") plotCountSeedsPerCap # 3.2 Genotype vs. seeds per capsule - full data set -------------------------- # fit a lmList model to see if random effects are neccesary ( seedsPerCap.full.lmlist1 <- lmList( seeds.per.catkin ~ 1 | tree.number, biomass, family = poisson, offset = log(capsule.number) ) ) # all intercepts are > 0, suggests a random effect term may be justified # fit the full model seedsPer.full.lmer1 <- lmer( seeds.per.catkin ~ tree.class + year + (1|treeNumberYear), data = biomass, family = poisson, # data are counts offset = log(capsule.number) ) seedsPer.full.lmer1 # examine the model # check for overdispersion (Chi-square method) (seedsPer.full.lmer1Disp <- calculateDispersion( seedsPer.full.lmer1, biomass) ) # 11.9 and test stat of -830 suggest model may be a poor fit # fit a log-poisson model and compare. biomass$obs <- 1:nrow(biomass) seedsPer.full.lmer2 <- lmer( seeds.per.catkin ~ tree.class + year + (1|treeNumberYear) + (1|obs), data = biomass, family = poisson, # data are counts offset = log(capsule.number) ) anova( seedsPer.full.lmer1, seedsPer.full.lmer2) # yes, a lot! (seedsPer.full.lmer2Disp <- calculateDispersion( seedsPer.full.lmer2, biomass) ) # 0.09 and test stat of -3.5*10-83 suggest model # may be a poor fit. Similar situation as with the seeds data # results should be good for estimates of model effects, # not so good for prediction. # plot the residuals of the model # plot the residuals merResidualPlot( seedsPer.full.lmer2) # looks OK # conditional means of random effects # random effects for each tree # tree.number plot( ranef(seedsPer.full.lmer2), aspect = 1, type = c("g","p"))[[1]] # are not great, but not bad. # these look OK, but not looking for all to intercept 0, is normal for # the distribution being investigated. qqmath( ranef( seedsPer.full.lmer2, post = TRUE) )$treeNumberYear qqmath( ranef( seedsPer.full.lmer2, post = TRUE) )$obs # examine the model summary( seedsPer.full.lmer2) coefplot2( seedsPer.full.lmer2) # test to see if there is a signifcant effect of year, tree.class seedsPerNoYear.lmer2NoClass <- lmer( seeds.per.catkin ~ year + (1|treeNumberYear) + (1|obs), data = biomass, family = poisson, # data are counts offset = log(capsule.number) ) seedsPerNoYear.lmer2NoYear <- lmer( seeds.per.catkin ~ tree.class + (1|treeNumberYear) + (1|obs), data = biomass, family = poisson, # data are counts offset = log(capsule.number) ) seedsPerNoYear.Degen <- lmer( seeds.per.catkin ~ (1|treeNumberYear) + (1|obs), data = biomass, family = poisson, # data are counts offset = log(capsule.number) ) anova( seedsPer.full.lmer2, seedsPerNoYear.lmer2NoClass, seedsPerNoYear.lmer2NoYear, seedsPerNoYear.Degen ) # model with both paramaters best, effect of 'year' is weak to non-significant. # regardless set up the Tukey test for an additive model # extract the contrast matricies seedsPer.full.K1 <- glht( seedsPer.full.lmer2, mcp( year = "Tukey"))$linfct seedsPer.full.K2 <- glht( seedsPer.full.lmer2, mcp( tree.class = "Tukey") )$linfct seedsPer.full.K <- rbind( seedsPer.full.K1, seedsPer.full.K2) (seedsPer.fullTukey <- summary( glht( seedsPer.full.lmer2, linfct = seedsPer.full.K) ) ) # 3.3 Genotype vs. seeds per capsule - - partial data set -------------------- # create a subset of the data countSeedsPerCap <- subset( biomass, # select all trees from 2009, and all trees from # 2011 that were not sampled twice sampledTwice == "no", select = c("tree.number", "tree.class", "year", "capsule.number", "seeds.per.catkin", "seeds.per.capsule") ) # remove records with no data countSeedsPerCap <- countSeedsPerCap[ !is.na(countSeedsPerCap$capsule.number), ] # fit a lmList model to see if random effects are neccesary seedsPerCap.lmlist1 <- lmList( seeds.per.catkin ~ 1 | tree.number, countSeedsPerCap, family = poisson, offset = log(capsule.number) ) seedsPerCap.lmlist1 # all intercepts are > 0, suggests a random effect term may # be justified seedsPer.lmer1 <- lmer( seeds.per.catkin ~ tree.class + year + (1|tree.number), data = countSeedsPerCap, family = poisson, # data are counts offset = log(capsule.number) ) seedsPer.lmer1 # examine the model # check for overdispersion (Chi-square method) (seedsPer.lmer1Disp <- calculateDispersion( seedsPer.lmer1, countSeedsPerCap) ) # 12.4 and a test stat of -524 suggest model may be a poor fit # try fitting the log-poisson model to see if improves the fit countSeedsPerCap$obs <- 1:nrow(countSeedsPerCap) seedsPer.lmer2 <- lmer( seeds.per.catkin ~ tree.class + year + (1|tree.number) + (1|obs), data = countSeedsPerCap, family = poisson, # data are counts offset = log(capsule.number) ) anova( seedsPer.lmer1, seedsPer.lmer2) # yes, a lot! # check dispersion: (seedsPer.lmer2Disp <- calculateDispersion( seedsPer.lmer2, countSeedsPerCap) ) # 0.10 and a test stat of -1.86e-35. suggest underdispersion # similar to seeds data and results of full data set. will # proceded as before. # plot the residuals merResidualPlot( seedsPer.lmer2) # slight cone-shape to the residuals but not much more that can be done # conditional means of random effects # for each observation qqmath( ranef( seedsPer.lmer2, post = TRUE) )$obs # could be better, but most # intersect 0 # random effects for each tree, look OK. Not expecting all to intecept 0 qqmath( ranef( seedsPer.lmer2, post = TRUE) )$tree.number # examine the model summary( seedsPer.lmer2) coefplot2( seedsPer.lmer2) # test to see if there is a signifcant effect of year, tree.class seedsPerNoYear.lmer2 <- lmer( seeds.per.catkin ~ tree.class + (1|tree.number) + (1|obs), data = countSeedsPerCap, family = poisson, offset = log(capsule.number) ) seedsPerNoClass.lmer2 <- lmer( seeds.per.catkin ~ year + (1|tree.number) + (1|obs), data = countSeedsPerCap, family = poisson, # data are counts offset = log(capsule.number) ) seedsPerDegen.lmer2 <- lmer( seeds.per.catkin ~ (1|tree.number) + (1|obs), data = countSeedsPerCap, family = poisson, # data are counts offset = log(capsule.number) ) anova( seedsPer.lmer2, seedsPerNoYear.lmer2, seedsPerNoClass.lmer2, seedsPerDegen.lmer2) # both terms are significant # set up the Tukey test for an additive model # extract the contrast matricies seedsPer.K1 <- glht( seedsPer.lmer2, mcp( year = "Tukey"))$linfct seedsPer.K2 <- glht( seedsPer.lmer2, mcp( tree.class = "Tukey") )$linfct seedsPer.K <- rbind( seedsPer.K1, seedsPer.K2) (seedsPerTukey <- summary( glht( seedsPer.lmer2, linfct = seedsPer.K) ) ) # more seeds/capsule in 2009 than 2011; B > X, N > D , D > X # S2.0 Analysis of Poplar Germination data ===================================== # Libraries -------------------------------------------------------------------- require(ggplot2) # for plotting require(MASS) require(plyr) require(setRNG) require(multcomp) # Datasets --------------------------------------------------------------------- germ <- read.csv( "Germination.csv") # parse the biomass data to exclude class 'X' germ <- germ[ germ$tree.class %in% c("B", "D", "N"),] germ$tree.class <- droplevels(germ$tree.class) # Checking data ================================================================ head( germ) str( germ) # set 'year' as a factor and 'age.class' as an ordered factor germ$year <- factor( germ$year) germ$age.class <- factor( germ$age.class, levels = c(1,2,3,4) ) # centre total seed weight (in milligrams) before procedeing germ$tswCentre <- scale(germ$tsw * 1000, scale = F) germ$tswCentre <- as.numeric( germ$tswCentre) # some trees were sampled in all years, some only once or twice: table( germ$tree.number, germ$year, germ$tree.class) # also look at the distribution of tree.classes sampled within years: table( germ$tree.class, germ$year) # Data explanation ------------------------------------------------------------- # These data are problematic because the number of observations are not # balanced among years and tree classes. Therefore can not test (reliably) the # combination of year and tree.class. Second problem is that some of the samples # in 2010 and 2011 were re-samples of trees sampled in 2009 and 2010, therefore # to treat these independantly may not be appropriate. Third problem is that # class D is more abundant in the data than the other trees. # Solutions # To deal with the unbalanced replication will analyze all tree classes for 2009 # only (since is a single factor analysis unbalance is not an issue). Then will # analyze year and tree class for B, D and N trees only. To address the multiple # sample issues we will re-balance the data by randomly selecting one year of # data to include in the analysis (while attempting to balance the number of # replicates of each tree class across years). # 2.0 Generate data set for analyses 3.0 & 4.0 ================================= # For this exeriment multiple samples were taken from some trees over more than # one year. To keep things balanced will be haphazardly select records from the # full set so that no tree is represented more than once in the analysis # dataset. Tree class 'X' was not sampled enough across multiple years to permit # analysis, these records will not be included. #select all records from trees sampled just once germOnce <- subset( germ, timesSampled == 1) # grab all 2011 N and D trees # explanation: fewer trees in these classes were sampled than in previous years, # would like these all to be retained in the data set so can test year over year # effects germ2011 <- subset( germ, timesSampled > 1 & tree.class != "B" & year == "2011" ) # bind the two subsets germBalance <- rbind( germOnce, germ2011) # randomly select from the remaining records: # first create a vector of unselected trees: trees <- germ$tree.number[ !germ$tree.number %in% germBalance$tree.number] # set conditions for reproducible selection oldSeed <- .Random.seed setRNG( kind="Wichmann-Hill", seed=c(500,1389,1648) ) # reproducible germMultiple <- ddply( subset( germ, tree.number %in% trees), .(tree.number), function(x){ x[ sample( nrow(x), size = 1),] } ) assign(".Random.seed", oldSeed, envir=globalenv()) # re-set the global seed # to avoid messing with other functions # merge, creating the final dataset germBalance <- rbind( germBalance, germMultiple) # examine the distribution of samples among years for the balanced dataset table( germBalance$tree.class, germBalance$year) # 3.0 Germination (all years) ================================================= # 3.1 Plot the data ------------------------------------------------------------ gmbal <- ggplot( germBalance, aes( x = tree.class, y = total.germinated/total.seed) ) plotBalanceGerm <- gmbal + geom_boxplot( outlier.size = 0.5, aes( fill = year) ) + labs( y = "total germinated/ seeds tested", x = "tree class" ) + labs(title="all years") plotBalanceGerm # relationship between thousand seed weight (in milligrams) and # proportion germinated gmbalsw <- ggplot( germBalance, aes( x = tsw * 1000, # convert grams to milligrams y = total.germinated/total.seed) ) plotTswVsGermAll <- gmbalsw + geom_point( aes( colour = tree.class, shape = year) ) + stat_smooth( method = lm, aes( colour = tree.class) ) + labs( x = "thousand seed weight (mg)", y = "proportion germinated") + labs( title = "all years") plotTswVsGermAll #3.2 Analysis ------------------------------------------------------------------ # fit the model germ.glm1 <- glm( total.germinated ~ tswCentre + tree.class + year, data = germBalance, family = poisson, offset = log( total.seed) ) germ.glm1 summary(germ.glm1) # inspect the model (visually) par( mfrow = c(2,2) ) plot( germ.glm1) # looks OK # check dispersion ##Chi-square to check overdispersion (c-hat) germ.glm1.chisq <- sum( ( ( germBalance$total.germinated - germ.glm1$fitted)^2)/ germ.glm1$fitted ) germ.glm1.c_hat <- germ.glm1.chisq/germ.glm1$df.residual # = 20 (not good) # suggests signficant overdispersion, try fitting a negative binomial germ.glmnb1 <- glm.nb( total.germinated ~ tswCentre + tree.class + year + offset( log(total.seed) ), data = germBalance ) germ.glmnb1 summary( germ.glmnb1) # inspect the model (visually) par( mfrow = c(2,2) ) plot( germ.glmnb1) # looks very good. # examine the results # see if a simpler model is suggested germ.glmnb1Step <- stepAIC(germ.glmnb1) germ.glmnb1Step$anova # suggests dropping year dropterm( germ.glmnb1Step, test = "Chisq") # supports # drop year and compare: germ.glmnb2 <- glm.nb( total.germinated ~ tswCentre + tree.class + offset( log(total.seed) ), data = germBalance ) anova( germ.glmnb2, germ.glmnb1) # model with year not significantly different # from model without year. anova( germ.glmnb2) # do Tukey comparison amoung tree.class germ.glmnb2Tukey <- summary( glht( germ.glmnb2, linfct = mcp( tree.class = "Tukey") ) ) germ.glmnb2Tukey # 4.0 Abnormal germination (all years) ========================================= # 4.1 Plot the data ------------------------------------------------------------ abbal <- ggplot( germBalance, aes( x = tree.class, y = total.abnormal/total.seed) ) plotAbnormalGerm <- abbal + geom_boxplot( outlier.size = 0.5, aes( fill = year) ) + labs( y = "total abnormal germinates / seeds tested", x = "tree class" ) + opts(title="all years") plotAbnormalGerm # relationship between thousand seed weight (in milligrams) and # proportion germinated abbalsw <- ggplot( germBalance, aes( x = tsw * 1000, # convert grams to milligrams y = total.abnormal/total.seed) ) plotTswVsAbnormalAll <- abbalsw + geom_point( aes( colour = tree.class, shape = year) ) + stat_smooth( method = lm, aes( colour = tree.class) ) + labs( x = "thousand seed weight (mg)", y = "proportion germinated") + opts( title = "all years") plotTswVsAbnormalAll # 4.2 analysis ----------------------------------------------------------------- abnorm.glm1 <- glm( total.abnormal ~ tswCentre + tree.class + year, data = germBalance, family = poisson, offset = log( total.seed) ) abnorm.glm1 summary(abnorm.glm1) # inspect the model (visually) par( mfrow = c(2,2) ) plot( abnorm.glm1) # looks OK ##Chi-square to check overdispersion (c-hat) abnorm.glm1.chisq <- sum( ( ( germBalance$total.abnormal - abnorm.glm1$fitted)^2)/ abnorm.glm1$fitted ) abnorm.glm1.c_hat <- abnorm.glm1.chisq/abnorm.glm1$df.residual # = 14.7 # suggests signficant overdispersion, try fitting a negative binomial abnorm.glmnb1 <- glm.nb( total.abnormal ~ tswCentre + tree.class + year + offset( log(total.seed) ), data = germBalance ) abnorm.glmnb1 summary( abnorm.glmnb1) # see if a simpler model is suggested abnorm.glmnb1Step <- stepAIC(abnorm.glmnb1) abnorm.glmnb1Step$anova # suggests dropping year dropterm( abnorm.glmnb1Step, test = "Chisq") # supports # drop year and compare: abnorm.glmnb2 <- glm.nb( total.abnormal ~ tswCentre + tree.class + offset( log(total.seed) ), data = germBalance ) anova( abnorm.glmnb2, abnorm.glmnb1) # model with year not significantly # different from model without year. anova( abnorm.glmnb2) # do Tukey comparison amoung tree.class abnorm.glmnb2Tukey <- summary( glht( abnorm.glmnb2, linfct = mcp( tree.class = "Tukey") ) ) # S3.0 Analysis of Fungal Innoculation data ==================================== # Libraries -------------------------------------------------------------------- require(ggplot2) # for plotting require(lme4) # for model fitting require(multcomp) # for multiple post-hoc comparisons library(coefplot2) # for plotting model co-efficients (visual test if # different from 0) # Note not on CRAN, can get it here (note dependencies): # install.packages("coefplot2", # repos="http://www.math.mcmaster.ca/bolker/R", # type="source") # Datasets --------------------------------------------------------------------- innoc <- read.csv( "Inoculations.csv") # parse the innoc data to exclude class 'X' innoc <- innoc[ innoc$tree.class %in% c("B", "D", "N"),] innoc$tree.class <- droplevels(innoc$tree.class) # Checking data ================================================================ head( innoc) str( innoc) # looks OK # 1.0 Genotype vs. number of MLP uridia ---------------------------------------- # create a subset of the data mlp <- subset( innoc, fungal.sp == "MLP", c("tree.number", "tree.class", "number.uredia", "surface.area", "uredia.cm2" ) ) # 1.1 Plot the data ------------------------------------------------------------ mlpGgplotBox <- ggplot( mlp, aes( x = tree.class, y = uredia.cm2)) mlpPlot <- mlpGgplotBox + geom_boxplot( outlier.size=0.5 ) + labs( y = " number of fungal uredia per square cm", x = "tree class") + labs( title = "MLP uredia") mlpPlot # a very odd outlier in class N mlp[ mlp$uredia.cm2 > 10, ] # apppears to be either an abnormally high number # of uredia (20), or an abnormally small leaf # (1.09 cm2) # check leaf areas hist( innoc$surface.area) # is one abnormally small leaf, rest are normally # distributed # The error is likely one in coding, the leaf area should be 10.9 cm2, not 1.09 # so to fix: mlp$surface.area[ mlp$surface.area == 1.09] <- 10.9 # and re-calculate all uredia.cm2 values (uredia per square cm) mlp <- transform( mlp, uredia.cm2 = number.uredia / surface.area) # re-plot mlpPlot mlpGgplotBox2 <- ggplot( mlp, aes( x = tree.class, y = number.uredia)) mlpPlot2 <- mlpGgplotBox2 + geom_boxplot( outlier.size=0.5 ) + labs( y = " number of fungal uredia", x = "tree class") + labs( title = "MLP uredia") mlpPlot2 # 1.2 Analysis ----------------------------------------------------------------- # fit the full model # response is the number of fungal uredia as a function of genotype. Each tree # is fit as a random effect. The size of the sampled leaf is used as a offset # (allows us to fit the actual number of uredia rather than being forced to use # number per unit surface area) mlp.lmer1 <- lmer( number.uredia ~ tree.class + (1|tree.number), data = mlp, family = poisson, offset = log(surface.area) ) mlp.lmer1 # examine the model # check for overdispersion (Chi-square method) # (follows approach on Bolker's worked examples on http://glmm.wikidot.com) (mlp.lmer1.dispersion <- calculateDispersion( mlp.lmer1, mlp ) ) # 5.39, test value is -82.555; values >4 suggest Poisson is a poor fit. # This suggests a poisson model is a poor fit to the data. # try re-fitting with a per-observation random effect (a # log-poisson distribution) mlp$obs <- 1:nrow(mlp) # create a row of obsevation numbers # re-fit the model with a random observation effect mlp.lmer2 <- lmer( number.uredia ~ tree.class + (1|tree.number) + (1|obs), data = mlp, family = poisson, offset = log(surface.area) ) mlp.lmer2 # check for overdispersion ( as before) ( mlp.lmer2.dispersion <- calculateDispersion(mlp.lmer2, mlp) ) # returns values of 0.11 for dispersion, -2.43e-18 for test # model now likely underdispersed #suggests mlp.lmer2 is it a better fit. Can adjust standard errors by # multiplying by the sqrt of 0.11 or: sqrt( mlp.lmer2.dispersion$dispersion ) # or acknowedlge that SE's on parameter estimates are 0.33 (1/3rd) too big. # explicitly test if log poisson is a better model: anova( mlp.lmer1, mlp.lmer2) # yes # insepct the model # plot the residuals merResidualPlot(mlp.lmer2) # some 0 inflation in the data # examination of random effects # conditional means of random effects # for each observation plot( ranef(mlp.lmer2), aspect = 1, type = c("g","p"))[[1]] # obs number qqmath( ranef( mlp.lmer2, post = TRUE) )$obs # all but one intercepts 0 # random effects for each tree plot( ranef(mlp.lmer2), aspect = 1, type = c("g","p"))[[2]] # tree.number # are not great, but not bad. qqmath( ranef( mlp.lmer2, post = TRUE) )$tree.number # all but one intercepts # 0, but difficult to approximate with only 3 or 4 measures # per individual # both these plots look OK. Data do suffer from some 0 inflation and # underdispersion, but not much more can be done without opting for rather # exotic methods. Suggests that findings be interpreted with caution. # examine the model summary( mlp.lmer2) # note that even with adjusting standard errors that # tree.classD would still be significant from the other # tree classes. Strong effect here suggests pattern is # robust coefplot2( mlp.lmer2) # now test to see if 'genotype' has an effect. # this can be done by liklihood ratio tests, by re-fitting the model without # the fixed effect and comparing to the full model # then comparing it (via anova) to the full model mlp.lmer2NoClass <- lmer( number.uredia ~ (1|tree.number) + (1|obs), data = mlp, family = poisson, offset = log(surface.area) ) anova( mlp.lmer2, mlp.lmer2NoClass) # yes significant effect of tree.class # post-hoc test on the significant model mlp.lmer2Tukey <- summary( glht( mlp.lmer2, linfct = mcp( tree.class = "Tukey") ) ) mlp.lmer2Tukey # 2.0 Genotype vs. number of MMD uredia ======================================== # create a subset of the data mmd <- subset( innoc, fungal.sp == "MMD", c("tree.number", "tree.class", "number.uredia", "surface.area", "uredia.cm2" ) ) # 2.1 Plot the data ------------------------------------------------------------ mmdGgplotBox <- ggplot( mmd, aes( x = tree.class, y = number.uredia)) mmdPlot <- mmdGgplotBox + geom_boxplot( outlier.size=0.5 ) + labs( y = " number of uredia ", x = "tree class") + labs( title = "MMD uredia") mmdPlot # 2.2 Analysis ----------------------------------------------------------------- # fit the full model mmd.lmer1 <- lmer( number.uredia ~ tree.class + (1|tree.number), data = mmd, family = poisson, offset = log(surface.area) ) mmd.lmer1 # examine the full model # check for overdispersion (Chi-square method) ( mmd.lmer1.dispersion <- calculateDispersion(mmd.lmer1, mmd) ) # 6.011 & test stat = -110; values >4 suggest Poisson is a poor fit. # re-fit with a per-observation random effect (a # log-poisson distribution) mmd$obs <- 1:nrow(mmd) # create a row of obsevation numbers # re-fit the model with a random observation effect mmd.lmer2 <- lmer( number.uredia ~ tree.class + (1|tree.number) + (1|obs), data = mmd, family = poisson, offset = log(surface.area) ) mmd.lmer2 # check for overdispersion (Chi-square method) ( mmd.lmer2.dispersion <- calculateDispersion(mmd.lmer2, mmd) ) # 0.22 and test stat of -8.001, ( >5 is suggested) #suggests mmd.lmer2 is it a better fit. Can adjust standard errors by # multiplying by the sqrt of 0.22 or: sqrt( mmd.lmer2.dispersion$dispersion ) # or acknowedlge that SE's on parameter estimates are 0.47 (50%) too big. # explicitly test if log poisson is a better model: anova( mmd.lmer1, mmd.lmer2) # yes # inspect the model # plot the residuals merResidualPlot(mmd.lmer2) # some 0 inflation in the data # examination of random effects # conditional means of random effects # for each observation plot( ranef(mmd.lmer2), aspect = 1, type = c("g","p"))[[1]] # obs number qqmath( ranef( mmd.lmer2, post = TRUE) )$obs # could be better, not all # intersect 0 # random effects for each tree plot( ranef(mmd.lmer2), aspect = 1, type = c("g","p"))[[2]] # all are centred # and intersect 0 qqmath( ranef( mmd.lmer2, post = TRUE) )$tree.number # all intercept 0 # examine the model summary( mmd.lmer2) # note that even with adjusting standard errors that # tree.classD would still be significant from the other # tree classes. Strong effect here suggests pattern is # robust coefplot2( mmd.lmer2) # now test to see if 'genotype' has an effect. mmd.lmer2NoClass <- lmer( number.uredia ~ (1|tree.number) + (1|obs), data = mmd, family = poisson, offset = log(surface.area) ) anova( mmd.lmer2, mmd.lmer2NoClass) # yes significant effect of tree.class # post-hoc test on the significant model mmd.lmer2Tukey <- summary( glht( mmd.lmer2, linfct = mcp( tree.class = "Tukey") ) ) mmd.lmer2Tukey # 3.0 Genotype vs. number of MO ureida ----------------------------------------- # create a subset of the data mo <- subset( innoc, fungal.sp == "MO", c("tree.number", "tree.class", "number.uredia", "surface.area", "uredia.cm2" ) ) # 3.1 Plot the data ------------------------------------------------------------ moGgplotBox <- ggplot( mo, aes( x = tree.class, y = number.uredia)) moPlot <- moGgplotBox + geom_boxplot( outlier.size=0.5 ) + labs( y = " number of uredia ", x = "tree class") + opts( title = "MO uredia") moPlot # 3.2 Analysis ----------------------------------------------------------------- # fit the full model mo.lmer1 <- lmer( number.uredia ~ tree.class + (1|tree.number), data = mo, family = poisson, offset = log(surface.area) ) mo.lmer1 # check for overdispersion (Chi-square method) (mo.lmer1.dispersion <- calculateDispersion(mo.lmer1, mo) ) # 3.32, test stat of -46; values >4 suggest Poisson is a poor fit. # proceede with poisson model. # inspect the model # plot the residuals merResidualPlot(mo.lmer1) # looks good # examination of random effects # random effects for each tree plot( ranef(mo.lmer1), aspect = 1, type = c("g","p"))[[1]] # most are centred # but not great qqmath( ranef( mo.lmer1, post = TRUE) )$tree.number # as above, some # not intersecting 0 # model looks like an OK fit to the data. # we will proceede with examining the model, this analysis should be interpeted # with caution. # examine the model summary( mo.lmer1) coefplot2( mo.lmer1) # now test to see if 'genotype' has an effect. mo.lmer1NoClass <- lmer( number.uredia ~ (1|tree.number), data = mo, family = poisson, offset = log(surface.area) ) anova( mo.lmer1, mo.lmer1NoClass) # yes significant effect of tree.class # should be interpreted with caution # because of the poor fit of the model # post-hoc test on the significant model mo.lmer1Tukey <- summary( glht( mo.lmer1, linfct = mcp( tree.class = "Tukey") ) ) mo.lmer1Tukey # S4.0 Analysis of Poplar tree characteristics ================================= library(ggplot2) library(reshape) require(MASS) require(multcomp) # Datasets --------------------------------------------------------------------- trees <- read.csv( "Stand_TreeData.csv") # parse the data to exclude class 'X' trees <- trees[ trees$tree.class %in% c("B", "D", "N"),] trees$tree.class <- droplevels(trees$tree.class) # 1.1 Plot the data ============================================================ bpAge <- ggplot( trees, aes( x = tree.class, y = ave.tree.age ) ) + geom_boxplot( outlier.size = 0.5) + labs( y = "average tree age (years)", x = "tree class", title = "tree age" ) bpAge bpDbh <- ggplot( trees, aes( x = tree.class, y = tree.DBH ) ) + geom_boxplot( outlier.size = 0.5) + labs( y = "dbh (mm)", x = "tree class", title = "tree dbh" ) bpDbh bpHeight <- ggplot( trees, aes( x = tree.class, y = tree.height ) ) + geom_boxplot( outlier.size = 0.5) + labs( y = "height (m)", x = "tree class", title = "tree height" ) bpHeight # 2.0 Analysis ================================================================= # 2.1 Tree age ----------------------------------------------------------------- # fit the model age.glm1 <- glm( ave.tree.age ~ tree.class, data = trees ) summary(age.glm1) # inspect the model (visually) par( mfrow = c(2,2) ) plot( age.glm1) # looks OK # examine the results # see if a simpler model is suggested age.glm1Step <- stepAIC(age.glm1) age.glm1Step$anova # suggests no drops dropterm( age.glm1Step, test = "Chisq") # supports anova( age.glm1, test = "F") # do Tukey comparison among tree.class age.glm1Tukey <- summary( glht( age.glm1, linfct = mcp( tree.class = "Tukey") ) ) age.glm1Tukey # 2.2 Tree DBH ----------------------------------------------------------------- # fit the model dbh.glm1 <- glm( tree.DBH ~ tree.class, data = trees ) summary(dbh.glm1) # inspect the model (visually) par( mfrow = c(2,2) ) plot( dbh.glm1) # looks OK # examine the results # see if a simpler model is suggested dbh.glm1Step <- stepAIC(dbh.glm1) age.glm1Step$anova # suggests no drops dropterm( dbh.glm1Step, test = "Chisq") # supports anova( age.glm1, test = "F") # do Tukey comparison among tree.class dbh.glm1Tukey <- summary( glht( dbh.glm1, linfct = mcp( tree.class = "Tukey") ) ) dbh.glm1Tukey # 2.3 Tree height -------------------------------------------------------------- # fit the model height.glm1 <- glm( tree.height ~ tree.class, data = trees ) summary(height.glm1) # inspect the model (visually) par( mfrow = c(2,2) ) plot( height.glm1) # looks OK # examine the results # see if a simpler model is suggested height.glm1Step <- stepAIC(height.glm1) height.glm1Step$anova # suggests no drops dropterm( height.glm1Step, test = "Chisq") # supports anova( height.glm1, test = "F") # do Tukey comparison among tree.class height.glm1Tukey <- summary( glht( height.glm1, linfct = mcp( tree.class = "Tukey") ) ) height.glm1Tukey # eof ==========================================================================