### Title: Senescence in duckweed: age-related declines in survival, reproduction, and offspring quality ### Authors: Patrick Barks, Robert Laird ### Journal: Functional Ecology ### R Scripts for analysis of data from Phase 1 #### 1. Load relevant packages library(reshape) library(geepack) library(gee) #### 2. Read data file dat1 <- read.csv("BarksAndLaird_RawData_Phase1.csv") #### 3. Extract subset containing reproduction data only (number of offspring produced each day by each frond) rawReproData1 <- subset(dat1, select=Oct.06.2012:Nov.15.2012) #### 4. Determine dates of last reproduction # get column name (i.e. date) associated with last reproduction dat1$date.last.repro <- apply(rawReproData1, 1, function (x) names(rawReproData1)[max(which(x > 0))]) #### 5. Convert date values to R's date format dat1$date.birth <- as.Date(dat1$date.birth, format="%b.%d.%Y") dat1$date.last.repro <- as.Date(dat1$date.last.repro, format="%b.%d.%Y") #### 6. Calculate reproductive lifespan dat1$lifespan <- as.numeric(dat1$date.last.repro - dat1$date.birth + 1) #### 7. Align reproduction data relative to dates of birth # i.e. so that column 1 is always the first day of a frond's life alignedReproData1 <- matrix(NA, nrow=nrow(rawReproData1), ncol=max(dat1$lifespan)) birth.col <- as.numeric(dat1$date.birth - as.Date("2012-10-06") + 1) # column index for date of birth death.col <- as.numeric(dat1$date.last.repro - as.Date("2012-10-06") + 1) # column index for date of death # loop through rows to align reproduction data for (i in 1:nrow(rawReproData1)) { alignedReproData1[i,1:dat1$lifespan[i]] <- as.numeric(rawReproData1[i,(birth.col[i]:death.col[i])]) } #### 8. Transform aligned reproduction data into binomial format # i.e. replace values > 1 with 1 alignedReproData1Binom <- apply(alignedReproData1, 2, function (x) replace(x, which(x > 1), 1)) #### 9. Create life table n <- nrow(dat1) # total starting sample size maxAge <- max(dat1$lifespan) # max lifespan lifeTab <- data.frame(matrix(nrow=maxAge, ncol=6)) names(lifeTab) <- c("age", "n.last.repro", "n.surv.cum", "p.surv.cum", "n.repro", "p.repro") lifeTab$age <- 1:maxAge # ages lifeTab$n.last.repro <- tabulate(dat1$lifespan, nbins=maxAge) # number of deaths at each age lifeTab$n.surv.cum <- c(n, n - cumsum(lifeTab$n.last.repro[1:(maxAge-1)])) # total cumulative survivorship at age lifeTab$p.surv.cum <- lifeTab$n.surv.cum / n # proportional cumulative survivorship at age lifeTab$n.repro <- colSums(alignedReproData1Binom, na.rm=TRUE) # number reproducing at each age lifeTab$p.repro <- lifeTab$n.repro / lifeTab$n.surv.cum # proportion reproducing at age #### 10. Fit survival models t <- dat1$lifespan # times to death n <- length(t) # number of data points used to fit models c <- list(fnscale=-1) # control for optim function to get max log-likelihood instead of min # log-likelihood functions for the 4 survival models # described in Pletcher et al. 2000 and Sherratt et al. 2010 loglik.exp <- function(par) sum(log(par[1] * exp(-par[1]*t))) loglik.weib <- function(par) sum(log(par[1]^par[2] * par[2]*t^(par[2]-1) * exp(-(par[1]*t)^par[2]))) loglik.gomp <- function(par) sum(log(par[1]*exp(par[2]*t - (par[1]/par[2]) * (exp(par[2]*t) - 1) ))) loglik.log <- function(par) sum(log(par[1] * exp(par[2]*t) * (1 + (par[1]*par[3]/par[2]) * (exp(par[2]*t) - 1) )^(-(par[3]+1)/par[3]))) # maximize log-likelihood functions for each of the 4 survival models # use 'Brent' method for exponential b/c default 'Nelder-Mead' method not suitable for single param optimization ml.fit.exp <- optim(c(0.1), loglik.exp, control=c, method="Brent", lower=0, upper=1) ml.fit.weib <- optim(c(0.1, 0.1), loglik.weib, control=c) ml.fit.gomp <- optim(c(0.001, 0.1), loglik.gomp, control=c) ml.fit.log <- optim(c(0.00001, 0.5, 2), loglik.log, control=c) # extract parameter estimates exp.a <- ml.fit.exp$par[1] weib.a <- ml.fit.weib$par[1]; weib.b <- ml.fit.weib$par[2] gomp.a <- ml.fit.gomp$par[1]; gomp.b <- ml.fit.gomp$par[2] log.a <- ml.fit.log$par[1]; log.b <- ml.fit.log$par[2]; log.c <- ml.fit.log$par[3] # specify best-fit curves exp.surv.curve <- function(t) exp(-exp.a*t) weib.surv.curve <- function(t) exp(-(weib.a*t)^weib.b) gomp.surv.curve <- function(t) exp(-(gomp.a/gomp.b)*(exp(gomp.b*t)-1)) log.surv.curve <- function(t) (1+log.c*log.a/log.b*(exp(log.b*t)-1))^(-1/log.c) # visually assess best-fit curves plot(p.surv.cum ~ age, data=lifeTab, log="y") curve(exp.surv.curve, add=T, lwd=2, col=1) curve(weib.surv.curve, add=T, lwd=2, col=2) curve(gomp.surv.curve, add=T, lwd=2, col=3) curve(log.surv.curve, add=T, lwd=2, col=4) legend("bottomleft", c("Exponential", "Weibull", "Gompertz", "Logistic"), col=1:4, bty="n", lwd=2) #### 11. Compare fitted survival models with AICc aicTab <- data.frame(matrix(ncol=8, nrow=4)) names(aicTab) <- c("k", "log.lik", "deviance", "aic", "aicc", "delta.aicc", "model.lik", "aicc.weight") row.names(aicTab) <- c("Exponential", "Weibull", "Gompertz", "Logistic") aicTab$k <- c(1, 2, 2, 3) aicTab$log.lik <- c(ml.fit.exp$value, ml.fit.weib$value, ml.fit.gomp$value, ml.fit.log$value) aicTab$deviance <- -2 * aicTab$log.lik aicTab$aic <- aicTab$deviance + 2*aicTab$k aicTab$aicc <- aicTab$aic + 2*aicTab$k*(aicTab$k+1) / (n - aicTab$k - 1) aicTab$delta.aicc <- aicTab$aicc - min(aicTab$aicc) aicTab$model.lik <- exp(0.5 * (min(aicTab$aicc) - aicTab$aicc)) aicTab$aicc.weight <- aicTab$model.lik / sum(aicTab$model.lik) aicTab #### 12. Convert reproduction data to flat form using melt.data.frame function in package 'reshape' # first column excluded because there was no reproduction on day 1 alignedBinomWithIdent <- data.frame(ident=dat1$ident, alignedReproData1Binom[,-1]) flatBinom <- melt.data.frame(alignedBinomWithIdent, id.vars="ident", variable_name="age", na.rm=TRUE) flatBinom$age <- as.numeric(flatBinom$age) # convert age to numeric flatBinom <- flatBinom[order(flatBinom$ident), ] # sort data frame by focal identity #### 13. Model probability of reproduction as a function of age using GEEs # modeling three correlation structures: independence, AR1, and exchangeable gee1 <- geeglm(value ~ age, data=flatBinom, family=binomial("logit"), id=ident, corstr="independence", scale.fix=T) gee2 <- geeglm(value ~ age, data=flatBinom, family=binomial("logit"), id=ident, corstr="ar1", scale.fix=T) gee3 <- geeglm(value ~ age, data=flatBinom, family=binomial("logit"), id=ident, corstr="exchangeable", scale.fix=T) summary(gee1) summary(gee2) summary(gee3) # negative alpha suggests fronds from same parent LESS similar than fronds selected at random # check for positive definite working correlation matrices eigen(gee1$geese$vbeta.naiv)$values eigen(gee2$geese$vbeta.naiv)$values eigen(gee3$geese$vbeta.naiv)$values # negative eigenvalue, therefore correlation matrix not positive definite # fit gee3 using an alternate function ('gee' function in package 'gee') # still fails to achieve positive definite working correlation matrix gee3b <- gee(value ~ age, data=flatBinom, family=binomial("logit"), id=ident, corstr="exchangeable", scale.fix=T) # calculate RJ values for each GEE to determine best correlation structure # described in Wang and Carey 2004 (J. American Statistical Association 99:845-853) # and Shults et al. 2009 (Statistics in Medicine 28:2338?Ž2355) Q.gee1 <- solve(gee1$geese$vbeta.naiv) %*% gee1$geese$vbeta Q.gee2 <- solve(gee2$geese$vbeta.naiv) %*% gee2$geese$vbeta Q.gee3 <- solve(gee3$geese$vbeta.naiv) %*% gee3$geese$vbeta RJ1.gee1 <- sum(diag(Q.gee1)) / 2 RJ1.gee2 <- sum(diag(Q.gee2)) / 2 RJ1.gee3 <- sum(diag(Q.gee3)) / 2 RJ2.gee1 <- sum(diag(Q.gee1 %*% Q.gee1)) / 2 RJ2.gee2 <- sum(diag(Q.gee2 %*% Q.gee2)) / 2 RJ2.gee3 <- sum(diag(Q.gee3 %*% Q.gee3)) / 2 RJ3.gee1 <- sum((eigen(Q.gee1)$values-1)^2) / 2 RJ3.gee2 <- sum((eigen(Q.gee2)$values-1)^2) / 2 RJ3.gee3 <- sum((eigen(Q.gee3)$values-1)^2) / 2 RJ3alt.gee1 <- RJ2.gee1 - 2*RJ1.gee1 + 1 # alternate method of calculating RJ3 RJ3alt.gee2 <- RJ2.gee2 - 2*RJ1.gee2 + 1 RJ3alt.gee3 <- RJ2.gee3 - 2*RJ1.gee3 + 1 # summary of RJ values # AR1 correlation structure is best (RJ1 and RJ2 nearest to 1, and RJ3 nearest to 0) rjTab <- cbind(c(RJ1.gee1, RJ2.gee1, RJ3.gee1, RJ3alt.gee1), c(RJ1.gee2, RJ2.gee2, RJ3.gee2, RJ3alt.gee2)) colnames(rjTab) <- c("gee1 (independence)", "gee2 (AR1)") rownames(rjTab) <- c("RJ1", "RJ2", "RJ3", "RJ3 Alt") rjTab #### 14. Plot reproduction versus age with fitted GEE curve int.gee <- coef(gee2)[1]; age.gee <- coef(gee2)[2] # extract params from best-fit GEE rep.curve.gee <- function(t) exp(int.gee+age.gee*t)/(1+exp(int.gee+age.gee*t)) # specify curve (logit function) plot(p.repro ~ age, data=lifeTab) curve(rep.curve.gee, add=TRUE, lwd=2, lty=1)