#### Overview #### # This script accompanies the data file "IoM_mousedata_dietandimmunity.csv" and carries out the analysis and plots described in the paper Taylor et al. 2019 "Immune state is associated with natural dietary variation in wild mice Mus musculus domesticus" Functional Ecology. #### Custom functions #### ## doModAv function ## # DESCRIPTION: function to automate the multiple models analysis process. Dredges through possible predictor combinations, selects those with AIC values within a threshold from the best model, and takes an average across those models. # # ARGUMENTS: # M - a model object # delt - threshold deltaAIC value (default = 2) # ... - other arguments to be passed to "dredge" (such as subset) # # VAUE: # A list containing # coefficients (including confidence intervals) # term codes # component models # importance values # If there is only a single model, then a warning will be given, # and that model's summary output will be returned doModAv <- function(M, delt = 2, ...) { require(MuMIn) # Ensure correct NA settings (and restore when complete) rest <- getOption("na.action") on.exit(options(na.action = rest)) options(na.action = "na.fail") # Find all models within specified deltaAICc of the best mods <- dredge(M, ...) %>% get.models(delta < delt) # Collate output for multiple qualifying models if (length(mods) > 1) { am <- model.avg(mods) coef <- cbind(summary(am)$coefmat.full, confint(am, full = T)) termn <- attr(terms(summary(am)$formula), "term.labels") terms <- 1:length(termn) names(terms) <- termn comps <- summary(am)$msTable imp <- summary(am)$importance out <- list("Model" = am, "Coefficients" = coef, "Terms" = terms, "Component models" = comps, "Importance" = imp) # If there is only a single model, warn and return output for that model } else { warning("Only one model in the specified range") # Currently supported are outputs from (g)lm or glmer - other model types may give undefined behaviour if (typeof(mods[[1]]) == "S4") { am <- eval(mods[[1]]@call) } else { am <- eval(mods[[1]]$call) } out <- list("Model" = am, "Output" = summary(am)) } return(out) } # End of doModAv function ## SMI function ## # DESCRIPTION: # Calculates the Scaled Mass Index, as described in Peig & Green 2009. # ARGUMENTS: # len - vector of length values # wt - vector of weight values (same length as len) # omit (optional) - vector of indices for individuals that should not be used in calculating the regression line. # VALUE: # Vector containing Scaled Mass Index values for each individual SMI <- function(len, wt, omit = NULL) { # Required package for SMA analysis require(smatr) # len and wt vectors must be the same length if(length(len) != length(wt)) { stop("Vectors for length and weight must be the same length") } # Remove any records specified in "omit" and log weight and length values if(length(omit) > 0) { wmod <- log(wt[ -omit ]) lmod <- log(len[ -omit ]) } else { wmod <- log(wt) lmod <- log(len) } # Calculate slope in SMA regression bmod <- sma(wmod ~ lmod, na.action = "na.exclude") bsma <- bmod$groupsummary$Slope # Calculate condition indices L0 <- mean(len, na.rm = T) SMI <- wt * (L0 / len) ^ bsma } # End of SMI function ## label_offsets function ## # DESCRIPTION: Applies a multiplier to x and y coordinates for use with plot labels. The aim is to place text beyond the end of an arrow rather than directly over the top of it. # # ARGUMENTS: # data - data frame in which transformations are to be applied # xname - variable name (string form) containing the x coordinate data # yname - variable name (string form) containing the y coordinate data # xprop - amount by which the label should be offset on the x axis. Full data range will be divided by this amount. # yprop - as above, for y axis. # # VALUE: # A copy of the data frame with two variables added, which are the transformed x and y coordinates. These will have the same names as the original variables with ".lab" appended label_offsets <- function(data, xname, yname, xprop = 15, yprop = 30) { # Select x and y variables x <- data[ , xname ] y <- data[ , yname ] # Calculate offset amounts xoffset <- (max(x) - min(x)) / xprop yoffset <- (max(y) - min(y)) / yprop # Apply offset to the coordinates type <- abs(y) > abs(x) x2 <- x + ifelse(type, abs(x)*yoffset/abs(y), xoffset) * x/abs(x) y2 <- y + ifelse(type, yoffset, abs(y)*xoffset/abs(x)) * y/abs(y) # Add the newly created variables to the data frame xlab <- paste(xname, "lab", sep = ".") ylab <- paste(yname, "lab", sep = ".") data[ , xlab ] <- x2 data[ , ylab ] <- y2 return(data) } # End of label_offsets function #### Loading packages #### library(tidyverse) library(nlme) library(lme4) library(vegan) #### Loading data #### mice <- read_csv("IoM_mousedata_dietandimmunity.csv") #### Data transforms #### mice <- mice %>% mutate(SMI = SMI(Length_SV, Weight_total), # Calculate Scaled Mass Index P_Trichuris = Trichuris > 0, # Presence/absence trichuris P_Syphacia = Syphacia > 0, # Presence/absence syphacia GPS_long_t = GPS_long * 0.55, # This transformation ensures that latitude and longitude values represent equivalent distances over the geographical area of this study dummy = 1) %>% # Dummy variable for variograms mutate_at(vars(spleen_FoxP3:Leptin), log) # Log all protein/RNA variables #### Redundancy analysis #### ## Spleen RDA ## # Select spleen variables # Exclude spleen_IL2 due to large numbers of missing values spleenVars <- mice %>% select(spleen_FoxP3:spleen_TNFa, -spleen_IL2) %>% na.omit # Valid individuals for spleen RDA (no missing spleen data) valid.spleen <- !1:74 %in% attr(spleenVars, "na.action") # Conduct redundancy analysis rda1 <- rda(spleenVars ~ d13C + d15N + P_Syphacia + P_Trichuris + Leptin + Age_days + Sex, data = filter(mice, valid.spleen)) # Check fit anova.cca(rda1, permutations = 9999) RsquareAdj(rda1) ## MLN RDA ## # Select MLN variables # Exclude MLN_IL12b due to large numbers of missing values MLNVars <- mice %>% select(MLN_IL1B:MLN_Irf5, -MLN_IL12b) %>% na.omit # Valid individuals for MLN RDA (no missing MLN data) MLNvalid <- !1:74 %in% attr(MLNVars, "na.action") # Conduct redundancy analysis rda2 <- rda(MLNVars ~ d13C + d15N + P_Syphacia + P_Trichuris + Leptin + Age_days + Sex, data = filter(mice, MLNvalid)) # Check fit anova.cca(rda2, permutations = 9999) RsquareAdj(rda2) # Follow up significant overall RDA fit: # Which RDA axes are significant? anova.cca(rda2, permutations = 9999, by = "axis") # Which terms are significant (approximate)? anova.cca(rda2, permutations = 9999, by = "margin") ## Bioplex RDA ## # Bioplex variables bioVars <- mice %>% select(IL1b:TNF.a) # Note no missing data in these variables # Conduct redundancy analysis rda3 <- rda(bioVars ~ d13C + d15N + P_Syphacia + P_Trichuris + Leptin + Age_days + Sex, data = mice) # Check fit anova.cca(rda3, permutations = 9999) RsquareAdj(rda3) #### Linear models #### ## d15N model ## # Construct variogram to check for spatial correlation M.15N.V <- lme(d15N ~ Vegetation + PuffinDensity + Age_days + Sex + P_Trichuris + P_Syphacia, random = ~ 1 | dummy, data = mice, method = "ML") plot(Variogram(M.15N.V, resType = "normalized", form = ~ GPS_lat + GPS_long_t)) # Mixed effects model M.15N.M <- lmer(d15N ~ Vegetation + PuffinDensity + Age_days + Sex + P_Trichuris + P_Syphacia + (1|Location), data = mice, REML = F) # Fixed effects model (no random effect of location) M.15N.F <- lm(d15N ~ Vegetation + PuffinDensity + Age_days + Sex + P_Trichuris + P_Syphacia, data = mice) # Test difference between mixed and fixed model anova(M.15N.M, M.15N.F, test = "Chisq") # Random effect of location is significant, so proceed with mixed model # Checks of model assumptions: # Normality of residuals (qqplot) qqnorm(resid(M.15N.M)) qqline(resid(M.15N.M)) # Homoscedasticity (scale vs location) plot(M.15N.M, abs(resid(.))^0.5 ~ fitted(.)) # Linearity of continuous predictors (plotted against model residuals) plot(M.15N.M, resid(.) ~ PuffinDensity) plot(M.15N.M, resid(.) ~ Age_days) # Model dredging and averaging M.15N.A <- doModAv(M.15N.M) ## d13C model ## # Construct variogram to check for spatial correlation M.13C.V <- lme(d13C ~ Vegetation + PuffinDensity + Age_days + Sex + P_Trichuris + P_Syphacia, random = ~ 1 | dummy, data = mice, method = "ML") plot(Variogram(M.13C.V, resType = "normalized", form = ~ GPS_lat + GPS_long_t)) # Mixed effects model M.13C.M <- lmer(d13C ~ Vegetation + PuffinDensity + Age_days + Sex + P_Trichuris + P_Syphacia + (1|Location), data = mice, REML = F) # Fixed effects model (no random effect of location) M.13C.F <- lm(d13C ~ Vegetation + PuffinDensity + Age_days + Sex + P_Trichuris + P_Syphacia, data = mice) # Test difference between mixed and fixed model anova(M.13C.M, M.13C.F, test = "Chisq") # Random effect of location is not significant, so proceed with fixed model # Checks of model assumptions: # Normality of residuals (qqplot) plot(M.13C.F, which = 2) # Homoscedasticity (scale vs location) plot(M.13C.F, which = 3) # Linearity of continuous predictors (plotted against model residuals) plot(resid(M.13C.F) ~ mice$PuffinDensity) plot(resid(M.13C.F) ~ mice$Age_days) # Model dredging and averaging M.13C.A <- doModAv(M.13C.F) ## Leptin model ## # Mixed effects model M.lep.M <- lmer(Leptin ~ d13C + d15N + Age_days + Sex + P_Trichuris + P_Syphacia + (1|Location), data = mice, REML = F) # Fixed effects model M.lep.F <- lm(Leptin ~ d13C + d15N + Age_days + Sex + P_Trichuris + P_Syphacia, data = mice) # Test difference between mixed and fixed models anova(M.lep.M, M.lep.F) # Random effect of location is not significant # Checks of model assumptions: # Normality of residuals (qqplot) plot(M.lep.F, which = 2) # Homoscedasticity (scale vs location) plot(M.lep.F, which = 3) # Linearity of continuous predictors (plotted against model residuals) plot(resid(M.lep.F) ~ mice$d13C) plot(resid(M.lep.F) ~ mice$d15N) plot(resid(M.lep.F) ~ mice$Age_days) # Model dredging and averaging M.lep.A <- doModAv(M.lep.F) ## SMI model ## # Mixed effects model M.SMI.M <- lmer(SMI ~ d13C + d15N + Age_days + Sex + P_Trichuris + P_Syphacia + (1|Location), data = mice, REML = F) # Fixed effects model M.SMI.F <- lm(SMI ~ d13C + d15N + Age_days + Sex + P_Trichuris + P_Syphacia, data = mice) # Test difference between mixed and fixed models anova(M.SMI.M, M.SMI.F) # Random effect of location is not significant # Checks of model assumptions: # Normality of residuals (qqplot) plot(M.SMI.F, which = 2) # Homoscedasticity (scale vs location) plot(M.SMI.F, which = 3) # Linearity of continuous predictors (plotted against model residuals) plot(resid(M.SMI.F) ~ mice$d13C) plot(resid(M.SMI.F) ~ mice$d15N) plot(resid(M.SMI.F) ~ mice$Age_days) # Model dredging and averaging M.SMI.A <- doModAv(M.SMI.F) #### Plots #### ## Figure 2: Triplot from redundancy analysis of expression of immune-related genes in the MLNs # Collate RDA scores and add appropriate labels genescore <- rda2 %>% scores(choices = 1:2, scaling = 0, display = "sp") %>% as.data.frame %>% mutate(lab = c("Il1b", "Il4", "Il6", "Il17a", "Gata3", "Il13", "Tnfa", "Il10", "Ifng", "Tbx21", "Irf5")) %>% label_offsets("RDA1", "RDA2") indscore <- rda2 %>% scores(choices = 1:2, scaling = 0, display = "site") %>% as.data.frame dietscore <- rda2 %>% scores(choices = 1:2, scaling = 0, display = "bp") %>% as.data.frame %>% mutate(lab = c("C13", "N15", "Syphacia", "Trichuris", "Leptin", "Age", "Sex"), lab2 = c('delta^13*C*"**"', "delta^15*N", "Syphacia", "Trichuris", "Leptin", "Age", "Sex")) %>% label_offsets("RDA1", "RDA2") # Plot figure fig2 <- ggplot(genescore, aes(x = RDA1, y = RDA2)) + geom_point(data = indscore, alpha = 0.3, pch = 4) + geom_segment(xend = 0, yend = 0, lty = 2, arrow = arrow(ends = "first", length = unit(0.1, "inches"))) + geom_text(aes(x = RDA1.lab, y = RDA2.lab, label = lab), fontface = "italic", size = 5) + geom_segment(data = dietscore, xend = 0, yend = 0, arrow = arrow(ends = "first", length = unit(0.1, "inches"))) + geom_text(data = dietscore, aes(x = RDA1.lab, y = RDA2.lab, label = lab2), size = 5, parse = T) + theme_classic(18) + coord_fixed() + labs(x = "RDA1\n(11% of gene expression variation)", y = "RDA2\n(3% of gene expression variation)") ## Figure 3: Variation in circulating leptin concentration with carbon isotope values fig3 <- ggplot(mice, aes(x = d13C, y = exp(Leptin))) + geom_point(size = 2) + geom_smooth(method = "lm", size = 1.2, colour = "black") + scale_y_log10() + labs(x = expression(paste(delta^13, "C (\u2030)",sep="")), y = expression(Leptin~(pg~ml^-1))) + theme_classic(18) ## Figure 4: Variation in mouse nitrogen isotope values among trapping locations # Collate aggregate stats by location bysite <- mice %>% group_by(Location) %>% summarise(N = n(), NSE = sd(d15N)/sqrt(length(d15N)), d15N = mean(d15N), PuffinDensity = mean(PuffinDensity), Vegetation = Vegetation[1]) %>% mutate(lab = paste0(Location, " (", N, ")")) # Plot figure fig4 <- ggplot(bysite, aes(x = PuffinDensity, y = d15N, fill = Vegetation)) + geom_pointrange(shape = 21, size = 1.2, aes(ymax = d15N + NSE, ymin = d15N - NSE)) + geom_text(aes(label = lab, x = PuffinDensity + 130)) + labs(x = expression(paste("Puffin burrows ", ha^-1, sep ="")), y = expression(paste(delta^15, "N (\u2030)",sep=""))) + scale_fill_manual(values = c("black", "white")) + theme_classic(18) + theme(legend.position = "none")