#check what is in r console from last time ls() #remove everything rm(list=ls()) #check what working directory is set getwd() #setting to required one where data is based setwd("C:/Users/UEA/Documents/Neat data/nature heatwave") #reading in data spermcount <- read.csv("heatwavespermcount.csv", header = TRUE) spermlivedead <- read.csv("heatwavespermsurvival.csv", header = TRUE) #### SIMPLE COUNT AND LIVE DEAD DATA CHECK, CLEAN, DESCRIPTION AND SUMMARY ######################################################################### #data checks str(spermcount) # 'data.frame': 94 obs. of 3 variables: #$ Replicate : Factor w/ 94 levels "1","1.C.1","1.C.2",..: 1 24 43 59 90 91 92 93 94 14 ... # male 1D # $ Mean.sperm.count: int 4638 57991 4830 118630 94198 2690 4528 3888 8912 6663 ... # count in ejaculate estimated from sampling # $ Temperature.oC : Factor w/ 2 levels "control","heat": 1 1 1 1 1 1 1 1 1 1 ... # male heatwave treatment spermcount$Mean.sperm.count.x10.to.5<-spermcount$Mean.sperm.count/100000 str(spermlivedead) # 'data.frame': 26 obs. of 6 variables: # $ Replicate.Block: Factor w/ 26 levels "1.C.1","1.C.2",..: 1 2 3 4 5 6 11 12 13 14 ... # male 1D # $ Treatment.oC : Factor w/ 2 levels "control","heat": 1 1 1 1 1 1 1 1 1 1 ... # male heatwave treatment # $ Dye.live : int 110244 146813 129604 57542 138747 131756 92121 47324 197364 96800 ... # green sperm heads estimated from sampling # $ Dye.dead : int 1613 23124 25813 26889 7529 10218 34525 17747 17209 63727 ... # red sperm heads estimated from sampling # $ Dye.total : int 111858 169938 155418 84431 146276 141973 126647 65071 214573 160527 ... # count in ejaculate estimated from sampling green and red # $ Dye.prop : num 0.986 0.864 0.834 0.682 0.949 0.928 0.727 0.727 0.92 0.603 ... # proportion of sperm suriving green/total spermlivedead$Cbind.dye.proportion<-cbind(spermlivedead$Dye.live, spermlivedead$Dye.dead) # vector for model of success/fail hatch ################# SIMPLE COUNT AND LIVE DEAD NEAT PLOTTING ########################## library(ggplot2) # live dead recovery temp <- expression(paste('Temperature (',degree,'C)')) #the temperature label with degrees sign # ~ is a space label30oC <- expression(""*30~degree*C) label42oC <- expression(""*42~degree*C) names(spermcount) ############ ! NAT COMMS FIGURE 3 A PLOT ################# graphsimplelivedeadcount<-ggplot(spermcount, aes(x=Temperature.oC, y=Mean.sperm.count.x10.to.5, fill=Temperature.oC)) + #change fill to colour is just lines and change 'scale_fill_manual' below to scale_color_manual geom_boxplot(notch=F, #change to F if want to get rid of notchs outlier.shape= NA, #shape of the outlier (hashtag out if dont want outliers marked) width=0.5, lwd=0.5, fatten=0.5, color="black", position=position_dodge(0.5)) + #size of the outlier (hashtag out if dont want outliers marked) stat_summary(fun.y="mean", geom= "point", size=4, position=position_dodge(1), color="black") + scale_fill_manual(values=c("ghostwhite", "tomato"), # changes the colour of the bars name = temp, #adds in temperature label on the legend breaks = c("control", "heat"), #the order listed in the legend label = c("Control", "Heatwave")) + #how things are labeled in the lgend geom_jitter(shape=1, size=1.5, position=position_jitter(0.15)) + labs(x= "Male treatment", y=expression(bold(Sperm~count~x~10^{5}))) + #adding title to the x axis and y axis scale_x_discrete(breaks=c("control", "heat"), #the order of the variables on the x axis labels=c("Control", "Heatwave")) + # the names on the x axis coord_cartesian(ylim=c(-0.1, 2.5)) + #set axis limits scale_y_continuous(breaks=seq(0, 2.5, 0.5), #ticks from 0 to 16000 and show number every 16000 expand = c(0, 0)) + #cuts the axis off at 0 theme_classic() + #the theme of the whole plot theme( #legend.position="none", #get rid of the hashtag to get rid of legend panel.grid.major=element_blank(), #getting rid of majorgridlines panel.border=element_blank(), #getting rid of minorgridlines panel.grid.minor=element_blank(), axis.line.x=element_line(color="black", size = 1), axis.line.y=element_line(color="black", size = 1), axis.text.x=element_text(color="black", size=12), axis.text.y=element_text(color="black", size=12), axis.title.x=element_text(face = "bold", size=12, color="black", margin = margin(t = 10, r = 0, b = 0, l = 0)), axis.title.y=element_text(face = "bold", size=12, color="black", margin = margin(t = 0, r = 10, b = 0, l = 0)), legend.position="none", panel.background=element_blank(), plot.background=element_rect(fill="transparent", colour = NA)) setwd("C:/Users/UEA/Desktop") ggsave("graphsimplelivedeadcount.png",width=2.5, height=4, dpi=300, bg = "transparent") setwd("C:/Users/UEA/Documents/Neat data/thesis/chapter4") names(spermlivedead) ################ ! NAT COMMS FIGURE 3 C PLOT ################# graphsimplelivedeadsurvival <-ggplot(spermlivedead, aes(x=Temperature.oC, y=Dye.prop, fill= Temperature.oC)) + #change fill to colour is just lines and change 'scale_fill_manual' below to scale_color_manual geom_boxplot(notch=F, #change to F if want to get rid of notchs outlier.shape= NA, #shape of the outlier (hashtag out if dont want outliers marked) width=0.5, lwd=0.5, fatten=0.5, color="black", position=position_dodge(0.5)) + #size of the outlier (hashtag out if dont want outliers marked) stat_summary(fun.y="mean", geom= "point", size=4, position=position_dodge(1), color="black") + scale_fill_manual(values=c("ghostwhite", "tomato"), # changes the colour of the bars name = temp, #adds in temperature label on the legend breaks = c("control", "heat"), #the order listed in the legend label = c("Control", "Heatwave")) + #how things are labeled in the lgend geom_jitter(shape=1, size=1.5, position=position_jitter(0.15)) + labs (x= "Male treatment", y= "Proportion of sperm live") + #adding title to the x axis and y axis scale_x_discrete(breaks=c("control", "heat"), #the order of the variables on the x axis labels=c("Control", "Heatwave")) + # the names on the x axis coord_cartesian(ylim=c(-0.02, 1.02)) + #set axis limits scale_y_continuous(breaks=seq(0, 1, 0.2), #ticks from 0 to 16000 and show number every 16000 expand = c(0, 0)) + #cuts the axis off at 0 theme_classic() + #the theme of the whole plot theme( #legend.position="none", #get rid of the hashtag to get rid of legend panel.grid.major=element_blank(), #getting rid of majorgridlines panel.border=element_blank(), #getting rid of minorgridlines panel.grid.minor=element_blank(), axis.line.x=element_line(color="black", size = 1), axis.line.y=element_line(color="black", size = 1), axis.text.x=element_text(color="black", size=12), axis.text.y=element_text(color="black", size=12), axis.title.x=element_text(face = "bold", size=12, color="black", margin = margin(t = 10, r = 0, b = 0, l = 0)), axis.title.y=element_text(face = "bold", size=12, color="black", margin = margin(t = 0, r = 10, b = 0, l = 0)), legend.position="none", panel.background=element_blank(), plot.background=element_rect(fill="transparent", colour = NA)) setwd("C:/Users/UEA/Desktop") ggsave("graphsimplelivedeadsurvival.png",width=2.5, height=3, dpi=300, bg = "transparent") setwd("C:/Users/UEA/Documents/Neat data/thesis/chapter4") ############################################################################################################################################################## SIMPLE COUNT AND LIVE DEAD PLOTTING RAW DATA DISTRIBUTION AND TESTING NORMALITY AND HOMOGENIETY OF VARIANCES ############################### ############# Count summary(spermcount) # dataset names(spermcount) ################# ! NAT COMMS DECRIPTIVE STATS ######################## #### ! library(psych) #gives you vars n, mean, sd, median, trimmed, mad, min, max, range, skew, kurtosis, se describeBy(spermcount$Mean.sperm.count, spermcount$Temperature.oC) # $control # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 38 59669.26 64651.49 24594 53824.81 36463.06 0 214573 214573 0.69 -0.98 10487.86 # # $heat # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 56 15818.57 30654.77 4628.5 8396.61 6862.21 0 148427 148427 2.59 6.45 4096.42 ### in base # 30 hist(spermcount$Mean.sperm.count[spermcount$Temperature.oC == "control"], main = list("Control", cex = 2), xlab = "paternity share", ylab ="Frequency", ylim = c(0,20), nclass = 10) # 42 hist(spermcount$Mean.sperm.count[spermcount$Temperature.oC == "heat"], col = "red", density = 30, angle = 180, border = "red", main = list("42", cex = 2), xlab = "paternity share", ylab ="Frequency", ylim = c(0,20), nclass = 10) # keep nclass = 10, keep scales default # 30/40 platykurtotic, 42 positive skew ###### plotting differences # base boxplots of data distribution grouped by temperature boxplot(spermcount$Mean.sperm.count ~ spermcount$Temperature.oC, ylab="sperm count", xlab="Temperature") #heats less variance # notice plot has automatically produced a scatterplot # if the mean.sperm.count is made as an integar ########### Normality - Failed in all groups shapiro.test (spermcount$Mean.sperm.count[spermcount$Temperature.oC == "control"]) # W = 0.83008, p-value = 4.557e-05 ks.test(spermcount$Mean.sperm.count[spermcount$Temperature.oC == "control"], pnorm) # D = 0.89474, p-value < 2.2e-16 shapiro.test (spermcount$Mean.sperm.count[spermcount$Temperature.oC == "heat"]) # W = 0.5684, p-value = 1.409e-11 ks.test(spermcount$Mean.sperm.count[spermcount$Temperature.oC == "heat"], pnorm) # D = 0.67857, p-value < 2.2e-16 ########### Homogeneity of Variances - Failed bartlett.test(spermcount$Mean.sperm.count ~ spermcount$Temperature.oC) # Bartlett's K-squared = 24.53, df = 1, p-value = 7.317e-07 fligner.test(spermcount$Mean.sperm.count ~ spermcount$Temperature.oC) # Fligner-Killeen:med chi-squared = 32.93, df = 1, p-value = 9.552e-09 #! need library(car) leveneTest(spermcount$Mean.sperm.count ~ spermcount$Temperature.oC) #Df F value Pr(>F) 1 23.352 5.373e-06 *** ################# ! NAT COMMS DECRIPTIVE STATS ######################## ############# Survival summary(spermlivedead) # dataset names(spermlivedead) #### ! library(psych) #gives you vars n, mean, sd, median, trimmed, mad, min, max, range, skew, kurtosis, se describeBy(spermlivedead$Dye.prop, spermlivedead$Temperature.oC) # $control # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 10 0.82 0.13 0.85 0.83 0.16 0.6 0.99 0.38 -0.29 -1.57 0.04 # # $heat # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 16 0.31 0.26 0.23 0.29 0.35 0 0.77 0.77 0.32 -1.46 0.06 ### in base # 30 hist(spermlivedead$Dye.prop[spermlivedead$Temperature.oC == "control"], main = list("Control", cex = 2), xlab = "paternity share", ylab ="Frequency", ylim = c(0,20), nclass = 3) # 42 hist(spermlivedead$Dye.prop[spermlivedead$Temperature.oC == "heat"], col = "red", density = 30, angle = 180, border = "red", main = list("42", cex = 2), xlab = "paternity share", ylab ="Frequency", ylim = c(0,20), nclass = 3) # keep nclass = 10, keep scales default # 30/40 platykurtotic, 42 right skew ###### plotting differences # base boxplots of data distribution grouped by temperature boxplot(spermlivedead$Dye.prop ~ spermlivedead$Temperature.oC, ylab="sperm count", xlab="Temperature") #heats less variance # notice plot has automatically produced a scatterplot # if the mean.sperm.count is made as an integar ########### Normality - failed shapiro.test (spermlivedead$Dye.prop[spermlivedead$Temperature.oC == "control"]) # W = 0.93167, p-value = 0.4645 ks.test(spermlivedead$Dye.prop[spermlivedead$Temperature.oC == "control"], pnorm) # D = 0.72675, p-value = 5.17e-05 shapiro.test(spermlivedead$Dye.prop[spermlivedead$Temperature.oC == "heat"]) # W = 0.9147, p-value = 0.1386 ks.test(spermlivedead$Dye.prop[spermlivedead$Temperature.oC == "heat"], pnorm) # D = 0.5, p-value = 0.0006709 ########### Homogeneity of Variances - Failed bartlett.test(spermlivedead$Dye.prop ~ spermlivedead$Temperature.oC) # Bartlett's K-squared = 4.2019, df = 1, p-value = 0.04038 fligner.test(spermlivedead$Dye.prop ~ spermlivedead$Temperature.oC) # Fligner-Killeen:med chi-squared = 3.3812, df = 1, p-value = 0.06594 #! need library(car) leveneTest(spermlivedead$Dye.prop ~ spermlivedead$Temperature.oC) #Df F value Pr(>F) 1 4.329 0.04831 * #################################################################################################################################################################### SIMPLE COUNT AND LIVE DEAD OLD METHOD: USE NORMAL > TRY AND TRANSFORM TO NORMAL > NON PARAMETRIC ##################################### # None of the the data is normally distributed # That which is homogenous in variance can have mann whitney Us # That which is not homogenous in variance in groups there is debate over the best method so WHUs and welch t-test on ranks tried ######### Count # method 1 non parametric wilcox.test(spermcount$Mean.sperm.count ~ spermcount$Temperature.oC, exact = TRUE, conf.int = TRUE, paired = FALSE) # W = 1550, p-value = 0.0001668 # method 2 welch t-test on ranks spermcount$rank<-rank(spermcount$Mean.sperm.count) t.test(spermcount$rank ~ spermcount$Temperature.oC, var.equal=FALSE, exact = TRUE, conf.int = TRUE, paired = FALSE) # t = 4.0235, df = 76.225, p-value = 0.000134 ####### Survival # method 1 non parametric wilcox.test(spermlivedead$Dye.prop ~ spermlivedead$Temperature.oC, exact = TRUE, conf.int = TRUE, paired = FALSE) # W = 154, p-value = 0.0001057 # method 2 welch t-test on ranks spermlivedead$rank<-rank(spermlivedead$Dye.prop) t.test(spermlivedead$rank ~ spermlivedead$Temperature.oC, var.equal=FALSE, exact = TRUE, conf.int = TRUE, paired = FALSE) # t = 6.6245, df = 23.488, p-value = 8.352e-07 #################################################################################################################################################################### SIMPLE COUNT AND LIVE DEAD NEW MODELLING METHOD ##################################### library(car); library(MASS); library (lme4); library (nlme) ################# ! NAT COMMS MODEL SELECTION ######################## ########## Sperm Count ################## #### Poisson family error structures # As data is very right skewed count, fitting normal distibution does not give normal and homogenity of variance in residuals names(spermcount) # Creating a global model globalmodposs<-glm(Mean.sperm.count ~ Temperature.oC, poisson(link = "log"), data=spermcount) globalmodpossID<-glm(Mean.sperm.count ~ Temperature.oC, poisson(link = "identity"), data=spermcount) globalmodpossRT<-glm(Mean.sperm.count ~ Temperature.oC, poisson(link = "sqrt"), data=spermcount) # alternative long way globalmodpossion<-glm(spermcount1$Mean.sperm.count$ ~ spermcount1$Temperature.oC, poisson(link = "log")) summary(globalmodposs); summary(globalmodpossID); summary(globalmodpossRT) # No R^2, AIC given # AIC: 4847239,AIC: 4847239,AIC: 4847239 # link change seem to do little pseudoR<-(globalmodposs$null.deviance-globalmodposs$deviance) / globalmodposs$null.deviance # (thomas et al., 2015) pseudoR # 0.2089186 pseudoR<-(globalmodpossID$null.deviance-globalmodpossID$deviance) / globalmodpossID$null.deviance # (thomas et al., 2015) pseudoR # 0.2089186 pseudoR<-(globalmodpossRT$null.deviance-globalmodpossRT$deviance) / globalmodpossRT$null.deviance # (thomas et al., 2015) pseudoR # 0.2089186 # seems changing the link does nothing to R^2 or AIC # poisson explains more variation in data than gaussian AICc<-(-2*logLik(globalmodposs))+((2*1*(1+1)/(94-1-1))); AICc # qAICc<-((-2*logLik(model1)/Theta)+((2*p*(p+1)/(n-p-1))); qAICc # AIC correcting for perameters(p) and sample size (n) # 4847235 qAICc<-(-2*logLik(globalmodposs)/52678.34)+((2*1*(1+1)/(94-1-1))); qAICc # 92.05919 ## Overdispersion check par(mfrow=c(2,2)); plot(globalmodposs);par(mfrow=c(1,1)) theta<-globalmodposs$deviance/globalmodposs$df.residual; theta #dispersion perameter (thomas et al 2015) how much variation left unexplained after fitting distribution # theta = 52678.34, massively overdispersed is >1 is overdispersion. VAR 3434952756, U 45248.99 #! library(AER) alternative test dispersiontest(globalmodposs) # dispersion 50799.5 # z = 4.2516, p-value = 1.061e-05 # Checking reasons: no other covariates and samples are already mean of males, no 0s, no pseudo rep as different males and females used for each datapoint :. True overdispersion recommendation of quasipoisson (2-15) or negative binomial (>15) describeBy(spermcount$Mean.sperm.count); 5346.26^2 #variance much bigger than mean 28582496 table(spermcount$Mean.sperm.count); str(spermcount$Mean.sperm.count); 22/94 # 23% data are 0s probably 0 inflated. # all same generation nothing else to add ############### Negative bionimal ## Fitting negative binomial globalmodnvebinom<-glm.nb(Mean.sperm.count ~ Temperature.oC, link = "log", data=spermcount) summary(globalmodnvebinom,cor = F) #Theta: 0.1787 pseudoR<-(globalmodnvebinom$null.deviance-globalmodnvebinom$deviance) / globalmodnvebinom$null.deviance # (thomas et al., 2015) pseudoR # 0.05875877 # negative binomial structure exaplins no more variation than poisson AICc<-(-2*logLik(globalmodnvebinom))+((2*1*(1+1)/(94-1-1))); AICc # qAICc<-((-2*logLik(model1)/Theta)+((2*p*(p+1)/(n-p-1))); qAICc # AIC correcting for perameters(p) and sample size (n) # 1813.332 # AICc smaller so model more efficient ### assumption checks, recommendation of residual dev (contribution of each obs to resid dev) rather than pearson (Thomas et al., 2015) # 1) Errors normally distributed? - Somewhat devresid<-resid(globalmodnvebinom, type = "deviance"); hist(devresid) shapiro.test(devresid);ks.test(devresid, pnorm) qqnorm(devresid,cex=1.8,pch=20); qqline(devresid,lty=2,lwd=2) # Q-Q points left pull, sresid histogram positively skewed, normality tests p<0.05. # 2) Homogenous/homoscedasticity variance of residuals - yes plot(devresid ~ globalmodnvebinom$fitted.values, pch = 20, cex = 1, cex.lab = 1.5) fligner.test(devresid~spermlivedead25$Temperature.oC) # P1 Resids~Fitted no wedge and slight slope, test passed # 3) Independences of independent variables - YES # Only one independent variable # 4) No serial auto-correlation with time/space - No #! need library(car) durbinWatsonTest(globalmodnvebinom) # test failed # 5) No bias by unduly influential datapoints - no influence<-influence.measures(globalmodnvebinom); summary(influence) # none reported # 6) Independent variables measured without error - BEST OF ABILITY ## Overdispersion re-check theta<-globalmodnvebinom$deviance/globalmodnvebinom$df.residual; theta # 1.257499 qAICc<-(-2*logLik(globalmodnvebinom)/1.257499)+((2*1*(1+1)/(29-1-1))); qAICc # 1442. # still overdispersed ### MODEL REFINEMENT names(globalmodnvebinom)#lists the possible objects to pull out ## Global model pseudoR<-(globalmodnvebinom$null.deviance-globalmodnvebinom$deviance) / globalmodnvebinom$null.deviance # (thomas et al., 2015) pseudoR # 0.05875877 # Null deviance: 1927807 on 29 degrees of freedom # Residual deviance: 1909116 on 28 degrees of freedom #(1927807-1909116)/1927807 = <1% variance explained AICc<-(-2*logLik(globalmodnvebinom))+((2*1*(1+1)/(94-1-1))); AICc # 1813.332 ## Null model nullmod<-glm.nb(Mean.sperm.count ~ 1, link = "log", data=spermcount) # creating null of just intercept (and random in glmms) pseudoR<-(nullmod$null.deviance-nullmod$deviance) / nullmod$null.deviance; pseudoR # (thomas et al., 2015) # 2.136642e AICc<-(-2*logLik(nullmod))+((2*1*(1+1)/(94-1-1))); AICc # 1928169 ### Global model explains spermcount significantly more than null model because # 1) pseudo R is higher and AIC more than 2 lower # 2) anova comparison anova(globalmodnvebinom, nullmod, test = "Chi") anova(nullmod, globalmodnvebinom, test = "Chi") #! Use "F" for continuous dependent variables # Likelihood ratio tests of Negative Binomial Models # Model theta Resid. df 2 x log-lik. Test df LR stat. Pr(Chi) # 1 1 0.1682855 93 -1820.296 # 2 Temperature.oC 0.1786674 92 -1813.289 1 vs 2 1 7.007747 0.008115773 anova(globalmodnvebinom) # Analysis of Deviance Table # Model: Negative Binomial(0.1787), link: log # Response: Mean.sperm.count # Terms added sequentially (first to last) # Df Deviance Resid. Df Resid. Dev Pr(>Chi) # NULL 93 122.91 # Temperature.oC 1 7.2222 92 115.69 0.007201 ** # --- # 3) liklihood ratio test library(lmtest) lrtest(globalmodnvebinom,nullmod) lrtest(nullmod,globalmodnvebinom)#produces same result just different order # Likelihood ratio test # # Model 1: Mean.sperm.count ~ Temperature.oC # Model 2: Mean.sperm.count ~ 1 # #Df LogLik Df Chisq Pr(>Chisq) # 1 3 -906.64 # 2 2 -910.15 -1 7.0077 0.008116 ** ## Therefore complex model kept (although the simplest prefered for NS difference) ################# ! NAT COMMS MODEL SIGNICIANCE ######################## anova(globalmodnvebinom, test= "Chi") drop1(globalmodnvebinom, test= "Chi") # Df Deviance AIC LRT Pr(>Chi) # 115.69 1817.3 # Temperature.oC 1 122.91 1822.5 7.2222 0.007201 ** ################# ! NAT COMMS MODEL POST HOC ######################## summary(globalmodnvebinom) # Call: # glm.nb(formula = Mean.sperm.count ~ Temperature.oC, data = spermcount, # link = "log", init.theta = 0.1786674124) # # Deviance Residuals: # Min 1Q Median 3Q Max # -2.1319 -0.9238 -0.4213 0.1056 1.4817 # # Coefficients: # Estimate Std. Error z value Pr(>|z|) # (Intercept) 10.9966 0.3838 28.65 < 2e-16 *** # Temperature.oCheat -1.3276 0.4972 -2.67 0.00758 ** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # (Dispersion parameter for Negative Binomial(0.1787) family taken to be 1) # # Null deviance: 122.91 on 93 degrees of freedom # Residual deviance: 115.69 on 92 degrees of freedom # AIC: 1819.3 # # Number of Fisher Scoring iterations: 1 # # # Theta: 0.1787 # Std. Err.: 0.0234 # # 2 x log-likelihood: -1813.2890 exp(10.9966) # 59670.92 exp(10.9966-1.3276) #15819.52 describeBy(spermcount$Mean.sperm.count, spermcount$Temperature.oC) # $control # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 38 59669.26 64651.49 24594 53824.81 36463.06 0 214573 214573 0.69 -0.98 10487.86 # # $heat # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 56 15818.57 30654.77 4628.5 8396.61 6862.21 0 148427 148427 2.59 6.45 4096.42 1-(15819.52/59669.26) #59669.26 ################# ! NAT COMMS MODEL SELECTION ######################## ################ Sperm survival ################## # ! Holman 2009 pitfalls live dead recommends including total count as a random factor for survival # no correlation in control plot(spermlivedead1control$Dye.total, spermlivedead1control$Dye.prop) cor.test(spermlivedead1control$Dye.total, spermlivedead1control$Dye.prop, method = "spearman") # positivecorrelation in heat plot(spermlivedead1heat$Dye.total, spermlivedead1heat$Dye.prop) cor.test(spermlivedead1heat$Dye.total, spermlivedead1heat$Dye.prop, method = "spearman") #### Binomial family error structures # Creating a global model globalmodbinom2<-glmer(Cbind.dye.proportion ~ Temperature.oC + (1|Dye.total), binomial(link = "logit"), data=spermlivedead, na.action = na.exclude) library(MuMIn) r.squaredGLMM(globalmodbinom2) # R2m R2c # 0.2708160 0.4629123 AIC(globalmodbinom2) #61200.7 summary(globalmodbinom2) sresid<-resid(globalmodbinom2, type="pearson"); hist(sresid) # not necassary for normal residuals, but looks it fits<-fitted(globalmodbinom2); plot(sresid~fits) # checking for heteroscedasicity; scatter no wedge or slight trending formation/link/error family change needed #! library(LMERConvenienceFunctions) mcp.fnc(globalmodbinom2) plotLMER.fnc(globalmodbinom2) plot(sresid~spermlivedead$Replicate.Block)# no patterns no new x/interactions etc needed ## AIC AIC(globalmodbinom2)# 61200.7 AICc<-(-2*logLik(globalmodbinom2))+((2*2*(2+1)/(26-2-1))); AICc # qAICc<-((-2*logLik(model1)/Theta)+((2*p*(p+1)/(n-p-1))); qAICc # AIC correcting for erameters(p) and sample size (n) # 61195.22 ## model refinement globalmodbinom2<-glmer(Cbind.dye.proportion ~ Temperature.oC + (1|Dye.total), binomial(link = "logit"), data=spermlivedead, na.action = na.exclude) nullglobalmodbinom<-glmer(Cbind.dye.proportion ~ 1 + (1|Dye.total), binomial(link = "logit"), data=spermlivedead, na.action = na.exclude) # creating null of just intercept (and random in glmms) # 1) anova comparison anova(globalmodbinom2, nullglobalmodbinom, test = "Chi") # Data: spermlivedead # nullglobalmodbinom: Cbind.dye.proportion ~ 1 + (1 | Dye.total) # globalmodbinom2: Cbind.dye.proportion ~ Temperature.oC + (1 | Dye.total) # Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq) # nullglobalmodbinom 2 61209 61211 -30602 61205 # globalmodbinom2 3 61201 61204 -30597 61195 10.109 1 0.001475 ** # 2) liklihood ratio test library(lmtest) lrtest(globalmodbinom2, nullglobalmodbinom) # Likelihood ratio test # # Model 1: Cbind.dye.proportion ~ Temperature.oC + (1 | Dye.total) # Model 2: Cbind.dye.proportion ~ 1 + (1 | Dye.total) # #Df LogLik Df Chisq Pr(>Chisq) # 1 3 -30597 # 2 2 -30602 -1 10.109 0.001475 ** ## Global model significantly lower LL. Therefore complex model kept (although the simplest prefered for NS difference) ################# ! NAT COMMS MODEL SIGNIFICANCE ######################## drop1(globalmodbinom2, test= "Chi") # Single term deletions # # # Model: # Cbind.dye.proportion ~ Temperature.oC + (1 | Dye.total) # Df AIC LRT Pr(Chi) # 61201 # Temperature.oC 1 61209 10.109 0.001475 ** ################# ! NAT COMMS MODEL POST HOC ######################## ## summary summary(globalmodbinom2) # Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod'] # Family: binomial ( logit ) # Formula: Cbind.dye.proportion ~ Temperature.oC + (1 | Dye.total) # Data: spermlivedead # # AIC BIC logLik deviance df.resid # 61200.7 61204.5 -30597.4 61194.7 23 # # Scaled residuals: # Min 1Q Median 3Q Max # -166.840 -0.001 0.001 0.002 166.841 # # Random effects: # Groups Name Variance Std.Dev. # Dye.total (Intercept) 8.885 2.981 # Number of obs: 26, groups: Dye.total, 25 # # Fixed effects: # Estimate Std. Error z value Pr(>|z|) # (Intercept) 1.8738 0.9426 1.988 0.046821 * # Temperature.oCheat -4.3393 1.2248 -3.543 0.000396 *** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Correlation of Fixed Effects: # (Intr) # Temprtr.Cht -0.770 # estimate logit look ok exp(1.8738)/ (1+ exp(1.8738)) #0.8668974 exp(1.8738-4.3393)/ (1+ exp(1.8738-4.3393)) # 0.07831243 describeBy(spermlivedead$Dye.prop, spermlivedead$Temperature.oC) # $control # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 10 0.82 0.13 0.85 0.83 0.16 0.6 0.99 0.38 -0.29 -1.57 0.04 # # $heat # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 16 0.31 0.26 0.23 0.29 0.35 0 0.77 0.77 0.32 -1.46 0.06