#check what is in r console from last time ls() #remove everything rm(list=ls()) #check what working directory is set getwd() #setting to required one where data is based setwd("C:/Users/UEA/Documents/Dissertation and phd/d- data for phd/Neat data/life stage recovery") #reading in data repfit10D <- read.csv("heatwavereproductivefitness.csv", header = TRUE) # full data set ### checking for outliers/errors summary(repfit10D) # produces general (unsplit) range, quantiles, median, count and mean summary stats for each variable str(repfit10D) # checks the variable types # 'data.frame': 281 obs. of 4 variables: # $ Replicate : int 1 2 3 4 5 6 7 8 9 10 ... # $ Offspring.count.10D: int 204 0 173 155 117 124 153 206 152 132 ... # $ Temperature.oC : Factor w/ 3 levels "30","42","42both": 1 1 1 1 1 1 1 1 1 1 ... # $ Date : int 1 1 1 1 1 1 1 1 1 1 ... # $ Temperature.oC : Factor w/ 5 levels "30","42","42both",..: 1 1 1 1 1 1 1 1 1 1 ...# heatwave treatment # 30oC controls, 42 males, 42 both sexes # no converting necassary is.na(repfit10D) # returns TRUE of x is missing # nothing missing repfit10D$Date<-as.factor(repfit10D$Date) # changing to categorical factor as coding is not proportional to differences levels(repfit10D$Temperature.oC) # [1] "30" "42" "42both" ###################### ! PAPER FIG 2 PLOT ######################################### names(repfit10D) str(repfit10D) library(ggplot2) temp <- expression(paste('Temperature (',degree,'C)')) #the temperature label with degrees sign # ~ is a space graphrepfit<-ggplot(subset(repfit10D, Temperature.oC %in% c("30", "42", "42both")), aes(x=Temperature.oC, y=Offspring.count.10D, fill= Temperature.oC)) + #change fill to colour is just lines and change 'scale_fill_manual' below to scale_color_manual geom_boxplot(notch=F, #change to F if want to get rid of notchs outlier.shape= NA, #shape of the outlier (hashtag out if dont want outliers marked) width=0.5, lwd=0.5, fatten=0.5, color="black", position=position_dodge(0.5)) + #size of the outlier (hashtag out if dont want outliers marked) stat_summary(fun.y="mean", geom= "point", size=4, position=position_dodge(1), color="black") + geom_jitter(shape=1, size=1.5, position=position_jitter(0.15)) + #so all the data points are not ontop of each other labs (x= "Heatwave treatment group", y="10 day offspring production") + #adding title to the x axis and y axis scale_x_discrete(breaks=c("30", "42", "42both"), #the order of the variables on the x axis labels=c("Control", "Male", "Male & Female")) + # the names on the x axis coord_cartesian(ylim=c(-5, 255)) + #set axis limits scale_y_continuous(breaks=seq(0, 250, 50), #ticks from 0 to 16000 and show number every 16000 expand = c(0, 0)) + #cuts the axis off at 0 scale_fill_manual(values=c("white", "firebrick2", "orange1"), # changes the colour of the bars name = temp, #adds in temperature label on the legend breaks = c("30", "42", "42both"), #the order listed in the legend label = c("Control", "Male", "Male & Female")) + #how things are labeled in the lgend theme_classic() + #the theme of the whole plot theme( #legend.position="none", #get rid of the hashtag to get rid of legend panel.grid.major=element_blank(), #getting rid of majorgridlines panel.border=element_blank(), #getting rid of minorgridlines panel.grid.minor=element_blank(), axis.line.x=element_line(color="black", size = 1), axis.line.y=element_line(color="black", size = 1), axis.text.x=element_text(color="black", size=12), axis.text.y=element_text(color="black", size=12), axis.title.x=element_text(face = "bold", size=12, color="black", margin = margin(t = 10, r = 0, b = 0, l = 0)), axis.title.y=element_text(face = "bold", size=12, color="black", margin = margin(t = 0, r = 10, b = 0, l = 0)), legend.position="none", panel.background=element_blank(), plot.background=element_rect(fill="transparent", colour = NA)) setwd("C:/Users/UEA/Desktop") ggsave("fig2repfit.png",width=3.5, height=4, dpi=300, bg = "transparent") setwd("C:/Users/UEA/Documents/Dissertation and phd/d- data for phd/Neat data/life stage recovery") ############# DESCRIPTIVE STATISTICS TOTAL ############### library(psych) #gives you vars n, mean, sd, median, trimmed, mad, min, max, range, skew, kurtosis, se describeBy(repfit10D$Offspring.count.10D, repfit10D$Temperature.oC) # $`30` # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 134 135 48 147 140 32 0 216 216 -1.1 1 4.2 # # $`42` # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 114 62 70 7 54 10 0 220 220 0.57 -1.2 6.6 # # $`42both` # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 33 68 55 79 67 74 0 160 160 -0.07 -1.5 9.6 describeBy(repfit10D$Offspring.count.10D, list(repfit10D$Date, repfit10D$Temperature.oC)) ################################################################################################################################################# NEW METHOD: USE GLM(M) WITH NON-GAUSSIAN ERROR STRUCTURE OF ORGANISM HEAT ###################################################### ######################## MODEL SELECTION ################## library(car); library(MASS); library (lme4); library (nlme); library(MuMIn); library(multcomp) library(glmmADMB)# glmmADMB() #### Non-mixed poisson family error structures # As data is very right skewed count discrete count, fitting normal distibution does not give normal and homogenity of variance in residuals # Creating a global model globalmodposs<-glm(Offspring.count.10D ~ Temperature.oC, poisson(link = "log"), data=repfit10D) summary(globalmodposs) # AIC: 17534 AICc<-(-2*logLik(globalmodposs))+((2*1*(1+1)/(281-1-1))); AICc # qAICc<-((-2*logLik(model1)/Theta)+((2*p*(p+1)/(n-p-1))); qAICc # AIC correcting for perameters(p) and sample size (n) # 17528 pseudoR<-(globalmodposs$null.deviance-globalmodposs$deviance) / globalmodposs$null.deviance; pseudoR # 0.19 (thomas et al., 2015) theta<-globalmodposs$deviance/globalmodposs$df.residual; theta # 58 dispersiontest(globalmodposs) # 46 # poisson poor fit; overdispersion >15, negative binomial recommended table(repfit10D$Offspring.count.10D) str(repfit10D$Offspring.count.10D) # Checking importance of random factor globalmodposs<-glm(Offspring.count.10D ~ Date, poisson(link = "log"), data=repfit10D) summary(globalmodposs)# 20449 pseudoR<-(globalmodposs$null.deviance-globalmodposs$deviance) / globalmodposs$null.deviance; pseudoR # 0.043(thomas et al., 2015) lrtest(globalmodposs)# Good as generation blocks explain little variation but significant; needs to be accounted for in mixed model describeBy(repfit10D$Offspring.count.10D, list(repfit10D$Temperature.oC, repfit10D$Date), mat=TRUE) ########### As a mixed model #################### # to take into account blocking effects globalmixmodposs<- glmer(Offspring.count.10D ~ Temperature.oC + (1|Date), data=repfit10D, family=poisson(link = "log"), REML = TRUE) nullmixmodposs<- glmer(Offspring.count.10D ~ 1 + (1|Date), data=repfit10D, family=poisson(link = "log"), REML = TRUE) summary(globalmixmodposs) # instantly looks like poor fit # AIC AIC(globalmixmodposs)# 17390 AICc<-(-2*logLik(globalmixmodposs))+((2*2*(2+1)/(281-2-1))); AICc # qAICc<-((-2*logLik(model1)/Theta)+((2*p*(p+1)/(n-p-1))); qAICc # AIC correcting for perameters(p) and sample size (n) # 17390 # R^2 #! library r.squaredGLMM(globalmixmodposs) # Marginal R proportion of variance explained by fixed factors R2m = 0.8936916 # Conditional R proportion of variance explained by random facotrs R2c = 0.9437268 # Overdispersion function for mixed model; Thomas et al., 2015 couldn't get to work; perhaps formatting, looks same as below overdisp_fun <- function(model) {vpars<-function(m) {nrow(n)*(nrow(m)+1)/2} model.df <- sum(sapply(VarCorr(model),vpars))+ length(fixef(model)) rdf <- nrow(model.frame(model))-model.df rp <- residuals (model,type="pearson") Pearson.chisq <- sum(rp^2) prat <- Pearson.chisq/rdf pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE) c(chisq=Pearson.chisq, ratio=prat, rdf=rdf, p=pval)} # http://glmm.wikidot.com/faq overdisp_fun <- function(model) { ## number of variance parameters in ## an n-by-n variance-covariance matrix vpars <- function(m) { nrow(m)*(nrow(m)+1)/2 } model.df <- sum(sapply(VarCorr(model),vpars))+length(fixef(model)) rdf <- nrow(model.frame(model))-model.df rp <- residuals(model,type="pearson") Pearson.chisq <- sum(rp^2) prat <- Pearson.chisq/rdf pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE) c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval) } overdisp_fun(globalmixmodposs) # chisq ratio rdf p # 12659 46 277 0 # A ratio of >2 shows excessive overdispersion (Thomas et al., 2015) # From histograms and summary of raw data 0 inflation likely / # Checking reasons: no other covariates and samples are already mean of males, no pseudo rep as different males and females used for each datapoint hist(repfit10D$Offspring.count.10D) # around 1/3 of sample (80/281) cases of are 0. 0s are true 0s of no offspring counts as each petri dish was sifted and any adults would have been collected, no escapees either. # therefore separating 0 presence and absence from the counts manually like a 0 inflated model # Assumption checks sresid<-resid(globalmixmodposs, type="pearson"); hist(sresid) # not necassary for normal residuals, but look quite normal fits<-fitted(globalmixmodposs); plot(sresid~fits) # checking for heteroscedasicity; some trend plot(sresid~repfit10D$Replicate)# pattern of horizontal line could be 0s plot(sresid~repfit10D$Temperature.oC) # 42 different plot(sresid~repfit10D$Date) # no pattern ####### NEGATIVE BINOMIAL INCLUDING 0s. After discussing with Matt, makes more biological sense to include 0s as a continuum however, negative binomial modesl failing to iterate. Gaussian and poisson models poor fit. Random factor is important needs to be kept in. Tried with dataset of just balanced 30s and 42s still doesnt iterate ############## ############ organismheatsnegbin <- glmer.nb(Offspring.count.10D ~ Temperature.oC + (1|Date), link= "log", data=repfit10D, REML = TRUE) # poisson organismheatspoiss <- glmer(Offspring.count.10D ~ Temperature.oC + (1|Date), poisson(link= log), data=repfit10D) ## AIC AIC(organismheatsnegbin)# 17245 AICc<-(-2*logLik(organismheatsnegbin))+((2*2*(2+1)/(207-2-1))); AICc # qAICc<-((-2*logLik(model1)/Theta)+((2*p*(p+1)/(n-p-1))); qAICc # AIC correcting for perameters(p) and sample size (n) # 17235 ## R^2 r.squaredGLMM(organismheatsnegbin) # 0.8917000 0.9418100 ## Overdispersion check overdisp_fun(organismheatsnegbin) # chisq ratio rdf p # 12508 45 277 0 # A ratio of 1 is no overdispersion; fixed (Thomas et al., 2015) ## Assumption checks sresid<-resid(organismheatsnegbin, type="pearson"); hist(sresid) # not necassary for normal residuals, better fits<-fitted(organismheatsnegbin); plot(sresid~fits) # checking for heteroscedasicity; better plot(sresid~repfit10D$Replicate)# random plot(sresid~repfit10D$Temperature.oC) # similar plot(sresid~repfit10D$Date) # no pattern ## model refinement organismheatsnegbin <- glmer.nb(Offspring.count.10D ~ Temperature.oC + (1|Date), link= "log", data=repfit10D, glmerControl(optimizer="bobyqa", optCtrl = list(maxfun = 100000)), REML = TRUE) nullmod <- glmer.nb(Offspring.count.10D ~ 1 + (1|Date), link = "log", data=repfit10D, glmerControl(optimizer="bobyqa", optCtrl = list(maxfun = 100000)), REML = TRUE) # creating null of just intercept (and random in glmms) # 1) anova comparison anova(organismheatsnegbin, nullmod, test = "Chi") # Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq) # nullmod 3 20327 20338 -10160 20321 # organismheatsnegbin 5 17245 17263 -8618 17235 3086 2 <2e-16 *** # 2) liklihood ratio test library(lmtest) lrtest(nullmod,organismheatsnegbin) # Model 1: Offspring.count.10D ~ 1 + (1 | Date) # Model 2: Offspring.count.10D ~ Temperature.oC + (1 | Date) # #Df LogLik Df Chisq Pr(>Chisq) # 1 3 -10160 # 2 5 -8618 2 3086 <2e-16 *** ## Global model significantly lower LL. Therefore complex model kept (although the simplest prefered for NS difference) drop1(organismheatsnegbin, test= "Chi") ## summary summary(organismheatsnegbin) # Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod'] # Family: Negative Binomial(6186) ( log ) # Formula: Offspring.count.10D ~ Temperature.oC + (1 | Date) # Data: organismheat # # AIC BIC logLik deviance df.resid # 17245 17264 -8618 17235 276 # # Scaled residuals: # Min 1Q Median 3Q Max # -11.429 -7.642 0.562 3.646 19.127 # # Random effects: # Groups Name Variance Std.Dev. # Date (Intercept) 0.00845 0.0919 # Number of obs: 281, groups: Date, 4 # # Fixed effects: # Estimate Std. Error z value Pr(>|z|) # (Intercept) 4.9192 0.0466 105.5 <2e-16 *** # Temperature.oC42 -0.8168 0.0162 -50.3 <2e-16 *** # Temperature.oC42both -0.5612 0.0258 -21.7 <2e-16 *** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Correlation of Fixed Effects: # (Intr) Temprtr.C42 # Temprtr.C42 -0.081 # Tmprtr.C42b -0.044 0.000 exp(4.9192) #137 exp(4.9192-0.8168) #60 exp(4.9192-0.5612) #78 ## pairwise comparisons library(multcomp) summary(glht(organismheatsnegbin, mcp(Temperature.oC="Tukey"))) # Simultaneous Tests for General Linear Hypotheses # # Multiple Comparisons of Means: Tukey Contrasts # # Fit: glmer(formula = Offspring.count.10D ~ Temperature.oC + (1 | Date), # data = organismheat, family = negative.binomial(theta = 6185.77753027824), # link = "log", REML = TRUE) # # Linear Hypotheses: # Estimate Std. Error z value Pr(>|z|) # 42 - 30 == 0 -0.8168 0.0162 -50.31 <2e-16 *** # 42both - 30 == 0 -0.5612 0.0258 -21.75 <2e-16 *** # 42both - 42 == 0 0.2557 0.0305 8.39 <2e-16 *** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # (Adjusted p values reported -- single-step method) ################# Separate analysis of 0s (binomial) and non 0 counts ############# #! After couple of lab meetings decided that it is meaningful to analyse the 0s separately as there may be more than one mechanism present. #! there was also discussion of regression which was not unanimous, but pairwise testing of the three treatments supported #! The manual method produces the same significance outputs as the zero inflated models and is better because i)is more transparent ii) can get more decriptives from it. glmmADMB doesnt work with non-mixed models but is what the Gage lab nature paper used # Consists of # Presence/absence model of offpsring to see if heatwaves produce more 0s # Count model of offspring to see if heatwaves produce lower counts when offspring are produced organismheat0pa<-data.frame(repfit10D,Zeros=ifelse(repfit10D$Offspring.count.10D>=1,1,0)) # creating dataframe with hardening 0 presence/absence organismheatcounts<-data.frame(organismheat0pa[organismheat0pa$Zeros>0,]) # creating dataframe of just counts > 0 ################################## ! DESCRIPTIVE STATS - MANUAL ZERO INFLATED ZEROS ################## describeBy(organismheat0pa$Zeros, organismheat0pa$Temperature.oC) # $`30` # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 134 0.96 0.21 1 1 0 0 1 1 -4.35 17.08 0.02 # # $`42` # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 114 0.5 0.5 0.5 0.5 0.74 0 1 1 0 -2.02 0.05 # # $`42both` # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 33 0.67 0.48 1 0.7 0 0 1 1 -0.68 -1.59 0.08 table(organismheat0pa$Zeros, organismheat0pa$Temperature.oC) # 30 42 42both # 0 6 57 11 # 1 128 57 22 ################################## MODEL SELECTION - MANUAL ZERO INFLATED ZEROS ################## ########### Zero presence/absence model # bernoulli GLM orgheat0pamixbinom <- glmer(Zeros ~ Temperature.oC + (1|Temperature.oC/Date), binomial(link= "logit"), data=organismheat0pa, REML = TRUE) nullorgheat0pamixbinom <- glmer(Zeros ~ 1 + (1|Temperature.oC/Date), binomial(link= "logit"), data=organismheat0pa, REML = TRUE) ## AIC AIC(orgheat0pamixbinom); AIC(nullorgheat0pamixbinom)# 257. 319.187 AICc<-(-2*logLik(orgheat0pamixbinom))+((2*2*(2+1)/(281-2-1))); AICc # qAICc<-((-2*logLik(model1)/Theta)+((2*p*(p+1)/(n-p-1))); qAICc # AIC correcting for perameters(p) and sample size (n) # 249 AICc<-(-2*logLik(nullorgheat0pamixbinom))+((2*2*(2+1)/(281-2-1))); AICc # qAICc<-((-2*logLik(model1)/Theta)+((2*p*(p+1)/(n-p-1))); qAICc # AIC correcting for perameters(p) and sample size (n) # 315.2301 ## R^2 r.squaredGLMM(orgheat0pamixbinom) # Marginal R proportion of variance explained by fixed factors R2m = 0.3959172 # Conditional R proportion of variance explained by random facotrs R2c = 0.3959172 ## assumption check devresid<-resid(orgheat0pamixbinom, type = "deviance") hist(devresid) # deviance residuals <2 so ok thomas et al., 2015 # Overdispersion cannot occur on Bernoulli glm ################################## ! MODEL SIGNIFICANCE - MANUAL ZERO INFLATED ZEROS ################## ## model refinement lrtest(nullorgheat0pamixbinom, orgheat0pamixbinom) # Df LogLik Df Chisq Pr(>Chisq) # 3 -130.42 # 2 5 -124.52 2 11.79 0.002753 ** # global model lower AIC and significantly different in explainative power drop1(orgheat0pamixbinom, test = "Chi") # Df AIC LRT Pr(Chi) # 259.05 # Temperature.oC 2 266.84 11.79 0.002753 ** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ################################## ! MODEL POST HOC - MANUAL ZERO INFLATED ZEROS ################## summary(orgheat0pamixbinom) # Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod'] # Family: binomial ( logit ) # Formula: Zeros ~ Temperature.oC + (1 | Temperature.oC/Date) # Data: organismheat0pa # # AIC BIC logLik deviance df.resid # 259.0 277.2 -124.5 249.0 276 # # Scaled residuals: # Min 1Q Median 3Q Max # -4.6188 -1.0000 0.2165 0.7071 1.0000 # # Random effects: # Groups Name Variance Std.Dev. # Date:Temperature.oC (Intercept) 0 0 # Temperature.oC (Intercept) 0 0 # Number of obs: 281, groups: Date:Temperature.oC, 7; Temperature.oC, 3 # # Fixed effects: # Estimate Std. Error z value Pr(>|z|) # (Intercept) 3.0603 0.4177 7.326 2.36e-13 *** # Temperature.oC42 -3.0603 0.4578 -6.685 2.31e-11 *** # Temperature.oC42both -2.3671 0.5575 -4.246 2.18e-05 *** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Correlation of Fixed Effects: # (Intr) Temprtr.C42 # Temprtr.C42 -0.912 # Tmprtr.C42b -0.749 0.684 library(lsmeans) lsmeans(orgheat0pamixbinom, pairwise~Temperature.oC, adjust="tukey") # Results are given on the logit (not the response) scale. # Confidence level used: 0.95 # # $contrasts # contrast estimate SE df z.ratio p.value # 30 - 42 3.0602708 0.4577800 NA 6.685 <.0001 # 30 - 42both 2.3671236 0.5575261 NA 4.246 0.0001 # 42 - 42both -0.6931472 0.4140658 NA -1.674 0.2152 ################################## ! DESCRIPTIVE STATS - MANUAL ZERO INFLATED COUNT ################## describeBy(organismheatcounts$Offspring.count.10D, organismheatcounts$Temperature.oC) # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 128 140.95 39.21 149.5 143.92 31.13 23 216 193 -0.81 0.52 3.47 # # $`42` # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 57 123 47.47 120 123.68 53.37 14 220 206 -0.14 -0.61 6.29 # # $`42both` # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 22 102.09 32.15 100 102 42.25 53 160 107 -0.02 -1.31 6.85 describeBy(organismheatcounts$Offspring.count.10D, list(organismheatcounts$Date, organismheatcounts$Temperature.oC)) ################################## MODEL SELECTION - MANUAL ZERO INFLATED COUNT ################## ## AIC AIC(organismheatspoiss); # 4228.456 AICc<-(-2*logLik(organismheatspoiss))+((2*2*(2+1)/(207-2-1))); AICc # qAICc<-((-2*logLik(model1)/Theta)+((2*p*(p+1)/(n-p-1))); qAICc # AIC correcting for perameters(p) and sample size (n) # 4218.515 ## R^2 r.squaredGLMM(organismheatspoiss) # Marginal R proportion of variance explained by fixed factors R2m = 0.07566624 # Conditional R proportion of variance explained by random facotrs R2c = 0.08470460 # Overdispersion function for mixed model; Thomas et al., 2015 couldn't get to work; perhaps formatting, looks same as below overdisp_fun <- function(model) {vpars<-function(m) {nrow(n)*(nrow(m)+1)/2} model.df <- sum(sapply(VarCorr(model),vpars))+ length(fixef(model)) rdf <- nrow(model.frame(model))-model.df rp <- residuals (model,type="pearson") Pearson.chisq <- sum(rp^2) prat <- Pearson.chisq/rdf pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE) c(chisq=Pearson.chisq, ratio=prat, rdf=rdf, p=pval)} # http://glmm.wikidot.com/faq overdisp_fun <- function(model) { ## number of variance parameters in ## an n-by-n variance-covariance matrix vpars <- function(m) { nrow(m)*(nrow(m)+1)/2 } model.df <- sum(sapply(VarCorr(model),vpars))+length(fixef(model)) rdf <- nrow(model.frame(model))-model.df rp <- residuals(model,type="pearson") Pearson.chisq <- sum(rp^2) prat <- Pearson.chisq/rdf pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE) c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval) } overdisp_fun(organismheatspoiss) # chisq ratio rdf p # 2491 12 202 0 # A ratio of >2 shows excessive overdispersion (Thomas et al., 2015) # Checking reasons: no other covariates and samples are already mean of males, no pseudo rep as different males and females used for each datapoint, no 0s, true overdispersion, negative binomial recommended hist(organismheatcounts$Offspring.count.10D) ## Assumption checks sresid<-resid(organismheatspoiss, type="pearson"); hist(sresid) # not necassary for normal residuals, but look quite normal fits<-fitted(organismheatspoiss); plot(sresid~fits) # checking for heteroscedasicity; not much trend plot(sresid~organismheatcounts$Replicate)# random plot(sresid~organismheatcounts$Temperature.oC) # similar plot(sresid~organismheatcounts$Date) # no pattern ########## Non-zero count negative binomial model organismheatsnegbin <- glmer.nb(Offspring.count.10D ~ Temperature.oC + (1|Date), link= "log", data=organismheatcounts, REML = TRUE) ## AIC AIC(organismheatsnegbin)# 2180 AICc<-(-2*logLik(organismheatsnegbin))+((2*2*(2+1)/(207-2-1))); AICc # qAICc<-((-2*logLik(model1)/Theta)+((2*p*(p+1)/(n-p-1))); qAICc # AIC correcting for erameters(p) and sample size (n) # 2169 ## R^2 r.squaredGLMM(organismheatsnegbin) # R-squared not calculable ## Overdispersion check overdisp_fun(organismheatsnegbin) # chisq ratio rdf p # 156.2394499 0.7696525 203.0000000 0.9936790 # A ratio of 1 is no overdispersion; fixed (Thomas et al., 2015) ## Assumption checks sresid<-resid(organismheatsnegbin, type="pearson"); hist(sresid) # not necassary for normal residuals, better fits<-fitted(organismheatsnegbin); plot(sresid~fits) # checking for heteroscedasicity; better plot(sresid~organismheatcounts$Replicate)# random plot(sresid~organismheatcounts$Temperature.oC) # similar plot(sresid~organismheatcounts$Date) # no pattern ## model refinement organismheatsnegbin <- glmer.nb(Offspring.count.10D ~ Temperature.oC + (1|Date), link= "log", data=organismheatcounts, REML = TRUE) nullmod <- glmer.nb(Offspring.count.10D ~ 1 + (1|Date), link = "log", data=organismheatcounts, REML = TRUE) # creating null of just intercept (and random in glmms) # 1) anova comparison anova(organismheatsnegbin, nullmod, test = "Chi") # 2) liklihood ratio test library(lmtest) lrtest(nullmod,organismheatsnegbin) # Likelihood ratio test # # Model 1: Offspring.count.10D ~ 1 + (1 | Date) # Model 2: Offspring.count.10D ~ Temperature.oC + (1 | Date) # #Df LogLik Df Chisq Pr(>Chisq) # 1 3 -1091.7 # 2 5 -1084.6 2 14.291 0.0007885 *** ################################## ! MODEL SIGNIFICANCE - MANUAL ZERO INFLATED COUNT ################## ## Global model significantly lower LL. Therefore complex model kept (although the simplest prefered for NS difference) drop1(organismheatsnegbin, test= "Chi") # Df AIC LRT Pr(Chi) # 2179.1 # Temperature.oC 2 2189.7 14.567 0.0006866 *** ################################## ! MODEL POST HOC - MANUAL ZERO INFLATED COUNTS ################## ## summary summary(organismheatsnegbin) # Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod'] # Family: Negative Binomial(8.2084) ( log ) # Formula: Offspring.count.10D ~ Temperature.oC + (1 | Date) # Data: organismheatcounts # # AIC BIC logLik deviance df.resid # 2179.1 2195.8 -1084.6 2169.1 202 # # Scaled residuals: # Min 1Q Median 3Q Max # -2.4582 -0.5157 0.1325 0.5939 2.1876 # # Random effects: # Groups Name Variance Std.Dev. # Date (Intercept) 4.073e-12 2.018e-06 # Number of obs: 207, groups: Date, 4 # # Fixed effects: # Estimate Std. Error z value Pr(>|z|) # (Intercept) 4.94837 0.03174 155.92 < 2e-16 *** # Temperature.oC42 -0.13619 0.05733 -2.38 0.017530 * # Temperature.oC42both -0.32251 0.08361 -3.86 0.000115 *** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Correlation of Fixed Effects: # (Intr) Temprtr.C42 # Temprtr.C42 -0.554 # Tmprtr.C42b -0.380 0.210 ################################## ! MODEL POST HOC - MANUAL ZERO INFLATED COUNTS ################## library(lsmeans) lsmeans(organismheatsnegbin, pairwise~Temperature.oC, adjust="tukey") # $lsmeans # Temperature.oC lsmean SE df asymp.LCL asymp.UCL # 30 4.948371 0.03173633 NA 4.886169 5.010573 # 42 4.812181 0.04774857 NA 4.718595 4.905766 # 42both 4.625865 0.07734841 NA 4.474265 4.777465 # # Results are given on the log (not the response) scale. # Confidence level used: 0.95 # # $contrasts # contrast estimate SE df z.ratio p.value # 30 - 42 0.1361901 0.05733342 NA 2.375 0.0461 # 30 - 42both 0.3225060 0.08360602 NA 3.857 0.0003 # 42 - 42both 0.1863158 0.09089941 NA 2.050 0.1006 ######################################################################################################################################### CHECKING WITH SIMPLE STATS, PLOTTING RAW DATA DISTRIBUTION AND TESTING NORMALITY AND HOMOGENIETY OF VARIANCES. PARAMETRIC NORMAL > TRY AND TRANSFORM TO PARAMETRIC > NON PARAMETRIC ############################### names(organismheat) levels(organismheat$Temperature.oC) str(organismheat) ### in base # 30 hist(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "30"], main = list("30", cex = 2), xlab = "10 day adult count", ylab ="Frequency", ylim = c(0,60), nclass = 10) # 42 hist(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "42"], col = "red", density = 30, angle = 180, border = "red", main = list("42", cex = 2), xlab = "10 day adult count", ylab ="Frequency", ylim = c(0,60), nclass = 10) # 42both hist(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "42both"], col = "red4", density = 30, angle = 180, border = "red4", main = list("42both", cex = 2), xlab = "10 day adult count", ylab ="Frequency", ylim = c(0,60), nclass = 10) # keep nclass = 10, keep scales default ###### plotting differences # base boxplots of data distribution grouped by temperature boxplot(organismheat$Offspring.count.10D ~ organismheat$Temperature.oC, ylab="10D Adult Count", xlab="Temperature") # notice plot has automatically produced a scatterplot if x is made as an integar ########### Normality - Failed in all groups, control negative skew and leptokurtic, heat positive skew and platykurtic shapiro.test (organismheat$Offspring.count.10D[organismheat$Temperature.oC == "30"]) # W = 0.9, p-value = 5e-08 ks.test(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "30"], pnorm) # D = 1, p-value <2e-16 shapiro.test (organismheat$Offspring.count.10D[organismheat$Temperature.oC == "42"]) # W = W = 0.8, p-value = 3e-11 ks.test(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "42"], pnorm) # D = 0.5, p-value <2e-16 shapiro.test (organismheat$Offspring.count.10D[organismheat$Temperature.oC == "42both"]) # W = 0.9, p-value = 0.001 ks.test(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "42both"], pnorm) # D = 0.7, p-value = 4e-13 ########### Homogeneity of Variances - Failed, hea groups much more variance bartlett.test(organismheat$Offspring.count.10D ~ organismheat$Temperature.oC) # Bartlett's K-squared = 20, df = 2, p-value = 2e-04 fligner.test(organismheat$Offspring.count.10D ~ organismheat$Temperature.oC) # Fligner-Killeen:med chi-squared = 10, df = 2, p-value = 0.004 #! need library(car) leveneTest(organismheat$Offspring.count.10D ~ organismheat$Temperature.oC) #Df F value Pr(>F) 2 9.51 1e-04 *** ########## Transformation with just hist and shapiro # comparing plots and tests before and after # two other methods: by(df$response, df$treatment, shapiro.test) # with(df, tapply(response, treatment, shapiro.test)) ## RIGHT SKEW FIXING par(mfrow=c(2,2)) #plotting the graphs next to get other in a 4x4 gird #raw data not normal; positive/right skew hist (sqrt(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "30"])) hist (sqrt(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "42"])) hist (sqrt(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "42both"])) shapiro.test (sqrt(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "30"])) shapiro.test (sqrt(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "42"])) shapiro.test (sqrt(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "42both"])) #sqrt not normal hist (log10(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "30"]+0.01)) hist (log10(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "42"]+0.01)) hist (log10(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "42both"]+0.01)) shapiro.test (log10(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "30"]+0.01)) shapiro.test (log10(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "42"]+0.01)) shapiro.test (log10(organismheat$Offspring.count.10D[organismheat$Temperature.oC == "42both"]+0.01)) #log10 not normal #no transformation can fix groups with opposite skew/kurtosis #### 2+ sample Kurskall Wallis # As the data is both not normal and not homogenous in variance in groups there is debate over the best method so both tried #1) Homogeneity of variance not strict assumpation of Kurskall wallace kruskal.test(organismheat$Offspring.count.10D ~ organismheat$Temperature.oC) # Kruskal-Wallis chi-squared = 73.601, df = 2, p-value <2e-16 # there is a significant difference between groups # Posthoc testing #! library(pgirmess) kruskalmc(organismheat$Offspring.count.10D ~ organismheat$Temperature.oC, probs = 0.05, cont=NULL) # change to 2 tailed for all to control # obs.dif critical.dif difference # 30-42 82.2 25 TRUE # 30-42both 83.6 38 TRUE # 42-42both 1.4 38 FALSE #! library(PMCMR) posthoc.kruskal.nemenyi.test(organismheat$Offspring.count.10D ~ organismheat$Temperature.oC, dist="Chisquare") #"Tukey" for no ties # 30 42 # 42 1.2e-14 - # 42both 6.4e-07 1 # dunn.test.control(x=survivalsex$Prop.survivors,g=survivalsex$Treatment, p.adjust="bonferroni") # dunn.test.control(x=survivalsex$Prop.survivors,g=survivalsex$Treatment) posthoc.kruskal.dunn.test(x=organismheat$Offspring.count.10D, g=organismheat$Temperature.oC, p.adjust.method="bonferroni") # ULTRA CONSERVATIVE posthoc.kruskal.dunn.test(x=organismheat$Offspring.count.10D, g=organismheat$Temperature.oC) # THIS METHOD PREFERRED FOR NON PARAMETRIC UNEQUAL SAMPLE SIZES # 30 42 # 42 3.4e-15 - # 42both 1.9e-07 0.93 #also see Games-Howell test as more robust to violation of assumptions #2) Convert data to ranks and submitt to a welch anova and tukey HSD Ruxton (2006) organismheat$rank<-rank(organismheat$Offspring.count.10D) # ranking all data inter-group by ascending count oneway.test(organismheat$Offspring.count.10D~ organismheat$Temperature.oC, var.equal=FALSE) # F = 50, num df = 2, denom df = 90, p-value = 6e-16 TukeyHSD(aov) # only works with aov not oneway, #! library(userfriendlyscience) posthocTGH(y=organismheat$Offspring.count.10D, x=organismheat$Temperature.oC, method="games-howell") # use games-howell when different # n means variances # 30 134 135 2324 # 42 114 62 4932 # 42both 33 68 3067 # # t df p # 30:42 9.39 195 6.0e-14 # 30:42both 6.34 45 3.0e-07 # 42:42both 0.56 65 8.4e-01