#check what is in r console from last time ls() #remove everything rm(list=ls()) #check what working directory is set getwd() #setting to required one where data is based setwd("C:/Users/UEA/Documents/nature heatwave") #reading in data lifestage10D <- read.csv("heatwaveegg.csv", header = TRUE) # 10D data set str(lifestage10D) # 'data.frame': 135 obs. of 3 variables: # $ Replicate : int 1 2 3 4 5 6 7 8 9 10 ... # $ Temperature.oC: int 30 30 30 30 30 30 30 30 30 30 ... # male heatwave temperature # $ Egg.sum.10D : int 96 235 177 198 194 208 143 194 208 68 ... lifestage10D$Egg.sum <- lifestage10D$Egg.sum.10D lifestage10D$Temperature.oC<- as.factor(lifestage10D$Temperature.oC) #### PLOTTING DATA ######### library(ggplot2) temp <- expression(paste('Temperature (',degree,'C)',sep='')) #the temperature label with degrees sign ############ ! NAT COMMS FIGURE S2 ######## grapheggext<-ggplot(lifestage10D, aes(x=Temperature.oC, y=Egg.sum.10D, fill= Temperature.oC)) + #change fill to colour is just lines and change 'scale_fill_manual' below to scale_color_manual geom_boxplot(notch=F, #change to F if want to get rid of notchs outlier.shape= NA, #shape of the outlier (hashtag out if dont want outliers marked) width=0.5, lwd=0.5, fatten=0.5, color="black", position=position_dodge(0.5)) + #size of the outlier (hashtag out if dont want outliers marked) stat_summary(fun.y="mean", geom= "point", size=4, position=position_dodge(1), color="black") + scale_fill_manual(values=c("ghostwhite", "tomato"), # changes the colour of the bars name = temp, #adds in temperature label on the legend breaks = c("30", "42"), #the order listed in the legend label = c("Control", "Heatwave")) + #how things are labeled in the lgend geom_jitter(shape=1, size=1.5, position=position_jitter(0.15)) + #so all the data points are not ontop of each other labs (x= "Male treatment", y= "Female fecundity") + #adding title to the x axis and y axis scale_x_discrete(breaks=c("30", "42"), #the order of the variables on the x axis labels=c("Control", "Heatwave")) + # the names on the x axis coord_cartesian(ylim=c(-5, 255)) + #set axis limits scale_y_continuous(breaks=seq(0, 250, 50), #ticks from 0 to 16000 and show number every 16000 expand = c(0, 0)) + #cuts the axis off at 0 theme_classic() + #the theme of the whole plot theme( #legend.position="none", #get rid of the hashtag to get rid of legend panel.grid.major=element_blank(), #getting rid of majorgridlines panel.border=element_blank(), #getting rid of minorgridlines panel.grid.minor=element_blank(), axis.line.x=element_line(color="black", size = 1), axis.line.y=element_line(color="black", size = 1), axis.text.x=element_text(color="black", size=12), axis.text.y=element_text(color="black", size=12), axis.title.x=element_text(face = "bold", size=12, color="black", margin = margin(t = 10, r = 0, b = 0, l = 0)), axis.title.y=element_text(face = "bold", size=12, color="black", margin = margin(t = 0, r = 10, b = 0, l = 0)), legend.position="none", panel.background=element_blank(), plot.background=element_rect(fill="transparent", colour = NA)) setwd("C:/Users/UEA/Desktop") ggsave("grapheggext.png",width=2.5, height=4, dpi=300, bg = "transparent") setwd("C:/Users/UEA/Documents/nature heatwave") ################################################################################################################### ######################################## EGG COUNT ALL DATA ####################################################### ################################################################################################################### ########## ! NAT COMMS DESCRIPTIVE STATS ############### library(psych) describeBy(lifestage10D$Egg.sum, lifestage10D$Temperature.oC) # $`30` # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 59 173.14 34.62 186 176.06 23.72 68 235 167 -0.85 0.23 4.51 # # $`42` # vars n mean sd median trimmed mad min max range skew kurtosis se # X1 1 76 132.58 59.27 137.5 132.26 69.68 33 246 213 -0.03 -1.08 6.8 # 30 hist(lifestage10D$Egg.sum[lifestage10D$Temperature.oC == "30"], main = list("Control", cex = 2), xlab = "10 day egg count", ylab ="Frequency", ylim = c(0,20), nclass = 10) # 42 hist(lifestage10D$Egg.sum[lifestage10D$Temperature.oC == "42"], col = "red", density = 30, angle = 180, border = "red", main = list("Heatwave", cex = 2), xlab = "10 day egg count", ylab ="Frequency", ylim = c(0,20), nclass = 10) # keep nclass = 10, keep scales default ###### plotting differences # base boxplots of data distribution grouped by temperature boxplot(lifestage10D$Egg.sum ~ lifestage10D$Temperature.oC, ylab="10 day egg count", xlab="Temperature") ########### Normality - Failed in all groups shapiro.test (lifestage10D$Egg.sum[lifestage10D$Temperature.oC == "30"]) # W = 0.92777, p-value = 0.001772 ks.test(lifestage10D$Egg.sum[lifestage10D$Temperature.oC == "30"], pnorm) # D = 1, p-value < 2.2e-16 shapiro.test (lifestage10D$Egg.sum[lifestage10D$Temperature.oC == "42"]) # W = 0.9634, p-value = 0.02738 ks.test(lifestage10D$Egg.sum[lifestage10D$Temperature.oC == "42"], pnorm) # D = 1, p-value < 2.2e-16 ########### Homogeneity of Variances - Failed bartlett.test(lifestage10D$Egg.sum ~ lifestage10D$Temperature.oC) # Bartlett's K-squared = 17.19, df = 1, p-value = 3.382e-05 fligner.test(lifestage10D$Egg.sum ~ lifestage10D$Temperature.oC) # Fligner-Killeen:med chi-squared = 17.286, df = 1, p-value = 3.215e-05 #! need library(car) leveneTest(lifestage10D$Egg.sum ~ lifestage10D$Temperature.oC) #Df F value Pr(>F) 1 20.44 1.346e-05 *** #1 wilcox.test(lifestage10D$Egg.sum, lifestage10D$Temperaure.oC, exact = TRUE, conf.int = TRUE, paired = FALSE) # V = 9180, p-value < 2.2e-16 # method 2 welch t-test on ranks lifestage10D$rank<-rank(lifestage10D$Egg.sum) t.test(lifestage10D$rank ~ lifestage10D$Temperature.oC, var.equal=FALSE, exact = TRUE, conf.int = TRUE, paired = FALSE) # t = 4.4868, df = 132.73, p-value = 1.551e-05 ################################################################################################################### ######################################## EGG COUNT ALL DATA ####################################################### ################################################################################################################### library(car); library(MASS); library (lme4); library (nlme) ########## ! NAT COMMS MODEL SELECTION #################### globalmodposs<-glm(Egg.sum ~ Temperature.oC, poisson(link = "log"), data=lifestage10D) par(mfrow=c(2,2)); plot(globalmodposs);par(mfrow=c(1,1)) theta<-globalmodposs$deviance/globalmodposs$df.residual; theta # 19.51122 some overdispersion # -ve binomial model (to deal with overdispersion of >15) library(MASS) globalnvebinom<-glm.nb(Egg.sum ~ Temperature.oC, link = "log", data=lifestage10D) par(mfrow=c(2,2)); plot(globalnvebinom);par(mfrow=c(1,1)) theta<-globalnvebinom$deviance/globalnvebinom$df.residual; theta # 1.050605 ### MODEL REFINEMENT globalnvebinom<-glm.nb(Egg.sum ~ Temperature.oC, link = "log", data=lifestage10D) pseudoR<-(globalnvebinom$null.deviance-globalnvebinom$deviance) / globalnvebinom$null.deviance; pseudoR # 0.09825105 AICc<-(-2*logLik(globalnvebinom))+((2*1*(1+1)/(134-1-1))); AICc # 1471.181 ## Null model nullmod<-glm.nb(Egg.sum ~ 1, link = "log", data=lifestage10D) # creating null of just intercept (and random in glmms) pseudoR<-(nullmod$null.deviance-nullmod$deviance) / nullmod$null.deviance; pseudoR # (thomas et al., 2015) # 1.220057e-15 AICc<-(-2*logLik(nullmod))+((2*1*(1+1)/(134-1-1))); AICc # 1485.62 ### Global model explains offspring significantly more than null model because # 1) pseudo R is higher and AIC more than 2 lower # 2) anova comparison anova(globalnvebinom, nullmod, test = "Chi") anova(nullmod, globalnvebinom, test = "Chi") #! Use "F" for continuous dependent variables # Likelihood ratio tests of Negative Binomial Models # Response: Offspring.count.20D # Model theta Resid. df 2 x log-lik. Test df LR stat. Pr(Chi) # 1 1 5.974723 134 -1485.59 # 2 Temperature.oC 6.663834 133 -1471.15 1 vs 2 1 14.43936 0.0001447449 anova(globalnvebinom) # Df Deviance Resid. Df Resid. Dev Pr(>Chi) # NULL 134 154.96 # Temperature.oC 1 15.225 133 139.73 9.546e-05 *** ########## ! NAT COMMS MODEL SIGNIFIANCE ############## drop1(globalnvebinom, test = "Chi") # Df Deviance AIC LRT Pr(>Chi) # 139.73 1475.2 # Temperature.oC 1 154.96 1488.4 15.225 9.546e-05 *** # 3) liklihood ratio test library(lmtest) lrtest(globalnvebinom,nullmod) lrtest(nullmod,globalnvebinom)#produces same result just different order # Likelihood ratio test # # Model 1: Egg.sum ~ 1 # Model 2: Egg.sum ~ Temperature.oC # #Df LogLik Df Chisq Pr(>Chisq) # 1 2 -742.79 # 2 3 -735.58 1 14.439 0.0001447 *** ## Therefore complex model kept (although the simplest prefered for NS difference) globalnvebinom # shortened with basic information # redundant ########## ! NAT COMMS MODEL POST HOC ############## summary(globalnvebinom) # Call: # glm.nb(formula = Egg.sum ~ Temperature.oC, data = noNAlifestage10Degg, # link = "log", init.theta = 6.663833991) # # Deviance Residuals: # Min 1Q Median 3Q Max # -2.7989 -0.6264 0.1201 0.4145 1.7426 # # Coefficients: # Estimate Std. Error z value Pr(>|z|) # (Intercept) 5.15408 0.05139 100.285 < 2e-16 *** # Temperature.oC42 -0.26690 0.06867 -3.887 0.000102 *** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # (Dispersion parameter for Negative Binomial(6.6638) family taken to be 1) # # Null deviance: 154.95 on 134 degrees of freedom # Residual deviance: 139.73 on 133 degrees of freedom # AIC: 1477.2 # # Number of Fisher Scoring iterations: 1 # # # Theta: 6.664 # Std. Err.: 0.841 # # 2 x log-likelihood: -1471.150 #checking estimates, exponent of log link exp(5.20977) exp(5.20977-0.43327)