# Analysis of F2 hatch rates # Knegt etal # Title: Detection of genetic incompatibilities in non-model systems using simple # genetic markers: hybrid breakdown in the haplodiploid spider mite # Tetranychus evansi Knegt_etal_F2_hatch_rate <- read.csv("X:/YourDirectory/.../Knegt_etal_F2_hatch_rate.csv") Mites <- Knegt_etal_F2_hatch_rate rm(Knegt_etal_F2_hatch_rate) # GENERAL INFORMATION # F2 hatch rate is a proportion of F2 eggs that hatch # So I expect a binomial distribution # The numbers of eggs laid are not correlated, so I do not expect a mixed model # The explanatory variables I have are Cross and fertilized, this is my fixed part # But I will include the factor exp too # UPKEEP # Get rid of NAs and of 0 eggs Mites2 <- subset(Mites, F2.eggs > 0) # DATA EXPLORATION # Outliers boxplot(Mites2$F2.hatch/Mites2$F2.eggs) # Ok # Ouliers, variation and median for fertilized boxplot(F2.hatch/F2.eggs ~ fertilized, data = Mites2) # Could be that fertilized eggs hatch more often # Ouliers, variation and median for Cross boxplot(F2.hatch/F2.eggs ~ Cross, data = Mites2) # Huge treatment effect # Ouliers, variation and median for fertilized * Cross boxplot(F2.hatch/F2.eggs ~ Cross * fertilized, data = Mites2) # Does not look like an interaction # Ouliers, variation and median for Exp boxplot(F2.hatch/F2.eggs ~ Exp, data = Mites2) # Could be that eggs hatch more often in the first experiment # Zeroes table(Mites2$F2.hatch/Mites2$F2.eggs) # or (nrow(subset(Mites2, F2.hatch/F2.eggs == 0, select = F2.hatch))/length(Mites2$F2.hatch))*100 #percentage zeroes # Ow, could be zero inflated # Let's wait for the residuals # Interaction between Cross and fertilized? interaction.plot(Mites2$fertilized, Mites2$Cross, Mites2$F2.hatch/Mites2$F2.eggs, ylim=c(0,1)) # Does not look like an interaction # MODEL SELECTION AND VALIDATION # This is the model I expect: M1 <- glm(cbind(F2.hatch, F2.eggs-F2.hatch) ~ Cross * fertilized + Exp, family = binomial, data = Mites2) drop1(M1, test = "Chi") # Drop the interaction M2 <- glm(cbind(F2.hatch, F2.eggs-F2.hatch) ~ Cross + fertilized + Exp, family = binomial, data = Mites2) drop1(M2, test = "Chi") # Fertilization is not significant: Chi(1) = 1.19, p = 0.275 M3 <- glm(cbind(F2.hatch, F2.eggs-F2.hatch) ~ Cross + Exp, family = binomial, data = Mites2) drop1(M3, test = "Chi") # Cross and Exp are both significant # Overdispersion? E3 <- residuals(M3, type = "pearson") p3 <- length(coef(M3)) Overdisp3 <- sum(E3^2) / (nrow(Mites2) - p3) Overdisp3 # Within limits # Residual patterns? F3 <- fitted(M3, type = "response") par(mar=c(5,5,2,2)) plot(F3,E3, xlab = "Fitted values", ylab ="Pearson residuals", cex.lab = 1.5) abline(h=0, lty= 2) # Hmm, some dependence? # Independent from Cross? boxplot(E3 ~ Cross, data = Mites2) abline(0,0) # Small patterns here, but Cross is in the model and the deviations suggest even # stronger effects. So I think it has no effect on my conclusions. # Independent from fertilized? boxplot(E3 ~ fertilized, data = Mites2) abline(0,0) # yes # Independent from Exp? boxplot(E3 ~ Exp, data = Mites2) abline(0,0) # Yes # Influential observations? plot(M3, which = 4) # No # MODEL INTERPRETATION # I fitted a generalized linear model with a binomial distribution # The response variable is the proportion of F2 eggs that hatch # The factor Cross was significantly affecting the response variable # Chi^2 (3) = 1679.79, p < 0.001 # Exp was also significant: Chi^2(1) = 15.75, p < 0.001 # fertilization did not affect hatch rates: Chi^2(1) = 1.19, p = 0.275 summary(M3) # Now, let's do a post hoc test to see which Cross levels differ significantly library(multcomp) contr <- rbind("IxI vs IxII" = c(1, -1, 0, 0), "IxI vs IIxI" = c(1, 0, -1, 0), "IxI vs IIxII" = c(1, 0, 0, -1), "IxII vs IIxI" = c(0, 1, -1, 0), "IxII vs IIxII" = c(0, 1, 0, -1), "IIxI vs IIxII" = c(0, 0, 1, -1)) summary(glht(M1, mcp(Cross = contr))) # This test has automatically applied a Bonferroni correction on the p-values # We can thus assign significance classes # I x I: a # I x II: b # II x I: b # II x II: a # END