--- title: "AllelePurge_Fig3Table3" author: "Audrey" date: "30/04/2020" output: html_document editor_options: chunk_output_type: console --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` Load libraries and data ```{r} library(readr) library(ggplot2) library(tidyverse) library(devtools) library(gridExtra) library(effects) library(emmeans) library(car) library(lme4) library(binom) library(plotrix) ``` ```{r} true_freqs <- read.csv("../data/AW_Allele_purge_true_freq_counts.csv") data_true_freqs <- true_freqs[- c(12, 22), ] data_true_freqs$p <- (1 - data_true_freqs$q) data_true_freqs$dummy_offset <- qlogis(0.7) data_true_freqs$replicate <- as.factor(data_true_freqs$replicate) data_true_freqs <- mutate(data_true_freqs, q_count = ifelse( mutant_type == "yellow" | mutant_type == "white" | mutant_type == "forked", 2*mutant, (2*mutant) + hetero_wt)) data_true_freqs<- mutate(data_true_freqs, p_count = ifelse( mutant_type == "yellow" | mutant_type == "white" | mutant_type == "forked", 2*(data_true_freqs$homo_wt + data_true_freqs$mutant) - data_true_freqs$q_count, 2*(data_true_freqs$homo_wt + data_true_freqs$hetero_wt + data_true_freqs$mutant) - data_true_freqs$q_count)) ``` Model This code includes all replicates, including the third forked replicate. The ANOVA output is used in Table S2 and the contrast estimates generated from emtrends are used in Table S3 of the MS. ```{r} freqs_glmm <- glmer(cbind(q_count, p_count) ~ 0 + generation:treatment + generation:mutant_type + generation:treatment:mutant_type + (0 + generation | mutant_type:treatment:replicate) + offset(dummy_offset), family = "binomial", data = data_true_freqs) summary(freqs_glmm) Anova(freqs_glmm) freq_est <- emtrends(freqs_glmm, c("treatment", "mutant_type"), var = "generation", transform = "none") freq_est summary(freq_est) plot(freq_est) pairs(emtrends(freqs_glmm, c("treatment"), var = "generation", transform = "none")) freqs_predicted <- as.data.frame(predictorEffects(freqs_glmm, ~ generation, focal.levels = 9)) ``` Model without forked SCT R3 The ANOVA output is used in Table 1 and the contrast estimates generated from emtrends are used in Table 2 of the MS. This code needs to be run to generate figure 3 of the MS. ```{r} #setting up the data forkSCT3 <- (data_true_freqs[data_true_freqs$mutant_type == "forked" & data_true_freqs$replicate == "3" & data_true_freqs$treatment == "SCT",]) nofork3 <- data_true_freqs %>% anti_join(forkSCT3) #glmer nofork3_glmm <- glmer( cbind(q_count, p_count) ~ 0 + generation:treatment + generation:mutant_type + generation:treatment:mutant_type + (0 + generation | mutant_type:treatment:replicate) +offset(dummy_offset), family = "binomial", data = nofork3) summary(nofork3_glmm) Anova(nofork3_glmm) est2 <- emtrends(nofork3_glmm, c("treatment", "mutant_type"), var = "generation", transform = "none") summary(est2) plot(est2) pairs(emtrends(nofork3_glmm, c("treatment"), var = "generation", transform = "none")) plot(predictorEffects(nofork3_glmm, ~ generation, focal.levels = 9)) nofork_predicted <- as.data.frame(predictorEffects(nofork3_glmm, ~ generation, focal.levels = 9)) ``` Selection Coefficients - this code is a mess but I could not get the loop to work over the whole table This code and resulting figure was removed from the MS in the final draft due to redundancy. ```{r} #s = 1 - (q'/q) #order so we have all replicates for each treatment together to make calculation easier (need to divide below row by one above) ordered <- nofork3[-2, ] %>% #brown NT R2 did not make it to generation 3 or 6 so remove group_by(mutant_type, treatment, replicate) %>% arrange(replicate, treatment) ordered$s <- rep(NA, nrow(ordered)) #for(j in nrow(ordered)){ # for(i in 2:3){ # ordered$s[i] <- 1 - (ordered$q[i]/ordered$q[i-1]) # } #} #for (i in seq(2, nrow(ordered), by = 3)){ # ordered$s[i] <- 1 - (ordered$q[i]/ordered$q[i-1]) #} for (i in 2:nrow(ordered)){ ordered$s[i] <- 1 - (ordered$q[i]/ordered$q[i-1]) } coefs <- ordered[ordered$generation != "0", ] %>% group_by(treatment, replicate, mutant_type) %>% summarise(mean(s)) colnames(coefs)[4] <- "coef" coefs <- coefs %>% group_by(treatment, mutant_type) %>% summarise(selec_coef = mean(coef), stderror = std.error(coef)) %>% arrange(mutant_type) coefs$ci_up <- coefs$selec_coef + 2*coefs$stderror coefs$ci_low <- coefs$selec_coef - 2*coefs$stderror coefs <- within(coefs, treatment <- factor(treatment, levels =c("NT", "UCT", "SCT"))) ggplot(coefs, aes(x = treatment, y = selec_coef, colour = treatment, fill = treatment)) + geom_col() + geom_errorbar(aes(ymin = ci_low, ymax = ci_up), colour = "black") + facet_wrap(~mutant_type) + theme_classic(base_size = 24) + theme(strip.text = element_text(face = "italic")) + labs(x = "Treatment", y = "Mean Selection Coefficient") + scale_color_manual(breaks = c("NT", "UCT", "SCT"), values = c("#616161", "#039BE5", "#E69F00")) + scale_fill_manual(breaks = c("NT", "UCT", "SCT"), values = c("#616161", "#039BE5", "#E69F00")) + theme(legend.position = "none") ``` Plot Current plot in manuscript - Figure 3 ```{r} nofork_predicted <- as.data.frame(predictorEffects(nofork3_glmm, ~ generation, focal.levels = 9)) nofork_predicted <- as.data.frame(nofork_predicted) colnames(nofork_predicted) <- c("generation", "treatment", "mutant_type", "mutant_freq", "se", "lower", "upper") #mutant_freq column is actually the estimates of the fit of the model but had to rename for generating the figure averaged_data <- nofork3 %>% group_by(mutant_type, treatment, generation) %>% summarise(mutant_freq = mean(q), sd = sd(q)) #averaged_data <- cbind(nofork3, nofork_predicted[ ,4:7]) linear_plot <- ggplot(data = nofork_predicted, aes(x = generation, y = mutant_freq, colour = treatment)) + theme_classic(base_size = 24) + theme(strip.text = element_text(face = "italic")) + labs(x = "Generation", y = "Mutant Allele Frequency") + geom_ribbon(aes(x = generation, ymin = lower, ymax = upper, fill = treatment), alpha = 0.4) + facet_wrap(~mutant_type) + scale_colour_manual(breaks = c("NT", "UCT", "SCT"), values = c("#616161", "#E69F00", "#039BE5")) + scale_x_continuous(breaks = seq(0, 10, 2)) + scale_y_continuous(breaks = seq(0, 1, 0.3)) + expand_limits(y = 0) + scale_fill_manual(breaks = c("NT", "UCT", "SCT"), values = c("#616161", "#E69F00", "#039BE5")) + theme(legend.position = "none") + geom_pointrange(data = averaged_data, aes(ymin = (mutant_freq - sd), ymax = (mutant_freq + sd)), size = 1, shape = 16, position = position_jitterdodge(dodge.width = 0.4)) linear_plot #The points are the average mutant frequencies of each mutant type over time with the lines representing the 95% confidence intervals. The ribbons are estimated confidence intervals based on the models' fixed effects. est2 <- as.data.frame(est2) contrasts_plot <- ggplot(data = est2, aes(x = treatment, y = generation.trend)) + geom_pointrange(aes(ymin = asymp.LCL, ymax = asymp.UCL, colour = treatment), size = 1) + coord_flip() + facet_wrap(~mutant_type, ncol = 1, strip.position = "right") + scale_colour_manual(breaks = c("NT", "UCT", "SCT"), values = c("#616161", "#E69F00", "#039BE5")) + theme_classic(base_size = 24) + labs(x = NULL, y = "Generation Trend (s)", colour = "Treatment") + theme(axis.text.y = element_blank(), axis.line.y = element_blank(), axis.ticks.y = element_blank()) Fig3 <- grid.arrange(linear_plot, contrasts_plot, nrow = 1, widths = c(1.5, 1)) ``` Plot of the forked mutants while including all data - Figure S2. This plot needs to be generated with data from "AW_Allele_purge1_6_mutants" script. Run script at line 30, 64 and 376 to load data and generate partial plot. Then run line 29, 50 and 222 of this script. ```{r} freqs_glmm_fork <- as.data.frame(predictorEffects(freqs_glmm, ~ generation, focal.levels = 9, fixed.predictors = list(offset = qlogis(0.7)))) freqs_glmm_fork <- as.data.frame(freqs_glmm_fork) freqs_glmm_fork <- subset(freqs_glmm_fork, generation.mutant_type == "forked") colnames(freqs_glmm_fork) <- c("generation", "treatment", "mutant_type", "mutant_freq", "se", "lower", "upper") averaged_freqs_fork <- data_true_freqs %>% subset(mutant_type == "forked") %>% group_by(treatment, generation) %>% summarise(mutant_freq = mean(q), sd = sd(q)) freq_fork_plot <- ggplot(data = freqs_glmm_fork, aes(x = generation, y = mutant_freq, colour = treatment)) + theme_classic(base_size = 24) + theme(strip.text = element_text(face = "italic")) + labs(x = "Generation", y = "Mutant Allele \n\ Frequency") + geom_ribbon(aes(x = generation, ymin = lower, ymax = upper, fill = treatment), alpha = 0.4) + scale_colour_manual(breaks = c("NT", "UCT", "SCT"), values = c("#616161", "#E69F00", "#039BE5")) + scale_x_continuous(breaks = seq(0, 10, 2)) + scale_y_continuous(breaks = seq(0, 1, 0.3)) + scale_fill_manual(breaks = c("NT", "UCT", "SCT"), values = c("#616161", "#E69F00", "#039BE5")) + theme(legend.position = "none") + geom_pointrange(data = averaged_freqs_fork, aes(ymin = (mutant_freq - sd), ymax = (mutant_freq + sd)), size = 1, shape = 16, position = position_jitterdodge(dodge.width = 0.4)) freq_est <- as.data.frame(freq_est) contrasts_freqs_fork <- ggplot(data = subset(freq_est, mutant_type == "forked"), aes(x = treatment, y = generation.trend)) + geom_pointrange(aes(ymin = asymp.LCL, ymax = asymp.UCL, colour = treatment), size = 1) + coord_flip() + scale_colour_manual(breaks = c("NT", "UCT", "SCT"), values = c("#616161", "#E69F00", "#039BE5")) + theme_classic(base_size = 24) + labs(x = NULL, y = "Generation Trend", colour = "Treatment") + theme(axis.text.y = element_blank(), axis.line.y = element_blank(), axis.ticks.y = element_blank()) contrasts_freqs_fork grid.arrange(freq_fork_plot, contrasts_freqs_fork, nrow = 1, widths = c(1.5, 1)) FigS2 <- grid.arrange(fork_plot, contrasts_fork, freq_fork_plot, contrasts_freqs_fork, nrow = 2, widths = c(1.5, 1)) #this line needs to be used with the script that also includes all 6 mutants with mutant frequencies ``` Testing HWE - This code generates data for Table S4 of MS. ```{r} chisq <- function(observed, expected) { chisqval <- sum((observed - expected)^2/expected) return(chisqval) } #subsetting for only autosomal alleles HW_data_auto <- nofork3[nofork3$mutant_type == "brown" | nofork3$mutant_type == "vestigial" | nofork3$mutant_type == "plexus", 1:9] #calculate N HW_data_auto$N <- HW_data_auto$homo_wt + HW_data_auto$hetero_wt + HW_data_auto$mutant #expected frequencies HW_data_auto$p_sq <- (HW_data_auto$p)^2 HW_data_auto$two_pq <- (HW_data_auto$p)*(HW_data_auto$q)*2 HW_data_auto$q_sq <- (HW_data_auto$q)^2 #expected number of individuals HW_data_auto$exp_homo_wt <- HW_data_auto$p_sq* HW_data_auto$N HW_data_auto$exp_hetero <- HW_data_auto$two_pq * HW_data_auto$N HW_data_auto$exp_mutant <- HW_data_auto$q_sq * HW_data_auto$N #Chi square test HW_data_auto <- HW_data_auto %>% rowwise() %>% mutate(X_sq = chisq(c(homo_wt, hetero_wt, mutant), c(exp_homo_wt, exp_hetero, exp_mutant))) #P-values HW_data_auto$pval <- pchisq(HW_data_auto$X_sq, 1, lower.tail = FALSE) HW_data_auto <- HW_data_auto[HW_data_auto$generation != "0" ,] #we know gen 0 is in HWE HW_data_auto <- HW_data_auto[, c(1:7, 14:18)] HW_data_auto[, 8:12] <- round(HW_data_auto[,8:12], 3) ```