library(tidyverse)
library(magrittr)
library(readxl)
library(openxlsx)
library(lubridate)
library(cowplot)
library(lsmeans)
library(broom)
library(lme4)
library(lmerTest) # import lmerTest *AFTER* lme4
Input E1 data
file_name <- "E1_plants_LogFlr.txt"
in_name <- file.path(path.expand("~"),"FlrGWAS", "output", file_name)
df.E1 <- read_tsv(in_name)
## Parsed with column specification:
## cols(
## Experiment = col_character(),
## RefPop = col_character(),
## RackNum = col_character(),
## LogFlrAge = col_double()
## )
df.E1 <- df.E1 %>% mutate(RefPop = as.character(RefPop),
RackNum = as.character(RackNum),
Rack = str_c("E1.", RackNum))
df.E1 <- df.E1 %>% select(-RackNum)
glimpse(df.E1)
## Rows: 3,222
## Columns: 4
## $ Experiment <chr> "E1", "E1", "E1", "E1", "E1", "E1", "E1", "E1", "E1", "E1"…
## $ RefPop <chr> "RP003", "RP003", "RP003", "RP003", "RP003", "RP003", "RP0…
## $ LogFlrAge <dbl> 1.255, 1.230, 1.255, 1.230, 1.230, 1.230, 1.230, 1.255, 1.…
## $ Rack <chr> "E1.1A", "E1.1B", "E1.2C", "E1.2D", "E1.3A", "E1.4B", "E1.…
Input W3 data
file_name <- "W3_plants_LogFlr.txt"
in_name <- file.path(path.expand("~"),"FlrGWAS", "output", file_name)
df.W3 <- read_tsv(in_name)
## Parsed with column specification:
## cols(
## Experiment = col_character(),
## RefPop = col_character(),
## RackNum = col_double(),
## LogFlrAge = col_double()
## )
df.W3 <- df.W3 %>% mutate(RefPop = as.character(RefPop),
RackNum = as.character(RackNum),
Rack = str_c("W3.", RackNum))
df.W3 <- df.W3 %>% select(-RackNum)
glimpse(df.W3)
## Rows: 1,203
## Columns: 4
## $ Experiment <chr> "W3", "W3", "W3", "W3", "W3", "W3", "W3", "W3", "W3", "W3"…
## $ RefPop <chr> "RP278", "RP278", "RP278", "RP453", "RP507", "RP659", "RP0…
## $ LogFlrAge <dbl> 1.114, 1.114, 1.114, 1.114, 1.114, 1.114, 1.146, 1.146, 1.…
## $ Rack <chr> "W3.20", "W3.29", "W3.2", "W3.10", "W3.21", "W3.36", "W3.3…
Bind_rows from experiments E1 & W3
df.13 <- bind_rows(df.E1, df.W3)
df.13 <- df.13 %>% rename(Expt = Experiment)
(num.plants <- nrow(df.13))
## [1] 4425
(num.genos <- nrow(unique(select(df.13, RefPop))))
## [1] 464
Get family mean flowering date, and filter out families with < 4 data points.
df.nr <- df.13 %>% select(Expt, RefPop, LogFlrAge) %>%
mutate(RefPop = as.factor(RefPop),
Expt = as.factor(Expt)) %>%
arrange(RefPop, Expt)
df.nrg <- group_by(df.nr, RefPop, Expt)
## Warning: Factor `RefPop` contains implicit NA, consider using
## `forcats::fct_explicit_na`
df.nrm <- summarize(df.nrg, FD = mean(LogFlrAge),
N = n())
# Only retain families with >= 4 flowering individuals
df.nrm <- df.nrm %>% filter(N >= 4) %>% select(-N)
# Only retain families with data in both environments
df.wide <- df.nrm %>%
pivot_wider(names_from = Expt, values_from = FD) %>% drop_na()
# glimpse(df.wide)
df.tall <- df.wide %>%
pivot_longer(-RefPop,
names_to = "Envirn",
values_to = "LogFlrDate") %>%
arrange(RefPop, Envirn)
(num.genos <- nrow(unique(select(df.13, RefPop))))
## [1] 464
# glimpse(df.tall)
Get PopGroup Info
file_in <- "ID_PopGroup_worldclim_PCA.txt"
name.PopGroup <- file.path(path.expand("~"),"FlrGWAS", "output", file_in)
df.PG <- read_tsv(name.PopGroup)
## Parsed with column specification:
## cols(
## .default = col_double(),
## ID = col_character(),
## PopGroup = col_character()
## )
## See spec(...) for full column specifications.
df.PG <- df.PG %>% select(ID, PopGroup) %>% rename(RefPop = ID)
# glimpse(df.PG)
df.wide <- left_join(df.wide, df.PG)
## Joining, by = "RefPop"
## Warning: Column `RefPop` joining factor and character vector, coercing into
## character vector
glimpse(df.wide)
## Rows: 131
## Columns: 4
## Groups: RefPop [131]
## $ RefPop <chr> "RP008", "RP009", "RP012", "RP029", "RP044", "RP051", "RP070…
## $ W3 <dbl> 1.488, 1.333, 1.312, 1.831, 1.391, 1.311, 1.305, 1.326, 1.33…
## $ E1 <dbl> 1.294, 1.264, 1.267, 1.212, 1.218, 1.230, 1.348, 1.283, 1.23…
## $ PopGroup <chr> "UTA", "NOR", "UTA", "COL", "ADM", "COL", "WES", "WES", NA, …
Create new dataframe with Expt as numeric, to control ggplot X-axis:
df.norm.rxn <- df.tall
df.norm.rxn$Envirn <- as.character(df.norm.rxn$Envirn)
df.norm.rxn$Expt <- if_else(df.norm.rxn$Envirn == "E1", "1", "3")
df.norm.rxn$Expt <- as.numeric(df.norm.rxn$Expt)
Draw Norm of Reaction plot:
RxnNormPlot <- ggplot(df.norm.rxn, aes(x = Expt, y = LogFlrDate)) + theme_classic() +
stat_summary(aes(group = RefPop), geom = "path") + xlim(0.9, 3.1) +
geom_point(aes(), size = 0.5) + theme(legend.position="none") +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank()) +
ggtitle("Norm of Reaction for Environments E1 vs. W3") + xlab("Environment E1 or W3") +
ylab("Age at First Flower (Log10)") + xlab("Environment E1 vs. W3")
RxnNormPlot
## No summary function supplied, defaulting to `mean_se()`
ggsave("Reaction_Norm.E1.W3.png")
## Saving 7 x 5 in image
## No summary function supplied, defaulting to `mean_se()`
Using all four PopGroups, this graph shows 131 families with at least 4 flowering plans in both environments. Thus, the divergent lines are not due to low sample sizes.
Compute random effects ANOVA:
# In formula: fixed effects first, then random
mod1 <- lmer(LogFlrAge ~ Expt +
(1 | RefPop) + (1 | Rack) %in% Expt + (1 | RefPop:Expt), data=df.13)
(sum.mod <- summary(mod1))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogFlrAge ~ Expt + (1 | RefPop) + (1 | Rack) %in% Expt + (1 |
## RefPop:Expt)
## Data: df.13
##
## REML criterion at convergence: -11741
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.569 -0.351 -0.045 0.279 9.804
##
## Random effects:
## Groups Name Variance Std.Dev.
## RefPop:Expt (Intercept) 0.004124 0.0642
## RefPop (Intercept) 0.003995 0.0632
## Rack (Intercept) 0.000181 0.0134
## Residual 0.002534 0.0503
## Number of obs: 4423, groups: RefPop:Expt, 709; RefPop, 463; Rack, 145
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.30e+00 4.85e-03 6.59e+02 267.8 <2e-16 ***
## ExptW3 1.09e-01 6.11e-03 4.00e+02 17.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## ExptW3 -0.524
# Next compute components of variance
# var.Tot == var.Expt + var.RefPop + var.RefPopxExpt + var.Rack + var.Resid
var.Tot <- var(df.13$LogFlrAge)
var.RefPop <- 0.003995
var.RefPopxExpt <- 0.004124
var.Rack <- 0.000181
var.Resid <- 0.002534
var.Expt <- var.Tot - var.RefPop - var.RefPopxExpt - var.Rack - var.Resid
var.explained <- var.Expt + var.RefPop + var.RefPopxExpt + var.Rack
More results for fixed and random effects:
anova(mod1)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Expt 0.806 0.806 1 400 318 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ranova(mod1)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## LogFlrAge ~ Expt + (1 | RefPop) + (1 | Rack) + (1 | RefPop:Expt)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 6 5870 -11729
## (1 | RefPop) 5 5839 -11669 62 1 3.4e-15 ***
## (1 | Rack) 5 5818 -11627 104 1 < 2e-16 ***
## (1 | RefPop:Expt) 5 5449 -10888 843 1 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Compute R-squared and heritability
(R2 <- var.explained / var.Tot)
## [1] 0.7611
(H2 <- var.RefPop / var.Tot)
## [1] 0.3767
(prop.var.Expt <- var.Expt / var.Tot)
## [1] -0.02152
(prop.var.GxE <- var.RefPopxExpt / var.Tot)
## [1] 0.3888
(prop.var.Rack <- var.Rack / var.Tot)
## [1] 0.01707
This analysis of log10 flowering time in the E1 and W3 experiments has 4425 plants, and 464 genotypes. R2 is 76.1074%, heritability equals 37.6681%, -2.1518% of the variation is attributable to differences between the two experiments, and 38.8844% of the variation is due to GxE.
Sasaki et al (2015) found heritabilities greater than 90% in two environments with divergent temperatures. GxE explained 66% of flowering time variation. In addition, they found no relationsip between geography and flowering phenology.
We performed GWAS to find QTLs controlling changes in log flowering time between the divergent environments E1 and W3. We computed the difference between standardized flowering time (difference between z-scores), and performed GWAS for this composite trait. No significant LOD peaks were found.
In our experiments, we found QxE for flowering time when focusing on large effect QTLs that control flowering within environments (qtl6.2 & qtl7.2), but no significant LOD peaks are found in a genome-wide search for GxE QTLs. In ANOVA, we found that broad sense heritability equals 27.8%, 10.6% of the variation is is attributable to differences between the two experiments, and 19.5% of the variation is due to GxE.
Input log flowering times in E1 and W3:
infile.BLUPs <- "LogFlr_BLUPs_10_Expts.txt"
infile.BLUPs <- file.path(path.expand("~"),"FlrGWAS", "output", infile.BLUPs)
df.E1W3 <- read_tsv(infile.BLUPs)
## Parsed with column specification:
## cols(
## ID = col_character(),
## RP_blupE1 = col_double(),
## RP_blupW1 = col_double(),
## RP_blupW2 = col_double(),
## RP_blupW3 = col_double(),
## RP_blupW4 = col_double(),
## RP_blup1 = col_double(),
## RP_blup3 = col_double(),
## RP_blup4 = col_double(),
## RP_blup5 = col_double(),
## RP_blup6 = col_double()
## )
df.E1W3 <- df.E1W3 %>%
select("ID", "RP_blupE1", "RP_blupW3") %>% drop_na() %>%
rename(RefPop = ID,
E1 = RP_blupE1,
W3 = RP_blupW3)
Convert E1 and W3 to Z-scores, then subtract:
df.E1W3 <- df.E1W3 %>% mutate(zE1 = (E1 - mean(E1)) / sd(E1),
zW3 = (W3 - mean(W3)) / sd(W3),
diffE1W3 = zE1 - zW3)
df.E1W3 <- df.E1W3 %>% mutate(Zdiff = diffE1W3 / sd(diffE1W3))
mean(df.E1W3$Zdiff); sd(df.E1W3$Zdiff)
## [1] -1.506e-17
## [1] 1
# df.E1W3$Zdiff has zero mean and unit variance
dfZ.E1W3 <- df.E1W3 %>% select(RefPop, Zdiff)
glimpse(dfZ.E1W3)
## Rows: 246
## Columns: 2
## $ RefPop <chr> "RP003", "RP008", "RP009", "RP010", "RP012", "RP014", "RP016",…
## $ Zdiff <dbl> -0.38057, -0.79529, 0.11118, 0.96097, 0.02429, -0.08561, -0.93…
file_out <- "LogFlr_BLUPs_Zscore_E1W3.txt"
out_name <- file.path(path.expand("~"),"FlrGWAS", "output", file_out)
write_tsv(dfZ.E1W3, out_name)
Sasaki, E., et al. (2015). “”Missing" G x E Variation Controls Flowering Time in Arabidopsis thaliana." PLoS Genetics 11(10).