library(tidyverse)
library(magrittr)
library(readxl)
library(openxlsx)
library(lubridate)
library(cowplot)
library(lsmeans)
library(broom)
library(lme4)
library(lmerTest) # import lmerTest *AFTER* lme4

Input E1 data

file_name <- "E1_plants_LogFlr.txt"
in_name <- file.path(path.expand("~"),"FlrGWAS", "output", file_name)
df.E1 <- read_tsv(in_name)
## Parsed with column specification:
## cols(
##   Experiment = col_character(),
##   RefPop = col_character(),
##   RackNum = col_character(),
##   LogFlrAge = col_double()
## )
df.E1 <- df.E1 %>% mutate(RefPop  = as.character(RefPop),
                          RackNum = as.character(RackNum),
                          Rack    = str_c("E1.", RackNum)) 
df.E1 <- df.E1 %>% select(-RackNum)
glimpse(df.E1)
## Rows: 3,222
## Columns: 4
## $ Experiment <chr> "E1", "E1", "E1", "E1", "E1", "E1", "E1", "E1", "E1", "E1"…
## $ RefPop     <chr> "RP003", "RP003", "RP003", "RP003", "RP003", "RP003", "RP0…
## $ LogFlrAge  <dbl> 1.255, 1.230, 1.255, 1.230, 1.230, 1.230, 1.230, 1.255, 1.…
## $ Rack       <chr> "E1.1A", "E1.1B", "E1.2C", "E1.2D", "E1.3A", "E1.4B", "E1.…

Input W3 data

file_name <- "W3_plants_LogFlr.txt"
in_name <- file.path(path.expand("~"),"FlrGWAS", "output", file_name)
df.W3 <- read_tsv(in_name)
## Parsed with column specification:
## cols(
##   Experiment = col_character(),
##   RefPop = col_character(),
##   RackNum = col_double(),
##   LogFlrAge = col_double()
## )
df.W3 <- df.W3 %>% mutate(RefPop  = as.character(RefPop),
                          RackNum = as.character(RackNum),
                          Rack    = str_c("W3.", RackNum)) 
df.W3 <- df.W3 %>% select(-RackNum)
glimpse(df.W3)
## Rows: 1,203
## Columns: 4
## $ Experiment <chr> "W3", "W3", "W3", "W3", "W3", "W3", "W3", "W3", "W3", "W3"…
## $ RefPop     <chr> "RP278", "RP278", "RP278", "RP453", "RP507", "RP659", "RP0…
## $ LogFlrAge  <dbl> 1.114, 1.114, 1.114, 1.114, 1.114, 1.114, 1.146, 1.146, 1.…
## $ Rack       <chr> "W3.20", "W3.29", "W3.2", "W3.10", "W3.21", "W3.36", "W3.3…

Bind_rows from experiments E1 & W3

df.13 <- bind_rows(df.E1, df.W3)
df.13 <- df.13 %>% rename(Expt = Experiment)
(num.plants <- nrow(df.13))
## [1] 4425
(num.genos <- nrow(unique(select(df.13, RefPop))))
## [1] 464

Get family mean flowering date, and filter out families with < 4 data points.

df.nr <- df.13 %>% select(Expt, RefPop, LogFlrAge) %>%
                   mutate(RefPop = as.factor(RefPop),  
                          Expt   = as.factor(Expt)) %>% 
                   arrange(RefPop, Expt)

df.nrg <- group_by(df.nr, RefPop, Expt)
## Warning: Factor `RefPop` contains implicit NA, consider using
## `forcats::fct_explicit_na`
df.nrm <- summarize(df.nrg, FD = mean(LogFlrAge),
                            N = n())
# Only retain families with >= 4 flowering individuals
df.nrm <- df.nrm %>%  filter(N >= 4) %>% select(-N) 

# Only retain families with data in both environments
df.wide <- df.nrm %>%
  pivot_wider(names_from = Expt, values_from = FD) %>% drop_na()
# glimpse(df.wide)

df.tall <- df.wide %>%
           pivot_longer(-RefPop,
                        names_to  = "Envirn",
                        values_to = "LogFlrDate") %>% 
           arrange(RefPop, Envirn)
(num.genos <- nrow(unique(select(df.13, RefPop))))
## [1] 464
# glimpse(df.tall)

Get PopGroup Info

file_in <- "ID_PopGroup_worldclim_PCA.txt"
name.PopGroup <- file.path(path.expand("~"),"FlrGWAS", "output", file_in)
df.PG <- read_tsv(name.PopGroup)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   ID = col_character(),
##   PopGroup = col_character()
## )
## See spec(...) for full column specifications.

df.PG <- df.PG %>% select(ID, PopGroup) %>% rename(RefPop = ID)
# glimpse(df.PG)

df.wide <- left_join(df.wide, df.PG)
## Joining, by = "RefPop"
## Warning: Column `RefPop` joining factor and character vector, coercing into
## character vector
glimpse(df.wide)
## Rows: 131
## Columns: 4
## Groups: RefPop [131]
## $ RefPop   <chr> "RP008", "RP009", "RP012", "RP029", "RP044", "RP051", "RP070…
## $ W3       <dbl> 1.488, 1.333, 1.312, 1.831, 1.391, 1.311, 1.305, 1.326, 1.33…
## $ E1       <dbl> 1.294, 1.264, 1.267, 1.212, 1.218, 1.230, 1.348, 1.283, 1.23…
## $ PopGroup <chr> "UTA", "NOR", "UTA", "COL", "ADM", "COL", "WES", "WES", NA, …

Create new dataframe with Expt as numeric, to control ggplot X-axis:

df.norm.rxn <- df.tall
df.norm.rxn$Envirn <- as.character(df.norm.rxn$Envirn)
df.norm.rxn$Expt <- if_else(df.norm.rxn$Envirn == "E1", "1", "3")
df.norm.rxn$Expt <- as.numeric(df.norm.rxn$Expt)

Draw Norm of Reaction plot:

RxnNormPlot <- ggplot(df.norm.rxn, aes(x = Expt, y = LogFlrDate)) + theme_classic() +
  stat_summary(aes(group = RefPop), geom = "path") + xlim(0.9, 3.1) +
  geom_point(aes(), size = 0.5) + theme(legend.position="none") +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank()) + 
  ggtitle("Norm of Reaction for Environments E1 vs. W3") + xlab("Environment E1 or W3") + 
  ylab("Age at First Flower (Log10)") + xlab("Environment E1 vs. W3")

RxnNormPlot
## No summary function supplied, defaulting to `mean_se()`

ggsave("Reaction_Norm.E1.W3.png")
## Saving 7 x 5 in image
## No summary function supplied, defaulting to `mean_se()`

Using all four PopGroups, this graph shows 131 families with at least 4 flowering plans in both environments. Thus, the divergent lines are not due to low sample sizes.


Compute random effects ANOVA:

# In formula: fixed effects first, then random
mod1 <- lmer(LogFlrAge ~ Expt + 
             (1 | RefPop) + (1 | Rack) %in% Expt + (1 | RefPop:Expt), data=df.13)
(sum.mod <- summary(mod1))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogFlrAge ~ Expt + (1 | RefPop) + (1 | Rack) %in% Expt + (1 |  
##     RefPop:Expt)
##    Data: df.13
## 
## REML criterion at convergence: -11741
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -8.569 -0.351 -0.045  0.279  9.804 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  RefPop:Expt (Intercept) 0.004124 0.0642  
##  RefPop      (Intercept) 0.003995 0.0632  
##  Rack        (Intercept) 0.000181 0.0134  
##  Residual                0.002534 0.0503  
## Number of obs: 4423, groups:  RefPop:Expt, 709; RefPop, 463; Rack, 145
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept) 1.30e+00   4.85e-03 6.59e+02   267.8   <2e-16 ***
## ExptW3      1.09e-01   6.11e-03 4.00e+02    17.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##        (Intr)
## ExptW3 -0.524

# Next compute components of variance
# var.Tot == var.Expt + var.RefPop + var.RefPopxExpt + var.Rack + var.Resid
var.Tot         <- var(df.13$LogFlrAge)
var.RefPop      <- 0.003995
var.RefPopxExpt <- 0.004124
var.Rack        <- 0.000181
var.Resid       <- 0.002534
var.Expt        <- var.Tot - var.RefPop - var.RefPopxExpt - var.Rack - var.Resid
var.explained   <- var.Expt + var.RefPop + var.RefPopxExpt + var.Rack

More results for fixed and random effects:

anova(mod1)
## Type III Analysis of Variance Table with Satterthwaite's method
##      Sum Sq Mean Sq NumDF DenDF F value Pr(>F)    
## Expt  0.806   0.806     1   400     318 <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

ranova(mod1)
## ANOVA-like table for random-effects: Single term deletions
## 
## Model:
## LogFlrAge ~ Expt + (1 | RefPop) + (1 | Rack) + (1 | RefPop:Expt)
##                   npar logLik    AIC LRT Df Pr(>Chisq)    
## <none>               6   5870 -11729                      
## (1 | RefPop)         5   5839 -11669  62  1    3.4e-15 ***
## (1 | Rack)           5   5818 -11627 104  1    < 2e-16 ***
## (1 | RefPop:Expt)    5   5449 -10888 843  1    < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Compute R-squared and heritability

(R2             <- var.explained / var.Tot)
## [1] 0.7611
(H2             <- var.RefPop / var.Tot)
## [1] 0.3767
(prop.var.Expt  <- var.Expt / var.Tot)
## [1] -0.02152
(prop.var.GxE   <- var.RefPopxExpt / var.Tot)
## [1] 0.3888
(prop.var.Rack  <- var.Rack / var.Tot)
## [1] 0.01707

This analysis of log10 flowering time in the E1 and W3 experiments has 4425 plants, and 464 genotypes. R2 is 76.1074%, heritability equals 37.6681%, -2.1518% of the variation is attributable to differences between the two experiments, and 38.8844% of the variation is due to GxE.

Sasaki et al (2015) found heritabilities greater than 90% in two environments with divergent temperatures. GxE explained 66% of flowering time variation. In addition, they found no relationsip between geography and flowering phenology.

We performed GWAS to find QTLs controlling changes in log flowering time between the divergent environments E1 and W3. We computed the difference between standardized flowering time (difference between z-scores), and performed GWAS for this composite trait. No significant LOD peaks were found.

In our experiments, we found QxE for flowering time when focusing on large effect QTLs that control flowering within environments (qtl6.2 & qtl7.2), but no significant LOD peaks are found in a genome-wide search for GxE QTLs. In ANOVA, we found that broad sense heritability equals 27.8%, 10.6% of the variation is is attributable to differences between the two experiments, and 19.5% of the variation is due to GxE.


Input log flowering times in E1 and W3:

infile.BLUPs <- "LogFlr_BLUPs_10_Expts.txt"
infile.BLUPs <- file.path(path.expand("~"),"FlrGWAS", "output", infile.BLUPs)
df.E1W3 <- read_tsv(infile.BLUPs)
## Parsed with column specification:
## cols(
##   ID = col_character(),
##   RP_blupE1 = col_double(),
##   RP_blupW1 = col_double(),
##   RP_blupW2 = col_double(),
##   RP_blupW3 = col_double(),
##   RP_blupW4 = col_double(),
##   RP_blup1 = col_double(),
##   RP_blup3 = col_double(),
##   RP_blup4 = col_double(),
##   RP_blup5 = col_double(),
##   RP_blup6 = col_double()
## )

df.E1W3 <- df.E1W3 %>% 
  select("ID", "RP_blupE1", "RP_blupW3") %>% drop_na() %>% 
  rename(RefPop = ID,
         E1     = RP_blupE1,
         W3     = RP_blupW3) 

Convert E1 and W3 to Z-scores, then subtract:

df.E1W3 <- df.E1W3 %>% mutate(zE1      = (E1 - mean(E1)) / sd(E1),
                              zW3      = (W3 - mean(W3)) / sd(W3),
                              diffE1W3 = zE1 - zW3)
df.E1W3 <- df.E1W3 %>% mutate(Zdiff    = diffE1W3 / sd(diffE1W3))

mean(df.E1W3$Zdiff); sd(df.E1W3$Zdiff)
## [1] -1.506e-17
## [1] 1
# df.E1W3$Zdiff has zero mean and unit variance

dfZ.E1W3 <- df.E1W3 %>% select(RefPop, Zdiff)
glimpse(dfZ.E1W3)
## Rows: 246
## Columns: 2
## $ RefPop <chr> "RP003", "RP008", "RP009", "RP010", "RP012", "RP014", "RP016",…
## $ Zdiff  <dbl> -0.38057, -0.79529, 0.11118, 0.96097, 0.02429, -0.08561, -0.93…

file_out <- "LogFlr_BLUPs_Zscore_E1W3.txt"
out_name <- file.path(path.expand("~"),"FlrGWAS", "output", file_out)
write_tsv(dfZ.E1W3, out_name)

References:

Sasaki, E., et al. (2015). “”Missing" G x E Variation Controls Flowering Time in Arabidopsis thaliana." PLoS Genetics 11(10).