Libraries

library(tidyverse)
library(ggmap)
library(readxl)
library(openxlsx)

How does flowering time vary geographically?

Import mean flowering dates (log-means) for genotypes in Experiment W1

in_name1 <- "Wenjie_first_LogFlr_BLUPS.txt" 
in_name1 <- file.path(path.expand("~"),"FlrGWAS","output", in_name1)
df.W1.FlrDat <- read_tsv(in_name1)
## Parsed with column specification:
## cols(
##   ID = col_character(),
##   RP_meanW1 = col_double(),
##   RP_blupW1 = col_double()
## )
df.W1.FlrDat <- df.W1.FlrDat %>% rename(LogFlrDat = RP_meanW1) %>% 
  select(ID, LogFlrDat)
# Note: These are log flowering dates
names(df.W1.FlrDat)
## [1] "ID"        "LogFlrDat"
dim(df.W1.FlrDat)
## [1] 402   2

Import latitude, longitude, and elevation in meters at site of origin each genotype

in_name2 <- "Bstricta_RefPop_20_02_21.FlrGWAS.xlsx"
in_name2 <- file.path(path.expand("~"),"FlrGWAS","Flr_Data", in_name2)
df.RP <- read_excel(in_name2, sheet = 1, na = "NA")
df.RP <- df.RP %>% mutate(prefix = str_sub(ID, 1, 2))
df.RP <- df.RP %>% filter(prefix == "RP")
df.RP <- df.RP %>% select(ID, Latitude, Longitude, Elev_m, PopGroup)
names(df.RP)
## [1] "ID"        "Latitude"  "Longitude" "Elev_m"    "PopGroup"
dim(df.RP)
## [1] 603   5

Merge data files

df.merge <- left_join(df.W1.FlrDat, df.RP, by = 'ID') 
head(df.merge)
## # A tibble: 6 x 6
##   ID    LogFlrDat Latitude Longitude Elev_m PopGroup
##   <chr>     <dbl>    <dbl>     <dbl>  <dbl> <chr>   
## 1 RP001      1.54     38.5     -109.   2587 COL     
## 2 RP003      1.40     44.6     -115.   2642 NOR     
## 3 RP004      1.53     44.4     -115.   2128 <NA>    
## 4 RP008      1.59     40.4     -112.   2400 UTA     
## 5 RP009      1.31     44.0     -115.   2624 <NA>    
## 6 RP010      1.37     44.6     -114.   2799 NOR
tail(df.merge)
## # A tibble: 6 x 6
##   ID    LogFlrDat Latitude Longitude Elev_m PopGroup
##   <chr>     <dbl>    <dbl>     <dbl>  <dbl> <chr>   
## 1 RP649      1.37     44.6     -113.   2465 WES     
## 2 RP653      1.36     39.0     -108.   2883 COL     
## 3 RP667      1.52     40.0     -105.   2371 <NA>    
## 4 RP668      1.41     38.0     -112.   2959 UTA     
## 5 RP669      1.49     38.5     -111.   2956 UTA     
## 6 RP670      1.47     38.0     -112.   2928 UTA
dim(df.merge)
## [1] 402   6

df.merge2 <- df.merge %>% filter(ID != "RP180") %>% # possible contaminant
  drop_na() %>% arrange(desc(PopGroup))
dim(df.merge2)
## [1] 341   6

Supplementary Figure PopGroups

myMap <- get_stamenmap(bbox = c(left=-117.5, bottom=36.2, right=-101.5, top=49), 
                       zoom = 5, maptype = "toner-background")
## Source : http://tile.stamen.com/toner-background/5/5/10.png
## Source : http://tile.stamen.com/toner-background/5/6/10.png
## Source : http://tile.stamen.com/toner-background/5/5/11.png
## Source : http://tile.stamen.com/toner-background/5/6/11.png
## Source : http://tile.stamen.com/toner-background/5/5/12.png
## Source : http://tile.stamen.com/toner-background/5/6/12.png

ggmap(myMap) + theme_void() +
  geom_point(aes(x = Longitude, y = Latitude, color = PopGroup), size = 2, 
              na.rm = T, data = df.merge2)  

  scale_colour_gradient2()
## <ScaleContinuous>
##  Range:  
##  Limits:    0 --    1

For upcoming color codes, rescale log flowering date between -1 and +1

# Remove the early flowering FLC mutant Moonrise Ridge!
df.merge <- df.merge %>% filter(ID != "RP116")
dim(df.merge)
## [1] 401   6

flr.min <- min(df.merge$LogFlrDat)
flr.max <- max(df.merge$LogFlrDat)
flr.range <- flr.max - flr.min

# rescale log flowering date from -1 to +1
df.merge <- df.merge %>% 
            mutate(Rel_Flr_Age = (((LogFlrDat - flr.min)/flr.range) - 0.5) * 2.0) %>% 
            arrange(Rel_Flr_Age) # Late flowering last, so they show up on Map
names(df.merge)
## [1] "ID"          "LogFlrDat"   "Latitude"    "Longitude"   "Elev_m"     
## [6] "PopGroup"    "Rel_Flr_Age"
min(df.merge$Rel_Flr_Age)
## [1] -1
tail(df.merge)
## # A tibble: 6 x 7
##   ID    LogFlrDat Latitude Longitude Elev_m PopGroup Rel_Flr_Age
##   <chr>     <dbl>    <dbl>     <dbl>  <dbl> <chr>          <dbl>
## 1 RP595      1.80     37.6     -108.   2597 COL            0.699
## 2 RP611      1.84     37.5     -107.   3088 COL            0.817
## 3 RP625      1.88     39.0     -106.   3231 COL            0.939
## 4 RP501      1.88     39.8     -106.   3056 COL            0.954
## 5 RP485      1.90     40.3     -106.   2661 COL            1    
## 6 RP494      1.90     39.8     -106.   2925 COL            1

Check distribution of rescaled log flowering age

ggplot(df.merge) + geom_histogram(aes(x = Rel_Flr_Age), bins = 30)


myMap <- get_stamenmap(bbox = c(left=-117.5, bottom=36.2, right=-101.5, top=49), 
                       zoom = 5, maptype = "toner-background")

ggmap(myMap) + theme_void() +
  geom_jitter(aes(x = Longitude, y = Latitude, fill = Rel_Flr_Age), 
             color="black", shape=21, size = 3, alpha=.8,
             width = 0.15, height = 0.15, na.rm = T, data = df.merge) +
  scale_fill_distiller(palette="RdBu", # scale fill manually with red/blue palette
        limits=c(-1,1),  # set upper and lower limits for scale
        #breaks=c(0,25,50,75,100), # define which breaks show up on scale bar
        guide=guide_colorbar(nbin=100, # define number of bins in continuous scale
            draw.ulim = FALSE, # remove horizontal ticks from upper limit
            draw.llim = FALSE, # remove horizontal ticks from lower limit
            label.position = "left")) # place numbers on left of scale bar

ggsave("Rel_Flr_Age_Map.png")
## Saving 7 x 5 in image

Do Latitude, Longitude, and Elev_m influence flowering age?

flr.mod <- lm(Rel_Flr_Age ~ Latitude + Longitude + Elev_m, data = df.merge)
summary(flr.mod)
## 
## Call:
## lm(formula = Rel_Flr_Age ~ Latitude + Longitude + Elev_m, data = df.merge)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7379 -0.2283 -0.0025  0.1765  1.1535 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.63e+00   8.87e-01    6.35  5.9e-10 ***
## Latitude    -3.21e-02   9.24e-03   -3.48  0.00057 ***
## Longitude    3.05e-02   7.33e-03    4.16  3.9e-05 ***
## Elev_m      -4.18e-04   6.57e-05   -6.35  5.9e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.325 on 392 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.112,  Adjusted R-squared:  0.106 
## F-statistic: 16.5 on 3 and 392 DF,  p-value: 3.89e-10

Earlier flowering genotypes are found in the north, towards the west, and at higher elevation.

Caution: This does not take account of Population Group, because this is missing for some accessions


Output ID, Lat, Lon, Elev, FlrDate:

df.out <- df.merge %>% 
          select(ID, LogFlrDat, Elev_m, Latitude, Longitude) %>% 
          rename(lat = Latitude,
                 lon = Longitude)
out_name <- "RefPop_lat_lon.txt"
out_name <- file.path(path.expand("~"),"FlrGWAS", "output", out_name)
write_tsv(df.out, out_name)
head(df.out)
## # A tibble: 6 x 5
##   ID    LogFlrDat Elev_m   lat   lon
##   <chr>     <dbl>  <dbl> <dbl> <dbl>
## 1 RP507      1.26   3638  39.7 -106.
## 2 RP628      1.27   3501  39.6 -106.
## 3 RP259      1.28   2619  44.6 -115.
## 4 RP033      1.30   2552  43.5 -111.
## 5 RP075      1.30   3089  40.7 -111.
## 6 RP009      1.31   2624  44.0 -115.
tail(df.out)
## # A tibble: 6 x 5
##   ID    LogFlrDat Elev_m   lat   lon
##   <chr>     <dbl>  <dbl> <dbl> <dbl>
## 1 RP595      1.80   2597  37.6 -108.
## 2 RP611      1.84   3088  37.5 -107.
## 3 RP625      1.88   3231  39.0 -106.
## 4 RP501      1.88   3056  39.8 -106.
## 5 RP485      1.90   2661  40.3 -106.
## 6 RP494      1.90   2925  39.8 -106.
dim(df.out)
## [1] 401   5