library(tidyverse)
library(ggmap)
library(readxl)
library(openxlsx)
How does flowering time vary geographically?
Import mean flowering dates (log-means) for genotypes in Experiment W1
in_name1 <- "Wenjie_first_LogFlr_BLUPS.txt"
in_name1 <- file.path(path.expand("~"),"FlrGWAS","output", in_name1)
df.W1.FlrDat <- read_tsv(in_name1)
## Parsed with column specification:
## cols(
## ID = col_character(),
## RP_meanW1 = col_double(),
## RP_blupW1 = col_double()
## )
df.W1.FlrDat <- df.W1.FlrDat %>% rename(LogFlrDat = RP_meanW1) %>%
select(ID, LogFlrDat)
# Note: These are log flowering dates
names(df.W1.FlrDat)
## [1] "ID" "LogFlrDat"
dim(df.W1.FlrDat)
## [1] 402 2
Import latitude, longitude, and elevation in meters at site of origin each genotype
in_name2 <- "Bstricta_RefPop_20_02_21.FlrGWAS.xlsx"
in_name2 <- file.path(path.expand("~"),"FlrGWAS","Flr_Data", in_name2)
df.RP <- read_excel(in_name2, sheet = 1, na = "NA")
df.RP <- df.RP %>% mutate(prefix = str_sub(ID, 1, 2))
df.RP <- df.RP %>% filter(prefix == "RP")
df.RP <- df.RP %>% select(ID, Latitude, Longitude, Elev_m, PopGroup)
names(df.RP)
## [1] "ID" "Latitude" "Longitude" "Elev_m" "PopGroup"
dim(df.RP)
## [1] 603 5
Merge data files
df.merge <- left_join(df.W1.FlrDat, df.RP, by = 'ID')
head(df.merge)
## # A tibble: 6 x 6
## ID LogFlrDat Latitude Longitude Elev_m PopGroup
## <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 RP001 1.54 38.5 -109. 2587 COL
## 2 RP003 1.40 44.6 -115. 2642 NOR
## 3 RP004 1.53 44.4 -115. 2128 <NA>
## 4 RP008 1.59 40.4 -112. 2400 UTA
## 5 RP009 1.31 44.0 -115. 2624 <NA>
## 6 RP010 1.37 44.6 -114. 2799 NOR
tail(df.merge)
## # A tibble: 6 x 6
## ID LogFlrDat Latitude Longitude Elev_m PopGroup
## <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 RP649 1.37 44.6 -113. 2465 WES
## 2 RP653 1.36 39.0 -108. 2883 COL
## 3 RP667 1.52 40.0 -105. 2371 <NA>
## 4 RP668 1.41 38.0 -112. 2959 UTA
## 5 RP669 1.49 38.5 -111. 2956 UTA
## 6 RP670 1.47 38.0 -112. 2928 UTA
dim(df.merge)
## [1] 402 6
df.merge2 <- df.merge %>% filter(ID != "RP180") %>% # possible contaminant
drop_na() %>% arrange(desc(PopGroup))
dim(df.merge2)
## [1] 341 6
Supplementary Figure PopGroups
myMap <- get_stamenmap(bbox = c(left=-117.5, bottom=36.2, right=-101.5, top=49),
zoom = 5, maptype = "toner-background")
## Source : http://tile.stamen.com/toner-background/5/5/10.png
## Source : http://tile.stamen.com/toner-background/5/6/10.png
## Source : http://tile.stamen.com/toner-background/5/5/11.png
## Source : http://tile.stamen.com/toner-background/5/6/11.png
## Source : http://tile.stamen.com/toner-background/5/5/12.png
## Source : http://tile.stamen.com/toner-background/5/6/12.png
ggmap(myMap) + theme_void() +
geom_point(aes(x = Longitude, y = Latitude, color = PopGroup), size = 2,
na.rm = T, data = df.merge2)
scale_colour_gradient2()
## <ScaleContinuous>
## Range:
## Limits: 0 -- 1
For upcoming color codes, rescale log flowering date between -1 and +1
# Remove the early flowering FLC mutant Moonrise Ridge!
df.merge <- df.merge %>% filter(ID != "RP116")
dim(df.merge)
## [1] 401 6
flr.min <- min(df.merge$LogFlrDat)
flr.max <- max(df.merge$LogFlrDat)
flr.range <- flr.max - flr.min
# rescale log flowering date from -1 to +1
df.merge <- df.merge %>%
mutate(Rel_Flr_Age = (((LogFlrDat - flr.min)/flr.range) - 0.5) * 2.0) %>%
arrange(Rel_Flr_Age) # Late flowering last, so they show up on Map
names(df.merge)
## [1] "ID" "LogFlrDat" "Latitude" "Longitude" "Elev_m"
## [6] "PopGroup" "Rel_Flr_Age"
min(df.merge$Rel_Flr_Age)
## [1] -1
tail(df.merge)
## # A tibble: 6 x 7
## ID LogFlrDat Latitude Longitude Elev_m PopGroup Rel_Flr_Age
## <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 RP595 1.80 37.6 -108. 2597 COL 0.699
## 2 RP611 1.84 37.5 -107. 3088 COL 0.817
## 3 RP625 1.88 39.0 -106. 3231 COL 0.939
## 4 RP501 1.88 39.8 -106. 3056 COL 0.954
## 5 RP485 1.90 40.3 -106. 2661 COL 1
## 6 RP494 1.90 39.8 -106. 2925 COL 1
Check distribution of rescaled log flowering age
ggplot(df.merge) + geom_histogram(aes(x = Rel_Flr_Age), bins = 30)
myMap <- get_stamenmap(bbox = c(left=-117.5, bottom=36.2, right=-101.5, top=49),
zoom = 5, maptype = "toner-background")
ggmap(myMap) + theme_void() +
geom_jitter(aes(x = Longitude, y = Latitude, fill = Rel_Flr_Age),
color="black", shape=21, size = 3, alpha=.8,
width = 0.15, height = 0.15, na.rm = T, data = df.merge) +
scale_fill_distiller(palette="RdBu", # scale fill manually with red/blue palette
limits=c(-1,1), # set upper and lower limits for scale
#breaks=c(0,25,50,75,100), # define which breaks show up on scale bar
guide=guide_colorbar(nbin=100, # define number of bins in continuous scale
draw.ulim = FALSE, # remove horizontal ticks from upper limit
draw.llim = FALSE, # remove horizontal ticks from lower limit
label.position = "left")) # place numbers on left of scale bar
ggsave("Rel_Flr_Age_Map.png")
## Saving 7 x 5 in image
Do Latitude, Longitude, and Elev_m influence flowering age?
flr.mod <- lm(Rel_Flr_Age ~ Latitude + Longitude + Elev_m, data = df.merge)
summary(flr.mod)
##
## Call:
## lm(formula = Rel_Flr_Age ~ Latitude + Longitude + Elev_m, data = df.merge)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7379 -0.2283 -0.0025 0.1765 1.1535
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.63e+00 8.87e-01 6.35 5.9e-10 ***
## Latitude -3.21e-02 9.24e-03 -3.48 0.00057 ***
## Longitude 3.05e-02 7.33e-03 4.16 3.9e-05 ***
## Elev_m -4.18e-04 6.57e-05 -6.35 5.9e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.325 on 392 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.112, Adjusted R-squared: 0.106
## F-statistic: 16.5 on 3 and 392 DF, p-value: 3.89e-10
Earlier flowering genotypes are found in the north, towards the west, and at higher elevation.
Caution: This does not take account of Population Group, because this is missing for some accessions
Output ID, Lat, Lon, Elev, FlrDate:
df.out <- df.merge %>%
select(ID, LogFlrDat, Elev_m, Latitude, Longitude) %>%
rename(lat = Latitude,
lon = Longitude)
out_name <- "RefPop_lat_lon.txt"
out_name <- file.path(path.expand("~"),"FlrGWAS", "output", out_name)
write_tsv(df.out, out_name)
head(df.out)
## # A tibble: 6 x 5
## ID LogFlrDat Elev_m lat lon
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 RP507 1.26 3638 39.7 -106.
## 2 RP628 1.27 3501 39.6 -106.
## 3 RP259 1.28 2619 44.6 -115.
## 4 RP033 1.30 2552 43.5 -111.
## 5 RP075 1.30 3089 40.7 -111.
## 6 RP009 1.31 2624 44.0 -115.
tail(df.out)
## # A tibble: 6 x 5
## ID LogFlrDat Elev_m lat lon
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 RP595 1.80 2597 37.6 -108.
## 2 RP611 1.84 3088 37.5 -107.
## 3 RP625 1.88 3231 39.0 -106.
## 4 RP501 1.88 3056 39.8 -106.
## 5 RP485 1.90 2661 40.3 -106.
## 6 RP494 1.90 2925 39.8 -106.
dim(df.out)
## [1] 401 5