1 Introduction
This is an R-Markdown script describing the statistical analysis presented in the article. Figure names correspond to the ones used in the article and supplemental material. Additional figures were not named.
2 Data preparation and sorting
2.1 Load libraries and data
library(adegenet)
library(lattice)
library(ggplot2)
library(car)
library(ggthemes)
library(multcomp)
library(effects)
library(nlme)
library(ggbiplot)
library(broom)
library(ggmap)
man_cols = c("mediumorchid", "darkolivegreen4", "chartreuse3", "darkturquoise",
"goldenrod1")
asso = read.table("/home/hannes/Stomata_Data/R_final/stomata_data.csv", sep = ",",
header = TRUE, stringsAsFactors = FALSE)
asso$genotype = as.character(asso$genotype)
asso$ID = as.character(asso$ID)
asso$block = as.character(asso$block)
asso$tray = as.character(asso$tray)
summary(asso)
## result_array1 genotype stomata leafsize
## Length:870 Length:870 Min. : 60.0 Min. : 29.63
## Class :character Class :character 1st Qu.:120.0 1st Qu.: 99.31
## Mode :character Mode :character Median :143.3 Median :139.42
## Mean :144.4 Mean :157.26
## 3rd Qu.:166.7 3rd Qu.:197.52
## Max. :260.0 Max. :639.66
## NA's :1
## block tray tray_position
## Length:870 Length:870 Length:870
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## origin control stomatasize Latitude
## Length:870 Min. : 60.0 Min. : 89.86 Min. :38.33
## Class :character 1st Qu.:101.7 1st Qu.:104.97 1st Qu.:41.97
## Mode :character Median :133.3 Median :111.01 Median :51.41
## Mean :149.1 Mean :111.21 Mean :50.16
## 3rd Qu.:180.0 3rd Qu.:116.63 3rd Qu.:55.84
## Max. :293.3 Max. :158.73 Max. :63.02
## NA's :855
## Longitude ID Images
## Min. :-7.800 Length:870 Min. : 6.0
## 1st Qu.:-1.630 Class :character 1st Qu.:14.0
## Median :13.000 Mode :character Median :20.0
## Mean : 7.483 Mean :19.2
## 3rd Qu.:14.134 3rd Qu.:25.0
## Max. :21.950 Max. :30.0
##
2.2 Load genind SNP object and perform PCA
load("variants_tair_maf0.05.Rdata")
variants_tair
## /// GENIND OBJECT /////////
##
## // 330 individuals; 70,410 loci; 141,029 alleles; size: 207.9 Mb
##
## // Basic content
## @tab: 330 x 141029 matrix of allele counts
## @loc.n.all: number of alleles per locus (range: 2-3)
## @loc.fac: locus factor for the 141029 columns of @tab
## @all.names: list of allele names for each locus
## @ploidy: ploidy of each individual (range: 2-2)
## @type: codom
## @call: df2genind(X = genotypes_tair, sep = "/")
##
## // Optional content
## @pop: population of each individual (group size range: 15-125)
variants_tair_scaled = scaleGen(variants_tair, NA.method = "mean")
pca1_tair <- dudi.pca(variants_tair_scaled, cent = T, scale = F, scannf = F,
nf = 20)
axis_tair = pca1_tair$eig * 100/sum(pca1_tair$eig)
barplot(axis_tair[1:61], names.arg = as.character(1:61), xlab = "principal component",
ylab = "% variance explained", cex.lab = 1.4, cex.axis = 1.5, cex.names = 1.4)
Figure S10: Barplot of amount of variance explained by each genetic principal component.
sum(axis_tair[1:20]) # variance explained by first 20 PCs
## [1] 28.98139
s.class(pca1_tair$li, pop(variants_tair), xax = 1, yax = 2, cstar = 0, cpoint = 2,
cellipse = 0, grid = T, clabel = 1, axesell = F, col = transp(man_cols,
alpha = 0.5), addaxes = T)
Figure S11: Plot of first two genetic principal components. Each point is one accession and accessions are colored by region of origin.
for (i in 1:nrow(asso)) {
if (asso$ID[i] %in% row.names(pca1_tair$li)) {
asso$pc1[i] = pca1_tair$li$Axis1[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc2[i] = pca1_tair$li$Axis2[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc3[i] = pca1_tair$li$Axis3[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc4[i] = pca1_tair$li$Axis4[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc5[i] = pca1_tair$li$Axis5[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc6[i] = pca1_tair$li$Axis6[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc7[i] = pca1_tair$li$Axis7[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc8[i] = pca1_tair$li$Axis8[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc9[i] = pca1_tair$li$Axis9[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc10[i] = pca1_tair$li$Axis10[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc11[i] = pca1_tair$li$Axis11[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc12[i] = pca1_tair$li$Axis12[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc13[i] = pca1_tair$li$Axis13[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc14[i] = pca1_tair$li$Axis14[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc15[i] = pca1_tair$li$Axis15[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc16[i] = pca1_tair$li$Axis16[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc18[i] = pca1_tair$li$Axis18[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc19[i] = pca1_tair$li$Axis19[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc20[i] = pca1_tair$li$Axis20[row.names(pca1_tair$li) == asso$ID[i]]
asso$pc17[i] = pca1_tair$li$Axis17[row.names(pca1_tair$li) == asso$ID[i]]
}
}
2.3 Calculate mean for each phenotype
asso_mean = aggregate(data = asso, cbind(stomata, stomatasize, leafsize, Images) ~
origin + genotype + ID + Latitude + Longitude + pc1 + pc2 + pc3 + pc4 +
pc5 + pc6 + pc7 + pc8 + pc9 + pc10 + pc11 + pc12 + pc13 + pc14 + pc15 +
pc16 + pc17 + pc18 + pc19 + pc20, FUN = mean)
summary(asso_mean)
## origin genotype ID Latitude
## Length:330 Length:330 Length:330 Min. :38.33
## Class :character Class :character Class :character 1st Qu.:42.10
## Mode :character Mode :character Mode :character Median :52.05
## Mean :50.23
## 3rd Qu.:55.84
## Max. :63.02
## Longitude pc1 pc2 pc3
## Min. :-7.800 Min. :-191.37 Min. :-95.567 Min. :-79.98
## 1st Qu.:-1.830 1st Qu.: -55.93 1st Qu.:-34.299 1st Qu.:-47.33
## Median :13.101 Median : -21.22 Median :-24.375 Median :-20.96
## Mean : 7.438 Mean : 0.00 Mean : 0.000 Mean : 0.00
## 3rd Qu.:14.133 3rd Qu.: 69.63 3rd Qu.: -9.947 3rd Qu.: 21.18
## Max. :21.950 Max. : 203.24 Max. :310.766 Max. :249.78
## pc4 pc5 pc6
## Min. :-121.284 Min. :-82.047 Min. :-124.754
## 1st Qu.: -40.258 1st Qu.: -9.244 1st Qu.: -17.772
## Median : -1.877 Median : -2.358 Median : 3.794
## Mean : 0.000 Mean : 0.000 Mean : 0.000
## 3rd Qu.: 34.193 3rd Qu.: 4.370 3rd Qu.: 30.134
## Max. : 150.942 Max. :357.784 Max. : 94.236
## pc7 pc8 pc9
## Min. :-131.706 Min. :-93.5793 Min. :-220.4749
## 1st Qu.: -23.598 1st Qu.:-16.5196 1st Qu.: -10.7177
## Median : 1.428 Median : 0.3027 Median : 0.2351
## Mean : 0.000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 19.184 3rd Qu.: 14.8899 3rd Qu.: 12.4899
## Max. : 114.517 Max. :118.5451 Max. : 151.6343
## pc10 pc11 pc12
## Min. :-102.5807 Min. :-86.5280 Min. :-109.82
## 1st Qu.: -11.1104 1st Qu.:-19.4674 1st Qu.: -15.93
## Median : -0.8647 Median : 0.9832 Median : -1.15
## Mean : 0.0000 Mean : 0.0000 Mean : 0.00
## 3rd Qu.: 8.3061 3rd Qu.: 18.7685 3rd Qu.: 12.78
## Max. : 285.1735 Max. : 98.8388 Max. : 118.53
## pc13 pc14 pc15
## Min. :-163.349 Min. :-106.883 Min. :-132.590
## 1st Qu.: -7.796 1st Qu.: -8.942 1st Qu.: -8.744
## Median : 2.166 Median : -0.249 Median : 1.815
## Mean : 0.000 Mean : 0.000 Mean : 0.000
## 3rd Qu.: 12.508 3rd Qu.: 7.831 3rd Qu.: 11.670
## Max. : 131.096 Max. : 143.703 Max. : 102.367
## pc16 pc17 pc18
## Min. :-186.934 Min. :-113.5674 Min. :-130.6167
## 1st Qu.: -8.086 1st Qu.: -8.8850 1st Qu.: -6.7775
## Median : 1.460 Median : -0.8977 Median : 0.8731
## Mean : 0.000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 10.287 3rd Qu.: 7.7347 3rd Qu.: 6.0719
## Max. : 97.893 Max. : 163.9065 Max. : 157.8232
## pc19 pc20 stomata stomatasize
## Min. :-135.413 Min. :-139.6936 Min. : 86.67 Min. : 95.0
## 1st Qu.: -8.157 1st Qu.: -7.1600 1st Qu.:130.28 1st Qu.:106.5
## Median : 1.988 Median : 0.1498 Median :142.78 Median :110.8
## Mean : 0.000 Mean : 0.0000 Mean :144.37 Mean :111.1
## 3rd Qu.: 13.546 3rd Qu.: 7.9754 3rd Qu.:157.78 3rd Qu.:115.4
## Max. : 79.630 Max. : 98.3158 Max. :204.44 Max. :135.1
## leafsize Images
## Min. : 46.38 Min. : 6.00
## 1st Qu.:117.91 1st Qu.:16.00
## Median :147.78 Median :19.50
## Mean :156.45 Mean :19.23
## 3rd Qu.:183.12 3rd Qu.:22.50
## Max. :375.72 Max. :30.00
# find populations with more than one sample and remove duplicate samples
asso_dups = as.character(asso_mean$genotype[duplicated(cbind(asso_mean$Latitude,
asso_mean$Longitude))])
asso_dedup = asso[!asso$genotype %in% asso_dups, ]
2.4 Map of all accessions
load("stomataMap.Rdata")
ggmap(stomataMap) + geom_point(data = asso_mean, aes(x = Longitude, y = Latitude,
col = origin), pch = 20, size = 5, alpha = 1) + theme(legend.title = element_blank(),
text = element_text(size = 20)) + xlab("") + ylab("") + scale_color_manual(values = man_cols)
Figure S1: Map of all accessions used in the study. Accessions are colored by assigned region of origin.
2.5 Load climate data and run PCA
clim_data = read.table("climate_data.csv", sep = ",", header = T, stringsAsFactors = F)
clim_data$ID = as.character(clim_data$ID)
for (j in 1:nrow(asso_mean)) {
asso_mean[j, "swc_grow"] = clim_data$swc_grow[which(clim_data$ID == asso_mean$ID[j])]
asso_mean[j, "temp_grow"] = clim_data$temp_grow[which(clim_data$ID == asso_mean$ID[j])]
asso_mean[j, "rain_grow"] = clim_data$rain_grow[which(clim_data$ID == asso_mean$ID[j])]
asso_mean[j, "rad_grow"] = clim_data$rad_grow[which(clim_data$ID == asso_mean$ID[j])]
asso_mean[j, "wind_grow"] = clim_data$wind_grow[which(clim_data$ID == asso_mean$ID[j])]
asso_mean[j, "log_Spring_Summer_ratio"] = clim_data$log_Spring_Summer_ratio[which(clim_data$ID ==
asso_mean$ID[j])]
asso_mean[j, "vapr_grow"] = clim_data$vapr_grow[which(clim_data$ID == asso_mean$ID[j])]
}
env_pca = prcomp(asso_mean[!is.na(asso_mean$log_Spring_Summer_ratio), c("temp_grow",
"vapr_grow", "rain_grow", "rad_grow", "wind_grow", "log_Spring_Summer_ratio",
"swc_grow")], center = T, scale. = T)
as.data.frame(env_pca$rotation) # display the loadings for each PC
PC1 | PC2 | PC3 | PC4 | PC5 | PC6 | PC7 | |
---|---|---|---|---|---|---|---|
temp_grow | 0.0503866 | -0.6879015 | 0.2906137 | -0.2880448 | 0.3007713 | -0.1771720 | -0.4847405 |
vapr_grow | -0.4459249 | -0.5055674 | -0.0012939 | -0.1314263 | 0.0163533 | 0.1104677 | 0.7181974 |
rain_grow | -0.3286333 | 0.2785345 | 0.7122476 | 0.2199407 | 0.0986594 | -0.4875595 | 0.1063021 |
rad_grow | 0.3938885 | 0.2797076 | 0.2762534 | -0.4136083 | 0.5806288 | 0.2958251 | 0.3075475 |
wind_grow | -0.3172682 | 0.2869423 | -0.1540063 | -0.8076173 | -0.2325291 | -0.2792502 | -0.0948204 |
log_Spring_Summer_ratio | -0.3664680 | 0.1233951 | -0.5066039 | 0.1673864 | 0.7123117 | -0.2231584 | -0.0928520 |
swc_grow | -0.5481845 | 0.1334659 | 0.2270771 | 0.0069079 | 0.0307877 | 0.7095068 | -0.3545717 |
Loadings for each climatic principal component
env_pca_var = env_pca$sdev^2/sum(env_pca$sdev^2)
barplot(env_pca_var, names.arg = as.character(1:7), ylab = "variance explained",
xlab = "principal component", cex.axis = 1.5, cex.lab = 1.4)
Figure S7: Barplot of amount of variance explained by each climatic principal component.
ggbiplot(env_pca, obs.scale = 1, choices = 1:2, groups = asso_mean[!is.na(asso_mean$log_Spring_Summer_ratio),
"origin"], var.axes = F, var.scale = 1, ellipse = F, circle = F) + scale_color_manual(values = man_cols) +
theme(text = element_text(size = 20))
Figure S8: Plot of first two climatic principal components. Each point is one accession and accessions are colored by region of origin.
ggbiplot(env_pca, obs.scale = 1, choices = c(2, 5), groups = asso_mean[!is.na(asso_mean$log_Spring_Summer_ratio),
"origin"], var.scale = 1, var.axes = F, ellipse = F, circle = F) + scale_color_manual(values = man_cols) +
theme(text = element_text(size = 20))
Figure S9: Plot of climatic principal components two and five, which significantly correlate with stomatal patterns. Each point is one accession and accessions are colored by region of origin.
env_pca_values = as.data.frame(env_pca$x, stringsAsFactors = F)
env_pca_values = cbind(env_pca_values, as.character(asso_mean[!is.na(asso_mean$log_Spring_Summer_ratio),
"ID"]), stringsAsFactors = F)
names(env_pca_values)[ncol(env_pca_values)] = "ID"
for (i in 1:nrow(env_pca_values)) {
asso_mean$env_pc1[asso_mean$ID == env_pca_values$ID[i]] = env_pca_values$PC1[i]
asso_mean$env_pc2[asso_mean$ID == env_pca_values$ID[i]] = env_pca_values$PC2[i]
asso_mean$env_pc3[asso_mean$ID == env_pca_values$ID[i]] = env_pca_values$PC3[i]
asso_mean$env_pc4[asso_mean$ID == env_pca_values$ID[i]] = env_pca_values$PC4[i]
asso_mean$env_pc5[asso_mean$ID == env_pca_values$ID[i]] = env_pca_values$PC5[i]
asso_mean$env_pc6[asso_mean$ID == env_pca_values$ID[i]] = env_pca_values$PC6[i]
asso_mean$env_pc7[asso_mean$ID == env_pca_values$ID[i]] = env_pca_values$PC7[i]
}
# remove duplicate samples from same population
asso_mean_dedup = asso_mean[!duplicated(cbind(asso_mean$Latitude, asso_mean$Longitude)),
]
2.6 Load delta 13C data and combine with stomata data
asso_block1 = asso[asso$block == 1, ]
c13 = read.table("deltaC13Full.csv", sep = ",", header = TRUE, stringsAsFactors = FALSE)
assoC13 = merge(asso_block1, c13, by.x = "genotype", by.y = "SampleDescription")
assoC13$origin = as.factor(assoC13$origin)
assoC13$genotype = as.character(assoC13$genotype)
for (j in 1:nrow(assoC13)) {
assoC13[j, "swc_grow"] = asso_mean$swc_grow[which(asso_mean$genotype ==
assoC13$genotype[j])]
assoC13[j, "vapr_grow"] = asso_mean$vapr_grow[which(asso_mean$genotype ==
assoC13$genotype[j])]
assoC13[j, "wind_grow"] = asso_mean$wind_grow[which(asso_mean$genotype ==
assoC13$genotype[j])]
assoC13[j, "temp_grow"] = asso_mean$temp_grow[which(asso_mean$genotype ==
assoC13$genotype[j])]
assoC13[j, "rad_grow"] = asso_mean$rad_grow[which(asso_mean$genotype ==
assoC13$genotype[j])]
assoC13[j, "rain_grow"] = asso_mean$rain_grow[which(asso_mean$genotype ==
assoC13$genotype[j])]
assoC13[j, "log_Spring_Summer_ratio"] = asso_mean$log_Spring_Summer_ratio[which(asso_mean$genotype ==
assoC13$genotype[j])]
assoC13[j, "env_pc1"] = asso_mean$env_pc1[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "env_pc2"] = asso_mean$env_pc2[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "env_pc3"] = asso_mean$env_pc3[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "env_pc4"] = asso_mean$env_pc4[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "env_pc5"] = asso_mean$env_pc5[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "env_pc6"] = asso_mean$env_pc6[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "env_pc7"] = asso_mean$env_pc7[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc1"] = asso_mean$pc1[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc2"] = asso_mean$pc2[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc3"] = asso_mean$pc3[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc4"] = asso_mean$pc4[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc5"] = asso_mean$pc5[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc6"] = asso_mean$pc6[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc7"] = asso_mean$pc7[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc8"] = asso_mean$pc8[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc9"] = asso_mean$pc9[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc10"] = asso_mean$pc10[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc11"] = asso_mean$pc11[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc12"] = asso_mean$pc12[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc13"] = asso_mean$pc13[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc14"] = asso_mean$pc14[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc15"] = asso_mean$pc15[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc16"] = asso_mean$pc16[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc17"] = asso_mean$pc17[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc18"] = asso_mean$pc18[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc19"] = asso_mean$pc19[which(asso_mean$genotype == assoC13$genotype[j])]
assoC13[j, "pc20"] = asso_mean$pc20[which(asso_mean$genotype == assoC13$genotype[j])]
}
# remove duplicate samples from the same population
assoC13_dedup = assoC13[!assoC13$genotype %in% asso_dups, ]
3 Analysis of raw data
3.1 Correlation of manually and automatically determined stomata densities
3.1.1 Control of stomata density accuracy in pre-experiment
pre_ctrl = read.table("pre_control.csv", sep = ",", header = T)
pre_ctrl$stomata_median = pre_ctrl$stomata_median/0.15
pre_ctrl$control = pre_ctrl$control/0.15
cor.test(pre_ctrl$stomata_median, pre_ctrl$control)$estimate^2
## cor
## 0.8444945
ggplot(data = pre_ctrl, aes(x = control, y = stomata_median)) + geom_point(pch = 20,
size = 5) + geom_smooth(method = "lm", col = "red") + xlab("Manual median stomata density [1/mm²]") +
ylab("Automatic median stomata density [1/mm²]") + theme_few() + theme(legend.position = "none",
text = element_text(size = 20))
Figure S3: Correlation plot of manual stomata counts and automatic stomata counts in a pre-experiment. Each point represents one leaf. The red line represents a linear model fit of the data and the gray shadows indicate the error of the fit.
3.1.2 Control of stomata density accuracy in main experiment
cor.test(asso$control, asso$stomata)
##
## Pearson's product-moment correlation
##
## data: asso$control and asso$stomata
## t = 10.16, df = 13, p-value = 1.495e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8316522 0.9810600
## sample estimates:
## cor
## 0.9424207
ggplot(data = asso, aes(x = control, y = stomata)) + geom_point(pch = 20, size = 5) +
geom_smooth(method = "lm", col = "red") + xlab("Manual median stomata density [1/mm²]") +
ylab("Automatic median stomata density [1/mm²]") + theme_few() + theme(legend.position = "none",
text = element_text(size = 20))
Figure S5: Correlation plot of manual stomata counts and automatic stomata counts in the main experiment. Each point represents one leaf. The red line represents a linear model fit of the data and the gray shadows indicate the error of the fit.
3.1.3 Number of analyzed images per sample
ggplot(data = asso, aes(x = Images, fill = origin)) + geom_bar() + scale_fill_manual(values = man_cols,
name = "region") + theme_few() + theme(text = element_text(size = 20)) +
xlab("Number of images per sample")
Figure S2: The plot shows the distribution of the number of succesfully analyzed images per sample.
3.1.4 Correlation of stomata density and number of analyzed images
cor.test(asso$stomatasize, asso$Images)
##
## Pearson's product-moment correlation
##
## data: asso$stomatasize and asso$Images
## t = 0.52575, df = 868, p-value = 0.5992
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.04868113 0.08420820
## sample estimates:
## cor
## 0.01784234
ggplot(data = asso, aes(x = Images, y = stomata)) + geom_point(aes(col = origin),
size = 4, pch = 20) + geom_smooth(method = "lm", col = "red") + scale_color_manual(values = man_cols,
name = "region") + theme_few() + theme(text = element_text(size = 20)) +
xlab("Number of images") + ylab("Stomata densitiy [1/mm²]")
Figure S5: Correlation plot of the number of analyzed images versus stomata density.
3.1.5 Correlation of stomata density and number of analyzed images
cor.test(asso$stomata, asso$Images)
##
## Pearson's product-moment correlation
##
## data: asso$stomata and asso$Images
## t = 6.3593, df = 868, p-value = 3.274e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1465786 0.2736175
## sample estimates:
## cor
## 0.2109888
ggplot(data = asso, aes(x = Images, y = stomatasize)) + geom_point(aes(col = origin),
size = 4, pch = 20) + geom_smooth(method = "lm", col = "red") + scale_color_manual(values = man_cols,
name = "region") + theme_few() + theme(text = element_text(size = 20)) +
xlab("Number of images") + ylab("Stomata size [µm²]")
Correlation plot of the number of analyzed images versus stomata size.
3.2 Correlation of stomata density/size and leafsize
cor.test(asso$stomata, asso$leafsize)
##
## Pearson's product-moment correlation
##
## data: asso$stomata and asso$leafsize
## t = -0.072377, df = 867, p-value = 0.9423
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.06895075 0.06405642
## sample estimates:
## cor
## -0.002458037
ggplot(data = asso, aes(x = leafsize, y = stomata, col = origin)) + geom_point(size = 4,
pch = 20) + geom_smooth(method = "lm", col = "red") + theme_few() + theme(text = element_text(size = 20)) +
ylab(expression("stomata density [1/mm²]")) + xlab("Leaf size [mm²]") +
scale_color_manual(values = man_cols, name = "region")
Figure S12: Correlation plot of stomata density and leaf size
cor.test(asso$stomatasize, asso$leafsize)
##
## Pearson's product-moment correlation
##
## data: asso$stomatasize and asso$leafsize
## t = -1.5983, df = 867, p-value = 0.1103
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.12027327 0.01234565
## sample estimates:
## cor
## -0.05420283
ggplot(data = asso, aes(x = leafsize, y = stomatasize, col = origin)) + geom_point(size = 4,
pch = 20) + geom_smooth(method = "lm", col = "red") + theme_few() + theme(text = element_text(size = 20)) +
ylab(expression("stomata size [µm²]")) + xlab("Leaf size [mm²]") + scale_color_manual(values = man_cols,
name = "region")
Figure S13: Correlation plot of stomata size and leaf size
3.3 Estimation of broad-sense heritability
3.3.1 Stomata density
h2.model_SD <- lme(fixed = stomata ~ Images + block, random = ~1 | genotype,
data = asso_dedup, na.action = na.exclude)
sigma2.g_SD <- as.numeric(VarCorr(h2.model_SD)[1])
sigma2.e_SD <- as.numeric(VarCorr(h2.model_SD)[2])
h2_SD <- sigma2.g_SD/(sigma2.g_SD + sigma2.e_SD)
h2_SD # Broad sense heritability
## [1] 0.3007983
3.3.2 Stomata size
h2.model_SS <- lme(fixed = stomatasize ~ block, random = ~1 | genotype, data = asso_dedup,
na.action = na.exclude)
sigma2.g_SS <- as.numeric(VarCorr(h2.model_SS)[1])
sigma2.e_SS <- as.numeric(VarCorr(h2.model_SS)[2])
h2_SS <- sigma2.g_SS/(sigma2.g_SS + sigma2.e_SS)
h2_SS # Broad sense heritability
## [1] 0.4095585
3.4 Correlation of stomata density and size
cor.test(asso_mean$stomata, asso_mean$stomatasize)
##
## Pearson's product-moment correlation
##
## data: asso_mean$stomata and asso_mean$stomatasize
## t = -10.732, df = 328, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5855405 -0.4252472
## sample estimates:
## cor
## -0.5098052
ggplot(data = asso_mean, aes(x = stomata, y = stomatasize, col = origin)) +
geom_point(size = 4, pch = 20) + geom_smooth(method = "lm", col = "red") +
theme_few() + theme(text = element_text(size = 20)) + ylab("mean stomata size [µm²]") +
xlab(expression("mean stomata density [1/mm²]")) + scale_color_manual(values = man_cols,
name = "region") + xlim(85, 210)
Figure 1: Correlation plot of mean stomata density and mean stomata size. Each point represents the mean for one accession. Accessions are colored by region of origin. The red line represents a linear model fit of the data and the gray shadows indicate the error of the fit.
cor.test(assoC13$X13C, assoC13$stomatasize)
##
## Pearson's product-moment correlation
##
## data: assoC13$X13C and assoC13$stomatasize
## t = -2.9124, df = 259, p-value = 0.0039
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.29315567 -0.05790624
## sample estimates:
## cor
## -0.1780742
ggplot(data = assoC13, aes(x = stomatasize, y = X13C, col = origin)) + geom_point(size = 4,
shape = 20) + geom_smooth(method = "lm", colour = "red") + theme_few() +
theme(text = element_text(size = 20)) + xlab("stomata size [µm²]") + ylab(delta ~
"13C") + scale_color_manual(values = man_cols, name = "region")
Figure 2: Correlation plot of delta 13C and stomata size. Each point represents one individual. Accessions are colored by region of origin. The red line represents a linear model fit of the data and the gray shadows indicate the error of the fit.
cor.test(assoC13[assoC13$stomatasize < 140, ]$X13C, assoC13[assoC13$stomatasize <
140, ]$stomatasize) #remove outlier stomata size outlier
##
## Pearson's product-moment correlation
##
## data: assoC13[assoC13$stomatasize < 140, ]$X13C and assoC13[assoC13$stomatasize < 140, ]$stomatasize
## t = -2.6213, df = 258, p-value = 0.00928
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.27728441 -0.04019783
## sample estimates:
## cor
## -0.1610639
cor.test(assoC13$X13C, assoC13$stomata)
##
## Pearson's product-moment correlation
##
## data: assoC13$X13C and assoC13$stomata
## t = -0.12569, df = 259, p-value = 0.9001
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1291075 0.1137179
## sample estimates:
## cor
## -0.007809918
ggplot(data = assoC13, aes(x = stomata, y = X13C, col = origin)) + geom_point(size = 4,
shape = 20) + geom_smooth(method = "lm", colour = "red") + theme_few() +
theme(text = element_text(size = 20)) + xlab("stomata density [1/mm²]") +
ylab(delta ~ "13C") + scale_color_manual(values = man_cols, name = "region")
Figure S14: Correlation plot of delta 13C and stomata density. Each point represents one individual. Accessions are colored by region of origin. The red line represents a linear model fit of the data and the gray shadows indicate the error of the fit.
4 Analysis of regional differentiation
4.1 Regional differentiation of stomata density
asso_dedup$origin = as.factor(asso_dedup$origin)
SD_glm1 = glm(log(stomata) ~ block + Images + leafsize + tray_position + origin,
na.action = na.exclude, data = asso_dedup, family = "quasipoisson")
summary(SD_glm1)
##
## Call:
## glm(formula = log(stomata) ~ block + Images + leafsize + tray_position +
## origin, family = "quasipoisson", data = asso_dedup, na.action = na.exclude)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.34691 -0.05662 0.00296 0.05797 0.22384
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.545e+00 6.320e-03 244.371 < 2e-16 ***
## block2 1.741e-02 3.488e-03 4.992 7.45e-07 ***
## block3 4.054e-02 3.517e-03 11.526 < 2e-16 ***
## Images 1.198e-03 2.264e-04 5.290 1.61e-07 ***
## leafsize -1.237e-05 1.851e-05 -0.668 0.50418
## tray_positioncorner -5.188e-03 5.058e-03 -1.026 0.30532
## tray_positionedge 8.739e-04 3.458e-03 0.253 0.80054
## originnorth_sweden 1.467e-02 6.121e-03 2.397 0.01677 *
## originsouth_sweden 2.865e-02 4.547e-03 6.302 5.05e-10 ***
## originspain 6.943e-03 4.380e-03 1.585 0.11337
## originwestern_europe 2.313e-02 7.297e-03 3.170 0.00159 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.007150041)
##
## Null deviance: 7.1809 on 751 degrees of freedom
## Residual deviance: 5.3181 on 741 degrees of freedom
## (1 observation deleted due to missingness)
## AIC: NA
##
## Number of Fisher Scoring iterations: 3
Anova(SD_glm1)
LR Chisq | Df | Pr(>Chisq) | |
---|---|---|---|
block | 135.0823584 | 2 | 0.0000000 |
Images | 28.0045371 | 1 | 0.0000001 |
leafsize | 0.4467909 | 1 | 0.5038629 |
tray_position | 1.9455341 | 2 | 0.3780355 |
origin | 61.5142836 | 4 | 0.0000000 |
SD_glm2 = glm(log(stomata) ~ block + origin + Images, na.action = na.exclude,
data = asso_dedup, family = "quasipoisson")
summary(SD_glm2)
##
## Call:
## glm(formula = log(stomata) ~ block + origin + Images, family = "quasipoisson",
## data = asso_dedup, na.action = na.exclude)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.34547 -0.05606 0.00293 0.05893 0.22752
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.5435754 0.0057770 267.194 < 2e-16 ***
## block2 0.0179867 0.0034509 5.212 2.42e-07 ***
## block3 0.0409786 0.0034920 11.735 < 2e-16 ***
## originnorth_sweden 0.0149854 0.0060933 2.459 0.01415 *
## originsouth_sweden 0.0289306 0.0045046 6.422 2.39e-10 ***
## originspain 0.0071414 0.0043373 1.647 0.10008
## originwestern_europe 0.0231599 0.0072787 3.182 0.00152 **
## Images 0.0011101 0.0002161 5.137 3.56e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.007141617)
##
## Null deviance: 7.1812 on 752 degrees of freedom
## Residual deviance: 5.3405 on 745 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 3
Anova(SD_glm2)
LR Chisq | Df | Pr(>Chisq) | |
---|---|---|---|
block | 139.32143 | 2 | 0e+00 |
origin | 62.72020 | 4 | 0e+00 |
Images | 26.41022 | 1 | 3e-07 |
SD_glm2_glht = glht(SD_glm2, mcp(origin = "Tukey"))
tidy(summary(SD_glm2_glht))
lhs | rhs | estimate | std.error | statistic | p.value |
---|---|---|---|---|---|
north_sweden - central_europe | 0 | 0.0149854 | 0.0060933 | 2.4593275 | 0.0920650 |
south_sweden - central_europe | 0 | 0.0289306 | 0.0045046 | 6.4224261 | 0.0000000 |
spain - central_europe | 0 | 0.0071414 | 0.0043373 | 1.6465086 | 0.4481333 |
western_europe - central_europe | 0 | 0.0231599 | 0.0072787 | 3.1818703 | 0.0113175 |
south_sweden - north_sweden | 0 | 0.0139452 | 0.0053839 | 2.5901437 | 0.0659348 |
spain - north_sweden | 0 | -0.0078440 | 0.0052448 | -1.4955720 | 0.5463685 |
western_europe - north_sweden | 0 | 0.0081745 | 0.0078518 | 1.0410986 | 0.8253440 |
spain - south_sweden | 0 | -0.0217891 | 0.0032668 | -6.6699111 | 0.0000000 |
western_europe - south_sweden | 0 | -0.0057707 | 0.0066965 | -0.8617439 | 0.9041427 |
western_europe - spain | 0 | 0.0160185 | 0.0065851 | 2.4325142 | 0.0984215 |
SD_sig = cld(SD_glm2_glht)$mcletters$Letters
ggplot(data = asso_mean_dedup, aes(x = origin, y = stomata)) + geom_boxplot(fill = alpha("black",
0.25)) + theme_few() + theme(legend.position = "none", text = element_text(size = 20)) +
geom_text(data = data.frame(), aes(x = names(SD_sig), y = max(asso_mean$stomata) +
0.02 * max(asso_mean$stomata), label = SD_sig), size = 6) + ylab("mean stomata density [1/mm²]") +
xlab("region of origin")
Figure S24: Boxplot of mean stomata density per accession grouped by region of origin. Significance of differentiation was tested using a generalized linear model followed by a post-hoc test. Significance is indicated by letters on top: Groups that do not share a common letter are significantly different.
4.2 Regional differentiation of stomata size
SS_glm1 = glm(data = asso_dedup, stomatasize ~ block + leafsize + origin + tray_position,
family = quasipoisson, na.action = na.exclude)
summary(SS_glm1)
##
## Call:
## glm(formula = stomatasize ~ block + leafsize + origin + tray_position,
## family = quasipoisson, data = asso_dedup, na.action = na.exclude)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9538 -0.5667 -0.0483 0.4998 4.2292
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.748e+00 1.025e-02 463.169 < 2e-16 ***
## block2 -4.471e-03 6.635e-03 -0.674 0.50058
## block3 1.245e-02 6.615e-03 1.882 0.06021 .
## leafsize -8.925e-05 3.451e-05 -2.586 0.00989 **
## originnorth_sweden -3.023e-02 1.173e-02 -2.576 0.01018 *
## originsouth_sweden -5.371e-02 8.709e-03 -6.166 1.15e-09 ***
## originspain -9.872e-03 8.302e-03 -1.189 0.23476
## originwestern_europe -2.022e-02 1.400e-02 -1.445 0.14899
## tray_positioncorner 2.026e-04 9.736e-03 0.021 0.98340
## tray_positionedge -4.416e-03 6.644e-03 -0.665 0.50648
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.5961454)
##
## Null deviance: 483.2 on 751 degrees of freedom
## Residual deviance: 437.9 on 742 degrees of freedom
## (1 observation deleted due to missingness)
## AIC: NA
##
## Number of Fisher Scoring iterations: 3
Anova(SS_glm1)
LR Chisq | Df | Pr(>Chisq) | |
---|---|---|---|
block | 7.3556332 | 2 | 0.0252781 |
leafsize | 6.7142147 | 1 | 0.0095647 |
origin | 60.8738295 | 4 | 0.0000000 |
tray_position | 0.6365886 | 2 | 0.7273887 |
SS_glm2 = glm(data = asso_dedup, stomatasize ~ block + origin, family = quasipoisson,
na.action = na.exclude)
summary(SS_glm2)
##
## Call:
## glm(formula = stomatasize ~ block + origin, family = quasipoisson,
## data = asso_dedup, na.action = na.exclude)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2639 -0.5652 -0.0307 0.5135 4.0875
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.731104 0.008166 579.369 < 2e-16 ***
## block2 -0.002417 0.006620 -0.365 0.71510
## block3 0.013425 0.006627 2.026 0.04315 *
## originnorth_sweden -0.031256 0.011727 -2.665 0.00786 **
## originsouth_sweden -0.054031 0.008659 -6.240 7.33e-10 ***
## originspain -0.011258 0.008250 -1.365 0.17277
## originwestern_europe -0.018026 0.014014 -1.286 0.19872
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.6001391)
##
## Null deviance: 483.68 on 752 degrees of freedom
## Residual deviance: 443.78 on 746 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 3
Anova(SS_glm2)
LR Chisq | Df | Pr(>Chisq) | |
---|---|---|---|
block | 6.901211 | 2 | 0.0317264 |
origin | 59.821590 | 4 | 0.0000000 |
SS_glm2_glht = glht(SS_glm2, mcp(origin = "Tukey"))
tidy(summary(SS_glm2_glht))
lhs | rhs | estimate | std.error | statistic | p.value |
---|---|---|---|---|---|
north_sweden - central_europe | 0 | -0.0312557 | 0.0117273 | -2.6651983 | 0.0542710 |
south_sweden - central_europe | 0 | -0.0540315 | 0.0086588 | -6.2400743 | 0.0000000 |
spain - central_europe | 0 | -0.0112585 | 0.0082500 | -1.3646589 | 0.6329269 |
western_europe - central_europe | 0 | -0.0180262 | 0.0140135 | -1.2863455 | 0.6835078 |
south_sweden - north_sweden | 0 | -0.0227758 | 0.0104948 | -2.1701895 | 0.1781180 |
spain - north_sweden | 0 | 0.0199972 | 0.0101598 | 1.9682603 | 0.2651852 |
western_europe - north_sweden | 0 | 0.0132295 | 0.0152159 | 0.8694487 | 0.9012953 |
spain - south_sweden | 0 | 0.0427730 | 0.0063755 | 6.7089214 | 0.0000000 |
western_europe - south_sweden | 0 | 0.0360053 | 0.0130011 | 2.7694085 | 0.0406428 |
western_europe - spain | 0 | -0.0067677 | 0.0127321 | -0.5315518 | 0.9827474 |
SS_sig = cld(SS_glm2_glht)$mcletters$Letters
ggplot(data = asso_mean_dedup, aes(x = origin, y = stomatasize)) + geom_boxplot(fill = alpha("black",
0.25)) + theme_few() + theme(legend.position = "none", text = element_text(size = 20)) +
geom_text(data = data.frame(), aes(x = names(SS_sig), y = max(asso_mean$stomatasize) +
0.02 * max(asso_mean$stomatasize), label = SS_sig), size = 6) + xlab("region of origin") +
ylab("mean stomata size [µm²]")
Figure 4: Boxplot of mean stomata size per accession grouped by region of origin. Significance of differentiation was tested using a generalized linear model followed by a post-hoc test. Significance is indicated by letters on top: Groups that do not share a common letter are significantly different.
4.3 Regional differentiation of delta 13C
glmC13 = glm(assoC13_dedup$X13C ~ assoC13_dedup$origin + assoC13_dedup$tray_position)
summary(glmC13)
##
## Call:
## glm(formula = assoC13_dedup$X13C ~ assoC13_dedup$origin + assoC13_dedup$tray_position)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.5101 -0.7112 -0.0954 0.7000 4.4011
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -36.55651 0.23310 -156.827 < 2e-16
## assoC13_dedup$originnorth_sweden 1.76533 0.31214 5.656 4.80e-08
## assoC13_dedup$originsouth_sweden 1.45274 0.24467 5.937 1.12e-08
## assoC13_dedup$originspain 0.86983 0.23428 3.713 0.00026
## assoC13_dedup$originwestern_europe 0.51340 0.36183 1.419 0.15734
## assoC13_dedup$tray_positioncorner -0.03699 0.23680 -0.156 0.87603
## assoC13_dedup$tray_positionedge -0.06584 0.18199 -0.362 0.71785
##
## (Intercept) ***
## assoC13_dedup$originnorth_sweden ***
## assoC13_dedup$originsouth_sweden ***
## assoC13_dedup$originspain ***
## assoC13_dedup$originwestern_europe
## assoC13_dedup$tray_positioncorner
## assoC13_dedup$tray_positionedge
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1.134762)
##
## Null deviance: 307.06 on 226 degrees of freedom
## Residual deviance: 249.65 on 220 degrees of freedom
## AIC: 681.79
##
## Number of Fisher Scoring iterations: 2
Anova(glmC13, type = 2)
LR Chisq | Df | Pr(>Chisq) | |
---|---|---|---|
assoC13_dedup$origin | 49.3433299 | 4 | 0.0000000 |
assoC13_dedup$tray_position | 0.1336696 | 2 | 0.9353497 |
glmC13_2 = glm(X13C ~ origin, data = assoC13_dedup)
summary(glmC13_2)
##
## Call:
## glm(formula = X13C ~ origin, data = assoC13_dedup)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.5230 -0.7146 -0.0759 0.6854 4.3882
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -36.5937 0.2041 -179.254 < 2e-16 ***
## originnorth_sweden 1.7520 0.3086 5.676 4.27e-08 ***
## originsouth_sweden 1.4370 0.2398 5.992 8.31e-09 ***
## originspain 0.8592 0.2313 3.714 0.000258 ***
## originwestern_europe 0.5240 0.3581 1.463 0.144791
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1.125223)
##
## Null deviance: 307.06 on 226 degrees of freedom
## Residual deviance: 249.80 on 222 degrees of freedom
## AIC: 677.92
##
## Number of Fisher Scoring iterations: 2
Anova(glmC13_2)
LR Chisq | Df | Pr(>Chisq) | |
---|---|---|---|
origin | 50.88944 | 4 | 0 |
glmC13_2_glht = glht(glmC13_2, mcp(origin = "Tukey"))
tidy(summary(glmC13_2_glht))
lhs | rhs | estimate | std.error | statistic | p.value |
---|---|---|---|---|---|
north_sweden - central_europe | 0 | 1.7519598 | 0.3086372 | 5.676437 | 0.0000001 |
south_sweden - central_europe | 0 | 1.4370073 | 0.2398398 | 5.991531 | 0.0000000 |
spain - central_europe | 0 | 0.8592020 | 0.2313425 | 3.713982 | 0.0017477 |
western_europe - central_europe | 0 | 0.5240093 | 0.3580929 | 1.463333 | 0.5693773 |
south_sweden - north_sweden | 0 | -0.3149525 | 0.2634962 | -1.195283 | 0.7409798 |
spain - north_sweden | 0 | -0.8927579 | 0.2557860 | -3.490253 | 0.0040076 |
western_europe - north_sweden | 0 | -1.2279506 | 0.3743496 | -3.280224 | 0.0083911 |
spain - south_sweden | 0 | -0.5778054 | 0.1664111 | -3.472156 | 0.0042574 |
western_europe - south_sweden | 0 | -0.9129981 | 0.3200059 | -2.853066 | 0.0323833 |
western_europe - spain | 0 | -0.3351927 | 0.3136878 | -1.068555 | 0.8122240 |
C13_sig = cld(glmC13_2_glht)$mcletters$Letters
ggplot(data = assoC13_dedup, aes(x = origin, y = X13C)) + geom_boxplot(fill = "black",
alpha = 0.25) + theme_few() + theme(legend.position = "none", text = element_text(size = 20)) +
xlab("region of origin") + ylab(delta ~ "13C") + geom_text(data = data.frame(),
aes(x = names(C13_sig), y = max(assoC13$X13C) - 0.02 * max(assoC13$X13C),
label = C13_sig), size = 6)
Figure 4: Boxplof delta 13C grouped by region of origin. Significance of differentiation was tested using a generalized linear model followed by a post-hoc test. Significance is indicated by letters on top: Groups that do not share a common letter are significantly different.
5 Analysis of GWAS results
5.1 QTL on Chr2
chr2_min = scan("gwas_chr2_lines.txt")
load("W224_.rda")
chr2_min = as.character(chr2_min)
c13$ID = NA
c13$origin = NA
c13$stomata = NA
c13$stomatasize = NA
c13$env_pc1 = NA
c13$env_pc2 = NA
c13$env_pc3 = NA
c13$env_pc4 = NA
c13$env_pc5 = NA
c13$env_pc6 = NA
c13$env_pc7 = NA
asso_mean$origin = as.character(asso_mean$origin)
asso_mean$genotype = as.character(asso_mean$genotype)
asso_mean$ID = as.character(asso_mean$ID)
for (i in 1:nrow(c13)) {
if (c13$SampleDescription[i] %in% asso_mean$genotype) {
c13$ID[i] = asso_mean$ID[asso_mean$genotype == c13$SampleDescription[i]]
c13$origin[i] = asso_mean$origin[asso_mean$genotype == c13$SampleDescription[i]]
c13$stomata[i] = asso_mean$stomata[asso_mean$genotype == c13$SampleDescription[i]]
c13$stomatasize[i] = asso_mean$stomatasize[asso_mean$genotype == c13$SampleDescription[i]]
c13$env_pc1[i] = asso_mean$env_pc1[asso_mean$genotype == c13$SampleDescription[i]]
c13$env_pc2[i] = asso_mean$env_pc2[asso_mean$genotype == c13$SampleDescription[i]]
c13$env_pc3[i] = asso_mean$env_pc3[asso_mean$genotype == c13$SampleDescription[i]]
c13$env_pc4[i] = asso_mean$env_pc4[asso_mean$genotype == c13$SampleDescription[i]]
c13$env_pc5[i] = asso_mean$env_pc5[asso_mean$genotype == c13$SampleDescription[i]]
c13$env_pc6[i] = asso_mean$env_pc6[asso_mean$genotype == c13$SampleDescription[i]]
c13$env_pc7[i] = asso_mean$env_pc7[asso_mean$genotype == c13$SampleDescription[i]]
} else {
print(c13$SampleDescription[i])
}
}
## [1] "372"
## [1] "373"
## [1] "374"
## [1] "375"
## [1] "376"
## [1] "463"
## [1] "471"
## [1] "491"
## [1] "495"
## [1] "509"
## [1] "512"
## [1] "517"
## [1] "524"
## [1] "526"
## [1] "527"
## [1] "528"
## [1] "529"
## [1] "530"
## [1] "550"
## [1] "555"
## [1] "556"
## [1] "577"
## [1] "619"
## [1] "629"
## [1] "638"
## [1] "639"
## [1] "657"
## [1] "711"
## [1] "712"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "acetanilide"
## [1] "actanilide"
## [1] "Limestone"
## [1] "limestone TS"
## [1] "LSVEC"
## [1] "LSVEC"
c13 = c13[!is.na(c13$ID), ]
c13_min = c13[c13$ID %in% chr2_min, ]
c13_maj = c13[!c13$ID %in% chr2_min, ]
c13$allele = NA
c13$allele[c13$ID %in% chr2_min] = 0
c13$allele[!c13$ID %in% chr2_min] = 1
c13$allele4 = NA
c13$allele4[c13$ID %in% W_0$ID] = 0
c13$allele4[!c13$ID %in% W_0$ID] = 1
c13_min = c13[c13$ID %in% chr2_min, ]
c13_maj = c13[!c13$ID %in% chr2_min, ]
boxplot(c13_min$X13C[c13_min$origin == "south_sweden"], c13_maj$X13C[c13_maj$origin ==
"south_sweden"], xlab = "Allele", ylab = "delta 13C", names = c("Minor",
"Major"), cex.lab = 1.5, cex.axis = 1.5)
Figure S17: Boxplot of delta 13C in Southern Swedish accession carrying either the major or minor allele of the QTL on Chr 2. Significance is tested below.
wilcox.test(c13_min$X13C[c13_min$origin == "south_sweden"], c13_maj$X13C[c13_maj$origin ==
"south_sweden"])
##
## Wilcoxon rank sum test with continuity correction
##
## data: c13_min$X13C[c13_min$origin == "south_sweden"] and c13_maj$X13C[c13_maj$origin == "south_sweden"]
## W = 1868, p-value = 6.569e-05
## alternative hypothesis: true location shift is not equal to 0
boxplot(c13_min$X13C[c13_min$origin == "south_sweden" | c13_min$origin == "north_sweden"],
c13_maj$X13C[c13_maj$origin == "south_sweden" | c13_maj$origin == "north_sweden"],
xlab = "Allele", ylab = "delta 13C", names = c("Minor", "Major"), cex.lab = 1.5,
cex.axis = 1.5)
Boxplot of delta 13C in all Swedish accession carrying either the major or minor allele of the QTL on Chr 2. Significance is tested below.
wilcox.test(c13_min$X13C[c13_min$origin == "south_sweden" | c13_min$origin ==
"north_sweden"], c13_maj$X13C[c13_maj$origin == "south_sweden" | c13_maj$origin ==
"north_sweden"])
##
## Wilcoxon rank sum test with continuity correction
##
## data: c13_min$X13C[c13_min$origin == "south_sweden" | c13_min$origin == and c13_maj$X13C[c13_maj$origin == "south_sweden" | c13_maj$origin == "north_sweden"] and "north_sweden"]
## W = 2574, p-value = 9.716e-05
## alternative hypothesis: true location shift is not equal to 0
boxplot(c13_min$stomatasize[c13_min$origin == "south_sweden" | c13_min$origin ==
"north_sweden"], c13_maj$stomatasize[c13_maj$origin == "south_sweden" |
c13_maj$origin == "north_sweden"], xlab = "Allele", ylab = "stomatasize",
names = c("Minor", "Major"), cex.lab = 1.5, cex.axis = 1.5)
Boxplot of stomatsize in all Swedish accessions carrying either the major or minor allele of the QTL on Chr 2. Significance is tested below.
wilcox.test(c13_min$stomatasize[c13_min$origin == "south_sweden" | c13_min$origin ==
"north_sweden"], c13_maj$stomatasize[c13_maj$origin == "south_sweden" |
c13_maj$origin == "north_sweden"])
##
## Wilcoxon rank sum test with continuity correction
##
## data: c13_min$stomatasize[c13_min$origin == "south_sweden" | c13_min$origin == and c13_maj$stomatasize[c13_maj$origin == "south_sweden" | c13_maj$origin == "north_sweden"] and "north_sweden"]
## W = 1606, p-value = 0.4812
## alternative hypothesis: true location shift is not equal to 0
5.2 QTL on Chr4
assoC13_224 = assoC13[assoC13$ID %in% W_0$ID, ] # create two new tables including individuals of each allele
assoC13_37 = assoC13[!(assoC13$ID %in% W_0$ID), ]
cor_224 = cor.test(assoC13_224$X13C, assoC13_224$stomatasize) # test if correlation with stomata size changes
cor_37 = cor.test(assoC13_37$X13C, assoC13_37$stomatasize)
boxplot(assoC13_37$X13C[assoC13_37$origin == "south_sweden"], assoC13_224$X13C[assoC13_224$origin ==
"south_sweden"], xlab = "Allele", ylab = "delta 13C", names = c("Minor",
"Major"), cex.lab = 1.5, cex.axis = 1.5)
Figure S20: Boxplot of delta 13C in Southern Swedish accession carrying either the major or minor allele of the significantly associating SNP from GWAS analysis. Significance is tested below.
mean(assoC13_37$X13C[assoC13_37$origin == "south_sweden"]) - mean(assoC13_224$X13C[assoC13_224$origin ==
"south_sweden"])
## [1] 1.342374
wilcox.test(assoC13_37$X13C[assoC13_37$origin == "south_sweden"], assoC13_224$X13C[assoC13_224$origin ==
"south_sweden"])
##
## Wilcoxon rank sum test with continuity correction
##
## data: assoC13_37$X13C[assoC13_37$origin == "south_sweden"] and assoC13_224$X13C[assoC13_224$origin == "south_sweden"]
## W = 1707, p-value = 1.15e-06
## alternative hypothesis: true location shift is not equal to 0
boxplot(assoC13_37$stomatasize[assoC13_37$origin == "south_sweden"], assoC13_224$stomatasize[assoC13_224$origin ==
"south_sweden"], xlab = "Allele", ylab = "stomata size", names = c("Minor",
"Major"), cex.lab = 1.5, cex.axis = 1.5)
Boxplot of stomata size in Southern Swedish accession carrying either the major or minor allele of the significantly associating SNP from GWAS analysis. Significance is tested below.
wilcox.test(assoC13_37$stomatasize[assoC13_37$origin == "south_sweden"], assoC13_224$stomatasize[assoC13_224$origin ==
"south_sweden"])
##
## Wilcoxon rank sum test with continuity correction
##
## data: assoC13_37$stomatasize[assoC13_37$origin == "south_sweden"] and assoC13_224$stomatasize[assoC13_224$origin == "south_sweden"]
## W = 1227, p-value = 0.2262
## alternative hypothesis: true location shift is not equal to 0
6 Analysis of correlations between phenotypes and climatic PCs
6.1 Stomata size
envpop_SS_glm = glm(data = asso_mean_dedup, stomatasize ~ env_pc1 + env_pc2 +
env_pc3 + env_pc4 + env_pc5 + env_pc6 + env_pc7 + pc1 + pc2 + pc3 + pc4 +
pc5 + pc6 + pc7 + pc8 + pc9 + pc10 + pc11 + pc12 + pc13 + pc14 + pc15 +
pc16 + pc17 + pc18 + pc19 + pc20, family = "quasipoisson")
summary(envpop_SS_glm)
##
## Call:
## glm(formula = stomatasize ~ env_pc1 + env_pc2 + env_pc3 + env_pc4 +
## env_pc5 + env_pc6 + env_pc7 + pc1 + pc2 + pc3 + pc4 + pc5 +
## pc6 + pc7 + pc8 + pc9 + pc10 + pc11 + pc12 + pc13 + pc14 +
## pc15 + pc16 + pc17 + pc18 + pc19 + pc20, family = "quasipoisson",
## data = asso_mean_dedup)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.24515 -0.43165 -0.07419 0.37999 1.67128
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.709e+00 3.701e-03 1272.232 < 2e-16 ***
## env_pc1 -7.928e-03 5.027e-03 -1.577 0.11609
## env_pc2 -1.171e-02 3.837e-03 -3.053 0.00251 **
## env_pc3 -4.239e-03 5.285e-03 -0.802 0.42331
## env_pc4 -1.642e-03 5.404e-03 -0.304 0.76152
## env_pc5 -1.118e-02 5.518e-03 -2.026 0.04382 *
## env_pc6 5.632e-03 1.002e-02 0.562 0.57446
## env_pc7 1.722e-02 1.853e-02 0.929 0.35374
## pc1 2.537e-04 8.552e-05 2.966 0.00331 **
## pc2 4.905e-05 4.989e-05 0.983 0.32654
## pc3 1.311e-04 6.427e-05 2.040 0.04237 *
## pc4 -2.097e-04 9.922e-05 -2.113 0.03557 *
## pc5 4.545e-05 1.403e-04 0.324 0.74626
## pc6 1.986e-04 1.156e-04 1.718 0.08711 .
## pc7 -2.009e-04 1.185e-04 -1.696 0.09118 .
## pc8 -3.607e-04 1.141e-04 -3.161 0.00177 **
## pc9 -2.739e-04 1.483e-04 -1.848 0.06586 .
## pc10 -6.233e-05 1.096e-04 -0.569 0.57006
## pc11 7.156e-05 1.145e-04 0.625 0.53259
## pc12 -1.580e-04 1.210e-04 -1.306 0.19291
## pc13 -1.846e-04 1.282e-04 -1.440 0.15113
## pc14 -1.401e-04 1.368e-04 -1.025 0.30653
## pc15 7.948e-05 1.410e-04 0.564 0.57359
## pc16 -2.478e-04 1.615e-04 -1.534 0.12628
## pc17 -2.437e-04 1.333e-04 -1.828 0.06881 .
## pc18 -2.691e-04 1.354e-04 -1.988 0.04793 *
## pc19 1.405e-04 1.543e-04 0.911 0.36336
## pc20 9.410e-05 1.742e-04 0.540 0.58958
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.3706497)
##
## Null deviance: 117.372 on 275 degrees of freedom
## Residual deviance: 91.374 on 248 degrees of freedom
## (11 observations deleted due to missingness)
## AIC: NA
##
## Number of Fisher Scoring iterations: 3
Anova(envpop_SS_glm)
LR Chisq | Df | Pr(>Chisq) | |
---|---|---|---|
env_pc1 | 2.4909453 | 1 | 0.1145029 |
env_pc2 | 9.3346397 | 1 | 0.0022486 |
env_pc3 | 0.6437755 | 1 | 0.4223469 |
env_pc4 | 0.0922916 | 1 | 0.7612838 |
env_pc5 | 4.1037679 | 1 | 0.0427878 |
env_pc6 | 0.3163653 | 1 | 0.5738002 |
env_pc7 | 0.8633834 | 1 | 0.3527936 |
pc1 | 8.7991273 | 1 | 0.0030137 |
pc2 | 0.9639992 | 1 | 0.3261813 |
pc3 | 4.1536561 | 1 | 0.0415447 |
pc4 | 4.4659356 | 1 | 0.0345771 |
pc5 | 0.1044265 | 1 | 0.7465810 |
pc6 | 2.9508367 | 1 | 0.0858331 |
pc7 | 2.8751148 | 1 | 0.0899586 |
pc8 | 10.0038450 | 1 | 0.0015621 |
pc9 | 3.3941797 | 1 | 0.0654269 |
pc10 | 0.3237534 | 1 | 0.5693607 |
pc11 | 0.3906581 | 1 | 0.5319537 |
pc12 | 1.7038052 | 1 | 0.1917911 |
pc13 | 2.0702565 | 1 | 0.1501963 |
pc14 | 1.0503031 | 1 | 0.3054373 |
pc15 | 0.3176368 | 1 | 0.5730313 |
pc16 | 2.3469716 | 1 | 0.1255267 |
pc17 | 3.3494941 | 1 | 0.0672257 |
pc18 | 3.9450208 | 1 | 0.0470103 |
pc19 | 0.8308378 | 1 | 0.3620304 |
pc20 | 0.2918799 | 1 | 0.5890183 |
plot(effect("env_pc2", envpop_SS_glm, partial.residuals = T), xlab = "environmental principal component 2",
ylab = "mean stomata size [µm²]", main = "")
Figure 3a: Effect plot of generalized linear models of stomata size with climatic principal component 2 as predictors. Plots show the linear fit (red solid line) and the smoothed fit of partial residuals (black) of the specific predictor. Red shadows represent the errror of the fit. Grey dots are partial residuals.
plot(effect("env_pc5", envpop_SS_glm, partial.residuals = T), xlab = "environmental principal component 5",
ylab = "mean stomata size [µm²]", main = "")
Figure 3b: Effect plot of generalized linear models of stomata size with climatic principal component 5 as predictors. Plots show the linear fit (red solid line) and the smoothed fit of partial residuals (black) of the specific predictor. Red shadows represent the errror of the fit. Grey dots are partial residuals.
6.2 Stomata density
envpop_SD_glm = glm(data = asso_mean_dedup, log(stomata) ~ Images + env_pc1 +
env_pc2 + env_pc3 + env_pc4 + env_pc5 + env_pc6 + env_pc7 + pc1 + pc2 +
pc3 + pc4 + pc5 + pc6 + pc7 + pc8 + pc9 + pc10 + pc11 + pc12 + pc13 + pc14 +
pc15 + pc16 + pc17 + pc18 + pc19 + pc20, family = "quasipoisson")
summary(envpop_SD_glm)
##
## Call:
## glm(formula = log(stomata) ~ Images + env_pc1 + env_pc2 + env_pc3 +
## env_pc4 + env_pc5 + env_pc6 + env_pc7 + pc1 + pc2 + pc3 +
## pc4 + pc5 + pc6 + pc7 + pc8 + pc9 + pc10 + pc11 + pc12 +
## pc13 + pc14 + pc15 + pc16 + pc17 + pc18 + pc19 + pc20, family = "quasipoisson",
## data = asso_mean_dedup)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.200808 -0.036794 0.003788 0.038337 0.147132
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.586e+00 7.764e-03 204.324 < 2e-16 ***
## Images 8.226e-04 3.931e-04 2.092 0.03743 *
## env_pc1 1.443e-03 2.458e-03 0.587 0.55761
## env_pc2 5.583e-03 1.884e-03 2.964 0.00333 **
## env_pc3 -7.238e-05 2.597e-03 -0.028 0.97779
## env_pc4 6.776e-04 2.658e-03 0.255 0.79899
## env_pc5 6.289e-03 2.712e-03 2.319 0.02119 *
## env_pc6 1.282e-03 4.930e-03 0.260 0.79499
## env_pc7 -2.294e-02 9.084e-03 -2.525 0.01218 *
## pc1 -1.049e-04 4.188e-05 -2.506 0.01286 *
## pc2 -7.019e-05 2.441e-05 -2.876 0.00438 **
## pc3 -5.740e-05 3.142e-05 -1.827 0.06896 .
## pc4 2.283e-05 4.855e-05 0.470 0.63860
## pc5 6.222e-05 6.845e-05 0.909 0.36425
## pc6 2.069e-06 5.621e-05 0.037 0.97067
## pc7 5.350e-05 5.776e-05 0.926 0.35525
## pc8 6.583e-05 5.608e-05 1.174 0.24162
## pc9 4.270e-05 7.260e-05 0.588 0.55698
## pc10 6.821e-05 5.315e-05 1.283 0.20059
## pc11 -1.156e-04 5.627e-05 -2.054 0.04104 *
## pc12 1.495e-04 5.932e-05 2.521 0.01235 *
## pc13 -1.168e-05 6.303e-05 -0.185 0.85314
## pc14 1.152e-04 6.639e-05 1.735 0.08392 .
## pc15 -8.849e-05 6.875e-05 -1.287 0.19923
## pc16 2.151e-04 7.838e-05 2.745 0.00650 **
## pc17 5.235e-05 6.571e-05 0.797 0.42639
## pc18 1.915e-04 6.606e-05 2.900 0.00407 **
## pc19 6.135e-05 7.556e-05 0.812 0.41759
## pc20 1.762e-05 8.366e-05 0.211 0.83337
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.003930387)
##
## Null deviance: 1.32034 on 275 degrees of freedom
## Residual deviance: 0.97455 on 247 degrees of freedom
## (11 observations deleted due to missingness)
## AIC: NA
##
## Number of Fisher Scoring iterations: 3
Anova(envpop_SD_glm)
LR Chisq | Df | Pr(>Chisq) | |
---|---|---|---|
Images | 4.3800469 | 1 | 0.0363620 |
env_pc1 | 0.3446832 | 1 | 0.5571384 |
env_pc2 | 8.7788505 | 1 | 0.0030474 |
env_pc3 | 0.0007766 | 1 | 0.9777675 |
env_pc4 | 0.0649933 | 1 | 0.7987711 |
env_pc5 | 5.3803718 | 1 | 0.0203645 |
env_pc6 | 0.0676737 | 1 | 0.7947545 |
env_pc7 | 6.3760969 | 1 | 0.0115668 |
pc1 | 6.2790801 | 1 | 0.0122172 |
pc2 | 8.3007671 | 1 | 0.0039628 |
pc3 | 3.3399440 | 1 | 0.0676169 |
pc4 | 0.2211243 | 1 | 0.6381846 |
pc5 | 0.8207838 | 1 | 0.3649511 |
pc6 | 0.0013549 | 1 | 0.9706379 |
pc7 | 0.8578876 | 1 | 0.3543305 |
pc8 | 1.3774329 | 1 | 0.2405389 |
pc9 | 0.3462094 | 1 | 0.5562668 |
pc10 | 1.6453567 | 1 | 0.1995921 |
pc11 | 4.2166224 | 1 | 0.0400298 |
pc12 | 6.3560573 | 1 | 0.0116981 |
pc13 | 0.0343334 | 1 | 0.8529994 |
pc14 | 3.0105070 | 1 | 0.0827264 |
pc15 | 1.6564609 | 1 | 0.1980818 |
pc16 | 7.5500862 | 1 | 0.0060007 |
pc17 | 0.6343679 | 1 | 0.4257577 |
pc18 | 8.4192510 | 1 | 0.0037127 |
pc19 | 0.6598405 | 1 | 0.4166163 |
pc20 | 0.0443576 | 1 | 0.8331898 |
plot(effect("env_pc2", envpop_SD_glm, partial.residuals = T), xlab = "environmental principal component 2",
ylab = "log mean stomata density [1/mm²]", main = "")
Figure S22a: Effect plot of generalized linear models of stomata density with climatic principal component 2 as predictors. Plots show the linear fit (red solid line) and the smoothed fit of partial residuals (black) of the specific predictor. Red shadows represent the errror of the fit. Grey dots are partial residuals.
plot(effect("env_pc5", envpop_SD_glm, partial.residuals = T), xlab = "environmental principal component 5",
ylab = " log mean stomata density [1/mm²]", main = "")
Figure S22b: Effect plot of generalized linear models of stomata density with climatic principal component 5 as predictors. Plots show the linear fit (red solid line) and the smoothed fit of partial residuals (black) of the specific predictor. Red shadows represent the errror of the fit. Grey dots are partial residuals.
plot(effect("env_pc7", envpop_SD_glm, partial.residuals = T), xlab = "environmental principal component 7",
ylab = " log mean stomata density [1/mm²]", main = "")
Figure S22c: Effect plot of generalized linear models of stomata density with climatic principal component 7 as predictors. Plots show the linear fit (red solid line) and the smoothed fit of partial residuals (black) of the specific predictor. Red shadows represent the errror of the fit. Grey dots are partial residuals.
6.3 delta 13C
envpop_C13_glm = glm(data = assoC13_dedup, X13C ~ env_pc1 + env_pc2 + env_pc3 +
env_pc4 + env_pc5 + env_pc6 + env_pc7 + pc1 + pc2 + pc3 + pc4 + pc5 + pc6 +
pc7 + pc8 + pc9 + pc10 + pc11 + pc12 + pc13 + pc14 + pc15 + pc16 + pc17 +
pc18 + pc19 + pc20)
summary(envpop_C13_glm)
##
## Call:
## glm(formula = X13C ~ env_pc1 + env_pc2 + env_pc3 + env_pc4 +
## env_pc5 + env_pc6 + env_pc7 + pc1 + pc2 + pc3 + pc4 + pc5 +
## pc6 + pc7 + pc8 + pc9 + pc10 + pc11 + pc12 + pc13 + pc14 +
## pc15 + pc16 + pc17 + pc18 + pc19 + pc20, data = assoC13_dedup)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2204 -0.6398 -0.0675 0.6293 3.3855
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.553e+01 1.076e-01 -330.137 < 2e-16 ***
## env_pc1 1.440e-01 1.001e-01 1.439 0.151807
## env_pc2 6.778e-02 7.238e-02 0.936 0.350219
## env_pc3 1.147e-01 1.068e-01 1.074 0.284107
## env_pc4 -7.257e-02 1.064e-01 -0.682 0.495920
## env_pc5 -2.220e-03 1.095e-01 -0.020 0.983849
## env_pc6 -2.757e-01 1.993e-01 -1.383 0.168158
## env_pc7 8.900e-02 3.767e-01 0.236 0.813471
## pc1 -5.697e-03 1.700e-03 -3.350 0.000974 ***
## pc2 2.726e-03 1.238e-03 2.202 0.028897 *
## pc3 4.837e-03 1.512e-03 3.199 0.001616 **
## pc4 6.274e-03 2.176e-03 2.883 0.004395 **
## pc5 4.962e-03 1.141e-02 0.435 0.664055
## pc6 -4.842e-03 3.498e-03 -1.384 0.167966
## pc7 3.259e-03 3.470e-03 0.939 0.348781
## pc8 -1.078e-04 2.237e-03 -0.048 0.961605
## pc9 3.847e-03 2.937e-03 1.310 0.191784
## pc10 2.099e-03 3.935e-03 0.533 0.594341
## pc11 -4.140e-03 2.344e-03 -1.767 0.078909 .
## pc12 3.691e-03 2.672e-03 1.381 0.168819
## pc13 3.102e-06 2.676e-03 0.001 0.999076
## pc14 -4.582e-04 2.620e-03 -0.175 0.861322
## pc15 1.304e-03 3.009e-03 0.433 0.665244
## pc16 -2.218e-03 3.623e-03 -0.612 0.541220
## pc17 6.278e-04 2.566e-03 0.245 0.806981
## pc18 2.832e-03 2.657e-03 1.066 0.287894
## pc19 -2.201e-03 3.417e-03 -0.644 0.520184
## pc20 -6.491e-03 3.409e-03 -1.904 0.058420 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1.043425)
##
## Null deviance: 299.01 on 217 degrees of freedom
## Residual deviance: 198.25 on 190 degrees of freedom
## (9 observations deleted due to missingness)
## AIC: 655.96
##
## Number of Fisher Scoring iterations: 2
Anova(envpop_C13_glm)
LR Chisq | Df | Pr(>Chisq) | |
---|---|---|---|
env_pc1 | 2.0706088 | 1 | 0.1501616 |
env_pc2 | 0.8769780 | 1 | 0.3490307 |
env_pc3 | 1.1538493 | 1 | 0.2827448 |
env_pc4 | 0.4654480 | 1 | 0.4950888 |
env_pc5 | 0.0004109 | 1 | 0.9838274 |
env_pc6 | 1.9138646 | 1 | 0.1665346 |
env_pc7 | 0.0558275 | 1 | 0.8132166 |
pc1 | 11.2247072 | 1 | 0.0008072 |
pc2 | 4.8471004 | 1 | 0.0276926 |
pc3 | 10.2332904 | 1 | 0.0013793 |
pc4 | 8.3108780 | 1 | 0.0039408 |
pc5 | 0.1892268 | 1 | 0.6635609 |
pc6 | 1.9155982 | 1 | 0.1663427 |
pc7 | 0.8822411 | 1 | 0.3475886 |
pc8 | 0.0023236 | 1 | 0.9615542 |
pc9 | 1.7160468 | 1 | 0.1902027 |
pc10 | 0.2845757 | 1 | 0.5937177 |
pc11 | 3.1207115 | 1 | 0.0773030 |
pc12 | 1.9079039 | 1 | 0.1671963 |
pc13 | 0.0000013 | 1 | 0.9990752 |
pc14 | 0.0306000 | 1 | 0.8611358 |
pc15 | 0.1878018 | 1 | 0.6647525 |
pc16 | 0.3746359 | 1 | 0.5404881 |
pc17 | 0.0598605 | 1 | 0.8067166 |
pc18 | 1.1357983 | 1 | 0.2865419 |
pc19 | 0.4150701 | 1 | 0.5194073 |
pc20 | 3.6252594 | 1 | 0.0569087 |
6.4 delta 13C without population structure correction
env_C13_glm = glm(data = assoC13_dedup, X13C ~ env_pc1 + env_pc2 + env_pc3 +
env_pc4 + env_pc5 + env_pc6 + env_pc7)
summary(env_C13_glm)
##
## Call:
## glm(formula = X13C ~ env_pc1 + env_pc2 + env_pc3 + env_pc4 +
## env_pc5 + env_pc6 + env_pc7, data = assoC13_dedup)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.3263 -0.8325 -0.0285 0.6942 4.5681
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -35.57293 0.07830 -454.327 <2e-16 ***
## env_pc1 -0.04330 0.04533 -0.955 0.3406
## env_pc2 0.14646 0.05677 2.580 0.0106 *
## env_pc3 -0.12485 0.08233 -1.516 0.1309
## env_pc4 -0.13193 0.08995 -1.467 0.1440
## env_pc5 0.16005 0.09280 1.725 0.0861 .
## env_pc6 0.32391 0.16930 1.913 0.0571 .
## env_pc7 -0.04597 0.28254 -0.163 0.8709
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1.314472)
##
## Null deviance: 299.01 on 217 degrees of freedom
## Residual deviance: 276.04 on 210 degrees of freedom
## (9 observations deleted due to missingness)
## AIC: 688.12
##
## Number of Fisher Scoring iterations: 2
Anova(env_C13_glm)
LR Chisq | Df | Pr(>Chisq) | |
---|---|---|---|
env_pc1 | 0.9123717 | 1 | 0.3394859 |
env_pc2 | 6.6551096 | 1 | 0.0098872 |
env_pc3 | 2.2993860 | 1 | 0.1294251 |
env_pc4 | 2.1510232 | 1 | 0.1424749 |
env_pc5 | 2.9743846 | 1 | 0.0845923 |
env_pc6 | 3.6603121 | 1 | 0.0557232 |
env_pc7 | 0.0264722 | 1 | 0.8707524 |
plot(effect("env_pc2", env_C13_glm, partial.residuals = T), xlab = "environmental principal component 2",
ylab = bquote(delta^13 * "C"), main = "")
Figure S23a: Effect plot of generalized linear models of delta 13C with climatic principal component 2 as predictors. Plots show the linear fit (red solid line) and the smoothed fit of partial residuals (black) of the specific predictor. Red shadows represent the errror of the fit. Grey dots are partial residuals.
plot(effect("env_pc3", env_C13_glm, partial.residuals = T), xlab = "environmental principal component 3",
ylab = bquote(delta^13 * "C"), main = "")
Figure S23b: Effect plot of generalized linear models of delta 13C with climatic principal component 3 as predictors. Plots show the linear fit (red solid line) and the smoothed fit of partial residuals (black) of the specific predictor. Red shadows represent the errror of the fit. Grey dots are partial residuals.
plot(effect("env_pc4", env_C13_glm, partial.residuals = T), xlab = "environmental principal component 4",
ylab = bquote(delta^13 * "C"), main = "")
Figure S23c: Effect plot of generalized linear models of delta 13C with climatic principal component 4 as predictors. Plots show the linear fit (red solid line) and the smoothed fit of partial residuals (black) of the specific predictor. Red shadows represent the errror of the fit. Grey dots are partial residuals.
6.5 Analysis of correlation between QTL alleles and climate
6.5.1 QTL on Chr 2
allele_glm = glm(data = c13[c13$origin == "south_sweden", ], allele ~ env_pc1 +
env_pc1 + env_pc2 + env_pc3 + env_pc4 + env_pc5 + env_pc6 + env_pc7, family = "binomial")
summary(allele_glm)
##
## Call:
## glm(formula = allele ~ env_pc1 + env_pc1 + env_pc2 + env_pc3 +
## env_pc4 + env_pc5 + env_pc6 + env_pc7, family = "binomial",
## data = c13[c13$origin == "south_sweden", ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4476 0.3234 0.4207 0.6251 1.3423
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.2106 3.7406 0.858 0.391
## env_pc1 0.5039 3.0613 0.165 0.869
## env_pc2 0.6067 2.5546 0.238 0.812
## env_pc3 2.6962 2.5808 1.045 0.296
## env_pc4 -1.1675 2.5673 -0.455 0.649
## env_pc5 3.4645 3.8991 0.889 0.374
## env_pc6 1.4407 3.1501 0.457 0.647
## env_pc7 -0.9111 2.0046 -0.455 0.649
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 107.258 on 109 degrees of freedom
## Residual deviance: 92.573 on 102 degrees of freedom
## (11 observations deleted due to missingness)
## AIC: 108.57
##
## Number of Fisher Scoring iterations: 5
Anova(allele_glm)
LR Chisq | Df | Pr(>Chisq) | |
---|---|---|---|
env_pc1 | 0.0280200 | 1 | 0.8670618 |
env_pc2 | 0.0598972 | 1 | 0.8066585 |
env_pc3 | 1.2642986 | 1 | 0.2608390 |
env_pc4 | 0.2395481 | 1 | 0.6245327 |
env_pc5 | 1.0770991 | 1 | 0.2993474 |
env_pc6 | 0.2171533 | 1 | 0.6412176 |
env_pc7 | 0.2181546 | 1 | 0.6404496 |
6.5.2 QTL on Chr 4
allele_glm = glm(data = c13[c13$origin == "south_sweden", ], allele4 ~ env_pc1 +
env_pc1 + env_pc2 + env_pc3 + env_pc4 + env_pc5 + env_pc6 + env_pc7, family = "binomial")
summary(allele_glm)
##
## Call:
## glm(formula = allele4 ~ env_pc1 + env_pc1 + env_pc2 + env_pc3 +
## env_pc4 + env_pc5 + env_pc6 + env_pc7, family = "binomial",
## data = c13[c13$origin == "south_sweden", ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.61618 -1.15493 0.06165 1.08423 1.50563
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.014 2.633 -0.765 0.444
## env_pc1 -1.322 2.031 -0.651 0.515
## env_pc2 -1.663 1.819 -0.915 0.360
## env_pc3 -2.361 1.866 -1.265 0.206
## env_pc4 2.245 1.820 1.233 0.217
## env_pc5 -2.974 2.759 -1.078 0.281
## env_pc6 -2.187 2.295 -0.953 0.341
## env_pc7 -1.241 1.344 -0.924 0.356
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 152.49 on 109 degrees of freedom
## Residual deviance: 146.71 on 102 degrees of freedom
## (11 observations deleted due to missingness)
## AIC: 162.71
##
## Number of Fisher Scoring iterations: 4
Anova(allele_glm)
LR Chisq | Df | Pr(>Chisq) | |
---|---|---|---|
env_pc1 | 0.4645538 | 1 | 0.4955034 |
env_pc2 | 0.9531884 | 1 | 0.3289090 |
env_pc3 | 1.8104245 | 1 | 0.1784573 |
env_pc4 | 1.8807582 | 1 | 0.1702480 |
env_pc5 | 1.4274935 | 1 | 0.2321739 |
env_pc6 | 0.9945531 | 1 | 0.3186321 |
env_pc7 | 0.8727100 | 1 | 0.3502061 |
7 Analysis of Fst and Qst
7.1 Load Fst data
pop_combs = combn(levels(as.factor(asso_mean_dedup$origin)), 2)
pop_comb_fstats = list()
# pairwise Fst code for calculating populations_fstats_maf5_16102017.Rdata
# for (i in 1:ncol(pop_combs)){ curr_comb=pop_combs[,i]
# variants_tair_2pop=variants_tair[variants_tair$pop==curr_comb[1]|
# variants_tair$pop==curr_comb[2],]
# pop_comb_fstats[[i]]=basic.stats(variants_tair_2pop) }
load("/home/hannes/Stomata_Data/R_final/popcomb_fst_maf05_duprem.Rdata") # load pre-calculated data
7.2 Calculate Qst for all phenotypes
qst_popcomb_SS = numeric()
for (i in 1:ncol(pop_combs)) {
# QST for stomata size
curr_comb = pop_combs[, i]
asso_currcomb = asso_dedup[asso_dedup$origin == curr_comb[1] | asso_dedup$origin ==
curr_comb[2], ]
qst_SS_model <- lme(fixed = stomatasize ~ block, random = ~1 | origin/genotype,
data = asso_currcomb)
sigmaB_SS <- as.numeric(VarCorr(qst_SS_model)[2])
sigmaW_SS <- as.numeric(VarCorr(qst_SS_model)[4])
qst_SS = sigmaB_SS/(sigmaB_SS + sigmaW_SS)
qst_popcomb_SS = c(qst_popcomb_SS, qst_SS)
}
qst_popcomb_SD = numeric()
for (i in 1:ncol(pop_combs)) {
# Qst for stomata density
curr_comb = pop_combs[, i]
asso_currcomb = asso_dedup[asso_dedup$origin == curr_comb[1] | asso_dedup$origin ==
curr_comb[2], ]
qst_SD_model <- lme(fixed = stomata ~ block, random = ~1 | origin/genotype,
data = asso_currcomb)
sigmaB_SD <- as.numeric(VarCorr(qst_SD_model)[2])
sigmaW_SD <- as.numeric(VarCorr(qst_SD_model)[4])
qst_SD = sigmaB_SD/(sigmaB_SD + sigmaW_SD)
qst_popcomb_SD = c(qst_popcomb_SD, qst_SD)
}
qst_popcomb_13C = numeric()
for (i in 1:ncol(pop_combs)) {
# Qst for delta 13C
curr_comb = pop_combs[, i]
asso_currcomb = assoC13_dedup[assoC13_dedup$origin == curr_comb[1] | assoC13_dedup$origin ==
curr_comb[2], ]
qst_13C_model_nogen <- lme(fixed = X13C ~ tray_position, random = ~1 | origin,
data = asso_currcomb, na.action = na.exclude)
sigmaB_13C <- as.numeric(VarCorr(qst_13C_model_nogen)[1])
sigmaW_13C <- as.numeric(VarCorr(qst_13C_model_nogen)[2])
qst_13C = sigmaB_13C/(sigmaB_13C + sigmaW_13C)
qst_popcomb_13C = c(qst_popcomb_13C, qst_13C)
}
7.3 Combine Qst, Fst and climatic distance of all regions and phenotypes to single table
qst_fst_all = data.frame(fst = numeric(), qst = numeric(), pheno = character(),
pop1 = character(), pop2 = character(), stringsAsFactors = F)
for (i in 1:ncol(pop_combs)) {
# combine Qst, Fst and environmental distance in single table for all
# phenotypes
curr_fstats = pop_comb_fstats[[i]]
curr_fstats$perloc$Fst[curr_fstats$perloc$Fst < 0] = 0
fst_quant = quantile(curr_fstats$perloc$Fst, 0.95, na.rm = T)
curr_pops = pop_combs[, i]
envdist_mod = manova(data = asso_mean[asso_mean$origin == curr_pops[1] |
asso_mean$origin == curr_pops[2], ], cbind(env_pc1, env_pc2, env_pc3,
env_pc4, env_pc5, env_pc6, env_pc7) ~ origin)
envdist_sum = summary(envdist_mod)
envdist = envdist_sum$stats[1, 3]
curr_qst = qst_popcomb_SS[i]
curr_pheno = "stomata size"
qst_fst_all = rbind(qst_fst_all, data.frame(fst_quant, curr_qst, curr_pheno,
curr_pops[1], curr_pops[2], envdist, stringsAsFactors = F))
curr_pops = pop_combs[, i]
curr_qst = qst_popcomb_SD[i]
curr_pheno = "stomata density"
qst_fst_all = rbind(qst_fst_all, data.frame(fst_quant, curr_qst, curr_pheno,
curr_pops[1], curr_pops[2], envdist, stringsAsFactors = F))
curr_pops = pop_combs[, i]
curr_qst = qst_popcomb_13C[i]
curr_pheno = as.character("delta 13C")
qst_fst_all = rbind(qst_fst_all, data.frame(fst_quant, curr_qst, curr_pheno,
curr_pops[1], curr_pops[2], envdist, stringsAsFactors = F))
}
names(qst_fst_all) = c("fst", "qst", "phenotype", "pop1", "pop2", "climdist")
qst_fst_all$pop1 = as.factor(qst_fst_all$pop1)
qst_fst_all$pop2 = as.factor(qst_fst_all$pop2)
qst_fst_all$phenotype = factor(qst_fst_all$phenotype, levels = c("delta 13C",
"stomata density", "stomata size"))
7.3.1 Test pairwise correlation between climatic distance and Qst-Fst distance using Mantel test
envdist_mat = matrix(NA, nrow = 5, ncol = 5)
rownames(envdist_mat) = c(levels(qst_fst_all$pop1), "western_europe")
colnames(envdist_mat) = c(levels(qst_fst_all$pop1), "western_europe")
qstdist_mat_SS = matrix(NA, nrow = 5, ncol = 5)
rownames(qstdist_mat_SS) = c(levels(qst_fst_all$pop1), "western_europe")
colnames(qstdist_mat_SS) = c(levels(qst_fst_all$pop1), "western_europe")
qstdist_mat_SD = matrix(NA, nrow = 5, ncol = 5)
rownames(qstdist_mat_SD) = c(levels(qst_fst_all$pop1), "western_europe")
colnames(qstdist_mat_SD) = c(levels(qst_fst_all$pop1), "western_europe")
qstdist_mat_13C = matrix(NA, nrow = 5, ncol = 5)
rownames(qstdist_mat_13C) = c(levels(qst_fst_all$pop1), "western_europe")
colnames(qstdist_mat_13C) = c(levels(qst_fst_all$pop1), "western_europe")
for (i in 1:nrow(qst_fst_all)) {
curr = qst_fst_all[i, ]
if (curr$phenotype == "stomata size") {
envdist_mat[rownames(envdist_mat) == curr$pop1, colnames(envdist_mat) ==
curr$pop2] = curr$climdist
envdist_mat[colnames(envdist_mat) == curr$pop2, rownames(envdist_mat) ==
curr$pop1] = curr$climdist
qstdist_mat_SS[rownames(qstdist_mat_SS) == curr$pop1, colnames(qstdist_mat_SS) ==
curr$pop2] = curr$qst - curr$fst
qstdist_mat_SS[colnames(qstdist_mat_SS) == curr$pop2, rownames(qstdist_mat_SS) ==
curr$pop1] = curr$qst - curr$fst
} else if (curr$phenotype == "stomata density") {
qstdist_mat_SD[rownames(qstdist_mat_SD) == curr$pop1, colnames(qstdist_mat_SD) ==
curr$pop2] = curr$qst - curr$fst
qstdist_mat_SD[colnames(qstdist_mat_SD) == curr$pop2, rownames(qstdist_mat_SD) ==
curr$pop1] = curr$qst - curr$fst
} else if (curr$phenotype == "delta 13C") {
qstdist_mat_13C[rownames(qstdist_mat_13C) == curr$pop1, colnames(qstdist_mat_13C) ==
curr$pop2] = curr$qst - curr$fst
qstdist_mat_13C[colnames(qstdist_mat_13C) == curr$pop2, rownames(qstdist_mat_13C) ==
curr$pop1] = curr$qst - curr$fst
}
}
envdist_mat[is.na(envdist_mat)] = 0
qstdist_mat_SS[is.na(qstdist_mat_SS)] = 0
qstdist_mat_SD[is.na(qstdist_mat_SD)] = 0
qstdist_mat_13C[is.na(qstdist_mat_13C)] = 0
mantel.rtest(as.dist(abs(envdist_mat)), as.dist(abs(qstdist_mat_SS))) # Mantel test for stomata size
## Monte-Carlo test
## Call: mantelnoneuclid(m1 = m1, m2 = m2, nrepet = nrepet)
##
## Observation: -0.2932816
##
## Based on 99 replicates
## Simulated p-value: 0.84
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## -0.86399340 0.06025254 0.16743333
mantel.rtest(as.dist(abs(envdist_mat)), as.dist(abs(qstdist_mat_SD))) # Mantel test for stomata density
## Monte-Carlo test
## Call: mantelnoneuclid(m1 = m1, m2 = m2, nrepet = nrepet)
##
## Observation: -0.0926982
##
## Based on 99 replicates
## Simulated p-value: 0.53
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## -0.1171089 -0.0533284 0.1130178
mantel.rtest(as.dist(abs(envdist_mat)), as.dist(abs(qstdist_mat_13C))) # Mantel test for delta 13C
## Monte-Carlo test
## Call: mantelnoneuclid(m1 = m1, m2 = m2, nrepet = nrepet)
##
## Observation: -0.1715109
##
## Based on 99 replicates
## Simulated p-value: 0.56
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## -0.22607107 -0.08466614 0.14756984
7.3.2 Code for QST permuation
# due to the long run-time this section is not evaluated for the markdown
# file
asso_rand = asso_dedup
set.seed(9)
asso_rand$ID = as.character(asso_rand$ID)
for (i in 1:nrow(asso_rand)) {
if (asso_rand$ID[i] %in% assoC13$ID & asso_rand$block[i] == "1") {
asso_rand[i, "delta_13C"] = assoC13$X13C[assoC13$ID == asso_rand$ID[i]]
}
}
asso_rand$ID = as.factor(asso_rand$ID)
N = 1000
qst_sum = numeric(N)
qst_sum_13C = numeric(N)
qst_sum_SS = numeric(N)
qst_sum_SD = numeric(N)
qst_mean_dist = numeric(N)
qst_max_dist_13C = numeric(N)
qst_max_dist_SD = numeric(N)
qst_max_dist_SS = numeric(N)
qst_max_dist = numeric(N)
qst_max_all_dist = numeric(N)
qst_fst_all_list = list(N)
for (j in 1:N) {
print(j)
levels(asso_rand$ID) = sample(levels(asso_rand$ID))
asso_rand$ID = as.character(asso_rand$ID)
asso_rand$origin = as.character(asso_rand$origin)
asso_rand$genotype = as.character(asso_rand$genotype)
for (i in 1:nrow(asso_rand)) {
# change here also the source used to create asso_rand
asso_rand[i, "origin"] = as.character(asso$origin[asso$ID == asso_rand$ID[i]])[1]
asso_rand[i, "genotype"] = as.character(asso$genotype[asso$ID == asso_rand$ID[i]])[1]
}
asso_rand$ID = as.factor(asso_rand$ID)
asso_rand$origin = as.factor(asso_rand$origin)
asso_rand$genotype = as.factor(asso_rand$genotype)
qst_popcomb_SS_rand = numeric()
for (i in 1:ncol(pop_combs)) {
curr_comb = pop_combs[, i]
asso_currcomb = asso_rand[asso_rand$origin == curr_comb[1] | asso_rand$origin ==
curr_comb[2], ]
print(summary(asso_currcomb))
qst_SS_model <- withTimeout({
lme(fixed = stomatasize ~ block, random = ~1 | origin/genotype,
data = asso_currcomb)
}, timeout = 60, onTimeout = "error")
if (is.null(qst_SS_model)) {
qst_popcomb_SS_rand = c(qst_popcomb_SS_rand, NA)
next
}
sigmaB_SS <- as.numeric(VarCorr(qst_SS_model)[2])
sigmaW_SS <- as.numeric(VarCorr(qst_SS_model)[4])
qst_SS = sigmaB_SS/(sigmaB_SS + sigmaW_SS)
qst_popcomb_SS_rand = c(qst_popcomb_SS_rand, qst_SS)
}
qst_popcomb_SD_rand = numeric()
for (i in 1:ncol(pop_combs)) {
curr_comb = pop_combs[, i]
asso_currcomb = asso_rand[asso_rand$origin == curr_comb[1] | asso_rand$origin ==
curr_comb[2], ]
qst_SD_model <- withTimeout({
lme(fixed = stomata ~ block, random = ~1 | origin/genotype, data = asso_currcomb)
}, timeout = 60, onTimeout = "error")
if (is.null(qst_SD_model)) {
qst_popcomb_SD_rand = c(qst_popcomb_SD_rand, NA)
next
}
sigmaB_SD <- as.numeric(VarCorr(qst_SD_model)[2])
sigmaW_SD <- as.numeric(VarCorr(qst_SD_model)[4])
qst_SD = sigmaB_SD/(sigmaB_SD + sigmaW_SD)
qst_popcomb_SD_rand = c(qst_popcomb_SD_rand, qst_SD)
}
qst_popcomb_C13_rand = numeric()
for (i in 1:ncol(pop_combs)) {
curr_comb = pop_combs[, i]
asso_currcomb = asso_rand[asso_rand$origin == curr_comb[1] | asso_rand$origin ==
curr_comb[2], ]
qst_C13_model_nogen <- withTimeout({
lme(fixed = delta_13C ~ tray_position, random = ~1 | origin, data = asso_currcomb,
na.action = na.exclude)
}, timeout = 60, onTimeout = "error")
if (is.null(qst_C13_model_nogen)) {
qst_popcomb_C13_rand = c(qst_popcomb_C13_rand, NA)
next
}
sigmaB_C13 <- as.numeric(VarCorr(qst_C13_model_nogen)[1])
sigmaW_C13 <- as.numeric(VarCorr(qst_C13_model_nogen)[2])
qst_C13 = sigmaB_C13/(sigmaB_C13 + sigmaW_C13)
qst_popcomb_C13_rand = c(qst_popcomb_C13_rand, qst_C13)
}
qst_fst_all = data.frame(fst = numeric(), qst = numeric(), pheno = character(),
pop1 = character(), pop2 = character(), stringsAsFactors = F)
for (i in 1:ncol(pop_combs)) {
curr_fstats = pop_comb_fstats[[i]]
curr_fstats$perloc$Fst[curr_fstats$perloc$Fst < 0] = 0
fst_quant = quantile(curr_fstats$perloc$Fst, 0.95, na.rm = T)
curr_pops = pop_combs[, i]
envdist_mod = manova(data = asso_mean[asso_mean$origin == curr_pops[1] |
asso_mean$origin == curr_pops[2], ], cbind(env_pc1, env_pc2, env_pc3,
env_pc4, env_pc5, env_pc6, env_pc7) ~ origin)
envdist_sum = summary(envdist_mod)
envdist = envdist_sum$stats[1, 3]
curr_qst = qst_popcomb_SS_rand[i]
curr_pheno = "stomata size"
qst_fst_all = rbind(qst_fst_all, data.frame(fst_quant, curr_qst, curr_pheno,
curr_pops[1], curr_pops[2], envdist, stringsAsFactors = F))
curr_pops = pop_combs[, i]
curr_qst = qst_popcomb_SD_rand[i]
curr_pheno = "stomata density"
qst_fst_all = rbind(qst_fst_all, data.frame(fst_quant, curr_qst, curr_pheno,
curr_pops[1], curr_pops[2], envdist, stringsAsFactors = F))
curr_pops = pop_combs[, i]
curr_qst = qst_popcomb_C13_rand[i]
curr_pheno = as.character("delta 13C")
qst_fst_all = rbind(qst_fst_all, data.frame(fst_quant, curr_qst, curr_pheno,
curr_pops[1], curr_pops[2], envdist, stringsAsFactors = F))
}
names(qst_fst_all) = c("fst", "qst", "phenotype", "pop1", "pop2", "climdist")
qst_fst_all$pop1 = as.factor(qst_fst_all$pop1)
qst_fst_all$pop2 = as.factor(qst_fst_all$pop2)
qst_fst_all$phenotype = as.factor(qst_fst_all$phenotype)
qst_fst_all$qst[is.na(qst_fst_all$qst)] = 0
qst_sum[j] = sum(qst_fst_all$qst > qst_fst_all$fst)
qst_sum_13C[j] = sum(qst_fst_all[qst_fst_all$phenotype == "delta 13C", ]$qst >
qst_fst_all[qst_fst_all$phenotype == "delta 13C", ]$fst)
qst_sum_SS[j] = sum(qst_fst_all[qst_fst_all$phenotype == "stomata size",
]$qst > qst_fst_all[qst_fst_all$phenotype == "stomata size", ]$fst)
qst_sum_SD[j] = sum(qst_fst_all[qst_fst_all$phenotype == "stomata density",
]$qst > qst_fst_all[qst_fst_all$phenotype == "stomata density", ]$fst)
qst_mean_dist[j] = mean(qst_fst_all$qst[qst_fst_all$qst > qst_fst_all$fst] -
qst_fst_all$fst[qst_fst_all$qst > qst_fst_all$fst])
qst_max_dist[j] = max(qst_fst_all$qst[qst_fst_all$qst > qst_fst_all$fst] -
qst_fst_all$fst[qst_fst_all$qst > qst_fst_all$fst])
qst_max_dist_13C[j] = max(qst_fst_all[qst_fst_all$phenotype == "delta 13C",
]$qst[qst_fst_all[qst_fst_all$phenotype == "delta 13C", ]$qst > qst_fst_all[qst_fst_all$phenotype ==
"delta 13C", ]$fst] - qst_fst_all[qst_fst_all$phenotype == "delta 13C",
]$fst[qst_fst_all[qst_fst_all$phenotype == "delta 13C", ]$qst > qst_fst_all[qst_fst_all$phenotype ==
"delta 13C", ]$fst])
qst_max_dist_SS[j] = max(qst_fst_all[qst_fst_all$phenotype == "stomata size",
]$qst[qst_fst_all[qst_fst_all$phenotype == "stomata size", ]$qst > qst_fst_all[qst_fst_all$phenotype ==
"stomata size", ]$fst] - qst_fst_all[qst_fst_all$phenotype == "stomata size",
]$fst[qst_fst_all[qst_fst_all$phenotype == "stomata size", ]$qst > qst_fst_all[qst_fst_all$phenotype ==
"stomata size", ]$fst])
qst_max_dist_SD[j] = max(qst_fst_all[qst_fst_all$phenotype == "stomata density",
]$qst[qst_fst_all[qst_fst_all$phenotype == "stomata density", ]$qst >
qst_fst_all[qst_fst_all$phenotype == "stomata density", ]$fst] - qst_fst_all[qst_fst_all$phenotype ==
"stomata density", ]$fst[qst_fst_all[qst_fst_all$phenotype == "stomata density",
]$qst > qst_fst_all[qst_fst_all$phenotype == "stomata density", ]$fst])
qst_max_all_dist[j] = max(qst_fst_all$qst - qst_fst_all$fst)
qst_fst_all_list[[j]] = qst_fst_all
}
qst_max_dist[is.infinite(qst_max_dist)] = NA
qst_max_dist[is.na(qst_max_dist)] = 0
qst_max_dist_13C[is.infinite(qst_max_dist_13C)] = 0
qst_max_dist_SS[is.infinite(qst_max_dist_SS)] = 0
qst_max_dist_SD[is.infinite(qst_max_dist_SD)] = 0
threshold_13C = quantile(qst_max_dist_13C, 0.95, na.rm = T)
threshold_SS = quantile(qst_max_dist_SS, 0.95, na.rm = T)
threshold_SD = quantile(qst_max_dist_SD, 0.95, na.rm = T)