#====================================== # SNP dataset genetic differentiation # # European lobster (Homarus gammarus) # # Tom Jenkins | t.l.jenkins@exeter.ac.uk # # Jenkins et al. 2019 Evolutionary Applications # #====================================== ## Load packages lib = c("adegenet","ggplot2","RColorBrewer","pegas","poppr","mmod","hierfstat", "ggrepel","diveRsity","tidyr") lapply(lib, library, character.only=TRUE) ## Load RData SNP dataset load("../lobster_1278ind_79snps_40pop.RData") data_filt nPop(data_filt) summary(data_filt)$n.by.pop ## Create outlier Genind dataset outliers = c("11291","15581","32358","42395","53935","58053", "65064","65576") data.outlier = data_filt[loc=outliers] data.outlier ## Create neutral Genind dataset neutral = setdiff(locNames(data_filt), outliers) data.neutral = data_filt[loc=neutral] data.neutral # Section 1: full dataset ------------------------------------------------- ## Notes before running these scripts: ## 1. Export Genind objects to Genepop file ## 2. In Genepop file, change order of populations to the order ## Functions available from ResearchGate: ## https://www.researchgate.net/publication/326560230_Export_a_biallelic_SNP_Genind_object_to_a_Genepop_file # ----------------- # # # Full SNP dataset # # ----------------- # ## Calculate differentiation statistics and CIs # diffstats = diffCalc("lobster_1278ind_79snps_40pop.gen", # fst=TRUE, pairwise=TRUE, bs_locus=TRUE, bs_pairwise=TRUE, # boots=1000, alpha=0.05, para=FALSE) # save(diffstats, file="full_dataset.RData") load("full_dataset.RData") names(diffstats) ## Global statistics with 95 % CIs diffstats$global_bs ## Create dataframe ordered by highest WC Fst df = data.frame(diffstats$bs_locus$Fst) df = df[order(df$actual, decreasing=TRUE), ] head(df) ## Get indexes of outliers and neutral snps index.out = which(df$locus %in% outliers) index.out index.neu = which(df$locus %in% neutral) index.neu ## Add column to dataframe df$panel = "neutral" df$panel[index.out] = "outliers" head(df) ## Plot the number of SNPs against WC Fst ggplot(data=df, aes(x=seq(1,nrow(df)), y=actual))+ geom_point(aes(colour=panel), size=2)+ scale_colour_manual(values = c("blue", "red"), label=c(" neutral SNPs"," outlier SNPs "))+ ylab(expression(italic("F")[st]))+ xlab("Number of SNPs")+ theme( legend.title = element_blank(), legend.text = element_text(colour="black", size=10), legend.position = "top", axis.text.y = element_text(colour="black", size=10), axis.text.x = element_text(colour="black", size=10), axis.title = element_text(colour="black", size=10) ) # ----------------- # # # W&C Fst heatmap # # ----------------- # ## Load differentiation data load("full_dataset.RData") names(diffstats) ## Prepare dataframe of pairwise Fsts df.sig = diffstats$bs_pairwise$Fst names(df.sig) head(df.sig) df.sig = separate(data=df.sig, col=populations, into=c("site1", "site2"), sep="vs ") head(df.sig) names(df.sig) ## Edit site names unique(df.sig$site1) df.sig$site1 = substr(df.sig$site1, 1, 3) df.sig$site2 = substr(df.sig$site2, 1, 3) unique(df.sig$site1) unique(df.sig$site2) df.sig$site1 = gsub("Idr", "Idr17", df.sig$site1) df.sig$site2 = gsub("Idr", "Idr17", df.sig$site2) df.sig$site1 = gsub("Lro", "Idr16", df.sig$site1) df.sig$site2 = gsub("Lro", "Idr16", df.sig$site2) df.sig$site1 = gsub("Sar", "Sar17", df.sig$site1) df.sig$site2 = gsub("Sar", "Sar17", df.sig$site2) df.sig$site1 = gsub("Sr1", "Sar13", df.sig$site1) df.sig$site2 = gsub("Sr1", "Sar13", df.sig$site2) unique(df.sig$site1) unique(df.sig$site2) head(df.sig) ## Convert minus and NA values to zero df.sig$actual[df.sig$actual < 0] = 0 min(df.sig$actual) ## Add column for significance df.sig$Sig = ifelse(df.sig$lower < 0 & df.sig$upper > 0 | df.sig$lower == df.sig$upper, "", "*") head(df.sig) ## Export Excel # write.csv(df.sig, file="FstWC_lobster.csv", row.names = FALSE) ## Reorder factor levels for plotting site_order = c("Tro","Ber","Flo","Gul","Kav","Lys","Sin","Hel","Oos", "Cro","Brd","Eye", "She","Ork","Sul","Heb","Sbs","Jer","Iom","Lyn","Pem", "Loo","Pad","Ios","Mul","Kil","Ven","Cor","Hoo", "Idr16","Idr17","Vig", "Sar13","Sar17","Laz","Tar","Sky","The","Tor","Ale") df.sig$site1 = factor(df.sig$site1, levels=site_order) df.sig$site2 = factor(df.sig$site2, levels=site_order) levels(df.sig$site1) levels(df.sig$site2) ## ggplot theme ggtheme = theme( axis.title = element_blank(), axis.line = element_blank(), axis.text.x = element_text(colour="black", size=10, face = "bold"), axis.text.y = element_text(colour="black", size=10, face = "bold"), panel.grid.major = element_blank(), panel.border = element_blank(), panel.background = element_blank(), legend.justification = c(1,0), legend.position = c(0.5,0.8), legend.direction = "horizontal", legend.text = element_text(size=15), legend.title = element_text(size=15) ) fstlab = expression(italic("F")[st]) ## Maximum Fst value max = round(max(df.sig$actual), 2) ;max mid = max/2 ## Create Fst heatmap Fst=ggplot(data=df.sig, aes(x=site2, y=site1, fill = actual))+ geom_tile(color="white")+ scale_fill_gradient2(low="blue", high="red", mid="pink", midpoint=mid, name=fstlab, breaks=c(0,0.1,0.2))+ geom_text(aes(site2, site1, label=Sig), color="black", size = 4) + scale_y_discrete(position="right")+ ggtheme+ guides(fill = guide_colorbar(barwidth = 10, barheight = 1.8, title.position = "top", title.hjust=0.5)) Fst # ggsave("FstWC_heatmap.png", width=15, height=10, dpi=600) # ggsave("FstWC_heatmap.pdf", width=15, height=10) # ----------------- # # # Jost's D heatmap # # ----------------- # ## Load differentiation data load("full_dataset.RData") names(diffstats) ## Prepare dataframe of pairwise Fsts df.sig = diffstats$bs_pairwise$D names(df.sig) head(df.sig) df.sig = separate(data=df.sig, col=populations, into=c("site1", "site2"), sep="vs ") head(df.sig) names(df.sig) ## Edit site names unique(df.sig$site1) df.sig$site1 = substr(df.sig$site1, 1, 3) df.sig$site2 = substr(df.sig$site2, 1, 3) unique(df.sig$site1) unique(df.sig$site2) df.sig$site1 = gsub("Idr", "Idr17", df.sig$site1) df.sig$site2 = gsub("Idr", "Idr17", df.sig$site2) df.sig$site1 = gsub("Lro", "Idr16", df.sig$site1) df.sig$site2 = gsub("Lro", "Idr16", df.sig$site2) df.sig$site1 = gsub("Sar", "Sar17", df.sig$site1) df.sig$site2 = gsub("Sar", "Sar17", df.sig$site2) df.sig$site1 = gsub("Sr1", "Sar13", df.sig$site1) df.sig$site2 = gsub("Sr1", "Sar13", df.sig$site2) unique(df.sig$site1) unique(df.sig$site2) head(df.sig) ## Convert minus and NA values to zero df.sig$actual[df.sig$actual < 0] = 0 min(df.sig$actual) ## Add column for significance df.sig$Sig = ifelse(df.sig$lower < 0 & df.sig$upper > 0 | df.sig$lower == df.sig$upper, "", "*") head(df.sig) ## Export Excel # write.csv(df.sig, file="FstWC_lobster.csv", row.names = FALSE) ## Reorder factor levels for plotting site_order = c("Tro","Ber","Flo","Gul","Kav","Lys","Sin","Hel","Oos", "Cro","Brd","Eye", "She","Ork","Sul","Heb","Sbs","Jer","Iom","Lyn","Pem", "Loo","Pad","Ios","Mul","Kil","Ven","Cor","Hoo", "Idr16","Idr17","Vig", "Sar13","Sar17","Laz","Tar","Sky","The","Tor","Ale") df.sig$site1 = factor(df.sig$site1, levels=site_order) df.sig$site2 = factor(df.sig$site2, levels=site_order) levels(df.sig$site1) levels(df.sig$site2) ## ggplot theme ggtheme = theme( axis.title = element_blank(), axis.line = element_blank(), axis.text.x = element_text(colour="black", size=10, face = "bold"), axis.text.y = element_text(colour="black", size=10, face = "bold"), panel.grid.major = element_blank(), panel.border = element_blank(), panel.background = element_blank(), legend.justification = c(1,0), legend.position = c(0.5,0.8), legend.direction = "horizontal", legend.text = element_text(size=15), legend.title = element_text(size=15) ) dlab = expression(italic("D")) ## Maximum Fst value max = round(max(df.sig$actual), 2) ;max mid = max/2 ## Create Fst heatmap D=ggplot(data=df.sig, aes(x=site2, y=site1, fill = actual))+ geom_tile(color="white")+ scale_fill_gradient2(low="blue", high="red", mid="pink", midpoint=mid, name=dlab)+ geom_text(aes(site2, site1, label=Sig), color="black", size = 4) + scale_y_discrete(position="right")+ ggtheme+ guides(fill = guide_colorbar(barwidth = 10, barheight = 1.8, title.position = "top", title.hjust=0.5)) D # ggsave("JostD_heatmap.png", width=15, height=10, dpi=600) # ggsave("JostD_heatmap.pdf", width=15, height=10) #----------------- # # # Combined plot # # ----------------- # # Combine 2 ggplots and make pdf # library(gridExtra) # png("Fst_d_heatmaps.png", width=10, height=9, res=600, units="in") # grid.arrange(Fst, D, nrow=2) # dev.off() # Section 2 outlier SNPs -------------------------------------------------- # ----------------- # # # Outlier SNP dataset # # ----------------- # ## Calculate differentiation statistics and CIs # diffstats.out = diffCalc("lobster_1278ind_8snps_40pop.gen", # fst=TRUE, pairwise=FALSE, bs_locus=TRUE, bs_pairwise=FALSE, # boots=1000, alpha=0.05, para=FALSE) # save(diffstats.out, file="diff_outlier_snps.RData") load("diff_outlier_snps.RData") names(diffstats.out) ## Global statistics with 95 % CIs diffstats.out$global_bs # Section 3: neutral SNPs ------------------------------------------------- # ----------------- # # # Neutral SNP dataset # # ----------------- # ## Calculate differentiation statistics and CIs # diffstats.neutral = diffCalc("lobster_1278ind_71snps_40pop.gen", # fst=TRUE, pairwise=FALSE, bs_locus=TRUE, bs_pairwise=FALSE, # boots=1000, alpha=0.05, para=FALSE) # save(diffstats.neutral, file="diff_neutral_snps.RData") load("diff_neutral_snps.RData") names(diffstats.neutral) ## Global statistics with 95 % CIs diffstats.neutral$global_bs # Section 4: Mantel tests ------------------------------------------------- # ----------------- # # # Prepare geographic matrices # # ----------------- # ## Download default resolution bathymetric data library(marmap) dat = getNOAA.bathy(lon1=-15, lon2=30, lat1=35, lat2=65, res=4, keep=TRUE) summary(dat) ## Download high resolution bathymetric data ** very slow and might crash ** # highres_dat = getNOAA.bathy(lon1=-15, lon2=30, lat1=35, lat2=65, res=1) # summary(highres_dat) ## Plot data pal = colorRampPalette(c("black","darkblue","blue","lightblue")) plot(dat, image=TRUE, bpal=pal(100), col="grey40", lwd=0.7, main="Bathymetric map of the northeast Atlantic") ## Import coordinates coord = read.csv("IBD_coords_all.csv") gps = subset(coord, select=Lon:Lat) ## Add points to plot points(gps$Lon, gps$Lat, col="yellow", cex=1.2, pch=20) ## Get depth of points get.depth(dat, x=gps$Lon, y=gps$Lat, locator=FALSE) # Create transition object with a low negative (-10) depth # constraint to avoid inaccuracies as per authors recommendation: # path impossible in waters shallower than -10 meters depth # trans1 = trans.mat(dat, min.depth=-10) # save(trans1, file="trans1.RData") # ?trans.mat load("trans1.RData") ## Compute least cost distances (km) and plot path on the map # path1 = lc.dist(trans1, gps, res="path") # long run time # save(path1, file="path1.RData") load("path1.RData") plot(dat, image=TRUE, bpal=pal(100), col="grey40", lwd=0.7, main="Bathymetric map of the northeast Atlantic") lapply(path1, lines, col="yellow", lwd=0.5, lty=1) ## Compute least cost distances (km) matrix # dist1 = lc.dist(trans1, gps, res="dist") # dist1 # save(dist1, file="distance_matrix.RData") load("distance_matrix.RData") ## Import coordinates coord = read.csv("IBD_coords_all.csv") gps = subset(coord, select=Lon:Lat) ## Convert to matrix geodist = as.matrix(dist1) ## Rename columns and rows colnames(geodist) = as.vector(coord$Code) ; colnames(geodist) rownames(geodist) = as.vector(coord$Code) ; rownames(geodist) ## Reorder alphabetically as per Genind file order = c("Ale","Ber","Brd","Cor","Cro","Eye","Flo","Gul","Heb","Hel", "Hoo","Idr","Iom","Ios","Jer","Kav","Kil","Laz","Loo","Lyn", "Lys","Mul","Oos","Ork","Pad","Pem","Sar","Sbs","She","Sin", "Sky","Sul","The","Tor","Tro","Ven","Vig") geodist = geodist[order,order] geodist ## Export geodist file # write.csv(geodist, file="pairwise_geog_distances_km.csv") # --------------# # # Prepare genetic distances # # --------------# ## Combine temporal samples (Sar13 & Sar17 and Idr16 & Idr17) data.ibd = data.neutral popNames(data.ibd) = gsub("Sar13", "Sar", popNames(data.ibd)) popNames(data.ibd) = gsub("Sar17", "Sar", popNames(data.ibd)) popNames(data.ibd) = gsub("Idr16", "Idr", popNames(data.ibd)) popNames(data.ibd) = gsub("Idr17", "Idr", popNames(data.ibd)) popNames(data.ibd) = gsub("Tar", "Laz", popNames(data.ibd)) ## Calculate WC Fst on data.ibd dataset # fstWC = pairwise.WCfst(genind2hierfstat(data.ibd), diploid = TRUE) # save(fstWC, file="fst_pairwise.RData") load("fst_pairwise.RData") fstWC ## Replace minus and NA values with really low values (for Mantel test) fstWC[fstWC < 0] = 0.0000001 fstWC[is.na(fstWC)] = 0.0000001 fstWC ## Check column names are the same colnames(geodist) colnames(fstWC) colnames(geodist) == colnames(fstWC) # --------------# # # Mantel tests # # --------------# ## Convert to dist object geodist = as.dist(geodist) ; geodist fstWC = as.dist(fstWC) ; fstWC plot(fstWC ~ geodist) #### Mantel test using all sites man1 = mantel.rtest(geodist, fstWC, nrepet = 1000) man1 ## Create labels for plots fstlab = expression(italic("F")[st]) r1 = round(man1$obs,2) p1 = format(round(man1$pvalue, 3), nsmall=3) ## Create dataframe of distance matrices df = data.frame(geodistance=as.vector(geodist), gendistance=as.vector(fstWC)) ## ggplot all=ggplot(data=df)+ geom_point(aes(x=geodistance, y=gendistance), size=3)+ geom_smooth(aes(x=geodistance, y=gendistance), method="lm", se=TRUE, level=0.95)+ xlab("Geographic distance (km)")+ ylab(fstlab)+ labs(title="All sites")+ geom_label(aes(label="r^2 == 0.87^'***'", x=-Inf,y=Inf), colour="black", size=5, label.padding=unit(0.25,"cm"), label.size = 0.25, hjust=-0.1, vjust=1.2, parse=TRUE)+ theme( axis.text.y = element_text(colour="black", size=15), axis.text.x = element_text(colour="black", size=15), axis.title.y = element_text(colour="black", size=20), axis.title.x = element_text(colour="black", size=15), panel.border = element_rect(colour="black", fill=NA, size=1), plot.title = element_text(hjust=0.5, size=20), # title centered plot.margin = margin(5.5,13.5,5.5,5.5), plot.tag = element_text(size=20) ) all man1 #### Mantel test using only Atlantic sites ## Sites to keep toKeep = c("Brd","Cor","Cro","Eye","Flo","Gul","Heb","Hel","Sul", "Hoo","Idr","Iom","Ios","Jer","Kav","Kil","Loo","Lyn","Lys", "Mul","Ork","Pad","Pem","Sbs","She","Sin","Ven","Vig", "Oos","Ber","Tro") ## Prepare geographic distance subset geodist_atl = as.matrix(geodist) geodist_atl = geodist_atl[toKeep,toKeep] geodist_atl = as.dist(geodist_atl) ## Prepare fst subset fst_atl = as.matrix(fstWC) fst_atl = fst_atl[toKeep,toKeep] fst_atl = as.dist(fst_atl) ## Plot fst plot(fst_atl ~ geodist_atl) ## Perform Mantel test man2 = mantel.rtest(geodist_atl, fst_atl, nrepet = 1000) man2 ## Create labels for plots fstlab = expression(italic("F")[st]) ## Create dataframe of distance matrices df2 = data.frame(geodistance=as.vector(geodist_atl), gendistance=as.vector(fst_atl)) ## ggplot atl=ggplot(data=df2)+ geom_point(aes(x=geodistance, y=gendistance), size=3)+ geom_smooth(aes(x=geodistance, y=gendistance), method="lm", se=TRUE, level=0.95)+ xlab("Geographic distance (km)")+ ylab(fstlab)+ labs(title="Atlantic sites")+ geom_label(aes(label="r^2 == 0.17", x=-Inf,y=Inf), colour="black", size=5, label.padding=unit(0.25,"cm"), label.size = 0.25, hjust=-0.1, vjust=1.2, parse=TRUE)+ theme( axis.text.y = element_text(colour="black", size=15), axis.text.x = element_text(colour="black", size=15), axis.title.y = element_text(colour="black", size=20), axis.title.x = element_text(colour="black", size=15), panel.border = element_rect(colour="black", fill=NA, size=1), plot.title = element_text(hjust=0.5, size=20), # title centered plot.margin = margin(5.5,13.5,5.5,5.5), plot.tag = element_text(size=20) ) atl man2 #### Mantel test excluding Oosterschelde ## Sites to keep toKeep = c("Brd","Cor","Cro","Eye","Flo","Gul","Heb","Hel","Sul", "Hoo","Idr","Iom","Ios","Jer","Kav","Kil","Loo","Lyn","Lys", "Mul","Ork","Pad","Pem","Sbs","She","Sin","Ven","Vig", "Ber","Tro") ## Prepare geographic distance subset geodist_oos = as.matrix(geodist) geodist_oos = geodist_oos[toKeep,toKeep] geodist_oos = as.dist(geodist_oos) ## Prepare fst subset fst_oos = as.matrix(fstWC) fst_oos = fst_oos[toKeep,toKeep] fst_oos = as.dist(fst_oos) ## Plot fst plot(fst_oos ~ geodist_oos) ## Perform Mantel test man3 = mantel.rtest(geodist_oos, fst_oos, nrepet = 1000) man3 ## Create labels for plots fstlab = expression(italic("F")[st]) ## Create dataframe of distance matrices df3 = data.frame(geodistance=as.vector(geodist_oos), gendistance=as.vector(fst_oos)) ## ggplot oos=ggplot(data=df3)+ geom_point(aes(x=geodistance, y=gendistance), size=3)+ geom_smooth(aes(x=geodistance, y=gendistance), method="lm", se=TRUE, level=0.95)+ xlab("Geographic distance (km)")+ ylab(fstlab)+ labs(title="Atlantic sites excluding Oosterschelde")+ geom_label(aes(label="r^2 == 0.45^'***'", x=-Inf,y=Inf), colour="black", size=5, label.padding=unit(0.25,"cm"), label.size = 0.25, hjust=-0.1, vjust=1.2, parse=TRUE)+ theme( axis.text.y = element_text(colour="black", size=15), axis.text.x = element_text(colour="black", size=15), axis.title.y = element_text(colour="black", size=20), axis.title.x = element_text(colour="black", size=15), panel.border = element_rect(colour="black", fill=NA, size=1), plot.title = element_text(hjust=0.5, size=20), # title centered plot.margin = margin(5.5,13.5,5.5,5.5), plot.tag = element_text(size=20) ) oos man3 #### Mantel test using Med sites ## Sites to keep toKeep = c("Ale","Tor","The","Sky","Sar","Laz") ## Prepare geographic distance subset geodist_med = as.matrix(geodist) geodist_med = geodist_med[toKeep,toKeep] geodist_med = as.dist(geodist_med) ## Prepare fst subset fst_med = as.matrix(fstWC) fst_med = fst_med[toKeep,toKeep] fst_med = as.dist(fst_med) ## Plot fst plot(fst_med ~ geodist_med) ## Perform mantel test man4 = mantel.rtest(geodist_med, fst_med, nrepet = 1000) man4 ## Create labels for plots fstlab = expression(italic("F")[st]) ## Create dataframe of distance matrices df4 = data.frame(geodistance=as.vector(geodist_med), gendistance=as.vector(fst_med)) ## ggplot med=ggplot(data=df4)+ geom_point(aes(x=geodistance, y=gendistance), size=3)+ geom_smooth(aes(x=geodistance, y=gendistance), method="lm", se=TRUE, level=0.95)+ xlab("Geographic distance (km)")+ ylab(fstlab)+ # ggtitle("IBD: all samples") labs(title="Mediterranean sites")+ geom_label(aes(label="r^2 == 0.89", x=-Inf,y=Inf), colour="black", size=5, label.padding=unit(0.25,"cm"), label.size = 0.25, hjust=-0.1, vjust=1.2, parse=TRUE)+ theme( axis.text.y = element_text(colour="black", size=15), axis.text.x = element_text(colour="black", size=15), axis.title.y = element_text(colour="black", size=20), axis.title.x = element_text(colour="black", size=15), panel.border = element_rect(colour="black", fill=NA, size=1), plot.title = element_text(hjust=0.5, size=20), # title centered plot.margin = margin(5.5,13.5,5.5,5.5) ) med man4 #### Combine plots using cowplot library(cowplot) #### add panel.border in code ggtheme = theme( axis.text.y = element_text(colour="black", size=15), axis.text.x = element_text(colour="black", size=15), axis.title.y = element_text(colour="black", size=15), axis.title.x = element_text(colour="black", size=15), plot.title = element_text(hjust=0.5, size=15, face="bold"), plot.margin = margin(5.5,15.5,5.5,5.5) # trbl ) plot_grid(all + ggtheme, atl + ggtheme, oos + ggtheme, med + ggtheme, align = "vh", labels = c("A","B","C","D"), nrow = 2, ncol = 2) # ggsave(file="IBD_composite.png", width=13, height=10, dpi=600) # ggsave(file="IBD_composite.pdf", width=13, height=10)