################################################################################ ### collection of scripts used for plotting figures in: ### Zohren et al. (2016) Unidirectional diploid-tetraploid introgression among ### British birch trees with shifting ranges shown by RAD markers. ### Molecular Ecology - Special Issue on Genomics of Hybridization. ################################################################################ # Figure 1: Sampling locations ------------------------------------------------- library(maps) library(mapdata) # supply a comma separated table with (at least) the columns 'Organism', # 'Latitude', and 'Longitude' locations <- read.csv(file.choose()) pop_italic <- expression(italic("B. nana"), italic("B. pendula"), italic("B. pubescens")) UK_maps <- c('UK', 'Isle of Man','Isle of Wight') x11(250, 300) map('worldHires', UK_maps, xlim = c(-9, 3), ylim = c(49.7, 59.5), resolution = 0.8) points(subset(locations, Organism == "B. pubescens")$Longitude, subset(locations, Organism == "B. pubescens")$Latitude, col = "blue", pch = 17, cex = 1.3) points(subset(locations, Organism == "B. pendula")$Longitude, subset(locations, Organism == "B. pendula")$Latitude, col = "orange", pch = 15, cex = 1.3) points(subset(locations, Organism == "B. nana")$Longitude, subset(locations, Organism == "B. nana")$Latitude, col = "red", pch = 16, cex = 1.3) points(subset(locations, Organism == "Hybrid")$Longitude, subset(locations, Organism == "Hybrid")$Latitude, col = "lightpink", pch = 18, cex = 1.3) legend("topleft", c(pop_italic, "Hybrid"), col = c("red", "orange", "blue", "lightpink"), pch = c(16, 15, 17, 18)) # ------------------------------------------------------------------------------ # Figure 2: Distribution of raw read frequencies # Analysis documented in "analyses.R". library(graphics) x11(35, 25) mat_layout <- matrix(c(1,1,1,1, 2,2,2,2, 3,3,3,3, 0,0, 4,4,4,4, 5,5,5,5, 0,0), 2, 12, byrow = TRUE) layout(mat_layout) par(mar = c(2, 1, 0.5, 1), yaxt = "n", cex = 1.3) hist(plot_nana[plot_nana > 0.1 & plot_nana < 0.9], main = "", xlab = "", ylab = "") legend("topleft", "", pch = "A", bty = "n") hist(plot_pend, main = "", xlab = "", ylab = "") legend("topleft", "", pch = "B", bty = "n") hist(plot_pub, main = "", xlab = "", ylab = "") legend("topleft", "", pch = "C", bty = "n") hist(pend_auto, main = "", xlab = "", ylab = "") legend("topleft", "", pch = "D", bty = "n") hist(pub_hybrid, main = "", xlab = "", ylab = "") legend("topleft", "", pch = "E", bty = "n") # ------------------------------------------------------------------------------ # Figure 3: PCA plot of data_80% set (with admixture individuals) # Analysis documented in "analyses.R". myCol <- c(rep("red", 36), rep("orange", 34), rep("blue", 130)) myPch <- c(rep(16, 36), rep(15, 34), rep(17, 130)) pc1 <- round(summary(pca_imp)$imp[2, 1] * 100, 1) pc2 <- round(summary(pca_imp)$imp[2, 2] * 100, 1) xlab <- paste("PC 1 (", pc1, "%)", sep = "") ylab <- paste("PC 2 (", pc2, "%)", sep = "") # sample names of the most admixed individuals read in from the clipboard nanaAdmix <- read.delim("clipboard", header = F) pendAdmix <- read.delim("clipboard", header = F) nanaAdmix <- as.vector(unlist(nanaAdmix)) pendAdmix <- as.vector(unlist(pendAdmix)) nanas <- is.element(rownames(pca_imp$x), nanaAdmix) pends <- is.element(rownames(pca_imp$x), pendAdmix) tmp <- "pub_1173x_GTACTCGT" hyb <- grepl(tmp, rownames(pca_imp$x)) x11() plot(pca_imp$x[, 1:2], col = myCol, pch = myPch, cex = 0.8, xlab = xlab, ylab = ylab, cex.axis = 1.3, cex.lab = 1.3) legend("topleft", c(expression(italic(B.~nana)), expression(italic(B.~pendula)), "Hybrid", expression(italic(B.~pubescens))), col = c("red", "orange", "lightpink", "blue"), pch = c(16, 15, 18, 17), cex = 1.3, bty = "n") legend(-40.5, 17, c(expression(admixed~with~italic(B.~nana)), expression(admixed~with~italic(B.~pendula))), col = c("mediumpurple1", "cyan2"), pch = c(24, 24), cex = 1.3, pt.bg = "blue", bty = "n") lines(c(-44, -3), c(11.5, 11.5)) lines(c(-3, -3), c(25, 11.5)) points(pca_imp$x[pends, 1:2], col = "cyan2", pch = 2, cex = 0.7) points(pca_imp$x[nanas, 1:2], col = "mediumpurple1", pch = 2, cex = 0.7) points(pca_imp$x[hyb, 1], pca_imp$x[hyb, 2], col = "white", pch = 19, cex = 2) points(pca_imp$x[hyb, 1], pca_imp$x[hyb, 2], col = "lightpink", pch = 18, cex = 1.2) # ------------------------------------------------------------------------------ # Figure 5: Plotting geographical cline analysis # Analysis documented in "analyses.R". x11(15, 25) par(mfrow = c(2, 1), mar = c(0, 0, 0, 0), oma = c(4, 4, 0.5, 0.5), mgp = c(2, 0.6, 0)) plot(lat, nana, cex = .7, ylab = "", yaxt = 'n', xaxt = 'n', ylim = c(-0.001, 0.035)) axis(2, at = c(0, 0.01, 0.02, 0.03), labels = c("0", "0.01", "0.02", "0.03"), cex.axis = 1.3) points(lat, sin(fitted(mod1, level = 1)) ^ 2 * (fitted(mod1) > 0), col = 'red', pch = 5, cex = 1.2) tmp <- cbind(lat, sin(fitted(mod1, level = 0)) ^ 2 * (fitted(mod1) > 0)) lines(tmp[order(tmp[, 1]), ]) legend("topleft", "", pch = "A", cex = 1.3, bty = "n") plot(lat, pend, cex = .7, ylab = "", yaxt = 'n', cex.axis = 1.3, ylim = c(-0.01, 0.405)) axis(2, at = c(0, 0.1, 0.2, 0.3, 0.4), cex.axis = 1.3) points(lat, sin(fitted(mod2, level = 1)) ^ 2 * (fitted(mod2) > 0), col = 'red', pch = 5, cex = 1.2) tmp <- cbind(lat, sin(fitted(mod2, level = 0)) ^ 2 * (fitted(mod2) > 0)) lines(tmp[order(tmp[, 1]), ]) text(51.17476 + 0.6, max(pend), "hybrid") legend("topleft", "", pch = "B", cex = 1.3, bty = "n") mtext("Latitude", side = 1, outer = T, cex = 1.3, line = 2.2) mtext("Proportion of admixture", side = 2, outer = T, cex = 1.3, line = 2) # ------------------------------------------------------------------------------ # Figure 6: Plotting Q values (genetic admixture) # Comparison of microsatellite (MS) and RAD data. # Analysis documented in "analyses.R". rad_q <- as.vector(unlist(rad)) ms_q <- as.vector(unlist(ms)) auto <- which(grepl("574", rownames(rad))) scatterhist <- function(x, y, xlab = "", ylab = "") { zones <- matrix(c(2, 0, 1, 3), ncol = 2, byrow = T) layout(zones, widths = c(4/5, 1/5), heights = c(1/5, 4/5)) xhist <- hist(x, plot = F, breaks = 80) yhist <- hist(y, plot = F, breaks = 80) top <- max(c(xhist$counts, yhist$counts)) par(mar = c(2.5, 2.5, 1, 1)) plot(x, y, xlab = "", ylab = "", cex.axis = 1.3, bty = "n") text(x[auto] + 0.05, y[auto] + 0.04, "574", cex = 1.3) text(x[auto + 177], y[auto + 177] - 0.04, "574", cex = 1.3) text(x[auto + (177 * 2)], y[auto + (177 * 2)] + 0.04, "574", cex = 1.3) par(mar = c(0, 2.5, 1, 1)) barplot(xhist$counts, axes = F, ylim = c(0, top), space = 0) par(mar = c(2.5, 0, 1, 1)) barplot(yhist$counts, axes = F, xlim = c(0, top), space = 0, horiz = T) par(oma = c(2.5, 2.5, 0, 0)) mtext(xlab, side = 1, line = 1, outer = T, adj = 0, at = 0.5 * 0.8, cex = 1.3) mtext(ylab, side = 2, line = 1, outer = T, adj = 0, at = 0.5 * 0.8, cex = 1.3) } scatterhist(ms_q, rad_q, xlab = "MS", ylab = "RAD") # ------------------------------------------------------------------------------ # Supplementary Figure 2: PCA plot of data_80% set (with > 10% NA individuals) # Analysis documented in "analyses.R". pop_italic <- expression(italic("B. nana"), italic("B. pendula"), italic("B. pubescens")) myCol <- c(rep("red", 36), rep("orange", 34), rep("blue", 130)) myPch <- c(rep(16, 36), rep(15, 34), rep(17, 130)) pc1 <- round(summary(pca_imp)$imp[2, 1] * 100, 1) pc2 <- round(summary(pca_imp)$imp[2, 2] * 100, 1) xlab <- paste("PC 1 (", pc1, "%)", sep = "") ylab <- paste("PC 2 (", pc2, "%)", sep = "") x11() plot(pca_imp$x[, 1:2], col = myCol, pch = myPch, cex = 0.8, xlab = xlab, ylab = ylab, cex.axis = 1.3, cex.lab = 1.3) legend("topleft", c(pop_italic, "> 10% NAs"), col = c("red", "orange", "blue", "black"), pch = c(16, 15, 17, 4), cex = 1.3) points(pca_imp$x[rows, 1:2], col = "grey50", pch = 4, cex = 1.3) # ------------------------------------------------------------------------------ # Supplementary Figure 3: Allele frequency distributions # based on genotype calls produced with genotyping script # Analysis documented in "analyses.R". files <- list.files(pattern = ".txt") pdf("individualSamples_newscript0.1-0.9.pdf") for (f in files) { input <- read.table(f, header = T, row.names = 1, sep = "\t") reads <- as.vector(input$counts) counts_split <- data.frame(do.call('rbind', strsplit(reads, ",", fixed = T))) counts_split <- as.matrix(counts_split) counts_split <- apply(counts_split, c(1, 2), as.numeric) counts_30x <- subset(counts_split, rowSums(counts_split) >= 30) plot_in <- apply(counts_30x, 1, function(x) x / sum(x)) plot_in <- as.vector(plot_in) plot_in <- plot_in[plot_in > 0.1 & plot_in < 0.9] hist(plot_in, main = f) } dev.off() # ------------------------------------------------------------------------------ # Supplementary Figure 5: pairwise Fst between species # Analysis documented in "analyses.R". x_names <- c(expression(italic("B. nana - B. pendula")), expression(italic("B. nana - B. pubescens")), expression(italic("B. pendula - B. pubescens"))) x11(15, 17) #bottom, left, top, right par(mai = c(2, 1, 1, 0.5), cex.axis = 1.3, cex.lab = 1.3) boxplot(loci_npe, loci_npu, loci_pp, ylab = "Pairwise Fst", main = c("Pairwise Fst between each species pair")) points(1, mean(loci_npe, na.rm = T), pch = 22, col = "darkgrey", lwd = 7) points(2, mean(loci_npu, na.rm = T), pch = 22, col = "darkgrey", lwd = 7) points(3, mean(loci_pp, na.rm = T), pch = 22, col = "darkgrey", lwd = 7) text(x = c(1.5, 2.5, 3.5), y = -0.08, x_names, xpd = T, srt = 30, pos = 2, cex = 1.3)