#=============================================================================================================# # # Script created by Angelica Cuevas # # Study published in Molecular Ecology : # Cite as : Cuevas et al., 2020. Intraspecific genomic variation and local adaptation in a young hybrid species. Molecular Ecology # # # This script: run all analysis in .... Produce statistical analysis, figures and supplementary material # # Usage notes: run line by line #=============================================================================================================# #===========================================================================================================================================================================#### #-------------------------------------- Genomic variation for House, Spanish and Italian sparrows --------------------------------------------------------------------------#### working.directory<-setwd("/path/to/your/working/directory/files.and.scripts") working.directory source('multiplot_function.R', chdir = F) source('summarySE.R', chdir = F) #######-------------------------------------------------------------------------------------------####### #######----------------------------------- PCA ----------------------------------------####### #######-------------------------------------------------------------------------------------------####### #### packages #### # install.packages("adegenet") # install.packages("ggfortify") # install.packages("dplyr") # install.packages("gdata") library(adegenet) library(ggfortify) library(dplyr) library(ggplot2) library("lattice") ####------------------ Figure 1.C ------------------#### ####------------------ 3 Species, house - Italian - Spanish sparrows ------------------#### sps<-read.PLINK(file="Ita.Parent_MF_maf0.02_raw.raw", map.file = "Ita.Parent_MF_maf0.02.map") dim(sps) # individuals=288 SNPs=2737 pca.sps <- glPca(sps, nf=5) scores.sps<-pca.sps$scores popu<-read.table("ID.Ita.Parent_MF.maf0.02.txt",header=T) dim(popu) scores.sps<-as.data.frame(cbind(scores.sps,popu)) head(scores.sps) # write.table(scores_Italian_MF,file="Pca_scores_Italian_MF.txt",sep = "\t") # eigen values pve <- (pca.sps$eig/sum(pca.sps$eig))*100 # plot PCA3<-ggplot(scores.sps, aes(PC1, PC2, colour = Species)) + geom_point(size=3.5) + scale_color_manual(values=c("#0000FF","#FF0000","#33CC00")) #+ geom_text(aes(label=ID),size=3,hjust=1,vjust=1) # PCA3 <- PCA3 + xlab(paste0("PC1 (", signif(pve[1], 3), "%)")) PCA3 <- PCA3 + ylab(paste0("PC2 (", signif(pve[2], 3), "%)")) PCA3 # setEPS() # setwd("/Users/Figures") # postscript("PCA.Ita.Parent_MF.NORimini.maf0.02.eps", width = 10, height = 7) # plot(PCA3) # dev.off() # Checking individual missingness % in these final vcfs #### Ita.Parent.missing<-read.table("Ita.Parent_MF_maf0.02_missingdata.imiss", header = T) dim(Ita.Parent.missing) head(Ita.Parent.missing) miss.Ita.Parent<-ggplot(Ita.Parent.missing, aes(F_MISS)) + geom_histogram()+ xlim(0,1) miss.Ita.Parent mean(Ita.Parent.missing$F_MISS) # 0.1320561 # samples with missing data between 0.7 and 0.5: SA029(Sardinia), I299(Spanish from FontanaRosa), PH11-184(Badajoz sample with a F_MISS=0.98) # re running PCA without those individuals to check for variation due to missigness Dat <- sps[indNames(sps) != "SA029"] Dat <- Dat[indNames(Dat) != "PH11-151"] Dat <- Dat[indNames(Dat) != "PI08-416"] Dat <- Dat[indNames(Dat) != "SA026"] Dat <- Dat[indNames(Dat) != "SA028"] Dat <- Dat[indNames(Dat) != "PO364"] Dat <- Dat[indNames(Dat) != "PI08-213"] Dat <- Dat[indNames(Dat) != "PI08-217"] Dat <- Dat[indNames(Dat) != "I299"] Dat <- Dat[indNames(Dat) != "PH11-184"] dim(Dat) popu2<-as.data.frame(popu[-c(287, 149,230, 285, 286, 241, 203, 207,84,162),]) pca_Dat <- glPca(Dat, nf=5) scores_pca_Dat<-pca_Dat$scores scores_pca_Dat<-as.data.frame(cbind(scores_pca_Dat,popu2)) # eigen values pve <- (pca_Dat$eig/sum(pca_Dat$eig))*100 var_frac <- (pca_Dat$eig/sum(pca_Dat$eig)) signif(sum(var_frac[1:5]) * 100, 5) var.explained<-sum(pca_Dat$eig/sum(pca_Dat$eig)) # plot PCA.Ita.parent2<-ggplot(scores_pca_Dat, aes(PC1, PC2, colour = Species)) + geom_point(size=3.5) + scale_color_manual(values=c("#0000FF","#FF0000","#33CC00")) #+ geom_text(aes(label=ID),size=3,hjust=1,vjust=1) # PCA.Ita.parent2 <- PCA.Ita.parent2 + xlab(paste0("PC1 (", signif(pve[1], 3), "%)")) PCA.Ita.parent2 <- PCA.Ita.parent2 + ylab(paste0("PC2 (", signif(pve[2], 3), "%)")) PCA.Ita.parent2 ####------------------ Figure 1.B ----------------------#### ####------------------ Only Mainland Italians ----------------------#### Ita<-read.PLINK(file="Ita.only_MF_maf0.02_raw.raw", map.file = "Ita.only_MF_maf0.02.map") dim(Ita) # individuals=131 SNPs=4387 pca.Ita <- glPca(Ita, nf=5) scores.Ita<-pca.Ita$scores popu<-read.table("ID.Ita.only_MF.maf0.02.txt",header=T) scores.Ita<-as.data.frame(cbind(scores.Ita,popu)) # write.table(scores_Italian_MF,file="Pca_scores_Italian_MF.txt",sep = "\t") # eigen values pve <- (pca.Ita$eig/sum(pca.Ita$eig))*100 var_frac <- (pca.Ita$eig/sum(pca.Ita$eig)) signif(sum(var_frac[1:5]) * 100, 5) var.explained<-sum(pca.Ita$eig/sum(pca.Ita$eig)) # plot PCA.Ita<-ggplot(scores.Ita, aes(PC1, PC2, colour = Populaion)) + geom_point(size=3.5) # + geom_text(aes(label=ID),size=4,hjust=1,vjust=1) #+ scale_color_manual(values=c("#0000FF","#FF0000","#33CC00")) PCA.Ita <- PCA.Ita + xlab(paste0("PC1 (", signif(pve[1], 3), "%)")) PCA.Ita <- PCA.Ita + ylab(paste0("PC2 (", signif(pve[2], 3), "%)")) PCA.Ita # setEPS() # setwd("/Users/Figures") # postscript("PCA.Ita.only_MF.NORimini.maf0.02.eps", width = 10, height = 7) # plot(PCA.Ita) # dev.off() # Checking individual missingness % in these final vcf #### Ita.only.missing<-read.table("Ita.only_MF_maf0.02_missingdata.imiss", header = T) dim(Ita.only.missing) miss<-ggplot(Ita.only.missing, aes(F_MISS)) + geom_histogram()+ xlim(0,1) miss mean(Ita.only.missing$F_MISS) # 0.1247388 # samples with missing data between 0.7 and 0.5: I126, PI08-416, PI08-213, PI08-217 # re running PCA without those individuals Dat <- Ita[indNames(Ita) != "I126"] Dat <- Dat[indNames(Dat) != "PI08-416"] Dat <- Dat[indNames(Dat) != "PI08-213"] Dat <- Dat[indNames(Dat) != "PI08-217"] dim(Dat) popu2<-as.data.frame(popu[-c(6,99,72,76),]) pca_Dat <- glPca(Dat, nf=5) scores_pca_Dat<-pca_Dat$scores scores_pca_Dat<-as.data.frame(cbind(scores_pca_Dat,popu2)) # eigen values pve <- (pca_Dat$eig/sum(pca_Dat$eig))*100 var_frac <- (pca_Dat$eig/sum(pca_Dat$eig)) signif(sum(var_frac[1:5]) * 100, 5) var.explained<-sum(pca_Dat$eig/sum(pca_Dat$eig)) # plot PCA.Ita2<-ggplot(scores_pca_Dat, aes(PC1, PC2, colour = Populaion)) + geom_point(size=4) #+ geom_text(aes(label=ID),size=4,hjust=1,vjust=1) #+ scale_color_manual(values=c("#0000FF","#FF0000","#33CC00")) PCA.Ita2 <- PCA.Ita2 + xlab(paste0("PC1 (", signif(pve[1], 3), "%)")) PCA.Ita2 <- PCA.Ita2 + ylab(paste0("PC2 (", signif(pve[2], 3), "%)")) PCA.Ita2 #######-------------------------------------------------------------------------------------------####### #######----------------------------------- ADMIXTURE ----------------------------------------####### #######-------------------------------------------------------------------------------------------####### #### packages #### # install.packages("spatstat") # install.packages("strataG") # install.packages("apex") # install.packages("assertthat") # install.packages("glue") # install.packages("dplyr") # install.packages("htmlwidgets") library (spatstat) library (strataG) library(apex) library(assertthat) library(tidyverse) ####------------------ Figure 1.D ------------------#### ####------------------ 3 Species, house - Italian - Spanish sparrows ------------------#### ItaParent_maf0.02.K2<-read.table("Ita.Parent_MF_maf0.02.2.Q", header = T) head(ItaParent_maf0.02.K2) ID<-read.table("ID.Ita.Parent_MF.maf0.02_copy.txt", header = T) head(ID) view(ID) dim(ID) ItaParent_maf0.02.K2<-cbind(ID,ItaParent_maf0.02.K2) head(ItaParent_maf0.02.K2) dim(ItaParent_maf0.02.K2) ItaParent_maf0.02.K2.part1<-ItaParent_maf0.02.K2[,1:6] list1 <- 1:288 list2 <- rep("P1",length(list1)) ItaParent_maf0.02.K2.part1$cluster<-list2 colnames(ItaParent_maf0.02.K2.part1)[6]<-"Q" ItaParent_maf0.02.K2.part2<-ItaParent_maf0.02.K2[,c(1:5,7)] list1 <- 1:288 list2 <- rep("P2",length(list1)) ItaParent_maf0.02.K2.part2$cluster<-list2 colnames(ItaParent_maf0.02.K2.part2)[6]<-"Q" head(ItaParent_maf0.02.K2.part2) ItaParent_maf0.02.K2 <- rbind(ItaParent_maf0.02.K2.part1,ItaParent_maf0.02.K2.part2) head(ItaParent_maf0.02.K2) k2_cols <- c("blue3","red") a <- ggplot(ItaParent_maf0.02.K2, aes(as.character(ID), Q, fill=cluster)) + geom_bar(stat = "identity") a <- a + scale_fill_manual(values = k2_cols) a <- a + xlab(NULL) + ylab("Ancestry") + scale_y_continuous(limits = c(0,1.01), expand = c(0, 0)) a <- a + facet_wrap(~Species, scales = "free_x", nrow = 1) a <- a + theme_light() + theme(legend.position = "none", panel.border = element_blank(), panel.grid = element_blank(), axis.text.y = element_text(size = 10), axis.text.x = element_text(size = 6, angle = 90), axis.ticks.x = element_blank(), strip.text.x = element_text(colour = "black", face = "bold"), strip.background = element_rect(colour = "black", fill = "white")) a # setEPS() # postscript("Admix.Ita.Parent_MF.NORimini.maf0.02.v2.eps", width = 12, height = 7) # plot(a) # dev.off() ####------------------ Figure. S1 ------------------#### ####------------------ Only Mainland Italians ------------------#### Ita_maf0.02.K2<-read.table("Ita.only_MF_maf0.02.2.Q",head=T) ID<-read.table("ID.Ita.only_MF.maf0.02_copy.txt", header = T) Ita_maf0.02.K2<-cbind(ID,Ita_maf0.02.K2) head(Ita_maf0.02.K2) Ita_maf0.02.K2.part1<-Ita_maf0.02.K2[,1:6] list1 <- 1:131 list2 <- rep("P1",length(list1)) Ita_maf0.02.K2.part1$cluster<-list2 colnames(Ita_maf0.02.K2.part1)[6]<-"Q" Ita_maf0.02.K2.part2<-Ita_maf0.02.K2[,c(1:5,7)] list1 <- 1:131 list2 <- rep("P2",length(list1)) Ita_maf0.02.K2.part2$cluster<-list2 colnames(Ita_maf0.02.K2.part2)[6]<-"Q" head(Ita_maf0.02.K2.part2) Ita_maf0.02.K2 <- rbind(Ita_maf0.02.K2.part1,Ita_maf0.02.K2.part2) head(Ita_maf0.02.K2) k2_cols <- c("blue3", "red") a <- ggplot(Ita_maf0.02.K2, aes(as.character(ID), Q, fill=cluster)) + geom_bar(stat = "identity") a <- a + scale_fill_manual(values = k2_cols) a <- a + xlab(NULL) + ylab("Ancestry") + scale_y_continuous(limits = c(0,1.01), expand = c(0, 0)) a <- a + facet_wrap(~Populaion, scales = "free_x", nrow = 1) a <- a + theme_light() + theme(legend.position = "none", panel.border = element_blank(), panel.grid = element_blank(), axis.text.y = element_text(size = 10), axis.text.x = element_text(size = 6, angle = 90), axis.ticks.x = element_blank(), strip.text.x = element_text(colour = "black", face = "bold"), strip.background = element_rect(colour = "black", fill = "white")) a # setEPS() # postscript("Admix.Ita.Only.NORimini.maf0.02.eps", width = 12, height = 7) # plot(a) # dev.off() #######-------------------------------------------------------------------------------------------####### #######----------------------------------- FST ----------------------------------------####### #######-------------------------------------------------------------------------------------------####### #### packages #### # install.packages("ggplot2") # install.packages("cowplot") # install.packages("gridExtra") # everytime library(ggplot2) library("gridExtra") library("cowplot") library("dplyr") #-------------------------------------- Genome scan for populations --------------------------------------------------------------------------###### ####------------------ Figure S2.A ------------------#### ## Italian windowed Fst (w.size:100kb, step:25kb) - maf 0.02 #### global_Ita.maf0.02.w <- read.table("FST.Ita.global.windowed_Ita.only_MF.0.02.windowed.weir.fst", header = T) mean(global_Ita.maf0.02.w$WEIGHTED_FST) dim(global_Ita.maf0.02.w) # SNPs=8637 overlaping windows # head(global_Ita.maf0.02.w) global_Ita.maf0.02.w <- filter(global_Ita.maf0.02.w, CHROM != "chrLGE22" ) global_Ita.maf0.02.w <- filter(global_Ita.maf0.02.w, CHROM =='chr1' | CHROM =='chr1A' | CHROM== 'chr2' | CHROM== 'chr3' | CHROM== 'chr4' | CHROM== 'chr5' | CHROM== 'chr6' | CHROM== 'chr7' | CHROM== 'chr8' | CHROM== 'chr9'| CHROM== 'chr10'| CHROM== 'chr11' | CHROM == 'chrZ') o.chrom<-c('chr1','chr1A','chr2','chr3','chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chrZ') global_Ita.maf0.02.w$CHROM2_f = factor(global_Ita.maf0.02.w$CHROM, levels=o.chrom) FstIta.w<-ggplot(global_Ita.maf0.02.w, aes(BIN_START,WEIGHTED_FST)) + geom_col(size=1, colour="#333333")+ ylim(0,0.4) + #par() facet_grid(.~CHROM2_f, scales = "free_x", space = "free")+ theme( axis.text.x=element_blank()) + # axis.ticks.x=element_blank())+ labs(x=" ", y="Global Italian Fst") FstIta.w # setEPS() # postscript("GlobalItalian.windowed.Fst.eps", width = 18, height = 5) # plot(FstIta.w) # dev.off() ####------------------ Figure S2.D ------------------#### ## House windowed Fst (w.size:100kb, step:25kb) - maf 0.01 #### global_hou.maf0.01.w <- read.table("FST_House_MF.0.01_global_windowed.weir.fst", header = T) dim(global_hou.maf0.01.w) # SNPs=11451 overlaping windows # head(global_hou.maf0.01.w) global_hou.maf0.01.w <- filter(global_hou.maf0.01.w, CHROM != "chrLGE22" ) global_hou.maf0.01.w <- filter(global_hou.maf0.01.w, CHROM =='chr1' | CHROM =='chr1A' | CHROM== 'chr2' | CHROM== 'chr3' | CHROM== 'chr4' | CHROM== 'chr5' | CHROM== 'chr6' | CHROM== 'chr7' | CHROM== 'chr8' | CHROM== 'chr9'| CHROM== 'chr10'| CHROM== 'chr11' | CHROM == 'chrZ') o.chrom<-c('chr1','chr1A','chr2','chr3','chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chrZ') global_hou.maf0.01.w$CHROM2_f = factor(global_hou.maf0.01.w$CHROM, levels=o.chrom) FstHou.w<-ggplot(global_hou.maf0.01.w, aes(BIN_START,WEIGHTED_FST)) + geom_col(size=1, colour="#333333")+ ylim(0,0.4) + #par() facet_grid(.~CHROM2_f, scales = "free_x", space = "free")+ theme( axis.text.x=element_blank()) + # axis.ticks.x=element_blank())+ labs(x=" ", y="Global House Fst") FstHou.w # setEPS() # postscript("GlobalHouse.windowed.Fst.eps", width = 18, height = 5) # plot(FstHou.w) # dev.off() ####------------------ Figure S2.G ------------------#### ## Spanish windowed Fst (w.size:100kb, step:25kb) - maf 0.01 #### global_spa.maf0.01.w <- read.table("FST_Spanish_MF.0.01_global_windowed.weir.fst", header = T) dim(global_spa.maf0.01.w) # SNPs=2953 overlaping windows # head(global_spa.maf0.01.w) global_spa.maf0.01.w <- filter(global_spa.maf0.01.w, CHROM != "chrLGE22" ) global_spa.maf0.01.w <- filter(global_spa.maf0.01.w, CHROM =='chr1' | CHROM =='chr1A' | CHROM== 'chr2' | CHROM== 'chr3' | CHROM== 'chr4' | CHROM== 'chr5' | CHROM== 'chr6' | CHROM== 'chr7' | CHROM== 'chr8' | CHROM== 'chr9'| CHROM== 'chr10'| CHROM== 'chr11' | CHROM == 'chrZ') o.chrom<-c('chr1','chr1A','chr2','chr3','chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chrZ') global_spa.maf0.01.w$CHROM2_f = factor(global_spa.maf0.01.w$CHROM, levels=o.chrom) FstSpa.w<-ggplot(global_spa.maf0.01.w, aes(BIN_START,WEIGHTED_FST)) + geom_col(size=1, colour="#333333")+ ylim(0,0.4) + #par() facet_grid(.~CHROM2_f, scales = "free_x", space = "free")+ theme( axis.text.x=element_blank()) + # axis.ticks.x=element_blank())+ labs(x=" ", y="Global Spanish Fst") FstSpa.w # setEPS() # postscript("GlobalSpanish.windowed.Fst.eps", width = 18, height = 5) # plot(FstSpa.w) # dev.off() #######-------------------------------------------------------------------------------------------####### #######---------------------------------- Tajima's D ----------------------------------------####### #######-------------------------------------------------------------------------------------------####### #### packages #### library(ggplot2) library("gridExtra") library("cowplot") library("dplyr") ####------------------ Figure S2.B ------------------#### ## Italian sparrows without MAF filter #### TD.Ita <- read.table("TajimaD_Ita.only_MF.noMAF.txt", header = T) head(TD.Ita) dim(TD.Ita) # SNPs=9322 densityplot(TD.Ita$tajimasD) t.test(TD.Ita$tajimasD) TD.Ita.filt <- filter(TD.Ita, chr =='chr1' | chr =='chr1A' | chr== 'chr2' | chr== 'chr3' | chr== 'chr4' | chr== 'chr5' | chr== 'chr6' | chr== 'chr7' | chr== 'chr8' | chr== 'chr9'| chr== 'chr10'| chr== 'chr11' | chr == 'chrZ') o.chrom<-c('chr1','chr1A','chr2','chr3','chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chrZ') TD.Ita.filt$CHROM2 = factor(TD.Ita.filt$chr, levels=o.chrom) TD.Ita.plot<-ggplot(TD.Ita.filt, aes(bin_start,tajimasD,colour=control))+ scale_colour_gradient(low="#999999",high="#003333")+ ylim(-1.5,3.5)+ geom_col(size=0.5)+ facet_grid(.~CHROM2,scales = "free_x", space = "free")+ theme( axis.text.x=element_blank(), axis.ticks.x=element_blank())+ labs(x="Position (bp)", y="Global Tajima's D Italian") TD.Ita.plot # setEPS() # postscript("TajimasD.Italian_wo.MAF.eps", width = 15, height = 5) # TD.Ita.plot # dev.off() ####------------------ Figure S2.E ------------------#### ## House sparrows without MAF filter ##### TD.Hou <- read.table("TajimaD_House_MF.noMAF.txt", header = T) head(TD.Hou) dim(TD.Hou) # SNPs=9344 TD.Hou.filt <- filter(TD.Hou, chr =='chr1' | chr =='chr1A' | chr== 'chr2' | chr== 'chr3' | chr== 'chr4' | chr== 'chr5' | chr== 'chr6' | chr== 'chr7' | chr== 'chr8' | chr== 'chr9'| chr== 'chr10'| chr== 'chr11' | chr == 'chrZ') o.chrom<-c('chr1','chr1A','chr2','chr3','chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chrZ') TD.Hou.filt$CHROM2 = factor(TD.Hou.filt$chr, levels=o.chrom) TD.Hou.plot<-ggplot(TD.Hou.filt, aes(bin_start,tajimasD,colour=control))+ scale_colour_gradient(low="#999999",high="#003333")+ ylim(-2,3.5)+ geom_col(size=0.5)+ facet_grid(.~CHROM2,scales = "free_x", space = "free")+ theme( axis.text.x=element_blank(), axis.ticks.x=element_blank())+ labs(x="Position (bp)", y="Global Tajima's D House") TD.Hou.plot # setEPS() # postscript("TajimasD.House_wo.MAF.eps", width = 15, height = 5) # TD.Hou.plot # dev.off() ####------------------ Figure S2.H ------------------#### ## Spanish without MAF filter ##### TD.Spa <- read.table("TajimaD_Spanish_MF.noMAF.txt", header = T) dim(TD.Spa) # SNPs=8618 TD.Spa.filt <- filter(TD.Spa, chr =='chr1' | chr =='chr1A' | chr== 'chr2' | chr== 'chr3' | chr== 'chr4' | chr== 'chr5' | chr== 'chr6' | chr== 'chr7' | chr== 'chr8' | chr== 'chr9'| chr== 'chr10'| chr== 'chr11' | chr == 'chrZ') o.chrom<-c('chr1','chr1A','chr2','chr3','chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chrZ') TD.Spa.filt$CHROM2 = factor(TD.Spa.filt$chr, levels=o.chrom) TD.Spa.plot<-ggplot(TD.Spa.filt, aes(bin_start,tajimasD,colour=control))+ scale_colour_gradient(low="#999999",high="#003333")+ ylim(-2,3.5)+ geom_col(size=0.5)+ facet_grid(.~CHROM2,scales = "free_x", space = "free")+ theme( axis.text.x=element_blank(), axis.ticks.x=element_blank())+ labs(x="Position (bp)", y="Global Tajima's D Spanish") TD.Spa.plot # setEPS() # postscript("TajimasD.Spanish_wo.MAF.eps", width = 15, height = 5) # TD.Spa.plot # dev.off() ####------------------ comparing Tajima's D among species ------------------#### head(TD.Spa) head(TD.Hou) head(TD.Ita) t.test(TD.Ita$tajimasD) ## t = -28.9, df = 2918, p-value < 2.2e-16, mean = -0.4322598 t.test(TD.Hou$tajimasD) ## t = -21.74, df = 3208, p-value < 2.2e-16, mean = -0.3211995 t.test(TD.Spa$tajimasD) ## t = -26.525, df = 956, p-value < 2.2e-16, mean = -0.6239173 t.test(TD.Hou$tajimasD,TD.Spa$tajimasD) ## comparison of House v.s. Spanish # t = 10.898, df = 1776.7, p-value < 2.2e-16 # sample estimates: # mean of x = -0.3211995 , mean of y = -0.6239173 t.test(TD.Hou$tajimasD,TD.Ita$tajimasD) ## comparison of House v.s. Italian # t = 5.2826, df = 6104.2, p-value = 1.318e-07 # sample estimates: # mean of x = -0.3211995 , mean of y = -0.4322598 t.test(TD.Ita$tajimasD,TD.Spa$tajimasD) ## comparison of Spanish v.s. Italian # t = 6.8756, df = 1789.5, p-value = 8.496e-12 # sample estimates: # mean of x = -0.6239173, mean of y = -0.4322598 ###------------------ checking Tajima's D (calculated without MAF) in italian outliers ------------------#### outliers<-read.table("FST.Ita.only_MF.0.02_OutliersValues1.intervals", header = T) # Italian Tajima in Italian outliers #### TD.Ita<-read.table("TajimaD_Ita.only_MF.noMAF_mod2.Tajima.D.txt", header = T) levels(TD.Ita$chr) # loop through outliers_Ita.tajima <- sapply(unique(outliers$chrom), function(y){ x <- filter(outliers, chrom == y) z <- filter(TD.Ita, chr == y) # get var target_pos <- x %>% .$pos window_mid <- z %>% .$mid rr <- sapply(1:length(target_pos), function(w) { index <- which.min(abs(window_mid - target_pos[w])) slice(z, index) %>% .$tajimasD }) x$TD.Ita <- rr x }, simplify = F) # collapse outliers_Ita.tajima <- do.call(rbind,outliers_Ita.tajima) dim(outliers_Ita.tajima) list1 <- 1:122 list2 <- rep("italian",length(list1)) outliers_Ita.tajima$tajima.category<-list2 head(outliers_Ita.tajima) t.test(outliers_Ita.tajima$TD.Ita) #t = -5.8517, df = 121, p-value = 4.251e-08, mean = -0.369696 # House Tajima in Italian outliers #### TD.Hou<-read.table("TajimaD_House_MF.noMAF_mod2.Tajima.D.txt", header = T) # loop through outliers_Hou.tajima <- sapply(unique(outliers$chrom), function(y){ x <- filter(outliers, chrom == y) z <- filter(TD.Hou, chr == y) # get var target_pos <- x %>% .$pos window_mid <- z %>% .$mid rr <- sapply(1:length(target_pos), function(w) { index <- which.min(abs(window_mid - target_pos[w])) slice(z, index) %>% .$tajimasD }) x$TD.Hou <- rr x }, simplify = F) # collapse outliers_Hou.tajima <- do.call(rbind,outliers_Hou.tajima) dim(outliers_Hou.tajima) list1 <- 1:122 list2 <- rep("house",length(list1)) outliers_Hou.tajima$tajima.category<-list2 t.test(outliers_Hou.tajima$TD.Hou) # t = -3.62, df = 106, p-value = 0.0004534, mean = -0.2590327 # Spanish Tajima in Italian outliers #### TD.Spa<-read.table("TajimaD_Spanish_MF.noMAF_mod2.Tajima.D.txt", header = T) # loop through outliers_Spa.tajima <- sapply(unique(outliers$chrom), function(y){ x <- filter(outliers, chrom == y) z <- filter(TD.Spa, chr == y) # get var target_pos <- x %>% .$pos window_mid <- z %>% .$mid rr <- sapply(1:length(target_pos), function(w) { index <- which.min(abs(window_mid - target_pos[w])) slice(z, index) %>% .$tajimasD }) x$TD.Spa <- rr x }, simplify = F) # collapse outliers_Spa.tajima <- do.call(rbind,outliers_Spa.tajima) dim(outliers_Spa.tajima) list1 <- 1:122 list2 <- rep("spanish",length(list1)) outliers_Spa.tajima$tajima.category<-list2 t.test(outliers_Spa.tajima$TD.Spa) # t = -4.3675, df = 43, p-value = 7.784e-05, mean = -0.5356591 #######-------------------------------------------------------------------------------------------####### #######---------------------------------- Pi ------------------------------------------------####### #######-------------------------------------------------------------------------------------------####### ####------------------ Figure S2.C Italian ------------------#### ### ----- for non-variants-non-maf ----- #### italian_pi<-read.table("Pi_Ita.only_MF_NORimini_non-variants_non-maf.windowed.pi", header = T) head(house_pi) mean(house_pi$PI) # 2.995712e-06 mean(italian_pi$PI) # 2.594765e-06 mean(spanish_pi$PI) # 1.642306e-06 Ita.pi<-italian_pi head(Ita.pi) dim(Ita.pi) # SNPs=8164 mean(Ita.pi$PI) Ita.pi <- filter(Ita.pi, CHROM != "chrLGE22" ) Italian <- filter(Ita.pi, CHROM =='chr1' | CHROM =='chr1A' | CHROM== 'chr2' | CHROM== 'chr3' | CHROM== 'chr4' | CHROM== 'chr5' | CHROM== 'chr6' | CHROM== 'chr7' | CHROM== 'chr8' | CHROM== 'chr9'| CHROM== 'chr10'| CHROM== 'chr11' | CHROM == 'chrZ') o.chrom<-c('chr1','chr1A','chr2','chr3','chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chrZ') Italian$CHROM2_f = factor(Italian$CHROM, levels=o.chrom) PiIta<-ggplot(Italian, aes(BIN_START,PI)) + geom_col(size=1, colour="#333333")+scale_y_continuous(limits=c(0,3e-05)) + #par() facet_grid(.~CHROM2_f,scales = "free_x", space = "free")+ theme( axis.text.x=element_blank()) + # axis.ticks.x=element_blank())+ labs(x=" ", y="Italian Pi")+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) PiIta t.test(Ita.pi$PI) # t = 70.725, df = 8163, p-value < 2.2e-16, mean = 2.594765e-06 #### Pi in Italian outliers #### outliers<-read.table("FST.Ita.only_MF.0.02_OutliersValues1.intervals", header = T) head(outliers) dim(outliers) levels(outliers$chrom) Ita.pi_mod<-read.table("Pi_Ita.only_MF_NORimini_non-variants_non-maf.windowed_mod.pi", header = T) head(Ita.pi_mod) levels(Ita.pi_mod$chr) # loop through outliers_Ita.pi <- sapply(unique(outliers$chrom), function(y){ x <- filter(outliers, chrom == y) z <- filter(Ita.pi_mod, chr == y) # get var target_pos <- x %>% .$pos window_mid <- z %>% .$mid rr <- sapply(1:length(target_pos), function(w) { index <- which.min(abs(window_mid - target_pos[w])) slice(z, index) %>% .$pi }) x$Ita.pi_mod <- rr x }, simplify = F) # collapse outliers_Ita.pi <- do.call(rbind,outliers_Ita.pi) head(outliers_Ita.pi) dim(outliers_Ita.pi) t.test(outliers_Ita.pi$Ita.pi_mod) # t = 8.3793, df = 121, p-value = 1.117e-13, mean = 3.782086e-06 ####------------------ Figure S2.F House ------------------#### house_pi<-read.table("Pi_House_MF_non-variants_non-maf.windowed.pi", header = T) House.pi<-house_pi dim(House.pi) # SNPs=7542 overlaping windows House.pi <- filter(House.pi, CHROM != "chrLGE22" ) House.pi_2 <- filter(House.pi, CHROM =='chr1' | CHROM =='chr1A' | CHROM== 'chr2' | CHROM== 'chr3' | CHROM== 'chr4' | CHROM== 'chr5' | CHROM== 'chr6' | CHROM== 'chr7' | CHROM== 'chr8' | CHROM== 'chr9'| CHROM== 'chr10'| CHROM== 'chr11' | CHROM == 'chrZ') o.chrom<-c('chr1','chr1A','chr2','chr3','chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chrZ') House.pi_2$CHROM2_f = factor(House.pi_2$CHROM, levels=o.chrom) House.pi_plot<-ggplot(House.pi_2, aes(BIN_START,PI)) + geom_col(size=1, colour="#333333")+scale_y_continuous(limits=c(0,3e-05)) + #par() facet_grid(.~CHROM2_f,scales = "free_x", space = "free")+ theme( axis.text.x=element_blank()) + # axis.ticks.x=element_blank())+ labs(x=" ", y="House Pi")+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) House.pi_plot t.test(House.pi$PI) # t = 72.918, df = 7541, p-value < 2.2e-16, mean = 2.995712e-06 #### House Pi in Italian outliers #### House.pi_mod<-read.table("Pi_House_MF_non-variants_non-maf.windowed_mod.pi", header = T) levels(House.pi_mod$chr) head(House.pi_mod) # loop through outliers_House.pi <- sapply(unique(outliers$chrom), function(y){ x <- filter(outliers, chrom == y) z <- filter(House.pi_mod, chr == y) # get var target_pos <- x %>% .$pos window_mid <- z %>% .$mid rr <- sapply(1:length(target_pos), function(w) { index <- which.min(abs(window_mid - target_pos[w])) slice(z, index) %>% .$pi }) x$House.pi_mod <- rr x }, simplify = F) # collapse outliers_House.pi <- do.call(rbind,outliers_House.pi) head(outliers_House.pi) dim(outliers_House.pi) # write.table(outliers_House.pi, file = "Ita.outliers_in.House.pi.txt") mean(outliers_House.pi$House.pi_mod) ####------------------ Figure S2.I Spanish ------------------#### spanish_pi<-read.table("Pi_Spanish_MF_non-variants_non-maf.windowed.pi", header = T) Spanish.pi <-spanish_pi dim(Spanish.pi) # SNPs=7386 overlaping windows Spanish.pi <- filter(Spanish.pi, CHROM != "chrLGE22" ) Spanish.pi_2 <- filter(Spanish.pi, CHROM =='chr1' | CHROM =='chr1A' | CHROM== 'chr2' | CHROM== 'chr3' | CHROM== 'chr4' | CHROM== 'chr5' | CHROM== 'chr6' | CHROM== 'chr7' | CHROM== 'chr8' | CHROM== 'chr9'| CHROM== 'chr10'| CHROM== 'chr11' | CHROM == 'chrZ') o.chrom<-c('chr1','chr1A','chr2','chr3','chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chrZ') Spanish.pi_2$CHROM2_f = factor(Spanish.pi_2$CHROM, levels=o.chrom) Spanish.pi_plot<-ggplot(Spanish.pi_2, aes(BIN_START,PI)) + geom_col(size=1, colour="#333333")+scale_y_continuous(limits=c(0,3e-05)) + #par() facet_grid(.~CHROM2_f,scales = "free_x", space = "free")+ theme( axis.text.x=element_blank()) + # axis.ticks.x=element_blank())+ labs(x=" ", y="Spanish Pi")+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) Spanish.pi_plot t.test(Spanish.pi$PI) # t = 59.436, df = 7385, p-value < 2.2e-16, mean = 1.642306e-06 #### Spanish Pi in Italian outliers #### # Spanish.pi_mod<-read.table("Pi_Spanish_MF_maf0.01.windowed_mod.pi", header = T) # Spanish.pi_mod<-read.table("Pi_Spanish_MF_non-maf.windowed_mod.pi", header = T) Spanish.pi_mod<-read.table("Pi_Spanish_MF_non-variants_non-maf.windowed_mod.pi", header = T) # loop through outliers_Spanish.pi <- sapply(unique(outliers$chrom), function(y){ x <- filter(outliers, chrom == y) z <- filter(Spanish.pi_mod, chr == y) # get var target_pos <- x %>% .$pos window_mid <- z %>% .$mid rr <- sapply(1:length(target_pos), function(w) { index <- which.min(abs(window_mid - target_pos[w])) slice(z, index) %>% .$pi }) x$Spanish.pi_mod <- rr x }, simplify = F) # collapse outliers_Spanish.pi <- do.call(rbind,outliers_Spanish.pi) t.test(outliers_Spanish.pi$Spanish.pi_mod) # t = 7.4305, df = 121, p-value = 1.689e-11, mean = 2.133827e-06 #=============================================================================================================================================================================#### #----------------------- Local adaptation to climate. Isolation by environment (IBE) and Isolation by distance (IBD) in the Italian sparrow ----------------------------------#### #######------------------------------------------------------------------------------------------####### #######-------------------------- Environmental variables across Italy -----------------------####### #######--------------------- Maps - Loading values for the climatic variables -------------------####### #######------------------------------------------------------------------------------------------####### # setwd("/Users/angelimc/Documents/Angelica/RAD_seq.Analysis/RAD_2019/Inland.Italians/Environment") #### packages #### install.packages("raster") install.packages("rgdal") library(sp) library(raster) library(rgdal) # CLIMATE # BIO1 = Annual Mean Temperature # BIO4 = Temperature Seasonality (standard deviation *100) # BIO12 = Annual Precipitation # BIO15 = Precipitation Seasonality (Coefficient of Variation) # rasters of environmental variables env<-getData("worldclim", var="bio", res=10) r <- env[[c(1,4,12,15)]] names(r) <- c("Mean Anual Temp","Temperature Seasonality","Mean Anual Prec","Precipitation Seasonality") # Maps: maps<-plot(r[[1]]) newext <- drawExtent() # NOTE!: select the area of interest with two clicks (Italy) before going to the next step # the script wont move on if you don't select the aera you want to plot. maps.c <- crop(r, newext) maps.c<-plot(maps.c) # read coordinates coords<-data.frame(read.table("Ita.Coordinates.txt", header =T)) points <- SpatialPoints(coords[,2:3]) values <- extract(r,points) df <- cbind.data.frame(coords,values) df # write.table(df, "Climatic_variables.txt", sep="\t") # ALTITUDE # getData('ISO3') altitudeITA<-getData("alt", country="ITA") plot(altitudeITA) Altitude <- extract(altitudeITA,points) dat<- cbind.data.frame(df,Altitude) dat$Altitude[4:5]<-c(5,10) ## The altitud of Lago di Burano and Lago Salso was not retrieved from IS03, both localities are in the sea side dat # Include BEAK data beak<-read.table("beak.Inland.Italians.txt", header = T) dat<-cbind(dat,beak[,2:3]) ######### Create distance matrices ######## #### Standarizing Distance Matrices #functions for standardizing the values so coefficients comparable as beta-weights standardize = function(x){tmp=(x-mean(x,na.rm=T))/sd(x,na.rm=T);diag(tmp)=0;return(tmp)} prep.standardize <- function(x){x=data.matrix(x) x=x[lower.tri(x, diag = FALSE)] x=(x-mean(x))/sqrt(var(x)) x} prep <- function(x){x=data.matrix(x) x=x[lower.tri(x, diag = FALSE)] x} #### GEOGRAPHIC DISTANCE - GREAT CIRCLE DISTANCE MATRIX tmp = dat[,c('Population','Longitude','Latitude')] pop.loc = as.matrix(tmp[,c('Longitude','Latitude')]) rownames(pop.loc) = tmp[,'Population'] geo = spDists(pop.loc,longlat=TRUE) ; colnames(geo) = rownames(geo) = rownames(pop.loc) GEO=as.matrix(standardize(geo)) # A.TEMP tmp = dat[,c('Population','Mean.Anual.Temp')] ma.temp = as.matrix(tmp[,'Mean.Anual.Temp']) rownames(ma.temp) = tmp[,'Population'] order= rownames(pop.loc) ma.temp.raw=ma.temp[order,] A.TEMP = as.matrix(standardize(as.matrix(dist(ma.temp.raw)))) # TEMP.S tmp = dat[,c('Population','Temperature.Seasonality')] temp.s = as.matrix(tmp[,'Temperature.Seasonality']) rownames(temp.s) = tmp[,'Population'] order= rownames(pop.loc) temp.s.raw=temp.s[order,] TEMP.S = as.matrix(standardize(as.matrix(dist(temp.s.raw)))) # A.PREC tmp = dat[,c('Population','Mean.Anual.Prec')] ma.prec = as.matrix(tmp[,'Mean.Anual.Prec']) rownames(ma.prec) = tmp[,'Population'] order= rownames(pop.loc) ma.prec.raw=ma.prec[order,] A.PREC = as.matrix(standardize(as.matrix(dist(ma.prec.raw)))) # PREC.S tmp = dat[,c('Population','Precipitation.Seasonality')] prec.s = as.matrix(tmp[,'Precipitation.Seasonality']) rownames(prec.s) = tmp[,'Population'] order= rownames(pop.loc) prec.s.raw=prec.s[order,] PREC.S = as.matrix(standardize(as.matrix(dist(prec.s.raw)))) #### ALT tmp = dat[,c('Population','Altitude')] alt = as.matrix(tmp[,'Altitude']) rownames(alt) = tmp[,'Population'] order= rownames(pop.loc) alt.raw=alt[order,] ALT = as.matrix(standardize(as.matrix(dist(alt.raw)))) #### BeakH tmp = dat[,c('Population','BH')] BeakH = as.matrix(tmp[,'BH']) rownames(BeakH) = tmp[,'Population'] order= rownames(pop.loc) BeakH.raw=BeakH[order,] BEAK.H = as.matrix(standardize(as.matrix(dist(BeakH.raw)))) #### BeakL tmp = dat[,c('Population','BL')] BeakL = as.matrix(tmp[,'BL']) rownames(BeakL) = tmp[,'Population'] order= rownames(pop.loc) BeakL.raw=BeakL[order,] BEAK.L = as.matrix(standardize(as.matrix(dist(BeakL.raw)))) #### reading FST the matrix fst.m<-as.matrix(read.table("Matrix.Ita.NORimini.pairwise.Fst.txt")) head(fst.m) fst.m[upper.tri(fst.m)] = fst.m[lower.tri(fst.m)] head(fst.m) #######-------------------------------------------------------------------------------------------####### #######------------------------------- IBD and IBE --------------------------------------------####### #######-- Univariate, Multivariate Regression with radomization (MMRR) and Commonality Analyses --####### #######-------------------------------------------------------------------------------------------####### source('MMRR_function.R', chdir = F) source('multiplot_function.R', chdir = F) #install.packages("yhat") library(yhat) # defining predictors predictors=list(GEO=GEO, A.TEMP=A.TEMP, A.PREC=A.PREC, TEMP.S=TEMP.S, PREC.S=PREC.S, ALT=ALT) # predictors=list(GEO=GEO, A.TEMP=A.TEMP, A.PREC=A.PREC, TEMP.S=TEMP.S, PREC.S=PREC.S, ALT=ALT, BEAK.H=BEAK.H, BEAK.L=BEAK.H) # predictors=list(GEO=GEO, A.PREC=A.PREC, TEMP.S=TEMP.S, PREC.S=PREC.S, BEAK.H=BEAK.H, BEAK.L=BEAK.L) ####------------------ Table 1. ------------------#### ### Table 1 - UNIVARIATE TESTS with randomization #### colnames = c('predictor','r2','coefficient','tstatistic','pvalue')# uni.table = data.frame(matrix(ncol=length(colnames),nrow=length(predictors)))# colnames(uni.table) = colnames# i = 1# for(i in 1:length(predictors)){# cat(i,'\n')# tmp.fst.m = fst.m[colnames(predictors[[i]]),colnames(predictors[[i]])]# fit = MMRR(tmp.fst.m,predictors[i],nperm=1000)# uni.table[i,'predictor'] = names(predictors)[i]# uni.table[i,'r2'] = sprintf("%.3f", round(fit$r.squared,3))# uni.table[i,'coefficient'] = sprintf("%.3f", round(fit$coefficients[2],3))# uni.table[i,'tstatistic'] = sprintf("%.3f", round(fit$tstatistic[2],3))# uni.table[i,'pvalue'] = sprintf("%.3f", round(fit$Fpvalue,3))# }# colnames(uni.table) = c('','r2','B','t','p')# uni.table[uni.table[,'p'] < 0.001,'p'] = '0.001'# uni.table = uni.table[rev(order(uni.table$r2)),]# uni.table # write.table(uni.table,file = "table.uni_italianMF.0.02.txt",quote=F,col.names=T,row.names=F,sep='\t')# #### Plotting of univariant regressions ###### # FST vs TemperatureSeasonality_Distance TemperatureS<- read.table("FST.v.s.TEMP.S.txt", header = T) ts.fst<-ggplot(TemperatureS, aes(TEMP.S,Mean.FST)) + geom_point(size = 3) + stat_smooth(method = "lm", col = "black", size = 0.3)+ labs(y="Mean Pairwise FST",x="Pairwise Temperature Seasonality Distance", size=0.10)+ theme(axis.title=element_text(size=12)) ts.fst #regression linear_regres<-lm(Mean.FST~TEMP.S, TemperatureS) summary(linear_regres) # Multiple R-squared: 0.163, Adjusted R-squared: 0.1309 # F-statistic: 5.065 on 1 and 26 DF, p-value: 0.0331* # Mantel test is significant as well, see below ####------------------ Table 2. ------------------#### #### Table 2 - Multivariate Matrix Regression with Randomization - MMRR #### multi.table = c() resBootstrap.list = list() # MMRR to get regression coefficients and significance values for multivariate model # data for MMRR fit = MMRR(Y = fst.m, X = predictors, nperm=1000) fit # #### Distance matrices # FST = prep.standardize(fst.m) C.GEO = prep(GEO) C.TEMP.S=prep(TEMP.S) C.A.PREC=prep(A.PREC) C.ALT=prep(ALT) C.A.TEMP=prep(A.TEMP) C.PREC.S=prep(PREC.S) C.BEAK.H=prep(BEAK.H) C.BEAK.L=prep(BEAK.L) # Do Commonality Analysis to get Unique, Common, and Total contribution of each predictor variable data=data.frame(FST = FST, C.GEO, C.TEMP.S, C.A.PREC, C.ALT, C.A.TEMP, C.PREC.S) ca_i = regr(lm(FST ~ C.GEO + C.TEMP.S + C.A.PREC + C.ALT + C.A.TEMP + C.PREC.S, data=data)) data=data.frame(FST = FST, C.GEO, C.TEMP.S, C.A.PREC, C.PREC.S, C.BEAK.H, C.BEAK.L) ca_i = regr(lm(FST ~ C.GEO + C.TEMP.S + C.A.PREC + C.PREC.S + C.BEAK.H + C.BEAK.L, data=data)) #add in the common colnames = c('model','predictor','coefficient','tstatistic','pvalue','Unique','Common','Total') table = data.frame(matrix(ncol=length(colnames),nrow=length(predictors))) colnames(table) = colnames model = paste0('Fst ~ ',paste(names(predictors),collapse=' + ')) table[,'model'] = c(model,paste0('r2 = ',round(fit$r.squared,2)),'','','','') #table[,'model'] = c(model,paste0('r2 = ',round(fit$r.squared,2)),'') table[,'predictor'] = names(predictors) table[,'coefficient'] = round(fit$coefficients[-1],3) table[,'tstatistic'] = round(fit$tstatistic[-1],2) table[,'pvalue'] = round(fit$tpvalue[-1],2) table[,'Unique'] = round(ca_i$Commonality_Data$CCTotalbyVar[,'Unique'],3) table[,'Common'] = round(ca_i$Commonality_Data$CCTotalbyVar[,'Common'],2) table[,'Total'] = round(ca_i$Commonality_Data$CCTotalbyVar[,'Total'],2) table = apply(table,2,as.character) table #multi.table = rbind(multi.table,table,rep('',ncol(table))) ####------------------ Figure. S3. ------------------#### # Commonality analysis with confidence intervals #### #bootstrap procedure modified from code in supplementary of Prunier et al. (2015), which was based on methods in Peterman et al. (2014) # Prunier JG, Colyn M, Legendre X, Nimon KF, Flamand MC (2015) Multicollinearity in spatial genetics: Separating the wheat from the chaff using commonality analyses. Molecular Ecology, 24, 263–283. # Peterman WE, Connette GM, Semlitsch RD, Eggert LS (2014) Ecological resistance surfaces predict fine-scale genetic differentiation in a terrestrial woodland salamander. Molecular Ecology, 23, 2402–2413. nperm=1000 n.predictors = 6 ncombos = ((2^n.predictors)-1) boot=matrix(data = 0, nrow = nperm, ncol = ncombos) resBootstrap=data.frame(n=rep(0,ncombos),o=rep(0,ncombos),l=rep(0,ncombos),u=rep(0,ncombos),p=rep(0,ncombos)) n=ncol(fst.m) sn=0.9*n for (i in 1:nperm){ rarray= sort(sample(n,sn,replace=F)) mmFst = fst.m[rarray,rarray][lower.tri(fst.m[rarray,rarray],diag=F)] mmGEO= prep(GEO[rarray,rarray]) mmALT = prep(ALT[rarray,rarray]) mmMA.TEMP = prep(A.TEMP[rarray,rarray]) mmMA.PREC =prep(A.PREC[rarray,rarray]) mmS.TEMP = prep(TEMP.S[rarray,rarray]) mmS.PREC =prep(PREC.S[rarray,rarray]) comm=regr(lm(mmFst ~ mmGEO + mmALT + mmMA.TEMP + mmMA.PREC + mmS.TEMP + mmS.PREC)) boot[i,]=comm$Commonality_Data$CC[c(1:ncombos),1] print(i) } for (f in 1:ncombos){ q=quantile(boot[,f], c(.025,.975)) resBootstrap[f,1]=f resBootstrap[f,3]=q[1] resBootstrap[f,4]=q[2] } resBootstrap[,2]=comm$Commonality_Data$CC[c(1:ncombos),1] resBootstrap[,5]=comm$Commonality_Data$CC[c(1:ncombos),2] rownames(resBootstrap) = rownames(ca_i$Commonality_Data$CC)[1:ncombos] resBootstrap[,c('o','l','u')] = resBootstrap[,c('o','l','u')] ca_i$Commonality_Data$CC[c(1:ncombos),1] new.rownames = gsub('\\s|[?!Unique$|Common$|to$|and$]','',rownames(resBootstrap)) rownames(resBootstrap) = new.rownames tmp = resBootstrap tmp = round(tmp,3) tmp$n = rev(tmp$n) par(mar=c(4,2,4,30)) xlim = c(min(tmp[,3]), max(tmp[,4])) plot(tmp[,1],xlim=xlim,font=5,lab=c(ncombos, 7, 1),xaxt="n",yaxt="n",cex.lab=1,xlab='',ylab='',cex=1.5) arrows(tmp[,3],tmp[,1],tmp[,4],tmp[,1],code=3,length=0.05,angle=90,lwd=1) abline(v=0,lty=2) axis(1,cex.axis=0.9) mtext('Confidence Intervals of Correlation Coefficient',1,line=2.5,cex=0.9) #yaxis x=cbind(1:63,rev(rownames(tmp))) axis(4,at=x[,1],labels=x[,2],cex.axis=0.75,las=2) mtext('Predictor Sets',2,cex=0.9,line=.5) #upper xaxis at = seq(0,round(max(tmp[,4]),2),by=round(max(tmp[,4]),2)/5) labels= round(at/sum(tmp[,'o']),2) axis(3,at=at,labels=labels,cex.axis=0.75) mtext('% Total',3,line=2.5,cex=1) #coefficients mtext('Coefficient',4,at = 65,las=2,line=17,adj=.2) axis(4,line=17,at=tmp[,1],labels=tmp[,'o'],tick=F,cex.axis=0.8,las=2,hadj=1) mtext('% Total',4,at = 65,las=2,line=24,adj=.5) axis(4,line=24,at=tmp[,1],labels=tmp[,'p'],tick=F,cex.axis=0.75,las=2,hadj=1) mtext('Total',4,at = -.5,las=2,line=14,adj=.5, cex=0.9) mtext(sum(tmp[,'o']),4,at = -.5,las=2,line=17,adj=.5, cex=0.8) mtext(100,4,at = -.5,las=2,line=24.5,adj=.5, cex=0.8) par(mfrow=c(1,1)) dev.off() #######-------------------------------------------------------------------------------------------####### #######------------------------------- Mantel test Beak/TempS v.s. Fst -------------------------------####### #######-------------------------------------------------------------------------------------------####### # install.packages("ade4") #=> this required objects of class *.dist to run the mantel test library(ade4) # does not accept only the matrix I created for Geographic and Genomic (fst) Distance # install.packages("vegan") # -> for mantel test library(vegan) # FST matrix fst.m # Beak matrices BEAK.H BEAK.L # TEMP.S matrix TEMP.S mantel_test_BH<-mantel(fst.m, BEAK.H, method="pearson", permutations=999) mantel_test_BH # Mantel statistic r: 0.1908 - Significance: 0.266 mantel_test_BL<-mantel(fst.m, BEAK.L, method="pearson", permutations=999) mantel_test_BL # Mantel statistic r: 0.1723 - Significance: 0.208 mantel_test_TS<-mantel(fst.m, TEMP.S, method="pearson", permutations=999) mantel_test_TS # Mantel statistic r: 0.4038 - Significance: 0.029 * #=======================================================================================================================================================#### #######---------------------------------------- Outlier Analysis - BayeScan - BayeScEnv ------------------------------------------------------------#### ##### packages and functions ##### # only once: install.packages("qqman") # Loading required libraries library(qqman) library(ggplot2) library("gridExtra") library("cowplot") #-------------------------------------- Plotting BayeScan results ---------------------------------------##### source('plot_R.function_bayescan.R', chdir = F) # Italians #### dev.off() # result<-plot_bayescan("Ita.only_maf0.02.bayescan-out_fst.txt") ####--------- Figure. 3A. ------------------#### #### Italian sparrows #### Ita.bayescan<-read.table("Ita.only_maf0.02.bayescan-out_fst.txt",head=T) SNP_coordinates<-read.table("Ita.only_maf0.02.coordinates.txt", head = T) Ita.bayescan<-cbind(SNP_coordinates,Ita.bayescan) head(Ita.bayescan) # log10.qval<-as.data.frame(-log(Ita.bayescan$qval,base = 10)) # colnames(log10.qval)<-"log10.qval" # head(log10.qval) # Ita.bayescan2<-cbind(Ita.bayescan,log10.qval) # head(Ita.bayescan2) bayes.ita<-ggplot(Ita.bayescan, aes(-log(qval,10),fst))+ geom_point(size=2.5)+ylim(0,0.22)+xlim(0,3.7)+geom_vline(xintercept = 1.30, linetype="dashed")+ labs(x="- log (qval, 10)",y="fst") + theme(legend.position="none") bayes.ita ####--------- Figure. 3B. ------------------#### #### House sparrows #### Hou.bayescan<-read.table("House_maf0.01.bayescan-out_fst.txt",head=T) SNP_coordinates<-read.table("House_maf0.01.coordinates.txt", head = T) Hou.bayescan<-cbind(SNP_coordinates,Hou.bayescan) head(Hou.bayescan) bayes.hou<-ggplot(Hou.bayescan, aes(-log(qval,10),fst))+ geom_point(size=2.5)+ylim(0,0.22)+xlim(0,3.7)+geom_vline(xintercept = 1.30, linetype="dashed")+ labs(x="- log (qval, 10)",y="House fst") + theme(legend.position="none") bayes.hou ####--------- Figure. 3C. ------------------#### #### Spanish sparrows #### Spa.bayescan<-read.table("Spanish_maf0.01.bayescan-out_fst.txt",head=T) SNP_coordinates<-read.table("Spanish_maf0.01.coordinates.txt", head = T) Spa.bayescan<-cbind(SNP_coordinates,Spa.bayescan) bayes.spa<-ggplot(Spa.bayescan, aes(-log(qval,10),fst))+ geom_point(size=2.5)+ylim(0,0.22)+xlim(0,3.7)+geom_vline(xintercept = 1.30, linetype="dashed")+ labs(x="- log (qval, 10)",y="Spanish fst") + theme(legend.position="none") bayes.spa #-------------------------------------- Plotting BayeScEnv results ------------------------------------------# #------------------------------------ correlating environmental variables ------------------------------------# SNP_coordinates<-read.table("Ita.only_maf0.02.coordinates.txt", head = T) ####--------- Figure. 2A. ------------------#### ### Mean Anual Precipitation #### MAPrep<-read.table("MAP_Ita.only_maf0.02.bayescenv_fst.txt",head=T) MAPrep<-cbind(SNP_coordinates,MAPrep) MAPrep<- filter(MAPrep, CHROM != "chrLGE22" ) o.chrom<-c('chr1','chr1A','chr2','chr3','chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10','chr11','chr12','chr13', 'chr14', 'chr15', 'chr17' , 'chr18', 'chr19','chr20', 'chr21','chr24','chr26','chr28','chrZ') chr_colors<-c("#CC0000","#333333","#CC0000","#333333","#CC0000", "#333333","#CC0000","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333") MAPrep$CHROM2 = factor(MAPrep$CHROM, levels=o.chrom) MAPrep2<-ggplot(MAPrep, aes(POS,-log(qval_g,10), colour=CHROM2))+ geom_point(size=2.5)+ylim(0,2)+scale_color_manual(values=chr_colors)+geom_hline(yintercept = 1.30, linetype="dashed")+ facet_grid(.~CHROM2,scales = "free_x")+ theme( axis.text.x=element_blank(), axis.ticks.x=element_blank())+ labs(x=" ",y="Mean Anual Precipitation -log(qval_g,10)") + theme(legend.position="none") MAPrep2 # setEPS() # postscript("BayeScEnv.MAPrep.Italians.eps", width = 12, height = 5) # MAPrep2 # dev.off() ####--------- Figure. 2B. ------------------#### ### Precipitation Seasonality #### PrepS<-read.table("PS_Ita.only_maf0.02.bayescenv_fst.txt",head=T) PrepS<-cbind(SNP_coordinates,PrepS) PrepS<- filter(PrepS, CHROM != "chrLGE22" ) o.chrom<-c('chr1','chr1A','chr2','chr3','chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10','chr11','chr12','chr13', 'chr14', 'chr15', 'chr17' , 'chr18', 'chr19','chr20', 'chr21','chr24','chr26','chr28','chrZ') chr_colors<-c("#CC0000","#333333","#CC0000","#333333","#CC0000", "#333333","#CC0000","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333") PrepS$CHROM2 = factor(PrepS$CHROM, levels=o.chrom) PrepS2<-ggplot(PrepS, aes(POS,-log(qval_g,10), colour=CHROM2))+ geom_point(size=2.5)+ylim(0,2)+scale_color_manual(values=chr_colors)+geom_hline(yintercept = 1.30, linetype="dashed")+ facet_grid(.~CHROM2,scales = "free_x")+ theme( axis.text.x=element_blank(), axis.ticks.x=element_blank())+ labs(x=" ",y="Precipitation Seasonality -log(qval_g,10)") + theme(legend.position="none") PrepS2 # setEPS() # postscript("BayeScEnv.PrepS.Italians.eps", width = 12, height = 5) # PrepS2 # dev.off() ####--------- Figure. 2C. ------------------#### ### Beak Height #### beakh<-read.table("BH_Ita.only_maf0.02.bayescenv_fst.txt",head=T) beakh<-cbind(SNP_coordinates,beakh) beakh<- filter(beakh, CHROM != "chrLGE22" ) o.chrom<-c('chr1','chr1A','chr2','chr3','chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10','chr11','chr12','chr13', 'chr14', 'chr15', 'chr17' , 'chr18', 'chr19','chr20', 'chr21','chr24','chr26','chr28','chrZ') chr_colors<-c("#CC0000","#333333","#CC0000","#333333","#CC0000", "#333333","#CC0000","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333") beakh$CHROM2 = factor(beakh$CHROM, levels=o.chrom) beakh2<-ggplot(beakh, aes(POS,-log(qval_g,10), colour=CHROM2))+ geom_point(size=2.5)+ylim(0,2.5)+scale_color_manual(values=chr_colors)+geom_hline(yintercept = 1.30, linetype="dashed")+ facet_grid(.~CHROM2,scales = "free_x")+ theme( axis.text.x=element_blank(), axis.ticks.x=element_blank())+ labs(x=" ",y="Beak Height -log(qval_g,10)") + theme(legend.position="none") beakh2 # setEPS() # postscript("BayeScEnv.BeakH.Italians.eps", width = 12, height = 5) # beakh2 # dev.off() ####--------- Figure. S5.A. ------------------#### ### Mean Annual Temperature #### MATemp<-read.table("MAT_Ita.only_maf0.02.bayescenv_fst.txt",head=T) MATemp<-cbind(SNP_coordinates,MATemp) MATemp<- filter(MATemp, CHROM != "chrLGE22" ) o.chrom<-c('chr1','chr1A','chr2','chr3','chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10','chr11','chr12','chr13', 'chr14', 'chr15', 'chr17' , 'chr18', 'chr19','chr20', 'chr21','chr24','chr26','chr28','chrZ') chr_colors<-c("#CC0000","#333333","#CC0000","#333333","#CC0000", "#333333","#CC0000","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333") MATemp$CHROM2 = factor(MATemp$CHROM, levels=o.chrom) MATemp2<-ggplot(MATemp, aes(POS,-log(qval_g,10), colour=CHROM2))+ geom_point(size=2.5)+ylim(0,2)+scale_color_manual(values=chr_colors)+geom_hline(yintercept = 1.30, linetype="dashed")+ facet_grid(.~CHROM2,scales = "free_x")+ theme( axis.text.x=element_blank(), axis.ticks.x=element_blank())+ labs(x=" ",y="Mean Annual Temperature -log(qval_g,10)") + theme(legend.position="none") MATemp2 # setEPS() # postscript("BayeScEnv.MATemp.Italians.eps", width = 12, height = 5) # MATemp2 # dev.off() ####--------- Figure. S5.B. ------------------#### ### Temperature Seasonality #### TempS<-read.table("TS_Ita.only_maf0.02.bayescenv_fst.txt",head=T) TempS<-cbind(SNP_coordinates,TempS) TempS<- filter(TempS, CHROM != "chrLGE22" ) o.chrom<-c('chr1','chr1A','chr2','chr3','chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10','chr11','chr12','chr13', 'chr14', 'chr15', 'chr17' , 'chr18', 'chr19','chr20', 'chr21','chr24','chr26','chr28','chrZ') chr_colors<-c("#CC0000","#333333","#CC0000","#333333","#CC0000", "#333333","#CC0000","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333") TempS$CHROM2 = factor(TempS$CHROM, levels=o.chrom) TempS2<-ggplot(TempS, aes(POS,-log(qval_g,10), colour=CHROM2))+ geom_point(size=2.5)+ylim(0,2)+scale_color_manual(values=chr_colors)+geom_hline(yintercept = 1.30, linetype="dashed")+ facet_grid(.~CHROM2,scales = "free_x")+ theme( axis.text.x=element_blank(), axis.ticks.x=element_blank())+ labs(x=" ",y="Temperature Seasonality -log(qval_g,10)") + theme(legend.position="none") TempS2 # setEPS() # postscript("BayeScEnv.TempS.Italians.eps", width = 12, height = 5) # TempS2 # dev.off() ####--------- Figure. S5.C. ------------------#### ### Altitude #### Alt<-read.table("Alt_Ita.only_maf0.02.bayescenv_fst.txt",head=T) Alt<-cbind(SNP_coordinates,Alt) Alt<- filter(Alt, CHROM != "chrLGE22" ) o.chrom<-c('chr1','chr1A','chr2','chr3','chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10','chr11','chr12','chr13', 'chr14', 'chr15', 'chr17' , 'chr18', 'chr19','chr20', 'chr21','chr24','chr26','chr28','chrZ') chr_colors<-c("#CC0000","#333333","#CC0000","#333333","#CC0000", "#333333","#CC0000","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333") Alt$CHROM2 = factor(Alt$CHROM, levels=o.chrom) Alt2<-ggplot(Alt, aes(POS,-log(qval_g,10), colour=CHROM2))+ geom_point(size=2.5)+ylim(0,2)+scale_color_manual(values=chr_colors)+geom_hline(yintercept = 1.30, linetype="dashed")+ facet_grid(.~CHROM2,scales = "free_x")+ theme( axis.text.x=element_blank(), axis.ticks.x=element_blank())+ labs(x=" ",y="Altitude -log(qval_g,10)") + theme(legend.position="none") Alt2 # setEPS() # postscript("BayeScEnv.Alt.Italians.eps", width = 12, height = 5) # Alt2 # dev.off() ####--------- Figure. S5.D. ------------------#### ### Beak Length #### beakl<-read.table("BL_Ita.only_maf0.02.bayescenv_fst.txt",head=T) beakl<-cbind(SNP_coordinates,beakl) beakl<- filter(beakl, CHROM != "chrLGE22" ) o.chrom<-c('chr1','chr1A','chr2','chr3','chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10','chr11','chr12','chr13', 'chr14', 'chr15', 'chr17' , 'chr18', 'chr19','chr20', 'chr21','chr24','chr26','chr28','chrZ') chr_colors<-c("#CC0000","#333333","#CC0000","#333333","#CC0000", "#333333","#CC0000","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333", "#CC0000","#333333","#333333","#CC0000","#333333") beakl$CHROM2 = factor(beakl$CHROM, levels=o.chrom) beakl2<-ggplot(beakl, aes(POS,-log(qval_g,10), colour=CHROM2))+ geom_point(size=2.5)+ylim(0,2)+scale_color_manual(values=chr_colors)+geom_hline(yintercept = 1.30, linetype="dashed")+ facet_grid(.~CHROM2,scales = "free_x")+ theme( axis.text.x=element_blank(), axis.ticks.x=element_blank())+ labs(x=" ",y="Beak Lenght -log(qval_g,10)") + theme(legend.position="none") beakl2 # setEPS() # postscript("BayeScEnv.BeakL.Italians.eps", width = 12, height = 5) # beakl2 # dev.off() detach("package:graphics", unload = T) ####----------------------------------------------------------------------------#### ####---------------------- per-gene population statistics ----------------------#### ####----------------------------------------------------------------------------#### # setwd("/Users/angelimc/Documents/Angelica/RAD_seq.Analysis/RAD_2019/Inland.Italians") addLine <- function (a = NULL, b = NULL, v = NULL, h = NULL, ..., once = FALSE) { tcL <- trellis.currentLayout() k <- 0 for (i in 1:nrow(tcL)) for (j in 1:ncol(tcL)) if (tcL[i, j] > 0) { k <- k + 1 trellis.focus("panel", j, i, highlight = FALSE) if (once) panel.abline(a = a[k], b = b[k], v = v[k], h = h[k], ...) else panel.abline(a = a, b = b, v = v, h = h, ...) trellis.unfocus() } } data<-read.table("per-gene_pop-analysis.txt", header = T) head(data) ita_fst <- filter(data, fst_wc_it != "NA" ) head(ita_fst) m=mean(ita_fst$fst_wc_it_corrected) # mean = 0.009276132 std.dev=sd(ita_fst$fst_wc_it_corrected) # 0.02068975 threshold=m+std.dev*1.65 # Z-Scores: 1.96=5% 2.58=1%, for one-tailed 1.65=5% threshold # for one-tailed 1.65=5% = 0.04341421 densityplot(~fst_wc_it_corrected,data=ita_fst, group=status, main="Italian") addLine(v=0.06265568) addLine(v=0.04982804) addLine(v=0.04508432) addLine(v=0.04341421) quantile(data$fst_wc_it_corrected, c(.95, .975), na.rm = T) # 95% = 0.04703503 AND 97.5% = 0.06691614 quantile(data$td_it, c(.95, .975), na.rm = T) quantile(data$dxy_ho_it, c(.95, .975), na.rm = T) quantile(data$dxy_sp_it, c(.95, .975), na.rm = T) quantile(data$dxy_ho_sp, c(.95, .975), na.rm = T) spa_fst <- filter(data, fst_wc_sp_corrected != "NA" ) m=mean(spa_fst$fst_wc_sp_corrected) # mean = 0.01695106 std.dev=sd(spa_fst$fst_wc_sp_corrected) # 0.03351726 threshold=m+std.dev*1.65 # Z-Scores: 1.96=5% 2.58=1% threshold # 0.1034256 densityplot(~fst_wc_sp_corrected,data=spa_fst, group=status, main="Spanish") addLine(v=0.1034256) addLine(v=0.08264489) addLine(v=0.07225454) # for one-tailed 1.65=5% quantile(data$fst_wc_sp, c(.95, .975), na.rm = T) quantile(data$fst_wc_sp_corrected, c(.95, .975), na.rm = T) hou_fst <- filter(data, fst_wc_ho_corrected != "NA" ) m=mean(hou_fst$fst_wc_ho_corrected) # mean = 0.007046091 std.dev=sd(hou_fst$fst_wc_ho_corrected) # 0.01632243 threshold=m+std.dev*1.65 # Z-Scores: 1.96=5% 2.58=1% threshold # 0.04915796 densityplot(~fst_wc_ho_corrected,data=hou_fst, group=status, main="House") addLine(v=0.04915796) addLine(v=0.03903805) addLine(v=0.0339781) # for one-tailed 1.65=5% quantile(data$fst_wc_ho, c(.95, .975), na.rm = T) quantile(data$fst_wc_ho_corrected, c(.95, .975), na.rm = T) #==================================================================================================================================================#### #----------------------- Hybrid constraints to population divergence. ----------------------------------------------------------------------------#### #######-------------------------------------------------------------------------------------------####### #######------------------------------ Outlier Analysis -----------------------------------------####### #######-------------------------------------------------------------------------------------------####### # pakages #### # install.packages("cowplot") # install.packages("lattice") # install.packages("gplots") # install.packages("visreg") # library(ggplot2) library("gridExtra") library("cowplot") library("visreg") library(lattice) library(gplots) ###--------------------------------------------- Figure 4 --------------------------------------------------##### #setwd("/Users/angelimc/Documents/Angelica/RAD_seq.Analysis/RAD_2019/Inland.Italians/FST/windowed_FST/Fig.4_files") ####------------------ Figure 4.A ------------------#### # Fig4A within.Ita.FST_v.s_HouSpa.FSt.txt #### Ita_vs_HouSpa<-read.table("Fig4A_within.Ita.FST_v.s_HouSpa.FST.txt", header = T) head(Ita_vs_HouSpa) a<-ggplot(Ita_vs_HouSpa, aes(WEIGHTED_FST_SpaHou,WEIGHTED_FST_Italian, colour = Outlier_Italian)) + geom_point(size = 5) + ggtitle("Italian FST Outliers") + theme(legend.position="none") + labs(y="Italian Fst",x="Spanish - House Fst", size=0.10)+ theme(axis.title=element_text(size=12))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) a ####------------------ Figure 4.B ------------------#### ### ItaHou vs ItaSpa Fst - plot with the Italian Outliers #### SpaIta_vs_HouIta<-read.table("Fig4B_SpaIta.FST_v.s_HouIta.FST.txt", header = T) head(SpaIta_vs_HouIta) b<-ggplot(SpaIta_vs_HouIta, aes(WEIGHTED_FST_HouIta,WEIGHTED_FST_SpaIta, colour = Outlier_Italian)) + geom_point(size = 5) + ggtitle("Italian FST Outliers") + theme(legend.position="none") + # labs(y="Italian - Spanish Fst",x="Italian - House Fst", size=0.10)+ theme(axis.title=element_text(size=12))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) b # in the plot the outliers seems hiden under the other points, they can be brought infrot with an image-editor ####------------------ Figure 4.C ------------------#### # Fig4C within.Ita.FST_v.s_within.Hou.FSt.txt #### Ita_vs_Hou<-read.table("Fig4C_Ita.FST_v.s_Hou.FST.txt", header = T) head(Ita_vs_Hou) c<-ggplot(Ita_vs_Hou, aes(WEIGHTED_FST_Hou,WEIGHTED_FST_Ita)) + geom_point(size = 5) + theme(legend.position="none") + # ggtitle("Italian FST Outliers") + #labs(y="Italian Fst",x="House Fst", size=0.10)+ theme(axis.title=element_text(size=12))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) c ####------------------ Figure 4.D ------------------#### # Fig4D within.Ita.FST_v.s_within.Spa.FSt.txt #### Ita_vs_Spa<-read.table("Fig4D_Ita.FST_v.s_Spa.FST.txt", header = T) head(Ita_vs_Spa) d<-ggplot(Ita_vs_Spa, aes(WEIGHTED_FST_Spa,WEIGHTED_FST_Ita)) + geom_point(size = 5) + theme(legend.position="none") + # ggtitle("Italian FST Outliers") + #labs(y="Italian Fst",x="Spanish Fst", size=0.10)+ theme(axis.title=element_text(size=12))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) d multiplot(a,b,c,d, cols = 2) ####------------------ Figure 5.C ------------------#### sp.outliers<-read.table("SpaHouFST_in.intraspecific.outliers.txt", header = T) head(sp.outliers) linecolors <- c("#0000FF","#33CC00","#FF0000") fillcolors <- c("#0000FF","#33CC00","#FF0000") # partially transparent points by setting `alpha = 0.5` ggplot(sp.outliers, aes(outlier,WEIGHTED_FST, colour = outlier, fill= outlier)) + geom_point(position=position_jitter(h=0.0, w=0.1), shape = 21, size = 4) + scale_color_manual(values=linecolors) + scale_fill_manual(values=fillcolors) + ylim(0,0.8) + theme_bw() means<-plotmeans(WEIGHTED_FST ~ outlier, data = sp.outliers, frame = FALSE, mean.labels = FALSE, connect = FALSE) tgc <- summarySE(sp.outliers, measurevar="WEIGHTED_FST", groupvars="outlier") # The errorbars overlapped, so use position_dodge to move them horizontally pd <- position_dodge(0.1) # move them .05 to the left and right # Use 95% confidence interval instead of SEM ggplot(tgc, aes(x=outlier, y=WEIGHTED_FST)) + geom_errorbar(aes(ymin=WEIGHTED_FST-ci, ymax=WEIGHTED_FST+ci), colour="grey",width=.4, position=pd) + geom_line(position=pd) + geom_point(position=pd,size=3) + ylab("FST") + xlab("")+ ylim(0,0.8)+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) ####---- difference between species outliers #### Hou.outliers<-read.table("FST.House.outliers_in.FST.SpaHou.txt", header = T) Spa.outliers<-read.table("FST.Spanish.outliers_in.FST.SpaHou.txt", header = T) Ita.outliers<-read.table("FST.Italian.outliers_in.FST.SpaHou.txt" , header = T) mean(Hou.outliers$WEIGHTED_FST) # 0.1450933 mean(Spa.outliers$WEIGHTED_FST) # 0.2835705 mean(Ita.outliers$WEIGHTED_FST) # 0.09213232 a<-t.test(Ita.outliers$WEIGHTED_FST,Spa.outliers$WEIGHTED_FST) a # t = -8.0922, df = 141.61, p-value = 2.406e-13 # windowed FST b<-t.test(Ita.outliers$WEIGHTED_FST,Hou.outliers$WEIGHTED_FST) b # t = -2.4993, df = 239.32, p-value = 0.01311 # windowed FST c<-t.test(Hou.outliers$WEIGHTED_FST,Spa.outliers$WEIGHTED_FST) c # t = -5.3196, df = 172.27, p-value = 3.199e-07 # windowed FST ####------------------ Table 3. - Logistic regression ------------------#### #### Logistic regression on the probability of being an outlier FST for Italian sparrows #### model.a <- glm(as.numeric(Outlier_FST.Italian.1) ~ FST_SpaHou, na.action = "na.omit", family = binomial(link = "logit"), data = outliers) summary(model.a) model.b <- glm(as.numeric(Outlier_FST.Italian.1) ~ FST_HouIta+FST_SpaIta, na.action = "na.omit", family = binomial(link = "logit"), data = outliers) summary(model.b) model.b <- glm(as.numeric(Outlier_FST.Italian.1) ~ FST_HouIta*FST_SpaIta, na.action = "na.omit", family = binomial(link = "logit"), data = outliers) summary(model.b) #######-------------------------------------------------------------------------------------------####### #######---------------------------- Recombination Rate vs FST -----------------------------------####### #######-------------------------------------------------------------------------------------------####### # The files are created on the "Combine_SNPs_RecombRate.R" script # from individual species vcf # fst_recomb_Spanish.from.Spanish_MF.tsv # fst_recomb_House.from.House_MF.tsv # fst_recomb_Italian.from.Ita.only_MF.tsv # from Ita.Parent.vcf # fst_recomb_House.from.Ita.Parent_MF.tsv # fst_recomb_Italian.from.Ita.Parent_MF.tsv # fst_recomb_Spanish.from.Ita.Parent_MF.tsv ####------------------ Figure S8.A ------------------#### ### House sparrow #### Hou.fst_recomb <- read.delim("fst_recomb_House.from.House_MF.tsv") head(Hou.fst_recomb) dim(Hou.fst_recomb) # 6447 SNPs # only for fst_recomb_House.from.Ita.Parent_MF.tsv Hou.fst_recomb <- filter(Hou.fst_recomb, weir_and_cockerham_fst != "NaN") # 2642 SNPs # Logistic regression on the probability to be a FST outlier #### # Create a column with the probability of being an outlier (0,1) - conditional factor x>0.05733128 table = data.frame((matrix(ncol=5,nrow=length(Hou.fst_recomb$weir_and_cockerham_fst)))) colnames(table) = c('chrom', 'pos','fst','recomb','outlier')# table[,'chrom'] = Hou.fst_recomb[,'chrom'] table[,'pos'] = Hou.fst_recomb[,'pos'] table[,'fst'] = Hou.fst_recomb[,'weir_and_cockerham_fst'] table[,'recomb'] = Hou.fst_recomb[,'recomb'] table[table[,'fst']>0.1075277,'outlier'] = '1'# for fst_recomb_House.from.House_MF.tsv table[table[,'fst']<0.1075277,'outlier'] = '0'# for fst_recomb_House.from.House_MF.tsv # table2[table[,'fst']>0.0988,'outlier'] = '1'# for fst_recomb_House.from.Ita.Parent_MF.tsv # table2[table[,'fst']<0.0988,'outlier'] = '0'# for fst_recomb_House.from.Ita.Parent_MF.tsv table_hou <- filter(table, recomb > 0 ) dim(table) dim(table_hou) # check that there are not zeros in recomb.rate before log10 transformation table_hou$recomb_log10<-log10(table_hou$recomb) head(table_hou) dim(table_hou) model.f <- glm(as.numeric(outlier) ~ recomb_log10, na.action = "na.omit", family = binomial(link = "logit"), data = table_hou) summary(model.f) # Estimate Std. Error Pr(>|z|) # recomb -0.01069 0.13563 0.937 # model.f1 <- glm(as.numeric(outlier) ~ recomb_log10, # na.action = "na.omit", # data = table_hou) # summary(model.f1) # # Estimate Std. Error Pr(>|z|) # # recomb -0.0002939 0.0037276 0.937 # regression on Fst and recombination rate model.g <- lm(fst ~ recomb_log10, na.action = "na.omit", data = table_hou) summary(model.g) # Estimate=-0.0007516, Std.Error=0.0007539 p=0.319 # Multiple R-squared: 0.0001606, Adjusted R-squared: -9.67e-07 # with(summary(model.g), 1 - deviance/null.deviance) # if a glm() is run instead the the R-square is obtained by 1-(deviance/null.deviance) # correlation on Fst and recombination rate model.h<-cor.test(x=table_hou$recomb_log10 , table_hou$fst) model.h # t = -0.997, df = 6188, p-value = 0.3188, correlation = -0.0126732 # plot o_colors=c('#333333', '#CC3300') h.rec<-ggplot(table_hou, aes(recomb_log10, fst, colour = outlier)) + geom_point(size=5) + scale_color_manual(values=o_colors) + theme(legend.position="none") + ylab("Global House FST") + xlab("Recombination rate (LOG10)") + #scale_x_continuous(limits = c(0,2.6)) scale_y_continuous(limits = c(0, 0.35)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) h.rec ####------------------ Figure S8.B ------------------#### ### Italian sparrow #### Ita.fst_recomb <- read.delim("fst_recomb_Italian.from.Ita.only_MF.tsv") head(Ita.fst_recomb) dim(Ita.fst_recomb) # 4361 SNPs # only for fst_recomb_Italian.from.Ita.only_MF.tsv Ita.fst_recomb <- filter(Ita.fst_recomb, weir_and_cockerham_fst != "NaN") # 2709 SNPs # Logistic regression on the probability to be a FST outlier #### # Create a column with the probability of being an outlier (0,1) - conditional factor x>0.03911853 table = data.frame((matrix(ncol=5,nrow=length(Ita.fst_recomb$weir_and_cockerham_fst)))) colnames(table) = c('chrom', 'pos','fst','recomb','outlier')# table[,'chrom'] = Ita.fst_recomb[,'chrom'] table[,'pos'] = Ita.fst_recomb[,'pos'] table[,'fst'] = Ita.fst_recomb[,'weir_and_cockerham_fst'] table[,'recomb'] = Ita.fst_recomb[,'recomb'] table[table[,'fst']>0.0648710,'outlier'] = '1'# for fst_recomb_Italian.from.Ita.only_MF.tsv table[table[,'fst']<0.0648710,'outlier'] = '0'# for fst_recomb_Italian.from.Ita.only_MF.tsv # table[table[,'fst']>0.0627513,'outlier'] = '1'# for fst_recomb_Italian.from.Ita.Parent_MF.tsv # table[table[,'fst']<0.0627513,'outlier'] = '0'# for fst_recomb_Italian.from.Ita.Parent_MF.tsv head(table) dim(table) table_ita <- filter(table, recomb > 0 ) dim(table_ita) # check that there are not zeros in recomb.rate before log10 transformation, if so design them to 0.0001 table_ita$recomb_log10<-log10(table_ita$recomb) head(table_ita) # write.table(table, "Outliers_categories/Ita.recom_fstOutlier.txt") # Ita.fst_recomb$outlier <- as.factor("NA") model.d <- glm(as.numeric(outlier) ~ recomb_log10, na.action = "na.omit", family = binomial(link = "logit"), data = table_ita) summary(model.d) # Estimate Std. Error Pr(>|z|) # recomb -0.01462 0.16549 0.93 # regression on Fst and recombination rate model.e <- lm(fst ~ recomb_log10, na.action = "na.omit", data = table_ita) summary(model.e) #with(summary(model.e), 1 - deviance/null.deviance) # Estimate Std. Error Pr(>|z|) # recomb -0.0011804 0.0005532 0.0329 * # Multiple R-squared: 0.001089, Adjusted R-squared: 0.0008498 # correlation on Fst and recombination rate model.h<-cor.test(x=table_ita$recomb_log10 , table_ita$fst) model.h # t = -2.1337, df = 4176, p-value = 0.03293* , correlation = -0.03300017 # plot o_colors=c('#333333', '#CC3300') i.rec<-ggplot(table_ita, aes(recomb_log10, fst, colour = outlier)) + geom_point(size=5) + scale_color_manual(values=o_colors) + theme(legend.position="none") + ylab("Global Italian FST") + xlab("Recombination rate (LOG10)") + scale_y_continuous(limits = c(0, 0.35))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) i.rec ####------------------ Figure S8.C ------------------#### ### Spanish #### Spa.fst_recomb <- read.delim("fst_recomb_Spanish.from.Spanish_MF.tsv") head(Spa.fst_recomb) dim(Spa.fst_recomb) # 1307 SNPs # only for fst_recomb_Spanish.from.Ita.Parent_MF.tsv Spa.fst_recomb <- filter(Spa.fst_recomb, weir_and_cockerham_fst != "NaN") # 2334 SNPs # Logistic regression on the probability to be a FST outlier #### # Create a column with the probability of being an outlier (0,1) - conditional factor x>0.05733128 table = data.frame((matrix(ncol=5,nrow=length(Spa.fst_recomb$weir_and_cockerham_fst)))) colnames(table) = c('chrom', 'pos','fst','recomb','outlier')# table[,'chrom'] = Spa.fst_recomb[,'chrom'] table[,'pos'] = Spa.fst_recomb[,'pos'] table[,'fst'] = Spa.fst_recomb[,'weir_and_cockerham_fst'] table[,'recomb'] = Spa.fst_recomb[,'recomb'] table[table[,'fst']>0.1143911,'outlier'] = '1'# for fst_recomb_Spanish.from.Spanish_MF.tsv table[table[,'fst']<0.1143911,'outlier'] = '0'# for fst_recomb_Spanish.from.Spanish_MF.tsv # table2[table[,'fst']>0.1196344,'outlier'] = '1'# for fst_recomb_Spanish.from.Ita.Parent_MF.tsv # table2[table[,'fst']<0.1196344,'outlier'] = '0'# for fst_recomb_Spanish.from.Ita.Parent_MF.tsv head(table) dim(table) table_spa <- filter(table, recomb > 0 ) dim(table_spa) # check that there are not zeros in recomb.rate before log10 transformation, if so design them to 0.0001 table_spa$recomb_log10<-log10(table_spa$recomb) head(table_spa) model.f <- glm(as.numeric(outlier) ~ recomb_log10, na.action = "na.omit", family = binomial(link = "logit"), data = table_spa) summary(model.f) # Estimate Std. Error Pr(>|z|) # recomb -0.6791 0.2977 0.0225 * # regression on Fst and recombination rate model.g <- lm(fst ~ recomb_log10, na.action = "na.omit", data = table_spa) summary(model.g) # Estimate Std. Error Pr(>|z|) # recomb -0.004047 0.001813 0.0258 * Multiple R-squared: 0.004018, Adjusted R-squared: 0.003211 # correlation on Fst and recombination rate model.h<-cor.test(x=table_spa$recomb_log10 , table_spa$fst) model.h # t = -2.2321, df = 1235, p-value = 0.02579* , correlation = -0.06338724 # plot o_colors=c('#333333', '#CC3300') s.rec<-ggplot(table_spa, aes(log10(recomb), fst, colour = outlier)) + geom_point(size=5) + scale_color_manual(values=o_colors) + theme(legend.position="none") + ylab("Global Spanish FST") + xlab("Recombination rate (LOG10)") + scale_y_continuous(limits = c(0, 0.35))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) s.rec multiplot(h.rec, i.rec, s.rec, cols = 3) #######-------------------------------------------------------------------------------------------####### #######------------------------------- HETEROZYGOSITY ----------------------------------####### #######-------------------------------------------------------------------------------------------####### #####--------------- Heterozygosity v.s. Recom.Rate - 3 species -----------------------#### # install.packages('tidyverse') library(tidyverse) recomb <- read_csv("recomb_int.csv") het.div.loci<-read.table("Het.AFD_Italian_wgs_All_divergent.loci_afd1_mod.txt", header = T) # AFD=1 dim(het.div.loci) # 17887 head(het.div.loci) # loop through Het_recomb <- sapply(unique(het.div.loci$chr), function(y){ x <- filter(het.div.loci, chr == y) z <- filter(recomb, chr == y) # get var target_pos <- x %>% .$pos window_mid <- z %>% .$mid rr <- sapply(1:length(target_pos), function(w) { index <- which.min(abs(window_mid - target_pos[w])) slice(z, index) %>% .$male_int_rate }) x$recomb <- rr x }, simplify = F) # collapse Het_recomb <- do.call(rbind, Het_recomb) head(Het_recomb) dim(Het_recomb) # write.table(Het_recomb, file="Het_recomb_WGS.txt") Het_recomb2<-read.table("Het_recomb_WGS.txt", header = T) head(Het_recomb2) dim(Het_recomb2) Het_recomb2<-na.omit(Het_recomb2) dim(Het_recomb2) # regressions - correlation Het_recomb$recomb_log10<-log10(Het_recomb$recomb) model_recomb<-lm(Het_recomb$heterozygosity~Het_recomb$recomb_log10) summary(model_recomb) # Estimate = -0.0013318 Std Error=0.0010850 p-value=0.22 cor.test(x = Het_recomb$recomb_log10, Het_recomb$heterozygosity) # t = -1.2276, df = 15308, p-value = 0.2196, correlation = -0.009921121 Het_recomb2$recomb_log10<-log10(Het_recomb2$recomb2) head(Het_recomb2) dim(Het_recomb2) model_recomb2<-lm(Het_recomb2$heterozygosity~Het_recomb2$recomb_log10) summary(model_recomb2) # Estimate = -0.0019293 Std Error= 0.0006398 p-value=0.00257 ** cor.test(x = Het_recomb2$recomb_log10, Het_recomb2$heterozygosity) # t = -3.0154, df = 16238, p-value = 0.00257, correlation = -0.02365722 # plots het_recomb_plot<-ggplot(Het_recomb2,aes(recomb_log10,heterozygosity)) + geom_point() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) het_recomb_plot #######-------------------------------------------------------------------------------------------####### #######----------------------------------- ANCESTRY ----------------------------------------####### #######-------------------------------------------------------------------------------------------####### # pakages #### library(plyr) library(dplyr) library(stats) library(ggplot2) library("gridExtra") library("cowplot") setwd("ancestry") # reading all chromosome datasets #### ldf <- list() # creates a list list_tables <- dir(pattern = "Fst_Ancestry_chr*") # creates the list of all the txt files in the directory for (k in 1:length(list_tables)){ ldf[[k]] <- read.table(list_tables[k], header = F) } #str(ldf[[1]]) #head(ldf[[20]]) for (i in 1:length(ldf)){ colnames(ldf[[i]])<-c("chr_fst", "position_fst", "FST","chr_anc", "position_anc" , "ID1" ,"ID2","ID3","ID4","ID5", "ID6","ID7","ID8","ID9","ID10","ID11","ID12","ID13","ID14","ID15","ID16","ID17","ID18","ID19","ID20","ID21","ID22", "ID23","ID24","ID25","ID26","ID27","ID28","ID29","ID30","ID31","ID32","ID33","ID34","ID35","ID36","ID37","ID38","ID39", "ID40","ID41","ID42","ID43","ID44","ID45","ID46","ID47","ID48","ID49","ID50","ID51","ID52","ID53","ID54","ID55","ID56","ID57", "ID58","ID59","ID60")} for (j in 1:length(ldf)){ for (i in 6:65) { assign(paste("dataset",j,"ID", i, sep = ""), ldf[[j]][,c(1:3,5,i)]) } } # head(dataset24ID7) # dim(dataset24ID6) # mydata_list <- list() for (j in 1:length(ldf)){ assign(paste("mydata",j,sep = ""), lapply( paste0('dataset',j, "ID", 6:65 ), get)) mydata_list[j]<-lapply( paste0("mydata",j), get) for (i in 1:60){ colnames(mydata_list[[j]][[i]])<- c("chr_fst","position_fst","FST","position_anc","ANC") } } # View(mydata_list) for (j in 1:length(ldf)){ assign(paste("mydata_",j,"_collapsed", sep = ""),do.call(rbind, mydata_list[[j]])) } View(ldf) #### reading tables with outliers specified #### chr1<-read.table("Fst_Ancestry_chr1_weighted_collapsed.txt", header = T) chr1A<-read.table("Fst_Ancestry_chr1A_weighted_collapsed.txt", header = T) chr2<-read.table("Fst_Ancestry_chr2_weighted_collapsed.txt", header = T) chr3<-read.table("Fst_Ancestry_chr3_weighted_collapsed.txt", header = T) chr4<-read.table("Fst_Ancestry_chr4_weighted_collapsed.txt", header = T) chr5<-read.table("Fst_Ancestry_chr5_weighted_collapsed.txt", header = T) chr6<-read.table("Fst_Ancestry_chr6_weighted_collapsed.txt", header = T) chr7<-read.table("Fst_Ancestry_chr7_weighted_collapsed.txt", header = T) chr8<-read.table("Fst_Ancestry_chr8_weighted_collapsed.txt", header = T) chr9<-read.table("Fst_Ancestry_chr9_weighted_collapsed.txt", header = T) chr10<-read.table("Fst_Ancestry_chr10_weighted_collapsed.txt", header = T) chr11<-read.table("Fst_Ancestry_chr11_weighted_collapsed.txt", header = T) chr12<-read.table("Fst_Ancestry_chr12_weighted_collapsed.txt", header = T) chr13<-read.table("Fst_Ancestry_chr13_weighted_collapsed.txt", header = T) chr14<-read.table("Fst_Ancestry_chr14_weighted_collapsed.txt", header = T) chr15<-read.table("Fst_Ancestry_chr15_weighted_collapsed.txt", header = T) chr17<-read.table("Fst_Ancestry_chr17_weighted_collapsed.txt", header = T) chr18<-read.table("Fst_Ancestry_chr18_weighted_collapsed.txt", header = T) chr19<-read.table("Fst_Ancestry_chr19_weighted_collapsed.txt", header = T) chr20<-read.table("Fst_Ancestry_chr20_weighted_collapsed.txt", header = T) chr21<-read.table("Fst_Ancestry_chr21_weighted_collapsed.txt", header = T) chr24<-read.table("Fst_Ancestry_chr24_weighted_collapsed.txt", header = T) chr26<-read.table("Fst_Ancestry_chr26_weighted_collapsed.txt", header = T) chr28<-read.table("Fst_Ancestry_chr28_weighted_collapsed.txt", header = T) ####-------------------------- Figure 5.B ------------------------------#### ####---------------- FST vs weigthed ancestry - all chromosomes ----------------#### weigthed_ancestry<-rbind(chr1,chr1A,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr17,chr18,chr19,chr20,chr21,chr24,chr26,chr28) head(weigthed_ancestry) chr.fst.ancestry<-ggplot(weigthed_ancestry, aes(ANC,FST,colour=outlier_fst))+xlim(-1,1)+ #ylim(0,0.2)+ geom_point(size=3)+ labs(x="weighted ancestry", y="FST")# + chr.fst.ancestry ###----------------- Testing heteroscedasticity - homogeneity of variances ---------#### install.packages("olsrr") library(olsrr) # getting absolute values for ancestry difference head(weigthed_ancestry) weigthed_ancestry$allele_diff_abs<-abs(weigthed_ancestry$ANC) head(weigthed_ancestry) # testing heteroscedasticity (Bartlett's Test of Homogenity of Variances) hv_test<-ols_test_bartlett(weigthed_ancestry, 'allele_diff_abs', 'FST') hv_test # testing heteroscedasticity (Bartlett's Test of Homogenity of Variances) without the absolute value ols_test_bartlett(weigthed_ancestry, 'ANC', 'FST') ####-------------------------- Ancestry proportion --------------------------#### ####-------------------------- Figure 5.A ------------------------------#### ancestry_prop<-read.table("AncestryProportion-HI-Fst.txt", header = F) colnames(ancestry_prop)<-c("CHROM","POS","FST","Chrom_anc", "pos_anc", "HI", "outlier_fst") head(ancestry_prop) chr.plot<-ggplot(ancestry_prop, aes(HI, FST,colour=outlier_fst))+ geom_point(size=3) # chr.plot head(ancestry_prop) model.a <- glm(as.numeric(outlier_fst) ~ HI, na.action = "na.omit", family = binomial(link = "logit"), data = ancestry_prop) summary(model.a) ###### # # # ######---------------------------------------------------------------------------------------------------------------------------------- # Cite as : Cuevas et al., 2020. Intraspecific genomic variation and local adaptation in a young hybrid species. Molecular Ecology ######---------------------------------------------------------------------------------------------------------------------------------- # # # #======================================================================================================================================================================================###### ############################################################################################################################################################################################# #======================================================================================================================================================================================######