#====================================== # Allele frequency plots of SNPs # # European lobster (Homarus gammarus) # # Tom Jenkins | t.l.jenkins@exeter.ac.uk # # Jenkins et al. 2019 Evolutionary Applications # #====================================== ## Load packages lib = c("adegenet","ggplot2","RColorBrewer","pegas","poppr","reshape") lapply(lib, library, character.only=TRUE) # ----------------- # # # Data import and preparation # # ----------------- # ## Load RData SNP dataset load("../lobster_1278ind_79snps_40pop.RData") data_filt nPop(data_filt) summary(data_filt$pop) ## Combine temporal samples (Sar13 & Sar17 and Idr16 & Idr17) popNames(data_filt) = gsub("Sar13", "Sar", popNames(data_filt)) popNames(data_filt) = gsub("Sar17", "Sar", popNames(data_filt)) popNames(data_filt) = gsub("Idr16", "Idr", popNames(data_filt)) popNames(data_filt) = gsub("Idr17", "Idr", popNames(data_filt)) summary(data_filt$pop) ## Genind object with top assignment SNPs SNPs = c("58053","42395","53935","31462","15581","29889","7502","11291") snpdata = data_filt[loc=SNPs] snpdata locNames(snpdata) summary(snpdata$pop) ## Genind object with 8 outlier SNPs # outliers = c("11291","15581","32358","42395","53935","58053", # "65064","65576") # snpdata = data_filt[loc=outliers] # snpdata ## Genind object with LD SNPs # SNPs = c("15531","53935","28357","56785", # "22740","33066","51507","53052","53263","65064") # snpdata = data_filt[loc=SNPs] # snpdata # ----------------- # # # Visualise allele frequencies # # ----------------- # ## Calculate allele frequencies for each site al.freq = data.frame(rraf(snpdata, by_pop=TRUE, correction = FALSE)) ## Keep only the first of the two alleles for each SNP (since p=1-q). al.freq = al.freq[, seq(1, dim(al.freq)[2], 2)] ## Manually select columns # snps_to_keep = c("X15531.A","X53935.T","X28357.G","X56785.T", # "X22740.C","X33066.C","X51507.G","X53052.A","X53263.A","X65064.C") # al.freq = subset(al.freq, select=snps_to_keep) ## Add column of population labels al.freq$pop = rownames(al.freq) unique(al.freq$pop) ## Function to add country labels to population labels addcountry = function(x){ # If pop label is present function will output the country if(x=="Ale"|x=="The"|x=="Tor"|x=="Sky") y = " Aegean Sea " if(x=="Sar"|x=="Tar"|x=="Laz") y = " Central Mediterranean " if(x=="Jer") y = " Atlantic " if(x=="Idr") y = " Atlantic " if(x=="Hel") y = " Atlantic " if(x=="Cor"|x=="Hoo"|x=="Kil"|x=="Mul"|x=="Ven") y = " Atlantic " if(x=="Oos") y = " Atlantic " if(x=="Tro"|x=="Ber"|x=="Flo"|x=="Sin") y = " Atlantic " if(x=="Gul"|x=="Kav"|x=="Lys") y = " Atlantic " if(x=="Vig") y = " Atlantic " if(x=="Brd"|x=="Cro"|x=="Eye"|x=="Heb"|x=="Iom"|x=="Ios"|x=="Loo"|x=="Lyn"|x=="Ork"|x=="Pad"|x=="Pem"|x=="She"|x=="Sbs"|x=="Sul") y = " Atlantic " return(y) } ## Add country labels al.freq$country = sapply(al.freq$pop, addcountry) head(al.freq) ## Convert dataframe to long format df = melt(al.freq, id.vars=c("pop","country")) df$variable = gsub("X", "", df$variable) # ----------------- # # # Visualise using ggplot facet_wrap # # ----------------- # ## Define order of facets using the levels argument in factor unique(df$pop) site_order = c("Tro","Ber","Flo","Gul","Kav","Lys","Sin","Hel","Oos", "Cro","Brd","Eye", "She","Ork","Heb","Sul","Cor","Hoo","Iom","Ios","Jer","Kil", "Loo","Lyn","Mul","Pad","Pem","Sbs","Ven", "Idr","Vig", "Sar","Laz","Tar","Ale","Sky","The","Tor") df$pop_ord = factor(df$pop, levels=site_order) country_order = c(" Atlantic "," Central Mediterranean ", " Aegean Sea ") df$country = factor(df$country, levels=country_order) # snp_order = c("15531.A","53935.T","28357.G","56785.T", # "22740.C","33066.C","51507.G","53052.A","53263.A","65064.C") # df$variable = factor(df$variable, levels=snp_order) ## ggplot theme ggtheme = theme( # axis.text.x = element_text(colour="black", size=5, angle=90), axis.text.x = element_blank(), axis.text.y = element_text(colour="black", size=6), axis.title = element_text(colour="black", size=15), # facet labels strip.text = element_text(colour="black", size=14), # panel.grid.minor = element_line(colour="grey90"), # panel.grid.major = element_line(colour="grey90"), panel.background = element_rect(fill="white"), panel.grid.minor = element_blank(), panel.grid.major = element_blank(), panel.border = element_rect(colour="black", fill=NA, size=0.5), plot.title = element_text(hjust = 0.5, size=18), # title centered legend.title = element_blank(), legend.text = element_text(size=15), legend.position = "top", legend.justification = "centre" ) ## Create colour scheme col_scheme = c("#2c7bb6","#fdae61","#d7191c") ## Labels maplabs = c(" Atlantic "," Central Mediterranean "," Aegean Sea ") xlabs = levels(df$pop_ord) ; xlabs ## Plot barplot ggplot(data=df, aes(x=pop_ord, y=value, fill=country))+ geom_bar(stat="identity", colour="black", size=0.5)+ facet_wrap(~variable, scales="free")+ scale_y_continuous(limits=c(0,1), expand=c(0,0))+ scale_fill_manual(values=col_scheme, labels=maplabs)+ ylab("Allele frequency")+ xlab("Sampling site")+ ggtheme # ggsave("allele_freq.png", width=10, height=8, dpi=900) # ggsave("allele_freq.pdf", width=10, height=8) # ggsave("allele_freq_LD.png", width=10, height=8, dpi=900) # ggsave("allele_freq_LD.pdf", width=10, height=8) # ggsave("allele_freq_outlier.png", width=10, height=8, dpi=900) # ggsave("allele_freq_outlier.pdf", width=10, height=8)