# These file contains all commands used to analyse the data. # Most commands are executed in R, except for the commands needed to run Bayescan (and PGDspider). # The R commands depends on the R package SambaR, which can be downloaded from Github: https://github.com/mennodejong1986/SambaR # SAMBAR DATA ANALYSIS in R: source("/path/to/SAMBAR.txt") setwd("/path/to/working_directory/") getpackages(myrepos='http://cran.us.r-project.org') importdata(inputprefix="reindeer.new",sumstatsfile=FALSE,depthfile=TRUE,colourvector=c("darkgreen","blue","darkred"),samplefile="popfile.txt") filterdata(indmiss=0.1,snpmiss=0.15) findstructure() calcdistance() calcdiversity(nrsites=9472001) # reindeer # RUN BAYESCAN on Unix environment: # convert from ped to geste format: java -jar PGDSpider2-cli.jar -inputfile pheno.filter2.letter.ped -outputfile pheno.filter2.geste -spid PED2Bayescan.spid java -jar PGDSpider2-cli.jar -inputfile Busen_Norway.filter2.letter.ped -outputfile Busen_Norway.filter2.geste -spid PED2Bayescan.spid java -jar PGDSpider2-cli.jar -inputfile Barff_Norway.filter2.letter.ped -outputfile Barff_Norway.filter2.geste -spid PED2Bayescan.spid # run bayescan: mkdir bayescan_pheno mkdir bayescan_BusenNorway mkdir bayescan_BarffNorway /path/to/BayeScan2.1/bayescan_2.1 pheno.filter2.geste -od bayescan_pheno -threads 10 & /path/to/BayeScan2.1/bayescan_2.1 Busen_Norway.filter2.geste -od bayescan_BusenNorway -threads 10 & /path/to/BayeScan2.1/bayescan_2.1 Barff_Norway.filter2.geste -od bayescan_BarffNorway -threads 10 & # IMPORT Bayescan output into R: getbayescan(FDR=0.01,export=NULL,my_dataset="Busen_Norway",use_merge=TRUE,inputmapfile=NULL,baye_overwrite=TRUE,silent=FALSE,dolog10=TRUE) getbayescan(FDR=0.01,export=NULL,my_dataset="Barff_Norway",use_merge=TRUE,inputmapfile=NULL,baye_overwrite=TRUE,silent=FALSE,dolog10=TRUE) getbayescan(FDR=0.01,export=NULL,my_dataset="pheno",use_merge=TRUE,inputmapfile=NULL,baye_overwrite=TRUE,silent=FALSE,dolog10=TRUE) # SELECTION ANALYSES in R: inds$type<-ifelse(inds$pop=="Norway",FALSE,TRUE) selectionanalyses(export="pdf",do_meta=FALSE,do_pheno=TRUE,onlypooled=FALSE,add_bayescan=TRUE,bayescanFDR=0.01,logp_max=11,phenolabels=c("founder","source"),authors="DeJong_2020",species="reindeer",selclaim="yes",demography="founder_source",geneflow="absent") multiplotp_old(doexport="pdf",add_numbers=FALSE) plotpoverlap(mycomparison=1,mylog="",mycex=1,addnumbers=FALSE) # SIMULATIONS in R: # validate simulations tool: runsim_retained(n_loci=1100,n_selectedloci=1000,sel_coef=0.1,do_heatmap=TRUE,do_export=TRUE) runsim_fixation(n_loci=1100,ne_F=50,n_selectedloci=1000,do_export=TRUE,n_gen=500) runsim_fixationtime(n_loci=1100,n_selectedloci=1000,sel_coef=0.1,maf_source=0.15,do_export=TRUE) multimafburnin(mymaf_means=c(0.1,0.125,0.15,0.2),maf_vector=snps$maf_Norway[snps$filter]) # estimate power, specificity and false discovery rates of selection scans GWDS, OutFLANK and PCadapt for various demographic scenarios: setwd("/path/to/working_directory/simulationsdir") plotfdr(do_analysis=TRUE,do_export=TRUE,my_comp=1,loci_nr=100000,gen_nr=20,selected_nr=10000,vector_ne=c(20,40,60,80,100,120,160,200),my_ylims=NULL,samples_nr=30,selection_coefficient=0.1,sel_discrete=TRUE,dohaploidy=TRUE,myinputmatrix=NULL,plot_fdr=TRUE,add_bh=TRUE) plotfdr(do_analysis=FALSE,do_export=TRUE,my_comp=1,loci_nr=100000,gen_nr=20,selected_nr=10000,vector_ne=c(20,40,60,80,100,120,160,200),my_ylims=NULL,samples_nr=30,selection_coefficient=0.1,sel_discrete=TRUE,dohaploidy=TRUE,myinputmatrix="FDR_vs_correctionmethod.comp1.s0.1.nSNPs100000.samplesize30.ngen20",plot_fdr=TRUE,add_bh=TRUE) plotfdr(do_analysis=FALSE,do_export=TRUE,my_comp=1,loci_nr=100000,gen_nr=20,selected_nr=10000,vector_ne=c(20,40,60,80,100,120,160,200),my_ylims=NULL,samples_nr=30,selection_coefficient=0.1,sel_discrete=TRUE,dohaploidy=TRUE,myinputmatrix="FDR_vs_correctionmethod.comp1.s0.1.nSNPs100000.samplesize30.ngen20",plot_fdr=TRUE,add_bh=FALSE) plotfdr(do_analysis=TRUE,do_export=TRUE,my_comp=3,loci_nr=100000,gen_nr=20,selected_nr=10000,vector_ne=c(20,40,60,80,100,120,160,200),my_ylims=NULL,samples_nr=30,selection_coefficient=0.1,sel_discrete=TRUE,dohaploidy=TRUE,myinputmatrix=NULL,plot_fdr=TRUE,add_bh=TRUE) plotfdr(do_analysis=FALSE,do_export=TRUE,my_comp=3,loci_nr=100000,gen_nr=20,selected_nr=10000,vector_ne=c(20,40,60,80,100,120,160,200),my_ylims=NULL,samples_nr=30,selection_coefficient=0.1,sel_discrete=TRUE,dohaploidy=TRUE,myinputmatrix="FDR_vs_correctionmethod.comp3.s0.1.nSNPs100000.samplesize30.ngen20",plot_fdr=TRUE,add_bh=TRUE) plotfdr(do_analysis=FALSE,do_export=TRUE,my_comp=3,loci_nr=100000,gen_nr=20,selected_nr=10000,vector_ne=c(20,40,60,80,100,120,160,200),my_ylims=NULL,samples_nr=30,selection_coefficient=0.1,sel_discrete=TRUE,dohaploidy=TRUE,myinputmatrix="FDR_vs_correctionmethod.comp3.s0.1.nSNPs100000.samplesize30.ngen20",plot_fdr=TRUE,add_bh=FALSE) # estimate false discovery rate particularly for South Georgia reindeer demographic scenario: setwd("C:/Users/Menno_de_Jong/Documents/SNPdatasets/ReindeerNew/simulation02022021_reindeerscenario") outliersim(export=TRUE,mycorrection="bonf",nloc=60000,ngen=20,nesourcepop=1000,nefounderpop1=50,nefounderpop2=50,meansourcemaf=0.15,nfounders=10,samplesize=30,nselectedloci=1000,selcoef=0.1) # RUN BAYESCAN on Unix environment for simulated dataset: java -jar /ddn/data/fjsq43/Programs/fileconversion/PGDSpider_2.1.0.3/PGDSpider2-cli.jar -inputfile simdata.ped -outputfile simdata.geste -spid /ddn/data/fjsq43/Programs/fileconversion/PGDSpider_2.1.0.3/PED2Bayescan.spid mkdir simdata /ddn/data/fjsq43/Programs/selectionanalysis/BayeScan2.1/source/bayescan_2.1 simdata.geste -od simdata -threads 10 & multiplotp_sim(export="pdf",add_numbers=TRUE,nr_loci=60000,nr_selected=1000,silent=FALSE,comp_nr=3,logpmax=11,bayescanfile="simdata.bayescanout.fst.txt",FDR=0.01) setwd(mysambar$inputdatadir) plotvennsim(export="pdf",plotname="Simulateddata.Venn",myrealoutliers=my_realoutliers,mycomp_nr=3,bayescanfile="simdata.bayescanout.fst.txt",bayescanFDR=0.01) exp_nout(bayescanfile="simdata.bayescanout.fst.txt",export="pdf") # PLOTS FOR MAIN FIGURES: # some of the plots are not specifically listed here and need to be obtained from directories and output files generated with previous commands: # figure 1: ape_pcoa(labels=FALSE,method="nei",export="pdf",addlegend=TRUE,legendpos="right",legendcex=3.5,symbolsize=4.5) plot_ind_neimatrix(export="pdf") adegenet_dapc(export="pdf",use_genind=FALSE,min_explainedvariance=80,sumstatsplot=TRUE,maxclusters=3,transparent=0.6,symbolsize=4.5,addlegend=FALSE,legendpos="right",legendcex=2.5,use_current_dir=FALSE,silent=FALSE) plotmigration(export="pdf") # use information in supplementary table # figure 2: plot_diagram(export="pdf") pheno_Fdist_plot(doexport="pdf",allpairwise=FALSE,popnames=mysambar$populations,addoutliers=TRUE,mylabels=c("founder","source"),outlier_cex=3) # figure 4: plotvennsim(export="pdf",plotname="Simulateddata.Venn",mycomp_nr=3,bayescanfile="simdata.bayescanout.fst.txt",bayescanFDR=0.01) plotpvalues(export="pdf",test1="rfisherGWDS",test2="PCadapt",mydataset="pheno",mycex=1,addnumbers=FALSE,silent=TRUE,pmax=11,nsample=20000) plotpvalues(export="pdf",test1="OutFLANK",test2="PCadapt",mydataset="pheno",mycex=1,addnumbers=FALSE,silent=TRUE,pmax=11,nsample=20000) plotpvalues(export="pdf",test1="OutFLANK",test2="rfisherGWDS",mydataset="pheno",mycex=1,addnumbers=FALSE,silent=TRUE,pmax=11,nsample=20000) plotpvalues_sim(export="pdf",mycomparison=1,addnumbers=TRUE,nrloci=60000,nrselected=1000,my_comp_nr=3,pmax=11,nsample=20000) plotpvalues_sim(export="pdf",mycomparison=2,addnumbers=TRUE,nrloci=60000,nrselected=1000,my_comp_nr=3,pmax=11,nsample=20000) plotpvalues_sim(export="pdf",mycomparison=3,addnumbers=TRUE,nrloci=60000,nrselected=1000,my_comp_nr=3,pmax=11,nsample=20000)