# File generated on 06-01-2023. # This file contains R commands to reproduce figures of a paper with the working title: 'Range-wide whole-genome resequencing of the brown bear reveals drivers of intraspecies divergence' # In compliance with the CC0 license waiver, users are free to distribute, remix, adapt, and build upon these commands in any medium or format, with no conditions (i.e. attribution, etc.). # The commands need to be executed in the software R, in the order of appearance. # The commands expect to find in the working directory input files which can be downloaded from the Dryad repository, as detailed below. ##### CREATE AND DEFINE DIRECTORIES ###### # The haploid datasets (Y chromosome and mtDNA), as well as the haplodiploid dataset (X chromosome), should first be converted into diploid datasets (i.e., 0 becomes 0/0, and 1 becomes 1/1) using the script VCF_haploid2diploid.sh. # Diploid data in gzipped vcf-files can be converted into binary plink files (RAW and BIM) using the script CONVERT_vcf2ped.sh (or any other way). # Create on your pc the following directories with the following files: # path to directories: autodatadir <- "C:/path/to/auto_data" ###### specify here directory containing files Brown135_popfile.allinfo.txt, Brown135_auto.mysnps.thinned.20000.bim*, Brown135_auto.mysnps.thinned.20000.raw*, Distances.brown135_auto.thin100.txt, Brown135_mtDNA.allsites.globalfilter.bim*, Brown135_mtDNA.allsites.globalfilter.raw*, IQtree_MTgenome.newick.txt, Treemixout.4.covse.gz, Treemixout.4.edges.gz, Treemixout.4.vertices.gz xdatadir <- "C:/path/to/X_data" ###### specify here directory containing files Brown135_popfile.allinfo.txt, Brown135_xchrom.mysnps.thinned10000.bim*, Brown135_xchrom.mysnps.thinned10000.raw* ydatadir <- "C:/path/to/Y_data" ###### specify here directory containing files Brown135_popfile.allinfo.txt, Brown135_ychrom.mysnps.missfilter.bim*, Brown135_ychrom.mysnps.missfilter.raw*, IQtree_ychrom.newick.txt, RAxML_ychrom.newick.txt hedatadir <- "C:/path/to/he_data" ###### specify here directory containing files Brown135_popfile.allinfo.txt, mywindowhe.20000.Brown135He, myvcfsamples.txt, and bcftools.stats.he.txt microsatdir <- "C:/path/to/microsat_data" ###### specify here directory containing files Brown135_popfile.allinfo.txt, Brown135_3000microsat_calls.stru mscdir <- "C:/path/to/msc_data" ###### specify here directory containing files Brown135_popfile.allinfo.txt, haploblocktrees.3075loci.newick.txt,haploblocktrees.3075loci.ASTRALsupertree.newick.txt, Poptree.neiD.newick.txt and 36 files called twisst.HiC_scaffold_[1-36].quartetscores.txt, created by unzipping the file twisst.allscaffolds.quartetscores.zip # *files indicated with an asterix (raw and bim-files), should be created from vcf-file using the script CONVERT_vcf2ped.sh. # Do not forget to change the string 'C:/path/to/auto_data") with the actual path. # Subsequently, open an R session and copy paste the above six lines (which define directory path) into the R console. ##### LOAD FUNCTIONS AND DEPENDENCIES ##### # Load SambaR functions: source("https://github.com/mennodejong1986/SambaR/raw/master/Oldversions/SAMBAR_v1.08.txt") # Install all required R packages (for trouble-shooting, have a look at the SambaR manual on Github): getpackages() # Optionally, source additional functions: source("SUPERTREE_twisst_plotinR.txt") # needed only for section 'multi species coalescent (MSC) analyses'. Can be downloaded from: https://github.com/mennodejong1986/PopMSC source("VCF_darwindow.plotinR.txt") # needed only for section 'genome-wide heterozygosity'. Can be downloaded from: https://github.com/mennodejong1986/Darwindow # Define vectors with population names, colour and order of appearance in figures: #mypopnames <- c("ABCa","ABCbc","ABCcoast1","ABCcoast2","Alaska","Aleutian","Amur","Baltic","Black","CentreRus","CentreRus2","Europe","Himalaya","Hokkaido","HudsonBay","Kamtchatka","Kodiak","Magadan","MiddleEast","MidScand","NorthScand","polar","Sakhalin","SouthScand","Ural","Westcoast","Yakutia") mypopcolours <- c("mediumblue","blue4","darkorchid2","slateblue3","steelblue3","deepskyblue","darkgreen","gold3","black","greenyellow","yellowgreen","darkred","grey25","darkcyan","darkorchid4","cyan2","lightskyblue3","aquamarine2","grey70","indianred1","orange","cornsilk3","lightseagreen","orangered3","yellow2","mediumpurple2","limegreen") mypoporder <- c("MiddleEast","Himalaya","Europe","SouthScand","MidScand","NorthScand","Baltic","Ural","CentreRus2","CentreRus","Yakutia","Amur","Hokkaido","Sakhalin","Magadan","Kamtchatka","Aleutian","Kodiak","Alaska","ABCa","ABCbc","ABCcoast2","Westcoast","ABCcoast1","HudsonBay","polar","Black") # From here onwards, simply run the commands below to reproduce the figures. ###### AUTOSOMAL DATASET ###### setwd(autodatadir) importdata(inputprefix="Brown135_auto.mysnps.thinned.20000",samplefile="Brown135_popfile.allinfo.txt",sumstatsfile=FALSE,depthfile=FALSE,colourvector=mypopcolours,legend_cex=2,pop_order=mypoporder) filterdata(snpmiss=0.05,indmiss=0.2,legend_cex=2) backupdata("brownauto_data",overwrite=TRUE) # FIGURE 1a: # correct coordinates to allow to recentre map: shiftranges(longthreshold=15) excludepop(c("Black","polar")) # In case brown bear IUCN range data files have been downloaded and stored in autodatadir: # getshapepg() # getsingleshape(myprefix="data_0",mydir=autodatadir) # mapsingleshape(plotratios=1.8,export="pdf",mybg="lightblue4",boxcol="lightblue4",myfilter=mydata$LEGEND!="Extinct",mycolours="grey60",transparency_level=0.5,mycoord=mysambar$geocoord,mycoordcol=mysambar$geocols,mycex=2,showborders=FALSE,mylegend=c("historic range","present range"),legendcex=1.25) # FIGURE 1b: excludepop(c("Alaska","ABCa","ABCbc","ABCcoast1","ABCcoast2","HudsonBay","Westcoast","Kodiak","Aleutian","polar","Black","Hokkaido"),retain=FALSE) ape_pcoa(method="pi",do_mirror=c(TRUE,TRUE),addlegend=FALSE,export="pdf",axis1=1,axis2=2,symbolsize=4.5,exportname="Eurasia",addtitle="Eurasia") # FIGURE 1c: excludepop(c("Black","polar")) ape_pcoa(method="pi",do_mirror=c(TRUE,FALSE),addlegend=FALSE,export="pdf",axis1=1,axis2=2,symbolsize=4.5,exportname="WholeRange",addtitle="All") # FIGURE 1d: excludepop(c("Alaska","ABCa","ABCbc","ABCcoast1","ABCcoast2","HudsonBay","Westcoast","Kodiak","Aleutian"),retain=TRUE) ape_pcoa(method="pi",do_mirror=c(FALSE,FALSE),addlegend=FALSE,export="pdf",axis1=1,axis2=2,symbolsize=4.5,exportname="North America",addtitle="North America") # SUPPLEMENTAL FIGURE S1c: excludepop(c("Black","polar")) popprtree(export=NULL,dopathlength=FALSE,do_analysis=TRUE,mydistance="pi",mymethod="bionj",mytype="unrooted",labelangle="axial",labelcex=1.25,edgecolors=TRUE,tiplabels=TRUE,mylwd=2.5,addnodelabels=FALSE,calc_parsimony=FALSE,calc_likelihood=FALSE) plotindmatrix(n_bins=9,export="pdf",add_lab=FALSE,ind_matrix=mysambar$mytree_distmat,export_name="UncorrectedDistance",plottitle="Allele sharing distance",legendcex=2) plotindmatrix(n_bins=20,export="pdf",add_lab=FALSE,ind_matrix=mysambar$mytree_distmat,export_name="UncorrectedDistance",plottitle="Allele sharing distance",legendcex=1) # SUPPLEMENTAL FIGURE S1d: fareastpops<-c("Amur","Yakutia","Magadan","CentreRus","CentreRus2","Kamtchatka","Ural") popbool<- mysambar$poporder%in%fareastpops excludepop(fareastpops,retain=TRUE) inds$filter[inds$name=="CentralRussia3"]<- FALSE # remove one of a pair of related individuals inds$filter[inds$name=="CentralRussia4"]<- FALSE # remove one of a pair of related individuals ape_pcoa(method="pi",axis1=1,axis2=2,do_mirror=c(TRUE,FALSE),legendpos="topleft",legendcex=2,export="pdf",exportname="EasternEurasia",symbolsize=4.5,mylegend=mysambar$poporder[popbool],mylegendcol=mysambar$colorder[popbool],addtitle="FarEast") ape_pcoa(method="pi",axis1=1,axis2=2,do_mirror=c(TRUE,FALSE),legendpos="topleft",legendcex=2,export=NULL,exportname="EasternEurasia",symbolsize=4.5,mylegend=mysambar$poporder[popbool],mylegendcol=mysambar$colorder[popbool],addtitle="FarEast") # SUPPLEMENTAL FIGURE S1e: excludepop() calckinship(legend_cex=1) # SUPPLEMENTAL FIGURE S2a: europepops <- c("Ural","Baltic","NorthScand","MidScand","SouthScand","Europe","MiddleEast") popbool <- mysambar$poporder%in%europepops excludepop(europepops,retain=TRUE) ape_pcoa(method="pi",do_mirror=c(TRUE,FALSE),addlegend=TRUE,legendpos="bottom",mylegend=mysambar$poporder[popbool],mylegendcol=mysambar$colorder[popbool],export="pdf",axis1=1,axis2=2,symbolsize=4.5,exportname="Europe",addtitle="Europe") # SUPPLEMENTAL FIGURE S2b: excludepop() popprtree(export="pdf",nshorten=1,dopathlength=TRUE,do_analysis=TRUE,mydistance="euclidean",mymethod="OLS",mytype="unrooted",labelangle="axial",labelcex=1.25,edgecolors=TRUE,tiplabels=TRUE,mylwd=2.5,addnodelabels=FALSE,calc_parsimony=FALSE,calc_likelihood=FALSE) # FIGURE 1e-f: excludepop() popprtree(export="pdf",nshorten=1,dopathlength=TRUE,do_analysis=TRUE,mydistance="pi",mymethod="bionj",mytype="unrooted",labelangle="axial",labelcex=1.25,edgecolors=TRUE,tiplabels=TRUE,mylwd=2.5,addnodelabels=FALSE,calc_parsimony=FALSE,calc_likelihood=FALSE) # SUPPLEMENTAL FIGURE S2c: pathlength_scatterplot(export="pdf",yline=4.75,addlegend=TRUE,focuspop=NULL,excludepop=NULL,plotname="Pathlengths_vs_GeneticDistance",plottitle=NULL,symbolsize=1.5,legendcex=1,legendpos="topleft",plotrange=NULL) pathlength_scatterplot(export="pdf",yline=4.75,addlegend=TRUE,focuspop="polar",excludepop=NULL,plotname="Pathlengths_vs_GeneticDistance",plottitle="polar",symbolsize=1.5,legendcex=1,legendpos="topleft",plotrange=NULL) # FIGURE 2a (highlighted dendrogram): excludepop() popprtree(export=NULL,nshorten=1,dopathlength=FALSE,do_analysis=TRUE,mydistance="pi",mymethod="bionj",mytype="unrooted",labelangle="axial",labelcex=1.25,edgecolors=TRUE,tiplabels=TRUE,mylwd=2.5,addnodelabels=FALSE,calc_parsimony=FALSE,calc_likelihood=FALSE) cladedf<- data.frame("name"=c("Himalaya","MiddleEast","WestEurasia","WestEurasia","WestEurasia","EastEurasia","SW_Alaska","NorthAmerica"),"node"=c(70,210,149,137,140,167,192,194),"colour"=c("deepskyblue","blue","darkred","darkred","darkred","darkgreen","aquamarine2","darkorchid4")) plotcladetree(colourdf=cladedf,exportname="AutosomalSNPs") # FIGURE 2a (map): X <- mysambar$mytree_distmat[!inds$pop%in%c("polar","Black"),!inds$pop%in%c("polar","Black")] d <- dist(X) PC <- prcomp(X) M <- mst(d) excludepop(c("polar","Black")) plotlocations2(msnmatrix=M,mycex=1.25,my_col=inds$nodecol[inds$filter],my_pch=21,export="pdf",longthreshold=15,mybg="lightblue4",boxcol="lightblue4",addaxislabels=FALSE,mydeviation=1.25,exportname="Autosomal_clusters",silent=TRUE,mygeodf=NULL,mygeofile=NULL,dolabels=FALSE,showborders=FALSE) # FIGURE 4b: setwd(mysambar$demographydir) excludepop() inferdemography(do_LEA=FALSE,do_Dstats=FALSE,do_f3=TRUE,f3_triplets=NULL,f3_preparefiles=TRUE) # Replace in the next line 'plink' with the path to the software plink (in order to create binary plink files RAW and BIM): system(command="plink --file metapop.retainedinds.filter.number --chr-set 80 --allow-extra-chr --make-bed --recode A --out metapop.retainedinds.filter.number") inferdemography(do_LEA=FALSE,do_Dstats=FALSE,do_f3=TRUE,f3_triplets=NULL,f3_preparefiles=FALSE) # FIGURE 4g: excludepop() exportsambarfiles(do_pednumber=TRUE,do_pedletter=FALSE,do_treemix=TRUE,do_immanc=FALSE,per_pop=FALSE) # User can run Treemix using the two Unix commands below, but output has also been provided at Dryad and is expected to be present in autodatadir. # gzip Treemixinput.snpsfilter.txt # /treemix-1.13/bin/treemix -i Treemixinput.snpsfilter.txt.gz -m 1 -root Black -o Treemixout.4 setwd(mysambar$inputdatadir) plottreemix(prefix="Treemixout.4",export="pdf",myxmin=0,plotname="Treemixoutput.4",labelcex=1.5,xmargin=0.02) # FIGURE 4h: excludepop(c("polar","Black")) inds$filter[inds$name=="CentralRussia3"] <- FALSE # remove one of a pair of related individuals inds$filter[inds$name=="CentralRussia4"] <- FALSE # remove one of a pair of related individuals inds$filter[inds$name=="Norway2"] <- FALSE # remove one of a pair of related individuals inds$filter[inds$name=="Norway3"] <- FALSE # remove one of a pair of related individuals myleacolours <- mysambar$colorder2[mysambar$poporder2%in%c("ABCa","ABCbc","ABCcoast1","ABCcoast2","Kamtchatka","Kodiak","Alaska","Westcoast")] runLEA(mindemes=6,maxdemes=7,popnames=mysambar$populations,poporder=mysambar$poporder) LEAstructureplot(mymatrixlist=mysambar$leaqmatrixlist,mindemes=6,maxdemes=7,side4labels=NULL,borderlwd=0.5,bordercol="white",colourvector=myleacolours,dolabelcol=TRUE,export="pdf",popnames=mysambar$populations,poporder=mysambar$poporder,addindname=FALSE,order_on_longitude=FALSE,shortpop_nrchars=15,heightfactor=1.5,addindnr=FALSE,axiscex=0.75) # FIGURE 5d: excludepop() pop_neimatrix(export="pdf") getpoptree(doexport=TRUE,mymatrix=mysambar$popneimatrix,exportlabel="neiD",donj=TRUE,edgecolors=TRUE,mytype="unrooted",labelcex=0.75,mylwd=1,writetree=TRUE) # SUPPLEMENTAL FIGURE S14a: excludepop() comparetrees(do_analysis=TRUE,export="pdf",subtitle="Autosomal data",do_parsimony=TRUE,do_likelihood=FALSE) #### MITOCHONDRIAL-DNA PHYLOGENY ##### getdata("brownauto_data") # SUPPLEMENTARY FIGURE S3: excludepop("Black") anndf <- inds[inds$filter,c("name","MT","MTcol")] colnames(anndf) <- c("name","clade","col") popprtree(plotname="MTDNA_IQtree_WithoutBlack",inputnewickfile="IQtree_MTgenome.newick.txt",drop_ind=inds$name[!inds$filter],makelinear=TRUE,ann_df=anndf,ann_legendpos="bottomleft",axiscex=1.5,mutrate=1.9*10^-8,myrect=c(18000,30000,95000,98000),rectcol=c("grey70","grey70"),recttext=c("LGM","Beringian landbridge opens"),export="pdf",do_analysis=FALSE,mytype="phylogram",labelangle="axial",edgecolors=FALSE,tiplabels=TRUE,mylwd=1.5,addnodelabels=FALSE,maxnodelabel=100,nodelabelcex=1.5,showpop=FALSE,fullplotname=FALSE,calc_parsimony=FALSE,calc_likelihood=FALSE,labelcex=0.75) # FIGURE 2b (highlighted dendrogram): inds$filter <- inds$MT=="3a" mtcladedf <- data.frame("name"=paste("clade",c(1:9),sep=""),"node"=c(88,78,94,138,67,111,124,19,20),"colour"=c("darkorchid4","aquamarine2","darkgreen","darkcyan","yellowgreen","darkred","indianred1","greenyellow","greenyellow")) popprtree(export="pdf",plotname="MT3a",inputnewickfile="IQtree_MTgenome.newick.txt",drop_ind=inds$name[!inds$filter],makelinear=FALSE,axiscex=1.5,do_analysis=FALSE,mytype="unrooted",labelangle="axial",edgecolors=FALSE,tiplabels=TRUE,mylwd=1.5,addnodelabels=FALSE,fullplotname=FALSE,calc_parsimony=FALSE,calc_likelihood=FALSE,heightfac=0.08,labelcex=1.25) popprtree(export=NULL,plotname=NULL,inputnewickfile="IQtree_MTgenome.newick.txt",drop_ind=inds$name[!inds$filter],makelinear=TRUE,axiscex=1.5,mutrate=1.7*10^-8,do_analysis=FALSE,mytype="phylogram",labelangle="axial",edgecolors=FALSE,tiplabels=TRUE,mylwd=1.5,addnodelabels=FALSE,maxnodelabel=100,nodelabelcex=1.5,showpop=FALSE,fullplotname=FALSE,calc_parsimony=FALSE,calc_likelihood=FALSE,heightfac=0.08,labelcex=0.75) plotcladetree(mytree=mysambar$plottree,colourdf=mtcladedf,exportname="MT3a_clades",listtips=TRUE) # FIGURE 2b (map): range_filter <- !inds$pop%in%c("polar","Black") plotlocations2(mycex=1.25,export="pdf",exportname="MT3a_clades",longthreshold=15,my_pch=21,my_col=inds$nodecol[inds$filter],mybg="lightblue4",boxcol="lightblue4",addaxislabels=FALSE,mydeviation=1.25,silent=TRUE,mygeodf=NULL,mygeofile=NULL,dolabels=FALSE,showborders=FALSE,rangefilter=range_filter) ##### AUTOSOMAL MICROSATELLITE DATA ##### setwd(microsatdir) importmultidata(structurefile="Brown135_3000microsat_calls.stru",colourvector=mypopcolours,onerow=FALSE,popfile="Brown135_popfile.allinfo.txt",nloc=3007,pop_order=mypoporder) filtermultidata(indmiss=0.7,snpmiss=0.7) backupdata("brownmicrosat_data",overwrite=TRUE) # SUPPLEMENTARY FIGURE S2d: popprtree(export="pdf",dopathlength=TRUE,mydistance="pi",mymethod="bionj",mytype="unrooted",labelangle="axial",labelcex=1.25,edgecolors=TRUE,tiplabels=TRUE,mylwd=2.5,addnodelabels=FALSE,calc_parsimony=FALSE,calc_likelihood=FALSE) ##### X-CHROMOSOMAL DATA ###### setwd(xdatadir) importdata(inputprefix="Brown135_xchrom.thinned.10000",samplefile="Brown135_popfile.allinfo.txt",sumstatsfile=FALSE,depthfile=FALSE,colourvector=mypopcolours,legend_cex=2,pop_order=mypoporder) filterdata(snpmiss=0.25,indmiss=0.5,legend_cex=2,dohefilter=FALSE) backupdata("brownX_data",overwrite=TRUE) # FIGURE 1h-i: popprtree(export="pdf",nshorten=1,dopathlength=TRUE,pathlengthbreaks=c(-0.4,-0.08,-0.05,0.05,0.08,0.4),do_analysis=TRUE,mydistance="euclidean",mymethod="bionj",mytype="unrooted",labelangle="axial",labelcex=1.25,edgecolors=TRUE,tiplabels=TRUE,mylwd=2.5,addnodelabels=FALSE,calc_parsimony=FALSE,calc_likelihood=FALSE) # SUPPLEMENTAL FIGURE S14b: comparetrees(do_analysis=TRUE,export="pdf",subtitle="X-chromosomal data",do_parsimony=TRUE,do_likelihood=FALSE) ##### Y-CHROMOSOMAL DATA ###### setwd(ydatadir) importdata(inputprefix="Brown135_ychrom.mysnps.missfilter",samplefile="Brown135_popfile.allinfo.txt",sumstatsfile=FALSE,depthfile=TRUE,colourvector=mypopcolours,legend_cex=2,pop_order=mypoporder) filterdata(indmiss=0.7,snpmiss=0.5,dohefilter=FALSE,ychrom="HiC_scaffold_38",min_spacing=0) backupdata("brownY_data",overwrite=TRUE) # FIGURE 1g: excludepop(c("Black","polar","Himalaya","MiddleEast")) popprtree(plotname="RAXMLtree_WITHOUTcentralAsia",inputnewickfile="RAxML_ychrom.newick.txt",drop_ind=inds$name[!inds$filter],export="pdf",do_analysis=FALSE,mytype="unrooted",labelangle="axial",edgecolors=TRUE,tiplabels=TRUE,mylwd=1.5,fullplotname=FALSE,calc_parsimony=FALSE,calc_likelihood=FALSE) excludepop(c("Black","polar")) popprtree(plotname="RAXMLtree_WITHcentralAsia",inputnewickfile="RAxML_ychrom.newick.txt",drop_ind=inds$name[!inds$filter],export="pdf",do_analysis=FALSE,mytype="unrooted",labelangle="axial",edgecolors=TRUE,tiplabels=TRUE,mylwd=1.5,fullplotname=FALSE,calc_parsimony=FALSE,calc_likelihood=FALSE) # FIGURE 2d (highlighted dendrogram): excludepop(c("Black","polar","Himalaya","MiddleEast")) popprtree(export=NULL,plotname=file_name,inputnewickfile="IQtree_ychrom.newick.txt",makelinear=FALSE,genomeprop=0.00628067,drop_ind=inds$name[!inds$filter],axiscex=1.5,mutrate=1.3*10^-9,do_analysis=FALSE,mytype="unrooted",labelangle="axial",edgecolors=FALSE,tiplabels=TRUE,mylwd=1.5,addnodelabels=FALSE,maxnodelabel=100,nodelabelcex=1.5,showpop=FALSE,fullplotname=FALSE,calc_parsimony=FALSE,calc_likelihood=FALSE,heightfac=0.08,labelcex=0.75,ann_legend=FALSE) ycladedf <- data.frame("name"=paste("clade",c(1:9),sep=""),"node"=c(111,90,160,39,128,143,139,125,116),"colour"=c("grey50","darkorchid4","yellowgreen","midnightblue","darkgreen","darkred","indianred1","#009595","aquamarine2")) plotcladetree(mytree=mysambar$plottree,colourdf=ycladedf,exportname="Ychrom_clades",listtips=TRUE) # FIGURE 2d (map): excludepop(c("polar","Black")) inds$nodecol[inds$pop=="MiddleEast"] <- "blue" inds$nodecol[inds$pop=="Himalaya"] <- "deepskyblue" inds$latitude[inds$name=="Canada8"] <- inds$latitude[inds$name=="Canada8"]-2 # make this sample better visible plotlocations2(mycex=1.25,export="pdf",exportname="Ychrom_clades",longthreshold=15,my_pch=21,my_col=inds$nodecol[inds$filter],mybg="lightblue4",boxcol="lightblue4",addaxislabels=FALSE,mydeviation=1.25,silent=TRUE,mygeodf=NULL,mygeofile=NULL,dolabels=FALSE,showborders=FALSE) # FIGURE 2e: excludepop("Black") inds$filter[inds$name=="FarEast9"|inds$name=="polarSvalbard1"|inds$name=="polarSvalbard2"|inds$name=="polar4"] <- FALSE inds$nodecol[inds$pop=="polar"] <- "cornsilk3" anndf <- inds[inds$filter,c("name","node","nodecol")] colnames(anndf) <- c("name","clade","col") popprtree(plotname="IQtree_WITHcentralAsia",inputnewickfile="IQtree_ychrom.newick.txt",printnodes=FALSE,ann_df=anndf,ann_legendpos=NULL,makelinear=TRUE,interval_col="lightblue",interval_lwd=1.5,genomeprop=0.00628067,drop_ind=inds$name[!inds$filter],axiscex=1.5,mutrate=1.3*10^-9,myrect=c(18000,30000,95000,98000),rectcol=c("grey70","grey70"),recttext=c("LGM","Beringian landbridge opens"),export="pdf",do_analysis=FALSE,mytype="phylogram",labelangle="axial",edgecolors=FALSE,tiplabels=TRUE,mylwd=1.5,addnodelabels=TRUE,maxnodelabel=95,nodelabelcex=0.5,showpop=FALSE,fullplotname=FALSE,calc_parsimony=FALSE,calc_likelihood=FALSE,heightfac=0.08,labelcex=0.75,ann_legend=FALSE,nodelabelcol="red",marknode=TRUE) ##### UNCORRECTED GENOME-WIDE GENETIC DISTANCES ##### getdata("brownauto_data") add2inds2(myfile="Distances.brown135_auto.thin100.txt") # FIGURE 4d: mymat <- df2mat(myscore="d",doheatmap=TRUE,twosided=FALSE) mymat <- df2mat(myscore="d",doheatmap=TRUE,twosided=FALSE,nr_bins=8) neighbournetwork(mybg="white",doexport=TRUE,mydistmat=mymat,do_analysis=TRUE,plotname="Phylonetwork_neighbornet",mylwd=0.5,edgecolors=TRUE,silent=TRUE,dolabels=TRUE,labelcex=1,addlegend=TRUE,legendpos="bottomright",legendcex=1.25) # FIGURE 6c: excludepop(c("polar","Black")) pops2scatter(xscore="geodist",yscore="d",legendcex=1,export="pdf",addlegend="topleft",xlabel="Distance (100 km)",plotname="Dxy_vs_GeographicDistance") # FIGURE 6d: pops2scatter(xscore="meanhe",yscore="d",legendcex=1,export="pdf",addlegend=FALSE,xlabel="Heterozygosity (%)",plotname="Dxy_vs_He") ##### MULTI SPECIES COALESCENT (MSC) ANALYSES ##### # Expects that the functions of the script 'SUPERTREE_twisst_plotinR.txt' have been loaded using the source command (see above). setwd(mscdir) mydf1 <- read.table("Brown135_popfile.allinfo.txt",header=TRUE) mydf1$name <- paste(mydf1$name,"1",sep="-") mydf2 <- read.table("Brown135_popfile.allinfo.txt",header=TRUE) mydf2$name <- paste(mydf2$name,"2",sep="-") mydf <- rbind(mydf1,mydf2) # split populations Europe and Hokkaido according to mtDNA clade: mydf$pop <- ifelse(mydf$pop=="Hokkaido"|mydf$pop=="Europe",paste(mydf$pop,mydf$MT,sep=""),mydf$pop) mtdnapopnames <- c("ABCa","ABCbc","ABCcoast1","ABCcoast2","Alaska","Aleutian","Amur","Baltic","Black","CentreRus","CentreRus2","Europe1a","Europe1b","Europe3a","Himalaya","Hokkaido3a","Hokkaido3b","Hokkaido4","HudsonBay","Kamtchatka","Kodiak","Magadan","MiddleEast","MidScand","NorthScand","polar","Sakhalin","SouthScand","Ural","Westcoast","Yakutia") mtdnapopcolours <- c("mediumblue","blue4","darkorchid2","slateblue3","steelblue3","deepskyblue","darkgreen","gold3","black","greenyellow","yellowgreen","darkred","darkred","darkred","grey25","darkcyan","darkcyan","darkcyan","darkorchid4","cyan2","lightskyblue3","aquamarine2","grey70","indianred1","orange","cornsilk3","lightseagreen","orangered3","yellow2","mediumpurple2","limegreen") mtdnapoporder <- c("MiddleEast","Himalaya","Europe1a","Europe1b","Europe3a","SouthScand","MidScand","NorthScand","Baltic","Ural","CentreRus2","CentreRus","Yakutia","Amur","Hokkaido3a","Hokkaido3b","Hokkaido4","Sakhalin","Magadan","Kamtchatka","Aleutian","Kodiak","Alaska","ABCa","ABCbc","ABCcoast2","Westcoast","ABCcoast1","HudsonBay","polar","Black") # create SambaR objects (with dummy data): mymat <- matrix(sample(c(0,1),nrow(mydf)*100,replace=TRUE),nrow=nrow(mydf),ncol=100) rownames(mymat) <- mydf$name colnames(mymat) <- paste("snp",c(1:100),sep="_") mygl <- as.genlight(mymat) genlight2sambar(genlight_object="mygl",do_confirm=FALSE,popvector=as.character(mydf$pop),pop_order=mtdnapoporder,colourvector=mtdnapopcolours,major=1,minor=4,qcplot=TRUE,heplot=TRUE,silent=TRUE,allow_edit=FALSE) mydf$pop <- NULL mydf$popcol <- NULL inds <- merge(inds,mydf,by="name") # filter data (essential SambaR step even for dummy data): filterdata(min_spacing=0,min_mac=1,dohefilter=FALSE) backupdata("brownhaploblock_data",overwrite=TRUE) # FIGURE 5b: popprtree(inputnewickfile="haploblocktrees.3075loci.ASTRALsupertree.newick.txt",mytype="unrooted",do_analysis=FALSE,fullplotname=FALSE,plotname="Supertree_Astral_scored",labelangle="axial",legendpos="topleft",export=NULL,dopathlength=FALSE,addnodelabels=TRUE,calc_parsimony=FALSE,calc_likelihood=FALSE) mytree <- mysambar$plottree mytree$edge.length[is.na(mytree$edge.length)]<-0 mytree$node.label <- as.numeric(mytree$node.label) popprtree(inputtree=mytree,mytype="unrooted",do_analysis=FALSE,fullplotname=FALSE,plotname="Supertree_Astral_branchlengths",labelangle="axial",legendpos="topleft",export="pdf",dopathlength=FALSE,addnodelabels=TRUE,maxnodelabel=0.8,marknode=TRUE,marklabelcex=0.25,calc_parsimony=FALSE,calc_likelihood=FALSE,nshorten=2,labelcex=0.75) # FIGURE 5a: gettwisst_data(mywd=mysambar$inputdatadir,inputnewickfile="Poptree.neiD.newick.txt",myoutgroup="Black") twisst_zscores(mywd=mysambar$inputdatadir,inputnewickfile="Poptree.neiD.newick.txt",myoutgroup="Black") twisst_heatmap(zvec=c(5)) # FIGURE 5c: twisst_simplex(export="pdf",zthres=5,mycols=c("darkgreen","darkred"),nmax=10000,t3model=FALSE,mycex=0.2,myalpha=30,do_mirror=TRUE) # FIGURE 5e: twisst_barplot(p1="ABCbc",p2="Europe1b",p3="polar",p4="Black") twisst_barplot(p1="Europe1b",p2="Ural",p3="Himalaya",p4="MiddleEast",ylabel=NULL) twisst_barplot(p1="Europe1a",p2="Europe1b",p3="SouthScand",p4="NorthScand",ylabel=NULL) twisst_barplot(p1="Hokkaido3a",p2="Hokkaido4",p3="Amur",p4="Black",ylabel=NULL) ##### GENOME-WIDE HETEROZYGOSITY AND RUN OF HOMOZYGOSITY ANALYSES ##### # Expects that the functions of the script 'VCF_darwindow.plotinR.txt' have been loaded using the source command (see above). getdata("brownauto_data") window_size <- 20000 nr_windows <- 10 missmax <- 0.8 setwd(hedatadir) getwindowdata(maxmiss=0.8,suffix="20000.Brown135He.txt",vcfsamples="myvcfsamples.txt",samplefile="Brown135_popfile.allinfo.txt",annotated=FALSE,indlevel=TRUE,poplevel=FALSE,mydir=hedatadir) reorder_pop(poporder=mypoporder) exclude_pop(c("polar","Black")) calcwindowhe(maxmiss=missmax) calcregionhe(maxmiss=missmax,nwindows=nr_windows) findroh(silent=TRUE,hethreshold=0.03,min_rle_length=1,windowsize=window_size,nwindows=nr_windows) getrohlengths(windowsize=window_size,nrwindows=nr_windows) getrohbin() # SUPPLEMENTARY FIGURE S7a: indscatter(export="pdf",addlegend=FALSE,plotname="Darwindow_vs_Bcftools",xscore="he",yscore="bcfhe",xlabel="Darwindow heterozygosity (%)",ylabel="Bcftools heterozygosity (%)",yline=5.5,symbolsize=2.5,labcex=2.75,add_diagonal=TRUE) # FIGURE 6a: runindscaffold(do_export=TRUE,input_df1=dwd$hedf,input_df2=dwd$frohdf,plot_label="He_withROH",add_roh=TRUE,add_he=TRUE,add_dxy=FALSE,max_miss=missmax,n_windows=nr_windows,min_rle_len=1,window_size=window_size) # FIGURE 6b: mytextdf <-data.frame("name"=c("Italy1","Estonia1","Spain1","US2","Rumania1","ABC15","ABC16","TurkeyMartin"),"cex"=2,"pos"=c(2,2,4,1,2,4,3,2)) indscatter(export="pdf",textdf=mytextdf,plotname="Froh_mean_vs_sd",xscore="froh",yscore="froh_sd_scaffold",xlabel="F-roh mean",ylabel="F-roh sd (across chromosomes)",addlegend=FALSE,yline=5.75) # FIGURE 6e: dwd$ind$trait <- NA dwd$ind$trait <- ifelse(dwd$ind$pop=="Black"|dwd$ind$pop=="polar","OtherSpecies",dwd$ind$trait) dwd$ind$trait <- ifelse(dwd$ind$pop=="ABCa"|dwd$ind$pop=="ABCbc"|dwd$ind$pop=="Kodiak"|dwd$ind$pop=="Hokkaido"|dwd$ind$pop=="Sakhalin","Insular",dwd$ind$trait) dwd$ind$trait <- ifelse(dwd$ind$pop=="CentreRus2"|dwd$ind$pop=="Yakutia"|dwd$ind$pop=="CentreRus"|dwd$ind$pop=="Amur"|dwd$ind$pop=="Magadan","East Eurasia",dwd$ind$trait) dwd$ind$trait <- ifelse(dwd$ind$pop=="Aleutian"|dwd$ind$pop=="Alaska"|dwd$ind$pop=="Kamtchatka","Beringia",dwd$ind$trait) dwd$ind$trait <- ifelse(dwd$ind$pop=="HudsonBay"|dwd$ind$pop=="Westcoast"|dwd$ind$pop=="ABCcoast1"|dwd$ind$pop=="ABCcoast2","North America",dwd$ind$trait) dwd$ind$trait <- ifelse(dwd$ind$pop=="Europe"|dwd$ind$pop=="SouthScand"|dwd$ind$pop=="MidScand","Europe",dwd$ind$trait) dwd$ind$trait <- ifelse(dwd$ind$pop=="NorthScand"|dwd$ind$pop=="Ural"|dwd$ind$pop=="Baltic"|dwd$ind$pop=="MiddleEast","West Eurasia",dwd$ind$trait) dwd$ind$trait <- ifelse(dwd$ind$name=="TurkeyMartin"|dwd$ind$pop=="IranGudrun"|dwd$ind$pop=="Himalaya","Zoo",dwd$ind$trait) dwd$ind$trait <- ifelse(dwd$ind$name=="Spain1"|dwd$ind$name=="Italy1","MountainTop",dwd$ind$trait) group_plot(export="pdf",indscore="regionhe",plotlabel="combi",ylabel="Genome-wide He (%)",groupvector="trait",grouplevels=c("Europe","West Eurasia","East Eurasia","Beringia","North America","Insular","Zoo","MountainTop")) group_plot(export="pdf",indscore="lroh",plotlabel="lROH_group",ylabel="ROH-sum (Mb)",groupvector="trait",grouplevels=c("Europe","West Eurasia","East Eurasia","Beringia","North America","Insular","Zoo","MountainTop")) # FIGURE 6g: indscatter(export="pdf",plotname="Froh_vs_NrohPER100Mb_20000",xscore="nroh_per100Mb",yscore="froh",x_lim=c(0,100),y_lim=c(0,1),xlabel="# ROHs per 100Mb",ylabel="F-roh",legendpos="topleft",yline=4,addlegend=FALSE,symbolsize=3) # FIGURE 6h: rohbarplot(inputdf=dwd$frohbindf,ylabel="F-roh",plotname="ROHf_barplot",export="pdf",yline=3,mywidth=0.2,legendcex=1.75,addlegend=TRUE,mycolours=NULL,ypopcol=0.775,legx=20,legy=0.725)