# Packages ----
# Load order is kept as in the original script because it determines which
# package masks which function.
library(tibble)
library(splitstackshape)
library(dplyr)
library(stringr)
library(adegenet)
library(ggplot2)
library(radiator)
library(pophelper)
library(marmap)
library(phangorn)
library(poppr)
library(devtools)
library(stackr)
library(hierfstat)
library(pegas)
library(vcfR)
library(inbreedR)
library(pcadapt)
library(pacman)
library(qvalue)
library(reshape2)
library(plotrix)
library(maps)
library(mapdata)
library(mapproj)
library(maptools)
library(GISTools)
library(prettymapr)
library("rworldmap")
library("rworldxtra")
library(SDMTools)
library(SpatialEpi)
library(fields)
library(gdata)
library(vegan)
library("TeachingDemos")
library(raster)
library(gdistance)
library(sp)
library(rgdal)
library(parallel)
library(doParallel)
library(foreach)
library(diveRsity)
library(genepop)
# HWxtest is installed from GitHub. Only do so when it is not installed yet,
# instead of downloading and rebuilding it on every run of the script.
if (!requireNamespace("HWxtest", quietly = TRUE)) {
  devtools::install_github("wrengels/HWxtest", subdir = "pkg")
}
library(coda)
library(PopGenKit)
library(nortest)
library(ggpubr)
library(grur)

# Order individuals so that they are grouped by population ----
# full_dataset_SNP.txt has 103963 loci, 544 inds
denovo7.prep <- read.table(
  "full_dataset_SNP.txt",
  stringsAsFactors = FALSE,
  header = TRUE,
  dec = ","
)

# New column order (a permutation of columns 1..553 of the input table).
# Consecutive runs of the original hand-typed index list are written as ranges.
sample_column_order <- c(
  1:14,
  228:255,
  173:194,
  527:553,
  397:401, 407, 417, 427, 434:442,
  402:406, 408:416, 418:426, 428:433,
  312:334, 338, 345:348, 335:337, 339:344,
  127:155,
  360:389,
  443:472,
  195, 206, 210:215, 196:205, 207:209, 216,
  256:303,
  163:172,
  349:359,
  390:396,
  217:227,
  156:162,
  15:36,
  473, 480, 482:492, 494, 512, 517,
  304:311,
  493, 495:511, 513:516,
  51:110,
  37:50,
  111:126,
  474:479, 481, 518:526
)

# Keep every locus (row) of the input. The row count used to be hard-coded as
# 1:103963, which would silently add NA rows (or drop loci) if the input file
# ever had a different number of rows.
denovo7.ord <- denovo7.prep[, sample_column_order]

write.table(
  denovo7.ord,
  file = "C:/Users/aaa/bbb/ordered_samples_SNP.txt",
  sep = "\t",
  quote = FALSE,
  col.names = TRUE,
  row.names = FALSE
)

#genotypes of replicates
#replicate 1
# Replicate concordance ----

# Numeric code given to each genotype call. The codes are two orders of
# magnitude apart, so with at most 10 replicates the row sum equals
# k * code (k = 1..10) only when all called replicates share one genotype.
replicate_genotype_codes <- c(
  "0/0" = 10, "0/1" = 1000, "1/0" = 100000, "1/1" = 10000000
)

# Suffixes of the 10 replicate columns of one individual, in the order used
# throughout this script.
replicate_suffixes <- c(
  "", "_2", "_3", "_4", "_5", "_6.1", "_6", "_7", "_8.1", "_8"
)

# Replace each genotype string in `calls` (a data frame of character columns)
# by its numeric code; any other value is left untouched.
recode_replicate_genotypes <- function(calls, codes) {
  for (genotype in names(codes)) {
    calls[calls == genotype] <- codes[[genotype]]
  }
  calls
}

# Score genotype concordance among the replicate columns `sample_ids` of `vcf`.
#
# Each sample column is split on ":" and the first sub-field of every sample
# is taken as the genotype call (columns 1, 5, 9, ... of the split table,
# i.e. `fields_per_sample` sub-fields per sample).
#
# Returns a list with every intermediate object of the original script:
#   raw       - the selected sample columns
#   fields    - result of cSplit() on those columns
#   genotypes - the genotype sub-field of each sample
#   coded     - genotypes with the four genotype strings replaced by codes
#   numeric   - list of numeric vectors (values that are not codes become NA)
#   scored    - data frame of genotype strings plus
#               tot   : row sum of the codes (NA ignored)
#               cond  : 1 when all called replicates agree, else 0
#               cond1 : 1 when no replicate has a coded genotype, else 0
score_replicate_concordance <- function(vcf, sample_ids,
                                        codes = replicate_genotype_codes,
                                        fields_per_sample = 4) {
  raw <- vcf[, sample_ids, drop = FALSE]
  fields <- cSplit(raw, sample_ids, sep = ":", type.convert = FALSE)
  genotype_cols <- seq(1, by = fields_per_sample, length.out = length(sample_ids))
  genotypes <- fields[, genotype_cols, with = FALSE]

  coded <- recode_replicate_genotypes(as.data.frame(genotypes), codes)
  coded_numeric <- lapply(coded, as.numeric)
  scored <- as.data.frame(coded_numeric)
  call_cols <- names(scored)

  scored$tot <- rowSums(scored[call_cols], na.rm = TRUE)
  # Every total reachable with k identical calls (k = 1..number of replicates).
  concordant_totals <- as.vector(outer(seq_along(call_cols), codes))
  scored$cond <- as.numeric(scored$tot %in% concordant_totals)
  scored$cond1 <- as.numeric(scored$tot == 0)

  # Turn the codes back into genotype strings in the genotype columns ONLY.
  # Doing this on the whole data frame (as before) also rewrote `tot` whenever
  # a row total happened to equal a single code (one called replicate) and
  # turned the whole `tot` column into text.
  decoded <- scored[call_cols]
  for (genotype in names(codes)) {
    decoded[decoded == codes[[genotype]]] <- genotype
  }
  scored[call_cols] <- decoded

  list(
    raw = raw,
    fields = fields,
    genotypes = genotypes,
    coded = coded,
    numeric = coded_numeric,
    scored = scored
  )
}

#replicate 1
replicate1 <- score_replicate_concordance(
  denovo7.ord, paste0("SLPDX20", replicate_suffixes)
)
denovo7.ord.r1 <- replicate1$raw
split_r1 <- replicate1$fields
gen_r1 <- replicate1$genotypes
gen_r1.df <- replicate1$coded
gen_r1.df.num <- replicate1$numeric
gen_r1.df.num.df <- replicate1$scored

#replicate 2
replicate2 <- score_replicate_concordance(
  denovo7.ord, paste0("SLPDK16", replicate_suffixes)
)
denovo7.ord.r2 <- replicate2$raw
split_r2 <- replicate2$fields
gen_r2 <- replicate2$genotypes
gen_r2.df <- replicate2$coded
gen_r2.df.num <- replicate2$numeric
gen_r2.df.num.df <- replicate2$scored

#replicate 3
# Only the first steps are done here; the recoding of "1/1" and the scoring of
# replicate 3 follow directly below this section.
replicate3_ids <- paste0("SLPDK6", replicate_suffixes)
denovo7.ord.r3 <- denovo7.ord[, replicate3_ids, drop = FALSE]
split_r3 <- cSplit(denovo7.ord.r3, replicate3_ids, sep = ":", type.convert = FALSE)
gen_r3 <- split_r3[
  , seq(1, by = 4, length.out = length(replicate3_ids)), with = FALSE
]
gen_r3.df <- recode_replicate_genotypes(
  as.data.frame(gen_r3),
  replicate_genotype_codes[c("0/0", "0/1", "1/0")]
)
# Replicate 3 (continued): recode the last genotype and score concordance ----
gen_r3.df[gen_r3.df == "1/1"] <- 10000000
gen_r3.df.num <- lapply(gen_r3.df, as.numeric)
gen_r3.df.num.df <- as.data.frame(gen_r3.df.num)

# Genotype codes used for replicate 3. They are two orders of magnitude apart,
# so a row total equals k * code only when all called replicates agree.
replicate3_codes <- c(
  "0/0" = 10, "0/1" = 1000, "1/0" = 100000, "1/1" = 10000000
)
replicate3_calls <- names(gen_r3.df.num.df)

gen_r3.df.num.df$tot <- rowSums(gen_r3.df.num.df[replicate3_calls], na.rm = TRUE)
# cond: 1 when the total is k * code for a single code (k = 1..n replicates).
gen_r3.df.num.df$cond <- as.numeric(
  gen_r3.df.num.df$tot %in%
    as.vector(outer(seq_along(replicate3_calls), replicate3_codes))
)
# cond1: 1 when no replicate has a coded genotype.
gen_r3.df.num.df$cond1 <- as.numeric(gen_r3.df.num.df$tot == 0)

# Convert codes back to genotype strings in the genotype columns only, so that
# `tot` is not overwritten (and turned into text) when it equals a single code.
replicate3_decoded <- gen_r3.df.num.df[replicate3_calls]
for (genotype in names(replicate3_codes)) {
  replicate3_decoded[replicate3_decoded == replicate3_codes[[genotype]]] <- genotype
}
gen_r3.df.num.df[replicate3_calls] <- replicate3_decoded

vcf.df <- as.data.frame(denovo7.ord)

# Inspecting different populations for amplification success ----

# Summarise missing genotype calls ("./.") in `genotypes`, a table with one
# genotype column per sample and one row per locus.
#
# Returns a named list; the names are `prefix` plus the suffixes of the
# original script:
#   _miss            missing calls per locus
#   _pres            called samples per locus
#   _pres.df         _pres as a one-column data frame
#   .miss            missing calls per sample (named by genotype column)
#   .miss.df         .miss as a data frame
#   .miss.df.per     percentage missing per sample, with a Row.Names column
#   .miss.df.per.ord percentages sorted from most to least missing (vector)
#   .name            sample names in that order (last 2 characters of the
#                    genotype column name removed)
#   .name.df         .name as a data frame
#   .ind             character matrix of names and percentages
#   .ind.df          data frame of names and percentages; the percentage
#                    column is numeric (it used to be converted to text by
#                    going through the character matrix)
missing_call_summary <- function(genotypes, prefix) {
  n_samples <- ncol(genotypes)
  is_missing <- as.matrix(genotypes) == "./."

  missing_per_locus <- rowSums(is_missing, na.rm = TRUE)
  present_per_locus <- n_samples - missing_per_locus
  present_df <- as.data.frame(present_per_locus)
  names(present_df) <- paste0(prefix, "_pres")

  missing_per_sample <- colSums(is_missing, na.rm = TRUE)
  missing_df <- as.data.frame(missing_per_sample)
  names(missing_df) <- paste0(prefix, ".miss")

  percent_df <- (missing_df / nrow(genotypes)) * 100
  percent_df <- cbind(Row.Names = rownames(percent_df), percent_df)
  percent_ordered <- percent_df[order(-missing_per_sample), ]

  sample_names <- str_sub(percent_ordered$Row.Names, start = 1, end = -3)
  names_df <- as.data.frame(sample_names)
  names(names_df) <- paste0(prefix, ".name")
  percent_sorted <- percent_ordered[, -1]

  ind_names <- paste0(prefix, c(".name", ".miss.df.per.ord"))
  ind <- cbind(sample_names, percent_sorted)
  colnames(ind) <- ind_names
  ind_df <- as.data.frame(ind)
  ind_df[[2]] <- percent_sorted

  setNames(
    list(
      missing_per_locus, present_per_locus, present_df,
      missing_per_sample, missing_df, percent_df, percent_sorted,
      sample_names, names_df, ind, ind_df
    ),
    paste0(prefix, c(
      "_miss", "_pres", "_pres.df",
      ".miss", ".miss.df", ".miss.df.per", ".miss.df.per.ord",
      ".name", ".name.df", ".ind", ".ind.df"
    ))
  )
}

# Run the amplification check for one population.
#
# Selects the columns `sample_ids` of `vcf`, splits every column on ":" and
# keeps the first sub-field of each sample as its genotype (columns 1, 5, 9,
# ... of the split table, `fields_per_sample` sub-fields per sample).
#
# Side effect: creates in `envir` the objects `<prefix>`, `<prefix>_sp`,
# `<prefix>_sp_gen` and everything returned by missing_call_summary(), i.e.
# the same object names the script created by hand for each population.
#
# Returns the percentage of successfully called genotypes over all loci and
# samples of the population ("percentage good amp").
inspect_population <- function(sample_ids, prefix, vcf = vcf.df,
                               fields_per_sample = 4,
                               envir = parent.frame()) {
  calls <- vcf[, sample_ids, drop = FALSE]
  fields <- cSplit(calls, sample_ids, sep = ":", type.convert = FALSE)
  genotype_cols <- seq(1, by = fields_per_sample, length.out = length(sample_ids))
  genotypes <- fields[, genotype_cols, with = FALSE]

  objects <- c(
    setNames(
      list(calls, fields, genotypes),
      paste0(prefix, c("", "_sp", "_sp_gen"))
    ),
    missing_call_summary(genotypes, prefix)
  )
  list2env(objects, envir = envir)

  present <- objects[[paste0(prefix, "_pres")]]
  (sum(present) / (nrow(vcf) * length(sample_ids))) * 100
}

#POR
#percentage good amp:
inspect_population(
  c("SLPOR1", "SLPOR10", "SLPOR11", "SLPOR5", "SLPOR6"),
  "Por"
)

#IdBase 15
#percentage good amp:
inspect_population(
  c(
    "SLPDC52", "SLPDC53", "SLPDC54", "SLPDC55", "SLPDC56", "SLPDC57",
    "SLPDC58", "SLPDC59", "SLPDC60", "SLPDC62", "SLPDC63", "SLPDC64",
    "SLPDC65", "SLPDC66", "SLPDC68", "SLPDC70", "SLPDC71", "SLPDC72",
    "SLPDC73", "SLPDC74", "SLPDC75", "SLPDC76", "SLPDC77", "SLPDC78",
    "SLPDC79", "SLPDC80", "SLPDC81", "SLPDC82"
  ),
  "Id15"
)

#IdBase 16
#percentage good amp:
inspect_population(
  c(
    "SLCON10", "SLCON2", "SLCON3", "SLCON4", "SLCON5", "SLCON6", "SLCON7",
    "SLCON8", "SLCON9"
  ),
  "Id16"
)

#IdBase 17
#percentage good amp:
inspect_population(
  c(
    "SLELL1", "SLELL18", "SLELL19", "SLELL20", "SLELL21", "SLELL22",
    "SLELL23", "SLELL24", "SLELL26", "SLELL29", "SLELL31", "SLELL4",
    "SLELL9"
  ),
  "Id17"
)

#IdBase 20
id20_ids <- c(
  "SLTRM1", "SLTRM10", "SLTRM11", "SLTRM12", "SLTRM13", "SLTRM14",
  "SLTRM15", "SLTRM16", "SLTRM17", "SLTRM18", "SLTRM19", "SLTRM2",
  "SLTRM22", "SLTRM23", "SLTRM24", "SLTRM25", "SLTRM26", "SLTRM27",
  "SLTRM28", "SLTRM29", "SLTRM3", "SLTRM4", "SLTRM5", "SLTRM6", "SLTRM7",
  "SLTRM8", "SLTRM9"
)
#percentage good amp:
inspect_population(id20_ids, "Id20")

#IdBase 20 - excluding SLTRM10 SLTRM27 SLTRM28 which are digitata
# (re-uses the Id20 prefix, so the Id20* objects created above are replaced)
#percentage good amp:
inspect_population(
  setdiff(id20_ids, c("SLTRM10", "SLTRM27", "SLTRM28")),
  "Id20"
)

#IdBase 21
#percentage good amp:
inspect_population(
  c(
    "SLPMI1", "SLPMI10", "SLPMI12", "SLPMI13", "SLPMI14", "SLPMI2",
    "SLPMI3", "SLPMI4", "SLPMI5", "SLPMI6", "SLPMI7", "SLPMI8", "SLPMI83",
    "SLPMI84", "SLPMI85", "SLPMI86", "SLPMI9"
  ),
  "Id21"
)

#IdBase 22
#percentage good amp:
inspect_population(
  c(
    "SLPMI15", "SLPMI16", "SLPMI17", "SLPMI18", "SLPMI19", "SLPMI20",
    "SLPMI21", "SLPMI22", "SLPMI23", "SLPMI24", "SLPMI26", "SLPMI27",
    "SLPMI28", "SLPMI29", "SLPMI30", "SLPMI31", "SLPMI32", "SLPMI33",
    "SLPMI35", "SLPMI36", "SLPMI37", "SLPMI38", "SLPMI39", "SLPMI40",
    "SLPMI42", "SLPMI43", "SLPMI44", "SLPMI45", "SLPMI46"
  ),
  "Id22"
)

#IdBase 23 - with repeats
#percentage good amp:
inspect_population(
  c(
    "SLPDX1", "SLPDX11", "SLPDX12", "SLPDX13", "SLPDX14", "SLPDX15",
    "SLPDX16", "SLPDX17", "SLPDX18", "SLPDX19", "SLPDX2", "SLPDX20",
    "SLPDX20_2", "SLPDX20_3", "SLPDX20_4", "SLPDX20_5", "SLPDX20_6.1",
    "SLPDX20_6", "SLPDX20_7", "SLPDX20_8.1", "SLPDX20_8", "SLPDX21",
    "SLPDX22", "SLPDX3", "SLPDX4", "SLPDX5", "SLPDX6", "SLPDX8"
  ),
  "Id23"
)

#IdBase 23 - with the best repeat (least missing data)
#percentage good amp:
inspect_population(
  c(
    "SLPDX1", "SLPDX11", "SLPDX12", "SLPDX13", "SLPDX14", "SLPDX15",
    "SLPDX16", "SLPDX17", "SLPDX18", "SLPDX19", "SLPDX2", "SLPDX20_8.1",
    "SLPDX21", "SLPDX22", "SLPDX3", "SLPDX4", "SLPDX5", "SLPDX6", "SLPDX8"
  ),
  "Id23BestRep"
)

#IdBase 24
#percentage good amp:
inspect_population(
  c(
    "SLPDX26", "SLPDX28", "SLPDX29", "SLPDX30", "SLPDX31", "SLPDX32",
    "SLPDX33", "SLPDX34", "SLPDX35"
  ),
  "Id24"
)

#IdBase 25
#percentage good amp:
inspect_population(
  c(
    "SLBIZ10", "SLBIZ11", "SLBIZ12", "SLBIZ13", "SLBIZ14", "SLBIZ15",
    "SLBIZ16", "SLBIZ17", "SLBIZ19", "SLBIZ2", "SLBIZ20", "SLBIZ21",
    "SLBIZ22", "SLBIZ23", "SLBIZ24", "SLBIZ25", "SLBIZ26", "SLBIZ27",
    "SLBIZ28", "SLBIZ29", "SLBIZ30", "SLBIZ31", "SLBIZ32", "SLBIZ4",
    "SLBIZ5", "SLBIZ6", "SLBIZ7", "SLBIZ8", "SLBIZ9"
  ),
  "Id25"
)

#IdBase 26
#percentage good amp:
inspect_population(
  c(
    "SLPER57", "SLPER58", "SLPER59", "SLPER60", "SLPER61", "SLPER62",
    "SLPER63", "SLPER64", "SLPER65", "SLPER66", "SLPER67", "SLPER68",
    "SLPER69", "SLPER70", "SLPER71", "SLPER72", "SLPER73", "SLPER74",
    "SLPER75", "SLPER76", "SLPER77", "SLPER78", "SLPER79", "SLPER80",
    "SLPER81", "SLPER82", "SLPER83", "SLPER84", "SLPER85", "SLPER86"
  ),
  "Id26"
)

#IdBase 27
#percentage good amp:
inspect_population(
  c(
    "SLSTA1", "SLSTA10", "SLSTA11", "SLSTA12", "SLSTA13", "SLSTA14",
    "SLSTA15", "SLSTA16", "SLSTA17", "SLSTA18", "SLSTA19", "SLSTA2",
    "SLSTA20", "SLSTA21", "SLSTA22", "SLSTA23", "SLSTA24", "SLSTA25",
    "SLSTA26", "SLSTA27", "SLSTA28", "SLSTA29", "SLSTA3", "SLSTA30",
    "SLSTA4", "SLSTA5", "SLSTA6", "SLSTA7", "SLSTA8", "SLSTA9"
  ),
  "Id27"
)

#IdBase 28
#percentage good amp:
inspect_population(
  c(
    "SLHEF1", "SLHEF2", "SLHEF3", "SLHEF4", "SLHEF5", "SLHEF6", "SLHEF7",
    "SLHEF8"
  ),
  "Id28"
)

#IdBase 29
# Only the column selection and the field split are done here; the genotype
# extraction and the missing-data summary for Id29 follow directly below.
id29_ids <- c(
  "SLHEF10", "SLHEF11", "SLHEF12", "SLHEF13", "SLHEF14", "SLHEF15",
  "SLHEF16", "SLHEF17", "SLHEF18", "SLHEF19", "SLHEF20", "SLHEF21",
  "SLHEF22", "SLHEF9"
)
Id29 <- vcf.df[, id29_ids, drop = FALSE]
Id29_sp <- cSplit(Id29, id29_ids, sep = ":", type.convert = FALSE)
# Missing-data summaries for IdBase 29 (from Id29_sp_gen onwards), IdBase 30,
# IdBase 30 best repeat, and IdBase 31 (up to Id31.name.df).
#
# Every group follows the same recipe and creates the same family of objects:
#   *_sp_gen        first ":"-separated field of every sample column
#   *_miss, *_pres  per-row counts of "./." and of non-"./." entries
#   *.miss(.df)     per-sample (column) count of "./." entries
#   *.miss.df.per   that count as a percentage of the number of rows
#   *.name, *.ind   sample names and percentages, ordered by decreasing count
#
# Sample names are listed once per group and reused for subset() and cSplit();
# sample counts come from ncol(*_sp_gen) instead of being typed in by hand.

# Number of "./." entries in a vector (NA entries are not counted).
count_missing <- function(x) sum(x == "./.", na.rm = TRUE)

# IdBase 29 (Id29 and Id29_sp are created by the statements preceding this
# section, so the column positions are kept as literal values here)
Id29_sp_gen <- Id29_sp[, c(1, 5, 9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53)]
Id29_miss <- apply(Id29_sp_gen, 1, count_missing)
Id29_pres <- ncol(Id29_sp_gen) - Id29_miss
Id29_pres.df <- as.data.frame(Id29_pres)
# percentage good amp:
(sum(Id29_pres) / (nrow(vcf.df) * ncol(Id29_sp_gen))) * 100
# missing data by ind
Id29.miss <- apply(Id29_sp_gen, 2, count_missing)
Id29.miss.df <- as.data.frame(Id29.miss)
Id29.miss.df.per <- (Id29.miss.df / nrow(Id29_sp_gen)) * 100
Id29.miss.df.per <- cbind(Row.Names = rownames(Id29.miss.df.per), Id29.miss.df.per)
Id29.miss.df.per.ord <- Id29.miss.df.per[order(-Id29.miss), ]
# Drop the last two characters of each column name (presumably the "_1"
# suffix appended by cSplit - confirm).
Id29.name <- str_sub(Id29.miss.df.per.ord$Row.Names, start = 1, end = -3)
Id29.name.df <- as.data.frame(Id29.name)
Id29.miss.df.per.ord <- Id29.miss.df.per.ord[, -1]
Id29.ind <- cbind(Id29.name, Id29.miss.df.per.ord)
Id29.ind.df <- as.data.frame(Id29.ind)

# IdBase 30 - with repeats
Id30_samples <- c(
  "SLPDK1", "SLPDK10", "SLPDK11", "SLPDK12", "SLPDK13", "SLPDK14", "SLPDK15",
  "SLPDK16", "SLPDK16_2", "SLPDK16_3", "SLPDK16_4", "SLPDK16_5",
  "SLPDK16_6.1", "SLPDK16_6", "SLPDK16_7", "SLPDK16_8.1", "SLPDK16_8",
  "SLPDK17", "SLPDK18", "SLPDK19", "SLPDK2", "SLPDK20", "SLPDK21", "SLPDK22",
  "SLPDK23", "SLPDK24", "SLPDK25", "SLPDK26", "SLPDK27", "SLPDK28", "SLPDK29",
  "SLPDK3", "SLPDK30", "SLPDK4", "SLPDK5", "SLPDK6", "SLPDK6_2", "SLPDK6_3",
  "SLPDK6_4", "SLPDK6_5", "SLPDK6_6.1", "SLPDK6_6", "SLPDK6_7", "SLPDK6_8.1",
  "SLPDK6_8", "SLPDK7", "SLPDK8", "SLPDK9"
)
Id30 <- subset(vcf.df, select = Id30_samples)
Id30_sp <- cSplit(Id30, Id30_samples, sep = ":", type.convert = FALSE)
# Columns 1, 5, 9, ...: the first field of each sample, assuming every sample
# column splits into exactly four fields (same assumption as the literal
# positions used previously). with = FALSE because cSplit returns a data.table.
Id30_sp_gen <- Id30_sp[, seq(1, by = 4, length.out = length(Id30_samples)), with = FALSE]
Id30_miss <- apply(Id30_sp_gen, 1, count_missing)
Id30_pres <- ncol(Id30_sp_gen) - Id30_miss
Id30_pres.df <- as.data.frame(Id30_pres)
# percentage good amp:
(sum(Id30_pres) / (nrow(vcf.df) * ncol(Id30_sp_gen))) * 100
# missing data by ind
Id30.miss <- apply(Id30_sp_gen, 2, count_missing)
Id30.miss.df <- as.data.frame(Id30.miss)
Id30.miss.df.per <- (Id30.miss.df / nrow(Id30_sp_gen)) * 100
Id30.miss.df.per <- cbind(Row.Names = rownames(Id30.miss.df.per), Id30.miss.df.per)
Id30.miss.df.per.ord <- Id30.miss.df.per[order(-Id30.miss), ]
Id30.name <- str_sub(Id30.miss.df.per.ord$Row.Names, start = 1, end = -3)
Id30.name.df <- as.data.frame(Id30.name)
Id30.miss.df.per.ord <- Id30.miss.df.per.ord[, -1]
Id30.ind <- cbind(Id30.name, Id30.miss.df.per.ord)
Id30.ind.df <- as.data.frame(Id30.ind)

# IdBase 30 - with the best repeat (least missing data)
Id30BestRep_samples <- c(
  "SLPDK1", "SLPDK10", "SLPDK11", "SLPDK12", "SLPDK13", "SLPDK14", "SLPDK15",
  "SLPDK16_4", "SLPDK17", "SLPDK18", "SLPDK19", "SLPDK2", "SLPDK20",
  "SLPDK21", "SLPDK22", "SLPDK23", "SLPDK24", "SLPDK25", "SLPDK26", "SLPDK27",
  "SLPDK28", "SLPDK29", "SLPDK3", "SLPDK30", "SLPDK4", "SLPDK5", "SLPDK6_7",
  "SLPDK7", "SLPDK8", "SLPDK9"
)
Id30BestRep <- subset(vcf.df, select = Id30BestRep_samples)
Id30BestRep_sp <- cSplit(Id30BestRep, Id30BestRep_samples, sep = ":", type.convert = FALSE)
Id30BestRep_sp_gen <- Id30BestRep_sp[, seq(1, by = 4, length.out = length(Id30BestRep_samples)), with = FALSE]
Id30BestRep_miss <- apply(Id30BestRep_sp_gen, 1, count_missing)
Id30BestRep_pres <- ncol(Id30BestRep_sp_gen) - Id30BestRep_miss
Id30BestRep_pres.df <- as.data.frame(Id30BestRep_pres)
# percentage good amp:
(sum(Id30BestRep_pres) / (nrow(vcf.df) * ncol(Id30BestRep_sp_gen))) * 100
# missing data by ind
Id30BestRep.miss <- apply(Id30BestRep_sp_gen, 2, count_missing)
Id30BestRep.miss.df <- as.data.frame(Id30BestRep.miss)
Id30BestRep.miss.df.per <- (Id30BestRep.miss.df / nrow(Id30BestRep_sp_gen)) * 100
Id30BestRep.miss.df.per <- cbind(Row.Names = rownames(Id30BestRep.miss.df.per), Id30BestRep.miss.df.per)
Id30BestRep.miss.df.per.ord <- Id30BestRep.miss.df.per[order(-Id30BestRep.miss), ]
Id30BestRep.name <- str_sub(Id30BestRep.miss.df.per.ord$Row.Names, start = 1, end = -3)
Id30BestRep.name.df <- as.data.frame(Id30BestRep.name)
Id30BestRep.miss.df.per.ord <- Id30BestRep.miss.df.per.ord[, -1]
Id30BestRep.ind <- cbind(Id30BestRep.name, Id30BestRep.miss.df.per.ord)
Id30BestRep.ind.df <- as.data.frame(Id30BestRep.ind)

# IdBase 31
Id31_samples <- c(
  "SLCAL1", "SLCAL10", "SLCAL2", "SLCAL3", "SLCAL4", "SLCAL5", "SLCAL6",
  "SLCAL7", "SLCAL8", "SLCAL9"
)
Id31 <- subset(vcf.df, select = Id31_samples)
Id31_sp <- cSplit(Id31, Id31_samples, sep = ":", type.convert = FALSE)
Id31_sp_gen <- Id31_sp[, seq(1, by = 4, length.out = length(Id31_samples)), with = FALSE]
Id31_miss <- apply(Id31_sp_gen, 1, count_missing)
Id31_pres <- ncol(Id31_sp_gen) - Id31_miss
Id31_pres.df <- as.data.frame(Id31_pres)
# percentage good amp:
(sum(Id31_pres) / (nrow(vcf.df) * ncol(Id31_sp_gen))) * 100
# missing data by ind
Id31.miss <- apply(Id31_sp_gen, 2, count_missing)
Id31.miss.df <- as.data.frame(Id31.miss)
Id31.miss.df.per <- (Id31.miss.df / nrow(Id31_sp_gen)) * 100
Id31.miss.df.per <- cbind(Row.Names = rownames(Id31.miss.df.per), Id31.miss.df.per)
Id31.miss.df.per.ord <- Id31.miss.df.per[order(-Id31.miss), ]
Id31.name <- str_sub(Id31.miss.df.per.ord$Row.Names, start = 1, end = -3)
Id31.name.df <- as.data.frame(Id31.name)
# Missing-data summaries for IdBase 31 (final three statements), 32, 32 best
# repeat, 34, 35, 35 best repeat, 36, 37, and IdBase 38 (up to Id38_pres).
#
# Every group follows the same recipe and creates the same family of objects:
#   *_sp_gen        first ":"-separated field of every sample column
#   *_miss, *_pres  per-row counts of "./." and of non-"./." entries
#   *.miss(.df)     per-sample (column) count of "./." entries
#   *.miss.df.per   that count as a percentage of the number of rows
#   *.name, *.ind   sample names and percentages, ordered by decreasing count
#
# Sample names are listed once per group and reused for subset() and cSplit();
# sample counts come from ncol(*_sp_gen) instead of being typed in by hand.
# Genotype columns are positions 1, 5, 9, ... of the split table, i.e. the
# first field of each sample assuming four fields per sample; with = FALSE is
# needed because cSplit returns a data.table.

# Number of "./." entries in a vector (NA entries are not counted).
count_missing <- function(x) sum(x == "./.", na.rm = TRUE)

# IdBase 31 (continued): pair the sample names with their percentages
Id31.miss.df.per.ord <- Id31.miss.df.per.ord[, -1]
Id31.ind <- cbind(Id31.name, Id31.miss.df.per.ord)
Id31.ind.df <- as.data.frame(Id31.ind)

# IdBase 32
Id32_samples <- c(
  "SLPER102", "SLPER103", "SLPER108", "SLPER109", "SLPER110", "SLPER111",
  "SLPER112", "SLPER115", "SLPER117", "SLPER118", "SLPER120", "SLPER92",
  "SLPER93", "SLPER94_6", "SLPER94_8", "SLPER96", "SLPER98", "SLPER99"
)
Id32 <- subset(vcf.df, select = Id32_samples)
Id32_sp <- cSplit(Id32, Id32_samples, sep = ":", type.convert = FALSE)
Id32_sp_gen <- Id32_sp[, seq(1, by = 4, length.out = length(Id32_samples)), with = FALSE]
Id32_miss <- apply(Id32_sp_gen, 1, count_missing)
Id32_pres <- ncol(Id32_sp_gen) - Id32_miss
Id32_pres.df <- as.data.frame(Id32_pres)
# percentage good amp:
(sum(Id32_pres) / (nrow(vcf.df) * ncol(Id32_sp_gen))) * 100
# missing data by ind
Id32.miss <- apply(Id32_sp_gen, 2, count_missing)
Id32.miss.df <- as.data.frame(Id32.miss)
Id32.miss.df.per <- (Id32.miss.df / nrow(Id32_sp_gen)) * 100
Id32.miss.df.per <- cbind(Row.Names = rownames(Id32.miss.df.per), Id32.miss.df.per)
Id32.miss.df.per.ord <- Id32.miss.df.per[order(-Id32.miss), ]
# Drop the last two characters of each column name (presumably the "_1"
# suffix appended by cSplit - confirm).
Id32.name <- str_sub(Id32.miss.df.per.ord$Row.Names, start = 1, end = -3)
Id32.name.df <- as.data.frame(Id32.name)
Id32.miss.df.per.ord <- Id32.miss.df.per.ord[, -1]
Id32.ind <- cbind(Id32.name, Id32.miss.df.per.ord)
Id32.ind.df <- as.data.frame(Id32.ind)

# IdBase 32 - with the best repeat (least missing data)
Id32BestRep_samples <- c(
  "SLPER102", "SLPER103", "SLPER108", "SLPER109", "SLPER110", "SLPER111",
  "SLPER112", "SLPER115", "SLPER117", "SLPER118", "SLPER120", "SLPER92",
  "SLPER93", "SLPER94_8", "SLPER96", "SLPER98", "SLPER99"
)
Id32BestRep <- subset(vcf.df, select = Id32BestRep_samples)
Id32BestRep_sp <- cSplit(Id32BestRep, Id32BestRep_samples, sep = ":", type.convert = FALSE)
Id32BestRep_sp_gen <- Id32BestRep_sp[, seq(1, by = 4, length.out = length(Id32BestRep_samples)), with = FALSE]
Id32BestRep_miss <- apply(Id32BestRep_sp_gen, 1, count_missing)
Id32BestRep_pres <- ncol(Id32BestRep_sp_gen) - Id32BestRep_miss
Id32BestRep_pres.df <- as.data.frame(Id32BestRep_pres)
# percentage good amp:
(sum(Id32BestRep_pres) / (nrow(vcf.df) * ncol(Id32BestRep_sp_gen))) * 100
# missing data by ind
Id32BestRep.miss <- apply(Id32BestRep_sp_gen, 2, count_missing)
Id32BestRep.miss.df <- as.data.frame(Id32BestRep.miss)
Id32BestRep.miss.df.per <- (Id32BestRep.miss.df / nrow(Id32BestRep_sp_gen)) * 100
Id32BestRep.miss.df.per <- cbind(Row.Names = rownames(Id32BestRep.miss.df.per), Id32BestRep.miss.df.per)
Id32BestRep.miss.df.per.ord <- Id32BestRep.miss.df.per[order(-Id32BestRep.miss), ]
Id32BestRep.name <- str_sub(Id32BestRep.miss.df.per.ord$Row.Names, start = 1, end = -3)
Id32BestRep.name.df <- as.data.frame(Id32BestRep.name)
Id32BestRep.miss.df.per.ord <- Id32BestRep.miss.df.per.ord[, -1]
Id32BestRep.ind <- cbind(Id32BestRep.name, Id32BestRep.miss.df.per.ord)
Id32BestRep.ind.df <- as.data.frame(Id32BestRep.ind)

# IdBase 34
Id34_samples <- c(
  "SLNYA24", "SLNYA25", "SLNYA26", "SLNYA27", "SLNYA28", "SLNYA29", "SLNYA30",
  "SLNYA31", "SLNYA32", "SLNYA33", "SLNYA34"
)
Id34 <- subset(vcf.df, select = Id34_samples)
Id34_sp <- cSplit(Id34, Id34_samples, sep = ":", type.convert = FALSE)
Id34_sp_gen <- Id34_sp[, seq(1, by = 4, length.out = length(Id34_samples)), with = FALSE]
Id34_miss <- apply(Id34_sp_gen, 1, count_missing)
Id34_pres <- ncol(Id34_sp_gen) - Id34_miss
Id34_pres.df <- as.data.frame(Id34_pres)
# percentage good amp:
(sum(Id34_pres) / (nrow(vcf.df) * ncol(Id34_sp_gen))) * 100
# missing data by ind
Id34.miss <- apply(Id34_sp_gen, 2, count_missing)
Id34.miss.df <- as.data.frame(Id34.miss)
Id34.miss.df.per <- (Id34.miss.df / nrow(Id34_sp_gen)) * 100
Id34.miss.df.per <- cbind(Row.Names = rownames(Id34.miss.df.per), Id34.miss.df.per)
Id34.miss.df.per.ord <- Id34.miss.df.per[order(-Id34.miss), ]
Id34.name <- str_sub(Id34.miss.df.per.ord$Row.Names, start = 1, end = -3)
Id34.name.df <- as.data.frame(Id34.name)
Id34.miss.df.per.ord <- Id34.miss.df.per.ord[, -1]
Id34.ind <- cbind(Id34.name, Id34.miss.df.per.ord)
Id34.ind.df <- as.data.frame(Id34.ind)

# IdBase 35
Id35_samples <- c(
  "SLBRA10", "SLBRA11_4", "SLBRA11_8", "SLBRA12", "SLBRA13", "SLBRA15",
  "SLBRA16"
)
Id35 <- subset(vcf.df, select = Id35_samples)
Id35_sp <- cSplit(Id35, Id35_samples, sep = ":", type.convert = FALSE)
Id35_sp_gen <- Id35_sp[, seq(1, by = 4, length.out = length(Id35_samples)), with = FALSE]
Id35_miss <- apply(Id35_sp_gen, 1, count_missing)
Id35_pres <- ncol(Id35_sp_gen) - Id35_miss
Id35_pres.df <- as.data.frame(Id35_pres)
# percentage good amp:
(sum(Id35_pres) / (nrow(vcf.df) * ncol(Id35_sp_gen))) * 100
# missing data by ind
Id35.miss <- apply(Id35_sp_gen, 2, count_missing)
Id35.miss.df <- as.data.frame(Id35.miss)
Id35.miss.df.per <- (Id35.miss.df / nrow(Id35_sp_gen)) * 100
Id35.miss.df.per <- cbind(Row.Names = rownames(Id35.miss.df.per), Id35.miss.df.per)
Id35.miss.df.per.ord <- Id35.miss.df.per[order(-Id35.miss), ]
Id35.name <- str_sub(Id35.miss.df.per.ord$Row.Names, start = 1, end = -3)
Id35.name.df <- as.data.frame(Id35.name)
Id35.miss.df.per.ord <- Id35.miss.df.per.ord[, -1]
Id35.ind <- cbind(Id35.name, Id35.miss.df.per.ord)
Id35.ind.df <- as.data.frame(Id35.ind)

# IdBase 35 - with the best repeat (least missing data)
Id35BestRep_samples <- c(
  "SLBRA10", "SLBRA11_4", "SLBRA12", "SLBRA13", "SLBRA15", "SLBRA16"
)
Id35BestRep <- subset(vcf.df, select = Id35BestRep_samples)
Id35BestRep_sp <- cSplit(Id35BestRep, Id35BestRep_samples, sep = ":", type.convert = FALSE)
Id35BestRep_sp_gen <- Id35BestRep_sp[, seq(1, by = 4, length.out = length(Id35BestRep_samples)), with = FALSE]
Id35BestRep_miss <- apply(Id35BestRep_sp_gen, 1, count_missing)
Id35BestRep_pres <- ncol(Id35BestRep_sp_gen) - Id35BestRep_miss
Id35BestRep_pres.df <- as.data.frame(Id35BestRep_pres)
# percentage good amp:
(sum(Id35BestRep_pres) / (nrow(vcf.df) * ncol(Id35BestRep_sp_gen))) * 100
# missing data by ind
Id35BestRep.miss <- apply(Id35BestRep_sp_gen, 2, count_missing)
Id35BestRep.miss.df <- as.data.frame(Id35BestRep.miss)
Id35BestRep.miss.df.per <- (Id35BestRep.miss.df / nrow(Id35BestRep_sp_gen)) * 100
Id35BestRep.miss.df.per <- cbind(Row.Names = rownames(Id35BestRep.miss.df.per), Id35BestRep.miss.df.per)
Id35BestRep.miss.df.per.ord <- Id35BestRep.miss.df.per[order(-Id35BestRep.miss), ]
Id35BestRep.name <- str_sub(Id35BestRep.miss.df.per.ord$Row.Names, start = 1, end = -3)
Id35BestRep.name.df <- as.data.frame(Id35BestRep.name)
Id35BestRep.miss.df.per.ord <- Id35BestRep.miss.df.per.ord[, -1]
Id35BestRep.ind <- cbind(Id35BestRep.name, Id35BestRep.miss.df.per.ord)
Id35BestRep.ind.df <- as.data.frame(Id35BestRep.ind)

# IdBase 36
Id36_samples <- c(
  "SLARC68", "SLARC69", "SLARC70", "SLARC72", "SLARC73", "SLARC75", "SLARC76",
  "SLARC77", "SLARC78", "SLARC79", "SLARC80", "SLARC81", "SLARC82", "SLARC84",
  "SLARC85", "SLARC87", "SLARC88", "SLARC89", "SLARC90", "SLARC91", "SLARC92",
  "SLARC93"
)
Id36 <- subset(vcf.df, select = Id36_samples)
Id36_sp <- cSplit(Id36, Id36_samples, sep = ":", type.convert = FALSE)
Id36_sp_gen <- Id36_sp[, seq(1, by = 4, length.out = length(Id36_samples)), with = FALSE]
Id36_miss <- apply(Id36_sp_gen, 1, count_missing)
Id36_pres <- ncol(Id36_sp_gen) - Id36_miss
Id36_pres.df <- as.data.frame(Id36_pres)
# percentage good amp:
(sum(Id36_pres) / (nrow(vcf.df) * ncol(Id36_sp_gen))) * 100
# missing data by ind
Id36.miss <- apply(Id36_sp_gen, 2, count_missing)
Id36.miss.df <- as.data.frame(Id36.miss)
Id36.miss.df.per <- (Id36.miss.df / nrow(Id36_sp_gen)) * 100
Id36.miss.df.per <- cbind(Row.Names = rownames(Id36.miss.df.per), Id36.miss.df.per)
Id36.miss.df.per.ord <- Id36.miss.df.per[order(-Id36.miss), ]
Id36.name <- str_sub(Id36.miss.df.per.ord$Row.Names, start = 1, end = -3)
Id36.name.df <- as.data.frame(Id36.name)
Id36.miss.df.per.ord <- Id36.miss.df.per.ord[, -1]
Id36.ind <- cbind(Id36.name, Id36.miss.df.per.ord)
Id36.ind.df <- as.data.frame(Id36.ind)

# IdBase 37
Id37_samples <- c(
  "SLSTG1", "SLSTG11", "SLSTG12", "SLSTG13", "SLSTG14", "SLSTG15", "SLSTG16",
  "SLSTG2", "SLSTG28", "SLSTG3", "SLSTG31", "SLSTG32", "SLSTG4", "SLSTG5",
  "SLSTG7", "SLSTG8"
)
Id37 <- subset(vcf.df, select = Id37_samples)
Id37_sp <- cSplit(Id37, Id37_samples, sep = ":", type.convert = FALSE)
Id37_sp_gen <- Id37_sp[, seq(1, by = 4, length.out = length(Id37_samples)), with = FALSE]
Id37_miss <- apply(Id37_sp_gen, 1, count_missing)
Id37_pres <- ncol(Id37_sp_gen) - Id37_miss
Id37_pres.df <- as.data.frame(Id37_pres)
# percentage good amp:
(sum(Id37_pres) / (nrow(vcf.df) * ncol(Id37_sp_gen))) * 100
# missing data by ind
Id37.miss <- apply(Id37_sp_gen, 2, count_missing)
Id37.miss.df <- as.data.frame(Id37.miss)
Id37.miss.df.per <- (Id37.miss.df / nrow(Id37_sp_gen)) * 100
Id37.miss.df.per <- cbind(Row.Names = rownames(Id37.miss.df.per), Id37.miss.df.per)
Id37.miss.df.per.ord <- Id37.miss.df.per[order(-Id37.miss), ]
Id37.name <- str_sub(Id37.miss.df.per.ord$Row.Names, start = 1, end = -3)
Id37.name.df <- as.data.frame(Id37.name)
Id37.miss.df.per.ord <- Id37.miss.df.per.ord[, -1]
Id37.ind <- cbind(Id37.name, Id37.miss.df.per.ord)
Id37.ind.df <- as.data.frame(Id37.ind)

# IdBase 38 (the remaining Id38 statements follow this section)
Id38_samples <- c(
  "SLPDR1", "SLPDR11", "SLPDR12", "SLPDR13", "SLPDR23", "SLPDR3", "SLPDR6",
  "SLPDR7"
)
Id38 <- subset(vcf.df, select = Id38_samples)
Id38_sp <- cSplit(Id38, Id38_samples, sep = ":", type.convert = FALSE)
Id38_sp_gen <- Id38_sp[, seq(1, by = 4, length.out = length(Id38_samples)), with = FALSE]
Id38_miss <- apply(Id38_sp_gen, 1, count_missing)
Id38_pres <- ncol(Id38_sp_gen) - Id38_miss
# Missing-data summaries: remainder of IdBase 38 (Id38_sp_gen, Id38_miss and
# Id38_pres are created by the statements preceding this section) and IdBase 39
# (up to Id39.name.df).
#
# Objects created per group:
#   *_sp_gen        first ":"-separated field of every sample column
#   *_miss, *_pres  per-row counts of "./." and of non-"./." entries
#   *.miss(.df)     per-sample (column) count of "./." entries
#   *.miss.df.per   that count as a percentage of the number of rows
#   *.name, *.ind   sample names and percentages, ordered by decreasing count
#
# Sample counts come from ncol(*_sp_gen) instead of being typed in by hand.

# Number of "./." entries in a vector (NA entries are not counted).
count_missing <- function(x) sum(x == "./.", na.rm = TRUE)

# IdBase 38 (continued)
Id38_pres.df <- as.data.frame(Id38_pres)
# percentage good amp:
(sum(Id38_pres) / (nrow(vcf.df) * ncol(Id38_sp_gen))) * 100
# missing data by ind
Id38.miss <- apply(Id38_sp_gen, 2, count_missing)
Id38.miss.df <- as.data.frame(Id38.miss)
Id38.miss.df.per <- (Id38.miss.df / nrow(Id38_sp_gen)) * 100
Id38.miss.df.per <- cbind(Row.Names = rownames(Id38.miss.df.per), Id38.miss.df.per)
Id38.miss.df.per.ord <- Id38.miss.df.per[order(-Id38.miss), ]
# Drop the last two characters of each column name (presumably the "_1"
# suffix appended by cSplit - confirm).
Id38.name <- str_sub(Id38.miss.df.per.ord$Row.Names, start = 1, end = -3)
Id38.name.df <- as.data.frame(Id38.name)
Id38.miss.df.per.ord <- Id38.miss.df.per.ord[, -1]
Id38.ind <- cbind(Id38.name, Id38.miss.df.per.ord)
Id38.ind.df <- as.data.frame(Id38.ind)

# IdBase 39
# Sample names are listed once and reused for subset() and cSplit().
Id39_samples <- c(
  "SLSTG46", "SLSTG50", "SLSTG52", "SLSTG54", "SLSTG55", "SLSTG56", "SLSTG57",
  "SLSTG58", "SLSTG59", "SLSTG60", "SLSTG61", "SLSTG62", "SLSTG63", "SLSTG64",
  "SLSTG65", "SLSTG66", "SLSTG67", "SLSTG69", "SLSTG70", "SLSTG71", "SLSTG72",
  "SLSTG76"
)
Id39 <- subset(vcf.df, select = Id39_samples)
Id39_sp <- cSplit(Id39, Id39_samples, sep = ":", type.convert = FALSE)
# Columns 1, 5, 9, ...: the first field of each sample, assuming every sample
# column splits into exactly four fields. with = FALSE because cSplit returns
# a data.table.
Id39_sp_gen <- Id39_sp[, seq(1, by = 4, length.out = length(Id39_samples)), with = FALSE]
Id39_miss <- apply(Id39_sp_gen, 1, count_missing)
Id39_pres <- ncol(Id39_sp_gen) - Id39_miss
Id39_pres.df <- as.data.frame(Id39_pres)
# percentage good amp:
(sum(Id39_pres) / (nrow(vcf.df) * ncol(Id39_sp_gen))) * 100
# missing data by ind
Id39.miss <- apply(Id39_sp_gen, 2, count_missing)
Id39.miss.df <- as.data.frame(Id39.miss)
Id39.miss.df.per <- (Id39.miss.df / nrow(Id39_sp_gen)) * 100
Id39.miss.df.per <- cbind(Row.Names = rownames(Id39.miss.df.per), Id39.miss.df.per)
Id39.miss.df.per.ord <- Id39.miss.df.per[order(-Id39.miss), ]
Id39.name <- str_sub(Id39.miss.df.per.ord$Row.Names, start = 1, end = -3)
Id39.name.df <- as.data.frame(Id39.name)
# Missing-data summaries for IdBase 39 (final three statements), 40, 41, 42
# and 43, followed by the removal of unwanted sample columns and the loading
# of the locality (strata) table.
#
# Objects created per IdBase group:
#   *_sp_gen        first ":"-separated field of every sample column
#   *_miss, *_pres  per-row counts of "./." and of non-"./." entries
#   *.miss(.df)     per-sample (column) count of "./." entries
#   *.miss.df.per   that count as a percentage of the number of rows
#   *.name, *.ind   sample names and percentages, ordered by decreasing count
#
# Sample names are listed once per group and reused for subset() and cSplit();
# sample counts come from ncol(*_sp_gen) instead of being typed in by hand.
# Genotype columns are positions 1, 5, 9, ... of the split table, i.e. the
# first field of each sample assuming four fields per sample; with = FALSE is
# needed because cSplit returns a data.table.

# Number of "./." entries in a vector (NA entries are not counted).
count_missing <- function(x) sum(x == "./.", na.rm = TRUE)

# IdBase 39 (continued): pair the sample names with their percentages
Id39.miss.df.per.ord <- Id39.miss.df.per.ord[, -1]
Id39.ind <- cbind(Id39.name, Id39.miss.df.per.ord)
Id39.ind.df <- as.data.frame(Id39.ind)

# IdBase 40
Id40_samples <- c(
  "SLAUD19", "SLAUD20", "SLAUD21", "SLAUD22", "SLAUD23", "SLAUD24", "SLAUD25",
  "SLAUD26", "SLAUD27", "SLAUD29", "SLAUD30", "SLAUD31", "SLAUD32", "SLAUD33",
  "SLAUD34", "SLAUD35", "SLAUD37", "SLAUD38", "SLAUD39", "SLAUD40", "SLAUD41",
  "SLAUD42", "SLAUD43", "SLAUD44", "SLAUD45", "SLAUD46", "SLAUD47", "SLAUD48",
  "SLAUD49", "SLAUD50"
)
Id40 <- subset(vcf.df, select = Id40_samples)
Id40_sp <- cSplit(Id40, Id40_samples, sep = ":", type.convert = FALSE)
Id40_sp_gen <- Id40_sp[, seq(1, by = 4, length.out = length(Id40_samples)), with = FALSE]
Id40_miss <- apply(Id40_sp_gen, 1, count_missing)
Id40_pres <- ncol(Id40_sp_gen) - Id40_miss
Id40_pres.df <- as.data.frame(Id40_pres)
# percentage good amp:
(sum(Id40_pres) / (nrow(vcf.df) * ncol(Id40_sp_gen))) * 100
# missing data by ind
Id40.miss <- apply(Id40_sp_gen, 2, count_missing)
Id40.miss.df <- as.data.frame(Id40.miss)
Id40.miss.df.per <- (Id40.miss.df / nrow(Id40_sp_gen)) * 100
Id40.miss.df.per <- cbind(Row.Names = rownames(Id40.miss.df.per), Id40.miss.df.per)
Id40.miss.df.per.ord <- Id40.miss.df.per[order(-Id40.miss), ]
# Drop the last two characters of each column name (presumably the "_1"
# suffix appended by cSplit - confirm).
Id40.name <- str_sub(Id40.miss.df.per.ord$Row.Names, start = 1, end = -3)
Id40.name.df <- as.data.frame(Id40.name)
Id40.miss.df.per.ord <- Id40.miss.df.per.ord[, -1]
Id40.ind <- cbind(Id40.name, Id40.miss.df.per.ord)
Id40.ind.df <- as.data.frame(Id40.ind)

# IdBase 41
Id41_samples <- c(
  "SLAUD52", "SLAUD53", "SLAUD54", "SLAUD55", "SLAUD56", "SLAUD57", "SLAUD58",
  "SLAUD59", "SLAUD60", "SLAUD61", "SLAUD62", "SLAUD63", "SLAUD64", "SLAUD65",
  "SLAUD66", "SLAUD67", "SLAUD68", "SLAUD69", "SLAUD70", "SLAUD71", "SLAUD72",
  "SLAUD73", "SLAUD74", "SLAUD75", "SLAUD76", "SLAUD77", "SLAUD78", "SLAUD79",
  "SLAUD81", "SLAUD83"
)
Id41 <- subset(vcf.df, select = Id41_samples)
Id41_sp <- cSplit(Id41, Id41_samples, sep = ":", type.convert = FALSE)
Id41_sp_gen <- Id41_sp[, seq(1, by = 4, length.out = length(Id41_samples)), with = FALSE]
Id41_miss <- apply(Id41_sp_gen, 1, count_missing)
Id41_pres <- ncol(Id41_sp_gen) - Id41_miss
Id41_pres.df <- as.data.frame(Id41_pres)
# percentage good amp:
(sum(Id41_pres) / (nrow(vcf.df) * ncol(Id41_sp_gen))) * 100
# missing data by ind
Id41.miss <- apply(Id41_sp_gen, 2, count_missing)
Id41.miss.df <- as.data.frame(Id41.miss)
Id41.miss.df.per <- (Id41.miss.df / nrow(Id41_sp_gen)) * 100
Id41.miss.df.per <- cbind(Row.Names = rownames(Id41.miss.df.per), Id41.miss.df.per)
Id41.miss.df.per.ord <- Id41.miss.df.per[order(-Id41.miss), ]
Id41.name <- str_sub(Id41.miss.df.per.ord$Row.Names, start = 1, end = -3)
Id41.name.df <- as.data.frame(Id41.name)
Id41.miss.df.per.ord <- Id41.miss.df.per.ord[, -1]
Id41.ind <- cbind(Id41.name, Id41.miss.df.per.ord)
Id41.ind.df <- as.data.frame(Id41.ind)

# IdBase 42
Id42_samples <- c(
  "SLAUD101", "SLAUD103", "SLAUD104", "SLAUD105", "SLAUD106", "SLAUD107",
  "SLAUD108", "SLAUD109", "SLAUD110", "SLAUD111", "SLAUD112", "SLAUD113",
  "SLAUD114", "SLAUD115", "SLAUD84", "SLAUD85", "SLAUD86", "SLAUD87",
  "SLAUD88", "SLAUD89", "SLAUD90", "SLAUD91", "SLAUD92", "SLAUD93", "SLAUD94",
  "SLAUD95", "SLAUD96", "SLAUD97", "SLAUD98", "SLAUD99"
)
Id42 <- subset(vcf.df, select = Id42_samples)
Id42_sp <- cSplit(Id42, Id42_samples, sep = ":", type.convert = FALSE)
Id42_sp_gen <- Id42_sp[, seq(1, by = 4, length.out = length(Id42_samples)), with = FALSE]
Id42_miss <- apply(Id42_sp_gen, 1, count_missing)
Id42_pres <- ncol(Id42_sp_gen) - Id42_miss
Id42_pres.df <- as.data.frame(Id42_pres)
# percentage good amp:
(sum(Id42_pres) / (nrow(vcf.df) * ncol(Id42_sp_gen))) * 100
# missing data by ind
Id42.miss <- apply(Id42_sp_gen, 2, count_missing)
Id42.miss.df <- as.data.frame(Id42.miss)
Id42.miss.df.per <- (Id42.miss.df / nrow(Id42_sp_gen)) * 100
Id42.miss.df.per <- cbind(Row.Names = rownames(Id42.miss.df.per), Id42.miss.df.per)
Id42.miss.df.per.ord <- Id42.miss.df.per[order(-Id42.miss), ]
Id42.name <- str_sub(Id42.miss.df.per.ord$Row.Names, start = 1, end = -3)
Id42.name.df <- as.data.frame(Id42.name)
Id42.miss.df.per.ord <- Id42.miss.df.per.ord[, -1]
Id42.ind <- cbind(Id42.name, Id42.miss.df.per.ord)
Id42.ind.df <- as.data.frame(Id42.ind)

# IdBase 43
Id43_samples <- c(
  "SLSTG100", "SLSTG101", "SLSTG102", "SLSTG103", "SLSTG106", "SLSTG109",
  "SLSTG110", "SLSTG85", "SLSTG87", "SLSTG89", "SLSTG90", "SLSTG91",
  "SLSTG92", "SLSTG94", "SLSTG98", "SLSTG99"
)
Id43 <- subset(vcf.df, select = Id43_samples)
Id43_sp <- cSplit(Id43, Id43_samples, sep = ":", type.convert = FALSE)
Id43_sp_gen <- Id43_sp[, seq(1, by = 4, length.out = length(Id43_samples)), with = FALSE]
Id43_miss <- apply(Id43_sp_gen, 1, count_missing)
Id43_pres <- ncol(Id43_sp_gen) - Id43_miss
Id43_pres.df <- as.data.frame(Id43_pres)
# percentage good amp:
(sum(Id43_pres) / (nrow(vcf.df) * ncol(Id43_sp_gen))) * 100
# missing data by ind
Id43.miss <- apply(Id43_sp_gen, 2, count_missing)
Id43.miss.df <- as.data.frame(Id43.miss)
Id43.miss.df.per <- (Id43.miss.df / nrow(Id43_sp_gen)) * 100
Id43.miss.df.per <- cbind(Row.Names = rownames(Id43.miss.df.per), Id43.miss.df.per)
Id43.miss.df.per.ord <- Id43.miss.df.per[order(-Id43.miss), ]
Id43.name <- str_sub(Id43.miss.df.per.ord$Row.Names, start = 1, end = -3)
Id43.name.df <- as.data.frame(Id43.name)
Id43.miss.df.per.ord <- Id43.miss.df.per.ord[, -1]
Id43.ind <- cbind(Id43.name, Id43.miss.df.per.ord)
Id43.ind.df <- as.data.frame(Id43.ind)

# remove from ordered_samples_SNP the repeats (only keeping the individual
# with least missing data), the 3 digitata individuals, and the very low
# amplification samples (with more than 10% of missing data) -
# retained_samples_SNP has 103963 loci, 458 inds
denovo7.ord.mod_prep <- read.table(
  "ordered_samples_SNP.txt",
  stringsAsFactors = FALSE, header = TRUE, dec = ","
)
# Column positions (in ordered_samples_SNP.txt) of the 86 samples to drop.
denovo7.ord.mod <- denovo7.ord.mod_prep[c(1:103963), -c(15, 29, 30, 54, 55, 56, 61, 62, 63, 66, 82, 83, 90, 138, 142, 148, 149, 150, 151, 152, 153, 154, 155, 156, 158, 167, 170, 171, 177, 178, 181, 196, 199, 200, 201, 203, 256, 286, 293, 294, 295, 297, 298, 299, 300, 301, 302, 311, 319, 321, 322, 323, 324, 325, 326, 327, 329, 330, 345, 354, 356, 357, 362, 365, 367, 375, 377, 388, 400, 405, 410, 412, 417, 418, 419, 422, 423, 425, 427, 431, 453, 463, 521, 538, 545, 546)]
# NOTE(review): the file is written to an absolute directory but read back by
# bare file name further down - confirm the working directory matches.
write.table(
  denovo7.ord.mod,
  file = "C:/Users/aaa/bbb/retained_samples_SNP.txt",
  sep = "\t", quote = FALSE, col.names = TRUE, row.names = FALSE
)

# selecting samples only from the 11 localities of interest to the study
# for more details on the selected localities see Figure 1 and Table S1A
# note that the names of the localities were changed for the publication:
# FERM = FER
# GAL = POR
# GOEL = LEZ
# HELG = HEL
# LANIL = LAN
# LOCQ = LOC
# PDC = AUD
# POR = CAS
# SCOT = ELL
# STGU = STG
# SVAL = NYA
glob.pops <- read.table(
  "pop_ElevenLocalities.txt",
  stringsAsFactors = FALSE, header = TRUE, dec = ","
)
glob.pops$STRATA <- as.factor(glob.pops$STRATA)
table(glob.pops$STRATA)
# FERM GAL GOEL HELG LANIL LOCQ PDC POR SCOT STGU SVAL
# 10   22  52   22   46    27   87  5   7    45   13
# Build the per-locality sample tables from glob.pops (columns INDIVIDUALS and
# STRATA are the ones used here).
#
# Localities kept in full: all rows of glob.pops with the given STRATA value.
FERM <- subset(glob.pops, glob.pops$STRATA == "FERM")
FERM$STRATA <- droplevels(FERM$STRATA)
FERM.df <- as.data.frame(FERM)

GAL <- subset(glob.pops, glob.pops$STRATA == "GAL")
GAL$STRATA <- droplevels(GAL$STRATA)
GAL.df <- as.data.frame(GAL)

HELG <- subset(glob.pops, glob.pops$STRATA == "HELG")
HELG$STRATA <- droplevels(HELG$STRATA)
HELG.df <- as.data.frame(HELG)

POR <- subset(glob.pops, glob.pops$STRATA == "POR")
POR$STRATA <- droplevels(POR$STRATA)
POR.df <- as.data.frame(POR)

SCOT <- subset(glob.pops, glob.pops$STRATA == "SCOT")
SCOT$STRATA <- droplevels(SCOT$STRATA)
SCOT.df <- as.data.frame(SCOT)

SVAL <- subset(glob.pops, glob.pops$STRATA == "SVAL")
SVAL$STRATA <- droplevels(SVAL$STRATA)
SVAL.df <- as.data.frame(SVAL)

# need to randomly select 24 samples from GOEL, LANIL, LOCQ, PDC, STGU
# For each of these localities the individuals are shuffled with sample() and
# the first 24 of the shuffled order are kept.
# NOTE(review): no set.seed() is visible in this part of the script, so every
# run presumably draws a different set of individuals - confirm before
# re-running, since later steps select sample columns by fixed position.
#
# The former bare `droplevels(<LOC>.RAND_24)` calls were dropped: their result
# was never assigned, and droplevels() has no method for character vectors, so
# the call fails whenever INDIVIDUALS is kept as character.
GOEL <- subset(glob.pops, glob.pops$STRATA == "GOEL")
GOEL$STRATA <- droplevels(GOEL$STRATA)
GOEL.RAND <- sample(GOEL$INDIVIDUALS)
GOEL.RAND.df <- as.data.frame(GOEL.RAND)
GOEL.RAND_24 <- GOEL.RAND.df[1:24, ]
GOEL.RAND_24.df <- as.data.frame(GOEL.RAND_24)
names(GOEL.RAND_24.df)[1] <- "INDIVIDUALS"
GOEL.RAND_24.df$STRATA <- "GOEL"

LANIL <- subset(glob.pops, glob.pops$STRATA == "LANIL")
LANIL$STRATA <- droplevels(LANIL$STRATA)
LANIL.RAND <- sample(LANIL$INDIVIDUALS)
LANIL.RAND.df <- as.data.frame(LANIL.RAND)
LANIL.RAND_24 <- LANIL.RAND.df[1:24, ]
LANIL.RAND_24.df <- as.data.frame(LANIL.RAND_24)
names(LANIL.RAND_24.df)[1] <- "INDIVIDUALS"
LANIL.RAND_24.df$STRATA <- "LANIL"

LOCQ <- subset(glob.pops, glob.pops$STRATA == "LOCQ")
LOCQ$STRATA <- droplevels(LOCQ$STRATA)
LOCQ.RAND <- sample(LOCQ$INDIVIDUALS)
LOCQ.RAND.df <- as.data.frame(LOCQ.RAND)
LOCQ.RAND_24 <- LOCQ.RAND.df[1:24, ]
LOCQ.RAND_24.df <- as.data.frame(LOCQ.RAND_24)
names(LOCQ.RAND_24.df)[1] <- "INDIVIDUALS"
LOCQ.RAND_24.df$STRATA <- "LOCQ"

# PDC: the shuffled order is drawn here; the first 24 are taken in the
# statements that follow this section.
PDC <- subset(glob.pops, glob.pops$STRATA == "PDC")
PDC$STRATA <- droplevels(PDC$STRATA)
PDC.RAND <- sample(PDC$INDIVIDUALS)
PDC.RAND.df <- as.data.frame(PDC.RAND) PDC.RAND_24 <- PDC.RAND.df[1:24,] droplevels(PDC.RAND_24) PDC.RAND_24.df <- as.data.frame(PDC.RAND_24) names(PDC.RAND_24.df)[1] <- "INDIVIDUALS" PDC.RAND_24.df$STRATA <- "PDC" STGU <- subset(glob.pops,glob.pops$STRATA== "STGU") STGU$STRATA <- droplevels(STGU$STRATA) STGU.RAND <- sample(STGU$INDIVIDUALS) STGU.RAND.df <- as.data.frame(STGU.RAND) STGU.RAND_24 <- STGU.RAND.df[1:24,] droplevels(STGU.RAND_24) STGU.RAND_24.df <- as.data.frame(STGU.RAND_24) names(STGU.RAND_24.df)[1] <- "INDIVIDUALS" STGU.RAND_24.df$STRATA <- "STGU" glob.pops.mod <- rbind ( FERM.df,GAL.df) glob.pops.mod1 <- rbind (glob.pops.mod,GOEL.RAND_24.df) glob.pops.mod2 <- rbind (glob.pops.mod1,HELG.df) glob.pops.mod3 <- rbind (glob.pops.mod2,LANIL.RAND_24.df) glob.pops.mod4 <- rbind (glob.pops.mod3,LOCQ.RAND_24.df) glob.pops.mod5 <- rbind (glob.pops.mod4,PDC.RAND_24.df) glob.pops.mod6 <- rbind (glob.pops.mod5,POR.df) glob.pops.mod7 <- rbind (glob.pops.mod6,SCOT.df ) glob.pops.mod8 <- rbind (glob.pops.mod7,STGU.RAND_24.df ) glob.pops.mod9 <- rbind (glob.pops.mod8,SVAL.df ) class(glob.pops.mod9) write.table(glob.pops.mod9, file = "C:/Users/aaa/bbb/pop_ElevenLocalities_SelectedSamples.txt", sep="\t",quote=FALSE,col.names = T,row.names=F) #filtering of loci in the samples from the 11 localities of interest denovo7.ord.mod.GLOB_Rand_prep <-read.table("retained_samples_SNP.txt",stringsAsFactors=FALSE, header=TRUE, dec=",") #remove samples other than in pop_ElevenLocalities_SelectedSamples.txt denovo7.ord.mod.GLOB_Rand <- denovo7.ord.mod.GLOB_Rand_prep[c(1:103963),-c(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 60, 61, 62, 65, 66, 68, 73, 74, 76, 77, 82, 83, 84, 85, 89, 92, 95, 97, 99, 100, 101, 103, 104, 105, 107, 112, 113, 116, 120, 122, 123, 124, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 
173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 204, 205, 207, 209, 210, 211, 212, 213, 215, 217, 220, 221, 222, 223, 226, 249, 261, 270, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 334, 337, 339, 341, 342, 343, 345, 346, 347, 350, 353, 354, 355, 357, 359, 362, 366, 368, 369, 370, 371, 372, 373, 374, 376, 377, 378, 379, 380, 381, 383, 384, 385, 387, 388, 389, 390, 391, 392, 394, 398, 401, 402, 403, 405, 406, 407, 408, 409, 410, 413, 414, 416, 417, 418, 419, 421, 422, 423, 424, 425, 427, 430, 431, 435, 436, 437, 438, 439, 440, 441, 444, 445, 446, 447, 448, 450, 451, 452, 454, 455, 456, 457, 461, 464, 466, 467)] #procedure to leave only one randomly selected SNP for each locus - need to make sure that the ID is not modified (i.e. such that SNPs from the same locus will all share the same ID) #random order (so that not always the first SNP in the locus is eleminated) denovo7.ord.mod.GLOB_Rand.prep.random <- denovo7.ord.mod.GLOB_Rand[order(runif(nrow(denovo7.ord.mod.GLOB_Rand))),] #new data frame without two SNPs within the same locus (denovo1.ord.mod.sel.no.dupl$ID column is all different) denovo7.ord.mod.GLOB_Rand_no.dupl <- denovo7.ord.mod.GLOB_Rand.prep.random[!duplicated(denovo7.ord.mod.GLOB_Rand.prep.random[3]),] length(denovo7.ord.mod.GLOB_Rand_no.dupl$ID) #13122 #order by ID denovo7.ord.mod.GLOB_Rand_no.dupl <- denovo7.ord.mod.GLOB_Rand_no.dupl[ order(denovo7.ord.mod.GLOB_Rand_no.dupl[,3]), ] #write the modified VCF out to the directory write.table(denovo7.ord.mod.GLOB_Rand_no.dupl, file = "C:/aaa/bbb/ElevenLocalities_SelectedSamples_no.dupl.txt", sep="\t",quote=FALSE,col.names = T,row.names=F) #perform maf filtering on ElevenLocalities_SelectedSamples_no.dupl with LOCAL MAF of 0.04 (thus a locus is retained if the alternative allele occurs at 
# least approx 2 times out of 48 times (as median number of individuals in
# localities with ten or more inds per locality is 24) (for localities with
# less than ten inds per locality, the loci were removed if they pass solely
# due to meeting the thresholds in only those localities)) OR GLOBAL MAF of
# 0.01 (thus a locus is retained if the alternative allele occurs at least
# approx 5 times out of 498 times (199 diploid individuals*2))
# NOTE(review): 199 * 2 = 398 allele copies, so a global MAF of 0.01 would be
# approx 4 copies out of 398 - confirm which figure was intended.
# NOTE(review): a comment further down calls this filter "filter_maf";
# confirm that maf_filt() is defined in the session.
maf.filt <- maf_filt(
  "ElevenLocalities_SelectedSamples_no.dupl.vcf",
  strata = "pop_ElevenLocalities_SelectedSamples.txt",
  interactive.filter = FALSE,
  maf.approach = "SNP",
  maf.thresholds = c(0.04, 0.01),
  maf.operator = "OR",
  common.markers = FALSE,
  filename = "ElevenLocalities_SelectedSamples_no.dupl.sel",
  maf.pop.num.threshold = 1
)
# Console output recorded from the call above (kept as comments so that the
# script can be parsed and sourced):
# #######################################################################
# ##################### radiator::tidy_genomic_data #####################
# #######################################################################
# Importing and tidying the VCF...
# VCF is biallelic
# Working on the vcf...
# Making the vcf population-wise...
# Recoding bi-allelic VCF...
# Calculating REF/ALT alleles...
# Generating REF/ALT dictionary
# Number of markers with REF/ALT change(s) = 10
# Integrating new genotype codings...
# Erasing genotype: no
# Removing monomorphic markers: yes
# Scanning for monomorphic markers...
# Number of markers before = 13122
# Number of monomorphic markers removed = 5611
# Number of markers after = 7511
# ############################### RESULTS ###############################
# Tidy data written in global environment
# Data format: vcf.file
# Biallelic data
# Number of markers: 7511
# Number of chromosome/contig/scaffold: 1
# Number of individuals: 199
# Number of populations: 11
# Computation time: 4343 sec
# ################ radiator::tidy_genomic_data completed ################
# Summarizing the data by populations and globally
# Calculating global and local MAF on large data set may take some time...
# 2 versions (pdf and png) of the violin plot for the global MAF were written in the folder
# maf.global.summary.tsv was saved in the folder
# 2 versions (pdf and png) of the violin plot for the global MAF were saved in the folder
# 2 versions (pdf and png) of the local maf spectrum plot were saved in the folder
# Generating maf.helper.table...
# A table of local and global MAF (maf.helper.table.tsv) was written in the directory
# The MAF summary statistics (maf.data.tsv) were written in the folder
# Writing the filtered tidy data set: denovo7.ord.mod.GLOB_Rand_no.dupl.sel.rad
# Writing the whitelist of markers in your working directory whitelist.markers.maf.tsv
# Writing the blacklist of markers in your working directory blacklist.markers.maf.tsv
# ############################### RESULTS ###############################
# maf.approach: SNP
# maf.thresholds: local = 0.04, global = 0.01
# maf.operator: OR
# maf.pop.num.threshold: 1
# The number of markers removed by the MAF filter: 3011
# The number of markers before -> after the MAF filter
# SNP: 7511 -> 4500
# Computation time: 4374 sec
# ############################## completed ##############################

# steps to obtain VCF only with the markers on the whitelist generated by
# filter_maf
# read in the vcf file but as text file, and with no # on the header row
txt.vcf <- read.table(
  "ElevenLocalities_SelectedSamples_no.dupl.txt",
  stringsAsFactors = FALSE, header = TRUE, dec = ","
)
# read in the white list (markers retained after the maf filter)
white <- read.table("whitelist.markers.maf_GLOB_Rand.no.dupl.tsv", header = TRUE)
# keyed join on POS: keep the VCF rows whose POS is on the whitelist
txt.key <- data.table(txt.vcf, key = "POS")
white.key <- data.table(white, key = "POS")
denovo7.ord.mod.sel.GLOB_RandLOC_no.dupl <- txt.key[white.key]
# drop the columns appended by the join; only the first 208 are kept
denovo7.ord.mod.sel.GLOB_RandLOC_no.dupl <-
  denovo7.ord.mod.sel.GLOB_RandLOC_no.dupl[, c(1:208)]
nrow(denovo7.ord.mod.sel.GLOB_RandLOC_no.dupl) # 4500 SNPs after MAF
# and to obtain the number of loci (number of unique LOCUS IDs)
whiteLoc <- white
whiteLoc$LOCUS <- as.factor(whiteLoc$LOCUS)
nlevels
(whiteLoc$LOCUS) # 4500

# ---- Inspect the MAF table written by the filter ----
# (copy the maf.data file to the main directory first)
maf <- read.table(
  file = "maf.data_ElevenLocalities_SelectedSamples_no.dupl.txt",
  header = TRUE,
  dec = ",",
  stringsAsFactors = FALSE
)
# Force both MAF columns to numeric whatever type read.table gave them.
maf$MAF_LOCAL <- as.numeric(as.character(maf$MAF_LOCAL))
maf$MAF_GLOBAL <- as.numeric(as.character(maf$MAF_GLOBAL))
str(maf)

# Rows where a locus is fixed for the other allele in a pop (local MAF = 1).
maf1 <- maf[which(maf$MAF_LOCAL == 1), , drop = FALSE]
dim(maf1) # 489 instances within pops (for different loci could different number of pops)
sum(maf1$OR == "pass", na.rm = TRUE) # 489 - all are retained with the current MAF parameters

# Rows with a global MAF of 0.5 (but not necessarily a local MAF of 0.5).
maf_het <- maf[which(maf$MAF_GLOBAL == 0.5), , drop = FALSE]
nrow(maf_het) # 33

# Por (and other low individual pops) may be biasing the MAF for loci that
# pass, as it is enough to be a het (with local maf of at least 0.04) just
# within this one pop for the locus to be retained.
maf_pass <- maf[which(maf$OR_POP_THRESHOLD == "pass"), , drop = FALSE] # 49 379 rows
maf_pass$MARKERS <- as.factor(maf_pass$MARKERS)
str(maf_pass) # 4500 markers that passed the MAF criteria

# From the markers that passed, select those with GLOBAL_MAF lower or equal
# to 0.025 - the level at which the bias from low individual pops is most
# likely to be substantial.
maf_GLOB0.025 <- maf_pass[which(maf_pass$MAF_GLOBAL <= 0.025), , drop = FALSE]
dim(maf_GLOB0.025) # 34 349 rows
maf_GLOB0.025$MARKERS <- droplevels(as.factor(maf_GLOB0.025$MARKERS))
str(maf_GLOB0.025) # 3130 markers

# Keep MARKERS where none of the pops exceeds LOCAL_MAF of 0.1, but remove
# MARKERS if even one pop exceeds LOCAL_MAF of 0.1: every row scores 2 when
# its local MAF is <= 0.1 and 1 otherwise, so a per-marker mean of exactly 2
# means no pop exceeded 0.1.
maf_LOC0.1 <- maf_GLOB0.025
maf_LOC0.1$SCORE <- ifelse(test = maf_LOC0.1$MAF_LOCAL <= 0.1, yes = 2, no = 1)
markers <- group_by(maf_LOC0.1, MARKERS)
score_mean <- summarise(markers, score.mean = mean(SCORE, na.rm = TRUE))
score_mean.df <- as.data.frame(score_mean)
markers_below0.1 <- score_mean.df[which(score_mean.df$score.mean == 2), , drop = FALSE]
# ---- Build the blacklist of markers that pass MAF only via tiny pops ----
markers_below0.1$MARKERS <- droplevels(markers_below0.1$MARKERS)

# Keyed join on MARKERS: pull the per-pop MAF rows of the retained markers.
txt.key <- data.table(maf_LOC0.1, key = "MARKERS")
white.key <- data.table(markers_below0.1, key = "MARKERS")
markers_below0.1OUT <- txt.key[white.key]
markers_below0.1OUT.df <- as.data.frame(markers_below0.1OUT)
dim(markers_below0.1OUT.df) # 22 173 rows
str(markers_below0.1OUT.df)
# 2021 - markers with at least one pop with LOCAL MAF below or equal to 0.1
# (and GLOBAL MAF below 0.025) and no pops with LOCAL MAF above 0.1

# Only markers with GLOBAL MAF below 0.004 (and LOCAL MAF below or equal to
# 0.1); in most of these cases the marker passed MAF due to having LOCAL MAF
# of at least 0.1 in just one pop (often one with low number of samples).
maf_GLOB0.004 <- markers_below0.1OUT.df[
  which(markers_below0.1OUT.df$MAF_GLOBAL < 0.004), ,
  drop = FALSE
]
maf_GLOB0.004$MARKERS <- droplevels(maf_GLOB0.004$MARKERS)
dim(maf_GLOB0.004) # 7787 rows
str(maf_GLOB0.004) # 709 markers

# Subset by pop (only low sample pops - less than 10 inds) and blacklist the
# markers found in those pops.
maf_GLOB0.004$POP_ID <- as.factor(maf_GLOB0.004$POP_ID)

# POR (5 inds)
maf_GLOB0.004_POR <- maf_GLOB0.004[
  which(maf_GLOB0.004$POP_ID == "POR"), ,
  drop = FALSE
]
maf_GLOB0.004_POR$POP_ID <- droplevels(maf_GLOB0.004_POR$POP_ID)

# SCOT (7 inds)
maf_GLOB0.004_SCOT <- maf_GLOB0.004[
  which(maf_GLOB0.004$POP_ID == "SCOT"), ,
  drop = FALSE
]
maf_GLOB0.004_SCOT$POP_ID <- droplevels(maf_GLOB0.004_SCOT$POP_ID)

# Stack both small pops, keep the rows flagged "pass" in OR, one row per marker.
maf_GLOB0.004_TOT <- rbind(maf_GLOB0.004_POR, maf_GLOB0.004_SCOT)
maf_GLOB0.004_TOT_BAD.LOC <- maf_GLOB0.004_TOT[
  which(maf_GLOB0.004_TOT$OR == "pass"), ,
  drop = FALSE
]
first_per_marker <- !duplicated(maf_GLOB0.004_TOT_BAD.LOC$MARKERS)
markers_NoDupl <- maf_GLOB0.004_TOT_BAD.LOC[first_per_marker, ]
blackMARKERS <- markers_NoDupl$MARKERS
blackMARKERS.df <- as.data.frame(blackMARKERS)
nrow(blackMARKERS.df) # 431

# Read in the white list (markers retained after the maf filter) ...
white <- read.table(
  file = "whitelist.markers.maf_GLOB_Rand.no.dupl.tsv",
  header = TRUE
)
# ... and key it on POS so the blacklisted markers can be removed from it.
white.key <- data.table(white, key = "POS")
# ---- Remove the blacklisted markers from the whitelist ----
# Split every blacklisted marker ID on "__"; the third piece is used as POS.
blackMARKERS.df.split <- cSplit(
  blackMARKERS.df,
  splitCols = "blackMARKERS",
  sep = "__",
  type.convert = FALSE
)
blackMARKERS.pos <- subset(blackMARKERS.df.split[, c(3)])
blackMARKERS.pos.df <- as.data.frame(blackMARKERS.pos)
names(blackMARKERS.pos.df)[1] <- "POS"
blackMARKERS.pos.df$POS <- as.integer(blackMARKERS.pos.df$POS)
black.key <- data.table(blackMARKERS.pos.df, key = "POS")

# Not-join on the POS key: whitelist rows whose POS is not blacklisted.
white_no.black_key <- white.key[!black.key]
nrow(white_no.black_key) # 4069 SNPs after MAF and above manipulations

# Number of loci = number of unique LOCUS IDs.
white_no.black_keyLoc <- white_no.black_key
white_no.black_keyLoc$LOCUS <- as.factor(white_no.black_keyLoc$LOCUS)
nlevels(white_no.black_keyLoc$LOCUS) # 4069

# ---- Retain only the adjusted whitelist in the VCF table ----
txt.vcf <- read.table(
  file = "ElevenLocalities_SelectedSamples_no.dupl.txt",
  header = TRUE,
  dec = ",",
  stringsAsFactors = FALSE
)
txt.key <- data.table(txt.vcf, key = "POS")
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl <- txt.key[white_no.black_key]
# Only the first 208 columns are kept; the join appends the whitelist columns.
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl <-
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl[, c(1:208)]
nrow(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl) # 4069
write.table(
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl,
  file = "C:/aaa/bbb/ElevenLocalities_SelectedSamples_no.dupl_sel.txt",
  sep = "\t",
  quote = FALSE,
  col.names = TRUE,
  row.names = FALSE
)

# ---- Preparation of genlight file (4,069 loci and 199 inds) ----
# Preparation of vcf for downstream manipulation.
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl.vcf <- genomic_converter(
  data = "ElevenLocalities_SelectedSamples_no.dupl_sel.vcf",
  output = "vcf",
  filename = "ElevenLocalities_SelectedSamples_no.dupl_sel_ForGenlight",
  strata = "pop_ElevenLocalities_SelectedSamples.txt",
  common.markers = FALSE,
  monomorphic.out = TRUE
)
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl <- read.table(
  file = "ElevenLocalities_SelectedSamples_no.dupl_sel_ForGenlight.txt",
  header = TRUE,
  dec = ",",
  stringsAsFactors = FALSE
)
dim(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl)
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl.df <-
  as.data.frame(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl)

# Replace the four genotype strings handled by this script with allele counts
# (0, 1, 1, 2). Any other cell is left untouched here and becomes NA (with a
# coercion warning) when the columns are later passed through as.numeric().
recode_genotypes <- function(gt_df) {
  gt_df[gt_df == "0/0"] <- 0
  gt_df[gt_df == "0/1"] <- 1
  gt_df[gt_df == "1/0"] <- 1
  gt_df[gt_df == "1/1"] <- 2
  gt_df
}

# modify the vcf for each population:
# Sample columns of every locality, written once so that the column selection
# and the cSplit() call can never disagree. The order of localities and of
# samples within a locality fixes the order of individuals in the genlight.
locality_samples <- list(
  Por = c("SLPOR1", "SLPOR10", "SLPOR11", "SLPOR5", "SLPOR6"),
  Scot = c(
    "SLELL1", "SLELL18", "SLELL22", "SLELL23", "SLELL24", "SLELL26", "SLELL9"
  ),
  Lanil = c(
    "SLPMI5", "SLPMI83", "SLPMI15", "SLPMI26", "SLPMI29", "SLPMI10",
    "SLPMI86", "SLPMI1", "SLPMI17", "SLPMI28", "SLPMI31", "SLPMI43",
    "SLPMI30", "SLPMI38", "SLPMI6", "SLPMI8", "SLPMI35", "SLPMI39",
    "SLPMI12", "SLPMI21", "SLPMI85", "SLPMI4", "SLPMI40", "SLPMI36"
  ),
  Gal = c(
    "SLPDX11", "SLPDX12", "SLPDX13", "SLPDX15", "SLPDX16", "SLPDX17",
    "SLPDX18", "SLPDX19", "SLPDX20", "SLPDX21", "SLPDX22", "SLPDX3",
    "SLPDX4", "SLPDX5", "SLPDX6", "SLPDX8", "SLPDX26", "SLPDX29",
    "SLPDX30", "SLPDX33", "SLPDX34", "SLPDX35"
  ),
  Goel = c(
    "SLSTA8", "SLTRM12", "SLTRM5", "SLTRM1", "SLTRM24", "SLSTA29",
    "SLTRM17", "SLTRM22", "SLSTA7", "SLSTA26", "SLTRM29", "SLSTA24",
    "SLTRM18", "SLTRM9", "SLSTA17", "SLSTA28", "SLTRM26", "SLTRM13",
    "SLSTA12", "SLSTA13", "SLTRM25", "SLSTA19", "SLTRM11", "SLSTA14"
  ),
  Helg = c(
    "SLHEF1", "SLHEF2", "SLHEF3", "SLHEF4", "SLHEF5", "SLHEF6", "SLHEF7",
    "SLHEF8", "SLHEF10", "SLHEF11", "SLHEF12", "SLHEF13", "SLHEF14",
    "SLHEF15", "SLHEF16", "SLHEF17", "SLHEF18", "SLHEF19", "SLHEF20",
    "SLHEF21", "SLHEF22", "SLHEF9"
  ),
  Locq = c(
    "SLPDK14", "SLPDK2", "SLPDK25", "SLPDK20", "SLPDK13", "SLPDK12",
    "SLPDK28", "SLPDK19", "SLPDK5", "SLPDK7", "SLPDK22", "SLPDK6",
    "SLPDK23", "SLPDK9", "SLPDK27", "SLPDK18", "SLPDK16", "SLPDK17",
    "SLPDK3", "SLPDK29", "SLPDK11", "SLPDK26", "SLPDK15", "SLPDK8"
  ),
  Ferm = c(
    "SLCAL1", "SLCAL10", "SLCAL2", "SLCAL3", "SLCAL4", "SLCAL5", "SLCAL6",
    "SLCAL7", "SLCAL8", "SLCAL9"
  ),
  Sval = c(
    "SLNYA25", "SLNYA26", "SLNYA28", "SLNYA30", "SLNYA31", "SLNYA32",
    "SLNYA33", "SLNYA34", "SLBRA10", "SLBRA11", "SLBRA12", "SLBRA15",
    "SLBRA16"
  ),
  Stgu = c(
    "SLSTG1", "SLSTG63", "SLSTG55", "SLSTG46", "SLSTG69", "SLSTG14",
    "SLSTG52", "SLSTG70", "SLSTG12", "SLSTG110", "SLSTG71", "SLSTG66",
    "SLSTG16", "SLSTG94", "SLSTG65", "SLSTG106", "SLSTG7", "SLSTG3",
    "SLSTG57", "SLSTG90", "SLSTG76", "SLSTG91", "SLSTG61", "SLSTG109"
  ),
  Pdc = c(
    "SLAUD71", "SLAUD41", "SLAUD60", "SLAUD87", "SLAUD56", "SLAUD88",
    "SLAUD37", "SLAUD104", "SLAUD68", "SLAUD53", "SLAUD52", "SLAUD101",
    "SLAUD105", "SLAUD50", "SLAUD108", "SLAUD67", "SLAUD55", "SLAUD76",
    "SLAUD94", "SLAUD110", "SLAUD98", "SLAUD109", "SLAUD48", "SLAUD27"
  )
)

# Short alias for the VCF table; used only inside this section.
vcf_df <- denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl.df

# For every locality: select its sample columns, split each column on ":"
# and convert the result back to a plain data frame.
# POR
Por <- vcf_df[, locality_samples$Por, drop = FALSE]
Por_sp <- cSplit(Por, locality_samples$Por, sep = ":", type.convert = FALSE)
Por_sp.df <- as.data.frame(Por_sp)
# SCOT
Scot <- vcf_df[, locality_samples$Scot, drop = FALSE]
Scot_sp <- cSplit(Scot, locality_samples$Scot, sep = ":", type.convert = FALSE)
Scot_sp.df <- as.data.frame(Scot_sp)
# LANIL
Lanil <- vcf_df[, locality_samples$Lanil, drop = FALSE]
Lanil_sp <- cSplit(Lanil, locality_samples$Lanil, sep = ":", type.convert = FALSE)
Lanil_sp.df <- as.data.frame(Lanil_sp)
# GAL
Gal <- vcf_df[, locality_samples$Gal, drop = FALSE]
Gal_sp <- cSplit(Gal, locality_samples$Gal, sep = ":", type.convert = FALSE)
Gal_sp.df <- as.data.frame(Gal_sp)
# GOEL
Goel <- vcf_df[, locality_samples$Goel, drop = FALSE]
Goel_sp <- cSplit(Goel, locality_samples$Goel, sep = ":", type.convert = FALSE)
Goel_sp.df <- as.data.frame(Goel_sp)
# HELG
Helg <- vcf_df[, locality_samples$Helg, drop = FALSE]
Helg_sp <- cSplit(Helg, locality_samples$Helg, sep = ":", type.convert = FALSE)
Helg_sp.df <- as.data.frame(Helg_sp)
# LOCQ
Locq <- vcf_df[, locality_samples$Locq, drop = FALSE]
Locq_sp <- cSplit(Locq, locality_samples$Locq, sep = ":", type.convert = FALSE)
Locq_sp.df <- as.data.frame(Locq_sp)
# FERM
Ferm <- vcf_df[, locality_samples$Ferm, drop = FALSE]
Ferm_sp <- cSplit(Ferm, locality_samples$Ferm, sep = ":", type.convert = FALSE)
Ferm_sp.df <- as.data.frame(Ferm_sp)
# SVAL
Sval <- vcf_df[, locality_samples$Sval, drop = FALSE]
Sval_sp <- cSplit(Sval, locality_samples$Sval, sep = ":", type.convert = FALSE)
Sval_sp.df <- as.data.frame(Sval_sp)
# STGU
Stgu <- vcf_df[, locality_samples$Stgu, drop = FALSE]
Stgu_sp <- cSplit(Stgu, locality_samples$Stgu, sep = ":", type.convert = FALSE)
Stgu_sp.df <- as.data.frame(Stgu_sp)
# PDC
Pdc <- vcf_df[, locality_samples$Pdc, drop = FALSE]
Pdc_sp <- cSplit(Pdc, locality_samples$Pdc, sep = ":", type.convert = FALSE)
Pdc_sp.df <- as.data.frame(Pdc_sp)

# One column per individual, localities side by side, recoded to allele counts.
all.df <- cbind(
  Por_sp.df, Scot_sp.df, Lanil_sp.df, Gal_sp.df, Goel_sp.df, Helg_sp.df,
  Locq_sp.df, Ferm_sp.df, Sval_sp.df, Stgu_sp.df, Pdc_sp.df
)
all.df <- recode_genotypes(all.df)
all.df.num <- lapply(all.df, as.numeric)
all.df.num.df <- as.data.frame(all.df.num)
data.list <- as.list(all.df.num.df)
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl.gl <- new("genlight", data.list)
# 199 genotypes, 4,069 binary SNPs, size: 559.4 Kb
# 19517 (2.41 %) missing data

# imputation.method = "rf" On-the-fly-imputations using Random Forests algorithm
# The third output format is spelled "hierfstat" here; the script previously
# passed "hierftsat", a misspelling of that format name.
imp_rf <- genomic_converter(
  "ElevenLocalities_SelectedSamples_no.dupl_sel.genepop.gen",
  strata = "pop_ElevenLocalities_SelectedSamples.txt",
  output = c("genlight", "genepop", "hierfstat", "structure"),
  filename = "ElevenLocalities_SelectedSamples_no.dupl_sel_IMPrf",
  vcf.metadata = FALSE,
  monomorphic.out = FALSE,
  common.markers = FALSE,
  imputation.method = "rf",
  hierarchical.levels = "strata",
  num.tree = 100,
  pred.mean.matching = 0,
  verbose = FALSE,
  parallel.core = detectCores() - 1
)
# Console output recorded from the call above:
# ##Tidy genomic data:
# Number of markers: 4069
# Number of chromosome/contig/scaffold: no chromosome info
# Number of individuals: 199
# Number of populations: 11
# Imputation method: rf
# Hierarchical levels: strata
# Number of populations: 11
# Number of individuals: 199
# Number of markers: 4069
# Proportion of missing genotypes before imputations: 0.024103
# Imputations computed by strata, take a break...
# Imputations for pop: POR
# Imputations for pop: SCOT
# Imputations for pop: LANIL
# Imputations for pop: GAL
# Imputations for pop: GOEL
# Imputations for pop: HELG
# Imputations for pop: LOCQ
# Imputations for pop: FERM
# Imputations for pop: SVAL
# Imputations for pop: STGU
# Imputations for pop: PDC
# Proportion of missing genotypes after imputations: 0.001387
# Writing tidy data set:
# ##denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_IMPrf.rad

ImpRf_genlight <- imp_rf$genlight.imputed
# 199 genotypes, 4,069 binary SNPs, size: 503.9 Kb
# 1123 (0.14 %) missing data - loci for which there is missing data for all
# individuals within a population cannot be imputed

# check for which no of retained PCs a.score is best using optim.ascore
# first perform dapc with many PCs retained
ImpRf_genlight_centre_check.a.score <- dapc(
  ImpRf_genlight, pop(ImpRf_genlight),
  n.pca = 100, n.da = 10, scale = FALSE,
  var.contrib = TRUE, var.loadings = FALSE, pca.info = TRUE,
  pca.select = "nbEig", perc.pca = NULL, glPca = NULL, parallel = FALSE
)
# perform optim.a.score on DAPC with many PCs retained
optim.a.score(ImpRf_genlight_centre_check.a.score, n.sim = 100, smart = FALSE)
# best is 4
# with a.score of 0.8423931

# creating genlight object
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf <- read.table(
  "ElevenLocalities_SelectedSamples_no.dupl_sel_ImpRf.txt",
  stringsAsFactors = FALSE, header = TRUE, dec = ","
)
# rows 1:4069 are the markers, columns 10:208 the 199 individuals
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_ForGenlight <-
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf[1:4069, 10:208]
dim(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_ForGenlight) # 4069 199
all.df <- recode_genotypes(
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_ForGenlight
)
all.df.num <- lapply(all.df, as.numeric)
all.df.num.df <- as.data.frame(all.df.num)
data.list <- as.list(all.df.num.df)
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl <- new("genlight", data.list)
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl
# Recorded printout (kept as comments so the script can be parsed):
#  /// GENLIGHT OBJECT /////////
#  // 199 genotypes, 4,069 binary SNPs, size: 502 Kb
#  1123 (0.14 %) missing data
#  // Basic content
#    @gen: list of 199 SNPbin
#  // Optional content
#    @ind.names: 199 individual labels
#    @other: a list containing: elements without names
#

# adjusting the order of pops for genlight object
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd <-
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd
# Recorded printout:
#  /// GENLIGHT OBJECT /////////
#  // 199 genotypes, 4,069 binary SNPs, size: 790.4 Kb
#  1123 (0.14 %) missing data
#  // Basic content
#    @gen: list of 199 SNPbin
#  // Optional content
#    @ind.names: 199 individual labels
#    @loc.names: 4069 locus labels
#    @chromosome: factor storing chromosomes of the SNPs
#    @position: integer storing positions of the SNPs
#    @pop: population of each individual (group size range: 5-24)
#    @other: a list containing: elements without names
#

# add pop information to genlight
pop_denovo7.ord.mod.sel.GLOB_Rand_NEW_POP_NAMES <- read.table(
  "pop_ElevenLocalities_SelectedSamples_NEW_POP_NAMES.txt",
  header = TRUE
)
pop(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd) <-
  pop_denovo7.ord.mod.sel.GLOB_Rand_NEW_POP_NAMES$STRATA
# Re-level the population factor; this level order is the one that shows up
# in the DAPC summary and in the figure colours/legends further down.
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd@pop <- factor(
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd@pop,
  levels = c(
    "CAS", "POR", "LOC", "STG", "LAN", "LEZ", "FER", "AUD", "HEL", "ELL", "NYA"
  )
)
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd@pop
# Recorded printout:
#   [1] CAS CAS CAS CAS CAS ELL ELL ELL ELL ELL ELL ELL
#       LAN LAN LAN LAN LAN LAN LAN LAN LAN LAN LAN LAN
#       LAN LAN LAN LAN LAN LAN LAN LAN LAN LAN LAN LAN
#       POR POR POR POR POR POR POR POR POR
#  [46] POR POR POR POR POR POR POR POR POR POR POR POR POR
#       LEZ LEZ LEZ LEZ LEZ LEZ LEZ LEZ LEZ LEZ LEZ LEZ
#       LEZ LEZ LEZ LEZ LEZ LEZ LEZ LEZ LEZ LEZ LEZ LEZ
#       HEL HEL HEL HEL HEL HEL HEL HEL
#  [91] HEL HEL HEL HEL HEL HEL HEL HEL HEL HEL HEL HEL HEL HEL
#       LOC LOC LOC LOC LOC LOC LOC LOC LOC LOC LOC LOC
#       LOC LOC LOC LOC LOC LOC LOC LOC LOC LOC LOC LOC
#       FER FER FER FER FER FER FER
# [136] FER FER FER
#       NYA NYA NYA NYA NYA NYA NYA NYA NYA NYA NYA NYA NYA
#       STG STG STG STG STG STG STG STG STG STG STG STG
#       STG STG STG STG STG STG STG STG STG STG STG STG
#       AUD AUD AUD AUD AUD
# [181] AUD AUD AUD AUD AUD AUD AUD AUD AUD AUD AUD AUD AUD AUD AUD AUD AUD AUD AUD
# Levels: CAS POR LOC STG LAN LEZ FER AUD HEL ELL NYA
#

# dapc with pre-designated pops (i.e. 11 pops) - with centering only and
# n.pca = 4 (after optim.a.score)
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd_centre4 <- dapc(
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd,
  pop(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd),
  n.pca = 4, n.da = 10, scale = FALSE,
  var.contrib = TRUE, var.loadings = FALSE, pca.info = TRUE,
  pca.select = "nbEig", perc.pca = NULL, glPca = NULL, parallel = FALSE
)
# Select the number of axes retained in the Principal Component Analysis (PCA) step: 4 (n.pca=4)
# Select the the number of axes retained in the Discriminant Analysis step: 10 (n.da=10)
summary(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd_centre4)
# Recorded printout:
# $`n.dim`
# [1] 4
# $n.pop
# [1] 11
# $assign.prop
# [1] 0.9949749
# $assign.per.pop
#       CAS       POR       LOC       STG       LAN       LEZ       FER       AUD       HEL       ELL       NYA
# 1.0000000 1.0000000 1.0000000 1.0000000 0.9583333 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
# $prior.grp.size
# CAS POR LOC STG LAN LEZ FER AUD HEL ELL NYA
#   5  22  24  24  24  24  10  24  22   7  13
# $post.grp.size
# CAS POR LOC STG LAN LEZ FER AUD HEL ELL NYA
#   5  22  24  25  23  24  10  24  22   7  13
#

# percentage of variance retained on the first axis
(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd_centre4$eig[1] /
  sum(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd_centre4$eig)) * 100 # 43.7674
#percentage of variance retained on the second axis
(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd_centre4$eig[2] /
  sum(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd_centre4$eig)) * 100 # 39.65884
# percentage of variance retained on the third axis
(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd_centre4$eig[3] /
  sum(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd_centre4$eig)) * 100 # 13.37059

# ---- Shared inputs for both DAPC figures ----
# Short aliases used only within this section.
dapc_fit <- denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd_centre4
dapc_pops <- levels(pop(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd))

# One colour per population, in factor-level order.
myCol <- c(
  "mediumorchid4", "red", "sienna4", "darkorange", "yellow", "lawngreen",
  "burlywood3", "darkgreen", "deepskyblue", "steelblue", "grey20"
)
# Legend entries, in the same order as myCol.
site_labels <- c(
  "Castelo do Neiva (CAS)", "Portiño de Dexo (POR)", "Locmariaquer (LOC)",
  "St Guenolé (STG)", "Lanildut (LAN)", "Lézardrieux (LEZ)",
  "Fermanville (FER)", "Audresselles (AUD)", "Helgoland (HEL)",
  "Ellenabeich (ELL)", "Ny-Ålesund (NYA)"
)

# Per-population lookup tables. Columns are named explicitly instead of being
# renamed afterwards by matching the deparsed text of the creating expression.
PopData <- as.data.frame(
  levels(pop(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd))
)
PopColData <- data.frame(Pop = dapc_pops, Col = myCol, stringsAsFactors = FALSE)
PopShape1Data <- data.frame(
  Pop = dapc_pops,
  Shape1 = rep(19L, length(dapc_pops)), # filled circle
  stringsAsFactors = FALSE
)
PopShape2Data <- data.frame(
  Pop = dapc_pops,
  Shape2 = rep(1L, length(dapc_pops)), # open circle, drawn as a black outline
  stringsAsFactors = FALSE
)

# Coordinates of every individual on the plotted axes, with its population.
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd_centre4.df1_2 <- data.frame(
  x = dapc_fit$ind.coord[, 1],
  y = dapc_fit$ind.coord[, 2],
  Pop = dapc_fit$grp
)
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd_centre4.df2_3 <- data.frame(
  x = dapc_fit$ind.coord[, 2],
  y = dapc_fit$ind.coord[, 3],
  Pop = dapc_fit$grp
)

# Per-individual colour and symbols, looked up through the population. These
# are identical for both figures because both use the same $grp.
col.points <- PopColData$Col[match(dapc_fit$grp, PopColData$Pop)]
Shape1 <- PopShape1Data$Shape1[match(dapc_fit$grp, PopShape1Data$Pop)]
Shape2 <- PopShape2Data$Shape2[match(dapc_fit$grp, PopShape2Data$Pop)]

# Write one DAPC scatter figure to a TIFF file.
#   fit               dapc object to plot
#   axes              the two discriminant functions to draw, e.g. c(1, 2)
#   file              output file name
#   point_col         colour of every individual
#   fill_pch/ring_pch plotting symbols for the filled point and its outline
#   legend_labels     legend entries; legend_fill gives their fill colours
#   posi              corner used for the eigenvalue inset
#   x_label_at, y_label_at, legend_at   c(x, y) positions in plot coordinates
#   legend_text_width passed to legend(text.width = )
# The axis labels carry the percentage of each eigenvalue in the sum of all
# eigenvalues, rounded to two decimals.
save_dapc_scatter <- function(fit, axes, file, point_col, fill_pch, ring_pch,
                              legend_labels, legend_fill, posi,
                              x_label_at, y_label_at, legend_at,
                              legend_text_width) {
  eig_pct <- fit$eig / sum(fit$eig) * 100
  axis_labels <- sprintf("DF%d (%.2f %%)", axes, eig_pct[axes])

  tiff(file, res = 600, compression = "lzw", height = 21, width = 28.75, units = "cm")
  # Close the device even if one of the drawing calls fails.
  on.exit(dev.off(), add = TRUE)

  # Empty frame (pch = "") with the group stars only; points are added below.
  scatter(
    fit,
    xax = axes[1], yax = axes[2], col = "black", bg = "gray99", pch = "",
    cstar = 1, clab = 0, legend = FALSE, scree.da = 0, posi.da = posi
  )
  par(xpd = TRUE)
  xs <- fit$ind.coord[, axes[1]]
  ys <- fit$ind.coord[, axes[2]]
  points(xs, ys, pch = fill_pch, col = point_col, cex = 1.1)
  points(xs, ys, pch = ring_pch, col = "black", cex = 1.15, lwd = 0.5)
  text(
    x = x_label_at[1], y = x_label_at[2], labels = axis_labels[1],
    cex = 1.2, xpd = TRUE, srt = 0, pos = 2
  )
  text(
    x = y_label_at[1], y = y_label_at[2], labels = axis_labels[2],
    cex = 1.2, xpd = TRUE, srt = 90, pos = 2
  )
  # pch and col are the same for every entry, so they are given once.
  legend(
    x = legend_at[1], y = legend_at[2], legend = legend_labels, cex = 0.8,
    pch = 21, col = "black", pt.bg = legend_fill, pt.lwd = 2, pt.cex = 1.5,
    ncol = 2, xjust = 0, text.width = legend_text_width, x.intersp = 0.7
  )
  add.scatter.eig(fit$eig, 15, axes[1], axes[2], posi = posi, inset = .02)
  invisible(file)
}

# visualising the DAPC - scatter - axes 1 and 2
scatter(
  dapc_fit,
  xax = 1, yax = 2, bg = "white", pch = 19, clab = 0, cstar = 1,
  cellipse = 1.25, cex = 0.75, solid = .7, col = myCol, scree.pca = FALSE,
  posi.da = "topright", leg = TRUE, posi.leg = "topleft", cleg = 1
)
# improved graphic - tiff
save_dapc_scatter(
  dapc_fit,
  axes = c(1, 2), file = "Figure 2A.tif",
  point_col = col.points, fill_pch = Shape1, ring_pch = Shape2,
  legend_labels = site_labels, legend_fill = myCol, posi = "topright",
  x_label_at = c(-80.55, -1.55), y_label_at = c(3, -54.9),
  legend_at = c(-98.15, 37), legend_text_width = c(19.25, 19.25)
)

# visualising the DAPC - scatter - axes 2 and 3
scatter(
  dapc_fit,
  xax = 2, yax = 3, bg = "white", pch = 19, clab = 0, cstar = 1,
  cellipse = 1.25, cex = 0.75, solid = .7, col = myCol, scree.pca = FALSE,
  posi.da = "bottomleft", leg = TRUE, posi.leg = "topleft", cleg = 1
)
# improved graphic - tiff
save_dapc_scatter(
  dapc_fit,
  axes = c(2, 3), file = "SNP_all_Axes23.tif",
  point_col = col.points, fill_pch = Shape1, ring_pch = Shape2,
  legend_labels = site_labels, legend_fill = myCol, posi = "bottomleft",
  x_label_at = c(-55, -1.05), y_label_at = c(1.95, -30.35),
  legend_at = c(-66.4, 30.4), legend_text_width = c(13, 13)
)
#group memberships of DAPC -
# Structure like chart
# first have to order the individuals in accordance with the order of south
# to north clusters...
cosmoplotIndMatrix <-
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd_centre4$posterior
# Group of every individual, as stored in the DAPC fit; its level order is
# the display order (CAS ... NYA in the recorded run).
cosmoplot_grp <-
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd_centre4$grp
# order() on a factor sorts by level and leaves ties in their original
# order, so individuals stay in file order within each population. This
# replaces eleven hand-typed row ranges that had to be kept in step with
# the input file.
cosmoplotIndMatrix10 <- cosmoplotIndMatrix[order(cosmoplot_grp), , drop = FALSE]

# Bars are one unit wide with no gaps (space = 0), so population boundaries
# sit at the cumulative group sizes and labels go at the midpoints.
cosmoplot_bounds <- c(0, cumsum(as.vector(table(cosmoplot_grp))))
cosmoplot_mids <- (head(cosmoplot_bounds, -1) + tail(cosmoplot_bounds, -1)) / 2

# rectangle version - tiff
tiff("Figure 2C.tif", res = 600, compression = "lzw", height = 17, width = 32, units = "cm")
myCol <- c(
  "mediumorchid4", "red", "sienna4", "darkorange", "yellow", "lawngreen",
  "burlywood3", "darkgreen", "deepskyblue", "steelblue", "grey20"
)
#barplot_scale <- barplot( t(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd_centre4$posterior) )
compoplot(
  cosmoplotIndMatrix10,
  space = 0, lab = "", col = myCol, cleg = 1, legend = FALSE, font.lab = 2,
  xlab = "population of origin of each sampled individual", cex.axis = 1
)
axis(1, at = cosmoplot_bounds, labels = FALSE, lwd = 1, lwd.ticks = 1)
axis(
  1,
  at = cosmoplot_mids, labels = levels(cosmoplot_grp),
  las = 1, tick = FALSE, cex.axis = 1
)
dev.off()

# perform likelihood-based genetic clustering on a genind object using
# adegenet (snapclust function - still experimental (?))
# create genind object (from hierfstat input) (hierfstat file has pop info
# already specified)
GLOB_no.dupl_ImpRf <- read.fstat(
  "ElevenLocalities_SelectedSamples_no.dupl_sel_ImpRf_hierfstat_NEW_POP_NAMES.dat"
)
# choose the number of clusters for snapclust using AIC with
# snapclust.choose.k (search for k from 2 to 22 (double the number of pops))
GLOB_no.dupl_ImpRf.aic <- snapclust.choose.k(
  max = 22, GLOB_no.dupl_ImpRf, IC = AIC, IC.only = TRUE
)
# Large dataset syndrome:
# for 199 individuals, differences in log-likelihoods exceed computer
# precision; group membership probabilities are approximated (only trust
# clear-cut values)
plot(GLOB_no.dupl_ImpRf.aic, type = "b", cex = 2, xlab = "k", ylab = "AIC")
#points(which.min(GLOB_no.dupl_ImpRf.aic), min(GLOB_no.dupl_ImpRf.aic), col = "blue", pch = 20, cex = 2)
# true K =6

# perform snapclust - pop.ini "ward" with k=6
GLOB.aic.clust_ImpRf <- snapclust(
  GLOB_no.dupl_ImpRf,
  k = 6, pop.ini = "ward", max.iter = 10000
)
# Large dataset syndrome:
# for 198 individuals, differences in log-likelihoods exceed computer
# precision; group membership probabilities are approximated (only trust
# clear-cut values)
GLOB.aic.clust_ImpRf$converged # TRUE
str(GLOB.aic.clust_ImpRf)
# Recorded printout (kept as comments so the script can be parsed):
# List of 6
#  $ group : Factor w/ 6 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
#   ..- attr(*, "names")= chr [1:199] "1" "2" "3" "4" ...
#  $ ll : num -118131
#  $ proba : num [1:199, 1:6] 1 1 1 1 1 1 1 1 1 1 ...
#   ..- attr(*, "dimnames")=List of 2
#   .. ..$ : chr [1:199] "1" "2" "3" "4" ...
#   .. ..$ : chr [1:6] "1" "2" "3" "4" ...
#  $ converged: logi TRUE
#  $ n.iter : int 1 #converged already after the first iteration
#  $ n.param : int 24414
#  - attr(*, "class")= chr [1:2] "snapclust" "list"
#

# Cross-tabulate population of origin against inferred cluster. Only the
# LEVELS of the population factor are reordered: the factor itself must stay
# in the individuals' original order, because $group is in that order.
# Sorting the factor (as was done before) pairs individuals with the wrong
# clusters unless the input file is already sorted, and drops NA entries.
pop.order <- pop(GLOB_no.dupl_ImpRf)
pop.order <- factor(
  pop.order,
  levels = c(
    "CAS", "POR", "LOC", "STG", "LAN", "LEZ", "FER", "AUD", "HEL", "ELL", "NYA"
  )
)
stopifnot(
  !anyNA(pop.order),
  length(pop.order) == length(GLOB.aic.clust_ImpRf$group)
)
table(pop.order, GLOB.aic.clust_ImpRf$group)
table.value(
  table(pop.order, GLOB.aic.clust_ImpRf$group),
  col.lab = paste("cluster", 1:6),
  row.lab = levels(pop.order),
  csize = 0.9, clegend = 0
)
# Results with "ward" appear to be stable as the same output (clustering of
# inds) is obtained with multiple runs. Results with pop.ini="kmeans" and NULL
# do not appear stable as pretty much every run gives a different output. On
# the ?find.clusters help page it says that "ward" method seems to be more
# reliable (than "kmeans") on some simulated datasets.
#structure like graph snapclust GLOB.aic.clust_ImpRf_proba <- GLOB.aic.clust_ImpRf$proba GLOB.aic.clust_ImpRf_proba # 1 2 3 4 5 6 1 1.000000e+00 6.065818e-72 3.433115e-69 2.376166e-83 1.853101e-100 1.789315e-126 2 1.000000e+00 6.545162e-72 1.740057e-69 3.295383e-83 1.412046e-100 1.603481e-126 3 1.000000e+00 1.954306e-70 2.877192e-67 3.999040e-82 9.470463e-99 3.356401e-125 4 1.000000e+00 1.187720e-71 6.034102e-69 1.821480e-82 8.126225e-100 4.627476e-126 5 1.000000e+00 2.423912e-70 6.755820e-68 1.171375e-81 1.212161e-98 3.415909e-125 6 1.000000e+00 9.140489e-82 2.966395e-78 4.261606e-93 1.894929e-112 1.343659e-134 7 1.000000e+00 1.241066e-87 5.497469e-84 1.254563e-99 2.802559e-116 5.447749e-139 8 1.000000e+00 3.521812e-83 2.269574e-79 5.131871e-95 2.599717e-113 1.163586e-135 9 1.000000e+00 1.836985e-80 1.173849e-77 1.863050e-92 5.144585e-110 1.328234e-133 10 1.000000e+00 1.086966e-84 2.965297e-81 5.044461e-97 7.459610e-114 7.533194e-137 11 1.000000e+00 2.357390e-82 8.958551e-79 5.410225e-94 3.103313e-111 9.335942e-135 12 1.000000e+00 3.460702e-83 1.279394e-79 6.642527e-95 3.690441e-113 1.167702e-135 13 1.000000e+00 6.516857e-85 2.265215e-81 1.455671e-96 1.168268e-113 8.712981e-137 14 1.000000e+00 3.000870e-81 1.142212e-77 5.246051e-93 3.527816e-111 4.175251e-134 15 1.000000e+00 1.990277e-81 9.967942e-78 1.141123e-92 8.441540e-111 5.205599e-134 16 1.000000e+00 9.124730e-82 1.055813e-77 1.494476e-93 7.191798e-112 1.833339e-134 17 1.000000e+00 1.910713e-79 3.713130e-77 3.277450e-92 1.136049e-109 3.504208e-133 18 1.000000e+00 5.152861e-82 5.169267e-79 1.300365e-93 1.206189e-111 9.647343e-135 19 1.000000e+00 3.278895e-81 7.256571e-78 2.083894e-93 4.225859e-112 2.110892e-134 20 1.000000e+00 9.364703e-87 3.557016e-83 1.529299e-99 1.373314e-117 6.748147e-139 21 1.000000e+00 2.025870e-84 8.985185e-82 3.259984e-96 1.441359e-113 1.113375e-136 22 1.000000e+00 7.560005e-83 1.368924e-78 1.993409e-93 3.258380e-112 6.694468e-135 23 1.000000e+00 5.330064e-83 9.432670e-80 3.148329e-95 
8.958704e-113 1.231841e-135 24 1.000000e+00 2.057136e-84 7.232586e-81 6.893686e-97 7.977734e-113 1.749016e-136 25 1.000000e+00 1.980278e-83 2.323442e-80 2.525728e-94 1.994501e-112 1.359039e-135 26 1.000000e+00 2.301982e-81 6.000673e-78 3.445594e-93 2.817715e-111 3.059910e-134 27 1.000000e+00 9.015995e-82 9.718016e-79 9.225839e-94 1.333770e-111 1.166143e-134 28 6.914458e-80 1.000000e+00 8.108641e-17 7.387197e-36 5.340970e-75 5.360155e-103 29 1.566449e-84 1.000000e+00 8.466911e-20 7.277789e-37 8.676861e-79 1.757182e-105 30 2.676511e-79 1.000000e+00 8.231287e-19 7.411766e-38 5.715501e-76 7.148562e-104 31 3.071614e-78 1.000000e+00 1.046648e-18 2.144185e-38 3.740938e-76 8.759880e-104 32 3.758114e-82 1.000000e+00 2.673079e-19 2.011624e-39 2.145373e-77 3.869211e-105 33 7.290167e-76 1.000000e+00 5.180023e-20 1.157231e-37 3.598521e-76 1.993144e-103 34 1.034618e-85 1.000000e+00 9.755647e-22 1.347835e-39 9.753206e-80 7.669733e-107 35 4.741979e-81 1.000000e+00 1.720852e-18 7.079155e-35 5.398385e-77 9.099255e-104 36 1.003445e-79 1.000000e+00 3.783957e-18 5.645021e-35 9.249237e-79 8.311098e-104 37 4.204323e-82 1.000000e+00 5.793277e-19 1.432919e-38 1.374038e-77 6.257261e-105 38 9.610259e-88 1.000000e+00 1.122653e-21 1.200646e-42 1.850448e-83 1.368203e-108 39 3.127065e-74 1.000000e+00 4.913986e-17 2.005044e-35 1.088659e-73 1.463300e-101 40 6.533850e-78 1.000000e+00 3.048006e-17 2.305039e-37 6.811397e-78 1.442899e-103 41 1.321872e-90 1.000000e+00 4.861343e-23 1.029010e-44 2.007723e-83 7.670610e-110 42 4.911989e-78 1.000000e+00 4.849294e-16 2.756552e-35 1.023453e-75 1.681474e-102 43 1.430337e-79 1.000000e+00 4.117561e-18 1.075993e-38 2.821235e-78 2.044945e-104 44 5.863267e-78 1.000000e+00 5.936482e-18 1.162229e-35 1.621006e-76 4.203056e-103 45 7.349943e-82 1.000000e+00 1.462387e-21 1.152121e-39 4.060594e-78 1.001159e-105 46 1.647449e-82 1.000000e+00 4.819622e-19 4.657773e-37 2.916593e-76 1.848383e-104 47 2.021681e-82 1.000000e+00 3.425478e-22 1.147453e-41 5.753118e-78 2.467349e-106 
48 1.127401e-81 1.000000e+00 1.103092e-17 1.263268e-36 4.887430e-76 6.874947e-104 49 3.273479e-80 1.000000e+00 5.983331e-18 1.337906e-38 7.038143e-77 3.261245e-104 50 5.153443e-83 1.000000e+00 1.661794e-23 2.005486e-41 3.248213e-79 6.449469e-107 51 1.460570e-71 1.000000e+00 1.635482e-15 2.954407e-34 1.189754e-71 4.416063e-100 52 8.410834e-74 8.013279e-14 1.000000e+00 1.808430e-23 6.221223e-69 1.722588e-97 53 4.234698e-73 3.047983e-13 1.000000e+00 5.956661e-26 1.695767e-70 4.822330e-98 54 7.461465e-76 1.324464e-15 1.000000e+00 1.242787e-25 1.626788e-69 8.324046e-99 55 4.011552e-79 9.904893e-18 1.000000e+00 7.279729e-27 4.968716e-73 7.793283e-101 56 6.077643e-76 1.351019e-14 1.000000e+00 2.000944e-28 7.879381e-72 1.209597e-99 57 1.727213e-73 2.268443e-15 1.000000e+00 9.120716e-24 7.967961e-70 5.637754e-98 58 3.714267e-81 6.282948e-16 1.000000e+00 6.042759e-29 1.405494e-73 2.087547e-101 59 7.826084e-78 1.081026e-14 1.000000e+00 3.938346e-27 5.183313e-73 5.101249e-100 60 3.542931e-83 6.747514e-18 1.000000e+00 1.596282e-30 1.074009e-73 1.523144e-102 61 1.353599e-74 2.360936e-16 1.000000e+00 1.078797e-27 1.337722e-72 9.839932e-100 62 1.255644e-82 1.862536e-17 1.000000e+00 1.331868e-28 8.389771e-76 2.206240e-102 63 1.863105e-80 5.118437e-17 1.000000e+00 3.495096e-31 4.475475e-75 3.125725e-102 64 3.483773e-81 2.151560e-16 1.000000e+00 3.099597e-28 1.224398e-73 2.243997e-101 65 2.953482e-76 1.066747e-15 1.000000e+00 3.677521e-28 1.799662e-72 5.297178e-100 66 4.136886e-74 2.768758e-20 1.000000e+00 1.950408e-28 6.241233e-72 1.945750e-100 67 1.621458e-77 2.173658e-17 1.000000e+00 1.144547e-27 1.176593e-71 2.485922e-100 68 2.430662e-71 1.174445e-12 1.000000e+00 1.464575e-24 2.805509e-67 1.185966e-96 69 8.119017e-77 1.509555e-15 1.000000e+00 2.594469e-27 3.459983e-71 1.170877e-99 70 5.745225e-78 1.226554e-15 1.000000e+00 3.957357e-25 3.953359e-71 1.856475e-99 71 1.475278e-75 1.354928e-13 1.000000e+00 2.573868e-25 5.994868e-68 5.728584e-98 72 4.828453e-76 7.112568e-16 
1.000000e+00 1.251043e-25 4.430946e-70 5.201598e-99 73 5.878432e-78 6.119664e-13 1.000000e+00 1.132505e-23 1.589913e-69 2.645433e-98 74 1.234910e-75 1.153018e-14 1.000000e+00 1.283912e-25 2.940119e-71 6.401628e-99 75 3.858747e-79 3.266915e-17 1.000000e+00 2.098611e-27 5.806260e-71 1.983804e-100 76 3.503633e-84 1.824588e-24 1.000000e+00 1.966466e-26 6.494828e-75 1.748250e-103 77 7.701990e-88 2.951205e-22 1.000000e+00 8.035262e-29 5.835549e-75 2.922489e-104 78 8.236649e-84 5.956580e-22 1.000000e+00 3.401661e-26 3.174690e-76 4.027580e-103 79 1.047419e-83 3.408333e-24 1.000000e+00 2.838581e-26 1.539344e-74 3.153813e-103 80 1.315173e-83 3.168667e-21 1.000000e+00 2.970752e-28 2.422320e-75 3.594245e-103 81 1.369387e-77 5.024075e-20 1.000000e+00 5.154193e-25 2.137955e-73 1.086837e-100 82 6.342006e-82 1.310799e-22 1.000000e+00 2.062837e-26 1.334494e-75 8.553129e-103 83 5.237441e-81 4.291709e-22 1.000000e+00 1.374850e-26 9.906522e-71 1.436817e-101 84 2.267606e-78 2.835107e-22 1.000000e+00 5.501823e-23 1.324464e-72 9.870812e-101 85 6.444890e-79 2.314346e-22 1.000000e+00 4.839940e-28 7.603799e-72 1.018871e-101 86 3.526401e-82 1.123614e-22 1.000000e+00 7.230961e-24 1.412898e-73 6.048773e-102 87 2.790292e-84 4.144266e-20 1.000000e+00 2.790168e-28 3.493341e-76 2.955356e-103 88 1.876130e-83 1.129578e-23 1.000000e+00 1.887713e-26 2.580823e-76 1.832275e-103 89 7.152120e-81 6.551717e-21 1.000000e+00 1.575503e-27 7.977974e-75 2.595592e-102 90 7.891807e-84 4.838896e-21 1.000000e+00 3.963694e-26 1.124774e-76 5.086672e-103 91 1.823294e-77 4.991508e-18 1.000000e+00 5.588220e-25 5.617117e-71 8.941105e-100 92 9.809323e-85 5.582828e-24 1.000000e+00 1.049728e-29 1.560459e-74 4.474873e-104 93 2.455645e-81 4.607219e-22 1.000000e+00 2.108329e-25 8.899839e-73 8.425607e-102 94 1.870025e-82 4.327538e-22 1.000000e+00 1.920316e-25 2.276090e-73 3.714892e-102 95 3.225822e-89 7.436069e-22 1.000000e+00 9.267042e-31 8.697673e-77 3.292166e-105 96 6.257450e-78 2.701962e-20 1.000000e+00 2.643001e-25 
2.133674e-72 1.137828e-100 97 1.538641e-81 1.626041e-22 1.000000e+00 5.456812e-29 6.745284e-74 7.129478e-103 98 2.909864e-77 1.533434e-19 1.000000e+00 6.696630e-25 2.883159e-72 2.800713e-100 99 6.110582e-88 1.909048e-22 1.000000e+00 2.461439e-28 2.640238e-75 2.729999e-104 100 3.182786e-78 2.163158e-22 1.000000e+00 3.872940e-19 3.312850e-74 2.814775e-100 101 1.850086e-84 2.004581e-24 1.000000e+00 1.346004e-24 1.862334e-77 1.132087e-103 102 2.673358e-80 1.503958e-22 1.000000e+00 9.262451e-19 8.279493e-75 9.078637e-101 103 4.514593e-79 5.884477e-23 1.000000e+00 3.173321e-21 1.867670e-74 5.007768e-101 104 1.209000e-84 6.669339e-24 1.000000e+00 3.237186e-18 2.115681e-76 4.061080e-102 105 2.036471e-81 1.493368e-22 1.000000e+00 7.776093e-22 3.124784e-76 6.822468e-102 106 8.054510e-83 2.467054e-20 1.000000e+00 1.048655e-20 4.059173e-73 7.009284e-101 107 2.460033e-78 1.938318e-21 1.000000e+00 2.177791e-18 3.235248e-73 9.235791e-100 108 7.017784e-81 5.494562e-21 1.000000e+00 2.510675e-22 4.076177e-75 2.395805e-101 109 2.989297e-82 6.014903e-19 1.000000e+00 6.880140e-19 7.866991e-74 2.869835e-100 110 4.070471e-79 4.439099e-21 1.000000e+00 9.662353e-22 1.244987e-70 5.341381e-100 111 1.468489e-78 1.521696e-21 1.000000e+00 4.993543e-23 5.053205e-72 1.623495e-100 112 1.017388e-79 2.209554e-23 1.000000e+00 4.477878e-22 1.247473e-74 1.905439e-101 113 1.724212e-79 1.743127e-23 1.000000e+00 1.430076e-20 1.668658e-70 2.699886e-100 114 6.261794e-80 1.738634e-23 1.000000e+00 3.335336e-21 7.143700e-75 2.202978e-101 115 4.983268e-82 1.546780e-23 1.000000e+00 6.369751e-26 6.358289e-76 5.742450e-103 116 1.045543e-82 8.828446e-24 1.000000e+00 3.321941e-25 9.522204e-76 5.666139e-103 117 1.264654e-75 2.021053e-20 1.000000e+00 1.412134e-20 1.156028e-70 6.085481e-99 118 7.996554e-79 1.343998e-25 1.000000e+00 1.537831e-24 3.472056e-75 2.582114e-102 119 7.064445e-80 2.910520e-22 1.000000e+00 1.993742e-21 8.222280e-75 3.679240e-101 120 9.081761e-74 2.020923e-21 1.000000e+00 1.608513e-18 
1.008963e-72 9.016128e-99 121 3.175092e-71 9.159737e-20 1.000000e+00 1.560316e-19 1.048117e-70 9.900760e-98 122 2.448189e-81 1.230233e-21 1.000000e+00 4.052748e-21 6.915093e-75 2.789280e-101 123 1.135818e-79 6.796695e-21 1.000000e+00 1.091234e-22 5.690122e-74 6.256701e-101 124 4.323077e-72 3.131069e-22 3.308136e-09 1.000000e+00 7.383726e-69 5.808782e-96 125 8.433250e-79 3.808254e-25 5.496058e-10 1.000000e+00 3.148915e-72 1.021416e-98 126 3.835377e-75 5.527166e-25 1.138527e-11 1.000000e+00 8.209712e-72 3.308358e-98 127 5.921772e-76 2.403241e-23 1.324866e-08 1.000000e+00 5.894539e-71 2.947069e-97 128 5.443831e-80 7.046845e-26 9.135218e-11 1.000000e+00 2.789415e-74 1.143516e-99 129 1.060889e-77 1.057472e-23 9.458142e-10 1.000000e+00 1.847019e-72 3.301105e-98 130 5.577210e-74 6.712442e-22 7.010807e-08 9.999999e-01 1.254353e-69 3.661957e-96 131 6.549148e-78 1.050723e-22 1.046806e-09 1.000000e+00 2.060306e-71 7.843508e-98 132 1.139606e-78 1.394767e-24 9.313841e-10 1.000000e+00 2.431296e-73 9.363985e-99 133 1.967440e-78 3.297072e-25 4.663641e-10 1.000000e+00 5.700995e-72 1.281024e-98 134 1.207857e-81 2.029000e-32 4.487737e-21 1.000000e+00 4.766924e-79 2.535881e-104 135 4.572067e-85 1.041241e-34 2.296446e-21 1.000000e+00 3.683160e-80 9.576443e-106 136 5.364389e-85 3.106554e-34 1.198182e-21 1.000000e+00 2.110550e-81 6.097611e-106 137 4.475268e-83 4.403792e-35 6.773826e-23 1.000000e+00 4.156879e-80 1.021094e-105 138 2.485836e-81 1.108844e-32 1.785923e-21 1.000000e+00 6.069363e-79 2.266136e-104 139 1.638280e-82 5.760604e-33 3.680652e-21 1.000000e+00 2.456364e-80 7.021516e-105 140 3.702186e-83 9.154801e-34 8.936294e-23 1.000000e+00 2.017488e-80 1.649800e-105 141 5.488751e-80 7.306526e-32 4.054163e-20 1.000000e+00 1.199146e-78 1.312828e-103 142 7.188336e-79 3.524618e-32 3.355622e-20 1.000000e+00 2.025979e-78 2.029765e-103 143 6.272461e-82 1.152400e-32 7.931225e-22 1.000000e+00 4.789380e-79 1.405880e-104 144 1.562707e-81 2.827323e-34 3.480004e-21 1.000000e+00 1.147642e-79 
8.119774e-105 145 2.094553e-85 3.586598e-32 9.887683e-21 1.000000e+00 7.596062e-80 4.078566e-105 146 2.871699e-82 9.256304e-32 2.753047e-20 1.000000e+00 3.707941e-79 3.523066e-104 147 2.849613e-84 1.253879e-33 4.579908e-22 1.000000e+00 1.408199e-78 3.409798e-105 148 6.226761e-87 8.905915e-35 8.476935e-23 1.000000e+00 1.802481e-81 1.111287e-106 149 7.161366e-84 2.892426e-33 1.243236e-21 1.000000e+00 1.759109e-81 1.553950e-105 150 5.485628e-80 1.528233e-31 1.168634e-20 1.000000e+00 1.070823e-77 1.838140e-103 151 9.499798e-84 2.695654e-32 4.421353e-21 1.000000e+00 9.814884e-80 7.402526e-105 152 2.632846e-80 8.564533e-33 7.609098e-21 1.000000e+00 4.471462e-77 1.089428e-103 153 1.242660e-81 1.442200e-33 1.391014e-21 1.000000e+00 2.000811e-79 9.995378e-105 154 1.444331e-81 3.684887e-33 7.530253e-21 1.000000e+00 8.384743e-79 2.320033e-104 155 2.038963e-80 3.121547e-32 3.809901e-21 1.000000e+00 1.997570e-77 9.936996e-104 156 8.238245e-85 1.907813e-34 9.244278e-23 1.000000e+00 4.011350e-81 4.105092e-106 157 7.167925e-80 1.663715e-31 1.192757e-20 1.000000e+00 6.661933e-79 1.136426e-103 158 2.148840e-89 2.959025e-37 7.604323e-26 1.000000e+00 2.026240e-83 1.144029e-108 159 1.707557e-89 3.062997e-36 6.939696e-25 1.000000e+00 5.150220e-83 3.270045e-108 160 1.216210e-88 4.201505e-38 3.351342e-26 1.000000e+00 1.326387e-83 8.540369e-109 161 5.383370e-88 1.785872e-37 2.940356e-26 1.000000e+00 5.854379e-83 2.013600e-108 162 3.628242e-92 2.812037e-39 5.675694e-27 1.000000e+00 4.995775e-85 3.568486e-110 163 1.487446e-90 4.723286e-38 1.959963e-26 1.000000e+00 4.751700e-84 2.650773e-109 164 3.589297e-90 5.018578e-38 3.455688e-26 1.000000e+00 6.842310e-84 3.855508e-109 165 4.320828e-87 7.523074e-36 4.987811e-25 1.000000e+00 3.493473e-81 2.575385e-107 166 4.856629e-89 2.738163e-38 9.618720e-27 1.000000e+00 1.334013e-83 5.088977e-109 167 4.559939e-91 1.778290e-37 7.521192e-26 1.000000e+00 3.178534e-83 5.220489e-109 168 6.481705e-89 5.409443e-38 2.377238e-25 1.000000e+00 9.297141e-83 
1.730091e-108 169 4.479073e-89 2.634179e-37 1.725710e-25 1.000000e+00 1.483889e-83 1.433035e-108 170 5.547995e-89 1.397608e-36 1.366182e-25 1.000000e+00 5.403287e-83 2.580796e-108 171 6.090897e-88 2.242733e-36 3.130759e-25 1.000000e+00 5.123088e-82 8.478991e-108 172 1.953567e-89 1.005280e-36 1.249253e-25 1.000000e+00 1.822904e-83 1.549966e-108 173 1.781338e-87 1.354715e-36 2.594462e-25 1.000000e+00 3.935522e-82 8.680504e-108 174 2.878386e-88 5.446556e-37 1.138588e-25 1.000000e+00 8.053028e-83 3.102669e-108 175 2.365638e-89 3.349896e-37 9.816871e-27 1.000000e+00 6.314132e-84 6.287375e-109 176 1.179898e-89 3.336729e-37 3.269849e-26 1.000000e+00 2.935912e-83 9.455911e-109 177 1.837130e-88 7.419655e-37 1.517023e-25 1.000000e+00 2.471268e-82 3.998557e-108 178 2.266238e-89 8.294768e-37 2.897733e-26 1.000000e+00 1.048843e-82 1.627766e-108 179 1.857271e-89 3.359302e-38 4.073561e-26 1.000000e+00 1.255188e-83 5.767367e-109 180 1.881937e-137 4.952390e-102 2.201977e-93 2.039415e-111 1.000000e+00 5.306540e-73 181 1.355399e-139 1.814128e-104 9.618833e-98 2.881401e-114 1.000000e+00 4.548435e-74 182 2.780559e-141 1.713482e-107 3.994177e-99 1.648106e-117 1.000000e+00 7.366949e-75 183 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 NaN 0.000000e+00 184 4.269139e-142 8.124696e-108 1.196187e-99 2.382767e-116 1.000000e+00 3.061491e-79 185 6.466533e-134 3.462028e-95 1.525280e-89 4.852760e-108 1.000000e+00 2.206011e-69 186 1.923787e-146 7.163697e-110 2.770413e-104 1.064370e-123 1.000000e+00 6.236340e-87 187 1.827588e-145 1.761877e-116 2.205699e-111 5.679293e-129 4.618438e-63 1.000000e+00 188 5.945245e-141 4.034423e-110 2.405890e-103 6.118952e-119 6.252216e-65 1.000000e+00 189 2.490014e-147 2.500085e-118 1.678729e-112 6.809163e-127 1.674553e-71 1.000000e+00 190 3.959825e-147 3.575788e-118 1.084761e-112 3.193075e-128 3.929923e-69 1.000000e+00 191 5.564385e-148 4.034489e-119 2.314423e-112 3.392154e-129 8.419735e-72 1.000000e+00 192 8.281076e-145 3.815017e-115 6.811067e-109 2.805781e-125 
# 2.670471e-67 1.000000e+00
# 193 2.005078e-143 1.073518e-112 6.543905e-108 5.427841e-124 4.249155e-65 1.000000e+00
# 194 4.183143e-146 1.445402e-116 6.945637e-111 1.083186e-127 5.888829e-68 1.000000e+00
# 195 9.602331e-148 1.695140e-118 2.418789e-112 1.304289e-129 7.631767e-71 1.000000e+00
# 196 6.214239e-145 3.126148e-115 1.021532e-109 3.320549e-125 4.369055e-67 1.000000e+00
# 197 1.019242e-145 6.061926e-117 9.174507e-110 7.011407e-127 1.410284e-67 1.000000e+00
# 198 1.919506e-144 1.327858e-114 1.246078e-108 1.294780e-124 6.080979e-67 1.000000e+00
# 199 1.405670e-144 8.527670e-114 1.865022e-108 5.017970e-125 3.437904e-68 1.000000e+00
#
#replace the NaN value
# In the recorded run the only NaN was cell [183, 5], in a row that is zero
# everywhere else. Only cells that really are NaN are patched, so a re-run
# without that NaN is left untouched.
nan_cells <- is.nan(GLOB.aic.clust_ImpRf_proba)
GLOB.aic.clust_ImpRf_proba[nan_cells] <- 1
# Membership probabilities of each individual should still sum to 1.
if (any(abs(rowSums(GLOB.aic.clust_ImpRf_proba) - 1) > 1e-6)) {
  warning("Some rows of GLOB.aic.clust_ImpRf_proba do not sum to 1 after NaN replacement",
          call. = FALSE)
}
GLOB.aic.clust_ImpRf_proba
# Recorded console output (rows 179-187):
# 179 1.857271e-89 3.359302e-38 4.073561e-26 1.000000e+00 1.255188e-83 5.767367e-109
# 180 1.881937e-137 4.952390e-102 2.201977e-93 2.039415e-111 1.000000e+00 5.306540e-73
# 181 1.355399e-139 1.814128e-104 9.618833e-98 2.881401e-114 1.000000e+00 4.548435e-74
# 182 2.780559e-141 1.713482e-107 3.994177e-99 1.648106e-117 1.000000e+00 7.366949e-75
# 183 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00
# 184 4.269139e-142 8.124696e-108 1.196187e-99 2.382767e-116 1.000000e+00 3.061491e-79
# 185 6.466533e-134 3.462028e-95 1.525280e-89 4.852760e-108 1.000000e+00 2.206011e-69
# 186 1.923787e-146 7.163697e-110 2.770413e-104 1.064370e-123 1.000000e+00 6.236340e-87
# 187 1.827588e-145 1.761877e-116 2.205699e-111 5.679293e-129 4.618438e-63 1.000000e+00
#

#Structure like plot - tiff
# Number of individuals per population in plotting order; the axis ticks mark
# the population boundaries and the labels sit at each population's midpoint.
snap_pop_sizes <- c(CAS = 5, POR = 22, LOC = 24, STG = 24, LAN = 24, LEZ = 24,
                    FER = 10, AUD = 24, HEL = 22, ELL = 7, NYA = 13)
snap_breaks <- c(0, cumsum(unname(snap_pop_sizes)))  # 0, 5, 27, ..., 199
snap_mids <- (head(snap_breaks, -1) + tail(snap_breaks, -1)) / 2

tiff("Figure 3A.tif", res = 600, compression = "lzw", height = 17, width = 32, units = "cm")
myCol <- c("red", "sienna4", "lawngreen", "deepskyblue", "steelblue", "grey20")
compoplot(
  GLOB.aic.clust_ImpRf_proba,
  space = 0, lab = "", col = myCol, cleg = 1, legend = FALSE, font.lab = 2,
  xlab = "population of origin of each sampled individual", cex.axis = 1
)
axis(1, at = snap_breaks, labels = FALSE, lwd = 1, lwd.ticks = 1)
axis(1, at = snap_mids, labels = names(snap_pop_sizes), las = 1, tick = FALSE, cex.axis = 1)
dev.off()

# ---- genepop ----------------------------------------------------------------
genepop_input <- "ElevenLocalities_SelectedSamples_no.dupl_sel_ImpRf_genepop.gen"

#genepop - basic info
genepopBasic <- basic_info(inputFile = genepop_input, outputFile = "BasicInfo",
                           verbose = interactive())

#genepop - Fis estimation
genepopFis <- genedivFis(inputFile = genepop_input, sizes = FALSE, outputFile = "Fis",
                         dataType = "Diploid", verbose = interactive())

#genepop - the exact HW test
genepopHW <- test_HW(
  inputFile = genepop_input, which = "Proba",
  outputFile = "HWout_Proba_EnumT", settingsFile = "", enumeration = TRUE,
  verbose = interactive(), dememorization = 100000, batches = 5000,
  iterations = 10000
)

# ---- basic stats - Ho, Hs, Fis for each of the 11 populations ---------------
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.hier.dat <- read.fstat("ElevenLocalities_SelectedSamples_no.dupl_sel_ImpRf_hierfstatHier.dat")
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.basic <- basic.stats(
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.hier.dat,
  diploid = TRUE, digits = 4
)

# Standard error of the mean that ignores missing values.
# The denominator counts only the non-missing observations, matching the
# values sd(..., na.rm = TRUE) is computed from; dividing by length(x) would
# understate the SE whenever a locus is NA.
std <- function(x) {
  n_obs <- sum(!is.na(x))
  sd(x, na.rm = TRUE) / sqrt(n_obs)
}

#Ho (observed heterozygosity) for each of the 11 populations (averaging over Ho obtained for each locus)
Ho <- denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.basic$Ho
HoMeans <- colMeans(Ho, na.rm = TRUE)
#obtain SE
Ho_dat <- data.frame(Ho)
HoSes <- apply(Ho_dat, 2, std)
HoSes

#Hs (within-population gene diversity, i.e. expected heterozygosity) for each of the 11 populations (averaging over Hs obtained for each locus)
Hs <- denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.basic$Hs
HsMeans <- colMeans(Hs, na.rm = TRUE)
#obtain SE
Hs_dat <- data.frame(Hs)
HsSes <- apply(Hs_dat, 2, std)
HsSes

#Comparing Hs between populations: is Hs significantly higher or lower in one pop than in another.
# Use the Welch Two Sample t-test (t.test) and, only for comparison, the non-parametric Wilcoxon test. For the paper we will use the Welch Two Sample t-test. Although the data break the assumptions of equal variances and normality (too many zeros, i.e. monomorphic loci), this test should be sufficiently robust if sample sizes are large. From Wikipedia:
# Welch's t-test is more robust than Student's t-test and maintains type I error rates close to nominal for unequal variances and for unequal sample sizes under normality. Furthermore, the power of Welch's t-test comes close to that of Student's t-test, even when the population variances are equal and sample sizes are balanced.[2] Welch's t-test can be generalized to more than 2-samples,[5] which is more robust than one-way analysis of variance (ANOVA).
# It is not recommended to pre-test for equal variances and then choose between Student's t-test or Welch's t-test.[6] Rather, Welch's t-test can be applied directly and without any substantial disadvantages to Student's t-test as noted above.
# Welch's t-test remains robust for skewed distributions and large sample sizes.[7] Reliability decreases for skewed distributions and smaller samples, where one could possibly perform Welch's t-test on ranked data.[4]

# Per-population vectors of per-locus Hs: HsPop1 ... HsPop11 hold columns
# 1 ... 11 of Hs and are created in the current environment.
hs_columns <- seq_len(11L)
list2env(
  setNames(
    lapply(hs_columns, function(k) Hs[, k]),
    paste0("HsPop", hs_columns)
  ),
  envir = environment()
)

#test values for normality
# Each call is followed by the output recorded from the original run.
shapiro.test(HsPop1)
#Shapiro-Wilk normality test
#data: HsPop1
#W = 0.31799, p-value < 2.2e-16
shapiro.test(HsPop2)
#Shapiro-Wilk normality test
#data: HsPop2
#W = 0.37225, p-value < 2.2e-16
shapiro.test(HsPop3)
#Shapiro-Wilk normality test
#data: HsPop3
#W = 0.51642, p-value < 2.2e-16
shapiro.test(HsPop4)
#Shapiro-Wilk normality test
#data: HsPop4
#W = 0.55172, p-value < 2.2e-16
shapiro.test(HsPop5)
#Shapiro-Wilk normality test
#data: HsPop5
#W = 0.52416, p-value < 2.2e-16
shapiro.test(HsPop6)
#Shapiro-Wilk normality test
#data: HsPop6
#W = 0.49409, p-value < 2.2e-16
shapiro.test(HsPop7)
#Shapiro-Wilk normality test
#data: HsPop7
#W = 0.42965, p-value < 2.2e-16
shapiro.test(HsPop8)
#Shapiro-Wilk normality test
#data: HsPop8
#W = 0.31407, p-value < 2.2e-16
shapiro.test(HsPop9)
#Shapiro-Wilk normality test
#data: HsPop9
#W = 0.18824, p-value < 2.2e-16
shapiro.test(HsPop10)
#Shapiro-Wilk normality test
#data: HsPop10
#W = 0.50207, p-value < 2.2e-16
shapiro.test(HsPop11)
#Shapiro-Wilk normality test
#data: HsPop11
#W = 0.50121, p-value < 2.2e-16

#data are non-normal and in many comparisons the variances are not homogeneous, so first do independent t-test (parametric comparison of means) but also do non-parametric test (Wilcoxon) as the data fail assumptions for a parametric test
t.test(
  HsPop1, HsPop2,
  alternative = "two.sided",
  mu = 0,
  paired = FALSE,
  var.equal = FALSE,
  conf.level = 0.95
)
#Welch Two Sample t-test
#data: HsPop1 and HsPop2
#t = -0.80342, df = 8036.4, p-value = 0.4218
#alternative hypothesis: true difference in means is not 
equal to 0 #95 percent confidence interval: #-0.006976268 0.002920188 #sample estimates: #mean of x mean of y #0.03418241 0.03621045 wilcox.test(HsPop1, HsPop2, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop1 and HsPop2 #W = 7851400, p-value = 2.3e-08 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop1, HsPop3, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop1 and HsPop3 #t = -6.3956, df = 8074.5, p-value = 1.688e-10 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.02139933 -0.01135888 #sample estimates: #mean of x mean of y #0.03418241 0.05056151 wilcox.test(HsPop1, HsPop3, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop1 and HsPop3 #W = 6950000, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop1, HsPop4, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop1 and HsPop4 #t = -7.8289, df = 8071.4, p-value = 5.548e-15 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.02502092 -0.01500011 #sample estimates: #mean of x mean of y #0.03418241 0.05419292 wilcox.test(HsPop1, HsPop4, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop1 and HsPop4 #W = 6553900, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop1, HsPop5, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop1 and HsPop5 #t = -7.0624, df = 8084.6, p-value = 1.772e-12 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.02329900 -0.01317512 #sample estimates: #mean of x mean of y #0.03418241 
0.05241946 wilcox.test(HsPop1, HsPop5, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop1 and HsPop5 #W = 6751000, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop1, HsPop6, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop1 and HsPop6 #t = -6.2221, df = 8090.8, p-value = 5.149e-10 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.02137130 -0.01113142 #sample estimates: #mean of x mean of y #0.03418241 0.05043377 wilcox.test(HsPop1, HsPop6, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop1 and HsPop6 #W = 7077500, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop1, HsPop7, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop1 and HsPop7 #t = -4.0316, df = 8090.9, p-value = 5.59e-05 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.015718256 -0.005433719 #sample estimates: #mean of x mean of y #0.03418241 0.04475839 wilcox.test(HsPop1, HsPop7, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop1 and HsPop7 #W = 7513400, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop1, HsPop8, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop1 and HsPop8 #t = 2.9055, df = 7755.4, p-value = 0.003677 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #0.002250973 0.011587222 #sample estimates: #mean of x mean of y #0.03418241 0.02726331 wilcox.test(HsPop1, HsPop8, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: 
HsPop1 and HsPop8 #W = 8053800, p-value = 0.01343 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop1, HsPop9, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop1 and HsPop9 #t = 9.1873, df = 6773.8, p-value < 2.2e-16 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #0.01575337 0.02429956 #sample estimates: #mean of x mean of y #0.03418241 0.01415594 wilcox.test(HsPop1, HsPop9, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop1 and HsPop9 #W = 8513100, p-value = 6.381e-14 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop1, HsPop10, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop1 and HsPop10 #t = -10.206, df = 7753.2, p-value < 2.2e-16 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.03558485 -0.02411782 #sample estimates: #mean of x mean of y #0.03418241 0.06403374 wilcox.test(HsPop1, HsPop10, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop1 and HsPop10 #W = 7295200, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop1, HsPop11, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop1 and HsPop11 #t = -8.978, df = 7913.7, p-value < 2.2e-16 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.03079103 -0.01975478 #sample estimates: #mean of x mean of y #0.03418241 0.05945531 wilcox.test(HsPop1, HsPop11, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop1 and HsPop11 #W = 7170000, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop2, 
HsPop3, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop2 and HsPop3 #t = -5.8051, df = 8119, p-value = 6.676e-09 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.01919714 -0.00950500 #sample estimates: #mean of x mean of y #0.03621045 0.05056151 wilcox.test(HsPop2, HsPop3, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop2 and HsPop3 #W = 7332700, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop2, HsPop4, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop2 and HsPop4 #t = -7.2893, df = 8120.8, p-value = 3.408e-13 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.02281837 -0.01314658 #sample estimates: #mean of x mean of y #0.03621045 0.05419292 wilcox.test(HsPop2, HsPop4, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop2 and HsPop4 #W = 6942600, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop2, HsPop5, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop2 and HsPop5 #t = -6.4987, df = 8108.6, p-value = 8.581e-11 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.02109829 -0.01131975 #sample estimates: #mean of x mean of y #0.03621045 0.05241946 wilcox.test(HsPop2, HsPop5, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop2 and HsPop5 #W = 7140600, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop2, HsPop6, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: 
HsPop2 and HsPop6 #t = -5.6334, df = 8087.1, p-value = 1.826e-08 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.019172623 -0.009274021 #sample estimates: #mean of x mean of y #0.03621045 0.05043377 wilcox.test(HsPop2, HsPop6, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop2 and HsPop6 #W = 7458400, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop2, HsPop7, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop2 and HsPop7 #t = -3.3698, df = 8076.9, p-value = 0.0007556 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.013520341 -0.003575553 #sample estimates: #mean of x mean of y #0.03621045 0.04475839 wilcox.test(HsPop2, HsPop7, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop2 and HsPop7 #W = 7877200, p-value = 8.729e-09 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop2, HsPop8, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop2 and HsPop8 #t = 3.9146, df = 7979.6, p-value = 9.129e-05 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #0.004466853 0.013427423 #sample estimates: #mean of x mean of y #0.03621045 0.02726331 wilcox.test(HsPop2, HsPop8, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop2 and HsPop8 #W = 8457000, p-value = 0.0007803 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop2, HsPop9, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop2 and HsPop9 #t = 10.63, df = 7109.8, p-value < 2.2e-16 #alternative hypothesis: true difference in means is not 
equal to 0 #95 percent confidence interval: #0.01798747 0.02612153 #sample estimates: #mean of x mean of y #0.03621045 0.01415594 wilcox.test(HsPop2, HsPop9, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop2 and HsPop9 #W = 8928200, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop2, HsPop10, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop2 and HsPop10 #t = -9.7715, df = 7545, p-value < 2.2e-16 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.03340498 -0.02224161 #sample estimates: #mean of x mean of y #0.03621045 0.06403374 wilcox.test(HsPop2, HsPop10, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop2 and HsPop10 #W = 7635700, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop2, HsPop11, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop2 and HsPop11 #t = -8.5009, df = 7756.5, p-value < 2.2e-16 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.02860505 -0.01788467 #sample estimates: #mean of x mean of y #0.03621045 0.05945531 wilcox.test(HsPop2, HsPop11, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop2 and HsPop11 #W = 7523600, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop3, HsPop4, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop3 and HsPop4 #t = -1.4499, df = 8135.9, p-value = 0.1471 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.008540948 0.001278131 #sample estimates: #mean of x mean of y #0.05056151 
0.05419292 wilcox.test(HsPop3, HsPop4, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop3 and HsPop4 #W = 7908000, p-value = 9.683e-06 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop3, HsPop5, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop3 and HsPop5 #t = -0.73397, df = 8133.6, p-value = 0.463 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.006820073 0.003104172 #sample estimates: #mean of x mean of y #0.05056151 0.05241946 wilcox.test(HsPop3, HsPop5, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop3 and HsPop5 #W = 8109000, p-value = 0.03948 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop3, HsPop6, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop3 and HsPop6 #t = 0.049871, df = 8122.9, p-value = 0.9602 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.004893536 0.005149028 #sample estimates: #mean of x mean of y #0.05056151 0.05043377 wilcox.test(HsPop3, HsPop6, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop3 and HsPop6 #W = 8406500, p-value = 0.1081 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop3, HsPop7, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop3 and HsPop7 #t = 2.2553, df = 8116.7, p-value = 0.02414 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #0.000759076 0.010847166 #sample estimates: #mean of x mean of y #0.05056151 0.04475839 wilcox.test(HsPop3, HsPop7, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop3 
and HsPop7 #W = 8823800, p-value = 7.156e-13 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop3, HsPop8, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop3 and HsPop8 #t = 10.016, df = 7922.7, p-value < 2.2e-16 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #0.01873852 0.02785789 #sample estimates: #mean of x mean of y #0.05056151 0.02726331 wilcox.test(HsPop3, HsPop8, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop3 and HsPop8 #W = 9442300, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop3, HsPop9, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop3 and HsPop9 #t = 17.179, df = 7000.6, p-value < 2.2e-16 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #0.03225123 0.04055991 #sample estimates: #mean of x mean of y #0.05056151 0.01415594 wilcox.test(HsPop3, HsPop9, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop3 and HsPop9 #W = 9934000, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop3, HsPop10, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop3 and HsPop10 #t = -4.6778, df = 7654.8, p-value = 2.949e-06 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.019117824 -0.007826623 #sample estimates: #mean of x mean of y #0.05056151 0.06403374 wilcox.test(HsPop3, HsPop10, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop3 and HsPop10 #W = 8498100, p-value = 0.0003854 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop3, 
HsPop11, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop3 and HsPop11 #t = -3.2127, df = 7849.1, p-value = 0.00132 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.014320509 -0.003467079 #sample estimates: #mean of x mean of y #0.05056151 0.05945531 wilcox.test(HsPop3, HsPop11, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop3 and HsPop11 #W = 8245400, p-value = 0.4606 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop4, HsPop5, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop4 and HsPop5 #t = 0.702, df = 8132.4, p-value = 0.4827 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.003178728 0.006725644 #sample estimates: #mean of x mean of y #0.05419292 0.05241946 wilcox.test(HsPop4, HsPop5, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop4 and HsPop5 #W = 8485500, p-value = 0.01487 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop4, HsPop6, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop4 and HsPop6 #t = 1.4704, df = 8120.1, p-value = 0.1415 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #-0.001252309 0.008770618 #sample estimates: #mean of x mean of y #0.05419292 0.05043377 wilcox.test(HsPop4, HsPop6, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop4 and HsPop6 #W = 8783100, p-value = 1.073e-09 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop4, HsPop7, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: 
HsPop4 and HsPop7 #t = 3.6736, df = 8113.4, p-value = 0.0002407 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #0.004400258 0.014468800 #sample estimates: #mean of x mean of y #0.05419292 0.04475839 wilcox.test(HsPop4, HsPop7, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop4 and HsPop7 #W = 9200600, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop4, HsPop8, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop4 and HsPop8 #t = 11.605, df = 7932.6, p-value < 2.2e-16 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #0.02238074 0.03147848 #sample estimates: #mean of x mean of y #0.05419292 0.02726331 wilcox.test(HsPop4, HsPop8, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop4 and HsPop8 #W = 9850100, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop4, HsPop9, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop4 and HsPop9 #t = 18.946, df = 7017.2, p-value < 2.2e-16 #alternative hypothesis: true difference in means is not equal to 0 #95 percent confidence interval: #0.03589451 0.04417945 #sample estimates: #mean of x mean of y #0.05419292 0.01415594 wilcox.test(HsPop4, HsPop9, alternative = "two.sided") #Wilcoxon rank sum test with continuity correction #data: HsPop4 and HsPop9 #W = 10356000, p-value < 2.2e-16 #alternative hypothesis: true location shift is not equal to 0 t.test(HsPop4, HsPop10, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) #Welch Two Sample t-test #data: HsPop4 and HsPop10 #t = -3.4222, df = 7641.2, p-value = 0.0006243 #alternative hypothesis: true difference in means is not equal to 0 
# 95 percent confidence interval:
# -0.015477686 -0.004203945
# sample estimates:
# mean of x mean of y
# 0.05419292 0.06403374

# Pairwise comparisons of Hs between populations (continued).
# Every pair is tested with a two-sided Welch t-test and a two-sided Wilcoxon
# rank-sum test. The calls below rely on the documented defaults of t.test()
# (alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE,
# conf.level = 0.95) and of wilcox.test() (alternative = "two.sided").
# The output recorded from the original run is kept beneath each call.

wilcox.test(HsPop4, HsPop10)
# Wilcoxon rank sum test with continuity correction
# data: HsPop4 and HsPop10
# W = 8852400, p-value = 5.062e-15
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop4, HsPop11)
# Welch Two Sample t-test
# data: HsPop4 and HsPop11
# t = -1.9041, df = 7838.1, p-value = 0.05693
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -0.0106800181 0.0001552461
# sample estimates:
# mean of x mean of y
# 0.05419292 0.05945531

wilcox.test(HsPop4, HsPop11)
# Wilcoxon rank sum test with continuity correction
# data: HsPop4 and HsPop11
# W = 8799800, p-value = 1.019e-12
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop5, HsPop6)
# Welch Two Sample t-test
# data: HsPop5 and HsPop6
# t = 0.76881, df = 8131.7, p-value = 0.442
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -0.003077290 0.007048683
# sample estimates:
# mean of x mean of y
# 0.05241946 0.05043377

wilcox.test(HsPop5, HsPop6)
# Wilcoxon rank sum test with continuity correction
# data: HsPop5 and HsPop6
# W = 8582400, p-value = 0.0001838
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop5, HsPop7)
# Welch Two Sample t-test
# data: HsPop5 and HsPop7
# t = 2.953, df = 8127.8, p-value = 0.003156
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# 0.002575509 0.012746634
# sample estimates:
# mean of x mean of y
# 0.05241946 0.04475839

wilcox.test(HsPop5, HsPop7)
# Wilcoxon rank sum test with continuity correction
# data: HsPop5 and HsPop7
# W = 9003300, p-value < 2.2e-16
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop5, HsPop8)
# Welch Two Sample t-test
# data: HsPop5 and HsPop8
# t = 10.707, df = 7878.8, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# 0.02055058 0.02976173
# sample estimates:
# mean of x mean of y
# 0.05241946 0.02726331

wilcox.test(HsPop5, HsPop8)
# Wilcoxon rank sum test with continuity correction
# data: HsPop5 and HsPop8
# W = 9640800, p-value < 2.2e-16
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop5, HsPop9)
# Welch Two Sample t-test
# data: HsPop5 and HsPop9
# t = 17.839, df = 6931.3, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# 0.03405885 0.04246819
# sample estimates:
# mean of x mean of y
# 0.05241946 0.01415594

wilcox.test(HsPop5, HsPop9)
# Wilcoxon rank sum test with continuity correction
# data: HsPop5 and HsPop9
# W = 10143000, p-value < 2.2e-16
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop5, HsPop10)
# Welch Two Sample t-test
# data: HsPop5 and HsPop10
# t = -4.0064, df = 7710.2, p-value = 6.224e-05
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -0.017296993 -0.005931553
# sample estimates:
# mean of x mean of y
# 0.05241946 0.06403374

wilcox.test(HsPop5, HsPop10)
# Wilcoxon rank sum test with continuity correction
# data: HsPop5 and HsPop10
# W = 8678200, p-value = 8.056e-09
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop5, HsPop11)
# Welch Two Sample t-test
# data: HsPop5 and HsPop11
# t = -2.5236, df = 7893, p-value = 0.01164
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -0.012501167 -0.001570521
# sample estimates:
# mean of x mean of y
# 0.05241946 0.05945531

wilcox.test(HsPop5, HsPop11)
# Wilcoxon rank sum test with continuity correction
# data: HsPop5 and HsPop11
# W = 8613600, p-value = 8.299e-07
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop6, HsPop7)
# Welch Two Sample t-test
# data: HsPop6 and HsPop7
# t = 2.163, df = 8135.4, p-value = 0.03057
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# 0.0005320735 0.0108186761
# sample estimates:
# mean of x mean of y
# 0.05043377 0.04475839

wilcox.test(HsPop6, HsPop7)
# Wilcoxon rank sum test with continuity correction
# data: HsPop6 and HsPop7
# W = 8699600, p-value = 1.765e-08
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop6, HsPop8)
# Welch Two Sample t-test
# data: HsPop6 and HsPop8
# t = 9.7275, df = 7814.4, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# 0.01850120 0.02783972
# sample estimates:
# mean of x mean of y
# 0.05043377 0.02726331

wilcox.test(HsPop6, HsPop8)
# Wilcoxon rank sum test with continuity correction
# data: HsPop6 and HsPop8
# W = 9306800, p-value < 2.2e-16
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop6, HsPop9)
# Welch Two Sample t-test
# data: HsPop6 and HsPop9
# t = 16.638, df = 6837.7, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# 0.03200349 0.04055216
# sample estimates:
# mean of x mean of y
# 0.05043377 0.01415594

wilcox.test(HsPop6, HsPop9)
# Wilcoxon rank sum test with continuity correction
# data: HsPop6 and HsPop9
# W = 9794200, p-value < 2.2e-16
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop6, HsPop10)
# Welch Two Sample t-test
# data: HsPop6 and HsPop10
# t = -4.649, df = 7780.7, p-value = 3.39e-06
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -0.019334412 -0.007865527
# sample estimates:
# mean of x mean of y
# 0.05043377 0.06403374

wilcox.test(HsPop6, HsPop10)
# Wilcoxon rank sum test with continuity correction
# data: HsPop6 and HsPop10
# W = 8392100, p-value = 0.02758
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop6, HsPop11)
# Welch Two Sample t-test
# data: HsPop6 and HsPop11
# t = -3.2043, df = 7946.6, p-value = 0.001359
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -0.014540626 -0.003502455
# sample estimates:
# mean of x mean of y
# 0.05043377 0.05945531

wilcox.test(HsPop6, HsPop11)
# Wilcoxon rank sum test with continuity correction
# data: HsPop6 and HsPop11
# W = 8315500, p-value = 0.2209
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop7, HsPop8)
# Welch Two Sample t-test
# data: HsPop7 and HsPop8
# t = 7.3066, df = 7788.7, p-value = 3.012e-13
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# 0.01280135 0.02218882
# sample estimates:
# mean of x mean of y
# 0.04475839 0.02726331

# Remaining pairwise comparisons of Hs between populations.
# Every pair is tested with a two-sided Welch t-test and a two-sided Wilcoxon
# rank-sum test. The calls below rely on the documented defaults of t.test()
# (alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE,
# conf.level = 0.95) and of wilcox.test() (alternative = "two.sided").
# The output recorded from the original run is kept beneath each call.

wilcox.test(HsPop7, HsPop8)
# Wilcoxon rank sum test with continuity correction
# data: HsPop7 and HsPop8
# W = 8860100, p-value < 2.2e-16
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop7, HsPop9)
# Welch Two Sample t-test
# data: HsPop7 and HsPop9
# t = 13.948, df = 6802.7, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# 0.02630139 0.03490351
# sample estimates:
# mean of x mean of y
# 0.04475839 0.01415594

wilcox.test(HsPop7, HsPop9)
# Wilcoxon rank sum test with continuity correction
# data: HsPop7 and HsPop9
# W = 9336000, p-value < 2.2e-16
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop7, HsPop10)
# Welch Two Sample t-test
# data: HsPop7 and HsPop10
# t = -6.5663, df = 7805.9, p-value = 5.489e-11
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -0.02502973 -0.01352096
# sample estimates:
# mean of x mean of y
# 0.04475839 0.06403374

wilcox.test(HsPop7, HsPop10)
# Wilcoxon rank sum test with continuity correction
# data: HsPop7 and HsPop10
# W = 7991200, p-value = 0.001031
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop7, HsPop11)
# Welch Two Sample t-test
# data: HsPop7 and HsPop11
# t = -5.2005, df = 7965, p-value = 2.037e-07
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -0.020236717 -0.009157114
# sample estimates:
# mean of x mean of y
# 0.04475839 0.05945531

wilcox.test(HsPop7, HsPop11)
# Wilcoxon rank sum test with continuity correction
# data: HsPop7 and HsPop11
# W = 7907100, p-value = 1.553e-05
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop8, HsPop9)
# Welch Two Sample t-test
# data: HsPop8 and HsPop9
# t = 6.905, df = 7635.8, p-value = 5.424e-12
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# 0.009386278 0.016828451
# sample estimates:
# mean of x mean of y
# 0.02726331 0.01415594

wilcox.test(HsPop8, HsPop9)
# Wilcoxon rank sum test with continuity correction
# data: HsPop8 and HsPop9
# W = 8761700, p-value < 2.2e-16
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop8, HsPop10)
# Welch Two Sample t-test
# data: HsPop8 and HsPop10
# t = -13.511, df = 7027, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -0.04210541 -0.03143545
# sample estimates:
# mean of x mean of y
# 0.02726331 0.06403374

wilcox.test(HsPop8, HsPop10)
# Wilcoxon rank sum test with continuity correction
# data: HsPop8 and HsPop10
# W = 7461200, p-value < 2.2e-16
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop8, HsPop11)
# Welch Two Sample t-test
# data: HsPop8 and HsPop11
# t = -12.367, df = 7285.5, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -0.03729477 -0.02708923
# sample estimates:
# mean of x mean of y
# 0.02726331 0.05945531

wilcox.test(HsPop8, HsPop11)
# Wilcoxon rank sum test with continuity correction
# data: HsPop8 and HsPop11
# W = 7342200, p-value < 2.2e-16
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop9, HsPop10)
# Welch Two Sample t-test
# data: HsPop9 and HsPop10
# t = -19.583, df = 6018.1, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -0.05487084 -0.04488475
# sample estimates:
# mean of x mean of y
# 0.01415594 0.06403374

wilcox.test(HsPop9, HsPop10)
# Wilcoxon rank sum test with continuity correction
# data: HsPop9 and HsPop10
# W = 6994800, p-value < 2.2e-16
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop9, HsPop11)
# Welch Two Sample t-test
# data: HsPop9 and HsPop11
# t = -18.718, df = 6249.6, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -0.05004348 -0.04055525
# sample estimates:
# mean of x mean of y
# 0.01415594 0.05945531

wilcox.test(HsPop9, HsPop11)
# Wilcoxon rank sum test with continuity correction
# data: HsPop9 and HsPop11
# W = 6853000, p-value < 2.2e-16
# alternative hypothesis: true location shift is not equal to 0

t.test(HsPop10, HsPop11)
# Welch Two Sample t-test
# data: HsPop10 and HsPop11
# t = 1.4731, df = 8048, p-value = 0.1408
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -0.00151412 0.01067098
# sample estimates:
# mean of x mean of y
# 0.06403374 0.05945531

wilcox.test(HsPop10, HsPop11)
# Wilcoxon rank sum test with continuity correction
# data: HsPop10 and HsPop11
# W = 8072000, p-value = 0.1883
# alternative hypothesis: true location shift is not equal to 0

# FDR correction for the t.test derived p-values.
# The 55 values are the Welch t-test p-values of all population pairs, in the
# order the tests were run: Pop1 vs Pop2..Pop11, Pop2 vs Pop3..Pop11, ...,
# Pop10 vs Pop11. A reported "p-value < 2.2e-16" is entered as 2.2e-16, i.e.
# as an upper bound of the true p-value.
# FIX: the Pop2 vs Pop7 entry used to be 0.000000008729, which is the Wilcoxon
# p-value of that pair; the recorded t-test p-value is 0.0007556.
# NOTE(review): the Pop1 vs Pop2..Pop8 entries could not be cross-checked here
# because their recorded output lies earlier in the file.
p <- c(
  # Pop1 vs Pop2..Pop11
  0.4218, 0.0000000001688, 0.000000000000005548, 0.000000000001772,
  0.0000000005149, 0.0000559, 0.003677, 0.00000000000000022,
  0.00000000000000022, 0.00000000000000022,
  # Pop2 vs Pop3..Pop11
  0.000000006676, 0.0000000000003408, 0.00000000008581, 0.00000001826,
  0.0007556, 0.00009129, 0.00000000000000022, 0.00000000000000022,
  0.00000000000000022,
  # Pop3 vs Pop4..Pop11
  0.1471, 0.463, 0.9602, 0.02414, 0.00000000000000022,
  0.00000000000000022, 0.000002949, 0.00132,
  # Pop4 vs Pop5..Pop11
  0.4827, 0.1415, 0.0002407, 0.00000000000000022, 0.00000000000000022,
  0.0006243, 0.05693,
  # Pop5 vs Pop6..Pop11
  0.442, 0.003156, 0.00000000000000022, 0.00000000000000022, 0.00006224,
  0.01164,
  # Pop6 vs Pop7..Pop11
  0.03057, 0.00000000000000022, 0.00000000000000022, 0.00000339, 0.001359,
  # Pop7 vs Pop8..Pop11
  0.0000000000003012, 0.00000000000000022, 0.00000000005489, 0.0000002037,
  # Pop8 vs Pop9..Pop11
  0.000000000005424, 0.00000000000000022, 0.00000000000000022,
  # Pop9 vs Pop10, Pop11
  0.00000000000000022, 0.00000000000000022,
  # Pop10 vs Pop11
  0.1408
)
p.adjust(p, method = "fdr", n = length(p))

# Comparing Hs between populations: Does Hs follow a latitudinal gradient?
# Use Pearson's correlation test to test for correlation between Hs (y-axis)
# and latitude (x-axis).
# sampling localities: POR GAL LOCQ STGU LANIL GOEL FERM PDC HELG SCOT SVAL
Hs <- c(0.034156, 0.036215, 0.050534, 0.054233, 0.052438, 0.050492,
        0.044764, 0.027306, 0.014158, 0.064071, 0.05948)
Lat <- c(41.621624, 43.403193, 47.491326, 47.820255, 48.479023, 48.889614,
         49.726228, 50.851796, 54.19193, 56.307873, 79.333862)

par(mar = c(7, 5.5, 4, 5.5) + 0.1)
plot(Lat, Hs, ylim = c(0, 0.08), xlim = c(40, 80), xlab = "Latitude",
     ylab = "", cex.axis = 1.4, cex = 1.9, cex.lab = 1.3, bty = "l",
     yaxs = "i", xaxs = "i", font.lab = 2)
# The y-axis label is drawn separately so that it can be set in bold italics
# with a subscript.
mtext(text = expression(bolditalic("H"[S])), side = 2, line = 3.5, las = 3,
      cex = 1.3, font = 2)
# Redraw the x-axis ticks every 10 degrees. `xlim` is not a graphical
# parameter of axis() (it only raised a warning), so it is no longer passed;
# `labels = T` is spelled out as TRUE because T can be reassigned.
axis(1, labels = TRUE, at = c(40, 50, 60, 70, 80), col = "transparent",
     col.ticks = "black", cex.axis = 1.4, cex.lab = 1.3, font.lab = 2)
abline(lm(Hs ~ Lat))
cor.test(Lat, Hs, method = "pearson")
# Pearson's product-moment correlation
# data: Lat and Hs
# t = 1.0797, df = 9, p-value = 0.3083
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval:
# -0.3278231 0.7800572
# sample estimates:
# cor
# 0.338648

cor.test(Lat, Hs, method = "spearman") # also try with non-parametric test
# Spearman's rank correlation rho
# data: Lat and Hs
# S = 164, p-value = 0.4512
# alternative hypothesis: true rho is not equal to 0
# sample estimates:
# rho
# 0.2545455

# Comparing Fis between populations: Does Fis follow a latitudinal gradient?
# Use Pearson's correlation test to test for correlation between Fis (y-axis)
# and latitude (x-axis).
# sampling localities: POR GAL LOCQ STGU LANIL GOEL FERM PDC HELG SCOT SVAL
Fis <- c(-0.0774, -0.0493, 0.0739, 0.0762, 0.0217, 0.0949, 0.0193, 0.0039,
         -0.1301, 0.1447, 0.0585)
Lat <- c(41.621624, 43.403193, 47.491326, 47.820255, 48.479023, 48.889614,
         49.726228, 50.851796, 54.19193, 56.307873, 79.333862)

par(mar = c(7, 5.5, 4, 5.5) + 0.1)
plot(Lat, Fis, ylim = c(-0.140, 0.150), xlim = c(40, 80), xlab = "Latitude",
     ylab = "", cex.axis = 1.4, cex = 1.9, cex.lab = 1.3, bty = "l",
     yaxs = "i", xaxs = "i", font.lab = 2)
# The y-axis label is drawn separately so that it can be set in bold italics
# with a subscript.
mtext(text = expression(bolditalic("F"[IS])), side = 2, line = 3.5, las = 3,
      cex = 1.3, font = 2)
# Redraw the x-axis ticks every 10 degrees. `xlim` is not a graphical
# parameter of axis() (it only raised a warning), so it is no longer passed;
# `labels = T` is spelled out as TRUE because T can be reassigned.
axis(1, labels = TRUE, at = c(40, 50, 60, 70, 80), col = "transparent",
     col.ticks = "black", cex.axis = 1.4, cex.lab = 1.3, font.lab = 2)
abline(lm(Fis ~ Lat))

cor.test(Lat, Fis, method = "pearson")
# Pearson's product-moment correlation
# data: Lat and Fis
# t = 0.82808, df = 9, p-value = 0.429
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval:
# -0.3971972 0.7467587
# sample estimates:
# cor
# 0.2660752

cor.test(Lat, Fis, method = "spearman") # also try with non-parametric test
# Spearman's rank correlation rho
# data: Lat and Fis
# S = 170, p-value = 0.5031
# alternative hypothesis: true rho is not equal to 0
# sample estimates:
# rho
# 0.2272727

# Comparing private allele percentage between populations: Does private
# allele percentage follow a latitudinal gradient? Use Pearson's correlation
# test to test for correlation between private allele percentage (y-axis) and
# latitude (x-axis).
# sampling localities: POR GAL LOCQ GUEN LANIL GOEL FERM PDC HELG SCOT SVAL
PrAll <- read.fstat("denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_hierfstatHier.dat", na.s = "0")
rel_freq <- pop.freq(PrAll)

# Take the alternative allele frequency of each population: for every locus,
# population k is read from element 2 * k of that locus' frequency table.
# NOTE(review): this presumably relies on every locus having exactly two
# allele rows in rel_freq - confirm against the input file.
Pop1 <- sapply(rel_freq, "[[", 2)
Pop2 <- sapply(rel_freq, "[[", 4)
Pop3 <- sapply(rel_freq, "[[", 6)
Pop4 <- sapply(rel_freq, "[[", 8)
Pop5 <- sapply(rel_freq, "[[", 10)
Pop6 <- sapply(rel_freq, "[[", 12)
Pop7 <- sapply(rel_freq, "[[", 14)
Pop8 <- sapply(rel_freq, "[[", 16)
Pop9 <- sapply(rel_freq, "[[", 18)
Pop10 <- sapply(rel_freq, "[[", 20)
Pop11 <- sapply(rel_freq, "[[", 22)

# Bind the per-population vectors column by column. The intermediate
# matrices keep their original names in case later sections refer to them.
AltFreq <- cbind(Pop1, Pop2)
AltFreq1 <- cbind(AltFreq, Pop3)
AltFreq2 <- cbind(AltFreq1, Pop4)
AltFreq3 <- cbind(AltFreq2, Pop5)
AltFreq4 <- cbind(AltFreq3, Pop6)
AltFreq5 <- cbind(AltFreq4, Pop7)
AltFreq6 <- cbind(AltFreq5, Pop8)
AltFreq7 <- cbind(AltFreq6, Pop9)
AltFreq8 <- cbind(AltFreq7, Pop10)
AltFreq9 <- cbind(AltFreq8, Pop11)
AltFreq9.df <- as.data.frame(AltFreq9)

# Check which loci exhibit missing data in which populations. is.nan() tests
# for NaN directly instead of comparing the numeric column with the string
# "NaN", which only worked through implicit coercion to character.
Na <- sapply(AltFreq9.df, function(x) which(is.nan(x)))

# Count, per locus, in how many of the 11 populations the frequency is
# exactly 0 (check) or exactly 1 (check1). Ten zeros, or ten ones, mark a
# locus with an allele private to the one remaining population. For some loci
# (see the Na list above) there are nine zeros and a NaN, or maybe even eight
# zeros and two NaN; one cannot say whether a NaN is a zero or some other
# number, so such loci do not reach a count of ten and are left out.
# NOTE(review): NaN entries are simply not counted, so a locus with ten zeros
# (or ten ones) plus one NaN still reaches a count of 10 and is retained -
# confirm against Na that no such locus exists.
AltFreq9.df$check <- apply(AltFreq9.df[, c(1:11)], 1,
                           FUN = function(x) length(which(x == 0)))
AltFreq9.df$check1 <- apply(AltFreq9.df[, c(1:11)], 1,
                            FUN = function(x) length(which(x == 1)))
AltFreq9.df$total <- ifelse(AltFreq9.df$check == 10, 1, 0)
AltFreq9.df$total1 <- ifelse(AltFreq9.df$check1 == 10, 1, 0)
AltFreq9.df$total_sum <- AltFreq9.df$total + AltFreq9.df$total1
# The manipulations above are needed because the top and bottom lines of
# rel_freq are now a mixture of ref and alt allele freq, instead of alt freq
# always being on the bottom as was the case before the imputation. Because
# the 110 is always above 130, 120 is always above 130, 100 is always above
# 110, etc, but this doesn't necessarily correspond to the ref/alt order.
# Whereas in the previously prepared hierfstat input 1 was always ref allele
# (no matter which base) and 2 was always alt allele (no matter which base).
# Thus as 1 is before 2, ref allele was always above alt allele in the
# rel_freq file.

# only those loci that have zero freq for the alternative allele at ten pops
# and some other freq at another pop - thus these are the private allele loci
ten_zeros <- subset(AltFreq9.df, AltFreq9.df$total_sum == 1)
nrow(ten_zeros) # 2337

# Number of loci in `ten_zeros` that are private to one population.
#   freq   : tracked-allele frequency of that population, one value per locus
#   n_zero : per-locus number of populations with frequency 0 (`check`)
#   n_one  : per-locus number of populations with frequency 1 (`check1`)
# The population is the odd one out when the other ten are at 0 and it is not,
# or when the other ten are at 1 and it is not. The former filter
# (freq != 0 & freq != 1) dropped loci where the odd population is fixed
# (frequency 1 against ten zeros, or 0 against ten ones).
count_private_loci <- function(freq, n_zero, n_one) {
  is_private <- (n_zero == 10 & freq != 0) | (n_one == 10 & freq != 1)
  sum(is_private, na.rm = TRUE)
}

# private allele count for each pop (Pop1 ... Pop11 = POR GAL LOCQ STGU LANIL
# GOEL FERM PDC HELG SCOT SVAL)
private_counts <- vapply(
  paste0("Pop", 1:11),
  function(pop_col) {
    count_private_loci(ten_zeros[[pop_col]], ten_zeros$check, ten_zeros$check1)
  },
  integer(1)
)
private_counts
# Counts recorded by the author, out of the 2337 loci with private SNPs:
#   pop          former filter   before imputation
#   1  (POR)     180             194
#   2  (GAL)     285             292
#   3  (LOCQ)    322             322
#   4  (STGU)    241             241
#   5  (LANIL)   122             122
#   6  (GOEL)    120             120
#   7  (FERM)    205             206
#   8  (PDC)      61              61
#   9  (HELG)     15              15
#   10 (SCOT)    265             270
#   11 (SVAL)    485             494
# The author attributes every difference (14, 7, 1, 5 and 9 loci) to loci
# fixed for the alternative allele, which the former filter excluded although
# it should not have (for details see Excel private allele prop before and
# after imputation).
# NOTE(review): with the corrected rule the counts are therefore expected to
# match the "before imputation" column - re-run to confirm.

# sampling localities: POR GAL LOCQ STGU LANIL GOEL FERM PDC HELG SCOT SVAL
# Percentages are hard-coded from the "before imputation" counts above.
PrAl <- c(194, 292, 322, 241, 122, 120, 206, 61, 15, 270, 494) / 2337 * 100
Lat <- c(
  41.621624, 43.403193, 47.491326, 47.820255, 48.479023, 48.889614,
  49.726228, 50.851796, 54.19193, 56.307873, 79.333862
)
par(mar = c(7, 5.5, 4, 5.5) + 0.1)
plot(
  Lat, PrAl,
  ylim = c(0, 25), xlim = c(40, 80),
  xlab = "Latitude", ylab = "Private allele percentage",
  cex.axis = 1.4, cex = 1.9, cex.lab = 1.3,
  bty = "l", yaxs = "i", xaxs = "i", font.lab = 2
)
axis(
  1,
  xlim = c(40, 80), labels = TRUE, at = c(40, 50, 60, 70, 80),
  col = "transparent", col.ticks = "black",
  cex.axis = 1.4, cex.lab = 1.3, font.lab = 2
)
abline(lm(PrAl ~ Lat))
cor.test(Lat, PrAl, method = "pearson")
# Pearson's product-moment correlation
# data: Lat and PrAl
# t = 1.8799, df = 9, p-value = 0.09282
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval:
# -0.1010716 0.8576755
# sample estimates:
# cor
# 0.530998
cor.test(Lat, PrAl, method = "spearman") # also try with non-parametric test
# Spearman's rank correlation rho
# data: Lat and PrAl
# S = 238, p-value = 0.8177
# alternative hypothesis: true rho is not equal to 0
# sample estimates:
# rho
# -0.08181818

# Comparing allelic richness (WITH rarefaction) between populations: Does
# allelic richness follow a latitudinal gradient - use Pearson's correlation
# test to test for correlation between allelic richness (y-axis) and latitude
# (x-axis)
# sampling localities: POR GAL LOCQ GUEN LANIL GOEL FERM PDC HELG SCOT SVAL
AllRe <- allelic.richness(PrAll, min.n = 10, diploid = TRUE) # 5 diploid sporophytes from Portugal
str(AllRe)
head(AllRe$Ar, 10)
#        [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8] [,9]    [,10]    [,11]
# 10002     1 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000    1 1.989011 1.000000
# 10006     1 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000    1 1.000000 1.630769
# 10013     1 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000    1 1.989011 1.000000
# 100134    1 1.000000 1.512257 1.996924 1.995400 1.999400 1.998452 1.000000    1       NA 1.000000
# 10028     1 1.000000 1.376773 1.000000 1.000000 1.000000 1.000000 1.000000    1 1.000000 1.000000
# 10029     1 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000    1 1.934066 1.000000
# 10032    NA 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000    1 1.934066 1.000000
# 10048     1 1.000000 1.946744 1.000000 1.000000 1.000000 1.000000 1.000000    1 1.000000 1.000000
# 10055     1 1.000000 1.961138 1.961138 1.706862 1.997965 1.998452 1.512257    1 1.000000 1.000000
# 10064     1 1.809478 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000    1 1.000000 1.000000

# mean allelic richness per population, missing loci ignored
colMeans(AllRe$Ar, na.rm = TRUE)
# recorded means:
#   pop 1  (POR)   1.085984
#   pop 2  (GAL)   1.097694
#   pop 3  (LOCQ)  1.155962
#   pop 4  (STGU)  1.173145
#   pop 5  (LANIL) 1.162159
#   pop 6  (GOEL)  1.149068
#   pop 7  (FERM)  1.130912
#   pop 8  (PDC)   1.074386
#   pop 9  (HELG)  1.035697
#   pop 10 (SCOT)  1.176583
#   pop 11 (SVAL)  1.164841

# obtain SE
Ar_dat <- data.frame(AllRe$Ar)
# Standard error of the mean. The sample size is the number of non-missing
# values, matching what sd(na.rm = TRUE) uses; dividing by length(x) counted
# the NA loci as observations and so understated the SE.
std <- function(x) {
  sd(x, na.rm = TRUE) / sqrt(sum(!is.na(x)))
}
ArSes <- apply(Ar_dat, 2, std)
ArSes
# Values recorded with the former denominator, sqrt(length(x)):
#          X1          X2          X3          X4          X5          X6
# 0.004395376 0.004225059 0.004700994 0.004709643 0.004700224 0.004732975
#          X7          X8          X9         X10         X11
# 0.004671657 0.003675073 0.002717958 0.005707399 0.005253585

# sampling localities: POR GAL LOCQ STGU LANIL GOEL FERM PDC HELG SCOT SVAL
# Hard-coded copies of the recorded means; the HELG entry was typed as
# 1.03569 and is restored to the recorded 1.035697.
AlRe <- c(
  1.085984, 1.097694, 1.155962, 1.173145, 1.162159, 1.149068,
  1.130912, 1.074386, 1.035697, 1.176583, 1.164841
)
Lat <- c(
  41.621624, 43.403193, 47.491326, 47.820255, 48.479023, 48.889614,
  49.726228, 50.851796, 54.19193, 56.307873, 79.333862
)
par(mar = c(7, 5.5, 4, 5.5) + 0.1)
plot(
  Lat, AlRe,
  ylim = c(1, 1.2), xlim = c(40, 80),
  xlab = "Latitude", ylab = "Allellic richness",
  cex.axis = 1.4, cex = 1.9, cex.lab = 1.3,
  bty = "l", yaxs = "i", xaxs = "i", font.lab = 2
)
axis(
  1,
  xlim = c(40, 80), labels = TRUE, at = c(40, 50, 60, 70, 80),
  col = "transparent", col.ticks = "black",
  cex.axis = 1.4, cex.lab = 1.3, font.lab = 2
)
abline(lm(AlRe ~ Lat))
cor.test(Lat, AlRe, method = "pearson")
# Pearson's product-moment correlation
# data: Lat and AlRe
# t = 0.82737, df = 9, p-value = 0.4294
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval:
# -0.3973869 0.7466590
# sample estimates:
# cor
# 0.2658658
cor.test(Lat, AlRe, method = "spearman") # also try with non-parametric test
# Spearman's rank correlation rho
# data: Lat and AlRe
# S = 178, p-value = 0.5763
# alternative hypothesis: true rho is not equal to 0
# sample estimates:
# rho
# 0.1909091

# getting geo distances for IBD (using marmap)
# coordinates of sampling localities: POR GAL LOCQ GUEN LANIL GOEL FERM PDC
# HELG SCOT SVAL
# points need to be pushed more out to sea not to have lines connecting them
# via least cost path crossing the land - this set of coords used
coord.lon <- c(
  -8.842757, -8.332640, -2.953724, -4.401469, -4.792607, -3.016897,
  -1.486632, 1.499115, 7.869016, -5.734236, 9.007976
)
coord.lon.df <- as.data.frame(coord.lon)
coord.lat <- c(
  41.621624, 43.403193, 47.491326, 47.820255, 48.479023, 48.889614,
  49.726228, 50.851796, 54.19193, 56.307873, 79.333862
)
coord.lat.df <- as.data.frame(coord.lat)
coord <- cbind(coord.lon.df, coord.lat.df)

# map of the sampling area
GLOBmap <- getNOAA.bathy(
  lon1 = 15, lon2 = -10, lat1 = 41, lat2 = 80,
  resolution = 1, keep = TRUE
)
plot(GLOBmap, image = TRUE, deep = -6000, shallow = 0, step = 1000)
points(coord, pch = 21, cex = 1.5, col = "red", lwd = 4)

# least cost path 1 - constrained only by land masses
trans1 <- trans.mat(GLOBmap)
out1 <- lc.dist(trans1, loc = coord, res = "path")
# plot chart with the least cost paths shown
plot(GLOBmap, image = TRUE, deep = -6000, shallow = 0, step = 1000)
points(coord, pch = 21, cex = 1.5, col = "red", lwd = 4)
# `dummy` only captures the lapply() result so it is not printed
dummy <- lapply(out1, lines, col = "orange", lwd = 2, lty = 1)
# get matrix of geographic distances (in km)
dist1 <- lc.dist(trans1, loc = coord, res = "dist")

# least cost path 2 - with the following min and max depth (for other sets of
# settings (min.depth 5 or 10, max.depth 10, 20 or 30) the least cost path
# will cross the land between at least a pair of points thus a full set of
# pairwise distances is impossible to obtain)
trans2 <- trans.mat(GLOBmap, min.depth = -5, max.depth = -30)
out2 <- lc.dist(trans2, loc = coord, res = "path")
# plot chart with the least cost paths shown
plot(GLOBmap, image = TRUE, deep = -6000, shallow = 0, step = 1000)
points(coord, pch = 21, cex = 1.5, col = "red", lwd = 4)
dummy <- lapply(out2, lines, col = "orange", lwd = 2, lty = 1)
# get matrix of geographic distances (in km)
dist2 <- lc.dist(trans2, loc = coord, res = "dist")

# Re-import the pairwise distances from a text file and rebuild a numeric
# lower-triangular matrix from it.
# NOTE(review): nothing visible here writes "dist2GLOB.txt"; presumably it is
# an export of dist2 - confirm.
dist2_dat <- read.table(
  "dist2GLOB.txt",
  stringsAsFactors = FALSE, header = TRUE, dec = ","
)
for (i in 1:11) {
  dist2_dat[i, i] <- "0"
}
site_columns <- c(
  "POR", "GAL", "LOCQ", "GUEN", "LANIL", "GOEL",
  "FERM", "PDC", "HELG", "SCOT", "SVAL"
)
for (site in site_columns) {
  dist2_dat[[site]] <- as.numeric(dist2_dat[[site]])
}
# Take the matrix while every column is still numeric. data.matrix() turns
# character columns into factor codes, so building it after the blanking
# below produced rank codes instead of distances.
geo.dist.dm <- data.matrix(dist2_dat)
geo.dist.dm[upper.tri(geo.dist.dm, diag = TRUE)] <- NA
# dist2_dat itself keeps its former final state: upper triangle and diagonal
# blanked, which makes all of its columns character.
dist2_dat[upper.tri(dist2_dat)] <- ""
for (i in 1:11) {
  dist2_dat[i, i] <- ""
}

# neutral loci only
# create the neutral loci dataset - subtract the 339 loci from the full 4069
# locus dataset
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf <- read.table(
  "ElevenLocalities_SelectedSamples_no.dupl_sel_ImpRf.txt",
  stringsAsFactors = FALSE, header = TRUE, dec = ","
)
NeutralSNP_prep <- denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf
# negate the in operator
`%nin%` <- Negate(`%in%`)
# IDs of the loci removed to obtain the neutral set
excluded_loci_ids <- c(
  10064, 10077, 10227, 10239, 10384, 10388, 10567, 10631, 10671, 10944,
  11038, 11120, 11415, 11464, 11602, 11622, 11733, 1176, 11858, 11992,
  12150, 12208, 1233, 12361, 12753, 12836, 13067, 13086, 13168, 13402,
  13490, 13528, 13583, 13731, 13767, 13916, 13949, 14176, 14322, 14549,
  14571, 14706, 14749, 14796, 14809, 14851, 14900, 15030, 15039, 15156,
  15336, 15417, 15424, 15670, 15682, 1571, 15870, 1625, 1626, 16514,
  16666, 16729, 17063, 17138, 17158, 17228, 17266, 17351, 17357, 17422,
  17482, 17512, 17539, 17694, 17696, 17748, 17799, 17887, 17988, 17993,
  18080, 18126, 18129, 18134, 18161, 18279, 18352, 18386, 18471, 18502,
  18652, 18711, 18752, 1883, 18921, 18922, 19008, 19035, 19175, 19203,
  19219, 19321, 19323, 19386, 19400, 19487, 19500, 19634, 19823, 19845,
  19900, 20050, 20104, 20184, 20368, 20664, 20687, 20749, 20791, 20974,
  20996, 21041, 21088, 21230, 21390, 2145, 21841, 21844, 21859, 21970,
  21986, 22293, 22326, 22406, 22435, 22446, 22573, 22710, 23182, 2323,
  23719, 23802, 2414, 24620, 24667, 24834, 24991, 25039, 2504, 25050,
  2618, 26349, 26497, 26509, 2678, 27211, 27247, 2762, 2771, 27925,
  28178, 28482, 28588, 28760, 29183, 29424, 2945, 29902, 3010, 30528,
  3062, 30887, 31196, 31505, 31962, 31970, 32118, 32194, 32232, 32294,
  32332, 32401, 32548, 32924, 33020, 33258, 33286, 33354, 33361, 33447,
  338, 34164, 3428, 34300, 34562, 3493, 350, 35292, 35548, 35760,
  36084, 36521, 36627, 36788, 36838, 37417, 37440, 37483, 37692, 37710,
  37939, 3871, 388, 38938, 39044, 39187, 39332, 3934, 3939, 39403,
  3953, 3959, 3975, 39822, 40363, 40510, 4057, 4073, 40861, 41124,
  41471, 4173, 42202, 42277, 42306, 4251, 42657, 42673, 42864, 42908,
  4310, 4318, 437, 43882, 43934, 44323, 44328, 44862, 449, 4495,
  454, 46713, 47102, 47125, 47397, 4758, 47662, 48422, 48796, 49104,
  4944, 49477, 5054, 5068, 5086, 5093, 51376, 51378, 5240, 53588,
  53884, 5443, 5487, 5537, 5574, 55966, 56674, 57303, 5738, 5798,
  5809, 583, 5879, 59827, 61019, 6280, 6415, 6416, 6559, 6616,
  6754, 6811, 6825, 69049, 6972, 7027, 7118, 7236, 7406, 7478,
  757, 7617, 7640, 7755, 7765, 8046, 8164, 8242, 8277, 8281,
  8323, 8360, 8431, 8446, 8498, 8521, 8636, 8706, 8744, 8799,
  884, 8870, 8878, 8902, 8915, 8989, 9, 9007, 9011, 9075,
  9168, 918, 9268, 932, 933, 946, 965, 9721, 9915
)
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs <-
  NeutralSNP_prep[NeutralSNP_prep$ID %nin% excluded_loci_ids, ]
write.table(
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs,
  file = "C:/Users/aaa/bbb/ElevenLocalities_SelectedSamples_no.dupl_sel_ImpRf_NeutralSNPs.txt",
  sep = "\t", quote = FALSE, col.names = TRUE, row.names = FALSE
)

# genotype columns only (columns 10 to 208), one row per retained locus
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs_ForGenlight <-
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs[1:3730, 10:208]
dim(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs_ForGenlight) # 3730 199

# recode genotype strings to allele counts (0, 1 or 2); any other string
# becomes NA in the as.numeric() step
all.df <- denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs_ForGenlight
all.df[all.df == "0/0"] <- 0
all.df[all.df == "0/1"] <- 1
all.df[all.df == "1/0"] <- 1
all.df[all.df == "1/1"] <- 2
all.df.num <- lapply(all.df, as.numeric)
all.df.num.df <- as.data.frame(all.df.num)
# one list element per column, i.e. per individual
data.list <- as.list(all.df.num.df)
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl <-
  new("genlight", data.list)
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl
# /// GENLIGHT OBJECT /////////
# // 199 genotypes, 3,730 binary SNPs, size: 486.4 Kb
# 1079 (0.15 %) missing data
# // Basic content
# @gen: list of 199 SNPbin
# // Optional content
# @ind.names: 199 individual labels
# @other: a list containing: elements without names

# add pop information to genlight
# Population label of every individual, taken from the STRATA column of the
# population table.
pop_denovo7.ord.mod.sel.GLOB_Rand_NEW_POP_NAMES <- read.table(
  "pop_ElevenLocalities_SelectedSamples_NEW_POP_NAMES.txt",
  header = TRUE
)
pop(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl) <-
  pop_denovo7.ord.mod.sel.GLOB_Rand_NEW_POP_NAMES$STRATA

# Per-SNP annotation copied from the neutral-SNP table:
# position of each SNP (POS column of the VCF)
position(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl) <-
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs$POS
# chromosome of each SNP (CHROM column of the VCF)
chromosome(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl) <-
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs$CHROM
# ID of each SNP (ID column of the VCF)
locNames(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl) <-
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs$ID

#glMean(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl,alleleAsUnit = F)
#glVar(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl,alleleAsUnit = F)

denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl@pop
# Recorded output, summarised as runs of individuals:
#   1-5 CAS, 6-12 ELL, 13-36 LAN, 37-58 POR, 59-82 LEZ, 83-104 HEL,
#   105-128 LOC, 129-138 FER, 139-151 NYA, 152-175 STG, 176-199 AUD
# Levels: AUD CAS ELL FER HEL LAN LEZ LOC NYA POR STG

# check for which no of retained PCs a.score is best using optim.a.score
# first perform dapc with many PCs retained
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_centre_check.a.score <-
  dapc(
    denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl,
    pop(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl),
    n.pca = 100,
    n.da = 10,
    scale = FALSE,
    var.contrib = TRUE,
    var.loadings = FALSE,
    pca.info = TRUE,
    pca.select = "nbEig",
    perc.pca = NULL,
    glPca = NULL,
    parallel = FALSE
  )
# perform optim.a.score on DAPC with many PCs retained
optim.a.score(
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_centre_check.a.score,
  n.sim = 100,
  smart = FALSE
)
# best is 4
# with a.score of 0.8466548

# adjusting the order of pops for genlight object: start from a copy
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd <-
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd
# /// GENLIGHT OBJECT /////////
# // 199 genotypes, 3,730 binary SNPs, size: 751 Kb
# 1079 (0.15 %) missing data
# // Basic content
# @gen: list of 199 SNPbin
# // Optional content
# @ind.names: 199 individual labels
# @loc.names: 3730 locus labels
# @chromosome: factor storing chromosomes of the SNPs
# @position: integer storing positions of the SNPs
# @pop: population of each individual (group size range: 5-24)
# @other: a list containing: elements without names

# add pop information to genlight (the population table is read again)
pop_denovo7.ord.mod.sel.GLOB_Rand_NEW_POP_NAMES <- read.table(
  "pop_ElevenLocalities_SelectedSamples_NEW_POP_NAMES.txt",
  header = TRUE
)
pop(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd) <-
  pop_denovo7.ord.mod.sel.GLOB_Rand_NEW_POP_NAMES$STRATA
# Re-level the population factor so that the levels follow the plotting order
# used in the figures below.
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd@pop <-
  factor(
    denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd@pop,
    levels = c(
      "CAS", "POR", "LOC", "STG", "LAN", "LEZ",
      "FER", "AUD", "HEL", "ELL", "NYA"
    )
  )
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd@pop
# Recorded output, summarised as runs of individuals (order unchanged):
#   1-5 CAS, 6-12 ELL, 13-36 LAN, 37-58 POR, 59-82 LEZ, 83-104 HEL,
#   105-128 LOC, 129-138 FER, 139-151 NYA, 152-175 STG, 176-199 AUD
# Levels: CAS POR LOC STG LAN LEZ FER AUD HEL ELL NYA

# dapc with pre-designated pops (i.e. 11 pops) - with centering only and
# n.pca = 4 (after optim.a.score)
# number of axes retained in the Principal Component Analysis step: 4
# number of axes retained in the Discriminant Analysis step: 10
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4 <-
  dapc(
    denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd,
    pop(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd),
    n.pca = 4,
    n.da = 10,
    scale = FALSE,
    var.contrib = TRUE,
    var.loadings = FALSE,
    pca.info = TRUE,
    pca.select = "nbEig",
    perc.pca = NULL,
    glPca = NULL,
    parallel = FALSE
  )
summary(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4)
# $`n.dim`
# [1] 4
# $n.pop
# [1] 11
# $assign.prop
# [1] 0.9949749
# $assign.per.pop
#       CAS       POR       LOC       STG       LAN       LEZ
# 1.0000000 1.0000000 1.0000000 1.0000000 0.9583333 1.0000000
#       FER       AUD       HEL       ELL       NYA
# 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
# $prior.grp.size
# CAS POR LOC STG LAN LEZ FER AUD HEL ELL NYA
#   5  22  24  24  24  24  10  24  22   7  13
# $post.grp.size
# CAS POR LOC STG LAN LEZ FER AUD HEL ELL NYA
#   5  22  24  25  23  24  10  24  22   7  13

# percentage of variance retained on the first axis
with(
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4,
  eig[1] / sum(eig) * 100
) # 44.10852
# percentage of variance retained on the second axis
with(
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4,
  eig[2] / sum(eig) * 100
) # 38.00954
# percentage of variance retained on the third axis
with(
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4,
  eig[3] / sum(eig) * 100
) # 14.2916

# visualising the DAPC - scatter - axes 1 and 2
myCol <- c(
  "red", "chocolate1", "gold", "red4", "black", "navy",
  "greenyellow", "gold4", "darkgreen", "hotpink1", "mediumorchid4"
)
scatter(
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4,
  xax = 1, yax = 2,
  bg = "white", pch = 19, clab = 0, cstar = 1, cellipse = 1.25,
  cex = 0.75, solid = .7, col = myCol,
  scree.pca = FALSE, posi.da = "topleft",
  leg = TRUE, posi.leg = "topright", cleg = 1
)

# improved graphic
# Lookup tables (one row per population level) for point colour and the two
# plotting symbols. cbind() keeps the auto-generated column names, which are
# then replaced by position.
PopData <- as.data.frame(levels(pop(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd)))
myCol <- c(
  "mediumorchid4", "red", "sienna4", "darkorange", "yellow", "lawngreen",
  "burlywood3", "darkgreen", "deepskyblue", "steelblue", "grey20"
)
ColData <- as.data.frame(myCol)
PopColData <- cbind(PopData, ColData)
names(PopColData) <- c("Pop", "Col")
PopColData$Col <- as.character(PopColData$Col)

# coordinates on discriminant functions 1 and 2 plus the group of each individual
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4.df1_2 <-
  data.frame(
    x = denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4$ind.coord[, 1],
    y = denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4$ind.coord[, 2],
    Pop = denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4$grp
  )
# colour of every individual, looked up through its population
col.points <- PopColData$Col[
  match(
    denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4.df1_2$Pop,
    PopColData$Pop
  )
]

# filled symbol (pch 19) for every population
Shape1Data <- as.data.frame(c(19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19))
PopShape1Data <- cbind(PopData, Shape1Data)
names(PopShape1Data) <- c("Pop", "Shape1")
PopShape1Data$Shape1 <- as.integer(PopShape1Data$Shape1)
Shape1 <- PopShape1Data$Shape1[
  match(
    denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4.df1_2$Pop,
    PopShape1Data$Pop
  )
]

# open circle (pch 1) drawn on top as an outline
Shape2Data <- as.data.frame(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1))
PopShape2Data <- cbind(PopData, Shape2Data)
names(PopShape2Data) <- c("Pop", "Shape2")
PopShape2Data$Shape2 <- as.integer(PopShape2Data$Shape2)
Shape2 <- PopShape2Data$Shape2[
  match(
    denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4.df1_2$Pop,
    PopShape2Data$Pop
  )
]

# tiff
# The figure is drawn inside local() so the DAPC result can go by a short
# alias without leaving an extra object in the workspace.
local({
  da <- denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4
  tiff(
    "SNP_neutral_Axes12.tif",
    res = 600, compression = "lzw", height = 21, width = 28.75, units = "cm"
  )
  # empty scatter (pch = "") for the frame and stars; points are added by hand
  scatter(
    da,
    col = "black", bg = "gray99", pch = "", cstar = 1, clab = 0,
    legend = FALSE, scree.da = 0, posi.da = "topleft"
  )
  par(xpd = TRUE)
  points(da$ind.coord[, 1], da$ind.coord[, 2],
         pch = Shape1, col = col.points, cex = 1.1)
  points(da$ind.coord[, 1], da$ind.coord[, 2],
         pch = Shape2, col = "black", cex = 1.15, lwd = 0.5)
  text(cex = 1.2, x = -30.2, y = -1.75, labels = "DF1 (44.1 %)",
       xpd = TRUE, srt = 0, pos = 2)
  text(cex = 1.2, x = 2.95, y = 34.8, labels = "DF2 (38.01 %)",
       xpd = TRUE, srt = 90, pos = 2)
  legend(
    x = 50, y = -50, cex = 0.8,
    legend = c(
      "Castelo do Neiva (CAS)", "Portiño de Dexo (POR)", "Locmariaquer (LOC)",
      "St Guenolé (STG)", "Lanildut (LAN)", "Lézardrieux (LEZ)",
      "Fermanville (FER)", "Audresselles (AUD)", "Helgoland (HEL)",
      "Ellenabeich (ELL)", "Ny-Ålesund (NYA)"
    ),
    pch = rep(21, 10),
    col = rep("black", 10),
    pt.bg = myCol,
    pt.lwd = 2, pt.cex = 1.5, ncol = 2, xjust = 0,
    text.width = c(19.25, 19.25), x.intersp = 0.7
  )
  add.scatter.eig(da$eig, nf = 15, xax = 1, yax = 2,
                  posi = "topright", inset = .02)
  dev.off()
})

# visualising the DAPC - scatter - axes 2 and 3
myCol <- c(
  "red", "chocolate1", "gold", "red4", "black", "navy",
  "greenyellow", "gold4", "darkgreen", "hotpink1", "mediumorchid4"
)
scatter(
  denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4,
  xax = 2, yax = 3,
  bg = "white", pch = 19, clab = 0, cstar = 1, cellipse = 1.25,
  cex = 0.75, solid = .7, col = myCol,
  scree.pca = FALSE, posi.da = "topleft",
  leg = TRUE, posi.leg = "topright", cleg = 1
)

# improved graphic
# The lookup tables are rebuilt; the columns still carry their auto-generated
# names at this point and are renamed by the statements that follow.
PopData <- as.data.frame(levels(pop(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd)))
myCol <- c(
  "mediumorchid4", "red", "sienna4", "darkorange", "yellow", "lawngreen",
  "burlywood3", "darkgreen", "deepskyblue", "steelblue", "grey20"
)
ColData <- as.data.frame(myCol)
PopColData <- cbind(PopData, ColData)
PopColData$myCol <- as.character(PopColData$myCol)
# Lookup tables for the axes 2-3 figure.
# PopColData arrives with the two auto-generated column names produced by
# cbind(PopData, ColData); they are replaced by position.
names(PopColData) <- c("Pop", "Col")

# coordinates on discriminant functions 2 and 3 plus the group of each individual
denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4.df2_3 <-
  data.frame(
    x = denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4$ind.coord[, 2],
    y = denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4$ind.coord[, 3],
    Pop = denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4$grp
  )
# colour of every individual, looked up through its population
col.points <- PopColData$Col[
  match(
    denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4.df2_3$Pop,
    PopColData$Pop
  )
]

# filled symbol (pch 19) for every population
Shape1Data <- as.data.frame(c(19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19))
PopShape1Data <- cbind(PopData, Shape1Data)
names(PopShape1Data) <- c("Pop", "Shape1")
PopShape1Data$Shape1 <- as.integer(PopShape1Data$Shape1)
Shape1 <- PopShape1Data$Shape1[
  match(
    denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4.df2_3$Pop,
    PopShape1Data$Pop
  )
]

# open circle (pch 1) used as an outline; its columns are renamed by the
# statements that follow
Shape2Data <- as.data.frame(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1))
PopShape2Data <- cbind(PopData, Shape2Data)
# Finish the outline-symbol lookup table: PopShape2Data arrives with the two
# auto-generated column names from cbind(PopData, Shape2Data).
names(PopShape2Data) <- c("Pop", "Shape2")
PopShape2Data$Shape2 <- as.integer(PopShape2Data$Shape2)
Shape2 <- PopShape2Data$Shape2[
  match(
    denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4.df2_3$Pop,
    PopShape2Data$Pop
  )
]

# tiff
# The figure is drawn inside local() so the DAPC result can go by a short
# alias without leaving an extra object in the workspace.
local({
  da <- denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4
  site_labels <- c(
    "Castelo do Neiva (CAS)", "Portiño de Dexo (POR)", "Locmariaquer (LOC)",
    "St Guenolé (STG)", "Lanildut (LAN)", "Lézardrieux (LEZ)",
    "Fermanville (FER)", "Audresselles (AUD)", "Helgoland (HEL)",
    "Ellenabeich (ELL)", "Ny-Ålesund (NYA)"
  )
  site_fill <- c(
    "mediumorchid4", "red", "sienna4", "darkorange", "yellow", "lawngreen",
    "burlywood3", "darkgreen", "deepskyblue", "steelblue", "grey20"
  )
  tiff(
    "SNP_neutral_Axes23.tif",
    res = 600, compression = "lzw", height = 21, width = 28.75, units = "cm"
  )
  # empty scatter (pch = "") for the frame and stars; points are added by hand
  scatter(
    da,
    xax = 2, yax = 3,
    col = "black", bg = "gray99", pch = "", cstar = 1, clab = 0,
    legend = FALSE, scree.da = 0, posi.da = "topright"
  )
  par(xpd = TRUE)
  points(da$ind.coord[, 2], da$ind.coord[, 3],
         pch = Shape1, col = col.points, cex = 1.1)
  points(da$ind.coord[, 2], da$ind.coord[, 3],
         pch = Shape2, col = "black", cex = 1.15, lwd = 0.5)
  text(cex = 1.2, x = -53.1, y = -1.2, labels = "DF2 (38.01 %)",
       xpd = TRUE, srt = 0, pos = 2)
  text(cex = 1.2, x = 2, y = -28.7, labels = "DF3 (14.29 %)",
       xpd = TRUE, srt = 90, pos = 2)
  legend(
    x = -63.4, y = -26.7, cex = 0.8,
    legend = site_labels,
    # one symbol and one outline colour per label (11); the former vectors
    # listed only 10 entries
    pch = rep(21, length(site_labels)),
    col = rep("black", length(site_labels)),
    pt.bg = site_fill,
    pt.lwd = 2, pt.cex = 1.5, ncol = 2, xjust = 0,
    text.width = c(13, 13), x.intersp = 0.7
  )
  add.scatter.eig(da$eig, nf = 15, xax = 2, yax = 3,
                  posi = "topleft", inset = .02)
  dev.off()
})

# group memberships of DAPC - Structure like chart
# first have to order the individuals in accordance with the order of south to
# north clusters...
# The row ranges below are the positions of each population's individuals in
# the posterior matrix.
cosmoplotIndMatrix <- denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf_NeutralSNPs.gl_PopOrd_centre4$posterior
cosmoplotIndMatrix_CAS <- cosmoplotIndMatrix[1:5, 1:11]
cosmoplotIndMatrix_POR <- cosmoplotIndMatrix[37:58, 1:11]
cosmoplotIndMatrix_LOC <- cosmoplotIndMatrix[105:128, 1:11]
cosmoplotIndMatrix_STG <- cosmoplotIndMatrix[152:175, 1:11]
cosmoplotIndMatrix_LAN <- cosmoplotIndMatrix[13:36, 1:11]
cosmoplotIndMatrix_LEZ <- cosmoplotIndMatrix[59:82, 1:11]
cosmoplotIndMatrix_FER <- cosmoplotIndMatrix[129:138, 1:11]
cosmoplotIndMatrix_AUD <- cosmoplotIndMatrix[176:199, 1:11]
cosmoplotIndMatrix_HEL <- cosmoplotIndMatrix[83:104, 1:11]
cosmoplotIndMatrix_ELL <- cosmoplotIndMatrix[6:12, 1:11]
cosmoplotIndMatrix_NYA <- cosmoplotIndMatrix[139:151, 1:11]
# stack the populations in the order CAS POR LOC STG LAN LEZ FER AUD HEL ELL NYA
cosmoplotIndMatrix1 <- rbind(cosmoplotIndMatrix_CAS, cosmoplotIndMatrix_POR)
cosmoplotIndMatrix2 <- rbind(cosmoplotIndMatrix1, cosmoplotIndMatrix_LOC)
cosmoplotIndMatrix3 <- rbind(cosmoplotIndMatrix2, cosmoplotIndMatrix_STG)
cosmoplotIndMatrix4 <- rbind(cosmoplotIndMatrix3, cosmoplotIndMatrix_LAN)
cosmoplotIndMatrix5 <- rbind(cosmoplotIndMatrix4, cosmoplotIndMatrix_LEZ)
cosmoplotIndMatrix6 <- rbind(cosmoplotIndMatrix5, cosmoplotIndMatrix_FER)
cosmoplotIndMatrix7 <- rbind(cosmoplotIndMatrix6, cosmoplotIndMatrix_AUD)
cosmoplotIndMatrix8 <- rbind(cosmoplotIndMatrix7, cosmoplotIndMatrix_HEL)
cosmoplotIndMatrix9 <- rbind(cosmoplotIndMatrix8, cosmoplotIndMatrix_ELL)
cosmoplotIndMatrix10 <- rbind(cosmoplotIndMatrix9, cosmoplotIndMatrix_NYA)

myCol <- c(
  "mediumorchid4", "red", "sienna4", "darkorange", "yellow", "lawngreen",
  "burlywood3", "darkgreen", "deepskyblue", "steelblue", "grey20"
)
#barplot_scale <- barplot( t(denovo7.ord.mod.sel_Adjusted.GLOB_RandLOC_no.dupl_ImpRf.gl_PopOrd_centre4$posterior) )
compoplot(
  cosmoplotIndMatrix10,
  space = 0, lab = "", col = myCol, cleg = 1, legend = FALSE, font.lab = 2,
  xlab = "population of origin of each sampled individual", cex.axis = 1
)
# tick marks at the cumulative group sizes (5, 22, 24, 24, 24, 24, 10, 24, 22,
# 7, 13 individuals), labels at the middle of each group
axis(
  1,
  at = c(0, 5, 27, 51, 75, 99, 123, 133, 157, 179, 186, 199),
  labels = FALSE, lwd = 1, lwd.ticks = 1
)
axis(
  1,
  at = c(2.5, 16, 39, 63, 87, 111, 128, 145, 168, 182.5, 192.5),
  labels = c(
    "CAS", "POR", "LOC", "STG", "LAN", "LEZ",
    "FER", "AUD", "HEL", "ELL", "NYA"
  ),
  las = 1, tick = FALSE, cex.axis = 1
)

# perform likelihood-based genetic clustering on a genind object using adegenet
# (snapclust function - still experimental (?))
# create genind object (from hierfstat input) (hierfstat file has pop info
# already specified)
# adegenet:: is spelled out because hierfstat is attached after adegenet and
# exports its own read.fstat(), which would otherwise be the one called here.
GLOB_no.dupl_ImpRf_NeutralSNPs <- adegenet::read.fstat(
  "ElevenLocalities_SelectedSamples_no.dupl_sel_ImpRf_NeutralSNPs_hierfstat_NEW_POP_NAMES.dat"
)

# choose the number of clusters for snapclust using AIC with snapclust.choose.k
# (search for k from 2 to 22 (double the number of pops))
GLOB_no.dupl_ImpRf_NeutralSNPs.aic <- snapclust.choose.k(
  max = 22,
  GLOB_no.dupl_ImpRf_NeutralSNPs,
  IC = AIC,
  IC.only = TRUE
)
# Large dataset syndrome:
# for 197 individuals, differences in log-likelihoods exceed computer
# precision; group membership probabilities are approximated (only trust
# clear-cut values)
plot(
  GLOB_no.dupl_ImpRf_NeutralSNPs.aic,
  type = "b", cex = 2, xlab = "k", ylab = "AIC"
)
#points(which.min(GLOB_no.dupl_ImpRf_NeutralSNPs.aic), min(GLOB_no.dupl_ImpRf_NeutralSNPs.aic), col = "blue", pch = 20, cex = 2)
# "true" K=7 but the selected K will be K=6

# perform snapclust - pop.ini "ward" with k=6
GLOB.aic.clust_ImpRf_NeutralSNPs <- snapclust(
  GLOB_no.dupl_ImpRf_NeutralSNPs,
  k = 6, pop.ini = "ward", max.iter = 10000
)
# Large dataset syndrome:
# for 171 individuals, differences in log-likelihoods exceed computer precision;
# group membership probabilities are approximated
# (only trust clear-cut values)
# Inspect the snapclust fit: convergence flag (TRUE in the recorded run) and
# overall structure of the returned object.
GLOB.aic.clust_ImpRf_NeutralSNPs$converged
str(GLOB.aic.clust_ImpRf_NeutralSNPs)
# Recorded output of str():
# List of 6
#  $ group    : Factor w/ 6 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
#   ..- attr(*, "names")= chr [1:199] "1" "2" "3" "4" ...
#  $ ll       : num -108307
#  $ proba    : num [1:199, 1:6] 1 1 1 1 1 1 1 1 1 1 ...
#   ..- attr(*, "dimnames")=List of 2
#   .. ..$ : chr [1:199] "1" "2" "3" "4" ...
#   .. ..$ : chr [1:6] "1" "2" "3" "4" ...
#  $ converged: logi TRUE
#  $ n.iter   : int 1
#  $ n.param  : int 22380
#  - attr(*, "class")= chr [1:2] "snapclust" "list"

# Cross-tabulate population of origin against inferred cluster, with the
# populations shown in south-to-north order.
# Only the factor LEVELS are reordered: table() arranges its rows by level,
# and the vector itself must stay in individual order so that every label
# remains paired with that individual's entry in $group. (Sorting the vector
# would only be harmless if the input file already listed the populations in
# exactly this order.)
pop_levels <- c("CAS", "POR", "LOC", "STG", "LAN", "LEZ", "FER", "AUD",
                "HEL", "ELL", "NYA")
pop.order <- pop(GLOB_no.dupl_ImpRf_NeutralSNPs)
pop.order <- factor(pop.order, levels = pop_levels)

table(pop.order, GLOB.aic.clust_ImpRf_NeutralSNPs$group)
table.value(table(pop.order, GLOB.aic.clust_ImpRf_NeutralSNPs$group),
            col.lab = paste("cluster", 1:6), row.lab = pop_levels,
            csize = 0.9, clegend = 0)

# Results with "ward" appear to be stable, as the same output (clustering of
# individuals) is obtained with multiple runs. Results with
# pop.ini = "kmeans" and NULL do not appear stable, as pretty much every run
# gives a different output. The ?find.clusters help page says that the "ward"
# method seems to be more reliable (than "kmeans") on some simulated datasets.
# ---- STRUCTURE-like graph for the snapclust result ----

# Take the membership-probability matrix out of the snapclust fit
# (one row per individual, one column per cluster) and print it.
GLOB.aic.clust_ImpRf_NeutralSNPs_proba <- GLOB.aic.clust_ImpRf_NeutralSNPs[["proba"]]
GLOB.aic.clust_ImpRf_NeutralSNPs_proba
# Recorded console output (columns = clusters 1-6); the listing continues
# after row 22:
#
#    1 2 3 4 5 6
# 1 1.000000e+00 2.011255e-72 8.933041e-69 2.914657e-90 6.342321e-101 5.814020e-128
# 2 1.000000e+00 1.157942e-72 2.028349e-69 2.533867e-90 1.410341e-101 2.786144e-128
# 3 1.000000e+00 1.155975e-70 6.872371e-67 1.891581e-88 5.199751e-99 1.732960e-126
# 4 1.000000e+00 1.015826e-72 4.598258e-69 1.027138e-89 6.325401e-101 5.710078e-128
# 5 1.000000e+00 9.335090e-72 2.115738e-68 4.264512e-89 1.562403e-100 1.923378e-127
# 6 1.000000e+00 1.660498e-82 1.314417e-78 2.432619e-100 1.753880e-112 2.844626e-136
# 7 1.000000e+00 1.765841e-89 2.505817e-85 4.451725e-108 1.619760e-117 1.448746e-141
# 8 1.000000e+00 4.946233e-84 7.591280e-80 1.955768e-102 5.063718e-114 1.493796e-137
# 9 1.000000e+00 1.546603e-83 1.721758e-80 2.105072e-102 1.149307e-112 2.642572e-137
# 10 1.000000e+00 9.096696e-85 5.861177e-81 2.618805e-103 2.380295e-113 5.814825e-138
# 11 1.000000e+00 5.225693e-84 4.680464e-80 2.862947e-102 2.875600e-112 3.319029e-137
# 12 1.000000e+00 2.495538e-84 2.595771e-80 1.572953e-102 8.687310e-114 1.121044e-137
# 13 1.000000e+00 3.885692e-88 4.452662e-84 2.089873e-106 2.175434e-116 1.735269e-140
# 14 1.000000e+00 1.852574e-82 1.329239e-78 2.103337e-100 4.560931e-112 3.426765e-136
# 15 1.000000e+00 4.091042e-85 1.365658e-80 2.359255e-102 8.560842e-114 7.425225e-138
# 16 1.000000e+00 7.125259e-84 3.281886e-79 3.670106e-102 8.788386e-114 2.727210e-137
# 17 1.000000e+00 4.589286e-82 2.786488e-78 5.919556e-101 4.779646e-113 2.354581e-136
# 18 1.000000e+00 1.779414e-85 1.701652e-81 3.129502e-104 2.519916e-113 2.166796e-138
# 19 1.000000e+00 2.772259e-85 6.584434e-82 8.599899e-103 6.936966e-114 2.935075e-138
# 20 1.000000e+00 5.680233e-83 5.318948e-79 8.595821e-101 1.774855e-112 1.559353e-136
# 21 1.000000e+00 5.959921e-84 1.822783e-80 1.194095e-102 3.424856e-113 1.547916e-137
# 22 1.000000e+00 1.496503e-81 2.400722e-78 1.761254e-100 1.237608e-111 6.902097e-136
23 1.000000e+00 1.168656e-84 9.216228e-81 2.093898e-102 6.253449e-114 7.763886e-138 24 1.000000e+00 1.211430e-87 1.198113e-83 1.505406e-106 6.788321e-118 1.242964e-140 25 1.000000e+00 7.255918e-87 1.548822e-83 5.005935e-105 2.719796e-115 1.250717e-139 26 1.000000e+00 5.076086e-85 3.256493e-80 2.470693e-102 5.026940e-114 8.369441e-138 27 1.000000e+00 2.576123e-85 1.101555e-81 1.913234e-104 1.531159e-114 1.107053e-138 28 6.209621e-81 1.000000e+00 1.534058e-19 1.639049e-46 1.380967e-79 8.451865e-107 29 3.110685e-84 1.000000e+00 2.386140e-19 2.211302e-46 1.058976e-79 2.033449e-107 30 4.139787e-96 1.000000e+00 5.622773e-24 9.193051e-55 8.700198e-87 8.207287e-114 31 2.084391e-92 1.000000e+00 8.569657e-23 1.428814e-51 2.766222e-86 4.265571e-112 32 2.901935e-81 1.000000e+00 8.355875e-19 5.277164e-44 9.659376e-78 7.561744e-106 33 3.076160e-87 1.000000e+00 2.865728e-20 5.740914e-46 4.130765e-81 2.109926e-108 34 1.094021e-80 1.000000e+00 5.614623e-19 1.253987e-43 6.415242e-79 6.294868e-106 35 7.963183e-83 1.000000e+00 5.199240e-19 8.361754e-43 6.772104e-82 8.594600e-107 36 1.222737e-84 1.000000e+00 1.008546e-22 1.754370e-50 1.623317e-80 3.709598e-109 37 0.000000e+00 NaN NaN 0.000000e+00 0.000000e+00 0.000000e+00 38 4.528624e-78 1.000000e+00 1.405269e-18 2.736887e-44 1.247317e-76 5.340821e-105 39 1.479604e-85 1.000000e+00 1.797989e-18 1.109847e-44 1.390903e-78 6.066171e-107 40 1.685826e-80 1.000000e+00 5.307728e-18 5.033521e-45 5.071926e-80 3.403842e-106 41 2.381230e-73 1.000000e+00 1.235781e-16 1.274415e-42 1.987707e-73 1.083172e-102 42 1.848991e-82 1.000000e+00 6.001104e-19 1.484086e-47 7.183523e-81 1.882819e-107 43 3.074976e-83 1.000000e+00 3.802293e-19 8.873274e-43 7.938223e-79 2.775588e-106 44 5.114744e-79 1.000000e+00 2.126468e-20 4.986790e-45 7.958606e-78 6.126950e-106 45 5.968152e-88 1.000000e+00 3.388853e-22 2.450341e-47 3.040663e-81 3.132085e-109 46 3.905992e-85 1.000000e+00 5.886623e-20 3.691717e-45 9.504014e-79 2.764157e-107 47 3.979134e-85 1.000000e+00 
1.085356e-22 1.270394e-46 3.039451e-80 2.016671e-108 48 3.877550e-82 1.000000e+00 2.702894e-17 4.497033e-44 2.725758e-77 1.207747e-105 49 1.051329e-80 1.000000e+00 1.313472e-16 2.000335e-43 1.910641e-78 2.539244e-105 50 4.391309e-85 1.000000e+00 8.164932e-20 4.330268e-48 1.709392e-79 5.559001e-108 51 7.305166e-86 1.000000e+00 4.589063e-24 3.085897e-49 2.956659e-81 1.436559e-109 52 3.465642e-75 3.806791e-13 1.000000e+00 2.547543e-29 9.245017e-70 5.737075e-99 53 1.194858e-78 9.115611e-16 1.000000e+00 3.646542e-37 2.057799e-74 1.103299e-102 54 0.000000e+00 0.000000e+00 NaN 0.000000e+00 0.000000e+00 0.000000e+00 55 4.524103e-86 2.734343e-18 1.000000e+00 1.803200e-39 8.332235e-76 2.060888e-105 56 5.707371e-73 5.340386e-12 1.000000e+00 1.873242e-32 5.787193e-69 9.205137e-99 57 5.118806e-76 1.968387e-15 1.000000e+00 6.850261e-31 1.053351e-71 2.707248e-100 58 4.691596e-76 3.553660e-16 1.000000e+00 3.298618e-36 6.855056e-74 5.963264e-102 59 1.099386e-79 6.424882e-16 1.000000e+00 2.334107e-33 8.740089e-73 7.797341e-102 60 4.618043e-79 4.887596e-15 1.000000e+00 3.110240e-37 3.771688e-74 1.395702e-102 61 1.568155e-81 2.929063e-17 1.000000e+00 1.274165e-34 2.354176e-74 4.875876e-103 62 3.367967e-77 2.093680e-13 1.000000e+00 7.960319e-33 1.227617e-69 4.244929e-100 63 2.981001e-79 1.906065e-17 1.000000e+00 7.937873e-35 3.487133e-73 1.993199e-102 64 2.514314e-83 2.813785e-15 1.000000e+00 1.483340e-36 2.629020e-75 1.407230e-103 65 1.082707e-81 1.126214e-17 1.000000e+00 2.093478e-34 3.845668e-72 1.144486e-102 66 4.221168e-77 1.867629e-20 1.000000e+00 1.613837e-36 2.383811e-73 5.709356e-103 67 0.000000e+00 0.000000e+00 NaN 0.000000e+00 0.000000e+00 0.000000e+00 68 0.000000e+00 0.000000e+00 NaN 0.000000e+00 0.000000e+00 0.000000e+00 69 1.671260e-80 5.838445e-15 1.000000e+00 9.773837e-35 3.641115e-75 1.473480e-102 70 9.522796e-82 9.225568e-17 1.000000e+00 1.934249e-39 6.044570e-77 1.830388e-104 71 0.000000e+00 NaN NaN 0.000000e+00 0.000000e+00 0.000000e+00 72 0.000000e+00 0.000000e+00 
NaN 0.000000e+00 0.000000e+00 0.000000e+00 73 3.586335e-77 3.469143e-14 1.000000e+00 8.226248e-33 1.916354e-72 8.293279e-101 74 0.000000e+00 NaN NaN 0.000000e+00 0.000000e+00 0.000000e+00 75 0.000000e+00 0.000000e+00 NaN 0.000000e+00 0.000000e+00 0.000000e+00 76 1.450112e-86 1.307415e-23 1.000000e+00 6.444546e-35 1.167703e-75 1.233344e-105 77 0.000000e+00 0.000000e+00 NaN 0.000000e+00 0.000000e+00 0.000000e+00 78 0.000000e+00 0.000000e+00 NaN 0.000000e+00 0.000000e+00 0.000000e+00 79 0.000000e+00 0.000000e+00 NaN 0.000000e+00 0.000000e+00 0.000000e+00 80 1.991564e-80 9.448640e-23 1.000000e+00 1.862986e-29 6.761727e-74 8.613553e-103 81 0.000000e+00 0.000000e+00 NaN 0.000000e+00 0.000000e+00 0.000000e+00 82 0.000000e+00 0.000000e+00 NaN 0.000000e+00 0.000000e+00 0.000000e+00 83 3.234407e-87 4.162876e-25 1.000000e+00 2.474716e-36 8.314197e-77 1.408390e-106 84 1.731300e-85 9.736834e-22 1.000000e+00 6.101257e-35 9.669707e-77 2.882310e-105 85 1.412775e-82 3.112961e-22 1.000000e+00 4.814399e-35 2.441354e-72 6.351847e-104 86 1.295912e-84 3.092514e-23 1.000000e+00 6.253255e-32 1.220026e-75 1.436444e-104 87 0.000000e+00 0.000000e+00 NaN 0.000000e+00 0.000000e+00 0.000000e+00 88 1.312454e-80 1.190505e-22 1.000000e+00 4.566681e-36 1.621722e-73 4.708630e-104 89 6.036038e-84 5.640539e-21 1.000000e+00 6.645143e-37 1.255147e-76 3.555252e-105 90 0.000000e+00 0.000000e+00 NaN 0.000000e+00 0.000000e+00 0.000000e+00 91 2.342930e-80 1.715863e-20 1.000000e+00 2.550171e-33 2.659836e-73 5.589197e-103 92 0.000000e+00 0.000000e+00 NaN 0.000000e+00 0.000000e+00 0.000000e+00 93 0.000000e+00 0.000000e+00 NaN 0.000000e+00 0.000000e+00 0.000000e+00 94 0.000000e+00 0.000000e+00 NaN 0.000000e+00 0.000000e+00 0.000000e+00 95 0.000000e+00 0.000000e+00 NaN 0.000000e+00 0.000000e+00 0.000000e+00 96 6.064522e-79 2.123155e-19 1.000000e+00 1.516734e-32 1.662817e-73 2.304213e-102 97 4.612346e-79 2.080527e-17 1.000000e+00 1.143194e-31 2.636937e-72 1.420624e-101 98 0.000000e+00 0.000000e+00 NaN 0.000000e+00 
0.000000e+00 0.000000e+00 99 0.000000e+00 0.000000e+00 NaN 0.000000e+00 0.000000e+00 0.000000e+00 100 7.026698e-84 3.462433e-22 1.000000e+00 4.648786e-30 1.360859e-72 3.145381e-103 101 8.670850e-82 1.562720e-23 1.000000e+00 2.030077e-29 1.107688e-71 9.057133e-103 102 2.595482e-83 7.220986e-22 1.000000e+00 4.577834e-29 8.288198e-77 1.073008e-103 103 1.868193e-81 8.900171e-23 1.000000e+00 1.826223e-31 9.700979e-75 1.425785e-103 104 2.303445e-82 2.603575e-26 1.000000e+00 1.078825e-32 2.589426e-77 3.199165e-105 105 8.935844e-84 2.960803e-21 1.000000e+00 1.622104e-30 3.933894e-77 5.078149e-104 106 1.563580e-85 2.421509e-24 1.000000e+00 2.910538e-35 1.168548e-78 3.035159e-106 107 1.337168e-78 3.016508e-21 1.000000e+00 1.391598e-27 9.946271e-73 1.620276e-101 108 9.110986e-86 1.168162e-19 1.000000e+00 7.601721e-26 2.141398e-76 5.104483e-103 109 3.992112e-86 1.200016e-20 1.000000e+00 2.010739e-28 2.761211e-75 1.396964e-103 110 4.313137e-73 1.078766e-20 1.000000e+00 1.970641e-26 1.318007e-72 4.749690e-100 111 1.331040e-85 8.426821e-24 1.000000e+00 9.528531e-31 1.283851e-79 1.939524e-105 112 2.808384e-86 4.219455e-24 1.000000e+00 5.277190e-34 3.916411e-78 5.470372e-106 113 4.138790e-82 2.059775e-21 1.000000e+00 1.744666e-31 2.623953e-76 9.516746e-104 114 2.071321e-81 3.920835e-24 1.000000e+00 5.821655e-29 3.254785e-77 7.900510e-104 115 3.705341e-82 9.905337e-23 1.000000e+00 2.717706e-28 3.170865e-76 2.292090e-103 116 2.365289e-76 1.101298e-21 1.000000e+00 7.951432e-28 3.033016e-75 1.046738e-101 117 9.384356e-84 3.567348e-24 1.000000e+00 6.230054e-31 3.069766e-77 1.050717e-104 118 4.620401e-81 1.493875e-23 1.000000e+00 4.124702e-27 4.269204e-77 3.000287e-103 119 2.462429e-87 8.250218e-25 1.000000e+00 3.309040e-33 6.007510e-80 1.518663e-106 120 2.719744e-83 1.818033e-23 1.000000e+00 6.496904e-30 1.075838e-77 2.333097e-104 121 2.782781e-89 5.255920e-26 1.000000e+00 3.951238e-28 2.657341e-80 3.143966e-106 122 9.914547e-82 8.806639e-24 1.000000e+00 1.227504e-30 1.072517e-76 
4.702385e-104 123 1.159909e-84 4.961188e-24 1.000000e+00 3.439240e-26 8.244225e-78 5.043599e-104 124 1.422850e-71 8.975297e-21 1.000000e+00 1.602935e-09 1.568995e-67 2.299139e-95 125 2.421768e-79 1.144509e-23 1.000000e+00 8.934700e-10 5.223643e-71 3.037926e-98 126 4.678937e-72 1.799307e-20 9.999997e-01 3.050071e-07 3.933438e-68 4.582295e-95 127 1.357443e-77 1.039608e-22 1.000000e+00 1.463380e-12 3.503415e-71 2.704607e-98 128 6.922313e-80 1.926075e-23 1.000000e+00 2.789891e-09 4.001075e-72 1.971305e-98 129 2.256766e-77 1.730957e-21 1.000000e+00 7.587962e-10 9.881477e-71 2.257148e-97 130 5.361401e-76 6.502218e-22 1.000000e+00 3.186685e-13 2.136985e-70 8.616130e-98 131 9.553271e-79 6.419333e-21 1.000000e+00 2.062397e-11 2.221581e-70 8.911513e-98 132 1.867542e-79 2.564256e-23 1.000000e+00 1.542644e-12 1.093732e-72 4.382257e-99 133 9.985086e-80 9.154061e-24 1.000000e+00 4.011108e-14 7.467762e-72 2.226897e-99 134 5.776329e-82 4.286094e-34 7.420572e-20 1.000000e+00 8.697597e-78 3.169029e-104 135 1.072754e-80 3.899946e-34 1.433807e-19 1.000000e+00 1.304573e-79 2.747386e-104 136 3.136175e-85 5.167180e-34 2.669672e-19 1.000000e+00 6.383429e-82 1.407503e-105 137 1.404619e-82 4.646206e-34 2.208273e-19 1.000000e+00 3.425806e-79 1.580923e-104 138 1.633896e-88 2.711523e-36 7.598916e-21 1.000000e+00 1.381189e-82 3.923780e-107 139 1.492719e-81 2.896307e-33 9.643333e-20 1.000000e+00 3.809226e-78 5.016415e-104 140 7.511916e-82 2.188356e-33 9.941407e-19 1.000000e+00 2.035078e-79 3.669405e-104 141 2.610914e-86 5.503156e-36 7.641509e-19 1.000000e+00 3.706916e-80 9.596998e-106 142 1.554525e-85 6.555062e-34 5.917055e-20 1.000000e+00 1.111303e-80 1.680501e-105 143 4.814051e-83 1.809454e-32 1.080872e-18 1.000000e+00 1.198109e-79 2.955846e-104 144 7.882303e-87 3.509965e-33 4.575985e-19 1.000000e+00 2.266449e-80 2.247882e-105 145 7.192080e-83 3.477510e-33 2.063055e-19 1.000000e+00 1.949943e-79 1.822835e-104 146 7.924270e-87 1.401335e-35 5.416905e-20 1.000000e+00 4.668609e-82 2.238227e-106 147 
1.954296e-82 4.570746e-35 2.560533e-19 1.000000e+00 7.634366e-80 8.102747e-105 148 1.196331e-84 1.772049e-36 1.007762e-21 1.000000e+00 3.991845e-81 2.795039e-106 149 3.096292e-81 1.189815e-32 3.295618e-19 1.000000e+00 2.176683e-78 8.802922e-104 150 1.195955e-85 1.859225e-34 4.654042e-20 1.000000e+00 7.676257e-79 2.755634e-105 151 1.319241e-82 2.453643e-34 7.358156e-19 1.000000e+00 2.064191e-79 1.579600e-104 152 4.015772e-86 1.599599e-35 5.000990e-21 1.000000e+00 1.256706e-81 2.406745e-106 153 3.778256e-84 8.543601e-35 5.215713e-20 1.000000e+00 1.864096e-81 1.443973e-105 154 1.051096e-80 4.275822e-32 5.604164e-19 1.000000e+00 5.389144e-79 1.221110e-103 155 1.136151e-82 7.390156e-34 6.029798e-20 1.000000e+00 9.659578e-80 9.956032e-105 156 1.462345e-83 1.854397e-34 9.651970e-21 1.000000e+00 5.114131e-80 3.058752e-105 157 1.437692e-84 9.051464e-35 8.113974e-21 1.000000e+00 7.883845e-81 1.107356e-105 158 1.861828e-91 9.113154e-39 1.455804e-24 1.000000e+00 1.553270e-84 2.382362e-109 159 1.388063e-90 8.922848e-39 2.181118e-24 1.000000e+00 1.348134e-84 3.736638e-109 160 3.891187e-89 3.958651e-38 3.253951e-24 1.000000e+00 3.196647e-83 2.000613e-108 161 1.367017e-89 5.497597e-38 1.079892e-23 1.000000e+00 2.678137e-83 2.126415e-108 162 2.326539e-91 4.103277e-39 5.569020e-26 1.000000e+00 1.569350e-85 6.990112e-110 163 1.311290e-91 1.375741e-38 8.299135e-25 1.000000e+00 2.106376e-84 2.290746e-109 164 1.356661e-90 1.892611e-38 1.789423e-24 1.000000e+00 1.333143e-83 6.571139e-109 165 3.401961e-91 3.619577e-38 7.341429e-25 1.000000e+00 6.853795e-84 4.155621e-109 166 1.388779e-91 2.861226e-38 3.875649e-24 1.000000e+00 3.328489e-84 4.001027e-109 167 2.380719e-90 1.251574e-39 1.332740e-24 1.000000e+00 8.604761e-85 2.327802e-109 168 1.217229e-89 5.379035e-39 8.627930e-25 1.000000e+00 7.488215e-84 6.102228e-109 169 2.310919e-94 6.350937e-41 1.778927e-25 1.000000e+00 3.093359e-86 6.944778e-111 170 1.628901e-92 5.960709e-40 2.322158e-25 1.000000e+00 2.277639e-85 4.002488e-110 171 
2.679039e-92 1.687070e-39 5.731713e-25 1.000000e+00 5.053522e-85 7.649169e-110 172 2.671819e-88 1.754630e-36 1.059351e-22 1.000000e+00 6.232639e-82 2.282218e-107 173 5.418397e-91 4.430905e-40 1.936348e-25 1.000000e+00 6.091824e-85 8.925549e-110 174 2.987890e-93 5.717582e-39 1.243586e-24 1.000000e+00 1.614310e-84 9.273936e-110 175 3.489344e-91 1.602131e-39 4.734928e-24 1.000000e+00 4.870520e-84 3.035864e-109 176 4.730541e-91 7.240395e-38 3.442671e-24 1.000000e+00 5.675336e-84 6.688491e-109 177 2.909838e-90 2.475136e-38 3.125156e-24 1.000000e+00 1.579157e-83 9.340478e-109 178 2.583675e-91 1.363593e-38 2.834644e-24 1.000000e+00 9.585518e-85 2.860378e-109 179 6.882299e-92 6.211701e-40 3.539594e-25 1.000000e+00 3.033986e-85 6.202951e-110 180 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 NaN 0.000000e+00 181 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 NaN 0.000000e+00 182 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 NaN 0.000000e+00 183 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 NaN 0.000000e+00 184 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 NaN 0.000000e+00 185 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 NaN 0.000000e+00 186 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 NaN 0.000000e+00 187 4.880800e-145 4.125177e-114 2.157254e-107 3.397545e-130 4.580000e-66 1.000000e+00 188 4.165340e-144 2.483567e-112 8.748962e-107 1.391455e-129 2.073337e-64 1.000000e+00 189 7.159883e-147 1.593944e-116 8.916149e-110 1.211767e-133 5.462348e-67 1.000000e+00 190 1.622515e-148 4.768581e-118 1.411417e-111 2.917911e-135 2.862510e-70 1.000000e+00 191 4.519420e-145 4.504172e-114 5.677122e-108 4.900584e-130 7.522150e-66 1.000000e+00 192 1.348773e-147 3.749995e-118 3.478256e-110 2.709965e-134 6.313322e-68 1.000000e+00 193 9.551586e-146 7.063951e-115 3.710090e-108 1.358758e-131 1.116149e-66 1.000000e+00 194 1.139409e-145 4.814741e-114 8.315852e-108 3.732165e-131 6.425071e-68 1.000000e+00 195 6.191365e-146 2.019916e-115 6.249354e-110 
# 1.061403e-134 3.394712e-62 1.000000e+00
# 196 4.420196e-141 6.086214e-109 1.465467e-101 3.843268e-123 2.460951e-64 1.000000e+00
# 197 7.613368e-147 9.460361e-117 3.507715e-110 3.316931e-130 1.161810e-69 1.000000e+00
# 198 1.488481e-146 3.222979e-116 3.508663e-110 5.297662e-132 6.097501e-67 1.000000e+00
# 199 6.233521e-149 6.668331e-119 7.149404e-112 1.761808e-134 5.601969e-72 1.000000e+00
#
# Replace the NaN values printed above with hard 0/1 memberships. The cells
# are addressed by fixed row numbers (individuals) and column numbers
# (clusters), so this only fits the run whose output is recorded above.

# Row 37 shows NaN for clusters 2 and 3: it is set to cluster 2.
GLOB.aic.clust_ImpRf_NeutralSNPs_proba[37, 2:3] <- c(1, 0)

# Rows 71 and 74 also show NaN for clusters 2 and 3: their cluster-2 cell is
# zeroed here, and their cluster-3 cell is set to 1 with the rows below.
GLOB.aic.clust_ImpRf_NeutralSNPs_proba[c(71, 74), 2] <- 0

# Rows whose cluster-3 cell is NaN: set to 1.
rows_cluster3 <- c(54, 67, 68, 71, 72, 74, 75, 77, 78, 79, 81, 82, 87, 90,
                   92:95, 98, 99)
GLOB.aic.clust_ImpRf_NeutralSNPs_proba[rows_cluster3, 3] <- 1

# Rows 180-186 have NaN in the cluster-5 cell: set to 1.
GLOB.aic.clust_ImpRf_NeutralSNPs_proba[180:186, 5] <- 1
GLOB.aic.clust_ImpRf_NeutralSNPs_proba # 1 2 3 4 5 6 1 1.000000e+00 2.011255e-72 8.933041e-69 2.914657e-90 6.342321e-101 5.814020e-128 2 1.000000e+00 1.157942e-72 2.028349e-69 2.533867e-90 1.410341e-101 2.786144e-128 3 1.000000e+00 1.155975e-70 6.872371e-67 1.891581e-88 5.199751e-99 1.732960e-126 4 1.000000e+00 1.015826e-72 4.598258e-69 1.027138e-89 6.325401e-101 5.710078e-128 5 1.000000e+00 9.335090e-72 2.115738e-68 4.264512e-89 1.562403e-100 1.923378e-127 6 1.000000e+00 1.660498e-82 1.314417e-78 2.432619e-100 1.753880e-112 2.844626e-136 7 1.000000e+00 1.765841e-89 2.505817e-85 4.451725e-108 1.619760e-117 1.448746e-141 8 1.000000e+00 4.946233e-84 7.591280e-80 1.955768e-102 5.063718e-114 1.493796e-137 9 1.000000e+00 1.546603e-83 1.721758e-80 2.105072e-102 1.149307e-112 2.642572e-137 10 1.000000e+00 9.096696e-85 5.861177e-81 2.618805e-103 2.380295e-113 5.814825e-138 11 1.000000e+00 5.225693e-84 4.680464e-80 2.862947e-102 2.875600e-112 3.319029e-137 12 1.000000e+00 2.495538e-84 2.595771e-80 1.572953e-102 8.687310e-114 1.121044e-137 13 1.000000e+00 3.885692e-88 4.452662e-84 2.089873e-106 2.175434e-116 1.735269e-140 14 1.000000e+00 1.852574e-82 1.329239e-78 2.103337e-100 4.560931e-112 3.426765e-136 15 1.000000e+00 4.091042e-85 1.365658e-80 2.359255e-102 8.560842e-114 7.425225e-138 16 1.000000e+00 7.125259e-84 3.281886e-79 3.670106e-102 8.788386e-114 2.727210e-137 17 1.000000e+00 4.589286e-82 2.786488e-78 5.919556e-101 4.779646e-113 2.354581e-136 18 1.000000e+00 1.779414e-85 1.701652e-81 3.129502e-104 2.519916e-113 2.166796e-138 19 1.000000e+00 2.772259e-85 6.584434e-82 8.599899e-103 6.936966e-114 2.935075e-138 20 1.000000e+00 5.680233e-83 5.318948e-79 8.595821e-101 1.774855e-112 1.559353e-136 21 1.000000e+00 5.959921e-84 1.822783e-80 1.194095e-102 3.424856e-113 1.547916e-137 22 1.000000e+00 1.496503e-81 2.400722e-78 1.761254e-100 1.237608e-111 6.902097e-136 23 1.000000e+00 1.168656e-84 9.216228e-81 2.093898e-102 6.253449e-114 7.763886e-138 24 1.000000e+00 1.211430e-87 
1.198113e-83 1.505406e-106 6.788321e-118 1.242964e-140 25 1.000000e+00 7.255918e-87 1.548822e-83 5.005935e-105 2.719796e-115 1.250717e-139 26 1.000000e+00 5.076086e-85 3.256493e-80 2.470693e-102 5.026940e-114 8.369441e-138 27 1.000000e+00 2.576123e-85 1.101555e-81 1.913234e-104 1.531159e-114 1.107053e-138 28 6.209621e-81 1.000000e+00 1.534058e-19 1.639049e-46 1.380967e-79 8.451865e-107 29 3.110685e-84 1.000000e+00 2.386140e-19 2.211302e-46 1.058976e-79 2.033449e-107 30 4.139787e-96 1.000000e+00 5.622773e-24 9.193051e-55 8.700198e-87 8.207287e-114 31 2.084391e-92 1.000000e+00 8.569657e-23 1.428814e-51 2.766222e-86 4.265571e-112 32 2.901935e-81 1.000000e+00 8.355875e-19 5.277164e-44 9.659376e-78 7.561744e-106 33 3.076160e-87 1.000000e+00 2.865728e-20 5.740914e-46 4.130765e-81 2.109926e-108 34 1.094021e-80 1.000000e+00 5.614623e-19 1.253987e-43 6.415242e-79 6.294868e-106 35 7.963183e-83 1.000000e+00 5.199240e-19 8.361754e-43 6.772104e-82 8.594600e-107 36 1.222737e-84 1.000000e+00 1.008546e-22 1.754370e-50 1.623317e-80 3.709598e-109 37 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 38 4.528624e-78 1.000000e+00 1.405269e-18 2.736887e-44 1.247317e-76 5.340821e-105 39 1.479604e-85 1.000000e+00 1.797989e-18 1.109847e-44 1.390903e-78 6.066171e-107 40 1.685826e-80 1.000000e+00 5.307728e-18 5.033521e-45 5.071926e-80 3.403842e-106 41 2.381230e-73 1.000000e+00 1.235781e-16 1.274415e-42 1.987707e-73 1.083172e-102 42 1.848991e-82 1.000000e+00 6.001104e-19 1.484086e-47 7.183523e-81 1.882819e-107 43 3.074976e-83 1.000000e+00 3.802293e-19 8.873274e-43 7.938223e-79 2.775588e-106 44 5.114744e-79 1.000000e+00 2.126468e-20 4.986790e-45 7.958606e-78 6.126950e-106 45 5.968152e-88 1.000000e+00 3.388853e-22 2.450341e-47 3.040663e-81 3.132085e-109 46 3.905992e-85 1.000000e+00 5.886623e-20 3.691717e-45 9.504014e-79 2.764157e-107 47 3.979134e-85 1.000000e+00 1.085356e-22 1.270394e-46 3.039451e-80 2.016671e-108 48 3.877550e-82 1.000000e+00 2.702894e-17 
4.497033e-44 2.725758e-77 1.207747e-105 49 1.051329e-80 1.000000e+00 1.313472e-16 2.000335e-43 1.910641e-78 2.539244e-105 50 4.391309e-85 1.000000e+00 8.164932e-20 4.330268e-48 1.709392e-79 5.559001e-108 51 7.305166e-86 1.000000e+00 4.589063e-24 3.085897e-49 2.956659e-81 1.436559e-109 52 3.465642e-75 3.806791e-13 1.000000e+00 2.547543e-29 9.245017e-70 5.737075e-99 53 1.194858e-78 9.115611e-16 1.000000e+00 3.646542e-37 2.057799e-74 1.103299e-102 54 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 55 4.524103e-86 2.734343e-18 1.000000e+00 1.803200e-39 8.332235e-76 2.060888e-105 56 5.707371e-73 5.340386e-12 1.000000e+00 1.873242e-32 5.787193e-69 9.205137e-99 57 5.118806e-76 1.968387e-15 1.000000e+00 6.850261e-31 1.053351e-71 2.707248e-100 58 4.691596e-76 3.553660e-16 1.000000e+00 3.298618e-36 6.855056e-74 5.963264e-102 59 1.099386e-79 6.424882e-16 1.000000e+00 2.334107e-33 8.740089e-73 7.797341e-102 60 4.618043e-79 4.887596e-15 1.000000e+00 3.110240e-37 3.771688e-74 1.395702e-102 61 1.568155e-81 2.929063e-17 1.000000e+00 1.274165e-34 2.354176e-74 4.875876e-103 62 3.367967e-77 2.093680e-13 1.000000e+00 7.960319e-33 1.227617e-69 4.244929e-100 63 2.981001e-79 1.906065e-17 1.000000e+00 7.937873e-35 3.487133e-73 1.993199e-102 64 2.514314e-83 2.813785e-15 1.000000e+00 1.483340e-36 2.629020e-75 1.407230e-103 65 1.082707e-81 1.126214e-17 1.000000e+00 2.093478e-34 3.845668e-72 1.144486e-102 66 4.221168e-77 1.867629e-20 1.000000e+00 1.613837e-36 2.383811e-73 5.709356e-103 67 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 68 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 69 1.671260e-80 5.838445e-15 1.000000e+00 9.773837e-35 3.641115e-75 1.473480e-102 70 9.522796e-82 9.225568e-17 1.000000e+00 1.934249e-39 6.044570e-77 1.830388e-104 71 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 72 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 
0.000000e+00 73 3.586335e-77 3.469143e-14 1.000000e+00 8.226248e-33 1.916354e-72 8.293279e-101 74 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 75 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 76 1.450112e-86 1.307415e-23 1.000000e+00 6.444546e-35 1.167703e-75 1.233344e-105 77 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 78 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 79 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 80 1.991564e-80 9.448640e-23 1.000000e+00 1.862986e-29 6.761727e-74 8.613553e-103 81 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 82 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 83 3.234407e-87 4.162876e-25 1.000000e+00 2.474716e-36 8.314197e-77 1.408390e-106 84 1.731300e-85 9.736834e-22 1.000000e+00 6.101257e-35 9.669707e-77 2.882310e-105 85 1.412775e-82 3.112961e-22 1.000000e+00 4.814399e-35 2.441354e-72 6.351847e-104 86 1.295912e-84 3.092514e-23 1.000000e+00 6.253255e-32 1.220026e-75 1.436444e-104 87 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 88 1.312454e-80 1.190505e-22 1.000000e+00 4.566681e-36 1.621722e-73 4.708630e-104 89 6.036038e-84 5.640539e-21 1.000000e+00 6.645143e-37 1.255147e-76 3.555252e-105 90 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 91 2.342930e-80 1.715863e-20 1.000000e+00 2.550171e-33 2.659836e-73 5.589197e-103 92 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 93 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 94 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 95 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 96 6.064522e-79 2.123155e-19 1.000000e+00 1.516734e-32 1.662817e-73 2.304213e-102 97 4.612346e-79 2.080527e-17 
1.000000e+00 1.143194e-31 2.636937e-72 1.420624e-101 98 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 99 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 100 7.026698e-84 3.462433e-22 1.000000e+00 4.648786e-30 1.360859e-72 3.145381e-103 101 8.670850e-82 1.562720e-23 1.000000e+00 2.030077e-29 1.107688e-71 9.057133e-103 102 2.595482e-83 7.220986e-22 1.000000e+00 4.577834e-29 8.288198e-77 1.073008e-103 103 1.868193e-81 8.900171e-23 1.000000e+00 1.826223e-31 9.700979e-75 1.425785e-103 104 2.303445e-82 2.603575e-26 1.000000e+00 1.078825e-32 2.589426e-77 3.199165e-105 105 8.935844e-84 2.960803e-21 1.000000e+00 1.622104e-30 3.933894e-77 5.078149e-104 106 1.563580e-85 2.421509e-24 1.000000e+00 2.910538e-35 1.168548e-78 3.035159e-106 107 1.337168e-78 3.016508e-21 1.000000e+00 1.391598e-27 9.946271e-73 1.620276e-101 108 9.110986e-86 1.168162e-19 1.000000e+00 7.601721e-26 2.141398e-76 5.104483e-103 109 3.992112e-86 1.200016e-20 1.000000e+00 2.010739e-28 2.761211e-75 1.396964e-103 110 4.313137e-73 1.078766e-20 1.000000e+00 1.970641e-26 1.318007e-72 4.749690e-100 111 1.331040e-85 8.426821e-24 1.000000e+00 9.528531e-31 1.283851e-79 1.939524e-105 112 2.808384e-86 4.219455e-24 1.000000e+00 5.277190e-34 3.916411e-78 5.470372e-106 113 4.138790e-82 2.059775e-21 1.000000e+00 1.744666e-31 2.623953e-76 9.516746e-104 114 2.071321e-81 3.920835e-24 1.000000e+00 5.821655e-29 3.254785e-77 7.900510e-104 115 3.705341e-82 9.905337e-23 1.000000e+00 2.717706e-28 3.170865e-76 2.292090e-103 116 2.365289e-76 1.101298e-21 1.000000e+00 7.951432e-28 3.033016e-75 1.046738e-101 117 9.384356e-84 3.567348e-24 1.000000e+00 6.230054e-31 3.069766e-77 1.050717e-104 118 4.620401e-81 1.493875e-23 1.000000e+00 4.124702e-27 4.269204e-77 3.000287e-103 119 2.462429e-87 8.250218e-25 1.000000e+00 3.309040e-33 6.007510e-80 1.518663e-106 120 2.719744e-83 1.818033e-23 1.000000e+00 6.496904e-30 1.075838e-77 2.333097e-104 121 2.782781e-89 5.255920e-26 
1.000000e+00 3.951238e-28 2.657341e-80 3.143966e-106 122 9.914547e-82 8.806639e-24 1.000000e+00 1.227504e-30 1.072517e-76 4.702385e-104 123 1.159909e-84 4.961188e-24 1.000000e+00 3.439240e-26 8.244225e-78 5.043599e-104 124 1.422850e-71 8.975297e-21 1.000000e+00 1.602935e-09 1.568995e-67 2.299139e-95 125 2.421768e-79 1.144509e-23 1.000000e+00 8.934700e-10 5.223643e-71 3.037926e-98 126 4.678937e-72 1.799307e-20 9.999997e-01 3.050071e-07 3.933438e-68 4.582295e-95 127 1.357443e-77 1.039608e-22 1.000000e+00 1.463380e-12 3.503415e-71 2.704607e-98 128 6.922313e-80 1.926075e-23 1.000000e+00 2.789891e-09 4.001075e-72 1.971305e-98 129 2.256766e-77 1.730957e-21 1.000000e+00 7.587962e-10 9.881477e-71 2.257148e-97 130 5.361401e-76 6.502218e-22 1.000000e+00 3.186685e-13 2.136985e-70 8.616130e-98 131 9.553271e-79 6.419333e-21 1.000000e+00 2.062397e-11 2.221581e-70 8.911513e-98 132 1.867542e-79 2.564256e-23 1.000000e+00 1.542644e-12 1.093732e-72 4.382257e-99 133 9.985086e-80 9.154061e-24 1.000000e+00 4.011108e-14 7.467762e-72 2.226897e-99 134 5.776329e-82 4.286094e-34 7.420572e-20 1.000000e+00 8.697597e-78 3.169029e-104 135 1.072754e-80 3.899946e-34 1.433807e-19 1.000000e+00 1.304573e-79 2.747386e-104 136 3.136175e-85 5.167180e-34 2.669672e-19 1.000000e+00 6.383429e-82 1.407503e-105 137 1.404619e-82 4.646206e-34 2.208273e-19 1.000000e+00 3.425806e-79 1.580923e-104 138 1.633896e-88 2.711523e-36 7.598916e-21 1.000000e+00 1.381189e-82 3.923780e-107 139 1.492719e-81 2.896307e-33 9.643333e-20 1.000000e+00 3.809226e-78 5.016415e-104 140 7.511916e-82 2.188356e-33 9.941407e-19 1.000000e+00 2.035078e-79 3.669405e-104 141 2.610914e-86 5.503156e-36 7.641509e-19 1.000000e+00 3.706916e-80 9.596998e-106 142 1.554525e-85 6.555062e-34 5.917055e-20 1.000000e+00 1.111303e-80 1.680501e-105 143 4.814051e-83 1.809454e-32 1.080872e-18 1.000000e+00 1.198109e-79 2.955846e-104 144 7.882303e-87 3.509965e-33 4.575985e-19 1.000000e+00 2.266449e-80 2.247882e-105 145 7.192080e-83 3.477510e-33 2.063055e-19 
1.000000e+00 1.949943e-79 1.822835e-104 146 7.924270e-87 1.401335e-35 5.416905e-20 1.000000e+00 4.668609e-82 2.238227e-106 147 1.954296e-82 4.570746e-35 2.560533e-19 1.000000e+00 7.634366e-80 8.102747e-105 148 1.196331e-84 1.772049e-36 1.007762e-21 1.000000e+00 3.991845e-81 2.795039e-106 149 3.096292e-81 1.189815e-32 3.295618e-19 1.000000e+00 2.176683e-78 8.802922e-104 150 1.195955e-85 1.859225e-34 4.654042e-20 1.000000e+00 7.676257e-79 2.755634e-105 151 1.319241e-82 2.453643e-34 7.358156e-19 1.000000e+00 2.064191e-79 1.579600e-104 152 4.015772e-86 1.599599e-35 5.000990e-21 1.000000e+00 1.256706e-81 2.406745e-106 153 3.778256e-84 8.543601e-35 5.215713e-20 1.000000e+00 1.864096e-81 1.443973e-105 154 1.051096e-80 4.275822e-32 5.604164e-19 1.000000e+00 5.389144e-79 1.221110e-103 155 1.136151e-82 7.390156e-34 6.029798e-20 1.000000e+00 9.659578e-80 9.956032e-105 156 1.462345e-83 1.854397e-34 9.651970e-21 1.000000e+00 5.114131e-80 3.058752e-105 157 1.437692e-84 9.051464e-35 8.113974e-21 1.000000e+00 7.883845e-81 1.107356e-105 158 1.861828e-91 9.113154e-39 1.455804e-24 1.000000e+00 1.553270e-84 2.382362e-109 159 1.388063e-90 8.922848e-39 2.181118e-24 1.000000e+00 1.348134e-84 3.736638e-109 160 3.891187e-89 3.958651e-38 3.253951e-24 1.000000e+00 3.196647e-83 2.000613e-108 161 1.367017e-89 5.497597e-38 1.079892e-23 1.000000e+00 2.678137e-83 2.126415e-108 162 2.326539e-91 4.103277e-39 5.569020e-26 1.000000e+00 1.569350e-85 6.990112e-110 163 1.311290e-91 1.375741e-38 8.299135e-25 1.000000e+00 2.106376e-84 2.290746e-109 164 1.356661e-90 1.892611e-38 1.789423e-24 1.000000e+00 1.333143e-83 6.571139e-109 165 3.401961e-91 3.619577e-38 7.341429e-25 1.000000e+00 6.853795e-84 4.155621e-109 166 1.388779e-91 2.861226e-38 3.875649e-24 1.000000e+00 3.328489e-84 4.001027e-109 167 2.380719e-90 1.251574e-39 1.332740e-24 1.000000e+00 8.604761e-85 2.327802e-109 168 1.217229e-89 5.379035e-39 8.627930e-25 1.000000e+00 7.488215e-84 6.102228e-109 169 2.310919e-94 6.350937e-41 1.778927e-25 
1.000000e+00 3.093359e-86 6.944778e-111 170 1.628901e-92 5.960709e-40 2.322158e-25 1.000000e+00 2.277639e-85 4.002488e-110 171 2.679039e-92 1.687070e-39 5.731713e-25 1.000000e+00 5.053522e-85 7.649169e-110 172 2.671819e-88 1.754630e-36 1.059351e-22 1.000000e+00 6.232639e-82 2.282218e-107 173 5.418397e-91 4.430905e-40 1.936348e-25 1.000000e+00 6.091824e-85 8.925549e-110 174 2.987890e-93 5.717582e-39 1.243586e-24 1.000000e+00 1.614310e-84 9.273936e-110 175 3.489344e-91 1.602131e-39 4.734928e-24 1.000000e+00 4.870520e-84 3.035864e-109 176 4.730541e-91 7.240395e-38 3.442671e-24 1.000000e+00 5.675336e-84 6.688491e-109 177 2.909838e-90 2.475136e-38 3.125156e-24 1.000000e+00 1.579157e-83 9.340478e-109 178 2.583675e-91 1.363593e-38 2.834644e-24 1.000000e+00 9.585518e-85 2.860378e-109 179 6.882299e-92 6.211701e-40 3.539594e-25 1.000000e+00 3.033986e-85 6.202951e-110 180 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 181 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 182 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 183 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 184 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 185 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 186 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 187 4.880800e-145 4.125177e-114 2.157254e-107 3.397545e-130 4.580000e-66 1.000000e+00 188 4.165340e-144 2.483567e-112 8.748962e-107 1.391455e-129 2.073337e-64 1.000000e+00 189 7.159883e-147 1.593944e-116 8.916149e-110 1.211767e-133 5.462348e-67 1.000000e+00 190 1.622515e-148 4.768581e-118 1.411417e-111 2.917911e-135 2.862510e-70 1.000000e+00 191 4.519420e-145 4.504172e-114 5.677122e-108 4.900584e-130 7.522150e-66 1.000000e+00 192 1.348773e-147 3.749995e-118 3.478256e-110 2.709965e-134 6.313322e-68 1.000000e+00 193 9.551586e-146 7.063951e-115 
3.710090e-108 1.358758e-131 1.116149e-66 1.000000e+00 194 1.139409e-145 4.814741e-114 8.315852e-108 3.732165e-131 6.425071e-68 1.000000e+00 195 6.191365e-146 2.019916e-115 6.249354e-110 1.061403e-134 3.394712e-62 1.000000e+00 196 4.420196e-141 6.086214e-109 1.465467e-101 3.843268e-123 2.460951e-64 1.000000e+00 197 7.613368e-147 9.460361e-117 3.507715e-110 3.316931e-130 1.161810e-69 1.000000e+00 198 1.488481e-146 3.222979e-116 3.508663e-110 5.297662e-132 6.097501e-67 1.000000e+00 199 6.233521e-149 6.668331e-119 7.149404e-112 1.761808e-134 5.601969e-72 1.000000e+00
#
# Structure-like plot ----
# Draws GLOB.aic.clust_ImpRf_NeutralSNPs_proba with compoplot() and then adds
# two x-axes by hand: one carrying only tick marks, one carrying only the
# group labels centred between consecutive ticks.

# Six colours; the printed rows above carry six values each, so presumably
# one colour per cluster column -- verify against the matrix.
myCol <- c("red", "sienna4", "burlywood3", "deepskyblue", "steelblue", "grey20")

compoplot(
  GLOB.aic.clust_ImpRf_NeutralSNPs_proba,
  space = 0,
  lab = "",
  col = myCol,
  cleg = 1,
  legend = FALSE,
  font.lab = 2,
  xlab = "population of origin of each sampled individual",
  cex.axis = 1
)

# Tick positions separating the labelled groups on the x-axis.
# NOTE(review): these look like cumulative sample counts per population
# (one unit per bar because space = 0) -- confirm against the sample order.
pop_breaks <- c(0, 5, 27, 51, 75, 99, 123, 133, 157, 179, 186, 199)
pop_labels <- c(
  "CAS", "POR", "LOC", "STG", "LAN", "LEZ", "FER", "AUD", "HEL", "ELL", "NYA"
)

# Each label sits halfway between two consecutive ticks, so there must be
# exactly one label per interval.
stopifnot(length(pop_labels) == length(pop_breaks) - 1L)
pop_mids <- pop_breaks[-length(pop_breaks)] + diff(pop_breaks) / 2

# First axis: tick marks only, no labels.
axis(1, at = pop_breaks, labels = FALSE, lwd = 1, lwd.ticks = 1)
# Second axis: labels only, no tick marks.
axis(
  1,
  at = pop_mids,
  labels = pop_labels,
  las = 1,
  tick = FALSE,
  cex.axis = 1
)