Queller <- function(gen, n, af, an) { # the Queller and Goodnight estimator loci <- length(af) mat <- forQueller(gen, n, af, an) mat1 <- mat[[1]] mat2 <- mat[[2]] rm(mat) relat.denom <- function(v) 0.5 * (v[6] + v[5] + v[3] + v[7]) - v[1] - v[2] relat.num <- function(v) 1 + v[4] - v[1] - v[2] ## for mat1 (reference individual x) r1.d <- matrix(apply(mat1, 1, relat.denom), ncol = loci) r1.n <- matrix(apply(mat1, 1, relat.num), ncol = loci) r1 <- apply(r1.d, 1, sum)/apply(r1.n, 1, sum) ## for mat2 (reference individual y) r2.d <- matrix(apply(mat2, 1, relat.denom), ncol = loci) r2.n <- matrix(apply(mat2, 1, relat.num), ncol = loci) r2 <- apply(r2.d, 1, sum)/apply(r2.n, 1, sum) ## and the average of those: r <- rbind(c(r1), c(r2)) r <- apply(r, 2, mean) return(r) } #end of Queller function ## #################################################################### forQueller <- function(gen, n, af, an) { # gen is a list sim1 <- matrix(NA, n * length(af), 7) sim2 <- matrix(NA, n * length(af), 7) for (i in 1:length(af)) { s1 <- list(abs(sign(gen[[i]][, 2] - gen[[i]][, 3])), abs(sign(gen[[i]][, 1] - gen[[i]][, 3])), abs(sign(gen[[i]][, 1] - gen[[i]][, 4])), abs(sign(gen[[i]][, 1] - gen[[i]][, 2])), abs(sign(gen[[i]][, 3] - gen[[i]][, 4]))) s2 <- list(abs(sign(gen[[i]][, 1] - gen[[i]][, 4])), abs(sign(gen[[i]][, 2] - gen[[i]][, 4])), abs(sign(gen[[i]][, 2] - gen[[i]][, 3])), abs(sign(gen[[i]][, 1] - gen[[i]][, 2])), abs(sign(gen[[i]][, 3] - gen[[i]][, 4]))) sim1[((i - 1) * n + 1):(i * n), 3:7] <- trunc(cos(matrix(unlist(s1), n, 5) * (2/pi))) sim1[((i - 1) * n + 1):(i * n), 1:2] <- cbind(af[[i]][match(gen[[i]][, 1], an[[i]])], af[[i]][match(gen[[i]][, 3], an[[i]])]) sim2[((i - 1) * n + 1):(i * n), 3:7] <- trunc(cos(matrix(unlist(s2), n, 5) * (2/pi))) sim2[((i - 1) * n + 1):(i * n), 1:2] <- cbind(af[[i]][match(gen[[i]][, 2], an[[i]])], af[[i]][match(gen[[i]][, 4], an[[i]])]) } return(list(sim1, sim2)) } # end of of forQueller library(MasterBayes) library(pedantics) library(Rhh) library(car) library(ecodist) ped1 <- read.table("file 28 - bottleneck pedigree for x5 founder simulation.txt", header = T) ped5 <- read.table("file 26 - pedigree for founder simulation.txt", header = T) original_genotypes_Cr2 <- read.table("file 16 - x0 data for MasterBayes cr2 only.txt", header = T) original_genotypes_Cr3 <- read.table("file 17 - x0 data for MasterBayes cr3 only.txt", header = T) ACr2 <- extractA(original_genotypes_Cr2) ACr3 <- extractA(original_genotypes_Cr3) pedlist <- read.table("file 24 - pedigree relatedness.txt", header = T, sep = "\t") pedigree_matrix <- matrix(ncol = 125, nrow = 125) pedigree_matrix[cbind(pedlist$ind1, pedlist$ind2)] <- pedlist$pedigree expected_f <- read.table("file 25 - expected_f.txt", header = T) output <- matrix(NA, 1000, 4) RLTresults <- data.frame(Mantelr = output[, 1], `P Value` = output[, 2], `L lim` = output[, 3], `U lim` = output[, 4]) IBDresults <- data.frame(Cor = output[, 1], `P Values` = output[, 2], bcCor = output[, 3], `bcP Values` = output[, 4]) allele_results <- data.frame(alleles_per_locus = output[, 1]) pedigree2 <- read.table("file 23 - pedigree.txt", header = T) Cr2_positions <- as.matrix(c(4.5, 89.5, 9.5, 68, 46.3, 0, 31, 36, 39, 26.7, 44.9, 33.3, 44, 40.5)) Cr3_positions <- as.matrix(c(90.6, 58, 72.5, 65.7, 61.5, 0, 17.5, 80.5, 16)) Cr2_mutation_type <- as.matrix(c("micro", "micro", "micro", "micro", "micro", "micro", "micro", "micro", "micro", "micro", "micro", "micro", "micro", "micro")) Cr3_mutation_type <- as.matrix(c("micro", "micro", "micro", "micro", "micro", "micro", "micro", "micro", "micro")) ######################### for (r in 1:1000) { ######################## ped1b <- ped1 ped1b[, 2:3] <- NA for (i in 1:10) { rnd1 <- sample(5, 1, replace = T) rnd2 <- rnd1 + 5 ped1b[i + 10, 2] <- paste("UR", rnd1, sep = "") ped1b[i + 10, 3] <- paste("UR", rnd2, sep = "") rnd3 <- sample(5, 1, replace = T) rnd4 <- rnd3 + 5 ped1b[i + 20, 2] <- paste("G1_", rnd3, sep = "") ped1b[i + 20, 3] <- paste("G1_", rnd4, sep = "") rnd5 <- sample(5, 1, replace = T) rnd6 <- rnd5 + 5 ped1b[i + 30, 2] <- paste("G2_", rnd5, sep = "") ped1b[i + 30, 3] <- paste("G2_", rnd6, sep = "") rnd7 <- sample(5, 1, replace = T) rnd8 <- rnd5 + 5 ped1b[i + 40, 2] <- paste("G3_", rnd7, sep = "") ped1b[i + 40, 3] <- paste("G3_", rnd8, sep = "") } ped2 <- matrix(NA, 150, 3) ped3 <- matrix(NA, 150, 3) for (t in 1:5) { for (i in 1:30) { ped2[(30 * t) + i - 30, ] <- c(paste("G2_F", t, "_", i, sep = ""), paste("Fnd", t, "F", sep = ""), paste("Fnd", t, "M", sep = "")) ped3[(30 * t) + i - 30, ] <- c(paste("G2_M", t, "_", i, sep = ""), paste("Fnd", t, "F", sep = ""), paste("Fnd", t, "M", sep = "")) } } rm(rnd1, rnd2, rnd3, rnd4, rnd5, rnd6, rnd7, rnd8) M_ped <- matrix(NA, 100, 3) F_ped <- matrix(NA, 100, 3) ped4 <- matrix(NA, 100, 3) for (t in 1:5) { for (i in 1:20) { ped4[20 * t - 20 + i, 1] <- paste("UR", i, "_", t, sep = "") ran_F <- sample(1:150, 1) ran_M <- sample(1:150, 1) ped4[20 * t - 20 + i, 2] <- paste(ped2[ran_F, 1], 20 * t - 20 + i, sep = "_") ped4[20 * t - 20 + i, 3] <- paste(ped3[ran_M, 1], 20 * t - 20 + i, sep = "_") F_ped[20 * t - 20 + i, ] <- c(paste(ped2[ran_F, 1], 20 * t - 20 + i, sep = "_"), ped2[ran_F, 2], ped2[ran_F, 3]) M_ped[20 * t - 20 + i, ] <- c(paste(ped3[ran_M, 1], 20 * t - 20 + i, sep = "_"), ped2[ran_M, 2], ped2[ran_M, 3]) rm(ran_F, ran_M) } } colnames(ped4) <- c("id", "dam", "sire") colnames(F_ped) <- c("id", "dam", "sire") colnames(M_ped) <- c("id", "dam", "sire") ped6 <- rbind(ped1b, F_ped, M_ped, ped4, ped5) pedigree <- ped6 simped_Cr2 <- genomesim(pedigree = pedigree, initFreqs = ACr2, positions = Cr2_positions, mutationType = Cr2_mutation_type, returnG = "y", mutationRate = 0) simped_Cr3 <- genomesim(pedigree = pedigree, initFreqs = ACr3, positions = Cr3_positions, mutationType = Cr3_mutation_type, returnG = "y", mutationRate = 0) doubleid_data_Cr2 <- as.matrix(data.frame(cross = rownames(simped_Cr2$genomes[831:1080, ]), simped_Cr2$genomes[831:1080, ], row.names = NULL)) doubleid_data_Cr3 <- as.matrix(data.frame(cross = rownames(simped_Cr3$genomes[831:1080, ]), simped_Cr3$genomes[831:1080, ], row.names = NULL)) mat <- matrix(NA, 125, 47) for (n in 1:(ncol(doubleid_data_Cr2) - 2)) { for (m in 1:125) { mat[m, 1] <- doubleid_data_Cr2[m * 2, 2] mat[m, (n * 2)] <- doubleid_data_Cr2[(m * 2) - 1, n + 2] mat[m, (n * 2 + 1)] <- doubleid_data_Cr2[(m * 2), n + 2] mat[m, (n * 2)] <- doubleid_data_Cr2[(m * 2) - 1, n + 2] mat[m, (n * 2 + 1)] <- doubleid_data_Cr2[(m * 2), n + 2] } } for (n in 1:(ncol(doubleid_data_Cr3) - 2)) { for (m in 1:125) { mat[m, ((n + 14) * 2)] <- doubleid_data_Cr3[(m * 2) - 1, n + 2] mat[m, ((n + 14) * 2 + 1)] <- doubleid_data_Cr3[(m * 2), n + 2] mat[m, ((n + 14) * 2)] <- doubleid_data_Cr3[(m * 2) - 1, n + 2] mat[m, ((n + 14) * 2 + 1)] <- doubleid_data_Cr3[(m * 2), n + 2] } } data1 <- mat[, 2:ncol(mat)] B_Cr2 <- extractA(data1[, 1:28]) Cr2_positions_2 <- matrix(NA, 14, 1) B_Cr2_2 <- vector(mode = "list") mute1 <- matrix(NA, 14, 1) for (b in 1:14) { if (length(B_Cr2[[b]]) > 1) { B_Cr2_2 <- c(B_Cr2_2, B_Cr2[b]) Cr2_positions_2[b, ] <- Cr2_positions[b, ] mute1[b, ] <- Cr2_mutation_type[b, ] } } Cr2_pos <- as.numeric(na.omit(Cr2_positions_2)) Cr2_mute_type <- as.character(na.omit(mute1)) B_Cr3 <- extractA(data1[, 29:46]) mute2 <- matrix(NA, 14, 1) Cr3_positions_2 <- matrix(NA, 9, 1) B_Cr3_2 <- vector(mode = "list") for (b in 1:9) { if (length(B_Cr3[[b]]) > 1) { B_Cr3_2 <- c(B_Cr3_2, B_Cr3[b]) Cr3_positions_2[b, ] <- Cr3_positions[b, ] mute2[b, ] <- Cr3_mutation_type[b, ] } } Cr3_pos <- as.numeric(na.omit(Cr3_positions_2)) Cr3_mute_type <- as.character(na.omit(mute2)) simped_Cr2_2 <- genomesim(pedigree = pedigree2, initFreqs = B_Cr2_2, positions = Cr2_pos, mutationType = Cr2_mute_type, returnG = "y", mutationRate = 0) simped_Cr3_2 <- genomesim(pedigree = pedigree2, initFreqs = B_Cr3_2, positions = Cr3_pos, mutationType = Cr3_mute_type, returnG = "y", mutationRate = 0) doubleid_data_Cr2_2 <- as.matrix(data.frame(cross = rownames(simped_Cr2_2$genomes[411:660, ]), simped_Cr2_2$genomes[411:660, ], row.names = NULL)) doubleid_data_Cr3_2 <- as.matrix(data.frame(cross = rownames(simped_Cr3_2$genomes[411:660, ]), simped_Cr3_2$genomes[411:660, ], row.names = NULL)) rm(pedigree, ped6, ped4, M_ped, F_ped, ped2, ped3, mute1, mute2, Cr2_mute_type, Cr3_mute_type, B_Cr2, B_Cr3, B_Cr2_2, B_Cr3_2, Cr2_positions_2, Cr3_positions_2, Cr2_loc, Cr3_loc, Cr2_pos, Cr3_pos) Cr2_loc <- ncol(doubleid_data_Cr2_2) - 2 Cr3_loc <- ncol(doubleid_data_Cr3_2) - 2 mat_2 <- matrix(NA, 125, 2 * (Cr2_loc + Cr3_loc) + 1) for (n in 1:Cr2_loc) { for (m in 1:125) { mat_2[m, 1] <- doubleid_data_Cr2_2[m * 2, 2] mat_2[m, (n * 2)] <- doubleid_data_Cr2_2[(m * 2) - 1, n + 2] mat_2[m, (n * 2 + 1)] <- doubleid_data_Cr2_2[(m * 2), n + 2] mat_2[m, (n * 2)] <- doubleid_data_Cr2_2[(m * 2) - 1, n + 2] mat_2[m, (n * 2 + 1)] <- doubleid_data_Cr2_2[(m * 2), n + 2] } } for (n in 1:Cr3_loc) { for (m in 1:125) { mat_2[m, ((n + Cr2_loc) * 2)] <- doubleid_data_Cr3_2[(m * 2) - 1, n + 2] mat_2[m, ((n + Cr2_loc) * 2 + 1)] <- doubleid_data_Cr3_2[(m * 2), n + 2] mat_2[m, ((n + Cr2_loc) * 2)] <- doubleid_data_Cr3_2[(m * 2) - 1, n + 2] mat_2[m, ((n + Cr2_loc) * 2 + 1)] <- doubleid_data_Cr3_2[(m * 2), n + 2] } } data <- mat_2[, 2:ncol(mat_2)] ###### individuals <- nrow(data) loci <- (ncol(data))/2 n_alleles <- array() for (l in 1:loci) { g <- 2 * l - 1 h <- 2 * l n_alleles[l] <- length(unique(c(unique(data[, g]), unique(data[, h])))) } complete_data <- data if (min(n_alleles) < 2) { drop_list <- "" for (d in 1:loci) { if (n_alleles[d] < 2) { if (drop_list[1] == "") { drop_list <- d } else { drop_list <- c(drop_list, d) } } } column_droplist <- c(drop_list * 2, drop_list * 2 - 1) complete_data <- data[, -(column_droplist)] rm(column_droplist) } rm(individuals, n_alleles, g, h, loci, l, doubleid_data_Cr2, doubleid_data_Cr3, doubleid_data_Cr2_2, doubleid_data_Cr3_2, data, data1, simped_Cr2, simped_Cr3, simped_Cr2_2, simped_Cr3_2) individuals <- nrow(complete_data) loci <- ncol(complete_data)/2 n_alleles <- array(loci) for (l in 1:loci) { g <- 2 * l - 1 h <- 2 * l n_alleles[l] <- length(unique(c(unique(complete_data[, g]), unique(complete_data[, h])))) } n <- 125 A2 <- extractA(complete_data) nloci <- (ncol(complete_data)/2) af.s <- vector(mode = "list", length = length(A2)) for (a in 1:nloci) { af.s[[a]] <- matrix(NA, 1, length(A2[[a]])) af.s[[a]] <- unname(A2[[a]]) } an.s <- vector(mode = "list", length = length(A2)) for (a in 1:nloci) { an.s[[a]] <- matrix(NA, 1, length(A2[[a]])) an.s[[a]] <- as.numeric(names(A2[[a]])) } dat <- vector(mode = "list", length = length(af.s)) for (k in 1:length(af.s)) { mat <- matrix(NA, n^2, 4) for (j in 1:n) { for (i in 1:n) { mat[i + (j - 1) * n, 1] <- as.numeric(complete_data[j, k * 2 - 1]) mat[i + (j - 1) * n, 2] <- as.numeric(complete_data[i, k * 2 - 1]) mat[i + (j - 1) * n, 3] <- as.numeric(complete_data[j, k * 2]) mat[i + (j - 1) * n, 4] <- as.numeric(complete_data[i, k * 2]) } } dat[[k]] <- mat } Simulated_Queller <- Queller(dat, n = 125^2, af.s, an.s) Queller_matrix <- matrix(ncol = 125, nrow = 125) Queller_matrix[cbind(pedlist$ind1, pedlist$ind2)] <- Simulated_Queller mantel_results <- mantel(as.dist(pedigree_matrix) ~ as.dist(Queller_matrix), nperm = 100) numeric_results <- as.numeric(mantel_results) RLTresults[r, ] <- rbind(numeric_results[1], numeric_results[2], numeric_results[5], numeric_results[6]) write.table(complete_data, "simulated_data.txt", sep = "\t", col.names = F) Rhh_output <- mlh("simulated_data.txt", "mlh_output.txt", "0", 3) Cor <- cor.test(expected_f$f, Rhh_output$SH) Cor$estimate Cor$p.value bcCor <- cor.test(bcPower(Rhh_output$SH + 0.001, coef(powerTransform(Rhh_output$SH + 0.001))), expected_f$f) bcCor$estimate bcCor$p.value IBDresults[r, ] <- rbind(Cor$estimate, Cor$p.value, bcCor$estimate, bcCor$p.value) allele_results[r, ] <- (sum(n_alleles)/length(n_alleles)) rm(Rhh_output, Cor, bcCor, complete_data, mantel_results, Queller_matrix, nloci, a, i, j, k, m, n, af.s, an.s, dat, numeric_results, Simulated_Queller, mat, mat_2) } IBDresults RLTresults # Inbreeding Results mean(IBDresults$Cor) quantile(IBDresults$Cor, probs = (0.025)) quantile(IBDresults$Cor, probs = (0.975)) # Relatedness Results mean(RLTresults$Mantelr) quantile(RLTresults$P.Value, probs = (0.025)) quantile(RLTresults$P.Value, probs = (0.975)) # Allele Results mean(allele_results$allele_per_locus) quantile(allele_results$alleles_per_locus, probs = (0.025))