Read in our coverage data and delete all rows that do not correspond to individual species:
if (!require("rentrez")) {install.packages("rentrez")}
## Loading required package: rentrez
library(rentrez)
library(kableExtra)
dir <- "~/Grive/Slater_Lab/Charadriiformes/"
coverage <- read.csv(paste0(dir, "Coverage.csv"), stringsAsFactors = F)
coverage <- coverage[-c(386, 387, 394, 395), ]
Compare our taxonomic coverage with that of Burleigh et al. (2015) for all overlapping loci. Note that the locus-specific alignments read in by the function below were created by the aln_splitter.R
script. This means they have already been subsampled down to just charadriiforms, stripped of all-gap sequences, verified to contain at least one charadriiform sequences that does not consist exclusively of gaps, and that the name of every sequence has been stripped of the order and family prefixes.
missing.from.spreadsheet <- function(name1, name2) {
dir <- "~/Grive/Slater_Lab/Charadriiformes/"
# Get the full scientific names (incl. the family prefix) of all spp. in our coverage table
ournames <- paste(coverage[, 2], gsub(" ", "_", coverage[, 3]), sep = "_")
# Extract those scientific names for which a given column contains non-empty cells
ourcov <- ournames[grepl("[0-9]", coverage[, which(colnames(coverage) == name1)])]
# Read in the corresponding Burleigh et al. alignment
phy <- read.table(paste0(dir, "Burleigh_et_al_data/gene_alignments/", name2, ".phy"),
stringsAsFactors = F)
# Extract names
names <- phy[seq(1, nrow(phy), by = 2), ]
# Grab a dictionary mapping the Burleigh et al. names onto our own taxonomy
syn_dict <- read.csv(paste0(dir, "Megaphylogenies/taxon_dictionary.csv"), stringsAsFactors = F)
# Reformat the 'Burleigh_et_al' column to account for the absence of order and family prefixes
# from the sequences names in the alignments
syn_dict$Burleigh_et_al <- sapply(strsplit(syn_dict$Burleigh_et_al, "_"),
\(x) paste(tail(x, 2), collapse = "_"))
# Reconcile the Burleigh et al. names with our own taxonomy
ind_vect <- sapply(names, \(x) which(syn_dict$Burleigh_et_al == x))
# Some species are recognized as valid by Burleigh et al. but not us. For these, the operation
# above will return an empty index, coercing 'ind_vect' to a list. Here, we print out the names
# of such species, and coerce the list back to a vector, which removes all such empty hits.
empty <- Filter(\(x) length(x) == 0, ind_vect)
if (length(empty) > 0) {
ln <- gsub("X", "", name1)
print(paste0("Unrecognized species for ", ln, ": ", paste(names(empty), collapse = ", ")))
}
burleigh <- syn_dict$Our_TE_tree[unlist(ind_vect)]
test <- setdiff(burleigh, ourcov)
return(sort(test))
}
sprsheet_names <- c("X12S", "Cox1", "Cox2", "CytB", "ND2", "ND3", "ND4", "ALDOB", "CMOS", "FGB7",
"MB2", "MUSK", "NTF3", "ODC", "RAG1")
burleigh_names <- c("12S", "COI", "COII", "CytB", "ND2", "ND3", "ND4", "ALDOB", "CMOS", "FGBint67",
"MB", "MUSK", "NTF3", "ODC", "RAG1")
missing <- Map(\(x, y) missing.from.spreadsheet(x, y), x = sprsheet_names, y = burleigh_names)
## [1] "Unrecognized species for Cox1: Larus_thayeri, Tringa_sp"
## [1] "Unrecognized species for CytB: Larus_thayeri"
## [1] "Unrecognized species for ND2: Numenius_hudsonicus"
missing
## $X12S
## [1] "Haematopodidae_Haematopus_ostralegus"
## [2] "Laridae_Larus_canus"
## [3] "Scolopacidae_Coenocorypha_aucklandica"
## [4] "Scolopacidae_Tringa_semipalmata"
##
## $Cox1
## [1] "Burhinidae_Burhinus_grallarius"
##
## $Cox2
## [1] "Burhinidae_Burhinus_grallarius"
## [2] "Charadriidae_Ochthodromus_marginatus"
## [3] "Charadriidae_Ochthodromus_montanus"
## [4] "Haematopodidae_Haematopus_ostralegus"
## [5] "Laridae_Anous_stolidus"
## [6] "Laridae_Chroicocephalus_bulleri"
## [7] "Laridae_Chroicocephalus_cirrocephalus"
## [8] "Laridae_Chroicocephalus_genei"
## [9] "Laridae_Chroicocephalus_hartlaubii"
## [10] "Laridae_Chroicocephalus_novaehollandiae"
## [11] "Laridae_Chroicocephalus_philadelphia"
## [12] "Laridae_Chroicocephalus_serranus"
## [13] "Laridae_Larus_schistisagus"
## [14] "Scolopacidae_Actitis_hypoleucos"
## [15] "Scolopacidae_Actitis_macularius"
## [16] "Scolopacidae_Calidris_acuminata"
## [17] "Scolopacidae_Gallinago_delicata"
## [18] "Scolopacidae_Gallinago_nigripennis"
## [19] "Scolopacidae_Gallinago_paraguaiae"
## [20] "Scolopacidae_Numenius_arquata"
## [21] "Scolopacidae_Phalaropus_tricolor"
## [22] "Scolopacidae_Tringa_brevipes"
## [23] "Scolopacidae_Tringa_flavipes"
## [24] "Scolopacidae_Tringa_incana"
## [25] "Scolopacidae_Tringa_melanoleuca"
## [26] "Scolopacidae_Tringa_semipalmata"
## [27] "Scolopacidae_Tringa_solitaria"
##
## $CytB
## [1] "Burhinidae_Burhinus_grallarius"
## [2] "Laridae_Chroicocephalus_cirrocephalus"
## [3] "Scolopacidae_Coenocorypha_aucklandica"
## [4] "Scolopacidae_Gallinago_paraguaiae"
## [5] "Scolopacidae_Limnodromus_scolopaceus"
## [6] "Scolopacidae_Phalaropus_tricolor"
## [7] "Scolopacidae_Tringa_semipalmata"
##
## $ND2
## [1] "Burhinidae_Burhinus_grallarius"
## [2] "Recurvirostridae_Recurvirostra_americana"
## [3] "Scolopacidae_Coenocorypha_aucklandica"
##
## $ND3
## [1] "Burhinidae_Burhinus_grallarius"
##
## $ND4
## [1] "Burhinidae_Burhinus_grallarius"
##
## $ALDOB
## character(0)
##
## $CMOS
## [1] "Scolopacidae_Tringa_semipalmata"
##
## $FGB7
## character(0)
##
## $MB2
## [1] "Haematopodidae_Haematopus_ostralegus"
##
## $MUSK
## character(0)
##
## $NTF3
## character(0)
##
## $ODC
## character(0)
##
## $RAG1
## [1] "Burhinidae_Burhinus_grallarius"
## [2] "Haematopodidae_Haematopus_ostralegus"
## [3] "Laridae_Thalasseus_acuflavidus"
## [4] "Scolopacidae_Coenocorypha_aucklandica"
## [5] "Scolopacidae_Lymnocryptes_minimus"
## [6] "Scolopacidae_Phalaropus_lobatus"
## [7] "Scolopacidae_Tringa_semipalmata"
12S: All of the sequences in question were a priori excluded from our dataset:
Cox1: Burhinus grallarius sequence reassigned to Esacus magnirostris
CytB: All of the sequences in question were excluded from our dataset or reassigned:
ND2: All of the sequences in question were excluded from our dataset or reassigned:
ND3: Burhinus grallarius sequence reassigned to Esacus magnirostris
ND4: Burhinus grallarius sequence reassigned to Esacus magnirostris
CMOS: Tringa semipalmata sequence no longer identifiable to the species level after the separation of T. inornata
MB2: Haematopus ostralegus sequence not identifiable to the species level due to a mismatch between GenBank and the original paper
RAG1: All of the sequences in question were excluded from our dataset or reassigned:
As we can see, the only sequences included by Burleigh et al. (2015) that we failed to incorporate into our dataset are the 27 Cox2 samples listed above. Here, we will try to obtain their accession numbers:
# We need to back-translate the names outputted by missing.from.spreadsheet() to the Burleigh
# et al. taxonomy. This is complicated by the fact that the taxonomy used in the accession number
# table is not identical to the final taxonomy used in the tree, hence the need for some ad hoc
# substitutions.
get.seq.IDs <- function(miss_sp_vect, gene) {
lookup_table <- read.table(paste0(dir, "Burleigh_et_al_data/TotalAccessionTable.txt"),
sep = "\t", header = T, stringsAsFactors = F)
syn_dict <- read.csv(paste0(dir, "Megaphylogenies/taxon_dictionary.csv"), stringsAsFactors = F)
# Strip the 'Burleigh_et_al' column of the order prefix
syn_dict$Burleigh_et_al <- sapply(strsplit(syn_dict$Burleigh_et_al, "_"),
\(x) paste(tail(x, 3), collapse = "_"))
ind_vect <- sapply(miss_sp_vect, \(x) which(syn_dict$Our_TE_tree == x))
empty <- Filter(\(x) length(x) == 0, ind_vect)
if (length(empty) > 0) {
print(paste0("Unrecognized species: ", paste(names(empty), collapse = ", ")))
}
reconciled_names <- syn_dict$Burleigh_et_al[unlist(ind_vect)]
not_found <- which(!reconciled_names %in% lookup_table$TAXON)
# Take care of discrepancies:
reconciled_names[not_found] <- gsub("Chroicocephalus", "Larus", reconciled_names[not_found])
reconciled_names[not_found] <- gsub("arius", "aria", reconciled_names[not_found])
reconciled_names[not_found] <- gsub("ana", "anus", reconciled_names[not_found])
reconciled_names[not_found] <- gsub("ata", "atus", reconciled_names[not_found])
# Grab the corresponding accession numbers
nums <- lookup_table[[gene]][sapply(reconciled_names, \(x) which(lookup_table$TAXON == x))]
out <- data.frame(Taxon = reconciled_names, Sequence_ID = nums, stringsAsFactors = F)
return(out)
}
# The 'TotalAccessionTable' of Burleigh et al. (2015) provides sequences IDs rather than actual
# accession numbers, but these can still be used for lookups through the NCBI web interface:
get.acc.num.from.seq.ID <- function(seq_ID, full_info = F) {
seq <- rentrez::entrez_fetch(seq_ID, db = "nuccore", rettype = "fasta")
if (full_info) {
# Parse the FASTA file to keep just the first line with sequence info
out <- strsplit(seq, "\n")[[1]][1]
} else {
# Parse the FASTA file to keep just the accession number
out <- gsub(">", "", strsplit(seq, " ")[[1]][1])
}
return(out)
}
get.acc.num.vectorized <- function(frame, full_info = F) {
if (full_info) {
out <- sapply(frame$Sequence_ID, \(x) get.acc.num.from.seq.ID(x, T))
names(out) <- NULL
} else {
frame$Accession_number <- sapply(frame$Sequence_ID, get.acc.num.from.seq.ID)
out <- subset(frame, select = -Sequence_ID)
}
return(out)
}
acc_num <- missing.from.spreadsheet("Cox2", "COII") |>
get.seq.IDs("COII.Cl181.Final_18May2011") |>
get.acc.num.vectorized()
acc_num
## Taxon Accession_number
## 1 Burhinidae_Burhinus_grallarius DQ385236.1
## 2 Charadriidae_Charadrius_marginatus AM941621.1
## 3 Charadriidae_Charadrius_montanus AY794551.1
## 4 Haematopodidae_Haematopus_ostralegus EU826413.1
## 5 Laridae_Anous_stolidus DQ989647.1
## 6 Laridae_Larus_bulleri AY584123.1
## 7 Laridae_Larus_cirrocephalus AY584119.1
## 8 Laridae_Larus_genei AY584130.1
## 9 Laridae_Larus_hartlaubii AY584121.1
## 10 Laridae_Larus_novaehollandiae AY584127.1
## 11 Laridae_Larus_philadelphia AY584115.1
## 12 Laridae_Larus_serranus AY584131.1
## 13 Laridae_Larus_schistisagus AB233984.1
## 14 Scolopacidae_Actitis_hypoleucos AY894263.1
## 15 Scolopacidae_Actitis_macularia AY894265.1
## 16 Scolopacidae_Calidris_acuminata EU826414.1
## 17 Scolopacidae_Gallinago_delicata FJ603657.1
## 18 Scolopacidae_Gallinago_nigripennis FJ603661.1
## 19 Scolopacidae_Gallinago_paraguaiae FJ603662.1
## 20 Scolopacidae_Numenius_arquata EU826408.1
## 21 Scolopacidae_Phalaropus_tricolor AY894274.1
## 22 Scolopacidae_Tringa_brevipes AY894259.1
## 23 Scolopacidae_Tringa_flavipes AY894261.1
## 24 Scolopacidae_Tringa_incanus AY894264.1
## 25 Scolopacidae_Tringa_melanoleuca AY894266.1
## 26 Scolopacidae_Tringa_semipalmatus AY894258.1
## 27 Scolopacidae_Tringa_solitaria AY894269.1
Do these sequences actually represent Cox2?
missing.from.spreadsheet("Cox2", "COII") |> get.seq.IDs("COII.Cl181.Final_18May2011") |>
get.acc.num.vectorized(full_info = T)
## [1] ">DQ385236.1 Burhinus grallarius voucher AJB6177 ATP synthase F0 subunit 6 (ATP6) gene, complete cds; mitochondrial"
## [2] ">AM941621.1 Charadrius marginatus mitochondrial ATPase6 gene and partial ATPase8 gene, isolate fh17492"
## [3] ">AY794551.1 Charadrius montanus haplotype F ATPase subunit 8 and ATPase subunit 6 genes, partial cds; mitochondrial"
## [4] ">EU826413.1 Haematopus ostralegus ATP synthase F0 subunit 8 (ATP8) and ATP synthase F0 subunit 6 (ATP6) genes, partial cds; mitochondrial"
## [5] ">DQ989647.1 Anous stolidus haplotype 3 ATPase 8 and ATPase 6 genes, partial cds; mitochondrial"
## [6] ">AY584123.1 Larus bulleri isolate JAM262 ATPase 8 gene, partial cds; and ATPase 6 gene, complete cds; mitochondrial"
## [7] ">AY584119.1 Larus cirrocephalus isolate MKP1477 ATPase 8 gene, partial cds; and ATPase 6 gene, complete cds; mitochondrial"
## [8] ">AY584130.1 Larus genei isolate C575-112849 ATPase 8 gene, partial cds; and ATPase 6 gene, complete cds; mitochondrial"
## [9] ">AY584121.1 Larus hartlaubii isolate MKP1313 ATPase 8 gene, partial cds; and ATPase 6 gene, complete cds; mitochondrial"
## [10] ">AY584127.1 Larus novaehollandiae isolate R16 ATPase 8 gene, partial cds; and ATPase 6 gene, complete cds; mitochondrial"
## [11] ">AY584115.1 Larus philadelphia isolate DL414 ATPase 8 gene, partial cds; and ATPase 6 gene, complete cds; mitochondrial"
## [12] ">AY584131.1 Larus serranus isolate C582-112856 ATPase 8 gene, partial cds; and ATPase 6 gene, complete cds; mitochondrial"
## [13] ">AB233984.1 Larus schistisagus ATPase8, ATPase6 genes for F0-ATP synthase subunit 8, F0-ATP synthase subunit 6, complete cds"
## [14] ">AY894263.1 Actitis hypoleucos ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"
## [15] ">AY894265.1 Actitis macularius ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"
## [16] ">EU826414.1 Calidris acuminata ATP synthase F0 subunit 8 (ATP8) and ATP synthase F0 subunit 6 (ATP6) genes, partial cds; mitochondrial"
## [17] ">FJ603657.1 Gallinago delicata voucher 1B-3698 ATP6 gene, partial cds; mitochondrial"
## [18] ">FJ603661.1 Gallinago nigripennis voucher Gnig01 ATP6 gene, partial cds; mitochondrial"
## [19] ">FJ603662.1 Gallinago paraguaiae voucher L50137 ATP6 gene, partial cds; mitochondrial"
## [20] ">EU826408.1 Numenius arquata ATP synthase F0 subunit 8 (ATP8) and ATP synthase F0 subunit 6 (ATP6) genes, partial cds; mitochondrial"
## [21] ">AY894274.1 Phalaropus tricolor ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"
## [22] ">AY894259.1 Heteroscelus brevipes ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"
## [23] ">AY894261.1 Tringa flavipes ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"
## [24] ">AY894264.1 Heteroscelus incanus ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"
## [25] ">AY894266.1 Tringa melanoleuca ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"
## [26] ">AY894258.1 Catoptrophorus semipalmatus ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"
## [27] ">AY894269.1 Tringa solitaria ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"
As we can see, all the sequences in question actually represent ATP6 and/or ATP8 samples, sometimes with a short fragment of Cox3. Are these actually missing from our coverage table?
acc.num.finder <- function(acc_num) {
indices <- which(coverage == acc_num, arr.ind = T)
if (length(indices) == 0) {
return(paste0(acc_num, ": missing."))
} else if (length(indices) == 2) {
return(paste0(acc_num, ": found; corresponds to ", colnames(coverage)[indices[2]],
" sequence from ", coverage$Scientific.name[indices[1]]))
} else {
return(paste0(acc_num, ": found; corresponds to ",
paste(colnames(coverage)[indices[, 2]], collapse = "/"), " sequences from ",
coverage$Scientific.name[unique(indices[, 1])]))
}
invisible(indices)
}
sapply(acc_num$Accession_number, acc.num.finder, USE.NAMES = F)
## [1] "DQ385236.1: found; corresponds to ATP6 sequence from Esacus magnirostris"
## [2] "AM941621.1: missing."
## [3] "AY794551.1: missing."
## [4] "EU826413.1: missing."
## [5] "DQ989647.1: missing."
## [6] "AY584123.1: missing."
## [7] "AY584119.1: found; corresponds to ATP6/ATP8 sequences from Chroicocephalus poiocephalus"
## [8] "AY584130.1: found; corresponds to ATP6/ATP8 sequences from Chroicocephalus genei"
## [9] "AY584121.1: missing."
## [10] "AY584127.1: missing."
## [11] "AY584115.1: missing."
## [12] "AY584131.1: found; corresponds to ATP6/ATP8 sequences from Chroicocephalus serranus"
## [13] "AB233984.1: found; corresponds to ATP6/ATP8 sequences from Larus schistisagus"
## [14] "AY894263.1: found; corresponds to ATP6/ATP8 sequences from Actitis hypoleucos"
## [15] "AY894265.1: found; corresponds to ATP6/ATP8 sequences from Actitis macularius"
## [16] "EU826414.1: found; corresponds to ATP6/ATP8 sequences from Calidris acuminata"
## [17] "FJ603657.1: found; corresponds to ATP6 sequence from Gallinago delicata"
## [18] "FJ603661.1: found; corresponds to ATP6 sequence from Gallinago nigripennis"
## [19] "FJ603662.1: found; corresponds to ATP6 sequence from Gallinago paraguaiae"
## [20] "EU826408.1: missing."
## [21] "AY894274.1: found; corresponds to ATP8 sequence from Phalaropus tricolor"
## [22] "AY894259.1: found; corresponds to ATP6/ATP8/Cox3 sequences from Tringa brevipes"
## [23] "AY894261.1: found; corresponds to ATP6/ATP8/Cox3 sequences from Tringa flavipes"
## [24] "AY894264.1: found; corresponds to ATP6/ATP8/Cox3 sequences from Tringa incana"
## [25] "AY894266.1: found; corresponds to ATP6/ATP8/Cox3 sequences from Tringa melanoleuca"
## [26] "AY894258.1: missing."
## [27] "AY894269.1: found; corresponds to ATP6/ATP8/Cox3 sequences from Tringa solitaria"
Of the remaining 10 sequences, most can also be accounted for:
This leaves AY794551.1 (Ochthodromus montanus) and DQ989647.1 (Anous stolidus) as the only sequences sampled by Burleigh et al. (2015) that are genuinely absent from our dataset.
We might also be interested in whether it would make sense to include additional loci in our analyses. We will follow the criterion that each locus considered for inclusion has to be sampled from at least as many taxa as the most sparsely locus already present in our dataset. However, we need to take into account that for a number of nuclear loci, we were able to improve coverage by extracting the corresponding sequences from the whole-genome scaffolds generated by the B10K project. We can exploit the fact that these sequences have different accession numbers (4 letters followed by 8 numbers) from the regular ones (2 letters followed by 6 numbers):
# For now, ignore those loci that were considered but not included:
coverage <- subset(coverage, select = -c(BMP2, DNAH3, PRLR, PTPN12, TRAF6))
# Also ignore columns with species names, family affiliations, complete mitogenomes, and morphology
ginfo <- apply(coverage[, -c(1:3, 19, 32)], 2, \(x) length(grep("[A-Z]{2}[0-9]{6}\\.", x)))
ginfo_df <- data.frame(Gene = gsub("X", "", names(ginfo)), Species_sampled = ginfo,
stringsAsFactors = F, row.names = NULL)
ginfo_df %>%
kable() %>%
kable_styling()
Gene | Species_sampled |
---|---|
12S | 219 |
16S | 133 |
ATP6 | 127 |
ATP8 | 122 |
Cox1 | 278 |
Cox2 | 96 |
Cox3 | 99 |
CytB | 260 |
ND1 | 105 |
ND2 | 233 |
ND3 | 122 |
ND4 | 90 |
ND4L | 91 |
ND5 | 119 |
ND6 | 85 |
ADH5 | 50 |
ALDOB | 26 |
BDNF | 22 |
CMOS | 22 |
FGB7 | 70 |
GAPDH3.5 | 25 |
GAPDH11 | 52 |
MB2 | 59 |
MUSK | 17 |
NTF3 | 22 |
ODC | 27 |
RAG1 | 175 |
We see that the most sparsely sampled locus is MUSK, with just 17 sequences other than those that were extracted from whole-genome scaffolds. Do any of the other loci sampled by Burleigh et al. (2015) exceed this number?
get.sp.num <- function(path_vect) {
pth <- "~/Grive/Slater_Lab/Charadriiformes/Burleigh_et_al_data/gene_alignments/"
alns <- Map(\(x) read.table(paste0(pth, x), stringsAsFactors = F), path_vect)
locnames <- sapply(strsplit(path_vect, "\\."), \(x) x[1])
# In each .phy file, sequence names alternate with the sequences themselves, to the total number
# of distinct sequences is equal to half the number of rows
seqnums <- sapply(alns, \(x) nrow(x)/2)
out <- data.frame(Gene = locnames, Species_sampled = seqnums, stringsAsFactors = F)
rownames(out) <- NULL
return(out)
}
burleigh_loci <- list.files(paste0(dir, "Burleigh_et_al_data/gene_alignments"), pattern = ".phy")
nonincluded_loci <- burleigh_loci[-c(1, 2, 4:6, 8, 14, 18, 19, 21:23, 25, 26, 28)]
get.sp.num(nonincluded_loci) %>%
kable() %>%
kable_styling()
Gene | Species_sampled |
---|---|
CLTC | 11 |
CRYAA | 11 |
EEF2 | 8 |
EGR1 | 15 |
EGR1exon | 15 |
FGBint4 | 8 |
FGBint5 | 12 |
GH1 | 11 |
HMGN2 | 3 |
IRF2 | 11 |
MYC | 14 |
NGF | 11 |
PCBD1 | 8 |
RHO | 10 |
TGFB2 | 12 |
TPM1 | 9 |
There are no loci whose coverage is as high or higher than that of the most sparsely covered presently included locus.