Read in our coverage data and delete all rows that do not correspond to individual species:

if (!require("rentrez")) {install.packages("rentrez")}
## Loading required package: rentrez
library(rentrez)
library(kableExtra)

dir <- "~/Grive/Slater_Lab/Charadriiformes/"
coverage <- read.csv(paste0(dir, "Coverage.csv"), stringsAsFactors = F)
coverage <- coverage[-c(386, 387, 394, 395), ]

Compare our taxonomic coverage with that of Burleigh et al. (2015) for all overlapping loci. Note that the locus-specific alignments read in by the function below were created by the aln_splitter.R script. This means they have already been subsampled down to just charadriiforms, stripped of all-gap sequences, verified to contain at least one charadriiform sequences that does not consist exclusively of gaps, and that the name of every sequence has been stripped of the order and family prefixes.

missing.from.spreadsheet <- function(name1, name2) {
  dir <- "~/Grive/Slater_Lab/Charadriiformes/"
  # Get the full scientific names (incl. the family prefix) of all spp. in our coverage table
  ournames <- paste(coverage[, 2], gsub(" ", "_", coverage[, 3]), sep = "_")
  # Extract those scientific names for which a given column contains non-empty cells
  ourcov <- ournames[grepl("[0-9]", coverage[, which(colnames(coverage) == name1)])]
  # Read in the corresponding Burleigh et al. alignment
  phy <- read.table(paste0(dir, "Burleigh_et_al_data/gene_alignments/", name2, ".phy"),
                    stringsAsFactors = F)
  # Extract names
  names <- phy[seq(1, nrow(phy), by = 2), ]
  # Grab a dictionary mapping the Burleigh et al. names onto our own taxonomy
  syn_dict <- read.csv(paste0(dir, "Megaphylogenies/taxon_dictionary.csv"), stringsAsFactors = F)
  # Reformat the 'Burleigh_et_al' column to account for the absence of order and family prefixes
  # from the sequences names in the alignments
  syn_dict$Burleigh_et_al <- sapply(strsplit(syn_dict$Burleigh_et_al, "_"),
                                    \(x) paste(tail(x, 2), collapse = "_"))
  # Reconcile the Burleigh et al. names with our own taxonomy
  ind_vect <- sapply(names, \(x) which(syn_dict$Burleigh_et_al == x))
  # Some species are recognized as valid by Burleigh et al. but not us. For these, the operation
  # above will return an empty index, coercing 'ind_vect' to a list. Here, we print out the names
  # of such species, and coerce the list back to a vector, which removes all such empty hits.
  empty <- Filter(\(x) length(x) == 0, ind_vect)
  if (length(empty) > 0) {
    ln <- gsub("X", "", name1)
    print(paste0("Unrecognized species for ", ln, ": ", paste(names(empty), collapse = ", ")))
  }
  burleigh <- syn_dict$Our_TE_tree[unlist(ind_vect)]
  test <- setdiff(burleigh, ourcov)
  return(sort(test))
}

sprsheet_names <- c("X12S", "Cox1", "Cox2", "CytB", "ND2", "ND3", "ND4", "ALDOB", "CMOS", "FGB7",
                    "MB2", "MUSK", "NTF3", "ODC", "RAG1")
burleigh_names <- c("12S", "COI", "COII", "CytB", "ND2", "ND3", "ND4", "ALDOB", "CMOS", "FGBint67",
                    "MB", "MUSK", "NTF3", "ODC", "RAG1")

missing <- Map(\(x, y) missing.from.spreadsheet(x, y), x = sprsheet_names, y = burleigh_names)
## [1] "Unrecognized species for Cox1: Larus_thayeri, Tringa_sp"
## [1] "Unrecognized species for CytB: Larus_thayeri"
## [1] "Unrecognized species for ND2: Numenius_hudsonicus"
missing
## $X12S
## [1] "Haematopodidae_Haematopus_ostralegus" 
## [2] "Laridae_Larus_canus"                  
## [3] "Scolopacidae_Coenocorypha_aucklandica"
## [4] "Scolopacidae_Tringa_semipalmata"      
## 
## $Cox1
## [1] "Burhinidae_Burhinus_grallarius"
## 
## $Cox2
##  [1] "Burhinidae_Burhinus_grallarius"         
##  [2] "Charadriidae_Ochthodromus_marginatus"   
##  [3] "Charadriidae_Ochthodromus_montanus"     
##  [4] "Haematopodidae_Haematopus_ostralegus"   
##  [5] "Laridae_Anous_stolidus"                 
##  [6] "Laridae_Chroicocephalus_bulleri"        
##  [7] "Laridae_Chroicocephalus_cirrocephalus"  
##  [8] "Laridae_Chroicocephalus_genei"          
##  [9] "Laridae_Chroicocephalus_hartlaubii"     
## [10] "Laridae_Chroicocephalus_novaehollandiae"
## [11] "Laridae_Chroicocephalus_philadelphia"   
## [12] "Laridae_Chroicocephalus_serranus"       
## [13] "Laridae_Larus_schistisagus"             
## [14] "Scolopacidae_Actitis_hypoleucos"        
## [15] "Scolopacidae_Actitis_macularius"        
## [16] "Scolopacidae_Calidris_acuminata"        
## [17] "Scolopacidae_Gallinago_delicata"        
## [18] "Scolopacidae_Gallinago_nigripennis"     
## [19] "Scolopacidae_Gallinago_paraguaiae"      
## [20] "Scolopacidae_Numenius_arquata"          
## [21] "Scolopacidae_Phalaropus_tricolor"       
## [22] "Scolopacidae_Tringa_brevipes"           
## [23] "Scolopacidae_Tringa_flavipes"           
## [24] "Scolopacidae_Tringa_incana"             
## [25] "Scolopacidae_Tringa_melanoleuca"        
## [26] "Scolopacidae_Tringa_semipalmata"        
## [27] "Scolopacidae_Tringa_solitaria"          
## 
## $CytB
## [1] "Burhinidae_Burhinus_grallarius"       
## [2] "Laridae_Chroicocephalus_cirrocephalus"
## [3] "Scolopacidae_Coenocorypha_aucklandica"
## [4] "Scolopacidae_Gallinago_paraguaiae"    
## [5] "Scolopacidae_Limnodromus_scolopaceus" 
## [6] "Scolopacidae_Phalaropus_tricolor"     
## [7] "Scolopacidae_Tringa_semipalmata"      
## 
## $ND2
## [1] "Burhinidae_Burhinus_grallarius"          
## [2] "Recurvirostridae_Recurvirostra_americana"
## [3] "Scolopacidae_Coenocorypha_aucklandica"   
## 
## $ND3
## [1] "Burhinidae_Burhinus_grallarius"
## 
## $ND4
## [1] "Burhinidae_Burhinus_grallarius"
## 
## $ALDOB
## character(0)
## 
## $CMOS
## [1] "Scolopacidae_Tringa_semipalmata"
## 
## $FGB7
## character(0)
## 
## $MB2
## [1] "Haematopodidae_Haematopus_ostralegus"
## 
## $MUSK
## character(0)
## 
## $NTF3
## character(0)
## 
## $ODC
## character(0)
## 
## $RAG1
## [1] "Burhinidae_Burhinus_grallarius"       
## [2] "Haematopodidae_Haematopus_ostralegus" 
## [3] "Laridae_Thalasseus_acuflavidus"       
## [4] "Scolopacidae_Coenocorypha_aucklandica"
## [5] "Scolopacidae_Lymnocryptes_minimus"    
## [6] "Scolopacidae_Phalaropus_lobatus"      
## [7] "Scolopacidae_Tringa_semipalmata"

Notes

As we can see, the only sequences included by Burleigh et al. (2015) that we failed to incorporate into our dataset are the 27 Cox2 samples listed above. Here, we will try to obtain their accession numbers:

# We need to back-translate the names outputted by missing.from.spreadsheet() to the Burleigh
# et al. taxonomy. This is complicated by the fact that the taxonomy used in the accession number
# table is not identical to the final taxonomy used in the tree, hence the need for some ad hoc
# substitutions.

get.seq.IDs <- function(miss_sp_vect, gene) {
  lookup_table <- read.table(paste0(dir, "Burleigh_et_al_data/TotalAccessionTable.txt"),
                             sep = "\t", header = T, stringsAsFactors = F)
  syn_dict <- read.csv(paste0(dir, "Megaphylogenies/taxon_dictionary.csv"), stringsAsFactors = F)
  # Strip the 'Burleigh_et_al' column of the order prefix
  syn_dict$Burleigh_et_al <- sapply(strsplit(syn_dict$Burleigh_et_al, "_"),
                                    \(x) paste(tail(x, 3), collapse = "_"))
  ind_vect <- sapply(miss_sp_vect, \(x) which(syn_dict$Our_TE_tree == x))
  empty <- Filter(\(x) length(x) == 0, ind_vect)
  if (length(empty) > 0) {
    print(paste0("Unrecognized species: ", paste(names(empty), collapse = ", ")))
  }
  reconciled_names <- syn_dict$Burleigh_et_al[unlist(ind_vect)]
  not_found <- which(!reconciled_names %in% lookup_table$TAXON)
  # Take care of discrepancies:
  reconciled_names[not_found] <- gsub("Chroicocephalus", "Larus", reconciled_names[not_found])
  reconciled_names[not_found] <- gsub("arius", "aria", reconciled_names[not_found])
  reconciled_names[not_found] <- gsub("ana", "anus", reconciled_names[not_found])
  reconciled_names[not_found] <- gsub("ata", "atus", reconciled_names[not_found])
  # Grab the corresponding accession numbers
  nums <- lookup_table[[gene]][sapply(reconciled_names, \(x) which(lookup_table$TAXON == x))]
  out <- data.frame(Taxon = reconciled_names, Sequence_ID = nums, stringsAsFactors = F)
  return(out)
}

# The 'TotalAccessionTable' of Burleigh et al. (2015) provides sequences IDs rather than actual
# accession numbers, but these can still be used for lookups through the NCBI web interface:

get.acc.num.from.seq.ID <- function(seq_ID, full_info = F) {
  seq <- rentrez::entrez_fetch(seq_ID, db = "nuccore", rettype = "fasta")
  if (full_info) {
    # Parse the FASTA file to keep just the first line with sequence info
    out <- strsplit(seq, "\n")[[1]][1]
  } else {
    # Parse the FASTA file to keep just the accession number
    out <- gsub(">", "", strsplit(seq, " ")[[1]][1])
  }
  return(out)
}

get.acc.num.vectorized <- function(frame, full_info = F) {
  if (full_info) {
    out <- sapply(frame$Sequence_ID, \(x) get.acc.num.from.seq.ID(x, T))
    names(out) <- NULL
  } else {
    frame$Accession_number <- sapply(frame$Sequence_ID, get.acc.num.from.seq.ID)
    out <- subset(frame, select = -Sequence_ID)
  }
  return(out)
}

acc_num <- missing.from.spreadsheet("Cox2", "COII") |>
             get.seq.IDs("COII.Cl181.Final_18May2011") |>
             get.acc.num.vectorized()
acc_num
##                                   Taxon Accession_number
## 1        Burhinidae_Burhinus_grallarius       DQ385236.1
## 2    Charadriidae_Charadrius_marginatus       AM941621.1
## 3      Charadriidae_Charadrius_montanus       AY794551.1
## 4  Haematopodidae_Haematopus_ostralegus       EU826413.1
## 5                Laridae_Anous_stolidus       DQ989647.1
## 6                 Laridae_Larus_bulleri       AY584123.1
## 7           Laridae_Larus_cirrocephalus       AY584119.1
## 8                   Laridae_Larus_genei       AY584130.1
## 9              Laridae_Larus_hartlaubii       AY584121.1
## 10        Laridae_Larus_novaehollandiae       AY584127.1
## 11           Laridae_Larus_philadelphia       AY584115.1
## 12               Laridae_Larus_serranus       AY584131.1
## 13           Laridae_Larus_schistisagus       AB233984.1
## 14      Scolopacidae_Actitis_hypoleucos       AY894263.1
## 15       Scolopacidae_Actitis_macularia       AY894265.1
## 16      Scolopacidae_Calidris_acuminata       EU826414.1
## 17      Scolopacidae_Gallinago_delicata       FJ603657.1
## 18   Scolopacidae_Gallinago_nigripennis       FJ603661.1
## 19    Scolopacidae_Gallinago_paraguaiae       FJ603662.1
## 20        Scolopacidae_Numenius_arquata       EU826408.1
## 21     Scolopacidae_Phalaropus_tricolor       AY894274.1
## 22         Scolopacidae_Tringa_brevipes       AY894259.1
## 23         Scolopacidae_Tringa_flavipes       AY894261.1
## 24          Scolopacidae_Tringa_incanus       AY894264.1
## 25      Scolopacidae_Tringa_melanoleuca       AY894266.1
## 26     Scolopacidae_Tringa_semipalmatus       AY894258.1
## 27        Scolopacidae_Tringa_solitaria       AY894269.1

Do these sequences actually represent Cox2?

missing.from.spreadsheet("Cox2", "COII") |> get.seq.IDs("COII.Cl181.Final_18May2011") |>
  get.acc.num.vectorized(full_info = T)
##  [1] ">DQ385236.1 Burhinus grallarius voucher AJB6177 ATP synthase F0 subunit 6 (ATP6) gene, complete cds; mitochondrial"                                                                                                          
##  [2] ">AM941621.1 Charadrius marginatus mitochondrial ATPase6 gene and partial ATPase8 gene, isolate fh17492"                                                                                                                      
##  [3] ">AY794551.1 Charadrius montanus haplotype F ATPase subunit 8 and ATPase subunit 6 genes, partial cds; mitochondrial"                                                                                                         
##  [4] ">EU826413.1 Haematopus ostralegus ATP synthase F0 subunit 8 (ATP8) and ATP synthase F0 subunit 6 (ATP6) genes, partial cds; mitochondrial"                                                                                   
##  [5] ">DQ989647.1 Anous stolidus haplotype 3 ATPase 8 and ATPase 6 genes, partial cds; mitochondrial"                                                                                                                              
##  [6] ">AY584123.1 Larus bulleri isolate JAM262 ATPase 8 gene, partial cds; and ATPase 6 gene, complete cds; mitochondrial"                                                                                                         
##  [7] ">AY584119.1 Larus cirrocephalus isolate MKP1477 ATPase 8 gene, partial cds; and ATPase 6 gene, complete cds; mitochondrial"                                                                                                  
##  [8] ">AY584130.1 Larus genei isolate C575-112849 ATPase 8 gene, partial cds; and ATPase 6 gene, complete cds; mitochondrial"                                                                                                      
##  [9] ">AY584121.1 Larus hartlaubii isolate MKP1313 ATPase 8 gene, partial cds; and ATPase 6 gene, complete cds; mitochondrial"                                                                                                     
## [10] ">AY584127.1 Larus novaehollandiae isolate R16 ATPase 8 gene, partial cds; and ATPase 6 gene, complete cds; mitochondrial"                                                                                                    
## [11] ">AY584115.1 Larus philadelphia isolate DL414 ATPase 8 gene, partial cds; and ATPase 6 gene, complete cds; mitochondrial"                                                                                                     
## [12] ">AY584131.1 Larus serranus isolate C582-112856 ATPase 8 gene, partial cds; and ATPase 6 gene, complete cds; mitochondrial"                                                                                                   
## [13] ">AB233984.1 Larus schistisagus ATPase8, ATPase6 genes for F0-ATP synthase subunit 8, F0-ATP synthase subunit 6, complete cds"                                                                                                
## [14] ">AY894263.1 Actitis hypoleucos ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"         
## [15] ">AY894265.1 Actitis macularius ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"         
## [16] ">EU826414.1 Calidris acuminata ATP synthase F0 subunit 8 (ATP8) and ATP synthase F0 subunit 6 (ATP6) genes, partial cds; mitochondrial"                                                                                      
## [17] ">FJ603657.1 Gallinago delicata voucher 1B-3698 ATP6 gene, partial cds; mitochondrial"                                                                                                                                        
## [18] ">FJ603661.1 Gallinago nigripennis voucher Gnig01 ATP6 gene, partial cds; mitochondrial"                                                                                                                                      
## [19] ">FJ603662.1 Gallinago paraguaiae voucher L50137 ATP6 gene, partial cds; mitochondrial"                                                                                                                                       
## [20] ">EU826408.1 Numenius arquata ATP synthase F0 subunit 8 (ATP8) and ATP synthase F0 subunit 6 (ATP6) genes, partial cds; mitochondrial"                                                                                        
## [21] ">AY894274.1 Phalaropus tricolor ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"        
## [22] ">AY894259.1 Heteroscelus brevipes ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"      
## [23] ">AY894261.1 Tringa flavipes ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"            
## [24] ">AY894264.1 Heteroscelus incanus ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"       
## [25] ">AY894266.1 Tringa melanoleuca ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"         
## [26] ">AY894258.1 Catoptrophorus semipalmatus ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"
## [27] ">AY894269.1 Tringa solitaria ATP synthase F0 subunit 8 (ATP8) gene, partial cds; ATP synthase F0 subunit 6 (ATP6) gene, complete cds; and cytochrome c oxidase subunit III (CO3) gene, partial cds; mitochondrial"

As we can see, all the sequences in question actually represent ATP6 and/or ATP8 samples, sometimes with a short fragment of Cox3. Are these actually missing from our coverage table?

acc.num.finder <- function(acc_num) {
  indices <- which(coverage == acc_num, arr.ind = T)
  if (length(indices) == 0) {
    return(paste0(acc_num, ": missing."))
  } else if (length(indices) == 2) {
    return(paste0(acc_num, ": found; corresponds to ", colnames(coverage)[indices[2]],
                  " sequence from ", coverage$Scientific.name[indices[1]]))
  } else {
    return(paste0(acc_num, ": found; corresponds to ",
                  paste(colnames(coverage)[indices[, 2]], collapse = "/"), " sequences from ",
                  coverage$Scientific.name[unique(indices[, 1])]))
  }
  invisible(indices)
}

sapply(acc_num$Accession_number, acc.num.finder, USE.NAMES = F)
##  [1] "DQ385236.1: found; corresponds to ATP6 sequence from Esacus magnirostris"               
##  [2] "AM941621.1: missing."                                                                   
##  [3] "AY794551.1: missing."                                                                   
##  [4] "EU826413.1: missing."                                                                   
##  [5] "DQ989647.1: missing."                                                                   
##  [6] "AY584123.1: missing."                                                                   
##  [7] "AY584119.1: found; corresponds to ATP6/ATP8 sequences from Chroicocephalus poiocephalus"
##  [8] "AY584130.1: found; corresponds to ATP6/ATP8 sequences from Chroicocephalus genei"       
##  [9] "AY584121.1: missing."                                                                   
## [10] "AY584127.1: missing."                                                                   
## [11] "AY584115.1: missing."                                                                   
## [12] "AY584131.1: found; corresponds to ATP6/ATP8 sequences from Chroicocephalus serranus"    
## [13] "AB233984.1: found; corresponds to ATP6/ATP8 sequences from Larus schistisagus"          
## [14] "AY894263.1: found; corresponds to ATP6/ATP8 sequences from Actitis hypoleucos"          
## [15] "AY894265.1: found; corresponds to ATP6/ATP8 sequences from Actitis macularius"          
## [16] "EU826414.1: found; corresponds to ATP6/ATP8 sequences from Calidris acuminata"          
## [17] "FJ603657.1: found; corresponds to ATP6 sequence from Gallinago delicata"                
## [18] "FJ603661.1: found; corresponds to ATP6 sequence from Gallinago nigripennis"             
## [19] "FJ603662.1: found; corresponds to ATP6 sequence from Gallinago paraguaiae"              
## [20] "EU826408.1: missing."                                                                   
## [21] "AY894274.1: found; corresponds to ATP8 sequence from Phalaropus tricolor"               
## [22] "AY894259.1: found; corresponds to ATP6/ATP8/Cox3 sequences from Tringa brevipes"        
## [23] "AY894261.1: found; corresponds to ATP6/ATP8/Cox3 sequences from Tringa flavipes"        
## [24] "AY894264.1: found; corresponds to ATP6/ATP8/Cox3 sequences from Tringa incana"          
## [25] "AY894266.1: found; corresponds to ATP6/ATP8/Cox3 sequences from Tringa melanoleuca"     
## [26] "AY894258.1: missing."                                                                   
## [27] "AY894269.1: found; corresponds to ATP6/ATP8/Cox3 sequences from Tringa solitaria"

Of the remaining 10 sequences, most can also be accounted for:

This leaves AY794551.1 (Ochthodromus montanus) and DQ989647.1 (Anous stolidus) as the only sequences sampled by Burleigh et al. (2015) that are genuinely absent from our dataset.

Additional loci

We might also be interested in whether it would make sense to include additional loci in our analyses. We will follow the criterion that each locus considered for inclusion has to be sampled from at least as many taxa as the most sparsely locus already present in our dataset. However, we need to take into account that for a number of nuclear loci, we were able to improve coverage by extracting the corresponding sequences from the whole-genome scaffolds generated by the B10K project. We can exploit the fact that these sequences have different accession numbers (4 letters followed by 8 numbers) from the regular ones (2 letters followed by 6 numbers):

# For now, ignore those loci that were considered but not included:
coverage <- subset(coverage, select = -c(BMP2, DNAH3, PRLR, PTPN12, TRAF6))
# Also ignore columns with species names, family affiliations, complete mitogenomes, and morphology
ginfo <- apply(coverage[, -c(1:3, 19, 32)], 2, \(x) length(grep("[A-Z]{2}[0-9]{6}\\.", x)))
ginfo_df <- data.frame(Gene = gsub("X", "", names(ginfo)), Species_sampled = ginfo,
                       stringsAsFactors = F, row.names = NULL)

ginfo_df %>%
  kable() %>%
  kable_styling()
Gene Species_sampled
12S 219
16S 133
ATP6 127
ATP8 122
Cox1 278
Cox2 96
Cox3 99
CytB 260
ND1 105
ND2 233
ND3 122
ND4 90
ND4L 91
ND5 119
ND6 85
ADH5 50
ALDOB 26
BDNF 22
CMOS 22
FGB7 70
GAPDH3.5 25
GAPDH11 52
MB2 59
MUSK 17
NTF3 22
ODC 27
RAG1 175

We see that the most sparsely sampled locus is MUSK, with just 17 sequences other than those that were extracted from whole-genome scaffolds. Do any of the other loci sampled by Burleigh et al. (2015) exceed this number?

get.sp.num <- function(path_vect) {
  pth <- "~/Grive/Slater_Lab/Charadriiformes/Burleigh_et_al_data/gene_alignments/"
  alns <- Map(\(x) read.table(paste0(pth, x), stringsAsFactors = F), path_vect)
  locnames <- sapply(strsplit(path_vect, "\\."), \(x) x[1])
  # In each .phy file, sequence names alternate with the sequences themselves, to the total number
  # of distinct sequences is equal to half the number of rows
  seqnums <- sapply(alns, \(x) nrow(x)/2)
  out <- data.frame(Gene = locnames, Species_sampled = seqnums, stringsAsFactors = F)
  rownames(out) <- NULL
  return(out)
}

burleigh_loci <- list.files(paste0(dir, "Burleigh_et_al_data/gene_alignments"), pattern = ".phy")
nonincluded_loci <- burleigh_loci[-c(1, 2, 4:6, 8, 14, 18, 19, 21:23, 25, 26, 28)]
get.sp.num(nonincluded_loci) %>%
  kable() %>%
  kable_styling()
Gene Species_sampled
CLTC 11
CRYAA 11
EEF2 8
EGR1 15
EGR1exon 15
FGBint4 8
FGBint5 12
GH1 11
HMGN2 3
IRF2 11
MYC 14
NGF 11
PCBD1 8
RHO 10
TGFB2 12
TPM1 9

There are no loci whose coverage is as high or higher than that of the most sparsely covered presently included locus.

References