#### Table of contents ####
# This is the R code that should re-create all the analyses and figures found in Stuart et al. 2020,
# "Inferred genetic architecture underlying evolution in a fossil stickleback lineage".
# If something is broken, or provides an answer inconsistent with what is reported in the paper, please email Yoel Stuart (ystuart@luc.edu).
# 0) Read in data for analysis. Define Global Variables. Load packages
# 1) Question 1: Is there directional asymmetry in pelvic girdle reduction and is it left biased?
#      Figure 1
#      Question 1 analyses
#      Figure 2
#      Figure 2 Descriptive Statistics
#      Figure 3
# 2) Question 2: What are the relationships of pelvic girdle length, length of pelvic spine, and pelvic score with time?
#      Question 2 analyses
#      Figure 4
#      Figure 5
# 3) Analyses suggested during review
# Bespoke FUNCTIONS necessary for analyses. Run first.
# end) Table of contents

#### 0) Read in data for analysis. Define Global Variables. ####
#NB: Place "phenotypic.data.for.analysis.no.sc.csv" and "univariate.data.L.fromTableS4_forAnalysis.csv" in a folder and generate a path to it.
#    In the code below, replace "path.data.cleaned.varve" with the name of the path to your data folder.
#NB: phenotypic.data.for.analysis.no.sc.csv is the K series.
#NB: univariate.data.L.fromTableS4_forAnalysis.csv is the L series.

##### Read in #####
data.k.sample.no.sc <- read.csv(paste(path.data.cleaned.varve, "phenotypic.data.for.analysis.no.sc.csv", sep = ""), header = TRUE, stringsAsFactors = FALSE)
#NB: k.T0 is the oldest. It is the transition period where new armored forms replace older, less armored forms. All the k.T0 data here have a pelvic score of 3.
data.l.sample <- read.csv(paste(path.data.cleaned.varve, "univariate.data.L.fromTableS4_forAnalysis.csv", sep = ""), header = TRUE, stringsAsFactors = FALSE)
data.l.sample.PS <- read.csv(paste(path.data.cleaned.varve, "Sample_L_Pelvic_Frequencies_fromTravis_May2019.csv", sep = ""), header = TRUE, stringsAsFactors = FALSE) #This is Supplementary Table S2. It is used for Figure 2.
# end Read in

#### Global Variables ####
layers <- unique(substr(data.k.sample.no.sc$univ.specimenID, 1, 4))
layers.L <- sort(unique(data.l.sample$INT))
meristic.names <- c("mcv", "mav", "mdf", "maf", "mpt", "mds", "ppt")
continuous.names <- c("pmx", "ect", "cle", "pf5", "pga", "pgp", "tpg", "lps", "lpt", "ds1", "ds2", "ds3", "ldf", "stl")
time.since.colonization <- read.csv(paste(path.raw.data.varve, "KSampleMeanTimes.csv", sep = ""), header = TRUE, stringsAsFactors = FALSE)[, c(1,3)]
years.before.present <- read.csv(paste(path.raw.data.varve, "KSampleMeanTimes.csv", sep = ""), header = TRUE, stringsAsFactors = FALSE)[, c(1,2)]

library(lme4)
library(diptest)
# end Global Variables
# end 0)
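#Illustrative sketch (not part of the original script): the read.csv() calls in section 0 assume that the
#path objects "path.data.cleaned.varve" and "path.raw.data.varve" already exist in your workspace.
#A minimal setup might look like the following; the folder names are hypothetical placeholders.
#Note the trailing "/" so that paste(path, filename, sep = "") resolves to a valid file path.
# path.data.cleaned.varve <- "~/stuart_etal_2020/data_cleaned/"  #folder holding the K- and L-series csv files (placeholder)
# path.raw.data.varve     <- "~/stuart_etal_2020/data_raw/"      #folder holding KSampleMeanTimes.csv (placeholder)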
##### 1) Question 1: Is there directional asymmetry in pelvic girdle reduction and is it left biased? #####
#uses L series

#calculate directional asymmetry for fish with 1 <= PS < 3 (i.e., between 1 and 2.8, including 1)
#percent asymmetry is ((right - left) / (right + left)) * 100. Left biased is negative.

#subset to 1 <= PS < 3
data.l.sample.pelvic.reduction <- data.l.sample[which(data.l.sample$pelvic.score < 3 & data.l.sample$pelvic.score > 0.9) , -12]
nrow(data.l.sample.pelvic.reduction) == length(which(data.l.sample$pelvic.score < 3 & data.l.sample$pelvic.score > 0.9)) #check that filter worked

#subset L series to Lineage II fish, present after replacement was complete at year 4500 (see Extended Data 2)
data.l.sample.pelvic.reduction <- data.l.sample.pelvic.reduction[-which(is.na(data.l.sample.pelvic.reduction$INT)), ] #some INT values are NA. Remove.
data.l.sample.pelvic.reduction <- data.l.sample.pelvic.reduction[-which(data.l.sample.pelvic.reduction$INT < 5250), ] #remove Lineage I fish.
#NB: Per Supplementary Table S2, Extended Data Figure 2, and the text in the manuscript, this number should be 4500. However, the chronology in this
#data table was miscalculated and is therefore incorrect. We can still correlate the samples in this dataset with Table S2 by the replacement event:
#when mean PS went from ~1 to exactly 3.0 for a stretch of thousands of years. This happens at the INT value of 5250 years. Thus, any specimens from
#earlier than 5250 years are Lineage I. Specimens from during and after 5250 years are Lineage II.

#calculate percent asymmetry
data.l.sample.pelvic.reduction$perc.asymm <- round(((data.l.sample.pelvic.reduction$right.total.length.mm - data.l.sample.pelvic.reduction$left.total.length.mm) / (data.l.sample.pelvic.reduction$right.total.length.mm + data.l.sample.pelvic.reduction$left.total.length.mm)) * 100, 5)

which(data.l.sample.pelvic.reduction$perc.asymm == 0) #It seems unlikely that there would be so many individuals with exactly zero asymmetry, when left and right vestiges are measured out to 4 sig figs. Remove these individuals.
data.l.sample.pelvic.reduction <- data.l.sample.pelvic.reduction[ - which(data.l.sample.pelvic.reduction$perc.asymm == 0), ] #815 individuals

#order by asymmetry and add a column denoting rank, with most negative as 0 and most positive as N.
perc.asymm.for.plot <- data.frame(data.l.sample.pelvic.reduction$perc.asymm, matrix(NA, nrow = length(data.l.sample.pelvic.reduction$perc.asymm), ncol = 1)); colnames(perc.asymm.for.plot) <- c("perc.asymm", "rank")
perc.asymm.for.plot.ordered <- perc.asymm.for.plot[order(perc.asymm.for.plot$perc.asymm), ]
perc.asymm.for.plot.ordered$rank <- seq(0, length(perc.asymm.for.plot.ordered$perc.asymm)-1, by = 1)
which(perc.asymm.for.plot.ordered$perc.asymm == 0)
perc.asymm.for.plot.ordered[which(perc.asymm.for.plot.ordered$perc.asymm < 0.5 & perc.asymm.for.plot.ordered$perc.asymm >= -0.5), ] #flip from negative to positive happens between individual 599 and 600.
599/length(perc.asymm.for.plot.ordered$perc.asymm) #73.5% of fish have negative asymmetry. 26.5% of fish have positive asymmetry.
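#Illustrative sketch (not part of the paper's analyses): a quick worked check of the sign convention in the
#percent-asymmetry formula above, using made-up measurements rather than values from the dataset.
#A fish with left vestige 2.0 mm and right vestige 1.5 mm gives:
((1.5 - 2.0) / (1.5 + 2.0)) * 100  # = -14.3; negative percent asymmetry means the left side is larger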
##### Figure 1 #####
par(mfrow = c(1,1), mar = c(3, 3, 1, 1), oma = c(0,0,0,0))
mp <- barplot(height = perc.asymm.for.plot.ordered$perc.asymm, xlab = "", ylab = "", ylim = c(-70, 70), axes = FALSE)
ticks <- c(mp[seq(1, 801, by = 100)], mp[877])
axis(side = 1, at = ticks, labels = c(seq(0, 900, 100)))
axis(side = 2, at = seq(-70, 70, by = 20), las = 1)
abline(h = 0)
abline(v = mp[600])
text("73.5% left side \n larger", x = mp[599/2], y = 35)
text("26.5% right side \n larger", x = mp[(600 + (815-600)/2)], y = 35)
mtext(side = 1, line = 2, text = "Ranked Asymmetry")
mtext(side = 2, line = 2, text = "Signed Asymmetry (%)")
# end Figure 1

##### Question 1 analyses #####
#Paired t-test. Are pelvic vestiges significantly asymmetric?
model.sig.asymm <- t.test(x = data.l.sample.pelvic.reduction$right.total.length.mm, y = data.l.sample.pelvic.reduction$left.total.length.mm, paired = TRUE) #mean left-minus-right difference = -0.29 mm. p < 0.0001.

#Chisq test. Are significantly more than half of the specimens left-biased?
data.l.sample.pelvic.reduction$bias <- ifelse(data.l.sample.pelvic.reduction$perc.asymm < 0, 0, 1) #left side gets 0, right side gets 1, so the sum is the number of right-side-biased fish.
test.matrix <- matrix(c(sum(data.l.sample.pelvic.reduction$bias), 815 - sum(data.l.sample.pelvic.reduction$bias), 815/2, 815/2), ncol = 2)
chisq.test(test.matrix) # X2 = 95.31, p < 0.00001

#Chisq test. Is asymmetry independent of pelvic score? Observed and expected counts are tallied within each pelvic score category.
ps.cat <- sort(unique(data.l.sample.pelvic.reduction$pelvic.score))
which(data.l.sample.pelvic.reduction$pelvic.score == 1.3) # 99
data.for.asymm.vs.ps <- data.l.sample.pelvic.reduction[-99 ,]
ps.cat.a.vs.ps <- sort(unique(data.for.asymm.vs.ps$pelvic.score))

#observed frequency table
a.vs.ps.table.observed <- data.frame(matrix(NA, ncol = length(ps.cat.a.vs.ps), nrow = 2)); colnames(a.vs.ps.table.observed) <- ps.cat.a.vs.ps; rownames(a.vs.ps.table.observed) <- c("right.bias", "left.bias")
a.vs.ps.table.observed[1, ] <- tapply(X = data.for.asymm.vs.ps$bias, INDEX = as.factor(data.for.asymm.vs.ps$pelvic.score), FUN = sum) #number of right-side biased
a.vs.ps.table.observed[2, ] <- tapply(X = data.for.asymm.vs.ps$bias, INDEX = as.factor(data.for.asymm.vs.ps$pelvic.score), FUN = length) - tapply(X = data.for.asymm.vs.ps$bias, INDEX = as.factor(data.for.asymm.vs.ps$pelvic.score), FUN = sum)
sum(a.vs.ps.table.observed) #876. Check.

#expected frequency table
a.vs.ps.table.expected <- data.frame(matrix(NA, ncol = length(ps.cat.a.vs.ps), nrow = 2)); colnames(a.vs.ps.table.expected) <- ps.cat.a.vs.ps; rownames(a.vs.ps.table.expected) <- c("right.bias", "left.bias")
a.vs.ps.table.expected[1, ] <- .26 * apply(X = a.vs.ps.table.observed, MARGIN = 2, FUN = sum) #based on the entire data set, expect 26% to be right biased.
a.vs.ps.table.expected[2, ] <- .74 * apply(X = a.vs.ps.table.observed, MARGIN = 2, FUN = sum) #based on the entire data set, expect 74% to be left biased.
chisq.test(x = a.vs.ps.table.observed, y = a.vs.ps.table.expected) #X2 = 13.05, p = 0.16
#end Question 1 analyses

##### Figure 2. Temporal Sequence L. Mean pelvic score through time. Essentially recreating Bell 2006 Figure 3 without Lineage I. #####
#data.l.sample has dates that are incorrectly calibrated. This doesn't matter for the analyses above, as they are independent of time. To create Figure 2,
#however, correctly calibrated times are necessary. Thus, M. Travis generated a new data file with correctly calibrated times, "Sample_L_Pelvic_Frequencies_fromTravis_May2019.csv".
data.l.sample.PS$meanPS <- f.mean.pelvic.score(interval = data.l.sample.PS$interval, data.ps = data.l.sample.PS[, 3:14]) #means include PS0 fish, which will slightly underestimate the mean since the jump from 1 to 0 is discontinuous on the ordinal scale.
data.l.sample.PS$sePS <- f.se.pelvic.score(interval = data.l.sample.PS$interval, data.ps = data.l.sample.PS[, 3:14]) #sd and sample sizes include PS0 fish, which will slightly overestimate the standard error since the jump from 1 to 0 is discontinuous on the ordinal scale.
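#Illustrative sketch (not part of the paper's analyses): f.mean.pelvic.score (defined in the Functions section
#below) computes a frequency-weighted mean of the ordinal pelvic-score categories encoded in the column names.
#The real table has 12 "ps" columns (columns 3:14); here only four hypothetical categories for brevity.
toy.counts <- c(ps0.0 = 2, ps1.0 = 5, ps2.0 = 3, ps3.0 = 10)  #made-up frequencies for one interval
toy.scores <- as.numeric(substr(names(toy.counts), 3, 5))     #recover 0.0, 1.0, 2.0, 3.0 from the column names
sum(toy.scores * toy.counts) / sum(toy.counts)                # = 2.05, the mean pelvic score for that interval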
par(mfrow = c(1,1), mar = c(4, 4, 1, 1), oma = c(0, 0, 0, 0))
plot(data.l.sample.PS$meanPS[which(data.l.sample.PS$time.since.replacement>=0)] ~ data.l.sample.PS$time.since.replacement[which(data.l.sample.PS$time.since.replacement>=0)],
     pch = 16, lty = 1, xlab = "", ylab = "", xlim = c(0, 23250-5250), ylim = c(0, 3), yaxt = "n", xaxt = "n", axes = FALSE)
segments(x0 = data.l.sample.PS$time.since.replacement[which(data.l.sample.PS$time.since.replacement>=0)],
         x1 = data.l.sample.PS$time.since.replacement[which(data.l.sample.PS$time.since.replacement>=0)],
         y0 = data.l.sample.PS$meanPS[which(data.l.sample.PS$time.since.replacement>=0)],
         y1 = data.l.sample.PS$meanPS[which(data.l.sample.PS$time.since.replacement>=0)] - data.l.sample.PS$sePS[which(data.l.sample.PS$time.since.replacement>=0)])
axis(side = 2, tick = TRUE, at = c(0, 1, 2, 3), labels = c("0", "1", "2", "3"), line = 0, las = 1)
axis(side = 1, tick = TRUE, at = seq(0, 18000, by = 3000), labels = seq(0, 18000, by = 3000), line = 0)
mtext(text = "Mean Pelvic Score", side = 2, line = 2, outer = FALSE)
mtext(text = "Time since lineage II appeared (years)", side = 1, line = 2, outer = FALSE)
mtext(text = "Oldest", side = 1, line = 2, outer = FALSE, at = 0, cex = 0.8)
mtext(text = "Youngest", side = 1, line = 2, outer = FALSE, at = 18000, cex = 0.8)
lines(data.l.sample.PS$time.since.replacement[which(data.l.sample.PS$time.since.replacement>=0)], data.l.sample.PS$meanPS[which(data.l.sample.PS$time.since.replacement>=0)], lty = 2)
# end Figure 2

##### Descriptive Statistics for Figure 2, Table S2 #####
#max percentage of zeros by section
max(data.l.sample.PS$ps0.0[which(data.l.sample.PS$time.since.replacement>=0)] / data.l.sample.PS$total.N[which(data.l.sample.PS$time.since.replacement>=0)] * 100)

#### Figure 3. Plot pelvic score distributions through time to show multimodality. Supports Question 1 but uses temporal sequence K ####
data.for.histograms.ips <- data.k.sample.no.sc
par(mfrow = c(length(layers)/2, 2), mar = c(1, 1, 1, 1), oma = c(3, 3, 0, 0))
labels.mod <- layers; labels.mod[1] <- "k.T"
for(i in c(1, 10, 2, 11, 3, 12, 4, 13, 5, 14, 6, 15, 7, 16, 8, 17, 9, 18)){
  layer.data.t <- data.for.histograms.ips[substr(data.for.histograms.ips$univ.specimenID, 1, 4) == layers[i], ]
  layer.data.df.t <- data.frame(matrix(NA, nrow = length(c(0, seq(1.0, 3.0, by = 0.2))), ncol = 2)); colnames(layer.data.df.t) <- c("PS", "Count")
  layer.data.df.t$PS <- c(0, seq(1.0, 3.0, by = 0.2))
  for(j in 1:length(layer.data.df.t$PS)){
    layer.data.df.t$Count[j] <- sum(layer.data.t$ips == layer.data.df.t$PS[j], na.rm = TRUE)
  }
  layer.data.df.t$Proportion <- round(layer.data.df.t$Count / sum(layer.data.df.t$Count), 2)
  plot(layer.data.df.t$Proportion ~ layer.data.df.t$PS, xaxt = "n", yaxt = "n", xlab = "", ylab = "", main = "", ylim = c(0, 1.2), xlim = c(-0.05, 3.05), col = "black", bty = "n", pch = 16, cex = .5, type = "n")
  layer.data.segments.t <- layer.data.df.t[which(layer.data.df.t$Proportion > 0), ]
  segments(x0 = layer.data.segments.t$PS, x1 = layer.data.segments.t$PS, y0 = 0, y1 = layer.data.segments.t$Proportion, lwd = 2)
  text(x = layer.data.segments.t$PS, y = layer.data.segments.t$Proportion + 0.1, labels = layer.data.segments.t$Count, cex = 0.8)
  axis(side = 2, tick = TRUE, at = c(0, 0.5, 1.0), labels = c("","", ""), pos = -.1, las = 1, cex = 0.2)
  axis(side = 2, tick = TRUE, at = c(0.25, 0.75), labels = c("",""), pos = -.1, las = 1, cex = 0.2, lwd.ticks = 0.25, lwd = 0)
  axis(side = 1, tick = TRUE, at = c(0.0, 1.0, 2.0, 3.0), labels = c("", "", "", ""), lwd.ticks = 1.0)
  axis(side = 1, tick = TRUE, at = c(1.2, 1.4, 1.6, 1.8, 2.2, 2.4, 2.6, 2.8), labels = c("", "", "", "", "", "", "", ""), lwd.ticks = .25, lwd = 0)
  if(i %in% c(1, 9, 10, 18)) axis(side = 1, tick = TRUE, at = c(0.0, 1.0, 1.5, 2.0, 2.5, 3.0), labels = c("0", "1.0", "", "2.0", "", "3.0"), lwd = 0, lwd.ticks = 0, line = -0.5)
  if(i %in% c(1:9)) axis(side = 2, tick = TRUE, at = c(0, 0.5, 1.0), labels = c("0","0.5", "1.0"), pos = -.1, las = 1, cex = 0.2, lwd = 0, lwd.ticks = 0)
  text(x = 1.5, y = 1.15, label = paste(labels.mod[i], " (", round(time.since.colonization$Inverted[i],0), " yr; mean PS = ", round(mean(layer.data.t$ips, na.rm = TRUE), 1), ")", sep = ""), cex = 0.8)
}
mtext(text = "Pelvic Score", side = 1, line = 1.5, outer = TRUE)
mtext(text = "Proportion by PS category (with counts)", side = 2, line = 1.5, outer = TRUE)
#end Figure 3

##### 1) End #####

##### 2) Question 2: What are the relationships of pelvic girdle length, length of pelvic spine, and pelvic score with time? #####
#uses K series

##### Question 2 analyses #####
#Generate a dataframe with just specimen ID, layer, stl, tpg, lps, and ips
data.1 <- data.k.sample.no.sc[ , which(colnames(data.k.sample.no.sc) %in% c("univ.specimenID", "tpg", "lps", "stl", "ips", "layer.old.to.young"))]

#Subset to only fish with PS = 3
data.1.ps.3 <- na.omit(data.1[data.1$ips == 3 , ])

#size correct (NB: see function at end of this code)
data.1.ps.3.sc <- f.sizecorrect(standard.length = data.1.ps.3$stl, vector.of.columns = which(colnames(data.1.ps.3) %in% c("tpg", "lps")), dataframe.to.use = data.1.ps.3, section = substr(data.1.ps.3$univ.specimenID, 1, 4))

# K series summary stats
#total pelvic girdle length (tpg)
tpg.sc.means <- data.frame(unique(substr(data.1.ps.3$univ.specimenID, 1, 4)), tapply(X = data.1.ps.3.sc$tpg.sc, INDEX = data.1.ps.3.sc$layer.old.to.young, FUN = mean)); colnames(tpg.sc.means) <- c("layer", "tpg.sc.mean")
tpg.N <- data.frame(unique(substr(data.1.ps.3$univ.specimenID, 1, 4)), tapply(X = data.1.ps.3.sc$tpg.sc, INDEX = data.1.ps.3.sc$layer.old.to.young, FUN = length)); colnames(tpg.N) <- c("layer", "tpg.N")
tpg.sc.std.err <- data.frame(unique(substr(data.1.ps.3$univ.specimenID, 1, 4)), tapply(X = data.1.ps.3.sc$tpg.sc, INDEX = data.1.ps.3.sc$layer.old.to.young, FUN = sd) / sqrt(tpg.N$tpg.N)); colnames(tpg.sc.std.err) <- c("layer", "tpg.sc.std.err")
#precipitous drop in the number of sampled fish with PS = 3 between k.13 and k.12. k.11 = 5; k.10 and later <= 2.
#end this analysis at k.11

#length of pelvic spine (lps)
lps.sc.means <- data.frame(unique(substr(data.1.ps.3$univ.specimenID, 1, 4)), tapply(X = data.1.ps.3.sc$lps.sc, INDEX = data.1.ps.3.sc$layer.old.to.young, FUN = mean)); colnames(lps.sc.means) <- c("layer", "lps.sc.mean")
lps.N <- data.frame(unique(substr(data.1.ps.3$univ.specimenID, 1, 4)), tapply(X = data.1.ps.3.sc$lps.sc, INDEX = data.1.ps.3.sc$layer.old.to.young, FUN = length)); colnames(lps.N) <- c("layer", "lps.N")
lps.sc.std.err <- data.frame(unique(substr(data.1.ps.3$univ.specimenID, 1, 4)), tapply(X = data.1.ps.3.sc$lps.sc, INDEX = data.1.ps.3.sc$layer.old.to.young, FUN = sd) / sqrt(lps.N$lps.N)); colnames(lps.sc.std.err) <- c("layer", "lps.sc.std.err")

#pelvic score (ips)
ips.N <- tapply(X = data.1$ips, INDEX = data.1$layer.old.to.young, FUN = f.sum.not.na)
ips.means <- data.frame(unique(substr(data.1$univ.specimenID, 1, 4)), tapply(X = data.1$ips, INDEX = data.1$layer.old.to.young, FUN = mean, na.rm = T)); colnames(ips.means) <- c("layer", "ips.means")
ips.sterr <- data.frame(unique(substr(data.1$univ.specimenID, 1, 4)), (tapply(X = data.1$ips, INDEX = data.1$layer.old.to.young, FUN = sd, na.rm = T)) / sqrt(ips.N)); colnames(ips.sterr) <- c("layer", "ips.sterr")
ips.N.ps.3 <- tapply(X = data.1$ips[data.1$ips == 3], INDEX = data.1$layer.old.to.young[data.1$ips == 3], FUN = f.sum.not.na)

#standard length (stl)
stl.N <- tapply(X = data.1$stl, INDEX = data.1$layer.old.to.young, FUN = f.sum.not.na)
stl.means <- data.frame(unique(substr(data.1$univ.specimenID, 1, 4)), tapply(X = data.1$stl, INDEX = data.1$layer.old.to.young, FUN = mean, na.rm = T)); colnames(stl.means) <- c("layer", "stl.means")
stl.sterr <- data.frame(unique(substr(data.1$univ.specimenID, 1, 4)), (tapply(X = data.1$stl, INDEX = data.1$layer.old.to.young, FUN = sd, na.rm = T)) / sqrt(stl.N)); colnames(stl.sterr) <- c("layer", "stl.sterr")
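#Illustrative sketch (not part of the paper's analyses): the size correction applied above by f.sizecorrect
#standardizes each trait to a common body size via Ms = M0 * (Ls / L0)^b (Reist 1985; Lleonart et al. 2000;
#see the Functions section). A quick numeric illustration with hypothetical values:
eg.M0 <- 8.2   #raw pelvic girdle length (mm) of one fish (made up)
eg.L0 <- 52.0  #that fish's standard length (mm) (made up)
eg.Ls <- 60.0  #overall mean standard length (mm) (made up)
eg.b  <- 1.1   #common within-group allometric slope, as estimated by lmer inside f.sizecorrect (made up)
eg.M0 * (eg.Ls / eg.L0)^eg.b  # = ~9.6 mm: the trait value expected if this fish were at the mean body size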
#Do pelvic girdle length (tpg) and pelvic spine length (lps) start reduction immediately, relative to pelvic score (ips), following replacement?
#Find breakpoints for tpg, lps, and ips using piecewise regression
#https://stats.stackexchange.com/questions/19772/estimating-the-break-point-in-a-broken-stick-piecewise-linear-model-with-rando
#https://www.r-bloggers.com/r-for-ecologists-putting-together-a-piecewise-regression/
means.for.breakpoint <- data.frame(lps.sc.means[1:7,], tpg.sc.means[1:7,2], ips.means[1:7,2], time.since.colonization[1:7,2]); colnames(means.for.breakpoint) <- c("layer", "lps.sc", "tpg.sc", "ips", "time.since.col") #first seven sections, before the blip at section 8 that dominates the fit
breaks <- means.for.breakpoint$time.since.col[which(means.for.breakpoint$time.since.col >= 0 & means.for.breakpoint$time.since.col <= 5800)] #the window before the weird blip at time section 8 (5960 years)

#tpg
brk.tpg <- f.mse(brks = breaks, trait.values = means.for.breakpoint$tpg.sc, time.points = means.for.breakpoint$time.since.col)
xx.tpg <- breaks[which(brk.tpg == min(brk.tpg))] #1271.82 (section 3)
piecewise.tpg <- lm(means.for.breakpoint$tpg.sc ~ (means.for.breakpoint$time.since.col*(means.for.breakpoint$time.since.col < xx.tpg)) + (means.for.breakpoint$time.since.col * (means.for.breakpoint$time.since.col >= xx.tpg)) )
summary(piecewise.tpg)

#lps
brk.lps <- f.mse(brks = breaks, trait.values = means.for.breakpoint$lps.sc, time.points = means.for.breakpoint$time.since.col)
xx.lps <- breaks[which(brk.lps == min(brk.lps))] #1271.82 (section 3)
piecewise.lps <- lm(means.for.breakpoint$lps.sc ~ means.for.breakpoint$time.since.col*(means.for.breakpoint$time.since.col < xx.lps) + means.for.breakpoint$time.since.col * (means.for.breakpoint$time.since.col >= xx.lps) )
summary(piecewise.lps)

#ips
brk.ips <- f.mse(brks = breaks, trait.values = means.for.breakpoint$ips, time.points = means.for.breakpoint$time.since.col)
xx.ips <- breaks[which(brk.ips == min(brk.ips))] #4072.66 (section 6)
#xx.ips <- 3129.04
piecewise.ips <- lm(means.for.breakpoint$ips ~ means.for.breakpoint$time.since.col*(means.for.breakpoint$time.since.col < xx.ips) + means.for.breakpoint$time.since.col * (means.for.breakpoint$time.since.col >= xx.ips) )
summary(piecewise.ips)
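#Illustrative sketch (not part of the paper's analyses): the breakpoint search above is a grid search. For each
#candidate break, fit the two-segment ("broken-stick") lm and keep the break with the smallest residual error,
#which is the idea implemented by f.mse (defined in the Functions section). A self-contained demo on simulated
#data, with a known break at 2500 (all values here are made up):
set.seed(1)
t.sim <- seq(0, 6000, by = 500)
y.sim <- ifelse(t.sim < 2500, 10 - 0.002 * t.sim, 5) + rnorm(length(t.sim), sd = 0.2)
cand.breaks <- t.sim[t.sim > min(t.sim) & t.sim < max(t.sim)]  #interior candidates only
mse.sim <- sapply(cand.breaks, function(b) {
  fit <- lm(y.sim ~ t.sim * (t.sim < b) + t.sim * (t.sim >= b))  #same model form as the paper's piecewise fits
  mean(resid(fit)^2)
})
cand.breaks[which.min(mse.sim)]  #estimated breakpoint; should land near the true value of 2500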
#### Figure 4. Plot ips, tpg, and lps, with breakpoints ####
par(mfrow = c(1, 1), mar = c(2, 3, 0, 0), oma = c(0,0,0,0))
rows.to.plot <- which(tpg.sc.means$layer == "k.T0") : which(tpg.sc.means$layer == "k.11")

#tpg
plot(x = time.since.colonization$Inverted.Year[rows.to.plot], y = tpg.sc.means$tpg.sc.mean[rows.to.plot], yaxt = "n", type = "b", ylim = c(1, 1.01*max(tpg.sc.means$tpg.sc.mean[rows.to.plot])), pch = 16, xlab = "", ylab = "", main = "", axes = FALSE, xlim = c(0, 9050), cex = 0.8)
arrows(x0 = time.since.colonization$Inverted.Year[rows.to.plot], x1 = time.since.colonization$Inverted.Year[rows.to.plot], y0 = (tpg.sc.means$tpg.sc.mean[rows.to.plot]), y1 = (tpg.sc.means$tpg.sc.mean[rows.to.plot]) + tpg.sc.std.err$tpg.sc.std.err[rows.to.plot], code = 2, angle = 90, length = 0)
arrows(x0 = time.since.colonization$Inverted.Year[rows.to.plot], x1 = time.since.colonization$Inverted.Year[rows.to.plot], y0 = (tpg.sc.means$tpg.sc.mean[rows.to.plot]), y1 = (tpg.sc.means$tpg.sc.mean[rows.to.plot]) - tpg.sc.std.err$tpg.sc.std.err[rows.to.plot], code = 2, angle = 90, length = 0)

#lps
points(x = time.since.colonization$Inverted.Year[rows.to.plot], y = lps.sc.means$lps.sc.mean[rows.to.plot], yaxt = "n", type = "b", pch = 4, cex = 0.8)
arrows(x0 = time.since.colonization$Inverted.Year[rows.to.plot], x1 = time.since.colonization$Inverted.Year[rows.to.plot], y0 = (lps.sc.means$lps.sc.mean[rows.to.plot]), y1 = (lps.sc.means$lps.sc.mean[rows.to.plot]) + lps.sc.std.err$lps.sc.std.err[rows.to.plot], code = 2, angle = 90, length = 0)
arrows(x0 = time.since.colonization$Inverted.Year[rows.to.plot], x1 = time.since.colonization$Inverted.Year[rows.to.plot], y0 = (lps.sc.means$lps.sc.mean[rows.to.plot]), y1 = (lps.sc.means$lps.sc.mean[rows.to.plot]) - lps.sc.std.err$lps.sc.std.err[rows.to.plot], code = 2, angle = 90, length = 0)

#ips
#points(x = time.since.colonization$Inverted.Year[rows.to.plot], y = ygap[(length(rows.to.plot)+1):length(ygap)], type = "b")
points(x = time.since.colonization$Inverted.Year[rows.to.plot], y = ips.means$ips.means[rows.to.plot], type = "b", cex = 0.8)
arrows(x0 = time.since.colonization$Inverted.Year[rows.to.plot], x1 = time.since.colonization$Inverted.Year[rows.to.plot], y0 = ips.means$ips.means[rows.to.plot], y1 = ips.means$ips.means[rows.to.plot] + ips.sterr$ips.sterr[rows.to.plot], code = 2, angle = 90, length = 0)
arrows(x0 = time.since.colonization$Inverted.Year[rows.to.plot], x1 = time.since.colonization$Inverted.Year[rows.to.plot], y0 = ips.means$ips.means[rows.to.plot], y1 = ips.means$ips.means[rows.to.plot] - ips.sterr$ips.sterr[rows.to.plot], code = 2, angle = 90, length = 0)

#global text
ylab <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)
ticks <- 1:13
axis(side = 2, at = ticks, labels = ylab, las = 1)
axis(side = 1, at = c(0, 3000, 6000, 9000), labels = seq(0, 9000, by = 3000), line = -1)
mtext(text = "Phenotypic Value (mean +/- 1 s.e.)", side = 2, line = 2)
mtext(text = "Time since lineage II appeared (years)", side = 1, line = 1)
mtext(text = "Oldest", side = 1, at = time.since.colonization$Inverted.Year[rows.to.plot][1], line = 1, cex = 0.8)
mtext(text = "Youngest", side = 1, at = time.since.colonization$Inverted.Year[rows.to.plot][length(rows.to.plot)], line = 1, adj = 0.0, cex = 0.8)

#legend
legend(6000, 13, c("size-adj. pgl", "size-adj. psl", "pelvic score"), pch = c(16, 4, 1), lwd = c(1, 1), bty = "n")

#add arrows indicating breakpoints from the piecewise regression code above (dependent on running that code)
#tpg
location.tpg <- c(means.for.breakpoint$time.since.col[which(means.for.breakpoint$time.since.col == xx.tpg)-1], xx.tpg)
arrows(x0 = mean(location.tpg), x1 = mean(location.tpg), y0 = 12.75, y1 = 12, length = 0.1)
#lps
location.lps <- c(means.for.breakpoint$time.since.col[which(means.for.breakpoint$time.since.col == xx.lps)-1], xx.lps)
arrows(x0 = mean(location.lps), x1 = mean(location.lps), y0 = 7.05, y1 = 6.3, length = 0.1)
#ips
location.ips <- c(means.for.breakpoint$time.since.col[which(means.for.breakpoint$time.since.col == xx.ips)-1], xx.ips)
arrows(x0 = mean(location.ips), x1 = mean(location.ips), y0 = 3.85, y1 = 3.1, length = 0.1)
# end Figure 4

##### Figure 5. Histograms of change through time in pelvic girdle length and pelvic spine length #####
data.for.histograms <- data.1.ps.3.sc[data.1.ps.3.sc$layer.old.to.young < 10 , ]

#Figure 5A. tpg
par(mfrow = c(10, 1), mar = c(1, 0, 0, 0), oma = c(4, 5, 1, 0))
labels.mod <- layers; labels.mod[1] <- "k.T"
xmin.tpg <- min(data.for.histograms$tpg.sc, na.rm = TRUE) - 1
xmax.tpg <- max(data.for.histograms$tpg.sc, na.rm = TRUE) + 1
for(i in 1:which(layers == "k.11")){
  layer.data.t <- data.for.histograms[substr(data.for.histograms$univ.specimenID, 1, 4) == layers[i], ]
  hist(x = layer.data.t$tpg.sc, freq = TRUE, xlim = c(xmin.tpg, 20), ylim = c(0,21), ylab = "", xlab = "", main = "", axes = FALSE, col = "grey45", border = "grey50", right = FALSE, breaks = seq(4, 19, by = 1), las = 1)
  axis(side = 2, at = seq(0, 21, by = 7), las = 1)
  text(labels = labels.mod[i], x = 19.5, y = 9, cex = 1.1)
  text(labels = paste(round(time.since.colonization$Inverted.Year[i], 0), "yr", sep = ""), x = 19.5, y = 6)
  points(x = mean(layer.data.t$tpg.sc, na.rm = TRUE), y = 0, col = 'black', pch = 16, cex = 1.5)
}
axis(side = 1, at = seq(5, 19, by = 2), line = -.2)
mtext(text = "Size-corrected pelvic girdle (mm)", side = 1, line = 2, outer = TRUE, cex = 1.2)
mtext(text = "Raw Frequency", side = 2, line = 3, outer = TRUE, cex = 1.2)

#Figure 5B. lps
par(mfrow = c(10, 1), mar = c(1, 0, 0, 0), oma = c(4, 5, 1, 0.2))
xmin.lps <- ifelse((min(data.for.histograms$lps.sc, na.rm = TRUE)) < 0, 0, min(data.for.histograms$lps.sc, na.rm = TRUE))
xmax.lps <- max(data.for.histograms$lps.sc, na.rm = TRUE)
for(i in 1:which(layers == "k.11")){
  layer.data.t <- data.for.histograms[substr(data.for.histograms$univ.specimenID, 1, 4) == layers[i], ]
  hist(x = layer.data.t$lps.sc, freq = TRUE, xlim = c(xmin.lps, xmax.lps), ylim = c(0,15), ylab = "", xlab = "", main = "", axes = FALSE, col = "grey45", border = "grey50", breaks = seq(0, 12, by = 0.5), las = 1)
  axis(side = 2, las = 1)
  points(x = mean(layer.data.t$lps.sc, na.rm = TRUE), y = 0, col = 'black', pch = 16, cex = 1.5)
}
axis(side = 1, at = seq(0, 10.5, 1.5), line = -0.2)
mtext(text = "Size-corrected pelvic spine (mm)", side = 1, line = 2, outer = TRUE, cex = 1.2)
mtext(text = "", side = 2, line = 3, outer = TRUE, cex = 1.2)
# end Figure 5

#tests of normality at each time step
#Basis of Supplementary Table S3
shapiro.test.tpg <- data.frame(matrix(NA, nrow = 10, ncol = 2)); colnames(shapiro.test.tpg) <- c("test.stat", "P")
for(i in 1:which(layers == "k.11")){
  norm.data.t <- data.for.histograms[substr(data.for.histograms$univ.specimenID, 1, 4) == layers[i], ]
  norm.test.t <- shapiro.test(norm.data.t$tpg.sc)
  shapiro.test.tpg[i, ] <- c(norm.test.t$statistic, norm.test.t$p.value)
}
shapiro.test.lps <- data.frame(matrix(NA, nrow = 10, ncol = 2)); colnames(shapiro.test.lps) <- c("test.stat", "P")
for(i in 1:which(layers == "k.11")){
  norm.data.t <- data.for.histograms[substr(data.for.histograms$univ.specimenID, 1, 4) == layers[i], ]
  norm.test.t <- shapiro.test(norm.data.t$lps.sc)
  shapiro.test.lps[i, ] <- c(norm.test.t$statistic, norm.test.t$p.value)
}

#tests of unimodality at each time step
#Basis of Supplementary Table S3
hds.test.tpg <- data.frame(matrix(NA, nrow = 10, ncol = 2)); colnames(hds.test.tpg) <- c("test.stat", "P")
for(i in 1:which(layers == "k.11")){
  dip.data.t <- data.for.histograms[substr(data.for.histograms$univ.specimenID, 1, 4) == layers[i], ]
  dip.test.t <- dip.test(x = dip.data.t$tpg.sc, simulate.p.value = TRUE, B = 2000)
  hds.test.tpg[i, ] <- c(dip.test.t$statistic, dip.test.t$p.value)
}
hds.test.lps <- data.frame(matrix(NA, nrow = 10, ncol = 2)); colnames(hds.test.lps) <- c("test.stat", "P")
for(i in 1:which(layers == "k.11")){
  dip.data.t <- data.for.histograms[substr(data.for.histograms$univ.specimenID, 1, 4) == layers[i], ]
  dip.test.t <- dip.test(x = dip.data.t$lps.sc, simulate.p.value = TRUE, B = 2000)
  hds.test.lps[i, ] <- c(dip.test.t$statistic, dip.test.t$p.value)
}

#Does SL-adjusted pelvic girdle length shrink unimodally once the Pitx1 deletion is active?
#subset to only PS = 1 and size correct
data.1.ps.1 <- na.omit(data.1[data.1$ips == 1 , -3])
data.1.ps.1.sc <- f.sizecorrect(standard.length = data.1.ps.1$stl, vector.of.columns = which(colnames(data.1.ps.1) %in% c("tpg")), dataframe.to.use = data.1.ps.1, section = substr(data.1.ps.1$univ.specimenID, 1, 4))
diptest <- dip.test(x = data.1.ps.1.sc$tpg.sc, simulate.p.value = TRUE, B = 2000) # D = 0.019, p = 0.67, 2000 Monte Carlo replicates. Unimodal.
swtest <- shapiro.test(x = data.1.ps.1.sc$tpg.sc)
#end Question 2
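#Illustrative sketch (not part of the paper's analyses): for interpreting the Hartigan dip tests above,
#a small p-value rejects unimodality. A quick demonstration on simulated data:
set.seed(2)
uni.sim <- rnorm(200)                                       #unimodal sample
bi.sim <- c(rnorm(100, mean = -3), rnorm(100, mean = 3))    #clearly bimodal sample
dip.test(uni.sim, simulate.p.value = TRUE, B = 2000)$p.value  #large p: no evidence against unimodality
dip.test(bi.sim, simulate.p.value = TRUE, B = 2000)$p.value   #tiny p: unimodality rejected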
##### 3) Analyses suggested during review #####
#For the K series, how correlated are the individual tpg and psl scores during the first 3000 years of Figure 3?
#Can the authors' data speak to whether tpg and psl are likely reduced via the same or different genetic changes?
# data.1.ps.3.sc.first3000 <- data.1.ps.3.sc[substr(data.1.ps.3.sc$univ.specimenID, 1, 4) %in% layers[1:5] , ]
tpgpsl.cors <- vector()
for(i in 1:10){
  temp.data.cors <- data.1.ps.3.sc[substr(data.1.ps.3.sc$univ.specimenID, 1, 4) == layers[i] , c(7,8)]
  tpgpsl.cors <- c(tpgpsl.cors, cor(temp.data.cors[,1], temp.data.cors[,2]))
}
mean(tpgpsl.cors)
sd(tpgpsl.cors)
summary(tpgpsl.cors)
# end 3)

##### Functions #####

#### f.mean.pelvic.score ####
#interval <- data.l.sample.PS$interval
#data.ps <- data.l.sample.PS[, 3:14]
f.mean.pelvic.score <- function(interval, data.ps){
  mean.ps.int <- vector()
  for(int in 1:length(interval)){
    mean.ps.int.t <- round(sum(as.numeric(substr(names(data.ps), 3, 5)) * data.ps[int , ]) / sum(data.ps[int , ]), 2)
    mean.ps.int <- c(mean.ps.int, mean.ps.int.t)
  }
  return(mean.ps.int)
}
# end f.mean.pelvic.score

#### f.se.pelvic.score ####
#interval <- data.l.sample.Travis$interval
#data.ps <- data.l.sample.Travis[, 3:14]
f.se.pelvic.score <- function(interval, data.ps){
  se.ps.int <- vector()
  for(int in 1:length(interval)){
    recreate.data.t <- vector()
    for(i in 1:length(names(data.ps))){
      recreate.data.t <- c(recreate.data.t, rep(substr(names(data.ps), 3, 5)[i], data.ps[int, i]))
    }
    se.ps.int <- round(c(se.ps.int, sd(as.numeric(recreate.data.t)) / sqrt(length(as.numeric(recreate.data.t)))), 2)
  }
  return(se.ps.int)
}
# end f.se.pelvic.score

#### f.sizecorrect ####
# OKE ET AL COMMON GARDEN MANUSCRIPT METHOD
#"All relative warps and univariate shape traits were allometrically standardized to a common body size (Reist 1985; Lleonart et al. 2000) based on
#Ms = M0 (Ls / L0)^b, where Ms is the standardized trait value (mm), M0 is the non-standardized trait value (mm), Ls is the overall mean centroid size
#(for RWs) or standard length (mm, for univariate shape traits), and L0 is the centroid size or standard length (mm) of the individual. The common
#within-group slope, b, was calculated from a linear mixed model of log10(M0) regressed on log10(L0), with group included as a random factor
#(Reist 1985; Lleonart et al. 2000)." - Oke et al.
#The size correction formula described above: Ms = M0 * (Ls/L0)^b
#variables:
#standard.length = data.1.ps.3$stl #the trait to be used for size correction
#section = substr(data.1.ps.3$univ.specimenID, 1, 4) #the random factor in the linear mixed model. This is the section.
#dataframe.to.use = data.1.ps.3 #the data
#vector.of.columns = 2 #the columns in dataframe.to.use that are to be size corrected
#The output will have all the original columns from dataframe.to.use, with the size-corrected columns added on at the end.
f.sizecorrect <- function(standard.length, vector.of.columns, dataframe.to.use, section) {
  #Calculate overall mean standard length (Ls)
  Ls <- mean(standard.length, na.rm = TRUE)
  #Call individual standard length L0
  L0 <- standard.length
  #Calculate the common, within-group slope, b, for each trait.
  #Here, I am treating each section as a group for the random factor.
  b.vector.lmm <- vector()
  for (i in vector.of.columns) {
    abcd <- (dataframe.to.use[i])
    b.model <- lmer(log10(abcd[,]) ~ log10(standard.length) + (1|section))
    b <- coef(summary(b.model))[2,1]
    b.vector.lmm <- c(b.vector.lmm, b)
  }
  #size correct
  xx <- dataframe.to.use
  columnnames <- colnames(xx)
  j <- 1
  for (i in vector.of.columns) {
    M0 <- xx[,i] #grab the appropriate column of data
    Ms <- M0 * ((Ls/L0)^b.vector.lmm[j]) #size correction formula
    j <- j + 1
    columnnames <- c(columnnames, paste(colnames(xx[i]), "sc", sep = "."))
    xx <- cbind(xx, Ms)
  }
  colnames(xx) <- columnnames #rename the columns in the temporary dataframe xx
  return(xx) #output a new dataframe with the size-corrected columns appended
}
# end f.sizecorrect

#### f.sum.not.na ####
f.sum.not.na <- function(X){
  length(X) - sum(is.na(X))
}
#end f.sum.not.na

#### f.mse ####
#breaks <- means.for.breakpoint$time.since.col[which(means.for.breakpoint$time.since.col >= 0 & means.for.breakpoint$time.since.col <= 8600)]
#brks <- breaks
#trait.values <- means.for.breakpoint$tpg.sc
#time.points <- means.for.breakpoint$time.since.col
f.mse <- function(brks, trait.values, time.points){
  mse <- numeric(length(brks))
  for(i in 1:length(brks)){
    piecewise1 <- lm(trait.values ~ (time.points*(time.points < brks[i])) + (time.points * (time.points >= brks[i])))
    mse[i] <- summary(piecewise1)[6]
  }
  mse <- as.numeric(mse)
  return(mse)
}
# end f.mse
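#Illustrative sketch (not part of the paper's analyses): a standalone toy call of f.sizecorrect, to show the
#".sc" columns it appends. All data below are simulated; column positions mirror the real usage above, and
#lme4 must already be loaded (section 0).
set.seed(3)
toy.df <- data.frame(univ.specimenID = paste0(rep(c("k.01", "k.02", "k.03"), each = 20), ".", 1:20),
                     stl = rnorm(60, mean = 55, sd = 5))
toy.df$tpg <- 0.15 * toy.df$stl + rnorm(60, sd = 0.5) #a made-up trait that scales with body size
toy.sc <- f.sizecorrect(standard.length = toy.df$stl,
                        vector.of.columns = which(colnames(toy.df) == "tpg"),
                        dataframe.to.use = toy.df,
                        section = substr(toy.df$univ.specimenID, 1, 4))
head(toy.sc$tpg.sc) #size-corrected trait, standardized to the overall mean standard length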