# Analysis of sister species # install.packages("robustbase") library(robustbase) theme_set(theme_classic()) # this works better here than setting "+theme_classic()" on each command, which causes +theme() not to work x <- read.csv("summarized_beak_data.csv", stringsAsFactors = FALSE) x <- x[order(x$Pair.age..MY.), ] names(x) # get rid of two outliers with the highest and third highest PC1diff2.corrected values & Pair.age..MY. ~ 0 x <- x[!x$Species.1 == "Numenius_minutus",] x <- x[!x$Species.1 == "Geospiza_magnirostris",] dim(x) #indicator variable for temperate vs. tropical x$temptropbinary <- ifelse(x$TempTrop == "Trop", 1, 0) # variables for sympatry/allopatry, including an indicator variable x$SympAllo <- ifelse(x$Range.overlap > 0.2, "Symp", "Allo") x$patrybinary <- ifelse(x$patry == "allopatric", 0, 1) # indicator variable for sympatry/allopatry ################### # Analysis of PC1 - modeling "diff", the square root of diff2 # The variance of residuals still increases with time, but relationship is relatively weak and weighting changes little ################### ######### # Brownian motion, unweighted regression through the origin ######### w0 <- nls( PC1diff.corr ~ b*sqrt(Pair.age..MY.), data = x, start = list(b=.1)) plot(PC1diff.corr ~ Pair.age..MY., pch = ".", data = x) lines(predict(w0) ~ Pair.age..MY., data = x) # Variance of residuals still rises with time.... plot(w0) r <- residuals(w0) plot(sqrt(abs(r)) ~ Pair.age..MY., data = x) zr <- lm(sqrt(abs(r)) ~ Pair.age..MY., data = x); abline(zr) summary(w0) # Parameters: # Estimate Std. Error t value Pr(>|t|) # 0.074587 0.002184 34.15 <2e-16 *** coef(w0) AIC(w0) # [1] -1083.884 # ++++++++++ # weighted version - weights estimated from sqrt(abs(regression residuals)) r <- residuals(w0) plot(sqrt(abs(r)) ~ Pair.age..MY., data = x) zr <- lm(sqrt(abs(r)) ~ Pair.age..MY., data = x); abline(zr) yr <- predict(zr) w0_w <- nls( PC1diff.corr ~ b*sqrt(Pair.age..MY.), data = x, start = list(b=.1), weights = 1/yr^2) summary(w0_w) # Estimate Std. Error t value Pr(>|t|) # 0.076295 0.002304 33.11 <2e-16 *** # Compare weighted and unweighted fits plot(PC1diff.corr ~ Pair.age..MY., data = x, pch = ".") lines(predict(w0) ~ Pair.age..MY., data = x) lines(predict(w0_w) ~ Pair.age..MY., data = x, lty = 2) # ++++++++++ # model with different slopes for temperate zone and tropics w0_zone <- nls( PC1diff.corr ~ (c*temptropbinary + b)*sqrt(Pair.age..MY.), data = x, start = list(b=.1, c = 1)) anova(w0, w0_zone) #Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) #1 1138 25.659 #2 1137 25.645 1 0.014823 0.6572 0.4177 AIC(w0_zone) # [1] -1082.543 hist(resid(w0_zone), breaks = 40) # plot predictions in ggplot x$predlm <- predict(w0_zone) ggplot(data = x, aes(x = Pair.age..MY., y = PC1diff.corr, color = TempTrop))+ geom_point(alpha = 0.4, size = .4)+ scale_color_manual(values=c("dodgerblue2", "darkorange2"))+ scale_y_continuous("beak size divergence", limits = c(-0.071, 1.3))+ scale_x_continuous("evolutionary age (my)")+ geom_line(aes(y = predlm), size = 1)+ theme(legend.title = element_blank()) #ggsave("beak_size_BM_passerine_diff.pdf", width = 5, height = 4) ######### # Brownian motion, unweighted regression with intercept ######### w1 <- nls( PC1diff.corr ~ a + b*sqrt(Pair.age..MY.), data = x, start = list(a=0.1, b=.1)) plot(PC1diff.corr ~ Pair.age..MY., pch = ".", data = x) lines(predict(w1) ~ Pair.age..MY., data = x) summary(w1) # Estimate Std. Error t value Pr(>|t|) #a 0.057533 0.010575 5.440 6.51e-08 *** # b 0.048897 0.005192 9.419 < 2e-16 *** coef(w1) AIC(w1) # [1] -1111.154 # ++++++++++ # weighted version - weights estimated from sqrt(abs(regression residuals)) r <- residuals(w1) plot(sqrt(abs(r)) ~ Pair.age..MY., data = x) zr <- lm(sqrt(abs(r)) ~ Pair.age..MY., data = x); abline(zr) yr <- predict(zr) w1_w <- nls( PC1diff.corr ~ a + b*sqrt(Pair.age..MY.), data = x, start = list(a=0.1, b=.1), weights = 1/yr^2) summary(w1_w) # Estimate Std. Error t value Pr(>|t|) #a 0.068073 0.009813 6.937 6.71e-12 *** # b 0.042594 0.005314 8.016 2.70e-15 *** # Compare weighted and unweighted fits plot(PC1diff.corr ~ Pair.age..MY., data = x, pch = ".") lines(predict(w1) ~ Pair.age..MY., data = x) lines(predict(w1_w) ~ Pair.age..MY., data = x, lty = 2) # ++++++++++ # model with different slopes for temperate zone and tropics to test significance w1_zone1 <- nls( PC1diff.corr ~ a + (c*temptropbinary + b)*sqrt(Pair.age..MY.), data = x, start = list(a = 1, b=.1, c = 1)) anova(w1, w1_zone1) AIC(w1_zone1) # model with different intercepts for temperate zone and tropics w1_zone2 <- nls( PC1diff.corr ~ (a + c*temptropbinary) + b*sqrt(Pair.age..MY.), data = x, start = list(a = 1, b=.1, c = 1)) summary(w1_zone2) anova(w1, w1_zone2) #Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) #1 1137 25.008 #2 1136 24.987 1 0.020973 0.9535 0.329 AIC(w1_zone2) # [1] -1110.11 hist(resid(w1_zone2), breaks = 40) # ++++++++++ # weighted version - weights estimated from sqrt(abs(regression residuals)) r <- residuals(w1_zone2) plot(sqrt(abs(r)) ~ Pair.age..MY., data = x) zr <- lm(sqrt(abs(r)) ~ Pair.age..MY., data = x); abline(zr) yr <- predict(zr) w1_zone2_w <- nls( PC1diff.corr ~ (a + c*temptropbinary) + b*sqrt(Pair.age..MY.), data = x, start = list(a = 1, b=.1, c = 1), weights = 1/yr^2) summary(w1_zone2_w) # model with different slopes and intercepts for temperate zone and tropics w1_zone3 <- nls( PC1diff.corr ~ (a + c*temptropbinary) + (b + d*temptropbinary)*sqrt(Pair.age..MY.), data = x, start = list(a = 1, b=.1, c = 1, d = 1)) summary(w1_zone3) anova(w1_zone2, w1_zone3) #Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) #1 1136 24.987 #2 1135 24.967 1 0.020668 0.9396 0.3326 AIC(w1_zone3) # [1] -1109.052 # in ggplot - for model w/ different intercepts x$predlm <- predict(w1_zone2) ggplot(data = x, aes(x = Pair.age..MY., y = PC1diff.corr, color = TempTrop))+ geom_point(alpha = 0.4, size = .4)+ scale_color_manual(values=c("dodgerblue2", "darkorange2"))+ scale_y_continuous("beak size divergence", limits = c(-0.071, 1.3))+ scale_x_continuous("evolutionary age (my)")+ geom_line(aes(y = predlm), size = 1)+ theme(legend.title = element_blank()) #ggsave("beak_size_BMi_passerine_diff.pdf", width = 5, height = 4) ######### # Ornstein Uhlenbeck fit, unweighted nonlinear regression ######### w <- nls( PC1diff.corr ~ sqrt(((b2)/(2*a)) * (1-exp(-2*a*Pair.age..MY.))), data = x, start = list(a=0.1, b2=0.001)) plot(PC1diff.corr ~ Pair.age..MY., pch = ".", data = x) lines(predict(w) ~ Pair.age..MY., data = x) coef(w) AIC(w) # -1083.9 # model with different slopes for temperate zone and tropics w_zone <- nls( PC1diff.corr ~ sqrt(((b2 + c*temptropbinary)/(2*a)) * (1-exp(-2*a*Pair.age..MY.))), data = x, start = list(a=0.1, b2=0.001, c = .001)) AIC(w_zone) #[1] -1082.455 anova(w, w_zone) # Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) #1 1137 25.614 #2 1136 25.602 1 0.012482 0.5538 0.4569 # in ggplot x$predlm <- predict(w_zone) ggplot(data = x, aes(x = Pair.age..MY., y = PC1diff.corr, color = TempTrop))+ geom_point(alpha = 0.4, size = .4)+ scale_color_manual(values=c("dodgerblue2", "darkorange2"))+ scale_y_continuous("beak size divergence", limits = c(-0.071, 1.3))+ scale_x_continuous("evolutionary age (my)")+ geom_line(aes(y = predlm), size = 1)+ theme(legend.title = element_blank()) #ggsave("beak_size_OU_passerine_diff.pdf", width = 5, height = 4) ######### # Power function through the origin ######### # I can either take square root of both sides and fit, as done for Brownian and OU w3 <- nls( PC1diff.corr ~ sqrt(a*Pair.age..MY.^b), data = x, start = list(a=0.01, b=0.01)) plot(PC1diff.corr ~ Pair.age..MY., pch = ".", data = x) lines(predict(w3) ~ Pair.age..MY., data = x) coef(w3) AIC(w3) # [1] -1101.221 # Or I can just fit a power function to sqrt(Y) # oh good, it is the same fit w3 <- nls( PC1diff.corr ~ a*Pair.age..MY.^b, data = x, start = list(a=0.01, b=.01)) plot(PC1diff.corr ~ Pair.age..MY., pch = ".", data = x) lines(predict(w3) ~ Pair.age..MY., data = x) coef(w3) AIC(w3) # [1] -1101.221 # model with different slopes for temperate zone and tropics w3_zone <- nls( PC1diff.corr ~ a*Pair.age..MY.^(b + c*temptropbinary), data = x, start = list(a=0.01, b=.01, c = .1)) anova(w3, w3_zone) # Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) #1 1137 25.227 #2 1136 25.227 1 0.0001574 0.0071 0.9329 AIC(w3_zone) # -1099.228 # in ggplot x$predlm <- predict(w3_zone) ggplot(data = x, aes(x = Pair.age..MY., y = PC1diff.corr, color = TempTrop))+ geom_point(alpha = 0.4, size = .4)+ scale_color_manual(values=c("dodgerblue2", "darkorange2"))+ scale_y_continuous("beak size divergence", limits = c(-0.071, 1.3))+ scale_x_continuous("evolutionary age (my)")+ geom_line(aes(y = predlm), size = 1)+ theme(legend.title = element_blank()) #ggsave("beak_size_power_passerine_diff.pdf", width = 5, height = 4) ######### # Power function with intercept ######### # I can either take square root of both sides and fit, as done for Brownian and OU w4 <- nls( PC1diff.corr ~ sqrt(k + a*Pair.age..MY.^b), data = x, start = list(a=0.003, b=2, k=0.01)) plot(PC1diff.corr ~ Pair.age..MY., pch = ".", data = x) lines(predict(w4) ~ Pair.age..MY., data = x) coef(w4) AIC(w4) # -1124.329 # # Or I can just fit a power function to sqrt(Y) w4 <- nls( PC1diff.corr ~ k + a*Pair.age..MY.^b, data = x, start = list(a=0.003, b=2, k=0.01)) plot(PC1diff.corr ~ Pair.age..MY., pch = ".", data = x) lines(predict(w4) ~ Pair.age..MY., data = x) summary(w4) # Estimate Std. Error t value Pr(>|t|) #a 0.002810 0.002055 1.367 0.172 #b 1.554534 0.268659 5.786 9.3e-09 *** # k 0.114854 0.008183 14.036 < 2e-16 *** coef(w4) AIC(w4) # [1] -1126.257 # model with different slopes for temperate zone and tropics to test differences w4_zone <- nls( PC1diff.corr ~ k + a*Pair.age..MY.^(b + c*temptropbinary), data = x, start = list(a=0.003, b=2, c = 0.1, k=0.01)) anova(w4, w4_zone) AIC(w4_zone) # model with different intercepts for temperate zone and tropics w4_zone2 <- nls( PC1diff.corr ~ (k + c*temptropbinary) + a*Pair.age..MY.^b, data = x, start = list(a=0.003, b=2, c = 0.1, k=0.01)) summary(w4_zone2) #Estimate Std. Error t value Pr(>|t|) #a 0.002978 0.002151 1.385 0.166 #b 1.535145 0.265184 5.789 9.16e-09 *** # c -0.007665 0.009562 -0.802 0.423 #k 0.119581 0.010270 11.643 < 2e-16 *** anova(w4, w4_zone2) # Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) #1 1136 24.636 #2 1135 24.622 1 0.013914 0.6414 0.4234 AIC(w4_zone2) # [1] -1124.9 # ++++++++++ # weighted version - weights estimated from sqrt(abs(regression residuals)) r <- residuals(w4_zone2) plot(sqrt(abs(r)) ~ Pair.age..MY., data = x) zr <- lm(sqrt(abs(r)) ~ Pair.age..MY., data = x); abline(zr) yr <- predict(zr) w4_zone2_w <- nls( PC1diff.corr ~ (k + c*temptropbinary) + a*Pair.age..MY.^b, data = x, start = list(a=0.003, b=2, c = 0.1, k=0.01), weights = 1/yr^2) summary(w4_zone2_w) # model with different slopes and different intercepts for temperate zone and tropics w4_zone3 <- nls( PC1diff.corr ~ (k + c*temptropbinary) + a*Pair.age..MY.^(b + d*temptropbinary), data = x, start = list(a=0.003, b=2, c = 0.1, d= 0.1, k=0.01)) summary(w4_zone3) anova(w4, w4_zone3) anova(w4_zone2, w4_zone3) # Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) #1 1135 24.622 #2 1134 24.618 1 0.0038619 0.1779 0.6733 AIC(w4_zone3) #[1] -1123.079 # ++++++++++ # weighted version - weights estimated from sqrt(abs(regression residuals)) r <- residuals(w4_zone3) plot(sqrt(abs(r)) ~ Pair.age..MY., data = x) zr <- lm(sqrt(abs(r)) ~ Pair.age..MY., data = x); abline(zr) yr <- predict(zr) w4_zone3_w <- nls( PC1diff.corr ~ (k + c*temptropbinary) + a*Pair.age..MY.^(b + d*temptropbinary), data = x, start = list(a=0.003, b=2, c = 0.1, d= 0.1, k=0.01), weights = 1/yr^2) summary(w4_zone3_w) # predictions for model in ggplot x$predlm <- predict(w4_zone2) ggplot(data = x, aes(x = Pair.age..MY., y = PC1diff.corr, color = TempTrop))+ geom_point(alpha = 0.4, size = .4)+ scale_color_manual(values=c("dodgerblue2", "darkorange2"))+ scale_y_continuous("beak size divergence", limits = c(-0.071, 1.3))+ scale_x_continuous("evolutionary age (my)")+ geom_line(aes(y = predlm), size = 1)+ theme(legend.title = element_blank()) #ggsave("beak_size_poweri_passerine_diff.pdf", width = 5, height = 4) ##### # test differences between sympatric vs. allopatric sister pairs ##### # can change threshold for sympatry cutoff # x$SympAllo <- ifelse(x$Range.overlap > 0.49, "Symp", "Allo") # x$patrybinary <- ifelse(x$SympAllo == "Allo", 0, 1) # indicator variable for sympatry/allopatry ###### # best fitting model is power w/ intercept ##### w4 <- nls( PC1diff.corr ~ k + a*Pair.age..MY.^b, data = x, start = list(a=0.003, b=2, k=0.01)) # add sympatry/allopatry as a term to test whether there is a difference in evolutionary rates between sympatric and allopatric sister pairs w4_patry_i <- nls( PC1diff.corr ~ (k + c*patrybinary) + a*Pair.age..MY.^b, data = x, start = list(a=0.003, b=2, k=0.01, c = 0.01)) w4_patry_si <- nls( PC1diff.corr ~ (k + c*patrybinary) + a*Pair.age..MY.^(b+d*patrybinary), data = x, start = list(a=0.003, b=2, k=0.01, c = 0.01, d =0.01)) AIC(w4, w4_patry_i, w4_patry_si) #w4 4 -1126.257 #w4_patry_i 5 -1126.009 #w4_patry_si 6 -1126.463 summary(w4_patry_i) #Parameters: # Estimate Std. Error t value Pr(>|t|) #a 0.002198 0.001724 1.275 0.203 #b 1.639411 0.287347 5.705 1.48e-08 *** # k 0.112924 0.008093 13.953 < 2e-16 *** # c 0.012436 0.009346 1.331 0.184 anova(w4, w4_patry_i) #Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) #1 1136 24.636 #2 1135 24.598 1 0.037878 1.7478 0.1864 anova(w4_patry_i, w4_patry_si) #Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) #1 1135 24.598 #2 1134 24.545 1 0.052931 2.4455 0.1181 x$predlm <- predict(w4_patry_i) ggplot(data = x, aes(x = Pair.age..MY., y = PC1diff.corr, color = SympAllo))+ geom_point(alpha = 0.4, size = .4)+ scale_color_manual(values=c("midnightblue", "maroon1")) + scale_linetype_manual(values = c("solid", "dashed"))+ scale_y_continuous("beak size divergence", limits = c(-0.071, 1.3))+ scale_x_continuous("evolutionary age (my)")+ geom_line( aes(y = predlm, group = (SympAllo)), size = 1)+ theme(legend.title = element_blank()) #ggsave("beak_size_patry_passerine.pdf", width = 5, height = 4) # now add latitudinal zone as a term to test whether there is a difference in evolutionary rates between sympatric and allopatric sister pairs w4_patry_i <- nls( PC1diff.corr ~ (k + c*patrybinary) + a*Pair.age..MY.^b, data = x, start = list(a=0.003, b=2, k=0.01, c = 0.01)) w4_patry_zone <- nls( PC1diff.corr ~ (k + c*patrybinary + d*temptropbinary) + a*Pair.age..MY.^b, data = x, start = list(a=0.003, b=2, k=0.01, c = 0.01, d = 0.01)) w4_patry_zone2 <- nls( PC1diff.corr ~ (k + c*patrybinary + d*temptropbinary) + a*Pair.age..MY.^(b+e*temptropbinary), data = x, start = list(a=0.003, b=2, k=0.01, c = 0.01, d = 0.01,e = 0.01)) AIC(w4_patry_i, w4_patry_zone, w4_patry_zone2) #w4_patry_i 5 -1126.009 #w4_patry_zone 6 -1124.393 #w4_patry_zone2 7 -1122.579 anova(w4_patry_i, w4_patry_zone) #Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) #1 1135 24.598 #2 1134 24.590 1 0.0082872 0.3822 0.5366 anova(w4_patry_zone, w4_patry_zone2) #Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) #1 1134 24.590 #2 1133 24.585 1 0.0040148 0.185 0.6672 anova(w4_patry_i, w4_patry_zone2) x$predlm <- predict(w4_patry_zone) ggplot(data = x, aes(x = Pair.age..MY., y = PC1diff.corr, color = SympAllo, linetype = TempTrop))+ geom_point(alpha = 0.4, size = .4)+ scale_color_manual(values=c("midnightblue", "maroon1")) + scale_linetype_manual(values = c("dashed", "solid"))+ scale_y_continuous("beak shape divergence", limits = c(-0.071, 1.3))+ scale_x_continuous("evolutionary age (my)")+ geom_line( aes(y = predlm, group = interaction(SympAllo, TempTrop)), size = 1)+ theme(legend.title = element_blank()) #ggsave("beak_size_patry_passerine_zone.pdf", width = 5, height = 4) # ----------------------------- # Analysis of PC1 - modeling the squared differences divided by 2 (PC1diff2) # Unweighted fits are justified by the observation that the many outliers make the # variance assumptions unlikely (e.g., the variance is supposed to increase with t under Brownian motion) ######## # Brownian motion, unweighted regression through the origin ######## z0 <- lm(PC1diff2.corrected ~ Pair.age..MY. - 1, data = x) summary(z0) # Estimate Std. Error t value Pr(>|t|) # Pair.age..MY. 0.009808 0.000556 17.64 <2e-16 *** plot(PC1diff2.corrected ~ Pair.age..MY., data = x, pch = 1) lines(predict(z0) ~ Pair.age..MY., data = x) # plot(z0) # check residuals etc coef(z0) AIC(z0) # [1] -1943.018 # Plot Y on square root scale plot(PC1diff.corr ~ Pair.age..MY., data = x, pch = ".") # square root, retaining sign lines(sqrt(predict(z0)) ~ Pair.age..MY., data = x) # ++++++++++ # It needs to be weighted because the variance still does increase with t. See residual plots # But it doesnt change results much # weighted version - weights estimated from sqrt(abs(regression residuals from z0)) # Result is same but bigger SE # Results are similar when use weights=1/(Pair.age..MY.+.01) r <- residuals(z0) zr <- lm(sqrt(abs(r)) ~ Pair.age..MY., data = x) yr <- predict(zr) z0_w <- lm(PC1diff2.corrected ~ Pair.age..MY. - 1, data = x, weights = 1/yr^2) summary(z0_w) # Estimate Std. Error t value Pr(>|t|) # Pair.age..MY. 0.0102231 0.0006701 15.26 <2e-16 *** # Compare weighted and unweighted fits plot(PC1diff.corr ~ Pair.age..MY., data = x, pch = 1) lines(sqrt(predict(z0)) ~ Pair.age..MY., data = x) lines(sqrt(predict(z0_w)) ~ Pair.age..MY., data = x, lty = 2) # ++++++++++ # fit single model with different slopes for temperate zone and tropics to test significance z0_zone <- lm(PC1diff2.corrected ~ (Pair.age..MY.:TempTrop) - 1, data = x) summary(z0_zone) # Estimate Std. Error t value Pr(>|t|) # Pair.age..MY.:TempTropTemp 0.0090198 0.0011159 8.083 1.61e-15 *** # Pair.age..MY.:TempTropTrop 0.0100690 0.0006413 15.700 < 2e-16 *** anova(z0, z0_zone) # Res.Df RSS Df Sum of Sq F Pr(>F) #1 1138 12.069 #2 1137 12.062 1 0.0070494 0.6645 0.4151 AIC(z0_zone) # [1] -1941.683 hist(z0_zone$residuals, breaks = 40) plot(z0_zone$residuals ~ x$Pair.age..MY.) plot(PC1diff.corr ~ Pair.age..MY., data = x, pch = ".") # square root, retaining sign y <- predict(z0_zone) lines(sqrt(y[x$temptropbinary == 0]) ~ x$Pair.age..MY.[x$temptropbinary == 0], col = "blue") lines(sqrt(y[x$temptropbinary == 1]) ~ x$Pair.age..MY.[x$temptropbinary == 1], col = "red") # plot this model library(ggplot2) x$predlm <- predict(z0_zone) ggplot(data = x, aes(x = Pair.age..MY., y = PC1diff.corr, color = TempTrop))+ geom_point(alpha = 0.4, size = .4)+ scale_color_manual(values=c("dodgerblue2", "darkorange2"))+ scale_y_continuous("beak size divergence", limits = c(-0.071, 1.3))+ scale_x_continuous("evolutionary age (my)")+ geom_line(aes(y = sqrt(predlm)), size = 1)+ theme(legend.title = element_blank()) #ggsave("beak_size_BM_diff2.pdf", width = 5, height = 4) ####### # Brownian motion, unweighted regression with intercept ("initial difference") ######## z1 <- lm(PC1diff2.corrected ~ Pair.age..MY., data = x) plot(PC1diff2.corrected ~ Pair.age..MY., data = x, pch = ".") lines(predict(z1) ~ Pair.age..MY., data = x) coef(z1) summary(z1) # Estimate Std. Error t value Pr(>|t|) #(Intercept) 0.0113211 0.0046518 2.434 0.0151 * # Pair.age..MY. 0.0082491 0.0008476 9.733 <2e-16 *** AIC(z1) # [1] -1946.936 # Plot Y on square root scale plot(PC1diff.corr ~ Pair.age..MY., data = x, pch = ".") # square root, retaining sign lines(sqrt(predict(z1)) ~ Pair.age..MY., data = x) # ++++++++++ # weighted version - weights estimated from sqrt(abs(regression residuals)) r <- residuals(z1) zr <- lm(sqrt(abs(r)) ~ Pair.age..MY., data = x) yr <- predict(zr) z1_w <- lm(PC1diff2.corrected ~ Pair.age..MY., data = x, weights = 1/yr^2) summary(z1_w) # Estimate Std. Error t value Pr(>|t|) #(Intercept) 0.0170779 0.0035270 4.842 1.46e-06 *** # Pair.age..MY. 0.0067352 0.0009421 7.149 1.56e-12 *** # Compare weighted and unweighted fits plot(PC1diff.corr ~ Pair.age..MY., data = x, pch = 1) lines(sqrt(predict(z1)) ~ Pair.age..MY., data = x) lines(sqrt(predict(z1_w)) ~ Pair.age..MY., data = x, lty = 2) # ++++++++++ # model with different slopes for temperate zone and tropics to test significance of slopes z1_zone_slope <- lm(PC1diff2.corrected ~ Pair.age..MY.:TempTrop, data = x) anova(z1, z1_zone_slope) AIC(z1_zone_slope) hist(z1_zone_slope$residuals, breaks = 40) plot(z1_zone_slope$residuals ~ x$Pair.age..MY.) # model with different intercepts for temperate zone and tropics to test significance of intercepts z1_zone_intercept <- lm(PC1diff2.corrected ~ Pair.age..MY. + TempTrop, data = x) anova(z1, z1_zone_intercept) #Res.Df RSS Df Sum of Sq F Pr(>F) #1 1137 12.006 # 2 1136 12.005 1 0.0010006 0.0947 0.7584 AIC(z1_zone_intercept) # [1] -1945.031 # model with different slopes and different intercepts for temperate zone and tropics to test significance of slopes and intercepts z1_zone_slopeandintercept <- lm(PC1diff2.corrected ~ Pair.age..MY.*TempTrop, data = x) summary(z1_zone_slopeandintercept) # Estimate Std. Error t value Pr(>|t|) #((Intercept) 0.021775 0.008234 2.645 0.008292 ** #Pair.age..MY. 0.005799 0.001650 3.515 0.000457 *** # TempTropTrop -0.015050 0.009982 -1.508 0.131910 #Pair.age..MY.:TempTropTrop 0.003366 0.001925 1.749 0.080605 . anova(z1_zone_intercept, z1_zone_slopeandintercept) #Res.Df RSS Df Sum of Sq F Pr(>F) #1 1136 12.005 #2 1135 11.973 1 0.03226 3.0581 0.08061 . AIC(z1_zone_slopeandintercept) #[1] -1946.096 # Test of different intercepts at time=0 (not significantly different) but not so useful in a model with a difference in slope library(emmeans) z <- emmeans(z1_zone_slopeandintercept, "TempTrop", at = list(Pair.age..MY. = 0)) z # NOTE: Results may be misleading due to involvement in interactions #TempTrop emmean SE df lower.CL upper.CL #Temp 0.02177 0.00823 1135 0.00562 0.0379 #Trop 0.00672 0.00564 1135 -0.00435 0.0178 contrast(z) #contrast estimate SE df t.ratio p.value #Temp effect 0.00753 0.00499 1135 1.508 0.1319 #Trop effect -0.00753 0.00499 1135 -1.508 0.1319 # ++++++++++ # weighted version - weights estimated from sqrt(abs(regression residuals)) # Result is same but bigger SE r <- residuals(z1_zone_slopeandintercept) zr <- lm(sqrt(abs(r)) ~ Pair.age..MY., data = x) yr <- predict(zr) z1_zone_slopeandintercept_w <- lm(PC1diff2.corrected ~ Pair.age..MY.*TempTrop, data = x, weights = 1/yr^2) summary(z1_zone_slopeandintercept_w) # Estimate Std. Error t value Pr(>|t|) #(Intercept) 0.025872 0.006089 4.249 2.32e-05 *** #Pair.age..MY. 0.004569 0.001783 2.563 0.0105 * # TempTropTrop -0.013308 0.007489 -1.777 0.0758 . #Pair.age..MY.:TempTropTrop 0.003143 0.002101 1.496 0.1349 anova(z1_zone_slopeandintercept_w) # [Type 1 SS] # Df Sum Sq Mean Sq F value Pr(>F) #Pair.age..MY. 1 10.391 10.3911 51.6615 1.193e-12 *** #TempTrop 1 0.204 0.2038 1.0132 0.3144 #Pair.age..MY.:TempTrop 1 0.450 0.4503 2.2387 0.1349 #Residuals 1135 228.292 0.2011 # Trop Temp fits x1 <- data.frame(yhat = predict(z1_zone_slopeandintercept_w), x) plot(PC1diff.corr ~ Pair.age..MY., data = x, pch = ".") lines(sqrt(yhat) ~ Pair.age..MY., data = x1[x1$TempTrop == "Trop", ], col = "red") lines(sqrt(yhat) ~ Pair.age..MY., data = x1[x1$TempTrop == "Temp", ], col = "blue") # ++++++++++ # plot best fitting BMi model x$predlm <- predict(z1_zone_intercept) ggplot(data = x, aes(x = Pair.age..MY., y = PC1diff.corr, color = TempTrop))+ geom_point(alpha = 0.4, size = .4)+ scale_color_manual(values=c("dodgerblue2", "darkorange2"))+ scale_y_continuous("beak size divergence", limits = c(-0.071, 1.3))+ scale_x_continuous("evolutionary age (my)")+ geom_line(aes(y = sqrt(predlm)), size = 1)+ theme(legend.title = element_blank()) #ggsave("beak_size_BMi_diff2.pdf", width = 5, height = 4) ######## # Ornstein Uhlenbeck fit, unweighted nonlinear regression ######## plot(PC1diff2.corrected ~ Pair.age..MY., data = x, pch = ".") z <- nls( PC1diff2.corrected ~ ((b2)/(2*a)) * (1-exp(-2*a*Pair.age..MY.)), data = x, start = list(a=.2, b2=.1)) lines(predict(z) ~ Pair.age..MY., data = x) coef(z) AIC(z) # [1] -1943.376 # Plot Y on square root scale plot(PC1diff.corr ~ Pair.age..MY., data = x, pch = ".") # square root, retaining sign lines(sqrt(predict(z)) ~ Pair.age..MY., data = x) # model with latitudinal zone to statistically test differences in rate z_zone <- nls(PC1diff2.corrected ~ ((b2 + c * temptropbinary)/(2*a)) * (1-exp(-2*a *Pair.age..MY.)), data = x, start = list(a=.2, b2=.1, c =1)) anova(z, z_zone) #Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) #1 1137 12.044 # 2 1136 12.035 1 0.0084265 0.7954 0.3727 AIC(z_zone) # [1] -1942.173 hist(resid(z_zone), breaks = 40) # visualize model to double check that it is doing what I think it should be doing Pair.age..MY. <- rep(seq(0, 20, length= 100),2) temptropbinary <- c(rep(c(0), length=100), rep(1, length=100)) OU_shape_model <- data.frame(Pair.age..MY., temptropbinary) OU_shape_model$fitted <- predict(z_zone, newdata = OU_shape_model) plot(PC1diff.corr ~ Pair.age..MY., data = x, pch = ".") # square root, retaining sign lines(sqrt(OU_shape_model$fitted[1:100]) ~ Pair.age..MY.[1:100],col = "blue") lines(sqrt(OU_shape_model$fitted[101:200]) ~ Pair.age..MY.[101:200],col = "red") # plot best fitting model x$predlm <- predict(z_zone) ggplot(data = x, aes(x = Pair.age..MY., y = PC1diff.corr, color = TempTrop))+ geom_point(alpha = 0.4, size = .4)+ scale_color_manual(values=c("dodgerblue2", "darkorange2"))+ scale_y_continuous("beak size divergence", limits = c(-0.071, 1.3))+ scale_x_continuous("evolutionary age (my)")+ geom_line(aes(y = sqrt(predlm)), size = 1)+ theme(legend.title = element_blank()) #ggsave("beak_size_OU_diff2.pdf", width = 5, height = 4) ######### # Power function through the origin ######### # I can either take square root of both sides and fit, as done for Brownian and OU w3 <- nls(PC1diff2.corrected ~ sqrt(a*Pair.age..MY.^b), data = x, start = list(a=0.001, b=0.001)) plot(PC1diff2.corrected ~ Pair.age..MY., pch = ".", data = x) lines(predict(w3) ~ Pair.age..MY., data = x) coef(w3) AIC(w3) # Or I can just fit a power function to sqrt(Y) # oh good, it is exactly the same fit w3 <- nls(PC1diff2.corrected ~ a*Pair.age..MY.^b, data = x, start = list(a=0.01, b=.01)) plot(PC1diff2.corrected ~ Pair.age..MY., pch = ".", data = x) lines(predict(w3) ~ Pair.age..MY., data = x) coef(w3) AIC(w3) #[1] -1941.234 # model with different slopes for temperate zone and tropics w3_zone <- nls( PC1diff2.corrected ~ a*Pair.age..MY.^(b + c*temptropbinary), data = x, start = list(a=0.01, b=.01, c = .1)) anova(w3, w3_zone) # Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) #1137 12.066 #2 1136 12.051 1 0.01527 1.4394 0.2305 AIC(w3_zone) # -1940.676 # in ggplot x$predlm <- predict(w3_zone) ggplot(data = x, aes(x = Pair.age..MY., y = PC1diff.corr, color = TempTrop))+ geom_point(alpha = 0.4, size = .4)+ scale_color_manual(values=c("dodgerblue2", "darkorange2"))+ scale_y_continuous("beak shape divergence", limits = c(-0.071, 1.3))+ scale_x_continuous("evolutionary age (my)")+ geom_line(aes(y = sqrt(predlm)), size = 1)+ theme(legend.title = element_blank()) #ggsave("beak_size_power_diff2.pdf", width = 5, height = 4) ######### # Power function with intercept ######### # I can either take square root of both sides and fit, as done for Brownian and OU w4 <- nls( PC1diff2.corrected ~ sqrt(k + a*Pair.age..MY.^b), data = x, start = list(a=0.03, b=1, k=0.00001)) # doesn't fit # Or I can just fit a power function to sqrt(Y) w4 <- nls( PC1diff2.corrected ~ k + a*Pair.age..MY.^b, data = x, start = list(a=0.003, b=1, k=0.00001)) # also doesn't fit