###PGLS ANALYSIS#### #Don't forget to set your working directory! library(dplyr) library(ape) library(geiger) library(nlme) library(phytools) library(AICcmodavg) #Loading in the data pglsdata <- read.csv("cetaceanOrdered.csv",row.names=3) pglstree <- read.tree("trimTree.tre") name.check(pglstree, pglsdata) #Check names match, should (and does) return "Ok" #####DataPretreatment##### ###filtering the mysticeti whales### mdata<-filter(pglsdata, Infraorder == "Mysticeti") #Removing NA's again which(is.na(mdata$EQ)==TRUE) #Which rows in my dataset have "NA" for EQ: "3","5","8","9","11","12","14","22","23","24","25","28","30","35","36","44","45","47","48","49","50","51","52","53","55","56","59","65","71","73","74" EQdata<-mdata[-c(1,7,9,10),] #remove those rows which(is.na(EQdata$LogLifespan)==TRUE) #which rows in the EQ data have NA for lifespan: 1 2 3 11 15 18 19 29 35 #integer(0) meaning all species for which we know EQ, we know lifespan is.na(EQdata$LogLifespan) ##double check there are no NAs (should return all false) is.na(EQdata$EQ) ##double check there are no NAs (should return all false) #####"Simple" PGLS Model of Lifespan and EQ, Brownian##### LSEQBrownian <- gls(LogLifespan ~ EQ, correlation = corBrownian(phy = pglstree), data = EQdata, method = "ML") summary(LSEQBrownian) plot(LSEQBrownian) ##Model Fit Investigations res<-resid(LSEQBrownian) ## pulling our residuals to investigate them plot(fitted(LSEQBrownian), res) #produce residual vs. fitted model plot abline(0,0) #add a horizontal line at 0 plot(density(res)) #distribution of residuals (ideally normal) qqnorm(res) #Q-Q plot checking whether the normality assumption is violated qqline(res) ##Plotting the simple model (dashed red line) onto the data plot(LogLifespan ~ EQ, mdata) abline(a = coef(LSEQBrownian)[1], b = coef(LSEQBrownian)[2],lwd=2,lty="dashed",col="red") ######Multiple Predictors, Brownian##### ##Using additional predictors (Fem1stReprod): Generalized least squares fit by maximum likelihood #Data Pre-Treament for Multiple Predictors which(is.na(EQdata$LogFem1stReprod)==TRUE) #which rows in the filtered data have NA for Fem1stReprod: 9 20 22 24 #integer(0) meaning again, those that we have EQ for, we have Fem1stReprod for is.na(EQdata$LogFem1stReprod) ##double check there are no NAs (should return all false) LSEQxFem1stBrown <- gls(LogLifespan ~ EQ * LogFem1stReprod, correlation = corBrownian(phy = pglstree), data = EQdata, method = "ML") summary(LSEQxFem1stBrown) res2<-resid(LSEQxFem1stBrown) ## pulling our residuals to investigate them plot(fitted(LSEQxFem1stBrown), res2) #produce residual vs. fitted model plot abline(0,0) #add a horizontal line at 0 qqnorm(res2) #check whether/extent of normality assumption violation qqline(res2) plot(density(res2)) #density distribution of residuals - should be approximately normal #Multiple Predictors, additive (not interactive) LSEQandFem1stBrown <- gls(LogLifespan ~ EQ + LogFem1stReprod, correlation = corBrownian(phy = pglstree), data = EQdata, method = "ML") summary(LSEQandFem1stBrown) res3<-resid(LSEQandFem1stBrown) plot(fitted(LSEQandFem1stBrown), res3) #produce residual vs. fitted model plot abline(0,0) #add a horizontal line at 0 qqnorm(res3) #check whether/extent of normality assumption violation qqline(res3) plot(density(res3)) #density distribution of residuals - should be approximately normal ## Comparing model fits using AIC AIC(LSEQBrownian, LSEQxFem1stBrown, LSEQandFem1stBrown) #df AIC #LSEQBrownian 3 1.08915377 #LSEQxFem1stBrown 5 -1.66977278 #LSEQandFem1stBrown 4 0.05262271 anova(LSEQBrownian, LSEQxFem1stBrown, LSEQandFem1stBrown) #Model df AIC BIC logLik Test L.Ratio p-value #LSEQBrownian 1 3 1.0891538 1.6808275 2.455423 #LSEQxFem1stBrown 2 5 -1.6697728 -0.6836499 5.834886 1 vs 2 6.758927 0.0341 #LSEQandFem1stBrown 3 4 0.0526227 0.8415210 3.973689 2 vs 3 3.722395 0.0537 #####Pagel's Lambda Covariance Structure###### #Note that the outputs for this I planted straight into the "Results" powerpoint #"The most simple generalization of the Brownian model is one with one #additional parameter to scale the expected covariance under pairs of species. #This model is called the lambda model of Pagel (Pagel's lambda). When lambda = 0 the #covariance between species is zero and this corresponds to a non-phylogenetic #regression. By contrast, when &lambda = 1, the evolution of the residual error #is Brownian. Let's try, then, to repeat our analysis with the lambda model." ###LoglifeSpan ~ EQ, "EQdata", relaxed Brownian model of evolution assumption LSEQPagel <- gls(LogLifespan ~ EQ, correlation = corPagel(1,pglstree), data = EQdata, method = "ML") #note the same warning message as before crops up about "Species will be taken as ordered" summary(LSEQPagel) res1<-resid(LSEQPagel) ## pulling our residuals to investigate them plot(fitted(LSEQPagel), res1) #produce residual vs. fitted model plot abline(0,0) #add a horizontal line at 0 qqnorm(res1) #check whether/extent of normality assumption violation qqline(res1) plot(density(res1)) #density distribution of residuals - should be approximately normal #### LSEQxFem1stPagel <- gls(LogLifespan ~ EQ * LogFem1stReprod, correlation = corPagel(1,pglstree), data = EQdata, method = "ML") #Error in eigen(val, only.values = TRUE): infinite or missing values in 'x' #https://stackoverflow.com/questions/44761123/error-in-eigenval-only-values-true-infinite-or-missing-values-in-x-when #"the problem comes from correlation = corPagel(1,pglstree). The error comes from functions used by the nlme package's developper. eigen is a function used to develop gls function which calculates eigenvalues/eigenvectors of a given x matrix." #https://www.mail-archive.com/r-sig-phylo@r-project.org/msg05011.html summary(LSEQxFem1stPagel) res2<-resid(LSEQxFem1stPagel) ## pulling our residuals to investigate them plot(fitted(LSEQxFem1stPagel), res2) #produce residual vs. fitted model plot abline(0,0) #add a horizontal line at 0 qqnorm(res2) #check whether/extent of normality assumption violation qqline(res2) plot(density(res2)) #density distribution of residuals - should be approximately normal ## Model written as an additive rather than interaction LSEQandFem1stPagel <- gls(LogLifespan ~ EQ + LogFem1stReprod, correlation = corPagel(1,pglstree), data = EQdata, method = "ML") summary(LSEQandFem1stPagel) res3<-resid(LSEQandFem1stPagel) plot(fitted(LSEQandFem1stPagel), res3) #produce residual vs. fitted model plot abline(0,0) #add a horizontal line at 0 qqnorm(res3) #check whether/extent of normality assumption violation qqline(res3) plot(density(res3)) #density distribution of residuals - should be approximately normal ## comparing model fits using AIC AIC(LSEQPagel, LSEQandFem1stPagel) #df AIC #LSEQPagel 4 -25.96615 #LSEQandFem1stPagel 5 -26.81648 anova(LSEQPagel, LSEQandFem1stPagel) #Model df AIC BIC logLik Test L.Ratio p-value #LSEQPagel 1 4 -25.96615 -25.17725 16.98307 #LSEQandFem1stPagel 2 5 -26.81648 -25.83036 18.40824 1 vs 2 2.850337 0.0914 AIC(LSEQBrownian, LSEQxFem1stBrown, LSEQandFem1stBrown, LSEQPagel, LSEQandFem1stPagel) #df AIC df AIC #LSEQBrownian 3 1.08915377 #LSEQxFem1stBrown 5 -1.66977278 #LSEQandFem1stBrown 4 0.05262271 #LSEQPagel 4 -25.96614643 #LSEQandFem1stPagel 5 -26.81648374 anova(LSEQBrownian, LSEQxFem1stBrown, LSEQandFem1stBrown, LSEQPagel, LSEQandFem1stPagel) #Model df AIC BIC logLik Test L.Ratio p-value #LSEQBrownian 1 3 1.089154 1.680828 2.455423 #LSEQxFem1stBrown 2 5 -1.669773 -0.683650 5.834886 1 vs 2 6.758927 0.0341 #LSEQandFem1stBrown 3 4 0.052623 0.841521 3.973689 2 vs 3 3.722395 0.0537 #LSEQPagel 4 4 -25.966146 -25.177248 16.983073 #LSEQandFem1stPagel 5 5 -26.816484 -25.830361 18.408242 4 vs 5 2.850337 0.0914 ####Remove Bowhead from data, so new sample size = 8, or N8##### newmdata <- EQdata[-c(9),] #remove bowhead whale LSEQBrownN8 <- gls(LogLifespan ~ EQ, correlation = corBrownian(phy = pglstree), data = newmdata, method = "ML") summary(LSEQBrownN8) #Generalized least squares fit by maximum likelihood, Model: LogLifespan ~ EQ , Data: newmdata #AIC BIC logLik #-5.598586 -5.360261 5.799293 #Correlation Structure: corBrownian: Formula: ~1, Parameter estimate(s): numeric(0) #Coefficients: # Value Std.Error t-value p-value #(Intercept) 2.0524840 0.3577979 5.736435 0.0012 #EQ -0.5351504 0.2743931 -1.950306 0.0990 #Correlation: (Intr) EQ -0.263 #Standardized residuals: Min Q1 Med Q3 Max #-0.3724586 -0.2551303 0.1930944 0.3719801 0.5581109 #Residual standard error: 0.3113902 Degrees of freedom: 8 total; 6 residual newres<-resid(LSEQBrownN8) ## pulling our residuals to investigate them plot(fitted(LSEQBrownN8), newres) #produce residual vs. fitted model plot abline(0,0) #add a horizontal line at 0 plot(density(newres)) #distribution of residuals (ideally normal) qqnorm(newres) #Q-Q plot checking whether the normality assumption is violated qqline(newres) ##Additive Fem1stReprod## LSEQandFem1stBrownN8 <- gls(LogLifespan ~ EQ + LogFem1stReprod, correlation = corBrownian(phy = pglstree), data = newmdata, method = "ML") summary(LSEQandFem1stBrownN8) #Generalized least squares fit by maximum likelihood Model: LogLifespan ~ EQ + LogFem1stReprod Data: newmdata #AIC BIC logLik #-5.109939 -4.792173 6.55497 #Correlation Structure: corBrownian Formula: ~1 Parameter estimate(s): numeric(0) #Coefficients: Value Std.Error t-value p-value #(Intercept) 2.4039414 0.4959650 4.846998 0.0047 #EQ -0.5247155 0.2736808 -1.917254 0.1133 #LogFem1stReprod -0.4045848 0.3967821 -1.019665 0.3547 #Correlation: (Intr) EQ #EQ -0.163 #LogFem1stReprod -0.695 -0.037 #Standardized residuals: Min Q1 Med Q3 Max #-0.4256143 -0.1984937 0.1588728 0.4177743 0.5103131 #Residual standard error: 0.2833229 Degrees of freedom: 8 total; 5 residual newresadd<-resid(LSEQandFem1stBrownN8) ## pulling our residuals to investigate them plot(fitted(LSEQandFem1stBrownN8), newresadd) #produce residual vs. fitted model plot abline(0,0) #add a horizontal line at 0 plot(density(newresadd)) #distribution of residuals (ideally normal) qqnorm(newresadd) #Q-Q plot checking whether the normality assumption is violated qqline(newresadd) #PAGELS# newLSEQPagel <- gls(LogLifespan ~ EQ, correlation = corPagel(1,pglstree), data = newmdata, method = "ML") #Unable to run this model due to "Error in eigen(val, only.values = TRUE) : #infinite or missing values in 'x' ##ADDITIVE## newLSEQxFem1stPagel <- gls(LogLifespan ~ EQ + LogFem1stReprod, correlation = corPagel(1,pglstree), data = newmdata, method = "ML") #Error in gls(LogLifespan ~ EQ + LogFem1stReprod, correlation = corPagel(1, : #false convergence (8) #Model comparison# anova(LSEQBrownN8, LSEQandFem1stBrownN8) ####Model Comparison#### mysticetelist <- list(LSEQBrownian, LSEQxFem1stBrown, LSEQandFem1stBrown, LSEQPagel, LSEQandFem1stPagel, LSEQBrownN8, LSEQandFem1stBrownN8) mysticetenames <- c('LifespanEQBrown', 'LifespanEQINTFem1stBrown', 'LifespanEQandFem1stBrown', 'LifespanEQPagel', 'LifespanEQandFem1stPagel', 'N8LifespanEQBrown', 'N8LifespanEQandFem1stBrown') aictab(cand.set = mysticetelist, modnames = mysticetenames) #K AICc Delta_AICc AICcWt Cum.Wt LL #LifespanEQPagel 4 -15.97 0.00 0.99 0.99 16.98 #LifespanEQandFem1stPagel 5 -6.82 9.15 0.01 1.00 18.41 #N8LifespanEQBrown 3 0.40 16.37 0.00 1.00 5.80 #LifespanEQBrown 3 5.89 21.86 0.00 1.00 2.46 #N8LifespanEQandFem1stBrown 4 8.22 24.19 0.00 1.00 6.55 #LifespanEQandFem1stBrown 4 10.05 26.02 0.00 1.00 3.97 #LifespanEQINTFem1stBrown 5 18.33 34.30 0.00 1.00 5.83 plot(LogLifespan ~ EQ, mdata, xlim=c(0,5),ylim=c(1.2,2.5), xlab="Encephalisation Quotient", ylab="Log of Lifespan", col="cadetblue", pch=20, las=1) abline(a = coef(LSEQPagel)[1], b = coef(LSEQPagel)[2],lwd=2,lty="dashed",col="aquamarine2") plot(LogLifespan ~ EQ, mdata, xlim=c(0,1),ylim=c(1.2,2.5)) abline(a = coef(LSEQPagel)[1], b = coef(LSEQPagel)[2],lwd=2,lty="dashed",col="red")