#Dec. 12 2016 # Packages ------------------------------------------------------------- library(reshape2);install.packages("ROCR");library(ROCR);install.packages("plyr");library(plyr);library(scales);install.packages("ncdf4"); library(ncdf4) install.packages("ape");library(ape);install.packages("phytools");library(phytools);install.packages("ade4");library(ade4) install.packages("vegan");library(vegan);install.packages("geiger");library(geiger);install.packages("OUwie");library(OUwie); install.packages("paleoTS");library(paleoTS);install.packages("quantreg");library(quantreg);install.packages("nlme");library(nlme) install.packages('ppcor');library(ppcor);install.packages("matrixStats");library(matrixStats);install.packages("geiger");library(geiger) install.packages("RColorBrewer");library(RColorBrewer);install.packages("rlist");library(rlist) install.packages('qpcR');library(qpcR) # Setup ------------------------------------------------------------- setwd("G:/My Drive/School/Old schoolwork/2015 (3) Fall - The Long Fall/Finnegan lab/Energetics") #on home computer setwd("C:/Users/jgsauls/Google Drive/School/Old schoolwork/2015 (3) Fall - The Long Fall/Finnegan lab/Energetics") #at school dat <- cbind(paste(read.csv("AllSources.csv",header=TRUE,stringsAsFactors=FALSE)[,5],read.csv("AllSources.csv",header=TRUE, stringsAsFactors=FALSE)[,6]),read.csv("AllSources.csv",header=TRUE,stringsAsFactors=FALSE)[,c(1,14,15,17,19,20,10,2)]) colnames(dat) <- c("name","source","lat","lon","depth","Linf","k","lifespan","family"); dat <- dat[dat$source != "PatelloDB" & dat$name != "Pinna bicolor",] dat$k <- as.numeric(dat$k);dat <- dat[!is.na(dat$k),] #removes 2 non-entries: dat <- dat[1:700,] # Functions: lat.to.y <- function(lat,res){round((lat+90)/res+0.5)};lon.to.x <- function(lon,res){round((lon+180)/res+0.5)} #res in degrees y.to.lat <- function(y,res){(y-0.5)*res-90}; x.to.lon <- function(x,res){(x-0.5)*res-180}; outersect <- function(x,y){sort(c(setdiff(x,y), setdiff(y,x)))} seqWrapper <- function(lb, ub, by=1) {s <- c(); if(!ub < lb) s <- seq(lb,ub, by=by);return(s)}#forget what this does but important for bivtree.repeats se <- function(alist){sd(alist)/sqrt(length(alist))} rotate <- function(x) t(apply(x, 2, rev)) # Getting oceanographic parameters ---------------------------------------- #Using daytime SST at 11um setwd("C:/Users/jgsauls/Desktop/Energetics") #directory where .nc files are stored #make.percentile.map takes a list of maps and returns the given percentile of the values in each cell make.percentile.map <- function(map.list,percentile){ perc <- percentile/100 return(apply(array(unlist(map.list), c(dim(map.list[[1]])[1], dim(map.list[[1]])[2], length(map.list))), c(1,2), function(x){quantile(x,perc,na.rm = TRUE)}))} #this function is useful when there isn't enough memory to store every map for a parameter #analyze.by.slices divides a map into vertical slices depending on the RAM limit specified and stores only one slice across all maps #RAM limit is in GB #param just tells R's function for opening .nc files what sort of parameter the map is for - can be either "sst" or "chlor_a" analyze.by.slices <- function(file.inventory,param,RAM.limit,percentile){ #file inventory is the folder where the files are stored ex <- ncvar_get(nc_open(paste(file.inventory,list.files(file.inventory,pattern = "\\.nc$")[1],sep="")),param) fill.value <- as.numeric(ncatt_get(nc_open(paste(file.inventory,list.files(file.inventory,pattern = "\\.nc$")[1],sep="")),param,'_FillValue')[2]) max.value <- as.numeric(ncatt_get(nc_open(paste(file.inventory,list.files(file.inventory,pattern = "\\.nc$")[1],sep="")),param,'display_max')[2]) min.value <- as.numeric(ncatt_get(nc_open(paste(file.inventory,list.files(file.inventory,pattern = "\\.nc$")[1],sep="")),param,'display_min')[2]) slice.num <- as.numeric(ceiling(object.size(ex)*length(list.files(file.inventory,pattern = "\\.nc$"))/(RAM.limit*10^9))) slice.points <- unlist(lapply(seq(1,dim(ex)[1],length.out=slice.num+1),round));slice.points[slice.num+1] <- dim(ex)[1]+1 print(paste("Number of slices to make: ",slice.num)) final.map <- matrix(nrow=4320,ncol=0) print(paste("dim(ex)[2] is ", dim(ex)[2])) #for each slice, grab that slice from each map for(i in 1:slice.num){ print(paste("Current slice number: ",i)) temp.map.list <- list() for(j in 1:length(list.files(file.inventory,pattern = "\\.nc$"))){ print(paste("Map number: ",j)) temp.map.list <- list.append(temp.map.list,ncvar_get(nc_open(paste(file.inventory,list.files(file.inventory,pattern = "\\.nc$")[j], sep="")),param,c(slice.points[i],1),c(slice.points[i+1]-slice.points[i],4320))) temp.map.list[[j]][temp.map.list[[j]]==fill.value | temp.map.list[[j]] >= max.value | temp.map.list[[j]] <= min.value] <- NA} #this sets all fill values to NA a <- make.percentile.map(temp.map.list,percentile) final.map <- cbind(final.map,t(a)) remove(a) remove(temp.map.list) gc()} return(final.map)} #make a map of the 25th percentile of temperature sst.25p <- analyze.by.slices("sst/","sst",RAM.limit=1,percentile=25) write.csv(sst.25p, file="sst25p.csv") sst.75p <- analyze.by.slices("sst/","sst",RAM.limit=1,percentile=75) write.csv(sst.75p, file="sst75p.csv") chlor_a.25p <- analyze.by.slices("chlor_a/","chlor_a",RAM.limit=1,percentile=25) write.csv(chlor_a.25p, file="chlor_a25p.csv") chlor_a.75p <- analyze.by.slices("chlor_a/","chlor_a",RAM.limit=1,percentile=75) write.csv(chlor_a.75p, file="chlor_a75p.csv") #June 2018: redux with percentiles 10, 25, 75, 90 setwd("C:/Users/James/Google Drive/School/Old schoolwork/2015 (3) Fall - The Long Fall/Finnegan lab/Energetics/Big files") #sst10p sst10p <- as.matrix(read.csv('sst10p.csv',header=FALSE))[c(4321:2),] for(i in 1:dim(dat)[1]){dat$sst10p[i] <- sst10p[lat.to.y(dat$lat[i],1/24),lon.to.x(dat$lon[i],1/24)]} remove(sst10p) #sst25p sst25p <- as.matrix(read.csv('Big files/sst25p.csv',header=FALSE))[c(4321:2),] for(i in 1:dim(dat)[1]){dat$sst25p[i] <- sst25p[lat.to.y(dat$lat[i],1/24),lon.to.x(dat$lon[i],1/24)]} remove(sst25p) #sst75p sst75p <- as.matrix(read.csv('Big files/sst75p.csv',header=FALSE))[c(4321:2),] for(i in 1:dim(dat)[1]){dat$sst75p[i] <- sst75p[lat.to.y(dat$lat[i],1/24),lon.to.x(dat$lon[i],1/24)]} remove(sst75p) #sst90p sst90p <- as.matrix(read.csv('Big files/sst90p.csv',header=FALSE))[c(4321:2),] for(i in 1:dim(dat)[1]){dat$sst90p[i] <- sst90p[lat.to.y(dat$lat[i],1/24),lon.to.x(dat$lon[i],1/24)]} remove(sst90p) #chlor_a10p chlor_a10p <- as.matrix(read.csv('Big files/chlor_a10p.csv',header=FALSE))[c(4321:2),] for(i in 1:dim(dat)[1]){dat$chlor_a10p[i] <- chlor_a10p[lat.to.y(dat$lat[i],1/24),lon.to.x(dat$lon[i],1/24)]} remove(chlor_a10p) #chlor_a25p chlor_a25p <- as.matrix(read.csv('Big files/chlor_a25p.csv',header=FALSE))[c(4321:2),] for(i in 1:dim(dat)[1]){dat$chlor_a25p[i] <- chlor_a25p[lat.to.y(dat$lat[i],1/24),lon.to.x(dat$lon[i],1/24)]} remove(chlor_a25p) #chlor_a75p chlor_a75p <- as.matrix(read.csv('Big files/chlor_a75p.csv',header=FALSE))[c(4321:2),] for(i in 1:dim(dat)[1]){dat$chlor_a75p[i] <- chlor_a75p[lat.to.y(dat$lat[i],1/24),lon.to.x(dat$lon[i],1/24)]} remove(chlor_a75p) #chlor_a90p chlor_a90p <- as.matrix(read.csv('Big files/chlor_a90p.csv',header=FALSE))[c(4321:2),] for(i in 1:dim(dat)[1]){dat$chlor_a90p[i] <- chlor_a90p[lat.to.y(dat$lat[i],1/24),lon.to.x(dat$lon[i],1/24)]} remove(chlor_a90p) #processing plot(dat$sst10p,dat$sst25p) plot(dat$sst25p,dat$sst75p) plot(dat$sst75p,dat$sst90p) plot(log10(as.numeric(dat$chlor_a75p)),log10(as.numeric(dat$chlor_a90p))) #datp is dat with all 4 percentile values for both env predictors datp <- data.frame(name=dat$name,lat=dat$lat,lon=dat$lon,Linf=dat$Linf,k=dat$k,lifespan=dat$lifespan, family=dat$family,sst10p=as.numeric(dat$sst10p),sst25p=as.numeric(dat$sst25p), sst75p=as.numeric(dat$sst75p), sst90p=as.numeric(dat$sst90p),chlor_a10p=as.numeric(dat$chlor_a10p), chlor_a25p=as.numeric(dat$chlor_a25p), chlor_a75p=as.numeric(dat$chlor_a75p),chlor_a90p=as.numeric(dat$chlor_a90p)) datp.all <- datp datp <- datp[datp$mindepth<200|is.na(datp$mindepth),] write.csv(datp,"datp.csv") #or, if you've already done that... datp <- read.csv("datp.csv") #world map library(maps) dev.off() par("fg"="gray70") map("world", fill=TRUE, col="gray70", bg="white", ylim=c(-90, 90),xlim=c(-180,180),mar=c(0,0,0,0),boundary=TRUE,interior=FALSE) points(datp$lon,datp$lat,pch=ifelse(!is.na(datp$chlor_a10p),21,24), bg=ifelse(!is.na(datp$chlor_a10p),alpha("steelblue2",0.5),alpha("darkorange",0.5)),col=alpha("black",0.3),cex=0.9) # Correlation ------------------------------------------------------------- par(mfrow=c(2,2),mar=c(5,5,1,1)) #quantreg, tmin plot(datp$sst10p,log(datp$k),xlab="Minimum temperature (°C)",ylab="ln ( Growth coefficient )",type = "n",xlim=c(0,34),cex.lab=1.2) points(datp$sst10p,log(datp$k),cex=.9,col=ifelse(abs(datp$lat)<30, "darkorange2", alpha("blue2",0.65)), bg=ifelse(abs(datp$lat)<30, alpha("darkorange",0.45), alpha("blue",0.3)), pch=ifelse(abs(datp$lat)<30, 24, 21)) taus <- c(.05,.25,.75,.95) xx <- seq(min(datp$sst10p[!is.na(datp$sst10p)]),max(datp$sst10p[!is.na(datp$sst10p)]),0.75) f <- coef(rq((log(datp$k))~(datp$sst10p),tau=taus)) yy <- cbind(1,xx)%*%f for(i in 1:length(taus)){lines(xx,yy[,i],lwd=2,lty="dashed")} abline(rq(log(datp$k) ~ datp$sst10p), col="black",lwd=2) sst10pfit1 <- rq(log(datp$k)~datp$sst10p,tau=.25) sst10pfit2 <- rq(log(datp$k)~datp$sst10p,tau=.50) sst10pfit3 <- rq(log(datp$k)~datp$sst10p,tau=.75) anova(sst10pfit1,sst10pfit2,sst10pfit3) #quantreg, tmax plot(datp$sst90p,log(datp$k),xlab="Maximum temperature",ylab="",type = "n",xlim=c(0,34),cex.lab=1.2) points(datp$sst90p,log(datp$k),cex=.9,col=ifelse(abs(datp$lat)<30, "darkorange2", alpha("blue2",0.65)), bg=ifelse(abs(datp$lat)<30, alpha("darkorange",0.45), alpha("blue",0.3)), pch=ifelse(abs(datp$lat)<30, 24, 21)) taus <- c(.05,.25,.75,.95) xx <- seq(min(datp$sst90p[!is.na(datp$sst90p)]),max(datp$sst90p[!is.na(datp$sst90p)]),0.75) f <- coef(rq((log(datp$k))~(datp$sst90p),tau=taus)) yy <- cbind(1,xx)%*%f for(i in 1:length(taus)){lines(xx,yy[,i],lwd=2,lty="dashed")} abline(rq(log(datp$k) ~ datp$sst90p), col="black",lwd=2) sst90pfit1 <- rq(log(datp$k)~datp$sst90p,tau=.25) sst90pfit2 <- rq(log(datp$k)~datp$sst90p,tau=.50) sst90pfit3 <- rq(log(datp$k)~datp$sst90p,tau=.75) anova(sst90pfit1,sst90pfit2,sst90pfit3) #title("Quantile regression of temperature and growth rate", outer = TRUE, cex = 1.3) #quantreg, chlor_a10p plot(datp$chlor_a10p,datp$k,xlab=expression(paste("Minimum chlorophyll (mg/",m^3,")")),ylab="Growth coefficient",type = "n",xlim=c(0,20),cex.lab=1.2) points(datp$chlor_a10p,datp$k,col=ifelse(abs(datp$lat)<30, "darkorange2", alpha("blue2",0.65)), bg=ifelse(abs(datp$lat)<30, alpha("darkorange",0.45), alpha("blue",0.3)), pch=ifelse(abs(datp$lat)<30, 24, 21)) taus <- c(.05,.25,.75,.95) xx <- seq(min(datp$chlor_a10p[!is.na(datp$chlor_a10p)]),max(datp$chlor_a10p[!is.na(datp$chlor_a10p)]),0.05) f <- coef(rq((datp$k)~(datp$chlor_a10p),tau=taus)) yy <- cbind(1,xx)%*%f for(i in 1:length(taus)){lines(xx,yy[,i],lwd=2,lty="dashed")} abline(rq(datp$k ~ datp$chlor_a10p), col="black",lwd=2) chlor_a10pfit1 <- rq(datp$k~datp$chlor_a10p,tau=.25);chlor_a10pfit2 <- rq(datp$k~datp$chlor_a10p,tau=.50);chlor_a10pfit3 <- rq(datp$k~datp$chlor_a10p,tau=.75) anova(chlor_a10pfit1,chlor_a10pfit2,chlor_a10pfit3) #quantreg, chlor_a90p plot(datp$chlor_a90p,datp$k,xlab="Maximum chlorophyll",ylab="",type = "n",xlim=c(0,20),cex.lab=1.2) points(datp$chlor_a90p,datp$k,col=ifelse(abs(datp$lat)<30, "darkorange2", alpha("blue2",0.65)), bg=ifelse(abs(datp$lat)<30, alpha("darkorange",0.45), alpha("blue",0.3)), pch=ifelse(abs(datp$lat)<30, 24, 21)) taus <- c(.05,.25,.75,.95) xx <- seq(min(datp$chlor_a90p[!is.na(datp$chlor_a90p)]),max(datp$chlor_a90p[!is.na(datp$chlor_a90p)]),0.05) f <- coef(rq((datp$k)~(datp$chlor_a90p),tau=taus)) yy <- cbind(1,xx)%*%f for(i in 1:length(taus)){lines(xx,yy[,i],lwd=2,lty="dashed",col=alpha("black"))} abline(rq(datp$k ~ datp$chlor_a90p), col="black",lwd=2) chlor_a90pfit1 <- rq(datp$k~datp$chlor_a90p,tau=.25);chlor_a90pfit2 <- rq(datp$k~datp$chlor_a90p,tau=.50);chlor_a90pfit3 <- rq(datp$k~datp$chlor_a90p,tau=.75) anova(chlor_a90pfit1,chlor_a90pfit2,chlor_a90pfit3) #mtext("Quantile regression of chlorophyll and growth rate", outer = TRUE, cex = 1.3) #correlations #temp cor.test(datp$sst10p,log(datp$k)) cor.test(datp$sst90p,log(datp$k)) #chlor cor.test(datp$chlor_a10p,datp$k) cor.test(datp$chlor_a90p,datp$k) #log chlor cor.test(log10(datp$chlor_a10p),log10(datp$k)) cor.test(log10(datp$chlor_a90p),log10(datp$k)) #with suspension feeders only, and with all ecologies #sst10 cor.test(datp$sst10p,log10(datp$k))$estimate^2 cor.test(datp$sst10p[datp$feeding=="S"],log10(datp$k[datp$feeding=="S"]))$estimate^2 #sst90 cor.test(datp$sst90p,log10(datp$k))$estimate^2 cor.test(datp$sst90p[datp$feeding=="S"],log10(datp$k[datp$feeding=="S"]))$estimate^2 #chlor10 cor.test(datp$chlor_a10p,log10(datp$k))$estimate^2 cor.test(datp$chlor_a10p[datp$feeding=="S"],log10(datp$k[datp$feeding=="S"]))$estimate^2 #chlor90 cor.test(datp$chlor_a90p,log10(datp$k))$estimate^2 cor.test(datp$chlor_a90p[datp$feeding=="S"],log10(datp$k[datp$feeding=="S"]))$estimate^2 #range as a predictor plot(abs(dat$lat),dat$tempmax-dat$tempmin,xlab="Absolute latitude",ylab="Temperature range",main="Latitude and temp. seasonality") plot(abs(dat$lat),log10(dat$chlormax)-log10(dat$chlormin),xlab="Absolute latitude",ylab="Chlorophyll range",main="Latitude and chlor. seasonality") cor.test(dat$tempmax[dat$tempmax-dat$tempmin != 0]-dat$tempmin[dat$tempmax-dat$tempmin != 0],dat$k[dat$tempmax-dat$tempmin != 0]) plot(dat$tempmax[dat$tempmax-dat$tempmin != 0]-dat$tempmin[dat$tempmax-dat$tempmin != 0],dat$k[dat$tempmax-dat$tempmin != 0], col=ifelse(abs(dat$lat[dat$tempmax-dat$tempmin != 0])<30, "darkorange", "blue"),pch=19,cex=1.2, xlab="Temperature range",ylab="Growth coefficient k",main="Temperature range and growth coefficient") cor.test(log10(dat$chlormax[dat$chlormax-dat$chlormin != 0])-log10(dat$chlormin[dat$chlormax-dat$chlormin != 0]), dat$k[dat$chlormax-dat$chlormin != 0]) plot(log10(dat$chlormax[dat$chlormax-dat$chlormin != 0])-log10(dat$chlormin[dat$chlormax-dat$chlormin != 0]), dat$k[dat$chlormax-dat$chlormin != 0],col=ifelse(abs(dat$lat[dat$chlormax-dat$chlormin != 0])<30, "darkorange", "blue"),pch=19,cex=1.2, xlab="Chlorophyll range",ylab="Growth coefficient k",main="Chlorophyll range (logged before taking range) and k") #regressions: summary(lm(log10(datp$k)~datp$sst10p)) summary(lm(log10(datp$k)~datp$sst25p)) summary(lm(log10(datp$k)~datp$sst75p)) summary(lm(log10(datp$k)~datp$sst90p)) summary(lm(datp$k~datp$chlor_a10p)) summary(lm(datp$k~datp$chlor_a25p)) summary(lm(datp$k~datp$chlor_a75p)) summary(lm(datp$k~datp$chlor_a90p)) #scallops have a really strong correlation for some reason plot(datp$sst10p[datp$family=="Pectinidae"],datp$k[datp$family=="Pectinidae"]) summary(lm(log(datp$k[datp$family=="Pectinidae"])~datp$sst10p[datp$family=="Pectinidae"])) #bootstrapping m <- matrix(NA,nrow=1000,ncol=8) for(i in 1:1000){ da <- datp[sample(1:700, 700, replace=TRUE),] #sst10 sst10reg <- summary(lm(log(da$k)~da$sst10p)) m[i,1] <- sst10reg$r.squared m[i,2] <- sst10reg$coefficients[2,4] #sst90 sst90reg <- summary(lm(log(da$k)~da$sst90p)) m[i,3] <- sst90reg$r.squared m[i,4] <- sst90reg$coefficients[2,4] #chlor_a10 chlor_a10reg <- summary(lm(da$k~da$chlor_a10p)) m[i,5] <- chlor_a10reg$r.squared m[i,6] <- chlor_a10reg$coefficients[2,4] #chlor_a90 chlor_a90reg <- summary(lm(da$k~da$chlor_a90p)) m[i,7] <- chlor_a90reg$r.squared m[i,8] <- chlor_a90reg$coefficients[2,4] } colMedians(m) table(m[,6]<0.05) table(m[,8]<0.05) #miscellaneous analysis: just looking at distribution of k between families #unlogged boxplot(log10(datp$k)[datp$family=="Arcidae"],log10(datp$k)[datp$family=="Arcticidae"],log10(datp$k)[datp$family=="Cardiidae"],log10(datp$k)[datp$family=="Donacidae"],log10(datp$k)[datp$family=="Hiatellidae"], log10(datp$k)[datp$family=="Mactridae"],log10(datp$k)[datp$family=="Myidae"],log10(datp$k)[datp$family=="Mytilidae"],log10(datp$k)[datp$family=="Ostreidae"],log10(datp$k)[datp$family=="Pectinidae"], log10(datp$k)[datp$family=="Pharidae"],log10(datp$k)[datp$family=="Pinnidae"],log10(datp$k)[datp$family=="Semelidae"],log10(datp$k)[datp$family=="Tellinidae"],log10(datp$k)[datp$family=="Veneridae"]) #logged boxplot(datp$k[datp$family=="Arcidae"],datp$k[datp$family=="Arcticidae"],datp$k[datp$family=="Cardiidae"],datp$k[datp$family=="Donacidae"],datp$k[datp$family=="Hiatellidae"], datp$k[datp$family=="Mactridae"],datp$k[datp$family=="Myidae"],datp$k[datp$family=="Mytilidae"],datp$k[datp$family=="Ostreidae"],datp$k[datp$family=="Pectinidae"], datp$k[datp$family=="Pharidae"],datp$k[datp$family=="Pinnidae"],datp$k[datp$family=="Semelidae"],datp$k[datp$family=="Tellinidae"],datp$k[datp$family=="Veneridae"]) #multiple regression: reg1 <- lm(log(k)~sst10p,data = datp[!is.na(datp$chlor_a10p) & !is.na(datp$sst10p),]) reg2 <- lm(log(k)~chlor_a10p,data = datp[!is.na(datp$chlor_a10p) & !is.na(datp$sst10p),]) reg3 <- lm(log(k)~sst10p+chlor_a10p,data = datp[!is.na(datp$chlor_a10p) & !is.na(datp$sst10p),]) reg4 <- lm(log(k)~sst10p+sst90p+chlor_a10p+chlor_a90p,data = datp[!is.na(datp$chlor_a10p) & !is.na(datp$sst10p),]) reg5 <- lm(log(k)~factor(family),data = datp[!is.na(datp$chlor_a10p) & !is.na(datp$sst10p),]) reg6 <- lm(log(k)~sst10p+factor(family),data = datp[!is.na(datp$chlor_a10p) & !is.na(datp$sst10p),]) reg7 <- lm(log(k)~sst10p+chlor_a10p+factor(family),data = datp[!is.na(datp$chlor_a10p) & !is.na(datp$sst10p),]) reg8 <- lm(log(k)~sst10p+sst90p+chlor_a10p+chlor_a90p+factor(family),data = datp[!is.na(datp$chlor_a10p) & !is.na(datp$sst10p),]) aic.w(as.list(AIC(reg1,reg2,reg3,reg4,reg5,reg6,reg7,reg8))[[2]]) #looking at correlations within families dev.off() par(mfrow=c(1,2)) plot( c( sd(datp$sst10p[datp$family=="Veneridae"],na.rm = TRUE), sd(datp$sst10p[datp$family=="Pectinidae"],na.rm = TRUE), sd(datp$sst10p[datp$family=="Mytilidae"],na.rm = TRUE), sd(datp$sst10p[datp$family=="Hiatellidae"],na.rm = TRUE), sd(datp$sst10p[datp$family=="Cardiidae"],na.rm = TRUE), sd(datp$sst10p[datp$family=="Mactridae"],na.rm = TRUE), sd(datp$sst10p[datp$family=="Tellinidae"],na.rm = TRUE), sd(datp$sst10p[datp$family=="Myidae"],na.rm = TRUE), sd(datp$sst10p[datp$family=="Donacidae"],na.rm = TRUE), sd(datp$sst10p[datp$family=="Arcidae"],na.rm = TRUE), sd(datp$sst10p,na.rm = TRUE) ), c( summary(lm(log(k)~sst10p,data = datp[datp$family=="Veneridae",]))$r.squared, summary(lm(log(k)~sst10p,data = datp[datp$family=="Pectinidae",]))$r.squared, summary(lm(log(k)~sst10p,data = datp[datp$family=="Mytilidae",]))$r.squared, summary(lm(log(k)~sst10p,data = datp[datp$family=="Hiatellidae",]))$r.squared, summary(lm(log(k)~sst10p,data = datp[datp$family=="Cardiidae",]))$r.squared, summary(lm(log(k)~sst10p,data = datp[datp$family=="Mactridae",]))$r.squared, summary(lm(log(k)~sst10p,data = datp[datp$family=="Tellinidae",]))$r.squared, summary(lm(log(k)~sst10p,data = datp[datp$family=="Myidae",]))$r.squared, summary(lm(log(k)~sst10p,data = datp[datp$family=="Donacidae",]))$r.squared, summary(lm(log(k)~sst10p,data = datp[datp$family=="Arcidae",]))$r.squared, summary(lm(log(k)~sst10p,data = datp))$r.squared), xlab="Standard deviation of observed temperature values",ylab="r^2 value",main="Within-family SST-ln(k) regressions", pch=c(rep(1,10),15),ylim=c(0,0.35)) #chlor plot( c( sd(datp$chlor_a10p[datp$family=="Veneridae"],na.rm = TRUE), sd(datp$chlor_a10p[datp$family=="Pectinidae"],na.rm = TRUE), sd(datp$chlor_a10p[datp$family=="Mytilidae"],na.rm = TRUE), sd(datp$chlor_a10p[datp$family=="Hiatellidae"],na.rm = TRUE), sd(datp$chlor_a10p[datp$family=="Cardiidae"],na.rm = TRUE), sd(datp$chlor_a10p[datp$family=="Mactridae"],na.rm = TRUE), sd(datp$chlor_a10p[datp$family=="Tellinidae"],na.rm = TRUE), sd(datp$chlor_a10p[datp$family=="Myidae"],na.rm = TRUE), sd(datp$chlor_a10p[datp$family=="Donacidae"],na.rm = TRUE), sd(datp$chlor_a10p[datp$family=="Arcidae"],na.rm = TRUE), sd(datp$chlor_a10p,na.rm = TRUE) ), c( summary(lm(k~chlor_a10p,data = datp[datp$family=="Veneridae",]))$r.squared, summary(lm(k~chlor_a10p,data = datp[datp$family=="Pectinidae",]))$r.squared, summary(lm(k~chlor_a10p,data = datp[datp$family=="Mytilidae",]))$r.squared, summary(lm(k~chlor_a10p,data = datp[datp$family=="Hiatellidae",]))$r.squared, summary(lm(k~chlor_a10p,data = datp[datp$family=="Cardiidae",]))$r.squared, summary(lm(k~chlor_a10p,data = datp[datp$family=="Mactridae",]))$r.squared, summary(lm(k~chlor_a10p,data = datp[datp$family=="Tellinidae",]))$r.squared, summary(lm(k~chlor_a10p,data = datp[datp$family=="Myidae",]))$r.squared, summary(lm(k~chlor_a10p,data = datp[datp$family=="Donacidae",]))$r.squared, summary(lm(k~chlor_a10p,data = datp[datp$family=="Arcidae",]))$r.squared, summary(lm(k~chlor_a10p,data = datp))$r.squared), xlab="Standard deviation of observed chlor-a values",ylab="r^2 value",main="Within-family chlor-k regressions", pch=c(rep(1,10),15),ylim=c(0,0.35)) # Phylogeny ------------------------------------------------------------- #Setup: setwd("C:/Users/jgsauls/Google Drive/School/Old schoolwork/2015 (3) Fall - The Long Fall/Finnegan lab/Energetics") bivtree <- read.nexus(file="Phylogenetics/Final biv/Final_Biv_tre.nex.tre") #some of the tree tips were misnamed, so... bivtree.trimmed <- drop.tip(bivtree,outersect(bivtree$tip.label,gsub(" ","_",unique(datp$name)))) #removes all taxa not found in dat #How many genera with multiple species? table(table(unlist(strsplit(bivtree.trimmed$tip.label,"_"))[seq(1,length(bivtree.trimmed$tip.label)*2,2)])-1>0) #27 genera #trimming dat datp.phy <- datp[which(gsub(" ","_",datp$name) %in% bivtree.trimmed$tip.label),] datp.phy$name <- gsub(" ","_",datp.phy$name) # Phylogenetic signal ----------------------------------------------------- #make dat.phy.means: mean.name = character() mean.k=numeric();mean.k.max=numeric();mean.k.sd=numeric();mean.k.se=numeric();mean.k.range=numeric();mean.k.N=numeric(); mean.Linf=numeric();mean.Linf.se=numeric();mean.Linf.range=numeric();mean.Linf.N=numeric() mean.lifespan=numeric();mean.lifespan.se=numeric() mean.tempmin=numeric();mean.tempmax=numeric();mean.chlormin=numeric();mean.chlormax=numeric() mean.lat=numeric();mean.lat.se=numeric();mean.lat.range=numeric() for(i in 1:length(bivtree.trimmed$tip.label)){ mean.name <- c(mean.name,bivtree.trimmed$tip.label[i]) mean.k <- c(mean.k,mean(datp.phy$k[datp.phy$name==bivtree.trimmed$tip.label[i]],na.rm=TRUE)) mean.k.max <- c(mean.k.max,max(datp.phy$k[datp.phy$name==bivtree.trimmed$tip.label[i]],na.rm=TRUE)) mean.k.sd <- c(mean.k.sd,sd(datp.phy$k[datp.phy$name==bivtree.trimmed$tip.label[i]])) mean.k.se <- c(mean.k.se,se(datp.phy$k[datp.phy$name==bivtree.trimmed$tip.label[i]])) mean.k.range <- c(mean.k.range,max(datp.phy$k[datp.phy$name==bivtree.trimmed$tip.label[i]])-min(datp.phy$k[datp.phy$name==bivtree.trimmed$tip.label[i]])) mean.k.N <- c(mean.k.N,length(datp.phy$k[datp.phy$name==bivtree.trimmed$tip.label[i]])) mean.Linf <- c(mean.Linf,mean(datp.phy$Linf[datp.phy$name==bivtree.trimmed$tip.label[i]],na.rm=TRUE)) mean.Linf.se <- c(mean.Linf.se,se(datp.phy$Linf[datp.phy$name==bivtree.trimmed$tip.label[i]])) mean.Linf.range <- c(mean.Linf.range,max(datp.phy$Linf[datp.phy$name==bivtree.trimmed$tip.label[i]])-min(datp.phy$Linf[datp.phy$name==bivtree.trimmed$tip.label[i]])) mean.Linf.N <- c(mean.Linf.N,length(datp.phy$Linf[datp.phy$name==bivtree.trimmed$tip.label[i]])) mean.lifespan <- c(mean.lifespan,mean(datp.phy$lifespan[datp.phy$name==bivtree.trimmed$tip.label[i]],na.rm=TRUE)) mean.lifespan.se <- c(mean.lifespan.se,se(datp.phy$lifespan[datp.phy$name==bivtree.trimmed$tip.label[i]])) mean.tempmin <- c(mean.tempmin,mean(datp.phy$sst10p[datp.phy$name==bivtree.trimmed$tip.label[i]],na.rm=TRUE)) mean.tempmax <- c(mean.tempmax,mean(datp.phy$sst90p[datp.phy$name==bivtree.trimmed$tip.label[i]])) mean.chlormin <- c(mean.chlormin,mean(datp.phy$chlor_a10p[datp.phy$name==bivtree.trimmed$tip.label[i]])) mean.chlormax <- c(mean.chlormax,mean(datp.phy$chlor_a90p[datp.phy$name==bivtree.trimmed$tip.label[i]])) mean.lat <- c(mean.lat,mean(datp.phy$lat[datp.phy$name==bivtree.trimmed$tip.label[i]])) mean.lat.se <- c(mean.lat.se,se(datp.phy$lat[datp.phy$name==bivtree.trimmed$tip.label[i]])) mean.lat.range <- c(mean.lat.range,max(datp.phy$lat[datp.phy$name==bivtree.trimmed$tip.label[i]])-min(datp.phy$lat[datp.phy$name==bivtree.trimmed$tip.label[i]])) } mean.k.se[is.na(mean.k.se)] <- mean(mean.k.se,na.rm=TRUE) mean.Linf.se[is.na(mean.Linf.se)] <- mean(mean.Linf.se,na.rm=TRUE) mean.lifespan.se[is.na(mean.lifespan.se)] <- mean(mean.lifespan.se,na.rm=TRUE) mean.lat.se[is.na(mean.lat.se)] <- mean(mean.lat.se,na.rm=TRUE) datp.phy.means <- data.frame(name=mean.name,k=mean.k,k.max=mean.k.max,k.sd=mean.k.sd,k.se=mean.k.se,k.range=mean.k.range,k.N=mean.k.N, Linf=mean.Linf,Linf.se=mean.Linf.se,Linf.range=mean.Linf.range,Linf.N=mean.Linf.N, lifespan=mean.lifespan,lifespan.se=mean.lifespan.se, tempmin=mean.tempmin,tempmax=mean.tempmax, chlormin=mean.chlormin,chlormax=mean.chlormax, lat=mean.lat,lat.se=mean.lat.se,lat.range=mean.lat.range) #the following propagates error in this way: for x = log10(p), s_x = 0.434 * s_p / p datp.phy.means$log.k <- log10(datp.phy.means$k); datp.phy.means$log.k.se <- 0.43429 * datp.phy.means$k.se/datp.phy.means$k datp.phy.means$log.Linf <- log10(datp.phy.means$Linf); datp.phy.means$log.Linf.se <- 0.43429 * datp.phy.means$Linf.se/datp.phy.means$Linf datp.phy.means$log.lifespan <- log10(datp.phy.means$lifespan); datp.phy.means$log.lifespan.se <- 0.43429 * datp.phy.means$lifespan.se/datp.phy.means$lifespan write.csv(datp.phy.means,"datp_means.csv") #or, if you've done all that... datp.phy.means <- read.csv("datp_means.csv") #basic statistics: plot(datp.phy.means$k.N,datp.phy.means$k.range) plot(datp.phy.means$k.N,log10(datp.phy.means$k),log="x") plot(datp.phy.means$k.se,datp.phy.means$k.N) plot(datp.phy.means$lat.range,datp.phy.means$k.range) mean(datp.phy.means$k.range) plot(datp.phy.means$tempmin,datp.phy.means$k.max) plot(datp.phy.means$tempmin,datp.phy.means$k) plot(datp.phy.means$k.N,datp.phy.means$k.range,log="x",col=alpha("darkblue",0.25),pch=19,cex=1.4, xlab="Sample size",ylab="Growth coefficient range",main="Growth coefficients within species") hist(datp.phy.means$k.N,breaks=seq(0,45,1),main="Sample sizes, species in phylogeny",xlab="Sample size") plot(datp.phy.means$log.lifespan,datp.phy.means$k.max) #ultrametric tree: bivtree_ultra.c <- chronos(bivtree.trimmed,model = "correlated",lambda=1);class(bivtree_ultra) <- "phylo" bivtree_ultra.r <- chronos(bivtree.trimmed,model = "relaxed",lambda=1);class(bivtree_ultra) <- "phylo" bivtree_ultra.l0 <- chronos(bivtree.trimmed,model="relaxed",lambda=0) dots <- data.frame('k'=datp.phy.means$k,"log.Linf"=datp.phy.means$log.Linf,"log.lifespan"=datp.phy.means$log.lifespan,"log.k"=log10(datp.phy.means$k), 'sst10p'=datp.phy.means$tempmin,'sst90p'=datp.phy.means$tempmax,'chlor_a10p'=datp.phy.means$chlormin,'chlor_a90p'=datp.phy.means$chlormax) rownames(dots)<-datp.phy.means$name dots$log.Linf[is.na(dots$log.Linf)] <- 0 dots$log.lifespan[is.na(dots$log.lifespan)] <- 0 #plotting bluepal <- colorRampPalette(brewer.pal(9,'Blues')) orpal <- colorRampPalette(brewer.pal(9,'OrRd')) grpal <- colorRampPalette(brewer.pal(9,'Greens')) #I have found 800x2200 to be a good resolution, or A4 letter phylo.heatmap(bivtree_ultra.l0,cbind(dots["log.k"],dots["log.k"]),fsize=c(0.5,1,1), colors = orpal(200),split=c(0.9,0.1),lwd=2,pts=FALSE,labels=FALSE,mar=c(0.1,0.1,0.1,0.1)) phylo.heatmap(bivtree_ultra.l0,cbind(dots["log.Linf"],dots["log.Linf"]),fsize=c(0.5,1,1), colors = bluepal(200),split=c(0.9,0.1),lwd=2,pts=FALSE,labels=FALSE,mar=c(0.1,0.1,0.1,0.1)) phylo.heatmap(bivtree_ultra.l0,cbind(dots["log.lifespan"],dots["log.lifespan"]),fsize=c(0.5,1,1), colors = grpal(200),split=c(0.9,0.1),lwd=2,pts=FALSE,labels=FALSE,mar=c(0.1,0.1,0.1,0.1)) #K and lambda, means method #lambda: log.k.lambda <- phylosig(tree=bivtree_ultra.r,x=100*datp.phy.means$log.k,method="lambda",test=TRUE) log.k.max.lambda <- phylosig(tree=bivtree_ultra.r,x=log10(datp.phy.means$k.max),method="lambda",test=TRUE) log.Linf.lambda <- phylosig(tree=drop.tip(bivtree_ultra.r,bivtree_ultra.r$tip.label[is.na(datp.phy.means$Linf)]), x=datp.phy.means$log.Linf[!is.na(datp.phy.means$log.Linf)],method="lambda",test=TRUE) log.lifespan.lambda <- phylosig(tree=drop.tip(bivtree_ultra.r,bivtree_ultra.r$tip.label[is.na(datp.phy.means$log.lifespan)]), x=100*datp.phy.means$log.lifespan[!is.na(datp.phy.means$log.lifespan)],method="lambda",test=TRUE) lat.lambda <- phylosig(tree=bivtree_ultra.r,x=datp.phy.means$lat,method="lambda",test=TRUE,nsim=1000) #Blomberg's K: log.k.K <- phylosig(tree=bivtree_ultra.r,x=datp.phy.means$log.k,method="K",test=TRUE,nsim=1000) log.k.max.K <- phylosig(tree=bivtree_ultra.r,x=log10(datp.phy.means$k.max),method="K",test=TRUE,nsim=1000) log.Linf.K <- phylosig(tree=drop.tip(bivtree_ultra.r,bivtree_ultra.r$tip.label[is.na(datp.phy.means$Linf)]), x=datp.phy.means$log.Linf[!is.na(datp.phy.means$log.Linf)],method="K",nsim=1000,test=TRUE) log.lifespan.K <- phylosig(tree=drop.tip(bivtree_ultra.r,bivtree_ultra.r$tip.label[is.na(datp.phy.means$log.lifespan)]), x=datp.phy.means$log.lifespan[!is.na(datp.phy.means$log.lifespan)],method="K",nsim=1000,test=TRUE) lat.K <- phylosig(tree=bivtree_ultra.r,x=datp.phy.means$lat,method="K",nsim=1000,test=TRUE) # Using the Bieler et al. 2014 tree --------------------------------------- bieler <- read.nexus(file="Results/Reviews, round 1/Bieler et al 2014/Bivalve.dated.tre") # PGLS -------------------------------------------------------------------- #PGLS using Ives implementation (http://blog.phytools.org/2012/07/pgls-regression-with-sampling-error.html) #datp.sst.phy: subset of datp that has k, the species in the phylogeny, and sst datp.sst.phy <- datp[gsub(" ","_",datp$name) %in% bivtree$tip.label&!is.na(datp$sst10p)&!is.na(datp$k),] datp.sst.phy$name <- gsub(" ","_",datp.sst.phy$name) #datp.chlor_a.phy: subset of datp that has k, the species in the phylogeny, and chlor_a datp.chlor_a.phy <- datp[gsub(" ","_",datp$name) %in% bivtree$tip.label&!is.na(datp$chlor_a10p)&!is.na(datp$k),] datp.chlor_a.phy$name <- gsub(" ","_",datp.chlor_a.phy$name) #for the phylogeny, use those tips with spp with env data bivtree.sst.pgls <- drop.tip(bivtree.trimmed, bivtree.trimmed$tip.label[!(bivtree.trimmed$tip.label %in% datp.sst.phy$name)]) bivtree.chlor_a.pgls <- drop.tip(bivtree.trimmed, bivtree.trimmed$tip.label[!(bivtree.trimmed$tip.label %in% datp.chlor_a.phy$name)]) #vectors for analysis #temp tmin.vect <- datp.sst.phy$sst10p; names(tmin.vect) <- datp.sst.phy$name tmax.vect <- datp.sst.phy$sst90p; names(tmax.vect) <- datp.sst.phy$name k.t.vect <- datp.sst.phy$k; names(k.t.vect) <- datp.sst.phy$name #chlor cmin.vect <- datp.chlor_a.phy$chlor_a10p; names(cmin.vect) <- datp.chlor_a.phy$name cmax.vect <- datp.chlor_a.phy$chlor_a90p; names(cmax.vect) <- datp.chlor_a.phy$name k.c.vect <- datp.chlor_a.phy$k; names(k.c.vect) <- datp.chlor_a.phy$name #the actual PGLS: par(mfrow=c(2,2),mar=c(5,5,1,1)) #tmin pgls.tmin <- pgls.Ives(bivtree.sst.pgls,tmin.vect,log(k.t.vect)) for(i in 1:99){ replacement<-pgls.Ives(bivtree.sst.pgls,tmin.vect,log(k.t.vect)) if(replacement$logL > pgls.tmin$logL){ pgls.tmin <- replacement} replacement <- numeric() print(i)} #tmin, null hypothesis (no relationship) pgls.tmin.null <- pgls.Ives(bivtree.sst.pgls,tmin.vect,log(k.t.vect),fixed.b1=0) for(i in 1:99){ replacement<-pgls.Ives(bivtree.sst.pgls,tmin.vect,log(k.t.vect),fixed.b1 = 0) if(replacement$logL > pgls.tmin.null$logL){ pgls.tmin.null <- replacement} replacement <- numeric() print(i)} #tmin, plotting plot(log(k.t.vect) ~ tmin.vect,xlab = "Minimum temperature (°C)", ylab = "ln ( Growth coefficient )",type="n",xlim=c(0,34),cex.lab=1.2) points(tmin.vect,log(k.t.vect),cex=0.9,col=ifelse(abs(datp$lat)<30, "darkorange", alpha("blue",0.65)), bg=ifelse(abs(datp$lat)<30, alpha("darkorange",0.45), alpha("blue",0.3)),pch=21) abline(a=pgls.tmin$beta[1],b=pgls.tmin$beta[2],col="black",lwd=2) aicw(c(AICc(pgls.tmin),AICc(pgls.tmin.null))) #tmax pgls.tmax <- pgls.Ives(bivtree.sst.pgls,tmax.vect,log(k.t.vect)) for(i in 1:99){ replacement<-pgls.Ives(bivtree.sst.pgls,tmax.vect,log(k.t.vect)) if(replacement$logL > pgls.tmax$logL){ pgls.tmax <- replacement} replacement <- numeric() print(i)} #tmax, null hypothesis (no relationship) pgls.tmax.null <- pgls.Ives(bivtree.sst.pgls,tmax.vect,log(k.t.vect),fixed.b1=0) for(i in 1:99){ replacement<-pgls.Ives(bivtree.sst.pgls,tmax.vect,log(k.t.vect),fixed.b1 = 0) if(replacement$logL > pgls.tmax.null$logL){ pgls.tmax.null <- replacement} replacement <- numeric() print(i)} #tmax, plotting plot(log(k.t.vect) ~ tmax.vect,xlab = "Maximum temperature", ylab = "",type="n",xlim=c(0,34),cex.lab=1.2) points(tmax.vect,log(k.t.vect),cex=0.9,col=ifelse(abs(datp$lat)<30, "darkorange", alpha("blue",0.65)), bg=ifelse(abs(datp$lat)<30, alpha("darkorange",0.45), alpha("blue",0.3)),pch=21) abline(a=pgls.tmax$beta[1],b=pgls.tmax$beta[2],col="black",lwd=2) aicw(c(AICc(pgls.tmax),AICc(pgls.tmax.null))) #cmin pgls.cmin <- pgls.Ives(bivtree.chlor_a.pgls,cmin.vect,k.c.vect) for(i in 1:99){ replacement<-pgls.Ives(bivtree.chlor_a.pgls,cmin.vect,k.c.vect) if(replacement$logL > pgls.cmin$logL){ pgls.cmin <- replacement} replacement <- numeric() print(i)} #cmin, null hypothesis (no relationship) pgls.cmin.null <- pgls.Ives(bivtree.chlor_a.pgls,cmin.vect,k.c.vect,fixed.b1=0) for(i in 1:99){ replacement<-pgls.Ives(bivtree.chlor_a.pgls,cmin.vect,k.c.vect,fixed.b1 = 0) if(replacement$logL > pgls.cmin.null$logL){ pgls.cmin.null <- replacement} replacement <- numeric() print(i)} #cmin, plotting plot(k.c.vect ~ cmin.vect,xlab=expression(paste("Minimum chlorophyll (mg/",m^3,")")), ylab = "Growth coefficient",type="n",cex.lab=1.2) points(cmin.vect,k.c.vect,cex=0.9,col=ifelse(abs(datp$lat)<30, "darkorange", alpha("blue",0.65)), bg=ifelse(abs(datp$lat)<30, alpha("darkorange",0.45), alpha("blue",0.3)),pch=21) abline(a=pgls.cmin$beta[1],b=pgls.cmin$beta[2],col="black",lwd=2) aicw(c(AICc(pgls.cmin),AICc(pgls.cmin.null))) #cmax pgls.cmax <- pgls.Ives(bivtree.chlor_a.pgls,cmax.vect,k.c.vect) for(i in 1:99){ replacement<-pgls.Ives(bivtree.chlor_a.pgls,cmax.vect,k.c.vect) if(replacement$logL > pgls.cmax$logL){ pgls.cmax <- replacement} replacement <- numeric() print(i)} #cmax, null hypothesis (no relationship) pgls.cmax.null <- pgls.Ives(bivtree.chlor_a.pgls,cmax.vect,k.c.vect,fixed.b1=0) for(i in 1:99){ replacement<-pgls.Ives(bivtree.chlor_a.pgls,cmax.vect,k.c.vect,fixed.b1 = 0) if(replacement$logL > pgls.cmax.null$logL){ pgls.cmax.null <- replacement} replacement <- numeric() print(i)} #cmax, plotting plot(k.c.vect ~ cmax.vect,xlab = "Maximum chlorophyll", ylab = "",type="n",cex.lab=1.2) points(cmax.vect,k.c.vect,cex=0.9,col=ifelse(abs(datp$lat)<30, "darkorange", alpha("blue",0.65)), bg=ifelse(abs(datp$lat)<30, alpha("darkorange",0.45), alpha("blue",0.3)),pch=21) abline(a=pgls.cmax$beta[1],b=pgls.cmax$beta[2],col="black",lwd=2) aicw(c(AICc(pgls.cmax),AICc(pgls.cmax.null))) # PGLS multiple predictors ------------------------------------------------ #both tmin and cmin #Liam Revell's pgls.Ives doesn't support multiple regression #it uses the data frame datp.multi and a subsetted phylogeny #subsetting the phylogeny: #phy.multi drops every species that's isn't in both datp.sst.phy and datp.chor_a.phy #non-ultrametric phy.multi <- drop.tip(bivtree.trimmed, bivtree.trimmed$tip.label[(!(bivtree.trimmed$tip.label %in% datp.sst.phy$name)) | (!(bivtree.trimmed$tip.label %in% datp.chlor_a.phy$name))]) #ultrametric phy.ultra.multi <- drop.tip(bivtree_ultra.l0, bivtree_ultra.l0$tip.label[(!(bivtree_ultra.l0$tip.label %in% datp.sst.phy$name)) | (!(bivtree_ultra.l0$tip.label %in% datp.chlor_a.phy$name))]); class(phy.ultra.multi) <- "phylo" #building the data frame: #setting up the vectors: multi.name <- character() multi.log.k <- numeric() multi.sst10p <- numeric() multi.sst90p <- numeric() multi.chlor_a10p <- numeric() multi.chlor_a90p <- numeric() #populating the vectors: #this goes through every species in phy.multi and averages all available chlor, temp, and k data for it for(i in 1:length(phy.multi$tip.label)){ multi.name <- c(multi.name,phy.multi$tip.label[i]) multi.log.k <- c(multi.log.k,mean(log(datp.phy$k)[gsub(" ","_",datp.phy$name)==phy.multi$tip.label[i]],na.rm=TRUE)) multi.sst10p <- c(multi.sst10p,mean(datp.phy$sst10p[gsub(" ","_",datp.phy$name)==phy.multi$tip.label[i]],na.rm=TRUE)) multi.sst90p <- c(multi.sst90p,mean(datp.phy$sst90p[gsub(" ","_",datp.phy$name)==phy.multi$tip.label[i]],na.rm=TRUE)) #chlor multi.chlor_a10p <- c(multi.chlor_a10p,mean(datp.phy$chlor_a10p[gsub(" ","_",datp.phy$name)==phy.multi$tip.label[i]],na.rm=TRUE)) multi.chlor_a90p <- c(multi.chlor_a90p,mean(datp.phy$chlor_a90p[gsub(" ","_",datp.phy$name)==phy.multi$tip.label[i]],na.rm=TRUE)) } #making the data frame out of the vectors datp.multi <- data.frame(name=multi.name,k=multi.log.k,sst10p=multi.sst10p,sst90p=multi.sst90p,chlor_a10p=multi.chlor_a10p,chlor_a90p=multi.chlor_a90p) rownames(datp.multi) <- multi.name #----PGLS with caper install.packages("caper", repos="http://R-Forge.R-project.org");library(caper) phy.ultra.multi$node.label <- NULL ucdat <- comparative.data(phy=phy.ultra.multi,data=datp.multi,names.col = "name",vcv.dim = 2,warn.dropped = TRUE) upgls.sst <- pgls(k~sst10p,data=ucdat, lambda = "ML") upgls.sst.hi <- pgls(k~sst90p,data=ucdat, lambda = "ML") upgls.chlor <- pgls(k~chlor_a10p,data=ucdat, lambda = "ML") upgls.chlor.hi <- pgls(k~chlor_a90p,data=ucdat, lambda = "ML") upgls.multi <- pgls(k~sst10p+chlor_a10p,data=ucdat, lambda = "ML") upgls.sst.lohi <- pgls(k~sst10p+sst90p,data=ucdat, lambda = "ML") upgls.chlor.lohi <- pgls(k~chlor_a10p+chlor_a90p,data=ucdat, lambda = "ML") upgls.multi.lohi <- pgls(k~sst10p+sst90p+chlor_a10p+chlor_a90p,data=ucdat, lambda = "ML") #----Same analysis, all lambda set to 0 (equivalent to GLS) upgls.sst.0 <- pgls(k~sst10p,data=ucdat, lambda = 0.0001) upgls.sst.hi.0 <- pgls(k~sst90p,data=ucdat, lambda = 0.0001) upgls.chlor.0 <- pgls(k~chlor_a10p,data=ucdat, lambda = 0.0001) upgls.chlor.hi.0 <- pgls(k~chlor_a90p,data=ucdat, lambda = 0.0001) upgls.multi.0 <- pgls(k~sst10p+chlor_a10p,data=ucdat, lambda = 0.0001) upgls.sst.lohi.0 <- pgls(k~sst10p+sst90p,data=ucdat, lambda = 0.0001) upgls.chlor.lohi.0 <- pgls(k~chlor_a10p+chlor_a90p,data=ucdat, lambda = 0.0001) upgls.multi.lohi.0 <- pgls(k~sst10p+sst90p+chlor_a10p+chlor_a90p,data=ucdat, lambda = 0.0001) #and lastly, model selection + comparison #pgls aic.w(c(upgls.sst$aic, upgls.sst.hi$aic, upgls.chlor$aic, upgls.chlor.hi$aic, upgls.multi$aic, upgls.sst.lohi$aic, upgls.chlor.lohi$aic, upgls.multi.lohi$aic)) # Distance matrices ------------------------------------------------------- install.packages('geosphere');library(geosphere) #geographic distance dist.geo <- distm(cbind(datp.phy$lon,datp.phy$lat)[order(datp.phy$name),]) #phylogenetic distance matrix #mol.dist.1 is a nxn matrix, where n is the number of tips (135) dist.mol.1 <- cophenetic.phylo(bivtree.trimmed) dist.mol.1.sorted <- dist.mol.1[sort(labels(dist.mol.1)[[1]]),sort(labels(dist.mol.1)[[1]])] #mol.dist.2 is an mxm matrix, where m is the number of observations (692) #replicates each row N times, where N is the sample size for that species dist.mol.2 <- apply(apply(dist.mol.1.sorted, 2, function(c) rep(c,datp.phy.means$k.N[order(datp.phy.means$name)])), 1, function(c) rep(c,datp.phy.means$k.N[order(datp.phy.means$name)])) #sst10p distance: dist.sst10p <- dist(datp.phy$sst10p[order(datp.phy$name)]) #sst90p distance: dist.sst90p <- dist(datp.phy$sst90p[order(datp.phy$name)]) #chlor10p distance: dist.chlor10p <- dist(datp.phy$chlor_a10p[order(datp.phy$name)]) #chlor90p distance: dist.chlor90p <- dist(datp.phy$chlor_a90p[order(datp.phy$name)]) #k distance: dist.k <- dist(datp.phy$k[order(datp.phy$name)]) dist.ln.k <- dist(log(datp.phy$k)[order(datp.phy$name)]) #plotting distance matrices against each other #mol and geo plot(dist.geo[lower.tri(dist.geo)],dist.mol.2[lower.tri(dist.mol.2)], pch=21,bg=alpha("darkorange",0.01),xlab="Distance (m)",ylab="Phylogenetic distance, expected substitutions per site",col=alpha("black",0.01)) #min and max temp plot(dist.sst10p[lower.tri(dist.sst10p)],dist.sst90p[lower.tri(dist.sst90p)], pch=21,bg=alpha("darkorange",0.01),xlab="Minimum temperature distance (°C)",ylab="Maximum temperature distance (°C)",col=alpha("black",0.01)) #min temp and ln(k) plot(dist.sst10p[lower.tri(dist.sst10p)],dist.ln.k[lower.tri(dist.ln.k)], pch=21,bg=alpha("darkorange",0.01),xlab="Minimum temperature distance (°C)",ylab="Distance in ln(k)",col=alpha("black",0.01)) #phy distance and ln(k) distance plot(dist.mol.2[lower.tri(dist.mol.2)],dist.ln.k[lower.tri(dist.ln.k)], pch=21,bg=alpha("darkorange",0.01),xlab="Phylogenetic distance",ylab="Distance in ln(k)",col=alpha("black",0.01)) #Mantel tests mantel.geo.phy <- mantel(dist.mol.2,dist.geo) mantel.geo.k <- mantel(dist.geo,dist.ln.k) mantel.phy.k <- mantel(dist.mol.2,dist.ln.k) mantel.sst10.k <- mantel(dist.sst10p,dist.ln.k,na.rm = TRUE) mantel.chlor10.k <- mantel(dist.chlor10p,dist.k,na.rm = TRUE) mantel.sst.phy <- mantel(dist.mol.2,as.dist(dist.sst10p),na.rm = TRUE) #multivariate phy.sst.k <- lm(dist.ln.k ~ dist.sst10p+as.dist(dist.mol.2)) # Bonus tests ------------------------------------------------------------- #BONUS TEST OF DEPTH AND k plot(log10(datp$mindepth),log10(datp$k),pch=19,col=ifelse(abs(datp$lat)<=30,"red",ifelse(abs(datp$lat)>50,"blue","darkorange"))) par(mfrow=c(3,1)) plot(datp$mindepth[abs(datp$lat)<=30],datp$k[abs(datp$lat)<=30],log='xy',pch=19,col="red",xlab="Minimum depth, m",ylab="k",main="Depth and k, <=30°") plot(datp$mindepth[abs(datp$lat)>30 & abs(datp$lat)<=50],datp$k[abs(datp$lat)>30 & abs(datp$lat)<=50],log='xy',ylab="k",pch=19,col="darkorange",xlab="Minimum depth, m",main="Depth and k, 30-50°") plot(datp$mindepth[abs(datp$lat)>50],datp$k[abs(datp$lat)>50],log='y',pch=19,col="blue",xlab="Minimum depth, m",ylab="k",main="Depth and k, >50°") #BONUS TEST OF K*LINF datp$kLinf <- (log(datp$k)+abs(min(log(datp$k),na.rm = TRUE)))*(log(datp$Linf)+abs(min(log(datp$Linf),na.rm = TRUE))) par(mfrow=c(2,2),mar=c(5,5,1,1)) #sst10 plot(datp$sst10p,datp$kLinf,xlab="Minimum temperature (°C)",ylab="Growth rate",type = "n",xlim=c(0,34),cex.lab=1.2) points(datp$sst10p,datp$kLinf,cex=.9,col=ifelse(abs(datp$lat)<30, alpha("darkorange",0.75), alpha("blue",0.3)),pch=19) abline(rq(datp$kLinf ~ datp$sst10p), col="black",lwd=2) #sst90 plot(datp$sst90p,datp$kLinf,xlab="Maximum temperature (°C)",ylab="Growth rate",type = "n",xlim=c(0,34),cex.lab=1.2) points(datp$sst90p,datp$kLinf,cex=.9,col=ifelse(abs(datp$lat)<30, alpha("darkorange",0.65), alpha("blue",0.3)),pch=19) abline(rq(datp$kLinf ~ datp$sst90p), col="black",lwd=2) #chlor10 plot(log(datp$chlor_a10p),datp$kLinf,xlab="Minimum chlorophyll",ylab="Growth rate",type = "n",xlim=c(-1.7,1.2),cex.lab=1.2) points(log(datp$chlor_a10p),datp$kLinf,col=ifelse(abs(datp$lat)<30, alpha("darkorange",0.75), alpha("blue",0.3)),pch=19) abline(rq(datp$kLinf ~ log(datp$chlor_a10p)), col="black",lwd=2) #chlor90 plot(log(datp$chlor_a90p),datp$kLinf,xlab="Maximum chlorophyll",ylab="Growth rate",type = "n",xlim=c(-1.7,1.2),cex.lab=1.2) points(log(datp$chlor_a90p),datp$kLinf,col=ifelse(abs(datp$lat)<30, alpha("darkorange",0.75), alpha("blue",0.3)),pch=19) #correlations summary(lm(datp$kLinf~datp$sst10p)) summary(lm(datp$kLinf~datp$sst90p)) summary(lm(datp$kLinf~log(datp$chlor_a10p))) summary(lm(datp$kLinf~log(datp$chlor_a90p))) #BONUS TEST OF LOGGING FOR CHLOROPHYLL AND K par(mfrow=c(2,2),mar=c(5,5,1,1)) #ln chlor_a10p, k plot(log(datp$chlor_a10p),datp$k,xlab="ln ( Minimum chlorophyll )",ylab="Growth coefficient",type = "n",cex.lab=1.2) points(log(datp$chlor_a10p),datp$k,col=ifelse(abs(datp$lat)<30, "darkorange", alpha("blue",0.65)), bg=ifelse(abs(datp$lat)<30, alpha("darkorange",0.45), alpha("blue",0.3)),pch=21) abline(rq(datp$k ~ log(datp$chlor_a10p)), col="black",lwd=2) summary(lm(datp$k~log(datp$chlor_a10p))) #ln chlor_a90p, k plot(log(datp$chlor_a90p),datp$k,xlab="ln ( Maximum chlorophyll )",ylab="Growth coefficient",type = "n",cex.lab=1.2) points(log(datp$chlor_a90p),datp$k,col=ifelse(abs(datp$lat)<30, "darkorange", alpha("blue",0.65)), bg=ifelse(abs(datp$lat)<30, alpha("darkorange",0.45), alpha("blue",0.3)),pch=21) abline(rq(datp$k ~ log(datp$chlor_a90p)), col="black",lwd=2) summary(lm(datp$k~log(datp$chlor_a90p))) #ln chlor_a10p, ln k plot(log(datp$chlor_a10p),log(datp$k),xlab="ln ( Minimum chlorophyll )",ylab="ln ( Growth coefficient )",type = "n",cex.lab=1.2) points(log(datp$chlor_a10p),log(datp$k),col=ifelse(abs(datp$lat)<30, "darkorange", alpha("blue",0.65)), bg=ifelse(abs(datp$lat)<30, alpha("darkorange",0.45), alpha("blue",0.3)),pch=21) abline(rq(log(datp$k) ~ log(datp$chlor_a10p)), col="black",lwd=2) summary(lm(log(datp$k)~log(datp$chlor_a10p))) #ln chlor_a90p, ln k plot(log(datp$chlor_a90p),log(datp$k),xlab="ln ( Maximum chlorophyll )",ylab="ln ( Growth coefficient )",type = "n",cex.lab=1.2) points(log(datp$chlor_a90p),log(datp$k),col=ifelse(abs(datp$lat)<30, "darkorange", alpha("blue",0.65)), bg=ifelse(abs(datp$lat)<30, alpha("darkorange",0.45), alpha("blue",0.3)),pch=21) abline(rq(log(datp$k) ~ log(datp$chlor_a90p)), col="black",lwd=2) summary(lm(log(datp$k)~log(datp$chlor_a90p)))