# Setup and analyses, pt. 1: dataset -------------------------------------- library(GeoRange);library(devtools);library(nlme);devtools::install_github("iobis/obistools") #OBIS occurrences to dataset occurrences_raw <- read.csv("occurrences_accessed_August_2019.csv") #remove occurrences without species ID and bourgueticrinids (stalked comatulids) (removes ~16% of the occurrences) occurrences <- occurrences_raw[occurrences_raw$species %in% dat$species,] occurrences_to_species <- function(lats,longs,maxdepths=NA,mindepths=NA,sp){ out <- data.frame("species"=unique(sp),"n_obis"=NA,"latitude_southernmost"=NA,"latitude_northernmost"=NA, "latitude_midpoint"=NA,"latitude_highest"=NA,"latitude_lowest"=NA,"latitude_range"=NA, "western_hemisphere"=NA,"eastern_hemisphere"=NA,"minimum_depth_obis"=NA,"maximum_depth_obis"=NA, "randlat"=NA,"randlong"=NA,"randtemp"=NA) for(i in 1:length(unique(sp))){ #print(paste(i,":",unique(sp)[i])) spi <- sp==unique(sp)[i] #this boolean vector indexes the species out$n_obis[i] <- sum(spi) out$latitude_northernmost[i] <- max(lats[spi]) out$latitude_southernmost[i] <- min(lats[spi]) out$latitude_midpoint[i] <- mean(c(max(lats[spi]),min(lats[spi]))) #print(paste(out$latitude_northernmost[i],out$latitude_southernmost[i],out$latitude_midpoint[i])) out$latitude_highest[i] <- max(abs(lats[spi])) out$latitude_lowest[i] <- ifelse(sign(min(lats[spi]))!=sign(max(lats[spi])),0, ifelse(min(lats[spi])<0,max(lats[spi]),min(lats[spi]))) out$latitude_range[i] <- max(lats[spi]) - min(lats[spi]) out$western_hemisphere[i] <- any(longs[spi]<0) out$eastern_hemisphere[i] <- any(longs[spi]>0) if(!all(is.na(mindepths[spi]))){out$minimum_depth_obis[i] <- min(c(maxdepths[spi],mindepths[spi]),na.rm=TRUE) out$maximum_depth_obis[i] <- max(c(maxdepths[spi],mindepths[spi]),na.rm=TRUE) #for comparison of latitude and temperature lookups <- obistools::lookup_xy(data.frame("decimalLatitude"=lats[spi],"decimalLongitude"=longs[spi])) if(any(lookups$bathymetry>0 & lookups$bathymetry<100)){ randindex <- sample(which(lookups$bathymetry>0 & lookups$bathymetry<100),1) out$randlat[i] <- lats[spi][randindex] out$randlong[i] <- longs[spi][randindex] out$randtemp[i] <- lookups$sstemperature[randindex]} } };return(out)} # Setup and analyses, pt. 2: pre-phylogenetic -------------------------------------- #packages and dataset library(quantreg) dat <- read.csv("main_dataset.csv") #quantile regression, qreg.5 <- rq(dat$arm_number~ #5th conditional quantile abs(dat$latitude_midpoint),tau = 0.05) qreg.5.null <- rq(dat$arm_number~1,tau = 0.05) logLik(qreg.5) logLik(qreg.5.null) AIC(qreg.5) AIC(qreg.5.null) qreg.10 <- rq(dat$arm_number~ #10th conditional quantile abs(dat$latitude_midpoint),tau = 0.1) qreg.10.null <- rq(dat$arm_number~1,tau = 0.1) logLik(qreg.10) logLik(qreg.10.null) AIC(qreg.10) AIC(qreg.10.null) qreg.90 <- rq(dat$arm_number~ #90th conditional quantile abs(dat$latitude_midpoint),tau = 0.9) qreg.90.null <- rq(dat$arm_number~1,tau = 0.9) logLik(qreg.90) logLik(qreg.90.null) AIC(qreg.90) AIC(qreg.90.null) qreg.95 <- rq(dat$arm_number~ #95th conditional quantile abs(dat$latitude_midpoint),tau = 0.95) qreg.95.null <- rq(dat$arm_number~1,tau = 0.95) logLik(qreg.95) logLik(qreg.95.null) AIC(qreg.95) AIC(qreg.95.null) #does the LANG (latitudinal arm number gradient) still show up if we don't use latitudinal midpoints? #sampling from uniform distribution bounded by range limits returnrandlats <- function(lowlats,highlats){ out <- c();for(i in 1:length(highlats)){out <- c(out,runif(n=1,min=lowlats[i],max=highlats[i]))};return(out)} rhos <- c();ps <- c();for(i in 1:1000){ test <- cor.test(abs(returnrandlats(lowlats = dat$latitude_southernmost,dat$latitude_northernmost)),dat$arm_number,method="spearman") rhos <- c(rhos,test$estimate);ps <- c(ps,test$p.value)} #is the LANG a "result" of the LDG? roundUp <- function(x,to=10){ to*(x%/%to + as.logical(x%%to))} dat$latbin <- roundUp(abs(dat$latitude_midpoint)) #latitudinal bins rhos <- c() ps <- c() q95slopes <- c() dAICs <- c() for(i in 1:10000){ print(i) dat.ss <- dat[c(sample(which(dat$latbin==10),13),sample(which(dat$latbin==20),13), sample(which(dat$latbin==30),13),sample(which(dat$latbin==40),13), sample(which(dat$latbin==50),13),sample(which(dat$latbin==60),13), sample(which(dat$latbin==70),13)),] #tst <- cor.test(abs(dat.ss$latitude_midpoint),dat.ss$arm_number) #rhos <- c(rhos,tst$estimate) #ps <- c(ps,x$p.value) qr <- rq(dat.ss$arm_number~abs(dat.ss$latitude_midpoint),tau = 0.95) qr.null <- rq(dat.ss$arm_number~1,tau = 0.95) q95slopes <- c(q95slopes,qr$coefficients[2]) dAICs <- c(dAICs,AIC(qr.null)-AIC(qr)) } hist(rhos,xlim=c(-0.5,0.5)) hist(ps) #temperature vs latitude dat_sort <- dat[order(dat$species),] rhos_temp <- c();rhos_lat <- c();rhodiffs <- c() for(i in 1:250){print(i); bysp <- occurrences_to_species(occurrences$decimalLatitude,occurrences$decimalLongitude,sp=occurrences$species, maxdepths = occurrences$maximumDepthInMeters,mindepths = occurrences$minimumDepthInMeters) bysp <- bysp[order(bysp$species),] #record difference in absolute rho between latitude as predictor and temp as predictor #plot(abs(bysp$randlat),dat_sort$arm_number,pch=21,bg=alpha("black",0.2),xlim=c(80,0),xlab="Absolute latitude",ylab="Arm number") #plot(bysp$randtemp,dat_sort$arm_number,pch=21,bg=alpha("black",0.2),xlab="SST",ylab="Arm number") rhos_temp <- c(rhos_temp,as.numeric(cor.test(bysp$randtemp,dat_sort$arm_number,method="spearman")$estimate)) rhos_lat <- c(rhos_lat,as.numeric(abs(cor.test(abs(bysp$randlat),dat_sort$arm_number,method="spearman")$estimate))) rhodiffs <- c(rhodiffs,as.numeric(abs(cor.test(abs(bysp$randlat),dat_sort$arm_number,method="spearman")$estimate))- as.numeric(cor.test(bysp$randtemp,dat_sort$arm_number,method="spearman")$estimate))} # Setup and analyses, pt. 3: phylogenies ---------------------------------- library(ape) library(phytools) read.process.tree <- function(path){ x <- read.tree(path) x$tip.label[c(11,15,17,26,30,42,54,55,58,60,68,77,112,144,154)] <- c("Psathyrometra_bigradata","Antedon_iris","Antedon_parviflora", "Decametra_laevipinna", "Decametra_alaudae","Tropiometra_carinata","Asterometra_cristata","Nanometra_johnstoni", "Isometra_flavescens","Eometra_weddelli","Eudiocrinus_pinnatus","Euantedon_exquisita", "Clarkcomanthus_comanthipinna","Thalassometra_margaritifera","Pentametrocrinus_semperi") x <- drop.tip(x,x$tip.label[!gsub('_',' ',x$tip.label) %in% dat$species]) x$node.label <- NULL #not interesting - support values #drop Phanogenia typica bc no arm data return(drop.tip(x,"Phanogenia_typica")) } ftime.lambda0 <- read.process.tree("Featherstar chronogram lambda 0.tre") ftime.lambda100 <- read.process.tree("Featherstar chronogram lambda 100.tre") ftime.lambda10000 <- read.process.tree("Featherstar chronogram lambda 10000.tre") #phylogenetic signal armno <- dat$arm_number names(armno) <- gsub(' ','_',dat$species) inphy <- dat$species %in% gsub('_',' ',ftime.lambda0$tip.label) armno <- armno[inphy] armno <-armno[match(ftime.lambda0$tip.label,names(armno))] #fixing order abslat <- abs(dat$latitude_midpoint);names(abslat) <- gsub(' ','_',dat$species);abslat <- abslat[inphy] abslat <- abslat[match(ftime.lambda0$tip.label,names(abslat))] phylosig(tree=ftime.lambda0,x=armno[inphy],method="K",test=TRUE) #diagnostics: normality and homoscedasticity m1 <- gls(armno~abslat,correlation=corBrownian(1,ftime.lambda0)) plot(ecdf(m1$residuals)) m2 <- gls(log(armno)~abslat,correlation=corBrownian(1,ftime.lambda0)) plot(ecdf(m2$residuals)) m3 <- gls(log(log(armno))~abslat,correlation=corBrownian(1,ftime.lambda0)) plot(ecdf(m1$residuals)) #normality ks.test(m1$residuals,y="pnorm") ks.test(m2$residuals,y="pnorm") ks.test(m3$residuals,y="pnorm") #heteroskedasticity: Bruesch-Pagan with Koenker correction summary(lm(m1$residuals^2 ~ abslat))$r.squared*length(armno) < qchisq(0.95,df=1) summary(lm(m2$residuals^2 ~ abslat))$r.squared*length(armno) < qchisq(0.95,df=1) summary(lm(m3$residuals^2 ~ abslat))$r.squared*length(armno) < qchisq(0.95,df=1) #phylogenetic permutations phylo.permute <- function(tre,vec,meth="K",margin=0.01,stucklimit=1000){ log <- capture.output({ physig.empirical <- as.numeric(phylosig(tre,vec,method=meth)[1]) #empirical phylogenetic signal #print(paste("Empirical phylogenetic signal:",physig.empirical)) vec.old <- sample(vec) #shuffled vector physig.old <- as.numeric(phylosig(tre,vec.old,method=meth))[1] #phylogenetic signal of shuffled vector #print(paste("Permuted phylogenetic signal:",physig.old)) #initiate the main loop - break on condition that physig.old is sufficiently close to physig.empirical stuckcounter <- 0 while(abs(physig.old - physig.empirical) > margin){ if(stuckcounter>=stucklimit){ #if the algorithm got stuck print("stuck!") stuckcounter <- 0 vec.old <- sample(vec) #shuffled vector physig.old <- as.numeric(phylosig(tre,vec.old,method=meth))[1]} indices <- sample(seq(length(vec)),2) vec.new <- replace(vec.old,indices,vec.old[rev(indices)]) #swap pair of trait values physig.new <- as.numeric(phylosig(tre,vec.new,method=meth)[1]) #if this configuration more closely approximates the physig of the empirical... if(abs(physig.new - physig.empirical) <= abs(physig.old - physig.empirical)){ vec.old <- vec.new #replace the old value with the new one physig.old <- physig.new #print(paste("Permuted phylogenetic signal:",physig.new)) }}}) return(vec.old)} #empirical statistics rho.empirical <- cor.test(abslat,armno,method="spearman")$estimate q95slope.empirical <- rq(armno~abslat,tau=0.95)$coefficients[2] q90slope.empirical <- rq(armno~abslat,tau=0.90)$coefficients[2] #distributions, Blomberg's K, shuffling both predictor and response variable rhos <- c(); q90slopes <- c();q95slopes <- c() for(i in 1:1000){ x <- phylo.permute(ftime.lambda0,unname(abslat),margin=0.01) y <- phylo.permute(ftime.lambda0,unname(armno),margin=0.01) rhos <- c(rhos,cor.test(x,y,method="spearman")$estimate) q90slopes <- c(q90slopes,rq(y~x,tau=0.90)$coefficients[2]) q95slopes <- c(q95slopes,rq(y~x,tau=0.95)$coefficients[2]) } #distributions, Blomberg's K, shuffling just the predictor rhos.predictor <- c(); q90slopes.predictor <- c();q95slopes.predictor <- c() for(i in 1:1000){ x <- phylo.permute(ftime.lambda0,unname(abslat),margin=0.01) y <- armno rhos.predictor <- c(rhos.predictor,cor.test(x,y,method="spearman")$estimate) q90slopes.predictor <- c(q90slopes.predictor,rq(y~x,tau=0.90)$coefficients[2]) q95slopes.predictor <- c(q95slopes.predictor,rq(y~x,tau=0.95)$coefficients[2]) } #distributions, Blomberg's K, shuffling just the response variable rhos.response <- c(); q90slopes.response <- c();q95slopes.response <- c() for(i in 1:1000){ x <- abslat y <- phylo.permute(ftime.lambda0,unname(armno),margin=0.01) rhos.response <- c(rhos.response,cor.test(x,y,method="spearman")$estimate) q90slopes.response <- c(q90slopes.response,rq(y~x,tau=0.90)$coefficients[2]) q95slopes.response <- c(q95slopes.response,rq(y~x,tau=0.95)$coefficients[2]) } #distributions, ordinary permutations rhos.ordinary <- replicate(10000,cor.test(abslat,sample(armno),method = "spearman")$estimate) q90s.ordinary <- replicate(10000,rq(armno~sample(abslat),tau=0.90)$coefficients[2]) q95s.ordinary <- replicate(10000,rq(armno~sample(abslat),tau=0.95)$coefficients[2]) #distributions, Pagel's lambda, shuffling both predictor and response variable rhos.lambda <- c(); q90slopes.lambda <- c();q95slopes.lambda <- c() for(i in 1:1000){ x <- phylo.permute(ftime.lambda0,unname(abslat),margin=0.01,meth="lambda") y <- phylo.permute(ftime.lambda0,unname(armno),margin=0.01,meth="lambda") rhos <- c(rhos,cor.test(x,y,method="spearman")$estimate) q90slopes <- c(q90slopes,rq(y~x,tau=0.90)$coefficients[2]) q95slopes <- c(q95slopes,rq(y~x,tau=0.95)$coefficients[2]) } #Felsenstein's worst case tr <- read.tree('Felsenstein worst case.tre') x.fels <- fastBM(tr); y.fels <- fastBM(tr) rsquareds.fels <- c() for(i in 1:1000){ x.fels.perm <- phylo.permute(tr,unname(x.fels),margin=0.01) y.fels.perm <- phylo.permute(tr,unname(y.fels),margin=0.01) rsquareds.fels <- c(rsquareds.fels,summary(lm(y.fels.perm~x.fels.perm))$r.squared) } rsquareds.fels.ordinary <- replicate(1000,summary(lm(y.fels.perm~sample(x.fels.perm)))$r.squared) rsquare.empirical <- summary(lm(y.fels~x.fels))$r.squared #phylogeny and habit phy.schneider <- inphy & dat$schneider_habit!="" habits.inphy <- as.numeric(as.character(dat$schneider_habit[phy.schneider])) tree.schneider <- ladderize(keep.tip(ftime.lambda0,gsub(' ','_',dat$species[phy.schneider])),right=FALSE) orderschneiderliketree <- match(tree.schneider$tip.label,gsub(' ','_',dat$species[phy.schneider])) habits.inphy <- habits.inphy[orderschneiderliketree] #fixing order diurnal <- habits.inphy==1|habits.inphy==2;diurnal[2] <- FALSE exposed <- habits.inphy==1|habits.inphy==3;exposed[2] <- FALSE plotTree(tree.schneider,fsize=0.6,ftype="i",lwd=1) tiplabels(pch=21,bg=ifelse(exposed,"black","white")) diurnal.indat <- phy.schneider & (dat$schneider_habit==1 | dat$schneider_habit==2) nocturnal.indat <- phy.schneider & (dat$schneider_habit==3 | dat$schneider_habit==4) exposed.indat <- phy.schneider & (dat$schneider_habit==1 | dat$schneider_habit==3) concealed.indat <- phy.schneider & (dat$schneider_habit==2 | dat$schneider_habit==4) moran.i(as.integer(diurnal),tree.schneider) hist(replicate(1000,moran.i(sample(as.integer(exposed)),tree.schneider))) #ordinary and phylo permutation test #armno abs(mean(dat$arm_number[diurnal.indat])-mean(dat$arm_number[nocturnal.indat])) diffs <- c();for(i in 1:10000){#p = 0.1904 x <- dat$arm_number[diurnal.indat|nocturnal.indat] inds <- sample(seq(sum(diurnal.indat|nocturnal.indat))) diffs <- c(diffs,abs(mean(x[inds[1:sum(diurnal.indat)]])-mean(x[inds[(sum(diurnal.indat)+1):sum(diurnal.indat|nocturnal.indat)]])))} diffs.pp <- c();for(i in 1:1000){#p = 0.166 print(i) arms.pp <- phylo.permute(tree.schneider,dat$arm_number[phy.schneider][orderschneiderliketree],margin=0.05) habits.pp <- phylo.permute(tree.schneider,as.numeric(habits.inphy==1|habits.inphy==2),margin=0.05) diffs.pp <- c(diffs.pp,abs(mean(arms.pp[habits.pp==1],na.rm = TRUE)-mean(arms.pp[habits.pp==0],na.rm = TRUE)))} #regenerating arms abs(mean(dat$schneider_mean_number_regen[exposed.indat])-mean(dat$schneider_mean_number_regen[concealed.indat])) diffs <- c();for(i in 1:10000){ x <- dat$schneider_mean_number_regen[exposed.indat|concealed.indat] inds <- sample(seq(sum(exposed.indat|concealed.indat))) diffs <- c(diffs,abs(mean(x[inds[1:sum(exposed.indat)]])-mean(x[inds[(sum(exposed.indat)+1):sum(exposed.indat|concealed.indat)]])))} regen1 <- dat$schneider_mean_number_regen[phy.schneider][orderschneiderliketree] diffs.pp.regen1 <- c();for(i in 1:1000){ print(i) habits.pp <- phylo.permute(tree.schneider,exposed,margin=0.05) diffs.pp.regen1 <- c(diffs.pp.regen1,abs(mean(regen1[habits.pp==1],na.rm = TRUE)- mean(regen1[habits.pp==0],na.rm = TRUE)))} #proportion arms regenerating abs(mean(dat$schneider_mean_prop_regen[exposed.indat])-mean(dat$schneider_mean_prop_regen[concealed.indat])) diffs <- c();for(i in 1:10000){ x <- dat$schneider_mean_prop_regen[exposed.indat|concealed.indat] inds <- sample(seq(sum(exposed.indat|concealed.indat))) diffs <- c(diffs,abs(mean(x[inds[1:sum(exposed.indat)]])-mean(x[inds[(sum(exposed.indat)+1):sum(exposed.indat|concealed.indat)]])))} regen2 <- dat$schneider_mean_prop_regen[phy.schneider][orderschneiderliketree] diffs.pp.regen2 <- c();for(i in 1:1000){ habits.pp <- phylo.permute(tree.schneider,exposed,margin=0.05) diffs.pp.regen2 <- c(diffs.pp.regen2,abs(mean(regen2[habits.pp],na.rm = TRUE)- mean(regen2[!habits.pp],na.rm = TRUE)))};beep() #proportion with regenerating arms abs(mean(dat$schneider_prop_with_regen[exposed.indat])-mean(dat$schneider_prop_with_regen[concealed.indat])) diffs <- c();for(i in 1:10000){ x <- dat$schneider_prop_with_regen[exposed.indat|concealed.indat] inds <- sample(seq(sum(exposed.indat|concealed.indat))) diffs <- c(diffs,abs(mean(x[inds[1:sum(exposed.indat)]])-mean(x[inds[(sum(exposed.indat)+1):sum(exposed.indat|concealed.indat)]])))} regen3 <- dat$schneider_prop_with_regen[phy.schneider][orderschneiderliketree] diffs.pp.regen3 <- c();for(i in 1:1000){ habits.pp <- phylo.permute(tree.schneider,exposed,margin=0.05) diffs.pp.regen3 <- c(diffs.pp.regen3,abs(mean(regen3[habits.pp],na.rm = TRUE)- mean(regen3[!habits.pp],na.rm = TRUE)))};beep() # Figures ------------------------------------------------------- library(scales) #plotting functions skyline <- function(xvals,yvals,color="black"){ #xvals one longer than yvals for(i in 1:length(yvals)){ #horizontal lines lines(c(xvals[i],xvals[i+1]),c(yvals[i],yvals[i]),col=color,lwd=2)} for(i in 1:(length(yvals)-1)){ #vertical lines lines(c(xvals[i+1],xvals[i+1]),c(yvals[i],yvals[i+1]),col=color,lwd=2)}} plotbylat <- function(lowlats,highlats,trait,taus=seq(0.1,0.9,0.1),xlims=c(-90,90),ylabel=NA){ rangelimits <- sort(unique(c(lowlats,highlats))) mat <- matrix(NA,nrow=length(taus),ncol=length(rangelimits)-1) for(i in (1:length(rangelimits)-1)){ mat[,i] <- quantile(trait[highlats > rangelimits[i] & lowlats < rangelimits[i+1]],taus,na.rm=TRUE)} plot(1,xlim=c(xlims[1],xlims[2]),ylim=c(min(mat,na.rm = TRUE),max(mat,na.rm = TRUE)),type='n',xlab="Latitude",ylab=ylabel) for(i in 1:length(taus)){ skyline(rangelimits,mat[i,],color = viridis(length(taus))[i])}} plotbylatmeans <- function(lowlats,highlats,trait,rangelimits=NA,xlims=c(-90,90)){ if(is.na(rangelimits)){rangelimits <- sort(unique(c(lowlats,highlats)))} vals <- c() for(i in (1:(length(rangelimits)-1))){ vals <- c(vals,mean(trait[highlats > rangelimits[i] & lowlats < rangelimits[i+1]],na.rm=TRUE))} #plot plot(1,xlim=c(xlims[1],xlims[2]),ylim=c(min(vals,na.rm = TRUE),max(vals,na.rm = TRUE)),type='n',xlab="Absolute latitude",ylab="Mean arm number") skyline(rangelimits,vals)} returnlatmeans <- function(lowlats,highlats,trait,rangelimits=NA){ #same thing as plotbylatmeans, but return instead of plot if(is.na(rangelimits)){rangelimits <- sort(unique(c(lowlats,highlats)))} vals <- c() for(i in (1:(length(rangelimits)-1))){ vals <- c(vals,mean(trait[highlats > rangelimits[i] & lowlats < rangelimits[i+1]],na.rm=TRUE))} return(vals)} plotLDG <- function(lowlats,highlats,rangelimits=NA){ if(is.na(rangelimits)){rangelimits <- sort(unique(c(lowlats,highlats)))} divs <- c() for(i in 1:(length(rangelimits)-1)){ divs <- c(divs,sum(highlats > rangelimits[i] & lowlats < rangelimits[i+1]))} plot(1,xlim=c(min(rangelimits),max(rangelimits)),ylim=c(0,max(divs)), type='n',xlab="Latitude",ylab='Species richness') skyline(rangelimits,divs)} #mode can be N, richness, or N per richness plotbylatoccurrences <- function(lats,longs,sp,bins=seq(-90,90,5),xlims=c(-90,90),ylabel="N",mode="N"){ N <- c() #vector of total samples in each bin richness <- c() #vector of number of species in each bin for(i in (1:(length(bins)-1))){ N <- c(N,sum(lats>bins[i] & latsbins[i] & lats