################################################################################### #********************************************************************************** ## APPENDIX S7: R code to perform data analyses described in the methods section ## #********************************************************************************** ################################################################################### setwd("C:/Users/Jason/Dropbox/DIADEMA Analyses/Data/Arbres/2020(composition+traits)/TRAITS/IMPUTATION/Imputations2/ms5(data_paper)/Jesus_Chris") rm(list=ls()) #knitr::stitch('AppendixS7_v3.R') ####################### ## Download packages ## ####################### library(vegan) ## many functions to calculate dissimilarities, perform PCA... library(adespatial) ## to perform MSR tests library(spdep) ## to work with adespatial library(BiodiversityR)## broken stick models for PCA library(ggplot2) ## plot packages library(FactoMineR) ## PCA plot library("factoextra") ## PCA plot library(ade4) ## plot functions library(forecast) ## for data normalisation library(ecodist) ## for MRM models library(relaimpo) ## for LMG calculations library(ade4) ## for plots and PCA library(hypervolume) ## to compute multi-dimensional trait space library(labdsv) ## Ordination and Multivariate Analysis library(RColorBrewer) ## color design library(BHPMF) ## for data imputations library(phytools) ## phylogenetic analyses library(ape) library(brranching) library(phylotate) library(stringr) library(motmot) library(rethinking) ## Bayesian confidence interval #citation("vegan") ## package citation ############################## ## Open data file ## ############################## ## Environmental data (E1 = untransformed; E2 = normalised-standardised): #------------------------------------------------------------------------ E <- as.data.frame(read.table("AppendixS4.txt",h =T,row.names = 1)) colnames(E) E1 <- E[,-c(1:4)] # Normalising soil variables: E2 <- E1 lambda <- as.data.frame(matrix(ncol=ncol(E2),nrow=1)) ; colnames(lambda) <- colnames(E2) for(i in 1:ncol(E2)){ lambda[1,i] <- BoxCox.lambda(E2[,i]+abs(min(E2[,i],na.rm=TRUE))+0.00000000000000000000001, method = "loglik", lower = -2, upper = 2) E2[,i] <- BoxCox(E2[,i], lambda=lambda[1,i]) } #par(mfrow=c(5,5),mex=0.3) #for(j in 1:ncol(E2)){hist(E2[,j],col="red",main = colnames(E2)[j])} #par(mfrow=c(1,1)) ## Standardising variables: E2 <- decostand(E2,"standardize",na.rm=TRUE) ## Sites: #-------- sites <- E[,1] ## Spatial coordinates: #---------------------- C <- E[,2:3] ## Habitats: #----------- Hab <- as.matrix(E[,4]) ## Species abundance data: #------------------------- TS <- as.data.frame(read.table("AppendixS5.txt",h =T,row.names = 1)) TS2 <- decostand(TS,"hel") ## Hellinger-transformed species abundances dim(TS2) rownames(TS2) #**************************************** ### FUNCTIONAL DATA ### #**************************************** #--------------------------------------------------------------------------------- ## Imputed values for the 2318 species of the NBA database and the FG inventories: #--------------------------------------------------------------------------------- ## [ NBA = 1630 species ] ## [ Species in common = 779 ] ## [ FG floristic inventories = 1467 species] ## [ 851 sp. ] [ 688 sp. ] Z1 <- as.data.frame(read.table("AppendixS6.xls",h=T)) head(Z1);dim(Z1) j=1 gens <- as.character(Z1[,2]) gens.names <- names(table(Z1[,2])) for(j in 1:length(gens.names)){ w <- as.numeric(which(gens%in%gens.names[j])) gens[w] <- as.character(paste0(gens[w],"_",paste0(rep("sp",length(w)),c(1:length(w))))) } ## Original matrix of the 8345 individuals with missing trait values: ## (All species [ n = 1630] with observed trait values are in this matrix) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ------ traits <- as.data.frame(read.table("AppendixS3.txt",h=T)) head(traits);dim(traits) namesp.traits <- names(table(traits$Species)) ; namesp.traits ## Imputed (NBA1) and observed (NBA2) trait values for the NBA database (1630 species): # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - w1 <- which(Z1[,1]%in%namesp.traits) ; w1 NBA1 <- Z1[w1,-c(1:6)] rownames(NBA1) <- Z1[w1,1] head(NBA1);dim(NBA1) sp_list.NBA1 <- Z1[w1,1] ; ge_list.NBA1 <- Z1[w1,2] ; fa_list.NBA1 <- Z1[w1,3] sp_list.NBA2 <- Z1[w1,1] ; ge_list.NBA2 <- Z1[w1,2] ; fa_list.NBA2 <- Z1[w1,3] head(traits);dim(traits) sel1 <- c(20:22,24,26,28:30,34,40:41,45:46,58,72:76) colnames(traits)[sel1] NBA2 <- NBA1 ; NBA2[,] <- 0 for(i in 1:nrow(NBA2)){ w <- which (traits$Species%in%rownames(NBA2)[i]) NBA2[i,] <- colMeans(traits[w,sel1],na.rm = TRUE) } head(NBA2);dim(NBA2) head(NBA1);dim(NBA1) ## Imputed (FG1) and observed (FG2) trait values for the FG floristic inventories (1467 species): # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - w <- which(Z1[,1]%in%colnames(TS2)) ; w length(w) FG1 <- Z1[w,-c(1:6)] rownames(FG1) <- Z1[w,1] head(FG1);dim(FG1) FG1.for.table2 <- FG1 sp_list.FG1 <- Z1[w,1] ; ge_list.FG1 <- Z1[w,2] ; fa_list.FG1 <- Z1[w,3] sp_list.FG2 <- Z1[w,1] ; ge_list.FG2 <- Z1[w,2] ; fa_list.FG2 <- Z1[w,3] FG2 <- FG1 ; FG2[,] <- 0 for(i in 1:nrow(FG2)){ w <- which (traits$Species%in%rownames(FG2)[i]) FG2[i,] <- colMeans(traits[w,sel1],na.rm = TRUE) } head(FG2);dim(FG2) ## Selection of traits: #---------------------- head(NBA1) ; dim(NBA1) head(NBA2) ; dim(NBA2) head(FG1) ; dim(FG1) head(FG2) ; dim(FG2) sel <- c(1:19) ## Normalise & Standardise NBA1, NBA2, FG1 and FG2 #------------------------------------------------- for(z in 1:4){ if(z==1){ Z2 <- NBA1[,sel] ; sp_list <- sp_list.NBA1 } if(z==2){ Z2 <- NBA2[,sel] ; sp_list <- sp_list.NBA2 } if(z==3){ Z2 <- FG1 [,sel] ; sp_list <- sp_list.FG1 } if(z==4){ Z2 <- FG2 [,sel] ; sp_list <- sp_list.FG2 } #if(z==5){ Z2 <- NBA4 } #if(z==6){ Z2 <- FG4 } ## If removing the 5% most extreme values for each trait: #for(i in 1:ncol(Z2)){ # Q <- quantile( Z2[,i] , c( 0.025 , 0.975 ) ,na.rm=TRUE) # w1 <- which( Z2[,i] < Q[1] ) # w2 <- which( Z2[,i] > Q[2] ) # w <- c(w1,w2) ; w # Z2[w,i] <- mean(Z2[,i],na.rm=TRUE) #} lambda <- as.data.frame(matrix(ncol=ncol(Z2),nrow=1)) ; colnames(lambda) <- colnames(Z2) for(i in 1:ncol(Z2)){ lambda[1,i] <- BoxCox.lambda(Z2[,i]+abs(min(Z2[,i],na.rm=TRUE))+0.00000000001, method = "loglik", lower = -1, upper = 1) Z2[,i] <- BoxCox(Z2[,i], lambda=lambda[1,i]) } lambda ## lambda values stored for back normalisation par(mfrow=c(7,9),mex=0.3) for(j in 1:ncol(Z2)){hist(Z2[,j],col="blue",main = colnames(Z2)[j])} par(mfrow=c(1,1)) for(h in 1:ncol(Z2)){ H <- Z2[,h] ; H[H%in%NaN] <- NA ; Z2[,h] <- H } Z2 <- decostand(Z2,"standardize",na.rm=TRUE) if(z==1){ NBA1 <- Z2 } if(z==2){ NBA2 <- Z2 } if(z==3){ FG1 <- Z2 } if(z==4){ FG2 <- Z2 } } NBA1 <- NBA1[,sel];NBA2 <- NBA2[,sel];FG1 <- FG1[,sel];FG2 <- FG2[,sel] head(NBA1) ; dim(NBA1) head(NBA2) ; dim(NBA2) head(FG1) ; dim(FG1) head(FG2) ; dim(FG2) #------------------------------------------------------------ ## Estimations of imputation precision via distance matrices: #------------------------------------------------------------ ## For the 1625 species of the NBA matrix: # - - - - - - - - - - - - - - - - - - - - DZ2 <- as.vector(dist(t(NBA1))) ; DM2 <- as.vector(dist(t(NBA2))) length(DZ2) ; length(DM2) for(k in 1:2){ if(k==1){Y <- DZ2} ; if(k==2){Y <- DM2} lambda <- BoxCox.lambda(Y+abs(min(Y,na.rm=TRUE))+0.00000000000000000000001, method = "loglik", lower = -2, upper = 2) Y <- BoxCox(Y, lambda=lambda) H <- Y ; H[H%in%NaN] <- NA ; Y <- H Y <- decostand(Y,"standardize",na.rm=TRUE) if(k==1){DZ3 <- as.numeric(Y)} ; if(k==2){DM3 <- as.numeric(Y)} } V <- as.data.frame(cbind(DZ3,DM3)) plot (DZ3~DM3,data=V,col=col.alpha("yellow3",0.6),cex=1,pch=16,ylab="Imputed",xlab="Observed") cor.test(V$DZ3,V$DM3) bm1 <- quap( alist( DZ3 ~ dnorm(mu, sigma), mu ~ a + b1*DM3 , a ~ dnorm(0,1),b1~dlnorm(0,1),sigma ~ dunif (0,50) ), data = V ) weight.seq <- seq( from=-5 , to=5 , length.out=30 ) pred_dat <- list( DM3=weight.seq ) ; mu <- link( bm1 , data=pred_dat ) ; mu.mean <- apply( mu , 2 , mean ) mu.PI <- apply( mu , 2 , PI , prob=0.89 ) ; sim.height <- sim( bm1 , data=pred_dat ) height.PI <- apply( sim.height , 2 , PI , prob=0.90 ) lines( weight.seq , mu.mean ) shade( mu.PI , weight.seq, col=col.alpha("yellow3",0.3)) shade( height.PI , weight.seq, col=col.alpha("yellow3",0.2)) ## For the 1467 species of the FG floristic inventories: # - - - - - - - - - - - - - - - - - - - - - - - - - - - - DZ2 <- as.vector(dist(t(FG1))) ; DM2 <- as.vector(dist(t(FG2))) for(k in 1:2){ if(k==1){Y <- DZ2} ; if(k==2){Y <- DM2} lambda <- BoxCox.lambda(Y+abs(min(Y,na.rm=TRUE))+0.00000000000000000000001, method = "loglik", lower = -2, upper = 2) Y <- BoxCox(Y, lambda=lambda) H <- Y ; H[H%in%NaN] <- NA ; Y <- H Y <- decostand(Y,"standardize",na.rm=TRUE) if(k==1){DZ3 <- as.numeric(Y)} ; if(k==2){DM3 <- as.numeric(Y)} } V <- as.data.frame(cbind(DZ3,DM3)) points(DZ3~DM3,data=V,col=col.alpha("blue",0.5),pch=16,cex=0.8,ylab="Imputed",xlab="Observed") cor.test(V$DZ3,V$DM3) cor(V$DZ3,V$DM3)^2 bm1 <- quap( alist( DZ3 ~ dnorm(mu, sigma), mu ~ a + b1*DM3 , a ~ dnorm(0,1),b1~dlnorm(0,1), sigma ~ dunif (0,10) ), data = V ) weight.seq <- seq( from=-5 , to=5 , length.out=30 ) pred_dat <- list( DM3=weight.seq ) ; mu <- link( bm1 , data=pred_dat ) ; mu.mean <- apply( mu , 2 , mean ) mu.PI <- apply( mu , 2 , PI , prob=0.89 ) ; sim.height <- sim( bm1 , data=pred_dat ) height.PI <- apply( sim.height , 2 , PI , prob=0.90 ) lines( weight.seq , mu.mean ) shade( mu.PI , weight.seq, col=col.alpha("blue3",0.2)) shade( height.PI , weight.seq, col=col.alpha("blue3",0.1)) ############################# ## Overview of data matrices: ############################# head(E2);dim(E2) ## environmental variables head(Hab);dim(Hab) ## habitats sites ; length(sites) ## sites head(TS [,1:10]) ; dim(TS) ## species abundance (untransformed) head(TS2[,1:10]) ; dim(TS2) ## species abundance (Hellinger-transformed) taxo <- as.data.frame(read.table("AppendixS3.txt",h =T,row.names = 1))[,4:9] ## taxa names columns in Appendix S3 head(taxo);dim(taxo) ########################################### #### Phylogenetic Independent Contrast #### ########################################### #remotes::install_github("jinyizju/V.PhyloMaker") library(V.PhyloMaker) for(u in 1:4){ if(u==1){ TR <- NBA1 ; sp_list <- sp_list.NBA1 ; ge_list <- ge_list.NBA1 ; fa_list <- fa_list.NBA1 } if(u==2){ TR <- NBA2 ; sp_list <- sp_list.NBA2 ; ge_list <- ge_list.NBA1 ; fa_list <- fa_list.NBA1 } if(u==3){ TR <- FG1 ; sp_list <- sp_list.FG1 ; ge_list <- ge_list.FG1 ; fa_list <- fa_list.FG1 } if(u==4){ TR <- FG2 ; sp_list <- sp_list.FG2 ; ge_list <- ge_list.FG2 ; fa_list <- fa_list.FG2 } spec <- as.character(sp_list) ; spec <- str_replace(spec, "_", " ") gens <- as.character(ge_list) fams <- as.character(fa_list) example <- data.frame(species = spec, genus = gens, family = fams) tree <- phylo.maker(example, scenarios=c("S3")) tree2 <- as.phylo(tree$scenario.3) tree2 <- force.ultrametric(tree2, method=c("nnls")) tree.NSF = tree2 w = which(rownames(TR)%in%tree.NSF$tip.label) ; w w2 = which(tree.NSF$tip.label%in%rownames(TR)) ; w2 w3 = which(table(c(c(1:length(tree.NSF$tip.label)),w2))==1) ; w3 if(length(w3)>0){tree.NSF <- drop.tip(tree.NSF,tree.NSF$tip.label[w3])} TR <- TR[w,] TR2 <- TR ord <- c() for(i in 1:nrow(TR2)){ ord <- c(ord,which(rownames(TR2)%in%tree.NSF$tip.label[i])) } tree.NSF = multi2di(tree.NSF) tree.NSF = compute.brlen(tree.NSF) dim(TR2) if(u==1){ NBA1.Ust <- TR2 ; tree.NBA1 <- tree.NSF } if(u==2){ NBA2.Ust <- TR2 ; tree.NBA2 <- tree.NSF } if(u==3){ FG1.Ust <- TR2 ; tree.FG1 <- tree.NSF } if(u==4){ FG2.Ust <- TR2 ; tree.FG2 <- tree.NSF } ### CALCULATE phylogenetic independent contrasts data: #----------------------------------------------------- tip=tree.NSF$tip.label # pic = pic(TR2[,i],tree.NSF) ; length(pic) PIC = TR2[-1,] ; PIC[,]=0 ; dim(PIC) TR3 <- TR2 rownames(PIC) <- paste0(rep("Nod"),c(1:nrow(PIC))) if(u%in%c(2,4)){ TR3 <- TR2 for(h in 1:ncol(TR3)){ w <- which(is.na(TR3[,h])%in%TRUE) TR3[w,h] <- mean(TR3[,h],na.rm=TRUE) } } ## Calculate PIC values: #----------------------- for(i in 1:ncol(TR3)) { PIC[,i]=pic(TR3[,i],tree.NSF) } if(u%in%c(1,3)){ ## if using imputed trait data trait2=apply(PIC, 2, as.numeric) rownames(trait2) <- rownames(PIC) head(trait2);dim(trait2) colnames(trait2) <- c("Chlo","Thick","Tough","Area","SLA","C","N","13C","Ca","P","K", "TBT","SapWSG","WSG", "Diam","SRL","Dens.","SRTA","Branch") par(mfrow=c(6,4),mex=0.3) for(i in 1:ncol(trait2)){ hist(trait2[,i]) } par(mfrow=c(1,1),mex=0.4) } if(u%in%c(2,4)){ trait3=apply(PIC, 2, as.numeric) rownames(trait3) <- rownames(PIC) colnames(trait3) <- colnames(trait2) } if(u==1){PIC.NBA1 <- trait2} if(u==2){PIC.NBA2 <- trait3} if(u==3){PIC.FG1 <- trait2} if(u==4){PIC.FG2 <- trait3} } ############################################ #******************************************* #### Phylogenetic signal for each trait #### #******************************************* ############################################ library(phytools) Traits.1 <- NBA2 ; head(Traits.1) ; dim(Traits.1) Phylog.1 <- tree.NBA2 ; Phylog.1 phylosignal <- as.data.frame(matrix(nrow=ncol(Traits.1),ncol=4)) colnames(phylosignal) <- c("lambda","CI.low","CI.upp","Pval.Chi2") rownames(phylosignal) <- colnames(Traits.1) phylosignal ## Test of lambda using likelihood ratio test between ## the BM and a stochastic model: # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for(i in 1:ncol(Traits.1)){ n=i;colnames(Traits.1)[n] sortedData <- sortTraitData(phy = Phylog.1, y = Traits.1, log.trait = FALSE, data.name = colnames(Traits.1)[n], pass.ultrametric = TRUE) phy <- sortedData$phy male.length <- sortedData$trait traitData.plot(y = male.length^1, phy = phy, lwd.traits = 0.5) bm.ml <- transformPhylo.ML(phy = phy, y = male.length, model = "bm") lambda.ml <- transformPhylo.ML(phy = phy, y = male.length, model = "lambda") p.value <- 1 - pchisq(lambda.ml$MaximumLikelihood - bm.ml$logLikelihood, 1) phylosignal[i,1] <- round(lambda.ml$Lambda[1],3) phylosignal[i,2] <- round(lambda.ml$Lambda[2],3) phylosignal[i,3] <- round(lambda.ml$Lambda[3],3) phylosignal[i,4] <- round(p.value,3) } phylosignal ################################################# #************************************************ ##### PCA on environmental variables ##### #************************************************ ################################################# head(E2);dim(E2) ## overview of all environmental variables colnames(E2)[4] <- "SDMR" colnames(E2)[11] <- "P" library(corrplot) corrplot(cor(E2)) ## overview of correlations among environmental variables ## observed eigenvalues + broken stick model of eigenvalues: #----------------------------------------------------------- barplot(rda(E2)$CA$eig/sum(rda(E2)$CA$eig)*100,col="black") Sign <- PCAsignificance(rda(E2)) ; Sign cumsum(Sign[4,]) barplot(rda(E2)$CA$eig,col=c("black","black",rep("grey",11))) ## PCA with FactoMineR: #---------------------- res.pca <- dudi.pca(E2, scannf = FALSE, # Cacher le scree plot nf = 5 # Nombre d'axes gard?s ) #fviz_eig(res.pca) a=1;b=2 #a=3;b=4 fviz_pca_var(res.pca, axes=c(a,b), #col.var = "contrib", col.var = "black", #gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), repel = TRUE ) #fviz_pca_ind(res.pca) #gray = gray.colors(20, start = 0.9, end = 0.3, gamma = 2.2, alpha = NULL) #fviz_pca_var(res.pca, col.var="contrib",gradient.cols = c(gray[8], gray[14], gray[20])) ## Emphasise habitats: # - - - - - - - - - - p <- fviz_pca_ind(res.pca,label="none", habillage=as.factor(H), addEllipses=TRUE, ellipse.level=0.95) p ## Emphasize sites: # - - - - - - - - - p <- fviz_pca_ind(res.pca,label="none", habillage=sites, addEllipses=TRUE, ellipse.level=0.95) p p + scale_color_manual(values=c("#80883d", "#c35ebd", "#73b340", "#7669c9", "#d0a13b", "#6799d1", "#d25141", "#4eab7d", "#c45b84", "#b97547")) p ## choose panel of colours on https://medialab.github.io/iwanthue/ ## with ggplot: #------------- PCA <- as.data.frame(cbind(rda(E2)$CA$u[,1],-rda(E2)$CA$u[,2])) colnames(PCA) <- c("PC1","PC2") head(PCA);dim(PCA) sites H2 <- as.factor(H) mycolors2 <- c("#c26a7d","#6ab74d","#a864c3","#c3ab41","#6587cd", "#cf5038","#4bb193","#d0427e","#6d8038","#c07c42") p <- ggplot(PCA, aes(x=PC1, y=PC2)) + geom_point(aes(shape=H2, color=sites), size=3)+ scale_color_manual(values=mycolors2)+ scale_shape_manual(values=c(0,1,2,3)) p p+stat_ellipse(aes(x=PC1, y=PC2,colour=as.factor(sites)),type="norm")+ scale_color_manual(values = mycolors2) ## With varimax: #--------------- a=1;b=2 a=3;b=4 par(mfrow=c(1,1),mex=0.4) pca.env = rda(E2, scale=T) pca.env$CA$eig/sum(pca.env$CA$eig)*100 loading = scores(pca.env, choices=c(a:b))$species #choices determines which pc to be taken rloading = varimax(loading)$loadings iloading = t(pinv(rloading));iloading scores = scale(E2) %*% iloading r = range(c(rloading, scores)) plot(scores, xlim = c(-1.6,1.6), ylim= c(-1.6,1.6), xlab= "PC1 ", ylab= "PC2 ",col="white") arrows(0,0, rloading[,1]*1, rloading[,2]*1, length=0.1, col="black") text(rloading[,1], rloading[,2], labels = colnames(E2), cex=1.3, pos=3, col="black") head(E2) 0.806^2 #text(scores[,1], scores[,2], labels = rownames(TT9), pos = 3) abline(h=0,v=0,lty=2,col="grey60") loadings <- as.matrix(abs(iloading)*100) ; rownames(loadings) <- colnames(E2) loadings #write.table(loadings,"loading.xls") ## Appendix S8. Pearson correlations between environmental variables and plot scores: #------------------------------------------------------------------------------------ AppS8 <- round(cor(rda(E2)$CA$u,E2),3) write.table(AppS8,"Appendix.S8.xls") ## Appendix S9. Pearson correlations among environmental variables: #------------------------------------------------------------------ AppS9 <- round(cor(E2),3) write.table(AppS9,"Appendix.S9.xls") ######################################################################### #************************************************************************ ## Table 1 (mean and range of climate variables + diversity in each site) #************************************************************************ ######################################################################### ## Function ENSK to calculate the Effective Number of Species Expected from a random sampling ## (with replacement) of individuals: ENSK <- function(data=dataset, K=k){ # library(vegan) DIM<- dim(data) ENS <- matrix(NA,DIM[1],1) for (i in 1:DIM[1]) { if (K<=rowSums(data)[i]) { Sk=rarefy(data[i,],K) Nt=Sk for (j in 1:10000) { Nt1=Sk/(1-(1-1/Nt)^K) if (Nt1-Nt<0.0001) {break} else{Nt=Nt1} } ENS[i,1]=Nt } } return(ENS) } head(E1) nrsites <- length(table(sites)) ; nrsites sites.names <- names(table(sites)) nrhabs <- length(table(H)) ; nrhabs habs.names <- names(table(H)) Table1 <- as.data.frame(matrix(nrow=nrsites+nrhabs,ncol=6)) colnames(Table1) <- c("Nrplots","alt.range","MAR","MAT","mean.Nr.sp","ENS2") rownames(Table1) <- c(names(table(sites)),names(table(H))) head(Table1);dim(Table1) for(i in 1:nrsites){ H1 <- sites.names[i] ; H1 w <- which(sites%in%H1) E9 <- E[w,] Table1[i,1] <- nrow(E9) ## Nr of plots Table1[i,2] <- paste0(range(E9[,5])[1],"_",range(E9[,5])[2]) # altitudinal range Table1[i,3] <- mean (E9[,7]) # mean annual rainfall Table1[i,4] <- mean (E9[,6]) # mean annual temperature ## Mean number of species/plot: Table1[i,5] <- paste0(round(mean(specnumber(TS[w,])),3),"_(",round((sd(specnumber(TS[w,]))/sqrt(nrow(TS[w,]))),3),")") ## Mean ENS/plot for a random sampling of 2 individuals: mean <- mean(ENSK(as.matrix(TS[w,]),2)) CV <- sd(ENSK(as.matrix(TS[w,]),2))/sqrt(nrow(TS[w,])) Table1[i,6] <- paste0(round(mean,3),"_(",round(CV,3),")") } for(i in 11:14){ H1 <- habs.names[i-10] ; H1 w <- which(H%in%H1) E9 <- E[w,] head(E9) Table1[i,1] <- nrow(E9) ## Nr of plots Table1[i,2] <- paste0(round(range(E9[,5])[1],1),"_",round(range(E9[,5])[2],1)) # altitudinal range Table1[i,3] <- mean (E9[,7]) # mean annual rainfall Table1[i,4] <- mean (E9[,6]) # mean annual temperature ## Mean number of species/plot: Table1[i,5] <- paste0(round(mean(specnumber(TS[w,])),3),"_(",round((sd(specnumber(TS[w,]))/sqrt(nrow(TS[w,]))),3),")") ## Mean ENS/plot for a random sampling of 2 individuals: mean <- mean(ENSK(as.matrix(TS[w,]),2)) CV <- sd(ENSK(as.matrix(TS[w,]),2))/sqrt(nrow(TS[w,])) Table1[i,6] <- paste0(round(mean,3),"_(",round(CV,3),")") } Table1 #write.table(Table1,"Table1.xls") ##################################################################### #******************************************************************** #### Table 3. Environmental differences across habitats #### #******************************************************************** ##################################################################### Table3 <- as.data.frame(matrix(0,ncol=ncol(E2),nrow=11)) colnames(Table3) <- colnames(E2) rownames(Table3) <- c("Cloud","SF","TF","WS","ANOVA", "SF.Cloud","TF.Cloud","WS.Cloud","TF.SF","WS.SF","WS.TF") Table3 habtypes <- names(table(H)) ; habtypes head(E1);dim(E1) i=1 e=1 for(i in 1:length(habtypes)){ w <- which(H%in%habtypes[i]) sub <- E1[w,] sd <- c() for(e in 1:ncol(E1)){sd <- c(sd,sd(sub[,e]))} Table3[i,] <- paste0(round(colMeans(sub),2)," (",round(sd,2),")") } for(j in 1:ncol(E1)){ Table3[5,j] <- round(anova(lm(E2[,j]~H))$Pr[1],4) aov <- aov(E2[,j]~H) THSD <- TukeyHSD(aov) Table3[6:11,j] <- round(THSD$H[,4],4) } Table3 #write.table(Table3,"Table3.xls") ################################################# ## Correlations and PCAs on traits ## ################################################# ## Good link ## http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials/ #---------------------------- ## Correlations among traits: #---------------------------- TT8.a <- NBA1 ## Imputed traits for the NBA database TT8.a <- NBA2 ## Observed traits for the NBA database TT8.a <- FG1 ## Imputed traits for the FG floristic plots TT8.a <- FG2 ## Observed traits for the FG floristic plots TT8.b <- PIC.NBA1 ## PIC data on imputed traits for the NBA database TT8.b <- PIC.NBA2 ## PIC data on observed traits for the NBA database TT8.b <- PIC.FG1 ## PIC data on imputed traits for the FG floristic plots TT8.b <- PIC.FG2 ## PIC data on observed traits for the FG floristic plots Cor.a <- round(cor(TT8.a,use="complete.obs"),2) ## imputed traits Cor.b <- round(cor(TT8.b,use="complete.obs"),2) ## matrix with missing traits Cor1 <- Cor.a Cor1[lower.tri(Cor1)%in%FALSE] <- 0 Cor1[lower.tri(Cor1)%in%FALSE] <- Cor.b[lower.tri(Cor.b)%in%FALSE] #----------------------------------- ## CO-INERTIA & PROCRUSTES ANALYSES: #----------------------------------- ## Tests of Procrustes correlation between PCAs obtained using matrices of correlations ## among traits, these matrices being calculated using only the observed trait values, ## following a recommendation in Dray et al. (2015) - Plant Ecol (2015) 216:657-667. #install.packages("MatrixCorrelation") library("MatrixCorrelation") ## Store TT8 for Procrustes Analyses: TT8.a <- NBA1 ## Imputed traits for the NBA database TT8.a <- NBA2 ## Observed traits for the NBA database TT8.a <- FG1 ## Imputed traits for the FG floristic plots TT8.a <- FG2 ## Observed traits for the FG floristic plots TT8.a <- PIC.NBA1 ## PIC data on imputed traits for the NBA database TT8.a <- PIC.NBA2 ## PIC data on observed traits for the NBA database TT8.a <- PIC.FG1 ## PIC data on imputed traits for the FG floristic plots TT8.a <- PIC.FG2 ## PIC data on observed traits for the FG floristic plots TT8.b <- NBA1 ## Imputed traits for the NBA database TT8.b <- NBA2 ## Observed traits for the NBA database TT8.b <- FG1 ## Imputed traits for the FG floristic plots TT8.b <- FG2 ## Observed traits for the FG floristic plots TT8.b <- PIC.NBA1 ## PIC data on imputed traits for the NBA database TT8.b <- PIC.NBA2 ## PIC data on observed traits for the NBA database TT8.b <- PIC.FG1 ## PIC data on imputed traits for the FG floristic plots TT8.b <- PIC.FG2 ## PIC data on observed traits for the FG floristic plots for(k in 1:ncol(TT8.a)){Q<-quantile(TT8.a[,k],c(0.025,0.975),na.rm=TRUE) ; w1<-which(TT8.a[,k]Q[2]) ; w <-c(w1,w2) ; TT8.a[w,k]<-mean(TT8.a[,k],na.rm=TRUE)} for(k in 1:ncol(TT8.b)){Q<-quantile(TT8.b[,k],c(0.025,0.975),na.rm=TRUE) ; w1<-which(TT8.b[,k]Q[2]) ; w <-c(w1,w2) ; TT8.b[w,k]<-mean(TT8.b[,k],na.rm=TRUE)} a=1 ; b=2 a=3 ; b=4 prin.a <- PCA(TT8.a, graph = FALSE, ncp=7)$var$coord[,a:b] ## PCA on imputed traits prin.b <- PCA(TT8.b, graph = FALSE, ncp=7)$var$coord[,a:b] ## PCA on matrix with missing trait values Y9 <- prin.a ; X9 <- prin.b dim(Y9);dim(X9) procr <- protest(X=X9,Y=Y9) ; procr PT <- as.data.frame(matrix(ncol=4,nrow=4)) rownames(PT) <- c("FG1","FG2","NBA1","NBA2") colnames(PT) <- c("PIC.FG1","PIC.FG2","PIC.NBA1","PIC.NBA2") a=1 ; b=2 a=3 ; b=4 i=2;j=3 for(i in 1:4){ for(j in 1:4){ #if(i==1){A1 <- PCA(FG1, graph=FALSE,ncp=4)$var$coord[,a:b] } ; if(i==2){A1 <- PCA(FG2, graph=FALSE,ncp=4)$var$coord[,a:b] } #if(i==3){A1 <- PCA(NBA1,graph=FALSE,ncp=4)$var$coord[,a:b] } ; if(i==4){A1 <- PCA(NBA2,graph=FALSE,ncp=4)$var$coord[,a:b] } #if(j==1){A2 <- PCA(PIC.FG1, graph=FALSE,ncp=4)$var$coord[,a:b] } ; if(j==2){A2 <- PCA(PIC.FG2, graph=FALSE,ncp=4)$var$coord[,a:b] } #if(j==3){A2 <- PCA(PIC.NBA1,graph=FALSE,ncp=4)$var$coord[,a:b] } ; if(j==4){A2 <- PCA(PIC.NBA2,graph=FALSE,ncp=4)$var$coord[,a:b] } if(i==1){A1 <- FG1 } ; if(i==2){A1 <- FG2 } if(i==3){A1 <- NBA1 } ; if(i==4){A1 <- NBA2 } if(j==1){A2 <- PIC.FG1 } ; if(j==2){A2 <- PIC.FG2 } if(j==3){A2 <- PIC.NBA1} ; if(j==4){A2 <- PIC.NBA2} for(k in 1:ncol(A1)){Q <- quantile(A1[,k],c(0.025,0.975),na.rm=TRUE) ; w1<-which(A1[,k]Q[2]) ; w <-c(w1,w2) ; A1[w,k]<-mean(A1[,k],na.rm=TRUE)} for(k in 1:ncol(A2)){Q <- quantile(A2[,k],c(0.025,0.975),na.rm=TRUE) ; w1<-which(A2[,k]Q[2]) ; w <-c(w1,w2) ; A2[w,k]<-mean(A2[,k],na.rm=TRUE)} L1 <- 4 ; L2 <- 4 pcoa1 <- cmdscale(as.dist(cor(A1,use="complete.obs")),k=15,eig=TRUE) pcoa2 <- cmdscale(as.dist(cor(A2,use="complete.obs")),k=15,eig=TRUE) A11 <- pcoa1$points[,1:L1] A22 <- pcoa2$points[,1:L2] PT[i,j] <- round(protest(X=A11,Y=A22)$scale,3) } } PT #------------------------------------------ ## PCA after Varimax rotation (with vegan): #------------------------------------------ # notes: varimax rotation is supposed to be done on loadings not on eigenvectors # link => https://stackoverflow.com/questions/34944179/doing-pca-with-varimax-rotation-in-r library(vegan) library(pracma) TT8 <- NBA1 ## Imputed traits for the NBA database TT8 <- FG1 ## Imputed traits for the FG floristic plots TT8 <- PIC.NBA1 ## PIC data on imputed traits for the NBA database TT8 <- PIC.FG1 ## PIC data on imputed traits for the FG floristic plots ## TT8 <- traits5 colnames(TT8) <- c("Chlo","Thick","Tough","Area","SLA","C","N","13C","Ca","P","K", "TBT","SapWSG","WSG","Diam","SRL","Dens.","SRTA","Branch") a=1;b=2 ## to plot axes 1-2 a=3;b=4 ## to plot axes 3-4 cols1 <- c(rep("green4",11),rep("orange3",2),rep("blue",1),rep("red2",5)) TT9 <- TT8 head(TT9);dim(TT9) round(rda(TT9)$CA$eig/sum(rda(TT9)$CA$eig)*100,3) ## eigenvalues Sign <- PCAsignificance(rda(TT9)) ; Sign ## broken stick eigenvalues par(mfrow=c(1,1),mex=0.4) pca.env = rda(TT9, scale=T) loading = vegan::scores(pca.env, choices=c(a,b))$species #choices determines which pc to be taken rloading = varimax(loading)$loadings ; iloading = t(pinv(rloading));iloading scores = scale(TT9) %*% iloading r = range(c(rloading, scores)) plot(scores, xlim = c(-2.6,3.1), ylim= c(-2.3,2.4), xlab= "PC1 ", ylab= "PC2 ",col="white") arrows(0,0, rloading[,1]*1, rloading[,2]*1, length=0.1, col=cols1) text(rloading[,1], rloading[,2], labels = colnames(TT9), cex=1.3, pos=3, col=cols1) abline(h=0,v=0,lty=2,col="grey60") loadings <- as.matrix(abs(iloading)*100) ; rownames(loadings) <- colnames(TT9) loadings ############################################################# #************************************************************ #### Table 2. List of traits + their mean and range #### #************************************************************ ############################################################# AT <- FG1.for.table2 head(AT);dim(AT) Table2 <- as.data.frame(matrix(ncol=ncol(AT),nrow=11)) colnames(Table2) <- colnames(AT) rownames(Table2) <- c("Cloud","SF","TF","WS","ANOVA", "SF.Cloud","TF.Cloud","WS.Cloud","TF.SF","WS.SF","WS.TF") Table2 habtypes <- names(table(Hab)) ; habtypes head(E1);dim(E1) w.CL <- which(Hab%in%"CLOUD") ; w.CL w.SF <- which(Hab%in%"SF") ; w.SF w.TF <- which(Hab%in%"TF") ; w.TF w.WS <- which(Hab%in%"WS") ; w.WS TS.CL <- TS[w.CL,] ; TS.CL <- TS.CL[,colSums(TS.CL)>=10] ; dim(TS.CL) TS.SF <- TS[w.SF,] ; TS.SF <- TS.SF[,colSums(TS.SF)>=10] ; dim(TS.SF) TS.TF <- TS[w.TF,] ; TS.TF <- TS.TF[,colSums(TS.TF)>=10] ; dim(TS.TF) TS.WS <- TS[w.WS,] ; TS.WS <- TS.WS[,colSums(TS.WS)>=10] ; dim(TS.WS) for(i in 1:length(habtypes)){ w <- which(Hab%in%habtypes[i]) sub <- TS[w,] sub <- sub[,colSums(sub)>=10] FG22 <- AT[which(rownames(AT)%in%colnames(sub)),] sd <- c() for(e in 1:ncol(FG22)){sd <- c(sd,round(sd(FG22[,e]),2))} #Table2[i,] <- paste0(round(colMeans(FG22),2),"(",sd,")") Table2[i,] <- round(colMeans(FG22),2) #for(e in 1:ncol(AT)){median <- c(median(FG22))} } for(j in 1:ncol(FG1)){ AT.CL <- AT[which(rownames(AT)%in%colnames(TS.CL)),] # dim(AT.CL) AT.SF <- AT[which(rownames(AT)%in%colnames(TS.SF)),] # dim(AT.SF) AT.TF <- AT[which(rownames(AT)%in%colnames(TS.TF)),] # dim(AT.TF) AT.WS <- AT[which(rownames(AT)%in%colnames(TS.WS)),] # dim(AT.WS) U <- as.vector(cbind(c(AT.CL[,j],AT.SF[,j],AT.TF[,j],AT.WS[,j]))) V <- as.vector(c(rep("Cloud",nrow(AT.CL)),rep("SF" ,nrow(AT.SF)), rep("TF" ,nrow(AT.TF)),rep("WS" ,nrow(AT.WS)))) Table2[5,j] <- round(anova(lm(U~V))$Pr[1],3) aov <- aov(U~V) THSD <- TukeyHSD(aov) Table2[6:11,j] <- round(THSD$V[,4],3) } Table2 T9 <- as.matrix(Table2[c(1:4),]) T9 <- decostand(T9,"standardize") heatmap(t(T9),Rowv=NA) ############################################################################## #***************************************************************************** ## Fourth corner analysis (Fig. 5) #***************************************************************************** ############################################################################## ## FCA analyses: #--------------- E99 <- E2 ; head(E99) ; dim(E99) E99 <- rda(E2)$CA$u[,1:3] rda(E2)$CA$eig/sum(rda(E2)$CA$eig)*100 cor(E2,E99) #colnames(TS2)[1:5];colnames(TS)[1:5] w <- names(which(colSums(TS)>=10)) ; w w2 <- which(colnames(TS)%in%w) ; w2 E99 <- E99[rownames(E99)%in%rownames(TS2),] TS99 <- TS2[,w2] ; head(TS99[,1:10]) ; dim(TS99) AT99 <- cbind(FG1) ; head(AT99) ; dim(AT99) TS99 <- TS99[,colnames(TS99)%in%rownames(AT99) ] AT99 <- AT99[ rownames(AT99)%in%colnames(TS99),] TS99 <- TS99[,colnames(TS99)%in%rownames(AT99) ] TS99 <- TS99[,sort(colnames(TS99))] AT99 <- AT99[sort(rownames(AT99)),] dim(E99);dim(TS99);dim(AT99) rownames(E99);rownames(TS99) colnames(TS99)[1:20];rownames(AT99)[1:20] fca <- fourthcorner(as.data.frame(E99), as.data.frame(TS99), as.data.frame(AT99), modeltype = 6, p.adjust.method.G = "fdr", p.adjust.method.D = "fdr", nrepet = 4999) FCA1 <- as.data.frame((summary(fca)[,1:7])) head(FCA1);dim(FCA1) FCA.matrix <- as.data.frame(matrix(0,ncol=ncol(AT99),nrow=ncol(E99))) rownames(FCA.matrix) <- colnames(E99) ; colnames(FCA.matrix) <- colnames(AT99) rep1 <- rep(c(1:ncol(E99)),ncol(AT99)) ; rep1 rep2 <- c() ; for(a in 1:ncol(AT99)){ rep2 <- c(rep2,rep(a,ncol(E99))) } rep3 <- as.matrix(cbind(rep1,rep2)) ; rep3 for(i in 1:nrow(rep3)){ FCA.matrix[rep3[i,1],rep3[i,2]] <- round(as.numeric(as.matrix(FCA1)[i,3]),3) } FCA.matrix barplot(as.numeric(FCA.matrix[1,])) for(i in 1:nrow(rep3)){ FCA.matrix[rep3[i,1],rep3[i,2]] <- round(as.numeric(as.matrix(FCA1)[i,3]),3) if(as.numeric(as.matrix(FCA1)[i,7])<=0.05) {FCA.matrix[rep3[i,1],rep3[i,2]]<-paste0(FCA.matrix[rep3[i,1],rep3[i,2]],"*")} if(as.numeric(as.matrix(FCA1)[i,7])<=0.01) {FCA.matrix[rep3[i,1],rep3[i,2]]<-paste0(FCA.matrix[rep3[i,1],rep3[i,2]],"*")} if(as.numeric(as.matrix(FCA1)[i,7])<=0.001){FCA.matrix[rep3[i,1],rep3[i,2]]<-paste0(FCA.matrix[rep3[i,1],rep3[i,2]],"*")} } FCA.matrix ## For each pair of trait and envir variable: FCA.matrix <- as.data.frame(matrix(0,ncol=ncol(AT99),nrow=ncol(E99))) rownames(FCA.matrix) <- colnames(E99) ; colnames(FCA.matrix) <- colnames(AT99) rep1 <- rep(c(1:ncol(E99)),ncol(AT99)) ; rep1 rep2 <- c() ; for(a in 1:ncol(AT99)){ rep2 <- c(rep2,rep(a,ncol(E99))) } rep3 <- as.matrix(cbind(rep1,rep2)) ; rep3 for(i in 1:nrow(rep3)){ FCA.matrix[rep3[i,1],rep3[i,2]] <- round(as.numeric(as.matrix(FCA1)[i,3]),3) } FCA.matrix #------------------------------------------------------------------- ### Considering spatial AND phylogenetic auto-correlations ### in the FCA - Braga et al. (2018) Ecology, 99(12):2667-2674. #------------------------------------------------------------------- ## Use of phylomatic to generate a phylogenetic tree in newick format: #--------------------------------------------------------------------- taxa <- colnames(TS2) tree <- phylomatic(taxa=taxa, get = 'POST',outformat = "newick") #plot(tree, no.margin=TRUE) str(tree) ## Calculate phylogenetic eigenvectors to model phylogenetic structures: #----------------------------------------------------------------------- PE <- me.phylo(x = tree, f = function(x) 1-(x/max(x))) TS4 <- TS2 ; colnames(TS4) <- tolower(colnames(TS4)) TS4 <- TS4[,colnames(TS4)%in%rownames(PE)] TT10 <- TT9[tolower(rownames(TT9))%in%colnames(TS4),] dim(TT10);dim(TS4) #---------------------------------------------------------------------------------- ## Calculate Moran's Eigenvector Maps (MEM) explaining spatial structures in the ## the env. variables, in order to extract connectivities in the best ## spatial weighting matrix which will be used in the Moran Spectral Randomisations ## to account for spatial and phylogenetic autocorrelation #----------------------------------------------------------------------------------- ## Number of Moran Spectral Randomisations used in the following sections: nMSR <- 4999 Y <- E99 ; head(Y) ; dim(Y) C2 <- C C2 <- as.matrix(C2[rownames(C2)%in%rownames(Y), ]) Y <- as.matrix(Y [rownames(Y) %in%rownames(C2),]) C2 <- as.matrix(C2[rownames(C2)%in%rownames(Y), ]) Y <- as.matrix(Y [rownames(Y) %in%rownames(C2),]) dim(Y);dim(C2) ## Selection of best W matrices [see Bauman et al. (2018) Ecology 99(10):2159-2166 :] # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - candidates <- listw.candidates(coord = C2, style="B", nb = c("gab"), weights = c("flin","up"), y_fup = 0.5) ## Selection of the best MEM matrix based on "candidates" and Y: # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - spatial.effect <- 0 ## restera = 0 si pas d'effet global MEM <- listw.select(Y, candidates, MEM.autocor = "all", method = "FWD", MEM.all = TRUE, nperm = 4999, nperm.global = 4999, alpha = 0.05, p.adjust = TRUE) DN <- 0 if(is.list(MEM)){ if(is.list(MEM$best)){ MEM.find <- 1 MEM.select <- as.matrix(MEM$best$MEM.select) spatial.effect <- 1 DN <- candidates[[MEM$best.id]] if(i==1){DN2 <- DN} } } w1 <- which(MEM$best$summary[,6]<=0.001) MEM.soil <- MEM.select[,w1] MSR.soil <- msr(Y, DN, nrepet = nMSR, method = "singleton") ## Performing the fourth-corner with MSR randomization procedure: #---------------------------------------------------------------- nrepet <- 9999 fca3 <- fca ## correct only for spatial autocorrelation: msr.four <- msr(fca3, listwORorthobasis = DN , nrepet = nrepet) ## correct for both spatial and phylogenetic autocorrelation: msr.four <- msr(fca3, listwORorthobasis = DN , phyloORorthobasis = PE, nrepet = nrepet) summary(msr.four) ## export results fca4 <- msr.four fca4 <- as.data.frame((summary(msr.four)[,1:7])) fca1 <- as.data.frame((summary(fca)[,1:7])) #################################################################################### #*********************************************************************************** ### Imputation of missing trait values in the list of 8345 individuals (Appendix S2) #*********************************************************************************** #################################################################################### traits0 <- as.data.frame(read.table("AppendixS3.txt",h=T)) head(traits0);dim(traits0) colnames(traits0) traits <- traits0[,4:ncol(traits0)] head(traits);dim(traits) #----------------------------------------------- ## Remove outliers for T0 before imputation ## #----------------------------------------------- colnames(traits) i=15;colnames(traits)[i] ; plot(traits[,i],col="blue",pch=16) quantile(traits[,i],probs=c(0,1,5,95,99,100)/100,na.rm=TRUE) outliers <- c(18,24) min1 <- c(-10,-10) max1 <- c(40,50) ## Loop to remove outlier with extreme threshold values stored in Q: #------------------------------------------------------------------- for(i in 1:length(outliers)){ w <- which(traits[,outliers[i]] > max1[i]) traits[w,outliers[i]] <- NA } for(i in 1:length(outliers)){ w <- which(traits[,outliers[i]] < min1[i]) traits[w,outliers[i]] <- NA } ## Verification that the outliers have been removed: #--------------------------------------------------- par(mfrow=c(6,4),mex=0.4) for(i in 15:ncol(traits)){plot(traits[,i],col="blue",pch=16,cex=0.1)} par(mfrow=c(1,1)) #------------------------------------------------------- ## Imputation of missing trait values using BHPMF ## #------------------------------------------------------- N <- nrow(traits) ; N traits <- traits[1:N,] names <- colnames(traits) traits <- cbind(c(1:N),traits) colnames(traits) <- c("ID",names) ## add an ID column head(traits);dim(traits) colnames(traits) ## List of species: #------------------ list.species <- names(table(traits[,2])) ; L <- length(list.species) ; L ## Trait values distributions before transformation: #-------------------------------------------------- traits2 <- traits[1:N,c(16:ncol(traits))] head(traits2);dim(traits2) ## % of missing values for each trait: #------------------------------------- for(i in 1:nrow(NA.ind)){ NA.ind[i,2] <- round((length(which(is.na(traits2[,i])==TRUE))/nrow(traits2))*100,2) } NA.ind plot(as.numeric(NA.ind[,2]),pch=15,ylim=c(0,100)) ## Mean observed traits (M1) and % of missing trait values per species: #---------------------------------------------------------------------- sp1 <- names(table(traits[,2])[table(traits[,2])>0]) ; sp1 sp3 <- sp1 ## if region is not taken into account in imputation M1 <- as.data.frame(matrix(nrow=length(sp3),ncol=ncol(traits2))) rownames(M1) <- sp3 colnames(M1) <- colnames(traits)[16:ncol(traits)] i=1 for(i in 1:nrow(M1)){ subtraits <- traits[traits[,2]%in%sp1[i],] M1[i,] <- colMeans(subtraits[,16:ncol(subtraits)],na.rm=TRUE) } head(M1);dim(M1) na.traits2 <- as.data.frame(matrix(ncol=ncol(traits2),nrow=1)) ; colnames(na.traits2) <- colnames(traits2) for(i in 1:ncol(na.traits2)){ na.traits2[1,i] <- round((length(which(is.na(M1[,i])==TRUE))/nrow(M1))*100,2) } for(i in 1:nrow(NA.ind)){ NA.ind[i,3] <- round((length(which(is.na(M1[,i])==TRUE))/nrow(M1))*100,2) } NA.ind ## Correlations among traits before imputation: #---------------------------------------------------- M2 <- M1 for(e in 1:ncol(M2)) { M2[,e] <- M2[,e] + abs(min(M2[,e],na.rm=TRUE))+0.000000000000000000000000000000001 } for(i in 1:ncol(M2)){ lambda1 <- BoxCox.lambda(M2[,i],method = "loglik", lower = -3, upper = 3) M2[,i] <- BoxCox(M2[,i], lambda=lambda1) } par(mfrow=c(6,4),mex=0.3) for(j in 1:ncol(M2)){hist(M2[,j],col="blue",main = colnames(M2)[j])} par(mfrow=c(1,1)) M2 <- decostand(M2,"standardize",na.rm=TRUE) head(M2);dim(M2) cors <- round(cor(M2,use="complete.obs"),3) #------------------------------------------------------------------------------- ## Create the "Amazon.traits" matrix to be imputed, containing 9033 individuals: ## - the 8345 individuals from the NBA database ## - the 688 species from the FG inventories not present in the NBA database #------------------------------------------------------------------------------- ## FG floristic inventories: FG.species <- as.data.frame(read.table("AppendixS5.txt",h=T,row.names=1)) head(FG.species);dim(FG.species) w <- which(colnames(FG.species)%in%sp1) ; w ## which FG species are in the trait database with 1630 species z <- c(1:ncol(FG.species))[-w] ; z add <- length(z) ## nr of species of FG not present in the imputed trait data Amazon.traits <- as.data.frame(matrix(ncol=ncol(M1)+7,nrow=nrow(traits)+add)) dim(Amazon.traits) colnames(Amazon.traits) <- colnames(traits)[-c(8:15)] #dim(traits[,-c(8:15)]) Amazon.traits[1:8345,] <- traits[,-c(8:15)] Amazon.traits[,2] <- c(as.character(traits[,2]),as.character(colnames(FG.species)[z])) Amazon.traits[,1] <- c(1:nrow(Amazon.traits)) for(i in 1:nrow(traits)){ w <- which(traits[,2]%in%Amazon.traits[i,2]) Amazon.traits[i,3] <- as.character(traits[w[1],3]) Amazon.traits[i,4] <- as.character(traits[w[1],4]) Amazon.traits[i,5] <- as.character(traits[w[1],5]) Amazon.traits[i,6] <- as.character(traits[w[1],6]) Amazon.traits[i,7] <- as.character(traits[w[1],7]) } for(h in 8:ncol(Amazon.traits)) { Amazon.traits[1:8345,h] <- as.numeric(Amazon.traits[1:8345,h]) } head(Amazon.traits);dim(Amazon.traits) Amazon.traits[8340:8350,] #write.table(Amazon.traits,"Amazon.traits2.xls") t6 <- table(Amazon.traits[,2]) ; t6 <- t6[t6>0] length(t6) # nr of species when row-binding the NBA matrix with the 688 FG species # not present in the NBA matrix. ## Remove lines in the NBA data (up to line 8345) containing only NA: rem <- c() k=10 for(k in 1:nrow(traits)){ nrna <- length(which(is.na(Amazon.traits[k,8:ncol(Amazon.traits)])%in%"TRUE")) if(nrna==21){rem<-c(rem,k)} } length(rem) Amazon.traits <- Amazon.traits[-rem,] head(Amazon.traits);dim(Amazon.traits) ## fill taxonomic info for the FG species not present in the 1630 species x traits matrix: taxo <- as.data.frame(read.table("taxo.txt",h=T,row.names=1)) taxo2 <- taxo head(taxo2);dim(taxo2) rownames(taxo2)[980] <- "Palicourea_amapaensis" Amazon.traits[8063,2]<- "Palicourea_amapaensis" for(i in 7613:nrow(Amazon.traits)){ w <- which(rownames(taxo2)%in%Amazon.traits[i,2]) Amazon.traits[i,3] <- as.character(taxo2[w,1]) Amazon.traits[i,4] <- as.character(taxo2[w,2]) Amazon.traits[i,5] <- as.character(taxo2[w,3]) Amazon.traits[i,6] <- as.character(taxo2[w,4]) Amazon.traits[i,7] <- as.character(taxo2[w,5]) } head(Amazon.traits);dim(Amazon.traits) Amazon.traits[7610:7615,] colMeans(Amazon.traits[,8:ncol(Amazon.traits)],na.rm=TRUE) ## Mean observed traits (M1B) and % of missing trait values per species ## in the NBA + FGF matrix of 9033 individuals: #---------------------------------------------------------------------- sp1 <- names(table(Amazon.traits[,2])[table(Amazon.traits[,2])>0]) ; sp1 sp3 <- sp1 ## if region is not taken into account in imputation length(sp3) M1 <- as.data.frame(matrix(nrow=length(sp3),ncol=ncol(traits2))) rownames(M1) <- sp3 colnames(M1) <- colnames(traits)[16:ncol(traits)] i=1 head(Amazon.traits) for(i in 1:nrow(M1)){ subtraits <- Amazon.traits[Amazon.traits[,2]%in%sp1[i],] M1[i,] <- colMeans(subtraits[,8:ncol(subtraits)],na.rm=TRUE) } head(M1);dim(M1) na.traits2 <- as.data.frame(matrix(ncol=ncol(traits2),nrow=1)) ; colnames(na.traits2) <- colnames(traits2) for(i in 1:ncol(na.traits2)){ na.traits2[1,i] <- round((length(which(is.na(M1[,i])==TRUE))/nrow(M1))*100,2) } for(i in 1:nrow(NA.ind)){ NA.ind[i,3] <- round((length(which(is.na(M1[,i])==TRUE))/nrow(M1))*100,2) } NA.ind ## BoxCox Normalisation of traits: #--------------------------------- traits3 <- Amazon.traits[,-c(1:7)] head(traits3);dim(traits3) for(h in 1:ncol(traits3)){ H <- traits3[,h] H[H%in%NaN] <- NA traits3[,h] <- H } absmin <- c() for(t in 1:ncol(traits3)) { absmin <- c(absmin,abs(min(as.numeric(traits3[,t]),na.rm=TRUE))) } absmin for(e in 1:ncol(traits3)) { traits3[,e] <- traits3[,e] + abs(min(traits3[,e],na.rm=TRUE))+0.000000000000000000000000000000001 } traits3[1:5,1:5] colnames(traits3) lambda <- as.data.frame(matrix(ncol=ncol(traits3),nrow=1)) ; colnames(lambda) <- colnames(traits3) for(i in 1:ncol(traits3)){ lambda[1,i] <- BoxCox.lambda(traits3[,i],method = "loglik", lower = -3, upper = 3) traits3[,i] <- BoxCox(traits3[,i], lambda=lambda[1,i]) } lambda ## lambda values stored for back normalisation par(mfrow=c(7,3),mex=0.3) for(j in 1:ncol(traits3)){hist(traits3[,j],col="blue",main = colnames(traits3)[j])} par(mfrow=c(1,1)) ## Verification that the back normalisation is able to retrieve the original values: i=1 ; colnames(traits3)[i] traits2[1:10,i] ## untransformed data traits3[1:10,i] ## normalised data INV <- InvBoxCox(traits3[,i], lambda[1,i], biasadj = FALSE, fvar = NULL) INV[1:10]-absmin[i] ## should be = to untransformed data in traits2 ## Store mean and sd values for back standardisation after imputation: ## (to perform before standardisation!!) #--------------------------------------------------------------------- means.T0 <- colMeans(traits3,na.rm=TRUE) sd.T0 <- c() for(s in 1:ncol(traits3)){sd.T0 <- c(sd.T0,sd(traits3[,s],na.rm=TRUE))} sd.T0 ## Standardisation of traits3 (z-scores) -> traits4: #--------------------------------------------------- head(traits3);dim(traits3) traits4 <- traits3 for(h in 1:ncol(traits3)){ H <- traits4[,h] H[H%in%NaN] <- NA traits4[,h] <- H } traits4 <- decostand(traits4,"standardize",na.rm=TRUE) head(traits4);dim(traits4) ## add artefact column filled with "1" as some few lines have only missing values: traits4 <- cbind(traits4,1) colnames(traits4) <- c(colnames(traits3),"artefact") head(traits4);dim(traits4) ## define hierarchy: #------------------- head(traits) hierarchy <- Amazon.traits[,c(1:7)] head(hierarchy);dim(hierarchy) dim(traits) ## all original trait values for the NBA database (8345 ind.) + taxonomy dim(traits2) ## idem traits but without taxonomy dim(traits3) ## normalised [traits 2 row-binded with the 688 species from FG inventories not present in the NBA database] dim(traits4) ## normalised-standardised traits 3, then artefact column added for imputations dim(hierarchy) #--------------------------------------- ## Imputation of missing values (BHPMF): #--------------------------------------- GapFilling <- GapFilling(as.matrix(traits4), hierarchy,rmse.plot.test.data=TRUE, num.samples=10000, burn=1000, prediction.level = 7, mean.gap.filled.output.path="C:/Users/Jason/Dropbox/DIADEMA Analyses/Data/Arbres/2020(composition+traits)/TRAITS/IMPUTATION/Imputations2/ms5(data_paper)/Jesus_Chris/SN.imputed.T0.txt", std.gap.filled.output.path ="C:/Users/Jason/Dropbox/DIADEMA Analyses/Data/Arbres/2020(composition+traits)/TRAITS/IMPUTATION/Imputations2/ms5(data_paper)/Jesus_Chris/SD.SN.imputed.T0.txt") # Calculate cross validation RMSE, with average RMSE with the default values: out1 <- CalculateCvRmse(as.matrix(traits4), hierarchy) (avg.rmse <- out1$avg.rmse) (std.rmse <- out1$std.rmse) out1 #--------------------------------------------------------- ## Re-import "SN.imputed.T0" in R to back-transform data: #--------------------------------------------------------- SN.imputed.T0 <- as.data.frame(read.table("SN.imputed.T0.txt",h=T)) SN.imputed.T0 <- SN.imputed.T0[,-ncol(SN.imputed.T0)] head(SN.imputed.T0) ; dim(SN.imputed.T0) traits5A <- SN.imputed.T0 round(cor(traits5A),2) SD.imputed.T0 <- as.data.frame(read.table("SD.SN.imputed.T0.txt",h=T)) SD.imputed.T0 <- SD.imputed.T0[,-ncol(SD.imputed.T0)] head(SD.imputed.T0) ; dim(SD.imputed.T0) V <- c() for(i in 1:ncol(SD.imputed.T0)){V<-c(V,as.vector(SD.imputed.T0[,i]))} length(V) mean(V,na.rm=TRUE) quantile(V,probs=c(0.01,0.99)) ## Remove, from SN.imputed.T0, the imputed values with inflated standard deviation: #--------------------------------------------------------------------------------- traits5B <- traits5A head(traits5B);dim(traits5B) for(i in 1:ncol(traits5B)){ for(j in 1:nrow(traits5B)){ if(SD.imputed.T0[j,i]>3){traits5B[j,i]<-"NA"} } } traits5 <- traits5B traits4[1:5,1:5] ## original normalised-standardised values traits5[1:5,1:5] ## imputed normalised-standarsised values T4 <- traits4 ## for imputation uncertainty evaluation (see below) T5 <- traits5 ## for imputation uncertainty evaluation (see below) ## Back standardisation: #----------------------- for(i in 1:ncol(traits5)){ traits5[,i] <- (SN.imputed.T0[,i]*sd.T0[i]) + means.T0[i] } traits3[1:5,1:5] ## original normalised values before standardisation traits5[1:5,1:5] ## imputed normalised back-standardised values ## Back normalisation of imputed traits: #---------------------------------------------------------- #i=14 for(i in 1:ncol(traits5)){ traits5[,i] <- InvBoxCox(traits5[,i], lambda[,i], biasadj = FALSE, fvar = NULL) } na <- as.data.frame(matrix(ncol=ncol(traits5),nrow=1)) ; colnames(na) <- colnames(traits5) for(i in 1:ncol(traits5)){ na[1,i] <- (length(which(is.na(traits5[,i])))/nrow(traits5))*100 } na ## Remove absmin from each traits value: #--------------------------------------- for(i in 1:ncol(traits5)){ traits5[,i] <- traits5[,i] - absmin[i] } traits2[1:5,1:5] ## original values traits5[1:5,1:5] ## imputed back-transformed trait values T22 <- traits2 ## for imputation uncertainty evaluation (see below) ## Replace imputed values by observed ones (when there are observed values) #-------------------------------------------------------------------------- k=1 dim(traits5) TTT <- Amazon.traits[ ,8:ncol(Amazon.traits)] head(TTT);dim(TTT) k=1 for(k in 1:ncol(traits5)){ wF <- which(is.na(TTT[,k])==FALSE) traits5[wF,k] <- TTT[wF,k] } traits2[1:10,1:10] ## original normalised values before standardisation traits5[1:10,1:10] ## imputed normalised back-standardised values replaced by original values when existing colnames(traits5) round(cor(traits5,use="pairwise.complete.obs"),3) cor(traits5[,15],traits5[,16],use="complete.obs") ## Correct extreme values then re-export imputed and back-transformed values: #-------------------------------------------------------------------------------- head(traits5);dim(traits5) traits6 <- cbind(hierarchy,traits5);head(traits6) traits [1:5,1:10] ## original normalised values before standardisation traits6[1:5,1:10] ## imputed normalised back-standardised values replaced by original values when existing colnames(traits6);dim(traits6) par(mfrow=c(5,5),mex=0.4) for(i in 8:28){plot(traits6[,i],col="blue",pch=16,cex=0.1)} par(mfrow=c(1,1)) ## Correct extreme values: # - - - - - - - - - - - - - head(traits) T0.original <- Amazon.traits[1:7612,8:ncol(Amazon.traits)] ## if imputing individuals T0.original <- Amazon.traits[ ,8:ncol(Amazon.traits)] ## if imputing individuals dim(T0.original);dim(traits6) min.T0 <- as.data.frame(matrix(ncol=ncol(T0.original),nrow=1)) colnames(min.T0) <- colnames(T0.original) max.T0 <- min.T0 for(g in 1:ncol(min.T0)){ min.T0[1,g] <- min(T0.original[,g],na.rm=TRUE) } for(g in 1:ncol(max.T0)){ max.T0[1,g] <- max(T0.original[,g],na.rm=TRUE) } corrected.traits6 <- traits6 head(traits6);dim(traits6) for(a in 8:ncol(corrected.traits6)){ w.min <- which(corrected.traits6[,a] < min.T0[1,a-7]) w.max <- which(corrected.traits6[,a] > max.T0[1,a-7]) if(length(w.min)>0){ corrected.traits6[w.min,a] <- min.T0[1,a-7] } if(length(w.max)>0){ corrected.traits6[w.max,a] <- max.T0[1,a-7] } } traits6 <- corrected.traits6 ## Replace few remaining NA values (back-normalisation issue) my mean trait value: # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for(i in 8:ncol(traits6)){ w8 <- which(is.na(traits6[,i])) if(length(w8)>0){traits6[w8,i]<-mean(traits6[,i],na.rm=TRUE)} } which(is.na(traits6[,8:ncol(traits6)])==TRUE) T66 <- traits6[,-c(1:7)] head(T66 [1:5,1:5]) ## for imputation uncertainty evaluation (see below) head(traits2[1:5,1:5]) ## original values ## Save the imputed trait-individuals matrix #-------------------------------------------- write.table(traits6,"imputed.backtransformed.traits.xls") #-------------------------------------------------------------------------------- ## Generate species x traits matrices ## #-------------------------------------------------------------------------------- T1 <- as.data.frame(read.table("imputed.backtransformed.traits.xls",h=T)) head(T1);dim(T1) namesp <- names(table(T1[,2])) ; namesp Nrsp <- length(namesp) ; Nrsp species.by.traits <- as.data.frame(matrix(ncol=ncol(T1),nrow=Nrsp)) colnames(species.by.traits) <- colnames(T1) species.by.traits[,2] <- namesp head(species.by.traits);dim(species.by.traits) for(i in 1:Nrsp){ w <- which(T1[,2]%in%namesp[i]) species.by.traits[i,3] <- as.character(T1[w[1],3]) species.by.traits[i,4] <- as.character(T1[w[1],4]) species.by.traits[i,5] <- as.character(T1[w[1],5]) species.by.traits[i,6] <- as.character(T1[w[1],6]) species.by.traits[i,7] <- as.character(T1[w[1],7]) } head(species.by.traits);dim(species.by.traits) species.by.traits[1:20,1:10] for(i in 1:Nrsp){ w <- which(T1[,2]%in%namesp[i]) for(k in 8:ncol(species.by.traits)){ species.by.traits[i,k] <- as.numeric(mean(T1[w,k],na.rm=TRUE)) } } head(species.by.traits);dim(species.by.traits) M9 <- species.by.traits[,-c(1)] head(M9);dim(M9) #write.table(M9,"species.by.traits.xls") #################################################################################### ## Estimations of imputation precision : #################################################################################### #------------------------------------------------------------ ## Imputed species x trait matrix #------------------------------------------------------------ Z1 <- as.data.frame(read.table("species.by.traits.xls",h=T)) head(Z1);dim(Z1) ## Normalise & Standardise Z1: Z2 <- Z1[,-c(1:6)] rownames(Z2) <- Z1[,1] head(Z2);dim(Z2) lambda <- as.data.frame(matrix(ncol=ncol(Z2),nrow=1)) ; colnames(lambda) <- colnames(Z2) for(i in 1:ncol(Z2)){ lambda[1,i] <- BoxCox.lambda(Z2[,i]+abs(min(Z2[,i],na.rm=TRUE))+0.00000000000000000000001, method = "loglik", lower = -2, upper = 2) Z2[,i] <- BoxCox(Z2[,i], lambda=lambda[1,i]) } lambda ## lambda values stored for back normalisation par(mfrow=c(7,9),mex=0.3) for(j in 1:ncol(Z2)){hist(Z2[,j],col="blue",main = colnames(Z2)[j])} par(mfrow=c(1,1)) for(h in 1:ncol(Z2)){ H <- Z2[,h] ; H[H%in%NaN] <- NA ; Z2[,h] <- H } Z2 <- decostand(Z2,"standardize",na.rm=TRUE) head(Z2);dim(Z2) cors <- round(cor(Z2),3) ; cors #write.table(cors,"cors.xls") #--------------------------------------------------------- ## Original traits measurements for the same species => M2 #--------------------------------------------------------- T2 <- Amazon.traits head(T2);dim(T2) namesp <- names(table(T2[,2])) ; namesp Nrsp <- length(namesp) ; Nrsp species.by.traits.b <- as.data.frame(matrix(ncol=ncol(T2),nrow=Nrsp)) colnames(species.by.traits.b) <- colnames(T2) species.by.traits.b[,2] <- namesp head(species.by.traits.b);dim(species.by.traits.b) dim(T2) i=1 for(i in 1:Nrsp){ w <- which(T2[,2]%in%namesp[i]) species.by.traits.b[i,3] <- as.character(T2[w[1],3]) species.by.traits.b[i,4] <- as.character(T2[w[1],4]) species.by.traits.b[i,5] <- as.character(T2[w[1],5]) species.by.traits.b[i,6] <- as.character(T2[w[1],6]) species.by.traits.b[i,7] <- as.character(T2[w[1],7]) } head(species.by.traits.b);dim(species.by.traits.b) for(i in 1:Nrsp){ w <- which(T2[,2]%in%namesp[i]) for(k in 8:ncol(species.by.traits.b)){ species.by.traits.b[i,k] <- as.numeric(mean(T2[w,k],na.rm=TRUE)) } } head(species.by.traits.b);dim(species.by.traits.b) M10 <- as.data.frame(species.by.traits.b)[,-c(1)] head(M10);dim(M10) ## Normalise & Standardise M1: M2 <- M10[,-c(1:6)] rownames(M2) <- M10[,1] lambda <- as.data.frame(matrix(ncol=ncol(M2),nrow=1)) ; colnames(lambda) <- colnames(M2) for(i in 1:ncol(M2)){ lambda[1,i] <- BoxCox.lambda(M2[,i]+abs(min(M2[,i],na.rm=TRUE))+0.00000000000000000000001, method = "loglik", lower = -2, upper = 2) M2[,i] <- BoxCox(M2[,i], lambda=lambda[1,i]) } head(M2);dim(M2) lambda ## lambda values stored for back normalisation par(mfrow=c(6,4),mex=0.3) for(j in 1:ncol(M2)){hist(M2[,j],col="blue",main = colnames(M2)[j])} par(mfrow=c(1,1)) for(h in 1:ncol(M2)){ H <- M2[,h] ; H[H%in%NaN] <- NA ; M2[,h] <- H } M2 <- decostand(M2,"standardize",na.rm=TRUE) head(M2);dim(M2) rownames(Z2)[711] rownames(M2)[711] Z2[101,] M2[101,] #------------------------------------------------------------ ## Select traits => new matrices Z3 and M3: #------------------------------------------------------------ head(Z2);dim(Z2) ## imputed normalised-standardised traits head(M2);dim(M2) ## original normalised-standardised traits colnames(Z2) #select.traits <- c(1:3,5,7:11,15,21,22,26:27,29:30,34:36,39,42:44,53:57) #Z3 <- Z2[,select.traits] M3 <- M2[,colnames(M2)%in%colnames(Z2)] Z3 <- Z2 head(Z3);dim(Z3) head(M3);dim(M3) ## If using only the species present in FG: ## FG floristic inventories: #------------------------------------------ FG.species <- as.data.frame(read.table("AppendixS5.txt",h=T,row.names=1)) head(FG.species);dim(FG.species) w <- which(rownames(Z2)%in%colnames(FG.species)) ; length(w) Z3 <- Z2[w,] M3 <- M2[w,] head(Z3);dim(Z3) head(M3);dim(M3) ## correlation among traits at the species level before and after imputations: #----------------------------------------------------------------------------- cors <- round(cor(M3,use="complete.obs"),3) ; cors write.table(cors,"corsbefore.xls") cors <- round(cor(Z3,use="complete.obs"),3) ; cors write.table(cors,"corsafter.xls") #------------------------------------------------------------ ## Estimations of imputation precision via distance matrices: #------------------------------------------------------------ DZ2 <- as.vector(dist(t(Z3))) DM2 <- as.vector(dist(t(M3))) length(DZ2) length(DM2) V <- as.data.frame(cbind(DZ2,DM2)) plot(DZ2~DM2,data=V,col="grey40",pch=16,ylab="Imputed",xlab="Observed") cor.test(V$DZ2,V$DM2) library(rethinking) bm1 <- quap( alist( DZ2 ~ dnorm(mu, sigma), mu ~ a + b1*DM2 , a ~ dnorm(0,1),b1~dlnorm(0,1),sigma ~ dunif (0,50) ), data = V ) #precis(bm1) weight.seq <- seq( from=min(DZ2) , to=max(DZ2) , length.out=30 ) pred_dat <- list( DM2=weight.seq ) mu <- link( bm1 , data=pred_dat ) ; mu.mean <- apply( mu , 2 , mean ) mu.PI <- apply( mu , 2 , PI , prob=0.89 ) ; sim.height <- sim( bm1 , data=pred_dat ) height.PI <- apply( sim.height , 2 , PI , prob=0.90 ) lines( weight.seq , mu.mean ) ; shade( mu.PI , weight.seq ) ; shade( height.PI , weight.seq ) ## Correlation between traits: #----------------------------------- head(Z3);dim(Z3) ## imputed normalised-standardised traits head(M3);dim(M3) ## original normalised-standardised traits #------------------------------------------------------------------------------------- # Test correlation btw predicted vs. observed values for each trait, at species level: #------------------------------------------------------------------------------------- U <- as.data.frame(matrix(nrow=ncol(Z3),ncol=1)) rownames(U) <- colnames(Z3) head(U) ; dim(U) i=5 a=0 for(i in 1:ncol(Z3)){ a <- a+1 w9 <- which(is.na(M3[,i])==FALSE) #U[a,1] <- cor(Z3[w9,i],M3[w9,i]) U[i,1] <- cor(Z3[,i],M3[,i],use="complete.obs") } U ; min(U,na.rm=TRUE) plot(as.numeric(U[,1]),pch=15,ylim=c(-0.15,1),ylab="r - Pearson",xlab="");abline(h=0,lty=2) axis(1, at=1:ncol(traits2),labels=colnames(traits2), las = 2) #write.table(U,"U.xls") #**************************************************************************** ## Test correlation between trait data dissimilarity obtained (i) within ## and (ii) outside the plots. #**************************************************************************** out <- as.data.frame(read.table("AppendixS3.txt",h=T)) head(out);dim(out) colnames(out) table(out$Region) out.FG <- out[out$Region%in%"FG",] head(out.FG);dim(out.FG) out.others <- out[out$Region!="FG",] head(out.others);dim(out.others) out.FG2 <- out.FG [out.FG$Species %in%out.others$Species,] ; head(out.FG2) ; dim(out.FG2) out.others2 <- out.others [out.others$Species %in%out.FG2$Species,] ; head(out.others2) ; dim(out.others2) out.FG2 <- out.FG2 [out.FG2$Species %in%out.others2$Species,] ; head(out.FG2) ; dim(out.FG2) out.others2 <- out.others2[out.others$Species%in%out.FG2$Species,] ; head(out.others2) ; dim(out.others2) colnames(out) sel <- c(a:b) ## select traits FG <- out.FG2 [,sel] ; sp.FG <- as.character(out.FG2$Species) OT <- out.others2[,sel] ; sp.OT <- as.character(out.others2$Species) head(FG);dim(FG) head(OT);dim(OT) length(sp.FG);length(sp.OT) sp.FG2 <- names(table(sp.FG)[table(sp.FG)>0]) sp.OT2 <- names(table(sp.OT)[table(sp.OT)>0]) sp.FG3 <- sp.FG2[which(sp.FG2%in%sp.OT2)] ; sp.FG3 sp.OT3 <- sp.OT2[which(sp.OT2%in%sp.FG3)] ; sp.OT3 length(sp.FG3);length(sp.OT3) sp.FG3[1];sp.OT3[1] FG2 <- as.data.frame(matrix(ncol=ncol(FG),nrow=length(sp.FG3))) colnames(FG2) <- colnames(FG) rownames(FG2) <- sp.FG3 OT2 <- FG2 head(FG2);dim(FG2) head(OT2);dim(OT2) ## Median of values for each species and each trait: for(i in 1:nrow(FG2)){ for(j in 1:ncol(FG2)){ w <- which(sp.FG%in%rownames(FG2)[i]) FG2[i,j] <- median(FG[w,j],na.rm=TRUE) w2 <- which(sp.OT%in%rownames(OT2)[i]) OT2[i,j] <- median(OT[w2,j],na.rm=TRUE) } } head(FG2);dim(FG2) head(OT2);dim(OT2) ## => 138 species in common between FG and outside ## => 34 species in common between FG and outside, with at least 3 individuals ## Number values for each species and each trait: #------------------------------------------------ FG3 <- as.data.frame(matrix(ncol=ncol(FG),nrow=length(sp.FG3))) colnames(FG3) <- colnames(FG) rownames(FG3) <- sp.FG3 OT3 <- FG3 for(i in 1:nrow(FG3)){ for(j in 1:ncol(FG3)){ w <- which(sp.FG%in%rownames(FG3)[i]) L1 <- length(which(is.na(FG[w,j])%in%FALSE)) if(L1==0){FG3[i,j] <- 0} if(L1 >0){ FG3[i,j] <- length(which(is.na(FG[w,j])%in%FALSE)) } w2 <- which(sp.OT%in%rownames(OT3)[i]) L2 <- length(which(is.na(OT[w,j])%in%FALSE)) if(L1==0){OT3[i,j] <- 0} if(L1 >0){ OT3[i,j] <- length(which(is.na(OT[w,j])%in%FALSE)) } } } head(FG3);dim(FG3) head(OT3);dim(OT3) ## normalise-standardise FG2: #---------------------------- lambda <- as.data.frame(matrix(ncol=ncol(FG2),nrow=1)) ; colnames(lambda) <- colnames(FG2) for(i in 1:ncol(FG2)){ lambda[1,i] <- BoxCox.lambda(FG2[,i]+abs(min(FG2[,i],na.rm=TRUE))+0.00000000000000000000001, method = "loglik", lower = -2, upper = 2) FG2[,i] <- BoxCox(FG2[,i], lambda=lambda[1,i]) } #par(mfrow=c(5,5),mex=0.3) #for(j in 1:ncol(E2)){hist(E2[,j],col="red",main = colnames(E2)[j])} #par(mfrow=c(1,1)) ## Standardising variables: FG2 <- decostand(FG2,"standardize",na.rm=TRUE) ## normalise-standardise OT2: #---------------------------- lambda <- as.data.frame(matrix(ncol=ncol(OT2),nrow=1)) ; colnames(lambda) <- colnames(OT2) for(i in w4){ lambda[1,i] <- BoxCox.lambda(OT2[,i]+abs(min(OT2[,i],na.rm=TRUE))+0.000000000000000000001, method = "loglik", lower = -2, upper = 2) OT2[,i] <- BoxCox(OT2[,i], lambda=lambda[1,i]) } #par(mfrow=c(5,5),mex=0.3) #for(j in 1:ncol(E2)){hist(E2[,j],col="red",main = colnames(E2)[j])} #par(mfrow=c(1,1)) ## Standardising variables: OT2 <- decostand(OT2,"standardize",na.rm=TRUE) FG2 <- FG2[,w4] OT2 <- OT2[,w4] head(FG2);dim(FG2) head(OT2);dim(OT2) a<-c() for(h in 1:ncol(FG2)){ is.na.fg <- length(is.na(FG2[,h])[is.na(FG2[,h])%in%"TRUE"])/nrow(FG2)*100 is.na.ot <- length(is.na(OT2[,h])[is.na(OT2[,h])%in%"TRUE"])/nrow(OT2)*100 if(is.na.fg < 100 & is.na.ot < 100){a<-c(a,h)} } a ## traits with % of missing values < 100 #a<-1:ncol(FG2) D1 <- as.dist(dist(FG2[,a],"euclidean")) D2 <- as.dist(dist(OT2[,a],"euclidean")) D1 <- (dist(FG2[,a],"euclidean")) D2 <- (dist(OT2[,a],"euclidean")) D1 <- as.vector(D1) D2 <- as.vector(D2) ## normalise-standardise FG2: #---------------------------- lamb <- BoxCox.lambda(D1+abs(min(D1,na.rm=TRUE))+0.00000000000000000000001, method = "loglik", lower = -2, upper = 2) D1 <- BoxCox(D1, lambda=lamb) D1 <- decostand(D1,"standardize",na.rm=TRUE) ## normalise-standardise OT2: #---------------------------- lamb <- BoxCox.lambda(D2+abs(min(D2,na.rm=TRUE))+0.000000000000000000001, method = "loglik", lower = -2, upper = 2) D2 <- BoxCox(D2, lambda=lamb) D2 <- decostand(D2,"standardize",na.rm=TRUE) mean(D1,na.rm=TRUE) mean(D2,na.rm=TRUE) plot(D1,D2,pch=16,col="grey40",cex=1.5, xlab="trait dissimilarity (Fr. Guiana species)", ylab="trait dissimilarity (outside of Fr. Guiana)") abline(lm(D2~D1),col="red",lwd=2) cor.test(D2,D1) FG3 <- t(FG2[1:2,]) ; FG3[,] <- 999 colnames(FG3) <- c("corr","p") i=1 for(i in 1:nrow(FG3)){ is.na.fg <- length(is.na(FG2[,i])[is.na(FG2[,i])%in%"TRUE"])/nrow(FG2)*100 is.na.ot <- length(is.na(OT2[,i])[is.na(OT2[,i])%in%"TRUE"])/nrow(OT2)*100 if(is.na.fg < 90 & is.na.ot < 90){ D1 <- as.dist(dist(FG2[,i],"euclidean")) D2 <- as.dist(dist(OT2[,i],"euclidean")) cortest <- cor.test(log(D2+1000),log(D1+1000)) FG3[i,1] <- round(cortest$estimate,3) FG3[i,2] <- round(cortest$p.value, 3) } } FG3