--- title: "BRT full code KTS" author: "KTS" date: "17 July 2019" output: html_document --- ```{r libraries} library(tidyverse) library(ggfortify) library(broom) library(effects) library(emmeans) library(ggplot2) library(car) library(devtools) library(gridExtra) library(gbm) library(dismo) ``` ```{r} # Clear plots if(!is.null(dev.list())) dev.off() # Clear console cat("\014") # Clean workspace rm(list=ls()) ``` you will need to change object names to fit the species you want to explore this example is done with labrid density response = labrid.den dataset = tx.crest distribution = poisson ```{r read tx summ data} #call data -> Tx.dat<-read_csv('file_path/Transect Summ data.csv', trim_ws=TRUE) Tx.dat <- Tx.dat %>% mutate(Status=factor(Status), Level = factor(Level), Tx = factor(Tx), Island = factor(Island), Location = factor(Location), Transect_ID= factor(Transect_ID), sqrtFragC = sqrt(Frag_C), sqrtP_Rub = sqrt(P_Rub), sqrtP_Sand = sqrt(P_Sand), sqrtP_SC = sqrt(P_SC), sqrt.lab.bio = sqrt(labrid.bio), sqrt.lab.juv = sqrt(lab.juv.den)) head(Tx.dat) levels(Tx.dat$Level) tx.crest <- Tx.dat %>% filter(Level == 'C') #C = crest tx.slope <- Tx.dat %>% filter(Level == 'S') #S = slope ``` #Select Radius for later BRT analysis this is to select radius and also choose from highly collinear variables where x = whatever fish response variable you want dist = distribution of response variable dat = data set made them as functions so can just call these for each response variable you want later. ```{r Macroaglae radius } MA.radius <- function(x, dist, dat) { radius = gbm(x ~ P.MA250 + P.MA500 + P.MA1000, data=dat, distribution = dist, n.trees = 5000, interaction.depth = 3, shrinkage = 0.001, train.fract = 1, bag.fraction = 0.8, cv.folds = 3) best.iter=gbm.perf(radius, method='cv') summary(radius, n.trees=best.iter) } ``` ```{r mangrove radius function} MG.radius <- function(x, dist, dat) { radius = gbm(x ~ P.MG250 + P.MG500 + P.MG1000, data=dat, distribution = dist, n.trees = 5000, interaction.depth = 3, shrinkage = 0.001, train.fract = 1, bag.fraction = 0.8, cv.folds = 3) best.iter=gbm.perf(radius, method='cv') summary(radius, n.trees=best.iter) } ``` ```{r Coral Reef radius function} CR.radius <- function(x, dist, dat) { radius = gbm(x ~ P.CR250 + P.CR500 + P.CR1000, data=dat, distribution = dist, n.trees = 5000, interaction.depth = 3, shrinkage = 0.001, train.fract = 1, bag.fraction = 0.8, cv.folds = 3) best.iter=gbm.perf(radius, method='cv') summary(radius, n.trees=best.iter) } ``` ```{r Seagrass radius function} SG.radius <- function(x, dist, dat) { radius = gbm(x ~ P.SG250 + P.SG500 + P.SG1000, data=dat, distribution = dist, n.trees = 5000, interaction.depth = 3, shrinkage = 0.001, train.fract = 1, bag.fraction = 0.8, cv.folds = 3) best.iter=gbm.perf(radius, method='cv') summary(radius, n.trees=best.iter) } ``` ```{r Sand radius function} Sand.radius <- function(x, dist, dat) { radius = gbm(x ~ P.Sand250 + P.Sand500 + P.Sand1000, data=dat, distribution = dist, n.trees = 5000, interaction.depth = 3, shrinkage = 0.001, train.fract = 1, bag.fraction = 0.8, cv.folds = 3) best.iter=gbm.perf(radius, method='cv') summary(radius, n.trees=best.iter) } ``` ```{r Reef Flat radius function} RF.radius <- function(x, dist, dat) { radius = gbm(x ~ P.RF250 + P.RF500 + P.RF1000, data=dat, distribution = dist, n.trees = 5000, interaction.depth = 3, shrinkage = 0.001, train.fract = 1, bag.fraction = 0.8, cv.folds = 3) best.iter=gbm.perf(radius, method='cv') summary(radius, n.trees=best.iter) } ``` ```{r Coral Type Variable function} #this is for benthic transect variables that are highly collinear Coral.type <- function(x, dist, dat) { coral.type = gbm(x ~ P_HC + Frag_C + Robust_C, data=dat, distribution = dist, n.trees = 5000, interaction.depth = 3, shrinkage = 0.001, train.fract = 1, bag.fraction = 0.8, cv.folds = 3) best.iter=gbm.perf(coral.type, method='cv') summary(coral.type, n.trees=best.iter) } ``` ```{r Rubble vs EAM Variable function} #for benthic transect variables which are highly collinear Rub.vs.EAM <- function(x, dist, dat) { rub.vs.eam = gbm(x ~ P_Rub + P_EAM, data=dat, distribution = dist, n.trees = 5000, interaction.depth = 3, shrinkage = 0.001, train.fract = 1, bag.fraction = 0.8, cv.folds = 3) best.iter=gbm.perf(rub.vs.eam, method='cv') summary(rub.vs.eam, n.trees=best.iter) } ``` ```{r Dist.SG vs P.MA.250} #for spatial variables that are highly collinear Dist.sg.vs.P.MA250 <- function(x, dist, dat) { sg.vs.ma = gbm(x ~ Dist_SG + P.MA250, data=dat, distribution = dist, n.trees = 5000, interaction.depth = 3, shrinkage = 0.001, train.fract = 1, bag.fraction = 0.8, cv.folds = 3) best.iter=gbm.perf(sg.vs.ma, method='cv') summary(sg.vs.ma, n.trees=best.iter) } ``` ```{r selecting data } species = tx.crest$labrid.den #Can call whatever dataset and then species you want choose.dist = 'poisson' # distrubtion dataset = tx.crest # data set ``` ```{r run variable selection functions} MA.radius(x= species, dist= choose.dist, dat = dataset) MG.radius(x= species, dist= choose.dist, dat = dataset) CR.radius(x= species, dist= choose.dist, dat = dataset) SG.radius(x= species, dist= choose.dist, dat = dataset) Sand.radius(x= species, dist= choose.dist, dat = dataset) RF.radius(x= species, dist= choose.dist, dat = dataset) Coral.type(x= species, dist= choose.dist, dat = dataset) Rub.vs.EAM(x= species, dist= choose.dist, dat = dataset) Dist.sg.vs.P.MA250(x= species, dist= choose.dist, dat = dataset) ``` This selects the variables to use in the gbm - removes highly collinear variables ---------------------------------------------------------------------------------------------------------------------------------------- ---------------------------------------------------------------------------------------------------------------------------------------- ---------------------------------------------------------------------------------------------------------------------------------------- ---------------------------------------------------------------------------------------------------------------------------------------- #Run one BRT to test parameters with pared selection of variables decided from above code need to pick from collinear selection from above: 1. Coral TYpe 2. RADIUS TYPES ```{r brt test parameters } #choose variables preselected above dataset "species" species lab.den.crest.pared <-gbm.step(data=tx.crest %>% as.data.frame, gbm.x = c("P_HC", "P_Sand", "P_MA", "P_EAM", "P_SC", "Depth", "SC", "Status", "NTMR_Ha", "Dist_Shore", "Dist_SG", "Dist_MA", "Dist_MG", "P.MA500", "P.MG500", "P.RF250", "P.SG1000", "P.Sand250"), #this is lest of predictor variables selected gbm.y='labrid.den', tree.complexity=3, learning.rate = 0.001, bag.fraction = 0.75, family='poisson') summary(lab.den.crest.pared) ``` ```{r deviance explained and cv dev species pared} (lab.den.crest.pared.dev<-(lab.den.crest.pared$self.statistics$mean.null- lab.den.crest.pared$self.statistics$mean.resid)/lab.den.crest.pared$self.statistics$mean.null) (lab.den.crest.pared.dev<-(lab.den.crest.pared$self.statistics$mean.null- lab.den.crest.pared$cv.statistics$deviance.mean)/lab.den.crest.pared$self.statistics$mean.null) ``` simplify process to reduce number of variables based on reduction in CV deviance ```{r gmb simplify species from pared test} lab.den.crest.pared.simp <- gbm.simplify(lab.den.crest.pared, n.folds=10, n.drops='8', alpha=1, plot=TRUE) ``` ```{r gbm from simplify species } lab.den.crest.pared.simp.1 <-gbm.step(data=tx.crest %>% as.data.frame, gbm.x = lab.den.crest.pared.simp$pred.list[[8]], gbm.y='labrid.den', tree.complexity=3, learning.rate = 0.001, bag.fraction = 0.75, family='poisson') summary(lab.den.crest.pared.simp.1) ``` ```{r deviance explained and cv dev of simplifies model} (lab.den.crest.pared.simp1.dev<-(lab.den.crest.pared.simp.1$self.statistics$mean.null- lab.den.crest.pared.simp.1$self.statistics$mean.resid)/lab.den.crest.pared.simp.1$self.statistics$mean.null) (lab.den.crest.pared.simp.dev<-(lab.den.crest.pared.simp.1$self.statistics$mean.null- lab.den.crest.pared.simp.1$cv.statistics$deviance.mean)/lab.den.crest.pared.simp.1$self.statistics$mean.null) ``` #BOOTSTRAP species DENSITY note - if you dont get grouped by values for variables its because plyr was loaded after dplyr - need to do detach(package:plyr) then run again - for relative influence - you will get NA for the categories LEVEL and Status - thats okay - see notes in the code. -------------------------------------------------------------------------------------------- -------------------------------------------------------------------------------------------- model.bootstrap <- function(boot.num, species, Coral.X, P.CR.X, P.MA.X, P.MG.X, P.RF.X, P.SG.X, P.Sand.X, choose.dist) this is the bootstrap code for ONE species response varialbe. If you want to run the bootstrap code in a loop to do multiple response variables at once, use the BRT_BOOTSTRAP_final code ```{r bootstrap BRT and associated output data put species in for the output files} set.seed(123) rel.imp = list() mod=list() trees = list() dev.exp = list() Coral = list() #These need to match the variables you include in the gbm below P_Rub = list() P_Sand = list() P_MA = list() P_EAM = list() P_SC = list() Depth = list() SC = list() Level = list() Status = list() NTMR_Ha = list() Dist_Shore = list() Dist_SG = list() Dist_MA = list() Dist_MG = list() P.CR = list() P.MA = list() P.MG = list() P.RF = list() P.SG = list() P.Sand = list() #testing with 2 bootstraps to make sure everything runs, bump it up to 100 when its clean for (i in 1:2) { n = sample(1:nrow(tx.crest), size = nrow(tx.crest), replace=TRUE) dat = tx.crest[n,] model.gbm = gbm.step(data=dat %>% as.data.frame, gbm.x = c("P_HC", "P_Rub", "P_Sand", "P_MA", "P_EAM", "P_SC", "Depth", "SC", "Level", "Status", "NTMR_Ha", "Dist_Shore", "Dist_SG", "Dist_MA", "Dist_MG", "P.CR1000", "P.MA500", "P.MG1000", "P.RF250", "P.SG250", "P.Sand250"), gbm.y=c("labrid.den"), tree.complexity=3, learning.rate = 0.001, bag.fraction = 0.75, family='poisson') #model output mod[[i]] = model.gbm #number of trees for each run trees[[i]] = data.frame(model.gbm$n.trees) %>% mutate(Iter = i) #need to add data frame so it makes it possible to add columns instead of keeping as matrix #deviance explained for each run dev.exp[[i]] = data.frame((model.gbm$self.statistics$mean.null - model.gbm$self.statistics$mean.resid)/model.gbm$self.statistics$mean.null) %>% mutate(Iter=i) #rel.imp for all model runs rel.imp[[i]] = data.frame(summary(model.gbm)) %>% mutate(Iter=i) #Ind.VARIABLES so you can plot - this is the predicted values of coral etc and the predicted y (fish den/bio) values - does 100 values. #the mutate adds a row number to each of the 100 predicted values for each run - so you can group by later Coral[[i]] = plot.gbm(model.gbm, i.var = 1, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) P_Rub[[i]] = plot.gbm(model.gbm, i.var = 2, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) P_Sand[[i]] = plot.gbm(model.gbm, i.var = 3, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) P_MA[[i]] = plot.gbm(model.gbm, i.var = 4, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) P_EAM[[i]] = plot.gbm(model.gbm, i.var = 5, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) P_SC[[i]] = plot.gbm(model.gbm, i.var = 6, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) Depth[[i]] = plot.gbm(model.gbm, i.var = 7, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) SC[[i]] = plot.gbm(model.gbm, i.var = 8, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) Level[[i]] = plot.gbm(model.gbm, i.var = 9, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) Status[[i]] = plot.gbm(model.gbm, i.var = 10, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) NTMR_Ha[[i]] = plot.gbm(model.gbm, i.var = 11, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) Dist_Shore[[i]] = plot.gbm(model.gbm, i.var = 12, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) Dist_SG[[i]] = plot.gbm(model.gbm, i.var = 13, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) Dist_MA[[i]] = plot.gbm(model.gbm, i.var = 14, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) Dist_MG[[i]] = plot.gbm(model.gbm, i.var = 15, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) P.CR[[i]] = plot.gbm(model.gbm, i.var = 16, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) P.MA[[i]] = plot.gbm(model.gbm, i.var = 17, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) P.MG[[i]] = plot.gbm(model.gbm, i.var = 18, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) P.RF[[i]] = plot.gbm(model.gbm, i.var = 19, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) P.SG[[i]] = plot.gbm(model.gbm, i.var = 20, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) P.Sand[[i]] = plot.gbm(model.gbm, i.var = 21, return.grid=TRUE) %>% mutate(Num = row_number()) %>% mutate(Iter=i) } #----------------------------------------------------------------- #----------------------------------------------------------------- #data wrangling for bootstrap species.mod = mod #Relative importance means for each variables plus their 95% CI #combines all tibbles into one dataframe rel.imp=do.call('rbind', rel.imp) #groups by variable, the calculates Mean, and 95% CI rel.imp.sum <- rel.imp %>% group_by(var) %>% summarize(Mean=mean(rel.inf), lower=quantile(rel.inf, p=0.025), upper=quantile(rel.inf, p=0.975)) #records the numbers of trees for each model run. species.trees = do.call('rbind', trees) species.trees.avg = species.trees %>% summarize(Mean=mean(model.gbm.n.trees), lower=quantile(model.gbm.n.trees, p=0.025), upper=quantile(model.gbm.n.trees, p=0.975)) #records the deviance explained for each run, and renames the column to something sensible, then calculate the mean and 95% CI for the deviance explained dev.exp = do.call('rbind', dev.exp) %>% rename(Dev.Exp = X.model.gbm.self.statistics.mean.null...model.gbm.self.statistics.mean.resid..model.gbm.self.statistics.mean.null) species.dev.exp.sum = dev.exp %>% summarize(Mean=mean(Dev.Exp), lower=quantile(Dev.Exp, p=0.025), upper=quantile(Dev.Exp, p=0.975)) #PLots where values of each variables and the response - with their CI #need to group by num - because want the average predicted response value per each predictor var value. But for each run it picks a different set of 100 values to predict from #So i got around this by using num (because the values are close enough together) to give me an average across all the runs #this will give you the AVERAGE to plot. The coral - will have all 100 runs so you can do the plot with all the run in lighter lines species.Coral.r=do.call('rbind', Coral) # DONT need to rename coral type #bc diff coral types used in mode. need to rename the column to coral from the first [1] variables in the mode %>% then can summarize, and renames based on whats used in model species.Coral.sum = species.Coral.r %>% group_by(Num) %>% rename('Coral' = model.gbm$var.names[1]) %>% summarize(Mean.y=mean(y), Mean.x=mean(Coral), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor(model.gbm$var.names[1])) %>% rename('Pred.Var' = 'Num') species.P_Rub.r=do.call('rbind', P_Rub) species.P_Rub.sum = species.P_Rub.r %>% group_by(Num) %>% summarize(Mean.y=mean(y), Mean.x=mean(P_Rub), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor('P_Rub')) %>% rename('Pred.Var' = 'Num') species.P_Sand.r=do.call('rbind', P_Sand) species.P_Sand.sum = species.P_Sand.r %>% group_by(Num) %>% summarize(Mean.y=mean(y), Mean.x=mean(P_Sand), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor('P_Sand')) %>% rename('Pred.Var' = 'Num') species.P_MA.r=do.call('rbind', P_MA) species.P_MA.sum = species.P_MA.r %>% group_by(Num) %>% summarize(Mean.y=mean(y), Mean.x=mean(P_MA), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor('P_MA')) %>% rename('Pred.Var' = 'Num') species.P_EAM.r=do.call('rbind', P_EAM) species.P_EAM.sum = species.P_EAM.r %>% group_by(Num) %>% summarize(Mean.y=mean(y), Mean.x=mean(P_EAM), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor('P_EAM')) %>% rename('Pred.Var' = 'Num') species.P_SC.r=do.call('rbind', P_SC) species.P_SC.sum = species.P_SC.r %>% group_by(Num) %>% summarize(Mean.y=mean(y), Mean.x=mean(P_SC), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor('P_SC')) %>% rename('Pred.Var' = 'Num') species.Depth.r=do.call('rbind', Depth) species.Depth.sum = species.Depth.r %>% group_by(Num) %>% summarize(Mean.y=mean(y), Mean.x=mean(Depth), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor('Depth')) %>% rename('Pred.Var' = 'Num') species.SC.r=do.call('rbind', SC) species.SC.sum = species.SC.r %>% group_by(Num) %>% summarize(Mean.y=mean(y), Mean.x=mean(SC), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor('P_SC')) %>% rename('Pred.Var' = 'Num') #NOTE THESE ARE DIFFERENT because they are two categories rather than continuous variables species.Level.r=do.call('rbind', Level) # will get NA for mean.x bc c categories Level.1 = species.Level.r %>% group_by(Level) %>% summarize(Mean.y=mean(y), Mean.x=mean(Level), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) #copies levels (slope and crest) for the level column into the mean.x column so it will fit the order of all the other continuous data Level.1$Mean.x = Level.1$Level #reassignes the column name changing it from level to Pred.Var. and then changes the contents of the Pred Var col so all the rows say LEVEL species.Level.sum = Level.1 %>% rename('Pred.Var' = 'Level') %>% mutate('Pred.Var'= 'Level') # species.Status.r=do.call('rbind', Status) Status.1 = species.Status.r %>% group_by(Status) %>% summarize(Mean.y=mean(y), Mean.x=mean(Status), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) Status.1$Mean.x = Status.1$Status species.Status.sum = Status.1 %>% rename('Pred.Var' = 'Status') %>% mutate('Pred.Var'= 'Status') species.NTMR_Ha.r=do.call('rbind', NTMR_Ha) species.NTMR_Ha.sum = species.NTMR_Ha.r %>% group_by(Num) %>% summarize(Mean.y=mean(y), Mean.x=mean(NTMR_Ha), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor('NTMR_Ha')) %>% rename('Pred.Var' = 'Num') species.Dist_Shore.r=do.call('rbind', Dist_Shore) species.Dist_Shore.sum = species.Dist_Shore.r %>% group_by(Num) %>% summarize(Mean.y=mean(y), Mean.x=mean(Dist_Shore), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor('Dist_Shore')) %>% rename('Pred.Var' = 'Num') species.Dist_SG.r=do.call('rbind', Dist_SG) species.Dist_SG.sum = species.Dist_SG.r %>% group_by(Num) %>% summarize(Mean.y=mean(y), Mean.x=mean(Dist_SG), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor('Dist_SG')) %>% rename('Pred.Var' = 'Num') species.Dist_MA.r=do.call('rbind', Dist_MA) species.Dist_MA.sum = species.Dist_MA.r %>% group_by(Num) %>% summarize(Mean.y=mean(y), Mean.x=mean(Dist_MA), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor('Dist_MA')) %>% rename('Pred.Var' = 'Num') species.Dist_MG.r=do.call('rbind', Dist_MG) species.Dist_MG.sum = species.Dist_MG.r %>% group_by(Num) %>% summarize(Mean.y=mean(y), Mean.x=mean(Dist_MG), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor('Dist_MG')) %>% rename('Pred.Var' = 'Num') #RADIUS VARIABLES species.P.CR.r=do.call('rbind', P.CR) species.P.CR.sum = species.P.CR.r %>% group_by(Num) %>% rename('P.CR' = model.gbm$var.names[16]) %>% summarize(Mean.y=mean(y), Mean.x=mean(P.CR), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor(model.gbm$var.names[16])) %>% rename('Pred.Var' = 'Num') species.P.MA.r=do.call('rbind', P.MA) species.P.MA.sum = species.P.MA.r %>% group_by(Num) %>% rename('P.MA' = model.gbm$var.names[17]) %>% summarize(Mean.y=mean(y), Mean.x=mean(P.MA), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor(model.gbm$var.names[17])) %>% rename('Pred.Var' = 'Num') species.P.MG.r=do.call('rbind', P.MG) species.P.MG.sum = species.P.MG.r %>% group_by(Num) %>% rename('P.MG' = model.gbm$var.names[18]) %>% summarize(Mean.y=mean(y), Mean.x=mean(P.MG), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor(model.gbm$var.names[18])) %>% rename('Pred.Var' = 'Num') species.P.RF.r=do.call('rbind', P.RF) species.P.RF.sum = species.P.RF.r %>% group_by(Num) %>% rename('P.RF' = model.gbm$var.names[19]) %>% summarize(Mean.y=mean(y), Mean.x=mean(P.RF), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor(model.gbm$var.names[19])) %>% rename('Pred.Var' = 'Num') species.P.SG.r=do.call('rbind', P.SG) species.P.SG.sum = species.P.SG.r %>% group_by(Num) %>% rename('P.SG' = model.gbm$var.names[20]) %>% summarize(Mean.y=mean(y), Mean.x=mean(P.SG), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor(model.gbm$var.names[20])) %>% rename('Pred.Var' = 'Num') species.P.Sand.r=do.call('rbind', P.Sand) species.P.Sand.sum = species.P.Sand.r %>% group_by(Num) %>% rename('P.Sand' = model.gbm$var.names[21]) %>% summarize(Mean.y=mean(y), Mean.x=mean(P.Sand), lower=quantile(y, p=0.025), upper=quantile(y, p=0.975)) %>% mutate(Num = factor(model.gbm$var.names[21])) %>% rename('Pred.Var' = 'Num') #combining all above data frames into one to show relative importance mean and 95% CI for each predictor variable species.All.predictors <- rbind(species.Coral.sum, species.P_Rub.sum, species.P_Sand.sum, species.P_MA.sum, species.P_EAM.sum, species.P_SC.sum, species.Depth.sum, species.SC.sum, species.Level.sum, species.Status.sum, species.NTMR_Ha.sum, species.Dist_Shore.sum, species.Dist_SG.sum, species.Dist_MA.sum, species.Dist_MG.sum, species.P.CR.sum, species.P.MA.sum, species.P.MG.sum, species.P.RF.sum, species.P.SG.sum, species.P.Sand.sum) #add columns with variable category names to Rel.IMP summary rel.imp.1 = rel.imp.sum #assigning new name just to be safe rel.imp.1$Var.Cat = rel.imp.1$var #column for variable category #Variable Cat #change to character so can rename rel.imp.1 = rel.imp.1 %>% mutate(Var.Cat= as.character(Var.Cat)) rel.imp.1$Var.Cat[rel.imp.1$Var.Cat == model.gbm$var.names[1]] <- 'Coral' rel.imp.1$Var.Cat[rel.imp.1$Var.Cat == model.gbm$var.names[16]] <- 'P.CR' rel.imp.1$Var.Cat[rel.imp.1$Var.Cat == model.gbm$var.names[17]] <- 'P.MA' rel.imp.1$Var.Cat[rel.imp.1$Var.Cat == model.gbm$var.names[18]] <- 'P.MG' rel.imp.1$Var.Cat[rel.imp.1$Var.Cat == model.gbm$var.names[19]] <- 'P.RF' rel.imp.1$Var.Cat[rel.imp.1$Var.Cat == model.gbm$var.names[20]] <- 'P.SG' rel.imp.1$Var.Cat[rel.imp.1$Var.Cat == model.gbm$var.names[21]] <- 'P.Sand' rel.imp.1 #change back to factors once complete rel.imp.1 = rel.imp.1 %>% mutate(Var.Cat = as.factor(Var.Cat)) #new column for variable type - based off variables type bc already renames some of the data rel.imp.1$Var.Type = rel.imp.1$Var.Cat #now changing variables into category groups rel.imp.1$Var.Cat = ifelse(rel.imp.1$Var.Cat %in% c("Coral", "P_Rub", "P_Sand", "P_MA", "P_EAM", "P_SC", "Depth", "SC", "Level"), "Tx", #transect variables ifelse(rel.imp.1$Var.Cat %in% c("Status", "NTMR_Ha"), "NTMR", #Reserve variables ifelse(rel.imp.1$Var.Cat %in% c("Dist_Shore", "Dist_SG", "Dist_MA", "Dist_MG"), "Dist", #Spatial Distance variables ifelse(rel.imp.1$Var.Cat %in% c("P.CR", "P.MA", "P.MG", "P.RF", "P.SG", "P.Sand"), "Radius", "NA")))) #Spatial Radius variables species.rel.imp.1 = rel.imp.1 species.trees #number of trees run for each bootstrap species.trees.avg #average number of trees run across the entire bootstrap dev.exp #deviance explained values species.dev.exp.sum #mean and 95% CI of deviance explained species.All.predictors #relative importance mean and 95% CI for all predictor variables. # saves the data if you want. all of it. delete sections if you dont want to save everything. #save(species.mod, species.trees, species.trees.avg, dev.exp, species.dev.exp.sum, species.rel.imp.1, species.All.predictors, species.Coral.r, species.Coral.sum, species.P_Rub.r, species.P_Rub.sum, species.P_Sand.r, species.P_Sand.sum, species.P_MA.r, species.P_MA.sum, species.P_EAM.r, species.P_EAM.sum, species.P_SC.r, species.P_SC.sum, species.Depth.r, species.Depth.sum, species.SC.r, species.SC.sum, species.Level.r, species.Level.sum, species.Status.r, species.Status.sum, species.NTMR_Ha.r, species.NTMR_Ha.sum, species.Dist_Shore.r, species.Dist_Shore.sum, species.Dist_SG.r, species.Dist_SG.sum, species.Dist_MA.r, species.Dist_MA.sum, species.Dist_MG.r, species.Dist_MG.sum, species.P.CR.r, species.P.CR.sum, species.P.MA.r, species.P.MA.sum, species.P.MG.r, species.P.MG.sum, species.P.RF.r, species.P.RF.sum, species.P.SG.r, species.P.SG.sum, species.P.Sand.r, species.P.Sand.sum, file="species.files.RData") ``` PLOTTING ------------------------------------------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------------------------------------------ ```{r plot relative importance means} ggplot(data=species.rel.imp.1, aes(y=Mean, x=var, color = Var.Cat))+geom_point() + geom_pointrange(aes(ymin=lower, ymax=upper))+ geom_hline(yintercept=c(4.67), linetype='dashed', size=1) + coord_flip()+ theme_light() ``` ```{r example partial plot} #plot with means and CI for a predictor variable head(species.Depth.sum) #shows the data structure for each predictor variables and the modelled data Species.depth = ggplot(data=species.Depth.sum, aes(y=Mean.y, x=Mean.x))+geom_line()+ geom_ribbon(aes(ymin=lower, ymax=upper), alpha = 0.3) Species.depth + labs(x='Depth', y='Species') ```