#packages library(ggplot2) library(extrafont) loadfonts(device="win") #clear workspace rm(list=ls()) #import exposed canopy tree dataset crown_data<- #read "corrected_50ha_crowns_final.csv" into the environment str(crown_data) #create a numeric vector of direct strikes crown_data$times.struck=ifelse(crown_data$direct.strike=="direct",1,0) length(crown_data$DBH) #there are 1945 trees #drop all values of crown area less than 50m2 because sampling is uneven below this value trim_crown_data<-subset(crown_data,Shape_Area>=50&DBH>100) length(trim_crown_data$DBH) #no trees were dropped because they data were already trimmed #create physics based functions #First test the effect of crown area #then test the effect canopy status along with crown area #define the function #then use "optim()" #we are estimating the probability each tree is struck given 19 different events #these events are either 1) a tree is directly struck by lightning or 2) a tree is not directly struck #if lightning is stochastic, then only crown area matters #this means that the probability a tree is struck is: P = a*(CrownArea) #And therefore the probability of not being struck is: P = 1-(a*(CrownArea)) ###all probabilities must be between 0 and 1 ##so the parameter values need to be constrained so that bot ##these probabilities satisfy this for all crown areas ##that is, a must be less than 1/(max crown area) ##the optimization will always stear away from unrealistic probabilities anyway, so maybe this is not necessary ##the parameter can never be negative ##so to constrain this, we do the optimization in terms of the log of the parameter instead ##by default, optim minimizes but we want to maximize the likelihood, so take the negative of this #area based function first f_area<- function(par,x,y,xmax,ytot){ truepar=exp(par) ifelse(truepar>=1/xmax,1e100,-sum(y*(log(truepar)+log(x)) + (ytot-y)*log(1-truepar*x))) } #optimize the parameter totstrike=sum(trim_crown_data$times.struck) maxarea=max(trim_crown_data$Shape_Area) initpar1=log(1/(10*maxarea)) # set an initial value that will make the probability be between 0 and 1 prob1<-optim(par=initpar1,f_area,x=trim_crown_data$Shape_Area, y=trim_crown_data$times.struck,xmax=maxarea,ytot=totstrike, method="BFGS",hessian = TRUE) prob1 #this is the parameter for log likelihood exp(prob1$par) #H this is the parameter value after back-transforming from log exp(prob1$par)*10000 #this is the transformed parameter value in ha instead of m2 of area #create an function that separates emergent, canopy, and subcanopy trees f_crownstatus<- function(par,x,x2,y,xmax3,ytot){ # x2 = 1 if subcanopy, 2 if canopy 3 if emergent # xmax3 is a vector of the maximum crown sizes for categories 1, 2, and 3 respectively truepars=exp(par) problems=sum(ifelse(truepars>1/xmax3,1,0)) ifelse(problems>0,1e100, -sum(y*(log(truepars[x2])+log(x)) + (ytot-y)*log(1-truepars[x2]*x))) } trim_crown_data$crownindex=ifelse(trim_crown_data$Score_Cano=="Subcanopy",1, ifelse(trim_crown_data$Score_Cano=="Canopy",2,3)) xmax3=tapply(trim_crown_data$Shape_Area,trim_crown_data$crownindex,max) totstrike=sum(trim_crown_data$times.struck) prob2<-optim(par=rep(initpar1,3),f_crownstatus,x=trim_crown_data$Shape_Area, x2=trim_crown_data$crownindex, y=trim_crown_data$times.struck,xmax3=xmax3,ytot=totstrike, method="BFGS", hessian = TRUE) prob2 exp(prob2$par) # this is the parameter value after back-transforming from log ##order of parameters: subcanopy, canopy, emergent exp(prob2$par)*10000 # this is the transformed parameter value in units of ha instead of m2 #compare model to a null model in which all trees have the same probability of being struck #the null model treats each exposed tree crown with >50m2 of crown area equally f_null<- function(par,y,ytot){ truepar=exp(par) ifelse(1-truepar<0,1e100,-sum(y*(log(truepar)) + (ytot-y)*log(1-truepar))) } prob1_null<-optim(par=log(0.1),f_null,y=trim_crown_data$times.struck,ytot=totstrike, method="BFGS") prob1_null exp(prob1_null$par)# this is the parameter value after back-transforming from log ##################### #calculate model AIC aic_crownstatus=2*prob2$value+2*3 aic_area=2*prob1$value+2 aic_null=2*prob1_null$value+2 #print aic values aic_crownstatus aic_area #crown status decreses AIC by 10.5 aic_null #highly significant difference in AIC values - dAIC = 15.8 ##################################################### #Now estimate the frequency of direct strikes for each tree with an exposed crown #then produce a dataset with this information exp(prob2$par) #subcanopy parameter = 1.440861e-09 #canopy parameter = 4.595417e-06 #emergent parameter = 5.355981e-06 #what is the parameter including only crown area? exp(prob1$par) #parameter for all trees: 3.297489e-06 #now estimate the probability of being struck for all tree here ###we approximate the probability of being directly struck as 0 - this is the analytical solution as described in the methods prob.direct.strike<-ifelse(trim_crown_data$Score_Cano=="Subcanopy",trim_crown_data$Shape_Area*0, ifelse(trim_crown_data$Score_Cano=="Canopy", trim_crown_data$Shape_Area* 4.594463e-06,trim_crown_data$Shape_Area*5.355193e-06)) #calculate the mean probability of a direct strike for each canopy class mean_prob_by_canopy_full<-tapply(prob.direct.strike,trim_crown_data$Score_Cano,mean) mean_prob_by_canopy_areaonly<-tapply(trim_crown_data$Shape_Area*3.297489e-06,trim_crown_data$Score_Cano,mean) mean_prob_by_canopy_full mean_prob_by_canopy_areaonly 1-mean_prob_by_canopy_full/mean_prob_by_canopy_areaonly #now multiply by 6.35 - the number of strikes per 50ha plot annually freq.direct.strike<-prob.direct.strike*6.35 freq.direct.strike #confrim the the total frequency of direct strikes is 6.35 sum(freq.direct.strike) #now add these columns to the data and save for later use export_data<-data.frame(trim_crown_data,prob.direct.strike,freq.direct.strike) #write.csv(export_data,"direct_strike_estimates_2019-3-11.csv") getwd() ################################################################# #the hessian does not behave well for parameter 1 (subcanopy trees) because of its sample size and homogenous responses #optimize the same function using bblme to then calculate 95% CIs using likelihood profiling library(bbmle) #create an function that separates emergent, canopy, and subcanopy trees #create a crown index variable trim_crown_data$crownindex=ifelse(trim_crown_data$Score_Cano=="Subcanopy",1, ifelse(trim_crown_data$Score_Cano=="Canopy",2,3)) #create the function now f_crownstatus<- function(par1, par2, par3){ # x2 = 1 if subcanopy, 2 if canopy 3 if emergent # xmax3 is a vector of the maximum crown sizes for categories 1, 2, and 3 respectively -sum(ifelse(trim_crown_data$crownindex==1, trim_crown_data$times.struck*(log(exp(par1))+log(trim_crown_data$Shape_Area)) + (19-trim_crown_data$times.struck)*log(1-exp(par1)*trim_crown_data$Shape_Area), ifelse(trim_crown_data$crownindex==2, trim_crown_data$times.struck*(log(exp(par2))+log(trim_crown_data$Shape_Area)) + (19-trim_crown_data$times.struck)*log(1-exp(par2)*trim_crown_data$Shape_Area), trim_crown_data$times.struck*(log(exp(par3))+log(trim_crown_data$Shape_Area)) + (19-trim_crown_data$times.struck)*log(1-exp(par3)*trim_crown_data$Shape_Area)))) } maxarea=max(trim_crown_data$Shape_Area) initpar1=list(par1 = log(1/(10*maxarea)),par2 = log(1/(10*maxarea)),par3 = log(1/(10*maxarea))) prob2<-mle2(f_crownstatus,method="BFGS",start=initpar1,skip.hessian = FALSE) exp(coef(prob2))*10000#parameter estimates are identical to the optim approach prof_prob2<-profile(prob2,tol.newmin = 0.01)#error related to real minimum of par1 being 0 (outside of log-space) exp(confint(prof_prob2))#the lower end of parameter 1 is NA because this is in log-space and the real value is 0