# InsectOccupancy2020.R # alien insects # occupancy modeling # 1 December 2020 # PACKAGES NEEDED # install unmarked package # install.packages("unmarked", repos="https://cran.uib.no/") library(unmarked, quietly=T) # install AICcmodavg package # install.packages("AICcmodavg", repos="https://cran.uib.no/") # load AICcmodavg package library(AICcmodavg, quietly=T) # install the Hmisc package to make some simple graphs # need this package to print the figures for the real parameter estimates # install.packages("Hmisc", repos="https://cran.uib.no/") # installs a lot of other packages library(Hmisc, quietly=T) # ENCOUNTER HISTORIES # working directory setwd("C:/Users/brett.sandercock/OneDrive - NINA/Documents/Projects/InvasivePlantInsect/Analyses_Nov2020") # clean up to rerun script rm(list=ls()) # input encounter histories enchist1 = read.csv("enchist_knust2018.csv", header=T) enchist2 = read.csv("enchist_ETOH2019.csv", header=T) enchist3 = read.csv("enchist_lysering2019.csv", header=T) enchist4 = read.csv("enchist_lysering2020.csv", header=T) # combine methods for 2019, lysering into ETOH temp = enchist3[,5:8] temp[is.na(temp)] <- 0 colnames(temp) = c('L1D','L2D','L3D','L4D') enchist2 = cbind(enchist2,temp) enchist2$R1D = enchist2$R1D + enchist2$L1D enchist2$R2D = enchist2$R2D + enchist2$L2D enchist2$R3D = enchist2$R3D + enchist2$L3D enchist2$R4D = enchist2$R4D + enchist2$L4D enchist2$L1D <- NULL enchist2$L2D <- NULL enchist2$L3D <- NULL enchist2$L4D <- NULL #head(enchist2,100) enchist2$R1D[enchist2$R1D>=1] <- 1 enchist2$R2D[enchist2$R2D>=1] <- 1 enchist2$R3D[enchist2$R3D>=1] <- 1 enchist2$R4D[enchist2$R4D>=1] <- 1 # combine years enchist = rbind(enchist1, enchist2, enchist4) head(enchist) colnames(enchist) <- c("risk", "species", "year", "site", "R1D", "R2D","R3D","R4D") list = sort(levels(enchist$species)) #enchist$species = relevel(enchist$species, ref=list) levels(enchist$risk) # reclassify enchist$risk2 = "NA" enchist$risk2[enchist$risk=="SE - Svært høy risiko"] <- "SE" enchist$risk2[enchist$risk=="HI - Høy risiko"] <- "HI" enchist$risk2[enchist$risk=="PH - Potensielt høy risiko"] <- "HI" # pool with HI enchist$risk2[enchist$risk=="LO - Lav risiko"] <- "LO" enchist$risk2[enchist$risk=="NR - Ikke risikovurdert"] <- "NR" enchist$risk2[enchist$risk=="NW - Newbies"] <- "NW" enchist$risk2 = factor(enchist$risk2) levels(enchist$risk2) enchist$risk <- NULL colnames(enchist) <- c("species", "year", "site", "R1D", "R2D","R3D","R4D","risk") head(enchist) # drop no risk species enchist = enchist[enchist$risk!='NR', ] enchist$risk = factor(enchist$risk) levels(enchist$risk) # drop species with no detections with the sampling method # tally up rounds per site enchist$rowsum = rowSums(enchist[,c('R1D','R2D','R3D','R4D')], na.rm=TRUE) # tally up sites per species undetected <- aggregate(enchist$rowsum, by=list(Category=enchist$species), FUN=sum) colnames(undetected) = c('species', 'count') undetected$flag = 'Unk' undetected$flag[undetected$count==0] <- 'Not' undetected$flag[undetected$count!=0] <- 'Det' undetected$count <- NULL # head(undetected) # merge dataframes enchist$rowsum <- NULL enchist = merge(enchist, undetected, by=c('species')) # only detected enchist = enchist[enchist$flag=='Det', ] enchist$flag <- NULL # SITE COVARIATES # input site covariates for 2020 site2020 = read.csv("SiteCovar2020.csv", header=T) head(site2020) dim(site2020) # area of sample site was 1 km radius or 3.14 km2 # calculate percent cover site2020[,19:28] <- (site2020[,19:28]/(3141592.6535))*100 # take subset of site covariates and rename site2020 = site2020[,c("location", "selection", "Housing", "Forest", "OpenLand")] colnames(site2020) <- c("site", "choice", "housing", "forest", "openland") # scale three covariates # convert housing variable to a z-score site2020$housing.z = scale(site2020$housing) range(site2020$housing.z) # -1.559636 2.427951 # values for backtransformation housing.mean = mean(site2020$housing) # 39.0992 housing.sd = sd(site2020$housing) # 23.28985 housing.range= range(site2020$housing) # 2.775516 95.645809 # convert forest variable to a z-score site2020$forest.z = scale(site2020$forest) range(site2020$forest.z) # -1.591693 1.969680 # values for backtransformation forest.mean = mean(site2020$forest) # 45.00197 forest.sd = sd(site2020$forest) # 27.84516 forest.range = range(site2020$forest) # 0.6810336 99.8480244 # convert openland variable to a z-score site2020$openland.z = scale(site2020$openland) range(site2020$openland.z) #-1.403956 3.493468 # values for backtransformation openland.mean = mean(site2020$openland) # 9.447759 openland.sd = sd(site2020$openland) # 6.399578 openland.range = range(site2020$openland) # 0.4630327 31.8044798 # drop untransformed site2020$housing <- NULL site2020$forest <- NULL site2020$openland <- NULL head(site2020) # merge together the two files enchist = merge(enchist, site2020, by="site", all = TRUE) write.csv(enchist, file="EncHistInsects_Nov2020.csv") # drop some sites with no detections of bad species enchist = enchist[is.na(enchist$species)==FALSE, ] head(enchist) # TALLY SPECIES # tally species by route choice #junk1 <- aggregate(enchist$R1D, by=list(enchist$risk, enchist$choice), FUN=sum, na.rm=TRUE) #junk2 <- aggregate(enchist$R2D, by=list(enchist$risk, enchist$choice), FUN=sum, na.rm=TRUE) #junk3 <- aggregate(enchist$R3D, by=list(enchist$risk, enchist$choice), FUN=sum, na.rm=TRUE) #junk4 <- aggregate(enchist$R4D, by=list(enchist$risk, enchist$choice), FUN=sum, na.rm=TRUE) #junker = cbind(rep(c(2,4,5,3,1),2),junk1,junk2[3],junk3[3],junk4[3]) #colnames(junker) = c('ord', 'risk','rutevalg','R1','R2','R3','R4') #junker[order(junker$ord, junker$risk),] # tally up risk categories for 60 sites table(enchist$risk)/60 # list of species detected enchist$species = factor(enchist$species) temp = paste(enchist$risk, enchist$species) unique.spp = unique(temp) unique.spp = sort(unique.spp) unique.spp # CREATE OBJECTS NEEDED FOR OCCUPANCY ANALYSIS # detection data (0 = nondetection, 1 = detection) detections <- enchist[, c("R1D", "R2D", "R3D", "R4D")] # covariates for rows # site.covs = enchist[, c("risk", "year")] site.covs = enchist[, c("risk", "year", "site", "choice", "housing.z")] # effort covariates - 4 rounds of visits (count = dim(enchist)[1]) visit <- data.frame(rep('t1',count), rep('t2',count), rep('t3',count), rep('t4',count)) obs.covs = list(visit=visit) # assemble in unmarkedFrameOccu bug.data <- unmarkedFrameOccu(y = detections, siteCovs = site.covs, obsCovs=obs.covs) # summary of formatted dataset # summary(bug.data) # FIT ALTERNATIVE MODELS # arguments of occu() function from unmarked package # occu(~p ~Psi, data=source) # model m0 # constant detection and occupancy m0.n = "p(.), Psi(.)" (m0 <- occu(~ 1 ~ 1, data = bug.data)) # get estimates of occupancy and detection newdat = data.frame(c(1)) # constant predict(m0, type = "state", newdata=newdat) # occupancy predict(m0, type = "det", newdata=newdat) # detection # model m1 # detection varies by visit, constant occupancy m1.n = "p(round), Psi(.)" #m1.n = "p(tømming), Psi(.)" (m1 <- occu(~ visit ~ 1, data = bug.data)) # get estimates of occupancy and detection newdat = data.frame(c(1)) # constant predict(m1, type = "state", newdata=newdat) # occupancy newdat <- data.frame(visit=c('t1','t2','t3', 't4')) predict(m1, type="det", newdata=newdat, append=T) # detection # model m2 # occupancy by risk, detection by round m2.n = "p(round), Psi(guild)" #m2.n = "p(tømming), Psi(kat)" (m2 <- occu(~ visit ~ risk, data = bug.data)) # get estimates of occupancy and detection newdat = data.frame(risk=c('SE', 'HI', 'LO', 'NW')) predict(m2, type = "state", newdata=newdat, append=T) # occupancy newdat <- data.frame(visit=c('t1','t2','t3', 't4')) predict(m2, type="det", newdata=newdat, append=T) # detection # model m3 # occupancy by risk, detection by risk m3.n = "p(guild), Psi(guild)" #m3.n = "p(kat), Psi(kat)" (m3 <- occu(~ risk ~ risk, data = bug.data)) # get estimates of occupancy and detection newdat <- data.frame(risk=c("NW", "LO", "HI", "SE")) (occ = predict(m3, type = "state", newdata=newdat, append=T)) # occupancy (det = predict(m3, type="det", newdata=newdat, append=T)) # detection # plot the data windows(width=10, height=10) par(mfrow=c(2,1)) par(mar=c(2,5,3,2)) #with(occ,errbar(1:4, Predicted[1:4],lower[1:4],upper[1:4], ylim=c(0,0.9),xaxt="n", lwd=2.5, cex=2, cex.lab=1.5, cex.axis=1.5, xlab="Gruppering",ylab="Tilstedeværelse")) with(occ,errbar(1:4, Predicted[1:4],lower[1:4],upper[1:4], ylim=c(0,0.9),xaxt="n", lwd=2.5, cex=2, cex.lab=1.5, cex.axis=1.5, xlab="Risk category",ylab="Occupancy")) axis(1, at=1:4, cex.axis=1.5, labels=c("NW", "LO", "PH/HI", "SE")) #text(2.85,0.8, labels=c("A"), cex=3, adj=0) par(mar=c(5,5,1,2)) #with(det,errbar(1:4, Predicted[1:4],lower[1:4],upper[1:4], ylim=c(0,0.9),xaxt="n", lwd=2.5, cex=2, cex.lab=1.5, cex.axis=1.5, xlab="Gruppering",ylab="Deteksjon")) with(det,errbar(1:4, Predicted[1:4],lower[1:4],upper[1:4], ylim=c(0,0.9),xaxt="n", lwd=2.5, cex=2, cex.lab=1.5, cex.axis=1.5, xlab="Risk category",ylab="Detection")) axis(1, at=1:4, cex.axis=1.5, labels=c("NW", "LO", "PH/HI", "SE")) #text(2.85,0.8, labels=c("B"), cex=3, adj=0) # model m4 # occupancy by risk, detection constant m4.n = "p(.), Psi(guild)" #m4.n = "p(.), Psi(kat)" (m4 <- occu(~ 1 ~ risk, data = bug.data)) # get estimates of occupancy and detection newdat = data.frame(risk=c('SE', 'HI', 'LO', 'NW')) predict(m4, type = "state", newdata=newdat, append=T) # occupancy newdat = data.frame(c(1)) # constant predict(m4, type = "det", newdata=newdat) # detection # model m5 # occupancy by species, detection by visit #m5.n = "p(round), Psi(species)" #(m5 <- occu(~ visit ~ species, data = bug.data)) #spplist = levels(enchist$species) #newdat = data.frame(species=spplist) #sppout = predict(m5, type = "state", newdata=newdat, append=T) # occupancy #cbind(round(sppout[,1:4],4), sppout$species) #newdat <- data.frame(visit=c('t1','t2','t3', 't4')) #predict(m5, type="det", newdata=newdat, append=T) # detection # a function for converting coefficients into probabilities # to double-check estimates if desired logit2prob <- function(logit){ odds <- exp(logit) prob <- odds / (1 + odds) return(prob) } int = 2.77; est = 0 int = 2.77; est = -5.50 int = 2.77; est = -5.02 int = 2.77; est = -3.99 int = 2.77; est = -3.26 logit2prob(int+est) # model m6 # occupancy by housing, detection by risk m6.n = "p(guild), Psi(housing)" #m6.n = "p(kat), Psi(hus)" (m6 <- occu(~ risk ~ housing.z, data = bug.data)) # estimates of detection newdat <- data.frame(risk=c("NK", "LO", "HI", "SE")) # risk predict(m6, type="det", newdata=newdat, append=T) # estimates of occupancy newdat <- data.frame(housing.z=seq(-1.54,2.15,0.01)) output = predict(m6, type="state", newdata=newdat, append=T) # backtransform z-score, mean = 41.54144, SD = 25.17636 output$housing = (25.17636*output$housing.z) + 41.54144 # plot occupancy vs. housing #windows(width=15, height=10) # par(mfrow=c(1,2)) # double-plot windows(width=8, height=8) par(mfrow=c(1,1)) par(mar=c(5,5,3,2)) #plot(Predicted ~ housing, output, type = 'l', lwd=3, col='black', cex.lab=1.5, cex.axis=1.5, ylim =c(0,1), xlim =c(0,100), xlab='Andel bebyggelse', ylab='Tilstedeværelse') plot(Predicted ~ housing, output, type = 'l', lwd=3, col='black', cex.lab=1.5, cex.axis=1.5, ylim =c(0,1), xlim =c(0,100), xlab='Percent housing', ylab='Occupancy') lines(lower ~ housing, output, type = 'l', lwd=3, lty=3, col='black') lines(upper ~ housing, output, type = 'l', lwd=3, lty=3, col='black') text(2,0.95, labels=c("A"), cex=3, adj=0) # model m7 # occupancy by risk*housing, detection by risk m7.n = "p(guild), Psi(guild*housing)" #m7.n = "p(kat), Psi(kat*hus)" (m7 <- occu(~ risk ~ risk*housing.z, data = bug.data)) # estimates of detection newdat <- data.frame(risk=c("NW", "LO", "HI", "SE")) # risk predict(m7, type="det", newdata=newdat, append=T) # estimates of occupancy newdat <- expand.grid(housing.z=seq(-1.54,2.15,0.01),risk=c("NW", "LO", "HI", "SE")) output = predict(m7, type="state", newdata=newdat, append=T) output$housing = (25.17636*output$housing.z) + 41.54144 out.NW <- output[which(output$risk=='NW'), ] out.LO <- output[which(output$risk=='LO'), ] #out.PH <- output[which(output$risk=='PH'), ] out.HI <- output[which(output$risk=='HI'), ] out.SE <- output[which(output$risk=='SE'), ] # plot occupancy vs. housing for three different risks windows(width=8, height=8) par(mfrow=c(1,1)) par(mar=c(5,4.5,3,1)) #plot(Predicted ~ housing, out.NW, type = 'l', lwd=3, col='blue', cex.lab=1.5, cex.axis=1.5, ylim =c(0,1), xlim =c(0,100), xlab='Andel bebyggelse', ylab='Tilstedeværelse') plot(Predicted ~ housing, out.NW, type = 'l', lwd=3, col='blue', cex.lab=1.5, cex.axis=1.5, ylim =c(0,1), xlim =c(0,100), xlab='Percent housing', ylab='Occupancy') lines(lower ~ housing, out.NW, type = 'l', lwd=3, lty=3, col='blue') lines(upper ~ housing, out.NW, type = 'l', lwd=3, lty=3, col='blue') lines(Predicted ~ housing, out.LO, type = 'l', lwd=3, lty=1, col='green') lines(lower ~ housing, out.LO, type = 'l', lwd=3, lty=3, col='green') lines(upper ~ housing, out.LO, type = 'l', lwd=3, lty=3, col='green') #lines(Predicted ~ housing, out.PH, type = 'l', lwd=3, lty=1, col='orange') #lines(lower ~ housing, out.PH, type = 'l', lwd=3, lty=3, col='orange') #lines(upper ~ housing, out.PH, type = 'l', lwd=3, lty=3, col='orange') lines(Predicted ~ housing, out.HI, type = 'l', lwd=3, lty=1, col='orange') lines(lower ~ housing, out.HI, type = 'l', lwd=3, lty=3, col='orange') lines(upper ~ housing, out.HI, type = 'l', lwd=3, lty=3, col='orange') lines(Predicted ~ housing, out.SE, type = 'l', lwd=3, lty=1, col='red') lines(lower ~ housing, out.SE, type = 'l', lwd=3, lty=3, col='red') lines(upper ~ housing, out.SE, type = 'l', lwd=3, lty=3, col='red') #legend('topright', legend=c("NW", "LO", "PH", "HI", "SE"), cex=1, lty=1, lwd=3, col=c("blue", "green", "orange", "yellow", "red"), bty="n") legend('topright', legend=c("NW", "LO", "PH/HI", "SE"), cex=1.5, lty=1, lwd=3, col=c("blue", "green", "orange", "red"), bty="n") text(2,0.95, labels=c("B"), cex=3, adj=0) # models by choice of site, DOES NOT WORK # model m9 # occupancy and detection by choice of site = Auto or Manual # m9.n = "p(choice*year), Psi(choice*year)" # m9.n = "p(rutevalg*år), Psi(rutevalg*år)" # (m9 <- occu(~ choice*year ~ choice*year, data = bug.data)) # get estimates of occupancy and detection # newdat <- data.frame(choice=c(rep("Auto",2), rep("Manual",2)), year=rep(c("2018", "2019"),2)) # predict(m9, type = "state", newdata=newdat, append=T) # occupancy # predict(m9, type="det", newdata=newdat, append=T) # detection # models by choice of site, # model m10 # occupancy and detection by choice of site = Auto or Manual m10.n = "p(choice), Psi(choice)" #m10.n = "p(rutevalg), Psi(rutevalg)" (m10 <- occu(~ choice ~ choice, data = bug.data)) # get estimates of occupancy and detection newdat <- data.frame(choice=c('Auto', 'Manual')) predict(m10, type = "state", newdata=newdat, append=T) # occupancy predict(m10, type="det", newdata=newdat, append=T) # detection # model m11 # occupancy by risk, detection by risk m11.n = "p(guild+year), Psi(guild)" (m11 <- occu(~ risk+year ~ risk, data = bug.data)) # model m12 # occupancy by risk, detection by risk m12.n = "p(guild*year), Psi(guild)" (m12 <- occu(~ risk*year ~ risk, data = bug.data)) # GOODNESS-OF-FIT TESTING # global model is m7 p(guild), Psi(guild*housing) # mb.gof.test function # (GOFtest = mb.gof.test(m7)) # c-hat = 5.08 # parboot function Mod_global = m7 fitstats <- function(Mod_global) { observed <- getY(Mod_global@data) expected <- fitted(Mod_global) resids <- residuals(Mod_global) sse <- sum(resids^2,na.rm=TRUE) chisq <- sum((observed - expected)^2 / expected,na.rm=TRUE) freeTuke <- sum((sqrt(observed) - sqrt(expected))^2,na.rm=TRUE) out <- c(SSE=sse, Chisq=chisq, freemanTukey=freeTuke) return(out) } (pb <- parboot(Mod_global, fitstats, nsim=100, report=1)) # change to 1000 # c-hat = observed chi-square/mean chi-square of bootstrap distribution pb@t0[2] # 9930.329 mean(pb@t.star[,2]) # 9768.628 (cHat_pb <- pb@t0[2] / mean(pb@t.star[,2])) # 1.016553 # MODEL SELECTION # model selection with AICcmodavg package # set up candidate model list Cands <- list(m0, m3, m4, m6, m7, m10, m12) # assign descriptive names to each model Model.names <- c(m0.n, m3.n, m4.n, m6.n, m7.n, m10.n, m12.n) # model selection based on AICc (AICtab = aictab(cand.set = Cands, modnames = Model.names, second.ord=TRUE, c.hat=1.011293)) # export to a csv file write.csv(AICtab, file="AICoutputInsects_Nov2020.csv")