--- title: "St. Kitts Mosquito Analysis" author: "Greg Jacobs" date: "18 August 2020" output: html_document: default --- Requires files: `dat7.csv`, `lc.csv`, `Diversity_Analysis.r` ```{r packages, message=FALSE, warning=FALSE} library(readxl) library(reshape2) library(glmmTMB) library(bbmle) library(ggplot2) library(plyr) library(dplyr) library(ggpubr) library(kableExtra) library(performance) library(sjPlot) library(runjags) library(coda) library(ggmcmc) library(DHARMa) ``` ```{r dataset} dat7 <- read.csv("dat7.csv") spp_counts <- aggregate(ct~spp, dat7, sum) spp_occ <- aggregate(I(ct>0)~spp, dat7, mean) dat8 <- dat7[which(dat7$spp%in%spp_counts$spp[which(spp_counts$ct>160)]),] ``` # Diversity analysis Our hierarchical Bayesian multi-species occupancy model can be expressed as, $$ R = n + \sum_{i=n+1}^{n+n_{aug}}{w_{(i)}}\\ w_{i} \sim \mathrm{Bernoulli}(\Omega)\\ Z_{j,i} \sim \mathrm{Bernoulli}(\psi_{j,i}\times w_{i})\\ X_{j,k,i} \sim \mathrm{Bernoulli}(p_{j,k,i} \times Z_{j,i}) $$, Where $R$ is species richness, $n$ is the number of of observed species, $n_{aug}$ is the number of augmented (all-zero capture history) species added to the dataset, $Z$ is the latent occupancy variable, and $X$ is the data. Indexes refer to site ($i$), occasion ($k$), and species ($i$). The occurrence ($\psi$) and observation ($p$) processes were modeled as hierarchical random effects, $$ \mathrm{logit}(p_{j,k,i}) = v_{i} \\ v_{i} \sim \mathrm{Normal}(\mu_v, \tau_v) \\ \mathrm{logit}(\psi_{j,i}) = u_{i} \\ u_{i} \sim \mathrm{Normal}(\mu_u, \tau_u) $$. We used weakly informative Gaussian priors for mean of $v_{i}$ and $u_{i}$ (mean = 0, sd = 2.25), and vague gamma priors for their precision (r = .1, lambda = 0.1). Omega was given a uniform (0, 1) prior. We also monitored the following derived diversity measures: $$ r_j = \sum_{i=1}^{n+n_{aug}}{Z_{j,i}} \\ \alpha = \mathrm{Mean}(r_j) \\ \beta = \frac{R}{\alpha}\\ \zeta = \sum_{i=1}^{n+n_{aug}}\prod_{j=1}^{J}Z_{j,i} $$, Where $r_j$ is "local" species richness at site $j$, $\alpha$ is alpha-diversity, $\beta$ is beta-diversity, and $\zeta$ is zeta-diversity. We also monitored the number of sites each species occupied, $Nsites_i = \sum_{j=1}^{J}{Z_{j,i}}$. ```{r diversity} if (file.exists("runjagsfiles")){ divout <- results.jags("runjagsfiles") } else { source("Diversity_Analysis.r") divout <- diversity_analysis() } res.mcmc.list <- as.mcmc.list(divout) res.ggs <- ggs(res.mcmc.list) res.coda <- summary(divout) res.df <- data.frame(res.coda) ``` ```{r msom-coeff-table, results='asis'} library(xtable) msomtab <- res.df[grep("Nsites\\[1\\]|Nsites\\[7\\]|alpha|beta|zeta|R|mu|sigma|omega",rownames(res.df)),c("Lower95", "Median", "Upper95", "psrf")] msomtab$Parameter <- rownames(msomtab) msomtab<-msomtab[c(3,1,11,2,12,4,7,8,9,5,6),c("Parameter", "Median", "Lower95", "Upper95")] kable(msomtab, format='html', digits=3, row.names=FALSE, escape=FALSE, caption="Table 3.") ``` ```{r diversity-landscape, fig.height=4, fig.width=6.5, fig.cap="Figure 2."} #land cover data data1. <- dat7 %>% arrange(td) %>% mutate(Species = sub("female", "", spp), Point = site, Rep = as.numeric(as.factor(td))) total.count. <- tapply(data1.$ct, data1.$Species, sum) data1. <- subset(data1., Species %in% names(total.count.)[total.count.>0]) %>% arrange(Point, Species) uspecies <- as.character(unique(data1.$Species)) lc <- data1. %>% group_by(Point) %>% summarize(lc.ag=first(Agricultural), lc.ma=first(Mangrove), lc.rf=first(Rainforest), lc.sf=first(Scrub), lc.ur=first(Urban)) rtab <- res.df[grep("r", rownames(res.df)), c("Lower95", "Median", "Upper95")] %>% mutate(param=rownames(.)) %>% mutate(Point=as.numeric(substr(param,3,nchar(param)-1))) %>% left_join(lc) lcsiteplot <- function(lc.var="lc.sf"){ rtab %>% ggplot(aes(x=eval(parse(text=lc.var)), y=Median)) + geom_pointrange(aes(ymin=Lower95, ymax=Upper95))+ theme_classic() + xlim(0,1) + scale_y_continuous(breaks = c(2,4,6,8,10)) } #plot panels dlpanel1 <- lcsiteplot("lc.sf") + ylab("N of Species") + xlab("Scrub") dlpanel2 <- lcsiteplot("lc.ag") + ylab("") + xlab("Agriculture") dlpanel3 <- lcsiteplot("lc.ma") + ylab("") + xlab("Mangrove") dlpanel4 <- lcsiteplot("lc.rf") + ylab("N of Species") + xlab("Rainforest") dlpanel5 <- lcsiteplot("lc.ur") + ylab("") + xlab("Urban") dlpanel6 <- msomtab[grep("R|alpha|beta|zeta", rownames(msomtab)), c("Lower95", "Median", "Upper95")] %>% mutate(param=rownames(.)) %>% filter(param!='R_exp') %>% mutate(param=factor(param, levels=c("R", "alpha", "beta", "zeta"))) %>% arrange(-Median) %>% ggplot(aes(x=param, y=Median,)) + geom_pointrange(aes(ymin=Lower95, ymax=Upper95)) + ylab("") + xlab("") + theme_classic() + scale_x_discrete(labels=c(parse(text="italic('R')"), expression(alpha), expression(beta), expression(zeta))) #plot lcplotlist<- list(dlpanel1, dlpanel2, dlpanel3, dlpanel4, dlpanel5, dlpanel6) LCDIVERSITY_PLOT <- ggarrange(plotlist=lcplotlist, labels=c("a)","b)","c)","d)","e)","f)")) ggsave("Figure2.pdf", plot = LCDIVERSITY_PLOT, device = "pdf", width = 6.5, height = 4, dpi = 600) LCDIVERSITY_PLOT ``` # Relative abundance analysis ```{r rescale-covs} #Re-scale covariates prior to fitting the model allcols <- ncol(dat8) covlist <- c("precip_in", "Agricultural", "Rainforest", "Urban", "Scrub", "Mangrove") for(i in 1:length(covlist)){ dat8[,(allcols+i)] <- scale(dat8[,covlist[i]]) names(dat8)[(allcols+i)] <- paste("c.",covlist[i],sep="") } dat8$pos <- numFactor(dat8$lon, dat8$lat) # for use in spatial autocorrelation test dat8$group <- factor(rep(1, nrow(dat8))) # for use in spatial autocorrelation test # Mangrove mosquitoes and mangrove site location dat8$mtrait <- ifelse(dat8$spp %in% c("De.ma.female","Ae.ta.female"), 1, 0) dat8$msite <- as.numeric(dat8$landuse=="Mangrove") dat8$c.Anthropogenic <- with(dat8, scale(Agricultural + Urban)) ``` ```{r fit-models, warning=FALSE} # Model hypotheses (suppress convergence warnings in nb1 and poisson models) hlist <- list( H1 = ct ~ c.precip_in + spp:(c.Agricultural + c.Rainforest + c.Urban) + spp*Mangrove + (1|site) + (c.precip_in|spp), H2 = ct ~ c.precip_in + spp:(c.Anthropogenic) + (1|site) + (c.precip_in|spp), H3 = ct ~ c.precip_in + spp:(c.Agricultural + c.Urban) + (1|site) + (c.precip_in|spp), H4 = ct ~ c.precip_in + spp:(c.Anthropogenic) + mtrait*msite + (1|site) + (c.precip_in|spp), H5 = ct ~ c.precip_in + spp:(c.Agricultural + c.Urban) + mtrait*msite + (1|site) + (c.precip_in|spp), H6 = ct ~ c.precip_in + spp:(c.Anthropogenic + c.Scrub) + mtrait*msite + (1|site) + (c.precip_in|spp), H7 = ct ~ c.precip_in + spp:(c.Agricultural + c.Urban + c.Scrub) + mtrait*msite + (1|site) + (c.precip_in|spp), H8 = ct ~ c.precip_in + spp:(c.Anthropogenic + c.Rainforest) + mtrait*msite + (1|site) + (c.precip_in|spp), H9 = ct ~ c.precip_in + spp:(c.Agricultural + c.Urban + c.Rainforest) + mtrait:msite + (1|site) + (c.precip_in|spp), H10 = ct ~ c.precip_in + spp:(c.Anthropogenic + c.Scrub + c.Rainforest) + mtrait*msite + (1|site) + (c.precip_in|spp), H11 = ct ~ c.precip_in + spp:(c.Agricultural + c.Urban + c.Scrub + c.Rainforest) + mtrait*msite + (1|site) + (c.precip_in|spp), H12 = ct ~ c.precip_in + spp:(c.Agricultural + c.Urban + c.Rainforest) + mtrait*msite + (1|site) + (c.precip_in|spp) ) # fit models if (file.exists("models.RData")){ load("models.RData") } else { models <- lapply(hlist, function(x) glmmTMB(x, data=dat8, family=nbinom2)) # re-fit with alternative error distributions - many throw convergence warnings models.nb1 <- lapply(models, function(x) update(x, .~., family=nbinom1)) models.poi <- lapply(models, function(x) update(x, .~., family=poisson)) #Save model list objects save(models, models.nb1, models.poi, file="models.RData") } ``` ```{r compare-AIC, message=FALSE, warning=FALSE} if (file.exists("compare_AIC.RData")){ load("compare_AIC.RData") } else { #!!!MUST!!! set 'sort = FALSE' for selection of top variable_selection <- AICtab(models, sort=FALSE, base = T) R.models <- sapply(models, r2) topmod <- models[[which(variable_selection$dAIC==0)]] ### Other families varsel_pois <- AICtab(models.poi, sort=FALSE, base = T) R.poi <- sapply(models.poi, r2) varsel_nb1 <- AICtab(models.nb1, sort=FALSE, base = T) R.nb1 <- sapply(models.nb1, r2) save(variable_selection, R.models, topmod, varsel_pois, varsel_nb1, R.poi, R.nb1, file = "compare_AIC.RData") } ``` ```{r vif_mods} vif_list <- lapply(models, check_collinearity) check_collinearity(topmod) ``` ```{r DHARMa} res <- simulateResiduals(topmod) plot(res, quantreg = T) ``` ```{r testDHARMa-sa} res_site <- recalculateResiduals(res, group = dat8$site) testSpatialAutocorrelation(res_site, x = aggregate(dat8$lon, list(dat8$site), mean)$x, y = aggregate(dat8$lat, list(dat8$site), mean)$x, plot = TRUE) ``` ```{r testDHARMa-ta} res2 = recalculateResiduals(res, group = dat8$td) testTemporalAutocorrelation(res2, time = as.Date(unique(dat8$td), format="%m/%d/%Y")) ``` ## Results ```{r p-occ} spp_summary <- left_join(spp_counts, spp_occ) names(spp_summary)[3] <- "occ" ``` The best count model was H11, which can be expressed as, $$ \begin{aligned} y_{(i,j,s)} &\sim \mathrm{NegBin}(\mu_{(i,j,s)}, \theta) \\ \mathrm{log}({\mu_{(i,j,s)}}) &= \beta_{0} + \alpha_{0(s)} + (\beta_{1} + \alpha_{1(s)})Precip_{(j)} + \beta_{2}m_{trait(s)} + \beta_{3} Mangrove_{(i)} + \beta_{4(s)}LocalAgriculture_{(i)} + \\ &\beta_{5(s)}LocalUrban_{(i)} + \beta_{6(s)}LocalRainforest_{n(i)} + \beta_{7}m_{trait(s)}Mangrove_{(i)} + \alpha_{2(i)} \\ \mathrm{log}(\theta) &= \eta_{(s)}, \end{aligned} $$ where $i$ indexes site, $j$ indexes sampling occasion, and $s$ indexes species. There are random intercepts for each species($\beta_{0} + \alpha_{0(s)}$), a main effect of precipitation with species-specific random slopes ($(\beta_{1} + \alpha_{1(s)})Precip_{(j)}$), a main effect of the Magrove breeding trait ($\beta_{2(m_{trait(s)})}$) multiplied by sites where points are located in Mangroves ($Mangrove_{(i)}$), species-specific main effects on proportional land cover variables ($\beta_{3(s)}LocalAgriculture_{(i)} + \beta_{4(s)}LocalUrban_{(i)} + \beta_{5(s)}Local2^\circ Forest_{(i)} + \beta_{6(s)}LocalRainforest_{n(i)}$), a site-level random intercept term ($\alpha_{2(i)}$), and a dispersion parameter, $\eta_{(s)}$. The top model employs a negative binomial parameterization where the variance scales quadratically with the mean, $Var(y_{(i,j,s)})=\mu_{(i,j,s)}(1+\frac{\mu_{(i,j,s)}}{\phi})$, where the predicted mean is $\mu$, and $\phi=e^{\eta}$ (Hardin & Hilbe 2007). ```{r model-predictions} newdata = dat8 newdata$site <- NA temp = predict(topmod, data = newdata, se.fit=TRUE, type="link") newdata$predlink = temp$fit newdata$predFE = exp(temp$fit) newdata$predFE.min = exp(temp$fit-1.98*temp$se.fit) newdata$predFE.max = exp(temp$fit+1.98*temp$se.fit) ``` ```{r predictions2} real=ddply(dat8, ~spp+landuse+td, summarize, m=mean(ct), min = min(ct), max = max(ct)) spp_names <- function(string) { string <- gsub("\\.", "\\. ", string) #replace period with period & space tag <- sub('.*\\.', '.', string) #define tag string <- gsub(tag, ".", string) #remove tag at end of species name string <- paste("italic('",string,"')", sep="") } real <- transform(real, spp = factor(spp, levels=unique(real$spp), labels=sapply(unique(real$spp), function(i) spp_names(i)))) real$landuse <- factor(real$landuse, levels=c("Scrub", "Agricultural", "Mangrove", "Rainforest", "Urban")) newdata2 <- transform(newdata, spp = factor(spp, levels=unique(newdata$spp), labels=sapply(unique(newdata$spp), function(i) spp_names(i)))) newdata2 <- ddply(newdata2, ~spp+landuse+td, summarize,m=mean(predFE), lo=exp(mean(predlink)-1.98*sd(predlink)), hi=exp(mean(predlink)+1.98*sd(predlink))) newdata2$landuse <- factor(newdata2$landuse, levels = c("Scrub", "Agricultural", "Mangrove", "Rainforest", "Urban")) ``` ## Tables ```{r count-coeffs} tab_model(topmod, transform = NULL, wrap.labels = 50, title = "Table 4.") ``` ## Figures ```{r PREDICTIONS, fig.width=7.5, fig.height=6, fig.cap="Figure 3."} predPalette <- c("#E69F00", "#F0E442", "#D55E00", "#009E73", "#888888") gg_color <- function(n) { hues = seq(15, 375, length=n+1) hcl(h = hues, l = 65, c = 100)[1:n] } predPalette <- gg_color(5) predcovers <- c("Scrub", "Agricultural", "Mangrove", "Rainforest", "Urban") newdata2$name <- newdata2$spp newdata2$name <- sub("Ae. ae.", "Aedes aegypti", newdata2$name) newdata2$name <- sub("Ae. ta.", "Aedes taeniorhynchus", newdata2$name) newdata2$name <- sub("Cu. qu.", "Culex quinquefasciatus", newdata2$name) newdata2$name <- sub("De. ma.", "Deinocerites magnus", newdata2$name) plotpreds <- function(name, ymin, ymax){newdata2[grep(name, newdata2$name),] %>% ggplot(aes(as.Date(td, "%m/%d/%Y"), m, colour=landuse)) + ggtitle(parse(text=unique(grep(name,newdata2$name, value=T)))) + geom_line(size = 1) + theme_classic() + theme(plot.title=element_text(hjust=0.5, size=11), axis.text.x=element_text(angle=30, hjust=1)) + ylab("") + xlab("") + scale_x_date(date_labels = "%b %Y", breaks=pretty(as.Date(newdata2$td, "%m/%d/%Y"), 5)) + ylim(ymin,ymax) } panela <- plotpreds("Aedes aegypti", 0, 5) panelb <- plotpreds("Aedes taeniorhynchus", 0, 200) panelc <- plotpreds("Culex quinquefasciatus", 0, 15) paneld <- plotpreds("Deinocerites magnus", 0, 15) PREDICTIONS <- ggarrange(panela, panelb, panelc, paneld, ncol=2, nrow=2, labels=c("a)","b)","c)","d)"), common.legend=TRUE, legend="right") ggsave("Figure3.pdf", plot = PREDICTIONS, device = "pdf", width = 7.5, height = 6, dpi = 600) PREDICTIONS ``` ```{r blups, fig.width = 3.5, fig.height = 3.5, fig.cap="Figure 4."} # Plot BLUPs library(ggeffects) precippred <- ggpredict(topmod, c("c.precip_in"), type = "fe") blups <- ggpredict(topmod, c("c.precip_in", "spp"), type = "re") pr <- ggplot(blups, aes(x = x , y = predicted, color = group)) + geom_line() + xlab("Precipitation (cm)") + ylab("Count") + theme_classic() + theme(legend.title=element_blank(), legend.justification=c(.05,.95), legend.position=c(.05,.95)) + scale_color_discrete( labels = c("Aedes aegypti", "Aedes taeniorhynchus", "Culex quinquefasciatus", "Deinocerites magnus"), guide = guide_legend(label.theme = element_text(size = 11, angle = 0, face = "italic"))) ggsave("Figure4.pdf", plot = pr, device = "pdf", width = 3.5, height = 3.5, dpi = 600) pr ``` # Appendixes ```{r LANDCOVER-BARPLOT, fig.height = 4, fig.width=6.5, fig.cap="Figure S1."} # plot cbPalette <- c("#F0E442", "#D55E00", "#0072B2", "#56B4E9", "#009E73", "#E69F00", "#888888") lc <- read.csv("lc.csv") LANDCOVER_BARPLOT <- ggplot() + geom_bar(aes(y = pct, x = site, fill = LandCover), data = lc, stat="identity") + scale_fill_manual(values=cbPalette, name = "Land Cover Class") + labs(fill = "Land Cover Class") + ylab("Proportion of Land Cover") + xlab("Site") + theme_classic() + scale_x_continuous(breaks = seq(0,30,by=2)) ggsave("FigureS1.pdf", plot = LANDCOVER_BARPLOT, device="pdf", width=6.5, height=4, dpi=600) LANDCOVER_BARPLOT ``` ```{r FIGURE-PREDICTIONS2, fig.width=7.5, fig.height=10, fig.cap="Figure S2. a caption goes here."} real$name <- real$spp real$name <- sub("Ae. ae.","Aedes aegypti",real$name) real$name <- sub("Ae. ta.","Aedes taeniorhynchus",real$name) real$name <- sub("Cu. qu.","Culex quinquefasciatus",real$name) real$name <- sub("De. ma.","Deinocerites magnus",real$name) FIGURE_PREDICTIONS2 <- newdata2 %>% mutate(td=as.Date(td,"%m/%d/%Y")) %>% ggplot(aes(td, m, colour=landuse)) + geom_line() + geom_line(aes(td, lo), lty=2) + geom_line(aes(td, hi), lty=2) + geom_point(data = real, aes(as.Date(td,"%m/%d/%Y"), m), size = 1) + facet_grid(name~landuse, scales = "free", labeller = label_parsed) + theme(legend.position = "none") + theme_classic() + ylab("Average mosquito relative abundance") + xlab("Month") + scale_x_date(date_labels = "%b %Y") + theme(strip.background = element_blank(), axis.text.x=element_text(angle=30, hjust=1)) ggsave("FigureS2.pdf", plot = FIGURE_PREDICTIONS2, device="pdf", width=7.5, height=10, dpi=600) FIGURE_PREDICTIONS2 ``` ```{r session-info} sessionInfo() ```