Snail sampling protocols

  • Sampling in three habitats (ephemeral pond, ephemeral river and permanent stream) located in two villages in Burkina Faso (Est and South-West regions)
  • Time-based protocol (frequency: weekly): systematic collection during 30min by one trained field assistant using a 2mm sieve and hand collection
  • Quadrat protocol (frequency: monthly): 8 replicates of exhaustive sampling in 30x30cm quadrats (see the density-conversion sketch below)
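
For reference, quadrat counts convert to absolute densities through the quadrat area (0.3 m x 0.3 m = 0.09 m\(^2\)), the factor used throughout the analysis below. A minimal sketch with hypothetical counts:

quadrat_area <- 0.3 * 0.3               # each quadrat is 30x30cm = 0.09 m^2
counts <- c(0, 2, 1, 0, 5, 3, 0, 1)     # hypothetical counts from the 8 quadrat replicates
mean(counts) / quadrat_area             # mean density [snails/m^2]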

Sampling results

Data Preparation

Sampling data for each habitat and species:

#  Preamble -------
library(pomp)       # for Sobol Design
library(tidyverse)  # data manipulation environment
library(lubridate)  # date functions
library(foreach)    # parallel looping
library(itertools)  # iterator tools
library(magrittr)   # data manipulation
library(doSNOW)     # for parallel computations
library(knitr)
library(kableExtra)

rm(list=ls())

map <- purrr::map

#  Load Data -------
snail_data <- read_csv("../data/perezsaez-et-al_2019_snail-sampling_data.csv") %>% 
  mutate(
    # set the month to the closest month
    month =  month(round_date(date, "month")),
    # handle the case of December for year rounding
    year = case_when(month == 1 & month(date) == 12 ~ year(date) + 1, T ~ year(date)),
    # rounded date
    rdate = as.Date(str_c(year, month, 1, sep = "-"), format = "%Y-%m-%d"),
    # create habitat-species identifier
    ID = str_c(habitat, species, sep = "-")
  )

snail_data %>% 
  DT::datatable(
    extensions = "Buttons", 
    options = list(dom = "Bfrtip", 
                   buttons = c("csv"))) %>%
  DT::formatStyle(columns = colnames(snail_data), fontSize = '10pt')

Compute monthly means for analysis.

# compute the monthly means, standard deviations and standard errors
snails_monthly <- snail_data %>% 
  group_by(rdate, type, ID) %>% 
  summarise(mean = mean(counts),
            sd = sd(counts),
            se = sd/sqrt(n())) %>% 
  mutate(
    # set the year corresponding to the season
    year = year(round_date(rdate+50, unit = "year"))-1,
    # julian day
    yday = yday(rdate)
  ) %>% 
  ungroup()

# reshape the dataframe to have time and quadrat columns
snails_montly_trans <- snails_monthly %>% 
  select(-sd, -se, -yday) %>% 
  spread(type, mean) %>% 
  inner_join(
    snails_monthly %>% 
      select(-mean, -sd, -yday) %>% 
      spread(type, se) %>% 
      rename(qse = quadrats, tse = time)
  )

# extract separately the mean per month for time samples and keep quadrat counts
snails_q_monthly_t <- snail_data %>% 
  filter(type == "quadrats") %>% 
  select(-type) %>% 
  rename(counts.q = counts) %>% 
  inner_join(
    snail_data %>% 
      filter(type == "time") %>% 
      group_by(month, year, ID) %>% 
      summarise(counts.t = mean(counts))
  )

Figure 2: Quadrat sampling performance

We assess the performance of the quadrat sampling protocol in terms of the standard error, relative standard error, and index of dispersion of the replicate counts, alongside the mean density.
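
These metrics are simple summaries of the 8 quadrat replicates collected on each sampling date; a minimal sketch on hypothetical counts:

x <- c(0, 2, 1, 0, 5, 3, 0, 1)  # hypothetical quadrat counts for one sampling date
se <- sd(x) / sqrt(length(x))   # standard error
relse <- se / mean(x)           # relative standard error
Id <- var(x) / mean(x)          # index of dispersion (>1 suggests aggregation)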

# factors levels for habitat IDs
ID_levels = c("pond-bulinus", "river-bulinus", "stream-bulinus", "stream-biomphalaria")

# Dictionary for ID and sampling type labels
ID_dict <- c("pond-bulinus" = "Pond (Bulinus)",
             "river-bulinus" = "River (Bulinus)",
             "stream-bulinus" = "Stream (Bulinus)",
             "stream-biomphalaria" = "Stream (B. pfeifferi)",
             "stream-both" = "Stream (both)")

type_dict <- c("quadrats" = "quadrats \n [snails/quadrat]",
               "time" = "time \n [snails/30min]")

# Custom ggplot theme
custom_theme <- ggthemes::theme_base() +
  theme(axis.title = element_text(size = 10),
        axis.text = element_text(size = 8),
        strip.text = element_text(size = 8),
        axis.ticks.length = unit(3, "points"),
        plot.background = element_blank(),
        legend.background = element_blank(),
        legend.text = element_text(size = 8),
        legend.title = element_text(size = 10)) 

# Dictionary of variables
var_dict <- c("Id" = "Index of dispersion",
              "se" = "Standard error",
              "relse" = "Relative SE",
              "mean" = "Mean density")

### FIGURE 2 ###
p.se <- rbind(snails_monthly %>% 
                filter(type == "quadrats") %>% 
                mutate(y = sd^2/mean,
                       var = "Id"),
              snails_monthly %>% 
                filter(type == "quadrats") %>% 
                mutate(y = mean/0.09,
                       var = "mean"),
              snails_monthly %>% 
                filter(type == "quadrats") %>% 
                mutate(y = se/0.09,
                       var = "se"),
              snails_monthly %>% 
                filter(type == "quadrats") %>% 
                mutate(y = se/mean,
                       var = "relse") %>% 
                filter(y < 1)) %>% 
  select(ID, y, var)  %>% 
  mutate(var = factor(var, levels = c("se", "relse", "Id", "mean")),
         ID = factor(ID, levels = ID_levels)) %>% 
  filter(!is.na(y), ID != "stream-both") %>%  
  ggplot(aes(x = ID, y = y)) +
  geom_boxplot(size = .3, outlier.size = .6) +
  facet_wrap(~var, scales = "free", labeller = labeller(var = var_dict), nrow = 1) +
  scale_x_discrete(labels = ID_dict) +
  labs(x = "", y = "") +
  custom_theme +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
        legend.text = element_text(size = 8, vjust = .5),
        legend.title = element_text(size = 10, vjust = .5),
        legend.key.width = unit(8, "points"),
        legend.key.height = unit(8, "points"),
        legend.position = "right")


print(p.se)
Quadrat sampling scheme performance. Reported are the distributions of standard errors (\(SE = \sigma/\sqrt{n}\) [snails\(^{\frac{1}{2}}\)/m]), relative standard errors (\(SE/\mu\) [-]), the index of dispersion (\(I_D = \sigma^2/\mu\) [-]) and the mean density (\(\mu\) [snails/m\(^2\)]) by habitat and intermediate host species.

Figure 3: Time series of snail counts for both sampling protocols

### FIGURE 3 ###
# rainy season periods for plotting
rainy_seas <- map(unique(year(snail_data$rdate)), 
                  ~ set_names(str_c(., c("-06-15", "-09-15")), c("start", "end"))) %>% 
  do.call(rbind, .) %>% 
  as_tibble() %>% 
  mutate(start = as.Date(start, format = "%Y-%m-%d"), end = as.Date(end, format = "%Y-%m-%d")) %>% 
  filter(year(start) < 2018)

p.data <- snail_data %>%  
  filter(ID != "stream-both") %>% 
  group_by(date, ID, counts, rdate, type) %>% 
  summarise(ncounts = n()) %>% 
  mutate(alpha = case_when(type == "time" ~ .2, T ~ .15)) %>% 
  # plot
  ggplot() +
  # rainy season
  geom_rect(data = rainy_seas, aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf), fill = "#E0E8FF") +
  # count data
  geom_point(aes(x = date, y = counts, size = ncounts), color = "#808080") + 
  # monthly means
  geom_point(data = snails_monthly %>% filter(ID != "stream-both"), aes(x = rdate, y = mean), col = "red", size = .65) +
  geom_path(data = filter(snails_monthly, !str_detect(ID, "stream")), aes(x = rdate, y = mean, group = year), col = "red", size = .3) +
  geom_line(data = filter(snails_monthly, str_detect(ID, "stream"), ID != "stream-both"), aes(x = rdate, y = mean), col = "red", size = .3) +
  geom_errorbar(data = snails_monthly %>% filter(ID != "stream-both"), aes(x = rdate, ymin = mean - 1.96 * se, ymax = mean + 1.96 * se), width = 0, col = "red", size = .25) +
  # rest
  facet_grid(type~ID, scales = "free", labeller = labeller(ID = ID_dict, type = type_dict)) +
  scale_size(range = c(.3, 1.5), breaks = c(1, 4, 8)) +
  guides(size = guide_legend(title = "number", override.aes = list("color" = "#808080"))) + 
  custom_theme +
  theme(legend.text = element_text(size = 6),
        legend.title = element_text(size = 6),
        legend.position = c(0.19, 0.89),
        legend.spacing.y = unit(2, "points"),
        legend.key.height = unit(4, "points"),
        legend.key.width = unit(4, "points")) 

print(p.data)
Ecological monitoring data of intermediate hosts of schistosomiasis in Burkina Faso. Data were collected in three different habitats in two distinct sites, one in the sudano-sahelian climatic zone (pond and river, village of Lioulgou), and one in the sudanian climatic zone (stream, village of Panamasso). Data are presented by intermediate host genera for the quadrat (top row) and time-based (bottom row) protocols with an indicative timing of the rainy season (July-September, blue rectangles). Monthly means (red points) consist of the average of the 8 quadrat replicates on the sampling date for the quadrat method, and of the weekly time-based counts grouped to the closest month start for the time-based method, along with their 95% CI (red error bars, mean \(\pm 1.96\) SE). Note that quadrat data in the months of June-October 2016 were not collected in the permanent stream in Panamasso due to logistical constraints.

Figure S1: Seasonality of snail populations

# extract coefficients of variation of count time series
var_coefs <- snails_monthly %>% 
  filter(ID != "stream-both") %>% 
  group_by(type) %>% 
  mutate(y = max(mean + 1.96 * se, na.rm = T)  * .91) %>% 
  group_by(ID, type, y) %>% 
  summarise(cv = sd(mean, na.rm = T)/mean(mean, na.rm = T),
            label = sprintf("CV = %.2f", cv)) 

### FIGURE 4 ###
p.seas <- snails_monthly %>% 
  # drop early-year points (yday < 100) for the ephemeral habitats and the combined stream series
  filter(!(!str_detect(ID, "stream") & yday < 100), ID != "stream-both") %>% 
  ggplot() +
  geom_rect(data = rainy_seas %>% 
              mutate(start = yday(start) + (year(start)-2015.5)* 6,
                     end = yday(end) + (year(end)-2015.5)* 6) %>% distinct, 
            aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf), fill = "#E0E8FF") +
  geom_errorbar(aes(x = yday + (year-2015.5)* 6, ymin = mean - 1.96 * se, ymax = mean + 1.96 * se, color = year, group = year), width = 0, alpha = .5) +
  geom_point(aes(x = yday + (year-2015.5)* 6, y = mean, color = year, group = year), size = .8) +
  geom_line(aes(x = yday + (year-2015.5)* 6, y = mean, color = year, group = year), size = .4) +
  geom_text(data = var_coefs, aes(x = 260, y = y, label = label), size = 2.4) + 
  facet_grid(type~ID, scales = "free_y", labeller = labeller(ID = ID_dict, type = type_dict)) +
  guides(color = guide_legend(title = "year")) +
  custom_theme +
  scale_color_gradient(high = "darkgreen", low = "#76FF92") +
  theme(legend.text = element_text(size = 6, vjust = .5),
        legend.title = element_text(size = 8, vjust = .5),
        legend.key.width = unit(6, "points"),
        legend.key.height = unit(6, "points"),
        legend.position = c(0.062, 0.828) ,
        legend.margin = margin(0, 0, 0, 0)
  ) + 
  labs(x = "year day", y = "mean counts")

print(p.seas)
Seasonality of population dynamics. Data are presented by intermediate host genera for the quadrat sampling (top row, snails/30x30cm quadrat) and time-based method (bottom row, snails/30min search) along with the coefficient of variation (CV) of the monthly count means and the indicative timing of the rainy season (July-September, blue rectangles).

Figure 4: Comparison of mean counts

### FIGURE 5 ###
p.monthdata <- snails_montly_trans %>%
  filter(ID != "stream-both") %>% 
  ggplot(aes(x = time, y = quadrats)) +  
  geom_errorbar(aes(ymin = quadrats - 1.96 * qse, ymax = quadrats + 1.96 * qse), color = "darkgray", size = .25) +
  geom_errorbarh(aes(xmin = time - 1.96 * tse, xmax = time + 1.96 * tse), color = "darkgray", size = .25) +
  geom_point(size = .75) +
  facet_wrap(~ID,  labeller = labeller(ID = ID_dict), scales = "free", nrow = 1) +
  guides(color = "none") +
  labs(x = "time [snails/30min]", y = "quadrats \n [snails/quadrat]") +
  custom_theme

print(p.monthdata)
Comparison of monthly mean counts between the two sampling protocols. Monthly mean quadrat counts are plotted against monthly mean time-based counts by habitat and intermediate host species, with 95% CI in both dimensions (gray error bars, mean \(\pm 1.96\) SE).

Figure S2: Correlations between sampling methods

# Compute correlations
getCorr <- function(x, y, method) {
  test <- cor.test(x, y, method = method, use = "complete")
  df <- data.frame(estimate = test$estimate, 
                   pval = test$p.value) 
  return(df)
}
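
# Usage sketch (hypothetical values):
#   getCorr(x = c(0, 2, 4, 8), y = c(1, 3, 4, 9), method = "spearman")
# returns a one-row data.frame with columns `estimate` and `pval`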

# range of time-based estimates over which to compute correlations
time_sample_range <- seq(5, 60, by = 2)

# loop over ranges and extract correlations
correlations <- foreach(rrange = time_sample_range,
                        .combine = rbind) %do% {
                          
                          snails_montly_trans %>%
                            filter(time < rrange, ID != "stream-both") %>% 
                            nest(-ID) %>% 
                            mutate(corr = map(data, ~getCorr(.$quadrats, .$time, method = "pearson") ),
                                   rcorr = map(data, ~getCorr(.$quadrats, .$time, method = "spearman"))
                            ) %>% 
                            select(-data) %>% 
                            unnest() %>% 
                            mutate(rrange = rrange) %>% 
                            gather(var, value, -ID, -rrange) %>% 
                            mutate(type = case_when(str_detect(var, "1") ~ "spearman", T ~ "pearson"),
                                   valtype = case_when(str_detect(var, "estimate") ~ "corr", T ~ "pval")) %>% 
                            select(-var) %>% 
                            spread(valtype, value)
                        }

### FIGURE S1 ###
p.corr <- correlations %>% 
  inner_join(snails_montly_trans %>% group_by(ID) %>% 
               summarise(mt = max(time, na.rm = T))) %>% 
  group_by(ID) %>% 
  filter(rrange <= mt + 2) %>%
  ggplot(aes(x = rrange, y = corr, group = factor(rrange))) + 
  geom_bar(aes(fill = pval < 0.05), stat = "identity", color = "white", size = .1) +
  geom_hline(aes(yintercept = 0), linetype = 3, color = "gray") +
  facet_grid(type~ID, labeller = labeller(ID = ID_dict)) +
  scale_fill_manual(values = c("darkgray", "black"))+
  labs(x = "range time counts [snails/30min]", y = "correlation") + 
  ylim(c(0, 1)) +
  custom_theme  +
  theme(legend.text = element_text(size = 6, vjust = .5),
        legend.title = element_text(size = 8, vjust = .5),
        legend.key.width = unit(6, "points"),
        legend.key.height = unit(6, "points"),
        legend.position = c(.075, 0.395)) 

print(p.corr)
Correlation between monthly mean densities from each sampling technique. Pearson (top row) and Spearman (bottom row) correlations computed for increasing ranges of time-based mean counts.

Statistical analysis

Model fitting

We fit all combinations of functional form for the transfer function, measurement model, and zero-occurrence function (50 combinations per species-habitat) by maximum likelihood estimation.
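
As a minimal sketch of the approach (hypothetical data; the full set of model functions is defined below), a single combination, here a Michaelis-Menten transfer function with a Poisson measurement model, amounts to minimizing a negative log-likelihood with optim, with parameters exponentiated to enforce positivity:

# sketch: Poisson MLE of a Michaelis-Menten transfer function (hypothetical data)
x <- c(2, 5, 10, 20, 40)   # time-based counts
y <- c(0, 1, 1, 2, 3)      # paired quadrat counts
nll <- function(p) {
  mu <- exp(p["m_a"]) * x / (exp(p["m_b"]) + x)  # saturating mean
  -sum(dpois(y, pmax(mu, 1e-8), log = TRUE))     # negative log-likelihood
}
optim(par = c(m_a = -.5, m_b = -.5), fn = nll)$par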

# Re-run model fitting? (takes approximately 1h on a computer with 8 3.70GHz processors)
do.fit <- F 

# Define models -----

# Function forms of transfer functions
# Linear function
linear <- function(pars, x){
  # with exponential link for positivity
  return(exp(pars["m_a"]) + exp(pars["m_b"])*x)
}

#  Michaelis-Menten function (i.e. Holling type II)
michaelisMenten <- function(pars, x){
  return(exp(pars["m_a"])*x/(exp(pars["m_b"])+x))
}

# Mono molecular function
monoMolecular <- function(pars, x){
  return(exp(pars["m_a"]) * (1-exp(-exp(pars["m_b"])*x)))
}

# Gompertz function
gompertz <- function(pars, x){
  return(exp(pars["m_a"]) * exp(-exp(exp(pars["m_b"]) - exp(pars["m_c"])*x)))
}

# Logistic function
logistic <- function(pars, x){
  return(exp(pars["m_a"]) / (1 + exp(exp(pars["m_b"]) - exp(pars["m_c"])*x)))
}

# Functional forms of zero-occurrence
# constant probability of detection
sigmaCnst <- function(pars, x){
  pmax(1e-8, 1 - 1 / (1 + exp(-(rep(pars["s_a"], length(x))))))
}

# logistic probability of detection
sigmaLogistic <- function(pars, x){
  # exponentiate slope to ensure positive value
  pars["s_b"] <- exp(pars["s_b"])
  pmax(1e-8, 1 - 1 / (1 + exp(-(matrix(pars[c("s_a", "s_b")], nrow = 1) %*% rbind(1, x)))))
}

# Measurement models to compute the negative log-likelihood of a parameter set (ll)
# Poisson likelihood
poissFit <- function(pars, x, y, f, indmu, ...){
  # mean 
  mu <- f(pars[indmu], x)
  # set values of mu=0 to a small value to avoid numerical error with logs
  mu <- pmax(mu, 1e-8)
  # return likelihood
  -(sum(-mu + y * log(mu) - log(factorial(y))))
}

# Negative-binomial likelihood
negbinFit <- function(pars, x, y, f, indmu, ...){
  # mean
  mu <- f(pars[indmu], x)
  # set values of mu=0 to a small value to avoid numerical error with logs
  mu <- pmax(mu, 1e-8)
  # parameter of the NB
  k <- exp(pars["ll_k"])
  
  if(k > 20) {
    # return poisson likelihood
    -(sum(-mu + y * log(mu) - log(factorial(y))))
  } else {
    # return NB likelihood
    -(sum(log(gamma(k + y)) - log(gamma(k)) + k * log(k/(k+mu)) + y * log(mu/(k+mu)) - log(factorial(y)) ))
  }
}

# zero-inflated poisson
zipFit <- function(pars, x, y, f, fsigma, indmu, inds, ...){
  # mean
  mu <- f(pars[indmu], x)
  # set values of mu=0 to a small value to avoid numerical error with logs
  mu <- pmax(mu, 1e-8)
  # zero-inflation
  sigma <- fsigma(pars[inds], x)
  
  -(sum(log(sigma[y == 0] + (1-sigma[y == 0]) * exp(-mu[y == 0]))) + 
      sum(log(1 - sigma[y > 0]) + y[y > 0] * log(mu[y > 0]) - mu[y>0] - log(factorial(y[y>0]))))
}
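
# For reference, zipFit above returns the negative of the zero-inflated Poisson
# log-likelihood:
#   sum_{y=0} log(sigma + (1 - sigma) * exp(-mu))
#     + sum_{y>0} [log(1 - sigma) + y * log(mu) - mu - log(y!)]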


# zero-inflated negative binomial
zinbFit <- function(pars, x, y, f, fsigma, indmu, inds, ...){
  # mean
  mu <- f(pars[indmu], x)
  # set values of mu=0 to a small value to avoid numerical error with logs
  mu <- pmax(mu, 1e-8)
  # parameter of the NB
  k <- exp(pars["ll_k"])
  # zero-inflation
  sigma <- fsigma(pars[inds], x)
  
  if(k > 20) {
    # return poisson likelihood
    -(sum(log(sigma[y == 0] +(1-sigma[y == 0]) * exp(-mu[y == 0]))) + 
        sum(log(1 - sigma[y > 0]) + y[y > 0] * log(mu[y > 0]) - mu[y>0] - log(factorial(y[y>0]))))
  } else {
    # return NB likelihood
    -(sum(log(sigma[y == 0] + (1-sigma[y == 0]) * (k/(k+mu[y == 0]))^k)) + 
        sum(log(1 - sigma[y > 0]) + log(gamma(k + y[y>0])) - log(gamma(k)) +
              k * log(k/(k+mu[y>0])) + y[y>0] * log(mu[y>0]/(k+mu[y>0])) - log(factorial(y[y>0]))))
  }
  
}

# hurdle poisson (zero-truncated poisson)
hupFit <- function(pars, x, y, f, fsigma, indmu, inds, ...){
  # mean
  mu <- f(pars[indmu], x)
  # set values of mu=0 to a small value to avoid numerical error with logs
  mu <- pmax(mu, 1e-8)
  # hurdle
  sigma <- fsigma(pars[inds], x)
  
  -(sum(log(sigma[y == 0])) + sum(log(1 - sigma[y > 0]) + y[y > 0] * log(mu[y > 0]) - mu[y>0] - log(1-exp(-mu[y>0])) - log(factorial(y[y>0]))))
}

# hurdle NB (zero-truncated NB)
hunbFit <- function(pars, x, y, f, fsigma, indmu, inds, ...){
  # mean
  mu <- f(pars[indmu], x)
  # set values of mu=0 to a small value to avoid numerical error with logs
  mu <- pmax(mu, 1e-8)
  # parameter of the NB
  k <- exp(pars["ll_k"])
  # zero-inflation
  sigma <- fsigma(pars[inds], x)
  
  if(k > 20) {
    # return poisson hurdle likelihood
    -(sum(log(sigma[y == 0])) + sum(log(1 - sigma[y > 0]) + y[y > 0] * log(mu[y > 0]) - mu[y>0] - log(1-exp(-mu[y>0])) - log(factorial(y[y>0]))))
  } else {
    # return NB hurdle likelihood
    -(sum(log(sigma[y == 0])) + sum(log(1 - sigma[y > 0]) + log(gamma(k + y[y>0])) - log(gamma(k)) - log(factorial(y[y>0])) + k * log(k/(k+mu[y>0])) + y[y>0] * log(mu[y>0]/(k+mu[y>0])) - log(1 - (k/(k+mu[y>0]))^k)))
  }
}

# initial parameters for transfer functions
parinit_m <-  c(-.5, -.5, -.5)
names(parinit_m) <- c("m_a", "m_b", "m_c")
# initial parameters for sigma functions
parinit_s <-  c(-.5, -1)
names(parinit_s) <- c("s_a", "s_b")
# initial parameters for likelihood models
parinit_ll <-  c(1)
names(parinit_ll) <- c("ll_k")

# combinations of models and parameters
parinit <- list(
  poisson = parinit_m,
  zip = c(parinit_m, parinit_s),
  negbin = c(parinit_m, parinit_ll),
  zinb = c(parinit_m, parinit_s, parinit_ll),
  hup = c(parinit_m, parinit_s),
  hunb = c(parinit_m, parinit_s, parinit_ll)
)

# lists of functions
f.list <- list(linear = linear, michaelisMenten = michaelisMenten, monoMolecular = monoMolecular, gompertz = gompertz, logistic = logistic)
sigma.list <- list(nosigma = NULL, sigmaCnst = sigmaCnst, sigmaLog = sigmaLogistic)
ll.list <- list(poisson = poissFit, zip = zipFit, negbin = negbinFit, zinb = zinbFit, hup = hupFit, hunb = hunbFit)

# helper to catch errors
checkVal <- function(evaluation) {
  if(inherits(evaluation, "try-error")){
    T
  } else {
    evaluation
  }
}

# function to test initial parameters as feasible during optimization
testInitialParamters <- function(init_grid, bounds, f, ll, fsigma, data) {
  foreach(r = iter(init_grid, by = "row"),
          .combine = rbind) %do%{
            r <- unlist(r)
            cntr <- 1
            while(checkVal(try(ll(r, x = data$counts.t, y = data$counts.q, f = f, fsigma = fsigma) > 1000)) & cntr < 10000) {
              r <- runif(nrow(bounds), bounds[, "lower"], bounds[, "upper"])
              names(r) <- rownames(bounds)
              cntr <- cntr+1
            }
            r
          }
}


# Fit models -----

fit_results.file <- "..//perezsaez-et-al_2019_snail-sampling_stat-fits.rda"

if (!do.fit | !file.exists(fit_results.file)) {
  # load results if not asked to re-compute
  load(fit_results.file)
  
} else {
  
  # set up parallel computation
  cl <- makeCluster(parallel::detectCores())
  registerDoSNOW(cl)
  
  # number of initial positions from which to start the optimization
  ninit <- 20
  
  # vector of ranges of time-based counts over which to compute model fits (200 corresponds to all the data in all cases)
  time_sample_range2 <- c(seq(4, 30, by = 2), 200)
  
  # Fit model combinations
  fit_data <- foreach(dat = isplit(snails_q_monthly_t, snails_q_monthly_t$ID),
                      .combine = rbind) %:%
    foreach(rrange = time_sample_range2,
            .combine = rbind) %:%
    foreach(f = f.list,
            fname = names(f.list),
            .combine = rbind) %:%
    foreach(fsigma = sigma.list,
            sname = names(sigma.list),
            .combine = rbind) %:%
    foreach(ll = ll.list,
            llname = names(ll.list),
            pinit = parinit,
            .combine = rbind,
            .inorder = F,
            .errorhandling = "stop",
            .packages = c("tidyverse", "foreach", "pomp", "magrittr", "iterators", "itertools")) %dopar%  {
              
              # filter range of time-based counts
              dat$value <- filter(dat$value, counts.t < rrange)
              
              if((!str_detect(llname, "z|h") & !is.null(fsigma)) | (str_detect(llname, "z|h") & is.null(fsigma))) {
                # exit loop for unnecessary combinations of sigma and ll models
                return(NULL)
              } 
              
              # adjust initial parameter vectors
              if(sname == "sigmaCnst") {
                pinit <- pinit[names(pinit) != "s_b"]
              }
              
              if(fname != "gompertz" & fname != "logistic") {
                pinit <- pinit[names(pinit) != "m_c"]
              }
              
              # grid of initial parameter vectors to test
              bounds <- do.call(rbind, lapply(pinit, function(x) qnorm(c(.025, .975), x, 3*abs(x)))) %>% 
                set_colnames(c("lower", "upper"))
              
              init_grid <- pomp::sobolDesign(lower = bounds[, "lower"],
                                             upper = bounds[, "upper"],
                                             nseq = ninit)
              
              # test initial values as feasible
              init_grid_valid <- testInitialParamters(init_grid, bounds, f, ll, fsigma, dat$value) 
              
              # optimize loglik for each initial guess
              optim_results <- foreach(p = iter(init_grid_valid, by = "row"),
                                       .combine =  rbind) %do% {
                                         p <- as.numeric(p)
                                         names(p) <- rownames(bounds)
                                         # optimize
                                         res <- optim(par = p, fn = ll,
                                                      x = dat$value$counts.t, y = dat$value$counts.q,
                                                      f = f, fsigma = fsigma,
                                                      indmu = str_detect(names(p), "m_"),
                                                      inds = str_detect(names(p), "s_"))
                                         # return result
                                         tibble(loglik = res$value) %>% 
                                           cbind(t(res$par))
                                       } %>% 
                as_tibble()
              
              # get best result
              best <- optim_results %>% 
                # filter out failed optimizations
                filter(loglik > 30, loglik < 1e3) %>% 
                filter(loglik == min(loglik))
              
              # format results for return
              tibble(rrange = rrange,
                     llmodel = llname,
                     model = fname,
                     smodel = sname,
                     ID = dat$key[[1]],
                     ll = best$loglik[1],
                     npar = ncol(select(best, -loglik)),
                     params = list(select(best, -loglik))) %>% 
                mutate(AICc = 2 * ll + 2 * npar + 2 * npar * (npar+1) /(nrow(dat$value) - npar - 1))
            }
  
  stopCluster(cl)
  
  # compute AIC weights
  fit_data %<>% 
    group_by(ID, rrange) %>% 
    mutate(habitat = str_split(ID, "-")[[1]][1],
           species = str_split(ID, "-")[[1]][2],
           w = exp(-(AICc-min(AICc))/2),
           w = w/sum(w)) %>% 
    arrange(desc(w)) %>% 
    mutate(cumw = cumsum(w)) %>% 
    ungroup()
  
  save(fit_data, file = fit_results.file)
}
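
In the code above, ll is the negative log-likelihood returned by the fit functions, so for a model with \(p\) parameters fitted to \(n\) observations the small-sample criterion is \(AICc = 2\,ll + 2p + \frac{2p(p+1)}{n-p-1}\), and the Akaike weights used below are \(w_i = \exp(-\Delta_i/2)/\sum_j \exp(-\Delta_j/2)\) with \(\Delta_i = AICc_i - \min_j AICc_j\).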

Table S1: Statistical model fits

Statistical model fits:

fit_data %>% 
  head

All model selection results:

# Dictionaries for labeling
species_dict <- c("bulinus" = "Bulinus spp.",
                  "biomphalaria" = "B. pfeifferi",
                  "both" = "both")

habitat_dict <- c("river" = "River",
                  "pond" = "Pond",
                  "stream" = "Stream")

smodel_dict <- c("nosigma" = "-",
                 "sigmaCnst" = "constant",
                 "sigmaLog" = "logistic")

# Get the 95% confidence set
model_selection_results <- fit_data %>% 
  filter(rrange == 200) %>% 
  ungroup %>% 
  mutate(ID = factor(ID, levels = c(ID_levels, "stream-both"))) %>%
  group_by(ID) %>% 
  arrange(AICc) %>% 
  filter(cumw <= .959) 

# Grouping of results for pretty display
groupings <- model_selection_results %>%
  ungroup %>% 
  mutate(rownum = row_number()) %>% 
  group_by(ID) %>% 
  summarise(sind = min(rownum),
            eind = max(rownum))

getGroups <- function(ID, ID_dict, groupings) {
  return(list(group_label = ID_dict[ID],
              start_row = unlist(groupings$sind[groupings$ID == ID]),
              end_row = unlist(groupings$eind[groupings$ID == ID]))
  )
}

model_selection_table <- model_selection_results %>% 
  # Pretty display of species and habitat
  mutate(species = species_dict[species],
         species = case_when(row_number() == 1 ~ species, T ~ ""),
         habitat = habitat_dict[habitat],
         habitat = case_when(row_number() == 1 ~ habitat, T ~ ""),
         smodel = smodel_dict[smodel]) %>% 
  ungroup %>% 
  select(model, llmodel, smodel, ll, npar, AICc, w, cumw) %>% 
  knitr::kable(digits = 2, caption = "Model selection results") %>% 
  kable_styling(font_size = 12, full_width = F) 

for(id in unique(fit_data$ID)) {
  model_selection_table %<>%  {do.call(what = pack_rows, args = c(list(kable_input = .), getGroups(id, ID_dict, groupings)))}
}

### TABLE S1 ###
# output model selection fits for 95% confidence set based on AIC weights
model_selection_table %>%
  scroll_box(width = "100%", height = "500px") 
Model selection results

model            llmodel  smodel    ll      npar  AICc     w     cumw

River (Bulinus)
monoMolecular    zip      constant  113.29  3     232.77   0.12  0.12
michaelisMenten  zip      constant  113.36  3     232.92   0.11  0.24
gompertz         zip      constant  112.48  4     233.29   0.09  0.33
linear           zip      constant  113.65  3     233.49   0.09  0.42
logistic         zip      constant  112.76  4     233.85   0.07  0.49
monoMolecular    zinb     constant  113.29  4     234.90   0.04  0.53
monoMolecular    zip      logistic  113.29  4     234.90   0.04  0.57
michaelisMenten  zinb     constant  113.36  4     235.05   0.04  0.61
michaelisMenten  zip      logistic  113.36  4     235.05   0.04  0.65
gompertz         zinb     constant  112.48  5     235.46   0.03  0.69
gompertz         zip      logistic  112.48  5     235.46   0.03  0.72
monoMolecular    negbin   -         114.70  3     235.59   0.03  0.75
linear           zip      logistic  113.65  4     235.62   0.03  0.78
linear           zinb     constant  113.65  4     235.62   0.03  0.81
michaelisMenten  negbin   -         114.87  3     235.93   0.03  0.83
logistic         zinb     constant  112.76  5     236.01   0.02  0.86
logistic         zip      logistic  112.76  5     236.01   0.02  0.88
gompertz         negbin   -         113.89  4     236.10   0.02  0.90
logistic         negbin   -         114.19  4     236.70   0.02  0.92
monoMolecular    zinb     logistic  113.29  5     237.07   0.01  0.94
michaelisMenten  zinb     logistic  113.36  5     237.22   0.01  0.95

Pond (Bulinus)
logistic         zip      constant  188.26  4     384.84   0.31  0.31
gompertz         zip      constant  189.05  4     386.43   0.14  0.45
logistic         zip      logistic  188.20  5     386.89   0.11  0.56
logistic         zinb     constant  188.26  5     387.01   0.10  0.66
monoMolecular    zip      constant  191.09  3     388.38   0.05  0.71
gompertz         zip      logistic  188.96  5     388.40   0.05  0.77
gompertz         zinb     constant  189.05  5     388.60   0.05  0.81
logistic         zinb     logistic  188.25  6     389.19   0.03  0.85
monoMolecular    zip      logistic  190.55  4     389.43   0.03  0.88
michaelisMenten  zip      constant  191.65  3     389.49   0.03  0.91
monoMolecular    zinb     constant  191.09  4     390.51   0.02  0.93
michaelisMenten  zip      logistic  191.13  4     390.59   0.02  0.94

Stream (B. pfeifferi)
michaelisMenten  zip      constant  358.18  3     722.44   0.19  0.19
michaelisMenten  zip      logistic  357.61  4     723.37   0.12  0.32
monoMolecular    zip      constant  358.87  3     723.83   0.10  0.41
michaelisMenten  zinb     constant  358.18  4     724.50   0.07  0.48
monoMolecular    zip      logistic  358.31  4     724.77   0.06  0.54
gompertz         zip      constant  358.42  4     724.99   0.05  0.60
logistic         zip      constant  358.57  4     725.30   0.05  0.64
michaelisMenten  zinb     logistic  357.61  5     725.45   0.04  0.69
monoMolecular    zinb     constant  358.87  4     725.88   0.03  0.72
gompertz         zip      logistic  357.84  5     725.90   0.03  0.76
logistic         zip      logistic  357.94  5     726.10   0.03  0.79
linear           zip      constant  360.35  3     726.80   0.02  0.81
linear           zip      logistic  359.33  4     726.81   0.02  0.83
monoMolecular    zinb     logistic  358.31  5     726.85   0.02  0.85
gompertz         zinb     constant  358.42  5     727.06   0.02  0.87
logistic         zinb     constant  358.57  5     727.37   0.02  0.89
linear           hup      logistic  359.64  4     727.44   0.02  0.90
michaelisMenten  hup      logistic  359.89  4     727.93   0.01  0.92
gompertz         zinb     logistic  357.84  6     727.99   0.01  0.93
logistic         zinb     logistic  357.94  6     728.19   0.01  0.94
logistic         hup      logistic  359.16  5     728.55   0.01  0.95
gompertz         hup      logistic  359.18  5     728.59   0.01  0.96

Stream (Bulinus)
michaelisMenten  negbin   -         460.71  3     927.52   0.22  0.22
linear           zinb     logistic  458.70  5     927.62   0.21  0.43
monoMolecular    negbin   -         461.27  3     928.62   0.13  0.56
michaelisMenten  zinb     constant  460.53  4     929.21   0.09  0.65
linear           negbin   -         461.94  3     929.98   0.06  0.72
monoMolecular    zinb     constant  461.12  4     930.38   0.05  0.77
gompertz         zinb     logistic  459.12  6     930.55   0.05  0.82
michaelisMenten  zinb     logistic  460.39  5     931.02   0.04  0.86
logistic         zinb     logistic  459.49  6     931.30   0.03  0.89
linear           zinb     constant  461.62  4     931.39   0.03  0.92
monoMolecular    zinb     logistic  461.12  5     932.46   0.02  0.94
gompertz         negbin   -         462.33  4     932.80   0.02  0.96

Stream (both)
michaelisMenten  zinb     constant  552.43  4     1113.01  0.19  0.19
michaelisMenten  negbin   -         553.60  3     1113.30  0.17  0.36
monoMolecular    zinb     constant  552.73  4     1113.61  0.14  0.51
monoMolecular    negbin   -         553.92  3     1113.93  0.12  0.63
michaelisMenten  zinb     logistic  552.13  5     1114.49  0.09  0.72
monoMolecular    zinb     logistic  552.47  5     1115.17  0.07  0.79
michaelisMenten  hunb     logistic  553.26  5     1116.74  0.03  0.82
monoMolecular    hunb     logistic  553.42  5     1117.07  0.03  0.84
gompertz         zinb     constant  553.47  5     1117.16  0.02  0.87
linear           negbin   -         555.56  3     1117.21  0.02  0.89
gompertz         negbin   -         554.62  4     1117.39  0.02  0.91
linear           zinb     constant  554.72  4     1117.59  0.02  0.93
logistic         zinb     constant  554.14  5     1118.50  0.01  0.95
gompertz         zinb     logistic  553.16  6     1118.64  0.01  0.96

Table 3: Model support

### TABLE 3 ###
# compute relative support for saturating vs. non-saturating models
fit_data %>% 
  filter(rrange == 200) %>% 
  group_by(species, habitat) %>% 
  summarise(saturating = sum(w[model != "linear"]),
            inflation = sum(w[str_detect(llmodel, "z")]),
            sigma_cnstL= sum(w[smodel == "sigmaCnst"])/(sum(w[smodel == "sigmaCnst"]) + sum(w[smodel == "sigmaLog"]))
  ) %>% 
  ungroup %>% 
  arrange(habitat) %>% 
  set_colnames(c("Species", "Habitat", "Saturation", "Zero-inflation", "Constant vs. Logistic")) %>% 
  knitr::kable(digits = 2, align = "c", caption = "Model structure probabilities. Probabilities were computed by summing AIC weights for the functional form of the transfer function and for zero-inflation in the measurement model. The probability of constant vs. logistic zero-inflation parameter was computed using the ratio of their respective AIC weights to their sum.") %>% 
  kable_styling(font_size = 12, full_width = F)  %>%
  add_header_above(c(" " = 2, "Functional form" = 1, "Measurement model" = 2))
Model structure probabilities. Probabilities were computed by summing AIC weights for the functional form of the transfer function and for zero-inflation in the measurement model. The probability of constant vs. logistic zero-inflation parameter was computed using the ratio of their respective AIC weights to their sum.

                         Functional form   Measurement model
Species       Habitat    Saturation        Zero-inflation   Constant vs. Logistic
bulinus       pond       1.00              0.99             0.71
bulinus       river      0.83              0.88             0.74
biomphalaria  stream     0.92              0.93             0.56
both          stream     0.94              0.58             0.61
bulinus       stream     0.69              0.55             0.35

Compute parameter variance through bootstrapping

We estimate the variance of model parameters by bootstrapping, using 1000 random draws of the data with replacement for each species-habitat combination.
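
A minimal sketch of the resampling step, where df and fitOnce are hypothetical stand-ins for a species-habitat data subset and a function returning one maximum-likelihood estimate:

# sketch of the nonparametric bootstrap (df and fitOnce are hypothetical)
boot_estimates <- replicate(1000, {
  idx <- sample(nrow(df), replace = TRUE)  # resample rows with replacement
  fitOnce(df[idx, ])                       # refit on the resample
})
var(boot_estimates)                        # bootstrap variance of the estimate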

paramvariance_results.file <- "..//perezsaez-et-al_2019_snail-sampling_param-variance.rda"

# Re-run parameter variance estimation with bootstrapping? (takes approximately 2h on a computer with 8 3.70GHz processors)
do.var <- F

if (!do.var | !file.exists(paramvariance_results.file)) {
  # load results if not asked to re-compute
  load(paramvariance_results.file)
  
} else {
  
  # number of bootstraps
  nboot <- 1000
  ninit <- 5
  
  # set up parallel computation
  cl <- makeCluster(parallel::detectCores()-1)
  registerDoSNOW(cl)
  
  system.time({
    param_variance <- foreach(fit = iter(fit_data %>% 
                                           ungroup %>% 
                                           filter(cumw < .95), by = "row"),
                              # it = icount(10),
                              .inorder = F,
                              .combine = rbind,
                              .packages = c("tidyverse", "foreach", "magrittr", "itertools", "iterators")) %dopar% {
                                
                                # extract models
                                fsigma <- sigma.list[[fit$smodel]]
                                ll <- ll.list[[fit$llmodel]]
                                f <- f.list[[fit$model]]
                                best_pars <- unlist(fit$params[[1]])
                                pinit <- parinit[[fit$llmodel]]
                                full_dat <- filter(snails_q_monthly_t, ID == fit$ID) 
                                
                                # adjust initial parameter vectors
                                if(fit$smodel == "sigmaCnst") {
                                  pinit <- pinit[names(pinit) != "s_b"]
                                }
                                
                                if(fit$model != "gompertz" & fit$model != "logistic") {
                                  pinit <- pinit[names(pinit) != "m_c"]
                                }
                                
                                # grid of initial parameter vectors to test
                                bounds <- do.call(rbind, lapply(pinit, function(x) qnorm(c(.5, .75), x, max(3*abs(x), .5)))) %>% 
                                  set_colnames(c("lower", "upper"))
                                
                                init_grid <- pomp::sobolDesign(lower = bounds[, "lower"],
                                                               upper = bounds[, "upper"],
                                                               nseq = ninit)
                                
                                # compute best parameters for the bootstrap
                                boot_par <- foreach(bootit = icount(nboot),
                                                    .combine = rbind,
                                                    .inorder = F) %do% {
                                                      # take random bootstrap sampling with replacement
                                                      iboot <- sample(1:nrow(full_dat), replace = TRUE)
                                                      dat <- full_dat[iboot,]
                                                      
                                                      # test initial values as feasible
                                                      init_grid_valid <- testInitialParamters(init_grid, bounds, f, ll, fsigma, dat)
                                                      # optimize loglik for each initial guess
                                                      optim_results <- foreach(p = iter(init_grid_valid, by = "row"),
                                                                               .combine =  rbind) %do% {
                                                                                 p <- as.numeric(p)
                                                                                 names(p) <- rownames(bounds)
                                                                                 # optimize
                                                                                 res <- optim(par = p, fn = ll, x = dat$counts.t, y = dat$counts.q, f = f, fsigma = fsigma)
                                                                                 
                                                                                 tibble(loglik = res$value,
                                                                                        par = names(res$par),
                                                                                        parval = res$par)
                                                                               } %>% 
                                                        as_tibble()
                                                      
                                                      
                                                      # get best result
                                                      best <- optim_results %>% 
                                                        filter(loglik > 30, loglik < 1e3) %>% 
                                                        filter(loglik == min(loglik))
                                                    } 
                                boot_par %>% 
                                  group_by(par) %>% 
                                  summarise(mean = mean(parval),
                                            var = var(parval),
                                            q025 = quantile(parval, 0.025),
                                            q975 = quantile(parval, 0.975)) %>% 
                                  mutate(ID = fit$ID) %>% 
                                  inner_join(select(fit, -params))
                              }
  })
  
  stopCluster(cl)
  
  save(param_variance, file = paramvariance_results.file)
}

Table S2: Saturation Parameter
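
The unconditional (model-averaged) estimate of a parameter \(\theta\) combines the per-model bootstrap means and variances with the AIC weights, as implemented in getParamVariance below: \(\bar{\theta} = \sum_i w_i \hat{\theta}_i\) and \(\widehat{\mathrm{var}}(\bar{\theta}) = \big[\sum_i w_i \sqrt{\mathrm{var}(\hat{\theta}_i) + (\hat{\theta}_i - \bar{\theta})^2}\big]^2\).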

# function to extract unconditional variance of a given parameter 
getParamVariance <- function(variance_results, param, transform, do.sqm = F) {
  # compute saturation plateau
  variance_results %>% 
    filter(par == param) %>% 
    group_by(ID) %>% 
    summarise(mean_par = sum(mean*w),
              var_par = sum(w * (var + (mean - sum(mean*w))^2)^.5)^2) %>% 
    group_by(ID) %>% 
    mutate(se = sqrt(var_par),
           mean = transform(mean_par),
           ci = list(map_dbl(c(-1.96, 1.96), ~transform(mean_par + . * se))),
           lo = min(unlist(ci)),
           hi = max(unlist(ci))) %>% 
    select(ID, mean, lo, hi) %>% 
    gather(variable, value, -ID) %>% 
    {
      if(do.sqm) {
        # transform densities from quadrats to square meters (each quadrat is 30x30cm = 0.09 m^2)
        mutate(., valuesqm = value/(0.3)^2)
      } else {
        .
      }
    } %>% 
    gather(type,value, -ID, -variable) %>% 
    mutate(value = sprintf("%.2f", value),
           value = str_trim(value)) %>% 
    group_by(ID, type) %>% 
    summarise(string = str_c(value[variable=="mean"], " (", value[variable=="lo"], "-", value[variable=="hi"], ")")) %>% 
    spread(type, string) %>% 
    # ungroup %>% 
    mutate(species = str_split(ID, "-")[[1]][2],
           habitat = str_split(ID, "-")[[1]][1])%>% 
    ungroup %>%  {
      if(do.sqm) {
        # transform densities from quadrats to square meters (each quadrat is 30x30cm = 0.09 m^2)
        select(., species, habitat , value, valuesqm) 
      } else {
        select(., species, habitat , value) 
      }
    }
}

### TABLE S2 ###
# Extract unconditional variance of the saturation parameter in the transfer functions
getParamVariance(variance_results = param_variance %>% 
                   filter(model != "linear"),
                 param = "m_a",
                 transform = exp,
                 do.sqm = T
) %>% 
  set_colnames(c("Species", "Habitat", "snails/quadrat", "snails/m2")) %>% 
  knitr::kable(align = "c", caption = "Estimates of the saturation of the transfer function between time and quadrat counts. Unconditional mean and 95\\% confidence intervals (in parenthesis). Note that these values do not correspond to absolute densities which require in addition the accounting for zero-inflation in counts.")%>% 
  kable_styling(font_size = 12, full_width = F)
Estimates of the saturation of the transfer function between time and quadrat counts. Unconditional mean and 95% confidence intervals (in parenthesis). Note that these values do not correspond to absolute densities which require in addition the accounting for zero-inflation in counts.
Species       Habitat   snails/quadrat        snails/m2
bulinus       pond      2.70 (2.06-3.53)      29.95 (22.86-39.23)
bulinus       river     7.17 (0.00-13030.09)  79.72 (0.04-144778.76)
biomphalaria  stream    2.14 (1.37-3.36)      23.81 (15.17-37.38)
both          stream    4.47 (2.45-8.13)      49.62 (27.26-90.34)
bulinus       stream    2.46 (1.05-5.75)      27.28 (11.65-63.91)

Figure S3: Support for saturation for different ranges of time-based counts
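
The plotting code for this figure is not reproduced here; a sketch of how the support values could be derived from fit_data, mirroring the Table 3 computation applied separately to each range of time-based counts:

# sketch: support for saturation per range of time-based counts (as in Table 3)
support_by_range <- fit_data %>%
  group_by(ID, rrange) %>%
  summarise(saturating = sum(w[model != "linear"])) %>%
  ungroup()

ggplot(support_by_range, aes(x = rrange, y = saturating)) +
  geom_point() +
  facet_wrap(~ID, labeller = labeller(ID = ID_dict)) +
  labs(x = "range time counts [snails/30min]", y = "support for saturation")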

Support for saturation in quadrat counts for different ranges of time-based counts. Support was computed as in Table 3. Each point corresponds to the support of saturating vs. non-saturating models when considering only sampling data within the given range of time-based counts. The absence of points below 20 snails/30min is due to the absence of data.

Table S3: Zero-inflation parameter

# function to compute constant sigma
sigmaCnstSimple <- function(par) {
  1-1/(1+exp(-par))
}

### TABLE S3 ###
# Extract unconditional variance of the constant zero-inflation parameter
getParamVariance(variance_results = param_variance %>% 
                   filter(smodel == "sigmaCnst", !str_detect(llmodel, "^h")),
                 param = "s_a",
                 transform = sigmaCnstSimple,
                 do.sqm = F
) %>% 
  set_colnames(c("Species", "Habitat", "$\\sigma$")) %>% 
  knitr::kable(align = "c", caption = "Zero-inflation parameter estimates. Unconditional mean and 95\\% confidence intervals (in parenthesis).")%>% 
  kable_styling(font_size = 12, full_width = F)
Zero-inflation parameter estimates. Unconditional mean and 95% confidence intervals (in parenthesis).
Species       Habitat   \(\sigma\)
bulinus       pond      0.38 (0.28-0.49)
bulinus       river     0.41 (0.05-0.90)
biomphalaria  stream    0.42 (0.33-0.51)
both          stream    0.14 (0.00-1.00)
bulinus       stream    0.15 (0.00-1.00)

Simulate quadrat counts

Finally, we run 5000 simulations of the quadrat counts based on the 95% model confidence set.
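
Each simulation first draws one model from the confidence set with probability proportional to its AIC weight (via the cumulative weights), then simulates a count from that model. A stripped-down sketch of the draw, with hypothetical cumulative weights:

# sketch: sample a model index proportionally to AIC weight via cumulative weights
cumw <- c(0.19, 0.36, 0.51, 0.63)  # hypothetical cumulative weights of a confidence set
rind <- which(cumw > runif(1, min = 0, max = max(cumw)))[1]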

# Re-do simulations? (takes approximately 1h on a computer with 8 3.70GHz processors)
do.sim <- F

sim_results.file <- "..//perezsaez-et-al_2019_snail-sampling_simulations.rda"

# values to predict
xpred <- seq(0, 120, by = .5)

if(!do.sim | !file.exists(sim_results.file)) {
  # load simulation results if not asked to re-run
  load(sim_results.file)
  
} else {
  
  # function to extract mean of simulations and quantiles
  getQuants <- function(sims, probs) {
    # compute quantiles
    qnts <- quantile(sims, probs)
    names(qnts) <- str_c("q", str_replace(probs, "\\.", ""))
    # extract
    data.frame(t(qnts)) %>% 
      as_tibble() %>% 
      mutate(meansim = mean(sims),
             sdsim = sd(sims))
  }
  
  # poisson
  simPoiss <- function(mu, nsim, ...) {
    # simulate
    sims <- rpois(nsim, mu)
    return(sims)
  }
  
  # negative binomial
  simNB <- function(mu, pars, nsim, ...) {
    # extract clumping parameter of NB
    k <- exp(pars["ll_k"])
    # simulate
    sims <- rnbinom(nsim, mu = mu, size = k)
    return(sims)
  }
  
  # zero-inflated poisson
  simZip <- function(mu, sigma, nsim, ...) {
    # simulate
    sims <- rpois(nsim, mu) * as.integer(rbinom(nsim, size = 1, prob = 1-sigma) > 0)
    return(sims)
  }
  
  # zero-inflated NB
  simZinb <- function(mu, pars, sigma, nsim, ...) {
    # extract clumping parameter of NB
    k <- exp(pars["ll_k"])
    # simulate
    sims <- rnbinom(nsim, mu = mu, size = k) * as.integer(rbinom(nsim, size = 1, prob = 1-sigma) > 0)
    return(sims)
  }
  
  # hurdle Poisson
  simHup <- function(mu, sigma, nsim, ...) {
    # simulate using inverse cdf
    sims <-  qpois(runif(nsim, dpois(0, mu), 1), mu)  * as.integer(runif(nsim) > rep(sigma, nsim))
    return(sims)
  }
  
  # hurdle NB
  simHunb <- function(mu, pars, sigma, nsim,...) {
    # extract clumping parameter of NB
    k <- exp(pars["ll_k"])
    # simulate using inverse cdf
    sims <- qnbinom(runif(nsim, dnbinom(0, mu = mu, size = k), 1), mu = mu, size = k) * as.integer(runif(nsim) > rep(sigma, nsim))
    return(sims)
  }
  
  # list of simulation functions
  simfun.list <- list(poisson = simPoiss, zip = simZip, negbin = simNB, zinb =  simZinb, hup = simHup, hunb = simHunb)
  
  # extract the 95% model confidence set
  confidence_set <- filter(fit_data, cumw <= 0.95)
  
  # number of simulations for monte carlo prediction intervals
  nsim <- 5000
  
  
  # set up parallel computation
  cl <- makeCluster(parallel::detectCores())
  registerDoSNOW(cl)
  
  # monte carlo simulation for prediction intervals
  simulations <- foreach(fit = isplit(confidence_set, confidence_set$ID),
                         .combine = rbind,
                         .packages = c("tidyverse", "foreach", "iterators", "itertools")) %dopar% {
                           
                           
                           sims <- foreach(simit = icount(nsim),
                                           .combine = rbind) %do% {
                                             
                                             # get random model among available ones
                                             rind <- which(fit$value$cumw > runif(1, min = 0, max = max(fit$value$cumw)))[1]
                                             
                                             # extract models
                                             fsigma <- sigma.list[[fit$value$smodel[rind]]]
                                             f <- f.list[[fit$value$model[rind]]]
                                             fsim <- simfun.list[[fit$value$llmodel[rind]]]
                                             best_pars <- as.numeric(fit$value$params[rind][[1]])
                                             names(best_pars) <- colnames(fit$value$params[rind][[1]])
                                             
                                             # simulate
                                             foreach(x = xpred, .combine = c) %do% {
                                               fsim(mu = f(best_pars, x), sigma = fsigma(best_pars, x), pars = best_pars, nsim = 1)
                                             }
                                           }
                           
                           qnts <- do.call(rbind, apply(sims, 2, getQuants, probs = c(0.025, 0.25, 0.5, 0.75, 0.975)))
                           # return data
                           tibble(counts.t =  xpred) %>% 
                             cbind(qnts) %>% 
                             mutate(ID = fit$key[[1]])
                         }
  
  stopCluster(cl)
  
  save(simulations, file = sim_results.file)
}

Figure 5: Simulations of statistical models

counts_bytype <- snails_q_monthly_t %>% 
  group_by(ID, counts.t, counts.q) %>% 
  summarise(n = n()) %>% 
  filter(n>0) %>% 
  group_by(ID, counts.t) %>% 
  # re-compute the mean of the quadrat counts
  mutate(mean.q = sum(counts.q * n)/sum(n)) %>% 
  ungroup %>% 
  mutate(ID = factor(ID, levels = c(ID_levels, "stream-both")))

### FIGURE 5 ###
p.sim <- ggplot(simulations %>% 
                  mutate(ID = factor(ID, levels = c(ID_levels, "stream-both"))), 
                aes(x = counts.t)) +
  geom_ribbon(aes(ymin = q0025, ymax = q0975), fill = "#E6E6E6") +
  geom_ribbon(aes(ymin = q025, ymax = q075), fill = "#BFBFBF") +
  geom_point(data = counts_bytype, aes(size = n, y = counts.q), color = "#808080") +
  geom_point(data = counts_bytype, aes(y = mean.q), col = "red", size = 1) +
  geom_line(aes(y = meansim), size = .4) +
  scale_x_continuous(breaks = seq(0, 100, by = 25)) +
  scale_size(range = c(.3, 1.5), breaks = c(1, 4, 8)) +
  facet_wrap(~ ID, scales = "free", labeller = labeller(ID = ID_dict)) +
  guides(size = guide_legend(title = "number")) + 
  labs(x = "time [snails/30min]", y = "quadrat [snails/quadrat]") +
  custom_theme +
  theme(legend.text = element_text(size = 6),
        legend.title = element_text(size = 8),
        legend.key.size = unit(4, "points"),
        legend.position = c(0.85, 0.25))  

print(p.sim)
Model simulations of the relations between time-based and quadrat sampling strategies. Simulations were performed using the 95% credible model set for each species-habitat configuration. Simulations are shown in terms of the mean (black line) as well as the 95% (light gray ribbon) and 50% (dark gray ribbon) simulation envelopes for 5000 simulations per time-based count. Quadrat counts (gray dots) are given as in the first row of Fig. 3 along with monthly means (red dots).

Figure S4: Simulations of means

### FIGURE S2 ###
p.means <- ggplot(simulations%>% group_by(ID) %>% 
                    mutate(species = str_split(ID, "-")[[1]][2],
                           habitat = str_split(ID, "-")[[1]][1]) %>% 
                    mutate(species = factor(species_dict[species], levels = c("Bulinus spp.", "B. pfeifferi", "both")),
                           habitat = habitat_dict[habitat]), 
                  aes(x = counts.t)) +
  geom_line(aes(y = meansim/0.09, color = species)) +
  facet_wrap(~habitat) +
  labs(x = "time-based counts [snails/30min]", y = "absolute density [snails/sqm]") +
  custom_theme +
  theme(legend.position = c(.12, .8),
        legend.key.size = unit(6, "points"))


print(p.means)
Model simulations of the relations between time-based and absolute snail densities estimated by quadrat sampling. Simulations were performed using the 95% credible model set for each species-habitat configuration. Simulations are shown in terms of the mean (lines) for 5000 simulations per time-based count.


Session info used to produce these results:

R version 3.4.4 (2018-03-15)

Platform: x86_64-pc-linux-gnu (64-bit)

locale: LC_CTYPE=en_US.UTF-8, LC_NUMERIC=C, LC_TIME=fr_FR.UTF-8, LC_COLLATE=en_US.UTF-8, LC_MONETARY=fr_FR.UTF-8, LC_MESSAGES=en_US.UTF-8, LC_PAPER=fr_FR.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=fr_FR.UTF-8 and LC_IDENTIFICATION=C

attached base packages: stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: kableExtra(v.1.1.0), knitr(v.1.20), bindrcpp(v.0.2.2), doSNOW(v.1.0.16), snow(v.0.4-2), pomp(v.1.18), magrittr(v.1.5), itertools(v.0.1-3), iterators(v.1.0.9), foreach(v.1.4.4), lubridate(v.1.7.4), forcats(v.0.3.0), stringr(v.1.3.1), dplyr(v.0.7.6), purrr(v.0.2.5), readr(v.1.1.1), tidyr(v.0.8.1), tibble(v.1.4.2), ggplot2(v.3.1.0) and tidyverse(v.1.2.1)

loaded via a namespace (and not attached): httr(v.1.3.1), jsonlite(v.1.6), viridisLite(v.0.3.0), modelr(v.0.1.2), shiny(v.1.1.0), assertthat(v.0.2.1), highr(v.0.7), pander(v.0.6.3), cellranger(v.1.1.0), yaml(v.2.1.19), pillar(v.1.3.1), backports(v.1.1.2), lattice(v.0.20-35), glue(v.1.2.0), digest(v.0.6.15), promises(v.1.0.1), rvest(v.0.3.2), colorspace(v.1.3-2), httpuv(v.1.4.4.1), htmltools(v.0.3.6), plyr(v.1.8.4), psych(v.1.8.4), pkgconfig(v.2.0.1), broom(v.0.4.4), haven(v.1.1.1), xtable(v.1.8-2), mvtnorm(v.1.0-8), scales(v.1.0.0), webshot(v.0.5.1), later(v.0.7.3), DT(v.0.5), withr(v.2.1.2), lazyeval(v.0.2.1), cli(v.1.1.0), mnormt(v.1.5-5), mime(v.0.5), crayon(v.1.3.4), readxl(v.1.1.0), evaluate(v.0.10.1), fansi(v.0.4.0), nlme(v.3.1-131), xml2(v.1.2.0), foreign(v.0.8-69), ggthemes(v.3.5.0), tools(v.3.4.4), hms(v.0.4.2), munsell(v.0.5.0), compiler(v.3.4.4), rlang(v.0.3.4), grid(v.3.4.4), nloptr(v.1.0.4), rstudioapi(v.0.7), htmlwidgets(v.1.3), subplex(v.1.5-4), crosstalk(v.1.0.0), labeling(v.0.3), rmarkdown(v.1.10), gtable(v.0.2.0), codetools(v.0.2-15), deSolve(v.1.21), reshape2(v.1.4.3), R6(v.2.2.2), utf8(v.1.1.4), bindr(v.0.1.1), rprojroot(v.1.3-2), stringi(v.1.2.3), parallel(v.3.4.4), Rcpp(v.0.12.17), tidyselect(v.0.2.4) and coda(v.0.19-1)