Preamble

library(here)
here()
## [1] "/Users/benarnold/mbita-schisto"
#--------------------------------
# source the configuration file
#--------------------------------
source(here("R/mbita-schisto-Config.R"))

Load and process data

#---------------------------
# mbita PSAC data
#---------------------------
d <- readRDS(here("data","mbita_schisto.rds") )


#---------------------------
# log-transform the MFI
# values
# set values at or below 0
# to missing before the 
# transform
#---------------------------
d2 <- d %>%
  mutate(logsea = ifelse(sea <=0, NA, log10(sea)),
         logsm25 = ifelse(sm25 <= 1, NA, log10(sm25)))

#---------------------------
# pivot the data longer to
# make it easier to plot
#---------------------------

d3 <- d2 %>%
  dplyr::select(year,vid,pid,agey,logsea,logsm25) %>%
  pivot_longer(cols = c(logsea,logsm25), names_to = "antigen", names_prefix = "log", values_to = "logmfi") %>%
  mutate(antigenf = case_when(
    antigen == "sea" ~ "SEA",
    antigen == "sm25" ~ "Sm25"
  ), 
  antigenf = factor(antigenf,levels = c("SEA","Sm25"))
  )
#---------------------------
# store SEA and Sm25 ROC cutoffs
# these cutoffs were provided
# by Kim Won in the file
# Mbita cutoff table for Ben.xlsx
#---------------------------
dcuts <- data.frame(antigenf = c("SEA","Sm25"), cutroc = c(log10(965), log10(38) ))

Figure S1

MFI by age

pcols <- c(vircols[3],cbPalette[2])
log10labs <- c( 
  expression(10^0),
  expression(10^1),
  expression(10^2),
  expression(10^3),
  expression(10^4)
)

pmfiage <- ggplot(data = d3, aes(x = agey, y = logmfi, color = antigenf)) +
  facet_grid(antigenf~.) +
  geom_point(size = 0.5, alpha = 0.3) +
  geom_hline(data = dcuts, aes(yintercept = cutroc),lwd = 0.25,col = "gray20") +
  # geom_smooth(method = "loess", se = FALSE, lwd = 0.5, color = "gray20") +
  scale_color_manual(values = pcols) +
  scale_y_continuous(breaks = 0:4, labels = log10labs) +
  labs(x = "age, years", y = "Luminex response (MFI-bg)", tag = "A") +
  coord_cartesian(ylim = c(0,4.5)) +
  theme(
    legend.position = "none",
    strip.text.y = element_blank()
  )

Antibody level distributions

pcols <- c(vircols[3],cbPalette[2])
log10labs <- c( 
  expression(10^0),
  expression(10^1),
  expression(10^2),
  expression(10^3),
  expression(10^4)
)

nobs <- length(d3$logmfi[d3$antigen=="sea"])
hbreaks <- seq(0,4.6,by=0.05)
pmfidist <- ggplot(data = d3, aes(x = logmfi, color = antigenf, fill = antigenf)) +
  facet_grid(antigenf~.) +
  # geom_line(stat = "density") +
  geom_histogram(aes(x = logmfi), breaks = hbreaks, color = NA, alpha = 0.8)+
  geom_vline(data = dcuts, aes(xintercept = cutroc),lwd = 0.25,col = "gray20") +
  scale_color_manual(values = pcols) +
  scale_fill_manual(values = pcols) +
  scale_x_continuous(breaks = 0:4, labels = log10labs) +
  scale_y_continuous(expand = c(0,0), breaks = seq(0,700,by=100))+
  labs(x = NULL, y = "N observations", tag = "B") +
  coord_flip(ylim = c(0,700), xlim = c(0,4.5))+
  theme(
    legend.position = "none", 
    strip.text.y = element_text(angle = 0)
  )

Combined figure

pcomp <- grid.arrange(pmfiage,pmfidist,nrow = 1, ncol =2)

ggsave(here("output","mbita-schisto-ab-distributions.png"),pcomp,device = "png")
## Saving 7 x 5 in image

Extra: plot age-dependent mean and seroprevalence

Sm25 is not included in the main analyses, but examine age-dependent mean and seroprevalence for completeness. Simply use ggplot’s internal smoother (no 95% CIs because it does not account for clustering on village).

plot_age_means <- ggplot(data=d3, aes(x = agey, y = logmfi, color = antigenf)) + 
  geom_smooth(se = FALSE) +
  scale_color_manual(values = pcols, guide=guide_legend(title = "Antigen")) +
  scale_y_continuous(breaks = 0:4, labels = log10labs) +
  labs(x = "age, years", y = "Luminex response (MFI-bg)", tag = "A") +
  coord_cartesian(ylim = c(0,4.5)) 

plot_age_means
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 916 rows containing non-finite values (stat_smooth).

d4 <- d3 %>%
  mutate(serocut = ifelse(antigenf == "SEA",log10(965),log10(38)),
         seropos = ifelse(logmfi>serocut,1,0)
         )

plot_age_serop <- ggplot(data=d4, aes(x = agey, y = seropos, color = antigenf)) + 
  geom_smooth(se = FALSE) +
  scale_color_manual(values = pcols, guide=guide_legend(title = "Antigen")) +
  scale_y_continuous(breaks = seq(0,1,by=0.2), labels = sprintf("%1.0f",seq(0,1,by=0.2)*100)) +
  labs(x = "age, years", y = "Seroprevalence (%)", tag = "B") +
  coord_cartesian(ylim = c(0,1)) 

plot_age_serop
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 916 rows containing non-finite values (stat_smooth).

Session Info

sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Mojave 10.14.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] doParallel_1.0.15  iterators_1.0.12   foreach_1.5.0      mgcv_1.8-31       
##  [5] nlme_3.1-148       leaflet_2.0.3      raster_3.3-13      sf_0.9-5          
##  [9] sp_1.4-2           kableExtra_1.1.0   gridExtra_2.3      ellipse_0.4.2     
## [13] viridisLite_0.3.0  RColorBrewer_1.1-2 scales_1.1.1       forcats_0.5.0     
## [17] stringr_1.4.0      dplyr_1.0.0        purrr_0.3.4        readr_1.3.1       
## [21] tidyr_1.1.0        tibble_3.0.3       ggplot2_3.3.2      tidyverse_1.3.0   
## [25] here_0.1          
## 
## loaded via a namespace (and not attached):
##  [1] fs_1.4.2                lubridate_1.7.9         webshot_0.5.2          
##  [4] httr_1.4.2              rprojroot_1.3-2         tools_4.0.2            
##  [7] backports_1.1.8         rgdal_1.5-12            R6_2.4.1               
## [10] KernSmooth_2.23-17      DBI_1.1.0               colorspace_1.4-1       
## [13] withr_2.2.0             tidyselect_1.1.0        compiler_4.0.2         
## [16] cli_2.0.2               rvest_0.3.5             xml2_1.3.2             
## [19] labeling_0.3            classInt_0.4-3          digest_0.6.25          
## [22] rmarkdown_2.3           base64enc_0.1-3         pkgconfig_2.0.3        
## [25] htmltools_0.5.0         highr_0.8               dbplyr_1.4.4           
## [28] htmlwidgets_1.5.1       rlang_0.4.7             readxl_1.3.1           
## [31] rstudioapi_0.11         generics_0.0.2          farver_2.0.3           
## [34] jsonlite_1.7.0          crosstalk_1.1.0.1       magrittr_1.5           
## [37] Matrix_1.2-18           Rcpp_1.0.5              munsell_0.5.0          
## [40] fansi_0.4.1             viridis_0.5.1           lifecycle_0.2.0        
## [43] stringi_1.4.6           yaml_2.2.1              MASS_7.3-51.6          
## [46] blob_1.2.1              crayon_1.3.4            lattice_0.20-41        
## [49] haven_2.3.1             splines_4.0.2           hms_0.5.3              
## [52] knitr_1.29              pillar_1.4.6            codetools_0.2-16       
## [55] reprex_0.3.0            glue_1.4.1              evaluate_0.14          
## [58] leaflet.providers_1.9.0 modelr_0.1.8            png_0.1-7              
## [61] vctrs_0.3.2             cellranger_1.1.0        gtable_0.3.0           
## [64] assertthat_0.2.1        xfun_0.15               lwgeom_0.2-5           
## [67] broom_0.7.0             e1071_1.7-3             class_7.3-17           
## [70] units_0.6-7             ellipsis_0.3.1