# Script for subsampling distribution data to produce a dataset with equally
# many observations in each time period (here, three time periods; time
# periods 1 and 3 were used in Hällfors et al. 2021)
#
# Maria Hällfors, Juha Pöyry, Janne Heliölä, et al. Combining range and
# phenology shifts offers a winning strategy for boreal Lepidoptera.
# Ecology Letters, accepted April 14th 2021. DOI:10.1111/ele.13774
#
############ DISCLAIMER ON REPRODUCIBILITY #############
# NB! The subsampling is not deterministic: a different subset is produced
# each time the script is run. Thus, the resulting NRBs and prevalence
# measures will not be exactly the same as in the original paper.
# (Call set.seed() before the subsampling steps if a repeatable subset is
# needed for your own work.)
##############
# sessionInfo()
# R version 4.0.5 (2021-03-31)
# Platform: x86_64-w64-mingw32/x64 (64-bit)
# Running under: Windows 10 x64 (build 18363)
##############

#####
# Install and load required packages
#####

# Package names
packages <- c("dplyr", "reshape2", "tidyr", "purrr")

# Install packages not yet installed
installed_packages <- packages %in% rownames(installed.packages())
if (!all(installed_packages)) {
  install.packages(packages[!installed_packages])
}

# Load libraries
library(dplyr)
library(reshape2)
library(tidyr)
library(purrr)

#########
# Helper
#########

# Draw the same number of rows, without replacement, from every time period.
#
# dat:   data frame with a column `TP` identifying the time period.
# n_obs: number of rows to draw from each time period.
#
# Returns an ungrouped tibble holding `n_obs` rows per value of `TP`.
# dplyr::sample_n() raises an error if any time period has fewer than `n_obs`
# rows, so an unequal sample can never be produced silently.
subsample_by_tp <- function(dat, n_obs) {
  dat %>%
    dplyr::group_by(TP) %>%
    tidyr::nest() %>%
    # Replace each nested per-period table by its random subsample. Writing
    # the result back into `data` (rather than into an extra column) makes
    # sure no list-column is left behind after unnest(); such a column cannot
    # be written out with write.csv().
    dplyr::mutate(
      data = purrr::map(data, dplyr::sample_n, size = n_obs, replace = FALSE)
    ) %>%
    tidyr::unnest(data) %>%
    # Drop the grouping so later steps are not silently applied per TP.
    dplyr::ungroup()
}

#########
# Set wd and read in data
#########

# set working directory
# setwd("MY/PATH/")

# read in data
BR0 <- read.csv("Data_Distribution_Raw_Butterflies.csv")
MR0 <- read.csv("Data_Distribution_Raw_Moths.csv")

############
# PART 1 - SUBSAMPLING OF DATA
############
# This is done separately for the butterflies and moths

#####################
# 2a.1 Subsample Butterflies
#####################

# define number of presences to subsample from each Time period
# (equaling to the number of observations in TP1)
Nobs <- 46602

# subsample TPs to have equal presences
BR1 <- subsample_by_tp(BR0, Nobs)

# write.csv(BR1, file="Data_Distribution_Subsampled_Butterflies.csv")

####################
# 2a.2 Subsample Moths
####################

# define number of presences to subsample from each Time period
# (equaling to the number of observations in TP1)
Nobs <- 135442

# subsample TPs to have equal presences
MR1 <- subsample_by_tp(MR0, Nobs)

# write.csv(MR1, file="Data_Distribution_Subsampled_Moths.csv")