--- title: Calculation of the number of seeds dispersed per ha/yr and correction by the number of pellets - Liberio et al., Biotropica author: "Lucas Paolucci and Divino Silverio" date: "10/12/2018" output: pdf_document: default html_document: default --- ##Estimating the number of seeds dispersed per ha/yr ```{r, message=FALSE, warning=FALSE} #Reading data data <- read.csv("master_Paolucci_et.al.csv", h=T) names(data) #transforming Date column to class (Date): data$Date=lubridate::dmy(data$Date) #Excluding dung clumps that were found outside trails, during the last sampling event. More details about why this procedure was done can be found in SI. data <- data[ which(data$Found_outside_trails_.only_for_the_fourth_sampling.event!='yes'), ] #function for obtaining the number of clumps and abundance of seeds per sampling event summary <- function(x, ...){ c(number_dung_clumps_sampled=length(x),total_seeds=sum(x,...)) } #Summarizing data by sampling event data.summary <- doBy::summaryBy(Abundance_seeds~Date,data,na.rm=T,FUN=summary) #Adjusting the names of colums names(data.summary) <- c("Date","Number_dung_clumps_sampled","Total_seeds") #Including the sampling event column data.summary$Sampling_event <- c(1:4) #Calculating the number of days elapsed between sampling events t1=as.numeric(data.summary$Date[2]-data.summary$Date[1]);t1 t2=as.numeric(data.summary$Date[3]-data.summary$Date[2]);t2 t3=as.numeric(data.summary$Date[4]-data.summary$Date[3]);t3 #Number of days elapsed since last sampling event equals 'NA' for the first sampling event, as there was no previous sampling event. data.summary$Days_elapsed_since_last_sampling=c(NA,t1,t2,t3) ####Calculation of the number of seeds dispersed per ha/yr### #As described in the main text (Statistical anlyses section), first we obtained the average of seeds dispersed per day between each sampling interval. For that, we divide the number of seeds found in a sampling event ('total_seeds'column) by the number of days elapsed since the previous sampling event ('days_elapsed_since_last_sampling'column). data.summary$Seeds_dispersed_per_day <- (data.summary$Total_seeds)/(data.summary$Days_elapsed_since_last_sampling) data.summary #We then average the number of seeds dispersed per day between each sampling event, and multiplied it by 365, to obtain the average number of seeds dispersed per year: (Average_seeds_per_year <- mean(data.summary$Seeds_dispersed_per_day,na.rm=T)*365) #Finally, we divided this number by 150 (the total area of our study site in hectares), to obtain the number of seeds dispersed per ha/yr: Average_seeds_per_ha_yr <- Average_seeds_per_year/150 #As this number is not corrected yet, let's change its name to make this clear: Average_seeds_per_ha_yr_before_correction <- Average_seeds_per_ha_yr ``` ##Correcting the "number_of_seeds_area_time" by the number os pellets ```{r, message=FALSE, warning=FALSE} ###Correction### # This correction was necessary because we counted the number of seeds for just 10 pellets per dung clumps we found, and most dung clumps have more than 10 pellets. ##For that, we sampled 63 additional clumps (different from those used in the study), counted their total number of pellets, and added a correction factor for the number of seeds we found on original 10 pellets (more details in the main text and SI). We followed six steps: #1) Reading data of the 63 additional dung clumps data.additional.clumps <- read.csv("additional_clumps.csv", h=T) #2) Obtaining the 'correction factor'; for that, we divide the total number of pellets found in each of the 63 clumps by 10 (correction_factor <- data.additional.clumps$Number_pellets/10) #3) Multiplying the correction factor by the number of seeds dispersed per ha/yr before correction, so we correct our original estimation (Average_seeds_per_ha_yr_corrected <- correction_factor*Average_seeds_per_ha_yr_before_correction) #4) Averaging these 63 values, each representing the number of seeds expected to be dispersed in each dung clump, considering the real number of pellets. This is the average final value of seeds dispersed per ha/yr across our study area. Also, to obtain 95% confidence intervals (CI) associated with this correction, we did a bootstrap procedure in which we resampled these values 2000 times (Seeds.dispersed.ha.yr <- Hmisc::smean.cl.boot(Average_seeds_per_ha_yr_corrected, B=2000)) (Seeds.dispersed.ha.yr_mean <- Seeds.dispersed.ha.yr[1]) #mean (Seeds.dispersed.ha.yr_CI.low <- Seeds.dispersed.ha.yr[2]) #lower CI (Seeds.dispersed.ha.yr_CI.upp <- Seeds.dispersed.ha.yr[3]) #upper CI #5) Calculating the CI associated with our original dataset (i.e., with the raw data from our four sampling events) (CI.original.data.set <- 1.96*sd(data$Abundance_seeds)/sqrt(length(data$Abundance_seeds))) # 6) Sum both IC (Final.IC.low <- Seeds.dispersed.ha.yr_CI.low+CI.original.data.set) (Final.IC.up <- Seeds.dispersed.ha.yr_CI.upp+CI.original.data.set) ``` ##Estimating the number of seeds dispersed per ha/yr only in the Control ```{r, message=FALSE, warning=FALSE} #Apart from the next line (and respective nomenclature changes), where we exclude Fire data, the following script is exactly the same as the above data.control <- data[ which(data$Treatment_plot=='Control'), ] #Summarizing data by sampling event data.control.summary <- doBy::summaryBy(Abundance_seeds~Date,data.control,na.rm=T,FUN=summary) #Adjusting the names of colums names(data.control.summary) <- c("Date","Number_dung_clumps_sampled","Total_seeds") data.control.summary$Sampling_event <- c(1:4) #Calculate the number of days elapsed between sampling events t1=as.numeric(data.control.summary$Date[2]-data.control.summary$Date[1]);t1 t2=as.numeric(data.control.summary$Date[3]-data.control.summary$Date[2]);t2 t3=as.numeric(data.control.summary$Date[4]-data.control.summary$Date[3]);t3 #Number of days elapsed since last sampling event equals 'NA' for the first sampling event, as there was no previous sampling event. data.control.summary$Days_elapsed_since_last_sampling=c(NA,t1,t2,t3) ####Calculation of the number of seeds dispersed per ha/yr### #As described in the main text (Statistical anlyses section), first we obtained the average of seeds dispersed per day between each sampling interval. For that, we divided the number of seeds found in a sampling event ('total_seeds'column) by the number of days elapsed since the previous sampling event ('days_elapsed_since_last_sampling'column) data.control.summary$Seeds_dispersed_per_day <- (data.control.summary$Total_seeds)/(data.control.summary$Days_elapsed_since_last_sampling) data.control.summary #We then averaged the number of seeds dispersed per day between each sampling event, and multiplied it by 365, to obtain the average number of seeds dispersed by year (Average_seeds_per_year.control <- mean(data.control.summary$Seeds_dispersed_per_day,na.rm=T)*365) #Finally, we divided this number by 50 (the area of the Control plot in hectares), to obtain the number of seeds dispersed per ha/yr Average_seeds_per_ha_yr.control <- Average_seeds_per_year.control/50 #As this number is not corrected yet, let's change its name to make this clear Average_seeds_per_ha_yr.control_before_correction <- Average_seeds_per_ha_yr.control ``` ##Correcting the estimation by the number os pellets ```{r, message=FALSE, warning=FALSE} ###Correction### #3) Multiplying the correction factor by the number of seeds dispersed per ha/yr before correction, so we correct our original estimation (Average_seeds_per_ha_yr.control_corrected <- correction_factor*Average_seeds_per_ha_yr.control_before_correction) #4) Averaging these 63 values, each representing the number of seeds expected to be dispersed in each dung clump, considering their real number of pellets. This is the average final value of seeds dispersed per ha/yr across our study area. Also, to obtain confidence intervals (CI) associated with this correction, we did a bootstrap procedure in which we resampled these values 2000 times (Seeds.dispersed.ha.yr.control <- Hmisc::smean.cl.boot(Average_seeds_per_ha_yr.control_corrected, B=2000)) (Seeds.dispersed.ha.yr.control_mean <- Seeds.dispersed.ha.yr.control[1]) #mean (Seeds.dispersed.ha.yr.control_CI.low <- Seeds.dispersed.ha.yr.control[2]) #lower CI (Seeds.dispersed.ha.yr.control_CI.upp <- Seeds.dispersed.ha.yr.control[3]) #upper CI #5) Calculating the CI associated with our original dataset (i.e., with the raw data from our four sampling events) (CI.original.data.set.control <- 1.96*sd(data.control$Abundance_seeds)/sqrt(length(data.control$Abundance_seeds))) #6) Sum both IC (Final.IC.low.control <- Seeds.dispersed.ha.yr.control_CI.low+CI.original.data.set.control) (Final.IC.up.control <- Seeds.dispersed.ha.yr.control_CI.upp+CI.original.data.set.control) ``` ##Estimating the number of seeds dispersed per ha/yr only in the B1yr plot ```{r, message=FALSE, warning=FALSE} #Apart from the next line (and respective nomenclature changes), where we select B1yr plot data, the following script is exactly the same as the above data.B1yr <- data[ which(data$Treatment_plot=='B1yr'), ] #Summarizing data by sampling event data.B1yr.summary <- doBy::summaryBy(Abundance_seeds~Date,data.B1yr,na.rm=T,FUN=summary) #Adjusting the names of colums names(data.B1yr.summary) <- c("Date","Number_dung_clumps_sampled","Total_seeds") data.B1yr.summary$Sampling_event <- c(1:4) #Calculating the number of days elapsed between sampling events t1=as.numeric(data.B1yr.summary$Date[2]-data.B1yr.summary$Date[1]);t1 t2=as.numeric(data.B1yr.summary$Date[3]-data.B1yr.summary$Date[2]);t2 t3=as.numeric(data.B1yr.summary$Date[4]-data.B1yr.summary$Date[3]);t3 #Number of days elapsed since last sampling event equals 'NA' for the first sampling event, as there was no previous sampling event data.B1yr.summary$Days_elapsed_since_last_sampling=c(NA,t1,t2,t3) ####Calculation of the number of seeds dispersed per ha/yr### #As described in the main text (Statistical anlyses section), first we obtained the average of seeds dispersed per day between each sampling interval. For that, we divided the number of seeds found in a sampling event ('total_seeds'column) by the number of days elapsed since the previous sampling event ('days_elapsed_since_last_sampling'column) data.B1yr.summary$Seeds_dispersed_per_day <- (data.B1yr.summary$Total_seeds)/(data.B1yr.summary$Days_elapsed_since_last_sampling) data.B1yr.summary #We then averaged the number of seeds dispersed per day between each sampling event, and multiplied it by 365, to obtain the average number of seeds dispersed by year (Average_seeds_per_year.B1yr <- mean(data.B1yr.summary$Seeds_dispersed_per_day,na.rm=T)*365) #Finally, we divided this number by 50 (the area of the B1yr plot in hectares), to obtain the number of seeds dispersed per ha/yr Average_seeds_per_ha_yr.B1yr <- Average_seeds_per_year.B1yr/50 #As this number is not corrected yet, let's change its name to make this clear Average_seeds_per_ha_yr.B1yr_before_correction <- Average_seeds_per_ha_yr.B1yr ``` ##Correcting the estimation by the number os pellets ```{r, message=FALSE, warning=FALSE} ###Correction### #3) Multiplying the correction factor by the number of seeds dispersed per ha/yr before correction, so we correct our original estimation (Average_seeds_per_ha_yr.B1yr_corrected <- correction_factor*Average_seeds_per_ha_yr.B1yr_before_correction) #4) Averaging these 63 values, each representing the number of seeds expected to be dispersed in each dung clump, considering their real number of pellets. This is the average final value of seeds dispersed per ha/yr across our study area. Also, to obtain confidence intervals (CI) associated with this correction, we did a bootstrap procedure in which we resampled these values 2000 times (Seeds.dispersed.ha.yr.B1yr <- Hmisc::smean.cl.boot(Average_seeds_per_ha_yr.B1yr_corrected, B=2000)) (Seeds.dispersed.ha.yr.B1yr_mean <- Seeds.dispersed.ha.yr.B1yr[1]) #mean (Seeds.dispersed.ha.yr.B1yr_CI.low <- Seeds.dispersed.ha.yr.B1yr[2]) #lower CI (Seeds.dispersed.ha.yr.B1yr_CI.upp <- Seeds.dispersed.ha.yr.B1yr[3]) #upper CI #5) Calculating the CI associated with our original dataset (i.e., with the raw data from our four sampling events) (CI.original.data.set.B1yr <- 1.96*sd(data.B1yr$Abundance_seeds)/sqrt(length(data.B1yr$Abundance_seeds))) #6) Sum both IC (Final.IC.low.B1yr <- Seeds.dispersed.ha.yr.B1yr_CI.low+CI.original.data.set.B1yr) (Final.IC.up.B1yr <- Seeds.dispersed.ha.yr.B1yr_CI.upp+CI.original.data.set.B1yr) ``` ##Estimating the number of seeds dispersed per ha/yr only in the B3yr plot ```{r, message=FALSE, warning=FALSE} #Apart from the next line (and respective nomenclature changes), where we select B3yr plot data, the following script is exactly the same as the above data.B3yr <- data[ which(data$Treatment_plot=='B3yr'), ] #Summarizing data by sampling event data.B3yr.summary <- doBy::summaryBy(Abundance_seeds~Date,data.B3yr,na.rm=T,FUN=summary) #Adjusting the names of colums names(data.B3yr.summary) <- c("Date","Number_dung_clumps_sampled","Total_seeds") data.B3yr.summary$Sampling_event <- c(1:4) #Calculating the number of days elapsed between sampling events t1=as.numeric(data.B3yr.summary$Date[2]-data.B3yr.summary$Date[1]);t1 t2=as.numeric(data.B3yr.summary$Date[3]-data.B3yr.summary$Date[2]);t2 t3=as.numeric(data.B3yr.summary$Date[4]-data.B3yr.summary$Date[3]);t3 #Number of days elapsed since last sampling event equals 'NA' for the first sampling event, as there was no previous sampling event data.B3yr.summary$Days_elapsed_since_last_sampling=c(NA,t1,t2,t3) ####Calculation of the number of seeds dispersed per ha/yr### #As described in the main text (Statistical anlyses section), first we obtained the average of seeds dispersed per day between each sampling interval. For that, we divided the number of seeds found in a sampling event ('total_seeds'column) by the number of days elapsed since the previous sampling event ('days_elapsed_since_last_sampling'column). data.B3yr.summary$Seeds_dispersed_per_day <- (data.B3yr.summary$Total_seeds)/(data.B3yr.summary$Days_elapsed_since_last_sampling) data.B3yr.summary #We then averaged the number of seeds dispersed per day between each sampling event, and multiplied it by 365, to obtain the average number of seeds dispersed by year: (Average_seeds_per_year.B3yr <- mean(data.B3yr.summary$Seeds_dispersed_per_day,na.rm=T)*365) #Finally, we divided this number by 50 (the area of the B3yr plot in hectares), to obtain the number of seeds dispersed per ha/yr Average_seeds_per_ha_yr.B3yr <- Average_seeds_per_year.B3yr/50 #As this number is not corrected yet, let's change its name to make this clear: Average_seeds_per_ha_yr.B3yr_before_correction <- Average_seeds_per_ha_yr.B3yr ``` ##Correcting the estimation by the number os pellets ```{r, message=FALSE, warning=FALSE} ###Correction### #3) Multiplying the correction factor by the number of seeds dispersed per ha/yr before correction, so we correct our original estimation: (Average_seeds_per_ha_yr.B3yr_corrected <- correction_factor*Average_seeds_per_ha_yr.B3yr_before_correction) #4) Averaging these 63 values, each representing the number of seeds expected to be dispersed in each dung clump, considering their real number of pellets. This is the average final value of seeds dispersed per ha/yr across our study area. Also, to obtain confidence intervals (CI) associated with this correction, we did a bootstrap procedure in which we resampled these values 2000 times: (Seeds.dispersed.ha.yr.B3yr <- Hmisc::smean.cl.boot(Average_seeds_per_ha_yr.B3yr_corrected, B=2000)) (Seeds.dispersed.ha.yr.B3yr_mean <- Seeds.dispersed.ha.yr.B3yr[1]) #mean (Seeds.dispersed.ha.yr.B3yr_CI.low <- Seeds.dispersed.ha.yr.B3yr[2]) #lower CI (Seeds.dispersed.ha.yr.B3yr_CI.upp <- Seeds.dispersed.ha.yr.B3yr[3]) #upper CI #5) Calculating the CI associated with our original dataset (i.e., with the raw data from our four sampling events) (CI.original.data.set.B3yr <- 1.96*sd(data.B3yr$Abundance_seeds)/sqrt(length(data.B3yr$Abundance_seeds))) #6) Sum both IC: (Final.IC.low.B3yr <- Seeds.dispersed.ha.yr.B3yr_CI.low+CI.original.data.set.B3yr) (Final.IC.up.B3yr <- Seeds.dispersed.ha.yr.B3yr_CI.upp+CI.original.data.set.B3yr) ```