# R code for Contact chains and bTB paper ## Please contact Helen Fielding at fieldinghr@gmail.com for queries regarding this code. # The data in the following listed files contains confidential information regarding GB farms and does not belong to the authors # therefore cannot be shared by the authors, however is obtainable on request to the Animal and Plant Health Agency # as outlined in our data accessibility statement. Some of this code is computationally intensive when using large datasets and therefore was # performed on an HPC machine. # Files required from APHA for data analysis: # CTS movements for relevant time period (with markets removed and seen as direct movement from farm a to farm b) # CTS cattle population data - herd sizes and breed data to define herd types as outlined in methods # Reference file to link parish identifier with a risk area/country # File to link CTS location keys with farm information e.g. CPH #, XY coordinates, etc # bTB breakdown data files # Details required from these files for the following analysis: ## To create a file named 'Attributes.rds' # This file was set up with the following columns: # "CPH" County Parish Holding number # "CtyKey" County identifier # "CtyPar" County parish identifier # "LocationKey" Cattle tracing system identifier # "X" "Y" X and Y location co-ordinates of farm # "group" Type of holding # "RiskArea" Risk area as defined in methods # "NumDaysRestr2010" The number of days a herd was under OTFW or OTFS restrictions in that year 2010-2016 # "NumDaysRestr2011" # "NumDaysRestr2012" # "NumDaysRestr2013" # "NumDaysRestr2014" # "NumDaysRestr2015" # "NumDaysRestr2016" # # "NumNewOTFS20072011" Number of new OTFS breakdowns from 2007-2011 # "NumNewOTFW20072011" Number of new OTFW breakdowns from 2007-2011 # "NumCattleReactors20072011" Number of cattle reactors 2007-2011 # "NumNewBD20072011" Number of new bTB incidents 2 # "NumNewBD20152016" Number of new bTB incidents 2015-2016 # "NumNewOTFS20152016" Number of new incidents classed as OTFS 2015-2016 # "NumNewOTFW20152016" Number of new incidents classed as OTFW 2015-2016 # "MultipleHerds" Yes or No whether the CPH had multiple herds associated with it - these are later removed in our analysis (see Methods) # "AvHerdSize" Average annual herd size as outlined in Methods (calculated over study period for multiple years) # "Htype" Herd type (Dairy, Mixed, Suckler, Fat) as outlined in Methods (calculated over study period for multiple years) ## 'Attributes_*.rds' # This file pertains to a specific year (*) and is the same as the above file but AvHerdSize and Htype are calculated based on data from that single year ## 'Movements201220132014.csv' # This data should include all movements between animal holdings from 1st Jan 2012 to 31st December 2014 # It should be set up with the following columns: # 1. source (CTS location key of source farm) # 2. destination (CTS location key of destination farm) # 3. t (date of movement) ## 'Edgelist_*.rds' # These files pertain to specific years (*) and contain the movement data in edgelist format (similar to above) # Columns: # 1. from (ie. CTS location key of farm animals came from), # 2. to (i.e. CTS location key of farm animals sold to), # 3. date (date of movement) ############################################################################################################################ ### The following code can then be run using these files ### ######################################################################################################################### # Load packages #### library(data.table) library(bit64) library(tidyverse) ## Create Network attributes file: AttributesNA17_*.rds #### # *2012,2013,2014 # load packages library(igraph) # Calculate in degree, in strength and inverted betweenness for all farms in 2012,2013 and 2014 for(year in 2012:2014) { print(year) # load graph g <- readRDS(paste0(path, 'Edgelist_', year, '.rds')) # Network measures # # in degree id <- degree(g, mode = 'in') # in strength is <- strength(g, mode = 'in') print('calculating betweenness') # Betweenness # To INVERT weights edge_attr(g)$weight <- 1/(edge_attr(g)$weight) ebi <- estimate_betweenness(g, vids = V(g), cutoff = -1, directed = TRUE, weights = NULL) # put individual node metrics into a data table (dt) dt <- as.data.table(d, keep.rownames = TRUE) dt[, 'id' := id][, 'is' := is][, 'ebi' := ebi][, 'Year' := year] setnames(dt, c('rn', 'd'), c('LocationKey', 'd')) print('node attributes saved') # save degree, strength and betweenness into attributes file - now have replicated this data attr <- readRDS(paste0(path, paste0('Attributes/Attributes17_', year, '.rds'))) #attr <- readRDS('Attributes_', year, '.rds') setkey(attr, LocationKey) attr <- attr[duplicated(attr$LocationKey) == FALSE] dt$LocationKey <- as.integer(dt$LocationKey) setkey(dt, LocationKey) attr[, Year := NULL] Result <- merge(dt,attr, all = FALSE) Result[, 'Year' := year] saveRDS(Result, paste0(path, paste0('Attributes/AttributesNA17_', year, '.rds'))) print(paste0('AttributesNA17', year, '.rds file created')) rm(dt, Result, g, attr) } # Create mean Ingoing Contact Chains for each farm over 3 year period, at monthly lags #### # Sent to HPC for(year in 2012:2014) { for(month in 01:12) { print(year) # Batch code to send off to calculate contact chains for all nodes in network in certain year #### dat <- read.csv(file = paste0(path, 'NetworksPaper/Data/Movements', year-1, year, '.csv')) dat$X <- NULL root <- sort(unique(c(dat$source, dat$destination))) print('root set') print(paste('calculating network summary for one year from 1st ', month, year)) result <- NetworkSummary(dat, root = root, tEnd = as.Date(paste0(year, '-', month,'-01')), days = 365) print('summarydone') saveRDS(result, paste0(path, 'NetworksPaper/Data/ContactChains1yr_', year-1, year, month, '.rds')) print(paste0(year,' end')) rm(dat, root) gc() } } # Take mean of each calculated ICC dat[, 'meanICC' := mean(ingoingContactChain), by = root] ## Cluster code: Calculating and recording farms in the ingoing contact chain #### # The following code was submitted in parallel on a HPC for each farm in the study (~70,000 farms) # e.g. args = 1,2,3,4, etc to ~70,000 # Multiple errors occurred due to memory problems hence the trycatch etc code args <- commandArgs(trailingOnly = TRUE) ##Root ID # Root farm is defined by the trailing argument when Rscript is submitted root <- as.integer(args[1]) print(root) # load packages library(plyr) library(igraph) library(animation) library(ggmap) library(Rcpp) library(EpiContactTrace) library(data.table) library(bit64) print('packagesloaded') # set path path <- '~/ICCroots/' print(paste0('path set as ', path)) # Load movements 2012 - 2014 dat <- read.csv('~/Movements201220132014.csv', header = T) dat$X <- NULL dat$t <- as.character(dat$t) dat$t <- as.Date(dat$t) print('movements loaded in') # trailing argument has already named the root as root and outgoing dates are specified in function newdir <- paste0(path,'r', as.character(root)) if(dir.exists(newdir)) { print('Directory exists for root') } else { dir.create(newdir) } if(file.exists(paste0(path, 'r', root, '/ICCstructure24Months_r',root,'.rds'))) { print(paste0('24 month summary already exists for r', root, ' so no need for further file generation')) } else { for(year in 2012:2013) { print(year) for(month in 1:12) { print(year) tryCatch({ print(year) print(month) if (file.exists(paste0(path, 'r', root, '/ICCstructure', year, '_m', month,'r',root,'.rds'))) { print(paste0('File exists for r', root, ' month ', month, ' ', year)) } else { #make formal contact trace object with all contacts on root <- as.integer(root) print('File doesnt yet exist so starting to calculate trace object') ct <- Trace(dat, root, inBegin = as.Date(paste0(year, '-', month,'-01')) , inEnd = as.Date(paste0(year +1, '-', month,'-01')) , outBegin = '1900-01-01', outEnd = '1900-01-01', maxDistance = 20) print('ct trace finished') # Check if ct object has been made, if not continue to end of loop and start next if(exists('ct')){ saveRDS(ct, paste0(path, 'r', root, '/ICCtrace', year, '_m', month,'r',root,'.rds')) print(paste0(year, ' network root ', root, ' trace done')) # Don't need this for multiple roots ct <- list(ct) ns <- do.call("rbind", lapply(ct, function(x) { x <- NetworkStructure(x) x[x$direction == "in", c("root", "source", "distance")] # need only source as this will have all unique farms in that are in destination - just not the root (which we have anyway) })) rownames(ns) <- NULL saveRDS(ns, paste0(path, 'r', root, '/ICCstructure', year, '_m', month,'r',root,'.rds')) print(paste0(year, ' network root ', root, ' structure done')) } else { print('no structure or trace file saved as no contact trace object') } } }, error = function(e){cat('ERROR:', conditionMessage(e),'\n')}) } } # Loop to read in all structure files just created for(year in 2012:2013) { for(month in 1:12) { dat <- readRDS(paste0(path,'r', root, '/ICCstructure', year,'_m', month, 'r', root, '.rds')) dat <- as.data.table(dat) dat[, 'Year' := year] dat[, 'Month' := month] assign(paste0('d', month,'_', year), dat) } } # Combine into one data.table of all source farms and their distances l <- list(d1_2013, d2_2013, d3_2013, d4_2013, d5_2013, d6_2013, d7_2013, d8_2013, d9_2013, d10_2013, d11_2013, d12_2013, d1_2012, d2_2012, d3_2012, d4_2012, d5_2012, d6_2012, d7_2012, d8_2012, d9_2012, d10_2012, d11_2012, d12_2012) dt <- rbindlist(l) warnings() if(nrow(dt) > 0) { # Save RDS for all months saveRDS(dt, paste0(path, 'r', root, '/ICCstructure24Months_r',root,'.rds')) # Collapse down the list into unique farms v <- dt[,min(distance), by = source] setnames(v, 'V1', 'mindist') # calculate summary stats m <- round(c(summary(v$mindist)[1:6], SD = sd(v$mindist), ICC = nrow(v)), digits = 2) m <- as.data.table(m, keep.rownames = 'rownames') m[, 'root' := root] setnames(m, 'm', 'mindist') # Save summary data RDS saveRDS(m, paste0(path, 'r', root, '/ICCstructureSummary_r',root,'.rds')) print(paste0('network done for root ',root)) } else {print('ICC sum to zero')} } warnings() ## Cluster code: Summarise source farms in each ICC (Cluster - SummariseCCdata) Creates farmlist # and ICC summary, a list of unique source farms in ingoing contact chain with location keys #### # PACKAGES AND PATHS library(raster) library(dismo) library(maptools) library(rgeos) library(rgdal) library(gdistance) path <- '~/ICCsummary/' # Table to change roots back from anonymous roots for 2012-2014 data #lkup <- readRDS(paste0(path, 'SourceData/Roots201220132014lkup.rds')) # can remove after # Load attributes attr <- readRDS(paste0(path, 'AttributesCCAug17.rds')) #attr <- readRDS(paste0(path, 'Attributes.rds')) setnames(attr, 'LocationKey', 'source') setkey(attr, source) attr$source <- as.character(attr$source) print('Attributes loaded') # open files for(root in 1:70000) # this loop was run in batches of 10,000 to increase speed/help find errors { if(file.exists(paste0(path, 'Summaries/ICCsummary_r', root, 'a.rds'))) { print(paste0('ICC summary already exists for r', root, ' so no need for further file generation')) }else{ ICCpath <- paste0('~/ICCroots/r', root, '/') if(file.exists(paste0(ICCpath, 'ICCstructure24MonthsTot_r', root, '.rds'))) { print(paste0('NEW ROOT: reading 24 months summary for root ', root)) dt <- readRDS(paste0(ICCpath, 'ICCstructure24MonthsTot_r', root, '.rds')) # Add in root column # Merge dt with attr to get info setkey(dt, source) setkey(attr, source) result <- merge(dt, attr, all.x = TRUE) # Remove CPHs that have multiple herds associated with them as per methods (cannot consolidate with bTB testing data) result <- result[MultipleHerds == FALSE] # 5/9/17:The holding level btb data might occur twice in a chain, therefore we should remove # Previous thoughts:Don't think I need to remove multiple herds here as all herds relate to same LK - which is what we merge on. Multiple location keys per CPH sorted in Attributes/RootsToRemove.rds file, Attributes2.R. multiple herds need to be removed from the root list of 67400 as well. # Make Welsh WLRA and WHRA into Wales and SLRA into Scotland if(nrow(result) >=1){ attr[RiskArea == 'WHRA'|RiskArea == 'WLRA', RiskArea := 'Wales'] attr[RiskArea == 'SLRA', RiskArea := 'Scotland'] # Possibly here select the line of attr for the root farm - root data frame rdf <- attr[source == dt$root[1]] rm(dt) # Did this above : Add root location key as column so we confirm which root it is #result[, 'rootLK' := lkup[rootsF == ra][[1]]] #print('root location key been added') # Save this file saveRDS(result,paste0(ICCpath, 'ICCfarmlist_r', root, '.rds')) print('ICC farm list saved to root folder') # Geographical extent of farms in chain # create new df xy - which can change into spatial df -makes XY coordinates into spatial points - with a projection OSGB (the same as the shape file) xy <- result[!is.na(X)] xy <- xy[X > 0] if(nrow(xy) == 0) { print('No points in spatial data frame - continue without spatial info') } else { sdf <- SpatialPointsDataFrame(xy[,c('X','Y')], xy, proj4string=OSGB) # get xy location of the root farm (from prev df) and make it into a sdf rLoc <- attr[source == lkup[rootsF == ra][[1]]] rLoc <- rLoc[!is.na(X)] rLoc <- rLoc[X > 0] print(paste0('If no more entries for root ', root, ' then there was no spatial data for this root.')) if(nrow(rLoc) < 1) { next } # need to insert some code to carry on loop/cancel if there are no xy coords for the root rLocsdf <- SpatialPointsDataFrame(rLoc[,c('X','Y')], rLoc, proj4string=OSGB) print('Spatial data frames created') # calculate distances between root and farms in chain distances <- gDistance(rLocsdf, sdf, byid = TRUE) # gets in m so /1000 to get xy[, 'km_root' := distances/1000] } # Get summary of all farms in chain from it only going to be able to get % - would be really nice to split it by mindist # bTB measure - breakdown in last 1,5,10 years result[, 'BD2011' := ifelse(NumDaysRestr2011 > 0, 1,0)] result[, BD2011 := as.factor(BD2011)] result[, 'BD2007_2011numdays' := NumDaysRestr2011 + NumDaysRestr2010+ NumDaysRestr2009+NumDaysRestr2008+ NumDaysRestr2007] result[, 'BD2007_2011' := ifelse(BD2007_2011numdays > 0, 1,0)] result[, BD2007_2011 := as.factor(BD2007_2011)] result[, 'BD2002_2011numdays' := NumDaysRestr2011+NumDaysRestr2010+ NumDaysRestr2009+ NumDaysRestr2008+ NumDaysRestr2007+ NumDaysRestr2006+ NumDaysRestr2005+ NumDaysRestr2004+NumDaysRestr2003+ NumDaysRestr2002] result[, 'BD2002_2011' := ifelse(BD2002_2011numdays > 0, 1,0)] result[, BD2002_2011 := as.factor(BD2002_2011)] result[, 'BD2012_2014numdays' := NumDaysRestr2014+NumDaysRestr2013+ NumDaysRestr2012] result[, 'BD2012_2014' := ifelse(BD2012_2014numdays > 0, 1,0)] result[, BD2012_2014 := as.factor(BD2012_2014)] # For root response variable rdf[, 'BD2015_2016numdays' := NumDaysRestr2015+ NumDaysRestr2016] rdf[, 'BD2015_2016' := ifelse(BD2015_2016numdays > 0, 1,0)] rdf[, BD2015_2016 := as.factor(BD2015_2016)] rdf[, 'BD2012_2014numdays' := NumDaysRestr2012+NumDaysRestr2013+NumDaysRestr2014] rdf[, 'BD2012_2014' := ifelse(BD2012_2014numdays > 0, 1,0)] rdf[, BD2012_2014 := as.factor(BD2012_2014)] # Classify type of breakdown for root. you might expect farms with single bd to be more likely to get tb from buying in? rdf[, 'BD2012_2016numdays' := NumDaysRestr2012+NumDaysRestr2013+NumDaysRestr2014+NumDaysRestr2015+NumDaysRestr2016] rdf[, 'BD2012_2016' := ifelse(BD2012_2016numdays > 0, 1,0)] rdf[, BD2012_2016 := as.factor(BD2012_2016)] rdf[, 'BD2012_2016type' := ifelse(BD2012_2016numdays > 548, 'Persistant', 'Single')] rdf[BD2012_2016type == 'Single', 'BD2012_2016type' := ifelse(BD2012_2016numdays == 0, 'None', 'Single')] rdf[NumNewBD20122016 > 1, 'BD2012_2016type' := 'Repeated'] result <- result[!is.na(X)] # Network attributes - for in strength & degree na2012 <- readRDS(paste0(path,'AttributesNA17_2012.rds')) na2013 <- readRDS(paste0(path,'AttributesNA17_2013.rds')) na2014 <- readRDS(paste0(path,'AttributesNA17_2014.rds')) na2012 <- na2012 %>% select(LocationKey, is, id) na2013 <- na2013 %>% select(LocationKey, is, id) na2014 <- na2014 %>% select(LocationKey, is, id) l <- list(na2012,na2013,na2014) na <- rbindlist(l) rm(l) setkey(na, LocationKey) na[, 'InStrength' := sum(is), by= .(LocationKey)] na <- na[na[, .I[1], by = LocationKey]$V1, ] na[, 'is' := NULL] na[, 'MeanInDeg' := mean(id), by= .(LocationKey)] na <- na[na[, .I[1], by = LocationKey]$V1, ] na[, 'id' := NULL] # Save location key (check this) LK <- lkup[rootsF == ra][[1]] CC24m <- readRDS(paste0(path, '/SourceData/CC24m_data.rds')) #result <- drop.levels(result$RiskArea) # Make new data frame with just direct contacts in d <- result[mindist == 1] # Merge list of neighbouring farms < 2km with ICC list # get number in both local <- readRDS(paste0(path, 'SourceData/Within2km/Within2km_LK', rdf$source, '.rds')) # check root is the location key of the root local[, 'id' := 1] setnames(local, 'LocationKey', 'source') local$source <- as.character(local$source) setkey(local, source) setkey(result, source) local[,c('X', 'Y') := NULL] both <- merge(result,local, all = FALSE) # only leave those in both lists # bTB hx of LOCAL farms l <- merge(local, attr, all.x = TRUE) l[, 'BD2011' := ifelse(NumDaysRestr2011 > 0, 1,0)] l[, BD2011 := as.factor(BD2011)] l[, 'BD2007_2011numdays' := NumDaysRestr2011 + NumDaysRestr2010+ NumDaysRestr2009+NumDaysRestr2008+ NumDaysRestr2007] l[, 'BD2007_2011' := ifelse(BD2007_2011numdays > 0, 1,0)] l[, BD2007_2011 := as.factor(BD2007_2011)] l[, 'BD2002_2011numdays' := NumDaysRestr2011+NumDaysRestr2010+ NumDaysRestr2009+ NumDaysRestr2008+ NumDaysRestr2007+ NumDaysRestr2006+ NumDaysRestr2005+ NumDaysRestr2004+NumDaysRestr2003+ NumDaysRestr2002] l[, 'BD2002_2011' := ifelse(BD2002_2011numdays > 0, 1,0)] l[, BD2002_2011 := as.factor(BD2002_2011)] # For response variable l[, 'BD2015_2016numdays' := NumDaysRestr2015+ NumDaysRestr2016] l[, 'BD2015_2016' := ifelse(BD2015_2016numdays > 0, 1,0)] l[, BD2015_2016 := as.factor(BD2015_2016)] l[, 'BD2012_2014numdays' := NumDaysRestr2014+NumDaysRestr2013+ NumDaysRestr2012] l[, 'BD2012_2014' := ifelse(BD2012_2014numdays > 0, 1,0)] l[, 'BD2012_2014' := as.factor(BD2012_2014)] # calculate summary stats df <- data.frame(roota= root,#y rLocationKey = LK,#y rRiskArea = as.character(rLoc$RiskArea),#y rCty = rLoc$CtyKey,#y # root farm information: rAvHerdSize = rdf$AvHerdSize,#y rHtype = rdf$Htype,#y rIS = na[LocationKey == LK, InStrength],# y # number of animals (in strength) bought in by root farm rmeanICC = round(CC24m[rootLK == LK]$meanICC),#y # Root farm TB response variable rBD2015_2016 = rdf$BD2015_2016,#y # Root farm breakdown type rBD2012_2016type = rdf$BD2012_2016type,#y # SOURCE farms md1 = nrow(result[mindist==1]), md2 = nrow(result[mindist==2]), md3 = nrow(result[mindist==3]), md4 = nrow(result[mindist==4]), md5 = nrow(result[mindist==5]), md6 = nrow(result[mindist==6]), md7 = nrow(result[mindist==7]), md8 = nrow(result[mindist==8]), # distance from root - geographically and by level md1km = round(mean(xy[mindist==1]$km_root)), md2km = round(mean(xy[mindist==2]$km_root)), md3km = round(mean(xy[mindist==3]$km_root)), md4km = round(mean(xy[mindist==4]$km_root)), md5km = round(mean(xy[mindist==5]$km_root)), md6km = round(mean(xy[mindist==6]$km_root)), md7km = round(mean(xy[mindist==7]$km_root)), md8km = round(mean(xy[mindist==8]$km_root)), # btb history spropHRA = nrow(result[RiskArea == 'HRA'])/nrow(result), spropLRA = nrow(result[RiskArea == 'LRA'])/nrow(result), spropEdge = nrow(result[RiskArea == 'Edge'])/nrow(result), spropScot = nrow(result[RiskArea == 'Scotland'])/nrow(result), spropWales = nrow(result[RiskArea == 'Wales'])/nrow(result) ) saveRDS(df,paste0(path, 'Summaries/ICCsummary_r', root, 'a.rds')) print(paste0('Data frame for root ', root, 'saved')) }else{ print(paste0('No farms left in root ', root, 's ICC after removing multiple herds')) } } else { print(paste0('No 24 month summary for root ', root)) } }} # Cluster code: Combine data from all farms - creates CC2summaries.rds #### ICCpath <- # set own path path <- #set own path # Read in files into a data.table all.files <- list.files(path = paste0(ICCpath), pattern = "ICCsummary_r") # 64783 setwd(paste0(ICCpath)) l <- lapply(all.files, readRDS) # 64783 dt <- rbindlist(l, use.names = TRUE) # 65237 setkey(dt, rLocationKey) # Check duplicated rows and remove dt[duplicated(dt$rLocationKey == TRUE)]$rLocationKey dt <- unique(dt) saveRDS(dt, paste0(path, 'CC2summaries.rds')) # Summarise source farms at each level: from farmlist #### path <- '~/ICCsummary/' # Load CC2all.rds #dt <- readRDS(paste0(path, 'SourceData/CC2summaries.rds')) dt <- readRDS(paste0(path, 'SourceData/CC2all.rds')) # both files have this weird dt$root thing #setnames(dt, 'LocationKey', 'rLocationKey') dt <- dt%>% select(rLocationKey, roota, rRiskArea, rAvHerdSize, rIS, rmeanICC, rBD2015_2016, rBD2012_2016type, md1, md2,md3,md4,md5,md6,md7,md8, spropHRA, spropLRA, spropEdge, spropScot, spropWales) setkey(dt, roota) # Give non-network farms an arbitrary roota value - i.e. 77,986 onwards. zeros <- nrow(dt[roota == 0]) dt[roota == 0, 'roota' := as.integer(seq(max(dt$roota)+1, (max(dt$roota)+ zeros),1))] setkey(dt, roota) # add attributes from 2012-2014 #### attr <- readRDS(paste0(path, 'SourceData/AttributesCCAug17.rds')) #attr <- readRDS('Attributes.rds') attr$LocationKey <- as.character(attr$LocationKey) setkey(attr, LocationKey) attr <- attr[duplicated(attr$LocationKey)== FALSE] print('start loop') # open files from farmlist-(result) to add in extra info needed for each row/root in dt for(anon in 1:max(dt$roota)) { ICCpath <- paste0('~/ICCroots/r', anon, '/') print(anon) if(file.exists(paste0(ICCpath, 'ICCfarmlist_r', anon, '.rds'))) { result <- readRDS(paste0(ICCpath, 'ICCfarmlist_r', anon, '.rds')) # Get BD info for 2010-2014 for source farms result[, 'BD2010_2014numdays' := NumDaysRestr2010 + NumDaysRestr2011+ NumDaysRestr2012+NumDaysRestr2013+ NumDaysRestr2014] result[, 'BD2010_2014' := ifelse(BD2010_2014numdays > 0, 1,0)] # Loop through all levels to get number of FARMS with a breakdown 2010-2014 at THAT level for(i in 1:8) { dt[roota == anon, eval(expression(paste0('BD', i))) := result[mindist == i, sum(BD2010_2014)]] dt[roota == anon, eval(expression(paste0('nHRA', i))) := nrow(result[mindist == i & RiskArea == 'HRA'])] dt[roota == anon, eval(expression(paste0('nEdge', i))) := nrow(result[mindist == i & RiskArea == 'Edge'])] dt[roota == anon, eval(expression(paste0('nLRA', i))) := nrow(result[mindist == i & RiskArea == 'LRA'])] dt[roota == anon, eval(expression(paste0('nWHRA', i))) := nrow(result[mindist == i & RiskArea == 'WHRA'])] dt[roota == anon, eval(expression(paste0('nWLRA', i))) := nrow(result[mindist == i & RiskArea == 'WLRA'])] dt[roota == anon, eval(expression(paste0('nSLRA', i))) := nrow(result[mindist == i & RiskArea == 'SLRA'])] } } else { print(paste0('no chain for root ', anon)) for(i in 1:8) { dt[roota == anon, eval(expression(paste0('BD', i))) := 0] dt[roota == anon, eval(expression(paste0('nHRA', i))) := 0] dt[roota == anon, eval(expression(paste0('nEdge', i))) := 0] dt[roota == anon, eval(expression(paste0('nLRA', i))) := 0] dt[roota == anon, eval(expression(paste0('nWHRA', i))) := 0] dt[roota == anon, eval(expression(paste0('nWLRA', i))) := 0] dt[roota == anon, eval(expression(paste0('nSLRA', i))) := 0] } } # Make df for root information rdf <- attr[LocationKey == dt[roota==anon]$rLocationKey] rdf[, 'BD2010_2014numdays' := NumDaysRestr2010+NumDaysRestr2011+NumDaysRestr2012+NumDaysRestr2013+NumDaysRestr2014] dt[roota == anon, 'rBD2010_2014' := ifelse(rdf$BD2010_2014numdays > 0, 1,0)] # Get bTB history for farms 2-10km away for(i in c(2,4,6,8,10)) { if(file.exists(paste0(path, 'SourceData/Within', i, 'km/Within', i,'km_LK', dt[roota==anon]$rLocationKey, '.rds'))) { local <- readRDS(paste0(path, 'SourceData/Within', i, 'km/Within', i,'km_LK', dt[roota==anon]$rLocationKey, '.rds')) # check root is the location key of the root local[, 'id' := 1] #setnames(local, 'LocationKey', 'source') local$LocationKey <- as.character(local$LocationKey) setkey(local, LocationKey) local[,c('X', 'Y') := NULL] # bTB hx of LOCAL farms l <- merge(local, attr, all.x = TRUE) lnum <- nrow(l) l[, 'BD2010_2014numdays' := NumDaysRestr2010+ NumDaysRestr2011 + NumDaysRestr2014+NumDaysRestr2013+ NumDaysRestr2012] l[, 'BD2010_2014' := ifelse(BD2010_2014numdays > 0, 1,0)] dt[roota == anon, eval(expression(paste0('lBD', i))) := sum(l$BD2010_2014)] dt[roota == anon, eval(expression(paste0('plBD', i))) := ifelse(lnum >0,sum(l$BD2010_2014)/lnum, 0)] dt[roota == anon, eval(expression(paste0('lnum', i))) := ifelse(lnum >0,lnum, 0)] print('Local farms saved') } else { paste0('no local farms between ',i,' and ', i+2,'km') for(i in c(2,4,6,8,10)) { dt[roota == anon, eval(expression(paste0('lBD', i))) := 0] dt[roota == anon, eval(expression(paste0('plBD', i))) := 0] dt[roota == anon, eval(expression(paste0('lnum', i))) := 0] } } } } # Remove herds with zero average animals dt <- dt[rAvHerdSize > 0] saveRDS(dt, paste0(path, 'Data/CC2levels_060218.rds')) # Then run CompileICCsummaries.R from ICCsummary folder in command line is fine. # Then run CompileSummariesNIN.R from helen/CC2scripts/ to combine with non network farms. # Calculate data for farms that had no farms in the ingoing contact chain (CC2scripts/DataWrangling/CompileSummaries_NINattr.R) #### # Creates farmlist, a list of unique farms in CC with real location keys # PACKAGES AND PATHS library(data.table) library(bit64) library(ggplot2) library(raster) library(dismo) library(maptools) library(rgeos) library(rgdal) library(gdistance) library(dplyr) library(tidyr) path <- 'insert path' # Load data #result <- readRDS(paste0(path, 'CC2data/NotInNetwork1214.rds')) result <- readRDS('NotInNetwork1214.rds') # Make Welsh WLRA and WHRA into Wales and SLRA into Scotland result[RiskArea == 'WHRA'|RiskArea == 'WLRA', RiskArea := 'Wales'] result[RiskArea == 'SLRA', RiskArea := 'Scotland'] rdf <- copy(result) rm(result) # For root response variable rdf[, 'BD2015_2016numdays' := NumDaysRestr2015+ NumDaysRestr2016] rdf[, 'BD2015_2016' := ifelse(BD2015_2016numdays > 0, 1,0)] rdf[, BD2015_2016 := as.factor(BD2015_2016)] rdf[, 'BD2012_2014numdays' := NumDaysRestr2012+NumDaysRestr2013+NumDaysRestr2014] rdf[, 'BD2012_2014' := ifelse(BD2012_2014numdays > 0, 1,0)] rdf[, BD2012_2014 := as.factor(BD2012_2014)] # Classify type of breakdown for root. you might expect farms with single bd to be more likely to get tb from buying in? rdf[, 'BD2012_2016numdays' := NumDaysRestr2012+NumDaysRestr2013+NumDaysRestr2014+NumDaysRestr2015+NumDaysRestr2016] rdf[, 'BD2012_2016' := ifelse(BD2012_2016numdays > 0, 1,0)] rdf[, BD2012_2016 := as.factor(BD2012_2016)] rdf[, 'BD2012_2016type' := ifelse(BD2012_2016numdays > 548, 'Persistant', 'Single')] rdf[BD2012_2016type == 'Single', 'BD2012_2016type' := ifelse(BD2012_2016numdays == 0, 'None', 'Single')] rdf[NumNewBD20122016 > 1, 'BD2012_2016type' := 'Repeated'] saveRDS(rdf, paste0(path, 'CC2data/NINattr.rds')) # set NA columns rdf[, c('Median_md','Mean_md', 'Max_md','SD_md', 'Mode_md','Width_md','meankm_root','maxkm_root','sdkm_root') := NA] # Set zero columns rdf[, c('rIS','local_ICC', 'rmeanID','rmeanICC','sNumFarms','md1','md2','md3', 'md4', 'md5', 'md6', 'md7', 'md8') := 0] rdf[, c('md1km','md2km','md3km', 'md4km', 'md5km', 'md6km', 'md7km', 'md8km') := 0] rdf[, c('spropBD2011','spropBD2007_2011','spropBD2002_2011','sNumNewOTFW20072011','sNumNewOTFS20072011','sNumCattleReactors20072011','sNumCattleIFNReactors20072011') := 0] rdf[, c('sNumFarms','sNumNewSLHBD20072011','sNumDaysRestr20072011','snumfarmsBD2011','snumfarmsBD2007_2011','snumfarmsBD2002_2011', 'spropDairy', 'spropFat', 'spropSuckler', 'spropMixed', 'spropHRA', 'spropLRA', 'spropEdge', 'spropScot', 'spropWales') := 0] rdf[, c('smeanAvHerdSize', 'spropWithinCty', 'spropWithinRA', 'sSp10', 'sSp11', 'sSp12', 'sSp13', 'sSp15', 'sSp17', 'sSp20', 'sSp21', 'sSp22', 'sSp25', 'sSp34', 'sSp35', 'sSp51', 'sSp60', 'sSp65', 'sSp74', 'sSp9') := 0] rdf[, c('dpropBD2011', 'dpropBD2007_2011', 'dpropBD2002_2011', 'dNumNewOTFW20072011', 'dNumNewOTFS20072011', 'dNumCattleReactors20072011', 'dNumCattleIFNReactors20072011', 'dNumNewSLHBD20072011', 'dNumDaysRestr20072011', 'dnumfarmsBD2011', 'dnumfarmsBD2007_2011', 'dnumfarmsBD2002_2011', 'dpropDairy', 'dpropFat', 'dpropSuckler', 'dpropMixed', 'dpropHRA', 'dpropLRA', 'dpropEdge', 'dpropScot', 'dpropWales') := 0] rdf[, c('dmeanAvHerdSize', 'dpropWithinCty', 'dpropWithinRA', 'dSp10', 'dSp11', 'dSp12', 'dSp13', 'dSp15', 'dSp17', 'dSp20', 'dSp21', 'dSp22', 'dSp25', 'dSp34', 'dSp35', 'dSp51', 'dSp60', 'dSp65', 'dSp74', 'dSp9') := 0] # Merge list of neighbouring farms < 2km with ICC list # get number in both attr <- readRDS('/home/helen/Networks/Attributes/AttributesCCAug17.rds') setkey(attr, LocationKey) for(i in 1:nrow(rdf)){ local <- readRDS(paste0(path, 'Data/Within2km/Within2km_LK', rdf$LocationKey[i], '.rds')) setkey(local, LocationKey) local[, 'id' := 1] local[,c('X', 'Y') := NULL] # bTB hx of LOCAL farms l <- merge(local, attr, all.x = TRUE) l[, 'BD2011' := ifelse(NumDaysRestr2011 > 0, 1,0)] l[, BD2011 := as.factor(BD2011)] l[, 'BD2007_2011numdays' := NumDaysRestr2011 + NumDaysRestr2010+ NumDaysRestr2009+NumDaysRestr2008+ NumDaysRestr2007] l[, 'BD2007_2011' := ifelse(BD2007_2011numdays > 0, 1,0)] l[, BD2007_2011 := as.factor(BD2007_2011)] l[, 'BD2002_2011numdays' := NumDaysRestr2011+NumDaysRestr2010+ NumDaysRestr2009+ NumDaysRestr2008+ NumDaysRestr2007+ NumDaysRestr2006+ NumDaysRestr2005+ NumDaysRestr2004+NumDaysRestr2003+ NumDaysRestr2002] l[, 'BD2002_2011' := ifelse(BD2002_2011numdays > 0, 1,0)] l[, BD2002_2011 := as.factor(BD2002_2011)] # For response variable l[, 'BD2015_2016numdays' := NumDaysRestr2015+ NumDaysRestr2016] l[, 'BD2015_2016' := ifelse(BD2015_2016numdays > 0, 1,0)] l[, BD2015_2016 := as.factor(BD2015_2016)] l[, 'BD2012_2014numdays' := NumDaysRestr2014+NumDaysRestr2013+ NumDaysRestr2012] l[, 'BD2012_2014' := ifelse(BD2012_2014numdays > 0, 1,0)] l[, 'BD2012_2014' := as.factor(BD2012_2014)] # Put information for each LK into rdf before we get rid of it # LOCAL contacts bTB history - i.e. those farms within 2km of root farm rdf[rdf[i], 'lnumFarms' := nrow(l)] rdf[rdf[i], 'lpropBD2011' := ifelse(nrow(l) == 0, 0, nrow(l[BD2011 == 1])/nrow(l))]# proportion of farms with a breakdown in the chain in that year rdf[rdf[i], 'lpropBD2007_2011' := ifelse(nrow(l) == 0, 0, nrow(l[BD2007_2011 == 1])/nrow(l))] rdf[rdf[i], 'lpropBD2002_2011' := ifelse(nrow(l) == 0, 0, nrow(l[BD2002_2011 == 1])/nrow(l))] rdf[rdf[i], 'lNumNewOTFW20072011' := sum(l[,NumNewOTFW20072011])]# number of new otfw bd between 2007-2011 of all farms in chain rdf[rdf[i], 'lNumNewOTFS20072011' := sum(l[,NumNewOTFS20072011])]# number of otfs bd 2007-2011 of all farms in chain rdf[rdf[i], 'lNumCattleReactors20072011' := sum(l[, NumCattleReactors20072011])]# number of skin reactors 2007-2011 of all farms in chain rdf[rdf[i], 'lNumCattleIFNReactors20072011' := sum(l[, NumCattleIFNReactors20072011])]# number of IFN reactors 2007-2011 of all farms in chain rdf[rdf[i], 'lNumNewSLHBD20072011' := sum(l[, NumNewSLHBD20072011])]# number BDs disclosed at SLH 2007-2011 of all farms in chain rdf[rdf[i], 'lNumDaysRestr20072011' := sum(l[,BD2007_2011numdays])]# number of days restricted 2007-2011 of all farms in chain rdf[rdf[i], 'lnumfarmsBD2011' := nrow(l[BD2011 == 1])] # number of source farms with a BD in that time period NOT the number of breakdowns. rdf[rdf[i], 'lnumfarmsBD2007_2011' := nrow(l[BD2007_2011 == 1])] rdf[rdf[i], 'lnumfarmsBD2002_2011' := nrow(l[BD2002_2011 == 1])] rdf[rdf[i], 'lpropDairy' := ifelse(nrow(l) == 0, 0, nrow(l[Htype == 'Dairy'])/nrow(l))] rdf[rdf[i], 'lpropFat' := ifelse(nrow(l) == 0, 0, nrow(l[Htype == 'Fat'])/nrow(l))] rdf[rdf[i], 'lpropSuckler' := ifelse(nrow(l) == 0, 0, nrow(l[Htype == 'Suckler'])/nrow(l))] rdf[rdf[i], 'lpropMixed' := ifelse(nrow(l) == 0, 0, nrow(l[Htype == 'Mixed'])/nrow(l))] rdf[rdf[i], 'lmeanAvHerdSize' := ifelse(nrow(l) == 0, 0, mean(l[!is.na(AvHerdSize)]$AvHerdSize))] } # calculate summary stats setnames(rdf, c('rootsF','LocationKey', 'RiskArea', 'CtyKey', 'AvHerdSize','Htype', 'NumNewOTFW20072011', 'NumNewOTFS20072011','NumNewBD20072011','BD2012_2014','BD2015_2016'), c('roota','rLocationKey', 'rRiskArea','rCty', 'rAvHerdSize', 'rHtype', 'rNumNewOTFW20072011', 'rNumNewOTFS20072011','rNumNewBD2007_2011','rBD2012_2014','rBD2015_2016')) setnames(rdf, c('rSp20152016', 'rSp20122014','BD2012_2016type'), c('rSpol20152016', 'rSpol20122014','rBD2012_2016type')) saveRDS(rdf, paste0(path, 'CC2data/NINattr.rds'))