--- title: "Collins Pine FIA and QQ Processing" author: "Alexis Bernal" date: "February 28, 2020" output: word_document --- #Loading libraries necessary to process FIA and QQ data ```{r} library(raster) #Work with spatial data library(sf) #Work with spatial data library(rgdal) #Work with spatial data library(here) #Call on files from Collins_Pine-master R project library(tidyverse) #Data wrangling and manipulation ``` #Section I - Working with spatial data to subset FIA plots Creating a reproducible process of importing spatial data into R and selecting plots from FIA data to compare to Historical QQ data from Collins Pine. When looking for the appropriate "boundary" to constrain our sampling of FIA data, we chose a bounding box that encompassed areas which showed similar biophysical environments as the QQ sections we sampled. ```{r} # Loading DEM for spatial extractions (originated from LANDFIRE) # Transforming raster to projection EPSG 4269 DEM <- raster(here("large_files/Aligned", "dem_aligned.tif")) %>% projectRaster(., crs = CRS("+init=EPSG:4269")) # Looking at FIA spreadsheets, whole CN number is not visible. # Setting up options in R to view all digits (max is 22). options(digits = 22) # Loading FIA plot data (USFS FIA) FIA <- read.csv(here("large_files/CA_FIA", "CA_PLOTSNAP.csv")) # Filtering FIA for inventory year (INVYR) > 2010 (measure in 10-yr cycles). # Plots within these years have the same plot design protocol (DESIGNCD = 1). # Plots within these years are also the most current (some plots are remeasured). # Creating new ID's based on coordinates to identify remeasured plots. # PREV_PLT_CN does indicate that some of these plots are re-measurements, but... # Previous measurements taken before 2011 (so not "double counting" plots in this dataset). # Filtered for most recent data (max INVYR), just in case there are duplicates. # Technically 4-subplots/plot - so looks like duplicate entries for each year, but they're not. # Removing duplicate rows for now (just need to subset plot locations). # Extracting relevant columns for analysis. # Adding coordinate columns to retain in sf dataframe. # Converting FIA data to simple feature. # Adding CRS EPSG 4269 FIA.sf <- FIA %>% filter(INVYR > 2010) %>% mutate(new.ID = group_indices(., LON, LAT)) %>% group_by(new.ID) %>% filter(INVYR == max(INVYR)) %>% ungroup() %>% mutate(duplicates = duplicated(new.ID)) %>% filter(duplicates == "FALSE") %>% dplyr::select(CN, INVYR, LON, LAT, ELEV, DESIGNCD) %>% mutate(x.coord = LON, y.coord = LAT) %>% st_as_sf(., coords = c("LON", "LAT")) %>% st_set_crs(., st_crs("EPSG:4269")) # Creating a boundary for DEM to clip FIA points to (polygon) DEM.boundary <- st_as_sfc(st_bbox(DEM)) # Clipping FIA points to DEM boundary FIA.sf.clip <- st_intersection(FIA.sf, DEM.boundary) # Checking alignment of layers # Some points outside of actual raster data, probably due to projection? # Those will be removed anyways based on guidelines for elevation and fuzzing plot(DEM) plot(FIA.sf.clip["geometry"], col = "red", add = TRUE) plot(DEM.boundary, add = TRUE) # Start with 274 plots (FIA plots within boundary). # Extracting DEM values to FIA plots. # Converting elevation of FIA plots (originally in feet) to meters. # Selecting FIA plots within the elevation range of the QQ data (1113m - 1923m; removed 70 plots). # Selecting plots within "fuzzing" standards (150 m max discrepancy; removed 22 plots). # End with 182 plots. elevation <- FIA.sf.clip %>% mutate(DEM.value = raster::extract(DEM,.), ELEV_met= as.double(ELEV/3.281), Elev.discrepancy = abs(ELEV_met - DEM.value)) %>% filter(ELEV_met > 1112 & ELEV_met <= 1923) %>% filter(Elev.discrepancy < 151) ``` #Section II - Extracting FIA plot and tree data for "contemporary" comparisons with historical QQ data First, created functions to estimate density (TPH) and live BA (sq m/ha). Checking what the sample size would be given several constraints: only 1 condition (no split conditions within the plot), live BA > 9 m^2/ha, no fire damage, and no anomalous forest type. Our sample size started with 182 plots, after accounting for elevation discrepancies from fuzzing and being within the elevation range of the historical data. Found 60 plots that had more than one condition and an additional 29 plots had fire associated with them (disturbance codes 30-32). Also found 1 additional plot with FORTYPCD = 999 (stand age = 0; seedlings). The designated forest types seem like a reasonable comparison for the historical QQ data, with the exception of California black oak (n = 3), canyon live oak (n = 1), and tanoak, (n = 1). After aggregating tree data, we removed hardwoods from dataset and found that 5 plots had live BA < 9 m^2. After accounting for these constraints, our sample size would be 71 - 47 federally/state-owned and 24 privately/native-owned. Proceeded with aggregating tree data to eestimate basal area, tree density, and pine fraction for each plot and exported a .csv file. ```{r} # Create function to estimate density (trees/ha), # Where x = TPA_UNADJ (TPA of a given tree), # and 0.4047 is the conversion (ac to ha). density.FUN <- function(x) {density <- sum(x)/0.4047} # Create function to estimate live BA (sq m/ha), # Where a = DBH (in) of a given tree, b = TPA_UNADJ (TPA of that tree), # 0.00694444 is the conversion from sq in to sq ft, # and 0.229568 is the conversion (sq ft/ac to sq m/ha). liveba.FUN <- function(a,b) {liveba <- (sum(((pi*(a/2)^2)*0.00694444)*b))*0.229568} # Import FIA condition and tree list data. # Import forest and species reference lists from FIA. FIA_TREE <- read.csv(here("large_files/CA_FIA", "CA_TREE.csv")) FIA_COND <- read.csv(here("large_files/CA_FIA", "CA_COND.csv")) FIA_COND <- FIA_COND[,-which(sapply(FIA_COND, function(x)all(is.na(x))))] #Delete empty columns FIA_TREE <- FIA_TREE[,-which(sapply(FIA_TREE, function(x)all(is.na(x))))] #Delete empty columns fia_forest_types <- read.csv(here("large_files/CA_FIA", "REF_FOREST_TYPE.csv")) fia_species_list <- read.csv(here("large_files/CA_FIA", "REF_SPECIES.csv")) # Renaming CN column to PLT_CN to join COND and TREE tables. # According to FIA user guide, # CN of plot data = PLT_CN of COND/TREE data (foreign key) elevation <- elevation %>% rename(PLT_CN = CN) # Joining COND table with subset of FIA data (PLT_CN is the foreign key). # Extracting columns describing "conditions" (e.g. condition code, stand age, disturbance, stocking). # Calculate number of conditions for each plot. # Start with 182 plots. # Remove plots with more than 1 condition (60 plots removed; 110 plots remain). # Remove plots with fire damage (29 plots removed; 81 plots remain). # Removed plots where FORTYPCD = 999 (1 plot with stand age = 0 (seedlings only), 80 plots remain). # End with 80 plots. condition_join <- inner_join(FIA_COND, elevation, by = c("PLT_CN", "INVYR")) %>% dplyr::select(PLT_CN, INVYR, ELEV_met, COND_STATUS_CD, STDAGE, FORTYPCD, OWNGRPCD, DSTRBCD1, CONDPROP_UNADJ, GSSTKCD, x.coord, y.coord) %>% group_by(PLT_CN) %>% mutate(number.conditions = length(COND_STATUS_CD)) %>% ungroup() %>% filter(number.conditions == 1) %>% filter(DSTRBCD1 < 30 | DSTRBCD1 > 32) %>% filter(FORTYPCD != 999) # Creating a column FORTYPCD to join forest type data with condition data. fia_forest_types <- fia_forest_types %>% mutate(FORTYPCD = VALUE) # Joining forest type data with condition_join data. # Start with 80 plots. # Looked at forest types and some may not be good for comparison. # Removed California black oak (n = 3) and tanoak (n = 1). # End with 76 plots. forest_types_join<- inner_join(condition_join, fia_forest_types, by = "FORTYPCD") %>% filter(MEANING != "California black oak") %>% filter(MEANING != "Tanoak") %>% dplyr::select(PLT_CN, FORTYPCD, MEANING) # Joining tree data with forest_types_join data. # Start with 76 plots. # Subsetting relevant columns for analysis. # Keeping only live trees with a DBH >= 12 inches (cut-off DBH for Collins Pine QQ data). tree_join <- inner_join(forest_types_join, FIA_TREE, by = c("PLT_CN")) %>% dplyr::select(PLT_CN, STATUSCD, DIA, TPA_UNADJ, SPCD) %>% filter(STATUSCD == 1 & DIA >= 12) # Joining species data with tree_join data. # Start with 76 plots. # Species include: ABCO, ABMA, ACMA (A. macrophyllum), ALRH (A. rhombifolia), CADE, # PICO, PIJE, PILA, PIMO (p. monticola), PIPO, # POBAT (P. balsimifera spp. trichocarpa), PSME, QUCH, and QUKE. # Removed hardwoods from dataset. # None of the plots contained 100% hardwoods, 76 plots still remain. # Calculate density of trees (>12 in DBH) for each plot. # Calculate live BA in sq m/ha for each plot. # Removed plots where live BA < 9 m^2/ha (5 plots removed; 71 plots remain). # Estimating live BA by species for each plot. # Collapsing rows to have single value for each species in each plot. # Extracting relevant columns for analysis. # Creating columns of live BA for each species. # Collapsing rows to have single value for each species in each plot. # Replacing NA with 0 for live BA by species. # Renaming column headers to match JTS code. # Estimating pine ratio (BA Pinus spp/total live basal area) # End with 71 plots. species_join <- inner_join(tree_join, fia_species_list, by = "SPCD") %>% filter(SPECIES_SYMBOL != "QUCH2") %>% filter(SPECIES_SYMBOL != "QUKE") %>% filter(SPECIES_SYMBOL != "ACMA3") %>% filter(SPECIES_SYMBOL != "ALRH2") %>% filter(SPECIES_SYMBOL != "POBAT") %>% group_by(PLT_CN) %>% mutate(DENS_12plus = density.FUN(TPA_UNADJ), BALIVE_sqmha = liveba.FUN(DIA, TPA_UNADJ)) %>% ungroup() %>% filter(BALIVE_sqmha > 8.9) %>% group_by(PLT_CN, SPECIES_SYMBOL) %>% mutate(BA_met_species = liveba.FUN(DIA, TPA_UNADJ)) %>% distinct(PLT_CN, .keep_all = TRUE) %>% dplyr::select(PLT_CN, DENS_12plus, BALIVE_sqmha, SPECIES_SYMBOL, BA_met_species) %>% ungroup() %>% spread(., "SPECIES_SYMBOL", "BA_met_species") %>% replace(is.na(.), 0) %>% rename(ABCO_met = ABCO, ABMA_met = ABMA, CADE_met = CADE27, PICO_met = PICO, PIJE_met = PIJE, PILA_met = PILA, PIMO_met = PIMO3, PIPO_met = PIPO, PSME_met = PSME) %>% group_by(PLT_CN) %>% mutate(pine_fraction = (PIJE_met + PILA_met + PIPO_met + PIMO_met)/BALIVE_sqmha) %>% ungroup() # Joining tables to get all information for plots. # Final FIA dataset has 71 plots. d_FIA <- inner_join(species_join, condition_join, by = "PLT_CN") d_FIA <- inner_join(d_FIA, forest_types_join, by = c("PLT_CN", "FORTYPCD")) # Of those plots, 47 plots are under federal/state ownership, # And 24 plots are under private ownership. owner.check <- d_FIA %>% group_by(OWNGRPCD) %>% summarise(count = length(OWNGRPCD)) # Only extracting relevant columns for analysis d_FIA <- d_FIA %>% dplyr::select(1:14, x.coord, y.coord, MEANING) # Export extracted FIA data to .csv file write.csv(d_FIA, here("Data/Derived", "d_FIA.csv"), row.names = FALSE) # Converting final FIA dataset to .shp d_FIA_shp <- d_FIA %>% st_as_sf(., coords = c("x.coord", "y.coord")) %>% st_set_crs(., st_crs(FIA.sf.clip)) st_write(d_FIA_shp, "GIS//d_FIA/d_FIA.shp", append = FALSE) ``` #Section III: Extracting climate and topographic data for QQ sections and FIA plots Climate and topographic data will be used as inputs for random forest modeling and categorical regression trees. ```{r} # Loading additional libraries library(readxl) #To read excel files library(rgeos) #For spatial data # Loading elevation data # EPSG 3310 CA Albers; NAD83. From viewer.nationalmap.gov; 1/3 arc sec (~10 m) resolution r <- raster(here("large_files/Topography", "Collins DEM.tif")) # Loading FIA plots shapefile # Matching crs of FIA data to match raster datasets plots <- readOGR("./GIS/d_FIA/d_FIA.shp") %>% spTransform(., CRS("+init=epsg:3310")) # Loading QQ section shapefile # Matching crs of QQ data to match raster datasets lots <- readOGR("./GIS/ScascadeQQ/SCascadeQQs.shp") %>% spTransform(., CRS("+init=epsg:3310")) # Extracting topographic variables elev_mean_plots <- raster::extract(r, plots, fun=mean) plots_elev <- data.frame(site = "lassen", id = plots$PLT_CN, elev_mean = elev_mean_plots) elev_mean_lots <- raster::extract(r, lots, fun=mean) lots_elev <- data.frame(site = "lassen", id = lots$LotCodeFul, elev_mean = elev_mean_lots) # Exporting elevation data write_csv(lots_elev,"Data/Derived/lots_elev.csv") write_csv(plots_elev,"Data/Derived/plots_elev.csv") # Importing elevation data derived above^ lassen_extractions_plots <- read_csv("Data/Derived/plots_elev.csv") lassen_extractions_lots <- read_csv("Data/Derived/lots_elev.csv") # Creating centroid points for each QQ section to extract values lots_centroids <- rgeos::gCentroid(lots,byid=TRUE) # Importing slope data slope <- raster(here("large_files/Topography", "Collins Slope.tif")) #EPSG 3310 CA Albers; NAD83. Derived from DEM in QGIS aspect <- raster(here("large_files/Topography", "Collins Aspect.tif")) #EPSG 3310 CA Albers; NAD83. Derived from DEM in QGIS # Extracting slope and aspect data for FIA and QQ lassen_extractions_plots$slope <- raster::extract(slope, plots, fun=mean) lassen_extractions_lots$slope <- raster::extract(slope, lots_centroids, fun= mean) lassen_extractions_plots$aspect_num <- raster::extract(aspect, plots, fun=max) lassen_extractions_lots$aspect_num <- raster::extract(aspect, lots_centroids, fun=max) lassen_extractions_plots$aspect_cat <- ifelse(between(lassen_extractions_plots$aspect_num, 135,315),"SW","NE") lassen_extractions_lots$aspect_cat <- ifelse(between(lassen_extractions_lots$aspect_num, 135,315),"SW","NE") # Loading climate data from http://climate.calcommons.org/node/1129 CA Climate Commons, BCM. # From 1981-2010 30-year avg. dataset # EPSG 3310 NAD83 CA Albers cwd <- raster(here("large_files/Climate", "cwd1981_2010_ave_HST_1575394138/cwd1981_2010_ave_HST_1575394138.tif")) cwd_jun <- raster(here("large_files/Climate", "cwd1981_2010jun_ave_HST_1575394203/cwd1981_2010jun_ave_HST_1575394203.tif")) aet <- raster(here("large_files/Climate", "aet1981_2010_ave_HST_1575394039/aet1981_2010_ave_HST_1575394039.tif")) spk <- raster(here("large_files/Climate", "aprpck1981_2010_ave_HST_1575394218/aprpck1981_2010_ave_HST_1575394218.tif")) ppt <- raster(here("large_files/Climate", "ppt1981_2010_ave_HST_1575394242/ppt1981_2010_ave_HST_1575394242.tif")) tmx <- raster(here("large_files/Climate", "tmx1981_2010_ave_HST_1575394259/tmx1981_2010_ave_HST_1575394259.tif")) tmx_jja <- raster(here("large_files/Climate", "tmx1981_2010jja_ave_HST_1575394284/tmx1981_2010jja_ave_HST_1575394284.tif")) # Extracting climate data for FIA plots lassen_extractions_plots$cwd_mean8110 <- raster::extract(cwd, plots, fun=mean) lassen_extractions_plots$cwd_jun_mean8110 <- raster::extract(cwd_jun, plots, fun=mean) lassen_extractions_plots$aet_mean8110 <- raster::extract(aet, plots, fun=mean) lassen_extractions_plots$spk_mean8110 <- raster::extract(spk, plots, fun=mean) lassen_extractions_plots$ppt_mean8110 <- raster::extract(ppt, plots, fun=mean) lassen_extractions_plots$tmx_mean8110 <- raster::extract(tmx, plots, fun=mean) lassen_extractions_plots$tmx_jja_mean8110 <- raster::extract(tmx_jja, plots, fun=mean) # Extracting climate data for QQ sections lassen_extractions_lots$cwd_mean8110 <- raster::extract(cwd, lots_centroids, fun=mean) lassen_extractions_lots$cwd_jun_mean8110 <- raster::extract(cwd_jun, lots_centroids, fun=mean) lassen_extractions_lots$aet_mean8110 <- raster::extract(aet, lots_centroids, fun=mean) lassen_extractions_lots$spk_mean8110 <- raster::extract(spk, lots_centroids, fun=mean) lassen_extractions_lots$ppt_mean8110 <- raster::extract(ppt, lots_centroids, fun=mean) lassen_extractions_lots$tmx_mean8110 <- raster::extract(tmx, lots_centroids, fun=mean) lassen_extractions_lots$tmx_jja_mean8110 <- raster::extract(tmx_jja, lots_centroids, fun=mean) # Exporting lassen extractions write_csv(lassen_extractions_plots,"Data/Derived/Collins_FIA_extractions.csv") #FIA data write_csv(lassen_extractions_lots,"Data/Derived/Collins_QQ_extractions.csv") #QQ sections ```