library(dplyr) # manipulate data
library(ggplot2) # create data visualizations
library(stringr) # work with string
library(here) # file paths
library(sf) # handle vector geospatial data
library(tigris) # access TIGER/Line shapefiles
Editing geospatial files
Sometimes we find a geospatial file that has more than we need. For instance, I downloaded a file that has all the state parks in California, but I only want the state parks in LA County. In this section we’ll cover how to extract data from existing files.
Find selected California counties
California state has a file that lists all the counties in California. https://gis.data.ca.gov/datasets/California::california-county-boundaries-and-identifiers/explore
Let’s get the boundaries for Los Angeles County.
Read the file with all the counties in California.
# Read the shapefile that contains every California county boundary.
# (Assignment target restored to the left-hand side of `<-`.)
ca_counties <- read_sf(here('data/raw/California_County_Boundaries_and_Identifiers_Blue_Version_view_2716745428963682036/California_County_Boundaries_and_Identifiers.shp'))
# Show the columns and their types so we can find the county-name field.
glimpse(ca_counties)
Rows: 73
Columns: 13
$ COUNTY <chr> "Alameda County", "Alpine County", "Amador County", "Butte …
$ COPRI <chr> "01000", "02000", "03000", "04000", "05000", "06000", "0700…
$ PLACE_NAME <chr> "Alameda County", "Alpine County", "Amador County", "Butte …
$ GEOID <chr> "06001", "06003", "06005", "06007", "06009", "06011", "0601…
$ PLACE_TYPE <chr> "County", "County", "County", "County", "County", "County",…
$ GNIS_ID <dbl> 1675839, 1675840, 1675841, 1675842, 1675885, 1675902, 16759…
$ LEGAL_PLAC <chr> "Alameda County", "Alpine County", "Amador County", "Butte …
$ PLACE_ABBR <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ CNTY_ABBR <chr> "ALA", "ALP", "AMA", "BUT", "CAL", "COL", "CCA", "DNE", "ED…
$ AREA_SqMi <dbl> 820.7918, 741.0404, 606.0190, 1677.8158, 1035.7786, 1156.65…
$ COASTAL <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ GlobalID <chr> "1ef554be-bce2-4a37-b205-c10f99330cb3", "8f6df77e-8ec0-4cda…
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((-136862.7 -..., MULTIPOLYGON (…
We see that the name of the county is in the `COUNTY` column. Use `filter()` to select the rows for 'Los Angeles County'.
# Keep only the rows whose COUNTY value is exactly 'Los Angeles County'.
# (Assignment target restored to the left-hand side of `<-`.)
LA <- ca_counties %>%
  filter(COUNTY == 'Los Angeles County')
# Print the result to check which features were kept.
LA
Simple feature collection with 2 features and 12 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 96851.82 ymin: -583500.2 xmax: 216620.6 ymax: -352271.1
Projected CRS: NAD83 / California Albers
# A tibble: 2 × 13
COUNTY COPRI PLACE_NAME GEOID PLACE_TYPE GNIS_ID LEGAL_PLAC PLACE_ABBR
* <chr> <chr> <chr> <chr> <chr> <dbl> <chr> <chr>
1 Los Angeles C… 19000 Los Angel… 06037 County 277283 Los Angel… <NA>
2 Los Angeles C… 19000 Los Angel… 06037 County 277283 Los Angel… <NA>
# ℹ 5 more variables: CNTY_ABBR <chr>, AREA_SqMi <dbl>, COASTAL <chr>,
# GlobalID <chr>, geometry <MULTIPOLYGON [m]>
# Draw the filtered LA County features as a quick visual check.
ggplot() + geom_sf(data = LA)
Use `st_write()` to save the LA County boundary as a shapefile. The first argument is the map data we want to save, and the second argument is the file path.
# Write the LA object to a shapefile under data/cleaned/los_angeles_county/.
# NOTE(review): st_write() may refuse to overwrite an existing file when this
# is re-run -- confirm whether delete_dsn = TRUE is wanted here.
st_write(LA, here('data/cleaned/los_angeles_county/los_angeles_county.shp'))
Find waterways in Los Angeles County
The US Census produces geospatial data that are linked to the Census data. This data is called Topologically Integrated Geographic Encoding and Referencing (TIGER). We can use the tigris package to download TIGER/Line shapefiles.
area_water()
from tigris returns: The area hydrography shapefile contains the geometry and attributes of both perennial and intermittent area hydrography features, including ponds, lakes, oceans, swamps, glaciers, and the area covered by large streams represented as double-line drainage.
We can find waterways in LA County by passing the state and county to `area_water()`.
# Download the TIGER/Line area-hydrography features for Los Angeles County, CA.
# (Assignment target restored to the left-hand side of `<-`.)
# NOTE(review): the result reuses the name of the tigris function
# `area_water`; the name is kept because later chunks refer to it.
area_water <- area_water(state = "CA", county = "Los Angeles")
# Plot every water feature that was returned.
ggplot() +
  geom_sf(data = area_water)
Let’s find the LA River
Use names()
to get the column names.
# List the column names of the water features to find the name field.
names(area_water)
Let’s look for FULLNAME
equals ‘Los Angeles River’
# First attempt: look for an exact FULLNAME match on 'Los Angeles River'.
# (Assignment target restored to the left-hand side of `<-`.)
la_river <- area_water %>%
  filter(FULLNAME == 'Los Angeles River')
# Print the result to see how many features matched.
la_river
No records found.
Let’s use str_starts()
from stringr package to look for items that start with ‘Los Angeles’
# Second attempt: keep every feature whose FULLNAME starts with 'Los Angeles'.
# (Assignment target restored to the left-hand side of `<-`.)
la_river <- area_water %>%
  filter(str_starts(FULLNAME, 'Los Angeles'))
# Print the matches so the individual rows can be inspected.
la_river
We get three records. In this dataset the river is named 'Los Angeles Riv'.
We want rows 1 and 2; row 3 is an odd triangle-shaped feature, so we leave it out.
# Plot rows 1 and 2 together: these are the features we want to keep.
ggplot() +
  geom_sf(data = la_river[1, ]) +
  geom_sf(data = la_river[2, ])
# Plot row 3 on its own to see the feature we are about to drop.
ggplot() +
  geom_sf(data = la_river[3, ])
# Keep only the two wanted features, selected by their HYDROID values.
# (Assignment target restored to the left-hand side of `<-`.)
fixed_river <- la_river %>%
  filter(HYDROID %in% c(1104493447410, 1104493447488))
# Plot the cleaned-up river.
ggplot() +
  geom_sf(data = fixed_river)
Save the files
# Save the cleaned river features and the full county water layer as GeoJSON.
# NOTE(review): st_write() may refuse to overwrite existing files when this is
# re-run -- confirm whether delete_dsn = TRUE is wanted here.
st_write(fixed_river, here('data/cleaned/los_angeles_river.geojson'))
st_write(area_water, here('data/cleaned/la_county_waterareas.geojson'))
Find state parks in LA County
Let’s get all the California state parks within Los Angeles County.
California State Parks has a file that lists all the state parks in California. https://www.parks.ca.gov/?page_id=29682
California state has a file that lists all the counties in California. https://gis.data.ca.gov/datasets/California::california-county-boundaries-and-identifiers/explore
First we read the file with all state parks in California.
# Read the shapefile with every California state park boundary.
# (Assignment target restored to the left-hand side of `<-`.)
state_parks <- read_sf(here('data/raw/Park_Boundaries/ParkBoundaries.shp'))
Then we read the file with the LA County boundary.
# Read the cleaned LA County boundary shapefile from data/cleaned/.
# (Assignment target restored to the left-hand side of `<-`.)
LA_county <- read_sf(here('data/cleaned/los_angeles_county/los_angeles_county.shp'))
Compare CRS for the state parks map and the county map.
# Compare the coordinate reference systems of the two layers; TRUE means equal.
st_crs(state_parks) == st_crs(LA_county)
[1] FALSE
Use st_transform()
to change the CRS of counties map to match the parks map.
# Reproject the county boundary into the CRS used by the state parks layer.
# (Assignment target restored to the left-hand side of `<-`.)
LA_county <- st_transform(LA_county, crs = st_crs(state_parks))
# Confirm that both layers now share the same CRS.
st_crs(state_parks) == st_crs(LA_county)
[1] TRUE
Use st_intersects()
to find the state parks within LA county. This is similar to how we got all the observations in Expo park in an earlier lesson.
# Keep the parks that intersect at least one LA County feature:
# st_intersects() returns, per park, the indices of the county features it
# touches, so a length greater than zero means the park overlaps the county.
# (Assignment target restored to the left-hand side of `<-`.)
LA_parks <- state_parks[lengths(st_intersects(state_parks, LA_county)) > 0, ]
Create a map showing LA County and state parks within LA County.
# Base layer: the county outline. Top layer: the selected parks in yellow.
ggplot() +
  geom_sf(data = LA_county) +
  geom_sf(data = LA_parks, fill = 'yellow')
Save the file.
# Write the state parks selected for LA County to a shapefile.
# NOTE(review): st_write() may refuse to overwrite an existing file when this
# is re-run -- confirm whether delete_dsn = TRUE is wanted here.
st_write(LA_parks, here('data/cleaned/state_parks_los_angeles_county/state_parks_los_angeles_county.shp'))
Find national parks in LA County
Let’s get the National Parks within Los Angeles county.
National Park Boundaries https://catalog.data.gov/dataset/national-park-boundaries
# Read the National Park Service boundaries.
# (Assignment targets restored to the left-hand side of `<-`.)
parks_nps <- read_sf(here('data/raw/nps_boundary/nps_boundary.shp'))
# Read the cleaned LA County boundary shapefile from data/cleaned/.
la_county <- read_sf(here('data/cleaned/los_angeles_county/los_angeles_county.shp'))
check if CRS are the same
# Compare the coordinate reference systems of the two layers; TRUE means equal.
st_crs(parks_nps) == st_crs(la_county)
[1] FALSE
Set CRS of the parks to match LA County.
# Reproject the national parks into the CRS used by the county boundary.
# (Assignment target restored; it had been fused onto the next statement.)
parks_nps <- st_transform(parks_nps, crs = st_crs(la_county))
# Confirm that both layers now share the same CRS.
st_crs(parks_nps) == st_crs(la_county)
[1] TRUE
get national parks within LA county
# Keep the national parks that intersect at least one LA County feature.
# (Assignment target restored to the left-hand side of `<-`.)
ca_nps_parks <- parks_nps[lengths(st_intersects(parks_nps, la_county)) > 0, ]
# Base layer: the county outline. Top layer: the selected parks in yellow.
ggplot() +
  geom_sf(data = la_county) +
  geom_sf(data = ca_nps_parks, fill = 'yellow')
Save the file.
# Write the national parks selected for LA County to GeoJSON.
# NOTE(review): st_write() may refuse to overwrite an existing file when this
# is re-run -- confirm whether delete_dsn = TRUE is wanted here.
st_write(ca_nps_parks, here('data/cleaned/nps_la_county.geojson'))
LA County Park Needs Assessment
The LA County Park Needs Assessment file includes a layer that covers the entire county. We want to remove that countywide layer.
# Read the Park Needs Assessment demographics layer.
# (Assignment target restored to the left-hand side of `<-`.)
la_county_pna <- read_sf(here('data/raw/PNA_Demographics_for_Dashboard_(View_Layer_SDE)_8662424390864523907.geojson'))
# Check the number of rows and columns before editing.
dim(la_county_pna)
[1] 189 96
# Drop row 189, the last row according to dim() above.
# NOTE(review): this assumes the countywide layer is row 189 -- verify if the
# source file is ever refreshed, since the index is hard-coded.
# (Assignment target restored to the left-hand side of `<-`.)
la_county_pna_edit <- la_county_pna[-189, ]
# Confirm that exactly one row was removed.
dim(la_county_pna_edit)
[1] 188 96
# Plot the edited layer as a visual check that the countywide shape is gone.
ggplot(data = la_county_pna_edit) +
  geom_sf()
# Save the edited layer as GeoJSON under data/cleaned/.
st_write(
  la_county_pna_edit,
  dsn = here('data/cleaned/LA_County_PNA_Demographics.geojson')
)
Wildfires
Let’s get the wildfires within Los Angeles County from California Department of Forestry and Fire Protection’s Fire and Resource Assessment Program (FRAP) and Wildland Fire Interagency Geospatial Services (WFIGS) Group.
# (Assignment targets restored to the left-hand side of `<-` throughout.)

# CAL FIRE perimeters: unzip the archive into a fresh temporary location and
# read the extracted data from there.
tmp <- tempfile()
zip_path <- here('data/raw/California_Fire_Perimeters_(all).zip')
unzip(zipfile = zip_path, exdir = tmp)
ca_fires_all <- read_sf(tmp)

# NIFC / FIRIS perimeters, read directly from the shapefile.
NIFC_FIRIS <- read_sf(here('data/raw/CA_Perimeters_NIFC_FIRIS_public_view/CA_Perimeters_NIFC_FIRIS_public_view.shp'))

# DINS post-fire data: same unzip-then-read approach with a new temp location.
tmp <- tempfile()
zip_path <- here('data/raw/POSTFIRE_MASTER_DATA_SHARE_2101786311546707294.zip')
unzip(zipfile = zip_path, exdir = tmp)
DINS <- read_sf(tmp)

# WFIGS year-to-date perimeters, read directly from the shapefile.
WFIGS_2025 <- read_sf(here('data/raw/WFIGS_Interagency_Perimeters_YearToDate_-5395415287356828930/Perimeters.shp'))

# Read the cleaned LA County boundary shapefile from data/cleaned/.
la_county <- read_sf(here('data/cleaned/los_angeles_county/los_angeles_county.shp'))
check if CRS are the same
# Compare each fire layer's CRS with the county boundary's CRS; the printed
# result after each comparison shows whether they match.
st_crs(ca_fires_all) == st_crs(la_county)
[1] FALSE
st_crs(NIFC_FIRIS) == st_crs(la_county)
[1] FALSE
st_crs(DINS) == st_crs(la_county)
[1] FALSE
st_crs(WFIGS_2025) == st_crs(la_county)
[1] FALSE
Set CRS of the fires to match LA County.
# Reproject every fire layer into the CRS used by the county boundary so the
# layers can be compared spatially.
# (Assignment targets restored to the left-hand side of `<-`.)
ca_fires_all <- st_transform(ca_fires_all, crs = st_crs(la_county))
NIFC_FIRIS <- st_transform(NIFC_FIRIS, crs = st_crs(la_county))
DINS <- st_transform(DINS, crs = st_crs(la_county))
WFIGS_2025 <- st_transform(WFIGS_2025, crs = st_crs(la_county))
get fires within LA county
# For each fire layer, keep the features that intersect at least one LA County
# feature, then plot them on top of the county outline.
# (Assignment targets restored to the left-hand side of `<-`.)

# CAL FIRE perimeters.
la_fires_all <- ca_fires_all[lengths(st_intersects(ca_fires_all, la_county)) > 0, ]
ggplot() +
  geom_sf(data = la_county) +
  geom_sf(data = la_fires_all)

# NIFC / FIRIS perimeters.
NIFC_FIRIS_la <- NIFC_FIRIS[lengths(st_intersects(NIFC_FIRIS, la_county)) > 0, ]
ggplot() +
  geom_sf(data = la_county) +
  geom_sf(data = NIFC_FIRIS_la)

# DINS post-fire data.
DINS_la <- DINS[lengths(st_intersects(DINS, la_county)) > 0, ]
ggplot() +
  geom_sf(data = la_county) +
  geom_sf(data = DINS_la)

# WFIGS year-to-date perimeters.
WFIGS_2025_la <- WFIGS_2025[lengths(st_intersects(WFIGS_2025, la_county)) > 0, ]
ggplot() +
  geom_sf(data = la_county) +
  geom_sf(data = WFIGS_2025_la)
Save the file
# Write each LA County fire layer to its own GeoJSON file under data/cleaned/.
# NOTE(review): st_write() may refuse to overwrite existing files when this is
# re-run -- confirm whether delete_dsn = TRUE is wanted here.
st_write(la_fires_all, here('data/cleaned/cal_fire_los_angeles_county.geojson'))
st_write(NIFC_FIRIS_la, here('data/cleaned/NIFC_FIRIS_los_angeles_county.geojson'))
st_write(DINS_la, here('data/cleaned/DINS_los_angeles_county.geojson'))
st_write(WFIGS_2025_la, here('data/cleaned/wfigs_2025_los_angeles_county.geojson'))