-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathprocess_indicators.Rmd
232 lines (177 loc) · 7.05 KB
/
process_indicators.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
---
title: "Code to process all indicators and create final data frame"
author: "Caitlin C. Mothes, PhD"
date: "`r Sys.Date()`"
output: html_document
---
## Process Indicators
There are a total of 11 indicators in this code base. The majority of them are calculated in R and each have their own function (found in the 'R/' folder). Two indicators, heat exposure and canopy cover, were calculated using Google Earth Engine and each have a stand alone Python script in the 'python/' folder.
This workflow is if you want to calculate the indicators individually. If you want to calculate indicators as their component scores, you can do so in the 'process_components.Rmd' file.
First set up the environment with all the required libraries (using the 'setup.R' script to install and load packages) and the spatial boundaries (in `sf` format) of interest. In this workflow we are calculating these indicators for state and federal prisons, but **these functions will work on any sf polygon file in the U.S.**
```{r}
# set up environment -----------------
source("setup.R")
# read in processed prison polygons
prisons <- read_sf("data/processed/prisons/study_prisons.shp")
```
#### Flood Risk
```{r}
# flood risk (takes 1-2 days to run)
flood_risk <- calc_flood_risk(sf_obj = prisons)
```
#### Wildfire Risk
```{r}
# wildfire risk
wildfire_risk <- calc_wildfire_risk(
sf_obj = prisons,
file = "data/raw/wildfire_risk/whp2020_GeoTIF/"
)
```
#### Ozone
```{r}
# ozone
ozone <- calc_ozone(
sf_obj = prisons,
folder = "data/raw/air_quality/o3_daily/",
dist = 1000, years = c(2014, 2016)
)
```
#### PM 2.5
```{r}
# pm2.5 (may change dataset, check for more recent years)
pm25 <-
calc_pm25(
sf_obj = prisons,
folder = "data/raw/air_quality/pm2.5_sedac/",
dist = 1000,
years = c(2017, 2019),
save = FALSE
)
```
#### Pesticides
```{r}
pesticides <- calc_pesticides(prisons,
folder = "data/raw/pesticides/ferman-v1-pest-chemgrids-v1-01-geotiff",
dist = 1000,
save = TRUE)
```
#### Traffic Proximity
```{r}
# traffic proximity (takes 1.5 days to run on Desktop comp)
traffic_prox <- calc_traffic_proximity(
sf_obj = prisons,
file = "data/processed/traffic_proximity/aadt_2018.RData"
)
```
#### Risk Management Plan (RMP) Facility Proximity
```{r}
# calculate Risk Management Plan (RMP) facility proximity
rmp_prox <- calc_rmp_proximity(
sf_obj = prisons,
file = "data/raw/EPA_RMP/EPA_Emergency_Response_(ER)_Risk_Management_Plan_(RMP)_Facilities.csv"
)
```
#### NPL/Superfund Facility Proximity
```{r}
# calculate NPL facility proximity
npl_prox <- calc_npl_proximity(
sf_obj = prisons,
file = "data/processed/npl_addresses_geocoded_arc_sf.csv"
)
```
#### Hazardous Waste Facility Proximity
```{r}
# calculate Haz waste facility proximity
haz_prox <- calc_haz_waste_proximity(
sf_obj = prisons,
file = "data/processed/hazardous_waste/TSD_LQGs.csv"
)
```
### Component Score Calculation
Now to calculate component scores, you can run the following chunks of code. In this example I read in the output datasets from running each of the functions above. Since some of the functions take a long time to run, you may not create every object in a single session anyways.
#### Climate Scores
Note that heat exposure and canopy cover were saved in the 'data/processed' folder and exported from GEE instead of the 'outputs/' folder
```{r}
## climate scores -------------
flood_risk <- read_csv("outputs/flood_risk_2023-03-10.csv") %>%
select(FACILITYID, flood_risk = flood_risk_percent)
wildfire_risk <- read_csv("outputs/wildfire_risk_2023-08-29.csv")
heat_exp <- read_csv("data/processed/heat_exposure/lst_average_2023-08-30.csv")
canopy_cover <- read_csv("data/processed/canopy_cover/prison_canopy_AK.csv") %>%
bind_rows(read_csv("data/processed/canopy_cover/prison_canopy_HI.csv")) %>%
bind_rows(read_csv("data/processed/canopy_cover/prison_canopy_CONUS.csv"))
climate_scores <- list(flood_risk, wildfire_risk, heat_exp, canopy_cover) %>%
# convert FACILITYID to character for all to make sure they join
purrr::map(~ .x %>% mutate(FACILITYID = as.character(FACILITYID))) %>%
purrr::reduce(left_join, by = "FACILITYID") %>%
# calculate percentile columns for each raw indicator
mutate(across(
where(is.numeric),
.fns = list(pcntl = ~ cume_dist(.) * 100),
.names = "{col}_{fn}"
)) %>%
# need to inverse canopy cover since high value is good
mutate(percent_tree_cover_pcntl = cume_dist(desc(percent_tree_cover)) * 100) %>%
rowwise() %>%
# calculate climate component score (average all indicator percentile values per prison
mutate(climate_score = gm_mean(c_across(contains("pcntl"))))
```
#### Environmental Exposures Scores
```{r}
ozone <- read_csv("outputs/ozone_2023-08-15.csv")
pm25 <- read_csv("outputs/pm25_2023-08-10.csv")
pesticides <- read_csv("outputs/pesticides_2023-08-14.csv")
traffic <- read_csv("outputs/traffic_prox_2023-05-13.csv")
exposure_scores <- list(ozone, pm25, pesticides, traffic) %>%
# convert FACILITYID to character for all to make sure they join
purrr::map(~ .x %>% mutate(FACILITYID = as.character(FACILITYID))) %>%
purrr::reduce(left_join, by = "FACILITYID") %>%
# calculate percentile columns for each raw indicator
dplyr::mutate(across(
where(is.numeric),
.fns = list(pcntl = ~ cume_dist(.) * 100),
.names = "{col}_{fn}"
)) %>%
rowwise() %>%
# calculate climate component score (geometric mean of indicator percentiles)
mutate(exposure_score = gm_mean(c_across(contains("pcntl"))))
```
#### Environmental Effects Scores
```{r}
npl <- read_csv("data/processed/npl_prox_2023-05-16.csv")
rmp <- read_csv("data/processed/rmp_prox_2023-05-16.csv")
haz <- read_csv("data/processed/haz_prox_2023-05-16.csv")
effects_scores <- list(npl, rmp, haz) %>%
purrr::reduce(left_join, by = "FACILITYID") %>%
# make facility ID character
mutate(FACILITYID = as.character(FACILITYID)) %>%
# calculate percentile columns for each raw indicator
dplyr::mutate(across(
where(is.numeric),
.fns = list(pcntl = ~ cume_dist(.) * 100),
.names = "{col}_{fn}"
)) %>%
rowwise() %>%
# calculate climate component score (average all indicator percentile values per prison
mutate(effects_score = gm_mean(c_across(contains("pcntl"))))
```
### Final Data Frame
Now using the component scores data frames we can create a single, final data frame with an overall vulnerability index.
```{r}
final_df <- list(climate_scores, exposure_scores, effects_scores) %>%
purrr::reduce(left_join, by = "FACILITYID") %>%
# remove rowwise
ungroup() %>%
mutate(
final_risk_score = rowMeans(select(., contains("score"))),
final_risk_score_pcntl = cume_dist(final_risk_score) * 100
) %>%
# join with original prison facility metadata
mutate(FACILITYID = as.character(FACILITYID)) %>%
left_join(prisons, by = "FACILITYID")
# save as shapefile and csv
final_df %>% st_as_sf() %>% write_sf(paste0("outputs/final_df_", Sys.Date(), ".shp"))
final_df %>%
select(-geometry) %>%
write_csv(paste0("outputs/final_df_", Sys.Date(), ".csv"))
```