-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel_checks.R
53 lines (49 loc) · 1.64 KB
/
model_checks.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# NOTE: Not to be run directly, but sourced from the individual data processing
# scripts. The `groundhog` package should be loaded and the scripts should
# define:
# - DATASET: A global variable that contains the name of the dataset
# - df: The original data. The data should have the following columns:
# - outcome: The predicted outcome
# - risk: The risk score
# - race: The race of the individuals involved
groundhog.library(c("mgcv", "glue"), "2024-07-04")
################################################################################
# Print the AUC
df %>%
roc_auc(outcome, risk) %>%
glue_data("AUC: {.estimate}") %>%
print()
# Covenience function for fitting a GAM
fit_gam <- function(race) {
bounds <- df %>%
filter(race == {{ race }}) %>%
summarize(
min = min(risk),
max = max(risk)
)
m <- gam(
outcome == "TRUE" ~ s(risk),
family = binomial,
data = filter(df, race == {{ race }})
)
tibble(
risk = seq(bounds$min, bounds$max, by = 1e-2)
) %>%
predict(m, newdata = ., type = "response", se.fit = TRUE) %>%
as_tibble() %>%
mutate(
risk = seq(bounds$min, bounds$max, by = 1e-2),
fit = as.double(fit),
se.fit = as.double(se.fit),
ymin = pmax(0, pmin(1, fit - 1.96 * se.fit)),
ymax = pmax(0, pmin(1, fit + 1.96 * se.fit))
) %>%
select(risk, fit, ymin, ymax)
}
tibble(race = c("b", "h", "w")) %>%
mutate(
fit = map(race, fit_gam),
race = as_factor(race)
) %>%
unnest(fit) %>%
write_rds(path("data", "clean", str_c(DATASET, "-calib.rds")))