-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpolling_error_states.rmd
108 lines (87 loc) · 3.06 KB
/
polling_error_states.rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
```{r}
library(readr)
library(tidyverse)
# Load the presidential returns file; the path is relative, so the CSV must be
# in the working directory when this chunk runs.
mit_potus_data <- read_csv("1976-2020-president.csv")

# One row per state for 2020: Democratic and Republican vote totals and the
# Democratic share of the two-party vote.
state_potus_2020 <- mit_potus_data %>%
  filter(year == 2020) %>%
  rename(state_code = state_po) %>%
  group_by(state_code) %>%
  summarize(
    # Only rows whose party_detailed is exactly "DEMOCRAT" / "REPUBLICAN" add
    # votes; every other row contributes 0, and NA results are dropped.
    # NOTE(review): if a candidate also appears under another party label in
    # some state, those rows are not counted here -- confirm against the data.
    potus_votes_dem = sum(
      ifelse(party_detailed == "DEMOCRAT", candidatevotes, 0),
      na.rm = TRUE
    ),
    potus_votes_gop = sum(
      ifelse(party_detailed == "REPUBLICAN", candidatevotes, 0),
      na.rm = TRUE
    )
  ) %>%
  ungroup() %>%
  mutate(
    # Two-way share: Democratic votes over Democratic + Republican votes.
    vote_share_two_way_dem =
      potus_votes_dem / (potus_votes_dem + potus_votes_gop)
  )
# Download the FiveThirtyEight raw poll file (requires network access).
fivethirtyeight_polls <- read_csv(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/pollster-ratings/raw-polls.csv"
)

# Keep general-election presidential, Senate and governor polls for the
# even-numbered years 2008-2022, excluding national ("US") polls and polls
# whose methodology is exactly "Face-to-face" or "Mail".
state_polls <- fivethirtyeight_polls %>%
  filter(
    type_simple %in% c("Pres-G", "Sen-G", "Gov-G"),
    !(methodology %in% c("Face-to-face", "Mail")),
    location != "US",
    year %in% c(2008, 2010, 2012, 2014, 2016, 2018, 2020, 2022)
  ) %>%
  mutate(
    # First two characters of the location. R strings are 1-indexed, so the
    # start position is 1 (a start of 0 is silently treated as 1).
    state_code = substr(location, 1, 2),
    # Fold the "N1"/"N2" and "M1"/"M2" prefixes into "NE" and "ME".
    # NOTE(review): presumably these are district-level location codes for
    # Nebraska and Maine -- confirm against the raw data.
    state_code = case_when(
      state_code %in% c("N1", "N2") ~ "NE",
      state_code %in% c("M1", "M2") ~ "ME",
      TRUE ~ state_code
    ),
    # Any methodology label containing "/" is collapsed to "Mixed Mode".
    methodology = ifelse(
      grepl("/", methodology, fixed = TRUE),
      "Mixed Mode",
      methodology
    )
  ) %>%
  rename(
    sample_size = samplesize,
    # NOTE(review): the new name implies `bias` is signed toward Democrats;
    # confirm against the dataset's documentation.
    margin_bias_dem = bias,
    # The original location string is kept under the name `district`.
    district = location
  ) %>%
  select(
    year,
    state_code,
    district,
    type_simple,
    type_detail,
    partisan,
    methodology,
    sample_size,
    margin_bias_dem
  )
# State codes labelled as swing states in this analysis.
swings <- c(
  "AZ", "FL", "GA", "IA", "ME", "MI", "MN", "NE",
  "NV", "NH", "NC", "OH", "PA", "TX", "WI"
)

# Convert year to a factor and tag every poll with a swing / non-swing label.
state_polls <- state_polls %>%
  mutate(
    year = as.factor(year),
    # %in% never returns NA, so every row receives one of the two labels.
    is_swing = if_else(
      state_code %in% swings,
      "Swing State",
      "Non-swing State"
    )
  )
# Mean margin bias for each year x state x methodology combination.
state_polls_avg <- state_polls %>%
  group_by(year, state_code, methodology) %>%
  summarize(
    margin_bias_dem = mean(margin_bias_dem, na.rm = TRUE),
    # Return an ungrouped tibble directly; this also avoids the regrouping
    # message summarize() emits when several grouping variables are in play.
    .groups = "drop"
  )
```
```{r}
# Attach each state's 2020 presidential totals and two-way Democratic share to
# the averaged poll bias rows; every row of state_polls_avg is kept.
plotting_data <- left_join(
  state_polls_avg,
  state_potus_2020,
  by = "state_code"
)
```
```{r}
# Mean margin bias against the 2020 two-way Democratic share, coloured by year
# with one linear fit per year, in one panel per polling methodology.
bias_by_method_plot <- ggplot(
  data = plotting_data,
  aes(x = vote_share_two_way_dem, y = margin_bias_dem, color = year)
) +
  geom_hline(yintercept = 0) +
  geom_point(stroke = 0, alpha = .10) +
  geom_smooth(method = "lm", se = FALSE) +
  theme_minimal() +
  theme(legend.position = "right") +
  labs(
    title = "Polling Margin Bias by Biden Vote Share",
    x = "Biden Two-Way Vote Share (2020)",
    y = "Mean Margin Bias"
  ) +
  facet_wrap(~methodology)

# The plot is now assigned to a name, so print it explicitly to keep it
# rendered in the document.
print(bias_by_method_plot)

# Pass the plot explicitly instead of relying on ggsave()'s implicit
# last_plot() default, so the saved file cannot pick up a different plot.
ggsave("state_year_error_method.png", plot = bias_by_method_plot)
# Mean margin bias against the 2020 two-way Democratic share with a single
# linear fit, in one panel per year.
bias_by_year_plot <- ggplot(
  data = plotting_data,
  aes(x = vote_share_two_way_dem, y = margin_bias_dem)
) +
  geom_hline(yintercept = 0) +
  geom_point(stroke = 0, alpha = .10) +
  geom_smooth(method = "lm", se = FALSE) +
  theme_minimal() +
  # No aesthetic in this plot produces a legend, so this setting currently has
  # no visible effect; it is kept to leave the plot specification unchanged.
  theme(legend.position = "top") +
  labs(
    title = "Polling Margin Bias by Biden Vote Share",
    x = "Biden Two-Way Vote Share (2020)",
    y = "Mean Margin Bias"
  ) +
  facet_wrap(~year)

# The plot is now assigned to a name, so print it explicitly to keep it
# rendered in the document.
print(bias_by_year_plot)

# Pass the plot explicitly instead of relying on ggsave()'s implicit
# last_plot() default, so the saved file cannot pick up a different plot.
ggsave("state_year_error.png", plot = bias_by_year_plot)
```