-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.qmd
126 lines (97 loc) · 4.13 KB
/
index.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
---
title: "What kinds of households own cars in the Triangle?"
format:
html:
title-block-banner: true
toc: true
toc-location: left
toc-depth: 3
html-math-method: katex
code-fold: true
code-summary: "show the code"
code-overflow: wrap
code-tools: true
number-sections: true
theme:
dark: slate
light: flatly
fig-width: 9
fig-height: 6
editor: visual
---
## Introduction
Let's find out what proportion of households in the Triangle within each of the U.S. Census ACS household income brackets own 0, 1, 2, and 3+ cars!
## Get some data
Let's pull in the census data necessary. Vehicles by Income is not tabulated in the ACS area releases so we'll need to use the microdata (2019 this case). Thus, we'll define the Triangle as all Census Public Use Microdata Areas that intersect the Raleigh-Durham-Cary Combined Statistical Area:
```{r, output=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
library(tidyverse)
library(tidycensus)
library(survey)
library(srvyr)
library(sf)
library(mapview)
library(viridis)
census_api_key("b25f8b1b7bf10561c9cbc3a20a4d2572677f1f05", install=TRUE,overwrite=TRUE)
pumas <- tigris::pumas(state="NC",year = 2019) %>%
mutate(area_puma=as.numeric(st_area(.)))
csa <- tigris::combined_statistical_areas(year = 2019) %>%
filter(GEOID==450) %>% #get Raleigh combined statistical area
mutate(area_csa=as.numeric(st_area(.)))
geoids <- st_intersection(csa,pumas) %>% mutate(int_area=as.numeric(st_area(.)),
perc_puma_area = 100*int_area/area_puma)
PUMAs <- pumas %>% filter(PUMACE10 %in% geoids$PUMACE10[which(geoids$perc_puma_area>50)])
mapview(PUMAs, layer.name="PUMAs in analysis") + mapview(csa,color = "red", lwd=2,alpha.regions=0, col.region="red", layer.name="Raleigh-Durham-Cary CSA")
v20 <- pums_variables %>%
filter(year == 2019, survey == "acs5")
# data <- get_pums(
# variables = c("PUMA", "HINCP", "FHINCP", "VEH", "FVEHP"),
# state = "NC",
# survey = "acs5",
# recode = TRUE,
# year = 2019,
# rep="housing"
# )
```
```{r, results="asis"}
mapview(PUMAs, layer.name="PUMAs in analysis") + mapview(csa,color = "red", lwd=2,alpha.regions=0, col.region="red", layer.name="Raleigh-Durham-Cary CSA")
```
## Summarize data
Now we summarize the household level data by breaking the household income variable into 11 brackets, and the vehicles variable into categories of 0,1,2, and 3+ vehicles. We also plot the results
```{r, results="asis", message=FALSE, warning=FALSE, message=FALSE}
#write_csv(data,"data.csv")
data <- read_csv("data.csv")
data2 <- data %>% filter(SPORDER==1) %>%
filter(PUMA %in% geoids$PUMACE10[which(geoids$perc_puma_area>50)]) %>%
mutate(
household_income_bracket = case_when(
HINCP < 10000 ~ "<$10k",
HINCP >= 10000 & HINCP < 15000 ~ "$10k-$15k",
HINCP >= 15000 & HINCP < 20000 ~ "$15k-$25k",
HINCP >= 25000 & HINCP < 35000 ~ "$25k-$35k",
HINCP >= 35000 & HINCP < 50000 ~ "$35k-$50k",
HINCP >= 50000 & HINCP < 75000 ~ "$50k-$75k",
HINCP >= 75000 & HINCP < 100000 ~ "$75k-$100k",
HINCP >= 100000 & HINCP < 125000 ~ "$100k-$125k",
HINCP >= 125000 & HINCP < 150000 ~ "$125k-$150k",
HINCP >= 150000 & HINCP < 200000 ~ "$150k-$200k",
HINCP >= 200000 ~ ">$200k"),
vehicles = case_when(
VEH == 0 ~ "0",
VEH == 1 ~ "1",
VEH == 2 ~ "2",
VEH > 2 ~ "3+",
)
)
plot_data <- data2 %>%
group_by(household_income_bracket,vehicles) %>%
summarize(proportion = sum(WGTP)) %>%
group_by(household_income_bracket) %>%
mutate(vehicle_percentage = 100*proportion/sum(proportion)) %>%
ungroup() %>%
filter(!is.na(household_income_bracket))
plot_data$vehicles<- factor(plot_data$vehicles, levels = c("3+", "2","1","0"))
plot_data$household_income_bracket <- factor(plot_data$household_income_bracket, levels = c("<$10k","$10k-$15k","$15k-$25k","$25k-$35k","$35k-$50k","$50k-$75k","$75k-$100k","$100k-$125k","$125k-$150k","$150k-$200k",">$200k"))
# Stacked + percent
ggplot(plot_data, aes(fill=vehicles, y=proportion, x=household_income_bracket)) +
geom_bar(position="fill", stat="identity")
```