-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2023w35
109 lines (98 loc) · 4.71 KB
/
2023w35
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
---
title: "Tidy Tuesday Historical Markers"
author: "Erin Franke"
date: "2023-07-06"
output: html_document
---
```{r setup, include=FALSE}
# Default chunk option for the whole document: show the source code of every
# chunk alongside its output in the knitted HTML.
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
# Packages used below: wrangling/plotting (tidyverse), data overview (skimr),
# map outlines (mapdata), text tokenizing and stop words (tidytext, stopwords),
# label placement (ggrepel), and custom fonts (showtext).
library(tidyverse)
library(skimr)
library(mapdata)
library(ggtext)
library(stopwords)
library(tidytext)
library(ggrepel)
library(showtext)

# Register the Google font "Gochi Hand" under the short family name "g",
# which the plot refers to via family = "g".
font_add_google("Gochi Hand", "g")
# Registering the font is not sufficient on its own: showtext only draws text
# with registered fonts once it has been switched on for graphics devices.
showtext_auto()

# Tidy Tuesday 2023-07-04 data sets.
historical_markers <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-07-04/historical_markers.csv"
)
# NOTE(review): no_markers is not referenced by any other chunk visible in
# this document -- confirm whether it is still needed.
no_markers <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-07-04/no_markers.csv"
)
```
```{r}
# Print a per-column summary of the markers data (exploratory overview only;
# nothing is assigned).
skim(historical_markers)
```
```{r}
# Exploratory: number of markers per state/province (printed when knit).
historical_markers %>%
  count(state_or_prov)

# One row per state: every marker title pasted into a single string.
# Alaska, Hawaii and Puerto Rico are excluded here and again in the plot.
combined <- historical_markers %>%
  filter(!(state_or_prov %in% c("Alaska", "Hawaii", "Puerto Rico"))) %>%
  group_by(state_or_prov) %>%
  summarize(text = paste(title, collapse = " "))

# Most frequent non-stop-word in each state's marker titles.
# Result: one row per (state, label) with lower-cased state names so it can be
# joined to the map regions; the tibble is returned ungrouped.
word_by_state <- combined %>%
  # Tokenize the pasted titles directly. (A punctuation-stripped copy of the
  # text used to be built here but was dropped again without being read.)
  unnest_tokens(word, text) %>%
  mutate(
    word = str_replace(word, "</i>", ""),
    word = str_replace(word, "<i>", ""),
    word = str_trim(word, side = "both"),
    word = str_to_lower(word)
  ) %>%
  filter(
    str_detect(word, "\\S"),
    !(word %in% c("la", "le", "de"))
  ) %>%
  group_by(state_or_prov) %>%
  # count() keeps the state grouping, so the per-state maximum below needs no
  # second group_by().
  count(word) %>%
  filter(!(word %in% stopwords(source = "snowball"))) %>%
  mutate(most_popular = max(n)) %>%
  # Keep the top word(s) per state, and only if used more than 10 times;
  # ties produce several rows for the same state.
  filter(n == most_popular, n > 10) %>%
  mutate(
    word = str_to_title(word),
    # Hand-written labels replace the computed word for these five states.
    # NOTE(review): presumably to merge ties or show a fuller phrase --
    # confirm against the data.
    word = case_when(
      state_or_prov == "Oklahoma" ~ "U.S.",
      state_or_prov == "Illinois" ~ "St.",
      state_or_prov == "District of Columbia" ~ "Park, Street",
      state_or_prov == "Massachusetts" ~ "Knox Trail",
      state_or_prov == "Oregon" ~ "Applegate Trail",
      TRUE ~ word
    )
  ) %>%
  # Collapse rows that became identical after the manual overrides.
  distinct(state_or_prov, word) %>%
  ungroup() %>%
  mutate(state_or_prov = str_to_lower(state_or_prov))
```
```{r}
# State outline polygons from the "state" map database.
state_info <- map_data("state")

# Amount added to a label's longitude for the states whose bounding-box
# centre is shifted; every other state gets no shift.
label_long_shift <- c(michigan = 2, florida = 2, idaho = -1, virginia = 1)

# Label position per state: the midpoint of the state's bounding box, with a
# few manual adjustments, joined to the word chosen for that state.
state_labels <- state_info %>%
  group_by(region) %>%
  summarise(
    long = min(long) + (max(long) - min(long)) / 2,
    lat = min(lat) + (max(lat) - min(lat)) / 2
  ) %>%
  mutate(
    long = long + coalesce(unname(label_long_shift[region]), 0),
    # Maryland's label is moved up by half a degree.
    lat = lat + if_else(region == "maryland", 0.5, 0)
  ) %>%
  # right_join keeps every labelled state from word_by_state.
  right_join(word_by_state, by = c("region" = "state_or_prov"))
# Regions whose labels are drawn with geom_text_repel() (nudged away from the
# state and connected by a segment) instead of being placed at the state's
# label position.
repelled_regions <- c(
  "massachusetts", "connecticut", "new jersey", "delaware", "maryland",
  "district of columbia", "new hampshire"
)

# Map of marker density with each state's most common title word on top.
historical_markers %>%
  filter(!(state_or_prov %in% c("Alaska", "Hawaii", "Puerto Rico"))) %>%
  ggplot() +
  # State outlines.
  geom_polygon(
    aes(x = long, y = lat, group = group),
    data = state_info, fill = NA, color = "black", linewidth = 0.15
  ) +
  coord_fixed(ratio = 1.3) +
  # Filled 2-D density of marker locations, cut into 7 bands ...
  geom_density2d_filled(
    aes(x = longitude_minus_w, y = latitude_minus_s),
    show.legend = FALSE, alpha = 0.4, bins = 7
  ) +
  # ... coloured with one fill per band.
  scale_fill_manual(
    values = c(
      "white", "#CEE9e9", "#84BBD8", "#F8F2BE", "#FEC376", "#F88A51",
      "#A50026"
    )
  ) +
  # Labels placed directly at the computed position.
  geom_text(
    data = state_labels %>% filter(!(region %in% repelled_regions)),
    aes(x = long, y = lat, label = word),
    size = 2, inherit.aes = FALSE, family = "g"
  ) +
  # Labels pushed aside for the regions listed above.
  # NOTE(review): nudge_x / nudge_y are matched to the filtered rows by
  # position, so they rely on exactly seven rows arriving in a fixed order --
  # verify if the label data ever changes.
  geom_text_repel(
    data = state_labels %>% filter(region %in% repelled_regions),
    aes(x = long, y = lat, label = word),
    nudge_x = c(5, 3, 5, 4, 5, 4, 4),
    nudge_y = c(0, 0, -3, 0, 0, 0, 0),
    size = 2, min.segment.length = 0.2, family = "g"
  ) +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.line = element_blank(),
    axis.text = element_blank(),
    plot.title = element_text(size = 13, family = "g"),
    plot.subtitle = element_text(size = 8, family = "g", color = "grey50"),
    plot.background = element_rect(fill = "ivory"),
    panel.background = element_rect(fill = "ivory"),
    plot.caption = element_text(family = "g")
  ) +
  labs(
    x = "",
    y = "",
    title = "Where are the historical markers and how are they commonly named?",
    # The wording matches the n > 10 filter used to build the labels.
    subtitle = "Density plot shows where historical markers are most concentrated. States are marked with the most common word across all \ntheir historical marker titles (word must appear more than 10 times to be labeled).",
    caption = "Erin Franke | Data from Historical Marker Database USA Index"
  )
```