-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNobel Prize Winners.R
124 lines (82 loc) · 2.87 KB
/
Nobel Prize Winners.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# Loading in required libraries
install.packages("tidyverse")
install.packages("lubridate")
library(tidyverse)
# Reading in the Nobel Prize data
nobel <- read_csv("data/nobel.csv")
# Taking a look at the first couple of winners
head(nobel)
# Counting the number of (possibly shared) Nobel Prizes handed
# out between 1901 and 2016
nobel %>% count()
# Counting the number of prizes won by male and female recipients.
nobel %>%
count(sex)
# Counting the number of prizes won by different nationalities.
nobel %>%
count(birth_country) %>%
arrange(desc(n)) %>%
head(20)
# Calculating the proportion of USA born winners per decade
prop_usa_winners <- nobel %>%
mutate(
usa_born_winner = birth_country == "United States of America",
decade = floor(year / 10) * 10
) %>%
group_by(decade) %>%
summarize(proportion = mean(usa_born_winner, na.rm = TRUE))
# Display the proportions of USA born winners per decade
prop_usa_winners
# Setting the size of plots in this notebook
options(repr.plot.width=7, repr.plot.height=4)
# Plotting USA born winners
ggplot(prop_usa_winners, aes(x = decade, y = proportion)) +
geom_line(color = "skyblue") +
geom_point(color = "red") +
scale_y_continuous(labels = scales::percent, limits = 0:1, expand = c(0, 0))
ggsave("img/plot01.png", dpi=1000)
# Calculating the proportion of female laureates per decade
prop_female_winners <- nobel %>%
mutate(
female_winner = sex == "Female",
decade = floor(year / 10) * 10
) %>%
group_by(decade, category) %>%
summarize(proportion = mean(female_winner, na.rm = TRUE))
# Plotting the proportion of female laureates per decade
ggplot(prop_female_winners, aes(x = decade, y = proportion, color = category)) +
geom_line() +
geom_point() +
scale_y_continuous(labels = scales::percent, limits = 0:1, expand = c(0, 0))
ggsave("img/plot02.png", dpi=1000)
# Picking out the first woman to win a Nobel Prize
nobel %>%
filter(sex == "Female") %>%
top_n(1, desc(year))
# Selecting the laureates that have received 2 or more prizes.
nobel %>%
count(full_name) %>%
filter(n > 1)
# Loading the lubridate package
# library(lubridate)
# Calculating the age of Nobel Prize winners
nobel_age <- nobel %>%
mutate(age = year - year(birth_date))
# Plotting the age of Nobel Prize winners
ggplot(nobel_age, aes(x = age, y = year)) +
geom_point() +
geom_smooth()
ggsave("img/plot03.png", dpi=1000)
# Same plot as above, but faceted by the category of the Nobel Prize
ggplot(nobel_age, aes(x = age, y = year)) +
geom_point() +
geom_smooth(se = FALSE) +
facet_wrap(~ category)
ggsave("img/plot04.png", dpi=1000)
# The oldest winner of a Nobel Prize as of 2016
nobel_age %>% top_n(1, age)
# The youngest winner of a Nobel Prize as of 2016
nobel_age %>% top_n(1, desc(age))
# The name of the youngest winner of the Nobel Prize as of 2016
youngest_winner <- "Malala Yousafzai"
youngest_winner