-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathr-intro-geospatial--06-dplyr.diff
220 lines (201 loc) · 5.95 KB
/
r-intro-geospatial--06-dplyr.diff
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
diff --git a/home/zhian/Documents/Carpentries/Git/zkamvar--postmaul/data/rmd-repos/datacarpentry--r-intro-geospatialR3/_episodes/06-dplyr.md b/home/zhian/Documents/Carpentries/Git/zkamvar--postmaul/data/rmd-repos/datacarpentry--r-intro-geospatialR4/_episodes/06-dplyr.md
index 21dc7d5..ee2f67c 100644
--- a/home/zhian/Documents/Carpentries/Git/zkamvar--postmaul/data/rmd-repos/datacarpentry--r-intro-geospatialR3/_episodes/06-dplyr.md
+++ b/home/zhian/Documents/Carpentries/Git/zkamvar--postmaul/data/rmd-repos/datacarpentry--r-intro-geospatialR4/_episodes/06-dplyr.md
@@ -195,10 +195,10 @@ str(gapminder)
~~~
'data.frame': 1704 obs. of 6 variables:
- $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
+ $ country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
$ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
$ pop : num 8425333 9240934 10267083 11537966 13079460 ...
- $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
+ $ continent: chr "Asia" "Asia" "Asia" "Asia" ...
$ lifeExp : num 28.8 30.3 32 34 36.1 ...
$ gdpPercap: num 779 821 853 836 740 ...
~~~
@@ -215,20 +215,21 @@ gapminder %>% group_by(continent) %>% str()
~~~
tibble [1,704 × 6] (S3: grouped_df/tbl_df/tbl/data.frame)
- $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
+ $ country : chr [1:1704] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
$ year : int [1:1704] 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
$ pop : num [1:1704] 8425333 9240934 10267083 11537966 13079460 ...
- $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
+ $ continent: chr [1:1704] "Asia" "Asia" "Asia" "Asia" ...
$ lifeExp : num [1:1704] 28.8 30.3 32 34 36.1 ...
$ gdpPercap: num [1:1704] 779 821 853 836 740 ...
- attr(*, "groups")= tibble [5 × 2] (S3: tbl_df/tbl/data.frame)
- ..$ continent: Factor w/ 5 levels "Africa","Americas",..: 1 2 3 4 5
- ..$ .rows :List of 5
+ ..$ continent: chr [1:5] "Africa" "Americas" "Asia" "Europe" ...
+ ..$ .rows : list<int> [1:5]
.. ..$ : int [1:624] 25 26 27 28 29 30 31 32 33 34 ...
.. ..$ : int [1:300] 49 50 51 52 53 54 55 56 57 58 ...
.. ..$ : int [1:396] 1 2 3 4 5 6 7 8 9 10 ...
.. ..$ : int [1:360] 13 14 15 16 17 18 19 20 21 22 ...
.. ..$ : int [1:24] 61 62 63 64 65 66 67 68 69 70 ...
+ .. ..@ ptype: int(0)
..- attr(*, ".drop")= logi TRUE
~~~
{: .output}
@@ -255,7 +256,19 @@ original dataframe into multiple pieces, then we can run functions
gdp_bycontinents <- gapminder %>%
group_by(continent) %>%
summarize(mean_gdpPercap = mean(gdpPercap))
+~~~
+{: .language-r}
+
+
+
+~~~
+`summarise()` ungrouping output (override with `.groups` argument)
+~~~
+{: .output}
+
+
+~~~
gdp_bycontinents
~~~
{: .language-r}
@@ -265,7 +278,7 @@ gdp_bycontinents
~~~
# A tibble: 5 x 2
continent mean_gdpPercap
- <fct> <dbl>
+ <chr> <dbl>
1 Africa 2194.
2 Americas 7136.
3 Asia 7902.
@@ -292,7 +305,19 @@ even better.
> > lifeExp_bycountry <- gapminder %>%
> > group_by(country) %>%
> > summarize(mean_lifeExp=mean(lifeExp))
+> >~~~
+> >{: .language-r}
+> >
> >
+> >
+> >~~~
+> >`summarise()` ungrouping output (override with `.groups` argument)
+> >~~~
+> >{: .output}
+> >
+> >
+> >
+> >~~~
> > lifeExp_bycountry %>%
> > filter(mean_lifeExp == min(mean_lifeExp) | mean_lifeExp == max(mean_lifeExp))
> >~~~
@@ -303,7 +328,7 @@ even better.
> >~~~
> ># A tibble: 2 x 2
> > country mean_lifeExp
-> > <fct> <dbl>
+> > <chr> <dbl>
> >1 Iceland 76.5
> >2 Sierra Leone 36.8
> >~~~
@@ -328,7 +353,7 @@ even better.
> >~~~
> ># A tibble: 1 x 2
> > country mean_lifeExp
-> > <fct> <dbl>
+> > <chr> <dbl>
> >1 Sierra Leone 36.8
> >~~~
> >{: .output}
@@ -347,7 +372,7 @@ even better.
> >~~~
> ># A tibble: 1 x 2
> > country mean_lifeExp
-> > <fct> <dbl>
+> > <chr> <dbl>
> >1 Iceland 76.5
> >~~~
> >{: .output}
@@ -365,6 +390,13 @@ gdp_bycontinents_byyear <- gapminder %>%
~~~
{: .language-r}
+
+
+~~~
+`summarise()` regrouping output by 'continent' (override with `.groups` argument)
+~~~
+{: .output}
+
That is already quite powerful, but it gets even better! You're not limited to defining 1 new variable in `summarize()`.
@@ -378,6 +410,13 @@ gdp_pop_bycontinents_byyear <- gapminder %>%
~~~
{: .language-r}
+
+
+~~~
+`summarise()` regrouping output by 'continent' (override with `.groups` argument)
+~~~
+{: .output}
+
## `count()` and `n()`
A very common operation is to count the number of observations for each group.
@@ -399,14 +438,12 @@ gapminder %>%
~~~
-# A tibble: 5 x 2
- continent n
- <fct> <int>
-1 Africa 52
-2 Asia 33
-3 Europe 30
-4 Americas 25
-5 Oceania 2
+ continent n
+1 Africa 52
+2 Asia 33
+3 Europe 30
+4 Americas 25
+5 Oceania 2
~~~
{: .output}
@@ -424,10 +461,17 @@ gapminder %>%
+~~~
+`summarise()` ungrouping output (override with `.groups` argument)
+~~~
+{: .output}
+
+
+
~~~
# A tibble: 5 x 2
continent se_le
- <fct> <dbl>
+ <chr> <dbl>
1 Africa 0.366
2 Americas 0.540
3 Asia 0.596
@@ -452,10 +496,17 @@ gapminder %>%
+~~~
+`summarise()` ungrouping output (override with `.groups` argument)
+~~~
+{: .output}
+
+
+
~~~
# A tibble: 5 x 5
continent mean_le min_le max_le se_le
- <fct> <dbl> <dbl> <dbl> <dbl>
+ <chr> <dbl> <dbl> <dbl> <dbl>
1 Africa 48.9 23.6 76.4 0.366
2 Americas 64.7 37.6 80.7 0.540
3 Asia 60.1 28.8 82.6 0.596
@@ -482,6 +533,13 @@ gdp_pop_bycontinents_byyear <- gapminder %>%
~~~
{: .language-r}
+
+
+~~~
+`summarise()` regrouping output by 'continent' (override with `.groups` argument)
+~~~
+{: .output}
+
## Other great resources
* [R for Data Science](http://r4ds.had.co.nz/)