-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path02_Matt-Javier-Analysis.Rmd
329 lines (305 loc) · 16.3 KB
/
02_Matt-Javier-Analysis.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
---
title: 02 Analysis
output: html_document
editor_options:
  chunk_output_type: console
---
# 1.Setup
## 1.a Boilerplate: packages, library, setting modifications, etc
```{r}
## Setup: knitr options, working directory, packages, and the global ggplot theme.

## Echo every chunk's code in the knitted output.
knitr::opts_chunk$set(echo = TRUE)

## location, location, location!
## Every data path below ("./z. finaldata/...") is relative to this folder.
## knitr restores the working directory at the end of a chunk, so setwd() alone
## only helps interactive runs; root.dir makes the later chunks resolve the same
## relative paths when the document is knitted.
project_dir <- "C:/Users/matti/Desktop/Office/A. Quick Files/PS 239T/Final Project"
knitr::opts_knit$set(root.dir = project_dir)
setwd(dir = project_dir)
# getwd()

library(pacman) # thanks Jae
## p_load() installs (if needed) and attaches each package.
## ("scales" was listed twice; once is enough.)
p_load(
  "dplyr", "tidyr", "tidyverse", "plyr", "readr", "readxl", "ff",
  "data.table", "qdap", "car", "qwraps2", "sf", "xlsx", "ggplot2",
  "ggthemes", "scales", "gtools", "reshape2"
)

## setting modifications
options(max.print = 1000000)

## ggplot settings
# theme_set(theme_bw()) ##<- if you need to reset
theme_set(theme_economist(base_size = 11, base_family = ""))
## A bare theme(...) call only builds a theme object and throws it away;
## theme_update() is what folds these settings into the active theme.
theme_update(
  plot.title = element_text(face = "bold", size = 15),
  axis.title.x = element_text(face = "bold", size = 15),
  axis.title.y = element_text(face = "bold", size = 15)
)
```
## 1.b Load data
```{r}
## Read the two project tables; paths are relative to the working directory.
## From the larceny table only the precinct ID and the five
## distance-to-infrastructure columns are kept.
larceny_cols <- c(
  "PRECINCT_ID", "DIST_ENTR", "DIST_SHLTR", "DIST_WIFI", "DIST_MUNI", "DIST_LIB"
)
petitlarceny <- read.csv(file = "./z. finaldata/petitlarceny4.17.h.csv")[, larceny_cols] ## latest version
precincts <- read.csv(file = "./z. finaldata/precincts2.csv")

## Quick look at what was loaded
names(petitlarceny)
nrow(petitlarceny)
head(petitlarceny)
names(precincts)
precincts
```
# 2. Analysis: means and medians of variables for each precinct
Variables of analysis: Subway Entrances, Bus Stop Shelters, Wifi Hotspots, Munimeters, Public Libraries
## 2.a Distance averages to each variable
```{r}
## Mean and median distance (in feet) from a petty larceny to each kind of
## infrastructure, per precinct, merged onto the precinct table.

feet_per_meter <- 3.28084 # the DIST_* columns come out of QGIS in meters
dist_cols <- c("DIST_ENTR", "DIST_SHLTR", "DIST_WIFI", "DIST_MUNI", "DIST_LIB")

## Aggregate the distance columns by precinct with `fun`, convert them to feet,
## and tag the column names with `suffix`.
## Only the distances are scaled. The join key PRECINCT_ID is left alone:
## multiplying an ID by 3.28084 and dividing it back does not always return the
## exact original number, which is the likely reason the earlier merge matched
## only 68 precincts.
distance_by_precinct <- function(fun, suffix) {
  out <- aggregate(
    petitlarceny[dist_cols],
    by = list(PRECINCT_ID = petitlarceny$PRECINCT_ID),
    FUN = fun
  )
  out[dist_cols] <- out[dist_cols] * feet_per_meter
  names(out)[match(dist_cols, names(out))] <- paste0(dist_cols, suffix)
  out
}

## Mean distance of a larceny to each variable, by precinct
meanies <- distance_by_precinct(mean, "_MEAN")
colnames(meanies) # make sure they are labeled
meanies

## Median distance of a larceny to each variable, by precinct
medians <- distance_by_precinct(median, "_MEDIAN")
medians

## sew them together; the key is named explicitly so the join never depends on
## which other column names the tables happen to share
precincts <- merge(precincts, meanies, by = "PRECINCT_ID")
precincts <- merge(precincts, medians, by = "PRECINCT_ID")
head(precincts)
colnames(precincts) ## aha

## precinct 22 is Central Park. It's screwing up the graphs since it has a pop
## density of 18, making petty larceny 40000 per capita! Remove it by ID rather
## than by row position, so the right row is dropped whatever the row order.
precincts <- precincts[!(precincts$PRECINCT_ID %in% 22), ]
precincts
# write.csv(precincts, file="./z. finaldata/precincts2.csv")
# precincts <- read.csv(file = "./z. finaldata/precincts2.csv")
head(precincts)
```
## 2.b Aggregate precinct data
```{r}
## Average of every precinct-level column, saved to CSV.
## The identifier is dropped first so it is not averaged with the data.
value_cols <- setdiff(names(precincts), "PRECINCT_ID")
precinctss <- precincts[, value_cols, drop = FALSE]

## One mean per remaining column, kept as a one-column data frame
precinct_averages <- colMeans(precinctss)
precinct_summary <- data.frame(precinct_averages)

## Show the averages, then write them out
precinct_summary
write.csv(precinct_summary, file = "./z. finaldata/precinct_summary.csv")
```
## 2.c Per capita for each column
```{r}
## Per-capita table: one row per precinct, rates expressed per 100000 persons.

## Turn a precinct-level count into a rate per 100000 residents.
per_100k <- function(counts) {
  (counts / precincts$PRECINCT_POP) * 100000
}

## Start from the ID and population density, then append one rate per count
precinct_percap <- precincts[, c("PRECINCT_ID", "PRECINCT_POPDENSITY")]
precinct_percap$LARCENY_PER_CAPITA <- per_100k(precincts$LARCENIES_PRECINCT)
precinct_percap$SBWYS_PER_CAPITA <- per_100k(precincts$NUM_SBWYS)
precinct_percap$BUS_PER_CAPITA <- per_100k(precincts$NUM_BUS)
precinct_percap$WIFI_PER_CAPITA <- per_100k(precincts$NUM_WIFI)
precinct_percap$MUNI_PER_CAPITA <- per_100k(precincts$NUM_MUNIMETER)
precinct_percap$LIBRY_PER_CAPITA <- per_100k(precincts$NUM_LIBRARY)

## check
precinct_percap
# write.csv(precinct_percap, file="./z. finaldata/precinct_percapita.csv")
```
## 2.d Analyze data
```{r}
## Print the three tables built above (precinct data, column averages, and
## per-capita rates) so they are at hand while creating the graphs below.
precincts
precinct_summary
precinct_percap
```
# 3. Visualization
## 3.a larceny to population
```{r}
## Petty larceny count against precinct population, with a linear fit.
## Columns are picked by position (3 = x, 5 = y), so these plots depend on the
## column order of `precincts`.
## `level` is the confidence level of the band around the fit and must lie
## between 0 and 1; it was 9.5, which is not a valid level, so it is now 0.95.
larceny2pop <- ggplot(precincts, aes(x = precincts[, 3], y = precincts[, 5])) +
  geom_point(size = 1) +
  geom_smooth(
    method = "lm", se = TRUE, fullrange = FALSE, level = 0.95, color = "blue"
  ) +
  xlab("Population") +
  ylab("Petty Larceny") +
  ggtitle("Petty Larceny to Population") # + xlim(0,1000) + ylim(0,1000)
larceny2pop

## Relative positions to other precincts 1-77 (y axis is sequenced)
## NOTE(review): seq_along() gives the row index 1..n of each precinct, not a
## rank of its larceny count -- confirm that is the intended y axis.
relative_larceny2pop <- ggplot(
  precincts,
  aes(x = precincts[, 3], y = seq_along(precincts[, 5]))
) +
  geom_point(size = 1) +
  geom_smooth(
    method = "lm", se = TRUE, fullrange = FALSE, level = 0.95, color = "blue"
  ) +
  xlab("Population") +
  ylab("Petty Larceny") +
  ggtitle("Petty Larceny to Population (Relative)") # + xlim(0,1000) + ylim(0,1000)
relative_larceny2pop
```
Let's try again with population density.
```{r}
## Petty larceny count against population density, with a linear fit.
## Columns are picked by position (4 = x, 5 = y), so these plots depend on the
## column order of `precincts`.
## `level` is the confidence level of the band around the fit and must lie
## between 0 and 1; it was 9.5, which is not a valid level, so it is now 0.95.
larceny2popdensity <- ggplot(
  precincts,
  aes(x = precincts[, 4], y = precincts[, 5])
) +
  geom_point(size = 1) +
  geom_smooth(
    method = "lm", se = TRUE, fullrange = TRUE, level = 0.95, color = "blue"
  ) +
  xlab("Population Density") +
  ylab("Petty Larceny") +
  ggtitle("Petty Larceny to Population Density") # + xlim(0,1000) + ylim(0,1000)
larceny2popdensity

## Relative positions to other precincts 1-77 (y axis is sequenced)
## NOTE(review): seq_along() gives the row index 1..n of each precinct, not a
## rank of its larceny count -- confirm that is the intended y axis.
relative_larceny2popdensity <- ggplot(
  precincts,
  aes(x = precincts[, 4], y = seq_along(precincts[, 5]))
) +
  geom_point(size = 1) +
  geom_smooth(
    method = "lm", se = TRUE, fullrange = TRUE, level = 0.95, color = "blue"
  ) +
  xlab("Population Density") +
  ylab("Petty Larceny") +
  ggtitle("Petty Larceny to Population Density (Relative)") # + xlim(0,1000) + ylim(0,1000)
relative_larceny2popdensity
```
It seems that less densely populated precincts are associated with a higher rate of petty larceny, while higher larceny counts are only very loosely associated with larger populations.
Now let's look at the per-capita larceny rate versus population density.
```{r}
## Petty larceny per capita against population density, with a linear fit.
## precinct_percap was built with PRECINCT_POPDENSITY as its 2nd column and
## LARCENY_PER_CAPITA as its 3rd, so the columns are referred to by name here.
## `level` is the confidence level of the band around the fit and must lie
## between 0 and 1; it was 9.5, which is not a valid level, so it is now 0.95.
percaplarceny2popdensity <- ggplot(
  precinct_percap,
  aes(x = PRECINCT_POPDENSITY, y = LARCENY_PER_CAPITA)
) +
  geom_point(size = 1) +
  geom_smooth(
    method = "lm", se = TRUE, fullrange = TRUE, level = 0.95, color = "blue"
  ) +
  xlab("Population Density") +
  ylab("Petty Larceny Per Capita") +
  ggtitle("Petty Larceny rate to Pop Density") # + xlim(0,1000) + ylim(0,1000)
percaplarceny2popdensity ## eh

## Relative positions to other precincts 1-77 (y axis is sequenced)
## NOTE(review): seq_along() gives the row index 1..n of each precinct, not a
## rank of its per-capita rate -- confirm that is the intended y axis.
relative_percaplarceny2popdensity <- ggplot(
  precinct_percap,
  aes(x = PRECINCT_POPDENSITY, y = seq_along(LARCENY_PER_CAPITA))
) +
  geom_point(size = 1) +
  geom_smooth(
    method = "lm", se = TRUE, fullrange = TRUE, level = 0.95, color = "blue"
  ) +
  xlab("Population Density") +
  ylab("Petty Larceny Per Capita") +
  ggtitle("Petty Larceny rate to Pop Density (Relative)") # + xlim(0,1000) + ylim(0,1000)
relative_percaplarceny2popdensity ## eh
```
## 3.b Population to x variable
Population to subway entrances
```{r}
## check for the names
colnames(precincts)

## Subway entrances against precinct population, with a linear fit.
## Columns are picked by position (3 = x, 6 = y), so these plots depend on the
## column order of `precincts`.
## The x axis is column 3, the same column the larceny plot labels
## "Population", so the axis label now says "Population" as the title does.
## `level` is the confidence level of the band around the fit and must lie
## between 0 and 1; it was 9.5, which is not a valid level, so it is now 0.95.
subways2pop <- ggplot(precincts, aes(x = precincts[, 3], y = precincts[, 6])) +
  geom_point(size = 1) +
  geom_smooth(
    method = "lm", se = TRUE, fullrange = FALSE, level = 0.95, color = "blue"
  ) +
  xlab("Population") +
  ylab("Subway Entrances") +
  ggtitle("Subway Entrances to Population") # + xlim(0,1000) + ylim(0,1000)
subways2pop ## okay that doesn't really tell us anything

## Relative positions to other precincts 1-77 (y axis is sequenced)
## NOTE(review): seq_along() gives the row index 1..n of each precinct, not a
## rank of its subway-entrance count -- confirm that is the intended y axis.
relative_subways2pop <- ggplot(
  precincts,
  aes(x = precincts[, 3], y = seq_along(precincts[, 6]))
) +
  geom_point(size = 1) +
  geom_smooth(
    method = "lm", se = TRUE, fullrange = FALSE, level = 0.95, color = "blue"
  ) +
  xlab("Population") +
  ylab("Subway Entrances") +
  ggtitle("Subway Entrances to Population (Relative)") # + xlim(0,1000) + ylim(0,1000)
relative_subways2pop ## okay that doesn't really tell us anything
```
Population to Bus Shelters Graphs
```{r}
## Bus shelters against precinct population, with a linear fit.
## Columns are picked by position (3 = x, 7 = y), so these plots depend on the
## column order of `precincts`.
## The x axis is column 3, the same column the larceny plot labels
## "Population", so the axis label now says "Population" as the title does.
## `level` is the confidence level of the band around the fit and must lie
## between 0 and 1; it was 9.5, which is not a valid level, so it is now 0.95.
bus2pop <- ggplot(precincts, aes(x = precincts[, 3], y = precincts[, 7])) +
  geom_point(size = 1) +
  geom_smooth(
    method = "lm", se = TRUE, fullrange = FALSE, level = 0.95, color = "blue"
  ) +
  xlab("Population") +
  ylab("Bus Shelters") +
  ggtitle("Bus Shelters to Population") # + xlim(0,1000) + ylim(0,1000)
bus2pop ##

## Relative positions to other precincts 1-77 (y axis is sequenced)
## NOTE(review): seq_along() gives the row index 1..n of each precinct, not a
## rank of its bus-shelter count -- confirm that is the intended y axis.
relative_bus2pop <- ggplot(
  precincts,
  aes(x = precincts[, 3], y = seq_along(precincts[, 7]))
) +
  geom_point(size = 1) +
  geom_smooth(
    method = "lm", se = TRUE, fullrange = FALSE, level = 0.95, color = "blue"
  ) +
  xlab("Population") +
  ylab("Bus Shelters") +
  ggtitle("Bus Shelters to Population (Relative)") # + xlim(0,1000) + ylim(0,1000)
relative_bus2pop
```
Wifi to Population Graphs
```{r}
## wifi Locations
colnames(precincts)

## Wifi locations against precinct population, with a linear fit.
## Columns are picked by position (3 = x, 8 = y), so these plots depend on the
## column order of `precincts`.
## The x axis is column 3, the same column the larceny plot labels
## "Population", so the axis label now says "Population" as the title does.
## `level` is the confidence level of the band around the fit and must lie
## between 0 and 1; it was 9.5, which is not a valid level, so it is now 0.95.
wifi2pop <- ggplot(precincts, aes(x = precincts[, 3], y = precincts[, 8])) +
  geom_point(size = 1) +
  geom_smooth(
    method = "lm", se = TRUE, fullrange = FALSE, level = 0.95, color = "blue"
  ) +
  xlab("Population") +
  ylab("Wifi") +
  ggtitle("Wifi Locations to Population") # + xlim(0,1000) + ylim(0,1000)
wifi2pop

## Relative positions to other precincts 1-77 (y axis is sequenced)
## NOTE(review): seq_along() gives the row index 1..n of each precinct, not a
## rank of its wifi-location count -- confirm that is the intended y axis.
relative_wifi2pop <- ggplot(
  precincts,
  aes(x = precincts[, 3], y = seq_along(precincts[, 8]))
) +
  geom_point(size = 1) +
  geom_smooth(
    method = "lm", se = TRUE, fullrange = FALSE, level = 0.95, color = "blue"
  ) +
  xlab("Population") +
  ylab("Wifi") +
  ggtitle("Wifi Locations to Population (Relative)")
relative_wifi2pop
```
Munimeters to population Graphs
```{r}
## Munimeters against precinct population, with a linear fit.
## Columns are picked by position (3 = x, 9 = y), so these plots depend on the
## column order of `precincts`.
## The x axis is column 3, the same column the larceny plot labels
## "Population", so the axis label now says "Population" as the title does.
## `level` is the confidence level of the band around the fit and must lie
## between 0 and 1; it was 9.5, which is not a valid level, so it is now 0.95.
muni2pop <- ggplot(precincts, aes(x = precincts[, 3], y = precincts[, 9])) +
  geom_point(size = 1) +
  geom_smooth(
    method = "lm", se = TRUE, fullrange = FALSE, level = 0.95, color = "blue"
  ) +
  xlab("Population") +
  ylab("Munimeter") +
  ggtitle("Munimeters to Population") # + xlim(0,1000) + ylim(0,1000)
muni2pop ##

## Relative positions to other precincts 1-77 (y axis is sequenced)
## NOTE(review): seq_along() gives the row index 1..n of each precinct, not a
## rank of its munimeter count -- confirm that is the intended y axis.
relative_muni2pop <- ggplot(
  precincts,
  aes(x = precincts[, 3], y = seq_along(precincts[, 9]))
) +
  geom_point(size = 1) +
  geom_smooth(
    method = "lm", se = TRUE, fullrange = FALSE, level = 0.95, color = "blue"
  ) +
  xlab("Population") +
  ylab("Munimeters") +
  ggtitle("Munimeters to Population (Relative)") # + xlim(0,1000) + ylim(0,1000)
relative_muni2pop
```
Libraries to Population Graphs
```{r}
## Read a book!
## Libraries against precinct population, with a linear fit.
## Columns are picked by position (3 = x, 10 = y), so these plots depend on the
## column order of `precincts`.
## The plot used to be stored as `libraries2pop` while `library2pop` was the
## name printed, which fails with "object not found"; one name is used now.
## The x axis is column 3, the same column the larceny plot labels
## "Population", so the axis label now says "Population" as the title does.
## `level` is the confidence level of the band around the fit and must lie
## between 0 and 1; it was 9.5, which is not a valid level, so it is now 0.95.
library2pop <- ggplot(precincts, aes(x = precincts[, 3], y = precincts[, 10])) +
  geom_point(size = 1) +
  geom_smooth(
    method = "lm", se = TRUE, fullrange = FALSE, level = 0.95, color = "blue"
  ) +
  xlab("Population") +
  ylab("Libraries") +
  ggtitle("Libraries to Population") # + xlim(0,1000) + ylim(0,1000)
library2pop

## Relative positions to other precincts 1-77 (y axis is sequenced)
## NOTE(review): seq_along() gives the row index 1..n of each precinct, not a
## rank of its library count -- confirm that is the intended y axis.
relative_library2pop <- ggplot(
  precincts,
  aes(x = precincts[, 3], y = seq_along(precincts[, 10]))
) +
  geom_point(size = 1) +
  geom_smooth(
    method = "lm", se = TRUE, fullrange = FALSE, level = 0.95, color = "blue"
  ) +
  xlab("Population") +
  ylab("Libraries") +
  ggtitle("Libraries to Population (Relative)") # + xlim(0,1000) + ylim(0,1000)
relative_library2pop
```
## 3.c Comparing the distances of each variable per precinct
```{r}
## One line per distance variable across precincts.
colnames(precincts)

## Keep the ID (column 1) plus columns 16-20 for the graph.
## NOTE(review): presumably 16:20 are the DIST_*_MEAN columns, as the plot
## title suggests -- confirm against the colnames() output above.
plot_cols <- c(1, 16:20)
precinct1 <- precincts[, plot_cols]
precinct1

## Long format (one row per precinct/variable pair) so ggplot can group by variable
melted_precinct <- melt(
  precinct1,
  id.vars = "PRECINCT_ID",
  variable.name = "variable",
  value.name = "value"
)

## Lines with point markers, one colour per variable
distances_variables <- ggplot(
  data = melted_precinct,
  aes(x = PRECINCT_ID, y = value, group = variable, colour = variable)
) +
  geom_line() +
  geom_point(size = 1, shape = 21, fill = "black") +
  scale_x_discrete(limits = 1:123) +
  ylim(0, 750) + # 750 one block face away
  labs(
    x = "Precinct",
    y = "Distance (feet)",
    title = "Mean variable Dist to Petty Larceny by Precinct"
  ) +
  theme(
    axis.text.x = element_text(size = 3),
    legend.text = element_text(size = 5)
  )
distances_variables
```
# 4. Statistical Analysis for Distances and X variable (too early to draw conclusions, but let's see what happens when population density is compared to each variable's average distance to a petty larceny incident)
## 4.a Population Density and Distance to Nearest Subway
Both the Welch two-sample t-test and Pearson's product-moment correlation return a p-value higher than .05, indicating no statistically significant relationship between population density and a petty larceny's distance to the x variable.
```{r}
colnames(precincts) # check names
## Population density vs. mean distance to a subway entrance, one value per precinct
x1 <- precincts$PRECINCT_POPDENSITY
y1 <- precincts$DIST_ENTR_MEAN
## conduct statistical analysis: t-test and correlation test
## `use = "pairwise.complete.obs"` is an argument of cor(), not of t.test() or
## cor.test(); both calls silently ignored it, so it has been removed.
## NOTE(review): the Welch t-test compares the mean of x1 with the mean of y1,
## two quantities in different units -- confirm it is the test you want;
## cor.test() is the call that measures association between them.
t.test(x1, y1) # ??? 0.05.. not
cor.test(x1, y1)
```
## 4.b Population Density and Distance to Nearest Bus Stop Shelter
Result: Not Statistically Significant
```{r}
## Population density vs. mean distance to a bus stop shelter, one value per precinct
x2 <- precincts$PRECINCT_POPDENSITY
y2 <- precincts$DIST_SHLTR_MEAN
## conduct statistical analysis
## `use = "pairwise.complete.obs"` is an argument of cor(), not of t.test() or
## cor.test(); both calls silently ignored it, so it has been removed.
## NOTE(review): the Welch t-test compares the mean of x2 with the mean of y2,
## two quantities in different units -- confirm it is the test you want;
## cor.test() is the call that measures association between them.
t.test(x2, y2) # ??? 0.05.. nope
cor.test(x2, y2)
```
## 4.c Population Density and Distance To Nearest wifi location
Result: Not Statistically Significant
```{r}
## Population density vs. mean distance to a wifi location, one value per precinct
x3 <- precincts$PRECINCT_POPDENSITY
y3 <- precincts$DIST_WIFI_MEAN
## Check the statistics
## `use = "pairwise.complete.obs"` is an argument of cor(), not of t.test() or
## cor.test(); both calls silently ignored it, so it has been removed.
## NOTE(review): the Welch t-test compares the mean of x3 with the mean of y3,
## two quantities in different units -- confirm it is the test you want;
## cor.test() is the call that measures association between them.
t.test(x3, y3) # ??? 0.05.. nope
cor.test(x3, y3)
```
## 4.c Population Density and Distance To Nearest Munimeter
Result: Not Statistically Significant
```{r}
## Population density vs. mean distance to a munimeter, one value per precinct
x4 <- precincts$PRECINCT_POPDENSITY
y4 <- precincts$DIST_MUNI_MEAN
## `use = "pairwise.complete.obs"` is an argument of cor(), not of t.test() or
## cor.test(); both calls silently ignored it, so it has been removed.
## NOTE(review): the Welch t-test compares the mean of x4 with the mean of y4,
## two quantities in different units -- confirm it is the test you want;
## cor.test() is the call that measures association between them.
t.test(x4, y4) # ??? 0.05.. nope
cor.test(x4, y4)
```
## 4.c Population Density and Distance To Nearest Library
Result: Not Statistically Significant
```{r}
## Population density vs. mean distance to a library, one value per precinct
x5 <- precincts$PRECINCT_POPDENSITY
y5 <- precincts$DIST_LIB_MEAN
## Tests below
## `use = "pairwise.complete.obs"` is an argument of cor(), not of t.test() or
## cor.test(); both calls silently ignored it, so it has been removed.
## NOTE(review): the Welch t-test compares the mean of x5 with the mean of y5,
## two quantities in different units -- confirm it is the test you want;
## cor.test() is the call that measures association between them.
t.test(x5, y5) # ??? 0.05.. nope
cor.test(x5, y5)
```
## 4.d Population Density and Larceny
Result: Not statistically significant
```{r}
## Population density vs. petty larceny count, one value per precinct
x6 <- precincts$PRECINCT_POPDENSITY
y6 <- precincts$LARCENIES_PRECINCT
## Tests below
## `use = "pairwise.complete.obs"` is an argument of cor(), not of t.test() or
## cor.test(); both calls silently ignored it, so it has been removed.
## NOTE(review): the Welch t-test compares the mean of x6 with the mean of y6,
## two quantities in different units -- confirm it is the test you want;
## cor.test() is the call that measures association between them.
t.test(x6, y6) # ??? 0.05.. nope
cor.test(x6, y6)
```
# 5. Statistical Analysis for Larcenies Per Capita to Distances and X variable
## 5.a Petty Larceny per Capita and Distance to Nearest Subway Entrance
p-values: .8919 and .8345
```{r}
## Check names so you dont have to scroll up
colnames(precinct_percap)
## Larceny per capita vs. mean distance to a subway entrance, one value per precinct.
## The two vectors come from different tables; precinct_percap was built row for
## row from precincts, so they line up as long as neither has been re-sorted or
## filtered since.
x1 <- precinct_percap$LARCENY_PER_CAPITA
y1 <- precincts$DIST_ENTR_MEAN
## Tests below
## `use = "pairwise.complete.obs"` is an argument of cor(), not of t.test() or
## cor.test(); both calls silently ignored it, so it has been removed.
## NOTE(review): the Welch t-test compares the mean of x1 with the mean of y1,
## two quantities in different units -- confirm it is the test you want;
## cor.test() is the call that measures association between them.
t.test(x1, y1) # ??? 0.05.. no
cor.test(x1, y1)
```
## 5.b Petty Larceny per Capita and Distance to Nearest Bus Stop Shelter
p-value: .4369, 0.009501
```{r}
## Larceny per capita vs. mean distance to a bus stop shelter, one value per precinct.
## The two vectors come from different tables; precinct_percap was built row for
## row from precincts, so they line up as long as neither has been re-sorted or
## filtered since.
x2 <- precinct_percap$LARCENY_PER_CAPITA
y2 <- precincts$DIST_SHLTR_MEAN
## Tests below
## `use = "pairwise.complete.obs"` is an argument of cor(), not of t.test() or
## cor.test(); both calls silently ignored it, so it has been removed.
## NOTE(review): the Welch t-test compares the mean of x2 with the mean of y2,
## two quantities in different units -- confirm it is the test you want;
## cor.test() is the call that measures association between them.
t.test(x2, y2)
cor.test(x2, y2) # Yes tho...
```
## 5.c Petty Larceny per Capita and Distance To Nearest wifi location
p-value: 0.6292, 0.9251
```{r}
## Larceny per capita vs. mean distance to a wifi location, one value per precinct.
## The two vectors come from different tables; precinct_percap was built row for
## row from precincts, so they line up as long as neither has been re-sorted or
## filtered since.
x3 <- precinct_percap$LARCENY_PER_CAPITA
y3 <- precincts$DIST_WIFI_MEAN
## Tests below
## `use = "pairwise.complete.obs"` is an argument of cor(), not of t.test() or
## cor.test(); both calls silently ignored it, so it has been removed.
## NOTE(review): the Welch t-test compares the mean of x3 with the mean of y3,
## two quantities in different units -- confirm it is the test you want;
## cor.test() is the call that measures association between them.
t.test(x3, y3) # ??? 0.05.. nope
cor.test(x3, y3)
```
## 5.d Petty Larceny per Capita and Distance To Nearest Munimeter
p-value: 0.499, 0.7315
```{r}
## Larceny per capita vs. mean distance to a munimeter, one value per precinct.
## The two vectors come from different tables; precinct_percap was built row for
## row from precincts, so they line up as long as neither has been re-sorted or
## filtered since.
x4 <- precinct_percap$LARCENY_PER_CAPITA
y4 <- precincts$DIST_MUNI_MEAN
## Tests below
## `use = "pairwise.complete.obs"` is an argument of cor(), not of t.test() or
## cor.test(); both calls silently ignored it, so it has been removed.
## NOTE(review): the Welch t-test compares the mean of x4 with the mean of y4,
## two quantities in different units -- confirm it is the test you want;
## cor.test() is the call that measures association between them.
t.test(x4, y4) # ??? 0.05.. nope
cor.test(x4, y4)
```
## 5.e Petty Larceny per Capita and Distance To Nearest Library
p-value: 0.9907, 0.1769
```{r}
## Larceny per capita vs. mean distance to a library, one value per precinct.
## The two vectors come from different tables; precinct_percap was built row for
## row from precincts, so they line up as long as neither has been re-sorted or
## filtered since.
x5 <- precinct_percap$LARCENY_PER_CAPITA
y5 <- precincts$DIST_LIB_MEAN
## Tests below
## `use = "pairwise.complete.obs"` is an argument of cor(), not of t.test() or
## cor.test(); both calls silently ignored it, so it has been removed.
## NOTE(review): the Welch t-test compares the mean of x5 with the mean of y5,
## two quantities in different units -- confirm it is the test you want;
## cor.test() is the call that measures association between them.
t.test(x5, y5) # ??? 0.05.. nope
cor.test(x5, y5)
```
THE_END!