-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPollutionExploration.Rmd
127 lines (106 loc) · 3.97 KB
/
PollutionExploration.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
---
title: "PollutionExploration"
author: "James Budarz"
date: "July 16, 2018"
output:
  html_document: default
  pdf_document: default
---
```{r setup, include=FALSE}
# Show each chunk's source code in the rendered document unless a chunk
# overrides it.
knitr::opts_chunk$set(echo = TRUE)
# Packages attached for the whole document.
# dplyr: group_by / summarise / select / filter used in the chunks below.
library(dplyr)
# ggplot2: scatter, column and box plots.
library(ggplot2)
# NOTE(review): no lubridate function is called in the visible chunks.
library(lubridate)
# corrplot: correlation-matrix plot in the last chunk.
library(corrplot)
# NOTE(review): no RColorBrewer function is called directly in the visible
# chunks.
library(RColorBrewer)
```
```{r loaddata, include=FALSE, cache = TRUE}
# Read the prepared pollution measurements. The path is relative, so the CSV
# has to be in the directory the document is knitted from.
df <- read.csv(
  file = "PreppedPollutionData.csv",
  header = TRUE
)
```
## Step 0: For exploratory purposes, just deal with Arizona data:
```{r DataReduction}
# Collapse an already grouped measurement table to one row per group.
#
#   grouped_data: the raw measurements after group_by(); must contain the
#                 columns POP, lat, lon and the four *.AQI columns.
#
# Returns an ungrouped summary holding the first population and coordinates
# seen in each group plus the mean AQI of each pollutant.
summarise_aqi <- function(grouped_data) {
  grouped_data %>%
    summarise(
      citypop = first(POP),
      lat = first(lat),
      long = first(lon),
      # na.rm = TRUE: without it one missing reading makes the whole group's
      # average NA (the correlation step in this file drops incomplete rows
      # on these same columns, so missing values are expected).
      avgNO2aqi = mean(NO2.AQI, na.rm = TRUE),
      avgO3aqi = mean(O3.AQI, na.rm = TRUE),
      avgSO2aqi = mean(SO2.AQI, na.rm = TRUE),
      avgCOaqi = mean(CO.AQI, na.rm = TRUE)
    ) %>%
    # Drop any grouping summarise() leaves behind so later verbs operate on
    # the whole table.
    ungroup()
}

# One row per city: averages over the full measurement period.
cityAQI <- df %>%
  group_by(City_State) %>%
  summarise_aqi()

# One row per city and measurement year.
AnnualCityAQI <- df %>%
  group_by(City_State, measurementyear) %>%
  summarise_aqi()
```
## Step 3: Make some plots of interest:
```{r Data Plotting}
# Per-city average AQI against city population, one figure per pollutant.
# Population is drawn on a log10 axis and each figure carries a straight-line
# (lm) trend with its confidence band.

# NO2
ggplot(data = cityAQI, mapping = aes(x = citypop, y = avgNO2aqi)) +
  geom_point() +
  scale_x_log10() +
  geom_smooth(method = "lm") +
  ggtitle("Effect of population on NO2 Pollution")

# O3
ggplot(data = cityAQI, mapping = aes(x = citypop, y = avgO3aqi)) +
  geom_point() +
  scale_x_log10() +
  geom_smooth(method = "lm") +
  ggtitle("Effect of population on O3 Pollution")

# SO2
ggplot(data = cityAQI, mapping = aes(x = citypop, y = avgSO2aqi)) +
  geom_point() +
  scale_x_log10() +
  geom_smooth(method = "lm") +
  ggtitle("Effect of population on SO2 Pollution")

# CO
ggplot(data = cityAQI, mapping = aes(x = citypop, y = avgCOaqi)) +
  geom_point() +
  scale_x_log10() +
  geom_smooth(method = "lm") +
  ggtitle("Effect of population on CO Pollution")
```
```{r}
# Interactive map: one circle per city, sized by population and shaded by the
# chosen average-AQI metric.
#library(rgdal)
library(leaflet)
library(htmltools)

# Metric shown on the map; swap in another avg*aqi column of cityAQI to map a
# different pollutant.
desiredMapMetric <- cityAQI$avgNO2aqi
# NOTE(review): desiredyear is not read by anything visible in this file.
desiredyear <- 2002

# Marker opacity: the metric as a fraction of its largest observed value.
# na.rm = TRUE keeps one missing average from turning every opacity into NA.
metricShare <- desiredMapMetric / max(desiredMapMetric, na.rm = TRUE)

# Marker radius grows with the square root of the population share, so circle
# area is proportional to population.
markerRadius <- 20 * sqrt(cityAQI$citypop / max(cityAQI$citypop, na.rm = TRUE))

# Colour scale used by the legend. The domain must be the metric vector
# itself: cityAQI has no column called "desiredMapMetric", so the previous
# cityAQI$desiredMapMetric evaluated to NULL.
pal <- colorNumeric(
  palette = "Reds",
  domain = desiredMapMetric
)

pollmap <- leaflet() %>%
  addProviderTiles("OpenTopoMap", group = "Topo") %>% # Add topographic tiles
  addCircleMarkers(
    lng = cityAQI$long,
    lat = cityAQI$lat,
    popup = paste(
      cityAQI$City_State, "<br>",
      "Population:", cityAQI$citypop, "<br>",
      "Average Annual AQI:", round(desiredMapMetric)
    ),
    # Markers are a fixed red whose opacity encodes the metric; pal is only
    # used for the legend.
    fillColor = "Red",
    opacity = metricShare,
    fillOpacity = metricShare,
    radius = markerRadius,
    color = "Red",
    stroke = FALSE
  ) %>%
  addLegend(
    "bottomright",
    pal = pal,
    values = desiredMapMetric,
    title = "AQI"
  )

pollmap
```
```{r}
# Calendar heat map of one pollutant's AQI, drawn with googleVis.
library(googleVis)

# gvisCalendar needs a Date column; this conversion also changes df for every
# chunk that runs after this one.
df[["Date.Local"]] <- as.Date(df[["Date.Local"]])

# Name of the df column to chart: "<pollutant>.AQI".
pollutantofinterest <- paste0("NO2", ".AQI")

plot(
  gvisCalendar(
    data = df,
    datevar = "Date.Local",
    numvar = pollutantofinterest,
    chartid = "Calendar"
  )
)
```
``` {r}
# Exploratory plots of the raw measurements; every layer below reuses this
# base plot object.
gg <- ggplot(df)

# CO against NO2, coloured by city.
gg +
  geom_point(aes(x = CO.Mean, y = NO2.Mean, color = City), size = 0.05) +
  ggtitle("NO2 Pollution Levels in Arizona")

# NO2 over time with a per-city seasonal trend.
# method = "lm" is set explicitly: with no method, geom_smooth() picks a
# smoother automatically from the group size, so the formula was never fitted
# as the intended linear model.
# On a date axis x is in days, so sin(x) had a period of about 6.3 days. The
# sine/cosine pair below has a one-year period, and using both lets the fit
# choose the phase of the cycle.
# NOTE(review): the annual period is inferred from the "sine waves" remark
# further down - confirm this is the intended model.
gg +
  geom_point(aes(x = Date.Local, y = NO2.Mean, color = City), size = 0.05) +
  ggtitle("NO2 Pollution Levels in Arizona") +
  geom_smooth(
    aes(x = Date.Local, y = NO2.Mean, color = City),
    method = "lm",
    formula = y ~ sin(2 * pi * x / 365.25) + cos(2 * pi * x / 365.25) + x
  )

# NO2 by month, one dodged bar group per city.
gg +
  geom_col(aes(x = month, y = NO2.Mean, fill = City), position = "dodge") +
  ggtitle("NO2 Levels in AZ by Month")

# Distribution of NO2 per city.
gg + geom_boxplot(aes(x = City, y = NO2.Mean))

# Correlation of the different pollutants: CORRELATION DOESN'T CAPTURE SINE WAVES!
# Rows with a missing value in any of the four AQI columns are dropped first,
# because cor() would otherwise return NA for the affected pairs.
pollutantsonly <- df %>%
  select("CO.AQI", "O3.AQI", "NO2.AQI", "SO2.AQI") %>%
  filter(complete.cases(.))
corrplot(cor(pollutantsonly), order = "hclust")
```