#R Workshop Script - Intro.R
###########################################################################################
#Script for the R workshop held in conjunction with the OBBN Annual Biomonitoring Meeting
#January 18 2017
#
#Part 1: Introduction to R
#Prepared by: Patrick Schaefer ([email protected])
###########################################################################################
###########################################################################################
#Prologue
#
#R is an interpreted programming language consisting of objects and functions. Objects are created
#by the user or by functions:
a <- NA
a <- "Patrick"
#Here I created a blank object "a", then assigned it to represent the characters "Patrick".
#The "<-" or "=" notation is used to assign something to an object and can create the object in the same process.
#"<-" is the form you will see in most R scripts; "=" is used once below so you can recognise it.
b = " is a stats wizard?"
#Here b is created and assigned in a single line.
#
#Calling the object a or b is done by typing the object name in the console below. It can also be called
#by highlighting the object in the script and pressing ctrl-r, or clicking the cursor on the line containing
#the object and pressing ctrl-r.
a
b
#
#A function is an object that is assigned arguments and will do something with them. Functions accept arguments
#in parentheses i.e. do.something(). The paste() function joins the values of all objects you give it into a
#single character string, placing a space between them unless told otherwise:
paste(a, b)
#
#To learn more about any function in R, simply type the function behind a question mark, without parentheses
?paste
#
#Function outputs can be saved to objects as well and run later to retrieve the values:
#(Note: an object named "c" shares its name with R's built-in c() function. R can still tell the two apart
#when c() is called as a function, but a more descriptive object name is a better habit in real scripts.)
c <- paste(a, b)
c
#
#Functions can be called as arguments to be fed to other functions for more complex commands:
#(Arguments are spelled out in full - "replacement", not "replace" - so the call does not depend on R guessing
#the argument from an abbreviated name.)
paste(a, gsub(pattern = "stats wizard", replacement = "nerd", x = b))
#Here, the function gsub() replaced the "stats wizard" text with "nerd" in the object b, within the paste() function.
#Note that b was not permanently altered:
b
#
#There are thousands (maybe millions) of functions that have been created by users and are freely available.
###########################################################################################
###########################################################################################
#Section 1 - Installing and loading packages, Loading Data, Data Types
#
###########################################################################################
#Download packages
#Only packages that are not installed yet are downloaded, so re-running the script
#does not re-install everything each time.
required_pkgs <- c("vegan", "lme4", "MuMIn", "devtools")
is_installed <- vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
if (any(!is_installed)) {
  install.packages(required_pkgs[!is_installed])
}
#Load packages into memory
library(vegan)
library(lme4)
library(MuMIn)
library(devtools)
#Loading data from a .csv file - use ONE of the three options below.
#stringsAsFactors=TRUE makes text columns (such as Site) load as factors. Since R 4.0.0 the
#default is FALSE, which would load them as character vectors and change what the factor
#examples further down display.
#Option 1: pick the file in a dialog box (file.choose() is available on every platform,
#whereas choose.files() exists only on Windows)
data1 <- read.csv(file.choose(), stringsAsFactors = TRUE)
#Option 2: give the location of the file directly (replace "file path" with the real path)
#data1 <- read.csv("file path", stringsAsFactors = TRUE)
#Option 3: read a table that was copied from a spreadsheet
#(on a Mac use pipe("pbpaste") in place of "clipboard")
#data1 <- read.delim("clipboard", stringsAsFactors = TRUE)
#Shortcut (only if a loaded package supplies a dataset named "data1"): data("data1")
#data() creates the object for you; do not assign its result, because what it returns is
#the name of the dataset rather than the data.
#View the data
View(data1)
#Look at the structure of the data
str(data1)
#A factor - type vector contains a set of numeric codes with character-valued levels.
data1$Site
data1$Site[1]
summary(data1$Site)
#Character/string – each element in the vector is a string of one or more characters.
as.character(data1$Site)
###########################################################################################
#Section 2 - Selecting, viewing and manipulating data, logical statements
#
###########################################################################################
#Columns go after the comma inside [ ]. They can be picked by position...
data1[, 1]
data1[, c(1, 2, 3, 5)]
#...or by name
data1[, "Richness"]
data1[, c("Richness", "HBI")]
data1$Richness
#Rows go before the comma
data1[seq_len(4), ]
#Logical statements compare values and return a vector of TRUE/FALSE, one per element
is_site_a <- data1$Site == "Site A" # "==" is equal to
is_site_a
not_site_a <- data1$Site != "Site A" # "!=" is not equal to
not_site_a
monyr_2_or_3 <- data1$MonYr %in% c(2, 3) # "%in%" matches any value in the set
monyr_2_or_3
#A logical vector placed before the comma keeps only the rows where it is TRUE
data1[is_site_a, ]
data1[monyr_2_or_3, ]
#"|" means OR, "&" means AND
data1[is_site_a | data1$Site == "Site B", ]
data1[is_site_a & monyr_2_or_3, ]
#Store a copy of MonYr as a factor in a new column
data1[["MonYr.factor"]] <- as.factor(data1[["MonYr"]])
#Store a copy of MonYr as a character vector in a new column
data1[["MonYr.char"]] <- as.character(data1[["MonYr"]])
str(data1)
View(data1)
#Count the rows for every combination of Site and MonYr
table(data1$Site, data1$MonYr)
###########################################################################################
#Section 3 - Data summaries and plots
#
###########################################################################################
#Data summaries
#(min(), max() and median() return NA if the column contains missing values;
#add na.rm=TRUE inside the parentheses to ignore them)
summary(data1)
min(data1$Richness)
max(data1$Richness)
median(data1$Richness)
#Simple xy plot: Richness on the x axis, HBI on the y axis
plot(x = data1$Richness, y = data1$HBI)
#The same plot written as a formula. A formula is read as y ~ x, so HBI goes on the
#left-hand side to stay on the y axis.
plot(HBI ~ Richness, data = data1)
#Arguments after x and y control the title, axis labels, point symbol and point size
plot(x = data1$Richness, y = data1$HBI,
     main = "Richness vs. HBI",
     xlab = "Richness",
     ylab = "HBI",
     pch = 19,
     cex = 0.5)
#add a linear trend line
#(lm() also uses y ~ x, matching the plot above: HBI as a function of Richness)
abline(lm(HBI ~ Richness, data = data1))
#Histogram
hist(data1$HBI,
     main = "HBI Frequency")
#Boxplot
boxplot(data1$HBI ~ data1$Site,
        main = "HBI by Site")
#Using boxplots to identify outliers
#Saving the result of boxplot() keeps the numbers used to draw it. The "out" element
#holds the values drawn as individual points and "group" says which box each belongs to.
outliers <- boxplot(data1$HBI ~ data1$Site)
outliers
?boxplot
names(outliers)
#Multipanel plotting (easy version)
#One panel per Site; type "p" draws the points and "r" adds a regression line
lattice::xyplot(Richness ~ MonYr | Site, data = data1, type = c("p", "r"))
##########################################################################################
#Section 4 - Loops
#
##########################################################################################
#A for loop runs the code between { and } once for each value in the sequence,
#with i holding the current value
for (i in seq_len(10)) {
  print(i)
}
#The body of a loop can hold as many lines of code as needed
for (i in seq_len(10)) {
  print(i)
  print(i * i)
}
#The loop variable is not limited to numbers; here it steps through character values
for (n in letters[1:3]) {
  print(n)
}
#Build an empty table with one row per site and one column per summary statistic
site_names <- unique(data1$Site)
output <- data.frame(matrix(nrow = length(site_names), ncol = 3))
colnames(output) <- c("Mean", "Max", "SD")
rownames(output) <- site_names
#Each pass through the loop fills in the row belonging to one site,
#using only the HBI values recorded at that site
for (i in site_names) {
  site_hbi <- data1$HBI[data1$Site == i]
  output[i, "Mean"] <- mean(site_hbi)
  output[i, "Max"] <- max(site_hbi)
  output[i, "SD"] <- sd(site_hbi)
}
#Exercise: write a loop that creates site grouped boxplots for each metric
#i.e. >boxplot(data1$HBI~data1$Site,main="HBI by Site")
#Bonus points for correctly labeling the axes and main title