-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHeatmap.R
107 lines (68 loc) · 3.22 KB
/
Heatmap.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
## HEATMAP - use it with the 'example_heatmapdata.csv'

# Read the data. file.choose() opens an interactive file picker on every
# platform (choose.files() only exists on Windows).
data <- read.csv(file.choose(), sep = ",")

# Autoscaling ----
## mdatools provides prep.autoscale(); install it only when it is missing so
## the script does not re-download the package on every run.
if (!requireNamespace("mdatools", quietly = TRUE)) {
  install.packages("mdatools")
}
library(mdatools)

## Only numeric columns - the label and the samples columns are excluded.
## Column 26 is the last feature column. If your file has more features,
## change the range here (and wherever else 3:26 is used).
datanorm <- data[, 3:26]

## Mean-center every column and scale it to unit standard deviation.
datanorm <- prep.autoscale(datanorm, center = TRUE, scale = TRUE)
## Diagnostic plots for the normalization ----
### Every png() ... dev.off() pair writes the figure to a file in your WORKING
### directory (check it with getwd()) instead of showing it on screen.
### To preview a figure, run only the plotting call(s) that sit between png()
### and dev.off(); then run the whole group to save it.

### Boxplot of each autoscaled feature.
png("boxplot_norm.png", width = 480, height = 480, units = "px")
boxplot(datanorm, main = "Mean centered and standardized")
dev.off()

### Keep the scaled values (dataa) and the raw values (datab) for comparison.
dataa <- datanorm
datab <- as.matrix(data[, 3:26])

### Density of all values pooled together, before and after normalization,
### drawn side by side in a single saved image.
png("Before_After.png", height = 480, width = 980)
par(mfrow = c(1, 2))
invisible(Map(
  function(values, panel_title) {
    plot(density(values), main = panel_title, lwd = 3)
  },
  list(datab, datanorm),
  c("Before Normalization", "After Normalization")
))
dev.off()
# Bring the two identifier columns back next to the scaled features.
datanorm <- as.data.frame(datanorm)
# The relabelling steps below can also be done manually (the original note
# says "until line 55").
# Indexing with [] guarantees the column names are exactly the same as in the
# original data frame (data).
datanormnames <- cbind(data[c("X", "label")], datanorm)

# Lookup table pairing each F.. label with a month name, so the F.. codes can
# be replaced by month names later: F01 = Jun, F02 = Jul, ..., F12 = May.
month <- month.abb[c(6:12, 1:5)]
label <- cbind(label = sprintf("F%02d", 1:12), month = month)

# Swap the F.. codes for month names and move the month to the first column.
library(dplyr)
datanormlabeled <- datanormnames %>%
  merge(label, by.y = "label", all.x = TRUE) %>%
  select(last_col(), everything())

# Drop the extra column holding the F.. code (column 2); keep the month, the
# sample column and the 24 features.
datanormlabeled2 <- datanormlabeled[, c(1, 3:27)]
# Mean of each feature, per month, across all individuals, drawn as a heatmap.

# ggplot2 supplies ggplot(), aes() and geom_tile() used for the heatmap.
library(ggplot2)

# Give the 24 feature columns short placeholder names (a..x) so they can be
# selected as a group; the compound names are attached again for plotting.
feature_cols <- letters[1:24]
colnames(datanormlabeled2) <- c("month", "sample", feature_cols)

# Keep the months in the order of the F01..F12 labels (Jun -> May) instead of
# the alphabetical order a plain as.factor() would give.
month_order <- c(
  "Jun", "Jul", "Aug", "Sep", "Oct", "Nov",
  "Dec", "Jan", "Feb", "Mar", "Apr", "May"
)
datanormlabeled2$month <- factor(datanormlabeled2$month, levels = month_order)

# Split by month. It looks the same as the other data frame, but it is grouped
# so that the following operations run once per month.
meses <- group_by(datanormlabeled2, month)

# Mean per feature: one row per month, one column per feature.
month_means <- summarise(meses, across(all_of(feature_cols), mean))

# Compound names shown on the heatmap axis.
# NOTE(review): assumes these 24 names are listed in the same order as the
# feature columns 3:26 of the input file - confirm against the CSV header.
feature_names <- c(
  "2E-Hexenal", "α-Thujene", "α-Pinene", "Thuja-2,4 10-diene",
  "Sabinene", "UF", "1-Octen-3-ol", "3-Octanone", "Myrcene", "3-Octanol",
  "3Z-Hexenyl acetate", "δ-3-Carene", "o-Cymene", "UF1", "Limonene",
  "E-β-Ocimene", "α-Ocimene", "α-Cubebene", "α-Copaene", "β-Bourbonene",
  "β-Cubebene", "E-Caryophyllene", "Germacrene D", "UF3"
)
stopifnot(length(feature_names) == length(feature_cols))

# Long format for geom_tile(): one row per (month, feature) pair.
# unlist() walks the feature columns one after another, so the months repeat
# as a whole block for every feature while each feature name repeats once per
# month.
heatmap_data <- data.frame(
  label = rep(month_means$month, times = length(feature_cols)),
  features = factor(
    rep(feature_names, each = nrow(month_means)),
    levels = feature_names
  ),
  value = unlist(month_means[feature_cols], use.names = FALSE)
)

# Heatmap: months on the x axis, features on the y axis, tile colour = mean of
# the autoscaled values. print() shows the plot even when the script is run
# with source().
heatmap_plot <- ggplot(
  heatmap_data,
  aes(x = label, y = features, fill = value)
) +
  geom_tile()
print(heatmap_plot)