-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcampaign_5_eug_ts.r
123 lines (92 loc) · 2.44 KB
/
campaign_5_eug_ts.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# Package setup ----
# Install "forecast" only when it is not available yet, so that an ordinary
# run of the script neither needs network access nor reinstalls the package.
if (!requireNamespace("forecast", quietly = TRUE)) {
  install.packages("forecast")
}
# library() stops immediately when a package is missing; require() would only
# return FALSE and let the script fail later at the first missing function.
library(gbm)
library(tseries)
library(forecast)
# The global workspace is deliberately not cleared with rm(list = ls()):
# every object this script reads is assigned by the script itself, and wiping
# the workspace would destroy unrelated objects of an interactive session.
# Run the script in a fresh R session when a clean state is required.
#' Root mean squared logarithmic error (RMSLE) on a subsample of rows
#'
#' Scores predictions against observations on every `step`-th row, starting
#' with the first row (rows 1, 1 + step, 1 + 2 * step, ...), and returns
#' `sqrt(mean((log(Ysimulated + 1) - log(Yreal + 1))^2))` taken over all cells
#' of the sampled rows.
#'
#' @param Ysimulated Matrix or data frame of predicted values.
#' @param Yreal Matrix or data frame of observed values with the same
#'   dimensions as `Ysimulated`.
#' @param step Row sampling interval; the default of 10 scores every 10th row.
#' @return A single numeric RMSLE value. `NA_real_` when the input has no rows
#'   or no columns; `NA` also results when a sampled cell is missing.
crossvalidate <- function(Ysimulated, Yreal, step = 10) {
  stopifnot(
    length(dim(Ysimulated)) == 2L,
    identical(dim(Ysimulated), dim(Yreal)),
    length(step) == 1L,
    step >= 1
  )

  n_rows <- nrow(Ysimulated)
  if (n_rows == 0L || ncol(Ysimulated) == 0L) {
    return(NA_real_)
  }

  # R indexes from 1, so sampling starts at row 1 (row 0 does not exist).
  rows <- seq.int(1L, n_rows, by = step)
  simulated <- as.matrix(Ysimulated[rows, , drop = FALSE])
  real <- as.matrix(Yreal[rows, , drop = FALSE])

  # log1p(x) is log(x + 1), computed accurately for small x.
  log_error <- log1p(simulated) - log1p(real)

  # Square before averaging so positive and negative errors cannot cancel,
  # and average over the sampled cells only.
  sqrt(mean(log_error^2))
}
# Setup data ----
# The file names below are resolved relative to this working directory.
setwd("C:\\Projects\\R")
data <- read.table(file = "TrainingDataset.csv", header = TRUE, sep = ",")
testData <- read.table(file = "TestDataset.csv", header = TRUE, sep = ",")

# Training set: columns 13-30 are the predictors, columns 1-12 the responses
# (presumably one column per month of sales - confirm against the data file).
X <- data[, 13:30]
# Responses on the log scale; entries that come out as NA/NaN are set to 0.
Y <- log(data[, 1:12])
Y[is.na(Y)] <- 0

# Test set: the predictors sit in columns 2-19.
Xtest <- testData[, 2:19]
Ytestrows <- nrow(Xtest)
# One row per test row and 13 columns; column 1 is filled with 2, the other
# columns start out as NA.
Ytest <- matrix(nrow = Ytestrows, ncol = 13)
Ytest[, 1] <- 2
# Monthly totals: log of the summed sales (missing values ignored) of each
# response column, i.e. one value per column of Y.
# colSums() computes all totals in one vectorised step, so the result always
# has exactly ncol(Y) entries; no fixed-length preallocation is needed.
Ymonthlysales <- unname(
  log(colSums(data[, seq_len(ncol(Y)), drop = FALSE], na.rm = TRUE))
)
# Exploratory diagnostics on the monthly totals ----
# Portmanteau test on the series; the result is printed at top level.
Box.test(ts(Ymonthlysales))

# Seasonal ARIMA with drift, fitted by maximum likelihood.
# NOTE(review): Ymonthlysales is already a log of totals, so lambda = 0 asks
# for a further log transform - confirm that this is intended.
# NOTE(review): this `fit` is replaced by the ets() fit below before it is
# used anywhere in this section.
fit <- Arima(
  Ymonthlysales,
  order = c(3, 0, 1),
  seasonal = list(order = c(0, 1, 1), period = 12),
  include.drift = TRUE,
  lambda = 0,
  method = "ML"
)

# Automatically selected ARIMA model: 12-step forecast, plot, and a printout
# of the chosen model and its residuals.
m <- forecast(auto.arima(Ymonthlysales), h = 12)
plot(m)
m[["model"]]
m[["residuals"]]

# Exponential smoothing fit; the series is plotted and one simulated path of
# length 12 is drawn on top in red.
fit <- ets(Ymonthlysales)
plot(Ymonthlysales)
lines(simulate(fit, nsim = 12), col = "red")

# Prediction from an ARIMA(0,2,0) model, printed at top level.
predict(arima(Ymonthlysales, order = c(0, 2, 0)))

# Autocorrelation and partial autocorrelation of the series (both plotted).
y.acf <- acf(
  Ymonthlysales,
  lag.max = NULL,
  type = "correlation",
  plot = TRUE,
  na.action = na.fail,
  demean = TRUE
)
y.pacf <- pacf(ts(Ymonthlysales))
# Estimate time series models from the monthly totals ----
# Autoregressive fit. Both names refer to the same fit: ar() is deterministic,
# so fitting the identical model twice only repeated the work.
Ycorr <- ar(Ymonthlysales)
Ymonthlyts <- Ycorr

# ARIMA(1,1,1) fitted by maximum likelihood; printed for inspection.
r.arima <- arima(ts(Ymonthlysales), order = c(1, 1, 1), method = "ML")
r.arima

# arima.sim() expects a plain list with `order`, `ar` and `ma`, not a fitted
# "Arima" object, so the specification is rebuilt from the estimated
# coefficients. The innovations use the fitted standard deviation.
arima.coefs <- coef(r.arima)
sim.model <- list(
  order = c(1, 1, 1),
  ar = arima.coefs[["ar1"]],
  ma = arima.coefs[["ma1"]]
)
ts.plot(arima.sim(n = 12, model = sim.model, sd = sqrt(r.arima$sigma2)))

# 12-step forecast of the ARIMA model, kept under its own name so that it is
# not overwritten by the AR forecast below.
Ypr.arima <- predict(r.arima, n.ahead = 12)

# Coefficients of the AR fit, printed for inspection.
Ymonthlyts$ar

# Forecast of the AR model.
# NOTE(review): 11 steps here versus 12 for the ARIMA model - confirm that
# the different horizon is intended.
Ypr <- predict(Ymonthlyts, n.ahead = 11)
# Remember the original predictor layout (column count and column names);
# both are used further down to name the columns appended to X.
nc1 <- ncol(X)
d1 <- dimnames(X)[[2]]
# Y - labels: for every training row, the log of that row's total sales over
# the 12 response columns (missing values ignored).
# rowSums() yields exactly one value per row. Indexing a data frame as Y[i]
# addresses column i, so a per-row loop over Y[i] cannot build this vector.
Y <- unname(log(rowSums(data[, 1:12], na.rm = TRUE)))
# Cleanup data: convert predictor columns 1 and 18 to factors, in the
# training and in the test predictors alike.
# Factor variables are still problematic on prediction.
idxCat <- c(1, 18)
X[idxCat] <- lapply(X[idxCat], as.factor)
Xtest[idxCat] <- lapply(Xtest[idxCat], as.factor)
# Mean-impute missing values in predictor columns 1-16 and append, for each of
# these columns, an indicator factor telling which rows were missing.
# NOTE(review): columns 17 and 18 are not processed - confirm this is intended.
imputeIdx <- 1:16
missingFlags <- vector("list", length(imputeIdx))
for (k in seq_along(imputeIdx)) {
  i <- imputeIdx[k]
  colValues <- X[, i]
  # is.na() is TRUE for NA as well as NaN, whereas is.nan() misses plain NA.
  # Non-numeric columns (column 1 is a factor at this point) have no mean, so
  # they are left untouched and flagged as "nothing missing".
  if (is.numeric(colValues)) {
    v <- is.na(colValues)
  } else {
    v <- rep(FALSE, length(colValues))
  }
  if (any(v)) {
    X[v, i] <- mean(colValues[!v])
  }
  # Fixed levels give every indicator the same FALSE/TRUE coding, including
  # the indicators of columns without any missing value.
  missingFlags[[k]] <- factor(v, levels = c(FALSE, TRUE))
}
# Append all indicators in one step instead of growing X inside the loop.
names(missingFlags) <- paste0("flag", seq_along(missingFlags))
X <- cbind(X, as.data.frame(missingFlags))
# Fix up the labels of the new columns: original names first, then V1, V2, ...
newCols <- paste0("V", seq_len(ncol(X) - nc1))
dimnames(X)[[2]] <- c(d1, newCols)