## 8.3 Lab: Decision Trees
# 8.3.1 Fitting Classification Trees
library(tree)
library(ISLR)
attach(Carseats)
# Create a binary factor response: "High" if Sales exceeds 8 (thousand units)
High<-as.factor(ifelse(Sales<=8,"No","Yes"))
Carseats<-data.frame(Carseats,High)
# Fit a classification tree predicting High from all variables except Sales
tree.carseats<-tree(High~.-Sales,Carseats)
summary(tree.carseats)
plot(tree.carseats)
text(tree.carseats,pretty=0)
tree.carseats
# Estimate the test error: 200 training observations, the rest as a test set
set.seed(2)
train<-sample(1:nrow(Carseats),200)
Carseats.test<-Carseats[-train,]
High.test<-High[-train]
tree.carseats<-tree(High~.-Sales,Carseats,subset=train)
tree.pred<-predict(tree.carseats,Carseats.test,type="class")
table(tree.pred,High.test)
(86+57)/200  # test-set accuracy read off the confusion matrix above
# Cross-validation to choose tree size, using misclassification error for pruning
set.seed(3)
cv.carseats<-cv.tree(tree.carseats,FUN=prune.misclass)
names(cv.carseats)
par(mfrow=c(1,2))
plot(cv.carseats$size,cv.carseats$dev,type="b")
plot(cv.carseats$k,cv.carseats$dev,type="b")
# Prune to the 9-node tree selected by cross-validation
prune.carseats<-prune.misclass(tree.carseats,best=9)
plot(prune.carseats)
text(prune.carseats,pretty=0)
tree.pred<-predict(prune.carseats,Carseats.test,type="class")
table(tree.pred,High.test)
(94+60)/200  # pruned tree: higher test-set accuracy
# A larger pruned tree (15 terminal nodes) does worse on the test set
prune.carseats<-prune.misclass(tree.carseats,best=15)
plot(prune.carseats)
text(prune.carseats,pretty=0)
tree.pred<-predict(prune.carseats,Carseats.test,type="class")
table(tree.pred,High.test)
(86+62)/200
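# Optional sketch (not part of the original lab): compute the test accuracy
# directly from the predictions instead of hard-coding counts from the printed
# confusion matrix. Assumes tree.pred and High.test as defined above.
mean(tree.pred==High.test)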
# 8.3.2 Fitting Regression Trees
library(MASS)
set.seed(1)
# Train on a random half of the Boston housing data
train<-sample(1:nrow(Boston),nrow(Boston)/2)
tree.boston<-tree(medv~.,Boston,subset=train)
summary(tree.boston)
plot(tree.boston)
text(tree.boston,pretty=0)
# Cross-validation to see whether pruning improves performance
cv.boston<-cv.tree(tree.boston)
plot(cv.boston$size,cv.boston$dev,type="b")
# Prune to five terminal nodes for illustration
prune.boston<-prune.tree(tree.boston,best=5)
plot(prune.boston)
text(prune.boston,pretty=0)
# Test-set predictions and test MSE for the unpruned regression tree
yhat<-predict(tree.boston,newdata=Boston[-train,])
boston.test<-Boston[-train,"medv"]
plot(yhat,boston.test)
abline(0,1)
mean((yhat-boston.test)^2)
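# Optional sketch (an addition, not from the lab): the square root of the test
# MSE gives an error in the units of medv (thousands of dollars), which is
# easier to interpret than the MSE itself.
sqrt(mean((yhat-boston.test)^2))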
# 8.3.3 Bagging and Random Forests
library(randomForest)
set.seed(1)
# Bagging: a random forest with mtry equal to all 13 predictors
bag.boston<-randomForest(medv~.,data=Boston,subset=train,mtry=13,importance=TRUE)
bag.boston
yhat.bag<-predict(bag.boston,newdata=Boston[-train,])
plot(yhat.bag,boston.test)
abline(0,1)
mean((yhat.bag-boston.test)^2)
# Refit the bagged model with fewer trees (ntree=25)
bag.boston<-randomForest(medv~.,data=Boston,subset=train,mtry=13,ntree=25)
yhat.bag<-predict(bag.boston,newdata=Boston[-train,])
mean((yhat.bag-boston.test)^2)
set.seed(1)
# Random forest: mtry=6 (roughly p/3 predictors considered at each split)
rf.boston<-randomForest(medv~.,data=Boston,subset=train,mtry=6,importance=TRUE)
yhat.rf<-predict(rf.boston,newdata=Boston[-train,])
mean((yhat.rf-boston.test)^2)
importance(rf.boston)
varImpPlot(rf.boston)
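# Optional sketch (an extension, not in the original lab): compare test MSE
# across a few values of mtry to see how decorrelating the trees affects
# accuracy. ntree is kept small here so the loop runs quickly.
mtry.values<-c(2,4,6,8,13)
mtry.mse<-sapply(mtry.values,function(m){
  fit<-randomForest(medv~.,data=Boston,subset=train,mtry=m,ntree=100)
  pred<-predict(fit,newdata=Boston[-train,])
  mean((pred-boston.test)^2)
})
cbind(mtry=mtry.values,test.mse=mtry.mse)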
# 8.3.4 Boosting
library(gbm)
set.seed(1)
# Boosted regression trees: 5000 trees with interaction depth 4
boost.boston<-gbm(medv~.,data=Boston[train,],distribution="gaussian",n.trees=5000,interaction.depth=4)
summary(boost.boston)
# Partial dependence plots for the two most influential variables, rm and lstat
par(mfrow=c(1,2))
plot(boost.boston,i="rm")
plot(boost.boston,i="lstat")
yhat.boost<-predict(boost.boston,newdata=Boston[-train,],n.trees=5000)
mean((yhat.boost-boston.test)^2)
# Refit with a larger shrinkage (learning rate) of 0.2
boost.boston<-gbm(medv~.,data=Boston[train,],distribution="gaussian",n.trees=5000,interaction.depth=4,shrinkage=0.2,verbose=F)
yhat.boost<-predict(boost.boston,newdata=Boston[-train,],n.trees=5000)
mean((yhat.boost-boston.test)^2)
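# Optional sketch (not part of the original lab): compare test MSE for a few
# shrinkage values, holding n.trees and interaction.depth fixed.
shrink.values<-c(0.001,0.01,0.1,0.2)
shrink.mse<-sapply(shrink.values,function(s){
  fit<-gbm(medv~.,data=Boston[train,],distribution="gaussian",
           n.trees=5000,interaction.depth=4,shrinkage=s,verbose=F)
  pred<-predict(fit,newdata=Boston[-train,],n.trees=5000)
  mean((pred-boston.test)^2)
})
cbind(shrinkage=shrink.values,test.mse=shrink.mse)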