-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLinear.py
127 lines (112 loc) · 3.36 KB
/
Linear.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import numpy as np
class Linear():
    """Multivariate linear regression.

    The model is ``y = intercept_ + X @ coef_``. It is fitted either by batch
    gradient descent on the mean squared error (``model='gradient_descent'``)
    or by solving the least-squares problem directly
    (``model='least_squares'``).

    Attributes set by ``fit``:
        W: weight column vector of shape (n_features + 1, 1); row 0 is the bias.
        coef_: 1-D array of feature weights (``W[1:, 0]``).
        intercept_: bias term (``W[0, 0]``).
        history: ``{'loss': [...]}`` with one MSE value per gradient-descent epoch.
    """

    def __init__(self, model='gradient_descent', alpha=0.01, maxEpoch=1000, E=0.01):
        """
        Configure the regressor.

        :param model: 'gradient_descent' or 'least_squares'
        :param alpha: learning rate used by gradient descent
        :param maxEpoch: upper bound on the number of gradient-descent epochs
        :param E: convergence tolerance; gradient descent stops once the mean
                  absolute weight change of a step drops below this value
        """
        self.model = model
        self.alpha = alpha
        self.maxEpoch = maxEpoch
        self.E = E
        self.coef_ = []
        self.intercept_ = 0
        self.row = 0
        self.col = 0
        self.W = None  # populated by fit()
        self.history = {'loss': []}

    def fit(self, X, y):
        """
        Train the model.

        :param X: 2-D array-like of shape (n_samples, n_features)
        :param y: array-like with n_samples targets (reshaped to a column)
        :return: None
        :raises ValueError: if ``self.model`` names an unknown solver
        """
        self.X = np.asarray(X)
        self.y = np.asarray(y).reshape((-1, 1))
        if self.model == 'gradient_descent':
            self.gradient_descent()
        elif self.model == 'least_squares':
            self.least_squares()
        else:
            # Previously an unknown solver name left the model silently untrained.
            raise ValueError(
                "unknown model {!r}; expected 'gradient_descent' or "
                "'least_squares'".format(self.model))

    def _with_bias(self, X):
        """Return X with a leading column of ones (the bias/intercept column)."""
        return np.column_stack((np.ones((X.shape[0], 1)), X))

    def _store_weights(self):
        """Split the fitted weight vector into ``intercept_`` and ``coef_``."""
        self.coef_ = self.W[1:, 0]
        self.intercept_ = self.W[0, 0]

    def least_squares(self):
        """
        Fit by ordinary least squares.

        Solves ``min ||X_b W - y||`` with ``np.linalg.lstsq``, which also copes
        with rank-deficient design matrices. ``history['loss']`` is not touched
        because there are no epochs.

        :return: None
        """
        self.row, self.col = self.X.shape
        self.col += 1
        X = self._with_bias(self.X)
        self.W = np.linalg.lstsq(X, self.y, rcond=None)[0]
        self._store_weights()

    def gradient_descent(self):
        """
        Fit by batch gradient descent on the mean squared error.

        Weights start from a standard-normal draw. One un-recorded step is taken
        up front so that a "previous" and a "current" weight vector exist; after
        that, each epoch appends its MSE to ``history['loss']``. Iteration stops
        when the mean absolute weight change falls below ``E`` or after
        ``maxEpoch`` epochs, whichever comes first.

        :return: None
        """
        self.row, self.col = self.X.shape
        self.col += 1
        # Prepend a column of ones to the data so W[0] acts as the intercept.
        X = self._with_bias(self.X)

        def step(weights):
            # One gradient step: W - alpha / n * X^T (X W - y)
            gradient = np.dot(X.T, np.dot(X, weights) - self.y)
            return weights - self.alpha / self.row * gradient

        # Initialise the weights; `previous` holds the weights before the update.
        previous = np.random.normal(size=(self.col, 1))
        self.W = step(previous)
        count = 0
        # Continue while the last update is still larger than the tolerance.
        # (The original compared abs(change - E) >= 0, which is always true.)
        while np.mean(np.abs(self.W - previous)) >= self.E and count < self.maxEpoch:
            previous = np.copy(self.W)
            self.W = step(previous)
            count += 1
            y_ = np.dot(X, self.W)
            loss = self.loss(self.y, y_)
            print(f'Epoch={count}, loss={loss}')
            self.history['loss'].append(loss)
        self._store_weights()

    def predict(self, x):
        """
        Predict targets for new samples.

        :param x: 2-D array-like of shape (n_samples, n_features)
        :return: array of shape (n_samples, 1) with the raw regression outputs
        :raises RuntimeError: if the model has not been fitted
        :raises ValueError: if the feature count differs from the training data
        """
        if self.W is None:
            raise RuntimeError('model is not fitted; call fit() first')
        x = np.array(x)
        row, col = x.shape
        if col != self.col - 1:
            # Raise instead of exit(-1) so the caller can handle the mismatch.
            raise ValueError(
                '({}, {}) cannot ({}, {})'.format(row, col, -1, self.col - 1))
        x = self._with_bias(x)
        return np.dot(x, self.W)

    def getEquation(self):
        """
        Get the fitted multivariate linear regression equation.

        Coefficients and intercept are rounded to 4 decimals; features are
        named x0, x1, ... in column order.

        :return: string such as 'f=2.0 * x0 + -0.5 * x1 + 1.25'
        """
        terms = ['{} * x{}'.format(round(elem, 4), i)
                 for i, elem in enumerate(self.coef_)]
        terms.append('{}'.format(round(self.intercept_, 4)))
        return 'f=' + ' + '.join(terms)

    def loss(self, y, y_):
        """
        Compute the mean squared error (MSE, no square root is taken).

        :param y: true targets
        :param y_: predicted targets, broadcast-compatible with y
        :return: mean of the squared differences
        """
        return np.mean((y - y_) ** 2)
if __name__ == '__main__':
    # Demo: fit the regressor on the iris data set, treat the rounded
    # regression output as a class label, and plot the training loss.
    from sklearn import datasets
    from sklearn.metrics import accuracy_score
    from matplotlib import pyplot as plt
    import matplotlib

    # Font configuration applied to all matplotlib text.
    font = {'family': 'MicroSoft YaHei',
            'weight': 'bold',
            'size': '7'}
    matplotlib.rc("font", **font)

    # Load the data.
    iris = datasets.load_iris()
    x = iris.data
    y = iris.target

    Model = Linear()
    Model.fit(x, y)
    # The equation string was previously computed and thrown away.
    print(Model.getEquation())

    y_ = Model.predict(x)
    # Round to the nearest integer and flatten the (n, 1) column into 1-D
    # integer labels so it has the same form as `y` for accuracy_score.
    y_ = np.around(y_).ravel().astype(int)
    acc = accuracy_score(y, y_)
    print(f'准确率为:{round(acc, 2)}')

    # Show the loss curve; use a separate name instead of overwriting `x`.
    loss = Model.history['loss']
    epochs = range(len(loss))
    plt.plot(epochs, loss)
    plt.xlabel('Epochs')
    plt.ylabel('loss')
    plt.show()