hedge.py
import numpy as np


class Hedge:
    """Multiplicative-weights (Hedge) learner over a fixed set of actions."""

    def __init__(self, n_actions, learning_rate=0.5):
        self.n_actions = n_actions
        self.learning_rate = learning_rate
        self.weights = np.ones(n_actions) / n_actions

    def predict(self):
        """Sample an action from the current weight distribution."""
        return np.random.choice(self.n_actions, p=self.weights)

    def update(self, loss_vector):
        # Exponential update on the observed losses, then renormalize.
        self.weights *= np.exp(-self.learning_rate * np.asarray(loss_vector))
        self.weights /= np.sum(self.weights)
class OnlineGradientDescent:
    """Projected online gradient descent on the probability simplex."""

    def __init__(self, n_actions, learning_rate=0.5):
        self.n_actions = n_actions
        self.learning_rate = learning_rate
        self.weights = np.ones(n_actions) / n_actions

    def predict(self):
        return np.random.choice(self.n_actions, p=self.weights)

    def update(self, loss_vector):
        # Gradient step on the linear loss, then project back onto the simplex.
        self.weights -= self.learning_rate * np.asarray(loss_vector)
        self.weights = self.project(self.weights)

    def project(self, weights):
        """Euclidean projection onto the probability simplex."""
        if np.isclose(np.sum(weights), 1.0) and np.all(weights >= 0):
            return weights  # already a probability distribution
        # Sort-based projection (Duchi et al., 2008).
        u = np.sort(weights)[::-1]
        cssv = np.cumsum(u) - 1.0
        ind = np.arange(self.n_actions) + 1
        cond = u - cssv / ind > 0
        rho = ind[cond][-1]
        theta = cssv[cond][-1] / float(rho)
        return np.maximum(weights - theta, 0)
class MLProd:
    """Prod with one learning rate per action (ML-Prod)."""

    def __init__(self, n_actions, learning_rate):
        self.n_actions = n_actions
        if np.isscalar(learning_rate):
            self.learning_rate = np.full(n_actions, learning_rate)
        else:
            assert (
                len(learning_rate) == n_actions
            ), "Need a learning rate for each action"
            self.learning_rate = np.array(learning_rate)
        self.w = np.ones(n_actions) / n_actions
        self.weights = self._prediction_weights()

    def _prediction_weights(self):
        # ML-Prod predicts with the eta-weighted, normalized Prod weights;
        # for a scalar learning rate this reduces to w / sum(w).
        scaled = self.learning_rate * self.w
        return scaled / np.sum(scaled)

    def predict(self):
        return np.random.choice(self.n_actions, p=self.weights)

    def update(self, loss_vector):
        loss_vector = np.array(loss_vector)
        expected_loss = np.dot(self.weights, loss_vector)
        # Prod update on the instantaneous regret; the clip guards against
        # non-positive weights when a learning rate is too large.
        self.w *= np.clip(
            1 + self.learning_rate * (expected_loss - loss_vector), 1e-8, None
        )
        self.w /= np.sum(self.w)
        self.weights = self._prediction_weights()
class OptimisticHedge(Hedge):
    """Hedge with an optimistic prediction equal to the previous loss vector."""

    def __init__(self, n_actions, learning_rate=0.5):
        super().__init__(n_actions, learning_rate)
        self.last_loss_vector = np.zeros(n_actions)

    def update(self, loss_vector):
        loss_vector = np.asarray(loss_vector)
        # Play against the observed loss plus its one-step change, i.e.
        # loss_t + (loss_t - loss_{t-1}) = 2 * loss_t - loss_{t-1}.
        adjusted_loss = 2 * loss_vector - self.last_loss_vector
        self.weights *= np.exp(-self.learning_rate * adjusted_loss)
        self.weights /= np.sum(self.weights)
        self.last_loss_vector = loss_vector
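

if __name__ == "__main__":
    # Minimal smoke test, not part of the original module: run each learner
    # on a toy sequence of random loss vectors in [0, 1], where action 0 is
    # better in expectation, and report the average loss incurred. Horizon,
    # seed, and learning rates below are arbitrary illustrative choices.
    rng = np.random.default_rng(0)
    n_actions, horizon = 5, 1000
    learners = {
        "Hedge": Hedge(n_actions),
        "OGD": OnlineGradientDescent(n_actions, learning_rate=0.05),
        "MLProd": MLProd(n_actions, learning_rate=0.1),
        "OptimisticHedge": OptimisticHedge(n_actions),
    }
    totals = {name: 0.0 for name in learners}
    for _ in range(horizon):
        losses = rng.uniform(size=n_actions)
        losses[0] *= 0.3  # make action 0 the best in expectation
        for name, learner in learners.items():
            totals[name] += losses[learner.predict()]
            learner.update(losses)
    for name, total in totals.items():
        print(f"{name}: average loss {total / horizon:.3f}")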