-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathagents.py
104 lines (80 loc) · 3 KB
/
agents.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import numpy as np
class greedy:
def __init__(self, n_adds):
self.n_adds = n_adds
self.scores = np.full((n_adds), np.Inf)
def get_optimal(self):
return np.argmax(self.scores)+1
def update_score(self, add, score):
add-=1
try:
assert add>=0 and add<self.n_adds
if self.scores[add] == np.Inf:
self.scores[add] = score
else:
self.scores[add] += score
except Exception:
print("The add no. exceeds the add limit")
def get_total_score(self):
return np.sum(self.scores)
def print_scores(self):
print("The scores are as follows\n")
for add in range(1, self.n_adds+1):
print(f"Add {add}: ", self.scores[add-1])
class greedy_epsilon:
def __init__(self, n_adds, exp):
self.exp = exp
self.n_adds = n_adds
self.scores = np.full((n_adds), np.Inf)
def get_optimal(self):
rnd = np.random.random()
if(rnd<self.exp):
return np.random.randint(0, self.n_adds)+1
else:
return np.argmax(self.scores)+1
def update_score(self, add, score):
add-=1
try:
assert add>=0 and add<self.n_adds
if self.scores[add] == np.Inf:
self.scores[add] = score
else:
self.scores[add] += score
except Exception:
print("The add no. exceeds the add limit")
def get_total_score(self):
return np.sum(self.scores)
def print_scores(self):
print("The scores are as follows\n")
for add in range(1, self.n_adds+1):
print(f"Add {add}: ", self.scores[add-1])
class upper_confidence_bound:
def __init__(self, n_adds):
self.n_adds = n_adds
self.scores = np.zeros((n_adds))
self.selections = np.zeros((n_adds))
self.turn = 0;
def get_optimal(self):
upper_bounds = ([np.inf if self.selections[i] == 0 else
self.scores[i]/self.selections[i] +
np.sqrt(3/2 * np.log(self.turn + 1) / self.selections[i])
for i in range(self.n_adds)])
return np.argmax(upper_bounds)+1
def update_score(self, add, score):
add-=1
try:
assert add>=0 and add<self.n_adds
if self.scores[add] == np.Inf:
self.scores[add] = score
else:
self.scores[add]+=score
self.turn+=1
self.selections[add]+=1
except Exception:
print("The add no. exceeds the add limit")
def get_total_score(self):
return np.sum(self.scores)
def print_scores(self):
print("The scores are as follows\n")
for add in range(1, self.n_adds+1):
print(f"Add {add}: ", self.scores[add-1])