-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompute_accuracy.py
48 lines (45 loc) · 1.64 KB
/
compute_accuracy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
import operator
from typing import List
def load_dataset(path) -> List[tuple[List[str], str]]:
    """Load outlier-detection samples from every ``.txt`` file in *path*.

    Each file is read line by line with trailing whitespace stripped.
    Non-empty lines are collected as cluster items until the first empty
    line is seen; non-empty lines after that are collected as outliers.
    Lines starting with ``#`` are skipped in both sections.

    Args:
        path: Directory that contains the dataset files. Entries whose
            name does not end with ``.txt`` are ignored.

    Returns:
        A list with one ``(cluster, outlier)`` pair per outlier line.
        All pairs that come from the same file share the same cluster
        list object. Files are visited in sorted name order.
    """
    # NOTE(review): the source this was recovered from had lost its
    # indentation; the mode switch is assumed to be triggered by an empty
    # line (with '#' lines acting as comments) -- confirm against the
    # dataset files.
    result = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # sample order (and any log output derived from it) reproducible.
    for file_name in sorted(os.listdir(path)):
        if not file_name.endswith('.txt'):
            continue
        cluster = []
        outliers = []
        cluster_reading = True
        # Explicit encoding so non-ASCII terms decode the same way on
        # every platform instead of depending on the locale default.
        with open(os.path.join(path, file_name), encoding='utf-8') as f:
            for raw_line in f:
                line = raw_line.rstrip()
                if not line:
                    # An empty line ends the cluster section.
                    cluster_reading = False
                elif line.startswith('#'):
                    continue
                elif cluster_reading:
                    cluster.append(line)
                else:
                    outliers.append(line)
        result.extend((cluster, outlier) for outlier in outliers)
    return result
def compute_accuracy(sim_fun, log_failed_cases=True,
                     dataset_path='evaluation/outlier-detection'):
    """Measure how often *sim_fun* singles out the outlier of each sample.

    For every ``(cluster, outlier)`` sample returned by ``load_dataset``
    the cluster items and the outlier are pooled, and each item is scored
    with the sum of ``sim_fun(item, other)`` over all other items in the
    pool. The item with the lowest score is the predicted outlier; when
    several items share the lowest score the one that appears first in
    the pool wins, and the true outlier is always last in the pool.

    Args:
        sim_fun: Callable taking two items and returning a number that
            can be added to a float.
        log_failed_cases: When true, print the cluster and the outlier
            of every sample that was predicted incorrectly.
        dataset_path: Directory passed to ``load_dataset``. Defaults to
            the location that was previously hard-coded.

    Returns:
        The fraction of samples whose outlier was predicted correctly,
        or ``0.0`` when the dataset contains no samples.
    """
    dataset = load_dataset(dataset_path)
    correct_guesses = 0
    total_guesses = 0
    for cluster, outlier in dataset:
        items = cluster + [outlier]
        compactness = {}
        for item1 in items:
            # Accumulate with += (not sum()) so float rounding stays
            # identical to a plain left-to-right addition.
            score = 0.0
            for item2 in items:
                if item1 != item2:
                    score += sim_fun(item1, item2)
            compactness[item1] = score
        # min() returns the first key with the lowest score, the same
        # item a stable ascending sort would place first.
        least_compact = min(compactness, key=compactness.get)
        if least_compact == outlier:
            correct_guesses += 1
        elif log_failed_cases:
            print(f"Failed {cluster} / {outlier}")
        total_guesses += 1
    if total_guesses == 0:
        # No samples were loaded; avoid dividing by zero.
        return 0.0
    return correct_guesses / total_guesses