-
Notifications
You must be signed in to change notification settings - Fork 52
/
Copy pathutils.py
89 lines (74 loc) · 3.59 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import logging
import os
import numpy as np
from crawler import Crawler
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
def log_time(elapsed_time: float, remaining_time: float) -> None:
    """Log the duration of the current iteration and the time still remaining.

    Args:
        elapsed_time: Seconds spent on the current iteration; logged with two
            decimal places.
        remaining_time: Estimated seconds remaining. Rendered as
            ``"<h>h <m>m <s>s"``, ``"<m>m <s>s"`` or ``"<s>s"`` depending on
            its magnitude, with each component truncated to an integer.
            Negative estimates are clamped to zero.
    """
    # divmod() floors towards negative infinity, so a negative estimate would
    # otherwise be rendered as a bogus positive duration (-5 -> "59m 55s").
    remaining_time = max(remaining_time, 0)

    hours, remainder = divmod(remaining_time, 3600)
    minutes, seconds = divmod(remainder, 60)

    # Only show the units that are actually needed.
    if hours > 0:
        remaining_time_str = f"{int(hours)}h {int(minutes)}m {int(seconds)}s"
    elif minutes > 0:
        remaining_time_str = f"{int(minutes)}m {int(seconds)}s"
    else:
        remaining_time_str = f"{int(seconds)}s"

    logger.info(
        f"Elapsed time this iter: {elapsed_time:.2f}s, remaining time: {remaining_time_str}"
    )
def _plot_score_distribution(ax, rater, scores) -> None:
    """Draw the histogram of one rater's scores on a diagonal grid cell."""
    ax.hist(scores, bins=100, density=True, alpha=0.6, color="g")
    if "normalized" in rater:
        # Labels containing "normalized" are shown on a fixed x-range.
        ax.set_xlim(-3, 3)
    ax.set_xlabel(rater)
    # NOTE(review): with density=True the y axis is a probability density, not
    # a percentage; the label text is kept unchanged so the output is the same.
    ax.set_ylabel("Percentage (%)")
    ax.set_title(f"{rater} Score Distribution")


def _plot_rater_pair(ax, x_name, x_scores, y_name, y_scores) -> None:
    """Draw scatter, linear fit and binned means for one off-diagonal cell."""
    import scipy.stats as stats

    corr, _ = stats.spearmanr(x_scores, y_scores)
    logger.info(f"{x_name} vs {y_name}, spearman corr: {corr}")
    ax.scatter(x_scores, y_scores, label="Data points")

    # Degree-1 least-squares fit through the points.
    slope, intercept = np.polyfit(x_scores, y_scores, 1)
    ax.plot(
        x_scores, np.array(x_scores) * slope + intercept, color="red", label="Fit line"
    )

    # Mean y value inside each x bin (100 evenly spaced edges over the x range).
    bins = np.linspace(min(x_scores), max(x_scores), 100)
    bin_means = stats.binned_statistic(
        x_scores, y_scores, statistic="mean", bins=bins
    )[0]
    bin_centers = (bins[:-1] + bins[1:]) / 2
    ax.plot(bin_centers, bin_means, color="blue", label="Mean values")

    ax.set_xlabel(x_name)
    ax.set_ylabel(y_name)
    if y_name == "length":
        ax.set_ylim(0, 30000)
    ax.set_title(f"Spearman corr: {corr:.2f}")
    ax.legend()


def eval_and_plot(args, crawler: Crawler) -> None:
    """Score the seed documents and save a grid of rater-vs-rater plots.

    Document ids are read one per line from ``args.seed_docs_file`` and passed
    to ``crawler.get_scores_for_docs``. For the ``N`` labels in ``args.plots``
    an ``N x N`` grid is drawn: diagonal cells show the score histogram of one
    label, off-diagonal cells show a scatter plot of two labels together with
    a linear fit, binned means and the Spearman correlation (also logged).
    The figure is written to ``<args.output_dir>/correlations.png``.

    Args:
        args: Object exposing ``seed_docs_file``, ``plots`` and ``output_dir``.
        crawler: Provides ``get_scores_for_docs``; every returned document must
            expose an ``annotations`` mapping indexable by each label in
            ``args.plots``. Its ``wandb_logger`` attribute is set to ``None``
            as a side effect.

    Raises:
        ValueError: If ``args.seed_docs_file`` is ``None``.
    """
    # Imported here rather than at module level, as in the original code.
    from matplotlib import pyplot as plt

    logger.info("Initializing seed docs")
    if args.seed_docs_file is None:
        raise ValueError("Seed docs file must be provided")
    with open(args.seed_docs_file, "r") as fin:
        stripped_lines = (line.strip() for line in fin)
        # Skip blank lines (e.g. a trailing newline) so that no empty doc id
        # is handed to the crawler.
        docids = [docid for docid in stripped_lines if docid]

    crawler.wandb_logger = None
    results = crawler.get_scores_for_docs(docids)

    annotation_labels = args.plots
    num_raters = len(annotation_labels)
    # Extract every rater's scores once instead of once per grid cell.
    scores_by_rater = {
        label: [doc.annotations[label] for doc in results]
        for label in annotation_labels
    }

    # squeeze=False keeps ``axes`` two-dimensional even for a single label, so
    # ``axes[i, j]`` is always valid.
    fig, axes = plt.subplots(
        num_raters, num_raters, figsize=(18, 18), squeeze=False
    )
    try:
        fig.suptitle("Correlations between different rating methods")
        for i, rater1 in enumerate(annotation_labels):
            for j, rater2 in enumerate(annotation_labels):
                ax = axes[i, j]
                if i == j:
                    _plot_score_distribution(ax, rater1, scores_by_rater[rater1])
                else:
                    _plot_rater_pair(
                        ax,
                        rater1,
                        scores_by_rater[rater1],
                        rater2,
                        scores_by_rater[rater2],
                    )

        # Create the output directory if needed; an empty path means the
        # current directory and must not be passed to makedirs.
        if args.output_dir:
            os.makedirs(args.output_dir, exist_ok=True)
        fig.savefig(os.path.join(args.output_dir, "correlations.png"))
    finally:
        # pyplot keeps figures alive until they are closed explicitly.
        plt.close(fig)