-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcal_cos_similarity,py
51 lines (39 loc) · 1.56 KB
/
cal_cos_similarity,py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import json
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
# Loaded SentenceTransformer instances, keyed by model name. Constructing a
# model is expensive, so each one is built once and then reused.
_SENTENCE_MODEL_CACHE = {}


def _get_sentence_model(model_name='all-MiniLM-L6-v2'):
    """Return the SentenceTransformer for *model_name*, loading it only once."""
    model = _SENTENCE_MODEL_CACHE.get(model_name)
    if model is None:
        model = SentenceTransformer(model_name)
        _SENTENCE_MODEL_CACHE[model_name] = model
    return model


def calculate_cosine_similarity(model_answer, human_label, model=None):
    """Return the cosine similarity between the embeddings of two texts.

    Args:
        model_answer: Text produced by the model.
        human_label: Reference text written by a human annotator.
        model: Optional object exposing ``encode(list_of_texts)``. When
            omitted, a shared ``'all-MiniLM-L6-v2'`` SentenceTransformer is
            used; it is created on the first call and reused afterwards
            instead of being reloaded on every call.

    Returns:
        The scalar similarity taken from element ``[0][0]`` of the matrix
        returned by ``cosine_similarity``.
    """
    if model is None:
        model = _get_sentence_model()

    # Each text is wrapped in a one-element list so that both embeddings are
    # 2-D, which is the input layout cosine_similarity works on.
    answer_embedding = model.encode([model_answer])
    label_embedding = model.encode([human_label])

    # One row against one row yields a 1x1 matrix; unwrap its only entry.
    return cosine_similarity(answer_embedding, label_embedding)[0][0]
def process_results(json_file):
    """Score every record of a JSON results file and average the scores.

    The file must contain either a single record (a JSON object) or a list
    of records. Every record must provide the keys ``'answer'``,
    ``'labeled reason'`` and ``'question id'``.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        A ``(similarities, avg_similarity)`` tuple. ``similarities`` is a
        list of ``{'question_id': ..., 'similarity': ...}`` dicts in file
        order. ``avg_similarity`` is the mean of the similarity values, or
        NaN when the file contains no records.

    Raises:
        KeyError: If a record lacks one of the required keys.
    """
    # Decode explicitly as UTF-8: the platform default encoding is
    # locale-dependent and can fail or mis-decode non-ASCII text.
    with open(json_file, 'r', encoding='utf-8') as f:
        results = json.load(f)

    # A lone record is handled as a one-element list.
    if isinstance(results, dict):
        results = [results]

    similarities = []
    for result in results:
        # Read all required keys first so a malformed record fails before
        # any embedding work is done for it.
        question_id = result['question id']
        model_answer = result['answer']
        human_label = result['labeled reason']
        similarities.append({
            'question_id': question_id,
            'similarity': calculate_cosine_similarity(model_answer, human_label),
        })

    if not similarities:
        # np.mean([]) yields NaN but also emits a RuntimeWarning; return the
        # same "no data" value without the warning.
        return similarities, float('nan')

    avg_similarity = np.mean([s['similarity'] for s in similarities])
    return similarities, avg_similarity
if __name__ == "__main__":
    # Score the records in "results.json"; the relative path is resolved
    # against the current working directory.
    similarities, avg_similarity = process_results("results.json")

    # Print the overall mean first, then one line per question.
    print(f"平均余弦相似度: {avg_similarity:.4f}")
    for sim in similarities:
        print(f"问题 ID {sim['question_id']}: 相似度 = {sim['similarity']:.4f}")