llm_eval.py
import os
os.environ["OPENAI_API_KEY"] = "***"
os.environ["OPENAI_BASE_URL"] = "***"
# remember to set these proxy variables at the top of the script if you are using Clash
# os.environ["http_proxy"] = "http://127.0.0.1:7890"
# os.environ["https_proxy"] = "http://127.0.0.1:7890"
# os.environ["all_proxy"] = "socks5://127.0.0.1:7890"
import json
from argparse import ArgumentParser

from eval_helper.get_evaluation import get_evaluation
from agentverse.agentverse import AgentVerse
parser = ArgumentParser()
parser.add_argument("--config", type=str, default="config.yaml")
parser.add_argument("--reverse_input", default=False, action="store_true")
args = parser.parse_args()
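# note: --reverse_input swaps the order of the two compared responses; presumably this is
# meant to check the evaluator agents for position bias by scoring each pair in both orders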
agentverse, args_data_path, args_output_dir = AgentVerse.from_task(args.config)
print(args)
os.makedirs(args_output_dir, exist_ok=True)
with open(os.path.join(args_output_dir, "args.txt"), "w") as f:
f.writelines(str(args))
# uncomment these lines if you don't want to overwrite your output_dir
# if os.path.exists(args_output_dir) and len(os.listdir(args_output_dir)) > 1:
#     raise ValueError("the output_dir is not empty, check if this is expected.")
with open(args_data_path) as f:
    data = json.load(f)
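# expected instance layout (inferred from the branches below):
#   faireval:    {"question": ..., "response": {"gpt35": ..., "vicuna": ...}}
#   adversarial: {"question": ..., "response": {"output_1": ..., "output_2": ...}}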
if "faireval" in args_data_path:
pair_comparison_output = []
for num, ins in enumerate(data[:80]):
print(f"================================instance {num}====================================")
# reassign the text to agents, and set final_prompt to null for debate at first round
for agent_id in range(len(agentverse.agents)):
agentverse.agents[agent_id].source_text = ins["question"]
if args.reverse_input:
agentverse.agents[agent_id].compared_text_one = ins["response"]["vicuna"]
agentverse.agents[agent_id].compared_text_two = ins["response"]["gpt35"]
else:
agentverse.agents[agent_id].compared_text_one = ins["response"]["gpt35"]
agentverse.agents[agent_id].compared_text_two = ins["response"]["vicuna"]
agentverse.agents[agent_id].final_prompt = ""
agentverse.run()
evaluation = get_evaluation(setting="every_agent", messages=agentverse.agents[0].memory.messages, agent_nums=len(agentverse.agents))
pair_comparison_output.append({"question": ins["question"],
"response": {"gpt35": ins["response"]["gpt35"],
"vicuna": ins["response"]["vicuna"]},
"evaluation": evaluation})
os.makedirs(args_output_dir, exist_ok=True)
with open(os.path.join(args_output_dir, "pair_comparison_results.json"), "w") as f:
json.dump(pair_comparison_output, f, indent=4)
# with open(os.path.join(args_output_dir, "gt_origin_results.json"), "w") as f:
# json.dump(gt_origin_output, f, indent=4)
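# the adversarial branch below runs the same debate loop, but compares the generic
# "output_1" / "output_2" fields and iterates over the full dataset instead of the first 80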
elif "adversarial" in args_data_path:
pair_comparison_output = []
for num, ins in enumerate(data):
print(f"================================instance {num}====================================")
# reassign the text to agents, and set final_prompt to null for debate at first round
for agent_id in range(len(agentverse.agents)):
agentverse.agents[agent_id].source_text = ins["question"]
if args.reverse_input:
agentverse.agents[agent_id].compared_text_one = ins["response"]["output_2"]
agentverse.agents[agent_id].compared_text_two = ins["response"]["output_1"]
else:
agentverse.agents[agent_id].compared_text_one = ins["response"]["output_1"]
agentverse.agents[agent_id].compared_text_two = ins["response"]["output_2"]
agentverse.agents[agent_id].final_prompt = ""
agentverse.run()
evaluation = get_evaluation(setting="every_agent", messages=agentverse.agents[0].memory.messages,
agent_nums=len(agentverse.agents))
pair_comparison_output.append({"question": ins["question"],
"response": {"output_1": ins["response"]["output_1"],
"output_2": ins["response"]["output_2"]},
"evaluation": evaluation})
os.makedirs(args_output_dir, exist_ok=True)
with open(os.path.join(args_output_dir, "pair_comparison_results.json"), "w") as f:
json.dump(pair_comparison_output, f, indent=4)
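
# example invocation (a sketch; the actual task config path depends on your AgentVerse
# setup and is not specified in this file):
#   python llm_eval.py --config <path/to/llm_eval_task/config.yaml>
#   python llm_eval.py --config <path/to/llm_eval_task/config.yaml> --reverse_input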