From 9caf6443cc467e59a61060c9b36dde75644c2322 Mon Sep 17 00:00:00 2001
From: Tim
Date: Thu, 12 Sep 2024 17:00:46 +0800
Subject: [PATCH 1/4] Update eval_method.py

abort subsequent evaluators for invalid gen_df
---
 rdagent/components/benchmark/eval_method.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/rdagent/components/benchmark/eval_method.py b/rdagent/components/benchmark/eval_method.py
index c94a0e3e4..983bbe2fe 100644
--- a/rdagent/components/benchmark/eval_method.py
+++ b/rdagent/components/benchmark/eval_method.py
@@ -99,6 +99,16 @@ def eval_case(
             for each item
                 If the evaluation run successfully, return the evaluate results. Otherwise, return the exception.
         """
+        if not case_gen:
+            return Exception("invalid implementation")
+        execution_feedback, gen_df = case_gen.execute()
+        if (isinstance(gen_df, pd.DataFrame) and gen_df.empty) or (not isinstance(gen_df, pd.DataFrame) and not gen_df):
+            execution_feedback = re.sub(r"(?<=\D)(,\s+-?\d+\.\d+){50,}(?=\D)", ", ", execution_feedback)
+            execution_feedback = "\n".join(
+                [line for line in execution_feedback.split("\n") if "warning" not in line.lower()]
+            )
+            return Exception(execution_feedback)
+
         eval_res = []
         for ev in self.evaluator_l:
             try:

From b8aa7f3d523779122a9946079e70d39c6e714af0 Mon Sep 17 00:00:00 2001
From: Tim
Date: Thu, 12 Sep 2024 18:36:41 +0800
Subject: [PATCH 2/4] add AttributeError

---
 rdagent/components/benchmark/eval_method.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rdagent/components/benchmark/eval_method.py b/rdagent/components/benchmark/eval_method.py
index 983bbe2fe..4ca9afe5a 100644
--- a/rdagent/components/benchmark/eval_method.py
+++ b/rdagent/components/benchmark/eval_method.py
@@ -114,7 +114,7 @@ def eval_case(
             try:
                 eval_res.append((ev, ev.evaluate(implementation=case_gen, gt_implementation=case_gt)))
                 # if the corr ev is successfully evaluated and achieve the best performance, then break
-            except CoderError as e:
+            except (CoderError, AttributeError) as e:
                 return e
             except Exception as e:
                 # exception when evaluation

From b14e20f67a526b521ea94329c15383d5cffcf149 Mon Sep 17 00:00:00 2001
From: Tim
Date: Thu, 12 Sep 2024 18:38:22 +0800
Subject: [PATCH 3/4] add raise_exception for eval_method

---
 rdagent/scenarios/qlib/factor_experiment_loader/json_loader.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/rdagent/scenarios/qlib/factor_experiment_loader/json_loader.py b/rdagent/scenarios/qlib/factor_experiment_loader/json_loader.py
index 5f080f80e..5e04a0d4b 100644
--- a/rdagent/scenarios/qlib/factor_experiment_loader/json_loader.py
+++ b/rdagent/scenarios/qlib/factor_experiment_loader/json_loader.py
@@ -55,10 +55,9 @@ def load(self, json_file_path: Path) -> list:
                 factor_formulation=factor_data["formulation"],
                 variables=factor_data["variables"],
             )
-            gt = FactorFBWorkspace(task)
+            gt = FactorFBWorkspace(task, raise_exception=True)
             code = {"factor.py": factor_data["gt_code"]}
             gt.inject_code(**code)
-            gt.execute()

             TestData.target_task.sub_tasks.append(task)
             TestData.ground_truth.append(gt)

From a8ddd593aae4535f485876cd2a6f6431f7b7716b Mon Sep 17 00:00:00 2001
From: Tim
Date: Thu, 12 Sep 2024 18:40:38 +0800
Subject: [PATCH 4/4] remove custom exception catch

---
 rdagent/components/benchmark/eval_method.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/rdagent/components/benchmark/eval_method.py b/rdagent/components/benchmark/eval_method.py
index 4ca9afe5a..3bf998b6d 100644
--- a/rdagent/components/benchmark/eval_method.py
+++ b/rdagent/components/benchmark/eval_method.py
@@ -99,16 +99,6 @@ def eval_case(
             for each item
                 If the evaluation run successfully, return the evaluate results. Otherwise, return the exception.
         """
-        if not case_gen:
-            return Exception("invalid implementation")
-        execution_feedback, gen_df = case_gen.execute()
-        if (isinstance(gen_df, pd.DataFrame) and gen_df.empty) or (not isinstance(gen_df, pd.DataFrame) and not gen_df):
-            execution_feedback = re.sub(r"(?<=\D)(,\s+-?\d+\.\d+){50,}(?=\D)", ", ", execution_feedback)
-            execution_feedback = "\n".join(
-                [line for line in execution_feedback.split("\n") if "warning" not in line.lower()]
-            )
-            return Exception(execution_feedback)
-
         eval_res = []
         for ev in self.evaluator_l:
             try:
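[Reviewer note] Taken together, PATCH 1/4 and PATCH 4/4 cancel each other out in
eval_method.py, so the changes that survive the series are the widened except
clause from PATCH 2/4 and the raise_exception=True ground-truth workspace from
PATCH 3/4. For reference, a minimal sketch of eval_case as it stands after the
series, reconstructed from the hunk context above; the method signature and the
trailing return are assumptions that do not appear in any hunk:

    def eval_case(self, case_gt, case_gen):  # signature assumed, not shown in the hunks
        eval_res = []
        for ev in self.evaluator_l:
            try:
                eval_res.append((ev, ev.evaluate(implementation=case_gen, gt_implementation=case_gt)))
                # if the corr ev is successfully evaluated and achieve the best performance, then break
            except (CoderError, AttributeError) as e:
                # PATCH 2/4: an AttributeError now also aborts the remaining evaluators for this case
                return e
            except Exception as e:
                # any other evaluation error is recorded and the next evaluator still runs
                eval_res.append((ev, e))
        return eval_res  # assumed; the hunks end inside the try/except block

With PATCH 3/4, the ground-truth FactorFBWorkspace is built with
raise_exception=True and is no longer executed eagerly in the loader, so
problems in gt_code presumably surface as exceptions during evaluation rather
than at load time.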