From 9caf6443cc467e59a61060c9b36dde75644c2322 Mon Sep 17 00:00:00 2001
From: Tim
Date: Thu, 12 Sep 2024 17:00:46 +0800
Subject: [PATCH 1/4] Update eval_method.py

abort subsequent evaluators for invalid gen_df
---
 rdagent/components/benchmark/eval_method.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/rdagent/components/benchmark/eval_method.py b/rdagent/components/benchmark/eval_method.py
index c94a0e3e4..983bbe2fe 100644
--- a/rdagent/components/benchmark/eval_method.py
+++ b/rdagent/components/benchmark/eval_method.py
@@ -99,6 +99,16 @@ def eval_case(
             for each item
                 If the evaluation run successfully, return the evaluate results. Otherwise, return the exception.
         """
+        if not case_gen:
+            return Exception("invalid implementation")
+        execution_feedback, gen_df = case_gen.execute()
+        if (isinstance(gen_df, pd.DataFrame) and gen_df.empty) or (not isinstance(gen_df, pd.DataFrame) and not gen_df):
+            execution_feedback = re.sub(r"(?<=\D)(,\s+-?\d+\.\d+){50,}(?=\D)", ", ", execution_feedback)
+            execution_feedback = "\n".join(
+                [line for line in execution_feedback.split("\n") if "warning" not in line.lower()]
+            )
+            return Exception(execution_feedback)
+
         eval_res = []
         for ev in self.evaluator_l:
             try:

From b8aa7f3d523779122a9946079e70d39c6e714af0 Mon Sep 17 00:00:00 2001
From: Tim
Date: Thu, 12 Sep 2024 18:36:41 +0800
Subject: [PATCH 2/4] add AttributeError

---
 rdagent/components/benchmark/eval_method.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rdagent/components/benchmark/eval_method.py b/rdagent/components/benchmark/eval_method.py
index 983bbe2fe..4ca9afe5a 100644
--- a/rdagent/components/benchmark/eval_method.py
+++ b/rdagent/components/benchmark/eval_method.py
@@ -114,7 +114,7 @@ def eval_case(
             try:
                 eval_res.append((ev, ev.evaluate(implementation=case_gen, gt_implementation=case_gt)))
                 # if the corr ev is successfully evaluated and achieve the best performance, then break
-            except CoderError as e:
+            except (CoderError, AttributeError) as e:
                 return e
             except Exception as e:
                 # exception when evaluation

From b14e20f67a526b521ea94329c15383d5cffcf149 Mon Sep 17 00:00:00 2001
From: Tim
Date: Thu, 12 Sep 2024 18:38:22 +0800
Subject: [PATCH 3/4] add raise_exception for eval_method

---
 rdagent/scenarios/qlib/factor_experiment_loader/json_loader.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/rdagent/scenarios/qlib/factor_experiment_loader/json_loader.py b/rdagent/scenarios/qlib/factor_experiment_loader/json_loader.py
index 5f080f80e..5e04a0d4b 100644
--- a/rdagent/scenarios/qlib/factor_experiment_loader/json_loader.py
+++ b/rdagent/scenarios/qlib/factor_experiment_loader/json_loader.py
@@ -55,10 +55,9 @@ def load(self, json_file_path: Path) -> list:
                 factor_formulation=factor_data["formulation"],
                 variables=factor_data["variables"],
             )
-            gt = FactorFBWorkspace(task)
+            gt = FactorFBWorkspace(task, raise_exception=True)
             code = {"factor.py": factor_data["gt_code"]}
             gt.inject_code(**code)
-            gt.execute()

             TestData.target_task.sub_tasks.append(task)
             TestData.ground_truth.append(gt)

From a8ddd593aae4535f485876cd2a6f6431f7b7716b Mon Sep 17 00:00:00 2001
From: Tim
Date: Thu, 12 Sep 2024 18:40:38 +0800
Subject: [PATCH 4/4] remove custom exception catch

---
 rdagent/components/benchmark/eval_method.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/rdagent/components/benchmark/eval_method.py b/rdagent/components/benchmark/eval_method.py
index 4ca9afe5a..3bf998b6d 100644
--- a/rdagent/components/benchmark/eval_method.py
+++ b/rdagent/components/benchmark/eval_method.py
@@ -99,16 +99,6 @@ def eval_case(
             for each item
                 If the evaluation run successfully, return the evaluate results. Otherwise, return the exception.
         """
-        if not case_gen:
-            return Exception("invalid implementation")
-        execution_feedback, gen_df = case_gen.execute()
-        if (isinstance(gen_df, pd.DataFrame) and gen_df.empty) or (not isinstance(gen_df, pd.DataFrame) and not gen_df):
-            execution_feedback = re.sub(r"(?<=\D)(,\s+-?\d+\.\d+){50,}(?=\D)", ", ", execution_feedback)
-            execution_feedback = "\n".join(
-                [line for line in execution_feedback.split("\n") if "warning" not in line.lower()]
-            )
-            return Exception(execution_feedback)
-
         eval_res = []
         for ev in self.evaluator_l:
             try:
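[Reviewer note] Taken together, PATCH 1/4 and PATCH 4/4 cancel each other out in
eval_method.py, so the changes that survive the series are the widened except
clause from PATCH 2/4 and the raise_exception=True ground-truth workspace from
PATCH 3/4. For reference, a minimal sketch of eval_case as it stands after the
series, reconstructed from the hunk context above; the method signature and the
trailing return are assumptions that do not appear in any hunk:

    def eval_case(self, case_gt, case_gen):  # signature assumed, not shown in the hunks
        eval_res = []
        for ev in self.evaluator_l:
            try:
                eval_res.append((ev, ev.evaluate(implementation=case_gen, gt_implementation=case_gt)))
                # if the corr ev is successfully evaluated and achieve the best performance, then break
            except (CoderError, AttributeError) as e:
                # PATCH 2/4: an AttributeError now also aborts the remaining evaluators for this case
                return e
            except Exception as e:
                # any other evaluation error is recorded and the next evaluator still runs
                eval_res.append((ev, e))
        return eval_res  # assumed; the hunks end inside the try/except block

With PATCH 3/4, the ground-truth FactorFBWorkspace is built with
raise_exception=True and is no longer executed eagerly in the loader, so
problems in gt_code presumably surface as exceptions during evaluation rather
than at load time.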