From 0e7bcff6ffdde904dc7934db0ef20f417f9b23ff Mon Sep 17 00:00:00 2001 From: Way2Learn <118058822+Xisen-Wang@users.noreply.github.com> Date: Thu, 19 Sep 2024 21:04:13 +0800 Subject: [PATCH 1/6] Update feedback.py to support all actions Feedback.py is updated to support all actions. --- .../scenarios/kaggle/developer/feedback.py | 67 +++++++++++++++---- 1 file changed, 55 insertions(+), 12 deletions(-) diff --git a/rdagent/scenarios/kaggle/developer/feedback.py b/rdagent/scenarios/kaggle/developer/feedback.py index f82977b8e..b1e2ba9b4 100644 --- a/rdagent/scenarios/kaggle/developer/feedback.py +++ b/rdagent/scenarios/kaggle/developer/feedback.py @@ -46,6 +46,32 @@ def process_results(current_result, sota_result): class KGHypothesisExperiment2Feedback(HypothesisExperiment2Feedback): + def get_available_features(self, exp: Experiment): + features = [] + + for feature_info in exp.experiment_workspace.data_description: + task_info, feature_shape = feature_info + features.append({ + "name": task_info.factor_name, + "description": task_info.factor_description, + "shape": feature_shape + }) + + return features + + def get_model_code(self, exp: Experiment): + model_type = exp.sub_tasks[0].model_type if exp.sub_tasks else None + if model_type == "XGBoost": + return exp.sub_workspace_list[0].code_dict.get("model_xgb.py") #TODO Check if we need to replace this by using RepoAnalyzer + elif model_type == "RandomForest": + return exp.sub_workspace_list[0].code_dict.get("model_rf.py") + elif model_type == "LightGBM": + return exp.sub_workspace_list[0].code_dict.get("model_lgb.py") + elif model_type == "NN": + return exp.sub_workspace_list[0].code_dict.get("model_nn.py") + else: + return None + def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trace) -> HypothesisFeedback: """ The `ti` should be executed and the results should be included, as well as the comparison between previous results (done by LLM). @@ -84,28 +110,45 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac combined_result = process_results(current_result, current_result) # Compare with itself print("Warning: No previous experiments to compare against. Using current result as baseline.") + # Get the appropriate model code + model_code = self.get_model_code(exp) + + # Generate the user prompt based on the action type + if hypothesis.action == "Model tuning": + prompt_key = "model_tuning_feedback_generation" + elif hypothesis.action == "Model feature selection": + prompt_key = "feature_selection_feedback_generation" + else: + prompt_key = "factor_feedback_generation" + # Generate the system prompt sys_prompt = ( Environment(undefined=StrictUndefined) - .from_string(prompt_dict["factor_feedback_generation"]["system"]) + .from_string(feedback_prompts[prompt_key]["system"]) .render(scenario=self.scen.get_scenario_all_desc()) ) - # Generate the user prompt based on the action type - if hypothesis.action == "Model Tuning": # TODO Add other prompts here - prompt_key = "model_feedback_generation" - else: - prompt_key = "factor_feedback_generation" + # Prepare render dictionary + render_dict = { + "context": self.scen.get_scenario_all_desc(), + "last_hypothesis": trace.hist[-1][0] if trace.hist else None, + "last_task": trace.hist[-1][1] if trace.hist else None, + "last_code": self.get_model_code(trace.hist[-1][1]) if trace.hist else None, + "last_result": trace.hist[-1][1].result if trace.hist else None, + "hypothesis": hypothesis, + "exp": exp, + "model_code": model_code, + "available_features": available_features, + "combined_result": combined_result, + "hypothesis_text": hypothesis_text, + "task_details": tasks_factors, + } # Generate the user prompt usr_prompt = ( Environment(undefined=StrictUndefined) - .from_string(prompt_dict[prompt_key]["user"]) - .render( - hypothesis_text=hypothesis_text, - task_details=tasks_factors, - combined_result=combined_result, - ) + .from_string(feedback_prompts[prompt_key]["user"]) + .render(**render_dict) ) # Call the APIBackend to generate the response for hypothesis feedback From 8c740325ac5fc2d6b95d9edb97af239da63598f8 Mon Sep 17 00:00:00 2001 From: Way2Learn <118058822+Xisen-Wang@users.noreply.github.com> Date: Thu, 19 Sep 2024 21:07:06 +0800 Subject: [PATCH 2/6] Update prompts.yaml to support all actions --- rdagent/scenarios/kaggle/prompts.yaml | 66 ++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/rdagent/scenarios/kaggle/prompts.yaml b/rdagent/scenarios/kaggle/prompts.yaml index c2c616b23..00019b1e6 100644 --- a/rdagent/scenarios/kaggle/prompts.yaml +++ b/rdagent/scenarios/kaggle/prompts.yaml @@ -198,4 +198,68 @@ factor_feedback_generation: Consider Changing Direction for Significant Gaps with the Best Result: - If the new results significantly differ from the best, consider exploring a new direction. - Avoid re-implementing previous features as those that surpassed the best are already included in the feature library and will be used in each run. - Note: Only features with 'Feature Implementation' as True are implemented and tested in this experiment. If 'Feature Implementation' is False, the hypothesis for that feature cannot be verified in this run. \ No newline at end of file + Note: Only features with 'Feature Implementation' as True are implemented and tested in this experiment. If 'Feature Implementation' is False, the hypothesis for that feature cannot be verified in this run. + + +feature_selection_feedback_generation: + system: |- + You are a professional feature selection assistant for machine learning models. Your task is to analyze the current feature selection strategy, evaluate its effectiveness, and suggest improvements. + + Consider the following when analyzing: + 1. How well does the current feature selection support the hypothesis? + 2. Which features seem to contribute most to the model's performance? + 3. Are there any features that might be redundant or noisy? + 4. What new feature selection strategies might improve the model? + + Provide detailed and constructive feedback, focusing on actionable insights for feature selection improvement. + + Respond in JSON format. Example JSON structure for Result Analysis: + { + "Observations": "Your overall observations about the feature selection effectiveness", + "Feedback for Hypothesis": "How well the results support or refute the hypothesis", + "New Hypothesis": "Suggested new hypothesis for feature selection in the next iteration", + "Reasoning": "Detailed reasoning for the new hypothesis, including which features to keep, remove, or add", + "Decision": true or false + } + + user: |- + We are in an experiment of finding hypotheses for feature selection and validating or rejecting them to optimize our model's performance. + Here is the context: {{context}}. + + {% if last_hypothesis %} + Last Round Information: + Hypothesis: {{last_hypothesis.hypothesis}} + Task: {{last_task}} + Code Implemented: {{last_code}} + Result: {{last_result}} + {% else %} + This is the first round. No previous information available. As long as the performance is not too negative (e.g., ICIR is greater than 0), treat it as successful. Do not set the threshold too high. + {% endif %} + + Current Round Information: + Hypothesis: {{hypothesis.hypothesis}} + Experiment Setup: {{exp.sub_tasks[0]}} + Model Code Implemented (focus on the select() method): + ```python + {{model_code}} + ``` + Relevant Reasoning: {{hypothesis.reason}} + Result: {{exp.result}} + + Available Features: + {% for feature in available_features %} + - {{feature.name}}: {{feature.description}} + Shape: {{feature.shape}} + {% endfor %} + + Compare and observe the results. Which result has a better return and lower risk? If the performance increases, the hypothesis should be considered positive (working). + + Based on the hypotheses, relevant reasoning, and results (comparison), provide detailed and constructive feedback and suggest a new hypothesis for feature selection. + + In your feedback, consider: + 1. How effective is the current feature selection strategy? + 2. Are there any patterns in the selected or discarded features that might inform future selections? + 3. How might we refine or change the feature selection approach to improve model performance? + 4. Are there any domain-specific considerations that should inform our feature selection? + + Remember to focus on the select() method in the model code, as this is where feature selection is implemented. From b9fd3c33a3f2a58b02cf5ed42977baefee6de2a2 Mon Sep 17 00:00:00 2001 From: Xisen-Wang Date: Thu, 19 Sep 2024 13:45:39 +0000 Subject: [PATCH 3/6] Revised for CI --- .../scenarios/kaggle/developer/feedback.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/rdagent/scenarios/kaggle/developer/feedback.py b/rdagent/scenarios/kaggle/developer/feedback.py index b1e2ba9b4..f64c0597b 100644 --- a/rdagent/scenarios/kaggle/developer/feedback.py +++ b/rdagent/scenarios/kaggle/developer/feedback.py @@ -48,21 +48,21 @@ def process_results(current_result, sota_result): class KGHypothesisExperiment2Feedback(HypothesisExperiment2Feedback): def get_available_features(self, exp: Experiment): features = [] - + for feature_info in exp.experiment_workspace.data_description: task_info, feature_shape = feature_info - features.append({ - "name": task_info.factor_name, - "description": task_info.factor_description, - "shape": feature_shape - }) - + features.append( + {"name": task_info.factor_name, "description": task_info.factor_description, "shape": feature_shape} + ) + return features def get_model_code(self, exp: Experiment): model_type = exp.sub_tasks[0].model_type if exp.sub_tasks else None if model_type == "XGBoost": - return exp.sub_workspace_list[0].code_dict.get("model_xgb.py") #TODO Check if we need to replace this by using RepoAnalyzer + return exp.sub_workspace_list[0].code_dict.get( + "model_xgb.py" + ) # TODO Check if we need to replace this by using RepoAnalyzer elif model_type == "RandomForest": return exp.sub_workspace_list[0].code_dict.get("model_rf.py") elif model_type == "LightGBM": @@ -77,7 +77,6 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac The `ti` should be executed and the results should be included, as well as the comparison between previous results (done by LLM). For example: `mlflow` of Qlib will be included. """ - """ Generate feedback for the given experiment and hypothesis. Args: @@ -110,6 +109,7 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac combined_result = process_results(current_result, current_result) # Compare with itself print("Warning: No previous experiments to compare against. Using current result as baseline.") + available_features = self.get_available_features(exp) # Get the appropriate model code model_code = self.get_model_code(exp) @@ -124,7 +124,7 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac # Generate the system prompt sys_prompt = ( Environment(undefined=StrictUndefined) - .from_string(feedback_prompts[prompt_key]["system"]) + .from_string(prompt_dict[prompt_key]["system"]) .render(scenario=self.scen.get_scenario_all_desc()) ) @@ -147,7 +147,7 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac # Generate the user prompt usr_prompt = ( Environment(undefined=StrictUndefined) - .from_string(feedback_prompts[prompt_key]["user"]) + .from_string(prompt_dict[prompt_key]["user"]) .render(**render_dict) ) From a3b71f0f0ca074c6adbc47fbd42fbc312d4f6e8d Mon Sep 17 00:00:00 2001 From: Xisen-Wang Date: Thu, 19 Sep 2024 15:41:06 +0000 Subject: [PATCH 4/6] CI --- rdagent/scenarios/kaggle/developer/feedback.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/rdagent/scenarios/kaggle/developer/feedback.py b/rdagent/scenarios/kaggle/developer/feedback.py index f64c0597b..83624f4b0 100644 --- a/rdagent/scenarios/kaggle/developer/feedback.py +++ b/rdagent/scenarios/kaggle/developer/feedback.py @@ -71,7 +71,7 @@ def get_model_code(self, exp: Experiment): return exp.sub_workspace_list[0].code_dict.get("model_nn.py") else: return None - + def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trace) -> HypothesisFeedback: """ The `ti` should be executed and the results should be included, as well as the comparison between previous results (done by LLM). @@ -145,11 +145,7 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac } # Generate the user prompt - usr_prompt = ( - Environment(undefined=StrictUndefined) - .from_string(prompt_dict[prompt_key]["user"]) - .render(**render_dict) - ) + usr_prompt = Environment(undefined=StrictUndefined).from_string(prompt_dict[prompt_key]["user"]).render(**render_dict) # Call the APIBackend to generate the response for hypothesis feedback response = APIBackend().build_messages_and_create_chat_completion( From 021e15d79f927b6f73dbe1cae60b9932e2910c52 Mon Sep 17 00:00:00 2001 From: WinstonLiye <1957922024@qq.com> Date: Fri, 20 Sep 2024 07:45:00 +0000 Subject: [PATCH 5/6] fix a ci bug --- rdagent/scenarios/kaggle/developer/feedback.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rdagent/scenarios/kaggle/developer/feedback.py b/rdagent/scenarios/kaggle/developer/feedback.py index 83624f4b0..1d2aef616 100644 --- a/rdagent/scenarios/kaggle/developer/feedback.py +++ b/rdagent/scenarios/kaggle/developer/feedback.py @@ -145,7 +145,10 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac } # Generate the user prompt - usr_prompt = Environment(undefined=StrictUndefined).from_string(prompt_dict[prompt_key]["user"]).render(**render_dict) + usr_prompt = ( + Environment(undefined=StrictUndefined).from_string(prompt_dict[prompt_key]["user"]).render(**render_dict) + ) + # Call the APIBackend to generate the response for hypothesis feedback response = APIBackend().build_messages_and_create_chat_completion( From 02363cf9b501ac0cb27abd3dba331f1780e67a15 Mon Sep 17 00:00:00 2001 From: WinstonLiye <1957922024@qq.com> Date: Fri, 20 Sep 2024 07:50:37 +0000 Subject: [PATCH 6/6] fix a ci bug --- rdagent/scenarios/kaggle/developer/feedback.py | 1 - 1 file changed, 1 deletion(-) diff --git a/rdagent/scenarios/kaggle/developer/feedback.py b/rdagent/scenarios/kaggle/developer/feedback.py index 1d2aef616..a32245f1b 100644 --- a/rdagent/scenarios/kaggle/developer/feedback.py +++ b/rdagent/scenarios/kaggle/developer/feedback.py @@ -149,7 +149,6 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac Environment(undefined=StrictUndefined).from_string(prompt_dict[prompt_key]["user"]).render(**render_dict) ) - # Call the APIBackend to generate the response for hypothesis feedback response = APIBackend().build_messages_and_create_chat_completion( user_prompt=usr_prompt,