microsoft · WinstonLiyt · Sep 11, 2024 · Jul 17, 2024 · Jul 23, 2024 · Jul 23, 2024
diff --git a/rdagent/log/ui/app.py b/rdagent/log/ui/app.py
@@ -9,7 +9,6 @@
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
-from rdagent.scenarios.kaggle.experiment.scenario import KGScenario
 import streamlit as st
 from plotly.subplots import make_subplots
 from streamlit import session_state as state
@@ -28,6 +27,7 @@
 from rdagent.log.ui.qlib_report_figure import report_figure
 from rdagent.scenarios.data_mining.experiment.model_experiment import DMModelScenario
 from rdagent.scenarios.general_model.scenario import GeneralModelScenario
+from rdagent.scenarios.kaggle.experiment.scenario import KGScenario
 from rdagent.scenarios.qlib.experiment.factor_experiment import QlibFactorScenario
 from rdagent.scenarios.qlib.experiment.factor_from_report_experiment import (
     QlibFactorFromReportScenario,

diff --git a/rdagent/scenarios/kaggle/developer/runner.py b/rdagent/scenarios/kaggle/developer/runner.py
@@ -68,7 +68,6 @@ class KGFactorRunner(KGCachedRunner[KGFactorExperiment]):
     def init_develop(self, exp: KGFactorExperiment) -> KGFactorExperiment:
         """
         For the initial development, the experiment serves as a benchmark for feature engineering.
-        #TODO 不是特别确定写的对不对
         """
         self.build_from_SOTA(exp)
         if RUNNER_SETTINGS.cache_result:
@@ -88,8 +87,6 @@ def init_develop(self, exp: KGFactorExperiment) -> KGFactorExperiment:
         return exp
 
     def develop(self, exp: KGFactorExperiment) -> KGFactorExperiment:
-        # TODO 这里是用来跑读一次的sota的，就是不做特征工程的。后面轮次exp.based_experiments[-1]应该都有至
-        # TODO 但是不知道为啥 这里exp.based_experiments 是空。但是在proposal.py 是有定义的
         if exp.based_experiments and exp.based_experiments[-1].result is None:
             exp.based_experiments[-1] = self.init_develop(exp.based_experiments[-1])
         self.build_from_SOTA(exp)

diff --git a/rdagent/scenarios/kaggle/experiment/meta_tpl/fea_share_preprocess.py b/rdagent/scenarios/kaggle/experiment/meta_tpl/fea_share_preprocess.py
@@ -1,9 +1,9 @@
 import pandas as pd
-from sklearn.preprocessing import LabelEncoder
-from sklearn.model_selection import train_test_split
 from sklearn.compose import ColumnTransformer
 from sklearn.impute import SimpleImputer
+from sklearn.model_selection import train_test_split
 from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import LabelEncoder
 from sklearn.preprocessing import OneHotEncoder
 
 

diff --git a/rdagent/scenarios/kaggle/experiment/meta_tpl/train.py b/rdagent/scenarios/kaggle/experiment/meta_tpl/train.py
@@ -1,11 +1,11 @@
+from fea_share_preprocess import preprocess_script
 import importlib.util
-import random
-from pathlib import Path
 import numpy as np
 import pandas as pd
-from sklearn.preprocessing import LabelEncoder
-from fea_share_preprocess import preprocess_script
+from pathlib import Path
+import random
 from sklearn.metrics import accuracy_score, matthews_corrcoef
+from sklearn.preprocessing import LabelEncoder
 
 
 # Set random seed for reproducibility

diff --git a/rdagent/scenarios/kaggle/experiment/prompts.yaml b/rdagent/scenarios/kaggle/experiment/prompts.yaml
@@ -91,7 +91,11 @@ kg_feature_interface: |-
       return X.fillna(0) # Example feature processing
   ```
 
-  Ensure that your code meets these requirements and produces a feature-engineered DataFrame that contains only the newly engineered columns, aligning with the user's data and objectives.
+  Notes:
+  1. Ensure that your code meets these requirements and produces a feature-engineered DataFrame that contains only the newly engineered columns, aligning with the user's data and objectives.
+  2. Ensure that the index of the output DataFrame matches the index of the original DataFrame. For example:
+    Incorrect: `normalized_df = pd.DataFrame(normalized_features, columns=X.columns)`
+    Correct: `normalized_df = pd.DataFrame(normalized_features, columns=X.columns, index=X.index)`
 
 kg_model_interface: |-
   Your code should contain several parts:

diff --git a/requirements.txt b/requirements.txt
@@ -63,6 +63,10 @@ st-theme
 selenium
 kaggle
 
-#model related
+# tool
+seaborn
+setuptools-scm
+
+# These are temporary packages installed to pass the test_import test
 xgboost
-lightgbm
+lightgbm