Skip to content

Commit b054017

Browse files
authored
fix: Optiver fixes (#357)
* Adding the competition: Optiver Volatility Prediction
* Fixing for CI
* Updating a new competition @ Optiver
* re-writing the optiver competition
* Revise for better commit
* Further fixes
* Further fixes
* Fixes
* Further Fixing Optiver Template
* Fix further to pass the test
* Fixing for CI
* Fixing for CI
1 parent 3705efe commit b054017

File tree

3 files changed

+23
-7
lines changed

3 files changed

+23
-7
lines changed

rdagent/scenarios/kaggle/docker/Dockerfile

+2
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,5 @@ RUN pip install catboost
2424
RUN pip install xgboost
2525
RUN pip install sparse
2626
RUN pip install lightgbm
27+
RUN pip install pyarrow
28+
RUN pip install fastparquet

rdagent/scenarios/kaggle/experiment/meta_tpl/model/model_xgboost.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@ def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFrame, y_v
1818
dtrain = xgb.DMatrix(X_train, label=y_train)
1919
dvalid = xgb.DMatrix(X_valid, label=y_valid)
2020

21-
# TODO: for quick running....
21+
# Parameters for regression
2222
params = {
23-
"nthred": -1,
23+
"objective": "reg:squarederror", # Use squared error for regression
24+
"nthread": -1,
2425
}
2526
num_round = 100
2627

rdagent/scenarios/kaggle/experiment/optiver-realized-volatility-prediction_template/fea_share_preprocess.py

+18-5
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,18 @@
1111

1212
def prepreprocess():
1313
# Load the training data
14-
train_df = pd.read_csv("/kaggle/input/optiver-realized-volatility-prediction/train.csv")
14+
train_df = pd.read_csv("/kaggle/input/train.csv").head(1000)
1515

1616
# Load book and trade data
17-
book_train = pd.read_parquet("/kaggle/input/optiver-realized-volatility-prediction/book_train.parquet")
18-
trade_train = pd.read_parquet("/kaggle/input/optiver-realized-volatility-prediction/trade_train.parquet")
17+
book_train = pd.read_parquet("/kaggle/input/book_train.parquet").head(1000)
18+
trade_train = pd.read_parquet("/kaggle/input/trade_train.parquet").head(1000)
1919

2020
# Merge book and trade data with train_df
2121
merged_df = pd.merge(train_df, book_train, on=["stock_id", "time_id"], how="left")
2222
merged_df = pd.merge(merged_df, trade_train, on=["stock_id", "time_id"], how="left")
2323

24+
print(merged_df.head())
25+
2426
# Split the data
2527
X = merged_df.drop(["target"], axis=1)
2628
y = merged_df["target"]
@@ -83,8 +85,19 @@ def preprocess_script():
8385
X_valid = preprocess_transform(X_valid, preprocessor, numerical_cols, categorical_cols)
8486

8587
submission_df = pd.read_csv("/kaggle/input/test.csv")
86-
ids = submission_df["id"]
87-
submission_df = submission_df.drop(["id"], axis=1)
88+
89+
ids = submission_df["row_id"]
90+
submission_df = submission_df.drop(["row_id"], axis=1)
91+
92+
# Add missing columns to submission_df
93+
for col in X_train.columns:
94+
if col not in submission_df.columns:
95+
submission_df[col] = 0 # Fill with 0 or another appropriate value
96+
8897
X_test = preprocess_transform(submission_df, preprocessor, numerical_cols, categorical_cols)
8998

99+
# Handle missing values
100+
for df in [X_train, X_valid, X_test]:
101+
df.fillna(df.mean(), inplace=True)
102+
90103
return X_train, X_valid, y_train, y_valid, X_test, ids

0 commit comments

Comments
 (0)