Skip to content

Commit

Permalink
fix: Optiver fixes (#357)
Browse files Browse the repository at this point in the history
* Adding the competition: Optiver Volatility Prediction

* Fixing for CI

* Updating a new competition @ Optiver

* re-writing the optiver competition

* Revise for better commit

* Further fixes

* Further fixes

* Fixes

* Further Fixing Optiver Template

* Fix further to pass the test

* Fixing for CI

* Fixing for CI
  • Loading branch information
xisen-w authored Sep 26, 2024
1 parent 3705efe commit b054017
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 7 deletions.
2 changes: 2 additions & 0 deletions rdagent/scenarios/kaggle/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ RUN pip install catboost
RUN pip install xgboost
RUN pip install sparse
RUN pip install lightgbm
RUN pip install pyarrow
RUN pip install fastparquet
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@ def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFrame, y_v
dtrain = xgb.DMatrix(X_train, label=y_train)
dvalid = xgb.DMatrix(X_valid, label=y_valid)

# TODO: for quick running....
# Parameters for regression
params = {
"nthred": -1,
"objective": "reg:squarederror", # Use squared error for regression
"nthread": -1,
}
num_round = 100

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,18 @@

def prepreprocess():
# Load the training data
train_df = pd.read_csv("/kaggle/input/optiver-realized-volatility-prediction/train.csv")
train_df = pd.read_csv("/kaggle/input/train.csv").head(1000)

# Load book and trade data
book_train = pd.read_parquet("/kaggle/input/optiver-realized-volatility-prediction/book_train.parquet")
trade_train = pd.read_parquet("/kaggle/input/optiver-realized-volatility-prediction/trade_train.parquet")
book_train = pd.read_parquet("/kaggle/input/book_train.parquet").head(1000)
trade_train = pd.read_parquet("/kaggle/input/trade_train.parquet").head(1000)

# Merge book and trade data with train_df
merged_df = pd.merge(train_df, book_train, on=["stock_id", "time_id"], how="left")
merged_df = pd.merge(merged_df, trade_train, on=["stock_id", "time_id"], how="left")

print(merged_df.head())

# Split the data
X = merged_df.drop(["target"], axis=1)
y = merged_df["target"]
Expand Down Expand Up @@ -83,8 +85,19 @@ def preprocess_script():
X_valid = preprocess_transform(X_valid, preprocessor, numerical_cols, categorical_cols)

submission_df = pd.read_csv("/kaggle/input/test.csv")
ids = submission_df["id"]
submission_df = submission_df.drop(["id"], axis=1)

ids = submission_df["row_id"]
submission_df = submission_df.drop(["row_id"], axis=1)

# Add missing columns to submission_df
for col in X_train.columns:
if col not in submission_df.columns:
submission_df[col] = 0 # Fill with 0 or another appropriate value

X_test = preprocess_transform(submission_df, preprocessor, numerical_cols, categorical_cols)

# Handle missing values
for df in [X_train, X_valid, X_test]:
df.fillna(df.mean(), inplace=True)

return X_train, X_valid, y_train, y_valid, X_test, ids

0 comments on commit b054017

Please sign in to comment.