Skip to content

Commit

Permalink
fix: fix a bug in the format of the model input (#327)
Browse files Browse the repository at this point in the history
* fix a bug

* fix a bug

* fix a ci bug
  • Loading branch information
WinstonLiyt authored Sep 25, 2024
1 parent e126471 commit 8f0574e
Show file tree
Hide file tree
Showing 7 changed files with 112 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,23 @@ def import_module_from_path(module_name, module_path):
# Handle inf and -inf values
X_train, X_valid, X_test = clean_and_impute_data(X_train, X_valid, X_test)


# 3) Train the model
def flatten_columns(df: pd.DataFrame) -> pd.DataFrame:
"""
Flatten the columns of a DataFrame with MultiIndex columns,
for (feature_0, a), (feature_0, b) -> feature_0_a, feature_0_b
"""
if df.columns.nlevels == 1:
return df
df.columns = ["_".join(col).strip() for col in df.columns.values]
return df


X_train = flatten_columns(X_train)
X_valid = flatten_columns(X_valid)
X_test = flatten_columns(X_test)

model_l = [] # list[tuple[model, predict_func]]
for f in DIRNAME.glob("model/model*.py"):
m = import_module_from_path(f.stem, f)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,23 @@ def import_module_from_path(module_name, module_path):
X_valid = pd.concat(X_valid_l, axis=1, keys=[f"feature_{i}" for i in range(len(X_valid_l))])
X_test = pd.concat(X_test_l, axis=1, keys=[f"feature_{i}" for i in range(len(X_test_l))])


# 3) Train the model
def flatten_columns(df: pd.DataFrame) -> pd.DataFrame:
"""
Flatten the columns of a DataFrame with MultiIndex columns,
for (feature_0, a), (feature_0, b) -> feature_0_a, feature_0_b
"""
if df.columns.nlevels == 1:
return df
df.columns = ["_".join(col).strip() for col in df.columns.values]
return df


X_train = flatten_columns(X_train)
X_valid = flatten_columns(X_valid)
X_test = flatten_columns(X_test)

model_l = [] # list[tuple[model, predict_func]]
for f in DIRNAME.glob("model/model*.py"):
m = import_module_from_path(f.stem, f)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,23 @@ def import_module_from_path(module_name, module_path):

print(X_train.shape, X_valid.shape, X_test.shape)


# 3) Train the model
def flatten_columns(df: pd.DataFrame) -> pd.DataFrame:
"""
Flatten the columns of a DataFrame with MultiIndex columns,
for (feature_0, a), (feature_0, b) -> feature_0_a, feature_0_b
"""
if df.columns.nlevels == 1:
return df
df.columns = ["_".join(col).strip() for col in df.columns.values]
return df


X_train = flatten_columns(X_train)
X_valid = flatten_columns(X_valid)
X_test = flatten_columns(X_test)

model_l = [] # list[tuple[model, predict_func]]
for f in DIRNAME.glob("model/model*.py"):
m = import_module_from_path(f.stem, f)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,23 @@ def import_module_from_path(module_name, module_path):
X_valid = X_valid.loc[:, ~X_valid.columns.duplicated()]
X_test = X_test.loc[:, ~X_test.columns.duplicated()]


# 3) Train the model
def flatten_columns(df: pd.DataFrame) -> pd.DataFrame:
"""
Flatten the columns of a DataFrame with MultiIndex columns,
for (feature_0, a), (feature_0, b) -> feature_0_a, feature_0_b
"""
if df.columns.nlevels == 1:
return df
df.columns = ["_".join(col).strip() for col in df.columns.values]
return df


X_train = flatten_columns(X_train)
X_valid = flatten_columns(X_valid)
X_test = flatten_columns(X_test)

model_l = [] # list[tuple[model, predict_func,]]
for f in DIRNAME.glob("model/model*.py"):
m = import_module_from_path(f.stem, f)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,23 @@ def import_module_from_path(module_name, module_path):
X_valid = X_valid.loc[:, ~X_valid.columns.duplicated()]
X_test = X_test.loc[:, ~X_test.columns.duplicated()]


# 3) Train the model
def flatten_columns(df: pd.DataFrame) -> pd.DataFrame:
"""
Flatten the columns of a DataFrame with MultiIndex columns,
for (feature_0, a), (feature_0, b) -> feature_0_a, feature_0_b
"""
if df.columns.nlevels == 1:
return df
df.columns = ["_".join(col).strip() for col in df.columns.values]
return df


X_train = flatten_columns(X_train)
X_valid = flatten_columns(X_valid)
X_test = flatten_columns(X_test)

model_l = [] # list[tuple[model, predict_func,]]
for f in DIRNAME.glob("model/model*.py"):
m = import_module_from_path(f.stem, f)
Expand Down
16 changes: 16 additions & 0 deletions rdagent/scenarios/kaggle/experiment/sf-crime_template/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,23 @@ def import_module_from_path(module_name, module_path):

print(X_train.shape, X_valid.shape, X_test.shape)


# 3) Train the model
def flatten_columns(df: pd.DataFrame) -> pd.DataFrame:
"""
Flatten the columns of a DataFrame with MultiIndex columns,
for (feature_0, a), (feature_0, b) -> feature_0_a, feature_0_b
"""
if df.columns.nlevels == 1:
return df
df.columns = ["_".join(col).strip() for col in df.columns.values]
return df


X_train = flatten_columns(X_train)
X_valid = flatten_columns(X_valid)
X_test = flatten_columns(X_test)

model_l = [] # list[tuple[model, predict_func]]
for f in DIRNAME.glob("model/model*.py"):
m = import_module_from_path(f.stem, f)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,23 @@ def import_module_from_path(module_name, module_path):
X_valid = X_valid.loc[:, ~X_valid.columns.duplicated()]
X_test = X_test.loc[:, ~X_test.columns.duplicated()]


# 3) Train the model
def flatten_columns(df: pd.DataFrame) -> pd.DataFrame:
"""
Flatten the columns of a DataFrame with MultiIndex columns,
for (feature_0, a), (feature_0, b) -> feature_0_a, feature_0_b
"""
if df.columns.nlevels == 1:
return df
df.columns = ["_".join(col).strip() for col in df.columns.values]
return df


X_train = flatten_columns(X_train)
X_valid = flatten_columns(X_valid)
X_test = flatten_columns(X_test)

model_l = [] # list[tuple[model, predict_func,]]
for f in DIRNAME.glob("model/model*.py"):
m = import_module_from_path(f.stem, f)
Expand Down

0 comments on commit 8f0574e

Please sign in to comment.