-
-
Notifications
You must be signed in to change notification settings - Fork 73
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Kaggle loop update (Feature & Model) (#241)
* Init todo * Evaluation & dataset * Generate new data * dataset generation * add the result * Analysis * Factor update * Updates * Reformat analysis.py * CI fix * Revised Preprocessing & Supported Random Forest * Revised to support three models with feature * Further revised prompts * Slight Revision * docs: update contributors (#230) * Revised to support three models with feature * Further revised prompts * Slight Revision * feat: kaggle model and feature (#238) * update first version code * make hypothesis_gen and experiment_builder fit for both feature and model * feat: continue kaggle feature and model coder (#239) * use qlib docker to run qlib models * feature coder ready * model coder ready * fix CI * finish the first round of runner (#240) * Optimized the factor scenario and added the front-end. * fix a small bug * fix a typo * update the kaggle scenario * delete model_template folder * use experiment to run data preprocess script * add source data to scenarios * minor fix * minor bug fix * train.py debug * fixed a bug in train.py and added some TODOs * For Debugging * fix two small bugs in based_exp * fix some bugs * update preprocess * fix a bug in preprocess * fix a bug in train.py * reformat * Follow-up * fix a bug in train.py * fix a bug in workspace * fix a bug in feature duplication * fix a bug in feedback * fix a bug in preprocessed data * fix a bug om feature engineering * fix a ci error * Debugged & Connected * Fixed error on feedback & added other fixes * fix CI errors * fix a CI bug * fix: fix_dotenv_error (#257) * fix_dotenv_error * format with isort * Update rdagent/app/cli.py --------- Co-authored-by: you-n-g <you-n-g@users.noreply.github.com> * chore(main): release 0.2.1 (#249) Release-As: 0.2.1 * init a scenario for kaggle feature engineering * delete error codes * Delete rdagent/app/kaggle_feature/conf.py --------- Co-authored-by: Young <afe.young@gmail.com> Co-authored-by: Taozhi Wang <taozhi.mark.wang@gmail.com> Co-authored-by: you-n-g 
<you-n-g@users.noreply.github.com> Co-authored-by: cyncyw <47289405+taozhiwang@users.noreply.github.com> Co-authored-by: Xisen-Wang <xisen_application@163.com> Co-authored-by: Haotian Chen <113661982+Hytn@users.noreply.github.com> Co-authored-by: WinstonLiye <1957922024@qq.com> Co-authored-by: WinstonLiyt <104308117+WinstonLiyt@users.noreply.github.com> Co-authored-by: Linlang <30293408+SunsetWolf@users.noreply.github.com>
- Loading branch information
1 parent
44031d5
commit 4cf22a6
Showing
42 changed files
with
1,518 additions
and
686 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
from collections import defaultdict | ||
from typing import Any | ||
|
||
import fire | ||
|
||
from rdagent.app.kaggle.conf import PROP_SETTING | ||
from rdagent.components.workflow.conf import BasePropSetting | ||
from rdagent.components.workflow.rd_loop import RDLoop | ||
from rdagent.core.developer import Developer | ||
from rdagent.core.exception import ModelEmptyError | ||
from rdagent.core.proposal import ( | ||
Hypothesis2Experiment, | ||
HypothesisExperiment2Feedback, | ||
HypothesisGen, | ||
Trace, | ||
) | ||
from rdagent.core.scenario import Scenario | ||
from rdagent.core.utils import import_class | ||
from rdagent.log import rdagent_logger as logger | ||
from rdagent.scenarios.kaggle.proposal.proposal import ( | ||
KG_ACTION_FEATURE_ENGINEERING, | ||
KG_ACTION_FEATURE_PROCESSING, | ||
) | ||
|
||
|
||
class ModelRDLoop(RDLoop):
    """R&D loop for a Kaggle competition with separate feature and model tracks.

    Every component class is resolved from a dotted path stored on the
    settings object. The ``coding`` and ``running`` steps pick the feature or
    the model developer depending on the action carried by the proposed
    hypothesis.
    """

    # NOTE(review): presumably read by the base loop to decide which errors
    # skip the current iteration instead of aborting — confirm in the base class.
    skip_loop_error = (ModelEmptyError,)

    def __init__(self, PROP_SETTING: BasePropSetting):
        """Instantiate and log all loop components described by ``PROP_SETTING``.

        Args:
            PROP_SETTING: settings holding the dotted class paths of the
                components (``scen``, ``hypothesis_gen``, ...) and the
                ``competition`` value handed to the scenario constructor.
        """
        with logger.tag("init"):
            # The scenario is the only component built from the competition
            # value; most other components receive the scenario.
            scenario_cls = import_class(PROP_SETTING.scen)
            scen: Scenario = scenario_cls(PROP_SETTING.competition)
            logger.log_object(scen, tag="scenario")

            hypothesis_gen_cls = import_class(PROP_SETTING.hypothesis_gen)
            self.hypothesis_gen: HypothesisGen = hypothesis_gen_cls(scen)
            logger.log_object(self.hypothesis_gen, tag="hypothesis generator")

            # Built without arguments, unlike the other components.
            h2e_cls = import_class(PROP_SETTING.hypothesis2experiment)
            self.hypothesis2experiment: Hypothesis2Experiment = h2e_cls()
            logger.log_object(self.hypothesis2experiment, tag="hypothesis2experiment")

            # One coder per track.
            feature_coder_cls = import_class(PROP_SETTING.feature_coder)
            self.feature_coder: Developer = feature_coder_cls(scen)
            logger.log_object(self.feature_coder, tag="feature coder")
            model_coder_cls = import_class(PROP_SETTING.model_coder)
            self.model_coder: Developer = model_coder_cls(scen)
            logger.log_object(self.model_coder, tag="model coder")

            # One runner per track.
            feature_runner_cls = import_class(PROP_SETTING.feature_runner)
            self.feature_runner: Developer = feature_runner_cls(scen)
            logger.log_object(self.feature_runner, tag="feature runner")
            model_runner_cls = import_class(PROP_SETTING.model_runner)
            self.model_runner: Developer = model_runner_cls(scen)
            logger.log_object(self.model_runner, tag="model runner")

            summarizer_cls = import_class(PROP_SETTING.summarizer)
            self.summarizer: HypothesisExperiment2Feedback = summarizer_cls(scen)
            logger.log_object(self.summarizer, tag="summarizer")

            self.trace = Trace(scen=scen)
            # super(RDLoop, self) starts the lookup *after* RDLoop in the MRO,
            # so RDLoop.__init__ itself is not executed here.
            super(RDLoop, self).__init__()

    def coding(self, prev_out: dict[str, Any]):
        """Develop the experiment from ``prev_out["exp_gen"]`` with the matching coder.

        The feature coder is used when ``prev_out["propose"].action`` is one of
        the two feature actions; any other action goes to the model coder.
        """
        with logger.tag("d"):  # develop
            is_feature_action = prev_out["propose"].action in [
                KG_ACTION_FEATURE_ENGINEERING,
                KG_ACTION_FEATURE_PROCESSING,
            ]
            coder = self.feature_coder if is_feature_action else self.model_coder
            exp = coder.develop(prev_out["exp_gen"])
            logger.log_object(exp.sub_workspace_list, tag="coder result")
        return exp

    def running(self, prev_out: dict[str, Any]):
        """Run the experiment from ``prev_out["coding"]`` with the matching runner.

        Runner selection follows the same action test as ``coding``.
        """
        with logger.tag("ef"):  # evaluate and feedback
            is_feature_action = prev_out["propose"].action in [
                KG_ACTION_FEATURE_ENGINEERING,
                KG_ACTION_FEATURE_PROCESSING,
            ]
            runner = self.feature_runner if is_feature_action else self.model_runner
            exp = runner.develop(prev_out["coding"])
            logger.log_object(exp, tag="runner result")
        return exp
|
||
|
||
def main(path=None, step_n=None, competition=None):
    """
    Auto R&D evolving loop for features and models in a Kaggle scenario.

    A previous session can be resumed by passing its path:

    .. code-block:: python

        dotenv run -- python rdagent/app/kaggle/loop.py [--competition titanic] $LOG_PATH/__session__/1/0_propose --step_n 1  # `step_n` is an optional parameter

    Args:
        path: session path to load the loop from; when ``None`` a new loop is
            built from ``PROP_SETTING``.
        step_n: forwarded unchanged to the loop's ``run`` method.
        competition: when truthy, stored on ``PROP_SETTING.competition``
            before the loop is created or loaded.
    """
    if competition:
        PROP_SETTING.competition = competition

    # Start a new loop, or restore a saved one when a session path is given.
    model_loop = ModelRDLoop(PROP_SETTING) if path is None else ModelRDLoop.load(path)
    model_loop.run(step_n=step_n)
|
||
|
||
if __name__ == "__main__":
    # Imported here so that dotenv is only needed when run as a script.
    from dotenv import load_dotenv

    # override=True: values from the .env file replace variables that are
    # already present in the environment.
    load_dotenv(override=True)

    # Expose `main` as a command-line interface.
    fire.Fire(main)
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
13 changes: 13 additions & 0 deletions
13
rdagent/components/coder/factor_coder/factor_execution_template.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import os | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from factor import feat_eng | ||
|
||
# Stop early with an explicit message when the validation data is missing.
if not os.path.exists("valid.pkl"):
    raise FileNotFoundError("No valid data found.")
valid_df = pd.read_pickle("valid.pkl")

# Apply the `feat_eng` function imported from `factor` and write its result
# to a fresh HDF5 file (mode="w" overwrites any existing "result.h5").
new_feat = feat_eng(valid_df)
new_feat.to_hdf("result.h5", key="data", mode="w")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.