Skip to content

Commit

Permalink
add inf evaluator to factor costeer and some minor improvement (#435)
Browse files Browse the repository at this point in the history
  • Loading branch information
peteryang1 authored Oct 16, 2024
1 parent 657f6b5 commit b82e597
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 3 deletions.
26 changes: 26 additions & 0 deletions rdagent/components/coder/factor_coder/CoSTEER/evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,28 @@ def evaluate(
return critic_response, None


class FactorInfEvaluator(FactorEvaluator):
    """Evaluator that rejects a generated factor dataframe containing infinite values."""

    def evaluate(
        self,
        implementation: Workspace,
        gt_implementation: Workspace,
    ) -> Tuple[str, object]:
        """Check the generated dataframe for positive or negative infinity.

        Args:
            implementation: Workspace whose output dataframe is inspected.
            gt_implementation: Ground-truth workspace; forwarded to
                ``self._get_df`` but its dataframe is discarded here.

        Returns:
            A ``(feedback, passed)`` tuple. ``passed`` is ``False`` when the
            dataframe is ``None`` or holds at least one infinite value, and
            ``True`` otherwise.
        """
        # NOTE(review): `_get_df` is defined outside this block; it presumably
        # returns (ground-truth df, generated df) -- only the second is used.
        _, generated_df = self._get_df(gt_implementation, implementation)

        # Guard clause: nothing to inspect when no dataframe was produced.
        if generated_df is None:
            return "The source dataframe is None. Please check the implementation.", False

        # Flag every cell equal to +inf or -inf, then total the flags over
        # columns and again over the resulting per-column counts.
        infinite_values = [float("inf"), -float("inf")]
        inf_total = generated_df.isin(infinite_values).sum().sum()

        if inf_total != 0:
            return (
                f"The source dataframe has {inf_total} infinite values. Please check the implementation.",
                False,
            )
        return "The source dataframe does not have any infinite values.", True


class FactorSingleColumnEvaluator(FactorEvaluator):
def evaluate(
self,
Expand Down Expand Up @@ -417,6 +439,9 @@ def evaluate(
"Output dataframe has more columns than input feature which is not acceptable in feature processing tasks. Please check the implementation to avoid generating too many columns. Consider this implementation as a failure."
)

feedback_str, inf_evaluate_res = FactorInfEvaluator(self.scen).evaluate(implementation, gt_implementation)
conclusions.append(feedback_str)

# Check if the index of the dataframe is ("datetime", "instrument")
feedback_str, _ = FactorOutputFormatEvaluator(self.scen).evaluate(implementation, gt_implementation)
conclusions.append(feedback_str)
Expand Down Expand Up @@ -465,6 +490,7 @@ def evaluate(
and row_result <= 0.99
or output_format_result is False
or daily_check_result is False
or inf_evaluate_res is False
):
decision_from_value_check = False
else:
Expand Down
2 changes: 1 addition & 1 deletion rdagent/scenarios/qlib/experiment/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ qlib_factor_output_format: |-
0 your factor name 40914 non-null float64
dtypes: float64(1)
memory usage: <ignore>
None
Notice: The non-null count is OK to be different to the total number of entries since some instruments may not have the factor value on some days.
One possible format of `result.h5` may be like following:
datetime instrument
2020-01-02 SZ000001 -0.001796
Expand Down
15 changes: 13 additions & 2 deletions rdagent/scenarios/qlib/experiment/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,21 @@ def get_file_desc(p: Path) -> str:

buffer = io.StringIO()
df.info(verbose=True, buf=buffer, show_counts=False)

df_info = buffer.getvalue()
if isinstance(df.index, pd.MultiIndex):
df_info += f"\nMultiIndex names:, {df.index.names})"
if "REPORT_PERIOD" in df.columns:
one_instrument = df.index.get_level_values("instrument")[0]
df_on_one_instrument = df.loc[pd.IndexSlice[:, one_instrument], ["REPORT_PERIOD"]]
df_info += f"""
A snapshot of one instrument, from which you can tell the distribution of the data:
{df_on_one_instrument.head(10)}
"""
return JJ_TPL.render(
file_name=p.name,
type_desc="generated by `df.info(verbose=True, show_counts=False)`",
content=buffer.getvalue(),
type_desc="h5, generated by `df.info(verbose=True, show_counts=False)` and appendix info",
content=df_info,
)
elif p.name.endswith(".md"):
with open(p) as f:
Expand Down

0 comments on commit b82e597

Please sign in to comment.