Skip to content

Commit

Permalink
perf: some small upgrades to factor CoSTEER to improve performance (
Browse files Browse the repository at this point in the history
…#420)

* 1. use `DataFrame.info` instead of `head`
2. in the former trace query, add the latest attempt made after the last successful execution

* fix CI
  • Loading branch information
peteryang1 authored Oct 10, 2024
1 parent bba55fb commit 9eb931f
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -216,11 +216,17 @@ def implement_one_factor(
) # A dict, {{error_type:[[error_imp_knowledge, success_imp_knowledge],...]},...}

queried_former_failed_knowledge = (
queried_knowledge.former_traces[target_factor_task_information] if queried_knowledge is not None else []
queried_knowledge.former_traces[target_factor_task_information][0]
if queried_knowledge is not None
else []
)

queried_former_failed_knowledge_to_render = queried_former_failed_knowledge

latest_attempt_to_latest_successful_execution = queried_knowledge.former_traces[
target_factor_task_information
][1]

system_prompt = (
Environment(undefined=StrictUndefined)
.from_string(
Expand Down Expand Up @@ -296,6 +302,7 @@ def implement_one_factor(
queried_similar_error_knowledge=queried_similar_error_knowledge_to_render,
error_summary=error_summary,
error_summary_critics=error_summary_critics,
latest_attempt_to_latest_successful_execution=latest_attempt_to_latest_successful_execution,
)
.strip("\n")
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ def query(self, evo: EvolvableSubjects, evolving_trace: list[EvoStep]) -> Querie
evo,
factor_implementation_queried_graph_knowledge,
FACTOR_IMPLEMENT_SETTINGS.v2_query_former_trace_limit,
FACTOR_IMPLEMENT_SETTINGS.v2_add_fail_attempt_to_latest_successful_execution,
)
factor_implementation_queried_graph_knowledge = self.component_query(
evo,
Expand Down Expand Up @@ -392,6 +393,7 @@ def former_trace_query(
evo: EvolvableSubjects,
factor_implementation_queried_graph_knowledge: FactorQueriedGraphKnowledge,
v2_query_former_trace_limit: int = 5,
v2_add_fail_attempt_to_latest_successful_execution: bool = False,
) -> Union[QueriedKnowledge, set]:
"""
Query the former trace knowledge of the working trace, and find all the failed task information which tried more than fail_task_trial_limit times
Expand Down Expand Up @@ -429,11 +431,25 @@ def former_trace_query(
else:
current_index += 1

factor_implementation_queried_graph_knowledge.former_traces[
target_factor_task_information
] = former_trace_knowledge[-v2_query_former_trace_limit:]
latest_attempt = None
if v2_add_fail_attempt_to_latest_successful_execution:
# When the last successful execution is not the last entry in the working trace, it means we have already tried to correct it. We should tell the agent about this failed attempt to avoid an endless loop in the future.
if (
len(former_trace_knowledge) > 0
and len(self.knowledgebase.working_trace_knowledge[target_factor_task_information]) > 1
and self.knowledgebase.working_trace_knowledge[target_factor_task_information].index(
former_trace_knowledge[-1]
)
< len(self.knowledgebase.working_trace_knowledge[target_factor_task_information]) - 1
):
latest_attempt = self.knowledgebase.working_trace_knowledge[target_factor_task_information][-1]

factor_implementation_queried_graph_knowledge.former_traces[target_factor_task_information] = (
former_trace_knowledge[-v2_query_former_trace_limit:],
latest_attempt,
)
else:
factor_implementation_queried_graph_knowledge.former_traces[target_factor_task_information] = []
factor_implementation_queried_graph_knowledge.former_traces[target_factor_task_information] = ([], None)

return factor_implementation_queried_graph_knowledge

Expand Down Expand Up @@ -607,7 +623,7 @@ def error_query(
):
queried_last_trace = factor_implementation_queried_graph_knowledge.former_traces[
target_factor_task_information
][-1]
][0][-1]
target_index = self.knowledgebase.working_trace_knowledge[target_factor_task_information].index(
queried_last_trace,
)
Expand Down
2 changes: 1 addition & 1 deletion rdagent/components/coder/factor_coder/CoSTEER/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def LLMSelect(
# find corresponding former trace for each task
target_factor_task_information = evo.sub_tasks[i].get_task_information()
if target_factor_task_information in former_trace:
tasks.append((i, evo.sub_tasks[i], former_trace[target_factor_task_information]))
tasks.append((i, evo.sub_tasks[i], former_trace[target_factor_task_information][0]))

system_prompt = (
Environment(undefined=StrictUndefined)
Expand Down
1 change: 1 addition & 0 deletions rdagent/components/coder/factor_coder/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class Config:
v2_query_component_limit: int = 1
v2_query_error_limit: int = 1
v2_query_former_trace_limit: int = 1
v2_add_fail_attempt_to_latest_successful_execution: bool = False
v2_error_summary: bool = False
v2_knowledge_sampler: float = 1.0

Expand Down
7 changes: 7 additions & 0 deletions rdagent/components/coder/factor_coder/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,13 @@ evolving_strategy_factor_implementation_v2_user: |-
{{ similar_component_knowledge.implementation.code }}
{% endfor %}
{% endif %}
{% if latest_attempt_to_latest_successful_execution is not none %}
You have tried to correct your former failed code but still met some errors. Here is the latest attempt to the latest successful execution, try not to get the same error to your new code:
=====Your latest attempt=====
{{ latest_attempt_to_latest_successful_execution.implementation.code }}
=====Feedback to your latest attempt=====
{{ latest_attempt_to_latest_successful_execution.feedback }}
{% endif %}
evolving_strategy_error_summary_v2_system: |-
User is trying to implement some factors in the following scenario:
Expand Down
8 changes: 6 additions & 2 deletions rdagent/scenarios/qlib/experiment/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import io
import re
import shutil
from pathlib import Path
Expand Down Expand Up @@ -81,10 +82,13 @@ def get_file_desc(p: Path) -> str:
pd.set_option("display.max_columns", None) # or 1000
pd.set_option("display.max_rows", None) # or 1000
pd.set_option("display.max_colwidth", None) # or 199

buffer = io.StringIO()
df.info(verbose=True, buf=buffer, show_counts=False)
return JJ_TPL.render(
file_name=p.name,
type_desc="generated by `pd.read_hdf(filename).head()`",
content=df.head().to_string(),
type_desc="generated by `df.info(verbose=True, show_counts=False)`",
content=buffer.getvalue(),
)
elif p.name.endswith(".md"):
with open(p) as f:
Expand Down

0 comments on commit 9eb931f

Please sign in to comment.