fix: refine the UCB algorithm (#406)

* refine the UCB algorithm
* fix a CI error
microsoft · Sep 30, 2024 · 14f7d97 · 14f7d97
1 parent b16b4be
commit 14f7d97
Show file tree

Hide file tree

Showing 3 changed files with 11 additions and 10 deletions.
diff --git a/rdagent/scenarios/kaggle/developer/feedback.py b/rdagent/scenarios/kaggle/developer/feedback.py
@@ -177,6 +177,9 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac
         elif self.scen.if_using_graph_rag:
             trace.knowledge_base.add_document(experiment_feedback, self.scen)
 
+        if self.scen.if_action_choosing_based_on_UCB:
+            self.scen.action_counts[hypothesis.action] += 1
+
         return HypothesisFeedback(
             observations=observations,
             hypothesis_evaluation=hypothesis_evaluation,

diff --git a/rdagent/scenarios/kaggle/experiment/prompts.yaml b/rdagent/scenarios/kaggle/experiment/prompts.yaml
@@ -25,7 +25,7 @@ kg_description_template:
 
 kg_background: |-
   You are solving a data science tasks and the type of the competition is {{ competition_type }}.
-  The competition description is:{{competition_description}}. 
+  The competition description is: {{competition_description}}. 
   
   We provide an overall script in file: train.py. The user will run the train.py script along with several feature and model scripts to train several model to get a good performance on this task.
 

diff --git a/rdagent/scenarios/kaggle/proposal/proposal.py b/rdagent/scenarios/kaggle/proposal/proposal.py
@@ -130,13 +130,13 @@ def generate_RAG_content(self, trace: Trace, hypothesis_and_feedback: str, targe
 
         found_hypothesis_nodes = []
         for similar_node in similar_nodes:
-            for hypothesis_type in KG_ACTION_LIST:
-                hypothesis_nodes = trace.knowledge_base.get_nodes_within_steps(
-                    start_node=similar_node,
-                    steps=3,
-                    constraint_labels=[hypothesis_type],
-                )
-                found_hypothesis_nodes.extend(hypothesis_nodes[:2])
+            # for hypothesis_type in KG_ACTION_LIST:
+            hypothesis_nodes = trace.knowledge_base.get_nodes_within_steps(
+                start_node=similar_node,
+                steps=3,
+                constraint_labels=[target],
+            )
+            found_hypothesis_nodes.extend(hypothesis_nodes[:2])
 
         found_hypothesis_nodes = sorted(list(set(found_hypothesis_nodes)), key=lambda x: len(x.content))
 
@@ -204,7 +204,6 @@ def execute_next_action(self, trace: Trace) -> str:
         for action in actions:
             if self.scen.action_counts[action] == 0:
                 selected_action = action
-                self.scen.action_counts[selected_action] += 1
                 return selected_action
 
         c = self.scen.confidence_parameter
@@ -216,7 +215,6 @@ def execute_next_action(self, trace: Trace) -> str:
             ucb_values[action] = ucb
         # Select action with highest UCB
         selected_action = max(ucb_values, key=ucb_values.get)
-        self.scen.action_counts[selected_action] += 1
 
         return selected_action