Skip to content

Commit

Permalink
add cost budget; move loc of make_dir (Significant-Gravitas#888)
Browse files Browse the repository at this point in the history
* add cost budget; move loc of make_dir

* remove None in return

---------

Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu>
  • Loading branch information
sonichi and qingyun-wu authored Feb 6, 2023
1 parent fbea1d0 commit 3b6bfc2
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 19 deletions.
35 changes: 22 additions & 13 deletions flaml/tune/searcher/blendsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def __init__(
metric_constraints: Optional[List[Tuple[str, str, float]]] = None,
seed: Optional[int] = 20,
cost_attr: Optional[str] = "auto",
cost_budget: Optional[float] = None,
experimental: Optional[bool] = False,
lexico_objectives: Optional[dict] = None,
use_incumbent_result_in_evaluation=False,
Expand Down Expand Up @@ -111,10 +112,12 @@ def __init__(
metric_constraints: A list of metric constraints to be satisfied.
E.g., `[('precision', '>=', 0.9)]`. The sign can be ">=" or "<=".
seed: An integer of the random seed.
cost_attr: Choose from ["auto", None] to specify the attribute to evaluate the cost of different trials.
Default is "auto", which means that we will automatically chose the cost attribute to use (depending
cost_attr: None or str to specify the attribute to evaluate the cost of different trials.
Default is "auto", which means that we will automatically choose the cost attribute to use (depending
on the nature of the resource budget). When cost_attr is set to None, cost differences between different trials will be omitted
in our search algorithm.
in our search algorithm. When cost_attr is set to a str different from "auto" and "time_total_s",
this cost_attr must be available in the result dict of the trial.
cost_budget: A float of the cost budget. Only valid when cost_attr is a str different from "auto" and "time_total_s".
lexico_objectives: dict, default=None | It specifies information needed to perform multi-objective
optimization with lexicographic preferences. This is only supported in CFO currently.
When lexico_objectives is not None, the arguments metric, mode will be invalid.
Expand Down Expand Up @@ -154,8 +157,10 @@ def __init__(
self.cost_attr = TIME_TOTAL_S
else:
self.cost_attr = None
self._cost_budget = None
else:
self.cost_attr = cost_attr
self._cost_budget = cost_budget
self.penalty = PENALTY # penalty term for constraints
self._metric, self._mode = metric, mode
self._use_incumbent_result_in_evaluation = use_incumbent_result_in_evaluation
Expand Down Expand Up @@ -388,6 +393,7 @@ def _init_search(self):
i = 0
# config_signature: tuple -> result: Dict
self._result = {}
self._cost_used = 0
while self._evaluated_rewards:
# go over the evaluated rewards
trial_id = f"trial_for_evaluated_{i}"
Expand Down Expand Up @@ -467,6 +473,7 @@ def on_trial_complete(
if error: # remove from result cache
del self._result[signature]
else: # add to result cache
self._cost_used += result.get(self.cost_attr, 0)
self._result[signature] = result
# update target metric if improved
objective = result[self._ls.metric]
Expand Down Expand Up @@ -702,9 +709,9 @@ def on_trial_result(self, trial_id: str, result: Dict):
def suggest(self, trial_id: str) -> Optional[Dict]:
"""choose thread, suggest a valid config."""
if self._init_used and not self._points_to_evaluate:
if self._cost_budget and self._cost_used >= self._cost_budget:
return
choice, backup = self._select_thread()
# if choice < 0: # timeout
# return None
config = self._search_thread_pool[choice].suggest(trial_id)
if not choice and config is not None and self._ls.resource:
config[self._ls.resource_attr] = self.best_resource
Expand All @@ -717,19 +724,19 @@ def suggest(self, trial_id: str) -> Optional[Dict]:
self._search_thread_pool[choice].space,
)
del self._search_thread_pool[choice]
return None
return
# preliminary check; not checking config validation
space = self._search_thread_pool[choice].space
skip = self._should_skip(choice, trial_id, config, space)
use_rs = 0
if skip:
if choice:
return None
return
# use rs when BO fails to suggest a config
config, space = self._ls.complete_config({})
skip = self._should_skip(-1, trial_id, config, space)
if skip:
return None
return
use_rs = 1
if choice or self._valid(
config,
Expand All @@ -756,7 +763,7 @@ def suggest(self, trial_id: str) -> Optional[Dict]:
space = thread.space
skip = self._should_skip(backup, trial_id, config, space)
if skip:
return None
return
self._trial_proposed_by[trial_id] = backup
choice = backup
if not choice: # global search
Expand Down Expand Up @@ -801,14 +808,14 @@ def suggest(self, trial_id: str) -> Optional[Dict]:
if reward is None:
result = self._result.get(config_signature)
if result: # tried before
return None
return
elif result is None: # not tried before
if self._violate_config_constriants(config, config_signature):
# violate config constraints
return None
return
self._result[config_signature] = {}
else: # running but no result yet
return None
return
self._init_used = True
self._trial_proposed_by[trial_id] = 0
self._search_thread_pool[0].running += 1
Expand All @@ -817,7 +824,7 @@ def suggest(self, trial_id: str) -> Optional[Dict]:
result = {self._metric: reward, self.cost_attr: 1, "config": config}
# result = self._result[config_signature]
self.on_trial_complete(trial_id, result)
return None
return
if self._use_incumbent_result_in_evaluation:
if self._trial_proposed_by[trial_id] > 0:
choice_thread = self._search_thread_pool[
Expand Down Expand Up @@ -900,6 +907,8 @@ def _select_thread(self) -> Tuple:
time_used = now - self._start_time + self._time_used
min_eci = min(min_eci, time_used / num_finished * num_left)
# print(f"{min_eci}, {time_used / num_finished * num_left}, {num_finished}, {num_left}")
elif self.cost_attr is not None and self._cost_budget:
min_eci = max(self._cost_budget - self._cost_used, 0)
elif self._num_samples and self._num_samples > 0:
num_finished = len(self._result)
num_proposed = num_finished + len(self._trial_proposed_by)
Expand Down
3 changes: 2 additions & 1 deletion flaml/tune/searcher/search_thread.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from .suggestion import Searcher
from .flow2 import FLOW2
from ..space import add_cost_to_space, unflatten_hierarchical
from ..result import TIME_TOTAL_S
import logging

logger = logging.getLogger(__name__)
Expand All @@ -29,7 +30,7 @@ def __init__(
self,
mode: str = "min",
search_alg: Optional[Searcher] = None,
cost_attr: Optional[str] = "time_total_s",
cost_attr: Optional[str] = TIME_TOTAL_S,
eps: Optional[float] = 1.0,
):
"""When search_alg is omitted, use local search FLOW2."""
Expand Down
9 changes: 5 additions & 4 deletions flaml/tune/tune.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,11 @@ def easy_objective(config):
old_verbose = _verbose
old_running_trial = _running_trial
old_training_iteration = _training_iteration
if local_dir and not log_file_name and verbose > 0:
if log_file_name:
dir_name = os.path.dirname(log_file_name)
if dir_name:
os.makedirs(dir_name, exist_ok=True)
elif local_dir and verbose > 0:
os.makedirs(local_dir, exist_ok=True)
log_file_name = os.path.join(
local_dir, "tune_" + str(datetime.datetime.now()).replace(":", "-") + ".log"
Expand All @@ -472,9 +476,6 @@ def easy_objective(config):
logger.addHandler(old_handlers[0])
if verbose > 0:
if log_file_name:
dir_name = os.path.dirname(log_file_name)
if dir_name:
os.makedirs(dir_name, exist_ok=True)
logger.addHandler(logging.FileHandler(log_file_name))
elif not logger.hasHandlers():
# Add the console handler.
Expand Down
9 changes: 8 additions & 1 deletion test/tune/test_searcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def wrong_define_search_space(trial):
return {1: 1}


def test_searcher():
def test_searchers():
from flaml.tune.searcher.suggestion import (
OptunaSearch,
Searcher,
Expand Down Expand Up @@ -303,6 +303,13 @@ def test_searcher():
from flaml import tune

tune.run(lambda x: 1, config={}, use_ray=use_ray, log_file_name="logs/searcher.log")
searcher = BlendSearch(
space=config, cost_attr="cost", cost_budget=10, metric="m", mode="min"
)
analysis = tune.run(
lambda x: {"cost": 2, "m": x["b"]}, search_alg=searcher, num_samples=10
)
assert len(analysis.trials) == 5


def test_no_optuna():
Expand Down

0 comments on commit 3b6bfc2

Please sign in to comment.