From 3b6bfc2876e632d8334d19ebaa6a128d8f5c2d35 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Sun, 5 Feb 2023 16:34:59 -0800 Subject: [PATCH] add cost budget; move loc of make_dir (#888) * add cost budget; move loc of make_dir * remove None in return --------- Co-authored-by: Qingyun Wu --- flaml/tune/searcher/blendsearch.py | 35 +++++++++++++++++----------- flaml/tune/searcher/search_thread.py | 3 ++- flaml/tune/tune.py | 9 +++---- test/tune/test_searcher.py | 9 ++++++- 4 files changed, 37 insertions(+), 19 deletions(-) diff --git a/flaml/tune/searcher/blendsearch.py b/flaml/tune/searcher/blendsearch.py index d091df9c7622..e7aa2e4b6eaf 100644 --- a/flaml/tune/searcher/blendsearch.py +++ b/flaml/tune/searcher/blendsearch.py @@ -62,6 +62,7 @@ def __init__( metric_constraints: Optional[List[Tuple[str, str, float]]] = None, seed: Optional[int] = 20, cost_attr: Optional[str] = "auto", + cost_budget: Optional[float] = None, experimental: Optional[bool] = False, lexico_objectives: Optional[dict] = None, use_incumbent_result_in_evaluation=False, @@ -111,10 +112,12 @@ def __init__( metric_constraints: A list of metric constraints to be satisfied. E.g., `['precision', '>=', 0.9]`. The sign can be ">=" or "<=". seed: An integer of the random seed. - cost_attr: Choose from ["auto", None] to specify the attribute to evaluate the cost of different trials. - Default is "auto", which means that we will automatically chose the cost attribute to use (depending + cost_attr: None or str to specify the attribute to evaluate the cost of different trials. + Default is "auto", which means that we will automatically choose the cost attribute to use (depending on the nature of the resource budget). When cost_attr is set to None, cost differences between different trials will be omitted - in our search algorithm. + in our search algorithm. When cost_attr is set to a str different from "auto" and "time_total_s", + this cost_attr must be available in the result dict of the trial. + cost_budget: A float of the cost budget. Only valid when cost_attr is a str different from "auto" and "time_total_s". lexico_objectives: dict, default=None | It specifics information needed to perform multi-objective optimization with lexicographic preferences. This is only supported in CFO currently. When lexico_objectives is not None, the arguments metric, mode will be invalid. @@ -154,8 +157,10 @@ def __init__( self.cost_attr = TIME_TOTAL_S else: self.cost_attr = None + self._cost_budget = None else: self.cost_attr = cost_attr + self._cost_budget = cost_budget self.penalty = PENALTY # penalty term for constraints self._metric, self._mode = metric, mode self._use_incumbent_result_in_evaluation = use_incumbent_result_in_evaluation @@ -388,6 +393,7 @@ def _init_search(self): i = 0 # config_signature: tuple -> result: Dict self._result = {} + self._cost_used = 0 while self._evaluated_rewards: # go over the evaluated rewards trial_id = f"trial_for_evaluated_{i}" @@ -467,6 +473,7 @@ def on_trial_complete( if error: # remove from result cache del self._result[signature] else: # add to result cache + self._cost_used += result.get(self.cost_attr, 0) self._result[signature] = result # update target metric if improved objective = result[self._ls.metric] @@ -702,9 +709,9 @@ def on_trial_result(self, trial_id: str, result: Dict): def suggest(self, trial_id: str) -> Optional[Dict]: """choose thread, suggest a valid config.""" if self._init_used and not self._points_to_evaluate: + if self._cost_budget and self._cost_used >= self._cost_budget: + return choice, backup = self._select_thread() - # if choice < 0: # timeout - # return None config = self._search_thread_pool[choice].suggest(trial_id) if not choice and config is not None and self._ls.resource: config[self._ls.resource_attr] = self.best_resource @@ -717,19 +724,19 @@ def suggest(self, trial_id: str) -> Optional[Dict]: self._search_thread_pool[choice].space, ) del self._search_thread_pool[choice] - return None + return # preliminary check; not checking config validation space = self._search_thread_pool[choice].space skip = self._should_skip(choice, trial_id, config, space) use_rs = 0 if skip: if choice: - return None + return # use rs when BO fails to suggest a config config, space = self._ls.complete_config({}) skip = self._should_skip(-1, trial_id, config, space) if skip: - return None + return use_rs = 1 if choice or self._valid( config, @@ -756,7 +763,7 @@ def suggest(self, trial_id: str) -> Optional[Dict]: space = thread.space skip = self._should_skip(backup, trial_id, config, space) if skip: - return None + return self._trial_proposed_by[trial_id] = backup choice = backup if not choice: # global search @@ -801,14 +808,14 @@ def suggest(self, trial_id: str) -> Optional[Dict]: if reward is None: result = self._result.get(config_signature) if result: # tried before - return None + return elif result is None: # not tried before if self._violate_config_constriants(config, config_signature): # violate config constraints - return None + return self._result[config_signature] = {} else: # running but no result yet - return None + return self._init_used = True self._trial_proposed_by[trial_id] = 0 self._search_thread_pool[0].running += 1 @@ -817,7 +824,7 @@ def suggest(self, trial_id: str) -> Optional[Dict]: result = {self._metric: reward, self.cost_attr: 1, "config": config} # result = self._result[config_signature] self.on_trial_complete(trial_id, result) - return None + return if self._use_incumbent_result_in_evaluation: if self._trial_proposed_by[trial_id] > 0: choice_thread = self._search_thread_pool[ @@ -900,6 +907,8 @@ def _select_thread(self) -> Tuple: time_used = now - self._start_time + self._time_used min_eci = min(min_eci, time_used / num_finished * num_left) # print(f"{min_eci}, {time_used / num_finished * num_left}, {num_finished}, {num_left}") + elif self.cost_attr is not None and self._cost_budget: + min_eci = max(self._cost_budget - self._cost_used, 0) elif self._num_samples and self._num_samples > 0: num_finished = len(self._result) num_proposed = num_finished + len(self._trial_proposed_by) diff --git a/flaml/tune/searcher/search_thread.py b/flaml/tune/searcher/search_thread.py index f6366c9a16ad..9bb58a8ea3dd 100644 --- a/flaml/tune/searcher/search_thread.py +++ b/flaml/tune/searcher/search_thread.py @@ -17,6 +17,7 @@ from .suggestion import Searcher from .flow2 import FLOW2 from ..space import add_cost_to_space, unflatten_hierarchical +from ..result import TIME_TOTAL_S import logging logger = logging.getLogger(__name__) @@ -29,7 +30,7 @@ def __init__( self, mode: str = "min", search_alg: Optional[Searcher] = None, - cost_attr: Optional[str] = "time_total_s", + cost_attr: Optional[str] = TIME_TOTAL_S, eps: Optional[float] = 1.0, ): """When search_alg is omitted, use local search FLOW2.""" diff --git a/flaml/tune/tune.py b/flaml/tune/tune.py index 4b517c988293..a6046cd832de 100644 --- a/flaml/tune/tune.py +++ b/flaml/tune/tune.py @@ -447,7 +447,11 @@ def easy_objective(config): old_verbose = _verbose old_running_trial = _running_trial old_training_iteration = _training_iteration - if local_dir and not log_file_name and verbose > 0: + if log_file_name: + dir_name = os.path.dirname(log_file_name) + if dir_name: + os.makedirs(dir_name, exist_ok=True) + elif local_dir and verbose > 0: os.makedirs(local_dir, exist_ok=True) log_file_name = os.path.join( local_dir, "tune_" + str(datetime.datetime.now()).replace(":", "-") + ".log" @@ -472,9 +476,6 @@ def easy_objective(config): logger.addHandler(old_handlers[0]) if verbose > 0: if log_file_name: - dir_name = os.path.dirname(log_file_name) - if dir_name: - os.makedirs(dir_name, exist_ok=True) logger.addHandler(logging.FileHandler(log_file_name)) elif not logger.hasHandlers(): # Add the console handler. diff --git a/test/tune/test_searcher.py b/test/tune/test_searcher.py index ff29b522cf4d..cb8c9e06af31 100644 --- a/test/tune/test_searcher.py +++ b/test/tune/test_searcher.py @@ -31,7 +31,7 @@ def wrong_define_search_space(trial): return {1: 1} -def test_searcher(): +def test_searchers(): from flaml.tune.searcher.suggestion import ( OptunaSearch, Searcher, @@ -303,6 +303,13 @@ def test_searcher(): from flaml import tune tune.run(lambda x: 1, config={}, use_ray=use_ray, log_file_name="logs/searcher.log") + searcher = BlendSearch( + space=config, cost_attr="cost", cost_budget=10, metric="m", mode="min" + ) + analysis = tune.run( + lambda x: {"cost": 2, "m": x["b"]}, search_alg=searcher, num_samples=10 + ) + assert len(analysis.trials) == 5 def test_no_optuna():