PiBO (#813)

Co-authored-by: C. Benjamins <75323339+benjamc@users.noreply.github.com> Co-authored-by: Carl Hvarfner <hvarfner>
automl · Apr 7, 2022 · e0e892f · e0e892f
1 parent 9319600
commit e0e892f
Show file tree

Hide file tree

Showing 11 changed files with 828 additions and 13 deletions.
diff --git a/changelog.md b/changelog.md
@@ -1,17 +1,23 @@
 # 1.2.1
 
 ## Features
-* The `RunHistory` can now act as a `Mapping` in that you can use the usual methods you can use on dicts, i.e. `len(rh)`, `rh.items()`, `rh[key]`. Previously this was usually done by accessing `rh.data` which is still possible.
+* PiBO: Augment the acquisition function by multiplying it by a prior pdf given by the user.
+The influence of the prior then decays over time, allowing the optimization to carry on as per default.
+* The `RunHistory` can now act as a `Mapping` in that you can use the usual methods you
+can use on dicts, i.e. `len(rh)`, `rh.items()`, `rh[key]`. Previously this was usually done by
+accessing `rh.data` which is still possible.
 
 ## Minor Changes
-* Updated the signature of the `ROAR` facade to match with it's parent class `SMAC4AC`. Anyone relying on the output directory **without** specifying an explicit `run_id` to a `ROAR` facade should now expect to see the output directory at `run_0` instead of `run_1`. See #827
+* Updated the signature of the `ROAR` facade to match with its parent class `SMAC4AC`.
+Anyone relying on the output directory **without** specifying an explicit `run_id` to a `ROAR`
+facade should now expect to see the output directory at `run_0` instead of `run_1`. See #827
 
 ## Code-Quality
 * Updated and integrated flake8, mypy, black, and isort.
 
 ## Documentation
-* Uses `automl_sphinx_theme` now.
-* Better API.
+* SMAC uses `automl_sphinx_theme` now.
+* Better API view.
 
 
 # 1.2

diff --git a/examples/python/user_prior_mlp.py b/examples/python/user_prior_mlp.py
@@ -0,0 +1,166 @@
+"""
+HPO with User Priors over the Optimum
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Example for optimizing a Multi-Layer Perceptron (MLP) setting priors over the optimum on the
+hyperparameters. These priors are derived from user knowledge - from previous runs on similar
+tasks, common knowledge or intuition gained from manual tuning. To create the priors, we make
+use of the Normal and Beta Hyperparameters, as well as the "weights" property of the
+CategoricalHyperparameter. This can be integrated into the optimization for any SMAC facade,
+but we stick with SMAC4HPO here. To incorporate user priors into the optimization, 
+πBO (nolinkexistsyet) is used to bias the point selection strategy.
+
+MLP is used as the deep neural network.
+The digits dataset is chosen to optimize the average accuracy on 5-fold cross validation.
+"""
+
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
+import warnings
+import numpy as np
+
+import ConfigSpace as CS
+from ConfigSpace.hyperparameters import (
+    CategoricalHyperparameter,
+    UniformIntegerHyperparameter,
+    BetaIntegerHyperparameter,
+    NormalFloatHyperparameter,
+)
+
+from sklearn.datasets import load_digits
+from sklearn.exceptions import ConvergenceWarning
+from sklearn.model_selection import cross_val_score, StratifiedKFold
+from sklearn.neural_network import MLPClassifier
+
+from smac.configspace import ConfigurationSpace
+from smac.facade.smac_hpo_facade import SMAC4HPO
+from smac.facade.smac_bb_facade import SMAC4BB
+from smac.scenario.scenario import Scenario
+from smac.initial_design.random_configuration_design import RandomConfigurations
+
+__copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover"
+__license__ = "3-clause BSD"
+
+
+digits = load_digits()
+
+
+# Target Algorithm
+def mlp_from_cfg(cfg, seed):
+    """
+    Creates an MLP classifier from sklearn and evaluates it on the digits data
+    using 5-fold stratified cross validation.
+
+    Parameters
+    ----------
+    cfg: Configuration
+        configuration chosen by smac
+    seed: int or RandomState
+        used to initialize the MLP's random generator and the cross validation splits
+
+    Returns
+    -------
+    float
+        1 - mean cross validation accuracy (the cost to be minimized)
+    """
+
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", category=ConvergenceWarning)
+
+        mlp = MLPClassifier(
+            hidden_layer_sizes=[cfg["n_neurons"]] * cfg["n_layer"],
+            solver=cfg["optimizer"],
+            batch_size=cfg["batch_size"],
+            activation=cfg["activation"],
+            learning_rate_init=cfg["learning_rate_init"],
+            random_state=seed,
+        )
+
+        # returns the cross validation accuracy
+        cv = StratifiedKFold(n_splits=5, random_state=seed, shuffle=True)  # to make CV splits consistent
+        score = cross_val_score(mlp, digits.data, digits.target, cv=cv, error_score="raise")
+
+    return 1 - np.mean(score)
+
+
+if __name__ == "__main__":
+    # Build Configuration Space which defines all parameters and their ranges.
+    # To illustrate different parameter types,
+    # we use continuous, integer and categorical parameters.
+    cs = ConfigurationSpace()
+
+    # We do not have an educated belief on the number of layers beforehand.
+    # As such, the prior on the HP is uniform
+    n_layer = UniformIntegerHyperparameter("n_layer", lower=1, upper=5)
+
+    # We believe the optimal network is likely going to be relatively wide,
+    # and place a Beta prior skewed towards wider networks in log space
+    n_neurons = BetaIntegerHyperparameter("n_neurons", lower=8, upper=1024, alpha=4, beta=2, log=True)
+
+    # We believe that ReLU is likely going to be the optimal activation function about
+    # 60% of the time, and thus place weight on that accordingly
+    activation = CategoricalHyperparameter(
+        "activation", ["logistic", "tanh", "relu"], weights=[1, 1, 3], default_value="relu"
+    )
+
+    # Moreover, we believe ADAM is the most likely optimizer
+    optimizer = CategoricalHyperparameter("optimizer", ["sgd", "adam"], weights=[1, 2], default_value="adam")
+
+    # We do not have an educated opinion on the batch size, and thus leave it as-is
+    batch_size = UniformIntegerHyperparameter("batch_size", 16, 512, default_value=128)
+
+    # We place a log-normal prior on the learning rate, so that it is centered on 10^-3,
+    # with one unit of standard deviation per multiple of 10 (in log space)
+    learning_rate_init = NormalFloatHyperparameter(
+        "learning_rate_init", lower=1e-5, upper=1.0, mu=np.log(1e-3), sigma=np.log(10), log=True
+    )
+
+    # Add all hyperparameters at once:
+    cs.add_hyperparameters([n_layer, n_neurons, activation, optimizer, batch_size, learning_rate_init])
+
+    # SMAC scenario object
+    scenario = Scenario(
+        {
+            "run_obj": "quality",  # we optimize quality (alternative to runtime)
+            "runcount-limit": 20,  # max number of function evaluations (target algorithm runs)
+            "cs": cs,  # configuration space
+            "deterministic": "true",
+            "limit_resources": True,  # Uses pynisher to limit memory and runtime
+            # Alternatively, you can also disable this.
+            # Then you should handle runtime and memory yourself in the TA
+            "cutoff": 30,  # runtime limit for target algorithm
+            "memory_limit": 3072,  # adapt this to reasonable value for your hardware
+        }
+    )
+
+    # The rate at which SMAC forgets the prior. The higher the value, the more the prior is considered.
+    # Defaults to n_iterations / 10 (i.e. the run limit of the scenario divided by 10)
+    user_prior_kwargs = {"decay_beta": 1.5}
+
+    # To optimize, we pass the function to the SMAC-object
+    smac = SMAC4HPO(
+        scenario=scenario,
+        rng=np.random.RandomState(42),
+        tae_runner=mlp_from_cfg,
+        user_priors=True,  # This flag is required to conduct the optimisation using priors over the optimum
+        user_prior_kwargs=user_prior_kwargs,
+        initial_design=RandomConfigurations,  # Using random configurations will cause the initialization to be samples drawn from the prior
+    )
+
+    # Example call of the function with default values
+    # It returns: Status, Cost, Runtime, Additional Infos
+    def_value = smac.get_tae_runner().run(config=cs.get_default_configuration(), seed=0)[1]
+
+    print("Value for default configuration: %.4f" % def_value)
+
+    # Start optimization
+    try:
+        incumbent = smac.optimize()
+    finally:
+        incumbent = smac.solver.incumbent
+
+    inc_value = smac.get_tae_runner().run(config=incumbent, seed=0)[1]
+
+    print("Optimized Value: %.4f" % inc_value)
diff --git a/requirements.txt b/requirements.txt
@@ -2,7 +2,7 @@ numpy>=1.7.1
 scipy>=1.7.0
 psutil
 pynisher>=0.4.1
-ConfigSpace>=0.4.14,<0.5
+ConfigSpace>=0.5.0
 joblib
 scikit-learn>=0.22.0
 pyrfr>=0.8.0

diff --git a/smac/epm/base_epm.py b/smac/epm/base_epm.py
@@ -301,3 +301,13 @@ def predict_marginalized_over_instances(self, X: np.ndarray) -> Tuple[np.ndarray
             var = var.reshape((-1, 1))
 
         return mean, var
+
+    def get_configspace(self) -> ConfigurationSpace:
+        """
+        Retrieves the ConfigurationSpace used for the model.
+
+        Returns
+        -------
+        ConfigurationSpace
+            The ConfigurationSpace of the model (``self.configspace``)
+        """
+        return self.configspace
diff --git a/smac/epm/util_funcs.py b/smac/epm/util_funcs.py
@@ -5,8 +5,12 @@
 import numpy as np
 from ConfigSpace import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
+    BetaFloatHyperparameter,
+    BetaIntegerHyperparameter,
     CategoricalHyperparameter,
     Constant,
+    NormalFloatHyperparameter,
+    NormalIntegerHyperparameter,
     OrdinalHyperparameter,
     UniformFloatHyperparameter,
     UniformIntegerHyperparameter,
@@ -22,7 +26,9 @@ def get_types(
     config_space: ConfigurationSpace,
     instance_features: typing.Optional[np.ndarray] = None,
 ) -> typing.Tuple[typing.List[int], typing.List[typing.Tuple[float, float]]]:
-    """TODO."""
+    """Return the types of the hyperparameters and the bounds of the
+    hyperparameters and instance features.
+    """
     # Extract types vector for rf from config space and the bounds
     types = [0] * len(config_space.get_hyperparameters())
     bounds = [(np.nan, np.nan)] * len(types)
@@ -40,15 +46,13 @@ def get_types(
                 n_cats = len(param.choices) + 1
             types[i] = n_cats
             bounds[i] = (int(n_cats), np.nan)
-
         elif isinstance(param, (OrdinalHyperparameter)):
             n_cats = len(param.sequence)
             types[i] = 0
             if can_be_inactive:
                 bounds[i] = (0, int(n_cats))
             else:
                 bounds[i] = (0, int(n_cats) - 1)
-
         elif isinstance(param, Constant):
             # for constants we simply set types to 0 which makes it a numerical
             # parameter
@@ -71,13 +75,37 @@ def get_types(
                 bounds[i] = (-1.0, 1.0)
             else:
                 bounds[i] = (0, 1.0)
+        elif isinstance(param, NormalFloatHyperparameter):
+            if can_be_inactive:
+                raise ValueError("Inactive parameters not supported for Beta and Normal Hyperparameters")
+
+            bounds[i] = (param._lower, param._upper)
+        elif isinstance(param, NormalIntegerHyperparameter):
+            if can_be_inactive:
+                raise ValueError("Inactive parameters not supported for Beta and Normal Hyperparameters")
+
+            bounds[i] = (param.nfhp._lower, param.nfhp._upper)
+        elif isinstance(param, BetaFloatHyperparameter):
+            if can_be_inactive:
+                raise ValueError("Inactive parameters not supported for Beta and Normal Hyperparameters")
+
+            bounds[i] = (param._lower, param._upper)
+        elif isinstance(param, BetaIntegerHyperparameter):
+            if can_be_inactive:
+                raise ValueError("Inactive parameters not supported for Beta and Normal Hyperparameters")
+
+            bounds[i] = (param.bfhp._lower, param.bfhp._upper)
         elif not isinstance(
             param,
             (
                 UniformFloatHyperparameter,
                 UniformIntegerHyperparameter,
                 OrdinalHyperparameter,
                 CategoricalHyperparameter,
+                NormalFloatHyperparameter,
+                NormalIntegerHyperparameter,
+                BetaFloatHyperparameter,
+                BetaIntegerHyperparameter,
             ),
         ):
             raise TypeError("Unknown hyperparameter type %s" % type(param))

diff --git a/smac/facade/smac_ac_facade.py b/smac/facade/smac_ac_facade.py
@@ -33,9 +33,11 @@
     AbstractAcquisitionFunction,
     IntegratedAcquisitionFunction,
     LogEI,
+    PriorAcquisitionFunction,
 )
 from smac.optimizer.ei_optimization import (
     AcquisitionFunctionMaximizer,
+    LocalAndSortedPriorRandomSearch,
     LocalAndSortedRandomSearch,
 )
 from smac.optimizer.multi_objective.abstract_multi_objective_algorithm import (
@@ -121,6 +123,11 @@ class SMAC4AC(object):
     integrate_acquisition_function : bool, default=False
         Whether to integrate the acquisition function. Works only with models which can sample their
         hyperparameters (i.e. GaussianProcessMCMC).
+    user_priors : bool, default=False
+        Whether to make use of user priors in the optimization procedure, using PriorAcquisitionFunction.
+    user_prior_kwargs : Optional[Dict]
+        Dictionary to pass specific arguments to optimization with prior, e.g. prior confidence parameter,
+        and the floor value for the prior (lowest possible value the prior can take).
     acquisition_function_optimizer : ~smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer
         Object that implements the :class:`~smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer`.
         Will use :class:`smac.optimizer.ei_optimization.InterleavedLocalAndRandomSearch` if not set.
@@ -199,6 +206,8 @@ def __init__(
         acquisition_function: Optional[Type[AbstractAcquisitionFunction]] = None,
         acquisition_function_kwargs: Optional[Dict] = None,
         integrate_acquisition_function: bool = False,
+        user_priors: bool = False,
+        user_prior_kwargs: Optional[Dict] = None,
         acquisition_function_optimizer: Optional[Type[AcquisitionFunctionMaximizer]] = None,
         acquisition_function_optimizer_kwargs: Optional[Dict] = None,
         model: Optional[Type[AbstractEPM]] = None,
@@ -361,12 +370,32 @@ def __init__(
                 **acq_def_kwargs,
             )
 
+        if user_priors:
+            if user_prior_kwargs is None:
+                user_prior_kwargs = {}
+
+            # a solid default value for decay_beta - empirically founded
+            default_beta = scenario.ta_run_limit / 10  # type: ignore
+            discretize = isinstance(model_instance, (RandomForestWithInstances, RFRImputator))
+            user_prior_kwargs["decay_beta"] = user_prior_kwargs.get("decay_beta", default_beta)
+            user_prior_kwargs["discretize"] = discretize
+
+            acquisition_function_instance = PriorAcquisitionFunction(
+                acquisition_function=acquisition_function_instance,  # type: ignore
+                **user_prior_kwargs,
+                **acq_def_kwargs,  # type: ignore
+            )
+            acquisition_function_optimizer = LocalAndSortedPriorRandomSearch
+
         # initialize optimizer on acquisition function
         acq_func_opt_kwargs = {
             "acquisition_function": acquisition_function_instance,
             "config_space": scenario.cs,  # type: ignore[attr-defined] # noqa F821
             "rng": rng,
         }
+        if user_priors:
+            acq_func_opt_kwargs["uniform_config_space"] = scenario.cs.remove_hyperparameter_priors()  # type: ignore
+
         if acquisition_function_optimizer_kwargs is not None:
             acq_func_opt_kwargs.update(acquisition_function_optimizer_kwargs)
         if acquisition_function_optimizer is None: