Implement possibility to run hyperparameter opts in the strategy (#287)
jduerholt authored Sep 21, 2023
1 parent da20f2d commit b320d85
Showing 33 changed files with 849 additions and 741 deletions.
4 changes: 2 additions & 2 deletions bofire/benchmarks/api.py
@@ -1,8 +1,8 @@
from typing import Union

from bofire.benchmarks.aspen_benchmark import Aspen_benchmark
from bofire.benchmarks.benchmark import Benchmark, GenericBenchmark, run
from bofire.benchmarks.hyperopt import Hyperopt, hyperoptimize
from bofire.benchmarks.benchmark import Benchmark, GenericBenchmark
from bofire.benchmarks.hyperopt import Hyperopt
from bofire.benchmarks.multi import C2DTLZ2, DTLZ2, ZDT1, CrossCoupling, SnarBenchmark
from bofire.benchmarks.single import Ackley, Branin, Branin30, Hartmann, Himmelblau

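For downstream code, the visible effect of this reshuffle is an import-path change; a brief hedged sketch, based on the removed imports above and the new bofire/runners/api.py shown further down:

# Previously, the helpers were pulled in via the benchmarks package:
# from bofire.benchmarks.benchmark import run
# from bofire.benchmarks.hyperopt import hyperoptimize

# With this commit, they live in the new runners package:
from bofire.runners.api import hyperoptimize, run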
127 changes: 1 addition & 126 deletions bofire/benchmarks/benchmark.py
@@ -1,21 +1,14 @@
import json
import os
from abc import abstractmethod
from copy import deepcopy
from typing import Callable, List, Literal, Optional, Protocol, Tuple, Union
from typing import Callable, Literal, Optional, Tuple, Union

import numpy as np
import pandas as pd
from multiprocess.pool import Pool
from pydantic import Field, PositiveFloat
from scipy.stats import norm, uniform
from tqdm import tqdm
from typing_extensions import Annotated

import bofire.strategies.api as strategies
from bofire.data_models.base import BaseModel
from bofire.data_models.domain.api import Domain
from bofire.data_models.strategies.api import AnyStrategy


class OutlierPrior(BaseModel):
@@ -100,121 +93,3 @@ def __init__(

def _f(self, candidates: pd.DataFrame) -> pd.DataFrame:
return self.func(candidates)


class StrategyFactory(Protocol):
def __call__(self, domain: Domain) -> AnyStrategy:
...


def _single_run(
run_idx: int,
benchmark: Benchmark,
strategy_factory: StrategyFactory,
n_iterations: int,
metric: Callable[[Domain, pd.DataFrame], float],
n_candidates_per_proposals: int,
safe_intervall: int,
initial_sampler: Optional[
Union[Callable[[Domain], pd.DataFrame], pd.DataFrame]
] = None,
) -> Tuple[pd.DataFrame, pd.Series]:
def autosafe_results(benchmark):
"""Safes results into a .json file to prevent data loss during time-expensive optimization runs.
Autosave should operate every 10 iterations.
Args:
benchmark: Benchmark function that is supposed to be evaluated.
"""

benchmark_name = benchmark.__class__.__name__
# Create a folder for autosaves if it does not already exist.
if not os.path.exists("bofire_autosaves/" + benchmark_name):
os.makedirs("bofire_autosaves/" + benchmark_name)

filename = (
"bofire_autosaves/" + benchmark_name + "/run" + str(run_idx) + ".json"
)
parsed_domain = benchmark.domain.json()
with open(filename, "w") as file:
json.dump(parsed_domain, file)

# sample initial values
if initial_sampler is not None:
if isinstance(initial_sampler, Callable):
X = initial_sampler(benchmark.domain)
XY = benchmark.f(X, return_complete=True)
else:
XY = initial_sampler
strategy_data = strategy_factory(domain=benchmark.domain)
# map it
strategy = strategies.map(strategy_data) # type: ignore
# tell it
if initial_sampler is not None:
strategy.tell(XY) # type: ignore
metric_values = np.zeros(n_iterations)
pbar = tqdm(range(n_iterations), position=run_idx)
for i in pbar:
X = strategy.ask(candidate_count=n_candidates_per_proposals)
X = X[benchmark.domain.inputs.get_keys()]
Y = benchmark.f(X)
XY = pd.concat([X, Y], axis=1)
# pd.concat() changes the dtype from str to np.int32 if a column contains only whole numbers.
# The column needs to be converted back to str before being added to the benchmark domain.
strategy.tell(XY)
metric_values[i] = metric(strategy.domain, strategy.experiments) # type: ignore
pbar.set_description(
f"run {run_idx:02d} with current best {metric_values[i]:0.3f}"
)
if (i + 1) % safe_intervall == 0:
autosafe_results(benchmark=benchmark)
return strategy.experiments, pd.Series(metric_values) # type: ignore


def run(
benchmark: Benchmark,
strategy_factory: StrategyFactory,
n_iterations: int,
metric: Callable[[Domain, pd.DataFrame], float],
initial_sampler: Optional[Callable[[Domain], pd.DataFrame]] = None,
n_candidates_per_proposal: int = 1,
n_runs: int = 5,
n_procs: int = 5,
safe_intervall: int = 1000,
) -> List[Tuple[pd.DataFrame, pd.Series]]:
"""Run a benchmark problem several times in parallel
Args:
benchmark: problem to be benchmarked
strategy_factory: creates the strategy to be benchmarked on the benchmark problem
n_iterations: number of times the strategy is asked
metric: measure of success, e.g, best value found so far for single objective or
hypervolume for multi-objective
initial_sampler: Creates initial data
n_candidates: also known as batch size, number of proposals made at once by the strategy
n_runs: number of runs
n_procs: number of parallel processes to execute the runs
Returns:
per run, a tuple with the benchmark object containing the proposed data and metric values
"""

def make_args(run_idx: int):
return (
run_idx,
deepcopy(benchmark),
strategy_factory,
n_iterations,
metric,
n_candidates_per_proposal,
safe_intervall,
initial_sampler,
)

if n_procs == 1:
results = [_single_run(*make_args(i)) for i in range(n_runs)]
else:
p = Pool(min(n_procs, n_runs))
results = [p.apply_async(_single_run, make_args(i)) for i in range(n_runs)]
results = [r.get() for r in results]
return results
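For orientation, a minimal usage sketch of this run helper at its new home in the runners package (re-exported from bofire/runners/api.py below). This is a hedged illustration, not part of the commit; it reuses the Himmelblau benchmark and the RandomStrategy/SoboStrategy data models that appear elsewhere in this diff, and the metric and sampler functions are illustrative choices.

import pandas as pd

import bofire.strategies.api as strategies
from bofire.benchmarks.api import Himmelblau
from bofire.data_models.domain.api import Domain
from bofire.data_models.strategies.api import RandomStrategy, SoboStrategy
from bofire.runners.api import run


def best_value(domain: Domain, experiments: pd.DataFrame) -> float:
    # Himmelblau is a minimization problem, so track the smallest observed output.
    return experiments[domain.outputs[0].key].min()


def initial_sample(domain: Domain) -> pd.DataFrame:
    # Seed each run with random points so the model-based strategy can be fitted.
    sampler = strategies.map(data_model=RandomStrategy(domain=domain))
    return sampler.ask(len(domain.inputs) + 1)


results = run(
    benchmark=Himmelblau(),
    strategy_factory=SoboStrategy,  # called as SoboStrategy(domain=...), see _single_run above
    n_iterations=20,
    metric=best_value,
    initial_sampler=initial_sample,
    n_runs=2,
    n_procs=1,  # single process; otherwise a multiprocess pool is used
)
# Each element of results is a tuple of (experiments DataFrame, per-iteration metric Series).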
84 changes: 2 additions & 82 deletions bofire/benchmarks/hyperopt.py
@@ -1,19 +1,10 @@
import warnings
from typing import Optional, Tuple
from typing import Optional

import pandas as pd

import bofire.strategies.api as strategies
import bofire.surrogates.api as surrogates
from bofire.benchmarks.benchmark import Benchmark, run
from bofire.benchmarks.benchmark import Benchmark
from bofire.data_models.domain.api import Domain
from bofire.data_models.enum import RegressionMetricsEnum
from bofire.data_models.objectives.api import MinimizeObjective
from bofire.data_models.strategies.api import (
FactorialStrategy,
RandomStrategy,
SoboStrategy,
)
from bofire.data_models.surrogates.api import AnyTrainableSurrogate


@@ -59,74 +50,3 @@ def _f(self, candidates: pd.DataFrame) -> pd.DataFrame:
)
results[f"valid_{self.target_metric.value}"] = 1 # type: ignore
return results # type: ignore


def hyperoptimize(
surrogate_data: AnyTrainableSurrogate,
training_data: pd.DataFrame,
folds: int,
random_state: Optional[int] = None,
) -> Tuple[AnyTrainableSurrogate, pd.DataFrame]:
if surrogate_data.hyperconfig is None:
warnings.warn(
"No hyperopt is possible as no hyperopt config is available. Returning initial config."
)
return surrogate_data, pd.DataFrame({e.name: [] for e in RegressionMetricsEnum})

def best(domain: Domain, experiments: pd.DataFrame) -> float:
return (
experiments[domain.outputs[0].key].min()
if isinstance(domain.outputs[0].objective, MinimizeObjective)
else experiments[domain.outputs[0].key].max()
)

def sample(domain):
datamodel = RandomStrategy(domain=domain)
sampler = strategies.map(data_model=datamodel)
sampled = sampler.ask(len(domain.inputs) + 1)
return sampled

benchmark = Hyperopt(
surrogate_data=surrogate_data,
training_data=training_data,
folds=folds,
random_state=random_state,
)

if surrogate_data.hyperconfig.hyperstrategy == "FactorialStrategy": # type: ignore
strategy = strategies.map(FactorialStrategy(domain=benchmark.domain))
experiments = benchmark.f(
strategy.ask(candidate_count=None), return_complete=True
)
else:
experiments = run(
benchmark=benchmark,
strategy_factory=RandomStrategy
if surrogate_data.hyperconfig.hyperstrategy == "RandomStrategy" # type: ignore
else SoboStrategy, # type: ignore
metric=best,
n_runs=1,
n_iterations=surrogate_data.hyperconfig.n_iterations # type: ignore
- len(benchmark.domain.inputs)
- 1,
initial_sampler=sample,
n_procs=1,
)[0][0]

# analyze the results and get the best
experiments = experiments.sort_values(
by=benchmark.target_metric.name,
ascending=True
if isinstance(benchmark.domain.outputs[0].objective, MinimizeObjective)
else False,
)

surrogate_data.update_hyperparameters(experiments.iloc[0])

return (
surrogate_data,
experiments[
surrogate_data.hyperconfig.domain.inputs.get_keys()
+ [e.name for e in RegressionMetricsEnum]
],
)
12 changes: 7 additions & 5 deletions bofire/data_models/strategies/predictives/botorch.py
@@ -1,6 +1,6 @@
from typing import Optional, Type
from typing import Annotated, Optional, Type

from pydantic import PositiveInt, root_validator, validator
from pydantic import Field, PositiveInt, root_validator, validator

from bofire.data_models.constraints.api import (
Constraint,
@@ -10,9 +10,7 @@
from bofire.data_models.domain.api import Domain, Outputs
from bofire.data_models.enum import CategoricalEncodingEnum, CategoricalMethodEnum
from bofire.data_models.features.api import CategoricalDescriptorInput, CategoricalInput
from bofire.data_models.outlier_detection.api import (
OutlierDetections,
)
from bofire.data_models.outlier_detection.api import OutlierDetections
from bofire.data_models.strategies.predictives.predictive import PredictiveStrategy
from bofire.data_models.surrogates.api import (
BotorchSurrogates,
@@ -33,9 +31,13 @@ class BotorchStrategy(PredictiveStrategy):
categorical_method: CategoricalMethodEnum = CategoricalMethodEnum.EXHAUSTIVE
discrete_method: CategoricalMethodEnum = CategoricalMethodEnum.EXHAUSTIVE
surrogate_specs: Optional[BotorchSurrogates] = None
# outlier detection params
outlier_detection_specs: Optional[OutlierDetections] = None
min_experiments_before_outlier_check: PositiveInt = 1
frequency_check: PositiveInt = 1
# hyperopt params
frequency_hyperopt: Annotated[int, Field(ge=0)] = 0 # 0 indicates no hyperopt
folds: int = 5

@classmethod
def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
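The two new fields are all that is needed to switch on in-strategy hyperparameter optimization. A hedged configuration sketch follows; it assumes SoboStrategy inherits these BotorchStrategy fields and reuses a benchmark domain purely to stay self-contained.

from bofire.benchmarks.api import Himmelblau
from bofire.data_models.strategies.api import SoboStrategy

# Reuse a benchmark domain so the sketch stays self-contained.
domain = Himmelblau().domain

strategy_data = SoboStrategy(
    domain=domain,
    frequency_hyperopt=5,  # assumed semantics: re-tune hyperparameters every 5th refit; the diff only documents that 0 disables hyperopt
    folds=3,               # number of cross-validation folds used during the hyperopt
)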
Empty file added bofire/runners/__init__.py
Empty file.
2 changes: 2 additions & 0 deletions bofire/runners/api.py
@@ -0,0 +1,2 @@
from bofire.runners.hyperoptimize import hyperoptimize
from bofire.runners.run import run
88 changes: 88 additions & 0 deletions bofire/runners/hyperoptimize.py
@@ -0,0 +1,88 @@
import warnings
from typing import Optional, Tuple

import pandas as pd

import bofire.strategies.api as strategies
from bofire.benchmarks.api import Hyperopt
from bofire.data_models.domain.api import Domain
from bofire.data_models.enum import RegressionMetricsEnum
from bofire.data_models.objectives.api import MinimizeObjective
from bofire.data_models.strategies.api import (
FactorialStrategy,
RandomStrategy,
SoboStrategy,
)
from bofire.data_models.surrogates.api import AnyTrainableSurrogate
from bofire.runners.run import run


def hyperoptimize(
surrogate_data: AnyTrainableSurrogate,
training_data: pd.DataFrame,
folds: int,
random_state: Optional[int] = None,
) -> Tuple[AnyTrainableSurrogate, pd.DataFrame]:
if surrogate_data.hyperconfig is None:
warnings.warn(
"No hyperopt is possible as no hyperopt config is available. Returning initial config."
)
return surrogate_data, pd.DataFrame({e.name: [] for e in RegressionMetricsEnum})

def best(domain: Domain, experiments: pd.DataFrame) -> float:
return (
experiments[domain.outputs[0].key].min()
if isinstance(domain.outputs[0].objective, MinimizeObjective)
else experiments[domain.outputs[0].key].max()
)

def sample(domain):
datamodel = RandomStrategy(domain=domain)
sampler = strategies.map(data_model=datamodel)
sampled = sampler.ask(len(domain.inputs) + 1)
return sampled

benchmark = Hyperopt(
surrogate_data=surrogate_data,
training_data=training_data,
folds=folds,
random_state=random_state,
)

if surrogate_data.hyperconfig.hyperstrategy == "FactorialStrategy": # type: ignore
strategy = strategies.map(FactorialStrategy(domain=benchmark.domain))
experiments = benchmark.f(
strategy.ask(candidate_count=None), return_complete=True
)
else:
experiments = run(
benchmark=benchmark,
strategy_factory=RandomStrategy
if surrogate_data.hyperconfig.hyperstrategy == "RandomStrategy" # type: ignore
else SoboStrategy, # type: ignore
metric=best,
n_runs=1,
n_iterations=surrogate_data.hyperconfig.n_iterations # type: ignore
- len(benchmark.domain.inputs)
- 1,
initial_sampler=sample,
n_procs=1,
)[0][0]

# analyze the results and get the best
experiments = experiments.sort_values(
by=benchmark.target_metric.name,
ascending=True
if isinstance(benchmark.domain.outputs[0].objective, MinimizeObjective)
else False,
)

surrogate_data.update_hyperparameters(experiments.iloc[0])

return (
surrogate_data,
experiments[
surrogate_data.hyperconfig.domain.inputs.get_keys()
+ [e.name for e in RegressionMetricsEnum]
],
)
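A hedged sketch of calling this helper directly on a trainable surrogate. SingleTaskGPSurrogate and its default hyperconfig are assumptions beyond this diff (without a hyperconfig the helper warns and returns the surrogate unchanged, as shown above), and the training data is generated from a benchmark purely for illustration.

import bofire.strategies.api as strategies
from bofire.benchmarks.api import Himmelblau
from bofire.data_models.strategies.api import RandomStrategy
from bofire.data_models.surrogates.api import SingleTaskGPSurrogate
from bofire.runners.api import hyperoptimize

benchmark = Himmelblau()

# Sample a small training set from the benchmark (illustrative only).
sampler = strategies.map(data_model=RandomStrategy(domain=benchmark.domain))
training_data = benchmark.f(sampler.ask(20), return_complete=True)

surrogate_data = SingleTaskGPSurrogate(
    inputs=benchmark.domain.inputs,
    outputs=benchmark.domain.outputs,
)

tuned_surrogate_data, metrics = hyperoptimize(
    surrogate_data=surrogate_data,
    training_data=training_data,
    folds=3,
)
# metrics holds one row per evaluated hyperparameter setting (hyperconfig inputs
# plus regression metrics), sorted so the best setting comes first;
# tuned_surrogate_data already carries that best setting.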
