Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial attempt to incorporate MultiTask GPs #353

Merged
merged 18 commits into from
Apr 12, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,7 @@ dmypy.json

# generated version file
bofire/version.py

# OS generated files
.DS_Store
.DS_Store?
79 changes: 79 additions & 0 deletions bofire/benchmarks/single.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
ContinuousInput,
ContinuousOutput,
DiscreteInput,
TaskInput,
)
from bofire.data_models.objectives.api import MaximizeObjective, MinimizeObjective
from bofire.utils.torch_tools import tkwargs
Expand Down Expand Up @@ -375,6 +376,84 @@ def get_optima(self) -> pd.DataFrame:
)


class MultiFidelityHimmelblau(Benchmark):
    """Multi-task variant of the Himmelblau benchmark function.

    Task ``task_1`` is the standard Himmelblau function
    (https://en.wikipedia.org/wiki/Himmelblau%27s_function); ``task_2`` adds a
    ``x_1 * x_2`` bias term, so the two tasks are correlated but not identical.
    """

    def __init__(self, use_constraints: bool = False, **kwargs):
        """Initializes a benchmark of type MultiFidelityHimmelblau.

        Args:
            use_constraints (bool, optional): Whether constraints should be used
                (not implemented yet). Defaults to False.

        Raises:
            ValueError: As constraints are not implemented yet, a True value for
                use_constraints yields a ValueError.
        """
        super().__init__(**kwargs)
        self.use_constraints = use_constraints
        if self.use_constraints:
            raise ValueError("Not implemented yet!")
        inputs = [
            TaskInput(key="task_id", categories=["task_1", "task_2"]),
            ContinuousInput(key="x_1", bounds=(-6, 6)),
            ContinuousInput(key="x_2", bounds=(-6, 6)),
        ]
        objective = MinimizeObjective(w=1.0)
        output_feature = ContinuousOutput(key="y", objective=objective)
        self._domain = Domain(
            inputs=Inputs(features=inputs),
            outputs=Outputs(features=[output_feature]),
        )

    def _f(self, X: pd.DataFrame, **kwargs) -> pd.DataFrame:
        """Evaluates the benchmark function task-wise.

        Args:
            X (pd.DataFrame): Input values. Columns are task_id, x_1 and x_2.

        Returns:
            pd.DataFrame: y values of the function. Columns are y and valid_y.
        """
        # Align the output frame with X's index so the .loc assignments below
        # hit the intended rows even when X carries a non-default index.
        Y = pd.DataFrame({"y": np.zeros(len(X)), "valid_y": 0}, index=X.index)
        # evaluate task 1: the plain Himmelblau function
        X_temp = X.query("task_id == 'task_1'").eval(
            "y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)", inplace=False
        )
        Y.loc[X_temp.index, "y"] = X_temp["y"]
        Y.loc[X_temp.index, "valid_y"] = 1
        # evaluate task 2: Himmelblau plus the x_1 * x_2 bias term
        X_temp = X.query("task_id == 'task_2'").eval(
            "y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2) + x_1 * x_2", inplace=False
        )
        Y.loc[X_temp.index, "y"] = X_temp["y"]
        Y.loc[X_temp.index, "valid_y"] = 1
        return Y

    def get_optima(self) -> pd.DataFrame:
        """Returns positions of optima of the benchmark function.

        The four known global minima of the plain Himmelblau function (task_1),
        where the function value is 0.

        Returns:
            pd.DataFrame: Optima. Columns are task_id, x_1, x_2 and y.
        """
        x = np.array(
            [
                [3.0, 2.0],
                [-2.805118, 3.131312],
                [-3.779310, -3.283186],
                [3.584428, -1.848126],
            ]
        )
        # Build the frame explicitly: np.c_[x, y] alone yields only three data
        # columns while the domain has four keys (task_id, x_1, x_2, y), which
        # would make the DataFrame constructor raise.
        optima = pd.DataFrame(x, columns=["x_1", "x_2"])
        optima["task_id"] = "task_1"
        optima["y"] = 0.0
        return optima[
            self.domain.inputs.get_keys() + self.domain.outputs.get_keys()
        ]


class DiscreteHimmelblau(Himmelblau):
def __init__(self, **kwargs):
super().__init__(**kwargs)
Expand Down
3 changes: 3 additions & 0 deletions bofire/data_models/features/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
MolecularInput,
)
from bofire.data_models.features.numerical import NumericalInput
from bofire.data_models.features.tasks import TaskInput

AbstractFeature = Union[
Feature,
Expand All @@ -32,6 +33,7 @@
CategoricalDescriptorInput,
MolecularInput,
CategoricalMolecularInput,
TaskInput,
]

AnyInput = Union[
Expand All @@ -42,6 +44,7 @@
CategoricalDescriptorInput,
MolecularInput,
CategoricalMolecularInput,
TaskInput,
]

AnyOutput = Union[ContinuousOutput, CategoricalOutput]
54 changes: 54 additions & 0 deletions bofire/data_models/features/tasks.py
jduerholt marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from typing import List, Literal, Optional

import numpy as np
from pydantic import Field, field_validator, model_validator
from typing_extensions import Annotated

from bofire.data_models.features.api import CategoricalInput, DiscreteInput


class TaskInputDiscrete(DiscreteInput):
    """Discrete input feature encoding tasks as integer indices 0..n_tasks-1.

    Attributes:
        n_tasks: Number of tasks; the allowed discrete values are derived
            from it by the model validator below.
        fidelities: Fidelity level per task. Levels must start at 0 and
            increase in steps of 1.
    """

    # Distinct type tag: the previous "TaskInput" tag collided with the
    # categorical TaskInput class defined below.
    type: Literal["TaskInputDiscrete"] = "TaskInputDiscrete"
    n_tasks: int
    fidelities: List[int]

    @field_validator("fidelities")
    def validate_fidelities(cls, fidelities: List[int], values):
        """Validates that exactly one valid fidelity level is given per task."""
        # pydantic v2 passes a ValidationInfo object; previously validated
        # fields live on ``values.data``, not on ``values`` itself.
        if len(fidelities) != values.data["n_tasks"]:
            raise ValueError(
                "Length of fidelity lists must be equal to the number of tasks"
            )
        if list(set(fidelities)) != list(range(np.max(fidelities) + 1)):
            raise ValueError(
                "Fidelities must be a list containing integers, starting from 0 and increasing by 1"
            )
        return fidelities

    @model_validator(mode="before")
    def validate_values(cls, values):
        """Derives the discrete task values 0..n_tasks-1 from n_tasks."""
        if "n_tasks" in values:
            values["values"] = list(range(values["n_tasks"]))
        return values


class TaskInput(CategoricalInput):
    """Categorical input feature encoding the task a measurement belongs to.

    Attributes:
        fidelities: Fidelity level per task (one entry per category).
            Defaults to fidelity 0 for every task. Levels must start at 0
            and increase in steps of 1.
    """

    # The default value must be a member of the Literal type, otherwise
    # pydantic rejects the model; the original Literal["TaskInputCategorical"]
    # with default "TaskInput" was inconsistent.
    type: Literal["TaskInput"] = "TaskInput"
    fidelities: Annotated[Optional[List[int]], Field(validate_default=True)] = None

    @field_validator("fidelities")
    def validate_fidelities(cls, fidelities: Optional[List[int]], values):
        """Validates (or defaults) the per-task fidelity levels."""
        if "categories" in values.data:
            n_tasks = len(values.data["categories"])
            if fidelities is None:
                # default: all tasks share the highest fidelity level 0
                return [0 for _ in range(n_tasks)]
            if len(fidelities) != n_tasks:
                raise ValueError(
                    "Length of fidelity lists must be equal to the number of tasks"
                )
            if list(set(fidelities)) != list(range(np.max(fidelities) + 1)):
                raise ValueError(
                    "Fidelities must be a list containing integers, starting from 0 and increasing by 1"
                )
        # always return the (possibly None) value so pydantic keeps it
        return fidelities
6 changes: 6 additions & 0 deletions bofire/data_models/priors/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Union

from bofire.data_models.priors.gamma import GammaPrior
from bofire.data_models.priors.lkj import LKJPrior
from bofire.data_models.priors.normal import NormalPrior
from bofire.data_models.priors.prior import Prior

Expand All @@ -25,3 +26,8 @@
MBO_LENGTHCALE_PRIOR = partial(GammaPrior, concentration=2.0, rate=0.2)
MBO_NOISE_PRIOR = partial(GammaPrior, concentration=2.0, rate=4.0)
MBO_OUTPUTSCALE_PRIOR = partial(GammaPrior, concentration=2.0, rate=4.0)

# prior for multitask kernel: LKJ prior over the inter-task correlation
# matrix, with a Gamma prior on the task standard deviations
LKJ_PRIOR = partial(
    LKJPrior, eta=2.0, sd_prior=GammaPrior(concentration=2.0, rate=0.15)
)
21 changes: 21 additions & 0 deletions bofire/data_models/priors/lkj.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from typing import Literal

from pydantic import PositiveFloat

from bofire.data_models.priors.gamma import GammaPrior
from bofire.data_models.priors.prior import Prior


class LKJPrior(Prior):
    """LKJ prior over correlation matrices. Allows to specify the shape of the prior.

    Attributes:
        eta(PositiveFloat): shape parameter of the LKJ distribution
        sd_prior(GammaPrior): prior over the standard deviations of the correlation matrix
        n_tasks(int): number of dimensions of the correlation matrix. Defaults to 1;
            presumably overwritten with the actual task count before mapping — verify at call site.
    """

    type: Literal["LKJPrior"] = "LKJPrior"
    eta: PositiveFloat
    sd_prior: GammaPrior
    n_tasks: int = 1
6 changes: 6 additions & 0 deletions bofire/data_models/surrogates/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
MixedTanimotoGPSurrogate,
)
from bofire.data_models.surrogates.mlp import MLPEnsemble
from bofire.data_models.surrogates.multi_task_gp import (
MultiTaskGPHyperconfig,
MultiTaskGPSurrogate,
)
from bofire.data_models.surrogates.polynomial import PolynomialSurrogate
from bofire.data_models.surrogates.random_forest import RandomForestSurrogate
from bofire.data_models.surrogates.single_task_gp import (
Expand All @@ -43,6 +47,7 @@
LinearSurrogate,
PolynomialSurrogate,
TanimotoGPSurrogate,
MultiTaskGPSurrogate,
]

AnyTrainableSurrogate = Union[
Expand All @@ -56,6 +61,7 @@
LinearSurrogate,
PolynomialSurrogate,
TanimotoGPSurrogate,
MultiTaskGPSurrogate,
]
except ImportError:
# with the minimal installation we don't have botorch
Expand Down
93 changes: 93 additions & 0 deletions bofire/data_models/surrogates/multi_task_gp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from typing import Literal, Optional

import pandas as pd
from pydantic import Field

from bofire.data_models.domain.api import Inputs
from bofire.data_models.enum import RegressionMetricsEnum
from bofire.data_models.features.api import CategoricalInput
from bofire.data_models.kernels.api import (
AnyKernel,
MaternKernel,
RBFKernel,
)
from bofire.data_models.priors.api import (
BOTORCH_LENGTHCALE_PRIOR,
BOTORCH_NOISE_PRIOR,
LKJ_PRIOR,
MBO_LENGTHCALE_PRIOR,
MBO_NOISE_PRIOR,
AnyPrior,
)
from bofire.data_models.priors.lkj import LKJPrior

# from bofire.data_models.strategies.api import FactorialStrategy
from bofire.data_models.surrogates.trainable import Hyperconfig
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class MultiTaskGPHyperconfig(Hyperconfig):
    """Hyperparameter-search configuration for the multi-task GP surrogate.

    Searches over the kernel type, the prior flavor (mbo vs. botorch
    defaults) and whether ARD (one lengthscale per input) is used.
    """

    type: Literal["MultiTaskGPHyperconfig"] = "MultiTaskGPHyperconfig"
    inputs: Inputs = Inputs(
        features=[
            CategoricalInput(
                key="kernel", categories=["rbf", "matern_1.5", "matern_2.5"]
            ),
            CategoricalInput(key="prior", categories=["mbo", "botorch"]),
            CategoricalInput(key="ard", categories=["True", "False"]),
        ]
    )
    target_metric: RegressionMetricsEnum = RegressionMetricsEnum.MAE
    hyperstrategy: Literal[
        "FactorialStrategy", "SoboStrategy", "RandomStrategy"
    ] = "FactorialStrategy"

    @staticmethod
    def _update_hyperparameters(
        surrogate_data: "MultiTaskGPSurrogate", hyperparameters: pd.Series
    ):
        """Writes the sampled hyperparameters onto the surrogate data model.

        Args:
            surrogate_data: Surrogate data model, updated in place.
            hyperparameters: One candidate from the search space with entries
                ``kernel``, ``prior`` and ``ard``.

        Raises:
            ValueError: If an unknown kernel name is encountered.
        """
        if hyperparameters.prior == "mbo":
            noise_prior, lengthscale_prior = (MBO_NOISE_PRIOR(), MBO_LENGTHCALE_PRIOR())
        else:
            noise_prior, lengthscale_prior = (
                BOTORCH_NOISE_PRIOR(),
                BOTORCH_LENGTHCALE_PRIOR(),
            )

        surrogate_data.noise_prior = noise_prior
        if hyperparameters.kernel == "rbf":
            # Assign the kernel itself; the original trailing comma turned this
            # assignment into a one-element tuple, which breaks validation.
            surrogate_data.kernel = RBFKernel(
                ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior
            )
        elif hyperparameters.kernel == "matern_2.5":
            surrogate_data.kernel = MaternKernel(
                nu=2.5, ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior
            )
        elif hyperparameters.kernel == "matern_1.5":
            surrogate_data.kernel = MaternKernel(
                nu=1.5, ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior
            )
        else:
            raise ValueError(f"Kernel {hyperparameters.kernel} not known.")


class MultiTaskGPSurrogate(TrainableBotorchSurrogate):
    # Data model for a multi-task GP surrogate (one GP shared across tasks).
    type: Literal["MultiTaskGPSurrogate"] = "MultiTaskGPSurrogate"
    # Kernel over the (non-task) inputs; defaults to Matern 5/2 with ARD.
    kernel: AnyKernel = Field(
        default_factory=lambda: MaternKernel(
            ard=True,
            nu=2.5,
            lengthscale_prior=BOTORCH_LENGTHCALE_PRIOR(),
        )
    )
    # Prior on the observation noise.
    noise_prior: AnyPrior = Field(default_factory=lambda: BOTORCH_NOISE_PRIOR())
    # LKJ prior on the inter-task correlation matrix.
    lkj_prior: LKJPrior = Field(default_factory=lambda: LKJ_PRIOR())
    # Optional hyperparameter-search configuration; enabled by default.
    hyperconfig: Optional[MultiTaskGPHyperconfig] = Field(
        default_factory=lambda: MultiTaskGPHyperconfig()
    )
7 changes: 7 additions & 0 deletions bofire/priors/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,16 @@ def map_GammaPrior(data_model: data_models.GammaPrior) -> gpytorch.priors.GammaP
)


def map_LKJPrior(data_model: data_models.LKJPrior) -> gpytorch.priors.LKJPrior:
    """Maps an LKJPrior data model to a gpytorch LKJCovariancePrior.

    The nested standard-deviation prior is converted first via this module's
    ``map`` dispatcher (see PRIOR_MAP below).
    """
    sd_prior = map(data_model.sd_prior)
    return gpytorch.priors.LKJCovariancePrior(
        n=data_model.n_tasks,
        eta=data_model.eta,
        sd_prior=sd_prior,
    )


# Dispatch table from prior data models to their gpytorch mapper functions.
PRIOR_MAP = {
    data_models.NormalPrior: map_NormalPrior,
    data_models.GammaPrior: map_GammaPrior,
    data_models.LKJPrior: map_LKJPrior,
}


Expand Down
2 changes: 2 additions & 0 deletions bofire/surrogates/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from bofire.surrogates.mixed_single_task_gp import MixedSingleTaskGPSurrogate
from bofire.surrogates.mixed_tanimoto_gp import MixedTanimotoGPSurrogate
from bofire.surrogates.mlp import MLPEnsemble
from bofire.surrogates.multi_task_gp import MultiTaskGPSurrogate
from bofire.surrogates.random_forest import RandomForestSurrogate
from bofire.surrogates.single_task_gp import SingleTaskGPSurrogate
from bofire.surrogates.surrogate import Surrogate
Expand All @@ -23,6 +24,7 @@
data_models.LinearSurrogate: SingleTaskGPSurrogate,
data_models.PolynomialSurrogate: SingleTaskGPSurrogate,
data_models.TanimotoGPSurrogate: SingleTaskGPSurrogate,
data_models.MultiTaskGPSurrogate: MultiTaskGPSurrogate,
}


Expand Down
Loading