Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial attempt to incorporate MultiTask GPs #353

Merged
merged 18 commits into from
Apr 12, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,7 @@ dmypy.json

# generated version file
bofire/version.py

# OS generated files
.DS_Store
.DS_Store?
79 changes: 79 additions & 0 deletions bofire/benchmarks/single.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
ContinuousInput,
ContinuousOutput,
DiscreteInput,
TaskInput,
)
from bofire.data_models.objectives.api import MaximizeObjective, MinimizeObjective
from bofire.utils.torch_tools import tkwargs
Expand Down Expand Up @@ -375,6 +376,84 @@ def get_optima(self) -> pd.DataFrame:
)


class MultiFidelityHimmelblau(Benchmark):
    """Multi-task variant of the Himmelblau benchmark function.

    Task ``task_1`` is the standard Himmelblau function
    (https://en.wikipedia.org/wiki/Himmelblau%27s_function); ``task_2`` adds a
    ``x_1 * x_2`` bias term, so the two tasks are correlated but not identical.
    """

    def __init__(self, use_constraints: bool = False, **kwargs):
        """Initializes a benchmark of type MultiFidelityHimmelblau.

        Args:
            use_constraints (bool, optional): Whether constraints should be used
                (not implemented yet). Defaults to False.

        Raises:
            ValueError: As constraints are not implemented yet, a True value for
                use_constraints yields a ValueError.
        """
        super().__init__(**kwargs)
        self.use_constraints = use_constraints
        if self.use_constraints:
            raise ValueError("Not implemented yet!")
        inputs = [
            TaskInput(key="task_id", categories=["task_1", "task_2"]),
            ContinuousInput(key="x_1", bounds=(-6, 6)),
            ContinuousInput(key="x_2", bounds=(-6, 6)),
        ]
        objective = MinimizeObjective(w=1.0)
        output_feature = ContinuousOutput(key="y", objective=objective)
        self._domain = Domain(
            inputs=Inputs(features=inputs),
            outputs=Outputs(features=[output_feature]),
        )

    def _f(self, X: pd.DataFrame, **kwargs) -> pd.DataFrame:
        """Evaluates the benchmark function task-wise.

        Args:
            X (pd.DataFrame): Input values. Columns are task_id, x_1 and x_2.

        Returns:
            pd.DataFrame: y values of the function. Columns are y and valid_y.
        """
        # Align the output frame with X's index so the .loc assignments below
        # hit the intended rows even when X carries a non-default index.
        Y = pd.DataFrame({"y": np.zeros(len(X)), "valid_y": 0}, index=X.index)
        # evaluate task 1: the plain Himmelblau function
        X_temp = X.query("task_id == 'task_1'").eval(
            "y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)", inplace=False
        )
        Y.loc[X_temp.index, "y"] = X_temp["y"]
        Y.loc[X_temp.index, "valid_y"] = 1
        # evaluate task 2: Himmelblau plus the x_1 * x_2 bias term
        X_temp = X.query("task_id == 'task_2'").eval(
            "y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2) + x_1 * x_2", inplace=False
        )
        Y.loc[X_temp.index, "y"] = X_temp["y"]
        Y.loc[X_temp.index, "valid_y"] = 1
        return Y

    def get_optima(self) -> pd.DataFrame:
        """Returns positions of optima of the benchmark function.

        The four known global minima of the plain Himmelblau function (task_1),
        where the function value is 0.

        Returns:
            pd.DataFrame: Optima. Columns are task_id, x_1, x_2 and y.
        """
        x = np.array(
            [
                [3.0, 2.0],
                [-2.805118, 3.131312],
                [-3.779310, -3.283186],
                [3.584428, -1.848126],
            ]
        )
        # Build the frame explicitly: np.c_[x, y] alone yields only three data
        # columns while the domain has four keys (task_id, x_1, x_2, y), which
        # would make the DataFrame constructor raise.
        optima = pd.DataFrame(x, columns=["x_1", "x_2"])
        optima["task_id"] = "task_1"
        optima["y"] = 0.0
        return optima[
            self.domain.inputs.get_keys() + self.domain.outputs.get_keys()
        ]


class DiscreteHimmelblau(Himmelblau):
def __init__(self, **kwargs):
super().__init__(**kwargs)
Expand Down
3 changes: 3 additions & 0 deletions bofire/data_models/features/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
MolecularInput,
)
from bofire.data_models.features.numerical import NumericalInput
from bofire.data_models.features.tasks import TaskInput

AbstractFeature = Union[
Feature,
Expand All @@ -32,6 +33,7 @@
CategoricalDescriptorInput,
MolecularInput,
CategoricalMolecularInput,
TaskInput,
]

AnyInput = Union[
Expand All @@ -42,6 +44,7 @@
CategoricalDescriptorInput,
MolecularInput,
CategoricalMolecularInput,
TaskInput,
]

AnyOutput = Union[ContinuousOutput, CategoricalOutput]
54 changes: 54 additions & 0 deletions bofire/data_models/features/tasks.py
jduerholt marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from typing import List, Literal, Optional

import numpy as np
from pydantic import Field, field_validator, model_validator
from typing_extensions import Annotated

from bofire.data_models.features.api import CategoricalInput, DiscreteInput


class TaskInputDiscrete(DiscreteInput):
    """Discrete input feature encoding tasks as integer indices 0..n_tasks-1.

    Attributes:
        n_tasks: Number of tasks; the allowed discrete values are derived
            from it by the model validator below.
        fidelities: Fidelity level per task. Levels must start at 0 and
            increase in steps of 1.
    """

    # Distinct type tag: the previous "TaskInput" tag collided with the
    # categorical TaskInput class defined below.
    type: Literal["TaskInputDiscrete"] = "TaskInputDiscrete"
    n_tasks: int
    fidelities: List[int]

    @field_validator("fidelities")
    def validate_fidelities(cls, fidelities: List[int], values):
        """Validates that exactly one valid fidelity level is given per task."""
        # pydantic v2 passes a ValidationInfo object; previously validated
        # fields live on ``values.data``, not on ``values`` itself.
        if len(fidelities) != values.data["n_tasks"]:
            raise ValueError(
                "Length of fidelity lists must be equal to the number of tasks"
            )
        if list(set(fidelities)) != list(range(np.max(fidelities) + 1)):
            raise ValueError(
                "Fidelities must be a list containing integers, starting from 0 and increasing by 1"
            )
        return fidelities

    @model_validator(mode="before")
    def validate_values(cls, values):
        """Derives the discrete task values 0..n_tasks-1 from n_tasks."""
        if "n_tasks" in values:
            values["values"] = list(range(values["n_tasks"]))
        return values


class TaskInput(CategoricalInput):
    """Categorical input feature encoding the task a measurement belongs to.

    Attributes:
        fidelities: Fidelity level per task (one entry per category).
            Defaults to fidelity 0 for every task. Levels must start at 0
            and increase in steps of 1.
    """

    # The default value must be a member of the Literal type, otherwise
    # pydantic rejects the model; the original Literal["TaskInputCategorical"]
    # with default "TaskInput" was inconsistent.
    type: Literal["TaskInput"] = "TaskInput"
    fidelities: Annotated[Optional[List[int]], Field(validate_default=True)] = None

    @field_validator("fidelities")
    def validate_fidelities(cls, fidelities: Optional[List[int]], values):
        """Validates (or defaults) the per-task fidelity levels."""
        if "categories" in values.data:
            n_tasks = len(values.data["categories"])
            if fidelities is None:
                # default: all tasks share the highest fidelity level 0
                return [0 for _ in range(n_tasks)]
            if len(fidelities) != n_tasks:
                raise ValueError(
                    "Length of fidelity lists must be equal to the number of tasks"
                )
            if list(set(fidelities)) != list(range(np.max(fidelities) + 1)):
                raise ValueError(
                    "Fidelities must be a list containing integers, starting from 0 and increasing by 1"
                )
        # always return the (possibly None) value so pydantic keeps it
        return fidelities
6 changes: 6 additions & 0 deletions bofire/data_models/priors/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Union

from bofire.data_models.priors.gamma import GammaPrior
from bofire.data_models.priors.lkj import LKJPrior
from bofire.data_models.priors.normal import NormalPrior
from bofire.data_models.priors.prior import Prior

Expand All @@ -25,3 +26,8 @@
MBO_LENGTHCALE_PRIOR = partial(GammaPrior, concentration=2.0, rate=0.2)
MBO_NOISE_PRIOR = partial(GammaPrior, concentration=2.0, rate=4.0)
MBO_OUTPUTSCALE_PRIOR = partial(GammaPrior, concentration=2.0, rate=4.0)

# prior for multitask kernel: LKJ prior over the inter-task correlation
# matrix, with a Gamma prior on the task standard deviations
LKJ_PRIOR = partial(
    LKJPrior, eta=2.0, sd_prior=GammaPrior(concentration=2.0, rate=0.15)
)
21 changes: 21 additions & 0 deletions bofire/data_models/priors/lkj.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from typing import Literal

from pydantic import PositiveFloat

from bofire.data_models.priors.gamma import GammaPrior
from bofire.data_models.priors.prior import Prior


class LKJPrior(Prior):
    """LKJ prior over correlation matrices. Allows to specify the shape of the prior.

    Attributes:
        eta(PositiveFloat): shape parameter of the LKJ distribution
        sd_prior(GammaPrior): prior over the standard deviations of the correlation matrix
        n_tasks(int): number of dimensions of the correlation matrix. Defaults to 1;
            presumably overwritten with the actual task count before mapping — verify at call site.
    """

    type: Literal["LKJPrior"] = "LKJPrior"
    eta: PositiveFloat
    sd_prior: GammaPrior
    n_tasks: int = 1
6 changes: 6 additions & 0 deletions bofire/data_models/surrogates/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
MixedTanimotoGPSurrogate,
)
from bofire.data_models.surrogates.mlp import MLPEnsemble
from bofire.data_models.surrogates.multi_task_gp import (
MultiTaskGPHyperconfig,
MultiTaskGPSurrogate,
)
from bofire.data_models.surrogates.polynomial import PolynomialSurrogate
from bofire.data_models.surrogates.random_forest import RandomForestSurrogate
from bofire.data_models.surrogates.single_task_gp import (
Expand All @@ -43,6 +47,7 @@
LinearSurrogate,
PolynomialSurrogate,
TanimotoGPSurrogate,
MultiTaskGPSurrogate,
]

AnyTrainableSurrogate = Union[
Expand All @@ -56,6 +61,7 @@
LinearSurrogate,
PolynomialSurrogate,
TanimotoGPSurrogate,
MultiTaskGPSurrogate,
]
except ImportError:
# with the minimal installation we don't have botorch
Expand Down
93 changes: 93 additions & 0 deletions bofire/data_models/surrogates/multi_task_gp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from typing import Literal, Optional

import pandas as pd
from pydantic import Field

from bofire.data_models.domain.api import Inputs
from bofire.data_models.enum import RegressionMetricsEnum
from bofire.data_models.features.api import CategoricalInput
from bofire.data_models.kernels.api import (
AnyKernel,
MaternKernel,
RBFKernel,
)
from bofire.data_models.priors.api import (
BOTORCH_LENGTHCALE_PRIOR,
BOTORCH_NOISE_PRIOR,
LKJ_PRIOR,
MBO_LENGTHCALE_PRIOR,
MBO_NOISE_PRIOR,
AnyPrior,
)
from bofire.data_models.priors.lkj import LKJPrior

# from bofire.data_models.strategies.api import FactorialStrategy
from bofire.data_models.surrogates.trainable import Hyperconfig
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class MultiTaskGPHyperconfig(Hyperconfig):
    """Hyperparameter-search configuration for the multi-task GP surrogate.

    Searches over the kernel type, the prior flavor (mbo vs. botorch
    defaults) and whether ARD (one lengthscale per input) is used.
    """

    type: Literal["MultiTaskGPHyperconfig"] = "MultiTaskGPHyperconfig"
    inputs: Inputs = Inputs(
        features=[
            CategoricalInput(
                key="kernel", categories=["rbf", "matern_1.5", "matern_2.5"]
            ),
            CategoricalInput(key="prior", categories=["mbo", "botorch"]),
            CategoricalInput(key="ard", categories=["True", "False"]),
        ]
    )
    target_metric: RegressionMetricsEnum = RegressionMetricsEnum.MAE
    hyperstrategy: Literal[
        "FactorialStrategy", "SoboStrategy", "RandomStrategy"
    ] = "FactorialStrategy"

    @staticmethod
    def _update_hyperparameters(
        surrogate_data: "MultiTaskGPSurrogate", hyperparameters: pd.Series
    ):
        """Writes the sampled hyperparameters onto the surrogate data model.

        Args:
            surrogate_data: Surrogate data model, updated in place.
            hyperparameters: One candidate from the search space with entries
                ``kernel``, ``prior`` and ``ard``.

        Raises:
            ValueError: If an unknown kernel name is encountered.
        """
        if hyperparameters.prior == "mbo":
            noise_prior, lengthscale_prior = (MBO_NOISE_PRIOR(), MBO_LENGTHCALE_PRIOR())
        else:
            noise_prior, lengthscale_prior = (
                BOTORCH_NOISE_PRIOR(),
                BOTORCH_LENGTHCALE_PRIOR(),
            )

        surrogate_data.noise_prior = noise_prior
        if hyperparameters.kernel == "rbf":
            # Assign the kernel itself; the original trailing comma turned this
            # assignment into a one-element tuple, which breaks validation.
            surrogate_data.kernel = RBFKernel(
                ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior
            )
        elif hyperparameters.kernel == "matern_2.5":
            surrogate_data.kernel = MaternKernel(
                nu=2.5, ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior
            )
        elif hyperparameters.kernel == "matern_1.5":
            surrogate_data.kernel = MaternKernel(
                nu=1.5, ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior
            )
        else:
            raise ValueError(f"Kernel {hyperparameters.kernel} not known.")


class MultiTaskGPSurrogate(TrainableBotorchSurrogate):
    # Data model for a multi-task GP surrogate (one GP shared across tasks).
    type: Literal["MultiTaskGPSurrogate"] = "MultiTaskGPSurrogate"
    # Kernel over the (non-task) inputs; defaults to Matern 5/2 with ARD.
    kernel: AnyKernel = Field(
        default_factory=lambda: MaternKernel(
            ard=True,
            nu=2.5,
            lengthscale_prior=BOTORCH_LENGTHCALE_PRIOR(),
        )
    )
    # Prior on the observation noise.
    noise_prior: AnyPrior = Field(default_factory=lambda: BOTORCH_NOISE_PRIOR())
    # LKJ prior on the inter-task correlation matrix.
    lkj_prior: LKJPrior = Field(default_factory=lambda: LKJ_PRIOR())
    # Optional hyperparameter-search configuration; enabled by default.
    hyperconfig: Optional[MultiTaskGPHyperconfig] = Field(
        default_factory=lambda: MultiTaskGPHyperconfig()
    )
7 changes: 7 additions & 0 deletions bofire/priors/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,16 @@ def map_GammaPrior(data_model: data_models.GammaPrior) -> gpytorch.priors.GammaP
)


def map_LKJPrior(data_model: data_models.LKJPrior) -> gpytorch.priors.LKJPrior:
    """Maps an LKJPrior data model to a gpytorch LKJCovariancePrior.

    The nested standard-deviation prior is converted first via this module's
    ``map`` dispatcher (see PRIOR_MAP below).
    """
    sd_prior = map(data_model.sd_prior)
    return gpytorch.priors.LKJCovariancePrior(
        n=data_model.n_tasks,
        eta=data_model.eta,
        sd_prior=sd_prior,
    )


# Dispatch table from prior data models to their gpytorch mapper functions.
PRIOR_MAP = {
    data_models.NormalPrior: map_NormalPrior,
    data_models.GammaPrior: map_GammaPrior,
    data_models.LKJPrior: map_LKJPrior,
}


Expand Down
2 changes: 2 additions & 0 deletions bofire/surrogates/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from bofire.surrogates.mixed_single_task_gp import MixedSingleTaskGPSurrogate
from bofire.surrogates.mixed_tanimoto_gp import MixedTanimotoGPSurrogate
from bofire.surrogates.mlp import MLPEnsemble
from bofire.surrogates.multi_task_gp import MultiTaskGPSurrogate
from bofire.surrogates.random_forest import RandomForestSurrogate
from bofire.surrogates.single_task_gp import SingleTaskGPSurrogate
from bofire.surrogates.surrogate import Surrogate
Expand All @@ -23,6 +24,7 @@
data_models.LinearSurrogate: SingleTaskGPSurrogate,
data_models.PolynomialSurrogate: SingleTaskGPSurrogate,
data_models.TanimotoGPSurrogate: SingleTaskGPSurrogate,
data_models.MultiTaskGPSurrogate: MultiTaskGPSurrogate,
}


Expand Down
Loading