diff --git a/.gitignore b/.gitignore
index 6df9fafed..830f88f1b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -135,3 +135,6 @@ dmypy.json
 # generated version file
 bofire/version.py
 
+# OS generated files
+.DS_Store
+.DS_Store?
diff --git a/bofire/benchmarks/api.py b/bofire/benchmarks/api.py
index 9d9dd1257..136a1bdac 100644
--- a/bofire/benchmarks/api.py
+++ b/bofire/benchmarks/api.py
@@ -4,7 +4,16 @@
 from bofire.benchmarks.benchmark import Benchmark, GenericBenchmark
 from bofire.benchmarks.hyperopt import Hyperopt
 from bofire.benchmarks.multi import C2DTLZ2, DTLZ2, ZDT1, CrossCoupling, SnarBenchmark
-from bofire.benchmarks.single import Ackley, Branin, Branin30, Hartmann, Himmelblau
+from bofire.benchmarks.single import (
+    Ackley,
+    Branin,
+    Branin30,
+    Hartmann,
+    Himmelblau,
+    MultiTaskHimmelblau,
+)
 
 AnyMultiBenchmark = Union[C2DTLZ2, DTLZ2, ZDT1, CrossCoupling, SnarBenchmark]
-AnySingleBenchmark = Union[Ackley, Branin, Branin30, Hartmann, Himmelblau]
+AnySingleBenchmark = Union[
+    Ackley, Branin, Branin30, Hartmann, Himmelblau, MultiTaskHimmelblau
+]
diff --git a/bofire/benchmarks/single.py b/bofire/benchmarks/single.py
index 565b216a7..f54e74deb 100644
--- a/bofire/benchmarks/single.py
+++ b/bofire/benchmarks/single.py
@@ -17,6 +17,7 @@
     ContinuousInput,
     ContinuousOutput,
     DiscreteInput,
+    TaskInput,
 )
 from bofire.data_models.objectives.api import MaximizeObjective, MinimizeObjective
 from bofire.utils.torch_tools import tkwargs
@@ -381,6 +382,82 @@ def get_optima(self) -> pd.DataFrame:
         )
 
 
+class MultiTaskHimmelblau(Benchmark):
+    """Himmelblau function for testing optimization algorithms
+    Link to the definition: https://en.wikipedia.org/wiki/Himmelblau%27s_function
+    """
+
+    def __init__(self, use_constraints: bool = False, **kwargs):
+        """Initialiszes class of type Himmelblau.
+
+        Args:
+            best_possible_f (float, optional): Not implemented yet. Defaults to 0.0.
+            use_constraints (bool, optional): Whether constraints should be used or not (Not implemented yet.). Defaults to False.
+
+        Raises:
+            ValueError: As constraints are not implemeted yet, a True value for use_constraints yields a ValueError.
+        """
+        super().__init__(**kwargs)
+        self.use_constraints = use_constraints
+        inputs = []
+
+        inputs.append(TaskInput(key="task_id", categories=["task_1", "task_2"]))
+        inputs.append(ContinuousInput(key="x_1", bounds=(-6, 6)))
+        inputs.append(ContinuousInput(key="x_2", bounds=(-6, 6)))
+
+        objective = MinimizeObjective(w=1.0)
+        output_feature = ContinuousOutput(key="y", objective=objective)
+        if self.use_constraints:
+            raise ValueError("Not implemented yet!")
+        self._domain = Domain(
+            inputs=Inputs(features=inputs),
+            outputs=Outputs(features=[output_feature]),
+        )
+
+    def _f(self, X: pd.DataFrame, **kwargs) -> pd.DataFrame:
+        """Evaluates benchmark function.
+
+        Args:
+            X (pd.DataFrame): Input values. Columns are x_1 and x_2
+
+        Returns:
+            pd.DataFrame: y values of the function. Columns are y and valid_y.
+        """
+        # initialize y outputs
+        Y = pd.DataFrame({"y": np.zeros(len(X)), "valid_y": 0})
+        # evaluate task 1
+        X_temp = X.query("task_id == 'task_1'").eval(
+            "y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)", inplace=False
+        )
+        Y.loc[X_temp.index, "y"] = X_temp["y"]
+        Y.loc[X_temp.index, "valid_y"] = 1
+        # evaluate task 2
+        X_temp = X.query("task_id == 'task_2'").eval(
+            "y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2) + x_1 * x_2", inplace=False
+        )
+        Y.loc[X_temp.index, "y"] = X_temp["y"]
+        Y.loc[X_temp.index, "valid_y"] = 1
+        return Y
+
+    def get_optima(self) -> pd.DataFrame:
+        """Returns positions of optima of the benchmark function.
+
+        Returns:
+            pd.DataFrame: x values of optima. Colums are x_1, x_2, task_id
+        """
+        out = [
+            [3.0, 2.0, "task_1", 0],
+            [-2.805118, 3.131312, "task_1", 0],
+            [-3.779310, -3.283186, "task_1", 0],
+            [3.584428, -1.848126, "task_1", 0],
+        ]
+
+        return pd.DataFrame(
+            out,
+            columns=self.domain.inputs.get_keys() + self.domain.outputs.get_keys(),
+        )
+
+
 class DiscreteHimmelblau(Himmelblau):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
diff --git a/bofire/data_models/priors/api.py b/bofire/data_models/priors/api.py
index 37f80bca8..b7bea6466 100644
--- a/bofire/data_models/priors/api.py
+++ b/bofire/data_models/priors/api.py
@@ -2,6 +2,7 @@
 from typing import Union
 
 from bofire.data_models.priors.gamma import GammaPrior
+from bofire.data_models.priors.lkj import LKJPrior
 from bofire.data_models.priors.normal import NormalPrior
 from bofire.data_models.priors.prior import Prior
 
@@ -10,6 +11,7 @@
 AnyPrior = Union[
     GammaPrior,
     NormalPrior,
+    LKJPrior,
 ]
 
 # default priors of interest
@@ -25,3 +27,8 @@
 MBO_LENGTHCALE_PRIOR = partial(GammaPrior, concentration=2.0, rate=0.2)
 MBO_NOISE_PRIOR = partial(GammaPrior, concentration=2.0, rate=4.0)
 MBO_OUTPUTSCALE_PRIOR = partial(GammaPrior, concentration=2.0, rate=4.0)
+
+# prior for multitask kernel
+LKJ_PRIOR = partial(
+    LKJPrior, shape=2.0, sd_prior=GammaPrior(concentration=2.0, rate=0.15)
+)
diff --git a/bofire/data_models/priors/lkj.py b/bofire/data_models/priors/lkj.py
new file mode 100644
index 000000000..f1fb55ca1
--- /dev/null
+++ b/bofire/data_models/priors/lkj.py
@@ -0,0 +1,21 @@
+from typing import Literal
+
+from pydantic import PositiveFloat
+
+from bofire.data_models.priors.gamma import GammaPrior
+from bofire.data_models.priors.prior import Prior
+
+
+class LKJPrior(Prior):
+    """LKJ prior over correlation matrices. Allows to specify the shape of the prior.
+
+    Attributes:
+        n(int): number of dimensions of the correlation matrix
+        eta(PositiveFloat): shape parameter of the LKJ distribution
+        sd_prior(Prior): prior over the standard deviations of the correlation matrix
+    """
+
+    type: Literal["LKJPrior"] = "LKJPrior"
+    shape: PositiveFloat
+    sd_prior: GammaPrior
+    n_tasks: int = 2
diff --git a/bofire/data_models/surrogates/api.py b/bofire/data_models/surrogates/api.py
index d984cac7a..60d36b489 100644
--- a/bofire/data_models/surrogates/api.py
+++ b/bofire/data_models/surrogates/api.py
@@ -19,6 +19,10 @@
     MLPEnsemble,
     RegressionMLPEnsemble,
 )
+from bofire.data_models.surrogates.multi_task_gp import (
+    MultiTaskGPHyperconfig,
+    MultiTaskGPSurrogate,
+)
 from bofire.data_models.surrogates.polynomial import PolynomialSurrogate
 from bofire.data_models.surrogates.random_forest import RandomForestSurrogate
 from bofire.data_models.surrogates.scaler import ScalerEnum
@@ -48,6 +52,7 @@
     PolynomialSurrogate,
     TanimotoGPSurrogate,
     LinearDeterministicSurrogate,
+    MultiTaskGPSurrogate,
 ]
 
 AnyTrainableSurrogate = Union[
@@ -77,6 +82,7 @@
     PolynomialSurrogate,
     TanimotoGPSurrogate,
     LinearDeterministicSurrogate,
+    MultiTaskGPSurrogate,
 ]
 
 AnyClassificationSurrogate = ClassificationMLPEnsemble
diff --git a/bofire/data_models/surrogates/botorch.py b/bofire/data_models/surrogates/botorch.py
index f10917a58..a5c7039f7 100644
--- a/bofire/data_models/surrogates/botorch.py
+++ b/bofire/data_models/surrogates/botorch.py
@@ -15,6 +15,11 @@ class BotorchSurrogate(Surrogate):
     @field_validator("input_preprocessing_specs")
     @classmethod
     def validate_input_preprocessing_specs(cls, v, info):
+        # when validator for inputs fails, this validator is still checked and causes an Exception error instead of a ValueError
+        # fix this by checking if inputs is in info.data
+        if "inputs" not in info.data:
+            return
+
         inputs = info.data["inputs"]
         categorical_keys = inputs.get_keys(CategoricalInput, exact=True)
         descriptor_keys = inputs.get_keys(CategoricalDescriptorInput, exact=True)
diff --git a/bofire/data_models/surrogates/botorch_surrogates.py b/bofire/data_models/surrogates/botorch_surrogates.py
index 6314060f4..3b9594ee5 100644
--- a/bofire/data_models/surrogates/botorch_surrogates.py
+++ b/bofire/data_models/surrogates/botorch_surrogates.py
@@ -17,6 +17,7 @@
     ClassificationMLPEnsemble,
     RegressionMLPEnsemble,
 )
+from bofire.data_models.surrogates.multi_task_gp import MultiTaskGPSurrogate
 from bofire.data_models.surrogates.polynomial import PolynomialSurrogate
 from bofire.data_models.surrogates.random_forest import RandomForestSurrogate
 from bofire.data_models.surrogates.single_task_gp import SingleTaskGPSurrogate
@@ -36,6 +37,7 @@
     LinearSurrogate,
     PolynomialSurrogate,
     LinearDeterministicSurrogate,
+    MultiTaskGPSurrogate,
 ]
 
 
diff --git a/bofire/data_models/surrogates/multi_task_gp.py b/bofire/data_models/surrogates/multi_task_gp.py
new file mode 100644
index 000000000..66dbf83c1
--- /dev/null
+++ b/bofire/data_models/surrogates/multi_task_gp.py
@@ -0,0 +1,135 @@
+from typing import Literal, Optional, Type
+
+import pandas as pd
+from pydantic import Field, field_validator
+
+from bofire.data_models.domain.api import Inputs
+from bofire.data_models.enum import CategoricalEncodingEnum, RegressionMetricsEnum
+from bofire.data_models.features.api import (
+    AnyOutput,
+    CategoricalInput,
+    ContinuousOutput,
+    TaskInput,
+)
+from bofire.data_models.kernels.api import (
+    AnyKernel,
+    MaternKernel,
+    RBFKernel,
+)
+from bofire.data_models.priors.api import (
+    BOTORCH_LENGTHCALE_PRIOR,
+    BOTORCH_NOISE_PRIOR,
+    MBO_LENGTHCALE_PRIOR,
+    MBO_NOISE_PRIOR,
+    AnyPrior,
+)
+from bofire.data_models.priors.lkj import LKJPrior
+
+# from bofire.data_models.strategies.api import FactorialStrategy
+from bofire.data_models.surrogates.trainable import Hyperconfig
+from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate
+
+
+class MultiTaskGPHyperconfig(Hyperconfig):
+    type: Literal["MultiTaskGPHyperconfig"] = "MultiTaskGPHyperconfig"
+    inputs: Inputs = Inputs(
+        features=[
+            CategoricalInput(
+                key="kernel", categories=["rbf", "matern_1.5", "matern_2.5"]
+            ),
+            CategoricalInput(key="prior", categories=["mbo", "botorch"]),
+            CategoricalInput(key="ard", categories=["True", "False"]),
+        ]
+    )
+    target_metric: RegressionMetricsEnum = RegressionMetricsEnum.MAE
+    hyperstrategy: Literal[
+        "FactorialStrategy", "SoboStrategy", "RandomStrategy"
+    ] = "FactorialStrategy"
+
+    @staticmethod
+    def _update_hyperparameters(
+        surrogate_data: "MultiTaskGPSurrogate", hyperparameters: pd.Series
+    ):
+        def matern_25(ard: bool, lengthscale_prior: AnyPrior) -> MaternKernel:
+            return MaternKernel(nu=2.5, lengthscale_prior=lengthscale_prior, ard=ard)
+
+        def matern_15(ard: bool, lengthscale_prior: AnyPrior) -> MaternKernel:
+            return MaternKernel(nu=1.5, lengthscale_prior=lengthscale_prior, ard=ard)
+
+        if hyperparameters.prior == "mbo":
+            noise_prior, lengthscale_prior = (MBO_NOISE_PRIOR(), MBO_LENGTHCALE_PRIOR())
+        else:
+            noise_prior, lengthscale_prior = (
+                BOTORCH_NOISE_PRIOR(),
+                BOTORCH_LENGTHCALE_PRIOR(),
+            )
+
+        surrogate_data.noise_prior = noise_prior
+        if hyperparameters.kernel == "rbf":
+            surrogate_data.kernel = RBFKernel(
+                ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior
+            )
+        elif hyperparameters.kernel == "matern_2.5":
+            surrogate_data.kernel = matern_25(
+                ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior
+            )
+        elif hyperparameters.kernel == "matern_1.5":
+            surrogate_data.kernel = matern_15(
+                ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior
+            )
+        else:
+            raise ValueError(f"Kernel {hyperparameters.kernel} not known.")
+
+
+class MultiTaskGPSurrogate(TrainableBotorchSurrogate):
+    type: Literal["MultiTaskGPSurrogate"] = "MultiTaskGPSurrogate"
+    kernel: AnyKernel = Field(
+        default_factory=lambda: MaternKernel(
+            ard=True,
+            nu=2.5,
+            lengthscale_prior=BOTORCH_LENGTHCALE_PRIOR(),
+        )
+    )
+    noise_prior: AnyPrior = Field(default_factory=lambda: BOTORCH_NOISE_PRIOR())
+    task_prior: Optional[LKJPrior] = Field(default_factory=lambda: None)
+    hyperconfig: Optional[MultiTaskGPHyperconfig] = Field(
+        default_factory=lambda: MultiTaskGPHyperconfig()
+    )
+
+    @classmethod
+    def is_output_implemented(cls, my_type: Type[AnyOutput]) -> bool:
+        """Abstract method to check output type for surrogate models
+        Args:
+            my_type: continuous or categorical output
+        Returns:
+            bool: True if the output type is valid for the surrogate chosen, False otherwise
+        """
+        return isinstance(my_type, type(ContinuousOutput))
+
+    @field_validator("inputs")
+    @classmethod
+    def validate_task_inputs(cls, inputs: Inputs):
+
+        if len(inputs.get_keys(TaskInput)) != 1:
+            raise ValueError("Exactly one task input is required for multi-task GPs.")
+        return inputs
+
+    @field_validator("input_preprocessing_specs")
+    @classmethod
+    def validate_encoding(cls, v, info):
+        # also validate that the task feature has ordinal encoding
+        if "inputs" not in info.data:
+            return v
+
+        if len(info.data["inputs"].get_keys(TaskInput)) == 0:
+            return v
+
+        task_feature_id = info.data["inputs"].get_keys(TaskInput)[0]
+        if v.get(task_feature_id) is None:
+            v[task_feature_id] = CategoricalEncodingEnum.ORDINAL
+        elif v[task_feature_id] != CategoricalEncodingEnum.ORDINAL:
+            raise ValueError(
+                f"The task feature {task_feature_id} has to be encoded as ordinal."
+            )
+
+        return v
diff --git a/bofire/priors/mapper.py b/bofire/priors/mapper.py
index 48e4cb028..1f782df22 100644
--- a/bofire/priors/mapper.py
+++ b/bofire/priors/mapper.py
@@ -13,9 +13,16 @@ def map_GammaPrior(data_model: data_models.GammaPrior) -> gpytorch.priors.GammaP
     )
 
 
+def map_LKJPrior(data_model: data_models.LKJPrior) -> gpytorch.priors.LKJPrior:
+    return gpytorch.priors.LKJCovariancePrior(
+        n=data_model.n_tasks, eta=data_model.shape, sd_prior=map(data_model.sd_prior)
+    )
+
+
 PRIOR_MAP = {
     data_models.NormalPrior: map_NormalPrior,
     data_models.GammaPrior: map_GammaPrior,
+    data_models.LKJPrior: map_LKJPrior,
 }
 
 
diff --git a/bofire/strategies/predictives/botorch.py b/bofire/strategies/predictives/botorch.py
index 87b347347..e413bbb10 100644
--- a/bofire/strategies/predictives/botorch.py
+++ b/bofire/strategies/predictives/botorch.py
@@ -110,10 +110,14 @@ def _get_optimizer_options(self) -> Dict[str, int]:
             Dict[str, int]: The dictionary with the settings.
         """
         return {
-            "batch_limit": self.batch_limit
-            if len(self.domain.constraints.get([NChooseKConstraint, ProductConstraint]))
-            == 0
-            else 1,  # type: ignore
+            "batch_limit": (
+                self.batch_limit
+                if len(
+                    self.domain.constraints.get([NChooseKConstraint, ProductConstraint])
+                )
+                == 0
+                else 1
+            ),  # type: ignore
             "maxiter": self.maxiter,
         }
 
diff --git a/bofire/strategies/random.py b/bofire/strategies/random.py
index 98e806a37..4dca36ab4 100644
--- a/bofire/strategies/random.py
+++ b/bofire/strategies/random.py
@@ -290,9 +290,9 @@ def _sample_from_polytope(
                 n=1,
                 q=n,
                 bounds=bounds.to(**tkwargs),
-                inequality_constraints=unfixed_ineqs
-                if len(unfixed_ineqs) > 0  # type: ignore
-                else None,
+                inequality_constraints=(
+                    unfixed_ineqs if len(unfixed_ineqs) > 0 else None  # type: ignore
+                ),
                 equality_constraints=combined_eqs if len(combined_eqs) > 0 else None,
                 n_burnin=n_burnin,
                 thinning=n_thinning,
diff --git a/bofire/surrogates/api.py b/bofire/surrogates/api.py
index 9115f2d19..5ca8ef9d9 100644
--- a/bofire/surrogates/api.py
+++ b/bofire/surrogates/api.py
@@ -9,6 +9,7 @@
     MLPEnsemble,
     RegressionMLPEnsemble,
 )
+from bofire.surrogates.multi_task_gp import MultiTaskGPSurrogate
 from bofire.surrogates.random_forest import RandomForestSurrogate
 from bofire.surrogates.single_task_gp import SingleTaskGPSurrogate
 from bofire.surrogates.surrogate import Surrogate
diff --git a/bofire/surrogates/mapper.py b/bofire/surrogates/mapper.py
index 8c711e647..3dfb89384 100644
--- a/bofire/surrogates/mapper.py
+++ b/bofire/surrogates/mapper.py
@@ -7,6 +7,7 @@
 from bofire.surrogates.mixed_single_task_gp import MixedSingleTaskGPSurrogate
 from bofire.surrogates.mixed_tanimoto_gp import MixedTanimotoGPSurrogate
 from bofire.surrogates.mlp import ClassificationMLPEnsemble, RegressionMLPEnsemble
+from bofire.surrogates.multi_task_gp import MultiTaskGPSurrogate
 from bofire.surrogates.random_forest import RandomForestSurrogate
 from bofire.surrogates.single_task_gp import SingleTaskGPSurrogate
 from bofire.surrogates.surrogate import Surrogate
@@ -26,6 +27,7 @@
     data_models.PolynomialSurrogate: SingleTaskGPSurrogate,
     data_models.TanimotoGPSurrogate: SingleTaskGPSurrogate,
     data_models.LinearDeterministicSurrogate: LinearDeterministicSurrogate,
+    data_models.MultiTaskGPSurrogate: MultiTaskGPSurrogate,
 }
 
 
diff --git a/bofire/surrogates/multi_task_gp.py b/bofire/surrogates/multi_task_gp.py
new file mode 100644
index 000000000..f04f3433e
--- /dev/null
+++ b/bofire/surrogates/multi_task_gp.py
@@ -0,0 +1,107 @@
+import warnings
+from typing import Dict, Optional
+
+import botorch
+import numpy as np
+import pandas as pd
+import torch
+from botorch.fit import fit_gpytorch_mll
+from botorch.models.transforms.outcome import Standardize
+from gpytorch.mlls import ExactMarginalLogLikelihood
+
+import bofire.kernels.api as kernels
+import bofire.priors.api as priors
+from bofire.data_models.enum import OutputFilteringEnum
+from bofire.data_models.features.api import TaskInput
+from bofire.data_models.priors.api import LKJPrior
+
+# from bofire.data_models.molfeatures.api import MolFeatures
+from bofire.data_models.surrogates.api import MultiTaskGPSurrogate as DataModel
+from bofire.data_models.surrogates.scaler import ScalerEnum
+from bofire.surrogates.botorch import BotorchSurrogate
+from bofire.surrogates.trainable import TrainableSurrogate
+from bofire.surrogates.utils import get_scaler
+from bofire.utils.torch_tools import tkwargs
+
+
+class MultiTaskGPSurrogate(BotorchSurrogate, TrainableSurrogate):
+    def __init__(
+        self,
+        data_model: DataModel,
+        **kwargs,
+    ):
+        self.n_tasks = len(data_model.inputs.get(TaskInput).features[0].categories)
+        self.kernel = data_model.kernel
+        self.scaler = data_model.scaler
+        self.output_scaler = data_model.output_scaler
+        self.noise_prior = data_model.noise_prior
+        self.task_prior = data_model.task_prior
+        if isinstance(self.task_prior, LKJPrior):
+            # set the number of tasks in the prior
+            self.task_prior.n_tasks = self.n_tasks
+        # obtain the name of the task feature
+        self.task_feature_key = data_model.inputs.get_keys(TaskInput)[0]
+
+        super().__init__(data_model=data_model, **kwargs)
+
+    model: Optional[botorch.models.MultiTaskGP] = None
+    _output_filtering: OutputFilteringEnum = OutputFilteringEnum.ALL
+    training_specs: Dict = {}
+
+    def _fit(self, X: pd.DataFrame, Y: pd.DataFrame):
+        scaler = get_scaler(self.inputs, self.input_preprocessing_specs, self.scaler, X)
+        transformed_X = self.inputs.transform(X, self.input_preprocessing_specs)
+
+        tX, tY = torch.from_numpy(transformed_X.values).to(**tkwargs), torch.from_numpy(
+            Y.values
+        ).to(**tkwargs)
+
+        self.model = botorch.models.MultiTaskGP(  # type: ignore
+            train_X=tX,
+            train_Y=tY,
+            task_feature=transformed_X.columns.get_loc(
+                self.task_feature_key
+            ),  # obtain the fidelity index
+            covar_module=kernels.map(
+                self.kernel,
+                batch_shape=torch.Size(),
+                active_dims=list(
+                    range(tX.shape[1] - 1)
+                ),  # kernel is for input space so we subtract one for the fidelity index
+                ard_num_dims=1,  # this keyword is ingored
+            ),
+            outcome_transform=(
+                Standardize(m=tY.shape[-1])
+                if self.output_scaler == ScalerEnum.STANDARDIZE
+                else None
+            ),
+            input_transform=scaler,
+        )
+
+        if isinstance(self.task_prior, LKJPrior):
+            warnings.warn(
+                "The LKJ prior has issues when sampling from the prior, prior has been defaulted to None.",
+                UserWarning,
+            )
+            # once the issue is fixed, the following line should be uncommented
+            # self.model.task_covar_module.register_prior(
+            #     "IndexKernelPrior", priors.map(self.lkj_prior), _index_kernel_prior_closure
+            # )
+        self.model.likelihood.noise_covar.noise_prior = priors.map(self.noise_prior)  # type: ignore
+
+        mll = ExactMarginalLogLikelihood(self.model.likelihood, self.model)
+        fit_gpytorch_mll(mll, options=self.training_specs, max_attempts=10)
+
+    def _predict(self, transformed_X: pd.DataFrame):
+        # transform to tensor
+        X = torch.from_numpy(transformed_X.values).to(**tkwargs)
+        with torch.no_grad():
+            preds = self.model.posterior(X=X, observation_noise=False).mean.cpu().detach().numpy()  # type: ignore
+            vars = self.model.posterior(X=X, observation_noise=False).variance.cpu().detach().numpy()  # type: ignore
+            # add the observation noise to the stds
+            stds = np.sqrt(vars + self.model.likelihood.noise.cpu().detach().numpy())  # type: ignore
+        return preds, stds
+
+
+def _index_kernel_prior_closure(m):
+    return m._eval_covar_matrix()
diff --git a/tests/bofire/benchmarks/test_single.py b/tests/bofire/benchmarks/test_single.py
index 8ce3ad88f..6a0ca58a6 100644
--- a/tests/bofire/benchmarks/test_single.py
+++ b/tests/bofire/benchmarks/test_single.py
@@ -8,6 +8,7 @@
     DiscreteHimmelblau,
     Hartmann,
     Himmelblau,
+    MultiTaskHimmelblau,
     _CategoricalDiscreteHimmelblau,
 )
 
@@ -41,6 +42,8 @@ def test_hartmann():
         (Branin, False, {}),
         (Branin30, True, {}),
         (Branin30, False, {}),
+        (MultiTaskHimmelblau, False, {}),
+        (MultiTaskHimmelblau, True, {}),
         # TO DO: Implement feature that tests Ackley for categorical and descriptive inputs.
         # (Ackley, {"categorical": True}),
         # (Ackley, {"descriptor": True}),
diff --git a/tests/bofire/data_models/specs/priors.py b/tests/bofire/data_models/specs/priors.py
index f6e3d2f95..eb2d56dc1 100644
--- a/tests/bofire/data_models/specs/priors.py
+++ b/tests/bofire/data_models/specs/priors.py
@@ -43,3 +43,47 @@
             },
             error=ValidationError,
         )
+
+specs.add_valid(
+    priors.LKJPrior,
+    lambda: {
+        "n_tasks": random.randint(2, 10),
+        "shape": random.random(),
+        "sd_prior": {
+            "type": "GammaPrior",
+            "concentration": random.random(),
+            "rate": random.random(),
+        },
+    },
+)
+
+for shape in [-1, 0]:
+    specs.add_invalid(
+        priors.LKJPrior,
+        lambda: {
+            "n_tasks": random.randint(1, 10),
+            "shape": shape,  # noqa: B023
+            "sd_prior": {
+                "type": "GammaPrior",
+                "concentration": random.random(),
+                "rate": random.random(),
+            },
+        },
+        error=ValidationError,
+    )
+
+for concentration in [-1, 0]:
+    for rate in [-1, 0]:
+        specs.add_invalid(
+            priors.LKJPrior,
+            lambda: {
+                "n_tasks": random.randint(1, 10),
+                "shape": random.random(),
+                "sd_prior": {
+                    "type": "GammaPrior",
+                    "concentration": concentration,  # noqa: B023
+                    "rate": rate,  # noqa: B023
+                },
+            },
+            error=ValidationError,
+        )
diff --git a/tests/bofire/data_models/specs/surrogates.py b/tests/bofire/data_models/specs/surrogates.py
index d4bf765ee..7027a3fbb 100644
--- a/tests/bofire/data_models/specs/surrogates.py
+++ b/tests/bofire/data_models/specs/surrogates.py
@@ -9,6 +9,7 @@
     ContinuousInput,
     ContinuousOutput,
     MolecularInput,
+    TaskInput,
 )
 from bofire.data_models.kernels.api import (
     HammingDistanceKernel,
@@ -27,6 +28,7 @@
     ScalerEnum,
     SumAggregation,
 )
+from bofire.data_models.surrogates.multi_task_gp import MultiTaskGPHyperconfig
 from bofire.data_models.surrogates.single_task_gp import SingleTaskGPHyperconfig
 from tests.bofire.data_models.specs.features import specs as features
 from tests.bofire.data_models.specs.specs import Specs
@@ -461,3 +463,105 @@
     error=ValueError,
     message="Only numerical inputs are suppoerted for the `LinearDeterministicSurrogate`",
 )
+
+specs.add_valid(
+    models.MultiTaskGPSurrogate,
+    lambda: {
+        "inputs": Inputs(
+            features=[
+                features.valid(ContinuousInput).obj(),
+            ]
+            + [TaskInput(key="task", categories=["a", "b", "c"])]
+        ).model_dump(),
+        "outputs": Outputs(
+            features=[
+                features.valid(ContinuousOutput).obj(),
+            ]
+        ).model_dump(),
+        "kernel": ScaleKernel(
+            base_kernel=MaternKernel(
+                ard=True, nu=2.5, lengthscale_prior=BOTORCH_LENGTHCALE_PRIOR()
+            ),
+            outputscale_prior=BOTORCH_SCALE_PRIOR(),
+        ).model_dump(),
+        "aggregations": None,
+        "scaler": ScalerEnum.NORMALIZE,
+        "output_scaler": ScalerEnum.STANDARDIZE,
+        "noise_prior": BOTORCH_NOISE_PRIOR().model_dump(),
+        "task_prior": None,
+        "input_preprocessing_specs": {
+            "task": CategoricalEncodingEnum.ORDINAL,
+        },
+        "dump": None,
+        "hyperconfig": MultiTaskGPHyperconfig().model_dump(),
+    },
+)
+
+# if wrong encoding (one-hot) is used, there should be a validation error
+specs.add_invalid(
+    models.MultiTaskGPSurrogate,
+    lambda: {
+        "inputs": Inputs(
+            features=[
+                features.valid(ContinuousInput).obj(),
+            ]
+            + [TaskInput(key="task", categories=["a", "b", "c"])]
+        ).model_dump(),
+        "outputs": Outputs(
+            features=[
+                features.valid(ContinuousOutput).obj(),
+            ]
+        ).model_dump(),
+        "kernel": ScaleKernel(
+            base_kernel=MaternKernel(
+                ard=True, nu=2.5, lengthscale_prior=BOTORCH_LENGTHCALE_PRIOR()
+            ),
+            outputscale_prior=BOTORCH_SCALE_PRIOR(),
+        ).model_dump(),
+        "aggregations": None,
+        "scaler": ScalerEnum.NORMALIZE,
+        "output_scaler": ScalerEnum.STANDARDIZE,
+        "noise_prior": BOTORCH_NOISE_PRIOR().model_dump(),
+        "task_prior": None,
+        "input_preprocessing_specs": {
+            "task": CategoricalEncodingEnum.ONE_HOT,
+        },
+        "dump": None,
+        "hyperconfig": MultiTaskGPHyperconfig().model_dump(),
+    },
+    error=ValueError,
+)
+
+# if there is no task input, there should be a validation error
+specs.add_invalid(
+    models.MultiTaskGPSurrogate,
+    lambda: {
+        "inputs": Inputs(
+            features=[
+                features.valid(ContinuousInput).obj(),
+            ]
+        ).model_dump(),
+        "outputs": Outputs(
+            features=[
+                features.valid(ContinuousOutput).obj(),
+            ]
+        ).model_dump(),
+        "kernel": ScaleKernel(
+            base_kernel=MaternKernel(
+                ard=True, nu=2.5, lengthscale_prior=BOTORCH_LENGTHCALE_PRIOR()
+            ),
+            outputscale_prior=BOTORCH_SCALE_PRIOR(),
+        ).model_dump(),
+        "aggregations": None,
+        "scaler": ScalerEnum.NORMALIZE,
+        "output_scaler": ScalerEnum.STANDARDIZE,
+        "noise_prior": BOTORCH_NOISE_PRIOR().model_dump(),
+        "task_prior": None,
+        "input_preprocessing_specs": {
+            "task": CategoricalEncodingEnum.ORDINAL,
+        },
+        "dump": None,
+        "hyperconfig": MultiTaskGPHyperconfig().model_dump(),
+    },
+    error=ValueError,
+)
diff --git a/tests/bofire/priors/test_mapper.py b/tests/bofire/priors/test_mapper.py
index 22fc9e47a..5a59a4e99 100644
--- a/tests/bofire/priors/test_mapper.py
+++ b/tests/bofire/priors/test_mapper.py
@@ -2,7 +2,7 @@
 import pytest
 
 import bofire.priors.api as priors
-from bofire.data_models.priors.api import GammaPrior, NormalPrior
+from bofire.data_models.priors.api import GammaPrior, LKJPrior, NormalPrior
 
 
 @pytest.mark.parametrize(
@@ -19,3 +19,18 @@ def test_map(prior, expected_prior):
         if key == "type":
             continue
         assert value == getattr(gprior, key)
+
+
+def test_lkj_map():
+    prior = LKJPrior(
+        n_tasks=3, shape=0.4, sd_prior=GammaPrior(concentration=2.0, rate=0.2)
+    )
+    expected_prior = gpytorch.priors.LKJPrior
+
+    gprior = priors.map(prior)
+    assert isinstance(gprior, expected_prior)
+    assert prior.n_tasks == gprior.correlation_prior.n
+    assert prior.shape == gprior.correlation_prior.concentration
+    assert isinstance(gprior.sd_prior, gpytorch.priors.GammaPrior)
+    assert prior.sd_prior.concentration == gprior.sd_prior.concentration
+    assert prior.sd_prior.rate == gprior.sd_prior.rate
diff --git a/tests/bofire/surrogates/test_multitask_gps.py b/tests/bofire/surrogates/test_multitask_gps.py
new file mode 100644
index 000000000..60c6910ec
--- /dev/null
+++ b/tests/bofire/surrogates/test_multitask_gps.py
@@ -0,0 +1,169 @@
+import importlib
+
+import pytest
+from botorch.models import MultiTaskGP
+from botorch.models.transforms.input import (
+    InputStandardize,
+    Normalize,
+)
+from botorch.models.transforms.outcome import Standardize
+from pandas.testing import assert_frame_equal
+
+import bofire.surrogates.api as surrogates
+from bofire.benchmarks.api import MultiTaskHimmelblau
+from bofire.data_models.domain.api import Inputs, Outputs
+from bofire.data_models.enum import CategoricalEncodingEnum
+from bofire.data_models.features.api import (
+    CategoricalInput,
+    ContinuousInput,
+    ContinuousOutput,
+    TaskInput,
+)
+from bofire.data_models.kernels.api import (
+    MaternKernel,
+    RBFKernel,
+)
+from bofire.data_models.priors.api import (
+    BOTORCH_LENGTHCALE_PRIOR,
+    BOTORCH_NOISE_PRIOR,
+    LKJ_PRIOR,
+    MBO_LENGTHCALE_PRIOR,
+    MBO_NOISE_PRIOR,
+)
+from bofire.data_models.surrogates.api import (
+    MultiTaskGPSurrogate,
+    ScalerEnum,
+)
+
+RDKIT_AVAILABLE = importlib.util.find_spec("rdkit") is not None
+
+
+def test_MultiTaskGPHyperconfig():
+    # we test here also the basic trainable
+    benchmark = MultiTaskHimmelblau()
+    surrogate_data_no_hy = MultiTaskGPSurrogate(
+        inputs=benchmark.domain.inputs,
+        outputs=benchmark.domain.outputs,
+        hyperconfig=None,
+    )
+
+    with pytest.raises(ValueError, match="No hyperconfig available."):
+        surrogate_data_no_hy.update_hyperparameters(
+            benchmark.domain.inputs.sample(1).loc[0]
+        )
+    # test that correct stuff is written
+    surrogate_data = MultiTaskGPSurrogate(
+        inputs=benchmark.domain.inputs, outputs=benchmark.domain.outputs
+    )
+    candidate = surrogate_data.hyperconfig.inputs.sample(1).loc[0]
+    surrogate_data.update_hyperparameters(candidate)
+
+    assert surrogate_data.kernel.ard == (candidate["ard"] == "True")
+    if candidate.kernel == "matern_1.5":
+        assert isinstance(surrogate_data.kernel, MaternKernel)
+        assert surrogate_data.kernel.nu == 1.5
+    elif candidate.kernel == "matern_2.5":
+        assert isinstance(surrogate_data.kernel, MaternKernel)
+        assert surrogate_data.kernel.nu == 2.5
+    else:
+        assert isinstance(surrogate_data.kernel, RBFKernel)
+    if candidate.prior == "mbo":
+        assert surrogate_data.noise_prior == MBO_NOISE_PRIOR()
+        assert surrogate_data.kernel.lengthscale_prior == MBO_LENGTHCALE_PRIOR()
+    else:
+        assert surrogate_data.noise_prior == BOTORCH_NOISE_PRIOR()
+        assert surrogate_data.kernel.lengthscale_prior == BOTORCH_LENGTHCALE_PRIOR()
+
+
+def test_MultiTask_input_preprocessing():
+    # test that if no input_preprocessing_specs are provided, the ordinal encoding is used
+    inputs = Inputs(
+        features=[ContinuousInput(key="x", bounds=(-1, 1))]
+        + [TaskInput(key="task_id", categories=["1", "2"])]
+    )
+    outputs = Outputs(features=[ContinuousOutput(key="y")])
+    data_model = MultiTaskGPSurrogate(inputs=inputs, outputs=outputs)
+    assert data_model.input_preprocessing_specs == {
+        "task_id": CategoricalEncodingEnum.ORDINAL
+    }
+
+    # test that if we have a categorical input, one-hot encoding is correctly applied
+    inputs = Inputs(
+        features=[ContinuousInput(key="x", bounds=(-1, 1))]
+        + [CategoricalInput(key="categories", categories=["1", "2"])]
+        + [TaskInput(key="task_id", categories=["1", "2"])]
+    )
+    outputs = Outputs(features=[ContinuousOutput(key="y")])
+    data_model = MultiTaskGPSurrogate(
+        inputs=inputs,
+        outputs=outputs,
+    )
+    assert data_model.input_preprocessing_specs == {
+        "categories": CategoricalEncodingEnum.ONE_HOT,
+        "task_id": CategoricalEncodingEnum.ORDINAL,
+    }
+
+
+@pytest.mark.parametrize(
+    "kernel, scaler, output_scaler, task_prior",
+    [
+        (RBFKernel(ard=True), ScalerEnum.NORMALIZE, ScalerEnum.STANDARDIZE, None),
+        (RBFKernel(ard=False), ScalerEnum.STANDARDIZE, ScalerEnum.STANDARDIZE, None),
+        (RBFKernel(ard=False), ScalerEnum.IDENTITY, ScalerEnum.IDENTITY, LKJ_PRIOR()),
+    ],
+)
+def test_MultiTaskGPModel(kernel, scaler, output_scaler, task_prior):
+    benchmark = MultiTaskHimmelblau()
+    inputs = benchmark.domain.inputs
+    outputs = benchmark.domain.outputs
+    experiments = benchmark.f(inputs.sample(10), return_complete=True)
+
+    model = MultiTaskGPSurrogate(
+        inputs=inputs,
+        outputs=outputs,
+        scaler=scaler,
+        output_scaler=output_scaler,
+        kernel=kernel,
+        task_prior=task_prior,
+    )
+
+    model = surrogates.map(model)
+    with pytest.raises(ValueError):
+        model.dumps()
+    # if task_prior is not None, a warning should be raised
+    if task_prior is not None:
+        with pytest.warns(UserWarning):
+            model.fit(experiments)
+    else:
+        model.fit(experiments)
+    # dump the model
+    dump = model.dumps()
+    # make predictions
+    samples = inputs.sample(5)
+    preds = model.predict(samples)
+    assert preds.shape == (5, 2)
+    # check that model is composed correctly
+    assert isinstance(model.model, MultiTaskGP)
+    if output_scaler == ScalerEnum.STANDARDIZE:
+        assert isinstance(model.model.outcome_transform, Standardize)
+    elif output_scaler == ScalerEnum.IDENTITY:
+        assert not hasattr(model.model, "outcome_transform")
+    if scaler == ScalerEnum.NORMALIZE:
+        assert isinstance(model.model.input_transform, Normalize)
+    elif scaler == ScalerEnum.STANDARDIZE:
+        assert isinstance(model.model.input_transform, InputStandardize)
+    else:
+        assert not hasattr(model.model, "input_transform")
+    assert model.is_compatibilized is False
+    # reload the model from dump and check for equality in predictions
+    model2 = MultiTaskGPSurrogate(
+        inputs=inputs,
+        outputs=outputs,
+        kernel=kernel,
+        scaler=scaler,
+        output_scaler=output_scaler,
+    )
+    model2 = surrogates.map(model2)
+    model2.loads(dump)
+    preds2 = model2.predict(samples)
+    assert_frame_equal(preds, preds2)