Interpoint Constraints (#313)
* add interpoint constraint data models

* harmonize intra and inter constraints

* implement interpoint constraint into polytope sampler

* refactor polytope sampler
jduerholt authored Nov 30, 2023
1 parent 176b83a commit cb12051
Showing 15 changed files with 322 additions and 100 deletions.
8 changes: 8 additions & 0 deletions bofire/data_models/constraints/api.py
@@ -4,6 +4,11 @@
Constraint,
ConstraintError,
ConstraintNotFulfilledError,
IntrapointConstraint,
)
from bofire.data_models.constraints.interpoint import (
InterpointConstraint,
InterpointEqualityConstraint,
)
from bofire.data_models.constraints.linear import (
LinearConstraint,
@@ -21,6 +26,8 @@
Constraint,
LinearConstraint,
NonlinearConstraint,
IntrapointConstraint,
InterpointConstraint,
]

AnyConstraint = Union[
@@ -29,6 +36,7 @@
NonlinearEqualityConstraint,
NonlinearInequalityConstraint,
NChooseKConstraint,
InterpointEqualityConstraint,
]

AnyConstraintError = Union[ConstraintError, ConstraintNotFulfilledError]
8 changes: 8 additions & 0 deletions bofire/data_models/constraints/constraint.py
@@ -52,6 +52,14 @@ def jacobian(self, experiments: pd.DataFrame) -> pd.DataFrame:
pass


class IntrapointConstraint(Constraint):
"""An intrapoint constraint describes required relationships within a candidate
when asking a strategy to return one or more candidates.
"""

type: str


class ConstraintError(Exception):
"""Base Error for Constraints"""

49 changes: 49 additions & 0 deletions bofire/data_models/constraints/interpoint.py
@@ -0,0 +1,49 @@
import math
from typing import Annotated, Literal, Optional

import numpy as np
import pandas as pd
from pydantic import Field

from bofire.data_models.constraints.constraint import Constraint


class InterpointConstraint(Constraint):
"""An interpoint constraint describes required relationships between individual
candidates when asking a strategy for returning more than one candidate.
"""

type: str


class InterpointEqualityConstraint(InterpointConstraint):
"""Constraint that forces that values of a certain feature of a set/batch of
candidates should have the same value.
Attributes:
feature(str): The constrained feature.
multiplicity(int): The multiplicity of the constraint, stating how many
values of the feature in the batch should have always the same value.
"""

type: Literal["InterpointEqualityConstraint"] = "InterpointEqualityConstraint"
feature: str
multiplicity: Optional[Annotated[int, Field(ge=2)]]

def is_fulfilled(
self, experiments: pd.DataFrame, tol: Optional[float] = 1e-6
) -> pd.Series:
multiplicity = self.multiplicity or len(experiments)
for i in range(math.ceil(len(experiments) / multiplicity)):
batch = experiments[self.feature].values[
i * multiplicity : min((i + 1) * multiplicity, len(experiments))
]
if not np.allclose(batch, batch[0]):
return pd.Series([False])
return pd.Series([True])

def __call__(self, experiments: pd.DataFrame) -> pd.Series:
raise NotImplementedError("Method `__call__` currently not implemented.")

def jacobian(self, experiments: pd.DataFrame) -> pd.DataFrame:
raise NotImplementedError("Method `jacobian` currently not implemented.")
13 changes: 0 additions & 13 deletions bofire/data_models/constraints/linear.py
@@ -47,19 +47,6 @@ def __call__(self, experiments: pd.DataFrame) -> pd.Series:
experiments[self.features] @ self.coefficients - self.rhs
) / np.linalg.norm(self.coefficients)

# def lhs(self, df_data: pd.DataFrame) -> float:
# """Evaluate the left-hand side of the constraint on each row of a dataframe

# Args:
# df_data (pd.DataFrame): Dataframe on which the left-hand side should be evaluated.

# Returns:
# np.array: 1-dim array with left-hand side of each row of the provided dataframe.
# """
# cols = self.features
# coefficients = self.coefficients
# return np.sum(df_data[cols].values * np.array(coefficients), axis=1)

def __str__(self) -> str:
"""Generate string representation of the constraint.
10 changes: 7 additions & 3 deletions bofire/data_models/domain/constraints.py
@@ -67,9 +67,13 @@ def is_fulfilled(self, experiments: pd.DataFrame, tol: float = 1e-6) -> pd.Series:
"""
if len(self.constraints) == 0:
return pd.Series([True] * len(experiments), index=experiments.index)
return pd.concat(
[c.is_fulfilled(experiments, tol) for c in self.constraints], axis=1
).all(axis=1)
return (
pd.concat(
[c.is_fulfilled(experiments, tol) for c in self.constraints], axis=1
)
.fillna(True)
.all(axis=1)
)

def get(
self,
Expand Down
13 changes: 9 additions & 4 deletions bofire/data_models/strategies/random.py
@@ -2,8 +2,10 @@

from bofire.data_models.constraints.api import (
Constraint,
InterpointEqualityConstraint,
LinearEqualityConstraint,
LinearInequalityConstraint,
NChooseKConstraint,
NonlinearEqualityConstraint,
)
from bofire.data_models.features.api import Feature
from bofire.data_models.objectives.api import Objective
@@ -15,9 +17,12 @@ class RandomStrategy(Strategy):

@classmethod
def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
if my_type in [NChooseKConstraint, NonlinearEqualityConstraint]:
return False
return True
return my_type in [
LinearInequalityConstraint,
LinearEqualityConstraint,
NChooseKConstraint,
InterpointEqualityConstraint,
]

@classmethod
def is_feature_implemented(cls, my_type: Type[Feature]) -> bool:
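The check flips from a blacklist to an explicit whitelist. Beyond adding `InterpointEqualityConstraint`, this also changes behavior for types previously handled by omission: `NonlinearInequalityConstraint` is now rejected, while `NChooseKConstraint` moves from rejected to accepted. A quick sanity check, assuming the data models are re-exported through the usual `api` modules:

```python
from bofire.data_models.constraints.api import (
    InterpointEqualityConstraint,
    NonlinearInequalityConstraint,
)
from bofire.data_models.strategies.api import RandomStrategy

assert RandomStrategy.is_constraint_implemented(InterpointEqualityConstraint)
assert not RandomStrategy.is_constraint_implemented(NonlinearInequalityConstraint)
```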
8 changes: 7 additions & 1 deletion bofire/data_models/strategies/samplers/polytope.py
@@ -1,6 +1,9 @@
from typing import Literal, Type
from typing import Annotated, Literal, Type

from pydantic import Field

from bofire.data_models.constraints.api import (
InterpointEqualityConstraint,
LinearEqualityConstraint,
LinearInequalityConstraint,
NChooseKConstraint,
@@ -29,13 +32,16 @@ class PolytopeSampler(SamplerStrategy):

type: Literal["PolytopeSampler"] = "PolytopeSampler"
fallback_sampling_method: SamplingMethodEnum = SamplingMethodEnum.UNIFORM
n_burnin: Annotated[int, Field(ge=1)] = 1000
n_thinning: Annotated[int, Field(ge=1)] = 32

@classmethod
def is_constraint_implemented(cls, my_type: Type[Feature]) -> bool:
return my_type in [
LinearInequalityConstraint,
LinearEqualityConstraint,
NChooseKConstraint,
InterpointEqualityConstraint,
]

@classmethod
Expand Down
134 changes: 72 additions & 62 deletions bofire/strategies/samplers/polytope.py
@@ -1,9 +1,11 @@
import warnings
from typing import Dict

import numpy as np
import pandas as pd
import torch
from botorch.utils.sampling import get_polytope_samples
from botorch.optim.initializers import sample_q_batches_from_polytope
from botorch.optim.parameter_constraints import _generate_unfixed_lin_constraints

from bofire.data_models.constraints.api import (
LinearEqualityConstraint,
@@ -17,7 +19,11 @@
)
from bofire.data_models.strategies.api import PolytopeSampler as DataModel
from bofire.strategies.samplers.sampler import SamplerStrategy
from bofire.utils.torch_tools import get_linear_constraints, tkwargs
from bofire.utils.torch_tools import (
get_interpoint_constraints,
get_linear_constraints,
tkwargs,
)


class PolytopeSampler(SamplerStrategy):
@@ -35,6 +41,8 @@ def __init__(
**kwargs,
):
super().__init__(data_model=data_model, **kwargs)
self.n_burnin = data_model.n_burnin
self.n_thinning = data_model.n_thinning
self.fallback_sampling_method = data_model.fallback_sampling_method

def _ask(self, n: int) -> pd.DataFrame:
@@ -50,34 +58,47 @@ def _ask(self, n: int) -> pd.DataFrame:
unit_scaled=False,
)
cleaned_eqs = []
pseudo_fixed = {}
fixed_features: Dict[str, float] = {
feat.key: feat.fixed_value()[0] # type: ignore
for feat in self.domain.inputs.get(ContinuousInput)
if feat.is_fixed() # type: ignore
}

for eq in eqs:
if (
len(eq[0]) == 1
): # only one coefficient, so this is a pseudo fixed feature
pseudo_fixed[
fixed_features[
self.domain.inputs.get_keys(ContinuousInput)[eq[0][0]]
] = float(eq[2] / eq[1][0])
else:
cleaned_eqs.append(eq)

# we have to map the indices in case of fixed features
# as we remove all fixed feature for the sampler, we have to adjust the
# indices in the constraints, here we get the mapper to map original
# to adjusted indices
feature_map = {}
counter = 0
for i, feat in enumerate(self.domain.get_features(ContinuousInput)):
if (not feat.is_fixed()) and (feat.key not in pseudo_fixed.keys()): # type: ignore
feature_map[i] = counter
counter += 1

# get the bounds
fixed_features_indices: Dict[int, float] = {
self.domain.inputs.get_keys(ContinuousInput).index(key): value
for key, value in fixed_features.items()
}

ineqs = get_linear_constraints(
domain=self.domain,
constraint=LinearInequalityConstraint, # type: ignore
unit_scaled=False,
)

interpoints = get_interpoint_constraints(domain=self.domain, n_candidates=n)

lower = [
feat.lower_bound # type: ignore
for feat in self.domain.get_features(ContinuousInput)
if not feat.is_fixed() and feat.key not in pseudo_fixed.keys() # type: ignore
if feat.key not in fixed_features.keys() # type: ignore
]

upper = [
feat.upper_bound # type: ignore
for feat in self.domain.get_features(ContinuousInput)
if feat.key not in fixed_features.keys() # type: ignore
]

if len(lower) == 0:
warnings.warn(
"Nothing to sample, all is fixed. Just the fixed set is returned.",
@@ -87,48 +108,42 @@ def _ask(self, n: int) -> pd.DataFrame:
data=np.nan, index=range(n), columns=self.domain.inputs.get_keys()
)
else:
upper = [
feat.upper_bound # type: ignore
for feat in self.domain.get_features(ContinuousInput)
if not feat.is_fixed() and feat.key not in pseudo_fixed.keys() # type: ignore
]
bounds = torch.tensor([lower, upper]).to(**tkwargs)
assert bounds.shape[-1] == len(feature_map) == counter

# get the inequality constraints and map features back
# we also check that only features present in the mapper
# are present in the constraints
ineqs = get_linear_constraints(
domain=self.domain,
constraint=LinearInequalityConstraint, # type: ignore
unit_scaled=False,

unfixed_ineqs = _generate_unfixed_lin_constraints(
constraints=ineqs,
eq=False,
fixed_features=fixed_features_indices,
dimension=len(self.domain.inputs.get(ContinuousInput)),
)
unfixed_eqs = _generate_unfixed_lin_constraints(
constraints=cleaned_eqs,
eq=True,
fixed_features=fixed_features_indices,
dimension=len(self.domain.inputs.get(ContinuousInput)),
)
for ineq in ineqs:
for key, value in feature_map.items():
if key != value:
ineq[0][ineq[0] == key] = value
assert (
ineq[0].max() <= counter
), "Something went wrong when transforming the linear constraints. Revisit the problem."

# map the indice of the equality constraints
for eq in cleaned_eqs:
for key, value in feature_map.items():
if key != value:
eq[0][eq[0] == key] = value
assert (
eq[0].max() <= counter
), "Something went wrong when transforming the linear constraints. Revisit the problem."
unfixed_interpoints = _generate_unfixed_lin_constraints(
constraints=interpoints,
eq=True,
fixed_features=fixed_features_indices,
dimension=len(self.domain.inputs.get(ContinuousInput)),
)

combined_eqs = unfixed_eqs + unfixed_interpoints # type: ignore

# now use the hit and run sampler
candidates = get_polytope_samples(
n=n,
candidates = sample_q_batches_from_polytope(
n=1,
q=n,
bounds=bounds.to(**tkwargs),
inequality_constraints=ineqs if len(ineqs) > 0 else None,
equality_constraints=cleaned_eqs if len(cleaned_eqs) > 0 else None,
n_burnin=1000,
# thinning=200
)
inequality_constraints=unfixed_ineqs
if len(unfixed_ineqs) > 0 # type: ignore
else None,
equality_constraints=combined_eqs if len(combined_eqs) > 0 else None,
n_burnin=self.n_burnin,
thinning=self.n_thinning,
seed=self.seed,
).squeeze(dim=0)

# check that the randomly generated candidates are not always the same
if (candidates.unique(dim=0).shape[0] != n) and (n > 1):
free_continuals = [
feat.key
for feat in self.domain.get_features(ContinuousInput)
if not feat.is_fixed() and feat.key not in pseudo_fixed.keys() # type: ignore
if feat.key not in fixed_features.keys() # type: ignore
]

# setup the output
samples = pd.DataFrame(
data=candidates.detach().numpy().reshape(n, len(free_continuals)),
data=candidates.detach().numpy(),
index=range(n),
columns=free_continuals,
)
samples[feat.key] = feat.sample(n) # type: ignore

# setup the fixed continuous ones
for feat in self.domain.inputs.get_fixed():
samples[feat.key] = feat.fixed_value()[0] # type: ignore

# setup the pseudo fixed ones
for key, value in pseudo_fixed.items():
for key, value in fixed_features.items():
samples[key] = value

return samples
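Taken together, the refactor drops the hand-rolled index remapping in favor of botorch's `_generate_unfixed_lin_constraints` helper and draws all `n` candidates as one `q`-batch via `sample_q_batches_from_polytope`, so that the interpoint equalities, which couple different candidates of the batch, can be passed to the sampler alongside the ordinary equality constraints. An end-to-end sketch, assuming the usual data-model-to-strategy mapper and `Domain.from_lists`:

```python
import bofire.strategies.api as strategies
from bofire.data_models.constraints.api import InterpointEqualityConstraint
from bofire.data_models.domain.api import Domain
from bofire.data_models.features.api import ContinuousInput
from bofire.data_models.strategies.api import PolytopeSampler

domain = Domain.from_lists(
    inputs=[
        ContinuousInput(key="x1", bounds=(0, 1)),
        ContinuousInput(key="x2", bounds=(0, 1)),
    ],
    constraints=[InterpointEqualityConstraint(feature="x1", multiplicity=3)],
)

sampler = strategies.map(PolytopeSampler(domain=domain))
candidates = sampler.ask(6)

# "x1" should now be constant within each consecutive batch of three
# candidates, while "x2" varies freely across all six rows.
print(candidates)
```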