DoE: Fix bug if fixed_experiments contain columns that are not in domain (#321)

* add fix and test
* add fix and test
* partial and fixed experiments are internally reduced to only columns that are present in domain
* condense tests
* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs
* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs
* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs
* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs
* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs
* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs
* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs
* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs
* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs
* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs
* fix test
* fix test
* fix test
* test warnings
* partially fixed experiments only validate if every input is present in columns. Revert changes to validate_candidates
experimental-design · Dec 19, 2023 · c57bcab · c57bcab
1 parent d4c5c49
commit c57bcab
Show file tree

Hide file tree

Showing 2 changed files with 289 additions and 27 deletions.
diff --git a/bofire/strategies/doe/design.py b/bofire/strategies/doe/design.py
@@ -402,6 +402,33 @@ def find_local_max_ipopt(
         )
         raise e
 
+    # determine number of experiments (only relevant if n_experiments is not provided by the user)
+    n_experiments = get_n_experiments(
+        domain=domain, model_type=model_type, n_experiments=n_experiments
+    )
+
+    if partially_fixed_experiments is not None:
+        # check if partially fixed experiments are valid
+        check_partially_fixed_experiments(
+            domain, n_experiments, partially_fixed_experiments
+        )
+        # no columns from partially fixed experiments which are not in the domain
+        partially_fixed_experiments = partially_fixed_experiments[
+            domain.inputs.get_keys()
+        ]
+
+    if fixed_experiments is not None:
+        # check if  fixed experiments are valid
+        check_fixed_experiments(domain, n_experiments, fixed_experiments)
+        # no columns from fixed experiments which are not in the domain
+        fixed_experiments = fixed_experiments[domain.inputs.get_keys()]
+
+    if (partially_fixed_experiments is not None) and (fixed_experiments is not None):
+        # check if partially fixed experiments and fixed experiments are valid
+        check_partially_and_fully_fixed_experiments(
+            domain, n_experiments, fixed_experiments, partially_fixed_experiments
+        )
+
     # warn user about usage of nonlinear constraints
     if domain.constraints:
         if np.any([isinstance(c, NonlinearConstraint) for c in domain.constraints]):
@@ -418,11 +445,6 @@ def find_local_max_ipopt(
         if isinstance(c, NChooseKConstraint)
     ), "NChooseKConstraint with min_count !=0 is not supported!"
 
-    # determine number of experiments (only relevant if n_experiments is not provided by the user)
-    n_experiments = get_n_experiments(
-        domain=domain, model_type=model_type, n_experiments=n_experiments
-    )
-
     #
     # Sampling initital values
     #
@@ -583,59 +605,80 @@ def partially_fix_experiment(
 
 
 def check_fixed_experiments(
-    domain: Domain, n_experiments: int, fixed_experiments: np.ndarray
+    domain: Domain, n_experiments: int, fixed_experiments: pd.DataFrame
 ) -> None:
     """Checks if the shape of the fixed experiments is correct and if the number of fixed experiments is valid
     Args:
         domain (Domain): domain defining the input variables used for the check.
         n_experiments (int): total number of experiments in the design that fixed_experiments are part of.
-        fixed_experiments (np.ndarray): fixed experiment proposals to be checked.
+        fixed_experiments (pd.DataFrame): fixed experiment proposals to be checked.
     """
 
-    n_fixed_experiments, D = np.array(fixed_experiments).shape
+    n_fixed_experiments = len(fixed_experiments.index)
 
     if n_fixed_experiments >= n_experiments:
         raise ValueError(
             "For starting the optimization the total number of experiments must be larger that the number of fixed experiments."
         )
 
-    if D != len(domain.inputs):
+    domain.validate_candidates(
+        candidates=fixed_experiments,
+        only_inputs=True,
+    )
+
+
+def check_partially_fixed_experiments(
+    domain: Domain,
+    n_experiments: int,
+    partially_fixed_experiments: pd.DataFrame,
+) -> None:
+
+    n_partially_fixed_experiments = len(partially_fixed_experiments.index)
+
+    # for partially fixed experiments only check if all inputs are part of the domain
+    if not all(
+        key in partially_fixed_experiments.columns for key in domain.inputs.get_keys()
+    ):
         raise ValueError(
-            f"Invalid shape of fixed_experiments. Length along axis 1 is {D}, but must be {len(domain.inputs)}"
+            "Domain contains inputs that are not part of partially fixed experiments. Every input must be present as a column."
+        )
+
+    if n_partially_fixed_experiments > n_experiments:
+        warnings.warn(
+            UserWarning(
+                "The number of partially fixed experiments exceeds the amount "
+                "of the overall count of experiments. Partially fixed experiments may be cut off"
+            )
         )
 
 
 def check_partially_and_fully_fixed_experiments(
     domain: Domain,
     n_experiments: int,
-    fixed_experiments: np.ndarray,
-    paritally_fixed_experiments: np.ndarray,
+    fixed_experiments: pd.DataFrame,
+    partially_fixed_experiments: pd.DataFrame,
 ) -> None:
     """Checks if the shape of the fixed experiments is correct and if the number of fixed experiments is valid
     Args:
         domain (Domain): domain defining the input variables used for the check.
         n_experiments (int): total number of experiments in the design that fixed_experiments are part of.
-        fixed_experiments (np.ndarray): fixed experiment proposals to be checked.
-        paritally_fixed_experiments (np.ndarray): partially fixed experiment proposals to be checked.
+        fixed_experiments (pd.DataFrame): fixed experiment proposals to be checked.
+        partially_fixed_experiments (pd.DataFrame): partially fixed experiment proposals to be checked.
     """
 
     check_fixed_experiments(domain, n_experiments, fixed_experiments)
-    n_fixed_experiments, dim = np.array(fixed_experiments).shape
-
-    n_partially_fixed_experiments, partially_dim = np.array(
-        paritally_fixed_experiments
-    ).shape
+    check_partially_fixed_experiments(
+        domain, n_experiments, partially_fixed_experiments
+    )
+    n_fixed_experiments = len(fixed_experiments.index)
 
-    if partially_dim != len(domain.inputs):
-        raise ValueError(
-            f"Invalid shape of partially_fixed_experiments. Length along axis 1 is {partially_dim}, but must be {len(domain.inputs)}"
-        )
+    n_partially_fixed_experiments = len(partially_fixed_experiments.index)
 
     if n_fixed_experiments + n_partially_fixed_experiments > n_experiments:
         warnings.warn(
             UserWarning(
                 "The number of fixed experiments and partially fixed experiments exceeds the amount "
-                "of the overall count of experiments. Partially fixed experiments may be cut of"
+                "of the overall count of experiments. Partially fixed experiments may be cut off"
             )
         )
 

diff --git a/tests/bofire/strategies/doe/test_design.py b/tests/bofire/strategies/doe/test_design.py
@@ -18,6 +18,7 @@
 from bofire.strategies.doe.design import (
     check_fixed_experiments,
     check_partially_and_fully_fixed_experiments,
+    check_partially_fixed_experiments,
     find_local_max_ipopt,
     get_n_experiments,
 )
@@ -480,7 +481,7 @@ def test_get_n_experiments():
 
 
 @pytest.mark.skipif(not CYIPOPT_AVAILABLE, reason="requires cyipopt")
-def test_partially_fixed_experiments():
+def test_fixed_experiments_checker():
     domain = Domain(
         inputs=[
             ContinuousInput(key="x1", bounds=(0, 5)),
@@ -540,13 +541,231 @@ def test_partially_fixed_experiments():
     )
 
     # partially fixed will be cut of
-    with pytest.warns(UserWarning):
+    with pytest.warns(UserWarning) as record:
         check_partially_and_fully_fixed_experiments(
             domain, 3, fixed_experiments, partially_fixed_experiments
         )
+        assert len(record) == 1
+        assert record[0].message.args[0] == (
+            "The number of fixed experiments and partially fixed experiments exceeds the amount "
+            "of the overall count of experiments. Partially fixed experiments may be cut off"
+        )
+
+    with pytest.warns(UserWarning) as record:
+        check_partially_fixed_experiments(domain, 1, partially_fixed_experiments)
+        assert len(record) == 1
+        assert record[0].message.args[0] == (
+            "The number of partially fixed experiments exceeds the amount "
+            "of the overall count of experiments. Partially fixed experiments may be cut off"
+        )
 
     # to few experiments
-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError) as e:
         check_partially_and_fully_fixed_experiments(
             domain, 2, fixed_experiments, partially_fixed_experiments
         )
+        assert e == ValueError(
+            "For starting the optimization the total number of experiments must be larger that the number of fixed experiments."
+        )
+
+    with pytest.raises(ValueError) as e:
+        check_fixed_experiments(domain, 2, fixed_experiments)
+        assert e == ValueError(
+            "For starting the optimization the total number of experiments must be larger that the number of fixed experiments."
+        )
+
+
+def test_partially_fixed_experiments():
+    domain = Domain(
+        inputs=[
+            ContinuousInput(key="x1", bounds=(0, 5)),
+            ContinuousInput(key="x2", bounds=(0, 15)),
+            ContinuousInput(key="a1", bounds=(0, 1)),
+            ContinuousInput(key="a2", bounds=(0, 1)),
+        ],
+        outputs=[ContinuousOutput(key="y")],
+        constraints=[
+            # Case 1: a and b are active
+            LinearInequalityConstraint(
+                features=["x1", "x2", "a1", "a2"], coefficients=[1, 1, 10, -10], rhs=15
+            ),
+            LinearInequalityConstraint(
+                features=["x1", "x2", "a1", "a2"], coefficients=[1, 0.2, 2, -2], rhs=5
+            ),
+            LinearInequalityConstraint(
+                features=["x1", "x2", "a1", "a2"], coefficients=[1, -1, -3, 3], rhs=5
+            ),
+            # Case 2: a and c are active
+            LinearInequalityConstraint(
+                features=["x1", "x2", "a1", "a2"], coefficients=[1, 1, -10, -10], rhs=5
+            ),
+            LinearInequalityConstraint(
+                features=["x1", "x2", "a1", "a2"], coefficients=[1, 0.2, 2, 2], rhs=7
+            ),
+            LinearInequalityConstraint(
+                features=["x1", "x2", "a1", "a2"], coefficients=[1, -1, -3, -3], rhs=2
+            ),
+            # Case 3: c and b are active
+            LinearInequalityConstraint(
+                features=["x1", "x2", "a1", "a2"], coefficients=[1, 1, 0, -10], rhs=5
+            ),
+            LinearInequalityConstraint(
+                features=["x1", "x2", "a1", "a2"], coefficients=[1, 0.2, 0, 2], rhs=5
+            ),
+            LinearInequalityConstraint(
+                features=["x1", "x2", "a1", "a2"], coefficients=[1, -1, 0, 3], rhs=5
+            ),
+        ],
+    )
+
+    def get_domain_error(feature):
+        return ValueError(f"no col for input feature `{feature}`")
+
+    fixed_experiments = pd.DataFrame(
+        np.array([[1, 0, 0, 0], [0, 1, 0.7, 1]]), columns=domain.inputs.get_keys()
+    )
+
+    doe = find_local_max_ipopt(
+        domain, "linear", n_experiments=3, fixed_experiments=fixed_experiments
+    ).reset_index(drop=True)
+
+    assert doe.shape == (3, 4)
+    assert np.allclose(doe.iloc[[0, 1]]["x1"], fixed_experiments["x1"])
+    assert np.allclose(doe.iloc[[0, 1]]["x2"], fixed_experiments["x2"])
+    assert np.allclose(doe.iloc[[0, 1]]["a1"], fixed_experiments["a1"])
+    assert np.allclose(doe.iloc[[0, 1]]["a2"], fixed_experiments["a2"])
+
+    fixed_experiments = pd.DataFrame(
+        np.array([[1, 0, 0], [0, 1, 0.7]]), columns=["x1", "x2", "a1"]
+    )
+
+    with pytest.raises(ValueError) as e:
+        doe = find_local_max_ipopt(
+            domain, "linear", n_experiments=2, fixed_experiments=fixed_experiments
+        )
+        assert e == get_domain_error("a2")
+
+    partially_fixed_experiments = pd.DataFrame(
+        np.array([[1.0, None, None], [0.0, None, None]]),
+        columns=["x1", "x2", "a1"],
+    )
+
+    with pytest.raises(ValueError) as e:
+        doe = find_local_max_ipopt(
+            domain,
+            "linear",
+            n_experiments=2,
+            partially_fixed_experiments=partially_fixed_experiments,
+        )
+        assert e == get_domain_error("a2")
+
+    fixed_experiments = pd.DataFrame(
+        np.array([[1, 0, 0, 0, 1], [0, 1, 0.7, 1, 2]]),
+        columns=domain.inputs.get_keys() + ["c0"],
+    )
+
+    doe = find_local_max_ipopt(
+        domain, "linear", n_experiments=3, fixed_experiments=fixed_experiments
+    ).reset_index(drop=True)
+
+    assert doe.shape == (3, 4)
+    assert np.allclose(doe.iloc[[0, 1]]["x1"], fixed_experiments["x1"])
+    assert np.allclose(doe.iloc[[0, 1]]["x2"], fixed_experiments["x2"])
+    assert np.allclose(doe.iloc[[0, 1]]["a1"], fixed_experiments["a1"])
+    assert np.allclose(doe.iloc[[0, 1]]["a2"], fixed_experiments["a2"])
+
+    partially_fixed_experiments = pd.DataFrame(
+        np.array([[1.0, None, None, None, 1.0], [0.0, None, None, None, 2.0]]),
+        columns=["x1", "x2", "a1", "a2", "c0"],
+    )
+    doe = find_local_max_ipopt(
+        domain,
+        "linear",
+        n_experiments=3,
+        partially_fixed_experiments=partially_fixed_experiments,
+    ).reset_index(drop=True)
+
+    assert doe.shape == (3, 4)
+    assert np.allclose(
+        doe.iloc[[0, 1]]["x1"], partially_fixed_experiments["x1"].astype(float)
+    )
+
+    doe = find_local_max_ipopt(
+        domain,
+        "linear",
+        n_experiments=4,
+        fixed_experiments=fixed_experiments,
+        partially_fixed_experiments=partially_fixed_experiments,
+    ).reset_index(drop=True)
+
+    assert doe.shape == (4, 4)
+    assert np.allclose(doe.iloc[[0, 1]]["x1"], fixed_experiments["x1"])
+    assert np.allclose(doe.iloc[[0, 1]]["x2"], fixed_experiments["x2"])
+    assert np.allclose(doe.iloc[[0, 1]]["a1"], fixed_experiments["a1"])
+    assert np.allclose(doe.iloc[[0, 1]]["a2"], fixed_experiments["a2"])
+    assert np.allclose(
+        doe.iloc[[2, 3]]["x1"], partially_fixed_experiments["x1"].astype(float)
+    )
+
+    too_few_experiments_error = ValueError(
+        "For starting the optimization the total number of experiments must be larger that the number of fixed experiments."
+    )
+    with pytest.raises(ValueError) as e:
+        doe = find_local_max_ipopt(
+            domain,
+            "linear",
+            n_experiments=1,
+            fixed_experiments=fixed_experiments,
+            partially_fixed_experiments=partially_fixed_experiments,
+        )
+        assert e == too_few_experiments_error
+    with pytest.raises(ValueError) as e:
+        doe = find_local_max_ipopt(
+            domain,
+            "linear",
+            n_experiments=2,
+            fixed_experiments=fixed_experiments,
+            partially_fixed_experiments=partially_fixed_experiments,
+        )
+        assert e == too_few_experiments_error
+
+    _fixed_experiments = fixed_experiments.drop(columns=["x1"])
+    with pytest.raises(ValueError) as e:
+        doe = find_local_max_ipopt(
+            domain,
+            "linear",
+            n_experiments=3,
+            fixed_experiments=_fixed_experiments,
+            partially_fixed_experiments=partially_fixed_experiments,
+        )
+        assert e == get_domain_error("x1")
+
+    _partially_fixed_experiments = partially_fixed_experiments.drop(columns=["x1"])
+    with pytest.raises(ValueError) as e:
+        doe = find_local_max_ipopt(
+            domain,
+            "linear",
+            n_experiments=3,
+            fixed_experiments=fixed_experiments,
+            partially_fixed_experiments=_partially_fixed_experiments,
+        )
+        assert e == ValueError(
+            "Domain contains inputs that are not part of partially fixed experiments. Every input must be present as a column."
+        )
+
+    with pytest.raises(ValueError) as e:
+        doe = find_local_max_ipopt(
+            domain,
+            "linear",
+            n_experiments=3,
+            fixed_experiments=_fixed_experiments,
+            partially_fixed_experiments=_partially_fixed_experiments,
+        )
+        assert e == ValueError(
+            "Domain contains inputs that are not part of partially fixed experiments. Every input must be present as a column."
+        )
+
+
+if __name__ == "__main__":
+    test_fixed_experiments_checker()
+    test_partially_fixed_experiments()