Skip to content

Commit

Permalink
DoE: Fix bug if fixed_experiments contain columns that are not in dom…
Browse files Browse the repository at this point in the history
…ain (#321)

* add fix and test

* add fix and test

* partial and fixed experiments are internally reduced to only columns that are present in domain

* condense tests

* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs

* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs

* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs

* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs

* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs

* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs

* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs

* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs

* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs

* first check if (partially) fixed experiments have all inputs in domain defined - then project the tables on only inputs

* fix test

* fix test

* fix test

* test warnings

* partially fixed experiments only validate if every input is present in columns. Revert changes to validate_candidates
  • Loading branch information
dlinzner-bcs authored Dec 19, 2023
1 parent d4c5c49 commit c57bcab
Show file tree
Hide file tree
Showing 2 changed files with 289 additions and 27 deletions.
91 changes: 67 additions & 24 deletions bofire/strategies/doe/design.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,33 @@ def find_local_max_ipopt(
)
raise e

# determine number of experiments (only relevant if n_experiments is not provided by the user)
n_experiments = get_n_experiments(
domain=domain, model_type=model_type, n_experiments=n_experiments
)

if partially_fixed_experiments is not None:
# check if partially fixed experiments are valid
check_partially_fixed_experiments(
domain, n_experiments, partially_fixed_experiments
)
# no columns from partially fixed experiments which are not in the domain
partially_fixed_experiments = partially_fixed_experiments[
domain.inputs.get_keys()
]

if fixed_experiments is not None:
# check if fixed experiments are valid
check_fixed_experiments(domain, n_experiments, fixed_experiments)
# no columns from fixed experiments which are not in the domain
fixed_experiments = fixed_experiments[domain.inputs.get_keys()]

if (partially_fixed_experiments is not None) and (fixed_experiments is not None):
# check if partially fixed experiments and fixed experiments are valid
check_partially_and_fully_fixed_experiments(
domain, n_experiments, fixed_experiments, partially_fixed_experiments
)

# warn user about usage of nonlinear constraints
if domain.constraints:
if np.any([isinstance(c, NonlinearConstraint) for c in domain.constraints]):
Expand All @@ -418,11 +445,6 @@ def find_local_max_ipopt(
if isinstance(c, NChooseKConstraint)
), "NChooseKConstraint with min_count !=0 is not supported!"

# determine number of experiments (only relevant if n_experiments is not provided by the user)
n_experiments = get_n_experiments(
domain=domain, model_type=model_type, n_experiments=n_experiments
)

#
# Sampling initial values
#
Expand Down Expand Up @@ -583,59 +605,80 @@ def partially_fix_experiment(


def check_fixed_experiments(
domain: Domain, n_experiments: int, fixed_experiments: np.ndarray
domain: Domain, n_experiments: int, fixed_experiments: pd.DataFrame
) -> None:
"""Checks if the shape of the fixed experiments is correct and if the number of fixed experiments is valid
Args:
domain (Domain): domain defining the input variables used for the check.
n_experiments (int): total number of experiments in the design that fixed_experiments are part of.
fixed_experiments (np.ndarray): fixed experiment proposals to be checked.
fixed_experiments (pd.DataFrame): fixed experiment proposals to be checked.
"""

n_fixed_experiments, D = np.array(fixed_experiments).shape
n_fixed_experiments = len(fixed_experiments.index)

if n_fixed_experiments >= n_experiments:
raise ValueError(
"For starting the optimization the total number of experiments must be larger that the number of fixed experiments."
)

if D != len(domain.inputs):
domain.validate_candidates(
candidates=fixed_experiments,
only_inputs=True,
)


def check_partially_fixed_experiments(
domain: Domain,
n_experiments: int,
partially_fixed_experiments: pd.DataFrame,
) -> None:

n_partially_fixed_experiments = len(partially_fixed_experiments.index)

# for partially fixed experiments only check if all inputs are part of the domain
if not all(
key in partially_fixed_experiments.columns for key in domain.inputs.get_keys()
):
raise ValueError(
f"Invalid shape of fixed_experiments. Length along axis 1 is {D}, but must be {len(domain.inputs)}"
"Domain contains inputs that are not part of partially fixed experiments. Every input must be present as a column."
)

if n_partially_fixed_experiments > n_experiments:
warnings.warn(
UserWarning(
"The number of partially fixed experiments exceeds the amount "
"of the overall count of experiments. Partially fixed experiments may be cut off"
)
)


def check_partially_and_fully_fixed_experiments(
domain: Domain,
n_experiments: int,
fixed_experiments: np.ndarray,
paritally_fixed_experiments: np.ndarray,
fixed_experiments: pd.DataFrame,
partially_fixed_experiments: pd.DataFrame,
) -> None:
"""Checks if the shape of the fixed experiments is correct and if the number of fixed experiments is valid
Args:
domain (Domain): domain defining the input variables used for the check.
n_experiments (int): total number of experiments in the design that fixed_experiments are part of.
fixed_experiments (np.ndarray): fixed experiment proposals to be checked.
paritally_fixed_experiments (np.ndarray): partially fixed experiment proposals to be checked.
fixed_experiments (pd.DataFrame): fixed experiment proposals to be checked.
partially_fixed_experiments (pd.DataFrame): partially fixed experiment proposals to be checked.
"""

check_fixed_experiments(domain, n_experiments, fixed_experiments)
n_fixed_experiments, dim = np.array(fixed_experiments).shape

n_partially_fixed_experiments, partially_dim = np.array(
paritally_fixed_experiments
).shape
check_partially_fixed_experiments(
domain, n_experiments, partially_fixed_experiments
)
n_fixed_experiments = len(fixed_experiments.index)

if partially_dim != len(domain.inputs):
raise ValueError(
f"Invalid shape of partially_fixed_experiments. Length along axis 1 is {partially_dim}, but must be {len(domain.inputs)}"
)
n_partially_fixed_experiments = len(partially_fixed_experiments.index)

if n_fixed_experiments + n_partially_fixed_experiments > n_experiments:
warnings.warn(
UserWarning(
"The number of fixed experiments and partially fixed experiments exceeds the amount "
"of the overall count of experiments. Partially fixed experiments may be cut of"
"of the overall count of experiments. Partially fixed experiments may be cut off"
)
)

Expand Down
225 changes: 222 additions & 3 deletions tests/bofire/strategies/doe/test_design.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from bofire.strategies.doe.design import (
check_fixed_experiments,
check_partially_and_fully_fixed_experiments,
check_partially_fixed_experiments,
find_local_max_ipopt,
get_n_experiments,
)
Expand Down Expand Up @@ -480,7 +481,7 @@ def test_get_n_experiments():


@pytest.mark.skipif(not CYIPOPT_AVAILABLE, reason="requires cyipopt")
def test_partially_fixed_experiments():
def test_fixed_experiments_checker():
domain = Domain(
inputs=[
ContinuousInput(key="x1", bounds=(0, 5)),
Expand Down Expand Up @@ -540,13 +541,231 @@ def test_partially_fixed_experiments():
)

# partially fixed will be cut off
with pytest.warns(UserWarning):
with pytest.warns(UserWarning) as record:
check_partially_and_fully_fixed_experiments(
domain, 3, fixed_experiments, partially_fixed_experiments
)
assert len(record) == 1
assert record[0].message.args[0] == (
"The number of fixed experiments and partially fixed experiments exceeds the amount "
"of the overall count of experiments. Partially fixed experiments may be cut off"
)

with pytest.warns(UserWarning) as record:
check_partially_fixed_experiments(domain, 1, partially_fixed_experiments)
assert len(record) == 1
assert record[0].message.args[0] == (
"The number of partially fixed experiments exceeds the amount "
"of the overall count of experiments. Partially fixed experiments may be cut off"
)

# too few experiments
with pytest.raises(ValueError):
with pytest.raises(ValueError) as e:
check_partially_and_fully_fixed_experiments(
domain, 2, fixed_experiments, partially_fixed_experiments
)
assert e == ValueError(
"For starting the optimization the total number of experiments must be larger that the number of fixed experiments."
)

with pytest.raises(ValueError) as e:
check_fixed_experiments(domain, 2, fixed_experiments)
assert e == ValueError(
"For starting the optimization the total number of experiments must be larger that the number of fixed experiments."
)


def test_partially_fixed_experiments():
    """Exercise fixed and partially fixed experiments through find_local_max_ipopt.

    Covers three situations:

    * tables whose columns match the domain inputs exactly,
    * tables that carry an extra column (``c0``) which is not a domain input
      and must be ignored,
    * tables that miss a domain input or leave no free experiment, which must
      raise a ``ValueError``.

    Expected error messages are passed to ``pytest.raises(match=...)`` so they
    are actually checked; an ``assert`` placed after the raising call inside
    the ``with`` block would never run.

    NOTE(review): this test calls find_local_max_ipopt, which presumably needs
    cyipopt like the other design tests in this file -- confirm whether it
    should carry the same ``skipif`` marker.
    """
    input_keys = ["x1", "x2", "a1", "a2"]
    domain = Domain(
        inputs=[
            ContinuousInput(key="x1", bounds=(0, 5)),
            ContinuousInput(key="x2", bounds=(0, 15)),
            ContinuousInput(key="a1", bounds=(0, 1)),
            ContinuousInput(key="a2", bounds=(0, 1)),
        ],
        outputs=[ContinuousOutput(key="y")],
        constraints=[
            # Case 1: a and b are active
            LinearInequalityConstraint(
                features=input_keys, coefficients=[1, 1, 10, -10], rhs=15
            ),
            LinearInequalityConstraint(
                features=input_keys, coefficients=[1, 0.2, 2, -2], rhs=5
            ),
            LinearInequalityConstraint(
                features=input_keys, coefficients=[1, -1, -3, 3], rhs=5
            ),
            # Case 2: a and c are active
            LinearInequalityConstraint(
                features=input_keys, coefficients=[1, 1, -10, -10], rhs=5
            ),
            LinearInequalityConstraint(
                features=input_keys, coefficients=[1, 0.2, 2, 2], rhs=7
            ),
            LinearInequalityConstraint(
                features=input_keys, coefficients=[1, -1, -3, -3], rhs=2
            ),
            # Case 3: c and b are active
            LinearInequalityConstraint(
                features=input_keys, coefficients=[1, 1, 0, -10], rhs=5
            ),
            LinearInequalityConstraint(
                features=input_keys, coefficients=[1, 0.2, 0, 2], rhs=5
            ),
            LinearInequalityConstraint(
                features=input_keys, coefficients=[1, -1, 0, 3], rhs=5
            ),
        ],
    )

    # Message raised by check_fixed_experiments when no experiment is left free.
    too_few_experiments_message = (
        "For starting the optimization the total number of experiments must be "
        "larger that the number of fixed experiments."
    )
    # Message raised by check_partially_fixed_experiments for a missing input.
    missing_partial_input_message = (
        "Domain contains inputs that are not part of partially fixed experiments. "
        "Every input must be present as a column."
    )

    def missing_column_message(feature):
        # Presumably the message domain.validate_candidates raises for a
        # missing input column -- it is not defined in this module; confirm.
        return f"no col for input feature `{feature}`"

    def assert_leading_rows_equal(design, expected):
        # The fixed experiments must show up unchanged as the first two rows.
        for key in input_keys:
            assert np.allclose(design.iloc[[0, 1]][key], expected[key])

    # --- fixed experiments with exactly the domain's input columns ---------
    fixed_experiments = pd.DataFrame(
        np.array([[1, 0, 0, 0], [0, 1, 0.7, 1]]), columns=domain.inputs.get_keys()
    )

    doe = find_local_max_ipopt(
        domain, "linear", n_experiments=3, fixed_experiments=fixed_experiments
    ).reset_index(drop=True)

    assert doe.shape == (3, 4)
    assert_leading_rows_equal(doe, fixed_experiments)

    # --- fixed experiments that miss the input "a2" ------------------------
    fixed_experiments = pd.DataFrame(
        np.array([[1, 0, 0], [0, 1, 0.7]]), columns=["x1", "x2", "a1"]
    )

    # Two fixed rows with n_experiments=2 leave nothing to optimize; the count
    # check in check_fixed_experiments runs before the column validation.
    with pytest.raises(ValueError, match=too_few_experiments_message):
        find_local_max_ipopt(
            domain, "linear", n_experiments=2, fixed_experiments=fixed_experiments
        )

    # --- partially fixed experiments that miss the input "a2" --------------
    partially_fixed_experiments = pd.DataFrame(
        np.array([[1.0, None, None], [0.0, None, None]]),
        columns=["x1", "x2", "a1"],
    )

    with pytest.raises(ValueError, match=missing_partial_input_message):
        find_local_max_ipopt(
            domain,
            "linear",
            n_experiments=2,
            partially_fixed_experiments=partially_fixed_experiments,
        )

    # --- an extra column "c0" that is not a domain input is ignored --------
    fixed_experiments = pd.DataFrame(
        np.array([[1, 0, 0, 0, 1], [0, 1, 0.7, 1, 2]]),
        columns=domain.inputs.get_keys() + ["c0"],
    )

    doe = find_local_max_ipopt(
        domain, "linear", n_experiments=3, fixed_experiments=fixed_experiments
    ).reset_index(drop=True)

    assert doe.shape == (3, 4)
    assert_leading_rows_equal(doe, fixed_experiments)

    partially_fixed_experiments = pd.DataFrame(
        np.array([[1.0, None, None, None, 1.0], [0.0, None, None, None, 2.0]]),
        columns=["x1", "x2", "a1", "a2", "c0"],
    )
    doe = find_local_max_ipopt(
        domain,
        "linear",
        n_experiments=3,
        partially_fixed_experiments=partially_fixed_experiments,
    ).reset_index(drop=True)

    assert doe.shape == (3, 4)
    assert np.allclose(
        doe.iloc[[0, 1]]["x1"], partially_fixed_experiments["x1"].astype(float)
    )

    # --- fixed and partially fixed experiments combined --------------------
    doe = find_local_max_ipopt(
        domain,
        "linear",
        n_experiments=4,
        fixed_experiments=fixed_experiments,
        partially_fixed_experiments=partially_fixed_experiments,
    ).reset_index(drop=True)

    assert doe.shape == (4, 4)
    assert_leading_rows_equal(doe, fixed_experiments)
    # The partially fixed rows follow directly after the fully fixed ones.
    assert np.allclose(
        doe.iloc[[2, 3]]["x1"], partially_fixed_experiments["x1"].astype(float)
    )

    # --- too few experiments to leave anything free ------------------------
    for n_experiments in (1, 2):
        with pytest.raises(ValueError, match=too_few_experiments_message):
            find_local_max_ipopt(
                domain,
                "linear",
                n_experiments=n_experiments,
                fixed_experiments=fixed_experiments,
                partially_fixed_experiments=partially_fixed_experiments,
            )

    # --- a domain input missing from the fixed experiments -----------------
    _fixed_experiments = fixed_experiments.drop(columns=["x1"])
    with pytest.raises(ValueError, match=missing_column_message("x1")):
        find_local_max_ipopt(
            domain,
            "linear",
            n_experiments=3,
            fixed_experiments=_fixed_experiments,
            partially_fixed_experiments=partially_fixed_experiments,
        )

    # --- a domain input missing from the partially fixed experiments -------
    _partially_fixed_experiments = partially_fixed_experiments.drop(columns=["x1"])
    with pytest.raises(ValueError, match=missing_partial_input_message):
        find_local_max_ipopt(
            domain,
            "linear",
            n_experiments=3,
            fixed_experiments=fixed_experiments,
            partially_fixed_experiments=_partially_fixed_experiments,
        )

    # Both tables miss "x1": the partially fixed table is checked first, so
    # its message is the one that surfaces.
    with pytest.raises(ValueError, match=missing_partial_input_message):
        find_local_max_ipopt(
            domain,
            "linear",
            n_experiments=3,
            fixed_experiments=_fixed_experiments,
            partially_fixed_experiments=_partially_fixed_experiments,
        )


if __name__ == "__main__":
    # Allow running this module directly: execute the two design tests in order.
    for run_test in (
        test_fixed_experiments_checker,
        test_partially_fixed_experiments,
    ):
        run_test()

0 comments on commit c57bcab

Please sign in to comment.