Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix autosave function for single core #770

Merged
merged 15 commits into from
Dec 7, 2021
537 changes: 303 additions & 234 deletions pypesto/objective/history.py

Large diffs are not rendered by default.

32 changes: 17 additions & 15 deletions pypesto/optimize/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from .optimizer import Optimizer, ScipyOptimizer
from .options import OptimizeOptions
from .task import OptimizerTask
from .util import check_hdf5_mp, fill_hdf5_file, autosave
from .util import preprocess_hdf5_history, postprocess_hdf5_history, autosave

logger = logging.getLogger(__name__)

Expand All @@ -27,7 +27,7 @@ def minimize(
progress_bar: bool = True,
options: OptimizeOptions = None,
history_options: HistoryOptions = None,
filename: Union[str, None] = "Auto"
filename: str = "Auto"
) -> Result:
"""
Do multistart optimization.
Expand Down Expand Up @@ -94,6 +94,7 @@ def minimize(
options = OptimizeOptions()
options = OptimizeOptions.assert_instance(options)

# history options
if history_options is None:
history_options = HistoryOptions()
history_options = HistoryOptions.assert_instance(history_options)
Expand All @@ -119,13 +120,14 @@ def minimize(
if engine is None:
engine = SingleCoreEngine()

# maybe change to one hdf5 storage file per start if parallel
history_file = history_options.storage_file
history_requires_postprocessing = preprocess_hdf5_history(
history_options, engine
)

# define tasks
tasks = []
filename_hist = None
if history_options.storage_file is not None and \
history_options.storage_file.endswith(('.h5', '.hdf5')):
filename_hist = check_hdf5_mp(history_options, engine)

for startpoint, id in zip(startpoints, ids):
task = OptimizerTask(
optimizer=optimizer,
Expand All @@ -139,11 +141,12 @@ def minimize(
)
tasks.append(task)

# do multistart optimization
# perform multistart optimization
ret = engine.execute(tasks, progress_bar=progress_bar)

if filename_hist is not None:
fill_hdf5_file(ret, filename_hist)
# merge hdf5 history files
if history_requires_postprocessing:
postprocess_hdf5_history(ret, history_file, history_options)

# aggregate results
for optimizer_result in ret:
Expand All @@ -152,10 +155,9 @@ def minimize(
# sort by best fval
result.optimize_result.sort()

if filename == "Auto" and filename_hist is not None:
filename = filename_hist
autosave(filename=filename,
result=result,
type="optimization")
# if history file provided, set storage file to that one
if filename == "Auto" and history_file is not None:
filename = history_file
autosave(filename=filename, result=result, type="optimization")

return result
25 changes: 16 additions & 9 deletions pypesto/optimize/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,14 @@ def history_decorator(minimize):
Default decorator for the minimize() method.
"""

def wrapped_minimize(self, problem, x0, id, allow_failed_starts,
history_options=None):
def wrapped_minimize(
self,
problem,
x0,
id,
allow_failed_starts,
history_options=None,
):
objective = problem.objective

# initialize the objective
Expand All @@ -75,8 +81,9 @@ def wrapped_minimize(self, problem, x0, id, allow_failed_starts,
if history_options is None:
history_options = HistoryOptions()
history = history_options.create_history(
id=id, x_names=[problem.x_names[ix]
for ix in problem.x_free_indices])
id=id,
x_names=[problem.x_names[ix] for ix in problem.x_free_indices],
)
optimizer_history = OptimizerHistory(history=history, x0=x0)

# plug in history for the objective to record it
Expand Down Expand Up @@ -297,11 +304,11 @@ def __init__(self):
@time_decorator
@history_decorator
def minimize(
self,
problem: Problem,
x0: np.ndarray,
id: str,
history_options: HistoryOptions = None,
self,
problem: Problem,
x0: np.ndarray,
id: str,
history_options: HistoryOptions = None,
) -> OptimizerResult:
"""
Perform optimization.
Expand Down
7 changes: 5 additions & 2 deletions pypesto/optimize/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,12 @@ def execute(self) -> 'pypesto.optimize.OptimizerResult':
logger.info(f"Executing task {self.id}.")

optimizer_result = self.optimizer.minimize(
problem=self.problem, x0=self.x0, id=self.id,
problem=self.problem,
x0=self.x0,
id=self.id,
allow_failed_starts=self.options.allow_failed_starts,
history_options=self.history_options)
history_options=self.history_options,
)
if not self.report_hess:
optimizer_result.hess = None
if not self.report_sres:
Expand Down
118 changes: 77 additions & 41 deletions pypesto/optimize/util.py
Original file line number Diff line number Diff line change
@@ -1,88 +1,118 @@
"""Utility functions for :py:func:`pypesto.optimize.minimize`."""
import datetime
from pathlib import Path
from typing import List
import h5py

from ..engine import Engine, SingleCoreEngine
from ..objective import HistoryOptions
from ..store.save_to_hdf5 import get_or_create_group
from ..store import write_result
from ..result import Result
from pathlib import Path
from typing import Union
from .optimizer import OptimizerResult


import h5py


def check_hdf5_mp(
def preprocess_hdf5_history(
history_options: HistoryOptions,
engine: Engine,
) -> Union[str, None]:
"""
Create a folder for partial HDF5 files.
):
"""Create a folder for partial HDF5 files if parallelization is used.

If no parallelization engine is used, do nothing.
This is because single hdf5 file access is not thread-safe.

Parameters
----------
engine:
The Engine which is used in the optimization
The Engine which is used in the optimization.
history_options:
The HistoryOptions used in the optimization
The HistoryOptions used in the optimization.

Returns
-------
The filename that will be used to combine the partial HDF5 files later.
If a parallelization engine is not used, `None` is returned.
history_requires_postprocessing:
Whether history storage post-processing is required.
"""
storage_file = history_options.storage_file

# nothing to do if no history stored
if storage_file is None:
return False

# extract storage type
path = Path(storage_file)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍


# nothing to do if csv history and correctly set
if path.suffix == ".csv":
if "{id}" not in storage_file:
raise ValueError(
"For csv history, the `storage_file` must contain an `{id}` "
"template"
)
return False

# assuming hdf5 history henceforth
if path.suffix not in [".h5", ".hdf5"]:
raise ValueError(
"Only history storage to '.csv' and '.hdf5' is supported, got "
f"{path.suffix}",
)

# nothing to do if no parallelization
if isinstance(engine, SingleCoreEngine):
return None
filename = history_options.storage_file
file_path = Path(filename)
return False

# create directory with same name as original file stem
partial_file_path = (
file_path.parent / file_path.stem /
(file_path.stem + '_{id}' + file_path.suffix)
template_path = (
path.parent / path.stem / (path.stem + "_{id}" + path.suffix)
)
partial_file_path.parent.mkdir(parents=True, exist_ok=True)
history_options.storage_file = str(partial_file_path)
template_path.parent.mkdir(parents=True, exist_ok=True)
# set history file to template path
history_options.storage_file = str(template_path)

# create hdf5 file that gathers the others within history group
with h5py.File(filename, mode='a') as f:
get_or_create_group(f, "history")
return filename
return True


def fill_hdf5_file(
ret: list,
filename: str
def postprocess_hdf5_history(
ret: List[OptimizerResult],
storage_file: str,
history_options: HistoryOptions,
) -> None:
"""
Create single history file pointing to files of multiple starts.
"""Create single history file pointing to files of multiple starts.

Create links in `filename` to the
history of each start contained in ret, the results
of the optimization.
Create links in `storage_file` to the history of each start contained in
`ret`, the results of the optimization.

Parameters
----------
ret:
The result iterable returned by the optimization.
filename:
storage_file:
The filename of the hdf5 file in which the histories
are to be gathered.
history_options:
History options used in the optimization.
"""
with h5py.File(filename, mode='a') as f:
# create hdf5 file that gathers the others within history group
with h5py.File(storage_file, mode='w') as f:
# create file and group
get_or_create_group(f, "history")
# append links to each single result file
for result in ret:
id = result['id']
f[f'history/{id}'] = h5py.ExternalLink(
result['history'].file,
f'history/{id}'
)

# reset storage file (undo preprocessing changes)
history_options.storage_file = storage_file


def autosave(filename: str, result: Result,
type: str):
def autosave(
filename: str,
result: Result,
type: str,
):
"""
Save the result of optimization, profiling or sampling automatically.

Expand All @@ -99,9 +129,15 @@ def autosave(filename: str, result: Result,
method the function is called in.
"""
if filename is None:
return None
return

if filename == "Auto":
time = datetime.datetime.now().strftime("%Y_%d_%m_%H_%M_%S")
filename = time+f"_{type}_result.hdf5"
write_result(result=result, overwrite=True,
optimize=True, filename=filename)

write_result(
result=result,
overwrite=True,
optimize=True,
filename=filename,
)
12 changes: 6 additions & 6 deletions pypesto/sample/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@


def sample(
problem: Problem,
n_samples: int,
sampler: Sampler = None,
x0: Union[np.ndarray, List[np.ndarray]] = None,
result: Result = None,
filename: str = "Auto"
problem: Problem,
n_samples: int,
sampler: Sampler = None,
x0: Union[np.ndarray, List[np.ndarray]] = None,
result: Result = None,
filename: str = "Auto",
) -> Result:
"""
Call to do parameter sampling.
Expand Down
2 changes: 1 addition & 1 deletion test/base/test_startpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def test_latin_hypercube():
assert np.array_equal(sorted(x), range(0, n_starts))


def test_ubounded_startpoints(spmethod):
def test_unbounded_startpoints(spmethod):
"""Test Exceptions for non-finite lb/ub"""
for lb_, ub_ in [
(-np.inf * np.ones(lb.shape), ub),
Expand Down
Loading