Skip to content

Commit

Permalink
Correct timeout logic (#2825)
Browse files Browse the repository at this point in the history
Summary:

Context:
D61930178 introduced incorrect timeout logic. At each iteration, the time remaining was decremented by the *total* amount of time elapsed since the start of the loop, rather than by the time taken by that iteration alone. So after n iterations that each took 1 second, the time remaining would have been decremented by 1 + 2 + ... + n = n * (n + 1) / 2 seconds, rather than n seconds. The benchmark loop also timed out silently, whereas timing out within Scheduler would log a warning.

This diff:
* Fixes the timeout logic
* Logs a warning

Reviewed By: sdaulton, Balandat

Differential Revision: D63920497
  • Loading branch information
esantorella authored and facebook-github-bot committed Oct 4, 2024
1 parent 331da3a commit 8daaabd
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 7 deletions.
9 changes: 5 additions & 4 deletions ax/benchmark/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,26 +125,27 @@ def benchmark_replication(
generation_strategy=method.generation_strategy.clone_reset(),
options=method.scheduler_options,
)
timeout_hours = scheduler.options.timeout_hours

# list of parameters for each trial
best_params_by_trial: list[list[TParameterization]] = []

is_mf_or_mt = len(problem.runner.target_fidelity_and_task) > 0
# Run the optimization loop.
timeout_hours = scheduler.options.timeout_hours
remaining_hours = timeout_hours
with with_rng_seed(seed=seed):
start = monotonic()
for _ in range(problem.num_trials):
next(
scheduler.run_trials_and_yield_results(
max_trials=1, timeout_hours=timeout_hours
max_trials=1, timeout_hours=remaining_hours
)
)
if timeout_hours is not None:
elapsed_hours = (monotonic() - start) / 3600
timeout_hours = timeout_hours - elapsed_hours
if timeout_hours <= 0:
remaining_hours = timeout_hours - elapsed_hours
if remaining_hours <= 0.0:
logger.warning("The optimization loop timed out.")
break

if problem.is_moo or is_mf_or_mt:
Expand Down
16 changes: 13 additions & 3 deletions ax/benchmark/tests/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import tempfile
from itertools import product
from time import monotonic
from unittest.mock import patch

import numpy as np
Expand Down Expand Up @@ -448,20 +449,29 @@ def test_timeout(self) -> None:
num_sobol_trials=1000, # Ensures we don't use BO
).generation_strategy

timeout_seconds = 2.0
method = BenchmarkMethod(
name=generation_strategy.name,
generation_strategy=generation_strategy,
scheduler_options=SchedulerOptions(
max_pending_trials=1,
init_seconds_between_polls=0,
min_seconds_before_poll=0,
timeout_hours=0.0001, # Strict timeout of 0.36 seconds
timeout_hours=timeout_seconds / 3600,
),
)

# Each replication will have a different number of trials
result = benchmark_one_method_problem(
problem=problem, method=method, seeds=(0, 1)

start = monotonic()
with self.assertLogs("ax.benchmark.benchmark", level="WARNING") as cm:
result = benchmark_one_method_problem(
problem=problem, method=method, seeds=(0, 1)
)
elapsed = monotonic() - start
self.assertGreater(elapsed, timeout_seconds)
self.assertIn(
"WARNING:ax.benchmark.benchmark:The optimization loop timed out.", cm.output
)

# Test the traces get composited correctly. The AggregatedResult's traces
Expand Down

0 comments on commit 8daaabd

Please sign in to comment.