From f7027dfffc066494f792d51a97ec68e3d9475c08 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Sat, 7 Nov 2020 14:29:56 +0100 Subject: [PATCH 1/3] add check to runhistory.add() --- smac/runhistory/runhistory.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py index bfce29ce8..da16fa7a0 100644 --- a/smac/runhistory/runhistory.py +++ b/smac/runhistory/runhistory.py @@ -254,6 +254,21 @@ def add( config_id = typing.cast(int, config_id_tmp) # Construct keys and values for the data dictionary + for key, value in ( + ('config', config.get_dictionary()), + ('config_id', config_id), + ('instance_id', instance_id), + ('seed', seed), + ('budget', budget), + ('cost', cost), + ('time', time), + ('status', status), + ('starttime', starttime), + ('endtime', endtime), + ('additional_info', additional_info), + ('origin', config.origin), + ): + self._check_json_serializable(key, value, EnumEncoder) k = RunKey(config_id, instance_id, seed, budget) v = RunValue(cost, time, status, starttime, endtime, additional_info) @@ -268,6 +283,20 @@ def add( # overwrite if censored with a larger cutoff self._add(k, v, status, origin) + def _check_json_serializable( + self, + key: str, + obj: typing.Any, + encoder: typing.Type[json.JSONEncoder], + ) -> None: + try: + json.dumps(obj, cls=encoder) + except Exception as e: + raise ValueError( + "Cannot add %s: %s to runhistory because it raises an error during JSON encoding, " + "please see the error above." % (key, str(obj)) + ) from e + def _add(self, k: RunKey, v: RunValue, status: StatusType, origin: DataOrigin) -> None: """Actual function to add new entry to data structures From edd0c1a48f6a7bbebd899c120309be7059d5bc1f Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Mon, 9 Nov 2020 10:44:10 +0100 Subject: [PATCH 2/3] move serialization checks from tae to runhistory --- smac/intensification/intensification.py | 7 +++--- smac/intensification/simple_intensifier.py | 2 +- smac/intensification/successive_halving.py | 2 +- smac/runhistory/runhistory.py | 12 ++++++----- smac/tae/execute_func.py | 18 +--------------- test/test_runhistory/test_runhistory.py | 25 ++++++++++++++++++++++ test/test_tae/test_exec_func.py | 17 --------------- 7 files changed, 39 insertions(+), 44 deletions(-) diff --git a/smac/intensification/intensification.py b/smac/intensification/intensification.py index f5e69832f..83dcff54c 100644 --- a/smac/intensification/intensification.py +++ b/smac/intensification/intensification.py @@ -604,14 +604,15 @@ def _get_next_inc_run(self, Max time for a given instance/seed pair """ - # Line 5 - next_instance = self.rs.choice(available_insts) + # Line 5 - and avoid https://github.com/numpy/numpy/issues/10791 + _idx = self.rs.choice(len(available_insts)) + next_instance = available_insts[_idx] # Line 6 if self.deterministic: next_seed = 0 else: - next_seed = self.rs.randint(low=0, high=MAXINT, size=1)[0] + next_seed = int(self.rs.randint(low=0, high=MAXINT, size=1)[0]) # Line 7 self.logger.debug( diff --git a/smac/intensification/simple_intensifier.py b/smac/intensification/simple_intensifier.py index 255bfa136..34b5c17bc 100644 --- a/smac/intensification/simple_intensifier.py +++ b/smac/intensification/simple_intensifier.py @@ -163,7 +163,7 @@ def get_next_run(self, config=challenger, instance=self.instances[-1], instance_specific="0", - seed=0 if self.deterministic else self.rs.randint(low=0, high=MAXINT, size=1)[0], + seed=0 if self.deterministic else int(self.rs.randint(low=0, high=MAXINT, size=1)[0]), cutoff=self.cutoff, capped=False, budget=0.0, diff --git a/smac/intensification/successive_halving.py b/smac/intensification/successive_halving.py index 6980002bc..1a9d814a4 100644 --- a/smac/intensification/successive_halving.py +++ b/smac/intensification/successive_halving.py @@ -164,7 +164,7 @@ def __init__(self, if self.deterministic: seeds = [0] else: - seeds = self.rs.randint(low=0, high=MAXINT, size=self.n_seeds) + seeds = [int(s) for s in self.rs.randint(low=0, high=MAXINT, size=self.n_seeds)] if self.n_seeds == 1: self.logger.warning('The target algorithm is specified to be non deterministic, ' 'but number of seeds to evaluate are set to 1. ' diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py index da16fa7a0..090eef7e8 100644 --- a/smac/runhistory/runhistory.py +++ b/smac/runhistory/runhistory.py @@ -253,6 +253,8 @@ def add( else: config_id = typing.cast(int, config_id_tmp) + k = RunKey(config_id, instance_id, seed, budget) + v = RunValue(cost, time, status, starttime, endtime, additional_info) # Construct keys and values for the data dictionary for key, value in ( ('config', config.get_dictionary()), @@ -268,9 +270,7 @@ def add( ('additional_info', additional_info), ('origin', config.origin), ): - self._check_json_serializable(key, value, EnumEncoder) - k = RunKey(config_id, instance_id, seed, budget) - v = RunValue(cost, time, status, starttime, endtime, additional_info) + self._check_json_serializable(key, value, EnumEncoder, k, v) # Each runkey is supposed to be used only once. Repeated tries to add # the same runkey will be ignored silently if not capped. @@ -288,13 +288,15 @@ def _check_json_serializable( key: str, obj: typing.Any, encoder: typing.Type[json.JSONEncoder], + runkey: RunKey, + runvalue: RunValue ) -> None: try: json.dumps(obj, cls=encoder) except Exception as e: raise ValueError( - "Cannot add %s: %s to runhistory because it raises an error during JSON encoding, " - "please see the error above." % (key, str(obj)) + "Cannot add %s: %s of type %s to runhistory because it raises an error during JSON encoding, " + "please see the error above.\nRunKey: %s\nRunValue %s" % (key, str(obj), type(obj), runkey, runvalue) ) from e def _add(self, k: RunKey, v: RunValue, status: StatusType, diff --git a/smac/tae/execute_func.py b/smac/tae/execute_func.py index 0c19bf038..38c83afaa 100644 --- a/smac/tae/execute_func.py +++ b/smac/tae/execute_func.py @@ -215,28 +215,12 @@ def run(self, config: Configuration, cost = result except Exception as e: self.logger.exception(e) + cost, result = self.cost_for_crash, self.cost_for_crash status = StatusType.CRASHED - cost = self.cost_for_crash additional_run_info = {} runtime = time.time() - start_time - # check serializability of results - try: - json.dumps(cost) - except TypeError as e: - self.logger.exception(e) - raise TypeError("Target Algorithm returned 'cost' {} (type {}) but it is not serializable. " - "Please ensure all objects returned are JSON serializable.".format(result, type(result))) \ - from e - try: - json.dumps(additional_run_info) - except TypeError as e: - self.logger.exception(e) - raise TypeError("Target Algorithm returned 'additional_run_info' ({}) with some non-serializable items. " - "Please ensure all objects returned are JSON serializable.".format(additional_run_info)) \ - from e - if status == StatusType.SUCCESS and not isinstance(result, (int, float)): status = StatusType.CRASHED cost = self.cost_for_crash diff --git a/test/test_runhistory/test_runhistory.py b/test/test_runhistory/test_runhistory.py index 56d98ddfd..c55774392 100644 --- a/test/test_runhistory/test_runhistory.py +++ b/test/test_runhistory/test_runhistory.py @@ -5,6 +5,8 @@ from ConfigSpace import Configuration, ConfigurationSpace from ConfigSpace.hyperparameters import UniformIntegerHyperparameter +import numpy as np +import pynisher from smac.tae import StatusType from smac.runhistory.runhistory import RunHistory @@ -251,6 +253,29 @@ def test_json_origin(self): os.remove(path) + def test_add_json_serializable(self): + """Test if entries added to the runhistory are correctly checked for serializability.""" + rh = RunHistory() + cs = get_config_space() + config = cs.sample_configuration() + + rh.add(config, 0.0, 0.0, StatusType.SUCCESS, None, None, 0.0, 0.0, 0.0, None) + rh.add(config, 0.0, 0.0, StatusType.SUCCESS, None, None, 0.0, 0.0, 0.0, {}) + + with self.assertRaisesRegex( + ValueError, + r"Cannot add cost: 0\.0 of type to runhistory because " + r"it raises an error during JSON encoding" + ): + rh.add(config, np.float32(0.0), 0.0, StatusType.SUCCESS, None, None, 0.0, 0.0, 0.0, None) + with self.assertRaisesRegex( + ValueError, + r"Cannot add additional_info: \{'error': \} " + r"of type to runhistory because it raises an error during JSON encoding", + ): + rh.add(config, 0.0, 0.0, StatusType.SUCCESS, None, None, 0.0, 0.0, 0.0, + {'error': pynisher.AnythingException}) + if __name__ == "__main__": unittest.main() diff --git a/test/test_tae/test_exec_func.py b/test/test_tae/test_exec_func.py index aa851eed8..5ea9aadd3 100644 --- a/test/test_tae/test_exec_func.py +++ b/test/test_tae/test_exec_func.py @@ -187,20 +187,3 @@ def target(x): return x**2 taf = ExecuteTAFuncDict(ta=target, stats=self.stats) self.assertRaises(ValueError, taf.run, config=2, cutoff=65536) - - def test_non_serializable(self): - # cost non serializable - def target(x): - return np.int32(x) - taf = ExecuteTAFuncDict(ta=target, stats=self.stats) - msg = "Please ensure all objects returned are JSON serializable." - with self.assertRaisesRegex(TypeError, msg): - taf.run(config=2) - - # additional info non serializable - def target(x): - return x, {'x': np.int32(x)} - taf = ExecuteTAFuncDict(ta=target, stats=self.stats) - msg = "Please ensure all objects returned are JSON serializable." - with self.assertRaisesRegex(TypeError, msg): - taf.run(config=2) From 8340bc0a5a2d155494e414fed68d69f2c413cc8a Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Mon, 9 Nov 2020 10:57:18 +0100 Subject: [PATCH 3/3] pep8 --- smac/tae/execute_func.py | 1 - 1 file changed, 1 deletion(-) diff --git a/smac/tae/execute_func.py b/smac/tae/execute_func.py index 38c83afaa..0d5df8c96 100644 --- a/smac/tae/execute_func.py +++ b/smac/tae/execute_func.py @@ -1,7 +1,6 @@ import inspect import math import time -import json import traceback import typing