From 09cf45345add860e2cad83a26a1beb9ab160e457 Mon Sep 17 00:00:00 2001 From: ctrl-z-9000-times Date: Mon, 29 Apr 2019 21:46:00 -0400 Subject: [PATCH 1/9] Contribute Parameter Optimization module This program finds good parameters for an HTM system. --- py/src/nupic/optimization/__init__.py | 0 py/src/nupic/optimization/ae.py | 898 ++++++++++++++++++++++++++ 2 files changed, 898 insertions(+) create mode 100644 py/src/nupic/optimization/__init__.py create mode 100644 py/src/nupic/optimization/ae.py diff --git a/py/src/nupic/optimization/__init__.py b/py/src/nupic/optimization/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/py/src/nupic/optimization/ae.py b/py/src/nupic/optimization/ae.py new file mode 100644 index 0000000000..20eb76c903 --- /dev/null +++ b/py/src/nupic/optimization/ae.py @@ -0,0 +1,898 @@ +#!/usr/bin/python3 +""" +Automatic Experimenter +Written by David McDougall, 2018-2019 + +This is a parameter optimization framework. + * It methodically records the results of different sets of parameters and +analyses the results. It then automatically suggests and evaluates +modifications to the parameters. + * It exposes a convenient API for users to hook their program into this. + * The framework allows for testing each set of parameters several times and +calculates the average and standard deviation of the results. It also +calculates the confidence that a parameter change caused the score to change. + * It is extensible: new methods for automated parameter optimization can be +added. Currently this implements a basic grid search strategy. In the future I +hope to add a particle swarming method. + +To use this module, structure experiments as follows: + ExperimentModule is a python3 module containing the model to be optimized as + well as code to evaluate model performance. + + ExperimentModule.default_parameters = {} + This global dictionary contains all of the parameters to modify. + Parameters must be one of the following types: dict, tuple, float, int. + Parameters can be nested in multiple levels of dictionaries and tuples. + + ExperimentModule.main(parameters=default_parameters, argv=None, verbose=True) + Returns (float) performance of parameters, to be maximized. + +Usage: $ ae.py [ae-arguments] ExperimentModule.py [experiment-arguments] + +The outputs and data of this program are kept in a directory named after the +experiment which generated it. If the experiment is "foo/bar.py" then AE's +directory is "foo/bar_ae/". The primary output of this program is the file +"foo/bar_ae/lab_report.txt" which contains a summary of its operations. The lab +report format is: + +1) Introduction. This text is not parsed, it is preserved so you can keep notes +here This area is initialized with hopefully useful information, including +the experiment name, timestamps. + +2) Methods. This section contains the default parameters and the command line +invocation. + +3) Summary of each experiment. Each experiments summary contains the following +information: + 1) Modified Parameters & their New Value. This is the only required field, + the remaining fields will be generated and written to this file as soon as + the report is loaded. You may choose to manually add experiments to this + lab report in this way. + 2) MD5 Checksum of Parameters and Command Line. This hash checksum is used + to uniquely identify an experimental setup, it's the name of the + experiment. These hashes are used in filenames and searching for a hash + finds all references to it. 
+ 3) File Path of Experiment Journal + 4) Number of Attempted Runs + 5) Score of each Completed Run + 6) Mean & Standard Deviation of Scores + 7) Notes, these are not parsed they are preserved so you can keep notes here + +This program keeps a Journal of each experimental setup. Journals are named +after the hash of the parameters & command line they contain, with the extension +".journal". Journals start with a self contained description of how to +reproduce the experiment, followed by a section for every run of this +experiment. The section for each run contains the output (std-out & std-err) of +the program, as well as diagnostic information such as timestamps and memory +usage reports. Files with the extension ".log" are temporary files for in- +progress experiment, and when the experiment finishes running they are copied to +their journal and then the ".log" file is deleted. +""" + +# TODO: Default parameters need better handling... + +# TODO: ExperimentSummary should X-Ref with journal file. Journal should +# contain complete parameter set, needed for full automatic reproducibility. +# Changing default parameters should not make experiments inaccessible. + +# TODO: Maybe the command line invocation should be included in the experiment +# hash? Then I could experiment with the CLI args within a single lab report. + +# TODO: Every run should track elapsed time and report the average in the +# experiment journal & summary. Some of these experiments clearly take 4x +# longer than others but its not recorded. + +# TODO: Log files should have memory usage ... + +import argparse +import os +import sys +import shutil +import random +import pprint +import time +import datetime +import tempfile +import multiprocessing +import resource +import signal # Probably not X-platform ... 
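For reference, a minimal sketch of an experiment module matching the interface described in the docstring above. The file name my_experiment.py, the parameter names, and the scoring formula are all hypothetical placeholders, not part of this patch.

# my_experiment.py -- hypothetical module that ae.py could drive (illustrative only).
default_parameters = {
    'encoder': {'size': 1000, 'sparsity': 0.02},
    'learning_rate': 0.10,
}

def main(parameters=default_parameters, argv=None, verbose=True):
    # A real experiment would build and evaluate an HTM model here; this
    # stand-in just rewards parameters close to an arbitrary optimum.
    score = (- abs(parameters['learning_rate'] - 0.05)
             - abs(parameters['encoder']['sparsity'] - 0.02))
    if verbose:
        print("score:", score)
    return score

A couple of hypothetical invocations, using only flags defined in this script (my_experiment.py and its --epochs flag are placeholders):

$ python3 ae.py --processes 4 --time_limit 0.5 --grid_search "['encoder']" my_experiment.py --epochs 3
$ python3 ae.py --combine 10 my_experiment.py --epochs 3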
+from copy import copy, deepcopy +import re +import hashlib +import numpy as np +import scipy +import math + + +class ParameterSet(dict): + def __init__(self, data): + super().__init__(self) + if isinstance(data, str): + try: + data = eval(data.strip()) + except: + print("Error parsing: " + data.strip()) + raise + assert(isinstance(data, dict)) + self.update(data) + + def __hash__(self): + string = pprint.pformat(self).encode('utf-8') + checksum = hashlib.md5(string).hexdigest() + return abs(int(checksum[:8], base=16)) + + def __eq__(self, other): + assert(isinstance(other, type(self))) + return _recursive_equal(self, other) + + def diff(a, b): + return _recursive_diff(a, b) + + def get(self, path): + try: + return eval('self' + path) + except: + print('Failed to get self' + path) + raise + + def apply(self, modification, value): + try: + _recursive_apply(self, modification.strip(), value) + except: + print('Failed to apply modification %s = %s'%(modification, str(value))) + raise + + +def _recursive_equal(a, b): + if isinstance(a, dict): + return all(_recursive_equal(a[k], b[k]) for k in a) + elif isinstance(a, tuple): + return all(_recursive_equal(ai, bi) for ai, bi in zip(a, b)) + else: + return a == b + +def _recursive_diff(old, new): + diffs = [] + if isinstance(old, dict): + for key in old: + inner_diffs = _recursive_diff(old[key], new[key]) + for path, new_value in inner_diffs: + diffs.append(("['%s']%s"%(key, path), new_value)) + elif isinstance(old, tuple): + for idx in range(len(old)): + inner_diffs = _recursive_diff(old[idx], new[idx]) + for path, new_value in inner_diffs: + diffs.append(("[%d]%s"%(idx, path), new_value)) + elif old != new: + diffs.append(('', new)) + return diffs + +def _recursive_apply(self, mod, value): + access = mod.split(']')[0].strip('[]"\' ') + if not access: + return value + tail = mod.split(']', maxsplit=1)[1] + if isinstance(self, dict): + self[access] = _recursive_apply(self[access], tail, value) + return self + if isinstance(self, tuple): + self = list(self) + index = int(access) + self[index] = _recursive_apply(self[index], tail, value) + return tuple(self) + + +class ExperimentSummary: + """ + Attributes: + lab - circular reference to LabReport instance + attempts - + scores - + notes - + journal - + parameters - + modifications - + """ + def __init__(self, lab, + string=None, + modifications=None, + parameters=None,): + """ """ + self.lab = lab + self.attempts = 0 + self.scores = [] + self.notes = ' ' + # Load or create this experiment's data. + if string is not None: + self.parse(string) + elif modifications is not None: + self.parameters = deepcopy(self.lab.default_parameters) + for path, value in modifications: + self.parameters.apply(path, value) + elif parameters is not None: + self.parameters = ParameterSet(parameters) + else: + raise TypeError("Not enough arguments to ExperimentSummary.__init__()") + + self.parameters = self.lab.typecast_parameters(self.parameters) + self.modifications = self.lab.default_parameters.diff(self.parameters) + + if hash(self) not in self.lab.experiment_ids: + self.lab.experiments.append(self) + self.lab.experiment_ids[hash(self)] = self + else: + raise ValueError("Duplicate Parameters Hash %X"%hash(self)) + + # Start a journal file for this experiment. 
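To illustrate the path-string convention that ParameterSet.get, .apply and .diff above share, here is a small sketch with made-up parameter names and values:

from copy import deepcopy

defaults = ParameterSet({'lr': 0.1, 'layers': ({'size': 100}, {'size': 50})})
trial    = deepcopy(defaults)
trial.apply("['layers'][1]['size']", 64)   # paths are executable index expressions
assert trial.get("['layers'][1]['size']") == 64
assert defaults.diff(trial) == [("['layers'][1]['size']", 64)]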
+ if not hasattr(self, 'journal'): + self.journal = os.path.join(self.lab.ae_directory, "%X.journal"%hash(self)) + with open(self.journal, 'a') as file: + file.write('Experiment Journal For Parameters:\n') + file.write(pprint.pformat(self.parameters) + '\n') + file.write('Hash: %X\n'%hash(self)) + file.write('Command Line Invocation: $ ' + ' '.join(self.lab.argv) + '\n') + else: + # Scrape some info from the journal file. + with open(self.journal, 'r') as file: + journal = file.read() + journal = journal.split(self.lab.section_divider) + journal.pop(0) # Discard header + elapsed_times = [] + memory_usages = [] + + def parse(self, string): + # Reconstruct the parameters. + self.modifications = [] + if "Modification:" in string: + for change in re.findall("Modification: (.*)", string): + path, eq, value = change.partition('=') + self.modifications.append((path.strip(), value.strip())) + self.parameters = deepcopy(self.lab.default_parameters) + for path, value in self.modifications: + self.parameters.apply(path, value) + # + if "Attempts:" in string: + self.attempts = int(re.search("Attempts: (.*)", string).groups()[0]) + if "Scores:" in string: + self.scores = re.search("Scores: (.*)", string).groups()[0].strip() + self.scores = [float(s.strip()) for s in self.scores.split(',') if s.strip()] + if "Journal:" in string: + self.journal = re.search("Journal: (.*)", string).groups()[0] + if "Notes:" in string: + self.notes = string.partition('Notes:')[2] + if "Hash:" in string: + # Override hash(self) with whats on file since this is reconstructed + # from defaults + modifications, and the defaults might have changed. + self._hash = int(re.search("Hash: (.*)", string).groups()[0], base=16) + + def significance(self): + """ + Returns the P-Value of the Null-Hypothesis test (these parameters + against the default parameters) + """ + try: + null_experiment = self.lab.experiment_ids[hash(self.lab.default_parameters)] + except KeyError: + return float('nan') + if not self.scores or not null_experiment.scores: + return float('nan') + if len(self.scores) == 1: + pass # TODO: How to pass probabilities & statistics? + stat, pval = scipy.stats.ttest_ind( + null_experiment.scores, self.scores, axis=None, + # Since both samples come from the same experimential setup they + # should have the same variance. 
+ equal_var=True,) + return pval + + def mean(self): + return np.mean(self.scores) if self.scores else float('-inf') + + def __str__(self): + s = '' + if not self.modifications: + s += "Default Parameters\n" + for mod, value in self.modifications: + s += "Modification: %s = %s\n"%(mod, str(value)) + s += 'Hash: %X\n'%hash(self) + s += 'Journal: %s\n'%self.journal + s += 'Attempts: %d\n'%self.attempts + s += 'Scores: %s\n'%', '.join(str(s) for s in self.scores) + if self.scores: + mean = np.mean(self.scores) + std = np.std(self.scores) + s += 'Mean & Standard Deviation: %g & %g\n'%(mean, std) + s += 'P-Value: %g\n'%self.significance() + s += 'Notes:' + self.notes + return s + + def __hash__(self): + if not hasattr(self, '_hash'): + self._hash = hash(self.parameters) + return self._hash + + +class LabReport: + """ + Attributes: + lab.module - Experiment python module + lab.name - Name of experiment module + lab.path - Directory containing experiment module + lab.structure - Types of parameters + lab.default_parameters - ex.module.default_parameters + lab.argv - Command line invocation of experiment program + lab.tag - Optional, identifier string for this LabReport + lab.ae_directory - Directory containing all files created by this program + lab.lab_report - File path of Lab Report + lab.experiments - List of ExperimentSummary + lab.experiment_ids - Experiments accessed by their unique hash + """ + default_extension = '_ae' + section_divider = '\n' + ('=' * 80) + '\n' + def __init__(self, experiment_argv, method=None, tag='', verbose=False): + if isinstance(experiment_argv, str): + experiment_argv = experiment_argv.split() + self.argv = experiment_argv + self.method = method + self.tag = tag + self.verbose = verbose + self.load_experiment_module(experiment_argv[0]) + self.ae_directory = os.path.join(self.path, self.name) + self.default_extension + if self.tag: + self.ae_directory = self.ae_directory + '_' + self.tag + self.lab_report = os.path.join(self.ae_directory, 'lab_report.txt') + self.experiments = [] + self.experiment_ids = {} + if os.path.isdir(self.ae_directory): + with open(self.lab_report, 'r') as file: + report = file.read() + self.parse_lab_report(report) + else: + # Initialize the Lab Reports attributes and write the skeleton of it + # to file. + self.init_header() + os.mkdir(self.ae_directory) + # Always have an experiment for the default parameters. + try: + ExperimentSummary(self, parameters = self.default_parameters) + except ValueError: + pass + + # Parse & Write this file immediately at start up. + self.save() + + def init_header(self): + self.header = self.name + if self.tag: + self.header += ' - ' + self.tag + self.header += ' - Automatic Experiments\n' + self.header += time.asctime( time.localtime(time.time()) ) + '\n' + + def load_experiment_module(self, experiment_module): + """ + Argument experiment_module is command line argument 0, specifying the + file path to the experiment module. 
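The significance() method above boils down to a two-sample t-test between the scores of the default parameters and the scores of a candidate. The same computation in isolation, with made-up score lists:

import scipy.stats

baseline_scores  = [0.61, 0.63, 0.60, 0.62]   # hypothetical scores of the default parameters
candidate_scores = [0.66, 0.68, 0.64]         # hypothetical scores of a modified parameter set
stat, pval = scipy.stats.ttest_ind(baseline_scores, candidate_scores,
                                   axis=None, equal_var=True)
# A small p-value suggests the score change is unlikely to be noise.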
+ """ + self.path, experiment_module = os.path.split(experiment_module) + self.name, dot_py = os.path.splitext(experiment_module) + assert(dot_py == '.py') + self.module_reload = 'import sys; sys.path.append("%s"); '%self.path + self.module_reload += 'import %s; '%self.name + exec_globals = {} + exec(self.module_reload, exec_globals) + self.module = exec_globals[self.name] + + self.default_parameters = ParameterSet(self.module.default_parameters) + self.structure = _recursive_parameter_structure(self.default_parameters) + + def parse_lab_report(self, report): + if not report.strip(): + raise ValueError("Empty lab report file.") + sections = report.split(self.section_divider) + self.header = sections[0] + default_parameters = '\n'.join( sections[1].split('\n')[1:-1] ) + cli = sections[1].split('\n')[-1].strip('$ ').split() + sorted_pval_table = sections[2] + experiment_sections = sections[3:] + file_defaults = ParameterSet(default_parameters) + # Consistency check for parameters. + if file_defaults != self.default_parameters: + while True: + q = input("Default parameters have changed, options: old new abort: ") + q = q.strip().lower() + if q == 'old': + self.default_parameters = file_defaults + break + elif q == 'new': + shutil.copy(self.lab_report, self.lab_report + '.backup_defaults') + break + elif q == 'abort': + sys.exit() + # Consistency check for experiment. + if cli != self.argv: + while True: + q = input("Experiment command line invocation have changed, options: old new abort: ") + q = q.strip().lower() + if q == 'old': + self.argv = cli + break + elif q == 'new': + shutil.copy(self.lab_report, self.lab_report + '.backup_argv') + break + elif q == 'abort': + sys.exit() + + [ExperimentSummary(self, s) for s in experiment_sections if s.strip()] + + def significant_experiments_table(self): + """ + Returns string + """ + ex = sorted(self.experiments, key = lambda x: -x.mean()) + ex = ex[:20] + s = ' Hash | N | Score | P-Value | Modifications\n' + fmt = '%8X | %3d | % 10g | % 9.3g | ' + for x in ex: + s += fmt%(hash(x), len(x.scores), x.mean(), x.significance()) + if not x.modifications: + s += 'Default Parameters\n' + else: + for idx, mod in enumerate(x.modifications): + param, value = mod + if idx > 0: + s += ' ' * 42 + s += '%s = %s\n'%(param, str(value)) + return s + + def __str__(self): + """ Returns the lab report. """ + s = self.header + s += self.section_divider + s += 'Default Parameter Values = \n' + s += pprint.pformat(self.default_parameters) + s += '\n$ ' + ' '.join(self.argv) + s += self.section_divider + s += self.significant_experiments_table().rstrip() + s += '\n\nFailed Experiments: ' + for x in self.experiments: + if x.attempts > len(x.scores): + s += '%X '%hash(x) + s += self.section_divider + s += self.section_divider.join(str(s) for s in self.experiments) + return s + + def save(self): + with open(self.lab_report + '.tmp', 'w') as file: + file.write( str(self) ) + os.rename(self.lab_report + '.tmp', self.lab_report) + + def run(self, processes, + time_limit = None, + memory_limit = None,): + """ + """ + pool = multiprocessing.Pool(processes, maxtasksperchild=1) + async_results = [] # Contains pairs of (Promise, Parameters) + + while True: + # Check for jobs which have finished + run_slot = 0 + while run_slot < len(async_results): + promise, value = async_results[run_slot] + if promise.ready(): + # Experiment run has finished, deal with the results. 
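LabReport.save() above relies on the common write-to-temp-then-rename pattern, so a crash mid-write never leaves a truncated lab report behind. The pattern in isolation (function name and arguments are placeholders):

import os

def atomic_write(path, text):
    tmp = path + '.tmp'
    with open(tmp, 'w') as file:
        file.write(text)
    os.rename(tmp, path)   # atomic on POSIX when both paths are on the same filesystem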
+ result = self._get_promised_results(promise, value) + self.save_results(value, result) + async_results.pop(run_slot) + else: + run_slot += 1 + + # Start running new experiments + while len(async_results) < processes: + # Pickle is picky, so clean up 'self' which is sent via pickle + # to the process pool. pickle_self only needs to work with + # evaluate_parameters + pickle_self = copy(self) + pickle_self.module = None # Won't pickle, use self.module_reload instead. + # Pickle balks at circular references, remove them. + pickle_self.experiments = None + pickle_self.experiment_ids = None + value = self.method(self) + value = self.typecast_parameters(value) + if self.verbose: + print("%X"%hash(value)) + promise = pool.apply_async( + Experiment_evaluate_parameters, + args = (pickle_self, value,), + kwds = {'time_limit' : time_limit, + 'memory_limit' : memory_limit,},) + async_results.append((promise, value)) + # Wait for experiments to complete + time.sleep(1) + + def _get_promised_results(self, promise, value): + try: + return promise.get() + except (ValueError, MemoryError, ZeroDivisionError, AssertionError) as err: + print("") + pprint.pprint(value) + print("%s:"%(type(err).__name__), err) + print("") + except Exception: + print("") + pprint.pprint(value) + print("Unhandled Exception.") + print("") + raise + + def evaluate_parameters(self, parameters, + time_limit = None, + memory_limit = None,): + """ + This function executes in a child processes. + """ + parameters = self.typecast_parameters(parameters) + # Redirect stdour & stderr to a temporary file. + journal = tempfile.NamedTemporaryFile( + mode = 'w+t', + delete = False, + buffering = 1, + dir = self.ae_directory, + prefix = "%X_"%hash(parameters), + suffix = ".log",) + stdout, stderr = sys.stdout, sys.stderr + sys.stdout = journal + sys.stderr = journal + start_time = time.time() + journal.write("Started: " + time.asctime( time.localtime(start_time) ) + '\n') + # Setup memory limit + if memory_limit is not None: + soft, hard = resource.getrlimit(resource.RLIMIT_AS) + resource.setrlimit(resource.RLIMIT_AS, (memory_limit, hard)) + # Setup time limit + if time_limit is not None: + signal.signal(signal.SIGALRM, _timeout_callback) + time_limit = max(1, int(round(time_limit * 60 * 60))) + signal.alarm(time_limit) + + eval_str = (self.module_reload + + 'score = %s.main(parameters=%s, argv=[%s], verbose=%s)'%( + self.name, + repr(parameters), + ', '.join(repr(arg) for arg in self.argv[1:]), + str(self.verbose))) + exec_globals = {} + exec(eval_str, exec_globals) + + # Clean up time limit + if time_limit is not None: + signal.alarm(0) + # Clean up memory limit + if memory_limit is not None: + resource.setrlimit(resource.RLIMIT_AS, (soft, hard)) + # Restore file descriptors + sys.stdout, sys.stderr = stdout, stderr + run_time = datetime.timedelta(seconds = time.time() - start_time) + journal.write("Elapsed Time: " + str(run_time)) + + return exec_globals['score'], journal.name + + def typecast_parameters(self, parameters): + return _recursive_typecast_parameters(parameters, self.structure) + + def save_results(self, parameters, result): + # Update this experiment + param_hash = hash(ParameterSet(parameters)) + if param_hash in self.experiment_ids: + experiment = self.experiment_ids[param_hash] + else: + experiment = ExperimentSummary(self, parameters = parameters) + experiment.attempts += 1 + if result is not None: + score, run_journal = result + experiment.scores.append(score) + + self.save() # Write the updated Lab Report to file. 
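evaluate_parameters() above sandboxes each child run with a POSIX address-space cap and a SIGALRM timer. The core of that mechanism extracted into a standalone sketch; the function name and the TimeoutError are my own, the original raises ValueError from its handler:

import resource, signal

def _timeout(signum, frame):
    raise TimeoutError("run exceeded its time limit")

def limit_child(max_seconds, max_bytes):
    # Cap the address space; allocations beyond it raise MemoryError in this process.
    soft, hard = resource.getrlimit(resource.RLIMIT_AS)
    resource.setrlimit(resource.RLIMIT_AS, (max_bytes, hard))
    # Arm a one-shot alarm; the handler aborts the run by raising.
    signal.signal(signal.SIGALRM, _timeout)
    signal.alarm(int(max_seconds))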
+ + # Append the temporary journal file to the experiments journal. + if result is None: + # Sadly if the experiment crashes, the temp file is abandoned and + # the debugger must search for it manually if they want to see it... + return + with open(run_journal) as journal: + content = journal.read() + with open(experiment.journal, 'a') as experiment_journal: + experiment_journal.write(self.section_divider) + experiment_journal.write(content) + os.remove(run_journal) + +def Experiment_evaluate_parameters(self, *args, **kwds): + """ + Global wrapper for LabReport.evaluate_parameters which is safe for + multiprocessing. + """ + return LabReport.evaluate_parameters(self, *args, **kwds) + +def _timeout_callback(signum, frame): + raise ValueError("Time limit exceded.") + +def _recursive_parameter_structure(default_parameters): + """ + Convert a set of parameters into the data types used to represent them. + Returned result has the same structure as the parameters. + """ + # Recurse through the parameter data structure. + if isinstance(default_parameters, dict): + return {key: _recursive_parameter_structure(value) + for key, value in default_parameters.items()} + elif isinstance(default_parameters, tuple): + return tuple(_recursive_parameter_structure(value) + for value in default_parameters) + # Determine data type of each entry in parameter data structure. + elif isinstance(default_parameters, float): + return float + elif isinstance(default_parameters, int): + return int + raise TypeError('Unaccepted type in experiment parameters: type "%s".'%(type(default_parameters).__name__)) + +def _recursive_typecast_parameters(values, structure): + # Recurse through the parameter data structure. + if isinstance(structure, dict): + for key in structure: + values[key] = _recursive_typecast_parameters(values[key], structure[key]) + return values + elif isinstance(structure, tuple): + return tuple(_recursive_typecast_parameters(*args) + for args in zip(values, structure)) + # Type cast values. 
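_recursive_parameter_structure() and _recursive_typecast_parameters() form a round trip: the first records the declared type of every leaf, the second coerces mutated values back to those types. A sketch with a hypothetical parameter set:

params    = {'sp': {'potential_pct': 0.85, 'size': (2048, 40)}}
structure = _recursive_parameter_structure(params)
# structure == {'sp': {'potential_pct': float, 'size': (int, int)}}

mutated = {'sp': {'potential_pct': 0.9034, 'size': (2457.6, 40.0)}}
_recursive_typecast_parameters(mutated, structure)
# -> {'sp': {'potential_pct': 0.9034, 'size': (2458, 40)}}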
+ elif structure == float: + value = float(values) + return float(str(value)) + elif structure == int: + return int(round(float(values))) + +def paths(structure): + retval = [] + if isinstance(structure, dict): + for key, value in structure.items(): + retval.extend( "['%s']%s"%(key, path) for path in paths(value) ) + elif isinstance(structure, tuple): + for idx, value in enumerate(structure): + retval.extend( "[%d]%s"%(idx, path) for path in paths(value) ) + else: + retval.append('') + return sorted(retval) + +################################################################################ + + +def evaluate_default_parameters(lab): + # print('%X'%hash(lab.default_parameters)) + return lab.default_parameters + + +class EvaluateHashes: + def __init__(self, hashes): + self.hashes = [int(h, base=16) for h in hashes] + + def __call__(self, lab): + try: + experiments = [lab.experiment_ids[h] for h in self.hashes] + except KeyError: + unknown = [h for h in self.hashes if h not in lab.experiment_ids] + raise ValueError('Hash not recognized: %X'%unknown[0]) + rnd = random.random + return min(experiments, key=lambda x: x.attempts + rnd()).parameters + return random.choice(experiments).parameters + + +def evaluate_all(lab): + rnd = random.random + return min(lab.experiments, key=lambda x: x.attempts + rnd()).parameters + + +def evaluate_best(lab): + rnd = random.random + best = max(lab.experiments, key = lambda X: X.mean() + rnd() ) + return best.parameters + + +class GridSearch(object): + """docstring for GridSearch""" + mod_funcs = [ + lambda v: v * .40, + lambda v: v * .60, + lambda v: v * .80, + lambda v: v * 1.00, # Include the default parameters. + lambda v: v * 1.20, + lambda v: v * 1.40, + lambda v: v * 1.60, + ] + + def __init__(self, directive): + self.directive = directive + + def __call__(self, lab): + + if lab.experiment_ids[hash(lab.default_parameters)].attempts < 10: + return lab.default_parameters + + # Get a list of every parameter to experiment with. + if self.directive: + manifest = [] + for start in self.directive.split(','): + node = eval("lab.default_parameters" + start) + manifest.extend(start + end for end in paths(node)) + else: + manifest = paths(lab.default_parameters) + + # Suggest the following modifications to each parameter. + experiments = [] + for path in manifest: + value = lab.default_parameters.get(path) + for mod in self.mod_funcs: + params = deepcopy(lab.default_parameters) + params.apply( path, mod(value) ) + try: + experiments.append( + ExperimentSummary(lab, parameters=params)) + except ValueError: + # ExperimentSummary raises ValueError if it detects + # duplicate entry in the database. + experiments.append( + lab.experiment_ids[hash(params)]) + + lab.save() # Write all of the new grid-search experiments to the lab report. + + # TODO: Reject experiments which have failed a few times. + + rnd = random.random + return min(experiments, key=lambda x: x.attempts + rnd()).parameters + + +class CombineBest: + def __init__(self, n=20): + self.n = n + + def merge(self, lab, ideas): + """ Take several experiments and return the best combination of them. """ + # Marshal all of the modifications together. + ideas = sorted(ideas, key = lambda x: -x.mean()) + paths = [] + values = [] + for x in ideas: + for path, value in x.modifications: + if path in paths: + continue # Higher scoring experiments take precedence. + paths.append(path) + values.append(value) + # Create or get the experiment object. 
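paths() and GridSearch.mod_funcs above drive the grid: paths() enumerates every leaf as an executable index string, and each leaf's default value is then scaled by a fixed set of factors. A sketch with a hypothetical two-parameter set:

defaults = {'lr': 0.1, 'size': (2048, 40)}
paths(defaults)
# -> ["['lr']", "['size'][0]", "['size'][1]"]

[mod(defaults['lr']) for mod in GridSearch.mod_funcs]
# -> roughly [0.04, 0.06, 0.08, 0.10, 0.12, 0.14, 0.16]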
+ mods = list(zip(paths, values)) + try: + return ExperimentSummary(lab, modifications=mods) + except ValueError: + # ExperimentSummary raises ValueError if it detects duplicate entry + # in the database. + params = deepcopy(lab.default_parameters) + for p, v in mods: + params.apply(p, v) + return lab.experiment_ids[hash(params)] + + def __call__(self, lab): + + suggest = [] # Retval accumulator + # Ignore all underperforming experiments. + null = lab.experiment_ids[hash(lab.default_parameters)] + ex = [x for x in lab.experiments if x.mean() > null.mean()] + # For sanity: Limit to the top experiments. + ex = sorted(ex, key = lambda x: -x.mean())[ : self.n] + # Keep trying experiments which are not yet significant. Experiments + # with a single datum have a significance of NaN... + trymore = [x for x in ex if (x.significance() > .50 or math.isnan(x.significance()))] + ex = [x for x in ex if x not in trymore] + suggest.extend(trymore) + # Suggests combinations + import itertools + for ideas in itertools.combinations(ex, 2): + suggest.append( self.merge(lab, ideas) ) + + if False: # Dump the suggestions for debugging + for x in suggest: + for p, v in x.modifications: + print(p , v) + print() + 1/0 + rnd = random.random + return min(suggest, key=lambda x: x.attempts + rnd()).parameters + + +if __name__ == '__main__': + arg_parser = argparse.ArgumentParser() + + arg_parser.add_argument('--parse', action='store_true', + help='Parse the lab report and write it back to the same file, then exits.') + + arg_parser.add_argument('--rmz', action='store_true', + help='Remove all experiments which have zero attempts.') + + arg_parser.add_argument('--default_parameters', action='store_true',) + + arg_parser.add_argument('--all_experiments', action='store_true', + help='Evaluate all experiments in the lab report, don\'t start new experiments') + + arg_parser.add_argument('--hashes', type=str,) + + arg_parser.add_argument('--best', action='store_true', + help='Evaluate the best set of parameters on file, with verbose=True.') + + arg_parser.add_argument('--grid_search', type=str) + + arg_parser.add_argument('--combine', type=int, default=0, help='Combine the NUM best experiments.') + + arg_parser.add_argument('--verbose', action='store_true',) + arg_parser.add_argument('--tag', type=str, + help='Optional string appended to the name of the AE directory. Use tags to ' + 'keep multiple variants of an experiment alive and working at the same time') + arg_parser.add_argument('-n', '--processes', type=int, default=os.cpu_count(),) + arg_parser.add_argument('--time_limit', type=float, default=None, + help='Hours, time limit for each run of the experiment.',) + arg_parser.add_argument('--memory_limit', type=float, default=None, + help='Gigabytes, RAM memory limit for each run of the experiment.') + arg_parser.add_argument('experiment', nargs=argparse.REMAINDER, + help='Name of experiment module followed by its command line arguments.') + + args = arg_parser.parse_args() + giga = 2**30 + if args.memory_limit is not None: + memory_limit = int(args.memory_limit * giga) + else: + available_memory = int(os.popen("free -b").readlines()[1].split()[3]) + memory_limit = int(available_memory / args.processes) + print("Memory Limit %.2g GB per instance."%(memory_limit / giga)) + + if args.parse: + ae = LabReport(args.experiment, None, args.tag) + print("Lab Report written to %s"%ae.lab_report) + print("Exit.") + sys.exit(0) # All done. 
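CombineBest.merge() above resolves conflicts by letting the higher-scoring experiment's value win for any path that several experiments touch. The precedence rule in isolation; the paths and values are made up:

mods_by_rank = [
    [("['lr']", 0.12), ("['size'][0]", 2458)],      # best experiment first
    [("['lr']", 0.08), ("['sparsity']", 0.015)],    # lower-scoring experiment
]
merged = {}
for mods in mods_by_rank:
    for path, value in mods:
        merged.setdefault(path, value)   # first (best) writer wins
# merged == {"['lr']": 0.12, "['size'][0]": 2458, "['sparsity']": 0.015}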
+ + elif args.rmz: + ae = LabReport(args.experiment, None, args.tag) + rm = [x for x in ae.experiments if x.attempts == 0] + for x in rm: + ae.experiments.remove(x) + ae.experiment_ids.pop(hash(x)) + ae.save() + sys.exit(0) + + elif args.default_parameters: + method = evaluate_default_parameters + + elif args.all_experiments: + method = evaluate_all + + elif args.hashes: + method = EvaluateHashes(args.hashes.split(',')) + + elif args.best: + method = evaluate_best # Test this! + + elif args.grid_search: + method = GridSearch(args.grid_search) + + elif args.combine: + method = CombineBest(args.combine) + + else: + print("Missing command line argument: what to do?") + sys.exit(1) + + ae = LabReport(args.experiment, + method = method, + tag = args.tag, + verbose = args.verbose) + + ae.run( + processes = args.processes, + time_limit = args.time_limit, + memory_limit = memory_limit,) From e9f7c66166f71d6f06e0eb137de04082525a8c7e Mon Sep 17 00:00:00 2001 From: ctrl-z-9000-times Date: Sun, 12 May 2019 19:13:42 -0400 Subject: [PATCH 2/9] AE: Review & reorg. --- py/src/nupic/optimization/ae.py | 347 +++++---------------- py/src/nupic/optimization/basic_search.py | 128 ++++++++ py/src/nupic/optimization/parameter_set.py | 126 ++++++++ 3 files changed, 330 insertions(+), 271 deletions(-) create mode 100644 py/src/nupic/optimization/basic_search.py create mode 100644 py/src/nupic/optimization/parameter_set.py diff --git a/py/src/nupic/optimization/ae.py b/py/src/nupic/optimization/ae.py index 20eb76c903..b369f9df4a 100644 --- a/py/src/nupic/optimization/ae.py +++ b/py/src/nupic/optimization/ae.py @@ -1,9 +1,24 @@ #!/usr/bin/python3 +# ------------------------------------------------------------------------------ +# Numenta Platform for Intelligent Computing (NuPIC) +# +# Copyright (C) 2018-2019, David McDougall +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero Public License version 3 as published by the Free +# Software Foundation. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero Public License for more details. +# +# You should have received a copy of the GNU Affero Public License along with +# this program. If not, see http://www.gnu.org/licenses. +# ------------------------------------------------------------------------------ """ Automatic Experimenter -Written by David McDougall, 2018-2019 -This is a parameter optimization framework. +This is a framework for parameter optimization. * It methodically records the results of different sets of parameters and analyses the results. It then automatically suggests and evaluates modifications to the parameters. @@ -69,20 +84,21 @@ their journal and then the ".log" file is deleted. """ -# TODO: Default parameters need better handling... - -# TODO: ExperimentSummary should X-Ref with journal file. Journal should -# contain complete parameter set, needed for full automatic reproducibility. -# Changing default parameters should not make experiments inaccessible. +# TODO: Default parameters need better handling... When they change, update +# all of the modifications to be diffs of the current parameters. # TODO: Maybe the command line invocation should be included in the experiment # hash? Then I could experiment with the CLI args within a single lab report. 
# TODO: Every run should track elapsed time and report the average in the -# experiment journal & summary. Some of these experiments clearly take 4x -# longer than others but its not recorded. +# experiment journal & summary. Some of these experiments clearly take longer +# than others but its not recorded. + +# TODO: Log files should report memory usage ... + +# TODO: Remove LabReport.experiment, then rename lab.experiment_ids to experiments -# TODO: Log files should have memory usage ... +# TODO: Reject experiments which have failed a few times. import argparse import os @@ -98,89 +114,11 @@ import signal # Probably not X-platform ... from copy import copy, deepcopy import re -import hashlib import numpy as np import scipy import math - -class ParameterSet(dict): - def __init__(self, data): - super().__init__(self) - if isinstance(data, str): - try: - data = eval(data.strip()) - except: - print("Error parsing: " + data.strip()) - raise - assert(isinstance(data, dict)) - self.update(data) - - def __hash__(self): - string = pprint.pformat(self).encode('utf-8') - checksum = hashlib.md5(string).hexdigest() - return abs(int(checksum[:8], base=16)) - - def __eq__(self, other): - assert(isinstance(other, type(self))) - return _recursive_equal(self, other) - - def diff(a, b): - return _recursive_diff(a, b) - - def get(self, path): - try: - return eval('self' + path) - except: - print('Failed to get self' + path) - raise - - def apply(self, modification, value): - try: - _recursive_apply(self, modification.strip(), value) - except: - print('Failed to apply modification %s = %s'%(modification, str(value))) - raise - - -def _recursive_equal(a, b): - if isinstance(a, dict): - return all(_recursive_equal(a[k], b[k]) for k in a) - elif isinstance(a, tuple): - return all(_recursive_equal(ai, bi) for ai, bi in zip(a, b)) - else: - return a == b - -def _recursive_diff(old, new): - diffs = [] - if isinstance(old, dict): - for key in old: - inner_diffs = _recursive_diff(old[key], new[key]) - for path, new_value in inner_diffs: - diffs.append(("['%s']%s"%(key, path), new_value)) - elif isinstance(old, tuple): - for idx in range(len(old)): - inner_diffs = _recursive_diff(old[idx], new[idx]) - for path, new_value in inner_diffs: - diffs.append(("[%d]%s"%(idx, path), new_value)) - elif old != new: - diffs.append(('', new)) - return diffs - -def _recursive_apply(self, mod, value): - access = mod.split(']')[0].strip('[]"\' ') - if not access: - return value - tail = mod.split(']', maxsplit=1)[1] - if isinstance(self, dict): - self[access] = _recursive_apply(self[access], tail, value) - return self - if isinstance(self, tuple): - self = list(self) - index = int(access) - self[index] = _recursive_apply(self[index], tail, value) - return tuple(self) - +from .nupic.optimization.parameter_set import ParameterSet class ExperimentSummary: """ @@ -265,6 +203,8 @@ def parse(self, string): # from defaults + modifications, and the defaults might have changed. self._hash = int(re.search("Hash: (.*)", string).groups()[0], base=16) + # TODO: This should accept the baseline to compare against, and then have + # the defaults parameters as the default baseline. 
def significance(self): """ Returns the P-Value of the Null-Hypothesis test (these parameters @@ -362,7 +302,7 @@ def __init__(self, experiment_argv, method=None, tag='', verbose=False): self.save() def init_header(self): - self.header = self.name + self.header = str(self.name) if self.tag: self.header += ' - ' + self.tag self.header += ' - Automatic Experiments\n' @@ -383,7 +323,7 @@ def load_experiment_module(self, experiment_module): self.module = exec_globals[self.name] self.default_parameters = ParameterSet(self.module.default_parameters) - self.structure = _recursive_parameter_structure(self.default_parameters) + self.structure = self.default_parameters.get_types() def parse_lab_report(self, report): if not report.strip(): @@ -395,6 +335,7 @@ def parse_lab_report(self, report): sorted_pval_table = sections[2] experiment_sections = sections[3:] file_defaults = ParameterSet(default_parameters) + # TODO: Better instructions here! # Consistency check for parameters. if file_defaults != self.default_parameters: while True: @@ -411,7 +352,10 @@ def parse_lab_report(self, report): # Consistency check for experiment. if cli != self.argv: while True: - q = input("Experiment command line invocation have changed, options: old new abort: ") + q = input( "Experiment command line invocation has changed, options:\n" + + " old - Ignore the given invocation, use what's on file.\n" + + " new - Use the given arguments, overwrites the old invocation!\n" + + " abort.\n") q = q.strip().lower() if q == 'old': self.argv = cli @@ -578,7 +522,22 @@ def evaluate_parameters(self, parameters, return exec_globals['score'], journal.name def typecast_parameters(self, parameters): - return _recursive_typecast_parameters(parameters, self.structure) + def recursive_typecast_parameters(values, structure): + # Recurse through the parameter data structure. + if isinstance(structure, dict): + for key in structure: + values[key] = recursive_typecast_parameters(values[key], structure[key]) + return values + elif isinstance(structure, tuple): + return tuple(recursive_typecast_parameters(*args) + for args in zip(values, structure)) + # Type cast values. + elif structure == float: + value = float(values) + return float(str(value)) + elif structure == int: + return int(round(float(values))) + return recursive_typecast_parameters(parameters, self.structure) def save_results(self, parameters, result): # Update this experiment @@ -616,53 +575,6 @@ def Experiment_evaluate_parameters(self, *args, **kwds): def _timeout_callback(signum, frame): raise ValueError("Time limit exceded.") -def _recursive_parameter_structure(default_parameters): - """ - Convert a set of parameters into the data types used to represent them. - Returned result has the same structure as the parameters. - """ - # Recurse through the parameter data structure. - if isinstance(default_parameters, dict): - return {key: _recursive_parameter_structure(value) - for key, value in default_parameters.items()} - elif isinstance(default_parameters, tuple): - return tuple(_recursive_parameter_structure(value) - for value in default_parameters) - # Determine data type of each entry in parameter data structure. - elif isinstance(default_parameters, float): - return float - elif isinstance(default_parameters, int): - return int - raise TypeError('Unaccepted type in experiment parameters: type "%s".'%(type(default_parameters).__name__)) - -def _recursive_typecast_parameters(values, structure): - # Recurse through the parameter data structure. 
- if isinstance(structure, dict): - for key in structure: - values[key] = _recursive_typecast_parameters(values[key], structure[key]) - return values - elif isinstance(structure, tuple): - return tuple(_recursive_typecast_parameters(*args) - for args in zip(values, structure)) - # Type cast values. - elif structure == float: - value = float(values) - return float(str(value)) - elif structure == int: - return int(round(float(values))) - -def paths(structure): - retval = [] - if isinstance(structure, dict): - for key, value in structure.items(): - retval.extend( "['%s']%s"%(key, path) for path in paths(value) ) - elif isinstance(structure, tuple): - for idx, value in enumerate(structure): - retval.extend( "[%d]%s"%(idx, path) for path in paths(value) ) - else: - retval.append('') - return sorted(retval) - ################################################################################ @@ -692,154 +604,45 @@ def evaluate_all(lab): def evaluate_best(lab): - rnd = random.random - best = max(lab.experiments, key = lambda X: X.mean() + rnd() ) + best = max(lab.experiments, key = lambda X: X.mean() ) return best.parameters -class GridSearch(object): - """docstring for GridSearch""" - mod_funcs = [ - lambda v: v * .40, - lambda v: v * .60, - lambda v: v * .80, - lambda v: v * 1.00, # Include the default parameters. - lambda v: v * 1.20, - lambda v: v * 1.40, - lambda v: v * 1.60, - ] - - def __init__(self, directive): - self.directive = directive - - def __call__(self, lab): - - if lab.experiment_ids[hash(lab.default_parameters)].attempts < 10: - return lab.default_parameters - - # Get a list of every parameter to experiment with. - if self.directive: - manifest = [] - for start in self.directive.split(','): - node = eval("lab.default_parameters" + start) - manifest.extend(start + end for end in paths(node)) - else: - manifest = paths(lab.default_parameters) - - # Suggest the following modifications to each parameter. - experiments = [] - for path in manifest: - value = lab.default_parameters.get(path) - for mod in self.mod_funcs: - params = deepcopy(lab.default_parameters) - params.apply( path, mod(value) ) - try: - experiments.append( - ExperimentSummary(lab, parameters=params)) - except ValueError: - # ExperimentSummary raises ValueError if it detects - # duplicate entry in the database. - experiments.append( - lab.experiment_ids[hash(params)]) - - lab.save() # Write all of the new grid-search experiments to the lab report. - - # TODO: Reject experiments which have failed a few times. - - rnd = random.random - return min(experiments, key=lambda x: x.attempts + rnd()).parameters - - -class CombineBest: - def __init__(self, n=20): - self.n = n - - def merge(self, lab, ideas): - """ Take several experiments and return the best combination of them. """ - # Marshal all of the modifications together. - ideas = sorted(ideas, key = lambda x: -x.mean()) - paths = [] - values = [] - for x in ideas: - for path, value in x.modifications: - if path in paths: - continue # Higher scoring experiments take precedence. - paths.append(path) - values.append(value) - # Create or get the experiment object. - mods = list(zip(paths, values)) - try: - return ExperimentSummary(lab, modifications=mods) - except ValueError: - # ExperimentSummary raises ValueError if it detects duplicate entry - # in the database. 
- params = deepcopy(lab.default_parameters) - for p, v in mods: - params.apply(p, v) - return lab.experiment_ids[hash(params)] - - def __call__(self, lab): - - suggest = [] # Retval accumulator - # Ignore all underperforming experiments. - null = lab.experiment_ids[hash(lab.default_parameters)] - ex = [x for x in lab.experiments if x.mean() > null.mean()] - # For sanity: Limit to the top experiments. - ex = sorted(ex, key = lambda x: -x.mean())[ : self.n] - # Keep trying experiments which are not yet significant. Experiments - # with a single datum have a significance of NaN... - trymore = [x for x in ex if (x.significance() > .50 or math.isnan(x.significance()))] - ex = [x for x in ex if x not in trymore] - suggest.extend(trymore) - # Suggests combinations - import itertools - for ideas in itertools.combinations(ex, 2): - suggest.append( self.merge(lab, ideas) ) - - if False: # Dump the suggestions for debugging - for x in suggest: - for p, v in x.modifications: - print(p , v) - print() - 1/0 - rnd = random.random - return min(suggest, key=lambda x: x.attempts + rnd()).parameters - - if __name__ == '__main__': arg_parser = argparse.ArgumentParser() + arg_parser.add_argument('--verbose', action='store_true',) + arg_parser.add_argument('--tag', type=str, + help='Optional string appended to the name of the AE directory. Use tags to ' + 'keep multiple variants of an experiment alive and working at the same time') + arg_parser.add_argument('-n', '--processes', type=int, default=os.cpu_count(),) + arg_parser.add_argument('--time_limit', type=float, default=None, + help='Hours, time limit for each run of the experiment.',) + arg_parser.add_argument('--memory_limit', type=float, default=None, + help='Gigabytes, RAM memory limit for each run of the experiment.') + arg_parser.add_argument('experiment', nargs=argparse.REMAINDER, + help='Name of experiment module followed by its command line arguments.') + + action_parser = arg_parser.add_mutually_exclusive_group(required=True) - arg_parser.add_argument('--parse', action='store_true', + action_parser.add_argument('--parse', action='store_true', help='Parse the lab report and write it back to the same file, then exits.') - arg_parser.add_argument('--rmz', action='store_true', + action_parser.add_argument('--rmz', action='store_true', help='Remove all experiments which have zero attempts.') - arg_parser.add_argument('--default_parameters', action='store_true',) + action_parser.add_argument('--default_parameters', action='store_true',) - arg_parser.add_argument('--all_experiments', action='store_true', + action_parser.add_argument('--all_experiments', action='store_true', help='Evaluate all experiments in the lab report, don\'t start new experiments') - arg_parser.add_argument('--hashes', type=str,) + action_parser.add_argument('--hashes', type=str,) - arg_parser.add_argument('--best', action='store_true', + action_parser.add_argument('--best', action='store_true', help='Evaluate the best set of parameters on file, with verbose=True.') - arg_parser.add_argument('--grid_search', type=str) + action_parser.add_argument('--grid_search', type=str) - arg_parser.add_argument('--combine', type=int, default=0, help='Combine the NUM best experiments.') - - arg_parser.add_argument('--verbose', action='store_true',) - arg_parser.add_argument('--tag', type=str, - help='Optional string appended to the name of the AE directory. 
Use tags to ' - 'keep multiple variants of an experiment alive and working at the same time') - arg_parser.add_argument('-n', '--processes', type=int, default=os.cpu_count(),) - arg_parser.add_argument('--time_limit', type=float, default=None, - help='Hours, time limit for each run of the experiment.',) - arg_parser.add_argument('--memory_limit', type=float, default=None, - help='Gigabytes, RAM memory limit for each run of the experiment.') - arg_parser.add_argument('experiment', nargs=argparse.REMAINDER, - help='Name of experiment module followed by its command line arguments.') + action_parser.add_argument('--combine', type=int, default=0, help='Combine the NUM best experiments.') args = arg_parser.parse_args() giga = 2**30 @@ -878,9 +681,11 @@ def __call__(self, lab): method = evaluate_best # Test this! elif args.grid_search: + from .nupic.optimization.basic_search import GridSearch method = GridSearch(args.grid_search) elif args.combine: + from .nupic.optimization.basic_search import CombineBest method = CombineBest(args.combine) else: diff --git a/py/src/nupic/optimization/basic_search.py b/py/src/nupic/optimization/basic_search.py new file mode 100644 index 0000000000..5ef63c8124 --- /dev/null +++ b/py/src/nupic/optimization/basic_search.py @@ -0,0 +1,128 @@ +# ------------------------------------------------------------------------------ +# Numenta Platform for Intelligent Computing (NuPIC) +# +# Copyright (C) 2018-2019, David McDougall +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero Public License version 3 as published by the Free +# Software Foundation. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero Public License for more details. +# +# You should have received a copy of the GNU Affero Public License along with +# this program. If not, see http://www.gnu.org/licenses. +# ------------------------------------------------------------------------------ + +from .nupic.optimization.parameter_set import ParameterSet +import itertools +import random + + +class GridSearch(object): + """ TODO: docstring for GridSearch""" + mod_funcs = [ + lambda v: v * .40, + lambda v: v * .60, + lambda v: v * .80, + lambda v: v * 1.20, + lambda v: v * 1.40, + lambda v: v * 1.60, + ] + + def __init__(self, directive): + self.directive = directive + + def __call__(self, lab): + + if lab.experiment_ids[hash(lab.default_parameters)].attempts < 10: + return lab.default_parameters + + # Get a list of every parameter to experiment with. + if self.directive: + manifest = [] + for start in self.directive.split(','): + node = eval("lab.default_parameters" + start) + manifest.extend(start + end for end in paths(node)) + else: + manifest = lab.default_parameters.enumerate() + + # Suggest the following modifications to each parameter. + experiments = [] + for path in manifest: + value = lab.default_parameters.get(path) + for mod in self.mod_funcs: + params = deepcopy(lab.default_parameters) + params.apply( path, mod(value) ) + try: + experiments.append( + ExperimentSummary(lab, parameters=params)) + except ValueError: + # ExperimentSummary raises ValueError if it detects + # duplicate entry in the database. + experiments.append( + lab.experiment_ids[hash(params)]) + + lab.save() # Write all of the new grid-search experiments to the lab report. 
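The directive argument above (passed through from --grid_search) restricts the search to the comma-separated parameter subtrees it names; everything else keeps its default. A hypothetical directive, with parameter names that are my own:

# Search only the encoder subtree and one scalar under 'sp'.
method = GridSearch("['encoder'],['sp']['potential_pct']")
# Each leaf reached this way is scaled by the mod_funcs factors (0.4x ... 1.6x)
# to produce the candidate experiments.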
+ + rnd = random.random + return min(experiments, key=lambda x: x.attempts + rnd()).parameters + + + +class CombineBest: + """ TODO Docs """ + def __init__(self, n=20): + self.n = n + + def merge(self, lab, ideas): + """ Take several experiments and return the best combination of them. """ + # Marshal all of the modifications together. + ideas = sorted(ideas, key = lambda x: -x.mean()) + paths = [] + values = [] + for x in ideas: + for path, value in x.modifications: + if path in paths: + continue # Higher scoring experiments take precedence. + paths.append(path) + values.append(value) + # Create or get the experiment object. + mods = list(zip(paths, values)) + try: + return ExperimentSummary(lab, modifications=mods) + except ValueError: + # ExperimentSummary raises ValueError if it detects duplicate entry + # in the database. + params = deepcopy(lab.default_parameters) + for p, v in mods: + params.apply(p, v) + return lab.experiment_ids[hash(params)] + + def __call__(self, lab): + + suggest = [] # Retval accumulator + # Ignore all underperforming experiments. + null = lab.experiment_ids[hash(lab.default_parameters)] + ex = [x for x in lab.experiments if x.mean() > null.mean()] + # For sanity: Limit to the top experiments. + ex = sorted(ex, key = lambda x: -x.mean())[ : self.n] + # Keep trying experiments which are not yet significant. Experiments + # with a single datum have a significance of NaN... + trymore = [x for x in ex if (x.significance() > .50 or math.isnan(x.significance()))] + ex = [x for x in ex if x not in trymore] + suggest.extend(trymore) + # Suggests combinations + for ideas in itertools.combinations(ex, 2): + suggest.append( self.merge(lab, ideas) ) + + if False: # Dump the suggestions for debugging + for x in suggest: + for p, v in x.modifications: + print(p , v) + print() + 1/0 + rnd = random.random + return min(suggest, key=lambda x: x.attempts + rnd()).parameters + diff --git a/py/src/nupic/optimization/parameter_set.py b/py/src/nupic/optimization/parameter_set.py new file mode 100644 index 0000000000..070a643a19 --- /dev/null +++ b/py/src/nupic/optimization/parameter_set.py @@ -0,0 +1,126 @@ +# ------------------------------------------------------------------------------ +# Numenta Platform for Intelligent Computing (NuPIC) +# +# Copyright (C) 2018-2019, David McDougall +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero Public License version 3 as published by the Free +# Software Foundation. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero Public License for more details. +# +# You should have received a copy of the GNU Affero Public License along with +# this program. If not, see http://www.gnu.org/licenses. 
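Both strategies above pick their next experiment with the same idiom: minimize the attempt count with a small random tie-breaker. Isolated for clarity (the function name is mine):

import random

def least_attempted(experiments):
    rnd = random.random
    # rnd() < 1, so the jitter only breaks ties between experiments with equal
    # attempt counts; it never outweighs a whole attempt.
    return min(experiments, key=lambda x: x.attempts + rnd())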
+# ------------------------------------------------------------------------------ + +import pprint +import hashlib + +class ParameterSet(dict): + """ TODO: Documentation """ + def __init__(self, data): + super().__init__(self) + if isinstance(data, str): + try: + data = eval(data.strip()) + except: + print("Error parsing: " + data.strip()) + raise + assert(isinstance(data, dict)) + self.update(data) + + def __hash__(self): + string = pprint.pformat(self).encode('utf-8') + checksum = hashlib.md5(string).hexdigest() + return abs(int(checksum[:8], base=16)) + + def __eq__(self, other): + assert(isinstance(other, type(self))) + if isinstance(self, dict): + return all(ParameterSet.__eq__(self[k], other[k]) for k in self) + elif isinstance(self, tuple): + return all(ParameterSet.__eq__(X, Y) for X, Y in zip(self, other)) + else: + return self == other + + def diff(old, new): + """ Returns list of pairs of (path, new-value) """ + diffs = [] + if isinstance(old, dict): + for key in old: + inner_diffs = ParameterSet.diff(old[key], new[key]) + for path, new_value in inner_diffs: + diffs.append(("['%s']%s"%(key, path), new_value)) + elif isinstance(old, tuple): + for idx in range(len(old)): + inner_diffs = ParameterSet.diff(old[idx], new[idx]) + for path, new_value in inner_diffs: + diffs.append(("[%d]%s"%(idx, path), new_value)) + elif old != new: + diffs.append(('', new)) + return diffs + + def get(self, path): + try: + return eval('self' + path) + except: + print('Failed to get self' + path) + raise + + def apply(self, modification, value): + """ + Modifies this set of parameters! + """ + try: + access = modification.split(']')[0].strip('[]"\' ') + if not access: + return value + tail = modification.split(']', maxsplit=1)[1] + if isinstance(self, dict): + self[access] = ParameterSet.apply(self[access], tail, value) + return self + if isinstance(self, tuple): + self = list(self) + index = int(access) + self[index] = ParameterSet.apply(self[index], tail, value) + return tuple(self) + except: + print('Failed to apply modification %s = %s'%(modification, str(value))) + raise + + def get_types(self): + """ + Convert a set of parameters into the data types used to represent them. + Returned result has the same structure as the parameters. + """ + # Recurse through the parameter data structure. + if isinstance(self, dict): + return {key: LabReport.get_types(value) + for key, value in self.items()} + elif isinstance(self, tuple): + return tuple(LabReport.get_types(value) + for value in self) + # Determine data type of each entry in parameter data structure. + elif isinstance(self, float): + return float + elif isinstance(self, int): + return int + raise TypeError('Unaccepted type in experiment parameters: type "%s".'%(type(self).__name__)) + + def enumerate(self): + """ + Convert parameters from a recursive structure into a list of parameters. + Returned parameters are represented as executable strings. 
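The md5-based __hash__ above gives every parameter set the stable hexadecimal identity used for journal file names and lab-report cross references. Because pprint.pformat sorts dictionary keys, key order does not affect the checksum; a quick check with hypothetical values:

p = ParameterSet({'lr': 0.1, 'layers': (100, 50)})
q = ParameterSet("{'layers': (100, 50), 'lr': 0.1}")   # same values, parsed from a string
assert hash(p) == hash(q)    # order-independent
print('%X' % hash(p))        # the hexadecimal id used in file names and reports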
+ """ + retval = [] + if isinstance(self, dict): + for key, value in self.items(): + retval.extend( "['%s']%s"%(key, path) for path in paths(value) ) + elif isinstance(self, tuple): + for idx, value in enumerate(self): + retval.extend( "[%d]%s"%(idx, path) for path in paths(value) ) + else: + retval.append('') + return sorted(retval) From f0e0c98e8c84bdbf3cbf2209dcae26e85a5dcef6 Mon Sep 17 00:00:00 2001 From: ctrl-z-9000-times Date: Sun, 12 May 2019 21:12:42 -0400 Subject: [PATCH 3/9] Particle Swarm Optimization - WIP --- py/src/nupic/optimization/ae.py | 86 +++----- py/src/nupic/optimization/parameter_set.py | 18 ++ py/src/nupic/optimization/swarming.py | 239 +++++++++++++++++++++ 3 files changed, 286 insertions(+), 57 deletions(-) create mode 100644 py/src/nupic/optimization/swarming.py diff --git a/py/src/nupic/optimization/ae.py b/py/src/nupic/optimization/ae.py index b369f9df4a..562d7a3285 100644 --- a/py/src/nupic/optimization/ae.py +++ b/py/src/nupic/optimization/ae.py @@ -111,7 +111,7 @@ import tempfile import multiprocessing import resource -import signal # Probably not X-platform ... +import signal # TODO: X-Plat issue: Replace signal with threading.timer from copy import copy, deepcopy import re import numpy as np @@ -152,7 +152,7 @@ def __init__(self, lab, else: raise TypeError("Not enough arguments to ExperimentSummary.__init__()") - self.parameters = self.lab.typecast_parameters(self.parameters) + self.parameters = self.parameters.typecast_parameters( self.lab.structure ) self.modifications = self.lab.default_parameters.diff(self.parameters) if hash(self) not in self.lab.experiment_ids: @@ -335,33 +335,20 @@ def parse_lab_report(self, report): sorted_pval_table = sections[2] experiment_sections = sections[3:] file_defaults = ParameterSet(default_parameters) - # TODO: Better instructions here! # Consistency check for parameters. - if file_defaults != self.default_parameters: + if file_defaults != self.default_parameters or cli != self.argv: while True: - q = input("Default parameters have changed, options: old new abort: ") + q = input("Default parameters or invovation have changed, options:\n" + + " old - Ignore the new/given, use what's on file.\n" + + " new - Use the new/given, overwrites the old file!\n" + + " abort.\n") q = q.strip().lower() if q == 'old': self.default_parameters = file_defaults + self.argv = cli break elif q == 'new': - shutil.copy(self.lab_report, self.lab_report + '.backup_defaults') - break - elif q == 'abort': - sys.exit() - # Consistency check for experiment. - if cli != self.argv: - while True: - q = input( "Experiment command line invocation has changed, options:\n" + - " old - Ignore the given invocation, use what's on file.\n" + - " new - Use the given arguments, overwrites the old invocation!\n" + - " abort.\n") - q = q.strip().lower() - if q == 'old': - self.argv = cli - break - elif q == 'new': - shutil.copy(self.lab_report, self.lab_report + '.backup_argv') + shutil.copy(self.lab_report, self.lab_report + '.backup') break elif q == 'abort': sys.exit() @@ -442,7 +429,7 @@ def run(self, processes, pickle_self.experiments = None pickle_self.experiment_ids = None value = self.method(self) - value = self.typecast_parameters(value) + value = value.typecast_parameters( self.structure ) if self.verbose: print("%X"%hash(value)) promise = pool.apply_async( @@ -475,7 +462,7 @@ def evaluate_parameters(self, parameters, """ This function executes in a child processes. 
""" - parameters = self.typecast_parameters(parameters) + parameters = parameters.typecast_parameters( self.structure ) # Redirect stdour & stderr to a temporary file. journal = tempfile.NamedTemporaryFile( mode = 'w+t', @@ -521,24 +508,6 @@ def evaluate_parameters(self, parameters, return exec_globals['score'], journal.name - def typecast_parameters(self, parameters): - def recursive_typecast_parameters(values, structure): - # Recurse through the parameter data structure. - if isinstance(structure, dict): - for key in structure: - values[key] = recursive_typecast_parameters(values[key], structure[key]) - return values - elif isinstance(structure, tuple): - return tuple(recursive_typecast_parameters(*args) - for args in zip(values, structure)) - # Type cast values. - elif structure == float: - value = float(values) - return float(str(value)) - elif structure == int: - return int(round(float(values))) - return recursive_typecast_parameters(parameters, self.structure) - def save_results(self, parameters, result): # Update this experiment param_hash = hash(ParameterSet(parameters)) @@ -579,7 +548,6 @@ def _timeout_callback(signum, frame): def evaluate_default_parameters(lab): - # print('%X'%hash(lab.default_parameters)) return lab.default_parameters @@ -644,59 +612,63 @@ def evaluate_best(lab): action_parser.add_argument('--combine', type=int, default=0, help='Combine the NUM best experiments.') + action_parser.add_argument('--swarming', type=int, default=0, help='Particle Swarm Optimization.') + args = arg_parser.parse_args() giga = 2**30 if args.memory_limit is not None: memory_limit = int(args.memory_limit * giga) else: + # TODO: Not X-Platform ... available_memory = int(os.popen("free -b").readlines()[1].split()[3]) memory_limit = int(available_memory / args.processes) print("Memory Limit %.2g GB per instance."%(memory_limit / giga)) + ae = LabReport(args.experiment, + tag = args.tag, + verbose = args.verbose) + if args.parse: - ae = LabReport(args.experiment, None, args.tag) print("Lab Report written to %s"%ae.lab_report) print("Exit.") sys.exit(0) # All done. elif args.rmz: - ae = LabReport(args.experiment, None, args.tag) rm = [x for x in ae.experiments if x.attempts == 0] for x in rm: ae.experiments.remove(x) ae.experiment_ids.pop(hash(x)) ae.save() - sys.exit(0) + sys.exit(0) # All done. elif args.default_parameters: - method = evaluate_default_parameters + ae.method = evaluate_default_parameters elif args.all_experiments: - method = evaluate_all + ae.method = evaluate_all elif args.hashes: - method = EvaluateHashes(args.hashes.split(',')) + ae.method = EvaluateHashes(args.hashes.split(',')) elif args.best: - method = evaluate_best # Test this! 
+ ae.method = evaluate_best elif args.grid_search: from .nupic.optimization.basic_search import GridSearch - method = GridSearch(args.grid_search) + ae.method = GridSearch(args.grid_search) elif args.combine: from .nupic.optimization.basic_search import CombineBest - method = CombineBest(args.combine) + ae.method = CombineBest(args.combine) + + elif args.swarming: + from .nupic.optimization.swarming import ParticleSwarmOptimizations + ae.method = ParticleSwarmOptimizations( ae, args.swarming ) else: print("Missing command line argument: what to do?") sys.exit(1) - ae = LabReport(args.experiment, - method = method, - tag = args.tag, - verbose = args.verbose) - ae.run( processes = args.processes, time_limit = args.time_limit, diff --git a/py/src/nupic/optimization/parameter_set.py b/py/src/nupic/optimization/parameter_set.py index 070a643a19..835cb397ad 100644 --- a/py/src/nupic/optimization/parameter_set.py +++ b/py/src/nupic/optimization/parameter_set.py @@ -109,6 +109,24 @@ def get_types(self): return int raise TypeError('Unaccepted type in experiment parameters: type "%s".'%(type(self).__name__)) + def typecast_parameters(self, structure): + def recursive_typecast_parameters(values, structure): + # Recurse through the parameter data structure. + if isinstance(structure, dict): + for key in structure: + values[key] = recursive_typecast_parameters(values[key], structure[key]) + return values + elif isinstance(structure, tuple): + return tuple(recursive_typecast_parameters(*args) + for args in zip(values, structure)) + # Type cast values. + elif structure == float: + value = float(values) + return float(str(value)) + elif structure == int: + return int(round(float(values))) + return recursive_typecast_parameters(parameters, structure) + def enumerate(self): """ Convert parameters from a recursive structure into a list of parameters. diff --git a/py/src/nupic/optimization/swarming.py b/py/src/nupic/optimization/swarming.py new file mode 100644 index 0000000000..a766e64bf4 --- /dev/null +++ b/py/src/nupic/optimization/swarming.py @@ -0,0 +1,239 @@ +#!/usr/bin/python3 +# ------------------------------------------------------------------------------ +# Numenta Platform for Intelligent Computing (NuPIC) +# +# Copyright (C) 2018-2019, David McDougall +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero Public License version 3 as published by the Free +# Software Foundation. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero Public License for more details. +# +# You should have received a copy of the GNU Affero Public License along with +# this program. If not, see http://www.gnu.org/licenses. +# ------------------------------------------------------------------------------ +""" Swarming parameter search """ + +# TODO: Deal with global constants: particle_strength, global_strength, velocity_strength +# Maybe make them into CLI Arguments? + +particle_strength = .25 +global_strength = .50 +velocity_strength = .95 +assert(velocity_strength + particle_strength / 2 + global_strength / 2 >= 1) + +import argparse +import sys +import os +import random +import pprint + +from .nupic.optimization.parameter_set import ParameterSet + +class ParticleSwarmOptimizations: + def __init__(self, lab, args): + # Setup the particle swarm. 
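+        # Swarm state lives in a single "swarm" file inside the lab's AE
+        # directory.  Each particle stores its current value (a parameter
+        # set), a velocity, its personal best, and that best's score.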
+ self.particles = args.particles + self.next_particle = random.randrange(args.particles) + self.swarm_path = os.path.join(lab.ae_directory, 'swarm') + try: + with open(self.swarm_path, 'r') as swarm_file: + swarm_raw = swarm_file.read() + except FileNotFoundError: + # Initialize a new particle swarm. + self.swarm_data = {} + for particle in range(self.particles): + if particle in [0, 1, 2]: + # Evaluate the default parameters a few times, before branching out + # to the more experimential stuff. Several evals are needed since + # these defaults may have their random velocity applied. + value = lab.default_parameters + else: + value = ParameterSet( initial_parameters(lab.default_parameters)) + self.swarm_data[particle] = { + 'value': value, + 'velocity': initial_velocity(lab.default_parameters), + 'best': value, + 'best_score': None, + 'hash': hash(value), + } + self.swarm_data['best'] = random.choice(list(self.swarm_data.values()))['best'] + self.swarm_data['best_score'] = None + self.swarm_data['evals'] = 0 + else: + # Load an existing particle swarm. + try: + self.swarm_data = eval(swarm_raw) + except SyntaxError: + while True: + print("Corrupted particle swarm data file. [B]ackup, [O]verwrite, or [EXIT]?") + choice = input().upper() + if choice == 'B': + backup_path = self.swarm_path + ".backup" + os.rename(self.swarm_path, backup_path) + print("BACKUP PATH: %s"%backup_path) + self.swarm_data = initialize_particle_swarm(lab.default_parameters, self.particles) + break + elif choice == 'O': + self.swarm_data = initialize_particle_swarm(lab.default_parameters, self.particles) + break + elif choice in 'EXITQ': + print("EXIT") + sys.exit() + else: + print('Invalid input "%s".'%choice) + + if self.particles != sum(isinstance(key, int) for key in self.swarm_data): + print("Warning: argument 'particles' does not match number of particles stored on file.") + + def __call__(self, lab): + # Run the particle swarm optimization. + particle_data = self.swarm_data[self.next_particle] + self.next_particle = (self.next_particle + 1) % self.particles + + # Update the particles velocity. + particle_data['velocity'] = update_particle_velocity( + particle_data['value'], + particle_data['velocity'], + particle_data['best'], + self.swarm_data['best'],) + + # Update the particles postition. + particle_data['value'] = update_particle_position( + particle_data['value'], + particle_data['velocity']) + + # Evaluate the particle. + promise = pool.apply_async(evaluate_particle, (particle_data,)) + + return parameters + + def collect(self, results): + particle_data = self.swarm_data[particle_number] + try: + score = promise.get() + except (ValueError, MemoryError, ZeroDivisionError, AssertionError) as err: + print("") + print("Particle Number %d"%particle_number) + pprint.pprint(particle_data['value']) + print("%s:"%(type(err).__name__), err) + print("") + # Replace this particle. + particle_data['velocity'] = initial_velocity(default_parameters) + if particle_data['best_score'] is not None: + particle_data['value'] = particle_data['best'] + elif self.swarm_data['best_score'] is not None: + particle_data['value'] = self.swarm_data['best'] + else: + particle_data['value'] = initial_parameters(default_parameters) + continue + except Exception: + print("") + pprint.pprint(particle_data['value']) + raise + + # Update best scoring particles. 
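+        # A particle's personal best is replaced whenever its newest score
+        # beats it, and the swarm-wide best is promoted from the personal
+        # bests; both events are reported on the console.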
+ if particle_data['best_score'] is None or score > particle_data['best_score']: + particle_data['best'] = particle_data['value'] + particle_data['best_score'] = score + print("New particle (%d) best score %g"%(particle_number, particle_data['best_score'])) + if self.swarm_data['best_score'] is None or score > self.swarm_data['best_score']: + self.swarm_data['best'] = typecast_parameters(particle_data['best'], parameter_structure) + self.swarm_data['best_score'] = particle_data['best_score'] + self.swarm_data['best_particle'] = particle_number + print("New global best score %g"%self.swarm_data['best_score']) + + # Save the swarm to file. + self.swarm_data['evals'] += 1 + with open(swarm_path, 'w') as swarm_file: + print('# ' + ' '.join(sys.argv), file=swarm_file) # TODO: Get this from lab-report object. + pprint.pprint(self.swarm_data, stream = swarm_file) + + +def initial_parameters(default_parameters): + # Recurse through the parameter data structure. + if isinstance(default_parameters, dict): + return {key: initial_parameters(value) + for key, value in default_parameters.items()} + elif isinstance(default_parameters, tuple): + return tuple(initial_parameters(value) for value in default_parameters) + # Calculate good initial values. + elif isinstance(default_parameters, float): + return default_parameters * 1.25 ** (random.random()*2-1) + elif isinstance(default_parameters, int): + if abs(default_parameters) < 10: + return default_parameters + random.choice([-1, 0, +1]) + else: + initial_value_float = initial_parameters(float(default_parameters)) + return int(round(initial_value_float)) + +def initial_velocity(default_parameters): + # Recurse through the parameter data structure. + if isinstance(default_parameters, dict): + return {key: initial_velocity(value) + for key, value in default_parameters.items()} + elif isinstance(default_parameters, tuple): + return tuple(initial_velocity(value) for value in default_parameters) + # Calculate good initial velocities. + elif isinstance(default_parameters, float): + max_percent_change = 10 + uniform = 2 * random.random() - 1 + return default_parameters * uniform * (max_percent_change / 100.) + elif isinstance(default_parameters, int): + if abs(default_parameters) < 10: + uniform = 2 * random.random() - 1 + return uniform + else: + return initial_velocity(float(default_parameters)) + +def update_particle_position(position, velocity): + # Recurse through the parameter data structure. + if isinstance(position, dict): + return {key: update_particle_position(value, velocity[key]) + for key, value in position.items()} + elif isinstance(position, tuple): + return tuple(update_particle_position(value, velocity[index]) + for index, value in enumerate(position)) + else: + return position + velocity + +def update_particle_velocity(postition, velocity, particle_best, global_best): + # Recurse through the parameter data structure. + if isinstance(postition, dict): + return {key: update_particle_velocity( + postition[key], + velocity[key], + particle_best[key], + global_best[key]) + for key in postition.keys()} + elif isinstance(postition, tuple): + return tuple(update_particle_velocity( + postition[index], + velocity[index], + particle_best[index], + global_best[index]) + for index, value in enumerate(postition)) + else: + # Update velocity. 
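+        # new_velocity = velocity * velocity_strength
+        #                + (particle_best - position) * particle_strength * rand()
+        #                + (global_best   - position) * global_strength   * rand()
+        # where rand() is uniform in [0, 1).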
+ particle_bias = (particle_best - postition) * particle_strength * random.random() + global_bias = (global_best - postition) * global_strength * random.random() + return velocity * velocity_strength + particle_bias + global_bias + + +if __name__ == '__main__': + arg_parser = argparse.ArgumentParser() + assert(args.particles >= args.processes) + + if args.clear_scores: + print("Removing Scores from Particle Swarm File %s."%swarm_path) + swarm_data['best_score'] = None + for entry in swarm_data: + if isinstance(entry, int): + swarm_data[entry]['best_score'] = None + with open(swarm_path, 'w') as swarm_file: + pprint.pprint(swarm_data, stream = swarm_file) + sys.exit() + From b1461aec94cb8dc523fe40dfb178e6dba6022251 Mon Sep 17 00:00:00 2001 From: ctrl-z-9000-times Date: Wed, 15 May 2019 22:03:00 -0400 Subject: [PATCH 4/9] AE: Refactored Optimization code. --- py/src/nupic/optimization/ae.py | 148 +++++-------- py/src/nupic/optimization/basic_search.py | 110 ---------- py/src/nupic/optimization/optimizers.py | 229 +++++++++++++++++++++ py/src/nupic/optimization/parameter_set.py | 34 +-- py/src/nupic/optimization/swarming.py | 27 +-- 5 files changed, 314 insertions(+), 234 deletions(-) create mode 100644 py/src/nupic/optimization/optimizers.py diff --git a/py/src/nupic/optimization/ae.py b/py/src/nupic/optimization/ae.py index 562d7a3285..d45f71acab 100644 --- a/py/src/nupic/optimization/ae.py +++ b/py/src/nupic/optimization/ae.py @@ -1,4 +1,3 @@ -#!/usr/bin/python3 # ------------------------------------------------------------------------------ # Numenta Platform for Intelligent Computing (NuPIC) # @@ -38,6 +37,7 @@ This global dictionary contains all of the parameters to modify. Parameters must be one of the following types: dict, tuple, float, int. Parameters can be nested in multiple levels of dictionaries and tuples. + The outer most layer of parameters must be a dict. ExperimentModule.main(parameters=default_parameters, argv=None, verbose=True) Returns (float) performance of parameters, to be maximized. @@ -100,6 +100,8 @@ # TODO: Reject experiments which have failed a few times. +# TODO: Failed experiments should have its own section in the LabReport. + import argparse import os import sys @@ -118,7 +120,7 @@ import scipy import math -from .nupic.optimization.parameter_set import ParameterSet +from nupic.optimization.parameter_set import ParameterSet class ExperimentSummary: """ @@ -204,7 +206,7 @@ def parse(self, string): self._hash = int(re.search("Hash: (.*)", string).groups()[0], base=16) # TODO: This should accept the baseline to compare against, and then have - # the defaults parameters as the default baseline. + # the defaults argument as the default baseline. def significance(self): """ Returns the P-Value of the Null-Hypothesis test (these parameters @@ -276,6 +278,7 @@ def __init__(self, experiment_argv, method=None, tag='', verbose=False): self.method = method self.tag = tag self.verbose = verbose + # TODO: Needs better error messages when user forgets CLI arg for experiment module! 
self.load_experiment_module(experiment_argv[0]) self.ae_directory = os.path.join(self.path, self.name) + self.default_extension if self.tag: @@ -544,37 +547,6 @@ def Experiment_evaluate_parameters(self, *args, **kwds): def _timeout_callback(signum, frame): raise ValueError("Time limit exceded.") -################################################################################ - - -def evaluate_default_parameters(lab): - return lab.default_parameters - - -class EvaluateHashes: - def __init__(self, hashes): - self.hashes = [int(h, base=16) for h in hashes] - - def __call__(self, lab): - try: - experiments = [lab.experiment_ids[h] for h in self.hashes] - except KeyError: - unknown = [h for h in self.hashes if h not in lab.experiment_ids] - raise ValueError('Hash not recognized: %X'%unknown[0]) - rnd = random.random - return min(experiments, key=lambda x: x.attempts + rnd()).parameters - return random.choice(experiments).parameters - - -def evaluate_all(lab): - rnd = random.random - return min(lab.experiments, key=lambda x: x.attempts + rnd()).parameters - - -def evaluate_best(lab): - best = max(lab.experiments, key = lambda X: X.mean() ) - return best.parameters - if __name__ == '__main__': arg_parser = argparse.ArgumentParser() @@ -582,7 +554,8 @@ def evaluate_best(lab): arg_parser.add_argument('--tag', type=str, help='Optional string appended to the name of the AE directory. Use tags to ' 'keep multiple variants of an experiment alive and working at the same time') - arg_parser.add_argument('-n', '--processes', type=int, default=os.cpu_count(),) + arg_parser.add_argument('-n', '--processes', type=int, default=os.cpu_count(), + help='Number of experiments to run simultaneously, defaults to the number of CPU cores available.') arg_parser.add_argument('--time_limit', type=float, default=None, help='Hours, time limit for each run of the experiment.',) arg_parser.add_argument('--memory_limit', type=float, default=None, @@ -591,85 +564,60 @@ def evaluate_best(lab): help='Name of experiment module followed by its command line arguments.') action_parser = arg_parser.add_mutually_exclusive_group(required=True) - action_parser.add_argument('--parse', action='store_true', - help='Parse the lab report and write it back to the same file, then exits.') - + help='Parse the lab report and write it back to the same file, then exit.') action_parser.add_argument('--rmz', action='store_true', help='Remove all experiments which have zero attempts.') - action_parser.add_argument('--default_parameters', action='store_true',) - - action_parser.add_argument('--all_experiments', action='store_true', - help='Evaluate all experiments in the lab report, don\'t start new experiments') - - action_parser.add_argument('--hashes', type=str,) - - action_parser.add_argument('--best', action='store_true', - help='Evaluate the best set of parameters on file, with verbose=True.') - - action_parser.add_argument('--grid_search', type=str) - - action_parser.add_argument('--combine', type=int, default=0, help='Combine the NUM best experiments.') - - action_parser.add_argument('--swarming', type=int, default=0, help='Particle Swarm Optimization.') + import nupic.optimization.optimizers + import nupic.optimization.swarming + actions = [ + nupic.optimization.optimizers.EvaluateHashes, + nupic.optimization.optimizers.EvaluateDefaultParameters, + nupic.optimization.optimizers.EvaluateAllExperiments, + nupic.optimization.optimizers.EvaluateBestExperiment, + nupic.optimization.optimizers.GridSearch, + 
nupic.optimization.optimizers.CombineBest, + nupic.optimization.swarming.ParticleSwarmOptimizations, + ] + for method in actions: + method.addArguments(action_parser) args = arg_parser.parse_args() - giga = 2**30 - if args.memory_limit is not None: - memory_limit = int(args.memory_limit * giga) - else: - # TODO: Not X-Platform ... - available_memory = int(os.popen("free -b").readlines()[1].split()[3]) - memory_limit = int(available_memory / args.processes) - print("Memory Limit %.2g GB per instance."%(memory_limit / giga)) ae = LabReport(args.experiment, - tag = args.tag, - verbose = args.verbose) + tag = args.tag, + verbose = args.verbose) + print("Lab Report written to %s"%ae.lab_report) if args.parse: - print("Lab Report written to %s"%ae.lab_report) - print("Exit.") - sys.exit(0) # All done. + pass elif args.rmz: - rm = [x for x in ae.experiments if x.attempts == 0] - for x in rm: - ae.experiments.remove(x) - ae.experiment_ids.pop(hash(x)) + for x in ae.experiments: + if x.attempts == 0: + ae.experiments.remove(x) + ae.experiment_ids.pop(hash(x)) ae.save() - sys.exit(0) # All done. - - elif args.default_parameters: - ae.method = evaluate_default_parameters + print("Removed all experiments which had not yet been attempted.") - elif args.all_experiments: - ae.method = evaluate_all - - elif args.hashes: - ae.method = EvaluateHashes(args.hashes.split(',')) - - elif args.best: - ae.method = evaluate_best - - elif args.grid_search: - from .nupic.optimization.basic_search import GridSearch - ae.method = GridSearch(args.grid_search) - - elif args.combine: - from .nupic.optimization.basic_search import CombineBest - ae.method = CombineBest(args.combine) + else: + selected_method = [X for X in actions if X.useThisOptimizer(args)] + assert(len(selected_method) == 1) # ArgParse should ensure this via "add_mutually_exclusive_group". + ae.method = selected_method[0]( ae, args ) - elif args.swarming: - from .nupic.optimization.swarming import ParticleSwarmOptimizations - ae.method = ParticleSwarmOptimizations( ae, args.swarming ) + giga = 2**30 + if args.memory_limit is not None: + memory_limit = int(args.memory_limit * giga) + else: + # TODO: Not X-Platform ... + available_memory = int(os.popen("free -b").readlines()[1].split()[3]) + memory_limit = int(available_memory / args.processes) + print("Memory Limit %.2g GB per instance."%(memory_limit / giga)) - else: - print("Missing command line argument: what to do?") - sys.exit(1) + ae.run( + processes = args.processes, + time_limit = args.time_limit, + memory_limit = memory_limit,) - ae.run( - processes = args.processes, - time_limit = args.time_limit, - memory_limit = memory_limit,) + print("Exit.") diff --git a/py/src/nupic/optimization/basic_search.py b/py/src/nupic/optimization/basic_search.py index 5ef63c8124..347d2b1b2c 100644 --- a/py/src/nupic/optimization/basic_search.py +++ b/py/src/nupic/optimization/basic_search.py @@ -15,114 +15,4 @@ # this program. If not, see http://www.gnu.org/licenses. 
# ------------------------------------------------------------------------------ -from .nupic.optimization.parameter_set import ParameterSet -import itertools -import random - - -class GridSearch(object): - """ TODO: docstring for GridSearch""" - mod_funcs = [ - lambda v: v * .40, - lambda v: v * .60, - lambda v: v * .80, - lambda v: v * 1.20, - lambda v: v * 1.40, - lambda v: v * 1.60, - ] - - def __init__(self, directive): - self.directive = directive - - def __call__(self, lab): - - if lab.experiment_ids[hash(lab.default_parameters)].attempts < 10: - return lab.default_parameters - - # Get a list of every parameter to experiment with. - if self.directive: - manifest = [] - for start in self.directive.split(','): - node = eval("lab.default_parameters" + start) - manifest.extend(start + end for end in paths(node)) - else: - manifest = lab.default_parameters.enumerate() - - # Suggest the following modifications to each parameter. - experiments = [] - for path in manifest: - value = lab.default_parameters.get(path) - for mod in self.mod_funcs: - params = deepcopy(lab.default_parameters) - params.apply( path, mod(value) ) - try: - experiments.append( - ExperimentSummary(lab, parameters=params)) - except ValueError: - # ExperimentSummary raises ValueError if it detects - # duplicate entry in the database. - experiments.append( - lab.experiment_ids[hash(params)]) - - lab.save() # Write all of the new grid-search experiments to the lab report. - - rnd = random.random - return min(experiments, key=lambda x: x.attempts + rnd()).parameters - - - -class CombineBest: - """ TODO Docs """ - def __init__(self, n=20): - self.n = n - - def merge(self, lab, ideas): - """ Take several experiments and return the best combination of them. """ - # Marshal all of the modifications together. - ideas = sorted(ideas, key = lambda x: -x.mean()) - paths = [] - values = [] - for x in ideas: - for path, value in x.modifications: - if path in paths: - continue # Higher scoring experiments take precedence. - paths.append(path) - values.append(value) - # Create or get the experiment object. - mods = list(zip(paths, values)) - try: - return ExperimentSummary(lab, modifications=mods) - except ValueError: - # ExperimentSummary raises ValueError if it detects duplicate entry - # in the database. - params = deepcopy(lab.default_parameters) - for p, v in mods: - params.apply(p, v) - return lab.experiment_ids[hash(params)] - - def __call__(self, lab): - - suggest = [] # Retval accumulator - # Ignore all underperforming experiments. - null = lab.experiment_ids[hash(lab.default_parameters)] - ex = [x for x in lab.experiments if x.mean() > null.mean()] - # For sanity: Limit to the top experiments. - ex = sorted(ex, key = lambda x: -x.mean())[ : self.n] - # Keep trying experiments which are not yet significant. Experiments - # with a single datum have a significance of NaN... 
- trymore = [x for x in ex if (x.significance() > .50 or math.isnan(x.significance()))] - ex = [x for x in ex if x not in trymore] - suggest.extend(trymore) - # Suggests combinations - for ideas in itertools.combinations(ex, 2): - suggest.append( self.merge(lab, ideas) ) - - if False: # Dump the suggestions for debugging - for x in suggest: - for p, v in x.modifications: - print(p , v) - print() - 1/0 - rnd = random.random - return min(suggest, key=lambda x: x.attempts + rnd()).parameters diff --git a/py/src/nupic/optimization/optimizers.py b/py/src/nupic/optimization/optimizers.py new file mode 100644 index 0000000000..01026fcb74 --- /dev/null +++ b/py/src/nupic/optimization/optimizers.py @@ -0,0 +1,229 @@ +# ------------------------------------------------------------------------------ +# Numenta Platform for Intelligent Computing (NuPIC) +# +# Copyright (C) 2018-2019, David McDougall +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero Public License version 3 as published by the Free +# Software Foundation. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero Public License for more details. +# +# You should have received a copy of the GNU Affero Public License along with +# this program. If not, see http://www.gnu.org/licenses. +# ------------------------------------------------------------------------------ + +from nupic.optimization.parameter_set import ParameterSet +import itertools +import random + +class BaseOptimizer: + """ + TODO + """ + def addArguments(parser): + """ + TODO + """ + pass + + def useThisOptimizer(args): + """ + TODO + """ + return False + + def __init__(self, labReport, args): + """ + TODO + """ + self.lab = labReport + self.args = args + + def suggestExperiment(self): + """ + TODO + """ + pass + + def collectResults(self, experiment, result): + """ + TODO + """ + pass + + +class EvaluateDefaultParameters(BaseOptimizer): + def addArguments(parser): + parser.add_argument('--default_parameters', action='store_true',) + + def useThisOptimizer(args): + return args.default_parameters + + def suggestExperiment(self): + return self.lab.default_parameters + + +class EvaluateAllExperiments(BaseOptimizer): + def addArguments(parser): + parser.add_argument('--all_experiments', action='store_true', + help='Evaluate all experiments in the lab report, don\'t start new experiments') + + def useThisOptimizer(args): + return args.all_experiments + + def suggestExperiment(self): + rnd = lambda: random.random() / 100 # Random Tiebreaker + return min(self.lab.experiments, key=lambda x: x.attempts + rnd()).parameters + + +class EvaluateBestExperiment(BaseOptimizer): + def addArguments(parser): + parser.add_argument('--best', action='store_true', + help='Evaluate the best set of parameters on file.') + + def useThisOptimizer(args): + return args.best + + def suggestExperiment(self): + best = max(self.lab.experiments, key = lambda X: X.mean() ) + return best.parameters + + +class EvaluateHashes(BaseOptimizer): + def addArguments(parser): + parser.add_argument('--hashes', type=str,) + + def useThisOptimizer(args): + return args.hashes + + def __init__(self, labReport, args): + hashes = [int(h, base=16) for h in args.hashes.split(',')] + try: + self.experiments = [lab.experiment_ids[h] for h in hashes] + except KeyError: + unknown = [h for h in hashes if h not in 
lab.experiment_ids] + raise ValueError('Hash not recognized: %X'%unknown[0]) + + def suggestExperiment(self): + rnd = lambda: random.random() / 100 # Random Tiebreaker + return min(self.experiments, key=lambda x: x.attempts + rnd()).parameters + + +class GridSearch(BaseOptimizer): + # TODO: Make these into a CLI argument? + mod_funcs = [ + lambda v: v * .40, + lambda v: v * .60, + lambda v: v * .80, + lambda v: v * 1.20, + lambda v: v * 1.40, + lambda v: v * 1.60, + ] + + def addArguments(parser): + parser.add_argument('--grid_search', type=str, + help="TODO CLI argument help for GridSearch") + + def useThisOptimizer(args): + return args.grid_search + + def __init__(self, labReport, args): + self.lab = labReport + + # Get a list of every parameter to experiment with. + target_parameters = [] + for start in args.grid_search.split(','): + node = eval("lab.default_parameters" + start) + target_parameters.extend(start + end for end in paths(node)) + + # Suggest modifications to each parameter. + self.experiments = [] + for path in target_parameters: + value = lab.default_parameters.get(path) + for mod in self.mod_funcs: + params = deepcopy(lab.default_parameters) + params.apply( path, mod(value) ) + try: + self.experiments.append( + ExperimentSummary(lab, parameters=params)) + except ValueError: + # ExperimentSummary raises ValueError if it detects + # duplicate entry in the database. + self.experiments.append( + lab.experiment_ids[hash(params)]) + + lab.save() # Write all of the new grid-search experiments to the lab report. + + def suggestExperiment(self): + # Start with a good baseline of the default parameters. + if self.lab.experiment_ids[hash(self.lab.default_parameters)].attempts < 7: + return self.lab.default_parameters + + rnd = lambda: random.random() / 100 # Random Tiebreaker + return min(self.experiments, key=lambda x: x.attempts + rnd()).parameters + + +class CombineBest: + def addArguments(parser): + parser.add_argument('--combine', type=int, default=0, + help='Combine the NUM best experiments.') + + def useThisOptimizer(args): + return args.combine + + def merge(self, lab, ideas): + """ Take several experiments and return the best combination of them. """ + # Marshal all of the modifications together. + ideas = sorted(ideas, key = lambda x: -x.mean()) + paths = [] + values = [] + for x in ideas: + for path, value in x.modifications: + if path in paths: + continue # Higher scoring experiments take precedence. + paths.append(path) + values.append(value) + # Create or get the experiment object. + mods = list(zip(paths, values)) + try: + return ExperimentSummary(lab, modifications=mods) + except ValueError: + # ExperimentSummary raises ValueError if it detects duplicate entry + # in the database. + params = deepcopy(lab.default_parameters) + for p, v in mods: + params.apply(p, v) + return lab.experiment_ids[hash(params)] + + def suggestExperiment(self): + suggest = [] # Retval accumulator + + # Ignore all underperforming experiments. + null = self.lab.experiment_ids[hash(self.lab.default_parameters)] + ex = [x for x in self.lab.experiments if x.mean() > null.mean()] + + # Limit to the top/best experiments. + ex = sorted(ex, key = lambda x: -x.mean())[ : self.args.combine] + + # Keep trying experiments which are not yet significant. Experiments + # with a single datum have a significance of NaN... 
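+        # Concretely: an experiment is re-suggested while its t-test P-value
+        # against the default parameters is above 0.50 or undefined, and only
+        # the remaining (significant) experiments are merged pairwise below.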
+ trymore = [x for x in ex if (x.significance() > .50 or math.isnan(x.significance()))] + ex = [x for x in ex if x not in trymore] + suggest.extend(trymore) + # Suggests combinations + for ideas in itertools.combinations(ex, 2): + suggest.append( self.merge(self.lab, ideas) ) + + if False: # Dump the suggestions for debugging + for x in suggest: + for p, v in x.modifications: + print(p , v) + print() + 1/0 + + rnd = lambda: random.random() / 100 # Random Tiebreaker + return min(suggest, key=lambda x: x.attempts + rnd()).parameters + diff --git a/py/src/nupic/optimization/parameter_set.py b/py/src/nupic/optimization/parameter_set.py index 835cb397ad..57610d70a0 100644 --- a/py/src/nupic/optimization/parameter_set.py +++ b/py/src/nupic/optimization/parameter_set.py @@ -17,17 +17,27 @@ import pprint import hashlib +import number + +# TODO: Consider allowing lists, and converting all lists into tuples. class ParameterSet(dict): - """ TODO: Documentation """ + """ + This class holds the arguments to an experiment, which the "AE" program will + modify as it attempts to optimize the experiment. + + Parameters must be one of the following types: dict, tuple, float, int. + Parameters can be nested in multiple levels of dictionaries and tuples. + The outer most layer of parameters must be a dict. + """ def __init__(self, data): super().__init__(self) if isinstance(data, str): + data = data.strip() try: - data = eval(data.strip()) + data = eval(data) except: - print("Error parsing: " + data.strip()) - raise + raise SyntaxError("Parsing parameters: " + data) assert(isinstance(data, dict)) self.update(data) @@ -63,16 +73,18 @@ def diff(old, new): return diffs def get(self, path): + assert(isinstance(path, str)) try: return eval('self' + path) except: - print('Failed to get self' + path) - raise + raise ValueError('Get parameters' + path) def apply(self, modification, value): """ Modifies this set of parameters! """ + assert(isinstance(modification, str)) + assert(isinstance(value, number.Number)) try: access = modification.split(']')[0].strip('[]"\' ') if not access: @@ -87,8 +99,7 @@ def apply(self, modification, value): self[index] = ParameterSet.apply(self[index], tail, value) return tuple(self) except: - print('Failed to apply modification %s = %s'%(modification, str(value))) - raise + raise ValueError('Apply parameters%s = %s'%(modification, str(value))) def get_types(self): """ @@ -97,11 +108,10 @@ def get_types(self): """ # Recurse through the parameter data structure. if isinstance(self, dict): - return {key: LabReport.get_types(value) + return {key: ParameterSet.get_types(value) for key, value in self.items()} elif isinstance(self, tuple): - return tuple(LabReport.get_types(value) - for value in self) + return tuple(ParameterSet.get_types(value) for value in self) # Determine data type of each entry in parameter data structure. 
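        # For example (hypothetical): get_types() on {'n': 5, 'x': 0.5} yields
        # {'n': int, 'x': float}, which typecast_parameters() later uses to
        # coerce modified values back to the right types.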
elif isinstance(self, float): return float @@ -125,7 +135,7 @@ def recursive_typecast_parameters(values, structure): return float(str(value)) elif structure == int: return int(round(float(values))) - return recursive_typecast_parameters(parameters, structure) + return recursive_typecast_parameters(self, structure) def enumerate(self): """ diff --git a/py/src/nupic/optimization/swarming.py b/py/src/nupic/optimization/swarming.py index a766e64bf4..57c843a4d4 100644 --- a/py/src/nupic/optimization/swarming.py +++ b/py/src/nupic/optimization/swarming.py @@ -1,4 +1,3 @@ -#!/usr/bin/python3 # ------------------------------------------------------------------------------ # Numenta Platform for Intelligent Computing (NuPIC) # @@ -17,9 +16,7 @@ # ------------------------------------------------------------------------------ """ Swarming parameter search """ -# TODO: Deal with global constants: particle_strength, global_strength, velocity_strength -# Maybe make them into CLI Arguments? - +# TODO: Make CLI Arguments for these global constants: particle_strength, global_strength, velocity_strength particle_strength = .25 global_strength = .50 velocity_strength = .95 @@ -31,10 +28,20 @@ import random import pprint -from .nupic.optimization.parameter_set import ParameterSet +from nupic.optimization.parameter_set import ParameterSet +from nupic.optimization.optimizers import BaseOptimizer + +class ParticleSwarmOptimizations(BaseOptimizer): + def addArguments(argparser): + argparser.add_argument('--swarming', type=int, default=0, + help='Particle Swarm Optimization.') + + def useThisOptimizer(args): + return args.swarming -class ParticleSwarmOptimizations: def __init__(self, lab, args): + assert(args.particles >= args.processes) + # Setup the particle swarm. self.particles = args.particles self.next_particle = random.randrange(args.particles) @@ -89,7 +96,7 @@ def __init__(self, lab, args): if self.particles != sum(isinstance(key, int) for key in self.swarm_data): print("Warning: argument 'particles' does not match number of particles stored on file.") - def __call__(self, lab): + def suggestExperiment(self, lab): # Run the particle swarm optimization. 
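        # One call advances a single particle: the next particle is chosen in
        # round-robin order and its velocity and position are updated before
        # it is handed off for evaluation.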
particle_data = self.swarm_data[self.next_particle] self.next_particle = (self.next_particle + 1) % self.particles @@ -111,7 +118,7 @@ def __call__(self, lab): return parameters - def collect(self, results): + def collectResults(self, experiment, results): particle_data = self.swarm_data[particle_number] try: score = promise.get() @@ -129,7 +136,6 @@ def collect(self, results): particle_data['value'] = self.swarm_data['best'] else: particle_data['value'] = initial_parameters(default_parameters) - continue except Exception: print("") pprint.pprint(particle_data['value']) @@ -225,7 +231,6 @@ def update_particle_velocity(postition, velocity, particle_best, global_best): if __name__ == '__main__': arg_parser = argparse.ArgumentParser() - assert(args.particles >= args.processes) if args.clear_scores: print("Removing Scores from Particle Swarm File %s."%swarm_path) @@ -235,5 +240,3 @@ def update_particle_velocity(postition, velocity, particle_best, global_best): swarm_data[entry]['best_score'] = None with open(swarm_path, 'w') as swarm_file: pprint.pprint(swarm_data, stream = swarm_file) - sys.exit() - From 5e0d271821e37683f1667335aa8a92fee8d1b22a Mon Sep 17 00:00:00 2001 From: ctrl-z-9000-times Date: Wed, 15 May 2019 23:55:04 -0400 Subject: [PATCH 5/9] AE: MNIST Example, scores 92% --- py/src/nupic/examples/mnist.py | 147 +++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 py/src/nupic/examples/mnist.py diff --git a/py/src/nupic/examples/mnist.py b/py/src/nupic/examples/mnist.py new file mode 100644 index 0000000000..c1ec4375ee --- /dev/null +++ b/py/src/nupic/examples/mnist.py @@ -0,0 +1,147 @@ +""" An MNIST classifier using Spatial Pooler.""" + +import argparse +import random +import gzip +import numpy as np +import os + +from nupic.bindings.algorithms import SpatialPooler, Classifier +from nupic.bindings.sdr import SDR, Metrics + + +def load_mnist(path): + """See: http://yann.lecun.com/exdb/mnist/ for MNIST download and binary file format spec.""" + def int32(b): + i = 0 + for char in b: + i *= 256 + # i += ord(char) # python2 + i += char + return i + + def load_labels(file_name): + with gzip.open(file_name, 'rb') as f: + raw = f.read() + assert(int32(raw[0:4]) == 2049) # Magic number + labels = [] + for char in raw[8:]: + # labels.append(ord(char)) # python2 + labels.append(char) + return labels + + def load_images(file_name): + with gzip.open(file_name, 'rb') as f: + raw = f.read() + assert(int32(raw[0:4]) == 2051) # Magic number + num_imgs = int32(raw[4:8]) + rows = int32(raw[8:12]) + cols = int32(raw[12:16]) + assert(rows == 28) + assert(cols == 28) + img_size = rows*cols + data_start = 4*4 + imgs = [] + for img_index in range(num_imgs): + vec = raw[data_start + img_index*img_size : data_start + (img_index+1)*img_size] + # vec = [ord(c) for c in vec] # python2 + vec = list(vec) + vec = np.array(vec, dtype=np.uint8) + buf = np.reshape(vec, (rows, cols, 1)) + imgs.append(buf) + assert(len(raw) == data_start + img_size * num_imgs) # All data should be used. 
+ return imgs + + train_labels = load_labels(os.path.join(path, 'train-labels-idx1-ubyte.gz')) + train_images = load_images(os.path.join(path, 'train-images-idx3-ubyte.gz')) + test_labels = load_labels(os.path.join(path, 't10k-labels-idx1-ubyte.gz')) + test_images = load_images(os.path.join(path, 't10k-images-idx3-ubyte.gz')) + + return train_labels, train_images, test_labels, test_images + + +class BWImageEncoder: + """Simple grey scale image encoder for MNIST.""" + def __init__(self, input_space): + self.output = SDR(tuple(input_space)) + + def encode(self, image): + self.output.dense = image >= np.mean(image) + return self.output + + +default_parameters = { + "columnDimensions": (10*1000,), + "potentialPct": .5, + "localAreaDensity": .015, + "stimulusThreshold": 6, + "synPermInactiveDec": 0.005, + "synPermActiveInc": 0.01, + "synPermConnected": 0.422, + "minPctOverlapDutyCycle": 0.001, + "dutyCyclePeriod": 1402, + "boostStrength": 2.5, +} + + +def main(parameters=default_parameters, argv=None, verbose=True): + parser = argparse.ArgumentParser() + parser.add_argument('--data_dir', type=str, + default = os.path.join( os.path.dirname(__file__), 'MNIST_data')) + args = parser.parse_args(args = argv) + + # Load data. + train_labels, train_images, test_labels, test_images = load_mnist(args.data_dir) + training_data = list(zip(train_images, train_labels)) + test_data = list(zip(test_images, test_labels)) + random.shuffle(training_data) + random.shuffle(test_data) + + # Setup the AI. + enc = BWImageEncoder(train_images[0].shape[:2]) + sp = SpatialPooler( + inputDimensions = (enc.output.size,), + columnDimensions = parameters['columnDimensions'], + potentialRadius = 99999999, + potentialPct = parameters['potentialPct'], + globalInhibition = True, + localAreaDensity = parameters['localAreaDensity'], + numActiveColumnsPerInhArea = -1, + stimulusThreshold = int(round(parameters['stimulusThreshold'])), + synPermInactiveDec = parameters['synPermInactiveDec'], + synPermActiveInc = parameters['synPermActiveInc'], + synPermConnected = parameters['synPermConnected'], + minPctOverlapDutyCycle = parameters['minPctOverlapDutyCycle'], + dutyCyclePeriod = int(round(parameters['dutyCyclePeriod'])), + boostStrength = parameters['boostStrength'], + seed = 42, + spVerbosity = 99, + wrapAround = False) + columns = SDR( sp.getColumnDimensions() ) + columns_stats = Metrics( columns, 99999999 ) + sdrc = Classifier() + + # Training Loop + for i in range(len(train_images)): + img, lbl = random.choice(training_data) + enc.encode(np.squeeze(img)) + sp.compute( enc.output.flatten(), True, columns ) + sdrc.learn( columns, lbl ) + + print(str(sp)) + print(str(columns_stats)) + + # Testing Loop + score = 0 + for img, lbl in test_data: + enc.encode(np.squeeze(img)) + sp.compute( enc.output.flatten(), False, columns ) + if lbl == np.argmax( sdrc.infer( columns ) ): + score += 1 + + print('Score:', 100 * score / len(test_data), '%') + return score / len(test_data) + + +if __name__ == '__main__': + main() From 98fb9cb4c5fd1e470729779fd9234485badd1ff7 Mon Sep 17 00:00:00 2001 From: ctrl-z-9000-times Date: Thu, 16 May 2019 19:21:39 -0400 Subject: [PATCH 6/9] AE & Swarming: Review, Refactor, Cleanup --- py/src/nupic/examples/mnist.py | 16 + py/src/nupic/optimization/ae.py | 327 +++++++++--------- py/src/nupic/optimization/optimizers.py | 146 ++++---- py/src/nupic/optimization/parameter_set.py | 70 ++-- py/src/nupic/optimization/swarming.py | 371 ++++++++++----------- 5 files changed, 455 insertions(+), 475 deletions(-) diff --git 
a/py/src/nupic/examples/mnist.py b/py/src/nupic/examples/mnist.py index c1ec4375ee..0b0ec7c529 100644 --- a/py/src/nupic/examples/mnist.py +++ b/py/src/nupic/examples/mnist.py @@ -1,3 +1,19 @@ +# ------------------------------------------------------------------------------ +# Numenta Platform for Intelligent Computing (NuPIC) +# +# Copyright (C) 2018-2019, David McDougall +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero Public License version 3 as published by the Free +# Software Foundation. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero Public License for more details. +# +# You should have received a copy of the GNU Affero Public License along with +# this program. If not, see http://www.gnu.org/licenses. +# ------------------------------------------------------------------------------ """ An MNIST classifier using Spatial Pooler.""" import argparse diff --git a/py/src/nupic/optimization/ae.py b/py/src/nupic/optimization/ae.py index d45f71acab..684cd57380 100644 --- a/py/src/nupic/optimization/ae.py +++ b/py/src/nupic/optimization/ae.py @@ -85,22 +85,40 @@ """ # TODO: Default parameters need better handling... When they change, update -# all of the modifications to be diffs of the current parameters. +# all of the modifications to be diffs of the current parameters? # TODO: Maybe the command line invocation should be included in the experiment # hash? Then I could experiment with the CLI args within a single lab report. # TODO: Every run should track elapsed time and report the average in the # experiment journal & summary. Some of these experiments clearly take longer -# than others but its not recorded. +# than others but its not recorded & displayed. # TODO: Log files should report memory usage ... -# TODO: Remove LabReport.experiment, then rename lab.experiment_ids to experiments +# TODO: Remove Laboratory.experiment, then rename lab.experiment_ids to experiments -# TODO: Reject experiments which have failed a few times. +# TODO: Failed experiments should have its own section in the Laboratory. Maybe +# organize them by the exception type & string? -# TODO: Failed experiments should have its own section in the LabReport. +# TODO: Consider renaming *log files to *tmp for clarity. + +# TODO: Make the leader board base the P-Values off of the best experiment, and +# always keep the default parameters on the board. + +# TODO: Do not lose the log file when worker process crashes! With this fixed I +# won't need to explain what the temp files are for, the user shouldn't need to +# know about them... + +# TODO: How hard would it be to allow the user to edit the lab report while the +# program is running? Read timestamps to detect user writes. All of the latest +# data is loaded in the program, so it should be simple to load in the new +# version and merge the human readable text into the latest data, write out to +# new file and attempt to swap it into place. + +# TODO: Experiment if all of the parameters are modified, show the +# parameters instead of the modifications. This is useful for swarming which +# touches every parameter. 
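+
+# Example invocations (hypothetical; assumes the package is importable as
+# "nupic.optimization.ae", otherwise run this file directly):
+#   $ python -m nupic.optimization.ae -n 4 --default_parameters py/src/nupic/examples/mnist.py
+#   $ python -m nupic.optimization.ae --combine 10 py/src/nupic/examples/mnist.py --data_dir ./MNIST_data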
import argparse import os @@ -113,19 +131,17 @@ import tempfile import multiprocessing import resource -import signal # TODO: X-Plat issue: Replace signal with threading.timer -from copy import copy, deepcopy +import signal # TODO: X-Plat issue: Replace signal with threading.timer? import re import numpy as np -import scipy -import math +import scipy.stats from nupic.optimization.parameter_set import ParameterSet -class ExperimentSummary: +class Experiment: """ Attributes: - lab - circular reference to LabReport instance + lab - circular reference to Laboratory instance attempts - scores - notes - @@ -144,66 +160,63 @@ def __init__(self, lab, self.notes = ' ' # Load or create this experiment's data. if string is not None: - self.parse(string) + self.parse( string ) elif modifications is not None: - self.parameters = deepcopy(self.lab.default_parameters) + self.parameters = ParameterSet( self.lab.default_parameters ) for path, value in modifications: - self.parameters.apply(path, value) + self.parameters.apply( path, value ) elif parameters is not None: - self.parameters = ParameterSet(parameters) + self.parameters = ParameterSet( parameters ) else: - raise TypeError("Not enough arguments to ExperimentSummary.__init__()") + raise TypeError("Not enough arguments to Experiment.__init__()") - self.parameters = self.parameters.typecast_parameters( self.lab.structure ) - self.modifications = self.lab.default_parameters.diff(self.parameters) + self.parameters = self.parameters.typecast( self.lab.structure ) + self.modifications = self.lab.default_parameters.diff( self.parameters ) if hash(self) not in self.lab.experiment_ids: self.lab.experiments.append(self) self.lab.experiment_ids[hash(self)] = self else: - raise ValueError("Duplicate Parameters Hash %X"%hash(self)) + existing = self.lab.experiment_ids[hash(self)] + if existing.parameters == self.parameters: + raise ValueError("Duplicate Parameters, Hash %X"%hash(self)) + else: + raise SystemExit("Hash Collision!") # Start a journal file for this experiment. if not hasattr(self, 'journal'): self.journal = os.path.join(self.lab.ae_directory, "%X.journal"%hash(self)) with open(self.journal, 'a') as file: file.write('Experiment Journal For Parameters:\n') - file.write(pprint.pformat(self.parameters) + '\n') + file.write( str(self.parameters) + '\n') file.write('Hash: %X\n'%hash(self)) file.write('Command Line Invocation: $ ' + ' '.join(self.lab.argv) + '\n') - else: - # Scrape some info from the journal file. - with open(self.journal, 'r') as file: - journal = file.read() - journal = journal.split(self.lab.section_divider) - journal.pop(0) # Discard header - elapsed_times = [] - memory_usages = [] def parse(self, string): # Reconstruct the parameters. 
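        # Every field is optional; whichever of "Modification:", "Hash:",
        # "Journal:", "Attempts:", "Scores:" and "Notes:" appear in the
        # section are scraped back out of the text.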
self.modifications = [] if "Modification:" in string: - for change in re.findall("Modification: (.*)", string): + for change in re.findall("Modification:(.*)", string): path, eq, value = change.partition('=') self.modifications.append((path.strip(), value.strip())) - self.parameters = deepcopy(self.lab.default_parameters) + self.parameters = ParameterSet(self.lab.default_parameters) for path, value in self.modifications: self.parameters.apply(path, value) - # - if "Attempts:" in string: - self.attempts = int(re.search("Attempts: (.*)", string).groups()[0]) - if "Scores:" in string: - self.scores = re.search("Scores: (.*)", string).groups()[0].strip() - self.scores = [float(s.strip()) for s in self.scores.split(',') if s.strip()] - if "Journal:" in string: - self.journal = re.search("Journal: (.*)", string).groups()[0] - if "Notes:" in string: - self.notes = string.partition('Notes:')[2] - if "Hash:" in string: + + if "Hash: " in string: # Override hash(self) with whats on file since this is reconstructed # from defaults + modifications, and the defaults might have changed. self._hash = int(re.search("Hash: (.*)", string).groups()[0], base=16) + if "Journal: " in string: + self.journal = re.search("Journal: (.*)", string).groups()[0] + if "Attempts: " in string: + self.attempts = int(re.search("Attempts: (.*)", string).groups()[0]) + if "Scores: " in string: + self.scores = re.search("Scores: (.*)", string).groups()[0].strip() + self.scores = [float(s.strip()) for s in self.scores.split(',') if s.strip()] + assert( len(self.scores) <= self.attempts ) # Attempts may fail and not return a score. + if "Notes:" in string: + self.notes = string.partition('Notes:')[2] # TODO: This should accept the baseline to compare against, and then have # the defaults argument as the default baseline. @@ -222,7 +235,7 @@ def significance(self): pass # TODO: How to pass probabilities & statistics? stat, pval = scipy.stats.ttest_ind( null_experiment.scores, self.scores, axis=None, - # Since both samples come from the same experimential setup they + # Since both samples come from the same experimental setup they # should have the same variance. equal_var=True,) return pval @@ -230,6 +243,8 @@ def significance(self): def mean(self): return np.mean(self.scores) if self.scores else float('-inf') + # TODO: Consider showing min & max scores. + # TODO: Don't show scores & P-Value if attempts == 0. 
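
For reference, the P-value reported in the lab report is just a pooled-variance
two-sample t-test between an experiment's scores and the default-parameters
scores. A minimal standalone sketch (score lists are made up):

    import scipy.stats
    baseline_scores = [0.91, 0.89, 0.90]   # hypothetical runs of the defaults
    modified_scores = [0.93, 0.94, 0.92]   # hypothetical runs of a modification
    stat, pval = scipy.stats.ttest_ind(baseline_scores, modified_scores, equal_var=True)
    # A small pval means the score change is unlikely to be chance.
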
def __str__(self): s = '' if not self.modifications: @@ -254,31 +269,32 @@ def __hash__(self): return self._hash -class LabReport: +class Laboratory: """ Attributes: lab.module - Experiment python module lab.name - Name of experiment module lab.path - Directory containing experiment module lab.structure - Types of parameters - lab.default_parameters - ex.module.default_parameters + lab.default_parameters - lab.module.default_parameters lab.argv - Command line invocation of experiment program - lab.tag - Optional, identifier string for this LabReport + lab.tag - Optional, identifier string for this Laboratory lab.ae_directory - Directory containing all files created by this program lab.lab_report - File path of Lab Report - lab.experiments - List of ExperimentSummary + lab.experiments - List of Experiment instances lab.experiment_ids - Experiments accessed by their unique hash """ default_extension = '_ae' section_divider = '\n' + ('=' * 80) + '\n' def __init__(self, experiment_argv, method=None, tag='', verbose=False): + if not experiment_argv: + raise ValueError('Missing arguments for the experiment to run!') if isinstance(experiment_argv, str): experiment_argv = experiment_argv.split() self.argv = experiment_argv self.method = method self.tag = tag self.verbose = verbose - # TODO: Needs better error messages when user forgets CLI arg for experiment module! self.load_experiment_module(experiment_argv[0]) self.ae_directory = os.path.join(self.path, self.name) + self.default_extension if self.tag: @@ -289,7 +305,7 @@ def __init__(self, experiment_argv, method=None, tag='', verbose=False): if os.path.isdir(self.ae_directory): with open(self.lab_report, 'r') as file: report = file.read() - self.parse_lab_report(report) + self.parse(report) else: # Initialize the Lab Reports attributes and write the skeleton of it # to file. @@ -297,13 +313,10 @@ def __init__(self, experiment_argv, method=None, tag='', verbose=False): os.mkdir(self.ae_directory) # Always have an experiment for the default parameters. try: - ExperimentSummary(self, parameters = self.default_parameters) + Experiment(self, parameters = self.default_parameters) except ValueError: pass - # Parse & Write this file immediately at start up. - self.save() - def init_header(self): self.header = str(self.name) if self.tag: @@ -328,23 +341,24 @@ def load_experiment_module(self, experiment_module): self.default_parameters = ParameterSet(self.module.default_parameters) self.structure = self.default_parameters.get_types() - def parse_lab_report(self, report): + def parse(self, report): if not report.strip(): - raise ValueError("Empty lab report file.") + raise ValueError("Empty lab report file!") sections = report.split(self.section_divider) self.header = sections[0] default_parameters = '\n'.join( sections[1].split('\n')[1:-1] ) cli = sections[1].split('\n')[-1].strip('$ ').split() sorted_pval_table = sections[2] experiment_sections = sections[3:] - file_defaults = ParameterSet(default_parameters) - # Consistency check for parameters. + file_defaults = ParameterSet(default_parameters) + # Consistency check for parameters & experiment argv. 
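+        # If either differs from what the lab report has on file, the user is
+        # asked whether to keep the file's version or overwrite it; choosing
+        # "new" first copies the report to a ".backup" file.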
if file_defaults != self.default_parameters or cli != self.argv: while True: - q = input("Default parameters or invovation have changed, options:\n" + - " old - Ignore the new/given, use what's on file.\n" + - " new - Use the new/given, overwrites the old file!\n" + - " abort.\n") + q = input("Default parameters or invocation have changed, options:\n" + + " old - Ignore the new/given, use what's on file.\n" + + " new - Use the new/given, overwrites the old file!\n" + + " abort.\n" + + ">>> ") q = q.strip().lower() if q == 'old': self.default_parameters = file_defaults @@ -353,15 +367,21 @@ def parse_lab_report(self, report): elif q == 'new': shutil.copy(self.lab_report, self.lab_report + '.backup') break - elif q == 'abort': + elif q in ('abort', 'exit', 'quit') or q in 'aeq': sys.exit() - [ExperimentSummary(self, s) for s in experiment_sections if s.strip()] + [Experiment(self, s) for s in experiment_sections if s.strip()] + + def get_experiment(self, parameters): + p = ParameterSet( parameters ).typecast( self.structure ) + h = hash(p) + if h in self.experiment_ids: + return self.experiment_ids[h] + else: + return Experiment(self, parameters=p) def significant_experiments_table(self): - """ - Returns string - """ + """ Returns string """ ex = sorted(self.experiments, key = lambda x: -x.mean()) ex = ex[:20] s = ' Hash | N | Score | P-Value | Modifications\n' @@ -406,8 +426,7 @@ def run(self, processes, """ """ pool = multiprocessing.Pool(processes, maxtasksperchild=1) - async_results = [] # Contains pairs of (Promise, Parameters) - + async_results = [] # Contains pairs of (Promise, Experiment) while True: # Check for jobs which have finished run_slot = 0 @@ -415,49 +434,23 @@ def run(self, processes, promise, value = async_results[run_slot] if promise.ready(): # Experiment run has finished, deal with the results. - result = self._get_promised_results(promise, value) - self.save_results(value, result) + self.collect_results(value, promise) async_results.pop(run_slot) else: run_slot += 1 - # Start running new experiments while len(async_results) < processes: - # Pickle is picky, so clean up 'self' which is sent via pickle - # to the process pool. pickle_self only needs to work with - # evaluate_parameters - pickle_self = copy(self) - pickle_self.module = None # Won't pickle, use self.module_reload instead. - # Pickle balks at circular references, remove them. 
- pickle_self.experiments = None - pickle_self.experiment_ids = None - value = self.method(self) - value = value.typecast_parameters( self.structure ) + X = self.get_experiment( self.method.suggest_parameters() ) if self.verbose: - print("%X"%hash(value)) + print("Evaluating %X"%hash(X)) promise = pool.apply_async( Experiment_evaluate_parameters, - args = (pickle_self, value,), + args = (self.argv, self.tag, self.verbose, X.parameters,), kwds = {'time_limit' : time_limit, 'memory_limit' : memory_limit,},) - async_results.append((promise, value)) + async_results.append((promise, X)) # Wait for experiments to complete - time.sleep(1) - - def _get_promised_results(self, promise, value): - try: - return promise.get() - except (ValueError, MemoryError, ZeroDivisionError, AssertionError) as err: - print("") - pprint.pprint(value) - print("%s:"%(type(err).__name__), err) - print("") - except Exception: - print("") - pprint.pprint(value) - print("Unhandled Exception.") - print("") - raise + time.sleep(2) def evaluate_parameters(self, parameters, time_limit = None, @@ -465,8 +458,7 @@ def evaluate_parameters(self, parameters, """ This function executes in a child processes. """ - parameters = parameters.typecast_parameters( self.structure ) - # Redirect stdour & stderr to a temporary file. + # Redirect stdout & stderr to a temporary file. journal = tempfile.NamedTemporaryFile( mode = 'w+t', delete = False, @@ -496,6 +488,8 @@ def evaluate_parameters(self, parameters, ', '.join(repr(arg) for arg in self.argv[1:]), str(self.verbose))) exec_globals = {} + # TODO: Deal with all of the contingencies where this fails. Do not + # lose the journal file! Do append that time-stamp! exec(eval_str, exec_globals) # Clean up time limit @@ -511,83 +505,95 @@ def evaluate_parameters(self, parameters, return exec_globals['score'], journal.name - def save_results(self, parameters, result): + def collect_results(self, experiment, async_promise): + try: + score, run_journal = async_promise.get() + except (ValueError, MemoryError, ZeroDivisionError, AssertionError, RuntimeError) as err: + print("") + print( str( experiment.parameters )) + print("%s:"%(type(err).__name__), err) + print("") + score = err + except Exception: + print("") + print( str( experiment.parameters )) + print("Unhandled Exception.") + print("") + raise + # Update this experiment - param_hash = hash(ParameterSet(parameters)) - if param_hash in self.experiment_ids: - experiment = self.experiment_ids[param_hash] - else: - experiment = ExperimentSummary(self, parameters = parameters) experiment.attempts += 1 - if result is not None: - score, run_journal = result + if not isinstance(score, Exception): experiment.scores.append(score) - + # Append the temporary journal file to the experiments journal. + # TODO !!! Don't lose the data vvv + # Sadly if the experiment crashes, the temp file is abandoned and + # the debugger (you) must search for it manually if they want to see it... + with open(run_journal) as journal: + content = journal.read() + with open(experiment.journal, 'a') as experiment_journal: + experiment_journal.write(self.section_divider) + experiment_journal.write(content) + os.remove(run_journal) + # Notify the parameter optimization method that the parameters which it + # suggested have finished evaluating. + self.method.collect_results(experiment.parameters, score) self.save() # Write the updated Lab Report to file. - # Append the temporary journal file to the experiments journal. 
- if result is None: - # Sadly if the experiment crashes, the temp file is abandoned and - # the debugger must search for it manually if they want to see it... - return - with open(run_journal) as journal: - content = journal.read() - with open(experiment.journal, 'a') as experiment_journal: - experiment_journal.write(self.section_divider) - experiment_journal.write(content) - os.remove(run_journal) - -def Experiment_evaluate_parameters(self, *args, **kwds): +def Experiment_evaluate_parameters(*args, **kwds): """ - Global wrapper for LabReport.evaluate_parameters which is safe for + Global wrapper for Laboratory.evaluate_parameters which is safe for multiprocessing. """ - return LabReport.evaluate_parameters(self, *args, **kwds) + experiment_argv, tag, verbose, parameters = args + self = Laboratory(experiment_argv, tag = tag, verbose = verbose) + return self.evaluate_parameters( parameters, **kwds) def _timeout_callback(signum, frame): - raise ValueError("Time limit exceded.") + raise ValueError("Time limit exceeded.") if __name__ == '__main__': - arg_parser = argparse.ArgumentParser() - arg_parser.add_argument('--verbose', action='store_true',) - arg_parser.add_argument('--tag', type=str, + parser = argparse.ArgumentParser() + parser.add_argument('--verbose', action='store_true', + help='Passed onto the experiment\'s main function.') + parser.add_argument('--tag', type=str, help='Optional string appended to the name of the AE directory. Use tags to ' - 'keep multiple variants of an experiment alive and working at the same time') - arg_parser.add_argument('-n', '--processes', type=int, default=os.cpu_count(), + 'keep multiple variants of an experiment alive and working at the same time.') + parser.add_argument('-n', '--processes', type=int, default=os.cpu_count(), help='Number of experiments to run simultaneously, defaults to the number of CPU cores available.') - arg_parser.add_argument('--time_limit', type=float, default=None, + parser.add_argument('--time_limit', type=float, default=None, help='Hours, time limit for each run of the experiment.',) - arg_parser.add_argument('--memory_limit', type=float, default=None, + parser.add_argument('--memory_limit', type=float, default=None, help='Gigabytes, RAM memory limit for each run of the experiment.') - arg_parser.add_argument('experiment', nargs=argparse.REMAINDER, - help='Name of experiment module followed by its command line arguments.') - - action_parser = arg_parser.add_mutually_exclusive_group(required=True) - action_parser.add_argument('--parse', action='store_true', + parser.add_argument('--parse', action='store_true', help='Parse the lab report and write it back to the same file, then exit.') - action_parser.add_argument('--rmz', action='store_true', + parser.add_argument('--rmz', action='store_true', help='Remove all experiments which have zero attempts.') + parser.add_argument('experiment', nargs=argparse.REMAINDER, + help='Name of experiment module followed by its command line arguments.') - import nupic.optimization.optimizers - import nupic.optimization.swarming + import nupic.optimization.optimizers as optimizers + from nupic.optimization.swarming import ParticleSwarmOptimization actions = [ - nupic.optimization.optimizers.EvaluateHashes, - nupic.optimization.optimizers.EvaluateDefaultParameters, - nupic.optimization.optimizers.EvaluateAllExperiments, - nupic.optimization.optimizers.EvaluateBestExperiment, - nupic.optimization.optimizers.GridSearch, - nupic.optimization.optimizers.CombineBest, - 
nupic.optimization.swarming.ParticleSwarmOptimizations, - ] + optimizers.EvaluateDefaultParameters, + optimizers.EvaluateAllExperiments, + optimizers.EvaluateBestExperiment, + optimizers.EvaluateHashes, + optimizers.GridSearch, + optimizers.CombineBest, + ParticleSwarmOptimization] + assert( all( issubclass(Z, optimizers.BaseOptimizer) for Z in actions)) for method in actions: - method.addArguments(action_parser) + method.add_arguments(parser) - args = arg_parser.parse_args() + args = parser.parse_args() + selected_method = [X for X in actions if X.use_this_optimizer(args)] - ae = LabReport(args.experiment, + ae = Laboratory(args.experiment, tag = args.tag, verbose = args.verbose) + ae.save() print("Lab Report written to %s"%ae.lab_report) if args.parse: @@ -601,23 +607,24 @@ def _timeout_callback(signum, frame): ae.save() print("Removed all experiments which had not yet been attempted.") + elif not selected_method: + print("Error: missing argument for what to to.") + elif len(selected_method) > 1: + print("Error: too many argument for what to to.") else: - selected_method = [X for X in actions if X.useThisOptimizer(args)] - assert(len(selected_method) == 1) # ArgParse should ensure this via "add_mutually_exclusive_group". ae.method = selected_method[0]( ae, args ) giga = 2**30 if args.memory_limit is not None: memory_limit = int(args.memory_limit * giga) else: - # TODO: Not X-Platform ... + # TODO: Not X-Platform, replace with "psutil.virtual_memory.available" available_memory = int(os.popen("free -b").readlines()[1].split()[3]) memory_limit = int(available_memory / args.processes) - print("Memory Limit %.2g GB per instance."%(memory_limit / giga)) + print("Memory Limit %.2g GB per instance."%(memory_limit / giga)) - ae.run( - processes = args.processes, - time_limit = args.time_limit, - memory_limit = memory_limit,) + ae.run( processes = args.processes, + time_limit = args.time_limit, + memory_limit = memory_limit,) print("Exit.") diff --git a/py/src/nupic/optimization/optimizers.py b/py/src/nupic/optimization/optimizers.py index 01026fcb74..30b301d5f8 100644 --- a/py/src/nupic/optimization/optimizers.py +++ b/py/src/nupic/optimization/optimizers.py @@ -16,39 +16,40 @@ # ------------------------------------------------------------------------------ from nupic.optimization.parameter_set import ParameterSet -import itertools import random +import itertools +import math class BaseOptimizer: """ TODO """ - def addArguments(parser): + def add_arguments(parser): """ TODO """ pass - def useThisOptimizer(args): + def use_this_optimizer(args): """ TODO """ return False - def __init__(self, labReport, args): + def __init__(self, laboratory, args): """ TODO """ - self.lab = labReport + self.lab = laboratory self.args = args - def suggestExperiment(self): + def suggest_parameters(self): # TODO Rename this to suggest_parameters! 
""" TODO """ - pass + raise NotImplementedError("BaseOptimizer.suggest_parameters") - def collectResults(self, experiment, result): + def collect_results(self, parameters, result): """ TODO """ @@ -56,50 +57,52 @@ def collectResults(self, experiment, result): class EvaluateDefaultParameters(BaseOptimizer): - def addArguments(parser): - parser.add_argument('--default_parameters', action='store_true',) + def add_arguments(parser): + parser.add_argument('--default_parameters', action='store_true', + help='Evaluate only "experiment_module.default_parameters".') - def useThisOptimizer(args): + def use_this_optimizer(args): return args.default_parameters - def suggestExperiment(self): + def suggest_parameters(self): return self.lab.default_parameters class EvaluateAllExperiments(BaseOptimizer): - def addArguments(parser): + def add_arguments(parser): parser.add_argument('--all_experiments', action='store_true', - help='Evaluate all experiments in the lab report, don\'t start new experiments') + help='Evaluate all experiments in the lab report, don\'t start new experiments.') - def useThisOptimizer(args): + def use_this_optimizer(args): return args.all_experiments - def suggestExperiment(self): + def suggest_parameters(self): rnd = lambda: random.random() / 100 # Random Tiebreaker - return min(self.lab.experiments, key=lambda x: x.attempts + rnd()).parameters + return min(self.lab.experiments, key=lambda X: X.attempts + rnd()).parameters class EvaluateBestExperiment(BaseOptimizer): - def addArguments(parser): + def add_arguments(parser): parser.add_argument('--best', action='store_true', help='Evaluate the best set of parameters on file.') - def useThisOptimizer(args): + def use_this_optimizer(args): return args.best - def suggestExperiment(self): + def suggest_parameters(self): best = max(self.lab.experiments, key = lambda X: X.mean() ) return best.parameters class EvaluateHashes(BaseOptimizer): - def addArguments(parser): - parser.add_argument('--hashes', type=str,) + def add_arguments(parser): + parser.add_argument('--hashes', type=str, + help='Evaluate specific experiments, identified by their hashes. Comma separated list.') - def useThisOptimizer(args): + def use_this_optimizer(args): return args.hashes - def __init__(self, labReport, args): + def __init__(self, lab, args): hashes = [int(h, base=16) for h in args.hashes.split(',')] try: self.experiments = [lab.experiment_ids[h] for h in hashes] @@ -107,13 +110,13 @@ def __init__(self, labReport, args): unknown = [h for h in hashes if h not in lab.experiment_ids] raise ValueError('Hash not recognized: %X'%unknown[0]) - def suggestExperiment(self): + def suggest_parameters(self): rnd = lambda: random.random() / 100 # Random Tiebreaker - return min(self.experiments, key=lambda x: x.attempts + rnd()).parameters + return min(self.experiments, key=lambda X: X.attempts + rnd()).parameters class GridSearch(BaseOptimizer): - # TODO: Make these into a CLI argument? + # TODO: Make these into a CLI argument. 
mod_funcs = [ lambda v: v * .40, lambda v: v * .60, @@ -123,55 +126,52 @@ class GridSearch(BaseOptimizer): lambda v: v * 1.60, ] - def addArguments(parser): + def add_arguments(parser): parser.add_argument('--grid_search', type=str, - help="TODO CLI argument help for GridSearch") + help="TODO: CLI argument help for GridSearch") - def useThisOptimizer(args): - return args.grid_search + def use_this_optimizer(args): + return args.grid_search is not None - def __init__(self, labReport, args): - self.lab = labReport + def __init__(self, laboratory, args): + self.lab = laboratory # Get a list of every parameter to experiment with. target_parameters = [] for start in args.grid_search.split(','): - node = eval("lab.default_parameters" + start) - target_parameters.extend(start + end for end in paths(node)) + node = self.lab.default_parameters.get( start ) + subtree = ParameterSet.enumerate( node ) + target_parameters.extend( start + end for end in subtree ) # Suggest modifications to each parameter. self.experiments = [] for path in target_parameters: - value = lab.default_parameters.get(path) + value = self.lab.default_parameters.get(path) for mod in self.mod_funcs: - params = deepcopy(lab.default_parameters) + params = ParameterSet(self.lab.default_parameters) params.apply( path, mod(value) ) - try: - self.experiments.append( - ExperimentSummary(lab, parameters=params)) - except ValueError: - # ExperimentSummary raises ValueError if it detects - # duplicate entry in the database. - self.experiments.append( - lab.experiment_ids[hash(params)]) - - lab.save() # Write all of the new grid-search experiments to the lab report. - - def suggestExperiment(self): + X = self.lab.get_experiment( params ) + if not X.notes.strip(): + X.notes += "Suggested by Grid Search.\n" + self.experiments.append(X) + + self.lab.save() # Write all of the new grid-search experiments to the lab report. + + def suggest_parameters(self): # Start with a good baseline of the default parameters. - if self.lab.experiment_ids[hash(self.lab.default_parameters)].attempts < 7: + if self.lab.experiment_ids[hash(self.lab.default_parameters)].attempts < 5: return self.lab.default_parameters rnd = lambda: random.random() / 100 # Random Tiebreaker - return min(self.experiments, key=lambda x: x.attempts + rnd()).parameters + return min(self.experiments, key=lambda X: X.attempts + rnd()).parameters -class CombineBest: - def addArguments(parser): +class CombineBest(BaseOptimizer): + def add_arguments(parser): parser.add_argument('--combine', type=int, default=0, help='Combine the NUM best experiments.') - def useThisOptimizer(args): + def use_this_optimizer(args): return args.combine def merge(self, lab, ideas): @@ -186,22 +186,16 @@ def merge(self, lab, ideas): continue # Higher scoring experiments take precedence. paths.append(path) values.append(value) - # Create or get the experiment object. - mods = list(zip(paths, values)) - try: - return ExperimentSummary(lab, modifications=mods) - except ValueError: - # ExperimentSummary raises ValueError if it detects duplicate entry - # in the database. - params = deepcopy(lab.default_parameters) - for p, v in mods: - params.apply(p, v) - return lab.experiment_ids[hash(params)] - - def suggestExperiment(self): - suggest = [] # Retval accumulator - - # Ignore all underperforming experiments. + # Create and get the experiment object. 
+ params = ParameterSet(lab.default_parameters) + for p, v in zip(paths, values): + params.apply(p, v) + return lab.get_experiment(params) + + def suggest_parameters(self): + suggest = [] # Return value accumulator + + # Ignore all under-performing experiments. null = self.lab.experiment_ids[hash(self.lab.default_parameters)] ex = [x for x in self.lab.experiments if x.mean() > null.mean()] @@ -215,14 +209,10 @@ def suggestExperiment(self): suggest.extend(trymore) # Suggests combinations for ideas in itertools.combinations(ex, 2): - suggest.append( self.merge(self.lab, ideas) ) - - if False: # Dump the suggestions for debugging - for x in suggest: - for p, v in x.modifications: - print(p , v) - print() - 1/0 + X = self.merge(self.lab, ideas) + if not X.notes.strip(): + X.notes += "Suggested by Combine Best.\n" + suggest.append( X ) rnd = lambda: random.random() / 100 # Random Tiebreaker return min(suggest, key=lambda x: x.attempts + rnd()).parameters diff --git a/py/src/nupic/optimization/parameter_set.py b/py/src/nupic/optimization/parameter_set.py index 57610d70a0..1babe89154 100644 --- a/py/src/nupic/optimization/parameter_set.py +++ b/py/src/nupic/optimization/parameter_set.py @@ -17,7 +17,7 @@ import pprint import hashlib -import number +import copy # TODO: Consider allowing lists, and converting all lists into tuples. @@ -32,6 +32,7 @@ class ParameterSet(dict): """ def __init__(self, data): super().__init__(self) + data = copy.deepcopy( data ) if isinstance(data, str): data = data.strip() try: @@ -42,19 +43,23 @@ def __init__(self, data): self.update(data) def __hash__(self): - string = pprint.pformat(self).encode('utf-8') + string = str(self).encode('utf-8') checksum = hashlib.md5(string).hexdigest() return abs(int(checksum[:8], base=16)) def __eq__(self, other): - assert(isinstance(other, type(self))) if isinstance(self, dict): + assert(isinstance(other, dict)) return all(ParameterSet.__eq__(self[k], other[k]) for k in self) elif isinstance(self, tuple): + assert(isinstance(other, tuple)) return all(ParameterSet.__eq__(X, Y) for X, Y in zip(self, other)) else: return self == other + def __str__(self): + return pprint.pformat(self) + def diff(old, new): """ Returns list of pairs of (path, new-value) """ diffs = [] @@ -84,7 +89,6 @@ def apply(self, modification, value): Modifies this set of parameters! """ assert(isinstance(modification, str)) - assert(isinstance(value, number.Number)) try: access = modification.split(']')[0].strip('[]"\' ') if not access: @@ -106,49 +110,45 @@ def get_types(self): Convert a set of parameters into the data types used to represent them. Returned result has the same structure as the parameters. """ - # Recurse through the parameter data structure. - if isinstance(self, dict): - return {key: ParameterSet.get_types(value) - for key, value in self.items()} - elif isinstance(self, tuple): - return tuple(ParameterSet.get_types(value) for value in self) - # Determine data type of each entry in parameter data structure. 
- elif isinstance(self, float): - return float - elif isinstance(self, int): - return int - raise TypeError('Unaccepted type in experiment parameters: type "%s".'%(type(self).__name__)) + structure = ParameterSet( self ) + for path in structure.enumerate(): + value = structure.get( path ) + if type(value) not in (float, int): + raise TypeError('Unaccepted type in experiment parameters: type "%s".'%(type(value).__name__)) + structure.apply( path, type(value) ) + return structure - def typecast_parameters(self, structure): - def recursive_typecast_parameters(values, structure): - # Recurse through the parameter data structure. - if isinstance(structure, dict): - for key in structure: - values[key] = recursive_typecast_parameters(values[key], structure[key]) - return values - elif isinstance(structure, tuple): - return tuple(recursive_typecast_parameters(*args) - for args in zip(values, structure)) + def typecast(self, structure): + for path in structure.enumerate(): + type_ = structure.get( path ) + value = float( self.get( path ) ) # Type cast values. - elif structure == float: - value = float(values) - return float(str(value)) - elif structure == int: - return int(round(float(values))) - return recursive_typecast_parameters(self, structure) + if type_ == float: + value = float(str( value )) + elif type_ == int: + value = int(round( value )) + else: + raise TypeError('Unaccepted type in experiment parameters: type "%s".'%(type_.__name__)) + self.apply( path, value ) + return self def enumerate(self): """ - Convert parameters from a recursive structure into a list of parameters. + Convert parameters from a recursive structure into a flat list of strings. Returned parameters are represented as executable strings. + + Use this to iterate through all of the leaves in the structure, which is + where the numbers are stored. """ retval = [] if isinstance(self, dict): for key, value in self.items(): - retval.extend( "['%s']%s"%(key, path) for path in paths(value) ) + subtree = ParameterSet.enumerate( value ) + retval.extend( "['%s']%s"%(key, path) for path in subtree ) elif isinstance(self, tuple): for idx, value in enumerate(self): - retval.extend( "[%d]%s"%(idx, path) for path in paths(value) ) + subtree = ParameterSet.enumerate( value ) + retval.extend( "[%d]%s"%(idx, path) for path in subtree ) else: retval.append('') return sorted(retval) diff --git a/py/src/nupic/optimization/swarming.py b/py/src/nupic/optimization/swarming.py index 57c843a4d4..61d204c261 100644 --- a/py/src/nupic/optimization/swarming.py +++ b/py/src/nupic/optimization/swarming.py @@ -16,227 +16,194 @@ # ------------------------------------------------------------------------------ """ Swarming parameter search """ +# TODO: I'd like to see a big summary of what this is doing, Ideally in the main +# lab report file. It should include the min/mean/std/max of each value, +# velocity, and score across the swarm. + +# TODO: Add notes to experiment summaries that they were created by swarming. 
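+
+# For reference, the update rule implemented below is the usual PSO form
+# (r1, r2 are fresh uniform random samples from [0, 1)):
+#     position <- position + velocity
+#     velocity <- velocity * velocity_strength
+#                 + (particle_best - position) * particle_strength * r1
+#                 + (global_best   - position) * global_strength   * r2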
+ # TODO: Make CLI Arguments for these global constants: particle_strength, global_strength, velocity_strength particle_strength = .25 global_strength = .50 velocity_strength = .95 assert(velocity_strength + particle_strength / 2 + global_strength / 2 >= 1) -import argparse import sys import os import random -import pprint +import pickle from nupic.optimization.parameter_set import ParameterSet from nupic.optimization.optimizers import BaseOptimizer -class ParticleSwarmOptimizations(BaseOptimizer): - def addArguments(argparser): - argparser.add_argument('--swarming', type=int, default=0, - help='Particle Swarm Optimization.') - - def useThisOptimizer(args): - return args.swarming +class ParticleData: + """ + Attributes: + p.parameters - ParameterSet + p.velocities - ParameterSet full of float + p.best - ParameterSet + p.score - float + p.age - Number of times this particle has been evaluated/updated. + """ + def __init__(self, default_parameters): + self.parameters = ParameterSet( default_parameters ) + self.best = None + self.best_score = None + self.age = 0 + self.initialize_velocities() + + def initialize_velocities(self): + # Make a new parameter structure for the velocity data. + self.velocities = ParameterSet( self.parameters ) + # Iterate through every field in the structure. + for path in self.parameters.enumerate(): + value = self.parameters.get(path) + max_percent_change = 10 + uniform = 2 * random.random() - 1 + if isinstance(value, float): + velocity = value * uniform * (max_percent_change / 100.) + elif isinstance(value, int): + if abs(value) < 10: + velocity = uniform + else: + velocity = value * uniform * (max_percent_change / 100.) + else: + raise NotImplementedError() + self.velocities.apply( path, velocity ) + + def update_position(self): + for path in self.parameters.enumerate(): + position = self.parameters.get( path ) + velocity = self.velocities.get( path ) + self.parameters.apply( path, position + velocity ) + + def update_velocity(self, global_best): + for path in self.parameters.enumerate(): + postition = self.parameters.get( path ) + velocity = self.velocities.get( path ) + particle_best = self.best.get( path ) if self.best is not None else postition + global_best_x = global_best.get( path ) if global_best is not None else postition + + # Update velocity. + particle_bias = (particle_best - postition) * particle_strength * random.random() + global_bias = (global_best_x - postition) * global_strength * random.random() + velocity = velocity * velocity_strength + particle_bias + global_bias + self.velocities.apply( path, velocity ) + + def update(self, score, global_best): + self.age += 1 + self.update_position() + self.update_velocity( global_best ) + if self.best is None or score > self.best_score: + self.best = self.parameters + self.best_score = score + print("New particle best score %g."%self.best_score) + + +class ParticleSwarmOptimization(BaseOptimizer): + """ + Attributes: + pso.lab - Laboratory + pso.particles - Number of particles to use. + pso.next_particle - Index into pso.swarm + pso.swarm_path - Data File for this particle swarm. 
+ pso.swarm - List of ParticleData + pso.best - ParameterSet + pso.best_score - float + """ + def add_arguments(parser): + parser.add_argument('--swarming', type=int, + help='Particle Swarm Optimization, number of particles to use.') + + parser.add_argument('--clear_scores', action='store_true', + help=('Remove all scores from the particle swarm so that the ' + 'experiment can be safely altered.')) + + def use_this_optimizer(args): + return args.swarming or args.clear_scores def __init__(self, lab, args): - assert(args.particles >= args.processes) - + self.swarm_path = os.path.join( lab.ae_directory, 'particle_swarm.pickle' ) + if args.clear_scores: + self.clear_scores() + sys.exit() # Setup the particle swarm. - self.particles = args.particles - self.next_particle = random.randrange(args.particles) - self.swarm_path = os.path.join(lab.ae_directory, 'swarm') + self.lab = lab + self.swarm = [] + self.particles = args.swarming + self.next_particle = random.randrange( self.particles ) + self.best = None + self.best_score = None + assert( self.particles >= args.processes ) + # Try loading an existing particle swarm. try: - with open(self.swarm_path, 'r') as swarm_file: - swarm_raw = swarm_file.read() + self.load() + if self.particles != len(self.swarm): + print("Warning: requested number of particles does not match number stored on file.") except FileNotFoundError: - # Initialize a new particle swarm. - self.swarm_data = {} - for particle in range(self.particles): - if particle in [0, 1, 2]: - # Evaluate the default parameters a few times, before branching out - # to the more experimential stuff. Several evals are needed since - # these defaults may have their random velocity applied. - value = lab.default_parameters - else: - value = ParameterSet( initial_parameters(lab.default_parameters)) - self.swarm_data[particle] = { - 'value': value, - 'velocity': initial_velocity(lab.default_parameters), - 'best': value, - 'best_score': None, - 'hash': hash(value), - } - self.swarm_data['best'] = random.choice(list(self.swarm_data.values()))['best'] - self.swarm_data['best_score'] = None - self.swarm_data['evals'] = 0 - else: - # Load an existing particle swarm. - try: - self.swarm_data = eval(swarm_raw) - except SyntaxError: - while True: - print("Corrupted particle swarm data file. [B]ackup, [O]verwrite, or [EXIT]?") - choice = input().upper() - if choice == 'B': - backup_path = self.swarm_path + ".backup" - os.rename(self.swarm_path, backup_path) - print("BACKUP PATH: %s"%backup_path) - self.swarm_data = initialize_particle_swarm(lab.default_parameters, self.particles) - break - elif choice == 'O': - self.swarm_data = initialize_particle_swarm(lab.default_parameters, self.particles) - break - elif choice in 'EXITQ': - print("EXIT") - sys.exit() - else: - print('Invalid input "%s".'%choice) - - if self.particles != sum(isinstance(key, int) for key in self.swarm_data): - print("Warning: argument 'particles' does not match number of particles stored on file.") - - def suggestExperiment(self, lab): - # Run the particle swarm optimization. - particle_data = self.swarm_data[self.next_particle] + pass + # Add new particles as necessary. + while len(self.swarm) < self.particles: + if self.best is not None: + new_particle = ParticleData( self.best ) + else: + new_particle = ParticleData( self.lab.default_parameters ) + self.swarm.append( new_particle ) + # Evaluate the default parameters a few times, before branching out + # to the more experimental stuff. 
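+            # (Only the first three particles keep their starting parameters
+            # unchanged; every later particle immediately takes two velocity
+            # steps so that the swarm starts out spread apart.)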
+ if( len(self.swarm) > 3 ): + new_particle.update_position() + new_particle.update_position() + + def suggest_parameters(self): + particle_data = self.swarm[self.next_particle] self.next_particle = (self.next_particle + 1) % self.particles - - # Update the particles velocity. - particle_data['velocity'] = update_particle_velocity( - particle_data['value'], - particle_data['velocity'], - particle_data['best'], - self.swarm_data['best'],) - - # Update the particles postition. - particle_data['value'] = update_particle_position( - particle_data['value'], - particle_data['velocity']) - - # Evaluate the particle. - promise = pool.apply_async(evaluate_particle, (particle_data,)) - - return parameters - - def collectResults(self, experiment, results): - particle_data = self.swarm_data[particle_number] - try: - score = promise.get() - except (ValueError, MemoryError, ZeroDivisionError, AssertionError) as err: - print("") - print("Particle Number %d"%particle_number) - pprint.pprint(particle_data['value']) - print("%s:"%(type(err).__name__), err) - print("") - # Replace this particle. - particle_data['velocity'] = initial_velocity(default_parameters) - if particle_data['best_score'] is not None: - particle_data['value'] = particle_data['best'] - elif self.swarm_data['best_score'] is not None: - particle_data['value'] = self.swarm_data['best'] + particle_data.parameters.typecast( self.lab.structure ) + return particle_data.parameters + + def collect_results(self, parameters, score): + for particle in self.swarm: + if particle.parameters == parameters: + break + + if isinstance(score, Exception): + # Program crashed, replace this particle. + if particle.best is not None: + particle.parameters = ParameterSet( particle.best ) + elif self.best is not None: + particle.parameters = ParameterSet( self.best ) else: - particle_data['value'] = initial_parameters(default_parameters) - except Exception: - print("") - pprint.pprint(particle_data['value']) - raise - - # Update best scoring particles. - if particle_data['best_score'] is None or score > particle_data['best_score']: - particle_data['best'] = particle_data['value'] - particle_data['best_score'] = score - print("New particle (%d) best score %g"%(particle_number, particle_data['best_score'])) - if self.swarm_data['best_score'] is None or score > self.swarm_data['best_score']: - self.swarm_data['best'] = typecast_parameters(particle_data['best'], parameter_structure) - self.swarm_data['best_score'] = particle_data['best_score'] - self.swarm_data['best_particle'] = particle_number - print("New global best score %g"%self.swarm_data['best_score']) - - # Save the swarm to file. - self.swarm_data['evals'] += 1 - with open(swarm_path, 'w') as swarm_file: - print('# ' + ' '.join(sys.argv), file=swarm_file) # TODO: Get this from lab-report object. - pprint.pprint(self.swarm_data, stream = swarm_file) - - -def initial_parameters(default_parameters): - # Recurse through the parameter data structure. - if isinstance(default_parameters, dict): - return {key: initial_parameters(value) - for key, value in default_parameters.items()} - elif isinstance(default_parameters, tuple): - return tuple(initial_parameters(value) for value in default_parameters) - # Calculate good initial values. 
- elif isinstance(default_parameters, float): - return default_parameters * 1.25 ** (random.random()*2-1) - elif isinstance(default_parameters, int): - if abs(default_parameters) < 10: - return default_parameters + random.choice([-1, 0, +1]) + particle.parameters = ParameterSet( self.lab.default_parameters) + particle.initialize_velocities() + particle.update_position() else: - initial_value_float = initial_parameters(float(default_parameters)) - return int(round(initial_value_float)) - -def initial_velocity(default_parameters): - # Recurse through the parameter data structure. - if isinstance(default_parameters, dict): - return {key: initial_velocity(value) - for key, value in default_parameters.items()} - elif isinstance(default_parameters, tuple): - return tuple(initial_velocity(value) for value in default_parameters) - # Calculate good initial velocities. - elif isinstance(default_parameters, float): - max_percent_change = 10 - uniform = 2 * random.random() - 1 - return default_parameters * uniform * (max_percent_change / 100.) - elif isinstance(default_parameters, int): - if abs(default_parameters) < 10: - uniform = 2 * random.random() - 1 - return uniform + # Update with results of this particles evaluation. + particle.update( score, self.best ) + if self.best is None or score > self.best_score: + self.best = particle.parameters + self.best_score = score + print("New global best score %g."%score) + self.save() + + def save(self): + data = (self.swarm, self.best, self.best_score) + with open(self.swarm_path, 'wb') as file: + pickle.dump(data, file) + + def load(self): + with open(self.swarm_path, 'rb') as file: + data = pickle.load( file ) + self.swarm, self.best, self.best_score = data + + def clear_scores(self): + try: + self.load() + except FileNotFoundError: + print("Particle Swarm not initialized, nothing to do.") else: - return initial_velocity(float(default_parameters)) - -def update_particle_position(position, velocity): - # Recurse through the parameter data structure. - if isinstance(position, dict): - return {key: update_particle_position(value, velocity[key]) - for key, value in position.items()} - elif isinstance(position, tuple): - return tuple(update_particle_position(value, velocity[index]) - for index, value in enumerate(position)) - else: - return position + velocity - -def update_particle_velocity(postition, velocity, particle_best, global_best): - # Recurse through the parameter data structure. - if isinstance(postition, dict): - return {key: update_particle_velocity( - postition[key], - velocity[key], - particle_best[key], - global_best[key]) - for key in postition.keys()} - elif isinstance(postition, tuple): - return tuple(update_particle_velocity( - postition[index], - velocity[index], - particle_best[index], - global_best[index]) - for index, value in enumerate(postition)) - else: - # Update velocity. 
- particle_bias = (particle_best - postition) * particle_strength * random.random() - global_bias = (global_best - postition) * global_strength * random.random() - return velocity * velocity_strength + particle_bias + global_bias - - -if __name__ == '__main__': - arg_parser = argparse.ArgumentParser() - - if args.clear_scores: - print("Removing Scores from Particle Swarm File %s."%swarm_path) - swarm_data['best_score'] = None - for entry in swarm_data: - if isinstance(entry, int): - swarm_data[entry]['best_score'] = None - with open(swarm_path, 'w') as swarm_file: - pprint.pprint(swarm_data, stream = swarm_file) + self.best_score = float('-inf') + for entry in self.swarm: + entry.best_score = float('-inf') + self.save() + print("Removed scores from Particle Swarm.") + From 3d09a1ad9cea396d65880138a57050e8e84b96da Mon Sep 17 00:00:00 2001 From: ctrl-z-9000-times Date: Fri, 17 May 2019 11:47:47 -0400 Subject: [PATCH 7/9] MNIST python example, used AE program to improve score, 92->94% --- py/src/nupic/examples/mnist.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/py/src/nupic/examples/mnist.py b/py/src/nupic/examples/mnist.py index 0b0ec7c529..8672868f42 100644 --- a/py/src/nupic/examples/mnist.py +++ b/py/src/nupic/examples/mnist.py @@ -87,16 +87,16 @@ def encode(self, image): default_parameters = { - "columnDimensions": (10*1000,), - "potentialPct": .5, - "localAreaDensity": .015, - "stimulusThreshold": 6, - "synPermInactiveDec": 0.005, - "synPermActiveInc": 0.01, - "synPermConnected": 0.422, - "minPctOverlapDutyCycle": 0.001, - "dutyCyclePeriod": 1402, - "boostStrength": 2.5, + 'boostStrength': 2.5, + 'columnDimensions': (16000,), + 'dutyCyclePeriod': 1402, + 'localAreaDensity': 0.024, + 'minPctOverlapDutyCycle': 0.001, + 'potentialPct': 0.2, + 'stimulusThreshold': 6, + 'synPermActiveInc': 0.01, + 'synPermConnected': 0.422, + 'synPermInactiveDec': 0.005 } @@ -130,7 +130,7 @@ def main(parameters=default_parameters, argv=None, verbose=True): minPctOverlapDutyCycle = parameters['minPctOverlapDutyCycle'], dutyCyclePeriod = int(round(parameters['dutyCyclePeriod'])), boostStrength = parameters['boostStrength'], - seed = 42, + seed = 0, spVerbosity = 99, wrapAround = False) columns = SDR( sp.getColumnDimensions() ) From 7c3df28edd59774c51ff00b22ee0be467d90af4b Mon Sep 17 00:00:00 2001 From: ctrl-z-9000-times Date: Sat, 18 May 2019 01:24:07 -0400 Subject: [PATCH 8/9] AE: Documentation --- py/src/nupic/examples/mnist.py | 20 ++-- py/src/nupic/optimization/ae.py | 112 ++++++++++++--------- py/src/nupic/optimization/optimizers.py | 44 ++++++-- py/src/nupic/optimization/parameter_set.py | 4 + 4 files changed, 109 insertions(+), 71 deletions(-) diff --git a/py/src/nupic/examples/mnist.py b/py/src/nupic/examples/mnist.py index 8672868f42..dad6a20154 100644 --- a/py/src/nupic/examples/mnist.py +++ b/py/src/nupic/examples/mnist.py @@ -87,16 +87,16 @@ def encode(self, image): default_parameters = { - 'boostStrength': 2.5, - 'columnDimensions': (16000,), - 'dutyCyclePeriod': 1402, - 'localAreaDensity': 0.024, - 'minPctOverlapDutyCycle': 0.001, - 'potentialPct': 0.2, - 'stimulusThreshold': 6, - 'synPermActiveInc': 0.01, - 'synPermConnected': 0.422, - 'synPermInactiveDec': 0.005 + 'boostStrength': 7.80643753517375, + 'columnDimensions': (35415,), + 'dutyCyclePeriod': 1321, + 'localAreaDensity': 0.05361688506086096, + 'minPctOverlapDutyCycle': 0.0016316043362658, + 'potentialPct': 0.06799785776775163, + 'stimulusThreshold': 8, + 'synPermActiveInc': 
0.01455789388651146, + 'synPermConnected': 0.021649964738697944, + 'synPermInactiveDec': 0.006442691852205935 } diff --git a/py/src/nupic/optimization/ae.py b/py/src/nupic/optimization/ae.py index 684cd57380..74309b76f8 100644 --- a/py/src/nupic/optimization/ae.py +++ b/py/src/nupic/optimization/ae.py @@ -15,34 +15,29 @@ # this program. If not, see http://www.gnu.org/licenses. # ------------------------------------------------------------------------------ """ -Automatic Experimenter - -This is a framework for parameter optimization. - * It methodically records the results of different sets of parameters and -analyses the results. It then automatically suggests and evaluates -modifications to the parameters. - * It exposes a convenient API for users to hook their program into this. - * The framework allows for testing each set of parameters several times and -calculates the average and standard deviation of the results. It also -calculates the confidence that a parameter change caused the score to change. - * It is extensible: new methods for automated parameter optimization can be -added. Currently this implements a basic grid search strategy. In the future I -hope to add a particle swarming method. - -To use this module, structure experiments as follows: - ExperimentModule is a python3 module containing the model to be optimized as - well as code to evaluate model performance. +The Automatic Experimenter + +This is a framework for parameter optimization. Key features include: + * An API for users to hook their programs/experiments into this framework. + * An API for adding optimization methods, and several good methods included. + * Records everything, and helps manage the data. + +Structure your program as follows: + ExperimentModule is a python module containing the model to be optimized as + well as code to evaluate the models performance. ExperimentModule.default_parameters = {} - This global dictionary contains all of the parameters to modify. + Global dictionary containing all of the parameters to modify. Parameters must be one of the following types: dict, tuple, float, int. Parameters can be nested in multiple levels of dictionaries and tuples. - The outer most layer of parameters must be a dict. + For more details see nupic.optimization.parameter_set ExperimentModule.main(parameters=default_parameters, argv=None, verbose=True) Returns (float) performance of parameters, to be maximized. + For example, see file: py/src/nupic/examples/mnist.py -Usage: $ ae.py [ae-arguments] ExperimentModule.py [experiment-arguments] +Run your experiment with the AE program: +$ python3 -m nupic.optimization.ae [ae-arguments] ExperimentModule.py [experiment-arguments] The outputs and data of this program are kept in a directory named after the experiment which generated it. If the experiment is "foo/bar.py" then AE's @@ -51,11 +46,15 @@ report format is: 1) Introduction. This text is not parsed, it is preserved so you can keep notes -here This area is initialized with hopefully useful information, including -the experiment name, timestamps. +here. This area is initialized with hopefully useful information, including the +experiment name, timestamps. Do not modify the lab report file while AE is +running! -2) Methods. This section contains the default parameters and the command line -invocation. +2) Methods & Analysis. 
These sections contain summaries and useful information, +including: + * The default parameters and command line invocation, + * A leader board of the best experiments, + * A list of all parameters which crashed the program. 3) Summary of each experiment. Each experiments summary contains the following information: @@ -65,8 +64,7 @@ lab report in this way. 2) MD5 Checksum of Parameters and Command Line. This hash checksum is used to uniquely identify an experimental setup, it's the name of the - experiment. These hashes are used in filenames and searching for a hash - finds all references to it. + experiment. These hashes are used in filenames and are searchable. 3) File Path of Experiment Journal 4) Number of Attempted Runs 5) Score of each Completed Run @@ -140,14 +138,15 @@ class Experiment: """ + An experiment represents a unique ParameterSet. + Attributes: - lab - circular reference to Laboratory instance - attempts - - scores - - notes - - journal - - parameters - - modifications - + parameters - ParameterSet + lab - Circular reference to Laboratory instance. + attempts - Number of times attempted to evaluate. + scores - List of float + notes - string + journal - File path to log file for this experiment. """ def __init__(self, lab, string=None, @@ -160,7 +159,7 @@ def __init__(self, lab, self.notes = ' ' # Load or create this experiment's data. if string is not None: - self.parse( string ) + self._parse( string ) elif modifications is not None: self.parameters = ParameterSet( self.lab.default_parameters ) for path, value in modifications: @@ -192,7 +191,7 @@ def __init__(self, lab, file.write('Hash: %X\n'%hash(self)) file.write('Command Line Invocation: $ ' + ' '.join(self.lab.argv) + '\n') - def parse(self, string): + def _parse(self, string): # Reconstruct the parameters. self.modifications = [] if "Modification:" in string: @@ -241,6 +240,7 @@ def significance(self): return pval def mean(self): + """ Returns the average score. """ return np.mean(self.scores) if self.scores else float('-inf') # TODO: Consider showing min & max scores. @@ -271,8 +271,10 @@ def __hash__(self): class Laboratory: """ + Main class of the AE program. + Attributes: - lab.module - Experiment python module + lab.module - Users Experiment python module lab.name - Name of experiment module lab.path - Directory containing experiment module lab.structure - Types of parameters @@ -295,7 +297,7 @@ def __init__(self, experiment_argv, method=None, tag='', verbose=False): self.method = method self.tag = tag self.verbose = verbose - self.load_experiment_module(experiment_argv[0]) + self._load_experiment_module(experiment_argv[0]) self.ae_directory = os.path.join(self.path, self.name) + self.default_extension if self.tag: self.ae_directory = self.ae_directory + '_' + self.tag @@ -305,7 +307,7 @@ def __init__(self, experiment_argv, method=None, tag='', verbose=False): if os.path.isdir(self.ae_directory): with open(self.lab_report, 'r') as file: report = file.read() - self.parse(report) + self._parse(report) else: # Initialize the Lab Reports attributes and write the skeleton of it # to file. @@ -318,13 +320,17 @@ def __init__(self, experiment_argv, method=None, tag='', verbose=False): pass def init_header(self): + """ + Sets attribute lab.header containing the initial text in the Notes + section at the top of the lab-report. 
+ """ self.header = str(self.name) if self.tag: self.header += ' - ' + self.tag self.header += ' - Automatic Experiments\n' self.header += time.asctime( time.localtime(time.time()) ) + '\n' - def load_experiment_module(self, experiment_module): + def _load_experiment_module(self, experiment_module): """ Argument experiment_module is command line argument 0, specifying the file path to the experiment module. @@ -341,7 +347,7 @@ def load_experiment_module(self, experiment_module): self.default_parameters = ParameterSet(self.module.default_parameters) self.structure = self.default_parameters.get_types() - def parse(self, report): + def _parse(self, report): if not report.strip(): raise ValueError("Empty lab report file!") sections = report.split(self.section_divider) @@ -373,6 +379,10 @@ def parse(self, report): [Experiment(self, s) for s in experiment_sections if s.strip()] def get_experiment(self, parameters): + """ + Returns Experiment instance for the given parameters. If one does not + already exist for these parameter then it is created. + """ p = ParameterSet( parameters ).typecast( self.structure ) h = hash(p) if h in self.experiment_ids: @@ -423,8 +433,7 @@ def save(self): def run(self, processes, time_limit = None, memory_limit = None,): - """ - """ + """ Main loop of the AE program. """ pool = multiprocessing.Pool(processes, maxtasksperchild=1) async_results = [] # Contains pairs of (Promise, Experiment) while True: @@ -444,7 +453,7 @@ def run(self, processes, if self.verbose: print("Evaluating %X"%hash(X)) promise = pool.apply_async( - Experiment_evaluate_parameters, + _Experiment_evaluate_parameters, args = (self.argv, self.tag, self.verbose, X.parameters,), kwds = {'time_limit' : time_limit, 'memory_limit' : memory_limit,},) @@ -456,7 +465,8 @@ def evaluate_parameters(self, parameters, time_limit = None, memory_limit = None,): """ - This function executes in a child processes. + Run the users program/experiment with the given parameters. + This function should execute in a child processes. """ # Redirect stdout & stderr to a temporary file. journal = tempfile.NamedTemporaryFile( @@ -506,6 +516,7 @@ def evaluate_parameters(self, parameters, return exec_globals['score'], journal.name def collect_results(self, experiment, async_promise): + """ Deals with the aftermath & bookkeeping of running an experiment. """ try: score, run_journal = async_promise.get() except (ValueError, MemoryError, ZeroDivisionError, AssertionError, RuntimeError) as err: @@ -540,7 +551,7 @@ def collect_results(self, experiment, async_promise): self.method.collect_results(experiment.parameters, score) self.save() # Write the updated Lab Report to file. -def Experiment_evaluate_parameters(*args, **kwds): +def _Experiment_evaluate_parameters(*args, **kwds): """ Global wrapper for Laboratory.evaluate_parameters which is safe for multiprocessing. 
@@ -575,20 +586,21 @@ def _timeout_callback(signum, frame): import nupic.optimization.optimizers as optimizers from nupic.optimization.swarming import ParticleSwarmOptimization - actions = [ + all_optimizers = [ optimizers.EvaluateDefaultParameters, optimizers.EvaluateAllExperiments, optimizers.EvaluateBestExperiment, optimizers.EvaluateHashes, optimizers.GridSearch, optimizers.CombineBest, - ParticleSwarmOptimization] - assert( all( issubclass(Z, optimizers.BaseOptimizer) for Z in actions)) - for method in actions: + ParticleSwarmOptimization, + ] + assert( all( issubclass(Z, optimizers.BaseOptimizer) for Z in all_optimizers)) + for method in all_optimizers: method.add_arguments(parser) args = parser.parse_args() - selected_method = [X for X in actions if X.use_this_optimizer(args)] + selected_method = [X for X in all_optimizers if X.use_this_optimizer(args)] ae = Laboratory(args.experiment, tag = args.tag, diff --git a/py/src/nupic/optimization/optimizers.py b/py/src/nupic/optimization/optimizers.py index 30b301d5f8..f7a16a225d 100644 --- a/py/src/nupic/optimization/optimizers.py +++ b/py/src/nupic/optimization/optimizers.py @@ -22,36 +22,58 @@ class BaseOptimizer: """ - TODO + Optimizer classes control what parameters to try. This class defines the + API which they must implement. """ + @classmethod def add_arguments(parser): """ - TODO + Argument parser is an instance of ArgumentParser, from the standard + library argparse. Optimizer classes should add their command line + arguments to this. """ pass - def use_this_optimizer(args): + @classmethod + def use_this_optimizer(arguments): """ - TODO + Argument is the parsed arguments, result of add_arguments and the users + command line. + + Returns bool, if the user has requested to use this optimizer via + command line arguments. """ return False - def __init__(self, laboratory, args): + def __init__(self, laboratory, arguments): """ - TODO + Argument laboratory is the main class of the framework. + See class nupic.optimization.ae.Laboratory + + Argument arguments is the parsed arguments, result of add_arguments and + the users command line. """ self.lab = laboratory - self.args = args + self.args = arguments - def suggest_parameters(self): # TODO Rename this to suggest_parameters! + def suggest_parameters(self): """ - TODO + Returns instance of ParameterSet, to be evaluated. The parameters will + be type cast before being passed to the main function of the users + program/experiment. """ raise NotImplementedError("BaseOptimizer.suggest_parameters") def collect_results(self, parameters, result): """ - TODO + Argument parameters was returned by suggest_parameters, and has now been + evaluated. + + Argument results is either a float or an exception. + If results is a float, then it is the score to be maximized. + If results is an Exception, then it was raised by the experiment. + + This method is optional, optimizers do not need to implement this. 
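+
+        A minimal override might look like this (a sketch only; "self.history"
+        is a hypothetical attribute, not part of this API):
+
+            def collect_results(self, parameters, result):
+                if not isinstance(result, Exception):
+                    self.history.append((parameters, result))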
""" pass @@ -128,7 +150,7 @@ class GridSearch(BaseOptimizer): def add_arguments(parser): parser.add_argument('--grid_search', type=str, - help="TODO: CLI argument help for GridSearch") + help="Grid Search, parameter to search, use \"\" for all.") def use_this_optimizer(args): return args.grid_search is not None diff --git a/py/src/nupic/optimization/parameter_set.py b/py/src/nupic/optimization/parameter_set.py index 1babe89154..de2342f3ea 100644 --- a/py/src/nupic/optimization/parameter_set.py +++ b/py/src/nupic/optimization/parameter_set.py @@ -78,6 +78,10 @@ def diff(old, new): return diffs def get(self, path): + """ + Gets a value stored in the ParameterSet. + Argument path is an executable string description of which parameter[s] to retrieve. + """ assert(isinstance(path, str)) try: return eval('self' + path) From 1119123e9291e584d2bea6eae85b3566a923a7c3 Mon Sep 17 00:00:00 2001 From: ctrl-z-9000-times Date: Sun, 19 May 2019 14:27:26 -0400 Subject: [PATCH 9/9] AE Improvements, replaced multiprocessing.pool w/ subprocesses The process pool that comes with python standard library does not always work very well, and does not give the caller enough control to fix problems with it. --- py/src/nupic/optimization/ae.py | 309 +++++++++++---------- py/src/nupic/optimization/parameter_set.py | 2 + 2 files changed, 158 insertions(+), 153 deletions(-) diff --git a/py/src/nupic/optimization/ae.py b/py/src/nupic/optimization/ae.py index 74309b76f8..aa33ed7aae 100644 --- a/py/src/nupic/optimization/ae.py +++ b/py/src/nupic/optimization/ae.py @@ -77,20 +77,18 @@ reproduce the experiment, followed by a section for every run of this experiment. The section for each run contains the output (std-out & std-err) of the program, as well as diagnostic information such as timestamps and memory -usage reports. Files with the extension ".log" are temporary files for in- -progress experiment, and when the experiment finishes running they are copied to -their journal and then the ".log" file is deleted. +usage reports. """ # TODO: Default parameters need better handling... When they change, update -# all of the modifications to be diffs of the current parameters? +# all of the modifications to be diffs from the new parameters? # TODO: Maybe the command line invocation should be included in the experiment # hash? Then I could experiment with the CLI args within a single lab report. # TODO: Every run should track elapsed time and report the average in the # experiment journal & summary. Some of these experiments clearly take longer -# than others but its not recorded & displayed. +# than others but its not displayed in an easy to find way. # TODO: Log files should report memory usage ... @@ -99,15 +97,6 @@ # TODO: Failed experiments should have its own section in the Laboratory. Maybe # organize them by the exception type & string? -# TODO: Consider renaming *log files to *tmp for clarity. - -# TODO: Make the leader board base the P-Values off of the best experiment, and -# always keep the default parameters on the board. - -# TODO: Do not lose the log file when worker process crashes! With this fixed I -# won't need to explain what the temp files are for, the user shouldn't need to -# know about them... - # TODO: How hard would it be to allow the user to edit the lab report while the # program is running? Read timestamps to detect user writes. 
All of the latest # data is loaded in the program, so it should be simple to load in the new @@ -123,13 +112,12 @@ import sys import shutil import random -import pprint import time import datetime import tempfile -import multiprocessing +import threading +from multiprocessing import Process, Pipe import resource -import signal # TODO: X-Plat issue: Replace signal with threading.timer? import re import numpy as np import scipy.stats @@ -139,6 +127,7 @@ class Experiment: """ An experiment represents a unique ParameterSet. + This class primarily deals with bookkeeping. Attributes: parameters - ParameterSet @@ -192,12 +181,13 @@ def __init__(self, lab, file.write('Command Line Invocation: $ ' + ' '.join(self.lab.argv) + '\n') def _parse(self, string): + if "Notes:" in string: + string, _, self.notes = string.partition('Notes:') # Reconstruct the parameters. self.modifications = [] - if "Modification:" in string: - for change in re.findall("Modification:(.*)", string): - path, eq, value = change.partition('=') - self.modifications.append((path.strip(), value.strip())) + for change in re.findall(r"^[Mm]od.*:(.*)$", string, re.MULTILINE): + path, eq, value = change.partition('=') + self.modifications.append((path, value)) self.parameters = ParameterSet(self.lab.default_parameters) for path, value in self.modifications: self.parameters.apply(path, value) @@ -214,26 +204,25 @@ def _parse(self, string): self.scores = re.search("Scores: (.*)", string).groups()[0].strip() self.scores = [float(s.strip()) for s in self.scores.split(',') if s.strip()] assert( len(self.scores) <= self.attempts ) # Attempts may fail and not return a score. - if "Notes:" in string: - self.notes = string.partition('Notes:')[2] - # TODO: This should accept the baseline to compare against, and then have - # the defaults argument as the default baseline. - def significance(self): + def significance(self, baseline=None): """ - Returns the P-Value of the Null-Hypothesis test (these parameters - against the default parameters) + Returns the P-Value of the Null-Hypothesis test, the probability that + this experiment and the given experiment have the same distribution of + scores, meaning that the change in scores is merely by chance. + + Argument baseline is an Experiment, optional defaults to default_parameters """ - try: - null_experiment = self.lab.experiment_ids[hash(self.lab.default_parameters)] - except KeyError: - return float('nan') - if not self.scores or not null_experiment.scores: + if baseline is None: + baseline = self.lab.default_parameters + baseline = self.lab.get_experiment( baseline ) + + if not self.scores or not baseline.scores: return float('nan') if len(self.scores) == 1: pass # TODO: How to pass probabilities & statistics? stat, pval = scipy.stats.ttest_ind( - null_experiment.scores, self.scores, axis=None, + baseline.scores, self.scores, axis=None, # Since both samples come from the same experimental setup they # should have the same variance. equal_var=True,) @@ -243,8 +232,6 @@ def mean(self): """ Returns the average score. """ return np.mean(self.scores) if self.scores else float('-inf') - # TODO: Consider showing min & max scores. - # TODO: Don't show scores & P-Value if attempts == 0. 
def __str__(self): s = '' if not self.modifications: @@ -254,12 +241,12 @@ def __str__(self): s += 'Hash: %X\n'%hash(self) s += 'Journal: %s\n'%self.journal s += 'Attempts: %d\n'%self.attempts - s += 'Scores: %s\n'%', '.join(str(s) for s in self.scores) if self.scores: + s += 'Scores: %s\n'%', '.join(str(s) for s in sorted(self.scores)) mean = np.mean(self.scores) std = np.std(self.scores) s += 'Mean & Standard Deviation: %g & %g\n'%(mean, std) - s += 'P-Value: %g\n'%self.significance() + s += 'P-Value: %g\n'%self.significance() s += 'Notes:' + self.notes return s @@ -383,6 +370,9 @@ def get_experiment(self, parameters): Returns Experiment instance for the given parameters. If one does not already exist for these parameter then it is created. """ + if isinstance( parameters, Experiment ): + return parameters + p = ParameterSet( parameters ).typecast( self.structure ) h = hash(p) if h in self.experiment_ids: @@ -392,12 +382,16 @@ def significant_experiments_table(self): """ Returns string """ - ex = sorted(self.experiments, key = lambda x: -x.mean()) + ex = sorted(self.experiments, key = lambda x: (-x.mean(), -x.attempts)) ex = ex[:20] + # Always keep the default parameters on the leader board. + if self.default_parameters not in (X.parameters for X in ex): + ex.pop() + ex.append( self.get_experiment( self.default_parameters)) s = ' Hash | N | Score | P-Value | Modifications\n' fmt = '%8X | %3d | % 10g | % 9.3g | ' for x in ex: - s += fmt%(hash(x), len(x.scores), x.mean(), x.significance()) + s += fmt%(hash(x), len(x.scores), x.mean(), x.significance(ex[0])) if not x.modifications: s += 'Default Parameters\n' else: @@ -413,7 +407,7 @@ def __str__(self): s = self.header s += self.section_divider s += 'Default Parameter Values = \n' - s += pprint.pformat(self.default_parameters) + s += str(self.default_parameters) s += '\n$ ' + ' '.join(self.argv) s += self.section_divider s += self.significant_experiments_table().rstrip() @@ -434,134 +428,143 @@ def run(self, processes, time_limit = None, memory_limit = None,): """ Main loop of the AE program. """ - pool = multiprocessing.Pool(processes, maxtasksperchild=1) - async_results = [] # Contains pairs of (Promise, Experiment) + pool = [] while True: - # Check for jobs which have finished - run_slot = 0 - while run_slot < len(async_results): - promise, value = async_results[run_slot] - if promise.ready(): - # Experiment run has finished, deal with the results. - self.collect_results(value, promise) - async_results.pop(run_slot) - else: - run_slot += 1 # Start running new experiments - while len(async_results) < processes: + while len(pool) < processes: X = self.get_experiment( self.method.suggest_parameters() ) - if self.verbose: - print("Evaluating %X"%hash(X)) - promise = pool.apply_async( - _Experiment_evaluate_parameters, - args = (self.argv, self.tag, self.verbose, X.parameters,), - kwds = {'time_limit' : time_limit, - 'memory_limit' : memory_limit,},) - async_results.append((promise, X)) - # Wait for experiments to complete + trial = Worker(self, X.parameters, time_limit, memory_limit) + trial.start() + pool.append(trial) + + # Wait for experiments to complete. time.sleep(2) - def evaluate_parameters(self, parameters, - time_limit = None, - memory_limit = None,): - """ - Run the users program/experiment with the given parameters. - This function should execute in a child processes. - """ - # Redirect stdout & stderr to a temporary file.
- journal = tempfile.NamedTemporaryFile( + # Check for jobs which have finished. + for idx in range(len(pool)-1, -1, -1): + if not pool[idx].is_alive(): + trial = pool.pop( idx ) + X = self.get_experiment( trial.parameters ) + trial.collect_journal( X ) + trial.collect_score( X ) + # Notify the parameter optimization method that the + # parameters which it suggested have finished evaluating. + self.method.collect_results( X.parameters, trial.score ) + self.save() # Write the updated Lab Report to file. + + +class Worker(Process): + """ + This class runs a single trial of an experiment. + Each trial is run in a subprocess. + """ + def __init__(self, lab, parameters, time_limit, memory_limit): + Process.__init__(self) + self.parameters = parameters + self.time_limit = time_limit + self.memory_limit = memory_limit + self.journal = tempfile.NamedTemporaryFile( mode = 'w+t', delete = False, buffering = 1, - dir = self.ae_directory, + dir = lab.ae_directory, prefix = "%X_"%hash(parameters), - suffix = ".log",) - stdout, stderr = sys.stdout, sys.stderr - sys.stdout = journal - sys.stderr = journal + suffix = ".tmp",).name + # Make a pipe to return output & results from the worker back to the main AE process. + self.output, self.input = Pipe() + # Worker will execute this string. + self.exec_str = (lab.module_reload + + 'score = %s.main(parameters=%s, argv=[%s], verbose=%s)'%( + lab.name, + repr(parameters), + ', '.join(repr(arg) for arg in lab.argv[1:]), + str(lab.verbose))) + + def start(self): + Process.start(self) + # Setup time limit, arm the watchdog timer. + if self.time_limit is not None: + def watchdog(): + if self.is_alive(): + self.terminate() + threading.Timer( self.time_limit * 60, watchdog ).start() + + def run(self): + # Redirect stdout & stderr to the temporary log file. + sys.stdout = open(self.journal, 'a', buffering=1) + sys.stderr = open(self.journal, 'a', buffering=1) start_time = time.time() - journal.write("Started: " + time.asctime( time.localtime(start_time) ) + '\n') + print("Started: " + time.asctime( time.localtime(start_time) ) + '\n') # Setup memory limit - if memory_limit is not None: + if self.memory_limit is not None: soft, hard = resource.getrlimit(resource.RLIMIT_AS) - resource.setrlimit(resource.RLIMIT_AS, (memory_limit, hard)) - # Setup time limit - if time_limit is not None: - signal.signal(signal.SIGALRM, _timeout_callback) - time_limit = max(1, int(round(time_limit * 60 * 60))) - signal.alarm(time_limit) - - eval_str = (self.module_reload + - 'score = %s.main(parameters=%s, argv=[%s], verbose=%s)'%( - self.name, - repr(parameters), - ', '.join(repr(arg) for arg in self.argv[1:]), - str(self.verbose))) + resource.setrlimit(resource.RLIMIT_AS, (self.memory_limit, hard)) + exec_globals = {} - # TODO: Deal with all of the contingencies where this fails. Do not - # lose the journal file! Do append that time-stamp!
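The execution model introduced above (one Process subclass per trial, a Pipe to return the result, and a threading.Timer acting as a watchdog for the time limit) can be condensed into a small self-contained sketch. The evaluate() function and Trial class below are hypothetical stand-ins for the user's experiment module and the Worker class; this shows the general pattern, not the ae.py code itself:

    import threading
    from multiprocessing import Process, Pipe

    def evaluate():
        return 3.14                               # stand-in for running one trial of the experiment

    class Trial(Process):
        def __init__(self, time_limit_minutes=None):
            Process.__init__(self)
            self.output, self.input = Pipe()      # parent reads output, child writes input
            self.time_limit = time_limit_minutes

        def start(self):
            Process.start(self)
            if self.time_limit is not None:       # watchdog: kill the child when time runs out
                watchdog = threading.Timer(self.time_limit * 60,
                                           lambda: self.terminate() if self.is_alive() else None)
                watchdog.daemon = True            # don't keep the parent alive just for the timer
                watchdog.start()

        def run(self):                            # executes in the child process
            try:
                self.input.send(evaluate())
            except Exception as err:
                self.input.send(err)              # exceptions are picklable, so return them as data

    if __name__ == '__main__':
        pool = [Trial(time_limit_minutes=1) for _ in range(2)]   # keep a fixed number of trials running
        for t in pool:
            t.start()
        for t in pool:
            t.join()
            if t.output.poll(0):                  # poll() avoids blocking if the child was killed
                result = t.output.recv()
                print("Failed:" if isinstance(result, Exception) else "Score:", result)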
- exec(eval_str, exec_globals) - - # Clean up time limit - if time_limit is not None: - signal.alarm(0) - # Clean up memory limit - if memory_limit is not None: - resource.setrlimit(resource.RLIMIT_AS, (soft, hard)) - # Restore file descriptors - sys.stdout, sys.stderr = stdout, stderr - run_time = datetime.timedelta(seconds = time.time() - start_time) - journal.write("Elapsed Time: " + str(run_time)) + try: + exec(self.exec_str, exec_globals) + except Exception as err: + exec_globals['score'] = err - return exec_globals['score'], journal.name + run_time = datetime.timedelta(seconds = time.time() - start_time) + print("Elapsed Time: " + str(run_time)) + self.input.send( exec_globals['score'] ) - def collect_results(self, experiment, async_promise): - """ Deals with the aftermath & bookkeeping of running an experiment. """ - try: - score, run_journal = async_promise.get() - except (ValueError, MemoryError, ZeroDivisionError, AssertionError, RuntimeError) as err: - print("") - print( str( experiment.parameters )) - print("%s:"%(type(err).__name__), err) - print("") - score = err - except Exception: - print("") - print( str( experiment.parameters )) - print("Unhandled Exception.") - print("") - raise - - # Update this experiment + def is_alive(self): + """ + If a process has reported a score then it is done, even if it's + technically still alive. Sometimes processes just don't die. After a + process has reported back to us, it has 60 seconds to finish before we kill + it. + """ + if not Process.is_alive(self): + return False + if self.output.poll(0): + def watchdog(): + if self.is_alive(): + print("Warning: experiment returned but process still alive, terminating ...") + self.terminate() + threading.Timer( 60, watchdog ).start() + return False + return True + + def collect_journal(self, experiment): + """ Append the text output of this run to the main journal for the experiment. """ + # Append the temporary journal file to the experiment's journal. + with open( self.journal ) as journal: + content = journal.read() + with open( experiment.journal, 'a') as experiment_journal: + experiment_journal.write(Laboratory.section_divider) + experiment_journal.write(content) + os.remove( self.journal ) + + def collect_score(self, experiment): + """ + Get the score returned by this run, saved as attribute 'score'. + Score may be an exception raised by the experiment. + """ + assert( self.output.poll(0) ) experiment.attempts += 1 - if not isinstance(score, Exception): - experiment.scores.append(score) - # Append the temporary journal file to the experiments journal. - # TODO !!! Don't lose the data vvv - # Sadly if the experiment crashes, the temp file is abandoned and - # the debugger (you) must search for it manually if they want to see it... - with open(run_journal) as journal: - content = journal.read() - with open(experiment.journal, 'a') as experiment_journal: - experiment_journal.write(self.section_divider) - experiment_journal.write(content) - os.remove(run_journal) - # Notify the parameter optimization method that the parameters which it - # suggested have finished evaluating. - self.method.collect_results(experiment.parameters, score) - self.save() # Write the updated Lab Report to file. - -def _Experiment_evaluate_parameters(*args, **kwds): - """ - Global wrapper for Laboratory.evaluate_parameters which is safe for - multiprocessing.
- """ - experiment_argv, tag, verbose, parameters = args - self = Laboratory(experiment_argv, tag = tag, verbose = verbose) - return self.evaluate_parameters( parameters, **kwds) - -def _timeout_callback(signum, frame): - raise ValueError("Time limit exceeded.") + self.score = self.output.recv() + if not isinstance(self.score, Exception): + experiment.scores.append(self.score) + else: + for err in (ValueError, MemoryError, ZeroDivisionError, AssertionError, RuntimeError): + if isinstance( self.score, err ): + print("") + print( str( experiment.parameters )) + print("Hash: %X" % hash( experiment.parameters )) + print("%s:"%(type( self.score).__name__), self.score) + print("") + break + else: + print("") + print( str( experiment.parameters )) + print("Hash: %X" % hash( experiment.parameters )) + print("%s:"%(type(self.result).__name__), self.result) + print("Unhandled Exception.") + print("") + raise if __name__ == '__main__': diff --git a/py/src/nupic/optimization/parameter_set.py b/py/src/nupic/optimization/parameter_set.py index de2342f3ea..513f4e52ee 100644 --- a/py/src/nupic/optimization/parameter_set.py +++ b/py/src/nupic/optimization/parameter_set.py @@ -93,6 +93,8 @@ def apply(self, modification, value): Modifies this set of parameters! """ assert(isinstance(modification, str)) + if isinstance(value, str): + value = eval(value.strip()) try: access = modification.split(']')[0].strip('[]"\' ') if not access: