From 9f7cce6366a0f9124537c06d15e3686a844cbe6e Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Fri, 14 Sep 2018 20:13:21 +0800 Subject: [PATCH 01/10] Fix stderr path (#74) --- src/nni_manager/rest_server/restHandler.ts | 2 +- src/nni_manager/yarn.lock | 42 ---------------------- 2 files changed, 1 insertion(+), 43 deletions(-) diff --git a/src/nni_manager/rest_server/restHandler.ts b/src/nni_manager/rest_server/restHandler.ts index e94c256ca1..89ede1d51f 100644 --- a/src/nni_manager/rest_server/restHandler.ts +++ b/src/nni_manager/rest_server/restHandler.ts @@ -293,7 +293,7 @@ class NNIRestHandler { if (jobInfo === undefined || jobInfo.status !== 'FAILED' || jobInfo.logPath === undefined) { return jobInfo; } - jobInfo.stderrPath = path.join(jobInfo.logPath, '.nni', 'stderr'); + jobInfo.stderrPath = path.join(jobInfo.logPath, 'stderr'); return jobInfo; } diff --git a/src/nni_manager/yarn.lock b/src/nni_manager/yarn.lock index 4f20c6e832..8611053414 100644 --- a/src/nni_manager/yarn.lock +++ b/src/nni_manager/yarn.lock @@ -349,18 +349,6 @@ boom@2.6.x: dependencies: hoek "2.x.x" -boxen@1.3.0: - version "1.3.0" - resolved "https://registry.yarnpkg.com/boxen/-/boxen-1.3.0.tgz#55c6c39a8ba58d9c61ad22cd877532deb665a20b" - dependencies: - ansi-align "^2.0.0" - camelcase "^4.0.0" - chalk "^2.0.1" - cli-boxes "^1.0.0" - string-width "^2.0.0" - term-size "^1.2.0" - widest-line "^2.0.0" - brace-expansion@^1.1.7: version "1.1.11" resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd" @@ -600,30 +588,6 @@ etag@~1.8.1: version "1.8.1" resolved "https://registry.yarnpkg.com/etag/-/etag-1.8.1.tgz#41ae2eeb65efa62268aebfea83ac7d79299b0887" -execa@^0.7.0: - version "0.7.0" - resolved "https://registry.yarnpkg.com/execa/-/execa-0.7.0.tgz#944becd34cc41ee32a63a9faf27ad5a65fc59777" - dependencies: - cross-spawn "^5.0.1" - get-stream "^3.0.0" - is-stream "^1.1.0" - npm-run-path "^2.0.0" - p-finally "^1.0.0" - signal-exit "^3.0.0" - strip-eof "^1.0.0" - -execa@^0.8.0: - version "0.8.0" - resolved "https://registry.yarnpkg.com/execa/-/execa-0.8.0.tgz#d8d76bbc1b55217ed190fd6dd49d3c774ecfc8da" - dependencies: - cross-spawn "^5.0.1" - get-stream "^3.0.0" - is-stream "^1.1.0" - npm-run-path "^2.0.0" - p-finally "^1.0.0" - signal-exit "^3.0.0" - strip-eof "^1.0.0" - express-joi-validator@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/express-joi-validator/-/express-joi-validator-2.0.0.tgz#24e26e6a8327f69985ed72588f00e295dc3e3234" @@ -1546,12 +1510,6 @@ tough-cookie@~2.3.3: dependencies: punycode "^1.4.1" -toxic@^1.0.0: - version "1.0.1" - resolved "https://registry.yarnpkg.com/toxic/-/toxic-1.0.1.tgz#8c2e2528da591100adc3883f2c0e56acfb1c7288" - dependencies: - lodash "^4.17.10" - tree-kill@^1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/tree-kill/-/tree-kill-1.2.0.tgz#5846786237b4239014f05db156b643212d4c6f36" From 693cf20fad2fdd6649c962c66d570a2f78d3d8ca Mon Sep 17 00:00:00 2001 From: xuehui Date: Thu, 13 Sep 2018 14:51:26 +0800 Subject: [PATCH 02/10] add batch_tuner --- src/sdk/pynni/nni/batch_tuner/__init__.py | 0 src/sdk/pynni/nni/batch_tuner/batch_tuner.py | 82 ++++++++++++++++++++ src/sdk/pynni/nni/msg_dispatcher.py | 11 ++- src/sdk/pynni/nni/tuner.py | 7 +- tools/nnicmd/launcher_utils.py | 3 +- 5 files changed, 98 insertions(+), 5 deletions(-) create mode 100644 src/sdk/pynni/nni/batch_tuner/__init__.py create mode 100644 src/sdk/pynni/nni/batch_tuner/batch_tuner.py diff --git a/src/sdk/pynni/nni/batch_tuner/__init__.py b/src/sdk/pynni/nni/batch_tuner/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/sdk/pynni/nni/batch_tuner/batch_tuner.py b/src/sdk/pynni/nni/batch_tuner/batch_tuner.py new file mode 100644 index 0000000000..8e7a703875 --- /dev/null +++ b/src/sdk/pynni/nni/batch_tuner/batch_tuner.py @@ -0,0 +1,82 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, +# to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, +# including without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +''' +batch_tuner.py including: + class BatchTuner +''' + +import copy +from enum import Enum, unique +import random + +import numpy as np + +from nni.tuner import Tuner +from . import parameter_expressions + + +TYPE = '_type' +CHOICE = 'choice' +VALUE = '_value' + + +class BatchTuner(Tuner): + ''' + BatchTuner is tuner will running all the configure that user want to run batchly. + The search space only be accepted like: + { + 'combine_params': { '_type': 'choice', + '_value': '[{...}, {...}, {...}]', + } + } + ''' + + def __init__(self): + self.count = -1 + self.values = [] + + def is_valid(self, search_space) + ''' + Check the search space is valid: only contains 'choice' type + ''' + if not len(search_space) == 1: + raise RuntimeException('BatchTuner only supprt one combined-paramreters key.') + + for param in search_space: + param_type = param[TYPE] + if param_type is not CHOICE: + raise RuntimeException('BatchTuner only supprt one combined-paramreters type is choice.') + else: + if isinstance(param[VALUE], list): + return param[VALUE] + raise RuntimeException('The combined-paramreters value in BatchTuner is not a list.') + return None + + def update_search_space(self, search_space): + self.values = is_valid(search_space) + + def generate_parameters(self, parameter_id): + count +=1 + if count>len(self.value)-1: + return None + return self.values[count] + + def receive_trial_result(self, parameter_id, parameters, reward): + pass \ No newline at end of file diff --git a/src/sdk/pynni/nni/msg_dispatcher.py b/src/sdk/pynni/nni/msg_dispatcher.py index fcf07248dd..f6223a313c 100644 --- a/src/sdk/pynni/nni/msg_dispatcher.py +++ b/src/sdk/pynni/nni/msg_dispatcher.py @@ -89,9 +89,14 @@ def handle_request_trial_jobs(self, data): # data: number or trial jobs ids = [_create_parameter_id() for _ in range(data)] params_list = self.tuner.generate_multiple_parameters(ids) - assert len(ids) == len(params_list) - for i, _ in enumerate(ids): - send(CommandType.NewTrialJob, _pack_parameter(ids[i], params_list[i])) + #assert len(ids) == len(params_list) + + # when parameters is None. + if len(params_list) == 0: + send(CommandType.NoMoreTrialJobs, _pack_parameter(ids[0], '')) + else: + for i, _ in enumerate(ids): + send(CommandType.NewTrialJob, _pack_parameter(ids[i], params_list[i])) return True def handle_update_search_space(self, data): diff --git a/src/sdk/pynni/nni/tuner.py b/src/sdk/pynni/nni/tuner.py index cc3d07a436..0129f7624e 100644 --- a/src/sdk/pynni/nni/tuner.py +++ b/src/sdk/pynni/nni/tuner.py @@ -42,7 +42,12 @@ def generate_multiple_parameters(self, parameter_id_list): User code must override either this function or 'generate_parameters()'. parameter_id_list: list of int """ - return [self.generate_parameters(parameter_id) for parameter_id in parameter_id_list] + result = [] + for parameter_id in parameter_id_list: + temp = self.generate_parameters(parameter_id) + if temp: + result.append(temp) + return result def receive_trial_result(self, parameter_id, parameters, reward): """Invoked when a trial reports its final result. Must override. diff --git a/tools/nnicmd/launcher_utils.py b/tools/nnicmd/launcher_utils.py index 84e120234e..ee33ff37a8 100644 --- a/tools/nnicmd/launcher_utils.py +++ b/tools/nnicmd/launcher_utils.py @@ -90,7 +90,8 @@ def parse_tuner_content(experiment_config): tuner_class_name_dict = {'TPE': 'HyperoptTuner',\ 'Random': 'HyperoptTuner',\ 'Anneal': 'HyperoptTuner',\ - 'Evolution': 'EvolutionTuner'} + 'Evolution': 'EvolutionTuner',\ + 'BatchTuning': 'BatchTuner'} tuner_algorithm_name_dict = {'TPE': 'tpe',\ 'Random': 'random_search',\ From bfb4074d2a95f1ccfa63960d782f7600f0698167 Mon Sep 17 00:00:00 2001 From: xuehui Date: Thu, 13 Sep 2018 16:28:27 +0800 Subject: [PATCH 03/10] update __main__.py --- src/sdk/pynni/nni/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sdk/pynni/nni/__main__.py b/src/sdk/pynni/nni/__main__.py index fd9b63fcb8..25d7c1050b 100644 --- a/src/sdk/pynni/nni/__main__.py +++ b/src/sdk/pynni/nni/__main__.py @@ -35,7 +35,7 @@ logger = logging.getLogger('nni.main') logger.debug('START') -BUILT_IN_CLASS_NAMES = ['HyperoptTuner', 'EvolutionTuner', 'MedianstopAssessor'] +BUILT_IN_CLASS_NAMES = ['HyperoptTuner', 'EvolutionTuner', 'BatchTuner', 'MedianstopAssessor'] def create_builtin_class_instance(classname, jsonstr_args): if jsonstr_args: From 71688b860684a55c86d4aaf10140918ffaf13b45 Mon Sep 17 00:00:00 2001 From: xuehui Date: Fri, 14 Sep 2018 15:06:09 +0800 Subject: [PATCH 04/10] fix bug --- .../trials/mnist-batch-tune-keras/config.yml | 20 +++ .../mnist-batch-tune-keras/mnist-keras.py | 133 ++++++++++++++++++ .../mnist-batch-tune-keras/search_space.json | 12 ++ src/sdk/pynni/nni/__main__.py | 1 + src/sdk/pynni/nni/batch_tuner/batch_tuner.py | 28 ++-- tools/nnicmd/launcher_utils.py | 2 +- 6 files changed, 180 insertions(+), 16 deletions(-) create mode 100644 examples/trials/mnist-batch-tune-keras/config.yml create mode 100644 examples/trials/mnist-batch-tune-keras/mnist-keras.py create mode 100644 examples/trials/mnist-batch-tune-keras/search_space.json diff --git a/examples/trials/mnist-batch-tune-keras/config.yml b/examples/trials/mnist-batch-tune-keras/config.yml new file mode 100644 index 0000000000..866fd4d0bd --- /dev/null +++ b/examples/trials/mnist-batch-tune-keras/config.yml @@ -0,0 +1,20 @@ +authorName: default +experimentName: example_mnist-keras +trialConcurrency: 1 +maxExecDuration: 1h +maxTrialNum: 10 +#choice: local, remote +trainingServicePlatform: local +searchSpacePath: ~/nni/examples/trials/mnist-batch-tune-keras/search_space.json +#choice: true, false +useAnnotation: false +tuner: + #choice: TPE, Random, Anneal, Evolution, BatchTuner + builtinTunerName: BatchTuner + classArgs: + #choice: maximize, minimize + optimize_mode: maximize +trial: + command: python3 mnist-keras.py + codeDir: ~/nni/examples/trials/mnist-batch-tune-keras + gpuNum: 0 \ No newline at end of file diff --git a/examples/trials/mnist-batch-tune-keras/mnist-keras.py b/examples/trials/mnist-batch-tune-keras/mnist-keras.py new file mode 100644 index 0000000000..87c2114991 --- /dev/null +++ b/examples/trials/mnist-batch-tune-keras/mnist-keras.py @@ -0,0 +1,133 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import argparse +import logging + +import os +import keras +import numpy as np +from keras import backend as K +from keras.callbacks import TensorBoard +from keras.datasets import mnist +from keras.layers import Conv2D, Dense, Flatten, MaxPooling2D +from keras.models import Sequential + +import nni + +LOG = logging.getLogger('mnist_keras') +K.set_image_data_format('channels_last') +TENSORBOARD_DIR = os.environ['NNI_OUTPUT_DIR'] + +H, W = 28, 28 +NUM_CLASSES = 10 + +def create_mnist_model(hyper_params, input_shape=(H, W, 1), num_classes=NUM_CLASSES): + ''' + Create simple convolutional model + ''' + layers = [ + Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape), + Conv2D(64, (3, 3), activation='relu'), + MaxPooling2D(pool_size=(2, 2)), + Flatten(), + Dense(100, activation='relu'), + Dense(num_classes, activation='softmax') + ] + + model = Sequential(layers) + + if hyper_params['optimizer'] == 'Adam': + optimizer = keras.optimizers.Adam(lr=hyper_params['learning_rate']) + else: + optimizer = keras.optimizers.SGD(lr=hyper_params['learning_rate'], momentum=0.9) + model.compile(loss=keras.losses.categorical_crossentropy, optimizer=optimizer, metrics=['accuracy']) + + return model + +def load_mnist_data(args): + ''' + Load MNIST dataset + ''' + (x_train, y_train), (x_test, y_test) = mnist.load_data() + + x_train = (np.expand_dims(x_train, -1).astype(np.float) / 255.)[:args.num_train] + x_test = (np.expand_dims(x_test, -1).astype(np.float) / 255.)[:args.num_test] + y_train = keras.utils.to_categorical(y_train, NUM_CLASSES)[:args.num_train] + y_test = keras.utils.to_categorical(y_test, NUM_CLASSES)[:args.num_test] + + LOG.debug('x_train shape: %s', (x_train.shape,)) + LOG.debug('x_test shape: %s', (x_test.shape,)) + + return x_train, y_train, x_test, y_test + +class SendMetrics(keras.callbacks.Callback): + ''' + Keras callback to send metrics to NNI framework + ''' + def on_epoch_end(self, epoch, logs={}): + ''' + Run on end of each epoch + ''' + LOG.debug(logs) + nni.report_intermediate_result(logs['acc']) + +def train(args, params): + ''' + Train model + ''' + x_train, y_train, x_test, y_test = load_mnist_data(args) + model = create_mnist_model(params) + + # nni + model.fit(x_train, y_train, batch_size=args.batch_size, epochs=args.epochs, verbose=1, + validation_data=(x_test, y_test), callbacks=[SendMetrics(), TensorBoard(log_dir=TENSORBOARD_DIR)]) + + _, acc = model.evaluate(x_test, y_test, verbose=0) + LOG.debug('Final result is: %d', acc) + nni.report_final_result(acc) + +def generate_default_params(): + ''' + Generate default hyper parameters + ''' + return { + 'optimizer': 'Adam', + 'learning_rate': 0.001 + } + +if __name__ == '__main__': + PARSER = argparse.ArgumentParser() + PARSER.add_argument("--batch_size", type=int, default=200, help="batch size", required=False) + PARSER.add_argument("--epochs", type=int, default=10, help="Train epochs", required=False) + PARSER.add_argument("--num_train", type=int, default=60000, help="Number of train samples to be used, maximum 60000", required=False) + PARSER.add_argument("--num_test", type=int, default=10000, help="Number of test samples to be used, maximum 10000", required=False) + + ARGS, UNKNOWN = PARSER.parse_known_args() + + try: + # get parameters from tuner + # RECEIVED_PARAMS = {"optimizer": "Adam", "learning_rate": 0.00001} + RECEIVED_PARAMS = nni.get_parameters() + LOG.debug(RECEIVED_PARAMS) + PARAMS = generate_default_params() + PARAMS.update(RECEIVED_PARAMS) + # train + train(ARGS, PARAMS) + except Exception as e: + LOG.exception(e) + raise diff --git a/examples/trials/mnist-batch-tune-keras/search_space.json b/examples/trials/mnist-batch-tune-keras/search_space.json new file mode 100644 index 0000000000..a0be3609da --- /dev/null +++ b/examples/trials/mnist-batch-tune-keras/search_space.json @@ -0,0 +1,12 @@ +{ + "combine_params": + { + "_type" : "choice", + "_value" : [{"optimizer": "Adam", "learning_rate": 0.00001}, + {"optimizer": "Adam", "learning_rate": 0.0001}, + {"optimizer": "Adam", "learning_rate": 0.001}, + {"optimizer": "SGD", "learning_rate": 0.01}, + {"optimizer": "SGD", "learning_rate": 0.005}, + {"optimizer": "SGD", "learning_rate": 0.0002}] + } +} \ No newline at end of file diff --git a/src/sdk/pynni/nni/__main__.py b/src/sdk/pynni/nni/__main__.py index 25d7c1050b..e3a39bac96 100644 --- a/src/sdk/pynni/nni/__main__.py +++ b/src/sdk/pynni/nni/__main__.py @@ -30,6 +30,7 @@ from nni.msg_dispatcher import MsgDispatcher from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner from nni.evolution_tuner.evolution_tuner import EvolutionTuner +from nni.batch_tuner.batch_tuner import BatchTuner from nni.medianstop_assessor.medianstop_assessor import MedianstopAssessor logger = logging.getLogger('nni.main') diff --git a/src/sdk/pynni/nni/batch_tuner/batch_tuner.py b/src/sdk/pynni/nni/batch_tuner/batch_tuner.py index 8e7a703875..05457d52b4 100644 --- a/src/sdk/pynni/nni/batch_tuner/batch_tuner.py +++ b/src/sdk/pynni/nni/batch_tuner/batch_tuner.py @@ -29,8 +29,6 @@ class BatchTuner import numpy as np from nni.tuner import Tuner -from . import parameter_expressions - TYPE = '_type' CHOICE = 'choice' @@ -48,35 +46,35 @@ class BatchTuner(Tuner): } ''' - def __init__(self): + def __init__(self, optimize_mode): self.count = -1 self.values = [] - def is_valid(self, search_space) + def is_valid(self, search_space): ''' Check the search space is valid: only contains 'choice' type ''' if not len(search_space) == 1: - raise RuntimeException('BatchTuner only supprt one combined-paramreters key.') + raise RuntimeError('BatchTuner only supprt one combined-paramreters key.') for param in search_space: - param_type = param[TYPE] - if param_type is not CHOICE: - raise RuntimeException('BatchTuner only supprt one combined-paramreters type is choice.') + param_type = search_space[param][TYPE] + if not param_type == CHOICE: + raise RuntimeError('BatchTuner only supprt one combined-paramreters type is choice.') else: - if isinstance(param[VALUE], list): - return param[VALUE] - raise RuntimeException('The combined-paramreters value in BatchTuner is not a list.') + if isinstance(search_space[param][VALUE], list): + return search_space[param][VALUE] + raise RuntimeError('The combined-paramreters value in BatchTuner is not a list.') return None def update_search_space(self, search_space): - self.values = is_valid(search_space) + self.values = self.is_valid(search_space) def generate_parameters(self, parameter_id): - count +=1 - if count>len(self.value)-1: + self.count +=1 + if self.count>len(self.values)-1: return None - return self.values[count] + return self.values[self.count] def receive_trial_result(self, parameter_id, parameters, reward): pass \ No newline at end of file diff --git a/tools/nnicmd/launcher_utils.py b/tools/nnicmd/launcher_utils.py index ee33ff37a8..dc97910244 100644 --- a/tools/nnicmd/launcher_utils.py +++ b/tools/nnicmd/launcher_utils.py @@ -91,7 +91,7 @@ def parse_tuner_content(experiment_config): 'Random': 'HyperoptTuner',\ 'Anneal': 'HyperoptTuner',\ 'Evolution': 'EvolutionTuner',\ - 'BatchTuning': 'BatchTuner'} + 'BatchTuner': 'BatchTuner'} tuner_algorithm_name_dict = {'TPE': 'tpe',\ 'Random': 'random_search',\ From be2fbe271f4a9ed81413f723af673d7d758a2e1b Mon Sep 17 00:00:00 2001 From: xuehui Date: Fri, 14 Sep 2018 19:35:39 +0800 Subject: [PATCH 05/10] fix bugs --- examples/trials/mnist-batch-tune-keras/config.yml | 4 ++-- src/sdk/pynni/nni/batch_tuner/batch_tuner.py | 3 ++- src/sdk/pynni/nni/msg_dispatcher.py | 1 - src/sdk/pynni/nni/tuner.py | 9 ++++++--- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/examples/trials/mnist-batch-tune-keras/config.yml b/examples/trials/mnist-batch-tune-keras/config.yml index 866fd4d0bd..4b750a2394 100644 --- a/examples/trials/mnist-batch-tune-keras/config.yml +++ b/examples/trials/mnist-batch-tune-keras/config.yml @@ -2,7 +2,7 @@ authorName: default experimentName: example_mnist-keras trialConcurrency: 1 maxExecDuration: 1h -maxTrialNum: 10 +maxTrialNum: 6 #choice: local, remote trainingServicePlatform: local searchSpacePath: ~/nni/examples/trials/mnist-batch-tune-keras/search_space.json @@ -17,4 +17,4 @@ tuner: trial: command: python3 mnist-keras.py codeDir: ~/nni/examples/trials/mnist-batch-tune-keras - gpuNum: 0 \ No newline at end of file + gpuNum: 0 diff --git a/src/sdk/pynni/nni/batch_tuner/batch_tuner.py b/src/sdk/pynni/nni/batch_tuner/batch_tuner.py index 05457d52b4..e9967ea8ea 100644 --- a/src/sdk/pynni/nni/batch_tuner/batch_tuner.py +++ b/src/sdk/pynni/nni/batch_tuner/batch_tuner.py @@ -28,6 +28,7 @@ class BatchTuner import numpy as np +import nni from nni.tuner import Tuner TYPE = '_type' @@ -73,7 +74,7 @@ def update_search_space(self, search_space): def generate_parameters(self, parameter_id): self.count +=1 if self.count>len(self.values)-1: - return None + raise nni.NoMoreTrialError('no more parameters now.') return self.values[self.count] def receive_trial_result(self, parameter_id, parameters, reward): diff --git a/src/sdk/pynni/nni/msg_dispatcher.py b/src/sdk/pynni/nni/msg_dispatcher.py index f6223a313c..e379ac5d8b 100644 --- a/src/sdk/pynni/nni/msg_dispatcher.py +++ b/src/sdk/pynni/nni/msg_dispatcher.py @@ -89,7 +89,6 @@ def handle_request_trial_jobs(self, data): # data: number or trial jobs ids = [_create_parameter_id() for _ in range(data)] params_list = self.tuner.generate_multiple_parameters(ids) - #assert len(ids) == len(params_list) # when parameters is None. if len(params_list) == 0: diff --git a/src/sdk/pynni/nni/tuner.py b/src/sdk/pynni/nni/tuner.py index 0129f7624e..5437f8ed7c 100644 --- a/src/sdk/pynni/nni/tuner.py +++ b/src/sdk/pynni/nni/tuner.py @@ -21,6 +21,7 @@ import logging +import nni from .recoverable import Recoverable _logger = logging.getLogger(__name__) @@ -44,9 +45,11 @@ def generate_multiple_parameters(self, parameter_id_list): """ result = [] for parameter_id in parameter_id_list: - temp = self.generate_parameters(parameter_id) - if temp: - result.append(temp) + try: + res = self.generate_parameters(parameter_id) + except nni.NoMoreTrialError: + return result + result.append(res) return result def receive_trial_result(self, parameter_id, parameters, reward): From c6683d102aaebc2674e15278ef23a84049f66282 Mon Sep 17 00:00:00 2001 From: xuehui Date: Fri, 14 Sep 2018 21:00:25 +0800 Subject: [PATCH 06/10] change to raise NoMoreTrialError --- src/sdk/pynni/nni/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/sdk/pynni/nni/__init__.py b/src/sdk/pynni/nni/__init__.py index 7fe4aa2789..0358b45bbf 100644 --- a/src/sdk/pynni/nni/__init__.py +++ b/src/sdk/pynni/nni/__init__.py @@ -23,3 +23,11 @@ from .trial import * from .smartparam import * + +class NoMoreTrialError(Exception): + def __init__(self,ErrorInfo): + super().__init__(self) + self.errorinfo=ErrorInfo + + def __str__(self): + return self.errorinfo \ No newline at end of file From 5e01504d742ef7e0840043d2af50c2eda80335e5 Mon Sep 17 00:00:00 2001 From: Sinan Tan Date: Fri, 14 Sep 2018 15:11:25 +0800 Subject: [PATCH 07/10] Bugfix for SQuAD QA model. --- examples/trials/ga_squad/train_model.py | 2 +- examples/trials/ga_squad/trial.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/trials/ga_squad/train_model.py b/examples/trials/ga_squad/train_model.py index 9eaf8544fe..47a5374c6d 100644 --- a/examples/trials/ga_squad/train_model.py +++ b/examples/trials/ga_squad/train_model.py @@ -37,7 +37,7 @@ def __init__(self): self.dropout = 0.1 - self.char_vcb_size = 1371 + self.char_vcb_size = 1500 self.max_char_length = 20 self.char_embed_dim = 100 diff --git a/examples/trials/ga_squad/trial.py b/examples/trials/ga_squad/trial.py index adc3622a9a..839b1638f4 100644 --- a/examples/trials/ga_squad/trial.py +++ b/examples/trials/ga_squad/trial.py @@ -58,7 +58,7 @@ def get_config(): default='./glove.840B.300d.txt', help='dev file') parser.add_argument('--root_path', default='./data/', type=str, help='Root path of models') - parser.add_argument('--batch_size', type=int, default=2, help='batch size') + parser.add_argument('--batch_size', type=int, default=64, help='batch size') parser.add_argument('--save_path', type=str, default='./save', help='save path dir') parser.add_argument('--learning_rate', type=float, default=0.0001, @@ -241,8 +241,6 @@ def run_epoch(batches, answer_net, is_training): if count % 100 == 0: logger.debug('%d %g except:%g' % (count, used, used / count * len(batches))) - if count % 100 == 0: - break loss = loss_sum / len(batches) if is_training: return loss From 241e3254f557db07bd0baa0c8dc491b4598d29c9 Mon Sep 17 00:00:00 2001 From: Sinan Tan Date: Fri, 14 Sep 2018 16:50:43 +0800 Subject: [PATCH 08/10] Fix some pylint warnings for SQuAD QA model. --- examples/trials/ga_squad/data.py | 64 +++++++++++++------------ examples/trials/ga_squad/evaluate.py | 27 ++++++----- examples/trials/ga_squad/graph.py | 20 ++++---- examples/trials/ga_squad/train_model.py | 5 ++ 4 files changed, 63 insertions(+), 53 deletions(-) diff --git a/examples/trials/ga_squad/data.py b/examples/trials/ga_squad/data.py index 7af35af106..638ae1e84f 100644 --- a/examples/trials/ga_squad/data.py +++ b/examples/trials/ga_squad/data.py @@ -19,6 +19,10 @@ # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +''' +Data processing script for the QA model. +''' + import csv import json from random import shuffle @@ -73,19 +77,19 @@ def load_from_file(path, fmt=None, is_training=True): for doc in data: for paragraph in doc['paragraphs']: passage = paragraph['context'] - for qa in paragraph['qas']: - question = qa['question'] - id = qa['id'] + for qa_pair in paragraph['qas']: + question = qa_pair['question'] + qa_id = qa_pair['id'] if not is_training: qp_pairs.append( - {'passage': passage, 'question': question, 'id': id}) + {'passage': passage, 'question': question, 'id': qa_id}) else: - for answer in qa['answers']: + for answer in qa_pair['answers']: answer_begin = int(answer['answer_start']) answer_end = answer_begin + len(answer['text']) qp_pairs.append({'passage': passage, 'question': question, - 'id': id, + 'id': qa_id, 'answer_begin': answer_begin, 'answer_end': answer_end}) else: @@ -121,21 +125,21 @@ def collect_vocab(qp_pairs): Build the vocab from corpus. ''' vocab = set() - for qp in qp_pairs: - for word in qp['question_tokens']: + for qp_pair in qp_pairs: + for word in qp_pair['question_tokens']: vocab.add(word['word']) - for word in qp['passage_tokens']: + for word in qp_pair['passage_tokens']: vocab.add(word['word']) return vocab -def shuffle_step(l, step): +def shuffle_step(entries, step): ''' Shuffle the step ''' answer = [] - for i in range(0, len(l), step): - sub = l[i:i+step] + for i in range(0, len(entries), step): + sub = entries[i:i+step] shuffle(sub) answer += sub return answer @@ -163,13 +167,13 @@ def get_char_input(data, char_dict, max_char_length): char_id = np.zeros((max_char_length, sequence_length, batch_size), dtype=np.int32) char_lengths = np.zeros((sequence_length, batch_size), dtype=np.float32) - for b in range(0, min(len(data), batch_size)): - d = data[b] - for s in range(0, min(len(d), sequence_length)): - word = d[s]['word'] - char_lengths[s, b] = min(len(word), max_char_length) + for batch_idx in range(0, min(len(data), batch_size)): + batch_data = data[batch_idx] + for sample_idx in range(0, min(len(batch_data), sequence_length)): + word = batch_data[sample_idx]['word'] + char_lengths[sample_idx, batch_idx] = min(len(word), max_char_length) for i in range(0, min(len(word), max_char_length)): - char_id[i, s, b] = get_id(char_dict, word[i]) + char_id[i, sample_idx, batch_idx] = get_id(char_dict, word[i]) return char_id, char_lengths @@ -180,26 +184,26 @@ def get_word_input(data, word_dict, embed, embed_dim): batch_size = len(data) max_sequence_length = max(len(d) for d in data) sequence_length = max_sequence_length - t = np.zeros((max_sequence_length, batch_size, - embed_dim), dtype=np.float32) + word_input = np.zeros((max_sequence_length, batch_size, + embed_dim), dtype=np.float32) ids = np.zeros((sequence_length, batch_size), dtype=np.int32) masks = np.zeros((sequence_length, batch_size), dtype=np.float32) lengths = np.zeros([batch_size], dtype=np.int32) - for b in range(0, min(len(data), batch_size)): - d = data[b] + for batch_idx in range(0, min(len(data), batch_size)): + batch_data = data[batch_idx] - lengths[b] = len(d) + lengths[batch_idx] = len(batch_data) - for s in range(0, min(len(d), sequence_length)): - word = d[s]['word'].lower() + for sample_idx in range(0, min(len(batch_data), sequence_length)): + word = batch_data[sample_idx]['word'].lower() if word in word_dict.keys(): - t[s, b] = embed[word_dict[word]] - ids[s, b] = word_dict[word] - masks[s, b] = 1 + word_input[sample_idx, batch_idx] = embed[word_dict[word]] + ids[sample_idx, batch_idx] = word_dict[word] + masks[sample_idx, batch_idx] = 1 - t = np.reshape(t, (-1, embed_dim)) - return t, ids, masks, lengths + word_input = np.reshape(word_input, (-1, embed_dim)) + return word_input, ids, masks, lengths def get_word_index(tokens, char_index): diff --git a/examples/trials/ga_squad/evaluate.py b/examples/trials/ga_squad/evaluate.py index 8cc734fadb..27ffd93da9 100644 --- a/examples/trials/ga_squad/evaluate.py +++ b/examples/trials/ga_squad/evaluate.py @@ -19,6 +19,10 @@ # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +''' +Evaluation scripts for QA model. +''' + from __future__ import print_function from collections import Counter import string @@ -68,8 +72,8 @@ def f1_score(prediction, ground_truth): return 0 precision = 1.0 * num_same / len(prediction_tokens) recall = 1.0 * num_same / len(ground_truth_tokens) - f1 = (2 * precision * recall) / (precision + recall) - return f1 + f1_result = (2 * precision * recall) / (precision + recall) + return f1_result def exact_match_score(prediction, ground_truth): ''' @@ -91,28 +95,25 @@ def _evaluate(dataset, predictions): ''' Evaluate function. ''' - f1 = exact_match = total = 0 + f1_result = exact_match = total = 0 count = 0 for article in dataset: for paragraph in article['paragraphs']: - for qa in paragraph['qas']: + for qa_pair in paragraph['qas']: total += 1 - if qa['id'] not in predictions: - message = 'Unanswered question ' + qa['id'] + \ - ' will receive score 0.' - #print(message, file=sys.stderr) + if qa_pair['id'] not in predictions: count += 1 continue - ground_truths = list(map(lambda x: x['text'], qa['answers'])) - prediction = predictions[qa['id']] + ground_truths = list(map(lambda x: x['text'], qa_pair['answers'])) + prediction = predictions[qa_pair['id']] exact_match += metric_max_over_ground_truths( exact_match_score, prediction, ground_truths) - f1 += metric_max_over_ground_truths( + f1_result += metric_max_over_ground_truths( f1_score, prediction, ground_truths) print('total', total, 'exact_match', exact_match, 'unanswer_question ', count) exact_match = 100.0 * exact_match / total - f1 = 100.0 * f1 / total - return {'exact_match': exact_match, 'f1': f1} + f1_result = 100.0 * f1_result / total + return {'exact_match': exact_match, 'f1': f1_result} def evaluate(data_file, pred_file): ''' diff --git a/examples/trials/ga_squad/graph.py b/examples/trials/ga_squad/graph.py index 15a5d350cd..c8da15fe9d 100644 --- a/examples/trials/ga_squad/graph.py +++ b/examples/trials/ga_squad/graph.py @@ -43,8 +43,8 @@ class Layer(object): ''' Layer class, which contains the information of graph. ''' - def __init__(self, graph_type, input=None, output=None, size=None): - self.input = input if input is not None else [] + def __init__(self, graph_type, inputs=None, output=None, size=None): + self.input = inputs if inputs is not None else [] self.output = output if output is not None else [] self.graph_type = graph_type self.is_delete = False @@ -117,11 +117,11 @@ class Graph(object): ''' Customed Graph class. ''' - def __init__(self, max_layer_num, input, output, hide): + def __init__(self, max_layer_num, inputs, output, hide): self.layers = [] self.max_layer_num = max_layer_num - for layer in input: + for layer in inputs: self.layers.append(layer) for layer in output: self.layers.append(layer) @@ -240,7 +240,7 @@ def mutation(self, only_add=False): if graph_type <= 1: new_id = len(layers) out = random.choice(layers_out) - input = [] + inputs = [] output = [out] pos = random.randint(0, len(layers[out].input) - 1) last_in = layers[out].input[pos] @@ -250,13 +250,13 @@ def mutation(self, only_add=False): if graph_type == 1: layers[last_in].output.remove(out) layers[last_in].output.append(new_id) - input = [last_in] - lay = Layer(graph_type=layer_type, input=input, output=output) - while len(input) < lay.input_size: + inputs = [last_in] + lay = Layer(graph_type=layer_type, inputs=inputs, output=output) + while len(inputs) < lay.input_size: layer1 = random.choice(layers_in) - input.append(layer1) + inputs.append(layer1) layers[layer1].output.append(new_id) - lay.input = input + lay.input = inputs layers.append(lay) else: layer1 = random.choice(layers_del) diff --git a/examples/trials/ga_squad/train_model.py b/examples/trials/ga_squad/train_model.py index 47a5374c6d..36ea2d5ccd 100644 --- a/examples/trials/ga_squad/train_model.py +++ b/examples/trials/ga_squad/train_model.py @@ -32,6 +32,7 @@ class GAGConfig: + """The class for model hyper-parameter configuration.""" def __init__(self): self.batch_size = 128 @@ -56,6 +57,7 @@ def __init__(self): class GAG: + """The class for the computation graph based QA model.""" def __init__(self, cfg, embed, graph): self.cfg = cfg self.embed = embed @@ -83,6 +85,7 @@ def __init__(self, cfg, embed, graph): def build_net(self, is_training): + """Build the whole neural network for the QA model.""" cfg = self.cfg with tf.device('/cpu:0'): word_embed = tf.get_variable( @@ -202,6 +205,7 @@ def build_net(self, is_training): if is_training: def label_smoothing(inputs, masks, epsilon=0.1): + """Modify target for label smoothing.""" epsilon = cfg.labelsmoothing num_of_channel = tf.shape(inputs)[-1] # number of channels inputs = tf.cast(inputs, tf.float32) @@ -229,6 +233,7 @@ def label_smoothing(inputs, masks, epsilon=0.1): return tf.stack([self.begin_prob, self.end_prob]) def build_char_states(self, char_embed, is_training, reuse, char_ids, char_lengths): + """Build char embedding network for the QA model.""" max_char_length = self.cfg.max_char_length inputs = dropout(tf.nn.embedding_lookup(char_embed, char_ids), From fb08ee59ba3d1f2d94be6e95d2ed22d4a2d63e30 Mon Sep 17 00:00:00 2001 From: Sinan Tan Date: Fri, 14 Sep 2018 17:33:44 +0800 Subject: [PATCH 09/10] Update readme.md and download script for SQuAD QA model. --- examples/trials/ga_squad/download.sh | 6 ++++++ examples/trials/ga_squad/readme.md | 29 +++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 examples/trials/ga_squad/download.sh diff --git a/examples/trials/ga_squad/download.sh b/examples/trials/ga_squad/download.sh new file mode 100644 index 0000000000..308fbaedbf --- /dev/null +++ b/examples/trials/ga_squad/download.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json +wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json +wget http://nlp.stanford.edu/data/glove.840B.300d.zip +unzip glove.840B.300d.zip \ No newline at end of file diff --git a/examples/trials/ga_squad/readme.md b/examples/trials/ga_squad/readme.md index d281496f7b..99eaf12fd5 100644 --- a/examples/trials/ga_squad/readme.md +++ b/examples/trials/ga_squad/readme.md @@ -1,9 +1,32 @@ -## How to download data +# Download data + +## Use downloading script + +Execute the following command to download needed files +using the downloading script: + +``` +chmod +x ./download.sh +./download.sh +``` + +## Download manually 1. download "dev-v1.1.json" and "train-v1.1.json" in https://rajpurkar.github.io/SQuAD-explorer/ -2. download "glove.840B.300d.txt" in "https://nlp.stanford.edu/projects/glove/" -## How to submit this job +``` +wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json +wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json +``` + +2. download "glove.840B.300d.txt" in https://nlp.stanford.edu/projects/glove/ + +``` +wget http://nlp.stanford.edu/data/glove.840B.300d.zip +unzip glove.840B.300d.zip +``` + +# How to submit this job 1. run "$NNI_ROOT_DIR/auto_run.py" as "$NNI_ROOT_DIR/README-AUTO.md" said. 2. use the dockerImage openpai.azurecr.io/nni_v0.0.1, which means it use a tensorflow cpu-version. From 45650c41aefbedf96c0a1495eefbc28c34f896ea Mon Sep 17 00:00:00 2001 From: Sinan Tan Date: Fri, 14 Sep 2018 18:06:56 +0800 Subject: [PATCH 10/10] Fix a bug when using official GLoVE embedding. --- examples/trials/ga_squad/trial.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/trials/ga_squad/trial.py b/examples/trials/ga_squad/trial.py index 839b1638f4..7373711e36 100644 --- a/examples/trials/ga_squad/trial.py +++ b/examples/trials/ga_squad/trial.py @@ -88,11 +88,13 @@ def load_embedding(path): ''' return embedding for a specif file by given file path. ''' + EMBEDDING_DIM = 300 embedding_dict = {} with open(path, 'r', encoding='utf-8') as file: pairs = [line.strip('\r\n').split() for line in file.readlines()] for pair in pairs: - embedding_dict[pair[0]] = [float(x) for x in pair[1:]] + if len(pair) == EMBEDDING_DIM + 1: + embedding_dict[pair[0]] = [float(x) for x in pair[1:]] logger.debug('embedding_dict size: %d', len(embedding_dict)) return embedding_dict