
Integration test refactoring (#21) #616

Merged
merged 2 commits into from
Jan 17, 2019
39 changes: 27 additions & 12 deletions azure-pipelines.yml
@@ -1,35 +1,41 @@
trigger:
- master
- dev-remote-ci
- dev-it

jobs:

- job: 'Ubuntu_16_04'
pool:
vmImage: 'Ubuntu 16.04'
strategy:
matrix:
Python36:
PYTHON_VERSION: '3.6'
pool: 'NNI CI GPU'

steps:
- script: python3 -m pip install --upgrade pip setuptools
displayName: 'Install python tools'
- script: |
source install.sh
displayName: 'Install nni toolkit via source code'
- script: |
python3 -m pip install sklearn --user
python3 -m pip install torchvision --user
python3 -m pip install keras --user
python3 -m pip install tensorflow==1.9.0 --user
displayName: 'Install dependencies for integration tests'
- script: |
cd test
PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts local --config config_test/examples/mnist.test.yml
displayName: 'Examples on local machine tests'
- script: |
cd test
source unittest.sh
displayName: 'Unit test'
- script: |
cd test
PATH=$HOME/.local/bin:$PATH python3 naive_test.py
displayName: 'Integration tests'
displayName: 'Naive test'
- script: |
cd test
PATH=$HOME/.local/bin:$PATH python3 sdk_test.py
displayName: 'Built-in dispatcher tests'
PATH=$HOME/.local/bin:$PATH python3 tuner_test.py
displayName: 'Built-in tuners / assessors tests'

- job: 'macOS_10_13'
pool:
@@ -45,15 +51,24 @@ jobs:
- script: |
source install.sh
displayName: 'Install nni toolkit via source code'
- script: |
python3 -m pip install sklearn --user
python3 -m pip install torchvision --user
python3 -m pip install keras --user
displayName: 'Install dependencies for integration tests'
- script: |
cd test
PATH=$HOME/Library/Python/3.7/bin:$PATH && source unittest.sh
displayName: 'Unit test'
- script: |
cd test
PATH=$HOME/Library/Python/3.7/bin:$PATH python3 naive_test.py
displayName: 'Integration tests'
displayName: 'Naive test'
- script: |
cd test
PATH=$HOME/Library/Python/3.7/bin:$PATH python3 tuner_test.py
displayName: 'Built-in tuners / assessors tests'
- script: |
cd test
PATH=$HOME/Library/Python/3.7/bin:$PATH python3 sdk_test.py
displayName: 'Built-in dispatcher tests'
PATH=$HOME/Library/Python/3.7/bin:$PATH python3 config_test.py --ts local
displayName: 'Examples on local machine tests'
8 changes: 6 additions & 2 deletions examples/trials/cifar10_pytorch/main.py
@@ -1,6 +1,6 @@
'''Train CIFAR10 with PyTorch.'''
from __future__ import print_function

import argparse
import torch
import torch.nn as nn
import torch.optim as optim
@@ -174,6 +174,10 @@ def test(epoch):


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--epochs", type=int, default=200)
args, _ = parser.parse_known_args()

try:
RCV_CONFIG = nni.get_next_parameter()
#RCV_CONFIG = {'lr': 0.1, 'optimizer': 'Adam', 'model':'senet18'}
@@ -182,7 +186,7 @@ def test(epoch):
prepare(RCV_CONFIG)
acc = 0.0
best_acc = 0.0
for epoch in range(start_epoch, start_epoch+200):
for epoch in range(start_epoch, start_epoch+args.epochs):
train(epoch)
acc, best_acc = test(epoch)
nni.report_intermediate_result(acc)
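The new `--epochs` flag exists so the integration test can cap training length: the test config added below (test/config_test/examples/cifar10-pytorch.test.yml) launches this trial as `python3 main.py --epochs 2`. A minimal sketch of the pattern, outside the NNI runtime:

```python
# Sketch: the --epochs flag bounds the training loop so a test config can
# request a short smoke run instead of the full 200 epochs.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--epochs", type=int, default=200)
# parse_known_args() tolerates extra command-line arguments the trial does not declare
args, _ = parser.parse_known_args(["--epochs", "2"])

start_epoch = 0
for epoch in range(start_epoch, start_epoch + args.epochs):
    pass  # train(epoch); test(epoch); nni.report_intermediate_result(acc) in the real trial
print("ran %d epochs" % args.epochs)
```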
39 changes: 19 additions & 20 deletions examples/trials/mnist-annotation/mnist.py
@@ -1,5 +1,6 @@
"""A deep MNIST classifier using convolutional layers."""

import argparse
import logging
import math
import tempfile
@@ -180,7 +181,7 @@ def main(params):
test_acc = 0.0
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
"""@nni.variable(nni.choice(1, 4, 8, 16, 32), name=batch_size)"""
"""@nni.variable(nni.choice(16, 32), name=batch_size)"""
batch_size = params['batch_size']
for i in range(params['batch_num']):
batch = mnist.train.next_batch(batch_size)
@@ -210,29 +211,27 @@ def main(params):
logger.debug('Final result is %g', test_acc)
logger.debug('Send final result done.')


def generate_default_params():
'''
Generate default parameters for mnist network.
'''
params = {
'data_dir': '/tmp/tensorflow/mnist/input_data',
'dropout_rate': 0.5,
'channel_1_num': 32,
'channel_2_num': 64,
'conv_size': 5,
'pool_size': 2,
'hidden_size': 1024,
'learning_rate': 1e-4,
'batch_num': 2000,
'batch_size': 32}
return params

def get_params():
''' Get parameters from command line '''
parser = argparse.ArgumentParser()
parser.add_argument("--data_dir", type=str, default='/tmp/tensorflow/mnist/input_data', help="data directory")
parser.add_argument("--dropout_rate", type=float, default=0.5, help="dropout rate")
parser.add_argument("--channel_1_num", type=int, default=32)
parser.add_argument("--channel_2_num", type=int, default=64)
parser.add_argument("--conv_size", type=int, default=5)
parser.add_argument("--pool_size", type=int, default=2)
parser.add_argument("--hidden_size", type=int, default=1024)
parser.add_argument("--learning_rate", type=float, default=1e-4)
parser.add_argument("--batch_num", type=int, default=2000)
parser.add_argument("--batch_size", type=int, default=32)

args, _ = parser.parse_known_args()
return args

if __name__ == '__main__':
'''@nni.get_next_parameter()'''
try:
main(generate_default_params())
main(vars(get_params()))
except Exception as exception:
logger.exception(exception)
raise
47 changes: 23 additions & 24 deletions examples/trials/mnist/mnist.py
@@ -1,5 +1,6 @@
"""A deep MNIST classifier using convolutional layers."""

import argparse
import logging
import math
import tempfile
@@ -148,7 +149,8 @@ def main(params):
Main function, build mnist network, run and send result to NNI.
'''
# Import data
mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
data_dir = params['data_dir'] + str(nni.get_sequence_id())
mnist = input_data.read_data_sets(data_dir, one_hot=True)
print('Mnist download data down.')
logger.debug('Mnist download data down.')

@@ -198,33 +200,30 @@ def main(params):
logger.debug('Final result is %g', test_acc)
logger.debug('Send final result done.')


def generate_default_params():
'''
Generate default parameters for mnist network.
'''
params = {
'data_dir': '/tmp/tensorflow/mnist/input_data',
'dropout_rate': 0.5,
'channel_1_num': 32,
'channel_2_num': 64,
'conv_size': 5,
'pool_size': 2,
'hidden_size': 1024,
'learning_rate': 1e-4,
'batch_num': 2000,
'batch_size': 32}
return params

def get_params():
''' Get parameters from command line '''
parser = argparse.ArgumentParser()
parser.add_argument("--data_dir", type=str, default='/tmp/tensorflow/mnist/input_data', help="data directory")
parser.add_argument("--dropout_rate", type=float, default=0.5, help="dropout rate")
parser.add_argument("--channel_1_num", type=int, default=32)
parser.add_argument("--channel_2_num", type=int, default=64)
parser.add_argument("--conv_size", type=int, default=5)
parser.add_argument("--pool_size", type=int, default=2)
parser.add_argument("--hidden_size", type=int, default=1024)
parser.add_argument("--learning_rate", type=float, default=1e-4)
parser.add_argument("--batch_num", type=int, default=2000)
parser.add_argument("--batch_size", type=int, default=32)

args, _ = parser.parse_known_args()
return args

if __name__ == '__main__':
try:
# get parameters form tuner
RCV_PARAMS = nni.get_next_parameter()
logger.debug(RCV_PARAMS)
# run
params = generate_default_params()
params.update(RCV_PARAMS)
tuner_params = nni.get_next_parameter()
logger.debug(tuner_params)
params = vars(get_params())
params.update(tuner_params)
main(params)
except Exception as exception:
logger.exception(exception)
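In the refactored trial above, `generate_default_params()` is replaced by `get_params()`, and the command-line defaults are merged with whatever the tuner sends. A minimal sketch of that merge, using a stand-in dict instead of `nni.get_next_parameter()`:

```python
# Sketch of the merge in the refactored __main__ block: argparse supplies the
# defaults, and tuner-provided values override them key by key.
import argparse

def get_params():
    parser = argparse.ArgumentParser()
    parser.add_argument("--learning_rate", type=float, default=1e-4)
    parser.add_argument("--batch_size", type=int, default=32)
    # parse_known_args() ignores arguments the trial does not declare
    args, _ = parser.parse_known_args([])
    return args

params = vars(get_params())            # Namespace -> plain dict
tuner_params = {"batch_size": 16}      # stand-in for nni.get_next_parameter()
params.update(tuner_params)            # tuner values win over CLI defaults
print(params)                          # {'learning_rate': 0.0001, 'batch_size': 16}
```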
127 changes: 127 additions & 0 deletions test/config_test.py
@@ -0,0 +1,127 @@
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import os
import argparse
import glob
import subprocess
import sys
import time
import traceback

from utils import setup_experiment, get_experiment_status, get_yml_content, dump_yml_content, \
parse_max_duration_time, get_succeeded_trial_num, print_stderr

GREEN = '\33[32m'
RED = '\33[31m'
CLEAR = '\33[0m'

STATUS_URL = 'http://localhost:8080/api/v1/nni/check-status'
TRIAL_JOBS_URL = 'http://localhost:8080/api/v1/nni/trial-jobs'

def gen_new_config(config_file, training_service='local'):
'''
Generates temporary config file for integration test, the file
should be deleted after testing.
'''
config = get_yml_content(config_file)
new_config_file = config_file + '.tmp'

ts = get_yml_content('training_service.yml')[training_service]
print(config)
print(ts)
config.update(ts)
print(config)
dump_yml_content(new_config_file, config)

return new_config_file, config

def run_test(config_file, training_service, local_gpu=False):
'''run test per configuration file'''

new_config_file, config = gen_new_config(config_file, training_service)

if training_service == 'local' and not local_gpu and config['trial']['gpuNum'] > 0:
print('no gpu, skipping: ', config_file)
return

try:
print('Testing %s...' % config_file)
proc = subprocess.run(['nnictl', 'create', '--config', new_config_file])
assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode

max_duration, max_trial_num = get_max_values(config_file)
sleep_interval = 3

for _ in range(0, max_duration+30, sleep_interval):
time.sleep(sleep_interval)
status = get_experiment_status(STATUS_URL)
if status == 'DONE':
num_succeeded = get_succeeded_trial_num(TRIAL_JOBS_URL)
if training_service == 'local':
print_stderr(TRIAL_JOBS_URL)
assert num_succeeded == max_trial_num, 'only %d succeeded trial jobs, there should be %d' % (num_succeeded, max_trial_num)
break

assert status == 'DONE', 'Failed to finish in maxExecDuration'
finally:
if os.path.exists(new_config_file):
os.remove(new_config_file)

def get_max_values(config_file):
'''Get maxExecDuration and maxTrialNum of experiment'''
experiment_config = get_yml_content(config_file)
return parse_max_duration_time(experiment_config['maxExecDuration']), experiment_config['maxTrialNum']

def run(args):
'''test all configuration files'''
if args.config is None:
config_files = glob.glob('./config_test/**/*.test.yml')
else:
config_files = args.config.split(',')
print(config_files)

for config_file in config_files:
try:
# sleep 5 seconds here, to make sure previous stopped exp has enough time to exit to avoid port conflict
time.sleep(5)
run_test(config_file, args.ts, args.local_gpu)
print(GREEN + 'Test %s: TEST PASS' % (config_file) + CLEAR)
except Exception as error:
print(RED + 'Test %s: TEST FAIL' % (config_file) + CLEAR)
print('%r' % error)
traceback.print_exc()
raise error
finally:
subprocess.run(['nnictl', 'stop'])

if __name__ == '__main__':
import tensorflow as tf
print('TF VERSION:', tf.__version__)
parser = argparse.ArgumentParser()
parser.add_argument("--config", type=str, default=None)
parser.add_argument("--ts", type=str, choices=['local', 'remote', 'pai'], default='local')
parser.add_argument("--local_gpu", action='store_true')
parser.add_argument("--preinstall", action='store_true')
args = parser.parse_args()

setup_experiment(args.preinstall)

run(args)
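The CI step above drives this script as `python3 config_test.py --ts local --config config_test/examples/mnist.test.yml`; when `--config` is omitted, `run()` falls back to globbing `./config_test/**/*.test.yml`. A small sketch of that resolution logic (file names are illustrative):

```python
# Sketch of how run() decides which test configs to execute: an explicit
# comma-separated --config list, or every *.test.yml under ./config_test.
import glob

def resolve_configs(config_arg=None):
    if config_arg is None:
        return glob.glob('./config_test/**/*.test.yml')   # default, as in run() above
    return config_arg.split(',')

print(resolve_configs('config_test/examples/mnist.test.yml'))
print(resolve_configs('a.test.yml,b.test.yml'))   # ['a.test.yml', 'b.test.yml']
```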
25 changes: 25 additions & 0 deletions test/config_test/examples/cifar10-pytorch.test.yml
@@ -0,0 +1,25 @@
authorName: nni
experimentName: default_test
maxExecDuration: 15m
maxTrialNum: 2
trialConcurrency: 2
searchSpacePath: ./cifar10_search_space.json

tuner:
builtinTunerName: Random
classArgs:
optimize_mode: maximize
assessor:
builtinAssessorName: Medianstop
classArgs:
optimize_mode: maximize
trial:
codeDir: ../../../examples/trials/cifar10_pytorch
command: python3 main.py --epochs 2
gpuNum: 1

useAnnotation: false
multiPhase: false
multiThread: false

trainingServicePlatform: local
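For this config, `run_test()` in config_test.py first writes a merged `.tmp` copy (overlaying the selected training-service section) and, because `gpuNum` is 1, skips the test on a GPU-less local machine unless `--local_gpu` is passed. A sketch of that skip rule:

```python
# Sketch of the skip rule run_test() applies to this config (gpuNum: 1).
def should_skip(training_service, local_gpu, trial_gpu_num):
    return training_service == 'local' and not local_gpu and trial_gpu_num > 0

print(should_skip('local', False, 1))   # True  -> skipped without a local GPU
print(should_skip('local', True, 1))    # False -> runs when --local_gpu is given
```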