Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Commit

Permalink
Update ci (#175)
Browse files Browse the repository at this point in the history
* Update RemoteMachineMode.md (#63)

* Remove unused classes for SQuAD QA example.

* Remove more unused functions for SQuAD QA example.

* Fix default dataset config.

* Add Makefile README (#64)

* update document (#92)

* Edit readme.md

* updated a word

* Update GetStarted.md

* Update GetStarted.md

* refact readme, getstarted and write your trial md.

* Update README.md

* Update WriteYourTrial.md

* Update WriteYourTrial.md

* Update WriteYourTrial.md

* Update WriteYourTrial.md

* Fix nnictl bugs and add new feature (#75)

* fix nnictl bug

* fix nnictl create bug

* add experiment status logic

* add more information for nnictl

* fix Evolution Tuner bug

* refactor code

* fix code in updater.py

* fix nnictl --help

* fix classArgs bug

* update check response.status_code logic

* remove Buffer warning (#100)

* update readme in ga_squad

* update readme

* fix typo

* Update README.md

* Update README.md

* Update README.md

* Add support for debugging mode

* modify CI cuz of refracting exp stop

* update CI for expstop

* update CI for expstop

* update CI for expstop

* update CI for expstop

* update CI for expstop

* update CI for expstop

* update CI for expstop

* update CI for expstop

* update CI for expstop
  • Loading branch information
Crysple authored and QuanluZhang committed Oct 16, 2018
1 parent 1392c93 commit 0dab726
Show file tree
Hide file tree
Showing 10 changed files with 130 additions and 62 deletions.
2 changes: 1 addition & 1 deletion examples/trials/ga_squad/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -251,4 +251,4 @@ Every model configuration will has a "layers" section, which is a JSON list of l
* `input_size` is the number of inputs the layer has.
* `input` is the indices of layers taken as input of this layer.
* `output` is the indices of layers use this layer's output as their input.
* `is_delete` means whether the layer is still available.
* `is_delete` means whether the layer is still available.
1 change: 1 addition & 0 deletions src/nni_manager/core/nnimanager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ class NNIManager implements Manager {
suspendStartTime = Date.now();
}
this.status.status = 'SUSPENDED';
this.log.info('Experiment suspended.');
} else {
if (this.status.status === 'SUSPENDED') {
assert(suspendStartTime !== 0);
Expand Down
5 changes: 5 additions & 0 deletions test/naive/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
__pycache__

tuner_search_space.json
tuner_result.txt
assessor_result.txt
19 changes: 19 additions & 0 deletions test/naive/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
## Usage
To test before installing:
./run.py --preinstall
To test the integrity of installation:
./run.py
It will print `PASS` in green eventually if everything works well.

## Details
* This test case tests the communication between trials and tuner/assessor.
* The naive trials receive an integer `x` as parameter, and report `x`, `x²`, `x³`, ..., `x¹⁰` as metrics.
* The naive tuner simply generates the sequence of natural numbers, and prints received metrics to `tuner_result.txt`.
* The naive assessor kills trials when `sum(metrics) % 11 == 1`, and prints killed trials to `assessor_result.txt`.
* When the tuner or the assessor exits with an exception, it will append `ERROR` to the corresponding result file.
* When the experiment is suspended, which in this case means it has finished successfully, `Experiment suspended` can be detected in the `nnimanager.log` file.

## Issues
* Private APIs are used to detect whether tuner and assessor have terminated successfully.
* The output of REST server is not tested.
* Remote machine training service is not tested.
1 change: 0 additions & 1 deletion test/naive/expected_assessor_result.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,3 @@
5 3
7 2
8 3
DONE
1 change: 0 additions & 1 deletion test/naive/expected_tuner_result.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,3 @@
6 60466176
9 3486784401
10 10000000000
DONE
6 changes: 4 additions & 2 deletions test/naive/naive_assessor.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import logging
import os

from nni.assessor import Assessor, AssessResult

_logger = logging.getLogger('NaiveAssessor')
_logger.info('start')
_result = open('/tmp/nni_assessor_result.txt', 'w')

_pwd = os.path.dirname(__file__)
_result = open(os.path.join(_pwd, 'assessor_result.txt'), 'w')

class NaiveAssessor(Assessor):
def __init__(self, optimize_mode):
Expand All @@ -30,7 +33,6 @@ def assess_trial(self, trial_job_id, trial_history):
return AssessResult.Good

def _on_exit(self):
_result.write('DONE\n')
_result.close()

def _on_error(self):
Expand Down
8 changes: 5 additions & 3 deletions test/naive/naive_tuner.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import json
import logging
import os

from nni.tuner import Tuner

_logger = logging.getLogger('NaiveTuner')
_logger.info('start')
_result = open('/tmp/nni_tuner_result.txt', 'w')

_pwd = os.path.dirname(__file__)
_result = open(os.path.join(_pwd, 'tuner_result.txt'), 'w')

class NaiveTuner(Tuner):
def __init__(self, optimize_mode):
Expand All @@ -24,11 +27,10 @@ def receive_trial_result(self, parameter_id, parameters, reward):

def update_search_space(self, search_space):
_logger.info('update_search_space: %s' % search_space)
with open('/tmp/nni_tuner_search_space.json', 'w') as file_:
with open(os.path.join(_pwd, 'tuner_search_space.json'), 'w') as file_:
json.dump(search_space, file_)

def _on_exit(self):
_result.write('DONE\n')
_result.close()

def _on_error(self):
Expand Down
147 changes: 93 additions & 54 deletions test/naive/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,82 +4,121 @@
import json
import os
import subprocess
import requests
import sys
import time
import traceback

GREEN = '\33[32m'
RED = '\33[31m'
CLEAR = '\33[0m'

def read_last_line(file_name):
    """Return the last line of ``file_name`` with surrounding whitespace stripped.

    Returns ``None`` when the file does not exist or contains no lines.
    The file is opened in a ``with`` block so the handle is closed after
    every call instead of being left for the garbage collector.
    """
    try:
        with open(file_name) as file_:
            # Star-unpacking an empty file raises ValueError (nothing to bind
            # to ``last_line``), which is reported as "no last line".
            *_, last_line = file_
    except (FileNotFoundError, ValueError):
        return None
    return last_line.strip()

def run():
os.environ['PATH'] = os.environ['PATH'] + ':' + os.environ['PWD']

with contextlib.suppress(FileNotFoundError):
os.remove('tuner_search_space.txt')
with contextlib.suppress(FileNotFoundError):
os.remove('tuner_result.txt')
with contextlib.suppress(FileNotFoundError):
os.remove('/tmp/nni_assessor_result.txt')

proc = subprocess.run(['nnictl', 'create', '--config', 'local.yml'])
assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode

print('Spawning trials...')
current_trial = 0

for _ in range(60):
class Integration_test():
def __init__(self):
# REST endpoint that fetch_experiment_config queries with requests.get().
self.experiment_url = 'http://localhost:51188/api/v1/nni/experiment'
# Unknown until fetch_experiment_config reads the 'id' field of the REST response.
self.experiment_id = None
# Marker that check_experiment_status looks for in nnimanager.log.
# NOTE: the value itself includes the surrounding double quotes.
self.experiment_suspended_signal = '"Experiment suspended"'

def read_last_line(self, file_name):
    """Return the last line of ``file_name`` with surrounding whitespace stripped.

    Returns ``None`` when the file does not exist or contains no lines.
    The file is opened in a ``with`` block so the handle is closed after
    every call; this method is polled once per second while the
    experiment is running.
    """
    try:
        with open(file_name) as file_:
            # Star-unpacking an empty file raises ValueError (nothing to bind
            # to ``last_line``), which is reported as "no last line".
            *_, last_line = file_
    except (FileNotFoundError, ValueError):
        return None
    return last_line.strip()

def fetch_experiment_config(self):
    """Query the REST server for the experiment id and derive the log paths.

    Sets ``self.experiment_id``, ``self.experiment_path`` and
    ``self.nnimanager_log_path`` (``$HOME/nni/experiments/<id>/log/nnimanager.log``).

    Raises:
        requests.RequestException: if the REST server cannot be reached
            within the timeout or answers with an HTTP error status.
    """
    # A timeout keeps the test from hanging forever when the REST server
    # never comes up; raise_for_status reports an HTTP failure directly
    # instead of a confusing JSON/KeyError further down.
    experiment_profile = requests.get(self.experiment_url, timeout=10)
    experiment_profile.raise_for_status()
    self.experiment_id = json.loads(experiment_profile.text)['id']
    self.experiment_path = os.path.join(os.environ['HOME'], 'nni/experiments', self.experiment_id)
    self.nnimanager_log_path = os.path.join(self.experiment_path, 'log', 'nnimanager.log')

def check_experiment_status(self):
    """Return True when nnimanager.log contains the "experiment suspended" marker.

    The log is scanned directly in Python instead of through a
    ``cat ... | grep ...`` shell pipeline: the old command interpolated the
    log path unquoted into a ``shell=True`` string, so it broke on paths
    containing spaces or shell metacharacters, and it spawned two
    processes on every one-second poll.

    Raises:
        AssertionError: if the log file does not exist, i.e. the experiment
            failed to start.
    """
    assert os.path.exists(self.nnimanager_log_path), 'Experiment starts failed'
    # The configured signal carries shell-style double quotes; they are not
    # part of the text that appears in the log.
    marker = self.experiment_suspended_signal.strip('"')
    with open(self.nnimanager_log_path, errors='replace') as log_file:
        return any(marker in line for line in log_file)

def remove_files(self, file_list):
    """Delete every path in ``file_list``, skipping paths that do not exist."""
    for stale_path in file_list:
        try:
            os.remove(stale_path)
        except FileNotFoundError:
            # Already absent: nothing to clean up for this entry.
            continue

def run(self, installed = True):
if not installed:
os.environ['PATH'] = os.environ['PATH'] + ':' + os.environ['PWD']
sdk_path = os.path.abspath('../../src/sdk/pynni')
cmd_path = os.path.abspath('../../tools')
pypath = os.environ.get('PYTHONPATH')
if pypath:
pypath = ':'.join([pypath, sdk_path, cmd_path])
else:
pypath = ':'.join([sdk_path, cmd_path])
os.environ['PYTHONPATH'] = pypath

to_remove = ['tuner_search_space.json', 'tuner_result.txt', 'assessor_result.txt']
self.remove_files(to_remove)

proc = subprocess.run(['nnictl', 'create', '--config', 'local.yml'])
assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode

print('Spawning trials...')
time.sleep(1)
self.fetch_experiment_config()
current_trial = 0

for _ in range(60):
time.sleep(1)

tuner_status = read_last_line('/tmp/nni_tuner_result.txt')
assessor_status = read_last_line('/tmp/nni_assessor_result.txt')
tuner_status = self.read_last_line('tuner_result.txt')
assessor_status = self.read_last_line('assessor_result.txt')
experiment_status = self.check_experiment_status()

assert tuner_status != 'ERROR', 'Tuner exited with error'
assert assessor_status != 'ERROR', 'Assessor exited with error'
assert tuner_status != 'ERROR', 'Tuner exited with error'
assert assessor_status != 'ERROR', 'Assessor exited with error'

if tuner_status == 'DONE' and assessor_status == 'DONE':
break
if experiment_status:
break

if tuner_status is not None:
for line in open('/tmp/nni_tuner_result.txt'):
if line.strip() in ('DONE', 'ERROR'):
break
trial = int(line.split(' ')[0])
if trial > current_trial:
current_trial = trial
print('Trial #%d done' % trial)
if tuner_status is not None:
for line in open('tuner_result.txt'):
if line.strip() == 'ERROR':
break
trial = int(line.split(' ')[0])
if trial > current_trial:
current_trial = trial
print('Trial #%d done' % trial)

assert tuner_status == 'DONE' and assessor_status == 'DONE', 'Failed to finish in 1 min'
assert experiment_status, 'Failed to finish in 1 min'

ss1 = json.load(open('search_space.json'))
ss2 = json.load(open('/tmp/nni_tuner_search_space.json'))
assert ss1 == ss2, 'Tuner got wrong search space'
ss1 = json.load(open('search_space.json'))
ss2 = json.load(open('tuner_search_space.json'))
assert ss1 == ss2, 'Tuner got wrong search space'

tuner_result = set(open('/tmp/nni_tuner_result.txt'))
expected = set(open('expected_tuner_result.txt'))
# Trials may complete before NNI gets assessor's result,
# so it is possible to have more final result than expected
assert tuner_result.issuperset(expected), 'Bad tuner result'
# Waiting for naive_trial to report_final_result
time.sleep(2)
tuner_result = set(open('tuner_result.txt'))
expected = set(open('expected_tuner_result.txt'))
# Trials may complete before NNI gets assessor's result,
# so it is possible to have more final result than expected
assert tuner_result.issuperset(expected), 'Bad tuner result'

assessor_result = set(open('/tmp/nni_assessor_result.txt'))
expected = set(open('expected_assessor_result.txt'))
assert assessor_result == expected, 'Bad assessor result'
assessor_result = set(open('assessor_result.txt'))
expected = set(open('expected_assessor_result.txt'))
assert assessor_result == expected, 'Bad assessor result'

if __name__ == '__main__':
installed = (sys.argv[-1] != '--preinstall')

ic = Integration_test()
try:
run()
ic.run(installed)
# TODO: check the output of rest server
print(GREEN + 'PASS' + CLEAR)
except Exception as error:
print(RED + 'FAIL' + CLEAR)
print('%r' % error)
traceback.print_exc()
raise error

subprocess.run(['nnictl', 'stop', '--port', '51188'])
sys.exit(1)
finally:
subprocess.run(['nnictl', 'stop'])
2 changes: 2 additions & 0 deletions tools/nnicmd/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ def set_pai_config(experiment_config, port):
if not response or not response.status_code == 200:
if response is not None:
err_message = response.text
with open(STDERR_FULL_PATH, 'a+') as fout:
fout.write(json.dumps(json.loads(err_message), indent=4, sort_keys=True, separators=(',', ':')))
return False, err_message

#set trial_config
Expand Down

0 comments on commit 0dab726

Please sign in to comment.