Skip to content

Commit

Permalink
Merge pull request #13 from Microsoft/master
Browse files Browse the repository at this point in the history
merge master
  • Loading branch information
SparkSnail authored Sep 27, 2018
2 parents 334b0a4 + 36b583b commit efe93df
Show file tree
Hide file tree
Showing 46 changed files with 2,233 additions and 284 deletions.
26 changes: 13 additions & 13 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ SHELL := /bin/bash
PIP_INSTALL := python3 -m pip install
PIP_UNINSTALL := python3 -m pip uninstall

## Colorful output
_INFO := $(shell echo -e '\e[1;36m')
_WARNING := $(shell echo -e '\e[1;33m')
_END := $(shell echo -e '\e[0m')


## Install directories
ifeq ($(shell id -u), 0) # is root
_ROOT := 1
Expand Down Expand Up @@ -37,41 +43,37 @@ SERVE_PATH ?= $(INSTALL_PREFIX)/nni/serve

## Check if dependencies have been installed globally
ifeq (, $(shell command -v node 2>/dev/null))
$(info Node.js not found)
$(info $(_INFO) Node.js not found $(_END))
_MISS_DEPS := 1 # node not found
else
_VER := $(shell node --version)
_NEWER := $(shell echo -e "$(NODE_VERSION)\n$(_VER)" | sort -Vr | head -n 1)
ifneq ($(_VER), $(_NEWER))
$(info Node.js version not match)
$(info $(_INFO) Node.js version not match $(_END))
_MISS_DEPS := 1 # node outdated
endif
endif
ifeq (, $(shell command -v yarnpkg 2>/dev/null))
$(info Yarn not found)
$(info $(_INFO) Yarn not found $(_END))
_MISS_DEPS := 1 # yarn not found
endif
ifeq (, $(shell command -v serve 2>/dev/null))
$(info Serve not found)
$(info $(_INFO) Serve not found $(_END))
_MISS_DEPS := 1 # serve not found
endif

ifdef _MISS_DEPS
$(info Missing dependencies, use local toolchain)
$(info $(_INFO) Missing dependencies, use local toolchain $(_END))
NODE := $(NODE_PATH)/bin/node
YARN := PATH=$${PATH}:$(NODE_PATH)/bin $(YARN_PATH)/bin/yarn
SERVE := $(SERVE_PATH)/serve
else
$(info All dependencies found, use global toolchain)
$(info $(_INFO) All dependencies found, use global toolchain $(_END))
NODE := node
YARN := yarnpkg
SERVE := serve
endif

## Colorful output
_INFO := $(shell echo -e '\e[1;36m')
_WARNING := $(shell echo -e '\e[1;33m')
_END := $(shell echo -e '\e[0m')

# Setting variables end

Expand All @@ -92,7 +94,6 @@ build:
#$(_INFO) Building nnictl $(_END)
cd tools && python3 setup.py build


# Standard installation target
# Must be invoked after building
.PHONY: install
Expand Down Expand Up @@ -207,7 +208,6 @@ install-python-modules:
#$(_INFO) Installing nnictl $(_END)
cd tools && python3 setup.py install $(PIP_MODE)


.PHONY: install-node-modules
install-node-modules:
mkdir -p $(INSTALL_PREFIX)/nni
Expand All @@ -227,7 +227,7 @@ install-dev-modules:

#$(_INFO) Installing nnictl $(_END)
cd tools && $(PIP_INSTALL) $(PIP_MODE) -e .

mkdir -p $(INSTALL_PREFIX)/nni

#$(_INFO) Installing NNI Manager $(_END)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pip Installation Prerequisites
* git, wget

```
pip3 install -v --user git+https://github.com/Microsoft/nni.git@v0.1
python3 -m pip install -v --user git+https://github.com/Microsoft/nni.git@v0.1
source ~/.bashrc
```

Expand Down
12 changes: 2 additions & 10 deletions docs/GetStarted.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@
git
wget

python pip should also be correctly installed. You could use "which pip" or "pip -V" to check in Linux.
Python pip should also be correctly installed. You can run "python3 -m pip -V" to check this on Linux.

* Note: we don't support virtual environment in current releases.

* __Install NNI through pip__

pip3 install -v --user git+https://github.com/Microsoft/nni.git@v0.1
python3 -m pip install -v --user git+https://github.com/Microsoft/nni.git@v0.1
source ~/.bashrc

* __Install NNI through source code__
Expand All @@ -24,14 +24,6 @@
chmod +x install.sh
source install.sh


* __Install NNI for all users__

sudo pip3 install -v --user git+https://github.com/Microsoft/nni.git@v0.1

* Note: NNI will be installed to `/usr/share/nni` for all users and to `~/.local/nni` for current user. Respectively, the examples will be copied to `/usr/share/nni/examples` or `~/.local/nni/examples`.
* The following tutorial assumes that NNI is installed for current user.

## **Quick start: run a customized experiment**
An experiment runs multiple trial jobs; each trial job tries a configuration that includes a specific neural architecture (or model) and hyper-parameter values. To run an experiment through NNI, you should:

Expand Down
15 changes: 15 additions & 0 deletions docs/RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
# Release 0.2.0 - 9/29/2018
## Major Features
* Support for [OpenPAI](https://github.com/Microsoft/pai) (aka pai) Training Service
* Support the training service in pai mode: NNI trials will be scheduled to run on an OpenPAI cluster
* NNI trial output (including logs and model files) will be copied to OpenPAI HDFS for further debugging and checking
* Support [SMAC](https://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf) tuner
* [SMAC](https://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf) is based on Sequential Model-Based Optimization (SMBO). It adapts the most prominent previously used model class (Gaussian stochastic process models) and introduces the model class of random forests to SMBO to handle categorical parameters. The SMAC supported by NNI is a wrapper on [SMAC3](https://github.com/automl/SMAC3)
* Support NNI installation on [conda](https://conda.io/docs/index.html) and python virtual environment
* Others
* Update ga squad example and related documentation
* Small WebUI UX enhancements and bug fixes

## Known Issues
[Known Issues in release 0.2.0](https://github.com/Microsoft/nni/labels/nni020knownissues).

# Release 0.1.0 - 9/10/2018 (initial release)

Initial release of Neural Network Intelligence (NNI).
Expand Down
2 changes: 1 addition & 1 deletion docs/RemoteMachineMode.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ For remote machines that are used only to run trials but not the nnictl, you can

* __Install python SDK through pip__

pip3 install --user git+https://github.com/Microsoft/NeuralNetworkIntelligence.git#subdirectory=src/sdk/pynni
python3 -m pip install --user git+https://github.com/Microsoft/NeuralNetworkIntelligence.git#subdirectory=src/sdk/pynni

* __Install python SDK through source code__

Expand Down
9 changes: 6 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def run(self):

setup(
name = 'NNI',
version = '0.1.0',
version = '0.2.0',
author = 'Microsoft NNI Team',
author_email = 'nni@microsoft.com',
description = 'Neural Network Intelligence project',
Expand All @@ -47,8 +47,10 @@ def run(self):
package_dir = {
'nni_annotation': 'tools/nni_annotation',
'nni': 'src/sdk/pynni/nni',
'nnicmd': 'tools/nnicmd'
'nnicmd': 'tools/nnicmd',
'trial_tool':'tools/trial_tool'
},
package_data = {'nni': ['**/requirements.txt']},
python_requires = '>=3.5',
install_requires = [
'astor',
Expand All @@ -59,7 +61,8 @@ def run(self):
'pyyaml',
'requests',
'scipy',
'schema'
'schema',
'pyhdfs'
],

cmdclass={
Expand Down
18 changes: 16 additions & 2 deletions src/nni_manager/common/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -225,5 +225,19 @@ function cleanupUnitTest(): void {
Container.restore(ExperimentStartupInfo);
}

export { getMsgDispatcherCommand, getLogDir, getExperimentRootDir, getDefaultDatabaseDir, mkDirP, delay, prepareUnitTest,
parseArg, cleanupUnitTest, uniqueString, randomSelect };
/**
 * Get an IPv4 address of the current machine.
 *
 * The interface named `eth0` is preferred: if it exists and carries IPv4
 * addresses, the last one listed is returned (this matches the historical
 * behaviour of this function). When `eth0` is missing or has no IPv4 address
 * (e.g. hosts whose NIC is named `ens3`, `enp0s3`, `en0`, ...), the first
 * non-internal IPv4 address found on any interface is returned instead.
 *
 * @returns the selected IPv4 address, or an empty string if none was found
 */
function getIPV4Address(): string {
    const interfaces = os.networkInterfaces();

    // Preferred interface. Guard against it being absent: iterating
    // `undefined` with for...of would throw a TypeError.
    let ipv4Address: string = '';
    const eth0 = interfaces.eth0;
    if (eth0 !== undefined) {
        for (const item of eth0) {
            if (item.family === 'IPv4') {
                // No break on purpose: the last IPv4 entry of eth0 wins, as before.
                ipv4Address = item.address;
            }
        }
    }
    if (ipv4Address !== '') {
        return ipv4Address;
    }

    // Fallback: any other interface, skipping loopback/internal addresses.
    for (const name of Object.keys(interfaces)) {
        const items = interfaces[name];
        if (items === undefined) {
            continue;
        }
        for (const item of items) {
            if (item.family === 'IPv4' && !item.internal) {
                return item.address;
            }
        }
    }

    return '';
}

export { getMsgDispatcherCommand, getLogDir, getExperimentRootDir, getDefaultDatabaseDir, getIPV4Address,
mkDirP, delay, prepareUnitTest, parseArg, cleanupUnitTest, uniqueString, randomSelect };
9 changes: 4 additions & 5 deletions src/nni_manager/core/nniDataStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -160,12 +160,11 @@ class NNIDataStore implements DataStore {

private async getFinalMetricData(trialJobId: string): Promise<any> {
const metrics: MetricDataRecord[] = await this.getMetricData(trialJobId, 'FINAL');
assert(metrics.length <= 1);
if (metrics.length === 1) {
return metrics[0];
} else {
return undefined;
if (metrics.length > 1) {
this.log.error(`Found multiple final results for trial job: ${trialJobId}`);
}

return metrics[0];
}

private getJobStatusByLatestEvent(event: TrialJobEvent): TrialJobStatus {
Expand Down
7 changes: 5 additions & 2 deletions src/nni_manager/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import { LocalTrainingServiceForGPU } from './training_service/local/localTraini
import {
RemoteMachineTrainingService
} from './training_service/remote_machine/remoteMachineTrainingService';
import { PAITrainingService } from './training_service/pai/paiTrainingService'


function initStartupInfo(startExpMode: string, resumeExperimentId: string) {
Expand All @@ -49,6 +50,8 @@ async function initContainer(platformMode: string): Promise<void> {
Container.bind(TrainingService).to(LocalTrainingServiceForGPU).scope(Scope.Singleton);
} else if (platformMode === 'remote') {
Container.bind(TrainingService).to(RemoteMachineTrainingService).scope(Scope.Singleton);
} else if (platformMode === 'pai'){
Container.bind(TrainingService).to(PAITrainingService).scope(Scope.Singleton);
} else {
throw new Error(`Error: unsupported mode: ${mode}`);
}
Expand All @@ -61,7 +64,7 @@ async function initContainer(platformMode: string): Promise<void> {
}

function usage(): void {
console.info('usage: node main.js --port <port> --mode <local/remote> --start_mode <new/resume> --experiment_id <id>');
console.info('usage: node main.js --port <port> --mode <local/remote/pai> --start_mode <new/resume> --experiment_id <id>');
}

let port: number = NNIRestServer.DEFAULT_PORT;
Expand All @@ -71,7 +74,7 @@ if (strPort && strPort.length > 0) {
}

const mode: string = parseArg(['--mode', '-m']);
if (!['local', 'remote'].includes(mode)) {
if (!['local', 'remote', 'pai'].includes(mode)) {
usage();
process.exit(1);
}
Expand Down
4 changes: 3 additions & 1 deletion src/nni_manager/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
"tree-kill": "^1.2.0",
"ts-deferred": "^1.0.4",
"typescript-ioc": "^1.2.4",
"typescript-string-operations": "^1.3.1"
"typescript-string-operations": "^1.3.1",
"webhdfs":"^1.2.0"
},
"devDependencies": {
"@types/chai": "^4.1.4",
Expand All @@ -40,6 +41,7 @@
"chai": "^4.1.2",
"mocha": "^5.2.0",
"request": "^2.87.0",
"rmdir": "^1.2.0",
"tmp": "^0.0.33",
"ts-node": "^7.0.0",
"tslint": "^5.11.0",
Expand Down
14 changes: 12 additions & 2 deletions src/nni_manager/rest_server/restValidationSchemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,19 @@ export namespace ValidationSchemas {
passphrase: joi.string()
})),
trial_config: joi.object({
gpuNum: joi.number().min(0).required(),
image: joi.string().min(1),
codeDir: joi.string().min(1).required(),
command: joi.string().min(1).required()
dataDir: joi.string(),
outputDir: joi.string(),
cpuNum: joi.number().min(1),
memoryMB: joi.number().min(100),
gpuNum: joi.number().min(0).required(),
command: joi.string().min(1).required()
}),
pai_config: joi.object({
userName: joi.string().min(1).required(),
passWord: joi.string().min(1).required(),
host: joi.string().min(1).required()
})
}
};
Expand Down
37 changes: 37 additions & 0 deletions src/nni_manager/training_service/common/jobMetrics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/**
* Copyright (c) Microsoft Corporation
* All rights reserved.
*
* MIT License
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
* documentation files (the "Software"), to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
* to permit persons to whom the Software is furnished to do so, subject to the following conditions:
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
* BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

'use strict';

import { TrialJobStatus } from '../../common/trainingService';

/**
 * Read-only value object grouping a job's id, its metric strings, its status
 * and an end timestamp. All four values are supplied to the constructor and
 * stored unchanged.
 */
// tslint:disable-next-line:max-classes-per-file
export class JobMetrics {
    /** Identifier of the job these metrics belong to. */
    public readonly jobId: string;
    /** Metric entries for the job (format not defined here; presumably serialized metric records, to be confirmed against callers). */
    public readonly metrics: string[];
    /** Status of the job as given by the caller. */
    public readonly jobStatus: TrialJobStatus;
    /** End timestamp as given by the caller (unit not established in this file). */
    public readonly endTimestamp: number;

    /**
     * @param jobId id of the job
     * @param metrics metric strings associated with the job
     * @param jobStatus status of the job
     * @param endTimestamp end timestamp of the job
     */
    constructor(
        jobId: string,
        metrics: string[],
        jobStatus: TrialJobStatus,
        endTimestamp: number
    ) {
        // Assignment order is kept so that property enumeration order is unchanged.
        this.jobId = jobId;
        this.metrics = metrics;
        this.jobStatus = jobStatus;
        this.endTimestamp = endTimestamp;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,6 @@ export enum TrialConfigMetadataKey {
MACHINE_LIST = 'machine_list',
TRIAL_CONFIG = 'trial_config',
EXPERIMENT_ID = 'experimentId',
RANDOM_SCHEDULER = 'random_scheduler'
RANDOM_SCHEDULER = 'random_scheduler',
PAI_CLUSTER_CONFIG = 'pai_config'
}
Loading

0 comments on commit efe93df

Please sign in to comment.