From c9f70a6ad4666e809e6bd0ee30e55784e9522649 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Tue, 7 Jul 2020 14:33:22 +0800 Subject: [PATCH 1/5] Fix customized tuner doc (#2628) --- docs/en_US/Tutorial/InstallCustomizedAlgos.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en_US/Tutorial/InstallCustomizedAlgos.md b/docs/en_US/Tutorial/InstallCustomizedAlgos.md index df2783df89..ff5a3d3514 100644 --- a/docs/en_US/Tutorial/InstallCustomizedAlgos.md +++ b/docs/en_US/Tutorial/InstallCustomizedAlgos.md @@ -77,7 +77,7 @@ Once you have the meta info in `setup.py`, you can build your pip installation s NNI will look for the classifier starts with `NNI Package` to retrieve the package meta information while the package being installed with `nnictl package install ` command. -Reference [customized tuner example](https://github.com/microsoft/nni/blob/master/examples/tuners/customized_tuner/README.md) for a full example. +Reference [customized tuner example](../Tuner/InstallCustomizedTuner.md) for a full example. ### 4. Install customized algorithms package into NNI From 3cffe34030b1696df10b6f7a5330ffabab7cc4ae Mon Sep 17 00:00:00 2001 From: SparkSnail Date: Tue, 7 Jul 2020 16:17:12 +0800 Subject: [PATCH 2/5] remove gupNum in accessor (#2641) --- docs/en_US/TrainingService/FrameworkControllerMode.md | 1 - docs/en_US/TrainingService/KubeflowMode.md | 1 - examples/trials/mnist-distributed/config_kubeflow.yml | 1 - examples/trials/mnist-pytorch/config_frameworkcontroller.yml | 1 - examples/trials/mnist-tfv1/config_frameworkcontroller.yml | 1 - src/nni_manager/core/test/nnimanager.test.ts | 2 -- 6 files changed, 7 deletions(-) diff --git a/docs/en_US/TrainingService/FrameworkControllerMode.md b/docs/en_US/TrainingService/FrameworkControllerMode.md index dfa4260eea..d936ba3011 100644 --- a/docs/en_US/TrainingService/FrameworkControllerMode.md +++ b/docs/en_US/TrainingService/FrameworkControllerMode.md @@ -57,7 +57,6 @@ assessor: builtinAssessorName: Medianstop classArgs: optimize_mode: maximize - gpuNum: 0 trial: codeDir: ~/nni/examples/trials/mnist-tfv1 taskRoles: diff --git a/docs/en_US/TrainingService/KubeflowMode.md b/docs/en_US/TrainingService/KubeflowMode.md index 130dd9b3c6..9097ab7a1f 100644 --- a/docs/en_US/TrainingService/KubeflowMode.md +++ b/docs/en_US/TrainingService/KubeflowMode.md @@ -107,7 +107,6 @@ assessor: builtinAssessorName: Medianstop classArgs: optimize_mode: maximize - gpuNum: 0 trial: codeDir: . worker: diff --git a/examples/trials/mnist-distributed/config_kubeflow.yml b/examples/trials/mnist-distributed/config_kubeflow.yml index 87120319ea..96d32bf59a 100644 --- a/examples/trials/mnist-distributed/config_kubeflow.yml +++ b/examples/trials/mnist-distributed/config_kubeflow.yml @@ -18,7 +18,6 @@ assessor: builtinAssessorName: Medianstop classArgs: optimize_mode: maximize - gpuNum: 0 trial: codeDir: . worker: diff --git a/examples/trials/mnist-pytorch/config_frameworkcontroller.yml b/examples/trials/mnist-pytorch/config_frameworkcontroller.yml index e01900a8ef..6047f4daf6 100644 --- a/examples/trials/mnist-pytorch/config_frameworkcontroller.yml +++ b/examples/trials/mnist-pytorch/config_frameworkcontroller.yml @@ -18,7 +18,6 @@ assessor: builtinAssessorName: Medianstop classArgs: optimize_mode: maximize - gpuNum: 0 trial: codeDir: . 
taskRoles: diff --git a/examples/trials/mnist-tfv1/config_frameworkcontroller.yml b/examples/trials/mnist-tfv1/config_frameworkcontroller.yml index bf230ea872..63cdb34585 100644 --- a/examples/trials/mnist-tfv1/config_frameworkcontroller.yml +++ b/examples/trials/mnist-tfv1/config_frameworkcontroller.yml @@ -18,7 +18,6 @@ assessor: builtinAssessorName: Medianstop classArgs: optimize_mode: maximize - gpuNum: 0 trial: codeDir: . taskRoles: diff --git a/src/nni_manager/core/test/nnimanager.test.ts b/src/nni_manager/core/test/nnimanager.test.ts index a04cb8bad5..27432f3e77 100644 --- a/src/nni_manager/core/test/nnimanager.test.ts +++ b/src/nni_manager/core/test/nnimanager.test.ts @@ -49,12 +49,10 @@ describe('Unit test for nnimanager', function () { optimize_mode: 'maximize' }, checkpointDir: '', - gpuNum: 0 }, assessor: { builtinAssessorName: 'Medianstop', checkpointDir: '', - gpuNum: 1 } } From a77f52ec9efb8b9fe2b69d3c1cd5c3ddab527102 Mon Sep 17 00:00:00 2001 From: Tom Rochette Date: Tue, 7 Jul 2020 22:49:41 -0400 Subject: [PATCH 3/5] Update search space documentation (#2637) --- docs/en_US/Tutorial/SearchSpaceSpec.md | 71 +++++++++++--------------- 1 file changed, 31 insertions(+), 40 deletions(-) diff --git a/docs/en_US/Tutorial/SearchSpaceSpec.md b/docs/en_US/Tutorial/SearchSpaceSpec.md index eb5d39315c..54264be2b5 100644 --- a/docs/en_US/Tutorial/SearchSpaceSpec.md +++ b/docs/en_US/Tutorial/SearchSpaceSpec.md @@ -4,9 +4,9 @@ In NNI, tuner will sample parameters/architecture according to the search space, which is defined as a json file. -To define a search space, users should define the name of variable, the type of sampling strategy and its parameters. +To define a search space, users should define the name of the variable, the type of sampling strategy and its parameters. -* An example of search space definition as follow: +* An example of a search space definition is as follow: ```yaml { @@ -19,9 +19,9 @@ To define a search space, users should define the name of variable, the type of ``` -Take the first line as an example. `dropout_rate` is defined as a variable whose priori distribution is a uniform distribution of a range from `0.1` and `0.5`. +Take the first line as an example. `dropout_rate` is defined as a variable whose priori distribution is a uniform distribution with a range from `0.1` to `0.5`. -Note that the ability of a search space is highly connected with your tuner. We listed the supported types for each builtin tuner below. For a customized tuner, you don't have to follow our convention and you will have the flexibility to define any type you want. +Note that the available sampling strategies within a search space depend on the tuner you want to use. We list the supported types for each builtin tuner below. For a customized tuner, you don't have to follow our convention and you will have the flexibility to define any type you want. ## Types @@ -29,73 +29,64 @@ All types of sampling strategies and their parameter are listed here: * `{"_type": "choice", "_value": options}` - * Which means the variable's value is one of the options. Here `options` should be a list of numbers or a list of strings. Using arbitrary objects as members of this list (like sublists, a mixture of numbers and strings, or null values) should work in most cases, but may trigger undefined behaviors. - * `options` could also be a nested sub-search-space, this sub-search-space takes effect only when the corresponding element is chosen. 
The variables in this sub-search-space could be seen as conditional variables. Here is an simple [example of nested search space definition](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nested-search-space/search_space.json). If an element in the options list is a dict, it is a sub-search-space, and for our built-in tuners you have to add a key `_name` in this dict, which helps you to identify which element is chosen. Accordingly, here is a [sample](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nested-search-space/sample.json) which users can get from nni with nested search space definition. Tuners which support nested search space are as follows: - - - Random Search - - TPE - - Anneal - - Evolution + * The variable's value is one of the options. Here `options` should be a list of numbers or a list of strings. Using arbitrary objects as members of this list (like sublists, a mixture of numbers and strings, or null values) should work in most cases, but may trigger undefined behaviors. + * `options` can also be a nested sub-search-space, this sub-search-space takes effect only when the corresponding element is chosen. The variables in this sub-search-space can be seen as conditional variables. Here is an simple [example of nested search space definition](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nested-search-space/search_space.json). If an element in the options list is a dict, it is a sub-search-space, and for our built-in tuners you have to add a `_name` key in this dict, which helps you to identify which element is chosen. Accordingly, here is a [sample](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nested-search-space/sample.json) which users can get from nni with nested search space definition. See the table below for the tuners which support nested search spaces. * `{"_type": "randint", "_value": [lower, upper]}` - * Choosing a random integer from `lower` (inclusive) to `upper` (exclusive). - * Note: Different tuners may interpret `randint` differently. Some (e.g., TPE, GridSearch) treat integers from lower - to upper as unordered ones, while others respect the ordering (e.g., SMAC). If you want all the tuners to respect + * Choosing a random integer between `lower` (inclusive) and `upper` (exclusive). + * Note: Different tuners may interpret `randint` differently. Some (e.g., TPE, GridSearch) treat integers from lower + to upper as unordered ones, while others respect the ordering (e.g., SMAC). If you want all the tuners to respect the ordering, please use `quniform` with `q=1`. * `{"_type": "uniform", "_value": [low, high]}` - * Which means the variable value is a value uniformly between low and high. + * The variable value is uniformly sampled between low and high. * When optimizing, this variable is constrained to a two-sided interval. * `{"_type": "quniform", "_value": [low, high, q]}` - * Which means the variable value is a value like `clip(round(uniform(low, high) / q) * q, low, high)`, where the clip operation is used to constraint the generated value in the bound. For example, for `_value` specified as [0, 10, 2.5], possible values are [0, 2.5, 5.0, 7.5, 10.0]; For `_value` specified as [2, 10, 5], possible values are [2, 5, 10]. - * Suitable for a discrete value with respect to which the objective is still somewhat "smooth", but which should be bounded both above and below. 
If you want to uniformly choose integer from a range [low, high], you can write `_value` like this: `[low, high, 1]`. + * The variable value is determined using `clip(round(uniform(low, high) / q) * q, low, high)`, where the clip operation is used to constrain the generated value within the bounds. For example, for `_value` specified as [0, 10, 2.5], possible values are [0, 2.5, 5.0, 7.5, 10.0]; For `_value` specified as [2, 10, 5], possible values are [2, 5, 10]. + * Suitable for a discrete value with respect to which the objective is still somewhat "smooth", but which should be bounded both above and below. If you want to uniformly choose an integer from a range [low, high], you can write `_value` like this: `[low, high, 1]`. * `{"_type": "loguniform", "_value": [low, high]}` - * Which means the variable value is a value drawn from a range [low, high] according to a loguniform distribution like exp(uniform(log(low), log(high))), so that the logarithm of the return value is uniformly distributed. + * The variable value is drawn from a range [low, high] according to a loguniform distribution like exp(uniform(log(low), log(high))), so that the logarithm of the return value is uniformly distributed. * When optimizing, this variable is constrained to be positive. * `{"_type": "qloguniform", "_value": [low, high, q]}` - * Which means the variable value is a value like `clip(round(loguniform(low, high) / q) * q, low, high)`, where the clip operation is used to constraint the generated value in the bound. + * The variable value is determined using `clip(round(loguniform(low, high) / q) * q, low, high)`, where the clip operation is used to constrain the generated value within the bounds. * Suitable for a discrete variable with respect to which the objective is "smooth" and gets smoother with the size of the value, but which should be bounded both above and below. * `{"_type": "normal", "_value": [mu, sigma]}` - * Which means the variable value is a real value that's normally-distributed with mean mu and standard deviation sigma. When optimizing, this is an unconstrained variable. + * The variable value is a real value that's normally-distributed with mean mu and standard deviation sigma. When optimizing, this is an unconstrained variable. * `{"_type": "qnormal", "_value": [mu, sigma, q]}` - * Which means the variable value is a value like `round(normal(mu, sigma) / q) * q` + * The variable value is determined using `round(normal(mu, sigma) / q) * q` * Suitable for a discrete variable that probably takes a value around mu, but is fundamentally unbounded. * `{"_type": "lognormal", "_value": [mu, sigma]}` - * Which means the variable value is a value drawn according to `exp(normal(mu, sigma))` so that the logarithm of the return value is normally distributed. When optimizing, this variable is constrained to be positive. + * The variable value is drawn according to `exp(normal(mu, sigma))` so that the logarithm of the return value is normally distributed. When optimizing, this variable is constrained to be positive. * `{"_type": "qlognormal", "_value": [mu, sigma, q]}` - * Which means the variable value is a value like `round(exp(normal(mu, sigma)) / q) * q` + * The variable value is determined using `round(exp(normal(mu, sigma)) / q) * q` * Suitable for a discrete variable with respect to which the objective is smooth and gets smoother with the size of the variable, which is bounded from one side. 
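The rounding and clipping rules for the `q*` strategies above are easy to misread, so here is a minimal, illustrative Python sketch of the `quniform` and `qloguniform` transformations exactly as described. The function names are ours for illustration only and are not part of NNI's API.

```python
import math
import random

def quniform(low, high, q):
    # clip(round(uniform(low, high) / q) * q, low, high)
    value = round(random.uniform(low, high) / q) * q
    return min(max(value, low), high)

def qloguniform(low, high, q):
    # clip(round(loguniform(low, high) / q) * q, low, high), where
    # loguniform(low, high) = exp(uniform(log(low), log(high)))
    sample = math.exp(random.uniform(math.log(low), math.log(high)))
    value = round(sample / q) * q
    return min(max(value, low), high)

# With _value [0, 10, 2.5], quniform can only yield 0, 2.5, 5.0, 7.5 or 10.0.
print(quniform(0, 10, 2.5))
# With _value [2, 10, 5], the possible values are 2, 5 and 10 (clipping keeps the result in bounds).
print(quniform(2, 10, 5))
```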
- ## Search Space Types Supported by Each Tuner -| | choice | randint | uniform | quniform | loguniform | qloguniform | normal | qnormal | lognormal | qlognormal | -|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:| -| TPE Tuner | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | -| Random Search Tuner| ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | -| Anneal Tuner | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | -| Evolution Tuner | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | -| SMAC Tuner | ✓ | ✓ | ✓ | ✓ | ✓ | | | | | | -| Batch Tuner | ✓ | | | | | | | | | | -| Grid Search Tuner | ✓ | ✓ | | ✓ | | | | | | | -| Hyperband Advisor | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | -| Metis Tuner | ✓ | ✓ | ✓ | ✓ | | | | | | | -| GP Tuner | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | | | | - +| | choice | choice(nested) | randint | uniform | quniform | loguniform | qloguniform | normal | qnormal | lognormal | qlognormal | +|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:| +| TPE Tuner | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| Random Search Tuner| ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| Anneal Tuner | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| Evolution Tuner | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| SMAC Tuner | ✓ | | ✓ | ✓ | ✓ | ✓ | | | | | | +| Batch Tuner | ✓ | | | | | | | | | | | +| Grid Search Tuner | ✓ | | ✓ | | ✓ | | | | | | | +| Hyperband Advisor | ✓ | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| Metis Tuner | ✓ | | ✓ | ✓ | ✓ | | | | | | | +| GP Tuner | ✓ | | ✓ | ✓ | ✓ | ✓ | ✓ | | | | | Known Limitations: -* GP Tuner and Metis Tuner support only **numerical values** in search space (`choice` type values can be no-numeraical with other tuners, e.g. string values). Both GP Tuner and Metis Tuner use Gaussian Process Regressor(GPR). GPR make predictions based on a kernel function and the 'distance' between different points, it's hard to get the true distance between no-numerical values. +* GP Tuner and Metis Tuner support only **numerical values** in search space (`choice` type values can be no-numerical with other tuners, e.g. string values). Both GP Tuner and Metis Tuner use Gaussian Process Regressor(GPR). GPR make predictions based on a kernel function and the 'distance' between different points, it's hard to get the true distance between no-numerical values. 
* Note that for nested search space: * Only Random Search/TPE/Anneal/Evolution tuner supports nested search space - - * We do not support nested search space "Hyper Parameter" in visualization now, the enhancement is being considered in [#1110](https://github.com/microsoft/nni/issues/1110), any suggestions or discussions or contributions are warmly welcomed From 688feedbde9d19147ac80f7a59a647ca3778fb99 Mon Sep 17 00:00:00 2001 From: Tom Rochette Date: Wed, 8 Jul 2020 02:34:45 -0400 Subject: [PATCH 4/5] Typo fixes in the documentation (#2638) --- .../{HpoComparision.md => HpoComparison.md} | 0 .../{NasComparision.md => NasComparison.md} | 0 docs/en_US/CommunitySharings/community_sharings.rst | 4 ++-- docs/en_US/Release.md | 10 +++++----- docs/en_US/Tuner/BuiltinTuner.md | 6 +++--- docs/en_US/contents.rst | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) rename docs/en_US/CommunitySharings/{HpoComparision.md => HpoComparison.md} (100%) rename docs/en_US/CommunitySharings/{NasComparision.md => NasComparison.md} (100%) diff --git a/docs/en_US/CommunitySharings/HpoComparision.md b/docs/en_US/CommunitySharings/HpoComparison.md similarity index 100% rename from docs/en_US/CommunitySharings/HpoComparision.md rename to docs/en_US/CommunitySharings/HpoComparison.md diff --git a/docs/en_US/CommunitySharings/NasComparision.md b/docs/en_US/CommunitySharings/NasComparison.md similarity index 100% rename from docs/en_US/CommunitySharings/NasComparision.md rename to docs/en_US/CommunitySharings/NasComparison.md diff --git a/docs/en_US/CommunitySharings/community_sharings.rst b/docs/en_US/CommunitySharings/community_sharings.rst index 23431301c1..ae1f1f1099 100644 --- a/docs/en_US/CommunitySharings/community_sharings.rst +++ b/docs/en_US/CommunitySharings/community_sharings.rst @@ -9,8 +9,8 @@ In addtion to the official tutorilas and examples, we encourage community contri NNI in Recommenders Automatically tuning SPTAG with NNI - Neural Architecture Search Comparison - Hyper-parameter Tuning Algorithm Comparsion + Neural Architecture Search Comparison + Hyper-parameter Tuning Algorithm Comparison Parallelizing Optimization for TPE Automatically tune systems with NNI NNI review article from Zhihu: - By Garvin Li diff --git a/docs/en_US/Release.md b/docs/en_US/Release.md index 412b1c7533..4aecec1fc1 100644 --- a/docs/en_US/Release.md +++ b/docs/en_US/Release.md @@ -139,7 +139,7 @@ * [BNN Quantizer](https://github.com/microsoft/nni/blob/v1.3/docs/en_US/Compressor/Quantizer.md#bnn-quantizer) #### Training Service * NFS Support for PAI - + Instead of using HDFS as default storage, since OpenPAI v0.11, OpenPAI can have NFS or AzureBlob or other storage as default storage. In this release, NNI extended the support for this recent change made by OpenPAI, and could integrate with OpenPAI v0.11 or later version with various default storage. * Kubeflow update adoption @@ -273,11 +273,11 @@ ### Major Features * General NAS programming interface * Add `enas-mode` and `oneshot-mode` for NAS interface: [PR #1201](https://github.com/microsoft/nni/pull/1201#issue-291094510) -* [Gaussian Process Tuner with Matern kernel](Tuner/GPTuner.md) +* [Gaussian Process Tuner with Matern kernel](Tuner/GPTuner.md) * (deprecated) Multiphase experiment supports * Added new training service support for multiphase experiment: PAI mode supports multiphase experiment since v0.9. 
- * Added multiphase capability for the following builtin tuners: + * Added multiphase capability for the following builtin tuners: * TPE, Random Search, Anneal, Naïve Evolution, SMAC, Network Morphism, Metis Tuner. * Web Portal @@ -326,8 +326,8 @@ * Fix bug of table entries * Nested search space refinement * Refine 'randint' type and support lower bound -* [Comparison of different hyper-parameter tuning algorithm](CommunitySharings/HpoComparision.md) -* [Comparison of NAS algorithm](CommunitySharings/NasComparision.md) +* [Comparison of different hyper-parameter tuning algorithm](CommunitySharings/HpoComparison.md) +* [Comparison of NAS algorithm](CommunitySharings/NasComparison.md) * [NNI practice on Recommenders](CommunitySharings/RecommendersSvd.md) ## Release 0.7 - 4/29/2018 diff --git a/docs/en_US/Tuner/BuiltinTuner.md b/docs/en_US/Tuner/BuiltinTuner.md index 1c3f77c825..48daa0a28d 100644 --- a/docs/en_US/Tuner/BuiltinTuner.md +++ b/docs/en_US/Tuner/BuiltinTuner.md @@ -2,7 +2,7 @@ NNI provides state-of-the-art tuning algorithms as part of our built-in tuners and makes them easy to use. Below is the brief summary of NNI's current built-in tuners: -Note: Click the **Tuner's name** to get the Tuner's installation requirements, suggested scenario, and an example configuration. A link for a detailed description of each algorithm is located at the end of the suggested scenario for each tuner. Here is an [article](../CommunitySharings/HpoComparision.md) comparing different Tuners on several problems. +Note: Click the **Tuner's name** to get the Tuner's installation requirements, suggested scenario, and an example configuration. A link for a detailed description of each algorithm is located at the end of the suggested scenario for each tuner. Here is an [article](../CommunitySharings/HpoComparison.md) comparing different Tuners on several problems. Currently, we support the following algorithms: @@ -218,7 +218,7 @@ The search space file should include the high-level key `combine_params`. The ty **Suggested scenario** -Note that the only acceptable types within the search space are `choice`, `quniform`, and `randint`. +Note that the only acceptable types within the search space are `choice`, `quniform`, and `randint`. This is suggested when the search space is small. It's suggested when it is feasible to exhaustively sweep the whole search space. [Detailed Description](./GridsearchTuner.md) @@ -388,7 +388,7 @@ As a strategy in a Sequential Model-based Global Optimization (SMBO) algorithm, **classArgs Requirements:** * **optimize_mode** (*'maximize' or 'minimize', optional, default = 'maximize'*) - If 'maximize', the tuner will try to maximize metrics. If 'minimize', the tuner will try to minimize metrics. -* **utility** (*'ei', 'ucb' or 'poi', optional, default = 'ei'*) - The utility function (acquisition function). 'ei', 'ucb', and 'poi' correspond to 'Expected Improvement', 'Upper Confidence Bound', and 'Probability of Improvement', respectively. +* **utility** (*'ei', 'ucb' or 'poi', optional, default = 'ei'*) - The utility function (acquisition function). 'ei', 'ucb', and 'poi' correspond to 'Expected Improvement', 'Upper Confidence Bound', and 'Probability of Improvement', respectively. * **kappa** (*float, optional, default = 5*) - Used by the 'ucb' utility function. The bigger `kappa` is, the more exploratory the tuner will be. * **xi** (*float, optional, default = 0*) - Used by the 'ei' and 'poi' utility functions. The bigger `xi` is, the more exploratory the tuner will be. 
* **nu** (*float, optional, default = 2.5*) - Used to specify the Matern kernel. The smaller nu, the less smooth the approximated function is. diff --git a/docs/en_US/contents.rst b/docs/en_US/contents.rst index d40792a74a..afc7a0d09b 100644 --- a/docs/en_US/contents.rst +++ b/docs/en_US/contents.rst @@ -18,5 +18,5 @@ Neural Network Intelligence References Community Sharings FAQ - How to Contribution - Changelog \ No newline at end of file + How to Contribute + Changelog From 51aebf18ec5d45ac8c93d4fb7eb4ad01787c4fc1 Mon Sep 17 00:00:00 2001 From: SparkSnail Date: Wed, 8 Jul 2020 21:25:02 +0800 Subject: [PATCH 5/5] Fix aml doc (#2631) --- README.md | 3 ++- docs/en_US/TrainingService/Overview.md | 15 ++++++++------- docs/en_US/TrainingService/PaiMode.md | 3 +-- docs/en_US/_templates/index.html | 2 ++ 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index a9dd388482..b65e3f4418 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ **NNI (Neural Network Intelligence)** is a lightweight but powerful toolkit to help users **automate** Feature Engineering, Neural Architecture Search, Hyperparameter Tuning and Model Compression. -The tool manages automated machine learning (AutoML) experiments, **dispatches and runs** experiments' trial jobs generated by tuning algorithms to search the best neural architecture and/or hyper-parameters in **different training environments** like Local Machine, Remote Servers, OpenPAI, Kubeflow, FrameworkController on K8S (AKS etc.), DLWorkspace (aka. DLTS) and other cloud options. +The tool manages automated machine learning (AutoML) experiments, **dispatches and runs** experiments' trial jobs generated by tuning algorithms to search the best neural architecture and/or hyper-parameters in **different training environments** like Local Machine, Remote Servers, OpenPAI, Kubeflow, FrameworkController on K8S (AKS etc.), DLWorkspace (aka. DLTS), AML (Azure Machine Learning) and other cloud options. ## **Who should consider using NNI** @@ -170,6 +170,7 @@ Within the following table, we summarized the current NNI capabilities, we are g
  • Local Machine
  • Remote Servers
+  • AML(Azure Machine Learning)
  • Kubernetes based services
    • OpenPAI
    • Kubeflow
    • diff --git a/docs/en_US/TrainingService/Overview.md b/docs/en_US/TrainingService/Overview.md index 77e46fafdf..c75cfc7a58 100644 --- a/docs/en_US/TrainingService/Overview.md +++ b/docs/en_US/TrainingService/Overview.md @@ -4,7 +4,7 @@ NNI training service is designed to allow users to focus on AutoML itself, agnostic to the underlying computing infrastructure where the trials are actually run. When migrating from one cluster to another (e.g., local machine to Kubeflow), users only need to tweak several configurations, and the experiment can be easily scaled. -Users can use training service provided by NNI, to run trial jobs on [local machine](./LocalMode.md), [remote machines](./RemoteMachineMode.md), and on clusters like [PAI](./PaiMode.md), [Kubeflow](./KubeflowMode.md) and [FrameworkController](./FrameworkControllerMode.md). These are called *built-in training services*. +Users can use training service provided by NNI, to run trial jobs on [local machine](./LocalMode.md), [remote machines](./RemoteMachineMode.md), and on clusters like [PAI](./PaiMode.md), [Kubeflow](./KubeflowMode.md), [FrameworkController](./FrameworkControllerMode.md), [DLTS](./DLTSMode.md) and [AML](./AMLMode.md). These are called *built-in training services*. If the computing resource customers try to use is not listed above, NNI provides interface that allows users to build their own training service easily. Please refer to "[how to implement training service](./HowToImplementTrainingService)" for details. @@ -20,12 +20,13 @@ In case users intend to use large files in their experiment (like large-scaled d |TrainingService|Brief Introduction| |---|---| -|[__Local__](./LocalMode.html)|NNI supports running an experiment on local machine, called local mode. Local mode means that NNI will run the trial jobs and nniManager process in same machine, and support gpu schedule function for trial jobs.| -|[__Remote__](./RemoteMachineMode.html)|NNI supports running an experiment on multiple machines through SSH channel, called remote mode. NNI assumes that you have access to those machines, and already setup the environment for running deep learning training code. NNI will submit the trial jobs in remote machine, and schedule suitable machine with enough gpu resource if specified.| -|[__PAI__](./PaiMode.html)|NNI supports running an experiment on [OpenPAI](https://github.com/Microsoft/pai) (aka PAI), called PAI mode. Before starting to use NNI PAI mode, you should have an account to access an [OpenPAI](https://github.com/Microsoft/pai) cluster. See [here](https://github.com/Microsoft/pai#how-to-deploy) if you don't have any OpenPAI account and want to deploy an OpenPAI cluster. In PAI mode, your trial program will run in PAI's container created by Docker.| -|[__Kubeflow__](./KubeflowMode.html)|NNI supports running experiment on [Kubeflow](https://github.com/kubeflow/kubeflow), called kubeflow mode. Before starting to use NNI kubeflow mode, you should have a Kubernetes cluster, either on-premises or [Azure Kubernetes Service(AKS)](https://azure.microsoft.com/en-us/services/kubernetes-service/), a Ubuntu machine on which [kubeconfig](https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/) is setup to connect to your Kubernetes cluster. If you are not familiar with Kubernetes, [here](https://kubernetes.io/docs/tutorials/kubernetes-basics/) is a good start. 
In kubeflow mode, your trial program will run as Kubeflow job in Kubernetes cluster.| -|[__FrameworkController__](./FrameworkControllerMode.html)|NNI supports running experiment using [FrameworkController](https://github.com/Microsoft/frameworkcontroller), called frameworkcontroller mode. FrameworkController is built to orchestrate all kinds of applications on Kubernetes, you don't need to install Kubeflow for specific deep learning framework like tf-operator or pytorch-operator. Now you can use FrameworkController as the training service to run NNI experiment.| -|[__DLTS__](./DLTSMode.html)|NNI supports running experiment using [DLTS](https://github.com/microsoft/DLWorkspace.git), which is an open source toolkit, developed by Microsoft, that allows AI scientists to spin up an AI cluster in turn-key fashion.| +|[__Local__](./LocalMode.md)|NNI supports running an experiment on local machine, called local mode. Local mode means that NNI will run the trial jobs and nniManager process in same machine, and support gpu schedule function for trial jobs.| +|[__Remote__](./RemoteMachineMode.md)|NNI supports running an experiment on multiple machines through SSH channel, called remote mode. NNI assumes that you have access to those machines, and already setup the environment for running deep learning training code. NNI will submit the trial jobs in remote machine, and schedule suitable machine with enough gpu resource if specified.| +|[__PAI__](./PaiMode.md)|NNI supports running an experiment on [OpenPAI](https://github.com/Microsoft/pai) (aka PAI), called PAI mode. Before starting to use NNI PAI mode, you should have an account to access an [OpenPAI](https://github.com/Microsoft/pai) cluster. See [here](https://github.com/Microsoft/pai#how-to-deploy) if you don't have any OpenPAI account and want to deploy an OpenPAI cluster. In PAI mode, your trial program will run in PAI's container created by Docker.| +|[__Kubeflow__](./KubeflowMode.md)|NNI supports running experiment on [Kubeflow](https://github.com/kubeflow/kubeflow), called kubeflow mode. Before starting to use NNI kubeflow mode, you should have a Kubernetes cluster, either on-premises or [Azure Kubernetes Service(AKS)](https://azure.microsoft.com/en-us/services/kubernetes-service/), a Ubuntu machine on which [kubeconfig](https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/) is setup to connect to your Kubernetes cluster. If you are not familiar with Kubernetes, [here](https://kubernetes.io/docs/tutorials/kubernetes-basics/) is a good start. In kubeflow mode, your trial program will run as Kubeflow job in Kubernetes cluster.| +|[__FrameworkController__](./FrameworkControllerMode.md)|NNI supports running experiment using [FrameworkController](https://github.com/Microsoft/frameworkcontroller), called frameworkcontroller mode. FrameworkController is built to orchestrate all kinds of applications on Kubernetes, you don't need to install Kubeflow for specific deep learning framework like tf-operator or pytorch-operator. Now you can use FrameworkController as the training service to run NNI experiment.| +|[__DLTS__](./DLTSMode.md)|NNI supports running experiment using [DLTS](https://github.com/microsoft/DLWorkspace.git), which is an open source toolkit, developed by Microsoft, that allows AI scientists to spin up an AI cluster in turn-key fashion.| +|[__AML__](./AMLMode.md)|NNI supports running an experiment on [AML](https://azure.microsoft.com/en-us/services/machine-learning/) , called aml mode. 
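As the overview above notes, migrating from one training service to another only requires tweaking several configurations. As a hypothetical, minimal illustration, the excerpt below shows the key that changes, `trainingServicePlatform`, alongside sections that typically stay the same; the exact fields each platform needs (for example `paiConfig`) are documented in the mode-specific pages linked in the table.

```yaml
# Illustrative excerpt only: the tuner/assessor and trial sections stay the same
# when moving between platforms; only this key and its matching platform-specific
# section need to be adapted.
trainingServicePlatform: pai   # or: local, remote, kubeflow, frameworkcontroller, dlts, aml
assessor:
  builtinAssessorName: Medianstop
  classArgs:
    optimize_mode: maximize
trial:
  codeDir: .
```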
## What does Training Service do? diff --git a/docs/en_US/TrainingService/PaiMode.md b/docs/en_US/TrainingService/PaiMode.md index a14ae25798..02f696da95 100644 --- a/docs/en_US/TrainingService/PaiMode.md +++ b/docs/en_US/TrainingService/PaiMode.md @@ -28,7 +28,6 @@ For example, use the following command: ```bash sudo mount -t nfs4 gcr-openpai-infra02:/pai/data /local/mnt ``` - Then the `/data` folder in container will be mounted to `/local/mnt` folder in your local machine. You could use the following configuration in your NNI's config file: @@ -87,7 +86,7 @@ paiConfig: reuse: true ``` -Note: You should set `trainingServicePlatform: pai` in NNI config YAML file if you want to start experiment in pai mode. +Note: You should set `trainingServicePlatform: pai` in NNI config YAML file if you want to start experiment in pai mode. The host field in configuration file is PAI's job submission page uri, like `10.10.5.1`, the default http protocol in NNI is `http`, if your PAI's cluster enabled https, please use the uri in `https://10.10.5.1` format. ### Trial configurations diff --git a/docs/en_US/_templates/index.html b/docs/en_US/_templates/index.html index 5cc8257298..7a405267c3 100644 --- a/docs/en_US/_templates/index.html +++ b/docs/en_US/_templates/index.html @@ -25,6 +25,7 @@ Kubeflow, FrameworkController on K8S (AKS etc.) DLWorkspace (aka. DLTS) + AML (Azure Machine Learning) and other cloud options.

      @@ -197,6 +198,7 @@

      NNI capabilities in a glance

      (AKS etc.)
    • DLWorkspace (aka. DLTS)
+    • AML (Azure Machine Learning)