Commit

Merge pull request #266 from microsoft/master
merge master
SparkSnail authored Aug 13, 2020
2 parents 68abe2f + 3fdbbdb commit 14e9619
Showing 130 changed files with 9,459 additions and 1,370 deletions.
6 changes: 3 additions & 3 deletions azure-pipelines.yml
@@ -49,9 +49,9 @@ jobs:
sphinx-build -M html . _build -W
displayName: 'Sphinx Documentation Build check'
- job: 'ubuntu_1604_python35_legacy_torch_tf'
- job: 'ubuntu_1804_python36_legacy_torch_tf'
pool:
vmImage: 'Ubuntu 16.04'
vmImage: 'Ubuntu 18.04'

steps:
- script: |
@@ -141,7 +141,7 @@ jobs:
powershell.exe -file install.ps1
displayName: 'Install nni toolkit via source code'
- script: |
python -m pip install scikit-learn==0.20.0 --user
python -m pip install scikit-learn==0.23.2 --user
python -m pip install keras==2.1.6 --user
python -m pip install torch==1.5.0+cpu torchvision==0.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html --user
python -m pip install tensorflow==1.15.2 --user
17 changes: 9 additions & 8 deletions deployment/docker/Dockerfile
@@ -1,12 +1,13 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
FROM nvidia/cuda:9.2-cudnn7-runtime-ubuntu18.04

LABEL maintainer='Microsoft NNI Team<nni@microsoft.com>'

RUN DEBIAN_FRONTEND=noninteractive && \
apt-get -y update && \
ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get -y update && \
apt-get -y install sudo \
apt-utils \
git \
@@ -21,7 +22,7 @@ RUN DEBIAN_FRONTEND=noninteractive && \
openssh-client \
openssh-server \
lsof \
python3.5 \
python3.6 \
python3-dev \
python3-pip \
python3-tk \
@@ -37,7 +38,7 @@ RUN cp /usr/bin/python3 /usr/bin/python
#
# update pip
#
RUN python3 -m pip install --upgrade pip setuptools==39.1.0
RUN python3 -m pip install --upgrade pip==20.0.2 setuptools==39.1.0

# numpy 1.14.3 scipy 1.1.0
RUN python3 -m pip --no-cache-dir install \
@@ -46,7 +47,7 @@ RUN python3 -m pip --no-cache-dir install \
#
# Tensorflow 1.15
#
RUN python3 -m pip --no-cache-dir install tensorflow-gpu==1.15
RUN python3 -m pip --no-cache-dir install tensorflow-gpu==1.15.0

#
# Keras 2.1.6
@@ -60,9 +61,9 @@ RUN python3 -m pip --no-cache-dir install torch==1.4.0
RUN python3 -m pip install torchvision==0.5.0

#
# sklearn 0.20.0
# sklearn 0.23.2
#
RUN python3 -m pip --no-cache-dir install scikit-learn==0.20.0
RUN python3 -m pip --no-cache-dir install scikit-learn==0.23.2

#
# pandas==0.23.4 lightgbm==2.2.2
2 changes: 1 addition & 1 deletion deployment/docker/README.md
@@ -11,7 +11,7 @@ scipy 1.1.0
tensorflow-gpu 1.15.0
keras 2.1.6
torch 1.4.0
scikit-learn 0.20.0
scikit-learn 0.23.2
pandas 0.23.4
lightgbm 2.2.2
nni
2 changes: 1 addition & 1 deletion deployment/docker/README_zh_CN.md
@@ -47,4 +47,4 @@

Use the following command to pull the NNI docker image from Docker Hub.

docker pull msranni/nni:latest
docker pull msranni/nni:latest
2 changes: 1 addition & 1 deletion deployment/pypi/setup.py
@@ -63,7 +63,7 @@
'scipy',
'coverage',
'colorama',
'scikit-learn>=0.20,<0.22',
'scikit-learn>=0.23.2',
'pkginfo',
'websockets'
],
89 changes: 89 additions & 0 deletions docs/en_US/CommunitySharings/ModelCompressionComparison.md
@@ -0,0 +1,89 @@
# Comparison of Filter Pruning Algorithms

To provide an initial insight into the performance of various filter pruning algorithms,
we conduct extensive experiments with these algorithms on several benchmark models and datasets.
We present the experiment results in this document.
In addition, we provide instructions for reproducing these experiments to facilitate further contributions to this effort.

## Experiment Setting

The experiments are performed with the following pruners/datasets/models:

* Models: [VGG16, ResNet18, ResNet50](https://github.com/microsoft/nni/tree/master/examples/model_compress/models/cifar10)

* Datasets: CIFAR-10

* Pruners:
- These pruners are included:
- Pruners with scheduling: `SimulatedAnnealing Pruner`, `NetAdapt Pruner`, `AutoCompress Pruner`.
Given the overall sparsity requirement, these pruners can automatically generate a sparsity distribution among different layers.
- One-shot pruners: `L1Filter Pruner`, `L2Filter Pruner`, `FPGM Pruner`.
The sparsity of each layer is set the same as the overall sparsity in this experiment.
- Only **filter pruning** performance is compared here.

For the pruners with scheduling, `L1Filter Pruner` is used as the base algorithm. That is to say, after the sparsity distribution is decided by the scheduling algorithm, `L1Filter Pruner` is used to perform the actual pruning.

- All the pruners listed above are implemented in [nni](https://github.com/microsoft/nni/tree/master/docs/en_US/Compressor/Overview.md).

## Experiment Result

For each dataset/model/pruner combination, we prune the model to different levels by setting a series of target sparsities for the pruner.
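
For instance, such a sweep with a one-shot pruner might look like the following minimal sketch (`build_model` and `evaluate` are hypothetical helpers, not part of nni; the experiment code linked below is the authoritative version):

```python
from nni.compression.torch import L1FilterPruner

target_sparsities = [0.1, 0.3, 0.5, 0.7, 0.9]
results = {}
for sparsity in target_sparsities:
    model = build_model()  # hypothetical helper: returns a freshly trained model
    # Prune all Conv2d layers to the current target sparsity.
    config_list = [{'sparsity': sparsity, 'op_types': ['Conv2d']}]
    pruner = L1FilterPruner(model, config_list)
    pruner.compress()
    results[sparsity] = evaluate(model)  # hypothetical helper: returns test accuracy
```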

Here we plot both the **Number of Weights - Performance** curve and the **FLOPs - Performance** curve.
As a reference, we also plot the results reported in the paper [AutoCompress: An Automatic DNN Structured Pruning Framework for Ultra-High Compression Rates](http://arxiv.org/abs/1907.03141) for models VGG16 and ResNet18 on CIFAR-10.

The experiment results are shown in the following figures:

CIFAR-10, VGG16:

![](../../../examples/model_compress/comparison_of_pruners/img/performance_comparison_vgg16.png)

CIFAR-10, ResNet18:

![](../../../examples/model_compress/comparison_of_pruners/img/performance_comparison_resnet18.png)

CIFAR-10, ResNet50:

![](../../../examples/model_compress/comparison_of_pruners/img/performance_comparison_resnet50.png)

## Analysis

From the experiment result, we get the following conclusions:

* Given the constraint on the number of parameters, the pruners with scheduling (`AutoCompress Pruner`, `SimulatedAnnealing Pruner`) perform better than the others when the constraint is strict. However, they have no such advantage in the FLOPs/Performance comparison, since only the number-of-parameters constraint is considered in the optimization process;
* The basic algorithms `L1Filter Pruner`, `L2Filter Pruner`, and `FPGM Pruner` perform very similarly in these experiments;
* `NetAdapt Pruner` cannot achieve a very high compression rate. This is caused by its mechanism of pruning only one layer in each pruning iteration, which leads to unacceptable complexity if the sparsity per iteration is much lower than the overall sparsity constraint.

## Experiments Reproduction

### Implementation Details

* The experiment results are all collected with the default configuration of the pruners in nni, which means that when we call a pruner class in nni, we don't change any default class arguments.

* Both FLOPs and the number of parameters are counted with [Model FLOPs/Parameters Counter](https://github.com/microsoft/nni/blob/master/docs/en_US/Compressor/CompressionUtils.md#model-flopsparameters-counter) after [model speed up](https://github.com/microsoft/nni/blob/master/docs/en_US/Compressor/ModelSpeedup.md). This avoids the potential issue of counting these statistics on masked models.

* The experiment code can be found [here](https://github.com/microsoft/nni/tree/master/examples/model_compress/auto_pruners_torch.py).

### Experiment Result Rendering

* If you follow the practice in the [example](https://github.com/microsoft/nni/tree/master/examples/model_compress/auto_pruners_torch.py), for each pruning experiment, the experiment result will be saved in JSON format as follows:
``` json
{
    "performance": {"original": 0.9298, "pruned": 0.1, "speedup": 0.1, "finetuned": 0.7746},
    "params": {"original": 14987722.0, "speedup": 167089.0},
    "flops": {"original": 314018314.0, "speedup": 38589922.0}
}
```

* The experiment results are saved [here](https://github.com/microsoft/nni/tree/master/examples/model_compress/experiment_data).
You can refer to [analyze](https://github.com/microsoft/nni/tree/master/examples/model_compress/experiment_data/analyze.py) to plot new performance comparison figures.
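
As a rough illustration, a **FLOPs - Performance** curve can be plotted from results in the JSON format shown above with a sketch like the following (the file names are hypothetical; refer to the linked script for the actual rendering code):

```python
import json
import matplotlib.pyplot as plt

# Hypothetical result files, one per target sparsity, in the JSON format shown above.
result_files = ['sparsity_0.2.json', 'sparsity_0.4.json', 'sparsity_0.6.json']

flops_ratios, accuracies = [], []
for path in result_files:
    with open(path) as f:
        result = json.load(f)
    # Remaining FLOPs after speedup, as a fraction of the original model's FLOPs.
    flops_ratios.append(result['flops']['speedup'] / result['flops']['original'])
    accuracies.append(result['performance']['finetuned'])

plt.plot(flops_ratios, accuracies, marker='o')
plt.xlabel('Remaining FLOPs ratio')
plt.ylabel('Fine-tuned accuracy')
plt.title('FLOPs - Performance')
plt.savefig('performance_comparison.png')
```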

## Contribution

### TODO Items

* Pruners constrained by FLOPs/latency
* More pruning algorithms/datasets/models

### Issues
For algorithm implementation & experiment issues, please [create an issue](https://github.com/microsoft/nni/issues/new/).
3 changes: 2 additions & 1 deletion docs/en_US/CommunitySharings/perf_compare.rst
@@ -8,4 +8,5 @@ Performance comparison and analysis can help users decide a proper algorithm (e.
:maxdepth: 1

Neural Architecture Search Comparison <NasComparison>
Hyper-parameter Tuning Algorithm Comparison <HpoComparison>
Hyper-parameter Tuning Algorithm Comparison <HpoComparison>
Model Compression Algorithm Comparison <ModelCompressionComparison>
1 change: 1 addition & 0 deletions docs/en_US/Compressor/Overview.md
@@ -42,6 +42,7 @@ Pruning algorithms compress the original network by removing redundant weights o
| [SimulatedAnnealing Pruner](https://nni.readthedocs.io/en/latest/Compressor/Pruner.html#simulatedannealing-pruner) | Automatic pruning with a guided heuristic search method, Simulated Annealing algorithm [Reference Paper](https://arxiv.org/abs/1907.03141) |
| [AutoCompress Pruner](https://nni.readthedocs.io/en/latest/Compressor/Pruner.html#autocompress-pruner) | Automatic pruning by iteratively calling SimulatedAnnealing Pruner and ADMM Pruner [Reference Paper](https://arxiv.org/abs/1907.03141) |

You can refer to this [benchmark](https://github.com/microsoft/nni/tree/master/docs/en_US/Benchmark.md) for the performance of these pruners on some benchmark problems.

### Quantization Algorithms

Expand Down
91 changes: 68 additions & 23 deletions docs/en_US/Compressor/Pruner.md
@@ -20,6 +20,8 @@ We provide several pruning algorithms that support fine-grained weight pruning a
* [NetAdapt Pruner](#netadapt-pruner)
* [SimulatedAnnealing Pruner](#simulatedannealing-pruner)
* [AutoCompress Pruner](#autocompress-pruner)
* [AutoML for Model Compression Pruner](#automl-for-model-compression-pruner)
* [Sensitivity Pruner](#sensitivity-pruner)

**Others**
* [ADMM Pruner](#admm-pruner)
@@ -37,7 +39,7 @@ Tensorflow code
```python
from nni.compression.tensorflow import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
pruner = LevelPruner(model_graph, config_list)
pruner = LevelPruner(model, config_list)
pruner.compress()
```

@@ -116,17 +118,6 @@ FPGMPruner prunes filters with the smallest geometric median.
### Usage

Tensorflow code
```python
from nni.compression.tensorflow import FPGMPruner
config_list = [{
'sparsity': 0.5,
'op_types': ['Conv2D']
}]
pruner = FPGMPruner(model, config_list)
pruner.compress()
```

PyTorch code
```python
from nni.compression.torch import FPGMPruner
@@ -145,11 +136,6 @@ pruner.compress()
.. autoclass:: nni.compression.torch.FPGMPruner
```

##### Tensorflow
```eval_rst
.. autoclass:: nni.compression.tensorflow.FPGMPruner
```

## L1Filter Pruner

This is a one-shot pruner proposed in ['PRUNING FILTERS FOR EFFICIENT CONVNETS'](https://arxiv.org/abs/1608.08710) by Hao Li, Asim Kadav, Igor Durdanovic, Hanan Samet and Hans Peter Graf.
@@ -382,12 +368,6 @@ You can view [example](https://github.com/microsoft/nni/blob/master/examples/mod
.. autoclass:: nni.compression.torch.AGPPruner
```

##### Tensorflow

```eval_rst
.. autoclass:: nni.compression.tensorflow.AGPPruner
```

***

## NetAdapt Pruner
@@ -497,6 +477,39 @@ You can view [example](https://github.com/microsoft/nni/blob/master/examples/mod
.. autoclass:: nni.compression.torch.AutoCompressPruner
```

## AutoML for Model Compression Pruner

AutoML for Model Compression Pruner (AMCPruner) leverages reinforcement learning to find the model compression policy.
This learning-based policy outperforms conventional rule-based policies by achieving a higher compression ratio,
better preserving accuracy, and reducing manual effort.

![](../../img/amc_pruner.jpg)

For more details, please refer to [AMC: AutoML for Model Compression and Acceleration on Mobile Devices](https://arxiv.org/pdf/1802.03494.pdf).


#### Usage

PyTorch code

```python
from nni.compression.torch import AMCPruner
config_list = [{
'op_types': ['Conv2d', 'Linear']
}]
pruner = AMCPruner(model, config_list, evaluator, val_loader, flops_ratio=0.5)
pruner.compress()
```

You can view [example](https://github.com/microsoft/nni/blob/master/examples/model_compress/amc/) for more information.

#### User configuration for AMC Pruner

##### PyTorch

```eval_rst
.. autoclass:: nni.compression.torch.AMCPruner
```

## ADMM Pruner
Alternating Direction Method of Multipliers (ADMM) is a mathematical optimization technique,
@@ -588,3 +601,35 @@ We try to reproduce the experiment result of the fully connected network on MNIS
![](../../img/lottery_ticket_mnist_fc.png)

The above figure shows the result of the fully connected network. `round0-sparsity-0.0` is the performance without pruning. Consistent with the paper, pruning around 80% also obtains performance similar to non-pruning, and converges a little faster. If too much is pruned, e.g., more than 94%, the accuracy drops and convergence becomes a little slower. One small difference from the paper is that the trend in the paper's data is clearer.


## Sensitivity Pruner
In each round, SensitivityPruner prunes the model based on each layer's sensitivity (its impact on accuracy) until the final configured sparsity of the whole model is met:
1. Analyze the sensitivity of each layer in the current state of the model.
2. Prune each layer according to the sensitivity.

For more details, please refer to [Learning both Weights and Connections for Efficient Neural Networks](https://arxiv.org/abs/1506.02626).

#### Usage

PyTorch code

```python
from nni.compression.torch import SensitivityPruner
config_list = [{
'sparsity': 0.5,
'op_types': ['Conv2d']
}]
pruner = SensitivityPruner(model, config_list, finetuner=fine_tuner, evaluator=evaluator)
# eval_args and finetune_args are the parameters passed to the evaluator and finetuner respectively
pruner.compress(eval_args=[model], finetune_args=[model])
```
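
The `evaluator` and `fine_tuner` callables above are not defined in the snippet. A minimal sketch of what they might look like, assuming standard PyTorch `DataLoader`s named `val_loader` and `train_loader` (both hypothetical here; the signatures are illustrative rather than the definitive nni contract):

```python
import torch

def evaluator(model):
    # Called as evaluator(*eval_args); returns a scalar validation accuracy.
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for data, target in val_loader:  # val_loader: assumed validation DataLoader
            pred = model(data).argmax(dim=1)
            correct += (pred == target).sum().item()
            total += target.size(0)
    return correct / total

def fine_tuner(model):
    # Called as finetuner(*finetune_args); briefly fine-tunes the pruned model.
    model.train()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    criterion = torch.nn.CrossEntropyLoss()
    for data, target in train_loader:  # train_loader: assumed training DataLoader
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()
```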


#### User configuration for Sensitivity Pruner

##### PyTorch

```eval_rst
.. autoclass:: nni.compression.torch.SensitivityPruner
```
18 changes: 11 additions & 7 deletions docs/en_US/TrainingService/AMLMode.md
@@ -49,30 +49,34 @@ tuner:
trial:
command: python3 mnist.py
codeDir: .
computeTarget: ${replace_to_your_computeTarget}
image: msranni/nni
gpuNum: 1
amlConfig:
subscriptionId: ${replace_to_your_subscriptionId}
resourceGroup: ${replace_to_your_resourceGroup}
workspaceName: ${replace_to_your_workspaceName}

computeTarget: ${replace_to_your_computeTarget}
```
Note: You should set `trainingServicePlatform: aml` in the NNI config YAML file if you want to start an experiment in aml mode.

Compared with [LocalMode](LocalMode.md), the trial configuration in aml mode has these additional keys:
* computeTarget
* required key. The compute cluster name you want to use in your AML workspace. See Step 6.
* image
* required key. The docker image name used in the job. The image `msranni/nni` in this example only supports GPU computeTargets.

amlConfig:
* subscriptionId
* the subscriptionId of your account
* required key, the subscriptionId of your account
* resourceGroup
* the resourceGroup of your account
* required key, the resourceGroup of your account
* workspaceName
* the workspaceName of your account
* required key, the workspaceName of your account
* computeTarget
* required key, the compute cluster name you want to use in your AML workspace. See Step 6.
* maxTrialNumPerGpu
* optional key, the maximum number of concurrent trials on a GPU device.
* useActiveGpu
* optional key, whether to use a GPU that already has another active process. By default, NNI uses a GPU only if no other process is active on it.

The required information for `amlConfig` can be found in the `config.json` downloaded in Step 5.
