Switch to cached op in the testing suite (apache#18579)
* add default ctx to cachedop fwd

* add test

* perl fix

* initial commit

* update sparse tests

* add aux_states

* fix aux-state type

* fix some tests

* fix check symbolic forward/backward

* fix symbolic grad check

* arg_dict fixes

* support init ops

* support forward only graph

* fix check symbolic backward stype

* add missing file

* replace extension test bind

* replace bind with _bind

* simplify backward_mul implementation

* small fix

* drop contrib.sparseembedding

* remove simple_bind in test sparse ops

* use simple_bind

* replace simple_bind in quantization

* fix aux index

* update amp simple_bind calls

* drop ifft

* fix a bug found in subgraph op

* add aux_array method

* replace symbols

* minor fix

* fix executor default context

* fix import

* bug fix for nd.where

* add subgraph test

* fix forward grad req

* fix batch dot dtype

* remove unused code

* fix slice dtype

* fix attach grad

* remove tests for non-existing sparse ops

* MXCachedOpGetOptimizedSymbol

* fix foreach test

* enhance err msg

* skip failed test

* add docs

* add docs

* fix lint

* fix lint, remove quantization

* fix lint

* fix lint

* fix lint

* fix build and import

* fix import

* fix perl call

* fix test

* remove perl binding

* remove reshape test

* fix profiler, trt

* remove tensorrt test

* remove quantization tests

* fix import

* fix conflict

* fix lint

* skip buggy test

Co-authored-by: EC2 Default User <ec2-user@ip-172-31-81-80.ec2.internal>
Co-authored-by: Lin <haibilin@a483e7be4c92.ant.amazon.com>
3 people authored and chinakook committed Nov 17, 2020
1 parent 15c6079 commit 39d8a6a
Showing 260 changed files with 852 additions and 58,459 deletions.
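The commit-message bullets above track one underlying refactor: tests that used to build a fresh executor per check via the legacy `bind`/`simple_bind` API now go through a cached op that is constructed once and reused across calls. As a rough, framework-free sketch of that caching pattern (the `CachedOp` class and its methods below are illustrative stand-ins, not MXNet's actual API):

```python
# Toy illustration of the executor-caching idea behind this commit:
# set up the "graph" once, then reuse it for every forward call
# instead of re-binding per test.

class CachedOp:
    """Caches a compiled callable for a graph function (illustrative only)."""

    def __init__(self, graph_fn):
        self._graph_fn = graph_fn
        self._compiled = None  # built lazily on the first call

    def __call__(self, *inputs):
        if self._compiled is None:
            # Stand-in for the one-time graph optimization/binding step.
            self._compiled = self._graph_fn
        return self._compiled(*inputs)

# A tiny "symbolic" graph: y = (a + b) * 2
op = CachedOp(lambda a, b: (a + b) * 2)
print(op(1, 2))  # 6
print(op(3, 4))  # 14 -- reuses the cached callable, no re-bind
```

The payoff in a test suite is that per-test setup cost drops and the same code path works for both forward-only and forward/backward checks, which is why so many files below shrink.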
21 changes: 0 additions & 21 deletions cd/mxnet_lib/mxnet_lib_pipeline.groovy
@@ -42,16 +42,6 @@ def get_pipeline(mxnet_variant, build_fn) {
}
}

if (mxnet_variant.startsWith('cu')) {
tests["${mxnet_variant}: Quantization Python 3"] = {
stage("${mxnet_variant}: Quantization Python 3") {
timeout(time: max_time, unit: 'MINUTES') {
test_gpu_quantization_py3(mxnet_variant)
}
}
}
}

parallel tests
}

@@ -103,17 +93,6 @@ def unittest_py3(mxnet_variant) {
}
}

// Tests quantization in P3 instance using Python 3
def test_gpu_quantization_py3(mxnet_variant) {
node(NODE_LINUX_GPU_P3) {
ws("workspace/mxnet_${libtype}/${mxnet_variant}/${env.BUILD_NUMBER}") {
def image = get_environment(mxnet_variant)
ci_utils.unpack_and_init("mxnet_${mxnet_variant}", get_stash(mxnet_variant), false)
ci_utils.docker_run(image, "unittest_ubuntu_python3_quantization_gpu", true)
}
}
}

// Pushes artifact to artifact repository
def push(mxnet_variant) {
node(NODE_LINUX_CPU) {
4 changes: 0 additions & 4 deletions ci/docker/Dockerfile.build.ubuntu
@@ -126,10 +126,6 @@ RUN update-java-alternatives -s java-1.8.0-openjdk-amd64
COPY install/ubuntu_julia.sh /work/
RUN /work/ubuntu_julia.sh

# PDL::CCS missing on 18.04
COPY install/ubuntu_perl.sh /work/
RUN /work/ubuntu_perl.sh

# MXNetJS nightly needs emscripten for wasm
COPY install/ubuntu_emscripten.sh /work/
RUN /work/ubuntu_emscripten.sh
3 changes: 0 additions & 3 deletions ci/docker/Dockerfile.build.ubuntu_cpu_julia
@@ -39,9 +39,6 @@ RUN /work/ubuntu_scala.sh
COPY install/ubuntu_clojure.sh /work/
RUN /work/ubuntu_clojure.sh

COPY install/ubuntu_perl.sh /work/
RUN /work/ubuntu_perl.sh

COPY install/ubuntu_julia.sh /work/
RUN /work/ubuntu_julia.sh

27 changes: 0 additions & 27 deletions ci/docker/install/ubuntu_perl.sh

This file was deleted.

40 changes: 0 additions & 40 deletions ci/docker/runtime_functions.sh
@@ -925,7 +925,6 @@ cd_unittest_ubuntu() {

pytest -m 'not serial' -n 4 --durations=50 --verbose tests/python/unittest
pytest -m 'serial' --durations=50 --verbose tests/python/unittest
pytest -n 4 --durations=50 --verbose tests/python/quantization

# https://github.com/apache/incubator-mxnet/issues/11801
# if [[ ${mxnet_variant} = "cpu" ]] || [[ ${mxnet_variant} = "mkl" ]]; then
@@ -963,7 +962,6 @@ unittest_ubuntu_python3_cpu() {
MXNET_ENGINE_TYPE=NaiveEngine \
pytest -m 'not serial' -k 'test_operator' -n 4 --durations=50 --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest
pytest -m 'serial' --durations=50 --cov-report xml:tests_unittest.xml --cov-append --verbose tests/python/unittest
pytest -n 4 --durations=50 --cov-report xml:tests_quantization.xml --verbose tests/python/quantization
}

unittest_ubuntu_python3_cpu_serial() {
@@ -976,7 +974,6 @@ unittest_ubuntu_python3_cpu_serial() {
export MXNET_ENABLE_CYTHON=0
export DMLC_LOG_STACK_TRACE_DEPTH=10
pytest --durations=50 --cov-report xml:tests_unittest.xml --verbose tests/python/unittest
pytest --durations=50 --cov-report xml:tests_quantization.xml --verbose tests/python/quantization
}

unittest_ubuntu_python3_cpu_mkldnn() {
@@ -1044,38 +1041,6 @@ unittest_ubuntu_python3_gpu_nocudnn() {
pytest -m 'serial' --durations=50 --cov-report xml:tests_gpu.xml --cov-append --verbose tests/python/gpu
}

unittest_ubuntu_tensorrt_gpu() {
set -ex
export PYTHONPATH=./python/
export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
export MXNET_SUBGRAPH_VERBOSE=0
export LD_LIBRARY_PATH=/work/mxnet/lib:$LD_LIBRARY_PATH
export CUDNN_VERSION=${CUDNN_VERSION:-7.0.3}
export MXNET_ENABLE_CYTHON=0
export DMLC_LOG_STACK_TRACE_DEPTH=10
MXNET_GPU_MEM_POOL_TYPE=Unpooled \
pytest -n 4 --durations=50 --cov-report xml:tests_trt_gpu.xml --verbose --capture=no tests/python/tensorrt/test_ops.py
pytest -k 'not test_ops' --durations=50 --cov-report xml:tests_trt_gpu.xml --cov-append --verbose --capture=no tests/python/tensorrt/
}

# quantization gpu currently only runs on P3 instances
# need to separte it from unittest_ubuntu_python3_gpu()
unittest_ubuntu_python3_quantization_gpu() {
set -ex
if [ -f /etc/redhat-release ]; then
source /opt/rh/rh-python36/enable
fi
export PYTHONPATH=./python/
export MXNET_MKLDNN_DEBUG=0 # Ignored if not present
export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
export MXNET_SUBGRAPH_VERBOSE=0
export CUDNN_VERSION=${CUDNN_VERSION:-7.0.3}
export MXNET_ENABLE_CYTHON=0
export DMLC_LOG_STACK_TRACE_DEPTH=10
MXNET_GPU_MEM_POOL_TYPE=Unpooled \
pytest -n 4 --durations=50 --cov-report xml:tests_quantization_gpu.xml --verbose tests/python/quantization_gpu
}

unittest_centos7_cpu_scala() {
set -ex
source /opt/rh/devtoolset-7/enable
@@ -1104,11 +1069,6 @@ unittest_ubuntu_cpu_clojure_integration() {
}


unittest_ubuntu_cpugpu_perl() {
set -ex
./perl-package/test.sh
}

unittest_cpp() {
set -ex
build/tests/mxnet_unit_tests
77 changes: 0 additions & 77 deletions ci/jenkins/Jenkins_steps.groovy
@@ -839,24 +839,6 @@ def test_unix_python3_gpu(lib_name) {
}]
}

def test_unix_python3_quantize_gpu(lib_name) {
return ['Python3: Quantize GPU': {
node(NODE_LINUX_GPU_P3) {
ws('workspace/ut-python3-quantize-gpu') {
timeout(time: max_time, unit: 'MINUTES') {
try {
utils.unpack_and_init(lib_name, mx_lib)
utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_python3_quantization_gpu', true)
utils.publish_test_coverage()
} finally {
utils.collect_test_results_unix('tests_quantization_gpu.xml', 'tests_python3_quantize_gpu.xml')
}
}
}
}
}]
}

def test_unix_python3_debug_cpu() {
return ['Python3: CPU debug': {
node(NODE_LINUX_CPU) {
@@ -955,24 +937,6 @@ def test_unix_python3_mkldnn_nocudnn_gpu(lib_name) {
}]
}

def test_unix_python3_tensorrt_gpu(lib_name) {
return ['Python3: TensorRT GPU': {
node(NODE_LINUX_GPU_P3) {
ws('workspace/build-tensorrt') {
timeout(time: max_time, unit: 'MINUTES') {
try {
utils.unpack_and_init(lib_name, mx_tensorrt_lib)
utils.docker_run('ubuntu_gpu_tensorrt', 'unittest_ubuntu_tensorrt_gpu', true)
utils.publish_test_coverage()
} finally {
utils.collect_test_results_unix('tests_tensorrt.xml', 'tests_python3_tensorrt_gpu.xml')
}
}
}
}
}]
}

def test_unix_cpp_package_gpu(lib_name) {
return ['cpp-package GPU Makefile': {
node(NODE_LINUX_GPU_G4) {
@@ -1084,20 +1048,6 @@ def test_unix_r_mkldnn_cpu(lib_name) {
}]
}

def test_unix_perl_cpu(lib_name) {
return ['Perl: CPU Makefile': {
node(NODE_LINUX_CPU) {
ws('workspace/ut-perl-cpu') {
timeout(time: max_time, unit: 'MINUTES') {
utils.unpack_and_init(lib_name, mx_lib_make)
utils.docker_run('ubuntu_cpu', 'unittest_ubuntu_cpugpu_perl', false)
utils.publish_test_coverage()
}
}
}
}]
}

def test_unix_cpp_gpu(lib_name) {
return ['Cpp: GPU': {
node(NODE_LINUX_GPU_G4) {
@@ -1126,20 +1076,6 @@ def test_unix_cpp_cpu(lib_name) {
}]
}

def test_unix_perl_gpu(lib_name) {
return ['Perl: GPU Makefile': {
node(NODE_LINUX_GPU_G4) {
ws('workspace/ut-perl-gpu') {
timeout(time: max_time, unit: 'MINUTES') {
utils.unpack_and_init(lib_name, mx_lib_make)
utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_cpugpu_perl', true)
utils.publish_test_coverage()
}
}
}
}]
}

def test_unix_r_gpu(lib_name) {
return ['R: GPU': {
node(NODE_LINUX_GPU_G4) {
@@ -1312,19 +1248,6 @@ def test_centos7_python3_cd_gpu(lib_name) {
}]
}

def test_centos7_quantization_cd_gpu(lib_name) {
return ['Quantization Python3: CentOS 7 GPU CD': {
node(NODE_LINUX_GPU_P3) {
ws('workspace/test-cd-static/gpu') {
timeout(time: max_time, unit: 'MINUTES') {
utils.unpack_and_init(lib_name, mx_cd_lib)
utils.docker_run('centos7_gpu_cu102', 'unittest_ubuntu_python3_quantization_gpu', true)
}
}
}
}]
}

def test_centos7_pypi_package_cd_gpu(lib_name) {
return ['PyPI package: CentOS 7 GPU CD': {
node(NODE_LINUX_GPU) {
1 change: 0 additions & 1 deletion ci/jenkins/Jenkinsfile_centos_gpu
@@ -42,7 +42,6 @@ core_logic: {
utils.parallel_stage('Tests', [
custom_steps.test_centos7_python3_gpu('centos7_gpu'),
custom_steps.test_centos7_python3_cd_gpu('centos7_gpu_cd'),
custom_steps.test_centos7_quantization_cd_gpu('centos7_gpu_cd'),
custom_steps.test_centos7_pypi_package_cd_gpu('centos7_gpu_cd')
])
}
1 change: 0 additions & 1 deletion ci/jenkins/Jenkinsfile_unix_cpu
@@ -54,7 +54,6 @@ core_logic: {
custom_steps.test_unix_scala_mkldnn_cpu('mkldnn_cpu_make'),
custom_steps.test_unix_clojure_cpu('cpu_make'),
custom_steps.test_unix_clojure_integration_cpu('cpu_make'),
custom_steps.test_unix_perl_cpu('cpu_make'),
custom_steps.test_unix_r_cpu('cpu'),
custom_steps.test_unix_r_mkldnn_cpu('mkldnn_cpu'),
custom_steps.test_unix_julia07_cpu('cpu'),
3 changes: 0 additions & 3 deletions ci/jenkins/Jenkinsfile_unix_gpu
@@ -48,11 +48,8 @@ core_logic: {

utils.parallel_stage('Tests', [
custom_steps.test_unix_python3_gpu('gpu'),
custom_steps.test_unix_python3_quantize_gpu('gpu'),
custom_steps.test_unix_python3_mkldnn_gpu('mkldnn_gpu'),
custom_steps.test_unix_python3_mkldnn_nocudnn_gpu('mkldnn_gpu_nocudnn'),
custom_steps.test_unix_python3_tensorrt_gpu('tensorrt'),
custom_steps.test_unix_perl_gpu('gpu_make'),
custom_steps.test_unix_r_gpu('gpu'),
custom_steps.test_unix_cpp_gpu('cmake_gpu'),
custom_steps.test_unix_cpp_package_gpu('gpu_make'),
3 changes: 1 addition & 2 deletions docs/python_docs/python/api/gluon/contrib/index.rst
@@ -50,7 +50,6 @@ Neural Network
Concurrent
HybridConcurrent
Identity
SparseEmbedding
SyncBatchNorm
PixelShuffle1D
PixelShuffle2D
@@ -165,4 +164,4 @@ API Reference

.. automodule:: mxnet.gluon.contrib.estimator
:members:
:imported-members:
:imported-members:
@@ -465,10 +465,6 @@ Memory Allocation for Weight Gradient:
0.000 MBs ( 0.050%) for fullyconnected3
```

### Advanced: Sparse `weight`

You can optimize this example further by setting the weight's `stype` to `'row_sparse'`, but whether `'row_sparse'` weights make sense or not will depends on your specific task. See [contrib.SparseEmbedding](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/contrib/nn/basic_layers.py#L118) for an example of this.

## Conclusion

As part of this tutorial, we learned how to write sparse data to disk in LibSVM format and load it back in sparse batches with the [LibSVMIter](/api/python/docs/api/mxnet/io/index.html#mxnet.io.LibSVMIter). We learned how to improve the performance of Gluon's [nn.Dense](/api/python/docs/api/gluon/nn/index.html#mxnet.gluon.nn.Dense) on sparse arrays using `mx.nd.sparse`. And lastly, we set `grad_stype` to `'row_sparse'` to reduce the size of the gradient and speed up the parameter update step.