diff --git a/CMakeLists.txt b/CMakeLists.txt index 483108a68419..000bbbf17ea5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -372,13 +372,13 @@ endif() # ---[ LAPack if(USE_LAPACK) + message("USE_LAPACK is ON") add_definitions(-DMXNET_USE_LAPACK=1) if (NOT MSVC) list(APPEND mxnet_LINKER_LIBS lapack) endif() endif() -message("USE LAPACK ${USE_LAPACK}") # ---[ jemalloc if(USE_JEMALLOC) diff --git a/Jenkinsfile b/Jenkinsfile index 6d21f496426e..9d7792066e37 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -26,6 +26,8 @@ mx_lib = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdpart mx_dist_lib = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a, 3rdparty/ps-lite/build/libps.a, deps/lib/libprotobuf-lite.a, deps/lib/libzmq.a' // mxnet cmake libraries, in cmake builds we do not produce a libnvvm static library by default. mx_cmake_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so' +// mxnet cmake libraries, in cmake builds we do not produce a libnvvm static library by default. 
+mx_cmake_lib_debug = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests' mx_cmake_mkldnn_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so, build/3rdparty/mkldnn/src/libmkldnn.so.0' mx_mkldnn_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libiomp5.so, lib/libmkldnn.so.0, lib/libmklml_intel.so, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a' // timeout in minutes @@ -233,6 +235,17 @@ try { } } }, + 'CPU: Openblas, debug': { + node('mxnetlinux-cpu') { + ws('workspace/build-cpu-openblas') { + timeout(time: max_time, unit: 'MINUTES') { + init_git() + docker_run('ubuntu_cpu', 'build_ubuntu_cpu_cmake_debug', false) + pack_lib('cpu_debug', mx_cmake_lib_debug) + } + } + } + }, 'CPU: Clang 3.9': { node('mxnetlinux-cpu') { ws('workspace/build-cpu-clang39') { @@ -378,28 +391,8 @@ try { ws('workspace/build-cpu') { withEnv(['OpenBLAS_HOME=C:\\mxnet\\openblas', 'OpenCV_DIR=C:\\mxnet\\opencv_vc14', 'CUDA_PATH=C:\\CUDA\\v8.0']) { init_git_win() - bat """mkdir build_vc14_cpu - call "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0\\VC\\bin\\x86_amd64\\vcvarsx86_amd64.bat" - cd build_vc14_cpu - cmake -G \"Visual Studio 14 2015 Win64\" -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -DUSE_MKL_IF_AVAILABLE=0 ${env.WORKSPACE}""" - bat 'C:\\mxnet\\build_vc14_cpu.bat' - - bat '''rmdir /s/q pkg_vc14_cpu - mkdir pkg_vc14_cpu\\lib - mkdir pkg_vc14_cpu\\python - mkdir pkg_vc14_cpu\\include - mkdir pkg_vc14_cpu\\build - copy build_vc14_cpu\\Release\\libmxnet.lib pkg_vc14_cpu\\lib - copy build_vc14_cpu\\Release\\libmxnet.dll pkg_vc14_cpu\\build - xcopy python pkg_vc14_cpu\\python /E /I /Y - xcopy include pkg_vc14_cpu\\include /E /I /Y - xcopy 3rdparty\\dmlc-core\\include pkg_vc14_cpu\\include /E /I /Y - xcopy 
3rdparty\\mshadow\\mshadow pkg_vc14_cpu\\include\\mshadow /E /I /Y - xcopy 3rdparty\\nnvm\\include pkg_vc14_cpu\\nnvm\\include /E /I /Y - del /Q *.7z - 7z.exe a vc14_cpu.7z pkg_vc14_cpu\\ - ''' - stash includes: 'vc14_cpu.7z', name: 'vc14_cpu' + powershell 'python ci/build_windows.py -f WIN_CPU' + stash includes: 'windows_package.7z', name: 'windows_package_cpu' } } } @@ -411,28 +404,9 @@ try { timeout(time: max_time, unit: 'MINUTES') { ws('workspace/build-gpu') { withEnv(['OpenBLAS_HOME=C:\\mxnet\\openblas', 'OpenCV_DIR=C:\\mxnet\\opencv_vc14', 'CUDA_PATH=C:\\CUDA\\v8.0']) { - init_git_win() - bat """mkdir build_vc14_gpu - call "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0\\VC\\bin\\x86_amd64\\vcvarsx86_amd64.bat" - cd build_vc14_gpu - cmake -G \"NMake Makefiles JOM\" -DUSE_CUDA=1 -DUSE_CUDNN=1 -DUSE_NVRTC=1 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -DCUDA_ARCH_NAME=Manual -DCUDA_ARCH_BIN=52 -DCUDA_ARCH_PTX=52 -DCMAKE_CXX_FLAGS_RELEASE="/FS /MD /O2 /Ob2 /DNDEBUG" -DCMAKE_BUILD_TYPE=Release -DUSE_MKL_IF_AVAILABLE=0 ${env.WORKSPACE}""" - bat 'C:\\mxnet\\build_vc14_gpu.bat' - bat '''rmdir /s/q pkg_vc14_gpu - mkdir pkg_vc14_gpu\\lib - mkdir pkg_vc14_gpu\\python - mkdir pkg_vc14_gpu\\include - mkdir pkg_vc14_gpu\\build - copy build_vc14_gpu\\libmxnet.lib pkg_vc14_gpu\\lib - copy build_vc14_gpu\\libmxnet.dll pkg_vc14_gpu\\build - xcopy python pkg_vc14_gpu\\python /E /I /Y - xcopy include pkg_vc14_gpu\\include /E /I /Y - xcopy 3rdparty\\dmlc-core\\include pkg_vc14_gpu\\include /E /I /Y - xcopy 3rdparty\\mshadow\\mshadow pkg_vc14_gpu\\include\\mshadow /E /I /Y - xcopy 3rdparty\\nnvm\\include pkg_vc14_gpu\\nnvm\\include /E /I /Y - del /Q *.7z - 7z.exe a vc14_gpu.7z pkg_vc14_gpu\\ - ''' - stash includes: 'vc14_gpu.7z', name: 'vc14_gpu' + init_git_win() + powershell 'python ci/build_windows.py -f WIN_GPU' + stash includes: 'windows_package.7z', name: 'windows_package_gpu' } } } @@ -443,37 +417,9 @@ try { 
timeout(time: max_time, unit: 'MINUTES') { ws('workspace/build-gpu') { withEnv(['OpenBLAS_HOME=C:\\mxnet\\openblas', 'OpenCV_DIR=C:\\mxnet\\opencv_vc14', 'CUDA_PATH=C:\\CUDA\\v8.0','BUILD_NAME=vc14_gpu_mkldnn']) { - init_git_win() - bat """mkdir build_%BUILD_NAME% - call "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0\\VC\\bin\\x86_amd64\\vcvarsx86_amd64.bat" - cd build_%BUILD_NAME% - copy ${env.WORKSPACE}\\3rdparty\\mkldnn\\config_template.vcxproj.user ${env.WORKSPACE}\\config_template.vcxproj.user /y - cmake -G \"NMake Makefiles JOM\" -DUSE_CUDA=1 -DUSE_CUDNN=1 -DUSE_NVRTC=1 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -DCUDA_ARCH_NAME=Manual -DCUDA_ARCH_BIN=52 -DCUDA_ARCH_PTX=52 -DUSE_MKLDNN=1 -DCMAKE_CXX_FLAGS_RELEASE="/FS /MD /O2 /Ob2 /DNDEBUG" -DCMAKE_BUILD_TYPE=Release ${env.WORKSPACE}""" - bat ''' - call "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0\\VC\\bin\\x86_amd64\\vcvarsx86_amd64.bat" - cd build_%BUILD_NAME% - set /a cores=%NUMBER_OF_PROCESSORS% * 2 - jom -j %cores% - ''' - bat '''rmdir /s/q pkg_%BUILD_NAME% - mkdir pkg_%BUILD_NAME%\\lib - mkdir pkg_%BUILD_NAME%\\python - mkdir pkg_%BUILD_NAME%\\include - mkdir pkg_%BUILD_NAME%\\build - copy build_%BUILD_NAME%\\libmxnet.lib pkg_%BUILD_NAME%\\lib - copy build_%BUILD_NAME%\\libmxnet.dll pkg_%BUILD_NAME%\\build - copy build_%BUILD_NAME%\\3rdparty\\mkldnn\\src\\mkldnn.dll pkg_%BUILD_NAME%\\build - copy build_%BUILD_NAME%\\libiomp5md.dll pkg_%BUILD_NAME%\\build - copy build_%BUILD_NAME%\\mklml.dll pkg_%BUILD_NAME%\\build - xcopy python pkg_%BUILD_NAME%\\python /E /I /Y - xcopy include pkg_%BUILD_NAME%\\include /E /I /Y - xcopy 3rdparty\\dmlc-core\\include pkg_%BUILD_NAME%\\include /E /I /Y - xcopy 3rdparty\\mshadow\\mshadow pkg_%BUILD_NAME%\\include\\mshadow /E /I /Y - xcopy 3rdparty\\nnvm\\include pkg_%BUILD_NAME%\\nnvm\\include /E /I /Y - del /Q *.7z - 7z.exe a %BUILD_NAME%.7z pkg_%BUILD_NAME%\\ - ''' - stash includes: 
'vc14_gpu_mkldnn.7z', name: 'vc14_gpu_mkldnn' + init_git_win() + powershell 'python ci/build_windows.py -f WIN_GPU_MKLDNN' + stash includes: 'windows_package.7z', name: 'windows_package_gpu_mkldnn' } } } @@ -574,6 +520,20 @@ try { } } }, + 'Python3: CPU debug': { + node('mxnetlinux-cpu') { + ws('workspace/ut-python3-cpu-debug') { + try { + init_git() + unpack_lib('cpu_debug', mx_cmake_lib_debug) + python3_ut('ubuntu_cpu') + } finally { + collect_test_results_unix('nosetests_unittest.xml', 'nosetests_python3_cpu_debug_unittest.xml') + collect_test_results_unix('nosetests_quantization.xml', 'nosetests_python3_cpu_debug_quantization.xml') + } + } + } + }, 'Python2: GPU': { node('mxnetlinux-gpu') { ws('workspace/ut-python2-gpu') { @@ -843,16 +803,8 @@ try { ws('workspace/ut-python-cpu') { try { init_git_win() - unstash 'vc14_cpu' - bat '''rmdir /s/q pkg_vc14_cpu - 7z x -y vc14_cpu.7z''' - bat """xcopy C:\\mxnet\\data data /E /I /Y - xcopy C:\\mxnet\\model model /E /I /Y - call activate py2 - pip install mock - set PYTHONPATH=${env.WORKSPACE}\\pkg_vc14_cpu\\python - del /S /Q ${env.WORKSPACE}\\pkg_vc14_cpu\\python\\*.pyc - C:\\mxnet\\test_cpu.bat""" + unstash 'windows_package_cpu' + powershell 'ci/windows/test_py2_cpu.ps1' } finally { collect_test_results_windows('nosetests_unittest.xml', 'nosetests_unittest_windows_python2_cpu.xml') } @@ -866,15 +818,8 @@ try { ws('workspace/ut-python-cpu') { try { init_git_win() - unstash 'vc14_cpu' - bat '''rmdir /s/q pkg_vc14_cpu - 7z x -y vc14_cpu.7z''' - bat """xcopy C:\\mxnet\\data data /E /I /Y - xcopy C:\\mxnet\\model model /E /I /Y - call activate py3 - set PYTHONPATH=${env.WORKSPACE}\\pkg_vc14_cpu\\python - del /S /Q ${env.WORKSPACE}\\pkg_vc14_cpu\\python\\*.pyc - C:\\mxnet\\test_cpu.bat""" + unstash 'windows_package_cpu' + powershell 'ci/windows/test_py3_cpu.ps1' } finally { collect_test_results_windows('nosetests_unittest.xml', 'nosetests_unittest_windows_python3_cpu.xml') } @@ -888,19 +833,11 @@ try { 
ws('workspace/ut-python-gpu') { try { init_git_win() - unstash 'vc14_gpu' - bat '''rmdir /s/q pkg_vc14_gpu - 7z x -y vc14_gpu.7z''' - bat """xcopy C:\\mxnet\\data data /E /I /Y - xcopy C:\\mxnet\\model model /E /I /Y - call activate py2 - pip install mock - set PYTHONPATH=${env.WORKSPACE}\\pkg_vc14_gpu\\python - del /S /Q ${env.WORKSPACE}\\pkg_vc14_gpu\\python\\*.pyc - C:\\mxnet\\test_gpu.bat""" + unstash 'windows_package_gpu' + powershell 'ci/windows/test_py2_gpu.ps1' } finally { - collect_test_results_windows('nosetests_gpu_forward.xml', 'nosetests_gpu_forward_windows_python2_gpu.xml') - collect_test_results_windows('nosetests_gpu_operator.xml', 'nosetests_gpu_operator_windows_python2_gpu.xml') + collect_test_results_windows('nosetests_forward.xml', 'nosetests_gpu_forward_windows_python2_gpu.xml') + collect_test_results_windows('nosetests_operator.xml', 'nosetests_gpu_operator_windows_python2_gpu.xml') } } } @@ -912,18 +849,11 @@ try { ws('workspace/ut-python-gpu') { try { init_git_win() - unstash 'vc14_gpu' - bat '''rmdir /s/q pkg_vc14_gpu - 7z x -y vc14_gpu.7z''' - bat """xcopy C:\\mxnet\\data data /E /I /Y - xcopy C:\\mxnet\\model model /E /I /Y - call activate py3 - set PYTHONPATH=${env.WORKSPACE}\\pkg_vc14_gpu\\python - del /S /Q ${env.WORKSPACE}\\pkg_vc14_gpu\\python\\*.pyc - C:\\mxnet\\test_gpu.bat""" + unstash 'windows_package_gpu' + powershell 'ci/windows/test_py3_gpu.ps1' } finally { - collect_test_results_windows('nosetests_gpu_forward.xml', 'nosetests_gpu_forward_windows_python3_gpu.xml') - collect_test_results_windows('nosetests_gpu_operator.xml', 'nosetests_gpu_operator_windows_python3_gpu.xml') + collect_test_results_windows('nosetests_forward.xml', 'nosetests_gpu_forward_windows_python3_gpu.xml') + collect_test_results_windows('nosetests_operator.xml', 'nosetests_gpu_operator_windows_python3_gpu.xml') } } } @@ -935,18 +865,11 @@ try { ws('workspace/ut-python-gpu') { try { init_git_win() - unstash 'vc14_gpu_mkldnn' - bat '''rmdir /s/q 
pkg_vc14_gpu_mkldnn - 7z x -y vc14_gpu_mkldnn.7z''' - bat """xcopy C:\\mxnet\\data data /E /I /Y - xcopy C:\\mxnet\\model model /E /I /Y - call activate py3 - set PYTHONPATH=${env.WORKSPACE}\\pkg_vc14_gpu_mkldnn\\python - del /S /Q ${env.WORKSPACE}\\pkg_vc14_gpu_mkldnn\\python\\*.pyc - C:\\mxnet\\test_gpu.bat""" + unstash 'windows_package_gpu_mkldnn' + powershell 'ci/windows/test_py3_gpu.ps1' } finally { - collect_test_results_windows('nosetests_gpu_forward.xml', 'nosetests_gpu_forward_windows_python3_gpu_mkldnn.xml') - collect_test_results_windows('nosetests_gpu_operator.xml', 'nosetests_gpu_operator_windows_python3_gpu_mkldnn.xml') + collect_test_results_windows('nosetests_forward.xml', 'nosetests_gpu_forward_windows_python3_gpu_mkldnn.xml') + collect_test_results_windows('nosetests_operator.xml', 'nosetests_gpu_operator_windows_python3_gpu_mkldnn.xml') } } } diff --git a/R-package/R/mlp.R b/R-package/R/mlp.R index ecc30999d1c2..aa510d103f53 100644 --- a/R-package/R/mlp.R +++ b/R-package/R/mlp.R @@ -8,7 +8,7 @@ #' @param activation either a single string or a vector containing the names of the activation functions. #' @param out_activation a single string containing the name of the output activation function. #' @param ctx whether train on cpu (default) or gpu. -#' @param eval_metric the evaluation metric/ +#' @param eval.metric the evaluation metric/ #' @param ... 
other parameters passing to \code{mx.model.FeedForward.create}/ #' #' @examples diff --git a/ci/build.py b/ci/build.py index 09f2d4709bdd..a9d6a63537f2 100755 --- a/ci/build.py +++ b/ci/build.py @@ -39,6 +39,7 @@ from itertools import chain from subprocess import call, check_call from typing import * +from util import * CCACHE_MAXSIZE = '500G' @@ -138,24 +139,9 @@ def _get_local_image_id(docker_binary, docker_tag): return image_id -def get_mxnet_root() -> str: - curpath = os.path.abspath(os.path.dirname(__file__)) - - def is_mxnet_root(path: str) -> bool: - return os.path.exists(os.path.join(path, ".mxnet_root")) - - while not is_mxnet_root(curpath): - parent = os.path.abspath(os.path.join(curpath, os.pardir)) - if parent == curpath: - raise RuntimeError("Got to the root and couldn't find a parent folder with .mxnet_root") - curpath = parent - return curpath - - def buildir() -> str: return os.path.join(get_mxnet_root(), "build") - def default_ccache_dir() -> str: # Share ccache across containers if 'CCACHE_DIR' in os.environ: diff --git a/ci/build_windows.py b/ci/build_windows.py new file mode 100755 index 000000000000..5eca58db7b74 --- /dev/null +++ b/ci/build_windows.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +"""User friendly / multi platform builder script""" + +import subprocess +import logging +import os +import tempfile +import sys +from distutils import spawn +import logging +from subprocess import check_call +import platform +import argparse +from util import * +import json +from enum import Enum +import time +import datetime +import shutil +import glob +from distutils.dir_util import copy_tree + +KNOWN_VCVARS = [ + # VS 2015 + r'C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\x86_amd64\vcvarsx86_amd64.bat' + # VS 2017 + , r'c:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsx86_amd64.bat' +] + +class BuildFlavour(Enum): + WIN_CPU = 'WIN_CPU' + WIN_CPU_MKLDNN = 'WIN_CPU_MKLDNN' + WIN_GPU = 'WIN_GPU' + WIN_GPU_MKLDNN = 'WIN_GPU_MKLDNN' + +CMAKE_FLAGS = { + 'WIN_CPU': '-DUSE_CUDA=0 \ + -DUSE_CUDNN=0 \ + -DUSE_NVRTC=0 \ + -DUSE_OPENCV=1 \ + -DUSE_OPENMP=1 \ + -DUSE_PROFILER=1 \ + -DUSE_BLAS=open \ + -DUSE_LAPACK=1 \ + -DUSE_DIST_KVSTORE=0 \ + -DBUILD_CPP_EXAMPLES=1 \ + -DUSE_MKL_IF_AVAILABLE=0' + + ,'WIN_CPU_MKLDNN': '-DUSE_CUDA=0 \ + -DUSE_CUDNN=0 \ + -DUSE_NVRTC=0 \ + -DUSE_OPENCV=1 \ + -DUSE_OPENMP=1 \ + -DUSE_PROFILER=1 \ + -DUSE_BLAS=open \ + -DUSE_LAPACK=1 \ + -DUSE_DIST_KVSTORE=0 \ + -DUSE_MKL_IF_AVAILABLE=1' + + ,'WIN_GPU': '-DUSE_CUDA=1 \ + -DUSE_CUDNN=1 \ + -DUSE_NVRTC=1 \ + -DUSE_OPENCV=1 \ + -DUSE_OPENMP=1 \ + -DUSE_PROFILER=1 \ + -DUSE_BLAS=open \ + -DUSE_LAPACK=1 \ + -DUSE_DIST_KVSTORE=0 \ + -DCUDA_ARCH_NAME=Manual \ + -DCUDA_ARCH_BIN=52 \ + -DCUDA_ARCH_PTX=52 \ + -DCMAKE_CXX_FLAGS_RELEASE="/FS /MD /O2 /Ob2 /DNDEBUG" \ + -DUSE_MKL_IF_AVAILABLE=0 \ + -DCMAKE_BUILD_TYPE=Release' + + ,'WIN_GPU_MKLDNN': '-DUSE_CUDA=1 \ + -DUSE_CUDNN=1 \ + -DUSE_NVRTC=1 \ + -DUSE_OPENCV=1 \ + -DUSE_OPENMP=1 \ + -DUSE_PROFILER=1 \ + -DUSE_BLAS=open \ + -DUSE_LAPACK=1 \ + -DUSE_DIST_KVSTORE=0 \ + -DCUDA_ARCH_NAME=Manual \ + 
-DCUDA_ARCH_BIN=52 \ + -DCUDA_ARCH_PTX=52 \ + -DUSE_MKLDNN=1 \ + -DCMAKE_CXX_FLAGS_RELEASE="/FS /MD /O2 /Ob2 \ + /DNDEBUG" \ + -DCMAKE_BUILD_TYPE=Release' + +} + + +def get_vcvars_environment(architecture, vcvars): + """ + Returns a dictionary containing the environment variables set up by vcvars + """ + result = None + python = sys.executable + + vcvars_list = [vcvars] + vcvars_list.extend(KNOWN_VCVARS) + for vcvars in vcvars_list: + if os.path.isfile(vcvars): + process = subprocess.Popen('("%s" %s>nul) && "%s" -c "import os; import json; print(json.dumps(dict(os.environ)))"' % (vcvars, architecture, python), stdout=subprocess.PIPE, shell=True) + stdout, stderr = process.communicate() + exitcode = process.wait() + if exitcode == 0: + logging.info("Using build environment from: %s", vcvars) + return(json.loads(stdout.strip())) + else: + raise RuntimeError('Failed cloning environment from vcvars file: %s stdout: %s stderr: %s', vcvars, stdout, stderr) + raise RuntimeError('Couldn\'t find vcvars batch file: %s', vcvars) + + +def windows_build(args): + vcvars_env = get_vcvars_environment(args.arch, args.vcvars) + logging.debug("vcvars environment: %s", vcvars_env) + os.environ.update(vcvars_env) + + path = args.output + os.makedirs(path, exist_ok=True) + mxnet_root = get_mxnet_root() + logging.info("Found mxnet root: {}".format(mxnet_root)) + with remember_cwd(): + os.chdir(path) + logging.info("Generating project with CMake") + check_call("cmake -G \"Visual Studio 14 2015 Win64\" {} {}".format(CMAKE_FLAGS[args.flavour], mxnet_root), shell=True) + logging.info("Building with visual studio") + t0 = int(time.time()) + check_call(["msbuild", "mxnet.sln","/p:configuration=release;platform=x64", "/maxcpucount","/v:minimal"]) + logging.info("Build flavour: %s complete in directory: \"%s\"", args.flavour, os.path.abspath(path)) + logging.info("Build took %s" , datetime.timedelta(seconds=int(time.time()-t0))) + windows_package(args) + +def windows_package(args): + pkgfile = 
'windows_package.7z' + pkgdir = os.path.abspath('windows_package') + logging.info("Packaging libraries and headers in package: %s", pkgfile) + j = os.path.join + pkgdir_lib = os.path.abspath(j(pkgdir, 'lib')) + with remember_cwd(): + os.chdir(args.output) + logging.info("Looking for static libraries and dlls in: \"%s", os.getcwd()) + libs = list(glob.iglob('**/*.lib', recursive=True)) + dlls = list(glob.iglob('**/*.dll', recursive=True)) + os.makedirs(pkgdir_lib, exist_ok=True) + for lib in libs: + logging.info("packing lib: %s", lib) + shutil.copy(lib, pkgdir_lib) + for dll in dlls: + logging.info("packing dll: %s", dll) + shutil.copy(dll, pkgdir_lib) + os.chdir(get_mxnet_root()) + logging.info('packing python bindings') + copy_tree('python', j(pkgdir, 'python')) + logging.info('packing headers') + copy_tree('include', j(pkgdir, 'include')) + copy_tree(j('3rdparty','dmlc-core','include'), j(pkgdir, 'include')) + copy_tree(j('3rdparty','mshadow', 'mshadow'), j(pkgdir, 'include', 'mshadow')) + copy_tree(j('3rdparty','tvm','nnvm', 'include'), j(pkgdir,'include', 'nnvm', 'include')) + logging.info("Compressing package: %s", pkgfile) + check_call(['7z', 'a', pkgfile, pkgdir]) + + +def nix_build(args): + path = args.output + os.makedirs(path, exist_ok=True) + with remember_cwd(): + os.chdir(path) + logging.info("Generating project with CMake") + check_call("cmake \ + -DUSE_CUDA=OFF \ + -DUSE_OPENCV=OFF \ + -DUSE_OPENMP=OFF \ + -DCMAKE_BUILD_TYPE=Debug \ + -GNinja ..", shell=True) + check_call("ninja", shell=True) + +def main(): + logging.getLogger().setLevel(logging.INFO) + logging.basicConfig(format='%(asctime)-15s %(message)s') + logging.info("MXNet Windows build helper") + + parser = argparse.ArgumentParser() + parser.add_argument("-o", "--output", + help="output directory", + default='build', + type=str) + + parser.add_argument("--vcvars", + help="vcvars batch file location, typically inside vs studio install dir", + default=r'c:\Program Files (x86)\Microsoft Visual 
Studio\2017\Community\VC\Auxiliary\Build\vcvarsx86_amd64.bat', + type=str) + + parser.add_argument("--arch", + help="architecture", + default='x64', + type=str) + + parser.add_argument("-f", "--flavour", + help="build flavour", + default='WIN_CPU', + choices=[x.name for x in BuildFlavour], + type=str) + + args = parser.parse_args() + logging.info("Build flavour: %s", args.flavour) + + system = platform.system() + if system == 'Windows': + logging.info("Detected Windows platform") + if 'OpenBLAS_HOME' not in os.environ: + os.environ["OpenBLAS_HOME"] = "C:\\mxnet\\openblas" + if 'OpenCV_DIR' not in os.environ: + os.environ["OpenCV_DIR"] = "C:\\mxnet\\opencv_vc14" + if 'CUDA_PATH' not in os.environ: + os.environ["CUDA_PATH"] = "C:\\CUDA\\v8.0" + windows_build(args) + + elif system == 'Linux' or system == 'Darwin': + nix_build(args) + + else: + logging.error("Don't know how to build for {} yet".format(platform.system())) + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) + diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 52a2650a1cc4..1c861beb916c 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -110,23 +110,6 @@ build_jetson() { popd } -report_ccache_usage() { - set -ex - pushd . - - # Show global ccache summary at the end of each run. - ccache -s - if [ -e $CCACHE_LOGFILE ] - then - # Display local ccache log, excluding some overly verbose output. - cat $CCACHE_LOGFILE | grep -v "Config:" | grep -v "stats.lock" - else - echo "No ccache log found." 
- fi - - popd -} - # # ARM builds # @@ -159,7 +142,6 @@ build_armv6() { -G Ninja /work/mxnet ninja -v - report_ccache_usage build_wheel popd } @@ -191,7 +173,6 @@ build_armv7() { -G Ninja /work/mxnet ninja -v - report_ccache_usage build_wheel popd } @@ -210,7 +191,6 @@ build_armv8() { -DUSE_MKL_IF_AVAILABLE=OFF\ -G Ninja /work/mxnet ninja -v - report_ccache_usage build_wheel } @@ -237,7 +217,6 @@ build_android_armv7() { -DUSE_MKL_IF_AVAILABLE=OFF\ -G Ninja /work/mxnet ninja -v - report_ccache_usage } build_android_armv8() { @@ -270,8 +249,6 @@ build_centos7_cpu() { USE_BLAS=openblas \ USE_DIST_KVSTORE=1 \ -j$(nproc) - - report_ccache_usage } build_amzn_linux_cpu() { @@ -289,7 +266,6 @@ build_amzn_linux_cpu() { -DUSE_DIST_KVSTORE=ON\ -G Ninja /work/mxnet ninja -v - report_ccache_usage } @@ -306,8 +282,6 @@ build_centos7_mkldnn() { USE_MKLDNN=1 \ USE_BLAS=openblas \ -j$(nproc) - - report_ccache_usage } build_centos7_gpu() { @@ -341,26 +315,38 @@ build_ubuntu_cpu_openblas() { USE_BLAS=openblas \ USE_DIST_KVSTORE=1 \ -j$(nproc) +} + +build_ubuntu_cpu_cmake_debug() { + set -ex + pushd . 
+ cd /work/build + cmake \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DUSE_CUDA=OFF \ + -DUSE_MKL_IF_AVAILABLE=OFF \ + -DUSE_OPENMP=OFF \ + -DUSE_OPENCV=ON \ + -DCMAKE_BUILD_TYPE=Debug \ + -G Ninja \ + /work/mxnet - report_ccache_usage + ninja -v + popd } build_ubuntu_cpu_clang39() { set -ex - - export CXX=clang++-3.9 + export CXX=clang++-3.9 export CC=clang-3.9 - - build_ccache_wrappers - - make \ + build_ccache_wrappers + make \ USE_CPP_PACKAGE=1 \ USE_BLAS=openblas \ USE_OPENMP=0 \ USE_DIST_KVSTORE=1 \ -j$(nproc) - - report_ccache_usage } build_ubuntu_cpu_clang50() { @@ -377,8 +363,6 @@ build_ubuntu_cpu_clang50() { USE_OPENMP=1 \ USE_DIST_KVSTORE=1 \ -j$(nproc) - - report_ccache_usage } build_ubuntu_cpu_clang39_mkldnn() { @@ -395,8 +379,6 @@ build_ubuntu_cpu_clang39_mkldnn() { USE_MKLDNN=1 \ USE_OPENMP=0 \ -j$(nproc) - - report_ccache_usage } build_ubuntu_cpu_clang50_mkldnn() { @@ -413,8 +395,6 @@ build_ubuntu_cpu_clang50_mkldnn() { USE_MKLDNN=1 \ USE_OPENMP=1 \ -j$(nproc) - - report_ccache_usage } build_ubuntu_cpu_mkldnn() { @@ -428,8 +408,6 @@ build_ubuntu_cpu_mkldnn() { USE_BLAS=openblas \ USE_MKLDNN=1 \ -j$(nproc) - - report_ccache_usage } build_ubuntu_gpu() { @@ -450,8 +428,6 @@ build_ubuntu_gpu_mkldnn() { USE_CUDA_PATH=/usr/local/cuda \ USE_CUDNN=1 \ -j$(nproc) - - report_ccache_usage } build_ubuntu_gpu_mkldnn_nocudnn() { @@ -467,8 +443,6 @@ build_ubuntu_gpu_mkldnn_nocudnn() { USE_CUDA_PATH=/usr/local/cuda \ USE_CUDNN=0 \ -j$(nproc) - - report_ccache_usage } build_ubuntu_gpu_cuda91_cudnn7() { @@ -515,7 +489,6 @@ build_ubuntu_gpu_cmake_mkldnn() { /work/mxnet ninja -v - report_ccache_usage # libmkldnn.so.0 is a link file. We need an actual binary file named libmkldnn.so.0. 
cp 3rdparty/mkldnn/src/libmkldnn.so.0 3rdparty/mkldnn/src/libmkldnn.so.0.tmp mv 3rdparty/mkldnn/src/libmkldnn.so.0.tmp 3rdparty/mkldnn/src/libmkldnn.so.0 @@ -537,7 +510,6 @@ build_ubuntu_gpu_cmake() { /work/mxnet ninja -v - report_ccache_usage } @@ -759,6 +731,7 @@ integrationtest_ubuntu_cpu_dist_kvstore() { ../../tools/launch.py -n 7 --launcher local python dist_sync_kvstore.py --no-multiprecision ../../tools/launch.py -n 7 --launcher local python dist_sync_kvstore.py --type=compressed_cpu ../../tools/launch.py -n 7 --launcher local python dist_sync_kvstore.py --type=compressed_cpu --no-multiprecision + ../../tools/launch.py -n 3 --launcher local python test_server_profiling.py } integrationtest_ubuntu_gpu_scala() { diff --git a/ci/docker_cache.py b/ci/docker_cache.py index 6637ec377165..7a6d1106d38d 100755 --- a/ci/docker_cache.py +++ b/ci/docker_cache.py @@ -31,7 +31,6 @@ import subprocess import json import build as build_util -from joblib import Parallel, delayed @@ -43,6 +42,7 @@ def build_save_containers(platforms, registry, load_cache) -> int: :param load_cache: Load cache before building :return: 1 if error occurred, 0 otherwise """ + from joblib import Parallel, delayed if len(platforms) == 0: return 0 diff --git a/ci/util.py b/ci/util.py new file mode 100644 index 000000000000..22631f30435f --- /dev/null +++ b/ci/util.py @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os +import contextlib + +def get_mxnet_root() -> str: + curpath = os.path.abspath(os.path.dirname(__file__)) + + def is_mxnet_root(path: str) -> bool: + return os.path.exists(os.path.join(path, ".mxnet_root")) + + while not is_mxnet_root(curpath): + parent = os.path.abspath(os.path.join(curpath, os.pardir)) + if parent == curpath: + raise RuntimeError("Got to the root and couldn't find a parent folder with .mxnet_root") + curpath = parent + return curpath + +@contextlib.contextmanager +def remember_cwd(): + ''' + Restore current directory when exiting context + ''' + curdir = os.getcwd() + try: yield + finally: os.chdir(curdir) + + diff --git a/ci/windows/test_py2_cpu.ps1 b/ci/windows/test_py2_cpu.ps1 new file mode 100644 index 000000000000..1623d2956103 --- /dev/null +++ b/ci/windows/test_py2_cpu.ps1 @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +7z x -y windows_package.7z +$env:PYTHONPATH=join-path $pwd.Path windows_package\python +$env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 +c:\Anaconda3\envs\py2\Scripts\pip install -r tests\requirements.txt +c:\Anaconda3\envs\py2\python.exe -m nose -v --with-xunit --xunit-file nosetests_unittest.xml tests\python\unittest +if (! $?) { Throw ("Error running unittest") } +c:\Anaconda3\envs\py2\python.exe -m nose -v --with-xunit --xunit-file nosetests_train.xml tests\python\train +if (! $?) { Throw ("Error running train tests") } diff --git a/ci/windows/test_py2_gpu.ps1 b/ci/windows/test_py2_gpu.ps1 new file mode 100644 index 000000000000..13cd5366e0db --- /dev/null +++ b/ci/windows/test_py2_gpu.ps1 @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +7z x -y windows_package.7z +$env:PYTHONPATH=join-path $pwd.Path windows_package\python +$env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 +c:\Anaconda3\envs\py2\Scripts\pip install -r tests\requirements.txt +c:\Anaconda3\envs\py2\python.exe -m nose -v --with-xunit --xunit-file nosetests_unittest.xml tests\python\unittest +if (! $?) 
{ Throw ("Error running unittest") } +c:\Anaconda3\envs\py2\python.exe -m nose -v --with-xunit --xunit-file nosetests_operator.xml tests\python\gpu\test_operator_gpu.py +if (! $?) { Throw ("Error running tests") } +c:\Anaconda3\envs\py2\python.exe -m nose -v --with-xunit --xunit-file nosetests_forward.xml tests\python\gpu\test_forward.py +if (! $?) { Throw ("Error running tests") } +c:\Anaconda3\envs\py2\python.exe -m nose -v tests\python\train +if (! $?) { Throw ("Error running tests") } diff --git a/ci/windows/test_py3_cpu.ps1 b/ci/windows/test_py3_cpu.ps1 new file mode 100644 index 000000000000..98d4e410e8f5 --- /dev/null +++ b/ci/windows/test_py3_cpu.ps1 @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +7z x -y windows_package.7z +$env:PYTHONPATH=join-path $pwd.Path windows_package\python +$env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 +c:\Anaconda3\envs\py3\Scripts\pip install -r tests\requirements.txt +c:\Anaconda3\envs\py3\python.exe -m nose -v --with-xunit --xunit-file nosetests_unittest.xml tests\python\unittest +if (! $?) { Throw ("Error running unittest") } +c:\Anaconda3\envs\py3\python.exe -m nose -v --with-xunit --xunit-file nosetests_train.xml tests\python\train +if (! $?) 
{ Throw ("Error running train tests") } diff --git a/ci/windows/test_py3_gpu.ps1 b/ci/windows/test_py3_gpu.ps1 new file mode 100644 index 000000000000..b94b4f389be8 --- /dev/null +++ b/ci/windows/test_py3_gpu.ps1 @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +7z x -y windows_package.7z +$env:PYTHONPATH=join-path $pwd.Path windows_package\python +$env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 +c:\Anaconda3\envs\py3\Scripts\pip install -r tests\requirements.txt +c:\Anaconda3\envs\py3\python.exe -m nose -v --with-xunit --xunit-file nosetests_unittest.xml tests\python\unittest +if (! $?) { Throw ("Error running unittest") } +c:\Anaconda3\envs\py3\python.exe -m nose -v --with-xunit --xunit-file nosetests_operator.xml tests\python\gpu\test_operator_gpu.py +if (! $?) { Throw ("Error running tests") } +c:\Anaconda3\envs\py3\python.exe -m nose -v --with-xunit --xunit-file nosetests_forward.xml tests\python\gpu\test_forward.py +if (! $?) { Throw ("Error running tests") } +c:\Anaconda3\envs\py3\python.exe -m nose -v --with-xunit --xunit-file nosetests_train.xml tests\python\train +if (! $?) 
{ Throw ("Error running tests") } diff --git a/contrib/clojure-package/examples/scripts/get_cifar_data.sh b/contrib/clojure-package/examples/scripts/get_cifar_data.sh index 372c7bb5781e..12b3770c2700 100755 --- a/contrib/clojure-package/examples/scripts/get_cifar_data.sh +++ b/contrib/clojure-package/examples/scripts/get_cifar_data.sh @@ -20,8 +20,8 @@ set -evx -if [ ! -z "$MXNET_DATA_DIR" ]; then - data_path="$MXNET_DATA_DIR" +if [ ! -z "$MXNET_HOME" ]; then + data_path="$MXNET_HOME" else data_path="./data" fi diff --git a/contrib/clojure-package/examples/scripts/get_mnist_data.sh b/contrib/clojure-package/examples/scripts/get_mnist_data.sh index 6f32b85f480b..703ece207a1f 100755 --- a/contrib/clojure-package/examples/scripts/get_mnist_data.sh +++ b/contrib/clojure-package/examples/scripts/get_mnist_data.sh @@ -20,8 +20,8 @@ set -evx -if [ ! -z "$MXNET_DATA_DIR" ]; then - data_path="$MXNET_DATA_DIR" +if [ ! -z "$MXNET_HOME" ]; then + data_path="$MXNET_HOME" else data_path="./data" fi diff --git a/contrib/clojure-package/scripts/get_cifar_data.sh b/contrib/clojure-package/scripts/get_cifar_data.sh index 372c7bb5781e..12b3770c2700 100755 --- a/contrib/clojure-package/scripts/get_cifar_data.sh +++ b/contrib/clojure-package/scripts/get_cifar_data.sh @@ -20,8 +20,8 @@ set -evx -if [ ! -z "$MXNET_DATA_DIR" ]; then - data_path="$MXNET_DATA_DIR" +if [ ! -z "$MXNET_HOME" ]; then + data_path="$MXNET_HOME" else data_path="./data" fi diff --git a/contrib/clojure-package/scripts/get_mnist_data.sh b/contrib/clojure-package/scripts/get_mnist_data.sh index 6f32b85f480b..703ece207a1f 100755 --- a/contrib/clojure-package/scripts/get_mnist_data.sh +++ b/contrib/clojure-package/scripts/get_mnist_data.sh @@ -20,8 +20,8 @@ set -evx -if [ ! -z "$MXNET_DATA_DIR" ]; then - data_path="$MXNET_DATA_DIR" +if [ ! 
-z "$MXNET_HOME" ]; then + data_path="$MXNET_HOME" else data_path="./data" fi diff --git a/contrib/clojure-package/src/org/apache/clojure_mxnet/module.clj b/contrib/clojure-package/src/org/apache/clojure_mxnet/module.clj index 22ab761547e2..ab6d345fe91d 100644 --- a/contrib/clojure-package/src/org/apache/clojure_mxnet/module.clj +++ b/contrib/clojure-package/src/org/apache/clojure_mxnet/module.clj @@ -309,7 +309,6 @@ (defn load-checkpoint "Create a model from previously saved checkpoint. - - mod module - opts map of - prefix Path prefix of saved model files. You should have prefix-symbol.json, prefix-xxxx.params, and optionally prefix-xxxx.states, @@ -341,7 +340,7 @@ (util/->option (when workload-list (util/vec->indexed-seq workload-list))) (util/->option (when fixed-param-names (util/vec->set fixed-param-names))))) ([prefix epoch] - (load-checkpoint mod {:prefix prefix :epoch epoch}))) + (load-checkpoint {:prefix prefix :epoch epoch}))) (defn load-optimizer-states [mod fname] (.mod load fname)) @@ -670,4 +669,3 @@ (fit-params {:allow-missing true}) (fit-params {})) - diff --git a/contrib/clojure-package/test/org/apache/clojure_mxnet/module_test.clj b/contrib/clojure-package/test/org/apache/clojure_mxnet/module_test.clj index f3d4e75e8c97..0f71b5a850cc 100644 --- a/contrib/clojure-package/test/org/apache/clojure_mxnet/module_test.clj +++ b/contrib/clojure-package/test/org/apache/clojure_mxnet/module_test.clj @@ -101,13 +101,20 @@ (m/init-optimizer {:optimizer (optimizer/sgd {:learning-rate 0.1 :momentum 0.9})}) (m/update) (m/save-checkpoint {:prefix "test" :epoch 0 :save-opt-states true})) - (let [mod2 (m/load-checkpoint {:prefix "test" :epoch 0 :load-optimizer-states true})] (-> mod2 (m/bind {:data-shapes [{:name "data" :shape [10 10] :layout "NT"}]}) (m/init-optimizer {:optimizer (optimizer/sgd {:learning-rate 0.1 :momentum 0.9})})) - (is (= (-> mod m/symbol sym/to-json) (-> mod2 m/symbol sym/to-json))) - (is (= (-> mod m/params first) (-> mod2 m/params 
first)))))) + (is (= (-> mod m/symbol sym/to-json) (-> mod2 m/symbol sym/to-json))) + (is (= (-> mod m/params first) (-> mod2 m/params first)))) + ;; arity 2 version of above. `load-optimizer-states` is `false` here by default, + ;; but optimizer states aren't checked here so it's not relevant to the test outcome. + (let [mod3 (m/load-checkpoint "test" 0)] + (-> mod3 + (m/bind {:data-shapes [{:name "data" :shape [10 10] :layout "NT"}]}) + (m/init-optimizer {:optimizer (optimizer/sgd {:learning-rate 0.1 :momentum 0.9})})) + (is (= (-> mod m/symbol sym/to-json) (-> mod3 m/symbol sym/to-json))) + (is (= (-> mod m/params first) (-> mod3 m/params first)))))) (deftest test-module-save-load-multi-device (let [s (sym/variable "data") @@ -321,4 +328,3 @@ (comment (m/data-shapes x)) - diff --git a/docs/api/python/contrib/onnx.md b/docs/api/python/contrib/onnx.md index 3fe2048001fc..d7c34ec1e01f 100644 --- a/docs/api/python/contrib/onnx.md +++ b/docs/api/python/contrib/onnx.md @@ -13,7 +13,7 @@ With ONNX format support for MXNet, developers can build and train models with a ``` ### Installation Instructions -- To use this module developers need to **install ONNX**, which requires the protobuf compiler to be installed separately. Please follow the [instructions to install ONNX and its dependencies](https://github.com/onnx/onnx#installation). **MXNet currently supports ONNX v1.1.1**. Once installed, you can go through the tutorials on how to use this module. +- To use this module developers need to **install ONNX**, which requires the protobuf compiler to be installed separately. Please follow the [instructions to install ONNX and its dependencies](https://github.com/onnx/onnx#installation). **MXNet currently supports ONNX v1.2.1**. Once installed, you can go through the tutorials on how to use this module. This document describes all the ONNX-MXNet APIs. @@ -24,6 +24,7 @@ 
mxnet.contrib.onnx.import_model mxnet.contrib.onnx.get_model_metadata + mxnet.contrib.onnx.import_to_gluon mxnet.contrib.onnx.export_model ``` @@ -49,10 +50,10 @@ This document describes all the ONNX-MXNet APIs. ```eval_rst -.. automodule:: mxnet.contrib.onnx - :members: import_model - :members: get_model_metadata - :members: export_model +.. automodule:: mxnet.contrib.onnx.import_model +.. automodule:: mxnet.contrib.onnx.get_model_metadata +.. automodule:: mxnet.contrib.onnx.import_to_gluon +.. automodule:: mxnet.contrib.onnx.export_model ``` diff --git a/docs/community/ecosystem.md b/docs/community/ecosystem.md index 5ca6d7a0b479..54f8c8993ea9 100644 --- a/docs/community/ecosystem.md +++ b/docs/community/ecosystem.md @@ -57,7 +57,7 @@ Community contributions to MXNet have added many new valuable features and funct ## Model Serving -* [MXNet Model Server (MMS)](https://github.com/apache/incubator-mxnet/tree/master/example/model-server/mms.md) - simple yet scalable solution for model inference. +* [MXNet Model Server (MMS)](https://github.com/awslabs/mxnet-model-server) - simple yet scalable solution for model inference. 
## Model Zoos diff --git a/docs/community/mxnet_channels.md b/docs/community/mxnet_channels.md index ef3963f7dabc..18dc1bc55ec8 100644 --- a/docs/community/mxnet_channels.md +++ b/docs/community/mxnet_channels.md @@ -2,7 +2,7 @@ Converse with the MXNet community via the following channels: -- [Forum](https://discuss.mxnet.io/): [discuss.mxnet.io](discuss.mxnet.io) +- [Forum](https://discuss.mxnet.io/): [discuss.mxnet.io](https://discuss.mxnet.io/) - [MXNet Apache developer mailing list](https://lists.apache.org/list.html?dev@mxnet.apache.org) (dev@mxnet.apache.org): To subscribe, send an email to dev-subscribe@mxnet.apache.org - [MXNet Apache user mailing list](https://lists.apache.org/list.html?user@mxnet.apache.org) (user@mxnet.apache.org): To subscribe, send an email to user-subscribe@mxnet.apache.org - [MXNet Slack channel](https://apache-mxnet.slack.com): To request an invitation to the channel please subscribe to the mailing list above and then email: dev@mxnet.apache.org diff --git a/docs/faq/env_var.md b/docs/faq/env_var.md index 881bc14fdc89..6e9a3594168f 100644 --- a/docs/faq/env_var.md +++ b/docs/faq/env_var.md @@ -152,6 +152,10 @@ When USE_PROFILER is enabled in Makefile or CMake, the following environments ca - Values: String ```(default='https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/'``` - The repository url to be used for Gluon datasets and pre-trained models. +* MXNET_HOME + - Data directory in the filesystem for storage, for example when downloading gluon models. + - Default is `~/.mxnet` on *nix, and `%APPDATA%\mxnet` on Windows. + Settings for Minimum Memory Usage --------------------------------- - Make sure ```min(MXNET_EXEC_NUM_TEMP, MXNET_GPU_WORKER_NTHREADS) = 1``` diff --git a/docs/install/index.md b/docs/install/index.md index d4704df2ee7b..57c50eb9bb06 100644 --- a/docs/install/index.md +++ b/docs/install/index.md @@ -1784,7 +1784,7 @@ Next, we install the ```graphviz``` library that we use for visualizing network
Install the latest version (3.5.1+) of R from [CRAN](https://cran.r-project.org/bin/windows/). -You can [build MXNet-R from source](windows_setup.html#install-the-mxnet-package-for-r), or you can use a pre-built binary: +You can [build MXNet-R from source](windows_setup.html#install-mxnet-package-for-r), or you can use a pre-built binary: ```r cran <- getOption("repos") @@ -1797,14 +1797,15 @@ install.packages("mxnet")
-You can [build MXNet-R from source](windows_setup.html#install-the-mxnet-package-for-r), or you can use a pre-built binary: +You can [build MXNet-R from source](windows_setup.html#install-mxnet-package-for-r), or you can use a pre-built binary: ```r -cran <- getOption("repos") -cran["dmlc"] <- "https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/CRAN/GPU" -options(repos = cran) -install.packages("mxnet") + cran <- getOption("repos") + cran["dmlc"] <- "https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/CRAN/GPU/cu92" + options(repos = cran) + install.packages("mxnet") ``` +Change cu92 to cu80, cu90 or cu91 based on your CUDA toolkit version. Currently, MXNet supports these versions of CUDA.
diff --git a/docs/install/windows_setup.md b/docs/install/windows_setup.md index 9d03474b5949..40ddeb8182d8 100755 --- a/docs/install/windows_setup.md +++ b/docs/install/windows_setup.md @@ -218,11 +218,11 @@ For GPU package: ```r cran <- getOption("repos") - cran["dmlc"] <- "https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/CRAN/GPU/cuX" + cran["dmlc"] <- "https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/CRAN/GPU/cu92" options(repos = cran) install.packages("mxnet") ``` -Change X to 80,90,91 or 92 based on your CUDA toolkit version. Currently, MXNet supports these versions of CUDA. +Change cu92 to cu80, cu90 or cu91 based on your CUDA toolkit version. Currently, MXNet supports these versions of CUDA. #### Building MXNet from Source Code(GPU) After you have installed above software, continue with the following steps to build MXNet-R: 1. Clone the MXNet github repo. diff --git a/docs/settings.ini b/docs/settings.ini index 8459486c684c..b438a071f708 100644 --- a/docs/settings.ini +++ b/docs/settings.ini @@ -4,7 +4,7 @@ build_mxnet = 0 [document_sets_default] clojure_docs = 1 doxygen_docs = 1 -r_docs = 1 +r_docs = 0 scala_docs = 1 [document_sets_1.2.0] diff --git a/docs/tutorials/scala/index.md b/docs/tutorials/scala/index.md index cd9b2e219fcc..f14337f90f08 100644 --- a/docs/tutorials/scala/index.md +++ b/docs/tutorials/scala/index.md @@ -6,8 +6,8 @@ Using MXNet-Scala is easiest with Maven. You have a couple of options for settin **Note:** Windows is not yet supported. 
-* [MXNet-Scala Setup Guide Using Maven](../install/scala_setup.html) -* [Setup Scala with MXNet and Create a MXNet-Scala Project with IntelliJ](mxnet_scala_on_intellij.html) +* [MXNet-Scala Setup Guide Using Maven](../../install/scala_setup.md) +* [Setup Scala with MXNet and Create a MXNet-Scala Project with IntelliJ](mxnet_scala_on_intellij.md) ## Tutorials diff --git a/example/image-classification/common/fit.py b/example/image-classification/common/fit.py index 67cda78172b6..b3b13053addf 100755 --- a/example/image-classification/common/fit.py +++ b/example/image-classification/common/fit.py @@ -135,6 +135,12 @@ def add_fit_args(parser): help='the epochs to ramp-up lr to scaled large-batch value') train.add_argument('--warmup-strategy', type=str, default='linear', help='the ramping-up strategy for large batch sgd') + train.add_argument('--profile-worker-suffix', type=str, default='', + help='profile workers actions into this file. During distributed training\ + filename saved will be rank1_ followed by this suffix') + train.add_argument('--profile-server-suffix', type=str, default='', + help='profile server actions into a file with name like rank1_ followed by this suffix \ + during distributed training') return train @@ -150,6 +156,17 @@ def fit(args, network, data_loader, **kwargs): if args.gc_type != 'none': kv.set_gradient_compression({'type': args.gc_type, 'threshold': args.gc_threshold}) + if args.profile_server_suffix: + mx.profiler.set_config(filename=args.profile_server_suffix, profile_all=True, profile_process='server') + mx.profiler.set_state(state='run', profile_process='server') + + if args.profile_worker_suffix: + if kv.num_workers > 1: + filename = 'rank' + str(kv.rank) + '_' + args.profile_worker_suffix + else: + filename = args.profile_worker_suffix + mx.profiler.set_config(filename=filename, profile_all=True, profile_process='worker') + mx.profiler.set_state(state='run', profile_process='worker') # logging head = '%(asctime)-15s Node[' + 
str(kv.rank) + '] %(message)s' @@ -180,7 +197,6 @@ def fit(args, network, data_loader, **kwargs): logging.info('Batch [%d]\tSpeed: %.2f samples/sec', i, args.disp_batches * args.batch_size / (time.time() - tic)) tic = time.time() - return # load model @@ -314,3 +330,8 @@ def fit(args, network, data_loader, **kwargs): epoch_end_callback=checkpoint, allow_missing=True, monitor=monitor) + + if args.profile_server_suffix: + mx.profiler.set_state(state='run', profile_process='server') + if args.profile_worker_suffix: + mx.profiler.set_state(state='run', profile_process='worker') \ No newline at end of file diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 75147cfd706d..6bbe9dfe8f0a 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -230,7 +230,19 @@ MXNET_DLL int MXRandomSeedContext(int seed, int dev_type, int dev_id); MXNET_DLL int MXNotifyShutdown(); /*! - * \brief Set up configuration of profiler + * \brief Set up configuration of profiler for the process passed as profile_process in keys + * \param num_params Number of parameters + * \param keys array of parameter keys + * \param vals array of parameter values + * \param kvstoreHandle handle to kvstore + * \return 0 when success, -1 when failure happens. + */ +MXNET_DLL int MXSetProcessProfilerConfig(int num_params, const char* const* keys, + const char* const* vals, + KVStoreHandle kvstoreHandle); + +/*! + * \brief Set up configuration of profiler for worker/current process * \param num_params Number of parameters * \param keys array of parameter keys * \param vals array of parameter values @@ -239,7 +251,21 @@ MXNET_DLL int MXNotifyShutdown(); MXNET_DLL int MXSetProfilerConfig(int num_params, const char* const* keys, const char* const* vals); /*! 
- * \brief Set up state of profiler + * \brief Set up state of profiler for either worker or server process + * \param state indicate the working state of profiler, + * profiler not running when state == 0, + * profiler running when state == 1 + * \param profile_process an int, + * when 0 command is for worker/current process, + * when 1 command is for server process + * \param kvstoreHandle handle to kvstore, needed for server process profiling + * \return 0 when success, -1 when failure happens. + */ +MXNET_DLL int MXSetProcessProfilerState(int state, int profile_process, + KVStoreHandle kvStoreHandle); + +/*! + * \brief Set up state of profiler for current process * \param state indicate the working state of profiler, * profiler not running when state == 0, * profiler running when state == 1 @@ -250,11 +276,22 @@ MXNET_DLL int MXSetProfilerState(int state); /*! * \brief Save profile and stop profiler * \param finished true if stat output should stop after this point + * \param profile_process an int, + * when 0 command is for worker/current process, + * when 1 command is for server process + * \param kvstoreHandle handle to kvstore * \return 0 when success, -1 when failure happens. */ -MXNET_DLL int MXDumpProfile(int finished); +MXNET_DLL int MXDumpProcessProfile(int finished, int profile_process, KVStoreHandle kvStoreHandle); +/*! + * \brief Save profile and stop profiler for worker/current process + * \param finished true if stat output should stop after this point + * \return 0 when success, -1 when failure happens. + */ +MXNET_DLL int MXDumpProfile(int finished); + /*! * \brief Print aggregate stats to the a string * \param out_str Will receive a pointer to the output string @@ -267,6 +304,16 @@ MXNET_DLL int MXAggregateProfileStatsPrint(const char **out_str, int reset); /*! * \brief Pause profiler tuning collection * \param paused If nonzero, profiling pauses. 
Otherwise, profiling resumes/continues + * \param profile_process integer which denotes whether to process worker or server process + * \param kvstoreHandle handle to kvstore + * \return 0 when success, -1 when failure happens. + * \note pausing and resuming is global and not recursive + */ +MXNET_DLL int MXProcessProfilePause(int paused, int profile_process, KVStoreHandle kvStoreHandle); + +/*! + * \brief Pause profiler tuning collection for worker/current process + * \param paused If nonzero, profiling pauses. Otherwise, profiling resumes/continues * \return 0 when success, -1 when failure happens. * \note pausing and resuming is global and not recursive */ @@ -2145,8 +2192,7 @@ typedef void (MXKVStoreServerController)(int head, void *controller_handle); /** - * \return Run as server (or scheduler) - * + * \brief Run as server (or scheduler) * \param handle handle to the KVStore * \param controller the user-defined server controller * \param controller_handle helper handle for implementing controller @@ -2157,8 +2203,7 @@ MXNET_DLL int MXKVStoreRunServer(KVStoreHandle handle, void *controller_handle); /** - * \return Send a command to all server nodes - * + * \brief Send a command to all server nodes * \param handle handle to the KVStore * \param cmd_id the head of the command * \param cmd_body the body of the command diff --git a/include/mxnet/kvstore.h b/include/mxnet/kvstore.h index e10bd213aa26..a73d96356132 100644 --- a/include/mxnet/kvstore.h +++ b/include/mxnet/kvstore.h @@ -38,6 +38,18 @@ #endif // MXNET_USE_DIST_KVSTORE namespace mxnet { + +/*! + * \brief enum to denote types of commands kvstore sends to server regarding profiler + * kSetConfig sets profiler configs. Similar to mx.profiler.set_config() + * kState allows changing state of profiler to stop or run + * kPause allows pausing and resuming of profiler + * kDump asks profiler to dump output + */ +enum class KVStoreServerProfilerCommand { + kSetConfig, kState, kPause, kDump +}; + /*! 
* \brief distributed key-value store * @@ -364,6 +376,20 @@ class KVStore { */ virtual void SendCommandToServers(int cmd_id, const std::string& cmd_body) { } + /** + * \brief Sends server profiler commands to all server nodes + * Only the worker with rank=0 sends the command which will be received by all servers + * \param type ProfilerCommand type + * \param params parameters for that command in the form of a string + */ + virtual void SetServerProfilerCommand(const KVStoreServerProfilerCommand type, + const std::string& params) { + LOG(INFO) << "Unable to pass server the profiler command. If you are using " + << "distributed kvstore, you need to compile with USE_DIST_KVSTORE=1." + << "If you are training on single machine, then there is no server process" + << "to profile. Please profile the worker process instead."; + } + /** * \brief the prototype of a server controller */ diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 4df794bdfe37..3d8ee0191757 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -22,11 +22,11 @@ import atexit import ctypes -import inspect import os import sys import warnings - +import inspect +import platform import numpy as np from . 
import libinfo @@ -59,6 +59,26 @@ py_str = lambda x: x +def data_dir_default(): + """ + + :return: default data directory depending on the platform and environment variables + """ + system = platform.system() + if system == 'Windows': + return os.path.join(os.environ.get('APPDATA'), 'mxnet') + else: + return os.path.join(os.path.expanduser("~"), '.mxnet') + + +def data_dir(): + """ + + :return: data directory in the filesystem for storage, for example when downloading models + """ + return os.getenv('MXNET_HOME', data_dir_default()) + + class _NullType(object): """Placeholder for arguments""" def __repr__(self): diff --git a/python/mxnet/contrib/text/embedding.py b/python/mxnet/contrib/text/embedding.py index 6598718e6b01..38defb4b90bc 100644 --- a/python/mxnet/contrib/text/embedding.py +++ b/python/mxnet/contrib/text/embedding.py @@ -34,6 +34,7 @@ from . import vocab from ... import ndarray as nd from ... import registry +from ... import base def register(embedding_cls): @@ -496,7 +497,7 @@ class GloVe(_TokenEmbedding): ---------- pretrained_file_name : str, default 'glove.840B.300d.txt' The name of the pre-trained token embedding file. - embedding_root : str, default os.path.join('~', '.mxnet', 'embeddings') + embedding_root : str, default $MXNET_HOME/embeddings The root directory for storing embedding-related files. init_unknown_vec : callback The callback used to initialize the embedding vector for the unknown token. @@ -541,7 +542,7 @@ def _get_download_file_name(cls, pretrained_file_name): return archive def __init__(self, pretrained_file_name='glove.840B.300d.txt', - embedding_root=os.path.join('~', '.mxnet', 'embeddings'), + embedding_root=os.path.join(base.data_dir(), 'embeddings'), init_unknown_vec=nd.zeros, vocabulary=None, **kwargs): GloVe._check_pretrained_file_names(pretrained_file_name) @@ -600,7 +601,7 @@ class FastText(_TokenEmbedding): ---------- pretrained_file_name : str, default 'wiki.en.vec' The name of the pre-trained token embedding file. 
- embedding_root : str, default os.path.join('~', '.mxnet', 'embeddings') + embedding_root : str, default $MXNET_HOME/embeddings The root directory for storing embedding-related files. init_unknown_vec : callback The callback used to initialize the embedding vector for the unknown token. @@ -642,7 +643,7 @@ def _get_download_file_name(cls, pretrained_file_name): return '.'.join(pretrained_file_name.split('.')[:-1])+'.zip' def __init__(self, pretrained_file_name='wiki.simple.vec', - embedding_root=os.path.join('~', '.mxnet', 'embeddings'), + embedding_root=os.path.join(base.data_dir(), 'embeddings'), init_unknown_vec=nd.zeros, vocabulary=None, **kwargs): FastText._check_pretrained_file_names(pretrained_file_name) diff --git a/python/mxnet/gluon/contrib/data/text.py b/python/mxnet/gluon/contrib/data/text.py index 98fe6b657f2b..9e78e3c2e23c 100644 --- a/python/mxnet/gluon/contrib/data/text.py +++ b/python/mxnet/gluon/contrib/data/text.py @@ -30,8 +30,7 @@ from ...data import dataset from ...utils import download, check_sha1, _get_repo_file_url from ....contrib import text -from .... import nd - +from .... import nd, base class _LanguageModelDataset(dataset._DownloadedDataset): # pylint: disable=abstract-method def __init__(self, root, namespace, vocabulary): @@ -116,7 +115,7 @@ class WikiText2(_WikiText): Parameters ---------- - root : str, default '~/.mxnet/datasets/wikitext-2' + root : str, default $MXNET_HOME/datasets/wikitext-2 Path to temp folder for storing data. segment : str, default 'train' Dataset segment. Options are 'train', 'validation', 'test'. @@ -127,7 +126,7 @@ class WikiText2(_WikiText): The sequence length of each sample, regardless of the sentence boundary. 
""" - def __init__(self, root=os.path.join('~', '.mxnet', 'datasets', 'wikitext-2'), + def __init__(self, root=os.path.join(base.data_dir(), 'datasets', 'wikitext-2'), segment='train', vocab=None, seq_len=35): self._archive_file = ('wikitext-2-v1.zip', '3c914d17d80b1459be871a5039ac23e752a53cbe') self._data_file = {'train': ('wiki.train.tokens', @@ -154,7 +153,7 @@ class WikiText103(_WikiText): Parameters ---------- - root : str, default '~/.mxnet/datasets/wikitext-103' + root : str, default $MXNET_HOME/datasets/wikitext-103 Path to temp folder for storing data. segment : str, default 'train' Dataset segment. Options are 'train', 'validation', 'test'. @@ -164,7 +163,7 @@ class WikiText103(_WikiText): seq_len : int, default 35 The sequence length of each sample, regardless of the sentence boundary. """ - def __init__(self, root=os.path.join('~', '.mxnet', 'datasets', 'wikitext-103'), + def __init__(self, root=os.path.join(base.data_dir(), 'datasets', 'wikitext-103'), segment='train', vocab=None, seq_len=35): self._archive_file = ('wikitext-103-v1.zip', '0aec09a7537b58d4bb65362fee27650eeaba625a') self._data_file = {'train': ('wiki.train.tokens', diff --git a/python/mxnet/gluon/data/vision/datasets.py b/python/mxnet/gluon/data/vision/datasets.py index 74a5aebf17bb..2c98000389ad 100644 --- a/python/mxnet/gluon/data/vision/datasets.py +++ b/python/mxnet/gluon/data/vision/datasets.py @@ -30,7 +30,7 @@ from .. import dataset from ...utils import download, check_sha1, _get_repo_file_url -from .... import nd, image, recordio +from .... import nd, image, recordio, base class MNIST(dataset._DownloadedDataset): @@ -40,7 +40,7 @@ class MNIST(dataset._DownloadedDataset): Parameters ---------- - root : str, default '~/.mxnet/datasets/mnist' + root : str, default $MXNET_HOME/datasets/mnist Path to temp folder for storing data. train : bool, default True Whether to load the training or testing set. 
@@ -51,7 +51,7 @@ class MNIST(dataset._DownloadedDataset): transform=lambda data, label: (data.astype(np.float32)/255, label) """ - def __init__(self, root=os.path.join('~', '.mxnet', 'datasets', 'mnist'), + def __init__(self, root=os.path.join(base.data_dir(), 'datasets', 'mnist'), train=True, transform=None): self._train = train self._train_data = ('train-images-idx3-ubyte.gz', @@ -101,7 +101,7 @@ class FashionMNIST(MNIST): Parameters ---------- - root : str, default '~/.mxnet/datasets/fashion-mnist' + root : str, default $MXNET_HOME/datasets/fashion-mnist Path to temp folder for storing data. train : bool, default True Whether to load the training or testing set. @@ -112,7 +112,7 @@ class FashionMNIST(MNIST): transform=lambda data, label: (data.astype(np.float32)/255, label) """ - def __init__(self, root=os.path.join('~', '.mxnet', 'datasets', 'fashion-mnist'), + def __init__(self, root=os.path.join(base.data_dir(), 'datasets', 'fashion-mnist'), train=True, transform=None): self._train = train self._train_data = ('train-images-idx3-ubyte.gz', @@ -134,7 +134,7 @@ class CIFAR10(dataset._DownloadedDataset): Parameters ---------- - root : str, default '~/.mxnet/datasets/cifar10' + root : str, default $MXNET_HOME/datasets/cifar10 Path to temp folder for storing data. train : bool, default True Whether to load the training or testing set. @@ -145,7 +145,7 @@ class CIFAR10(dataset._DownloadedDataset): transform=lambda data, label: (data.astype(np.float32)/255, label) """ - def __init__(self, root=os.path.join('~', '.mxnet', 'datasets', 'cifar10'), + def __init__(self, root=os.path.join(base.data_dir(), 'datasets', 'cifar10'), train=True, transform=None): self._train = train self._archive_file = ('cifar-10-binary.tar.gz', 'fab780a1e191a7eda0f345501ccd62d20f7ed891') @@ -197,7 +197,7 @@ class CIFAR100(CIFAR10): Parameters ---------- - root : str, default '~/.mxnet/datasets/cifar100' + root : str, default $MXNET_HOME/datasets/cifar100 Path to temp folder for storing data. 
fine_label : bool, default False Whether to load the fine-grained (100 classes) or coarse-grained (20 super-classes) labels. @@ -210,7 +210,7 @@ class CIFAR100(CIFAR10): transform=lambda data, label: (data.astype(np.float32)/255, label) """ - def __init__(self, root=os.path.join('~', '.mxnet', 'datasets', 'cifar100'), + def __init__(self, root=os.path.join(base.data_dir(), 'datasets', 'cifar100'), fine_label=False, train=True, transform=None): self._train = train self._archive_file = ('cifar-100-binary.tar.gz', 'a0bb982c76b83111308126cc779a992fa506b90b') diff --git a/python/mxnet/gluon/model_zoo/model_store.py b/python/mxnet/gluon/model_zoo/model_store.py index 7eead68f0dbf..11ac47bae905 100644 --- a/python/mxnet/gluon/model_zoo/model_store.py +++ b/python/mxnet/gluon/model_zoo/model_store.py @@ -21,8 +21,10 @@ __all__ = ['get_model_file', 'purge'] import os import zipfile +import logging from ..utils import download, check_sha1 +from ... import base, util _model_sha1 = {name: checksum for checksum, name in [ ('44335d1f0046b328243b32a26a4fbd62d9057b45', 'alexnet'), @@ -68,7 +70,7 @@ def short_hash(name): raise ValueError('Pretrained model for {name} is not available.'.format(name=name)) return _model_sha1[name][:8] -def get_model_file(name, root=os.path.join('~', '.mxnet', 'models')): +def get_model_file(name, root=os.path.join(base.data_dir(), 'models')): r"""Return location for the pretrained on local file system. This function will download from online model zoo when model cannot be found or has mismatch. @@ -78,7 +80,7 @@ def get_model_file(name, root=os.path.join('~', '.mxnet', 'models')): ---------- name : str Name of the model. - root : str, default '~/.mxnet/models' + root : str, default $MXNET_HOME/models Location for keeping the model parameters. 
Returns @@ -95,12 +97,11 @@ def get_model_file(name, root=os.path.join('~', '.mxnet', 'models')): if check_sha1(file_path, sha1_hash): return file_path else: - print('Mismatch in the content of model file detected. Downloading again.') + logging.warning('Mismatch in the content of model file detected. Downloading again.') else: - print('Model file is not found. Downloading.') + logging.info('Model file not found. Downloading to %s.', file_path) - if not os.path.exists(root): - os.makedirs(root) + util.makedirs(root) zip_file_path = os.path.join(root, file_name+'.zip') repo_url = os.environ.get('MXNET_GLUON_REPO', apache_repo_url) @@ -118,12 +119,12 @@ def get_model_file(name, root=os.path.join('~', '.mxnet', 'models')): else: raise ValueError('Downloaded file has different hash. Please try again.') -def purge(root=os.path.join('~', '.mxnet', 'models')): +def purge(root=os.path.join(base.data_dir(), 'models')): r"""Purge all pretrained model files in local file store. Parameters ---------- - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ root = os.path.expanduser(root) diff --git a/python/mxnet/gluon/model_zoo/vision/__init__.py b/python/mxnet/gluon/model_zoo/vision/__init__.py index a6e5dc137d48..7d33ce409b21 100644 --- a/python/mxnet/gluon/model_zoo/vision/__init__.py +++ b/python/mxnet/gluon/model_zoo/vision/__init__.py @@ -101,7 +101,7 @@ def get_model(name, **kwargs): Number of classes for the output layer. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. 
Returns diff --git a/python/mxnet/gluon/model_zoo/vision/alexnet.py b/python/mxnet/gluon/model_zoo/vision/alexnet.py index fdb006258c2a..daf4617cd12e 100644 --- a/python/mxnet/gluon/model_zoo/vision/alexnet.py +++ b/python/mxnet/gluon/model_zoo/vision/alexnet.py @@ -25,6 +25,7 @@ from ....context import cpu from ...block import HybridBlock from ... import nn +from .... import base # Net class AlexNet(HybridBlock): @@ -68,7 +69,7 @@ def hybrid_forward(self, F, x): # Constructor def alexnet(pretrained=False, ctx=cpu(), - root=os.path.join('~', '.mxnet', 'models'), **kwargs): + root=os.path.join(base.data_dir(), 'models'), **kwargs): r"""AlexNet model from the `"One weird trick..." `_ paper. Parameters @@ -77,7 +78,7 @@ def alexnet(pretrained=False, ctx=cpu(), Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default $MXNET_HOME/models Location for keeping the model parameters. """ net = AlexNet(**kwargs) diff --git a/python/mxnet/gluon/model_zoo/vision/densenet.py b/python/mxnet/gluon/model_zoo/vision/densenet.py index b03f5ce8d52a..83febd3658c4 100644 --- a/python/mxnet/gluon/model_zoo/vision/densenet.py +++ b/python/mxnet/gluon/model_zoo/vision/densenet.py @@ -26,6 +26,7 @@ from ...block import HybridBlock from ... import nn from ...contrib.nn import HybridConcurrent, Identity +from .... import base # Helpers def _make_dense_block(num_layers, bn_size, growth_rate, dropout, stage_index): @@ -122,7 +123,7 @@ def hybrid_forward(self, F, x): # Constructor def get_densenet(num_layers, pretrained=False, ctx=cpu(), - root=os.path.join('~', '.mxnet', 'models'), **kwargs): + root=os.path.join(base.data_dir(), 'models'), **kwargs): r"""Densenet-BC model from the `"Densely Connected Convolutional Networks" `_ paper. @@ -134,7 +135,7 @@ def get_densenet(num_layers, pretrained=False, ctx=cpu(), Whether to load the pretrained weights for model. 
ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default $MXNET_HOME/models Location for keeping the model parameters. """ num_init_features, growth_rate, block_config = densenet_spec[num_layers] @@ -154,7 +155,7 @@ def densenet121(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_densenet(121, **kwargs) @@ -169,7 +170,7 @@ def densenet161(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_densenet(161, **kwargs) @@ -184,7 +185,7 @@ def densenet169(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_densenet(169, **kwargs) @@ -199,7 +200,7 @@ def densenet201(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_densenet(201, **kwargs) diff --git a/python/mxnet/gluon/model_zoo/vision/inception.py b/python/mxnet/gluon/model_zoo/vision/inception.py index 7c54691f1b59..6bdc526a6a13 100644 --- a/python/mxnet/gluon/model_zoo/vision/inception.py +++ b/python/mxnet/gluon/model_zoo/vision/inception.py @@ -26,6 +26,7 @@ from ...block import HybridBlock from ... 
import nn from ...contrib.nn import HybridConcurrent +from .... import base # Helpers def _make_basic_conv(**kwargs): @@ -199,7 +200,7 @@ def hybrid_forward(self, F, x): # Constructor def inception_v3(pretrained=False, ctx=cpu(), - root=os.path.join('~', '.mxnet', 'models'), **kwargs): + root=os.path.join(base.data_dir(), 'models'), **kwargs): r"""Inception v3 model from `"Rethinking the Inception Architecture for Computer Vision" `_ paper. @@ -210,7 +211,7 @@ def inception_v3(pretrained=False, ctx=cpu(), Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default $MXNET_HOME/models Location for keeping the model parameters. """ net = Inception3(**kwargs) diff --git a/python/mxnet/gluon/model_zoo/vision/mobilenet.py b/python/mxnet/gluon/model_zoo/vision/mobilenet.py index 1a2c9b946190..1a84e05af208 100644 --- a/python/mxnet/gluon/model_zoo/vision/mobilenet.py +++ b/python/mxnet/gluon/model_zoo/vision/mobilenet.py @@ -30,6 +30,7 @@ from ... import nn from ....context import cpu from ...block import HybridBlock +from .... import base # Helpers @@ -188,7 +189,7 @@ def hybrid_forward(self, F, x): # Constructor def get_mobilenet(multiplier, pretrained=False, ctx=cpu(), - root=os.path.join('~', '.mxnet', 'models'), **kwargs): + root=os.path.join(base.data_dir(), 'models'), **kwargs): r"""MobileNet model from the `"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" `_ paper. @@ -203,7 +204,7 @@ def get_mobilenet(multiplier, pretrained=False, ctx=cpu(), Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default $MXNET_HOME/models Location for keeping the model parameters. 
""" net = MobileNet(multiplier, **kwargs) @@ -219,7 +220,7 @@ def get_mobilenet(multiplier, pretrained=False, ctx=cpu(), def get_mobilenet_v2(multiplier, pretrained=False, ctx=cpu(), - root=os.path.join('~', '.mxnet', 'models'), **kwargs): + root=os.path.join(base.data_dir(), 'models'), **kwargs): r"""MobileNetV2 model from the `"Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation" @@ -235,7 +236,7 @@ def get_mobilenet_v2(multiplier, pretrained=False, ctx=cpu(), Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default $MXNET_HOME/models Location for keeping the model parameters. """ net = MobileNetV2(multiplier, **kwargs) diff --git a/python/mxnet/gluon/model_zoo/vision/resnet.py b/python/mxnet/gluon/model_zoo/vision/resnet.py index da279b89583e..48390decb11b 100644 --- a/python/mxnet/gluon/model_zoo/vision/resnet.py +++ b/python/mxnet/gluon/model_zoo/vision/resnet.py @@ -32,6 +32,7 @@ from ....context import cpu from ...block import HybridBlock from ... import nn +from .... import base # Helpers def _conv3x3(channels, stride, in_channels): @@ -356,7 +357,7 @@ def hybrid_forward(self, F, x): # Constructor def get_resnet(version, num_layers, pretrained=False, ctx=cpu(), - root=os.path.join('~', '.mxnet', 'models'), **kwargs): + root=os.path.join(base.data_dir(), 'models'), **kwargs): r"""ResNet V1 model from `"Deep Residual Learning for Image Recognition" `_ paper. ResNet V2 model from `"Identity Mappings in Deep Residual Networks" @@ -372,7 +373,7 @@ def get_resnet(version, num_layers, pretrained=False, ctx=cpu(), Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default $MXNET_HOME/models Location for keeping the model parameters. 
""" assert num_layers in resnet_spec, \ @@ -400,7 +401,7 @@ def resnet18_v1(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_resnet(1, 18, **kwargs) @@ -415,7 +416,7 @@ def resnet34_v1(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_resnet(1, 34, **kwargs) @@ -430,7 +431,7 @@ def resnet50_v1(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_resnet(1, 50, **kwargs) @@ -445,7 +446,7 @@ def resnet101_v1(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_resnet(1, 101, **kwargs) @@ -460,7 +461,7 @@ def resnet152_v1(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_resnet(1, 152, **kwargs) @@ -475,7 +476,7 @@ def resnet18_v2(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. 
- root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_resnet(2, 18, **kwargs) @@ -490,7 +491,7 @@ def resnet34_v2(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_resnet(2, 34, **kwargs) @@ -505,7 +506,7 @@ def resnet50_v2(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_resnet(2, 50, **kwargs) @@ -520,7 +521,7 @@ def resnet101_v2(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_resnet(2, 101, **kwargs) @@ -535,7 +536,7 @@ def resnet152_v2(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_resnet(2, 152, **kwargs) diff --git a/python/mxnet/gluon/model_zoo/vision/squeezenet.py b/python/mxnet/gluon/model_zoo/vision/squeezenet.py index aaff4c36dfa0..b97d1274a6f0 100644 --- a/python/mxnet/gluon/model_zoo/vision/squeezenet.py +++ b/python/mxnet/gluon/model_zoo/vision/squeezenet.py @@ -26,6 +26,7 @@ from ...block import HybridBlock from ... import nn from ...contrib.nn import HybridConcurrent +from .... 
import base # Helpers def _make_fire(squeeze_channels, expand1x1_channels, expand3x3_channels): @@ -110,7 +111,7 @@ def hybrid_forward(self, F, x): # Constructor def get_squeezenet(version, pretrained=False, ctx=cpu(), - root=os.path.join('~', '.mxnet', 'models'), **kwargs): + root=os.path.join(base.data_dir(), 'models'), **kwargs): r"""SqueezeNet model from the `"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size" `_ paper. SqueezeNet 1.1 model from the `official SqueezeNet repo @@ -126,7 +127,7 @@ def get_squeezenet(version, pretrained=False, ctx=cpu(), Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default $MXNET_HOME/models Location for keeping the model parameters. """ net = SqueezeNet(version, **kwargs) @@ -145,7 +146,7 @@ def squeezenet1_0(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_squeezenet('1.0', **kwargs) @@ -162,7 +163,7 @@ def squeezenet1_1(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_squeezenet('1.1', **kwargs) diff --git a/python/mxnet/gluon/model_zoo/vision/vgg.py b/python/mxnet/gluon/model_zoo/vision/vgg.py index a3b1685b4130..9a740e633182 100644 --- a/python/mxnet/gluon/model_zoo/vision/vgg.py +++ b/python/mxnet/gluon/model_zoo/vision/vgg.py @@ -30,6 +30,7 @@ from ....initializer import Xavier from ...block import HybridBlock from ... import nn +from .... 
import base class VGG(HybridBlock): @@ -94,7 +95,7 @@ def hybrid_forward(self, F, x): # Constructors def get_vgg(num_layers, pretrained=False, ctx=cpu(), - root=os.path.join('~', '.mxnet', 'models'), **kwargs): + root=os.path.join(base.data_dir(), 'models'), **kwargs): r"""VGG model from the `"Very Deep Convolutional Networks for Large-Scale Image Recognition" `_ paper. @@ -106,7 +107,7 @@ def get_vgg(num_layers, pretrained=False, ctx=cpu(), Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default $MXNET_HOME/models Location for keeping the model parameters. """ layers, filters = vgg_spec[num_layers] @@ -128,7 +129,7 @@ def vgg11(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_vgg(11, **kwargs) @@ -143,7 +144,7 @@ def vgg13(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_vgg(13, **kwargs) @@ -158,7 +159,7 @@ def vgg16(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_vgg(16, **kwargs) @@ -173,7 +174,7 @@ def vgg19(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. 
- root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ return get_vgg(19, **kwargs) @@ -189,7 +190,7 @@ def vgg11_bn(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ kwargs['batch_norm'] = True @@ -206,7 +207,7 @@ def vgg13_bn(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ kwargs['batch_norm'] = True @@ -223,7 +224,7 @@ def vgg16_bn(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ kwargs['batch_norm'] = True @@ -240,7 +241,7 @@ def vgg19_bn(**kwargs): Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. - root : str, default '~/.mxnet/models' + root : str, default '$MXNET_HOME/models' Location for keeping the model parameters. """ kwargs['batch_norm'] = True diff --git a/python/mxnet/gluon/rnn/rnn_layer.py b/python/mxnet/gluon/rnn/rnn_layer.py index 418c497ce832..4a7a0be2bc30 100644 --- a/python/mxnet/gluon/rnn/rnn_layer.py +++ b/python/mxnet/gluon/rnn/rnn_layer.py @@ -23,12 +23,11 @@ from __future__ import print_function __all__ = ['RNN', 'LSTM', 'GRU'] -from ... import ndarray -from .. import Block +from ... import ndarray, symbol +from .. import HybridBlock, tensor_types from . 
import rnn_cell - -class _RNNLayer(Block): +class _RNNLayer(HybridBlock): """Implementation of recurrent layers.""" def __init__(self, hidden_size, num_layers, layout, dropout, bidirectional, input_size, @@ -52,33 +51,28 @@ def __init__(self, hidden_size, num_layers, layout, self._gates = {'rnn_relu': 1, 'rnn_tanh': 1, 'lstm': 4, 'gru': 3}[mode] - self.i2h_weight = [] - self.h2h_weight = [] - self.i2h_bias = [] - self.h2h_bias = [] - ng, ni, nh = self._gates, input_size, hidden_size for i in range(num_layers): - for j in (['l', 'r'] if self._dir == 2 else ['l']): - self.i2h_weight.append( - self.params.get('%s%d_i2h_weight'%(j, i), shape=(ng*nh, ni), - init=i2h_weight_initializer, - allow_deferred_init=True)) - self.h2h_weight.append( - self.params.get('%s%d_h2h_weight'%(j, i), shape=(ng*nh, nh), - init=h2h_weight_initializer, - allow_deferred_init=True)) - self.i2h_bias.append( - self.params.get('%s%d_i2h_bias'%(j, i), shape=(ng*nh,), - init=i2h_bias_initializer, - allow_deferred_init=True)) - self.h2h_bias.append( - self.params.get('%s%d_h2h_bias'%(j, i), shape=(ng*nh,), - init=h2h_bias_initializer, - allow_deferred_init=True)) + for j in ['l', 'r'][:self._dir]: + self._register_param('{}{}_i2h_weight'.format(j, i), + shape=(ng*nh, ni), + init=i2h_weight_initializer) + self._register_param('{}{}_h2h_weight'.format(j, i), + shape=(ng*nh, nh), + init=h2h_weight_initializer) + self._register_param('{}{}_i2h_bias'.format(j, i), + shape=(ng*nh,), + init=i2h_bias_initializer) + self._register_param('{}{}_h2h_bias'.format(j, i), + shape=(ng*nh,), + init=h2h_bias_initializer) ni = nh * self._dir - self._unfused = self._unfuse() + def _register_param(self, name, shape, init): + p = self.params.get(name, shape=shape, init=init, + allow_deferred_init=True) + setattr(self, name, p) + return p def __repr__(self): s = '{name}({mapping}, {_layout}' @@ -89,12 +83,23 @@ def __repr__(self): if self._dir == 2: s += ', bidirectional' s += ')' - shape = self.i2h_weight[0].shape + 
shape = self.l0_i2h_weight.shape mapping = '{0} -> {1}'.format(shape[1] if shape[1] else None, shape[0] // self._gates) return s.format(name=self.__class__.__name__, mapping=mapping, **self.__dict__) + def _collect_params_with_prefix(self, prefix=''): + if prefix: + prefix += '.' + def convert_key(key): # for compatibility with old parameter format + key = key.split('_') + return '_unfused.{}.{}_cell.{}'.format(key[0][1:], key[0][0], '_'.join(key[1:])) + ret = {prefix + convert_key(key) : val for key, val in self._reg_params.items()} + for name, child in self._children.items(): + ret.update(child._collect_params_with_prefix(prefix + name)) + return ret + def state_info(self, batch_size=0): raise NotImplementedError @@ -111,7 +116,7 @@ def _unfuse(self): 'gru': lambda **kwargs: rnn_cell.GRUCell(self._hidden_size, **kwargs)}[self._mode] - stack = rnn_cell.SequentialRNNCell(prefix=self.prefix, params=self.params) + stack = rnn_cell.HybridSequentialRNNCell(prefix=self.prefix, params=self.params) with stack.name_scope(): ni = self._input_size for i in range(self._num_layers): @@ -169,55 +174,42 @@ def begin_state(self, batch_size=0, func=ndarray.zeros, **kwargs): states.append(func(name='%sh0_%d'%(self.prefix, i), **info)) return states - def forward(self, inputs, states=None): - batch_size = inputs.shape[self._layout.find('N')] + def hybrid_forward(self, F, inputs, states=None, **kwargs): + if F is ndarray: + batch_size = inputs.shape[self._layout.find('N')] skip_states = states is None if skip_states: - states = self.begin_state(batch_size, ctx=inputs.context) - if isinstance(states, ndarray.NDArray): + if F is ndarray: + states = self.begin_state(batch_size, ctx=inputs.context) + else: + states = self.begin_state(0, func=symbol.zeros) + if isinstance(states, tensor_types): states = [states] - for state, info in zip(states, self.state_info(batch_size)): - if state.shape != info['shape']: - raise ValueError( - "Invalid recurrent state shape. 
Expecting %s, got %s."%( - str(info['shape']), str(state.shape))) - if self._input_size == 0: - for i in range(self._dir): - self.i2h_weight[i].shape = (self._gates*self._hidden_size, inputs.shape[2]) - self.i2h_weight[i]._finish_deferred_init() - out = self._forward_kernel(inputs, states) + if F is ndarray: + for state, info in zip(states, self.state_info(batch_size)): + if state.shape != info['shape']: + raise ValueError( + "Invalid recurrent state shape. Expecting %s, got %s."%( + str(info['shape']), str(state.shape))) + out = self._forward_kernel(F, inputs, states, **kwargs) # out is (output, state) return out[0] if skip_states else out - def _forward(self, inputs, states): - """forward using gluon cell""" - ns = len(states) - axis = self._layout.find('T') - states = sum(zip(*((j for j in i) for i in states)), ()) - outputs, states = self._unfused.unroll( - inputs.shape[axis], inputs, states, - layout=self._layout, merge_outputs=True) - new_states = [] - for i in range(ns): - state = ndarray.concat(*(j.reshape((1,)+j.shape) for j in states[i::ns]), dim=0) - new_states.append(state) - - return outputs, new_states - - def _forward_kernel(self, inputs, states): + def _forward_kernel(self, F, inputs, states, **kwargs): """ forward using CUDNN or CPU kenrel""" if self._layout == 'NTC': - inputs = ndarray.swapaxes(inputs, dim1=0, dim2=1) - ctx = inputs.context - params = sum(zip(self.i2h_weight, self.h2h_weight), ()) - params += sum(zip(self.i2h_bias, self.h2h_bias), ()) - params = (i.data(ctx).reshape((-1,)) for i in params) - params = ndarray.concat(*params, dim=0) - - rnn = ndarray.RNN(inputs, params, *states, state_size=self._hidden_size, - num_layers=self._num_layers, bidirectional=self._dir == 2, - p=self._dropout, state_outputs=True, mode=self._mode) + inputs = F.swapaxes(inputs, dim1=0, dim2=1) + params = (kwargs['{}{}_{}_{}'.format(d, l, g, t)].reshape(-1) + for t in ['weight', 'bias'] + for l in range(self._num_layers) + for d in ['l', 'r'][:self._dir] + 
for g in ['i2h', 'h2h']) + params = F._internal._rnn_param_concat(*params, dim=0) + + rnn = F.RNN(inputs, params, *states, state_size=self._hidden_size, + num_layers=self._num_layers, bidirectional=self._dir == 2, + p=self._dropout, state_outputs=True, mode=self._mode) if self._mode == 'lstm': outputs, states = rnn[0], [rnn[1], rnn[2]] @@ -225,7 +217,7 @@ def _forward_kernel(self, inputs, states): outputs, states = rnn[0], [rnn[1]] if self._layout == 'NTC': - outputs = ndarray.swapaxes(outputs, dim1=0, dim2=1) + outputs = F.swapaxes(outputs, dim1=0, dim2=1) return outputs, states diff --git a/python/mxnet/kvstore.py b/python/mxnet/kvstore.py index 609733659753..a54817501391 100644 --- a/python/mxnet/kvstore.py +++ b/python/mxnet/kvstore.py @@ -28,6 +28,7 @@ from .base import check_call, string_types, mx_uint, py_str from .base import NDArrayHandle, KVStoreHandle from . import optimizer as opt +from .profiler import set_kvstore_handle def _ctype_key_value(keys, vals): """ @@ -88,7 +89,8 @@ def _get_kvstore_server_command_type(command): 'kSetMultiPrecision': 1, 'kStopServer': 2, 'kSyncMode': 3, - 'kSetGradientCompression': 4} + 'kSetGradientCompression': 4, + 'kSetProfilerParams': 5} assert (command in command_types), "Unknown command type to send to server" return command_types[command] @@ -670,4 +672,6 @@ def create(name='local'): handle = KVStoreHandle() check_call(_LIB.MXKVStoreCreate(c_str(name), ctypes.byref(handle))) - return KVStore(handle) + kv = KVStore(handle) + set_kvstore_handle(kv.handle) + return kv diff --git a/python/mxnet/optimizer.py b/python/mxnet/optimizer.py index f758af5f982c..ab7dadb17a54 100644 --- a/python/mxnet/optimizer.py +++ b/python/mxnet/optimizer.py @@ -24,7 +24,7 @@ import warnings import numpy from .base import py_str -from .ndarray import (NDArray, zeros, clip, sqrt, cast, maximum, abs as NDabs) +from .ndarray import (NDArray, zeros, clip, sqrt, cast, maximum, abs as NDabs, array, multiply) from .ndarray import (sgd_update, 
sgd_mom_update, adam_update, rmsprop_update, rmspropalex_update, mp_sgd_update, mp_sgd_mom_update, square, ftrl_update, ftml_update, signsgd_update, signum_update) @@ -449,7 +449,7 @@ class SGD(Optimizer): **lazy updates** are applied by:: for row in grad.indices: - rescaled_grad[row] = lr * rescale_grad * clip(grad[row], clip_gradient) + wd * weight[row] + rescaled_grad[row] = lr * (rescale_grad * clip(grad[row], clip_gradient) + wd * weight[row]) state[row] = momentum[row] * state[row] + rescaled_grad[row] weight[row] = weight[row] - state[row] @@ -462,7 +462,7 @@ class SGD(Optimizer): Otherwise, **standard updates** are applied by:: - rescaled_grad = lr * rescale_grad * clip(grad, clip_gradient) + wd * weight + rescaled_grad = lr * (rescale_grad * clip(grad, clip_gradient) + wd * weight) state = momentum * state + rescaled_grad weight = weight - state @@ -616,6 +616,14 @@ class FTML(Optimizer): *FTML - Follow the Moving Leader in Deep Learning*, available at http://proceedings.mlr.press/v70/zheng17a/zheng17a.pdf. + Denote time step by t. The optimizer updates the weight by:: + + rescaled_grad = clip(grad * rescale_grad + wd * weight, clip_gradient) + v = beta2 * v + (1 - beta2) * square(rescaled_grad) + d_t = (1 - power(beta1, t)) / lr * square_root(v / (1 - power(beta2, t))) + epsilon) + z = beta1 * z + (1 - beta1) * rescaled_grad - (d_t - beta1 * d_(t-1)) * weight + weight = - z / d_t + This optimizer accepts the following parameters in addition to those accepted by :class:`.Optimizer`. @@ -1080,6 +1088,13 @@ class AdaGrad(Optimizer): Methods for Online Learning and Stochastic Optimization*, and available at http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf. 
+ This optimizer updates each weight by:: + + grad = clip(grad * rescale_grad, clip_gradient) + history += square(grad) + div = grad / sqrt(history + float_stable_eps) + weight += (div + weight * wd) * -lr + This optimizer accepts the following parameters in addition to those accepted by :class:`.Optimizer`. @@ -1207,6 +1222,14 @@ class AdaDelta(Optimizer): This class implements AdaDelta, an optimizer described in *ADADELTA: An adaptive learning rate method*, available at https://arxiv.org/abs/1212.5701. + This optimizer updates each weight by:: + + grad = clip(grad * rescale_grad + wd * weight, clip_gradient) + acc_grad = rho * acc_grad + (1. - rho) * grad * grad + delta = sqrt(acc_delta + epsilon) / sqrt(acc_grad + epsilon) * grad + acc_delta = rho * acc_delta + (1. - rho) * delta * delta + weight -= (delta + wd * weight) + This optimizer accepts the following parameters in addition to those accepted by :class:`.Optimizer`. @@ -1333,6 +1356,13 @@ class Adamax(Optimizer): It is a variant of Adam based on the infinity norm available at http://arxiv.org/abs/1412.6980 Section 7. + The optimizer updates the weight by:: + + grad = clip(grad * rescale_grad + wd * weight, clip_gradient) + m = beta1 * m_t + (1 - beta1) * grad + u = maximum(beta2 * u, abs(grad)) + weight -= lr / (1 - beta1**t) * m / u + This optimizer accepts the following parameters in addition to those accepted by :class:`.Optimizer`. 
diff --git a/python/mxnet/profiler.py b/python/mxnet/profiler.py index 0e7a31c687ef..0b5e85b1eb54 100644 --- a/python/mxnet/profiler.py +++ b/python/mxnet/profiler.py @@ -22,8 +22,13 @@ from __future__ import absolute_import import ctypes import warnings -from .base import _LIB, check_call, c_str, ProfileHandle, c_str_array, py_str +from .base import _LIB, check_call, c_str, ProfileHandle, c_str_array, py_str, KVStoreHandle +profiler_kvstore_handle = KVStoreHandle() + +def set_kvstore_handle(handle): + global profiler_kvstore_handle + profiler_kvstore_handle = handle def set_config(**kwargs): """Set up the configure of profiler (only accepts keyword arguments). @@ -49,12 +54,17 @@ def set_config(**kwargs): aggregate_stats : boolean, whether to maintain aggregate stats in memory for console dump. Has some negative performance impact. + profile_process : string + whether to profile kvstore `server` or `worker`. + server can only be profiled when kvstore is of type dist. + if this is not passed, defaults to `worker` """ kk = kwargs.keys() vv = kwargs.values() - check_call(_LIB.MXSetProfilerConfig(len(kwargs), - c_str_array([key for key in kk]), - c_str_array([str(val) for val in vv]))) + check_call(_LIB.MXSetProcessProfilerConfig(len(kwargs), + c_str_array([key for key in kk]), + c_str_array([str(val) for val in vv]), + profiler_kvstore_handle)) def profiler_set_config(mode='symbolic', filename='profile.json'): @@ -73,10 +83,10 @@ def profiler_set_config(mode='symbolic', filename='profile.json'): keys = c_str_array([key for key in ["profile_" + mode, "filename"]]) values = c_str_array([str(val) for val in [True, filename]]) assert len(keys) == len(values) - check_call(_LIB.MXSetProfilerConfig(len(keys), keys, values)) + check_call(_LIB.MXSetProcessProfilerConfig(len(keys), keys, values, profiler_kvstore_handle)) -def set_state(state='stop'): +def set_state(state='stop', profile_process='worker'): """Set up the profiler state to 'run' or 'stop'. 
Parameters @@ -84,9 +94,16 @@ def set_state(state='stop'): state : string, optional Indicates whether to run the profiler, can be 'stop' or 'run'. Default is `stop`. + profile_process : string + whether to profile kvstore `server` or `worker`. + server can only be profiled when kvstore is of type dist. + if this is not passed, defaults to `worker` """ state2int = {'stop': 0, 'run': 1} - check_call(_LIB.MXSetProfilerState(ctypes.c_int(state2int[state]))) + profile_process2int = {'worker': 0, 'server': 1} + check_call(_LIB.MXSetProcessProfilerState(ctypes.c_int(state2int[state]), + profile_process2int[profile_process], + profiler_kvstore_handle)) def profiler_set_state(state='stop'): @@ -102,7 +119,7 @@ def profiler_set_state(state='stop'): 'Please use profiler.set_state() instead') set_state(state) -def dump(finished=True): +def dump(finished=True, profile_process='worker'): """Dump profile and stop profiler. Use this to save profile in advance in case your program cannot exit normally. @@ -111,9 +128,16 @@ def dump(finished=True): finished : boolean Indicates whether to stop statistic output (dumping) after this dump. Default is True + profile_process : string + whether to profile kvstore `server` or `worker`. + server can only be profiled when kvstore is of type dist. + if this is not passed, defaults to `worker` """ - fin = 1 if finished is True else False - check_call(_LIB.MXDumpProfile(fin)) + fin = 1 if finished is True else 0 + profile_process2int = {'worker': 0, 'server': 1} + check_call(_LIB.MXDumpProcessProfile(fin, + profile_process2int[profile_process], + profiler_kvstore_handle)) def dump_profile(): @@ -138,14 +162,37 @@ def dumps(reset=False): return py_str(debug_str.value) -def pause(): - """Pause profiling.""" - check_call(_LIB.MXProfilePause(int(1))) +def pause(profile_process='worker'): + """Pause profiling. + + Parameters + ---------- + profile_process : string + whether to profile kvstore `server` or `worker`. 
+ server can only be profiled when kvstore is of type dist. + if this is not passed, defaults to `worker` + """ + profile_process2int = {'worker': 0, 'server': 1} + check_call(_LIB.MXProcessProfilePause(int(1), + profile_process2int[profile_process], + profiler_kvstore_handle)) + +def resume(profile_process='worker'): + """ + Resume paused profiling. -def resume(): - """Resume paused profiling.""" - check_call(_LIB.MXProfilePause(int(0))) + Parameters + ---------- + profile_process : string + whether to profile kvstore `server` or `worker`. + server can only be profiled when kvstore is of type dist. + if this is not passed, defaults to `worker` + """ + profile_process2int = {'worker': 0, 'server': 1} + check_call(_LIB.MXProcessProfilePause(int(0), + profile_process2int[profile_process], + profiler_kvstore_handle)) class Domain(object): diff --git a/python/mxnet/symbol/contrib.py b/python/mxnet/symbol/contrib.py index 884288364b3d..1d42cf7c18f8 100644 --- a/python/mxnet/symbol/contrib.py +++ b/python/mxnet/symbol/contrib.py @@ -486,12 +486,12 @@ def _union_inputs(*graphs): input_id_to_loc = {} # Dict[int, int], given id(sym), input_id_to_loc maps it # to a `loc`, where inputs[loc] = sym for graph in graphs: - # input_syms: all inputs to the `graph` - name_to_input_syms = {sym.name: sym for sym in _get_graph_inputs(graph)} # some loop_vars are inputs to `graph`, some are not name_to_loop_vars = {sym.name: sym for sym in loop_vars} # other inputs to `graph` created by cut_graph name_to_cut_g_syms = {sym.list_outputs()[0]: sym for sym in _cut_subgraph(graph)} + # input_syms: all inputs to the `graph` + name_to_input_syms = {sym.name: sym for sym in _get_graph_inputs(graph)} # also we collect the mapping from var's name to var's loc in loop_vars name_to_var_locs = {sym.name: i for i, sym in enumerate(loop_vars)} # collect arguments for each subgraph @@ -644,12 +644,12 @@ def _union_inputs(*graphs): input_id_to_loc = {} # Dict[int, int], given id(sym), input_id_to_loc 
maps it # to a `loc`, where inputs[loc] = sym for graph in graphs: - # input_syms: all inputs to the `graph` - name_to_input_syms = {sym.name: sym for sym in _get_graph_inputs(graph)} # some input_vars are inputs to `graph`, some are not name_to_input_vars = {sym.name: sym for sym in inputs} # other inputs to `graph` created by cut_graph name_to_cut_g_syms = {sym.list_outputs()[0]: sym for sym in _cut_subgraph(graph)} + # input_syms: all inputs to the `graph` + name_to_input_syms = {sym.name: sym for sym in _get_graph_inputs(graph)} # collect arguments for each subgraph input_locs = [] # results from the second step for name in graph.list_inputs(): @@ -696,5 +696,4 @@ def _union_inputs(*graphs): else_input_locs=else_input_locs, num_outputs=then_num_outputs ) - result = _to_symbol_tuple(result, "result") - return list(result) + return [result[i] for i in range(then_num_outputs)] diff --git a/python/mxnet/util.py b/python/mxnet/util.py new file mode 100644 index 000000000000..57bc2bf76389 --- /dev/null +++ b/python/mxnet/util.py @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""general utility functions""" + +import os +import sys + + +def makedirs(d): + """Create directories recursively if they don't exist. 
os.makedirs(exist_ok=True) is not + available in Python2""" + if sys.version_info[0] < 3: + from distutils.dir_util import mkpath + mkpath(d) + else: + os.makedirs(d, exist_ok=True) diff --git a/scala-package/core/scripts/get_cifar_data.sh b/scala-package/core/scripts/get_cifar_data.sh index 9ec1c39a4f99..b061c1895e4a 100755 --- a/scala-package/core/scripts/get_cifar_data.sh +++ b/scala-package/core/scripts/get_cifar_data.sh @@ -20,8 +20,8 @@ set -e -if [ ! -z "$MXNET_DATA_DIR" ]; then - data_path="$MXNET_DATA_DIR" +if [ ! -z "$MXNET_HOME" ]; then + data_path="$MXNET_HOME" else data_path="./data" fi diff --git a/scala-package/core/scripts/get_mnist_data.sh b/scala-package/core/scripts/get_mnist_data.sh index 97e151bf8333..ded206fbb134 100755 --- a/scala-package/core/scripts/get_mnist_data.sh +++ b/scala-package/core/scripts/get_mnist_data.sh @@ -20,8 +20,8 @@ set -e -if [ ! -z "$MXNET_DATA_DIR" ]; then - data_path="$MXNET_DATA_DIR" +if [ ! -z "$MXNET_HOME" ]; then + data_path="$MXNET_HOME" else data_path="./data" fi diff --git a/scala-package/core/src/test/scala/org/apache/mxnet/TestUtil.scala b/scala-package/core/src/test/scala/org/apache/mxnet/TestUtil.scala index 1187757a0331..4fc8ec9826c1 100644 --- a/scala-package/core/src/test/scala/org/apache/mxnet/TestUtil.scala +++ b/scala-package/core/src/test/scala/org/apache/mxnet/TestUtil.scala @@ -24,7 +24,7 @@ class TestUtil { * @return Data direcotry path ()may be relative) */ def getDataDirectory: String = { - var dataDir = System.getenv("MXNET_DATA_DIR") + var dataDir = System.getenv("MXNET_HOME") if(dataDir == null) { dataDir = "data" } else { diff --git a/scala-package/examples/src/main/scala/org/apache/mxnetexamples/gan/GanMnist.scala b/scala-package/examples/src/main/scala/org/apache/mxnetexamples/gan/GanMnist.scala index 6186989b74f6..70846eebfb8e 100644 --- a/scala-package/examples/src/main/scala/org/apache/mxnetexamples/gan/GanMnist.scala +++ 
b/scala-package/examples/src/main/scala/org/apache/mxnetexamples/gan/GanMnist.scala @@ -181,7 +181,7 @@ object GanMnist { try { parser.parseArgument(args.toList.asJava) - val dataPath = if (anst.mnistDataPath == null) System.getenv("MXNET_DATA_DIR") + val dataPath = if (anst.mnistDataPath == null) System.getenv("MXNET_HOME") else anst.mnistDataPath assert(dataPath != null) diff --git a/scala-package/examples/src/main/scala/org/apache/mxnetexamples/imclassification/TrainMnist.scala b/scala-package/examples/src/main/scala/org/apache/mxnetexamples/imclassification/TrainMnist.scala index b0ecc7d29ccf..bd0ce45ffe5f 100644 --- a/scala-package/examples/src/main/scala/org/apache/mxnetexamples/imclassification/TrainMnist.scala +++ b/scala-package/examples/src/main/scala/org/apache/mxnetexamples/imclassification/TrainMnist.scala @@ -112,7 +112,7 @@ object TrainMnist { try { parser.parseArgument(args.toList.asJava) - val dataPath = if (inst.dataDir == null) System.getenv("MXNET_DATA_DIR") + val dataPath = if (inst.dataDir == null) System.getenv("MXNET_HOME") else inst.dataDir val (dataShape, net) = diff --git a/scala-package/examples/src/main/scala/org/apache/mxnetexamples/infer/imageclassifier/ImageClassifierExample.scala b/scala-package/examples/src/main/scala/org/apache/mxnetexamples/infer/imageclassifier/ImageClassifierExample.scala index e886b908ba26..3bbd780d39b9 100644 --- a/scala-package/examples/src/main/scala/org/apache/mxnetexamples/infer/imageclassifier/ImageClassifierExample.scala +++ b/scala-package/examples/src/main/scala/org/apache/mxnetexamples/infer/imageclassifier/ImageClassifierExample.scala @@ -119,13 +119,13 @@ object ImageClassifierExample { parser.parseArgument(args.toList.asJava) - val modelPathPrefix = if (inst.modelPathPrefix == null) System.getenv("MXNET_DATA_DIR") + val modelPathPrefix = if (inst.modelPathPrefix == null) System.getenv("MXNET_HOME") else inst.modelPathPrefix - val inputImagePath = if (inst.inputImagePath == null) 
System.getenv("MXNET_DATA_DIR") + val inputImagePath = if (inst.inputImagePath == null) System.getenv("MXNET_HOME") else inst.inputImagePath - val inputImageDir = if (inst.inputImageDir == null) System.getenv("MXNET_DATA_DIR") + val inputImageDir = if (inst.inputImageDir == null) System.getenv("MXNET_HOME") else inst.inputImageDir val singleOutput = runInferenceOnSingleImage(modelPathPrefix, inputImagePath, context) diff --git a/scala-package/examples/src/test/scala/org/apache/mxnetexamples/multitask/MultiTaskSuite.scala b/scala-package/examples/src/test/scala/org/apache/mxnetexamples/multitask/MultiTaskSuite.scala index dab977019097..b86f6751e45b 100644 --- a/scala-package/examples/src/test/scala/org/apache/mxnetexamples/multitask/MultiTaskSuite.scala +++ b/scala-package/examples/src/test/scala/org/apache/mxnetexamples/multitask/MultiTaskSuite.scala @@ -44,21 +44,24 @@ import scala.collection.mutable.{ArrayBuffer, ListBuffer} * This will run as a part of "make scalatest" */ class MultiTaskSuite extends FunSuite { - test("Multitask Test") { val logger = LoggerFactory.getLogger(classOf[MultiTaskSuite]) - logger.info("Multitask Test...") + if (System.getenv().containsKey("SCALA_TEST_ON_GPU") && + System.getenv("SCALA_TEST_ON_GPU").toInt == 1) { + logger.info("Multitask Test...") - val batchSize = 100 - val numEpoch = 10 - val ctx = Context.cpu() + val batchSize = 100 + val numEpoch = 3 + val ctx = Context.gpu() - val modelPath = ExampleMultiTask.getTrainingData - val (executor, evalMetric) = ExampleMultiTask.train(batchSize, numEpoch, ctx, modelPath) - evalMetric.get.foreach { case (name, value) => - assert(value >= 0.95f) + val modelPath = ExampleMultiTask.getTrainingData + val (executor, evalMetric) = ExampleMultiTask.train(batchSize, numEpoch, ctx, modelPath) + evalMetric.get.foreach { case (name, value) => + assert(value >= 0.95f) + } + executor.dispose() + } else { + logger.info("GPU test only, skipped...") } - executor.dispose() } - } diff --git 
a/scala-package/native/osx-x86_64-cpu/pom.xml b/scala-package/native/osx-x86_64-cpu/pom.xml index 3f66fe68e041..e1c63104f9ad 100644 --- a/scala-package/native/osx-x86_64-cpu/pom.xml +++ b/scala-package/native/osx-x86_64-cpu/pom.xml @@ -73,6 +73,8 @@ -Wl,-exported_symbol,_Java_* -Wl,-x ${lddeps} + -force_load ${project.basedir}/../../../lib/libmxnet.a + -force_load ${project.basedir}/../../../3rdparty/tvm/nnvm/lib/libnnvm.a ${ldflags} diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 118af6793156..ed513c0d7785 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -443,6 +443,8 @@ MXNET_DLL int MXNDArrayReshape64(NDArrayHandle handle, API_BEGIN(); NDArray *arr = static_cast(handle); nnvm::Tuple shape(dims, dims+ndim); + CHECK_GT(arr->shape().Size(), 0) << "Source ndarray's shape is undefined. Input shape: " + << arr->shape(); TShape new_shape = mxnet::op::InferReshapeShape(shape, arr->shape(), reverse); *ptr = arr->ReshapeWithRecord(new_shape); *out = ptr; diff --git a/src/c_api/c_api_profile.cc b/src/c_api/c_api_profile.cc index c5841775794d..9c03b339e3ca 100644 --- a/src/c_api/c_api_profile.cc +++ b/src/c_api/c_api_profile.cc @@ -29,6 +29,7 @@ #include #include #include +#include #include #include "./c_api_common.h" #include "../profiler/profiler.h" @@ -197,6 +198,10 @@ struct PythonProfileObjects { }; static PythonProfileObjects python_profile_objects; +enum class ProfileProcess { + kWorker, kServer +}; + struct ProfileConfigParam : public dmlc::Parameter { bool profile_all; bool profile_symbolic; @@ -207,6 +212,7 @@ struct ProfileConfigParam : public dmlc::Parameter { bool continuous_dump; float dump_period; bool aggregate_stats; + int profile_process; DMLC_DECLARE_PARAMETER(ProfileConfigParam) { DMLC_DECLARE_FIELD(profile_all).set_default(false) .describe("Profile all."); @@ -228,6 +234,13 @@ struct ProfileConfigParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(aggregate_stats).set_default(false) .describe("Maintain aggregate stats, required for 
MXDumpAggregateStats. Note that " "this can have anegative performance impact."); + DMLC_DECLARE_FIELD(profile_process) + .add_enum("worker", static_cast(ProfileProcess::kWorker)) + .add_enum("server", static_cast(ProfileProcess::kServer)) + .set_default(static_cast(ProfileProcess::kWorker)) + .describe("Specifies which process to profile: " + "worker: this is default. for single node training it should always be worker." + "server: for distributed training, this profiles server process"); } }; @@ -248,7 +261,8 @@ struct ProfileMarkerScopeParam : public dmlc::Parameter DMLC_REGISTER_PARAMETER(ProfileMarkerScopeParam); -int MXSetProfilerConfig(int num_params, const char* const* keys, const char* const* vals) { +int MXSetProcessProfilerConfig(int num_params, const char* const* keys, const char* const* vals, + KVStoreHandle kvstoreHandle) { mxnet::IgnoreProfileCallScope ignore; API_BEGIN(); std::vector> kwargs; @@ -260,19 +274,37 @@ int MXSetProfilerConfig(int num_params, const char* const* keys, const char* con } ProfileConfigParam param; param.Init(kwargs); - int mode = 0; - if (param.profile_api || param.profile_all) { mode |= profiler::Profiler::kAPI; } - if (param.profile_symbolic || param.profile_all) { mode |= profiler::Profiler::kSymbolic; } - if (param.profile_imperative || param.profile_all) { mode |= profiler::Profiler::kImperative; } - if (param.profile_memory || param.profile_all) { mode |= profiler::Profiler::kMemory; } - profiler::Profiler::Get()->SetConfig(profiler::Profiler::ProfilerMode(mode), - std::string(param.filename), - param.continuous_dump, - param.dump_period, - param.aggregate_stats); + if (static_cast(param.profile_process) == ProfileProcess::kServer) { + std::ostringstream os; + for (int i = 0; i < num_params; ++i) { + // this will be sent to the server now, those configs shouldn't have profile server again + if (strcmp(keys[i], "profile_process") == 0) continue; + os << keys[i] << ":" << vals[i]; + if (i != num_params - 1) os << ","; + } 
+ CHECK(kvstoreHandle) << "KVStoreHandle passed to profiler is null"; + static_cast(kvstoreHandle)->SetServerProfilerCommand( + mxnet::KVStoreServerProfilerCommand::kSetConfig, os.str()); + } else { + int mode = 0; + if (param.profile_api || param.profile_all) { mode |= profiler::Profiler::kAPI; } + if (param.profile_symbolic || param.profile_all) { mode |= profiler::Profiler::kSymbolic; } + if (param.profile_imperative || + param.profile_all) { mode |= profiler::Profiler::kImperative; } + if (param.profile_memory || param.profile_all) { mode |= profiler::Profiler::kMemory; } + profiler::Profiler::Get()->SetConfig(profiler::Profiler::ProfilerMode(mode), + std::string(param.filename), + param.continuous_dump, + param.dump_period, + param.aggregate_stats); + } API_END(); } +int MXSetProfilerConfig(int num_params, const char* const* keys, const char* const* vals) { + return MXSetProcessProfilerConfig(num_params, keys, vals, nullptr); +} + int MXAggregateProfileStatsPrint(const char **out_str, int reset) { MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); API_BEGIN(); @@ -293,19 +325,40 @@ int MXAggregateProfileStatsPrint(const char **out_str, int reset) { } int MXDumpProfile(int finished) { + return MXDumpProcessProfile(finished, static_cast(ProfileProcess::kWorker), nullptr); +} + +int MXDumpProcessProfile(int finished, int profile_process, KVStoreHandle kvStoreHandle) { mxnet::IgnoreProfileCallScope ignore; API_BEGIN(); + if (static_cast(profile_process) == ProfileProcess::kServer) { + CHECK(kvStoreHandle) << "Kvstore Handle passed to profiler is null"; + static_cast(kvStoreHandle)->SetServerProfilerCommand( + mxnet::KVStoreServerProfilerCommand::kDump, + std::to_string(finished)); + } else { profiler::Profiler *profiler = profiler::Profiler::Get(); CHECK(profiler->IsEnableOutput()) << "Profiler hasn't been run. 
Config and start profiler first"; profiler->DumpProfile(finished != 0); + } API_END() } int MXSetProfilerState(int state) { + return MXSetProcessProfilerState(state, static_cast(ProfileProcess::kWorker), nullptr); +} + +int MXSetProcessProfilerState(int state, int profile_process, KVStoreHandle kvStoreHandle) { mxnet::IgnoreProfileCallScope ignore; // state, kNotRunning: 0, kRunning: 1 API_BEGIN(); + if (static_cast(profile_process) == ProfileProcess::kServer) { + CHECK(kvStoreHandle) << "Kvstore Handle passed to profiler is null"; + static_cast(kvStoreHandle)->SetServerProfilerCommand( + mxnet::KVStoreServerProfilerCommand::kState, + std::to_string(state)); + } else { switch (state) { case profiler::Profiler::kNotRunning: profiler::vtune::vtune_pause(); @@ -315,6 +368,7 @@ int MXSetProfilerState(int state) { break; } profiler::Profiler::Get()->SetState(profiler::Profiler::ProfilerState(state)); + } API_END(); } @@ -450,8 +504,18 @@ int MXProfileDurationStop(ProfileHandle duration_handle) { } int MXProfilePause(int paused) { + return MXProcessProfilePause(paused, static_cast(ProfileProcess::kWorker), nullptr); +} + +int MXProcessProfilePause(int paused, int profile_process, KVStoreHandle kvStoreHandle) { mxnet::IgnoreProfileCallScope ignore; API_BEGIN(); + if (static_cast(profile_process) == ProfileProcess::kServer) { + CHECK(kvStoreHandle) << "Kvstore Handle passed to profiler is null"; + static_cast(kvStoreHandle)->SetServerProfilerCommand( + mxnet::KVStoreServerProfilerCommand::kPause, + std::to_string(paused)); + } else { if (paused) { profiler::vtune::vtune_pause(); profiler::Profiler::Get()->set_paused(true); @@ -459,6 +523,7 @@ int MXProfilePause(int paused) { profiler::Profiler::Get()->set_paused(false); profiler::vtune::vtune_resume(); } + } API_END(); } diff --git a/src/executor/graph_executor.cc b/src/executor/graph_executor.cc index 7386de4d12e3..33c6f574a044 100644 --- a/src/executor/graph_executor.cc +++ b/src/executor/graph_executor.cc @@ -1282,7 
+1282,7 @@ void GraphExecutor::InitDataEntryMemory(std::vector* shared_pool) { for (size_t i = 0; i < pool_info.size(); i++) { sorted_pool_index.push_back(i); } - auto pool_comparator = [&pool_info](int lhs, int rhs){ + auto pool_comparator = [&pool_info](size_t lhs, size_t rhs){ return pool_info[lhs].bytes > pool_info[rhs].bytes; }; std::sort(sorted_pool_index.begin(), sorted_pool_index.end(), pool_comparator); diff --git a/src/imperative/cached_op.cc b/src/imperative/cached_op.cc index d4da99ea9e85..1e7f8e0de1b3 100644 --- a/src/imperative/cached_op.cc +++ b/src/imperative/cached_op.cc @@ -821,12 +821,11 @@ OpStatePtr CachedOp::DynamicForward( const auto& dispatch_modes = g.GetAttr("dispatch_mode"); - if (recording && !inlining_) Imperative::Get()->set_is_recording(false); - + // If we are already recording, we don't need RunGraph to record all + // computation again. RunGraph(false, idx, arrays, 0, idx.num_nodes(), std::move(array_reqs), - std::move(ref_count), &states, dispatch_modes); - - Imperative::Get()->set_is_recording(recording); + std::move(ref_count), &states, dispatch_modes, + !recording || inlining_); return op_state; } @@ -947,7 +946,8 @@ void CachedOp::DynamicBackward( const auto& dispatch_modes = g.GetAttr("dispatch_mode"); RunGraph(retain_graph, idx, arrays, num_forward_nodes, idx.num_nodes(), - std::move(array_reqs), std::move(ref_count), &states, dispatch_modes); + std::move(array_reqs), std::move(ref_count), &states, dispatch_modes, + Imperative::Get()->is_recording()); if (retain_graph) { buff.resize(num_forward_entries); diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc index e1654259a2fb..0c5ff8417754 100644 --- a/src/imperative/imperative.cc +++ b/src/imperative/imperative.cc @@ -495,7 +495,8 @@ std::vector Imperative::Backward( int prev_bulk_size = Engine::Get()->set_bulk_size(backward_bulk_size_); RunGraph(retain_graph, idx, arrays, num_forward_nodes, idx.num_nodes(), - std::move(array_reqs), std::move(ref_count), 
&states, dispatch_modes); + std::move(array_reqs), std::move(ref_count), &states, dispatch_modes, + is_recording()); Engine::Get()->set_bulk_size(prev_bulk_size); set_is_recording(prev_recording); diff --git a/src/imperative/imperative_utils.cc b/src/imperative/imperative_utils.cc index 464aefc220de..c84a3b9be502 100644 --- a/src/imperative/imperative_utils.cc +++ b/src/imperative/imperative_utils.cc @@ -30,7 +30,8 @@ void RunGraph( std::vector&& array_reqs, std::vector&& ref_count, std::vector *p_states, - const DispatchModeVector &dispatch_modes) { + const DispatchModeVector &dispatch_modes, + bool recording) { using namespace nnvm; using namespace imperative; static auto& createop = nnvm::Op::GetAttr("FCreateOpState"); @@ -40,7 +41,6 @@ void RunGraph( const auto imp = Imperative::Get(); std::vector& states = *p_states; - bool recording = imp->is_recording(); std::vector ndinputs, ndoutputs; ShapeVector arg_shapes; diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h index 6daf96e60d0b..9c86843ca7af 100644 --- a/src/imperative/imperative_utils.h +++ b/src/imperative/imperative_utils.h @@ -994,7 +994,8 @@ void RunGraph(const bool retain_graph, std::vector&& array_reqs, std::vector&& ref_count, std::vector *p_states, - const DispatchModeVector &dispatch_modes); + const DispatchModeVector &dispatch_modes, + bool recording); } // namespace imperative } // namespace mxnet diff --git a/src/initialize.cc b/src/initialize.cc index 1fd92628e9b2..342b0ee0141b 100644 --- a/src/initialize.cc +++ b/src/initialize.cc @@ -26,6 +26,9 @@ #include #include #include "./engine/openmp.h" +#if MXNET_USE_OPENCV +#include +#endif // MXNET_USE_OPENCV namespace mxnet { #if MXNET_USE_SIGNAL_HANDLER && DMLC_LOG_STACK_TRACE @@ -57,6 +60,9 @@ class LibraryInitializer { // Make children single threaded since they are typically workers dmlc::SetEnv("MXNET_CPU_WORKER_NTHREADS", 1); dmlc::SetEnv("OMP_NUM_THREADS", 1); +#if MXNET_USE_OPENCV + cv::setNumThreads(0); // 
disable opencv threading +#endif // MXNET_USE_OPENCV engine::OpenMP::Get()->set_enabled(false); Engine::Get()->Start(); }); diff --git a/src/kvstore/gradient_compression.cc b/src/kvstore/gradient_compression.cc index e94a0570d1f4..e4a06fa9a1f2 100644 --- a/src/kvstore/gradient_compression.cc +++ b/src/kvstore/gradient_compression.cc @@ -23,31 +23,14 @@ * \author Rahul Huilgol */ -#include #include +#include "kvstore_local.h" #include "gradient_compression.h" #include "gradient_compression-inl.h" namespace mxnet { namespace kvstore { -/*! - * \brief Splits a string into smaller strings using char as delimiter - * Example: "a,b,c,,d" is split into ["a","b","c","","d"] - * \param s string to split - * \param delim char to split string around - * \param result container for tokens extracted after splitting - */ -template -void split(const std::string &s, const char delim, Out result) { - std::stringstream ss; - ss.str(s); - std::string item; - while (std::getline(ss, item, delim)) { - *(result++) = item; - } -} - DMLC_REGISTER_PARAMETER(GradientCompressionParam); GradientCompression::GradientCompression() { @@ -90,7 +73,7 @@ std::string GradientCompression::EncodeParams() { void GradientCompression::DecodeParams(const std::string &s) { std::vector elems; - split(s, ',', std::back_inserter(elems)); + mxnet::kvstore::split(s, ',', std::back_inserter(elems)); type_ = static_cast(stoi(elems[0])); if (elems.size() > 1) { if (!elems[1].empty()) { diff --git a/src/kvstore/kvstore_dist.h b/src/kvstore/kvstore_dist.h index 7e2f5cb5faa9..23fbf67474ee 100644 --- a/src/kvstore/kvstore_dist.h +++ b/src/kvstore/kvstore_dist.h @@ -93,6 +93,15 @@ class KVStoreDist : public KVStoreLocal { } } + void SetServerProfilerCommand(const KVStoreServerProfilerCommand type, + const std::string& params) override { + if (get_rank() == 0) { + SendCommandToServers(static_cast(CommandType::kSetProfilerParams), + params + std::to_string(static_cast(type))); + } + } + + void Barrier() override { 
ps::Postoffice::Get()->Barrier(ps_worker_->get_customer()->customer_id(), ps::kWorkerGroup); } diff --git a/src/kvstore/kvstore_dist_server.h b/src/kvstore/kvstore_dist_server.h index 451fb78a6229..372b58dbbf3d 100644 --- a/src/kvstore/kvstore_dist_server.h +++ b/src/kvstore/kvstore_dist_server.h @@ -24,6 +24,9 @@ */ #ifndef MXNET_KVSTORE_KVSTORE_DIST_SERVER_H_ #define MXNET_KVSTORE_KVSTORE_DIST_SERVER_H_ +#include +#include +#include #include #include #include @@ -32,8 +35,7 @@ #include #include #include -#include "ps/ps.h" -#include "mxnet/kvstore.h" +#include "../profiler/profiler.h" #include "../operator/tensor/elemwise_binary_op-inl.h" #include "../operator/tensor/init_op.h" @@ -42,7 +44,8 @@ namespace kvstore { // maintain same order in frontend. enum class CommandType { - kController, kSetMultiPrecision, kStopServer, kSyncMode, kSetGradientCompression, + kController, kSetMultiPrecision, kStopServer, kSyncMode, + kSetGradientCompression, kSetProfilerParams }; enum class RequestType { @@ -164,6 +167,7 @@ class KVStoreDistServer { } ~KVStoreDistServer() { + profiler::Profiler::Get()->SetState(profiler::Profiler::ProfilerState(0)); delete ps_server_; } @@ -194,27 +198,37 @@ class KVStoreDistServer { void CommandHandle(const ps::SimpleData& recved, ps::SimpleApp* app) { CommandType recved_type = static_cast(recved.head); - if (recved_type == CommandType::kStopServer) { - exec_.Stop(); - } else if (recved_type == CommandType::kSyncMode) { - sync_mode_ = true; - } else if (recved_type == CommandType::kSetGradientCompression) { - gradient_compression_->DecodeParams(recved.body); - } else if (recved_type == CommandType::kSetMultiPrecision) { - // uses value 1 for message id from frontend - if (!multi_precision_) { - multi_precision_ = true; - CreateMultiPrecisionCopies(); - } - } else if (recved_type == CommandType::kController) { - // value of 0 - // let the main thread to execute ctrl, which is necessary for python - exec_.Exec([this, recved]() { - 
CHECK(controller_); - controller_(recved.head, recved.body); - }); - } else { - LOG(FATAL) << "Unknown command type received " << recved.head; + switch (recved_type) { + case CommandType::kStopServer: + exec_.Stop(); + break; + case CommandType::kSyncMode: + sync_mode_ = true; + break; + case CommandType::kSetGradientCompression: + gradient_compression_->DecodeParams(recved.body); + break; + case CommandType::kSetProfilerParams: + // last char is the type of profiler command + ProcessServerProfilerCommands(static_cast + (recved.body.back() - '0'), + recved.body); + break; + case CommandType::kSetMultiPrecision: + // uses value 1 for message id from frontend + if (!multi_precision_) { + multi_precision_ = true; + CreateMultiPrecisionCopies(); + } + break; + case CommandType::kController: + // this uses value 0 for message id from frontend + // let the main thread to execute ctrl, which is necessary for python + exec_.Exec([this, recved]() { + CHECK(controller_); + controller_(recved.head, recved.body); + }); + break; } app->Response(recved); } @@ -225,11 +239,11 @@ class KVStoreDistServer { * some keys are initialized before optimizer is set. 
*/ void CreateMultiPrecisionCopies() { - for (auto const& stored_entry : store_) { + for (auto const &stored_entry : store_) { const int key = stored_entry.first; - const NDArray& stored = stored_entry.second; + const NDArray &stored = stored_entry.second; if (stored.dtype() != mshadow::kFloat32) { - auto& stored_realt = store_realt_[key]; + auto &stored_realt = store_realt_[key]; if (stored.storage_type() == kRowSparseStorage) { stored_realt = NDArray(kRowSparseStorage, stored.shape(), stored.ctx(), true, mshadow::kFloat32); @@ -237,7 +251,7 @@ class KVStoreDistServer { stored_realt = NDArray(stored.shape(), stored.ctx(), false, mshadow::kFloat32); } - auto& update = update_buf_[key]; + auto &update = update_buf_[key]; if (!update.merged.is_none()) { if (update.merged.storage_type() == kRowSparseStorage) { update.merged = NDArray(kRowSparseStorage, update.merged.shape(), update.merged.ctx(), @@ -254,11 +268,60 @@ class KVStoreDistServer { CopyFromTo(stored, stored_realt); } } - for (auto const& stored_realt_entry : store_realt_) { + for (auto const &stored_realt_entry : store_realt_) { stored_realt_entry.second.WaitToRead(); } } + void ProcessServerProfilerCommands(KVStoreServerProfilerCommand type, const std::string& body) { + switch (type) { + case KVStoreServerProfilerCommand::kSetConfig: + SetProfilerConfig(body.substr(0, body.size() - 1)); + break; + case KVStoreServerProfilerCommand::kState: + MXSetProfilerState(static_cast(body.front() - '0')); + break; + case KVStoreServerProfilerCommand::kPause: + MXProfilePause(static_cast(body.front() - '0')); + break; + case KVStoreServerProfilerCommand::kDump: + MXDumpProfile(static_cast(body.front() - '0')); + break; + } + } + + void SetProfilerConfig(std::string params_str) { + std::vector elems; + mxnet::kvstore::split(params_str, ',', std::back_inserter(elems)); + std::vector ckeys; + std::vector cvals; + ckeys.reserve(elems.size()); + cvals.reserve(elems.size()); + + for (size_t i=0; i < elems.size(); i++) { + 
std::vector parts; + mxnet::kvstore::split(elems[i], ':', std::back_inserter(parts)); + CHECK_EQ(parts.size(), 2) << "Improper profiler config passed from worker"; + CHECK(!parts[0].empty()) << "ProfilerConfig parameter is empty"; + CHECK(!parts[1].empty()) << "ProfilerConfig value is empty for parameter "<< parts[0]; + if (parts[0] == "filename") { + parts[1] = "rank" + std::to_string(ps::MyRank()) + "_" + parts[1]; + } + char* ckey = new char[parts[0].length() + 1]; + std::snprintf(ckey, parts[0].length() + 1, "%s", parts[0].c_str()); + ckeys.push_back(ckey); + + char* cval = new char[parts[1].length() + 1]; + std::snprintf(cval, parts[1].length() + 1, "%s", parts[1].c_str()); + cvals.push_back(cval); + } + MXSetProfilerConfig(elems.size(), &ckeys[0], &cvals[0]); + for (size_t i=0; i < ckeys.size(); i++) { + delete[] ckeys[i]; + delete[] cvals[i]; + } + } + void DataHandleEx(const ps::KVMeta& req_meta, const ps::KVPairs& req_data, ps::KVServer* server) { diff --git a/src/kvstore/kvstore_local.h b/src/kvstore/kvstore_local.h index 324bc2c9558a..4e004a3a3008 100644 --- a/src/kvstore/kvstore_local.h +++ b/src/kvstore/kvstore_local.h @@ -40,6 +40,22 @@ namespace mxnet { namespace kvstore { +/*! 
+ * \brief Splits a string into smaller strings using char as delimiter + * Example: "a,b,c,,d" is split into ["a","b","c","","d"] + * \param s string to split + * \param delim char to split string around + * \param result container for tokens extracted after splitting + */ +template +void split(const std::string &s, const char delim, Out result) { + std::stringstream ss; + ss.str(s); + std::string item; + while (std::getline(ss, item, delim)) { + *(result++) = item; + } +} enum KeyType { kUndefinedKey = -1, diff --git a/src/operator/nn/concat.cc b/src/operator/nn/concat.cc index 266ccb1b1a14..7c7f403d6985 100644 --- a/src/operator/nn/concat.cc +++ b/src/operator/nn/concat.cc @@ -74,6 +74,65 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, return dshape.Size() != 0; } +// Concat for RNN param deals with the reverse shape inference from output +// for the special case of concatenating RNN parameters. +// The first (and sometimes the second) input may be unknown on the target axis. +// If the two inputs are unknown, they always have the same shape. 
+static bool RNNParamConcatShape(const nnvm::NodeAttrs& attrs, + std::vector *in_shape, + std::vector *out_shape) { + using namespace mshadow; + const ConcatParam& param_ = nnvm::get(attrs.parsed); + CHECK_EQ(in_shape->size(), static_cast(param_.num_args)); + TShape dshape; + index_t size = 0; + int num_zero = 0; + int axis = -1; + for (int i = 0; i < param_.num_args; ++i) { + TShape tmp = (*in_shape)[i]; + if (tmp.ndim()) { + axis = CheckAxis(param_.dim, tmp.ndim()); + num_zero += tmp[axis] == 0; + size += tmp[axis]; + tmp[axis] = 0; + shape_assign(&dshape, tmp); + } + } + + TShape tmp = (*out_shape)[0]; + if (tmp.ndim()) { + axis = CheckAxis(param_.dim, tmp.ndim()); + tmp[axis] = 0; + shape_assign(&dshape, tmp); + } + + if (dshape.ndim() == 0) return false; + + for (int i = 0; i < param_.num_args; ++i) { + CHECK(shape_assign(&(*in_shape)[i], dshape)) + << "Incompatible input shape: expected " << dshape << ", got " << (*in_shape)[i]; + } + + if (!num_zero) dshape[axis] = size; + CHECK(shape_assign(&(*out_shape)[0], dshape)) + << "Incompatible output shape: expected " << dshape << ", got " << (*out_shape)[0]; + if ((*out_shape)[0][axis] != 0 && num_zero) { + int residual = (*out_shape)[0][axis] - size; + CHECK_GE(residual, 0) + << "Input size already exceeds output size. Residual: " << residual; + CHECK(num_zero <= 2 && num_zero >= 0) + << "Expecting 1 or 2 inputs that need shape inference. 
Got: " << num_zero; + bool need_infer = !(*out_shape)[0].Size(); + for (int i = 0; i < num_zero; i++) { + (*in_shape)[i*2][axis] = residual / num_zero; + need_infer = need_infer || !(*in_shape)[i].Size(); + } + return !need_infer; + } + + return dshape.Size() != 0; +} + static bool ConcatType(const nnvm::NodeAttrs& attrs, std::vector *in_type, std::vector *out_type) { @@ -228,6 +287,34 @@ struct ConcatGrad { DMLC_REGISTER_PARAMETER(ConcatParam); +#define CONCAT_FORWARD_ATTRS \ +.set_num_inputs([](const NodeAttrs& attrs) { \ + const ConcatParam& params = nnvm::get(attrs.parsed); \ + return params.num_args; \ +}) \ +.set_num_outputs(1) \ +.set_attr_parser(ParamParser) \ +.set_attr("FListInputNames", \ + [](const NodeAttrs& attrs) { \ + const ConcatParam& params = nnvm::get(attrs.parsed); \ + std::vector ret; \ + for (int i = 0; i < params.num_args; ++i) { \ + ret.push_back(std::string("arg") + std::to_string(i)); \ + } \ + return ret; \ +}) \ +.set_attr("FListOutputNames", \ + [](const NodeAttrs& attrs) { \ + return std::vector{"output"}; \ +}) \ +.set_attr("FInferType", ConcatType) \ +.set_attr("FInferStorageType", ConcatForwardInferStorageType) \ +.set_attr("FCompute", ConcatCompute) \ +.set_attr("FComputeEx", ConcatComputeExCPU) \ +.set_attr("FGradient", ConcatGrad{"_backward_Concat"}) \ +.set_attr("key_var_num_args", "num_args") + + NNVM_REGISTER_OP(Concat) MXNET_ADD_SPARSE_OP_ALIAS(concat) .add_alias("concat") @@ -268,37 +355,13 @@ Example:: [ 5., 5., 8., 8.]] )code" ADD_FILELINE) -.set_num_inputs([](const NodeAttrs& attrs) { - const ConcatParam& params = nnvm::get(attrs.parsed); - return params.num_args; -}) -.set_num_outputs(1) -.set_attr_parser(ParamParser) -.set_attr("FListInputNames", - [](const NodeAttrs& attrs) { - const ConcatParam& params = nnvm::get(attrs.parsed); - std::vector ret; - for (int i = 0; i < params.num_args; ++i) { - ret.push_back(std::string("arg") + std::to_string(i)); - } - return ret; -}) -.set_attr("FListOutputNames", - [](const 
NodeAttrs& attrs) { - return std::vector{"output"}; -}) #if MXNET_USE_MKLDNN == 1 .set_attr("FResourceRequest", [](const NodeAttrs& n) { return std::vector{ResourceRequest::kTempSpace}; }) #endif +CONCAT_FORWARD_ATTRS .set_attr("FInferShape", ConcatShape) -.set_attr("FInferType", ConcatType) -.set_attr("FInferStorageType", ConcatForwardInferStorageType) -.set_attr("FCompute", ConcatCompute) -.set_attr("FComputeEx", ConcatComputeExCPU) -.set_attr("FGradient", ConcatGrad{"_backward_Concat"}) -.set_attr("key_var_num_args", "num_args") .add_argument("data", "NDArray-or-Symbol[]", "List of arrays to concatenate") .add_arguments(ConcatParam::__FIELDS__()); @@ -320,5 +383,19 @@ NNVM_REGISTER_OP(_backward_Concat) #endif .set_attr("FCompute", ConcatGradCompute); +// _rnn_param_concat is a custom concat op with specialized infer_shape, +// which handles the case where the first one or two inputs may have +// unknown shape that can be inferred from output shape. +NNVM_REGISTER_OP(_rnn_param_concat) +#if MXNET_USE_MKLDNN == 1 +.set_attr("FResourceRequest", [](const NodeAttrs& n) { + return std::vector{ResourceRequest::kTempSpace}; +}) +#endif +CONCAT_FORWARD_ATTRS +.set_attr("FInferShape", RNNParamConcatShape) +.add_argument("data", "NDArray-or-Symbol[]", "List of arrays to concatenate") +.add_arguments(ConcatParam::__FIELDS__()); + } // namespace op } // namespace mxnet diff --git a/src/operator/nn/concat.cu b/src/operator/nn/concat.cu index 4f6b8fc9ebef..2872d527898e 100644 --- a/src/operator/nn/concat.cu +++ b/src/operator/nn/concat.cu @@ -50,6 +50,10 @@ NNVM_REGISTER_OP(Concat) .set_attr("FCompute", ConcatCompute) .set_attr("FComputeEx", ConcatComputeExGPU); +NNVM_REGISTER_OP(_rnn_param_concat) +.set_attr("FCompute", ConcatCompute) +.set_attr("FComputeEx", ConcatComputeExGPU); + NNVM_REGISTER_OP(_backward_Concat) .set_attr("FCompute", ConcatGradCompute); diff --git a/src/operator/rnn.cc b/src/operator/rnn.cc index 1e670a9047f0..73ef4f0f42a7 100644 --- a/src/operator/rnn.cc 
+++ b/src/operator/rnn.cc @@ -45,12 +45,12 @@ Operator *RNNProp::CreateOperatorEx(Context ctx, DMLC_REGISTER_PARAMETER(RNNParam); MXNET_REGISTER_OP_PROPERTY(RNN, RNNProp) -.describe(R"code(Applies recurrent layers to input data. Currently, vanilla RNN, LSTM and GRU are +.describe(R"code(Applies recurrent layers to input data. Currently, vanilla RNN, LSTM and GRU are implemented, with both multi-layer and bidirectional support. **Vanilla RNN** -Applies a single-gate recurrent layer to input X. Two kinds of activation function are supported: +Applies a single-gate recurrent layer to input X. Two kinds of activation function are supported: ReLU and Tanh. With ReLU activation function: @@ -63,7 +63,7 @@ With Tanh activtion function: .. math:: h_t = \tanh(W_{ih} * x_t + b_{ih} + W_{hh} * h_{(t-1)} + b_{hh}) -Reference paper: Finding structure in time - Elman, 1988. +Reference paper: Finding structure in time - Elman, 1988. https://crl.ucsd.edu/~elman/Papers/fsit.pdf **LSTM** diff --git a/src/operator/tensor/broadcast_reduce-inl.cuh b/src/operator/tensor/broadcast_reduce-inl.cuh index be3d1f9223f4..33bf72798fd6 100644 --- a/src/operator/tensor/broadcast_reduce-inl.cuh +++ b/src/operator/tensor/broadcast_reduce-inl.cuh @@ -268,7 +268,11 @@ __global__ void reduce_kernel_M1(const int N, const bool addto, for (int idx = threadIdx.x + blockIdx.x*blockDim.x; idx < N; idx += blockDim.x*gridDim.x) { Shape coord = unravel(idx, sshape); int j = ravel(coord, bshape); - assign(&small[idx], addto, OP::Map(big[j])); + DType val, residual; + Reducer::SetInitValue(val, residual); + Reducer::Reduce(val, OP::Map(big[j]), residual); + Reducer::Finalize(val, residual); + assign(&small[idx], addto, val); } } @@ -287,7 +291,10 @@ __global__ void reduce_kernel_M1(const int N, const bool addto, int idx_big = ravel(coord, big_shape); int idx_lhs = ravel(coord, lhs_shape); int idx_rhs = ravel(coord, rhs_shape); - DType val = OP1::Map(big[idx_big], OP2::Map(lhs[idx_lhs], rhs[idx_rhs])); + DType 
val, residual; + Reducer::SetInitValue(val, residual); + Reducer::Reduce(val, OP1::Map(big[idx_big], OP2::Map(lhs[idx_lhs], rhs[idx_rhs])), residual); + Reducer::Finalize(val, residual); assign(&small[idx], addto, val); } } diff --git a/src/operator/tensor/indexing_op.cc b/src/operator/tensor/indexing_op.cc index 0f96e2cc2f72..ef59145bb4a9 100644 --- a/src/operator/tensor/indexing_op.cc +++ b/src/operator/tensor/indexing_op.cc @@ -28,6 +28,27 @@ namespace mxnet { namespace op { +/* + * \brief returns true if all indices are between [min, max] + * \param data_ptr the indices to check + * \param data_size the number of indices to examine + * \param min the expected min value for indices + * \param max the expected max value for indices + */ +template +bool CheckIndexOutOfBound(const DType* data_ptr, size_t data_size, + const DType min, const DType max) { + bool is_valid = true; + for (size_t i = 0; i < data_size; i++) { + if (data_ptr[i] > max || data_ptr[i] < min) { + is_valid = false; + break; + } + } + return is_valid; +} + + template<> void SparseEmbeddingOpForwardRspImpl(const OpContext& ctx, const TBlob& data, @@ -48,18 +69,16 @@ void SparseEmbeddingOpForwardRspImpl(const OpContext& ctx, return; } // check out-of-bound indices - bool is_valid = true; MSHADOW_TYPE_SWITCH(data.type_flag_, DType, { DType min = 0; DType max = static_cast(weight.shape()[0] - 1); // check with single thread is faster since data is small DType* data_ptr = data.dptr(); size_t data_size = data.shape_.Size(); - for (size_t i = 0; i < data_size; i++) { - if (data_ptr[i] > max || data_ptr[i] < min) is_valid = false; - } + bool is_valid = CheckIndexOutOfBound(data_ptr, data_size, + min, max); + CHECK(is_valid) << "SparseEmbedding input contains data out of bound"; }) - CHECK(is_valid) << "SparseEmbedding input contains data out of bound"; // the weight is actually dense if (weight.aux_shape(kIdx)[0] == weight.shape()[0]) { EmbeddingOpForwardDnsImpl(s, data, weight.data(), req, output); @@ 
-101,6 +120,15 @@ inline void SparseEmbeddingOpBackwardRspImpl(const bool deterministic, MSHADOW_TYPE_SWITCH(data.type_flag_, IType, { MSHADOW_SGL_DBL_TYPE_SWITCH(ograd.type_flag_, DType, { MSHADOW_IDX_TYPE_SWITCH(output.aux_type(kIdx), RType, { + // check out of bound indices + { + IType min = 0; + IType max = static_cast(output.shape()[0] - 1); + // check with single thread is faster since data is small + IType* data_ptr = data.dptr(); + bool is_valid = CheckIndexOutOfBound(data_ptr, data.shape_.Size(), min, max); + CHECK(is_valid) << "Embedding input contains data out of bound"; + } // mark row flags Fill(s, TBlob(row_flg, Shape1(num_rows), cpu::kDevMask), kWriteTo, 0); Kernel::Launch(s, data_size, row_flg, data.dptr()); diff --git a/src/operator/tensor/indexing_op.cu b/src/operator/tensor/indexing_op.cu index 39fd81ef2001..bdc7f6e843c0 100644 --- a/src/operator/tensor/indexing_op.cu +++ b/src/operator/tensor/indexing_op.cu @@ -36,7 +36,7 @@ namespace op { struct is_valid_check { template - MSHADOW_XINLINE static void Map(int i, int32_t* out, const DType* data, + MSHADOW_XINLINE static void Map(int i, char* out, const DType* data, const DType min, const DType max) { if (data[i] < min || data[i] > max) *out = 1; } @@ -116,6 +116,27 @@ struct AddTakeGradRspDeterministicKernel { } }; +/* + * \brief returns true if all indices are between [min, max] + * \param s the stream + * \param data_ptr the indices on the stream + * \param data_size the number of indices to examine + * \param min the expected min value for indices + * \param max the expected max value for indices + * \param is_valid_ptr the temparary workspace + */ +template +bool CheckIndexOutOfBound(mshadow::Stream *s, const DType* data_ptr, size_t data_size, + const DType min, const DType max, char* is_valid_ptr) { + using namespace mxnet_op; + int32_t is_valid = 0; + Kernel::Launch(s, 1, is_valid_ptr); + Kernel::Launch(s, data_size, is_valid_ptr, data_ptr, min, max); + CUDA_CALL(cudaMemcpy(&is_valid, 
is_valid_ptr, sizeof(char), + cudaMemcpyDeviceToHost)); + return is_valid == 0; +} + template<> void SparseEmbeddingOpForwardRspImpl(const OpContext& ctx, const TBlob& data, @@ -136,21 +157,17 @@ void SparseEmbeddingOpForwardRspImpl(const OpContext& ctx, return; } // check out-of-bound indices - int32_t is_valid = 0; MSHADOW_TYPE_SWITCH(data.type_flag_, DType, { DType min = 0; DType max = static_cast(weight.shape()[0] - 1); DType* data_ptr = data.dptr(); size_t data_size = data.shape_.Size(); Tensor workspace = ctx.requested[0] - .get_space_typed(Shape1(sizeof(int32_t)), s); - int32_t* is_valid_ptr = reinterpret_cast(workspace.dptr_); - Kernel::Launch(s, 1, is_valid_ptr); - Kernel::Launch(s, data_size, is_valid_ptr, data_ptr, min, max); - CUDA_CALL(cudaMemcpy(&is_valid, is_valid_ptr, sizeof(int32_t), - cudaMemcpyDeviceToHost)); + .get_space_typed(Shape1(1), s); + char* is_valid_ptr = reinterpret_cast(workspace.dptr_); + bool is_valid = CheckIndexOutOfBound(s, data_ptr, data_size, min, max, is_valid_ptr); + CHECK(is_valid) << "SparseEmbedding input contains data out of bound"; }) - CHECK_EQ(is_valid, 0) << "SparseEmbedding input contains data out of bound"; // the weight is actually dense if (weight.aux_shape(kIdx)[0] == weight.shape()[0]) { EmbeddingOpForwardDnsImpl(s, data, weight.data(), req, output); @@ -207,6 +224,17 @@ void SparseEmbeddingDeterministicKernelLaunch(const OpContext& ctx, sorted_data_storage_bytes); temp_storage = workspace.dptr_ + total_storage_bytes - temp_workspace_bytes; + // check out-of-bound indices + { + IType min = 0; + IType max = static_cast(output.shape()[0] - 1); + IType* data_ptr = data.dptr(); + size_t data_size = data.shape_.Size(); + bool is_valid = CheckIndexOutOfBound(s, data_ptr, data_size, min, max, + reinterpret_cast(temp_storage)); + CHECK(is_valid) << "Embedding input contains data out of bound"; + } + // make a copy of the data, to be sorted TBlob sorted_data_blob(sorted_data, Shape1(data_size), gpu::kDevMask); auto 
sorted_data_tensor = sorted_data_blob.FlatTo1D(s); diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index eec920555ed1..78e1fa1d9c6a 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -122,7 +122,7 @@ inline TShape InferReshapeShape(const nnvm::Tuple& shape, CHECK(d1 != -1 || d2 != -1) << "Split dims cannot both be -1."; if (d1 == -1) d1 = d0 / d2; if (d2 == -1) d2 = d0 / d1; - CHECK_EQ(d1 * d2, static_cast(d0)) << + CHECK(d1 * d2 == static_cast(d0) || static_cast(d0) == IType(0)) << "Split dims " << d1 << ", " << d2 << " do not divide original dim " << d0; tmp.push_back(d1); tmp.push_back(d2); @@ -151,13 +151,36 @@ inline TShape InferReshapeShape(const nnvm::Tuple& shape, return oshape; } +inline bool ReverseReshapeInferShape(TShape *in, const TShape& out) { + if (in->Size() && out.Size()) { + return true; + } else if (!out.Size()) { + return false; + } else { + int zero_axis = -1; + int non_zero_prod = 1; + for (index_t i = 0; i < in->ndim(); i++) { + if ((*in)[i] == 0) { + if (zero_axis != -1) + return false; // more than 1 zero found. 
+ else + zero_axis = i; + } else { + non_zero_prod *= (*in)[i]; + } + } + (*in)[zero_axis] = out.Size() / non_zero_prod; + return true; + } +} + inline bool ReshapeShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + std::vector *in_attrs, + std::vector *out_attrs) { const ReshapeParam& param_ = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]"; CHECK_EQ(out_attrs->size(), 1U); - const TShape &dshape = (*in_attrs)[0]; + TShape &dshape = (*in_attrs)[0]; if (dshape.ndim() == 0) return false; TShape oshape; if (param_.shape.ndim() != 0) { @@ -182,14 +205,15 @@ inline bool ReshapeShape(const nnvm::NodeAttrs& attrs, oshape[inf_idx] = dshape.Size() / oshape.Size(); } } else { - return (*out_attrs)[0].ndim(); + return (*out_attrs)[0].ndim() && ReverseReshapeInferShape(&(*in_attrs)[0], (*out_attrs)[0]); } + ReverseReshapeInferShape(&dshape, oshape); CHECK_EQ(oshape.Size(), dshape.Size()) << "Target shape size is different to source. 
" << "Target: " << oshape << "\nSource: " << dshape; SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); - return true; + return ReverseReshapeInferShape(&(*in_attrs)[0], (*out_attrs)[0]); } inline bool FlattenShape(const nnvm::NodeAttrs& attrs, diff --git a/tests/nightly/model_backwards_compatibility_check/common.py b/tests/nightly/model_backwards_compatibility_check/common.py index 4c61cc4e3267..8950a9270839 100644 --- a/tests/nightly/model_backwards_compatibility_check/common.py +++ b/tests/nightly/model_backwards_compatibility_check/common.py @@ -41,6 +41,8 @@ backslash = '/' s3 = boto3.resource('s3') ctx = mx.cpu(0) +atol_default = 1e-5 +rtol_default = 1e-5 def get_model_path(model_name): diff --git a/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_inference.py b/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_inference.py index ae368e3a0fc6..5d63e7e9bca3 100644 --- a/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_inference.py +++ b/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_inference.py @@ -44,7 +44,7 @@ def test_module_checkpoint_api(): old_inference_results = load_inference_results(model_name) inference_results = loaded_model.predict(data_iter) # Check whether they are equal or not ? 
- assert_almost_equal(inference_results.asnumpy(), old_inference_results.asnumpy()) + assert_almost_equal(inference_results.asnumpy(), old_inference_results.asnumpy(), rtol=rtol_default, atol=atol_default) clean_model_files(model_files, model_name) logging.info('=================================') @@ -69,7 +69,7 @@ def test_lenet_gluon_load_params_api(): loaded_model.load_params(model_name + '-params') output = loaded_model(test_data) old_inference_results = mx.nd.load(model_name + '-inference')['inference'] - assert_almost_equal(old_inference_results.asnumpy(), output.asnumpy()) + assert_almost_equal(old_inference_results.asnumpy(), output.asnumpy(), rtol=rtol_default, atol=atol_default) clean_model_files(model_files, model_name) logging.info('=================================') logging.info('Assertion passed for model : %s' % model_name) @@ -92,7 +92,7 @@ def test_lenet_gluon_hybrid_imports_api(): loaded_model = gluon.SymbolBlock.imports(model_name + '-symbol.json', ['data'], model_name + '-0000.params') output = loaded_model(test_data) old_inference_results = mx.nd.load(model_name + '-inference')['inference'] - assert_almost_equal(old_inference_results.asnumpy(), output.asnumpy()) + assert_almost_equal(old_inference_results.asnumpy(), output.asnumpy(), rtol=rtol_default, atol=atol_default) clean_model_files(model_files, model_name) logging.info('=================================') logging.info('Assertion passed for model : %s' % model_name) @@ -124,7 +124,7 @@ def test_lstm_gluon_load_parameters_api(): loaded_model.load_parameters(model_name + '-params') output = loaded_model(test_data) old_inference_results = mx.nd.load(model_name + '-inference')['inference'] - assert_almost_equal(old_inference_results.asnumpy(), output.asnumpy()) + assert_almost_equal(old_inference_results.asnumpy(), output.asnumpy(), rtol=rtol_default, atol=atol_default) clean_model_files(model_files, model_name) logging.info('=================================') logging.info('Assertion passed 
for model : %s' % model_name) diff --git a/tests/nightly/test_server_profiling.py b/tests/nightly/test_server_profiling.py new file mode 100644 index 000000000000..7d157a3e4189 --- /dev/null +++ b/tests/nightly/test_server_profiling.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+import mxnet as mx +import json + +key = '99' +shape = (1200, 1200) # bigger than MXNET_KVSTORE_BIGARRAY_BOUND +kv = mx.kv.create('dist_sync') + +def init_kv(): + # init kv dns keys + kv.init(key, mx.nd.ones(shape)) + kv.set_optimizer(mx.optimizer.create('sgd')) + return kv, kv.rank, kv.num_workers + +def test_sync_push_pull(): + kv, my_rank, nworker = init_kv() + def check_default_keys(kv, my_rank): + nrepeat = 10 + # checks pull after push in loop, because behavior during + # consecutive pushes doesn't offer any guarantees + for i in range(nrepeat): + kv.push(key, mx.nd.ones(shape, dtype='float32') * (my_rank+1)) + val = mx.nd.zeros(shape, dtype='float32') + kv.pull(key, out=val) + mx.nd.waitall() + check_default_keys(kv, my_rank) + +if __name__ == "__main__": + server_filename_suffix = 'test_profile_server.json' + worker_filename_suffix = 'test_profile_worker.json' + mx.profiler.set_config(filename=server_filename_suffix, profile_all=True, profile_process='server') + mx.profiler.set_config(filename='rank' + str(kv.rank) + '_' + worker_filename_suffix, profile_all=True, profile_process='worker') + mx.profiler.set_state(state='run', profile_process='server') + mx.profiler.set_state(state='run', profile_process='worker') + test_sync_push_pull() + mx.profiler.set_state(state='stop', profile_process='server') + mx.profiler.set_state(state='stop', profile_process='worker') + + import glob, os + + # will only work when launcher mode is local, as used for integration test + if kv.rank == 0: + for rank in range(kv.num_workers): + for suffix in [worker_filename_suffix, server_filename_suffix]: + # throws value error if file is not proper json + filename = 'rank' + str(rank) + '_' + suffix + print(glob.glob('*'), os.getcwd()) + with open(filename, 'r') as f: + j = json.load(f) + + + diff --git a/tests/python/gpu/test_forward.py b/tests/python/gpu/test_forward.py index 126ccabaa7b5..02b0256024d3 100644 --- a/tests/python/gpu/test_forward.py +++ 
b/tests/python/gpu/test_forward.py @@ -24,11 +24,13 @@ sys.path.insert(0, os.path.join(curr_path, '../unittest')) from common import setup_module, with_seed, teardown from mxnet.gluon import utils +import tarfile def _get_model(): if not os.path.exists('model/Inception-7-symbol.json'): - download('http://data.mxnet.io/models/imagenet/inception-v3.tar.gz', dirname='model') - os.system("cd model; tar -xf inception-v3.tar.gz --strip-components 1") + download('http://data.mxnet.io/models/imagenet/inception-v3.tar.gz') + with tarfile.open(name="inception-v3.tar.gz", mode="r:gz") as tf: + tf.extractall() def _dump_images(shape): import skimage.io diff --git a/tests/python/gpu/test_gluon_gpu.py b/tests/python/gpu/test_gluon_gpu.py new file mode 100644 index 000000000000..42d65dab5fdc --- /dev/null +++ b/tests/python/gpu/test_gluon_gpu.py @@ -0,0 +1,203 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import print_function +import sys +import os +import time +import multiprocessing as mp +import unittest +import mxnet as mx +import numpy as np +import unittest +from nose.tools import assert_raises +from mxnet.test_utils import check_consistency, set_default_context, assert_almost_equal +from mxnet.base import MXNetError +from mxnet import autograd +from numpy.testing import assert_allclose + +curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) +sys.path.insert(0, os.path.join(curr_path, '../unittest')) +from common import setup_module, with_seed, teardown, assert_raises_cudnn_disabled +from test_gluon import * +from test_loss import * +from test_gluon_rnn import * + +set_default_context(mx.gpu(0)) + +def check_rnn_layer(layer): + layer.collect_params().initialize(ctx=[mx.cpu(0), mx.gpu(0)]) + with mx.gpu(0): + x = mx.nd.ones((10, 16, 30)) + states = layer.begin_state(16) + go, gs = layer(x, states) + + with mx.cpu(0): + x = mx.nd.ones((10, 16, 30)) + states = layer.begin_state(16) + co, cs = layer(x, states) + + # atol of 1e-6 required, as exposed by seed 2124685726 + assert_almost_equal(go.asnumpy(), co.asnumpy(), rtol=1e-2, atol=1e-6) + for g, c in zip(gs, cs): + assert_almost_equal(g.asnumpy(), c.asnumpy(), rtol=1e-2, atol=1e-6) + + +def check_rnn_layer_w_rand_inputs(layer): + layer.collect_params().initialize(ctx=[mx.cpu(0), mx.gpu(0)]) + x = mx.nd.uniform(shape=(10, 16, 30)) + with mx.gpu(0): + x = x.copyto(mx.gpu(0)) + states = layer.begin_state(16) + go, gs = layer(x, states) + + with mx.cpu(0): + x = x.copyto(mx.cpu(0)) + states = layer.begin_state(16) + co, cs = layer(x, states) + + assert_almost_equal(go.asnumpy(), co.asnumpy(), rtol=1e-2, atol=1e-6) + for g, c in zip(gs, cs): + assert_almost_equal(g.asnumpy(), c.asnumpy(), rtol=1e-2, atol=1e-6) + + +@with_seed() +@assert_raises_cudnn_disabled() +def test_rnn_layer(): + check_rnn_layer(gluon.rnn.RNN(100, num_layers=3)) + check_rnn_layer(gluon.rnn.RNN(100, 
activation='tanh', num_layers=3)) + check_rnn_layer(gluon.rnn.LSTM(100, num_layers=3)) + check_rnn_layer(gluon.rnn.GRU(100, num_layers=3)) + + check_rnn_layer(gluon.rnn.LSTM(100, num_layers=3, bidirectional=True)) + check_rnn_layer_w_rand_inputs(gluon.rnn.LSTM(100, num_layers=3, bidirectional=True)) + + +@with_seed() +def test_gluon_ctc_consistency(): + loss = mx.gluon.loss.CTCLoss() + data = mx.nd.arange(0, 4, repeat=40, ctx=mx.gpu(0)).reshape((2,20,4)).flip(axis=0) + cpu_label = mx.nd.array([[2,1,-1,-1],[3,2,2,-1]], ctx=mx.cpu(0)) + gpu_label = mx.nd.array([[2,1,-1,-1],[3,2,2,-1]], ctx=mx.gpu(0)) + + cpu_data = data.copy().as_in_context(mx.cpu(0)) + cpu_data.attach_grad() + with mx.autograd.record(): + l_cpu = loss(cpu_data, cpu_label) + l_cpu.backward() + + gpu_data = data.copyto(mx.gpu(0)) + gpu_data.attach_grad() + with mx.autograd.record(): + l_gpu = loss(gpu_data, gpu_label) + l_gpu.backward() + + assert_almost_equal(cpu_data.grad.asnumpy(), gpu_data.grad.asnumpy(), atol=1e-3, rtol=1e-3) + + +@with_seed() +def test_global_norm_clip_multi_device(): + x1 = mx.nd.ones((3,3), ctx=mx.gpu(0)) + x2 = mx.nd.ones((4,4), ctx=mx.cpu(0)) + norm = gluon.utils.clip_global_norm([x1, x2], 1.0) + assert norm == 5.0 + assert_almost_equal(x1.asnumpy(), np.ones((3,3))/5) + assert_almost_equal(x2.asnumpy(), np.ones((4,4))/5) + + +def _check_batchnorm_result(input, num_devices=1, cuda=False): + from mxnet.gluon.utils import split_and_load + def _find_bn(module): + if isinstance(module, (mx.gluon.nn.BatchNorm, mx.gluon.contrib.nn.SyncBatchNorm)): + return module + elif isinstance(module.module, (mx.gluon.nn.BatchNorm, mx.gluon.contrib.nn.SyncBatchNorm)): + return module.module + + raise RuntimeError('BN not found') + + def _syncParameters(bn1, bn2, ctx): + ctx = input.context + bn2.gamma.set_data(bn1.gamma.data(ctx)) + bn2.beta.set_data(bn1.beta.data(ctx)) + bn2.running_mean.set_data(bn1.running_mean.data(ctx)) + bn2.running_var.set_data(bn1.running_var.data(ctx)) + + input1 = 
input.copy() + input2 = input.copy() + + if cuda: + input1 = input.as_in_context(mx.gpu(0)) + ctx_list = [mx.gpu(i) for i in range(num_devices)] + else: + ctx_list = [mx.cpu(0) for _ in range(num_devices)] + + nch = input.shape[1] + bn1 = mx.gluon.nn.BatchNorm(in_channels=nch) + bn2 = mx.gluon.contrib.nn.SyncBatchNorm(in_channels=nch, num_devices=num_devices) + + bn1.initialize(ctx=ctx_list[0]) + bn2.initialize(ctx=ctx_list) + + # using the same values for gamma and beta + #_syncParameters(_find_bn(bn1), _find_bn(bn2), ctx_list[0]) + + input1.attach_grad() + inputs2 = split_and_load(input2, ctx_list, batch_axis=0) + for xi in inputs2: + xi.attach_grad() + + with mx.autograd.record(): + output1 = bn1(input1) + output2 = [bn2(xi) for xi in inputs2] + loss1 = (output1 ** 2).sum() + loss2 = [(output ** 2).sum() for output in output2] + mx.autograd.backward(loss1) + mx.autograd.backward(loss2) + + output2 = mx.nd.concat(*[output.as_in_context(input.context) for output in output2], dim=0) + # assert forwarding + assert_almost_equal(input1.asnumpy(), input2.asnumpy(), atol=1e-3, rtol=1e-3) + assert_almost_equal(output1.asnumpy(), output2.asnumpy(), atol=1e-3, rtol=1e-3) + assert_almost_equal(_find_bn(bn1).running_mean.data(ctx_list[0]).asnumpy(), + _find_bn(bn2).running_mean.data(ctx_list[0]).asnumpy(), + atol=1e-3, rtol=1e-3) + assert_almost_equal(_find_bn(bn1).running_var.data(ctx_list[0]).asnumpy(), + _find_bn(bn2).running_var.data(ctx_list[0]).asnumpy(), + atol=1e-3, rtol=1e-3) + input2grad = mx.nd.concat(*[output.grad.as_in_context(input.context) for output in inputs2], dim=0) + assert_almost_equal(input1.grad.asnumpy(), input2grad.asnumpy(), atol=1e-3, rtol=1e-3) + + +def test_sync_batchnorm(): + def get_num_devices(): + for i in range(100): + try: + mx.nd.zeros((1,), ctx=mx.gpu(i)) + except: + return i + # no need to use SyncBN with 1 gpu + if get_num_devices() < 2: + return + ndev = 2 + # check with unsync version + for i in range(10): + 
_check_batchnorm_result(mx.nd.random.uniform(shape=(4, 1, 4, 4)), + num_devices=ndev, cuda=True) + +if __name__ == '__main__': + import nose + nose.runmodule() diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index a3e663a68274..3d799aa5319b 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -36,11 +36,8 @@ from test_operator import * from test_optimizer import * from test_random import * -from test_gluon import * -from test_loss import * from test_exc_handling import * #from test_rnn import * -from test_gluon_rnn import * from test_sparse_ndarray import * from test_sparse_operator import * from test_ndarray import * @@ -1660,17 +1657,6 @@ def check_rnn_layer_w_rand_inputs(layer): for g, c in zip(gs, cs): assert_almost_equal(g.asnumpy(), c.asnumpy(), rtol=1e-2, atol=1e-6) -@with_seed() -@assert_raises_cudnn_disabled() -def test_rnn_layer(): - check_rnn_layer(gluon.rnn.RNN(100, num_layers=3)) - check_rnn_layer(gluon.rnn.RNN(100, activation='tanh', num_layers=3)) - check_rnn_layer(gluon.rnn.LSTM(100, num_layers=3)) - check_rnn_layer(gluon.rnn.GRU(100, num_layers=3)) - - check_rnn_layer(gluon.rnn.LSTM(100, num_layers=3, bidirectional=True)) - check_rnn_layer_w_rand_inputs(gluon.rnn.LSTM(100, num_layers=3, bidirectional=True)) - @with_seed() def test_sequence_reverse(): check_sequence_reverse(mx.gpu(0)) @@ -1688,28 +1674,6 @@ def test_autograd_save_memory(): x.backward() -@with_seed() -def test_gluon_ctc_consistency(): - loss = mx.gluon.loss.CTCLoss() - data = mx.nd.arange(0, 4, repeat=40, ctx=mx.gpu(0)).reshape((2,20,4)).flip(axis=0) - cpu_label = mx.nd.array([[2,1,-1,-1],[3,2,2,-1]], ctx=mx.cpu(0)) - gpu_label = mx.nd.array([[2,1,-1,-1],[3,2,2,-1]], ctx=mx.gpu(0)) - - cpu_data = data.copy().as_in_context(mx.cpu(0)) - cpu_data.attach_grad() - with mx.autograd.record(): - l_cpu = loss(cpu_data, cpu_label) - l_cpu.backward() - - gpu_data = data.copyto(mx.gpu(0)) - 
gpu_data.attach_grad() - with mx.autograd.record(): - l_gpu = loss(gpu_data, gpu_label) - l_gpu.backward() - - assert_almost_equal(cpu_data.grad.asnumpy(), gpu_data.grad.asnumpy(), atol=1e-3, rtol=1e-3) - - @with_seed() def test_cuda_rtc(): source = r''' @@ -1740,16 +1704,6 @@ def test_cuda_rtc(): assert (y.asnumpy() == 12).all() -@with_seed() -def test_global_norm_clip_multi_device(): - x1 = mx.nd.ones((3,3), ctx=mx.gpu(0)) - x2 = mx.nd.ones((4,4), ctx=mx.cpu(0)) - norm = gluon.utils.clip_global_norm([x1, x2], 1.0) - assert norm == 5.0 - assert_almost_equal(x1.asnumpy(), np.ones((3,3))/5) - assert_almost_equal(x2.asnumpy(), np.ones((4,4))/5) - - @with_seed() def test_cross_device_autograd(): x = mx.nd.random.uniform(shape=(10,)) @@ -1968,84 +1922,6 @@ def test_context_num_gpus(): # Test that num_gpus reports at least one GPU, as the test is run on a GPU host. assert mx.context.num_gpus() > 0 -def _check_batchnorm_result(input, num_devices=1, cuda=False): - from mxnet.gluon.utils import split_and_load - def _find_bn(module): - if isinstance(module, (mx.gluon.nn.BatchNorm, mx.gluon.contrib.nn.SyncBatchNorm)): - return module - elif isinstance(module.module, (mx.gluon.nn.BatchNorm, mx.gluon.contrib.nn.SyncBatchNorm)): - return module.module - - raise RuntimeError('BN not found') - - def _syncParameters(bn1, bn2, ctx): - ctx = input.context - bn2.gamma.set_data(bn1.gamma.data(ctx)) - bn2.beta.set_data(bn1.beta.data(ctx)) - bn2.running_mean.set_data(bn1.running_mean.data(ctx)) - bn2.running_var.set_data(bn1.running_var.data(ctx)) - - input1 = input.copy() - input2 = input.copy() - - if cuda: - input1 = input.as_in_context(mx.gpu(0)) - ctx_list = [mx.gpu(i) for i in range(num_devices)] - else: - ctx_list = [mx.cpu(0) for _ in range(num_devices)] - - nch = input.shape[1] - bn1 = mx.gluon.nn.BatchNorm(in_channels=nch) - bn2 = mx.gluon.contrib.nn.SyncBatchNorm(in_channels=nch, num_devices=num_devices) - - bn1.initialize(ctx=ctx_list[0]) - bn2.initialize(ctx=ctx_list) - - # 
using the same values for gamma and beta - #_syncParameters(_find_bn(bn1), _find_bn(bn2), ctx_list[0]) - - input1.attach_grad() - inputs2 = split_and_load(input2, ctx_list, batch_axis=0) - for xi in inputs2: - xi.attach_grad() - - with mx.autograd.record(): - output1 = bn1(input1) - output2 = [bn2(xi) for xi in inputs2] - loss1 = (output1 ** 2).sum() - loss2 = [(output ** 2).sum() for output in output2] - mx.autograd.backward(loss1) - mx.autograd.backward(loss2) - - output2 = mx.nd.concat(*[output.as_in_context(input.context) for output in output2], dim=0) - # assert forwarding - assert_almost_equal(input1.asnumpy(), input2.asnumpy(), atol=1e-3, rtol=1e-3) - assert_almost_equal(output1.asnumpy(), output2.asnumpy(), atol=1e-3, rtol=1e-3) - assert_almost_equal(_find_bn(bn1).running_mean.data(ctx_list[0]).asnumpy(), - _find_bn(bn2).running_mean.data(ctx_list[0]).asnumpy(), - atol=1e-3, rtol=1e-3) - assert_almost_equal(_find_bn(bn1).running_var.data(ctx_list[0]).asnumpy(), - _find_bn(bn2).running_var.data(ctx_list[0]).asnumpy(), - atol=1e-3, rtol=1e-3) - input2grad = mx.nd.concat(*[output.grad.as_in_context(input.context) for output in inputs2], dim=0) - assert_almost_equal(input1.grad.asnumpy(), input2grad.asnumpy(), atol=1e-3, rtol=1e-3) - -def test_sync_batchnorm(): - def get_num_devices(): - for i in range(100): - try: - mx.nd.zeros((1,), ctx=mx.gpu(i)) - except: - return i - # no need to use SyncBN with 1 gpu - if get_num_devices() < 2: - return - ndev = 2 - # check with unsync version - for i in range(10): - _check_batchnorm_result(mx.nd.random.uniform(shape=(4, 1, 4, 4)), - num_devices=ndev, cuda=True) - if __name__ == '__main__': import nose nose.runmodule() diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py index 359bbee569f8..08303c816af1 100644 --- a/tests/python/quantization/test_quantization.py +++ b/tests/python/quantization/test_quantization.py @@ -77,6 +77,7 @@ def 
test_dequantize_int8_to_float32(): @with_seed() +@unittest.skip('Flaky test, tracked in: https://github.com/apache/incubator-mxnet/issues/11747') def test_requantize_int32_to_int8(): def quantized_int32_to_float(qdata, min_range, max_range): assert qdata.dtype == 'int32' diff --git a/tests/python/train/test_sparse_fm.py b/tests/python/train/test_sparse_fm.py new file mode 100644 index 000000000000..99a22f54cbbd --- /dev/null +++ b/tests/python/train/test_sparse_fm.py @@ -0,0 +1,138 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import mxnet as mx +import mxnet.ndarray as nd +from mxnet.test_utils import * +import numpy as np + +def test_factorization_machine_module(verbose=False): + """ Test factorization machine model with sparse operators """ + def check_factorization_machine_module(optimizer=None, num_epochs=None): + print("check_factorization_machine_module( {} )".format(optimizer)) + + def fm(factor_size, feature_dim, init): + x = mx.symbol.Variable("data", stype='csr') + v = mx.symbol.Variable("v", shape=(feature_dim, factor_size), + init=init, stype='row_sparse') + + w1_weight = mx.symbol.var('w1_weight', shape=(feature_dim, 1), + init=init, stype='row_sparse') + w1_bias = mx.symbol.var('w1_bias', shape=(1)) + w1 = mx.symbol.broadcast_add(mx.symbol.dot(x, w1_weight), w1_bias) + + v_s = mx.symbol._internal._square_sum(data=v, axis=1, keepdims=True) + x_s = mx.symbol.square(data=x) + bd_sum = mx.sym.dot(x_s, v_s) + + w2 = mx.symbol.dot(x, v) + w2_squared = 0.5 * mx.symbol.square(data=w2) + + w_all = mx.symbol.Concat(w1, w2_squared, dim=1) + sum1 = mx.symbol.sum(data=w_all, axis=1, keepdims=True) + sum2 = 0.5 * mx.symbol.negative(bd_sum) + model = mx.sym.elemwise_add(sum1, sum2) + + y = mx.symbol.Variable("label") + model = mx.symbol.LinearRegressionOutput(data=model, label=y) + return model + + # model + init = mx.initializer.Normal(sigma=0.01) + factor_size = 4 + feature_dim = 10000 + model = fm(factor_size, feature_dim, init) + + # data iter + num_batches = 5 + batch_size = 64 + num_samples = batch_size * num_batches + # generate some random csr data + csr_nd = rand_ndarray((num_samples, feature_dim), 'csr', 0.1) + label = mx.nd.ones((num_samples,1)) + # the alternative is to use LibSVMIter + train_iter = mx.io.NDArrayIter(data=csr_nd, + label={'label':label}, + batch_size=batch_size, + last_batch_handle='discard') + # create module + mod = mx.mod.Module(symbol=model, data_names=['data'], label_names=['label']) + # allocate memory by given the input data and lable shapes + 
mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label) + # initialize parameters by uniform random numbers + mod.init_params(initializer=init) + if optimizer == 'sgd': + # use Sparse SGD with learning rate 0.1 to train + sgd = mx.optimizer.SGD(momentum=0.1, clip_gradient=5.0, learning_rate=0.01, + rescale_grad=1.0/batch_size) + mod.init_optimizer(optimizer=sgd) + if num_epochs is None: + num_epochs = 10 + expected_accuracy = 0.02 + elif optimizer == 'adam': + # use Sparse Adam to train + adam = mx.optimizer.Adam(clip_gradient=5.0, learning_rate=0.0005, + rescale_grad=1.0/batch_size) + mod.init_optimizer(optimizer=adam) + if num_epochs is None: + num_epochs = 10 + expected_accuracy = 0.05 + elif optimizer == 'adagrad': + # use Sparse AdaGrad with learning rate 0.1 to train + adagrad = mx.optimizer.AdaGrad(clip_gradient=5.0, learning_rate=0.01, + rescale_grad=1.0/batch_size) + mod.init_optimizer(optimizer=adagrad) + if num_epochs is None: + num_epochs = 20 + expected_accuracy = 0.09 + else: + raise AssertionError("Unsupported optimizer type '" + optimizer + "' specified") + # use accuracy as the metric + metric = mx.metric.create('MSE') + # train 'num_epochs' epoch + for epoch in range(num_epochs): + train_iter.reset() + metric.reset() + for batch in train_iter: + mod.forward(batch, is_train=True) # compute predictions + mod.update_metric(metric, batch.label) # accumulate prediction accuracy + mod.backward() # compute gradients + mod.update() # update parameters + print('Epoch %d, Training %s' % (epoch, metric.get())) + if num_epochs > 1: + assert(metric.get()[1] < expected_accuracy) + + if verbose is True: + print("============ SGD ==========================") + start = time.clock() + check_factorization_machine_module('sgd') + if verbose is True: + print("Duration: {}".format(time.clock() - start)) + print("============ ADAM ==========================") + start = time.clock() + check_factorization_machine_module('adam') + if verbose is 
True: + print("Duration: {}".format(time.clock() - start)) + print("============ ADAGRAD ==========================") + start = time.clock() + check_factorization_machine_module('adagrad') + if verbose is True: + print("Duration: {}".format(time.clock() - start)) + +# run as a script +if __name__ == "__main__": + test_factorization_machine_module() diff --git a/tests/python/unittest/test_base.py b/tests/python/unittest/test_base.py new file mode 100644 index 000000000000..3189729e1d10 --- /dev/null +++ b/tests/python/unittest/test_base.py @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import mxnet as mx +from mxnet.base import data_dir +from nose.tools import * +import os +import unittest +import logging +import os.path as op +import platform + +class MXNetDataDirTest(unittest.TestCase): + def setUp(self): + self.mxnet_data_dir = os.environ.get('MXNET_HOME') + if 'MXNET_HOME' in os.environ: + del os.environ['MXNET_HOME'] + + def tearDown(self): + if self.mxnet_data_dir: + os.environ['MXNET_HOME'] = self.mxnet_data_dir + else: + if 'MXNET_HOME' in os.environ: + del os.environ['MXNET_HOME'] + + def test_data_dir(self,): + prev_data_dir = data_dir() + system = platform.system() + if system != 'Windows': + self.assertEqual(data_dir(), op.join(op.expanduser('~'), '.mxnet')) + os.environ['MXNET_HOME'] = '/tmp/mxnet_data' + self.assertEqual(data_dir(), '/tmp/mxnet_data') + del os.environ['MXNET_HOME'] + self.assertEqual(data_dir(), prev_data_dir) + + diff --git a/tests/python/unittest/test_contrib_control_flow.py b/tests/python/unittest/test_contrib_control_flow.py index 67ed78ee0308..f1188b53d814 100644 --- a/tests/python/unittest/test_contrib_control_flow.py +++ b/tests/python/unittest/test_contrib_control_flow.py @@ -1159,6 +1159,7 @@ def check_contrib_rnn(cell_type, num_states): configs = [ {}, + {'inline_limit': 0}, {'static_alloc': True}, {'static_alloc': True, 'static_shape': True} ] for config in configs: diff --git a/tests/python/unittest/test_executor.py b/tests/python/unittest/test_executor.py index 630cad87496d..3117f6646481 100644 --- a/tests/python/unittest/test_executor.py +++ b/tests/python/unittest/test_executor.py @@ -18,13 +18,7 @@ import numpy as np import mxnet as mx from common import setup_module, with_seed, teardown - - -def reldiff(a, b): - diff = np.sum(np.abs(a - b)) - norm = np.sum(np.abs(a)) - reldiff = diff / norm - return reldiff +from mxnet.test_utils import assert_almost_equal def check_bind_with_uniform(uf, gf, dim, sf=None, lshape=None, rshape=None): @@ -64,9 +58,9 @@ def check_bind_with_uniform(uf, gf, dim, 
sf=None, lshape=None, rshape=None): out1 = uf(lhs_arr.asnumpy(), rhs_arr.asnumpy()) out3 = exec3.outputs[0].asnumpy() out4 = exec4.outputs[0].asnumpy() - assert reldiff(out1, out2) < 1e-6 - assert reldiff(out1, out3) < 1e-6 - assert reldiff(out1, out4) < 1e-6 + assert_almost_equal(out1, out2, rtol=1e-5, atol=1e-5) + assert_almost_equal(out1, out3, rtol=1e-5, atol=1e-5) + assert_almost_equal(out1, out4, rtol=1e-5, atol=1e-5) # test gradient out_grad = mx.nd.array(np.ones(out2.shape)) lhs_grad2, rhs_grad2 = gf(out_grad.asnumpy(), @@ -74,8 +68,8 @@ def check_bind_with_uniform(uf, gf, dim, sf=None, lshape=None, rshape=None): rhs_arr.asnumpy()) executor.backward([out_grad]) - assert reldiff(lhs_grad.asnumpy(), lhs_grad2) < 1e-6 - assert reldiff(rhs_grad.asnumpy(), rhs_grad2) < 1e-6 + assert_almost_equal(lhs_grad.asnumpy(), lhs_grad2, rtol=1e-5, atol=1e-5) + assert_almost_equal(rhs_grad.asnumpy(), rhs_grad2, rtol=1e-5, atol=1e-5) @with_seed(0) @@ -118,12 +112,14 @@ def check_bind(disable_bulk_exec): check_bind(False) -@with_seed(0) +# @roywei: Removing fixed seed as flakiness in this test is fixed +# tracked at https://github.com/apache/incubator-mxnet/issues/11686 +@with_seed() def test_dot(): nrepeat = 10 maxdim = 4 for repeat in range(nrepeat): - s =tuple(np.random.randint(1, 500, size=3)) + s =tuple(np.random.randint(1, 200, size=3)) check_bind_with_uniform(lambda x, y: np.dot(x, y), lambda g, x, y: (np.dot(g, y.T), np.dot(x.T, g)), 2, @@ -131,7 +127,7 @@ def test_dot(): rshape=(s[1], s[2]), sf = mx.symbol.dot) for repeat in range(nrepeat): - s =tuple(np.random.randint(1, 500, size=1)) + s =tuple(np.random.randint(1, 200, size=1)) check_bind_with_uniform(lambda x, y: np.dot(x, y), lambda g, x, y: (g * y, g * x), 2, diff --git a/tests/python/unittest/test_gluon_rnn.py b/tests/python/unittest/test_gluon_rnn.py index a9a2904e1e13..4e8241ffc1ea 100644 --- a/tests/python/unittest/test_gluon_rnn.py +++ b/tests/python/unittest/test_gluon_rnn.py @@ -16,7 +16,7 @@ # under the 
License. import mxnet as mx -from mxnet import gluon +from mxnet import gluon, nd import numpy as np import copy from numpy.testing import assert_allclose @@ -25,7 +25,6 @@ from common import assert_raises_cudnn_disabled -@assert_raises_cudnn_disabled() def test_rnn(): cell = gluon.rnn.RNNCell(100, prefix='rnn_') inputs = [mx.sym.Variable('rnn_t%d_data'%i) for i in range(3)] @@ -51,7 +50,6 @@ def test_lstm(): assert outs == [(10, 100), (10, 100), (10, 100)] -@assert_raises_cudnn_disabled() def test_lstm_forget_bias(): forget_bias = 2.0 stack = gluon.rnn.SequentialRNNCell() @@ -77,19 +75,23 @@ def test_lstm_forget_bias(): def test_lstm_cpu_inference(): # should behave the same as lstm cell EXPECTED_LSTM_OUTPUT = np.array([[[0.72045636, 0.72045636, 0.95215213, 0.95215213], - [0.72045636, 0.72045636, 0.95215213, 0.95215213]], - [[0.95215213, 0.95215213, 0.72045636, 0.72045636], - [0.95215213, 0.95215213, 0.72045636, 0.72045636]]]) + [0.72045636, 0.72045636, 0.95215213, 0.95215213]], + [[0.95215213, 0.95215213, 0.72045636, 0.72045636], + [0.95215213, 0.95215213, 0.72045636, 0.72045636]]]) x = mx.nd.ones(shape=(2, 2, 2)) model = mx.gluon.rnn.LSTM(2, num_layers=6, bidirectional=True) + model_cell = model._unfuse() model.initialize(mx.init.One()) + y = model(x).asnumpy() + y_cell = model_cell.unroll(2, x, layout='TNC', merge_outputs=True)[0].asnumpy() + mx.test_utils.assert_almost_equal(y_cell, EXPECTED_LSTM_OUTPUT, + rtol=1e-3, atol=1e-5) mx.test_utils.assert_almost_equal(y, EXPECTED_LSTM_OUTPUT, rtol=1e-3, atol=1e-5) -@assert_raises_cudnn_disabled() def test_gru(): cell = gluon.rnn.GRUCell(100, prefix='rnn_') inputs = [mx.sym.Variable('rnn_t%d_data'%i) for i in range(3)] @@ -241,6 +243,46 @@ def test_bidirectional(): assert outs == [(10, 200), (10, 200), (10, 200)] +@assert_raises_cudnn_disabled() +def test_layer_bidirectional(): + class RefBiLSTM(gluon.Block): + def __init__(self, size, **kwargs): + super(RefBiLSTM, self).__init__(**kwargs) + with self.name_scope(): + 
self._lstm_fwd = gluon.rnn.LSTM(size, bidirectional=False, prefix='l0') + self._lstm_bwd = gluon.rnn.LSTM(size, bidirectional=False, prefix='r0') + + def forward(self, inpt): + fwd = self._lstm_fwd(inpt) + bwd_inpt = nd.flip(inpt, 0) + bwd = self._lstm_bwd(bwd_inpt) + bwd = nd.flip(bwd, 0) + return nd.concat(fwd, bwd, dim=2) + + size = 7 + in_size = 5 + weights = {} + for d in ['l', 'r']: + weights['lstm_{}0_i2h_weight'.format(d)] = mx.random.uniform(shape=(size*4, in_size)) + weights['lstm_{}0_h2h_weight'.format(d)] = mx.random.uniform(shape=(size*4, size)) + weights['lstm_{}0_i2h_bias'.format(d)] = mx.random.uniform(shape=(size*4,)) + weights['lstm_{}0_h2h_bias'.format(d)] = mx.random.uniform(shape=(size*4,)) + + net = gluon.rnn.LSTM(size, bidirectional=True, prefix='lstm_') + ref_net = RefBiLSTM(size, prefix='lstm_') + net.initialize() + ref_net.initialize() + net_params = net.collect_params() + ref_net_params = ref_net.collect_params() + for k in weights: + net_params[k].set_data(weights[k]) + ref_net_params[k.replace('l0', 'l0l0').replace('r0', 'r0l0')].set_data(weights[k]) + + data = mx.random.uniform(shape=(3, 10, in_size)) + assert_allclose(net(data).asnumpy(), ref_net(data).asnumpy()) + + + def test_zoneout(): cell = gluon.rnn.ZoneoutCell(gluon.rnn.RNNCell(100, prefix='rnn_'), zoneout_outputs=0.5, zoneout_states=0.5) @@ -341,9 +383,12 @@ def check_rnn_layer_forward(layer, inputs, states=None, run_only=False): layer.collect_params().initialize() inputs.attach_grad() with mx.autograd.record(): - out = layer(inputs, states) + if states is None: + out = layer(inputs) + else: + out = layer(inputs, states) if states is not None: - assert isinstance(out, tuple) and len(out) == 2 + assert isinstance(out, (list, tuple)) and len(out) == 2 out = out[0] else: assert isinstance(out, mx.nd.NDArray) @@ -355,15 +400,19 @@ def check_rnn_layer_forward(layer, inputs, states=None, run_only=False): layer.hybridize() with mx.autograd.record(): - out = layer(inputs, states) if 
states is not None: - assert isinstance(out, tuple) and len(out) == 2 + out = layer(inputs, states) + assert isinstance(out, (list, tuple)) and len(out) == 2 out = out[0] else: + out = layer(inputs) assert isinstance(out, mx.nd.NDArray) out.backward() - layer(inputs, states) # test is_training = false + if states is not None: + layer(inputs, states) # test is_training = false + else: + layer(inputs) if not run_only: mx.test_utils.assert_almost_equal(np_out, out.asnumpy(), rtol=1e-3, atol=1e-5) @@ -393,15 +442,26 @@ def test_rnn_layers(): check_rnn_layer_forward(gluon.rnn.GRU(10, 2, bidirectional=True, dropout=0.5), mx.nd.ones((8, 3, 20)), mx.nd.ones((4, 3, 10)), run_only=True) - net = gluon.nn.Sequential() - net.add(gluon.rnn.LSTM(10, 2, bidirectional=True)) + net = gluon.nn.HybridSequential() + net.add(gluon.rnn.LSTM(10, bidirectional=True)) net.add(gluon.nn.BatchNorm(axis=2)) net.add(gluon.nn.Flatten()) net.add(gluon.nn.Dense(3, activation='relu')) + net.hybridize() net.collect_params().initialize() with mx.autograd.record(): net(mx.nd.ones((2, 3, 10))).backward() + net2 = gluon.nn.HybridSequential() + net2.add(gluon.rnn.LSTM(10, bidirectional=True)) + net2.add(gluon.nn.BatchNorm(axis=2)) + net2.add(gluon.nn.Flatten()) + net2.add(gluon.nn.Dense(3, activation='relu')) + net2.hybridize() + net2.collect_params().initialize() + with mx.autograd.record(): + net2(mx.nd.ones((2, 3, 10))).backward() + def test_rnn_unroll_variant_length(): # Test for imperative usage @@ -487,10 +547,9 @@ def test_cell_fill_shape(): @assert_raises_cudnn_disabled() def test_layer_fill_shape(): layer = gluon.rnn.LSTM(10) - layer.hybridize() check_rnn_layer_forward(layer, mx.nd.ones((3, 2, 7))) print(layer) - assert layer.i2h_weight[0].shape[1] == 7, layer.i2h_weight[0].shape[1] + assert layer.l0_i2h_weight.shape[1] == 7, layer.l0_i2h_weight.shape[1] if __name__ == '__main__': diff --git a/tests/python/unittest/test_loss.py b/tests/python/unittest/test_loss.py index 8d5b86341a88..1da6244a4906 
100644 --- a/tests/python/unittest/test_loss.py +++ b/tests/python/unittest/test_loss.py @@ -64,7 +64,8 @@ def get_net(num_hidden, flatten=True): fc3 = mx.symbol.FullyConnected(act2, name='fc3', num_hidden=num_hidden, flatten=flatten) return fc3 -@with_seed(1234) +# tracked at: https://github.com/apache/incubator-mxnet/issues/11692 +@with_seed() def test_ce_loss(): nclass = 10 N = 20 @@ -78,11 +79,12 @@ def test_ce_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - eval_metric=mx.metric.Loss(), optimizer='adam') + eval_metric=mx.metric.Loss(), optimizer='adam', + initializer=mx.init.Xavier(magnitude=2)) assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 - -@with_seed(1234) +# tracked at: https://github.com/apache/incubator-mxnet/issues/11691 +@with_seed() def test_bce_loss(): N = 20 data = mx.random.uniform(-1, 1, shape=(N, 20)) @@ -105,7 +107,7 @@ def test_bce_loss(): prob_npy = 1.0 / (1.0 + np.exp(-data.asnumpy())) label_npy = label.asnumpy() npy_bce_loss = - label_npy * np.log(prob_npy) - (1 - label_npy) * np.log(1 - prob_npy) - assert_almost_equal(mx_bce_loss, npy_bce_loss) + assert_almost_equal(mx_bce_loss, npy_bce_loss, rtol=1e-4, atol=1e-5) @with_seed() def test_bce_equal_ce2(): @@ -144,7 +146,7 @@ def test_kl_loss(): assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 -@with_seed(1234) +@with_seed() def test_l2_loss(): N = 20 data = mx.random.uniform(-1, 1, shape=(N, 10)) @@ -162,7 +164,7 @@ def test_l2_loss(): assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 -@with_seed(1234) +@with_seed() def test_l1_loss(): N = 20 data = mx.random.uniform(-1, 1, shape=(N, 10)) @@ -207,7 +209,7 @@ def test_ctc_loss(): mx.test_utils.assert_almost_equal(l.asnumpy(), np.array([18.82820702, 16.50581741])) -@with_seed(1234) +@with_seed() def test_ctc_loss_train(): N = 20 data = 
mx.random.uniform(-1, 1, shape=(N, 20, 10)) @@ -225,7 +227,7 @@ def test_ctc_loss_train(): assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 10 -@with_seed(1234) +@with_seed() def test_sample_weight_loss(): nclass = 10 N = 20 @@ -290,7 +292,7 @@ def test_huber_loss(): assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 -@with_seed(1234) +@with_seed() def test_hinge_loss(): N = 20 data = mx.random.uniform(-1, 1, shape=(N, 10)) @@ -305,10 +307,10 @@ def test_hinge_loss(): mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.06 -@with_seed(1234) +@with_seed() def test_squared_hinge_loss(): N = 20 data = mx.random.uniform(-1, 1, shape=(N, 10)) diff --git a/tests/python/unittest/test_module.py b/tests/python/unittest/test_module.py index 802988b43297..a21527a5a4ad 100644 --- a/tests/python/unittest/test_module.py +++ b/tests/python/unittest/test_module.py @@ -317,8 +317,9 @@ def create_bucketing_module(key): assert total_bytes_after == total_bytes_before - -@with_seed(11) +# roywei: Getting rid of fixed seed as flakiness could not be reproduced, +# tracked at: https://github.com/apache/incubator-mxnet/issues/11705 +@with_seed() def test_module_set_params(): # data iter data = mx.nd.array([[0.05, .10]]); @@ -381,7 +382,7 @@ def test_module_set_params(): aux_params={}, allow_missing=True, allow_extra=False) -@with_seed(11) +@with_seed() def test_monitor(): # data iter data = mx.nd.array([[0.05, .10]]); @@ -557,11 +558,12 @@ def check_shared_exec_group(sparse_embedding): for opt in sparse_embedding_opt: check_shared_exec_group(opt) -@with_seed(11) -def test_factorization_machine_module(verbose=False): +@with_seed() +def test_factorization_machine_module(): """ Test factorization machine 
model with sparse operators """ - def check_factorization_machine_module(optimizer=None, num_epochs=None): - print("check_factorization_machine_module( {} )".format(optimizer)) + # this unit test is to test the flow, training accuracy is tested in another test + def check_factorization_machine_module(num_epochs=None): + print("check_factorization_machine_module") def fm(factor_size, feature_dim, init): x = mx.symbol.Variable("data", stype='csr') @@ -613,33 +615,16 @@ def fm(factor_size, feature_dim, init): mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label) # initialize parameters by uniform random numbers mod.init_params(initializer=init) - if optimizer == 'sgd': - # use Sparse SGD with learning rate 0.1 to train - sgd = mx.optimizer.SGD(momentum=0.1, clip_gradient=5.0, learning_rate=0.01, - rescale_grad=1.0/batch_size) - mod.init_optimizer(optimizer=sgd) - if num_epochs is None: - num_epochs = 10 - expected_accuracy = 0.02 - elif optimizer == 'adam': - # use Sparse Adam to train - adam = mx.optimizer.Adam(clip_gradient=5.0, learning_rate=0.0005, - rescale_grad=1.0/batch_size) - mod.init_optimizer(optimizer=adam) - if num_epochs is None: - num_epochs = 10 - expected_accuracy = 0.05 - elif optimizer == 'adagrad': - # use Sparse AdaGrad with learning rate 0.1 to train - adagrad = mx.optimizer.AdaGrad(clip_gradient=5.0, learning_rate=0.01, - rescale_grad=1.0/batch_size) - mod.init_optimizer(optimizer=adagrad) - if num_epochs is None: - num_epochs = 20 - expected_accuracy = 0.09 - else: - raise AssertionError("Unsupported optimizer type '" + optimizer + "' specified") - # use accuracy as the metric + + # use Sparse SGD with learning rate 0.1 to train + sgd = mx.optimizer.SGD(momentum=0.1, clip_gradient=5.0, learning_rate=0.01, + rescale_grad=1.0/batch_size) + mod.init_optimizer(optimizer=sgd) + if num_epochs is None: + num_epochs = 50 + expected_accuracy = 0.02 + + # use accuracy as the metric metric = mx.metric.create('MSE') # train 
'num_epochs' epoch for epoch in range(num_epochs): @@ -654,23 +639,7 @@ def fm(factor_size, feature_dim, init): if num_epochs > 1: assert(metric.get()[1] < expected_accuracy) - if verbose is True: - print("============ SGD ==========================") - start = time.clock() - check_factorization_machine_module('sgd') - if verbose is True: - print("Duration: {}".format(time.clock() - start)) - print("============ ADAM ==========================") - start = time.clock() - check_factorization_machine_module('adam') - if verbose is True: - print("Duration: {}".format(time.clock() - start)) - print("============ ADAGRAD ==========================") - start = time.clock() - check_factorization_machine_module('adagrad') - if verbose is True: - print("Duration: {}".format(time.clock() - start)) - + check_factorization_machine_module() @with_seed() def test_module_initializer(): diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py index e55fa1af90e8..931f805906f0 100644 --- a/tests/python/unittest/test_ndarray.py +++ b/tests/python/unittest/test_ndarray.py @@ -862,7 +862,7 @@ def test_iter(): for i in range(x.size): assert same(y[i].asnumpy(), x[i].asnumpy()) -@unittest.skip("test fails intermittently. temporarily disabled till it gets fixed. 
tracked at https://github.com/apache/incubator-mxnet/issues/8049") +@with_seed() def test_cached(): sym = mx.sym.Convolution(kernel=(3, 3), num_filter=10) + 2 op = mx.nd.CachedOp(sym) @@ -1308,25 +1308,31 @@ def test_norm(ctx=default_context()): def l1norm(input_data, axis=0, keepdims=False): return np.sum(abs(input_data), axis=axis, keepdims=keepdims) - def l2norm(input_data, axis=0, keepdims=False): + def l2norm(input_data, axis=0, keepdims=False): return sp_norm(input_data, axis=axis, keepdims=keepdims) in_data_dim = random_sample([4,5,6], 1)[0] - in_data_shape = rand_shape_nd(in_data_dim) - np_arr = np.random.uniform(-1, 1, in_data_shape).astype(np.float32) - mx_arr = mx.nd.array(np_arr, ctx=ctx) - for ord in [1,2]: - for keep_dims in [True, False]: - for i in range(4): - npy_out = l1norm(np_arr, i, keep_dims) if ord==1 else l2norm(np_arr, i, keep_dims) - mx_out = mx.nd.norm(mx_arr, ord=ord, axis=i, keepdims=keep_dims) - assert npy_out.shape == mx_out.shape - mx.test_utils.assert_almost_equal(npy_out, mx_out.asnumpy()) - if (i < 3): - npy_out = l1norm(np_arr, (i, i+1), keep_dims) if ord==1 else l2norm(np_arr, (i, i+1), keep_dims) - mx_out = mx.nd.norm(mx_arr, ord=ord, axis=(i, i+1), keepdims=keep_dims) + for force_reduce_dim1 in [True, False]: + in_data_shape = rand_shape_nd(in_data_dim) + if force_reduce_dim1: + in_data_shape = in_data_shape[:3] + (1, ) + in_data_shape[4:] + np_arr = np.random.uniform(-1, 1, in_data_shape).astype(np.float32) + mx_arr = mx.nd.array(np_arr, ctx=ctx) + for ord in [1, 2]: + for keep_dims in [True, False]: + for i in range(4): + npy_out = l1norm(np_arr, i, keep_dims) if ord == 1 else l2norm( + np_arr, i, keep_dims) + mx_out = mx.nd.norm(mx_arr, ord=ord, axis=i, keepdims=keep_dims) assert npy_out.shape == mx_out.shape mx.test_utils.assert_almost_equal(npy_out, mx_out.asnumpy()) + if (i < 3): + npy_out = l1norm(np_arr, (i, i + 1), keep_dims) if ord == 1 else l2norm( + np_arr, (i, i + 1), keep_dims) + mx_out = mx.nd.norm(mx_arr, 
ord=ord, axis=(i, i + 1), keepdims=keep_dims) + assert npy_out.shape == mx_out.shape + mx.test_utils.assert_almost_equal(npy_out, mx_out.asnumpy()) + @with_seed() def test_ndarray_cpu_shared_ctx(): diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 99d635e3565f..90e85d123d59 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -1943,11 +1943,11 @@ def test_bxor(a, b): test_bmul(a, b) test_bdiv(a, b) ''' - Flaky Test Disabled due to master build failure: - http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/incubator-mxnet/detail/master/1248/pipeline + Flaky Test Disabled due to master build failure: + http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/incubator-mxnet/detail/master/1248/pipeline Github Issue: https://github.com/apache/incubator-mxnet/issues/11838 - - test_bmod(a, b) + + test_bmod(a, b) ''' test_bmod_int(a, b) test_bpow(a, b) @@ -2065,6 +2065,23 @@ def test_reshape_new(src_shape, shape_args, reverse, dst_shape): assert np.square(exe.grad_dict['data'].asnumpy() - grad_npy.reshape(src_shape)).mean() < 1E-7, \ 'Src Shape = %s, Shape Arguments = %s, Reverse = %s, Dst Shape = %s'\ %(str(src_shape), str(shape_args), str(reverse), str(dst_shape)) + + for i in range(len(src_shape)): + holdout_src_shape = list(src_shape) + holdout_src_shape[i] = 0 + holdout_src_shape = tuple(holdout_src_shape) + net = mx.sym.Variable('data') + net = mx.sym.elemwise_add(net.reshape(shape_args, reverse=reverse), mx.sym.ones(shape=dst_shape)) + input_shape, output_shape, __ = net.infer_shape(data=holdout_src_shape) + assert output_shape[0] == dst_shape, \ + 'Holdout Src Shape = %s, Shape Arguments = %s, Reverse = %s, Dst Shape = %s, ' \ + 'Output Shape = %s' %(str(holdout_src_shape), str(shape_args), str(reverse), + str(dst_shape), str(output_shape[0])) + assert input_shape[0] == src_shape, \ + 'Holdout Src Shape = %s, Shape Arguments = %s, Reverse = %s, Dst 
Shape = %s, ' \ + 'Output Shape = %s' %(str(holdout_src_shape), str(shape_args), str(reverse), + str(dst_shape), str(output_shape[0])) + # Test new api (Using shape) test_cases = [ [(2, 3, 5, 5), (0, -1), False, (2, 75)], @@ -4946,8 +4963,9 @@ def _make_lower_triangle_symm(a, ndims, m, dtype=np.float32): lt_mask = mx.sym.reshape(lt_mask, shape=shp) return mx.sym.broadcast_mul(a, lt_mask) -# Seed set because the test is not robust enough to operate on random data -@with_seed(42) +# @ankkhedia: Getting rid of fixed seed as flakiness could not be reproduced +# tracked at https://github.com/apache/incubator-mxnet/issues/11718 +@with_seed() def test_laop(): dtype = np.float64 rtol_fw = 1e-7 @@ -5448,8 +5466,9 @@ def test_laop_3(): check_grad(test_syevd_l_4, [a_batch]) -# Seed set because the test is not robust enough to operate on random data -@with_seed(1896893923) +# @piyushghai - Removing the fixed seed for this test. +# Issue for flakiness is tracked at - https://github.com/apache/incubator-mxnet/issues/11721 +@with_seed() def test_laop_4(): # Currently disabled on GPU as syevd needs cuda8 # and MxNet builds use cuda 7.5 @@ -6615,7 +6634,7 @@ def test_diag(): w = np.random.randint(2,9) a_np = np.random.random((h, w)).astype(np.float32) a = mx.nd.array(a_np).astype('float32') - + # k == 0 r = mx.nd.diag(a) assert_almost_equal(r.asnumpy(), np.diag(a_np)) @@ -6658,7 +6677,7 @@ def test_diag(): d = np.random.randint(2,9) a_np = np.random.random((d)) a = mx.nd.array(a_np) - + # k is random k = np.random.randint(-d,d) r = mx.nd.diag(a, k=k) @@ -6725,7 +6744,7 @@ def test_invalid_block_size(): invalid_shape_inp = (n , c, h, w) data = rand_ndarray(invalid_shape_inp, 'default') assertRaises(MXNetError, mx.nd.depth_to_space, data, block) - + test_invalid_depth_dim() test_invalid_space_dim() test_invalid_block_size() @@ -6771,12 +6790,12 @@ def test_invalid_block_size(): invalid_shape_inp = (n, c, h, w) data = rand_ndarray(invalid_shape_inp, 'default') assertRaises(MXNetError, 
mx.nd.space_to_depth, data, block) - + def test_invalid_depth_dim(): invalid_shape_inp = (n, 0, h, w) data = rand_ndarray(invalid_shape_inp, 'default') assertRaises(MXNetError, mx.nd.space_to_depth, data, block) - + test_invalid_space_dim() test_invalid_block_size() test_invalid_depth_dim() diff --git a/tests/python/unittest/test_random.py b/tests/python/unittest/test_random.py index d90dfcf856f9..43e9608934e3 100644 --- a/tests/python/unittest/test_random.py +++ b/tests/python/unittest/test_random.py @@ -447,6 +447,7 @@ def test_uniform_generator(): verify_generator(generator=generator_mx_same_seed, buckets=buckets, probs=probs) @with_seed() +@unittest.skip('Flaky test, tracked in: https://github.com/apache/incubator-mxnet/issues/9856') def test_gamma_generator(): ctx = mx.context.current_context() for dtype in ['float16', 'float32', 'float64']: diff --git a/tests/python/unittest/test_sparse_ndarray.py b/tests/python/unittest/test_sparse_ndarray.py index 508f52301b42..7d3d58010b6e 100644 --- a/tests/python/unittest/test_sparse_ndarray.py +++ b/tests/python/unittest/test_sparse_ndarray.py @@ -514,24 +514,22 @@ def test_sparse_nd_astype_copy(): assert (id(x) == id(y)) -@with_seed(0) +@with_seed() def test_sparse_nd_pickle(): - repeat = 1 dim0 = 40 dim1 = 40 stypes = ['row_sparse', 'csr'] densities = [0, 0.5] stype_dict = {'row_sparse': RowSparseNDArray, 'csr': CSRNDArray} - for _ in range(repeat): - shape = rand_shape_2d(dim0, dim1) - for stype in stypes: - for density in densities: - a, _ = rand_sparse_ndarray(shape, stype, density) - assert isinstance(a, stype_dict[stype]) - data = pkl.dumps(a) - b = pkl.loads(data) - assert isinstance(b, stype_dict[stype]) - assert same(a.asnumpy(), b.asnumpy()) + shape = rand_shape_2d(dim0, dim1) + for stype in stypes: + for density in densities: + a, _ = rand_sparse_ndarray(shape, stype, density) + assert isinstance(a, stype_dict[stype]) + data = pkl.dumps(a) + b = pkl.loads(data) + assert isinstance(b, stype_dict[stype]) + 
assert same(a.asnumpy(), b.asnumpy()) # @kalyc: Getting rid of fixed seed as flakiness could not be reproduced diff --git a/tests/requirements.txt b/tests/requirements.txt new file mode 100644 index 000000000000..0eca73fbb02a --- /dev/null +++ b/tests/requirements.txt @@ -0,0 +1,3 @@ +# Requirements for tests, those are installed before running on the virtualenv +mock +nose diff --git a/tools/license_header.py b/tools/license_header.py index 0ee4049338b1..7aef33b71213 100755 --- a/tools/license_header.py +++ b/tools/license_header.py @@ -82,7 +82,7 @@ _LANGS = {'.cc':'*', '.h':'*', '.cu':'*', '.cuh':'*', '.py':'#', '.pm':'#', '.scala':'*', '.cc':'*', '.sh':'#', '.cmake':'#', '.java':'*', '.sh':'#', '.cpp':'*', '.hpp':'*', '.c':'*', - '.bat':'rem', '.pl':'#', '.m':'%', '.R':'#', '.mk':'#', '.cfg':'#', '.t':'#'} + '.bat':'rem', '.pl':'#', '.m':'%', '.R':'#', '.mk':'#', '.cfg':'#', '.t':'#', '.ps1': '#'} # Previous license header, which will be removed _OLD_LICENSE = re.compile('.*Copyright.*by Contributors')