From db71827a861bda24a1d3c27fe41a9e415037e3b2 Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Mon, 28 Sep 2020 15:47:22 -0700 Subject: [PATCH 1/3] Bypass test_tensorrt.py:test_tensorrt_symbol_int8 on arch < 70 --- tests/python/tensorrt/test_tensorrt.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/python/tensorrt/test_tensorrt.py b/tests/python/tensorrt/test_tensorrt.py index c7e5f01018db..db31467dfd16 100644 --- a/tests/python/tensorrt/test_tensorrt.py +++ b/tests/python/tensorrt/test_tensorrt.py @@ -22,6 +22,7 @@ from mxnet.symbol import Symbol import numpy as np from mxnet.test_utils import assert_almost_equal +from mxnet.numpy_extension import get_cuda_compute_capability from mxnet import gluon from mxnet.gluon import nn from mxnet import nd @@ -135,17 +136,25 @@ def get_top1(logits): def test_tensorrt_symbol_int8(): + ctx = mx.gpu(0) + cuda_arch = get_cuda_compute_capability(ctx) + cuda_arch_min = 70 + if cuda_arch < cuda_arch_min: + print('Bypassing test_tensorrt_symbol_int8 on cuda arch {}, need arch >= {}).'.format( + cuda_arch, cuda_arch_min)) + return + # INT8 engine output are not lossless, so we don't expect numerical uniformity, # but we have to compare the TOP1 metric batch_shape=(1,3,224,224) sym, arg_params, aux_params = get_model(batch_shape=batch_shape) calibration_iters = 700 - trt_sym = sym.optimize_for('TensorRT', args=arg_params, aux=aux_params, ctx=mx.gpu(0), + trt_sym = sym.optimize_for('TensorRT', args=arg_params, aux=aux_params, ctx=ctx, precision='int8', calibration_iters=calibration_iters) - executor = trt_sym.simple_bind(ctx=mx.gpu(), data=batch_shape, + executor = trt_sym.simple_bind(ctx=ctx, data=batch_shape, grad_req='null', force_rebind=True) dali_val_iter = get_dali_iter() From 496ee2f834b92c32ef7ad5fa35dbbe6de531b2c2 Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Mon, 28 Sep 2020 16:01:21 -0700 Subject: [PATCH 2/3] Adapt test_tensorrt.py:test_tensorrt_symbol for A100 --- 
tests/python/tensorrt/test_tensorrt.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/python/tensorrt/test_tensorrt.py b/tests/python/tensorrt/test_tensorrt.py index db31467dfd16..20b84d0ef7c6 100644 --- a/tests/python/tensorrt/test_tensorrt.py +++ b/tests/python/tensorrt/test_tensorrt.py @@ -16,6 +16,7 @@ # under the License. import os +import sys import ctypes import mxnet as mx from mxnet.base import SymbolHandle, check_call, _LIB, mx_uint, c_str_array, c_str, mx_real_t @@ -28,6 +29,10 @@ from mxnet import nd from mxnet.gluon.model_zoo import vision +curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) +sys.path.insert(0, os.path.join(curr_path, '../unittest')) +from common import setup_module, with_seed, teardown + #################################### ######### FP32/FP16 tests ########## #################################### @@ -61,7 +66,7 @@ def get_baseline(input_data): return output -def check_tensorrt_symbol(baseline, input_data, fp16_mode, tol): +def check_tensorrt_symbol(baseline, input_data, fp16_mode, rtol=None, atol=None): sym, arg_params, aux_params = get_model(batch_shape=input_data.shape) trt_sym = sym.optimize_for('TensorRT', args=arg_params, aux=aux_params, ctx=mx.gpu(0), precision='fp16' if fp16_mode else 'fp32') @@ -70,17 +75,18 @@ def check_tensorrt_symbol(baseline, input_data, fp16_mode, tol): grad_req='null', force_rebind=True) output = executor.forward(is_train=False, data=input_data) - assert_almost_equal(output[0].asnumpy(), baseline[0].asnumpy(), atol=tol[0], rtol=tol[1]) + assert_almost_equal(output[0], baseline[0], rtol=rtol, atol=atol) +@with_seed() def test_tensorrt_symbol(): batch_shape = (32, 3, 224, 224) input_data = mx.nd.random.uniform(shape=(batch_shape), ctx=mx.gpu(0)) baseline = get_baseline(input_data) print("Testing resnet50 with TensorRT backend numerical accuracy...") print("FP32") - check_tensorrt_symbol(baseline, input_data, fp16_mode=False, tol=(1e-4, 1e-4)) 
+ check_tensorrt_symbol(baseline, input_data, fp16_mode=False) print("FP16") - check_tensorrt_symbol(baseline, input_data, fp16_mode=True, tol=(1e-1, 1e-2)) + check_tensorrt_symbol(baseline, input_data, fp16_mode=True, rtol=1e-2, atol=1e-1) ############################## ######### INT8 tests ########## From 39f327939b8697d835a00a2888ac8e3c909505d0 Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Wed, 30 Sep 2020 17:15:15 -0700 Subject: [PATCH 3/3] Fix test_numpy_op.py:test_np_mixed_precision_binary_funcs with portion of (#18660) --- tests/python/unittest/test_numpy_op.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 20da12b12f48..4bdaf5203ef1 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -2528,6 +2528,27 @@ def __init__(self, func): def hybrid_forward(self, F, a, b, *args, **kwargs): return getattr(F.np, self._func)(a, b) + if (func in ['multiply', 'mod', 'equal', 'not_equal', 'greater', + 'greater_equal', 'less', 'less_equal']) and \ + (lshape == () or rshape == ()) : + # the behaviors of infer type in dealing with the input shape of '()' are different between np and onp + # for example, + # mx_test_x1 = np.random.uniform(-2, 2, (2,3)).astype(np.float32) + # mx_test_x2 = np.random.uniform(-2, 2, ()).astype(np.float16) + # np_out = _np.mod(mx_test_x1.asnumpy(), mx_test_x2.asnumpy()) # float16 + # mx_out = np.mod(mx_test_x1, mx_test_x2) # float32 + + # logical ops: when two numbers are only different in precision, NumPy also has a weird behavior + # for example, + # a = np.array([[1.441]], dtype = np.float16) + # b = np.array(1.4413278, dtype = np.float32) + # c = np.array([1.4413278], dtype = np.float32) + # np.greater(a,b), np.greater(a,c) # True True + # _np.greater(a.asnumpy(),b.asnumpy()), _np.greater(a.asnumpy(),c.asnumpy()) # False True + + # thus, skip the tests + return + np_func =
getattr(_np, func) mx_func = TestMixedBinary(func) np_test_x1 = _np.random.uniform(low, high, lshape).astype(ltype)