Implement remaining nn_basic ops in opperf (apache#17456)
* Added SoftmaxOutput

* Added LinearRegressionOutput

* Added other regression ops

* Added SVMOutput

* Added L2, layer and instance norm

* gamma and beta to ndarray

* Reworked layer/instance norm

* Added Embedding

* Disabled backward on embedding

* Added Correlation

* Added data1 and 2 to ndarray

* Added SpatialTransformer

* Made loc ndarray type

* Run backward test

* Added IdentityAttachKLSparseReg

* Dropping grad

* Added sparseness target

* Added grad back

* Disabling backward for IdentityAttachKLSparseReg

* Trying to debug

* Print problematic op

* Another log

* Removing IdentityAttachKLSparseReg test for now

* Removed faulty test

* Added im2col

* Added col2im

* Added GroupNorm

* Added RNN

* Added parameters and state to ndarray

* Added LRN

* Added preloaded_multi_mp_sgd_mom_update

* Added lamb_update_phase1

* Added lamb_update_phase2

* Dropped reversal

* Finalized nn basic ops

* Cleaned up code for linter

* Refactored individual tests into generalized framework

* Refined logic, added default params

* Fixed LRN param placement

* Refactored default params for clarity

* Fixed lint errors

* Fixed BatchNorm issue

* Removed debugging comment

* Cleaned up indentation

* Added axis param for LayerNorm op

* Fixed loc param issues

* Linked Embedding backward issue in run_performance_test

* Disabling problematic runs on gpu

* Added myself to CONTRIBUTORS.md

* Addressed PR comments

* Fixed DEFAULT_LABEL issue

* Tightened up logic, established consistency with master

* Fixed indent
connorgoggins authored and anirudh2290 committed May 29, 2020
1 parent b8cf68f commit 38e555f
Showing 5 changed files with 267 additions and 77 deletions.
3 changes: 2 additions & 1 deletion CONTRIBUTORS.md
@@ -250,6 +250,7 @@ List of Contributors
* [dithyrambe](https://github.com/dithyrambe)
* [Piljae Chae](https://github.com/IHateMint)
* [Oliver Kowalke](https://github.com/olk)
* [Connor Goggins](https://github.com/connorgoggins)

Label Bot
---------
@@ -260,6 +261,6 @@ Label Bot
- @mxnet-label-bot remove [specify comma separated labels here]
- @mxnet-label-bot update [specify comma separated labels here]
(i.e. @mxnet-label-bot update [Bug, Python])

- Available label names which are supported: [Labels](https://github.com/apache/incubator-mxnet/labels)
- For further details: [My Wiki Page](https://cwiki.apache.org/confluence/display/MXNET/Machine+Learning+Based+GitHub+Bot)
102 changes: 46 additions & 56 deletions benchmark/opperf/nd_operations/nn_basic_operators.py
@@ -16,71 +16,61 @@
# under the License.

import mxnet as mx
from benchmark.opperf.utils.benchmark_utils import run_performance_test
from benchmark.opperf.utils.common_utils import merge_map_list
from benchmark.opperf.rules.default_params import MX_OP_MODULE

from benchmark.opperf.utils.op_registry_utils import get_all_nn_basic_operators
from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks

"""Performance benchmark tests for MXNet NDArray basic NN Operators.
1. FullyConnected
2. Dropout
3. BatchNorm
4. SoftmaxOutput
5. LinearRegressionOutput
6. LogisticRegressionOutput
7. MAERegressionOutput
8. SVMOutput
9. L2Normalization
10. LayerNorm
11. InstanceNorm
12. Embedding
13. Correlation
14. SpatialTransformer
15. im2col
16. col2im
17. GroupNorm
18. RNN
19. LRN
"""


def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
# FullyConnected operator benchmarks
fc_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "FullyConnected")],
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=[{"data": (32, 3, 256, 256),
"num_hidden": 64,
"weight": (64, 3 * 256 * 256),
"bias": (64,),
"flatten": True},
{"data": (32, 3, 256, 256),
"num_hidden": 64,
"weight": (64, 256),
"bias": (64,),
"flatten": False}],
warmup=warmup,
runs=runs)
"""Runs benchmarks with the given context and precision (dtype)for all the NN basic
operators in MXNet.
Parameters
----------
ctx: mx.ctx
Context to run benchmarks
dtype: str, default 'float32'
Precision to use for benchmarks
profiler: str, default 'native'
Module to use for tracking benchmark execution time
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
Number of runs to capture benchmark results
Returns
-------
Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.
"""

# Dropout benchmarks
dropout_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "Dropout")],
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=[{"data": (32, 3, 256, 256),
"p": 0.5,
"mode": "always"},
{"data": (10000, 10),
"p": 0.5,
"mode": "always"}],
warmup=warmup,
runs=runs)
# BatchNorm benchmarks
batchnorm_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "BatchNorm")],
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=[{"data": (32, 3, 256, 256),
"gamma": (3,),
"beta": (3,),
"moving_mean": (3,),
"moving_var": (3,)},
{"data": (32, 3, 10000, 10),
"gamma": (3,),
"beta": (3,),
"moving_mean": (3,),
"moving_var": (3,)}],
warmup=warmup,
runs=runs)
# Prepare combined results
mx_basic_nn_results = merge_map_list(fc_benchmark_res + dropout_benchmark_res + batchnorm_benchmark_res)
return mx_basic_nn_results
# Fetch all NN Basic Operators
mx_nn_basic_ops = get_all_nn_basic_operators()

# Run benchmarks
mx_nn_basic_op_results = run_op_benchmarks(mx_nn_basic_ops, dtype, ctx, profiler, warmup, runs)
return mx_nn_basic_op_results
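
For reference, a minimal usage sketch of the refactored entry point (assuming an opperf checkout on `PYTHONPATH`; names match the diff above):

```python
import mxnet as mx

from benchmark.opperf.nd_operations.nn_basic_operators import run_nn_basic_operators_benchmarks

# Run all NN basic operator benchmarks on CPU with the defaults shown above.
results = run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
                                            profiler='native', warmup=25, runs=100)

# Per the docstring, the return value is one merged dictionary:
# operator name -> benchmark results.
for op_name, op_result in results.items():
    print(op_name, op_result)
```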
168 changes: 166 additions & 2 deletions benchmark/opperf/rules/default_params.py
@@ -81,6 +81,92 @@
# NOTE: Data used is DEFAULT_DATA
DEFAULT_AXIS = [0]

# For NN basic operators
# General
DEFAULT_DATA_NN_BASIC = [(32, 3, 256, 256), (32, 3, 10000, 10)]
DEFAULT_NUM_HIDDEN = [64]
DEFAULT_BIAS = [(64,)]
DEFAULT_FLATTEN = [True, False]
DEFAULT_GAMMA = [(3,)]
DEFAULT_BETA = [(3,)]
DEFAULT_MOVING_MEAN = [(3,)]
DEFAULT_MOVING_VAR = [(3,)]
DEFAULT_LABEL_REG = [(32, 3, 256, 256), (32, 3, 10000, 10)]
DEFAULT_GRAD_SCALE = [.5]
DEFAULT_NORMALIZATION = ["batch"]
DEFAULT_MARGIN = [.5]
DEFAULT_REG_COEFF = [.5]
DEFAULT_INPUT_DIM = [3, 16]
DEFAULT_OUTPUT_DIM = [4, 9]
DEFAULT_SPARSE_GRAD = [False]
DEFAULT_KERNEL_SIZE = [3]
DEFAULT_MAX_DISPLACEMENT = [2]
DEFAULT_STRIDE_1 = [2]
DEFAULT_STRIDE_2 = [2]
DEFAULT_ALPHA = [.001]
DEFAULT_NSIZE = [3]
DEFAULT_PARAMETERS = [(7,), (104,)]
DEFAULT_STATE = [(1, 4, 1), (2, 10000, 4)]
DEFAULT_MODE = ["rnn_relu", "rnn_tanh"]
DEFAULT_STATE_SIZE = [1, 4]
DEFAULT_NUM_LAYERS = [1, 2]
DEFAULT_NUM_GROUPS = [1, 10]
DEFAULT_TRANSFORM = ["affine"]
DEFAULT_SAMPLER = ["bilinear"]
DEFAULT_DILATE = [(1,), (1, 1)]
DEFAULT_PAD = [(1,), (1, 1)]
DEFAULT_OUTPUT_SIZE = [(64, 16, 1), (32, 8, 1)]
DEFAULT_KERNEL = [(1, 1, 1), (1, 1, 1)]
DEFAULT_STRIDE = [(2, 2, 2), (1, 1, 1)]

# BatchNorm
DEFAULT_AXIS_BN = [1]

# LayerNorm
DEFAULT_GAMMA_LN = [(32,), (32,)]
DEFAULT_BETA_LN = [(32,), (32,)]

# L2Normalization
DEFAULT_MODE_L2 = ['channel', 'instance', 'spatial']

# SVMOutput
DEFAULT_LABEL_SVM = [(32, 3, 256), (32, 3, 10000)]

# SoftmaxOutput
DEFAULT_LABEL_SM = [(32, 3, 256), (32, 3, 10000)]

# FullyConnected
DEFAULT_WEIGHT_FC = [(64, 3 * 256 * 256), (64, 10)]

# Embedding
DEFAULT_WEIGHT_EMBEDDING = [(3, 4), (16, 9)]

# GroupNorm
DEFAULT_DATA_GN = [(32, 3, 256, 256), (32, 10, 10000, 10)]
DEFAULT_BETA_GAMMA_GN = [(1,), (10,)]

# Dropout
DEFAULT_DATA_DROPOUT = [(32, 3, 256, 256), (10000, 10)]
DEFAULT_MODE_DROPOUT = ["always"]

# SpatialTransformer
DEFAULT_DATA_ST = [(32, 3, 256, 6), (256, 3, 10000, 6)]
DEFAULT_LOC_TAR_ST = [(32, 6), (256, 6)]

# im2col
DEFAULT_KERNEL_I2C = [(3,), (3, 3)]
DEFAULT_STRIDE_I2C = [(1,), (1, 1)]

# col2im
DEFAULT_DATA_C2I = [(32, 64, 256), (32, 64, 256)]

# RNN
DEFAULT_DATA_RNN = [(32, 4, 4), (512, 10000, 10)]
DEFAULT_P_RNN = [.5]

# LRN
DEFAULT_BETA_LRN = [.2]

# For optimizer operators
DEFAULT_WEIGHT = [(1024, 1024), (10000, 1), (10000, 100)]
DEFAULT_GRAD = [(1024, 1024), (10000, 1), (10000, 100)]
@@ -267,7 +353,85 @@
"a": DEFAULT_A,
"lhs_fill_element_0index": DEFAULT_LHS_FEI,
"rhs_fill_element_0index": DEFAULT_RHS_FEI,
"mhs": DEFAULT_MHS}
"mhs": DEFAULT_MHS,
"data_spatialtransformer": DEFAULT_DATA_ST,
"loc_spatialtransformer": DEFAULT_LOC_TAR_ST,
"target_shape": DEFAULT_LOC_TAR_ST,
"transform_type_spatialtransformer": DEFAULT_TRANSFORM,
"sampler_type": DEFAULT_SAMPLER,
"data_col2im": DEFAULT_DATA_C2I,
"output_size": DEFAULT_OUTPUT_SIZE,
"kernel_col2im": DEFAULT_KERNEL,
"stride_col2im": DEFAULT_STRIDE,
"data_rnn": DEFAULT_DATA_RNN,
"p_rnn": DEFAULT_P_RNN,
"parameters": DEFAULT_PARAMETERS,
"state": DEFAULT_STATE,
"state_size": DEFAULT_STATE_SIZE,
"num_layers": DEFAULT_NUM_LAYERS,
"mode_rnn": DEFAULT_MODE,
"data_groupnorm": DEFAULT_DATA_GN,
"gamma_groupnorm": DEFAULT_BETA_GAMMA_GN,
"beta_groupnorm": DEFAULT_BETA_GAMMA_GN,
"num_groups": DEFAULT_NUM_GROUPS,
"eps": DEFAULT_EPSILON,
"data_dropout": DEFAULT_DATA_DROPOUT,
"mode_dropout": DEFAULT_MODE_DROPOUT,
"p_dropout": DEFAULT_P,
"data_nn_basic": DEFAULT_DATA_NN_BASIC,
"num_hidden": DEFAULT_NUM_HIDDEN,
"data_fullyconnected": DEFAULT_DATA_NN_BASIC,
"weight_fullyconnected": DEFAULT_WEIGHT_FC,
"weight_embedding": DEFAULT_WEIGHT_EMBEDDING,
"bias": DEFAULT_BIAS,
"flatten": DEFAULT_FLATTEN,
"data_batchnorm": DEFAULT_DATA_NN_BASIC,
"gamma_batchnorm": DEFAULT_GAMMA,
"beta_batchnorm": DEFAULT_BETA,
"moving_mean_batchnorm": DEFAULT_MOVING_MEAN,
"moving_var_batchnorm": DEFAULT_MOVING_VAR,
"axis_batchnorm": DEFAULT_AXIS_BN,
"data_softmaxoutput": DEFAULT_DATA_NN_BASIC,
"label_softmaxoutput": DEFAULT_LABEL_SM,
"data_maeregressionoutput": DEFAULT_DATA_NN_BASIC,
"label_maeregressionoutput": DEFAULT_LABEL_REG,
"data_logisticregressionoutput": DEFAULT_DATA_NN_BASIC,
"label_logisticregressionoutput": DEFAULT_LABEL_REG,
"data_linearregressionoutput": DEFAULT_DATA_NN_BASIC,
"label_linearregressionoutput": DEFAULT_LABEL_REG,
"data_svmoutput": DEFAULT_DATA_NN_BASIC,
"label_svmoutput": DEFAULT_LABEL_SVM,
"grad_scale": DEFAULT_GRAD_SCALE,
"normalization": DEFAULT_NORMALIZATION,
"margin": DEFAULT_MARGIN,
"regularization_coefficient": DEFAULT_REG_COEFF,
"data_l2normalization": DEFAULT_DATA_NN_BASIC,
"mode_l2normalization": DEFAULT_MODE_L2,
"gamma_layernorm": DEFAULT_GAMMA_LN,
"beta_layernorm": DEFAULT_BETA_LN,
"data_instancenorm": DEFAULT_DATA_NN_BASIC,
"gamma_instancenorm": DEFAULT_GAMMA,
"beta_instancenorm": DEFAULT_BETA,
"input_dim": DEFAULT_INPUT_DIM,
"output_dim": DEFAULT_OUTPUT_DIM,
"sparse_grad": DEFAULT_SPARSE_GRAD,
"data1": DEFAULT_DATA_NN_BASIC,
"data2": DEFAULT_DATA_NN_BASIC,
"kernel_size": DEFAULT_KERNEL_SIZE,
"max_displacement": DEFAULT_MAX_DISPLACEMENT,
"stride1": DEFAULT_STRIDE_1,
"stride2": DEFAULT_STRIDE_2,
"data_im2col": DEFAULT_DATA_NN_BASIC,
"kernel_im2col": DEFAULT_KERNEL_I2C,
"stride_im2col": DEFAULT_STRIDE_I2C,
"dilate_im2col": DEFAULT_DILATE,
"pad_im2col": DEFAULT_PAD,
"data_lrn": DEFAULT_DATA_NN_BASIC,
"alpha_lrn": DEFAULT_ALPHA,
"beta_lrn": DEFAULT_BETA_LRN,
"nsize": DEFAULT_NSIZE,
"data_layernorm": DEFAULT_DATA_NN_BASIC,
"axis_layernorm": DEFAULT_AXIS}


# These are names of MXNet operator parameters that is of type NDArray.
@@ -282,4 +446,4 @@
"v", "z", "g", "delta", "args", "indices", "shape_like", "y",
"x", "condition", "a", "index", "raveL_data", "label", "grid",
"A", "B", "C", "r1", "r2", "rois", "lrs", "wds", "weights_sum_sq",
"grads_sum_sq", "mhs"]
"grads_sum_sq", "mhs", "data1", "data2", "loc", "parameters", "state"]
43 changes: 27 additions & 16 deletions benchmark/opperf/utils/benchmark_utils.py
@@ -26,7 +26,7 @@
from benchmark.opperf.rules.default_params import PARAMS_OF_TYPE_NDARRAY
from .profiler_utils import cpp_profile, python_profile

no_backward = ['gather_nd', 'softmax_cross_entropy', 'linalg_gelqf', 'linalg_slogdet', 'moments', 'SequenceLast']
no_backward = {'gather_nd', 'softmax_cross_entropy', 'linalg_gelqf', 'linalg_slogdet', 'moments', 'SequenceLast', 'Embedding'}

def _prepare_op_inputs(inputs, run_backward, dtype, ctx):
mx.random.seed(41)
@@ -163,6 +163,8 @@ def run_performance_test(ops, inputs, run_backward=True,
-------
List of dictionary of benchmark results. key -> name of the operator, Value is benchmark results.
Note: when run_performance_test is called on the nd.Embedding operator with run_backward=True, an error will
be thrown. Track issue here: https://github.com/apache/incubator-mxnet/issues/11314
"""
kwargs_list = _prepare_op_inputs(inputs, run_backward, dtype, ctx)

@@ -180,24 +182,33 @@


def run_op_benchmarks(ops, dtype, ctx, profiler, warmup, runs):
# Running SoftmaxOutput backwards on GPU results in errors
# track issue here: https://github.com/apache/incubator-mxnet/issues/880
gpu_backwards_disabled_ops = ['SoftmaxOutput']

# Running im2col either forwards or backwards on GPU results in errors
# track issue here: https://github.com/apache/incubator-mxnet/issues/17493
gpu_disabled_ops = ['im2col']

# For each operator, run benchmarks
mx_op_benchmark_results = []
for op, op_params in ops.items():
# Prepare inputs for the operator
inputs = prepare_op_inputs(op, op_params)

# setting backward false for ops with known issue
if op in no_backward:
op_params["has_backward"] = False

# Run benchmarks
cur_op_res = run_performance_test(op_params["nd_op_handle"],
run_backward=op_params["has_backward"],
dtype=dtype, ctx=ctx,
profiler=profiler,
inputs=inputs,
warmup=warmup, runs=runs)
mx_op_benchmark_results += cur_op_res
if ctx == mx.cpu() or op not in gpu_disabled_ops:
# Prepare inputs for the operator
inputs = prepare_op_inputs(op, op_params)

# setting backward false for ops with known issue
if (ctx == mx.gpu() and op in gpu_backwards_disabled_ops) or op in no_backward:
op_params["has_backward"] = False

# Run benchmarks
cur_op_res = run_performance_test(op_params["nd_op_handle"],
run_backward=op_params["has_backward"],
dtype=dtype, ctx=ctx,
profiler=profiler,
inputs=inputs,
warmup=warmup, runs=runs)
mx_op_benchmark_results += cur_op_res

# Prepare combined results for all operators
mx_op_benchmark_results = merge_map_list(mx_op_benchmark_results)
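
As a usage sketch, the generalized path can also be exercised for a single newly covered op through run_performance_test. This example uses the LayerNorm defaults added in this PR (shapes from default_params.py above); gamma and beta are passed as shapes and materialized as NDArrays, matching the pattern of the replaced per-op code:

```python
import mxnet as mx

from benchmark.opperf.utils.benchmark_utils import run_performance_test
from benchmark.opperf.rules.default_params import MX_OP_MODULE

# Benchmark LayerNorm with the defaults introduced above: gamma/beta of shape
# (32,) normalizing over axis 0 of a (32, 3, 256, 256) input.
layernorm_res = run_performance_test([getattr(MX_OP_MODULE, "LayerNorm")],
                                     run_backward=True,
                                     dtype='float32',
                                     ctx=mx.cpu(),
                                     profiler='native',
                                     inputs=[{"data": (32, 3, 256, 256),
                                              "gamma": (32,),
                                              "beta": (32,),
                                              "axis": 0}],
                                     warmup=25, runs=100)
```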