diff --git a/benchmark/python/ffi/benchmark_ffi.py b/benchmark/python/ffi/benchmark_ffi.py index 96d8e1d6658f..ee3fccfaa185 100644 --- a/benchmark/python/ffi/benchmark_ffi.py +++ b/benchmark/python/ffi/benchmark_ffi.py @@ -51,6 +51,9 @@ def generate_workloads(): def prepare_workloads(): pool = generate_workloads() OpArgMngr.add_workload("zeros", (2, 2)) + OpArgMngr.add_workload("einsum", "ii", pool['2x2'], optimize=False) + OpArgMngr.add_workload("unique", pool['1'], return_index=True, return_inverse=True, return_counts=True, axis=-1) + OpArgMngr.add_workload("dstack", (pool['2x1'], pool['2x1'], pool['2x1'], pool['2x1'])) OpArgMngr.add_workload("polyval", dnp.arange(10), pool['2x2']) OpArgMngr.add_workload("ediff1d", pool['2x2'], pool['2x2'], pool['2x2']) OpArgMngr.add_workload("nan_to_num", pool['2x2']) diff --git a/python/mxnet/kvstore/kvstore.py b/python/mxnet/kvstore/kvstore.py index eec6aa5453f0..59e5a4dad041 100644 --- a/python/mxnet/kvstore/kvstore.py +++ b/python/mxnet/kvstore/kvstore.py @@ -498,8 +498,9 @@ def set_gradient_compression(self, compression_params): """ Specifies type of low-bit quantization for gradient compression \ and additional arguments depending on the type of compression being used. - The 1bit compression works as follows: values which is above the threshold in the - gradient will be set to +1, whereas values below threshold will be set to -1. + The 1bit compression takes a float `threshold` and works as follows: + values which are above the threshold in the gradient will be set to +1, whereas + values below the threshold will be set to -1. 2bit Gradient Compression takes a positive float `threshold`. 
The technique works by thresholding values such that positive values in the diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py index ff0e48d47664..a1b6ff8b5bac 100644 --- a/python/mxnet/ndarray/numpy/_op.py +++ b/python/mxnet/ndarray/numpy/_op.py @@ -951,11 +951,8 @@ def unique(ar, return_index=False, return_inverse=False, return_counts=False, ax >>> u[indices] array([1., 2., 6., 4., 2., 3., 2.]) """ - ret = _npi.unique(ar, return_index, return_inverse, return_counts, axis) - if isinstance(ret, list): - return tuple(ret) - else: - return ret + ret = list(_api_internal.unique(ar, return_index, return_inverse, return_counts, axis)) + return ret[0] if len(ret) == 1 else tuple(ret) @set_module('mxnet.ndarray.numpy') @@ -4374,7 +4371,7 @@ def dstack(arrays): [[2, 3]], [[3, 4]]]) """ - return _npi.dstack(*arrays) + return _api_internal.dstack(*arrays) @set_module('mxnet.ndarray.numpy') @@ -6772,7 +6769,7 @@ def einsum(*operands, **kwargs): subscripts = operands[0] operands = operands[1:] - return _npi.einsum(*operands, subscripts=subscripts, out=out, optimize=int(optimize_arg)) + return _api_internal.einsum(*operands, subscripts, out, int(optimize_arg)) @set_module('mxnet.ndarray.numpy') diff --git a/src/api/operator/numpy/np_einsum_op.cc b/src/api/operator/numpy/np_einsum_op.cc new file mode 100644 index 000000000000..a5b8339a619e --- /dev/null +++ b/src/api/operator/numpy/np_einsum_op.cc @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_einsum_op.cc + * \brief Implementation of the API of functions in src/operator/numpy/np_einsum_op.cc + */ +#include +#include +#include +#include "../utils.h" +#include "../../../operator/numpy/np_einsum_op-inl.h" + +namespace mxnet { + +MXNET_REGISTER_API("_npi.einsum") +.set_body([](runtime::MXNetArgs args, runtime::MXNetRetValue* ret) { + using namespace runtime; + const nnvm::Op* op = Op::Get("_npi_einsum"); + nnvm::NodeAttrs attrs; + op::NumpyEinsumParam param; + int args_size = args.size(); + // param.num_args + param.num_args = args_size - 3; + // param.subscripts + param.subscripts = args[args_size - 3].operator std::string(); + // param.optimize + param.optimize = args[args_size - 1].operator int(); + + attrs.parsed = std::move(param); + attrs.op = op; + SetAttrDict(&attrs); + + // inputs + int num_inputs = param.num_args; + std::vector inputs_vec(num_inputs, nullptr); + for (int i = 0; i < num_inputs; ++i) { + inputs_vec[i] = args[i].operator mxnet::NDArray*(); + } + NDArray** inputs = inputs_vec.data(); + + // outputs + NDArray* out = args[args_size - 2].operator mxnet::NDArray*(); + NDArray** outputs = out == nullptr ? 
nullptr : &out; + int num_outputs = out != nullptr; + + auto ndoutputs = Invoke(op, &attrs, num_inputs, inputs, &num_outputs, outputs); + if (out) { + *ret = PythonArg(args_size - 2); + } else { + *ret = reinterpret_cast(ndoutputs[0]); + } +}); + +} // namespace mxnet diff --git a/src/api/operator/numpy/np_matrix_op.cc b/src/api/operator/numpy/np_matrix_op.cc index ae8421ac4010..fdf8e9a081fa 100644 --- a/src/api/operator/numpy/np_matrix_op.cc +++ b/src/api/operator/numpy/np_matrix_op.cc @@ -24,6 +24,7 @@ #include #include #include "../utils.h" +#include "../../../operator/nn/concat-inl.h" #include "../../../operator/tensor/matrix_op-inl.h" #include "../../../operator/numpy/np_matrix_op-inl.h" @@ -49,6 +50,31 @@ MXNET_REGISTER_API("_npi.expand_dims") *ret = ndoutputs[0]; }); +MXNET_REGISTER_API("_npi.dstack") +.set_body([](runtime::MXNetArgs args, runtime::MXNetRetValue* ret) { + using namespace runtime; + const nnvm::Op* op = Op::Get("_npi_dstack"); + nnvm::NodeAttrs attrs; + op::ConcatParam param; + int args_size = args.size(); + // param.num_args + param.num_args = args_size; + attrs.parsed = param; + attrs.op = op; + SetAttrDict(&attrs); + // inputs + int num_inputs = args_size; + std::vector inputs_vec(args_size, nullptr); + for (int i = 0; i < args_size; ++i) { + inputs_vec[i] = args[i].operator mxnet::NDArray*(); + } + NDArray** inputs = inputs_vec.data(); + // outputs + int num_outputs = 0; + auto ndoutputs = Invoke(op, &attrs, num_inputs, inputs, &num_outputs, nullptr); + *ret = ndoutputs[0]; +}); + MXNET_REGISTER_API("_npi.split") .set_body([](runtime::MXNetArgs args, runtime::MXNetRetValue* ret) { using namespace runtime; diff --git a/src/api/operator/numpy/np_unique_op.cc b/src/api/operator/numpy/np_unique_op.cc new file mode 100644 index 000000000000..288260f5dfb2 --- /dev/null +++ b/src/api/operator/numpy/np_unique_op.cc @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_unique_op.cc + * \brief Implementation of the API of functions in src/operator/numpy/np_unique_op.cc + */ +#include +#include +#include +#include "../utils.h" +#include "../../../operator/numpy/np_unique_op.h" + +namespace mxnet { + +MXNET_REGISTER_API("_npi.unique") +.set_body([](runtime::MXNetArgs args, runtime::MXNetRetValue* ret) { + using namespace runtime; + const nnvm::Op* op = Op::Get("_npi_unique"); + nnvm::NodeAttrs attrs; + op::NumpyUniqueParam param; + // param + param.return_index = args[1].operator bool(); + param.return_inverse = args[2].operator bool(); + param.return_counts = args[3].operator bool(); + if (args[4].type_code() == kNull) { + param.axis = dmlc::nullopt; + } else { + param.axis = args[4].operator int(); + } + attrs.parsed = std::move(param); + attrs.op = op; + SetAttrDict(&attrs); + // inputs + int num_inputs = 1; + NDArray* inputs[] = {args[0].operator mxnet::NDArray*()}; + // outputs + int num_outputs = 0; + auto ndoutputs = Invoke(op, &attrs, num_inputs, inputs, &num_outputs, nullptr); + std::vector ndarray_handles; + ndarray_handles.reserve(num_outputs); + for (int i = 0; i < num_outputs; ++i) { + ndarray_handles.emplace_back(ndoutputs[i]); + } + *ret = ADT(0, ndarray_handles.begin(), ndarray_handles.end()); +}); + 
+} // namespace mxnet diff --git a/src/kvstore/gradient_compression-inl.h b/src/kvstore/gradient_compression-inl.h index d882d6739b47..7d70dff59617 100644 --- a/src/kvstore/gradient_compression-inl.h +++ b/src/kvstore/gradient_compression-inl.h @@ -60,12 +60,12 @@ struct quantize_1bit { char *block_ptr = reinterpret_cast < char * > (compr_block); // masks used to quantize data const uint8_t bits[] = {0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01}; - for(int i = start; i < end; ++i) { + for (int i = start; i < end; ++i) { // adds offset to reach appropriate byte char *curr_byte = block_ptr + ((i - start) >> 3); // adds gradient to existing residual to get updated grad residual[i] += grad[i]; - if(residual[i] > threshold){ + if (residual[i] > threshold) { // set data to 1 *curr_byte |= bits[(i & 7)]; // reduce residual by 1 diff --git a/src/kvstore/gradient_compression.cc b/src/kvstore/gradient_compression.cc index 915a3dff907d..86a183dd6688 100644 --- a/src/kvstore/gradient_compression.cc +++ b/src/kvstore/gradient_compression.cc @@ -153,12 +153,12 @@ void GradientCompression::Quantize(const mxnet::NDArray &from, mxnet::NDArray *t } else { LOG(FATAL) << "Unsupported quantization of type " << get_type_str(); } -#else - LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR; -#endif } else { LOG(FATAL) << "unknown device mask"; } +#else + LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR; +#endif } } @@ -207,12 +207,12 @@ void GradientCompression::Dequantize(const mxnet::NDArray &from, mxnet::NDArray } else { LOG(FATAL) << "Unsupported dequantization of type " << get_type_str(); } -#else - LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR; -#endif } else { LOG(FATAL) << "unknown device mask"; } +#else + LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR; +#endif } } } // namespace kvstore diff --git a/src/operator/nn/concat-inl.h b/src/operator/nn/concat-inl.h index ffedba46c1ac..b5505d12ca45 100644 --- a/src/operator/nn/concat-inl.h +++ b/src/operator/nn/concat-inl.h @@ -55,6 +55,13 @@ struct 
ConcatParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(dim).set_default(1) .describe("the dimension to be concated."); } + void SetAttrDict(std::unordered_map* dict) { + std::ostringstream num_args_s, dim_s; + num_args_s << num_args; + dim_s << dim; + (*dict)["num_args"] = num_args_s.str(); + (*dict)["dim"] = dim_s.str(); + } }; // struct ConcatParam template diff --git a/src/operator/numpy/np_einsum_op-inl.h b/src/operator/numpy/np_einsum_op-inl.h index b89e576bba23..ca80c7bc20be 100644 --- a/src/operator/numpy/np_einsum_op-inl.h +++ b/src/operator/numpy/np_einsum_op-inl.h @@ -384,6 +384,15 @@ struct NumpyEinsumParam: public dmlc::Parameter { DMLC_DECLARE_FIELD(optimize) .set_default(0); } + void SetAttrDict(std::unordered_map* dict) { + std::ostringstream num_args_s, optimize_s, subscripts_s; + num_args_s << num_args; + optimize_s << optimize; + subscripts_s << subscripts; + (*dict)["num_args"] = num_args_s.str(); + (*dict)["optimize"] = optimize_s.str(); + (*dict)["subscripts"] = subscripts_s.str(); + } }; class EinsumOp { diff --git a/src/operator/numpy/np_unique_op.h b/src/operator/numpy/np_unique_op.h index bc2b6c34c19f..0a121cd69481 100644 --- a/src/operator/numpy/np_unique_op.h +++ b/src/operator/numpy/np_unique_op.h @@ -80,6 +80,17 @@ struct NumpyUniqueParam : public dmlc::Parameter { .set_default(dmlc::optional()) .describe("An integer that represents the axis to operator on."); } + void SetAttrDict(std::unordered_map* dict) { + std::ostringstream return_index_s, return_inverse_s, return_counts_s, axis_s; + return_index_s << return_index; + return_inverse_s << return_inverse; + return_counts_s << return_counts; + axis_s << axis; + (*dict)["return_index"] = return_index_s.str(); + (*dict)["return_inverse"] = return_inverse_s.str(); + (*dict)["return_counts"] = return_counts_s.str(); + (*dict)["axis"] = axis_s.str(); + } }; } // namespace op diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 
d3964af0bab7..3a8325cfb6e2 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -6542,6 +6542,9 @@ def hybrid_forward(self, F, a): configs = [ ((), True, True, True, None), ((1, ), True, True, True, -1), + ((5, ), False, False, False, 0), + ((5, ), True, False, False, 0), + ((5, ), True, True, False, 0), ((5, ), True, True, True, 0), ((5, ), True, True, True, None), ((5, 4), True, True, True, None), @@ -6562,15 +6565,24 @@ def hybrid_forward(self, F, a): x = np.array(x, dtype=dtype) np_out = _np.unique(x.asnumpy(), *config[1:]) mx_out = test_unique(x) - assert mx_out[0].shape == np_out[0].shape - for i in range(4): - assert_almost_equal(mx_out[i].asnumpy(), np_out[i], rtol=1e-3, atol=1e-5) + if (len(mx_out)) == 1: + assert mx_out.shape == np_out.shape + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + else: + for i in range(len(mx_out)): + assert mx_out[i].shape == np_out[i].shape + assert_almost_equal(mx_out[i].asnumpy(), np_out[i], rtol=1e-3, atol=1e-5) # Test imperative once again mx_out = np.unique(x, *config[1:]) np_out = _np.unique(x.asnumpy(), *config[1:]) - for i in range(4): - assert_almost_equal(mx_out[i].asnumpy(), np_out[i], rtol=1e-3, atol=1e-5) + if (len(mx_out)) == 1: + assert mx_out.shape == np_out.shape + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + else: + for i in range(len(mx_out)): + assert mx_out[i].shape == np_out[i].shape + assert_almost_equal(mx_out[i].asnumpy(), np_out[i], rtol=1e-3, atol=1e-5) @with_seed()