impl - FFI for np einsum (apache#17869)
* impl - FFI for np dstack

* impl - benchmark np_einsum np_dstack

* impl - FFI for np_unique

* impl - benchmark np_unique

Co-authored-by: Ubuntu <[email protected]>
2 people authored and shuo-ouyang committed Aug 7, 2020
1 parent 1f21000 commit 64d93ca
Showing 12 changed files with 223 additions and 22 deletions.
3 changes: 3 additions & 0 deletions benchmark/python/ffi/benchmark_ffi.py
@@ -51,6 +51,9 @@ def generate_workloads():
 def prepare_workloads():
     pool = generate_workloads()
     OpArgMngr.add_workload("zeros", (2, 2))
+    OpArgMngr.add_workload("einsum", "ii", pool['2x2'], optimize=False)
+    OpArgMngr.add_workload("unique", pool['1'], return_index=True, return_inverse=True, return_counts=True, axis=-1)
+    OpArgMngr.add_workload("dstack", (pool['2x1'], pool['2x1'], pool['2x1'], pool['2x1']))
     OpArgMngr.add_workload("polyval", dnp.arange(10), pool['2x2'])
     OpArgMngr.add_workload("ediff1d", pool['2x2'], pool['2x2'], pool['2x2'])
     OpArgMngr.add_workload("nan_to_num", pool['2x2'])
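
Note: the three workloads above exercise the new FFI entry points. As a rough illustration (not the actual benchmark_ffi.py driver), per-call dispatch overhead can be sampled with timeit; the import path and the stand-in array below are assumptions based on this file:

    import timeit
    from mxnet import numpy as dnp

    a = dnp.zeros((2, 2))   # stand-in for pool['2x2']
    a.wait_to_read()        # keep allocation out of the measurement

    # "ii" extracts the diagonal; optimize=False matches the workload above
    t = timeit.timeit(lambda: dnp.einsum("ii", a, optimize=False), number=10000)
    print("~%.2f us per call (asynchronous dispatch only)" % (t / 10000 * 1e6))
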
5 changes: 3 additions & 2 deletions python/mxnet/kvstore/kvstore.py
@@ -498,8 +498,9 @@ def set_gradient_compression(self, compression_params):
         """ Specifies type of low-bit quantization for gradient compression \
         and additional arguments depending on the type of compression being used.
-        The 1bit compression works as follows: values which is above the threshold in the
-        gradient will be set to +1, whereas values below threshold will be set to -1.
+        The 1bit compression takes a float `threshold` and works as follows:
+        values which is above the threshold in the gradient will be set to +1, whereas
+        values below threshold will be set to -1.
         2bit Gradient Compression takes a positive float `threshold`.
         The technique works by thresholding values such that positive values in the
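
Note: a NumPy reference for the 1bit rule described above: values above `threshold` quantize to +1, everything else to -1. This is a sketch of the docstring's semantics only; the real kernel (src/kvstore/gradient_compression-inl.h below) additionally packs the signs into bits and keeps an error-feedback residual:

    import numpy as np

    def one_bit_quantize(grad, threshold=0.0):
        # above the threshold -> +1, otherwise -> -1
        return np.where(grad > threshold, 1.0, -1.0)

    print(one_bit_quantize(np.array([0.9, 0.1, -0.3]), threshold=0.5))
    # [ 1. -1. -1.]
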
11 changes: 4 additions & 7 deletions python/mxnet/ndarray/numpy/_op.py
@@ -951,11 +951,8 @@ def unique(ar, return_index=False, return_inverse=False, return_counts=False, axis=None):
     >>> u[indices]
     array([1., 2., 6., 4., 2., 3., 2.])
     """
-    ret = _npi.unique(ar, return_index, return_inverse, return_counts, axis)
-    if isinstance(ret, list):
-        return tuple(ret)
-    else:
-        return ret
+    ret = list(_api_internal.unique(ar, return_index, return_inverse, return_counts, axis))
+    return ret[0] if len(ret) == 1 else tuple(ret)
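
Note: with the new return handling the call site behaves like NumPy: a single requested result comes back as one ndarray, multiple results as a tuple (sketch):

    from mxnet import numpy as np

    x = np.array([1., 2., 2., 3.])
    u = np.unique(x)                           # one result -> single ndarray
    u, idx = np.unique(x, return_index=True)   # several results -> tuple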


@set_module('mxnet.ndarray.numpy')
@@ -4374,7 +4371,7 @@ def dstack(arrays):
            [[2, 3]],
            [[3, 4]]])
     """
-    return _npi.dstack(*arrays)
+    return _api_internal.dstack(*arrays)
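
Note: `_api_internal.dstack(*arrays)` forwards each input array as its own positional PackedFunc argument, which is why the C++ handler below sets `num_args = args.size()`. The user-facing semantics are unchanged (sketch, mirroring NumPy's dstack):

    from mxnet import numpy as np

    a = np.array([1, 2, 3])
    b = np.array([2, 3, 4])
    print(np.dstack((a, b)).shape)   # (1, 3, 2)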


@set_module('mxnet.ndarray.numpy')
@@ -6772,7 +6769,7 @@ def einsum(*operands, **kwargs):

     subscripts = operands[0]
     operands = operands[1:]
-    return _npi.einsum(*operands, subscripts=subscripts, out=out, optimize=int(optimize_arg))
+    return _api_internal.einsum(*operands, subscripts, out, int(optimize_arg))
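
Note: unlike the old keyword-based `_npi.einsum` call, the new FFI call is purely positional; the handler in src/api/operator/numpy/np_einsum_op.cc below indexes from the end of the packed argument list, so the frontend must append `subscripts`, `out`, and `optimize` in exactly this order (illustration of the layout):

    # args[0 .. n-1]       -> operands (NDArrays)
    # args[args_size - 3]  -> subscripts (str)
    # args[args_size - 2]  -> out (NDArray or None)
    # args[args_size - 1]  -> optimize (int)
    # e.g. for two operands:
    #     _api_internal.einsum(a, b, "ij,jk->ik", None, 0)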


@set_module('mxnet.ndarray.numpy')
71 changes: 71 additions & 0 deletions src/api/operator/numpy/np_einsum_op.cc
@@ -0,0 +1,71 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file np_einsum_op.cc
* \brief Implementation of the API of functions in src/operator/numpy/np_einsum_op.cc
*/
#include <mxnet/api_registry.h>
#include <mxnet/runtime/packed_func.h>
#include <vector>
#include "../utils.h"
#include "../../../operator/numpy/np_einsum_op-inl.h"

namespace mxnet {

MXNET_REGISTER_API("_npi.einsum")
.set_body([](runtime::MXNetArgs args, runtime::MXNetRetValue* ret) {
  using namespace runtime;
  const nnvm::Op* op = Op::Get("_npi_einsum");
  nnvm::NodeAttrs attrs;
  op::NumpyEinsumParam param;
  int args_size = args.size();
  // param.num_args
  param.num_args = args_size - 3;
  // param.subscripts
  param.subscripts = args[args_size - 3].operator std::string();
  // param.optimize
  param.optimize = args[args_size - 1].operator int();

  attrs.parsed = std::move(param);
  attrs.op = op;
  SetAttrDict<op::NumpyEinsumParam>(&attrs);

  // inputs
  int num_inputs = param.num_args;
  std::vector<NDArray*> inputs_vec(num_inputs, nullptr);
  for (int i = 0; i < num_inputs; ++i) {
    inputs_vec[i] = args[i].operator mxnet::NDArray*();
  }
  NDArray** inputs = inputs_vec.data();

  // outputs
  NDArray* out = args[args_size - 2].operator mxnet::NDArray*();
  NDArray** outputs = out == nullptr ? nullptr : &out;
  int num_outputs = out != nullptr;

  auto ndoutputs = Invoke(op, &attrs, num_inputs, inputs, &num_outputs, outputs);
  if (out) {
    *ret = PythonArg(args_size - 2);
  } else {
    *ret = reinterpret_cast<mxnet::NDArray*>(ndoutputs[0]);
  }
});

} // namespace mxnet
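
Note: the `PythonArg(args_size - 2)` branch above implements the usual `out=` convention: when the caller supplies an output array, the FFI returns that same argument back instead of wrapping a fresh NDArray handle. At the Python level (a sketch, assuming `out=` is plumbed through as the handler suggests):

    from mxnet import numpy as np

    a = np.ones((2, 2))
    buf = np.zeros((2,))
    res = np.einsum("ii", a, out=buf)
    # res is expected to be the very object passed as `out`
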
26 changes: 26 additions & 0 deletions src/api/operator/numpy/np_matrix_op.cc
@@ -24,6 +24,7 @@
 #include <mxnet/api_registry.h>
 #include <mxnet/runtime/packed_func.h>
 #include "../utils.h"
+#include "../../../operator/nn/concat-inl.h"
 #include "../../../operator/tensor/matrix_op-inl.h"
 #include "../../../operator/numpy/np_matrix_op-inl.h"

@@ -49,6 +50,31 @@ MXNET_REGISTER_API("_npi.expand_dims")
   *ret = ndoutputs[0];
 });

+MXNET_REGISTER_API("_npi.dstack")
+.set_body([](runtime::MXNetArgs args, runtime::MXNetRetValue* ret) {
+  using namespace runtime;
+  const nnvm::Op* op = Op::Get("_npi_dstack");
+  nnvm::NodeAttrs attrs;
+  op::ConcatParam param;
+  int args_size = args.size();
+  // param.num_args
+  param.num_args = args_size;
+  attrs.parsed = param;
+  attrs.op = op;
+  SetAttrDict<op::ConcatParam>(&attrs);
+  // inputs
+  int num_inputs = args_size;
+  std::vector<NDArray*> inputs_vec(args_size, nullptr);
+  for (int i = 0; i < args_size; ++i) {
+    inputs_vec[i] = args[i].operator mxnet::NDArray*();
+  }
+  NDArray** inputs = inputs_vec.data();
+  // outputs
+  int num_outputs = 0;
+  auto ndoutputs = Invoke(op, &attrs, num_inputs, inputs, &num_outputs, nullptr);
+  *ret = ndoutputs[0];
+});

MXNET_REGISTER_API("_npi.split")
.set_body([](runtime::MXNetArgs args, runtime::MXNetRetValue* ret) {
using namespace runtime;
64 changes: 64 additions & 0 deletions src/api/operator/numpy/np_unique_op.cc
@@ -0,0 +1,64 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file np_unique_op.cc
* \brief Implementation of the API of functions in src/operator/numpy/np_unique_op.cc
*/
#include <mxnet/api_registry.h>
#include <mxnet/runtime/packed_func.h>
#include <vector>
#include "../utils.h"
#include "../../../operator/numpy/np_unique_op.h"

namespace mxnet {

MXNET_REGISTER_API("_npi.unique")
.set_body([](runtime::MXNetArgs args, runtime::MXNetRetValue* ret) {
  using namespace runtime;
  const nnvm::Op* op = Op::Get("_npi_unique");
  nnvm::NodeAttrs attrs;
  op::NumpyUniqueParam param;
  // param
  param.return_index = args[1].operator bool();
  param.return_inverse = args[2].operator bool();
  param.return_counts = args[3].operator bool();
  if (args[4].type_code() == kNull) {
    param.axis = dmlc::nullopt;
  } else {
    param.axis = args[4].operator int();
  }
  attrs.parsed = std::move(param);
  attrs.op = op;
  SetAttrDict<op::NumpyUniqueParam>(&attrs);
  // inputs
  int num_inputs = 1;
  NDArray* inputs[] = {args[0].operator mxnet::NDArray*()};
  // outputs
  int num_outputs = 0;
  auto ndoutputs = Invoke(op, &attrs, num_inputs, inputs, &num_outputs, nullptr);
  std::vector<NDArrayHandle> ndarray_handles;
  ndarray_handles.reserve(num_outputs);
  for (int i = 0; i < num_outputs; ++i) {
    ndarray_handles.emplace_back(ndoutputs[i]);
  }
  *ret = ADT(0, ndarray_handles.begin(), ndarray_handles.end());
});

} // namespace mxnet
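
Note: `ADT(0, ...)` above packages the variable-length output list (unique values plus any of index/inverse/counts) into one aggregate return value; the frontend in python/mxnet/ndarray/numpy/_op.py materializes it and normalizes the arity, equivalent to:

    def _normalize(ret):
        ret = list(ret)                 # ADT -> list of ndarrays
        return ret[0] if len(ret) == 1 else tuple(ret)
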
4 changes: 2 additions & 2 deletions src/kvstore/gradient_compression-inl.h
@@ -60,12 +60,12 @@ struct quantize_1bit {
     char *block_ptr = reinterpret_cast < char * > (compr_block);
     // masks used to quantize data
     const uint8_t bits[] = {0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01};
-    for(int i = start; i < end; ++i) {
+    for (int i = start; i < end; ++i) {
       // adds offset to reach appropriate byte
       char *curr_byte = block_ptr + ((i - start) >> 3);
       // adds gradient to existing residual to get updated grad
       residual[i] += grad[i];
-      if(residual[i] > threshold){
+      if (residual[i] > threshold) {
         // set data to 1
         *curr_byte |= bits[(i & 7)];
         // reduce residual by 1
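
Note: a NumPy sketch of the error-feedback loop in quantize_1bit above: the residual accumulates the incoming gradient, the sign relative to the threshold is transmitted as one bit, and the untransmitted remainder is carried into the next step (bit packing omitted):

    import numpy as np

    def quantize_1bit_step(grad, residual, threshold=0.0):
        residual += grad                                  # fold gradient into residual
        sent = np.where(residual > threshold, 1.0, -1.0)  # the transmitted sign
        residual -= sent                                  # keep the quantization error
        return sent, residual
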
12 changes: 6 additions & 6 deletions src/kvstore/gradient_compression.cc
@@ -153,12 +153,12 @@ void GradientCompression::Quantize(const mxnet::NDArray &from, mxnet::NDArray *t
     } else {
       LOG(FATAL) << "Unsupported quantization of type " << get_type_str();
     }
-#else
-    LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
-#endif
     } else {
       LOG(FATAL) << "unknown device mask";
     }
+#else
+    LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
+#endif
   }
 }

@@ -207,12 +207,12 @@ void GradientCompression::Dequantize(const mxnet::NDArray &from, mxnet::NDArray
     } else {
       LOG(FATAL) << "Unsupported dequantization of type " << get_type_str();
     }
-#else
-    LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
-#endif
     } else {
       LOG(FATAL) << "unknown device mask";
     }
+#else
+    LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
+#endif
   }
 }
} // namespace kvstore
7 changes: 7 additions & 0 deletions src/operator/nn/concat-inl.h
@@ -55,6 +55,13 @@ struct ConcatParam : public dmlc::Parameter<ConcatParam> {
     DMLC_DECLARE_FIELD(dim).set_default(1)
     .describe("the dimension to be concated.");
   }
+  void SetAttrDict(std::unordered_map<std::string, std::string>* dict) {
+    std::ostringstream num_args_s, dim_s;
+    num_args_s << num_args;
+    dim_s << dim;
+    (*dict)["num_args"] = num_args_s.str();
+    (*dict)["dim"] = dim_s.str();
+  }
}; // struct ConcatParam

template<typename xpu, typename DType>
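
Note: SetAttrDict exposes the parsed parameters as plain strings so the FFI path can attach them to the node's attribute dictionary. For a dstack call with two inputs and the default `dim`, the resulting entries would be equivalent to (illustrative values):

    attrs = {"num_args": "2", "dim": "1"}
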
9 changes: 9 additions & 0 deletions src/operator/numpy/np_einsum_op-inl.h
@@ -384,6 +384,15 @@ struct NumpyEinsumParam: public dmlc::Parameter<NumpyEinsumParam> {
     DMLC_DECLARE_FIELD(optimize)
     .set_default(0);
   }
+  void SetAttrDict(std::unordered_map<std::string, std::string>* dict) {
+    std::ostringstream num_args_s, optimize_s, subscripts_s;
+    num_args_s << num_args;
+    optimize_s << optimize;
+    subscripts_s << subscripts;
+    (*dict)["num_args"] = num_args_s.str();
+    (*dict)["optimize"] = optimize_s.str();
+    (*dict)["subscripts"] = subscripts_s.str();
+  }
};

class EinsumOp {
11 changes: 11 additions & 0 deletions src/operator/numpy/np_unique_op.h
@@ -80,6 +80,17 @@ struct NumpyUniqueParam : public dmlc::Parameter<NumpyUniqueParam> {
     .set_default(dmlc::optional<int>())
     .describe("An integer that represents the axis to operator on.");
   }
+  void SetAttrDict(std::unordered_map<std::string, std::string>* dict) {
+    std::ostringstream return_index_s, return_inverse_s, return_counts_s, axis_s;
+    return_index_s << return_index;
+    return_inverse_s << return_inverse;
+    return_counts_s << return_counts;
+    axis_s << axis;
+    (*dict)["return_index"] = return_index_s.str();
+    (*dict)["return_inverse"] = return_inverse_s.str();
+    (*dict)["return_counts"] = return_counts_s.str();
+    (*dict)["axis"] = axis_s.str();
+  }
};

} // namespace op
22 changes: 17 additions & 5 deletions tests/python/unittest/test_numpy_op.py
@@ -6542,6 +6542,9 @@ def hybrid_forward(self, F, a):
     configs = [
         ((), True, True, True, None),
         ((1, ), True, True, True, -1),
+        ((5, ), False, False, False, 0),
+        ((5, ), True, False, False, 0),
+        ((5, ), True, True, False, 0),
         ((5, ), True, True, True, 0),
         ((5, ), True, True, True, None),
         ((5, 4), True, True, True, None),
@@ -6562,15 +6565,24 @@ def hybrid_forward(self, F, a):
             x = np.array(x, dtype=dtype)
             np_out = _np.unique(x.asnumpy(), *config[1:])
             mx_out = test_unique(x)
-            assert mx_out[0].shape == np_out[0].shape
-            for i in range(4):
-                assert_almost_equal(mx_out[i].asnumpy(), np_out[i], rtol=1e-3, atol=1e-5)
+            if (len(mx_out)) == 1:
+                assert mx_out.shape == np_out.shape
+                assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
+            else:
+                for i in range(len(mx_out)):
+                    assert mx_out[i].shape == np_out[i].shape
+                    assert_almost_equal(mx_out[i].asnumpy(), np_out[i], rtol=1e-3, atol=1e-5)

             # Test imperative once again
             mx_out = np.unique(x, *config[1:])
             np_out = _np.unique(x.asnumpy(), *config[1:])
-            for i in range(4):
-                assert_almost_equal(mx_out[i].asnumpy(), np_out[i], rtol=1e-3, atol=1e-5)
+            if (len(mx_out)) == 1:
+                assert mx_out.shape == np_out.shape
+                assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
+            else:
+                for i in range(len(mx_out)):
+                    assert mx_out[i].shape == np_out[i].shape
+                    assert_almost_equal(mx_out[i].asnumpy(), np_out[i], rtol=1e-3, atol=1e-5)


@with_seed()
