Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.


op : insert by slice
Browse files Browse the repository at this point in the history
fix compile error

fix compile error again
  • Loading branch information
JiangZhaoh committed Feb 14, 2020
1 parent 53c568b commit fd91dfd
Show file tree
Hide file tree
Showing 14 changed files with 273 additions and 347 deletions.
4 changes: 2 additions & 2 deletions python/mxnet/ndarray/numpy/
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,7 @@ def insert(arr, obj, values, axis=None):
start = obj.start
stop = obj.stop
step = 1 if obj.step is None else obj.step
return _npi.insert(arr, val=values, start=start, stop=stop, step=step, axis=axis)
return _npi.insert_slice(arr, val=values, start=start, stop=stop, step=step, axis=axis)
elif isinstance(obj, integer_types):
return _npi.insert_scalar(arr, val=values, int_ind=obj, axis=axis)
elif isinstance(obj, NDArray):
Expand All @@ -771,7 +771,7 @@ def insert(arr, obj, values, axis=None):
start = obj.start
stop = obj.stop
step = 1 if obj.step is None else obj.step
return _npi.insert(arr, values, start=start, stop=stop, step=step, axis=axis)
return _npi.insert_slice(arr, values, start=start, stop=stop, step=step, axis=axis)
elif isinstance(obj, integer_types):
return _npi.insert_scalar(arr, values, int_ind=obj, axis=axis)
elif isinstance(obj, NDArray):
Expand Down
2 changes: 1 addition & 1 deletion python/mxnet/
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def _run_with_array_ufunc_proto(*args, **kwargs):
# 'insert',
Expand Down
4 changes: 2 additions & 2 deletions python/mxnet/symbol/numpy/
Original file line number Diff line number Diff line change
Expand Up @@ -3092,7 +3092,7 @@ def insert(arr, obj, values, axis=None):
start = obj.start
stop = obj.stop
step = 1 if obj.step is None else obj.step
return _npi.insert(arr, val=values, start=start, stop=stop, step=step, axis=axis)
return _npi.insert_slice(arr, val=values, start=start, stop=stop, step=step, axis=axis)
elif isinstance(obj, integer_types):
return _npi.insert_scalar(arr, val=values, int_ind=obj, axis=axis)
elif isinstance(obj, Symbol):
Expand All @@ -3105,7 +3105,7 @@ def insert(arr, obj, values, axis=None):
start = obj.start
stop = obj.stop
step = 1 if obj.step is None else obj.step
return _npi.insert(arr, values, start=start, stop=stop, step=step, axis=axis)
return _npi.insert_slice(arr, values, start=start, stop=stop, step=step, axis=axis)
elif isinstance(obj, integer_types):
return _npi.insert_scalar(arr, values, int_ind=obj, axis=axis)
elif isinstance(obj, Symbol):
Expand Down
243 changes: 0 additions & 243 deletions src/operator/numpy/np_insert_op-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -365,249 +365,6 @@ void InsertSequenceImpl(mshadow::Stream<xpu> *s, const TBlob& output,

template<typename xpu>
void NumpyInsertCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
/*using namespace mshadow;
using namespace mxnet_op;
const NumpyInsertParam& param = nnvm::get<NumpyInsertParam>(attrs.parsed);
int input_count = param.val.has_value() ? 1 : 2;
int insize = (param.step.has_value() || param.int_ind.has_value()) ?
input_count : input_count + 1;
bool obj_is_tensor = (param.val.has_value() && insize == 2) ||
(!param.val.has_value() && insize == 3);
CHECK_EQ(inputs.size(), insize);
CHECK_EQ(outputs.size(), 1);
CHECK_EQ(req.size(), 1);
mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
const int arr_pos = 0;
const int val_pos = param.val.has_value() ? 0 : 1;
const int obj_pos = val_pos + 1;
const int out_pos = 0;
int ndim = inputs[arr_pos].shape_.ndim();
int axis = param.axis.has_value() ? param.axis.value() : 0;
TBlob arr;
TBlob values = param.val.has_value() ?
TBlob(nullptr, mxnet::TShape(0, 1), xpu::kDevMask, outputs[out_pos].type_flag_) :
if (!param.axis.has_value()) {
arr = inputs[arr_pos].reshape(Shape1(inputs[arr_pos].shape_.Size()));
ndim = 1;
} else if (ndim == 0) {
if (param.val.has_value()) {
CHECK_EQ(inputs[val_pos].shape_.ndim(), 0)
<< "'arr' is a 0-d array, 'values' can not assign to it. "
<< "alueError: assignment to 0-d array.";
mxnet_op::copy(s, outputs[out_pos], inputs[val_pos]);
} else {
MSHADOW_TYPE_SWITCH(outputs[out_pos].type_flag_, DType, {
Fill(s, outputs[out_pos], req[0], static_cast<DType>(param.val.value()));
} else {
arr = inputs[arr_pos];
CHECK(axis >= -1 * arr.shape_.ndim() && axis < arr.shape_.ndim())
<< "Axis should be in the range of [-r, r-1] where r is the rank of input tensor";
axis += (axis < 0) ? arr.shape_.ndim() : 0;
int N = arr.shape_[axis];
size_t indices_len = 0; // indices amount
int start = 0, stop = 0, step = 0; // arguments from 'obj' when it's 'slice'
// get and check indices from slice or sequence of ints
if (obj_is_tensor) { // indices from 'tensor'
indices_len = inputs[obj_pos].shape_.Size();
} else if (param.step.has_value()) { // indices from 'slice'
SliceIndices(param.start, param.stop, param.step,
N, &start, &stop, &step, &indices_len);
int numnew = 0; // numnew = output.shape[axis] - arr.shape[axis]
int index = 0; // save modified index, because index may be negative integer
mxnet::TShape val_newshape(arr.shape_.ndim(), -1);
// modify values's ndim to arr's ndim, for broadcast easily later
// e.g. value shape: (2,) arr shape: (3, 2) => value shape: (1, 2)
for (int i = values.shape_.ndim() - 1, j = arr.shape_.ndim() - 1;
i >= 0 || j >= 0;
--i, --j) {
if (i >= 0 && j >= 0) {
val_newshape[j] = values.shape_[i];
} else if (i >= 0) {
CHECK_EQ(values.shape_[i], 1) << "index exceed limits.";
} else {
val_newshape[j] = 1;
values.shape_.assign(val_newshape.begin(), val_newshape.end());
// get numnew
mxnet::TShape old_valshape(values.shape_);
if (param.int_ind.has_value() ||
(obj_is_tensor && inputs[obj_pos].shape_.ndim() == 0)) { // scaler
if (param.int_ind.has_value()) {
index = param.int_ind.value();
CHECK(index >= -1 * N && index <= N)
<< "Index should be in the range of [-r, r-1] where r is the dim size in 'axis'";
if (index < 0) {
index += N;
// values = moveaxis(values, 0, axis), will change values's shape
numnew = values.shape_[0];
mxnet::TShape axes(values.ndim(), -1); // moved axes
mxnet::TShape val_newshape(values.ndim(), -1);
int axes_id = 0;
for (int i = 1; i <= axis; ++i) {
axes[axes_id++] = i;
axes[axes_id++] = 0;
for (int i = axis + 1; i < values.ndim(); ++i) {
axes[axes_id++] = i;
for (int i = 0; i < values.ndim(); ++i) {
val_newshape[i] = values.shape_[axes[i]];
values.shape_.assign(val_newshape.begin(), val_newshape.end());
} else if (indices_len == 1) { // tensor with only one element
numnew = values.shape_[axis];
if (param.step.has_value()) {
index = start;
CHECK(index >= -1 * N && index <= N)
<< "Index should be in the range of [-r, r-1] where r is the dim size in 'axis'";
if (index < 0) {
index += N;
} else {
numnew = static_cast<int>(indices_len);
const mxnet::TShape& outshape = outputs[out_pos].shape_;
int dtype = outputs[out_pos].type_flag_;
int vtype = param.val.has_value() ?
mshadow::DataType<double>::kFlag :
if ((param.int_ind.has_value() ||
(obj_is_tensor && inputs[obj_pos].shape_.ndim() == 0) ||
(indices_len == 1)) &&
param.val.has_value()) {
// If insert use single index and 'value' is inputed as numerical parameter
values = TBlob(ctx.requested[0].get_space_typed<xpu, 1, VType>(Shape1(1), s));
Fill(s, values, kWriteTo, param.val.value());
if (param.int_ind.has_value()) {
// 'obj' is integer, need to moveaxis
MXNET_NDIM_SWITCH(outshape.ndim(), ndim, {
InsertScalerObj<xpu, ndim>(s, outputs[out_pos], arr, values,
outshape.get<ndim>(), values.shape_.get<ndim>(),
dtype, vtype, req[out_pos], axis, index, numnew,
outshape.Size(), true);
} else if (obj_is_tensor && inputs[obj_pos].shape_.ndim() == 0) {
// 'obj' is tensor and the tensor's ndim is 0, also need to moveaxis
MXNET_NDIM_SWITCH(outshape.ndim(), ndim, {
InsertSizeOneTensorObj<xpu, ndim>(s, outputs[out_pos], arr, values,
outshape.get<ndim>(), values.shape_.get<ndim>(),
dtype, vtype, req[out_pos], axis, inputs[obj_pos],
numnew, N, outshape.Size(), true);
} else if (indices_len == 1) {
MXNET_NDIM_SWITCH(outshape.ndim(), ndim, {
if (param.step.has_value()) {
InsertScalerObj<xpu, ndim>(s, outputs[out_pos], arr, values,
outshape.get<ndim>(), values.shape_.get<ndim>(),
dtype, vtype, req[out_pos], axis, start, numnew,
outshape.Size(), false);
} else {
InsertSizeOneTensorObj<xpu, ndim>(s, outputs[out_pos], arr, values,
outshape.get<ndim>(), values.shape_.get<ndim>(),
dtype, vtype, req[out_pos], axis, inputs[obj_pos],
numnew, N, outshape.Size(), false);
} else {
// broadcast check
for (int i = outshape.ndim() - 1; i >= 0; --i) {
int sz = outshape[i];
if (i == axis) {
sz = numnew;
CHECK((values.shape_[i] == 1) || (values.shape_[i] == sz));
size_t temp_storage_bytes, temp_mem_size;
temp_storage_bytes = SortByKeyWorkspaceSize<int64_t, int, xpu>(indices_len, false, true);
temp_mem_size = indices_len * sizeof(int64_t) * 2 +
indices_len * sizeof(int) +
outshape[axis] * sizeof(int) * 2 +
Tensor<xpu, 1, char> temp_mem =
ctx.requested[0].get_space_typed<xpu, 1, char>(Shape1(temp_mem_size), s);
int64_t* indices_ptr = reinterpret_cast<int64_t*>(temp_mem.dptr_);
int64_t* sorted_indices_ptr = reinterpret_cast<int64_t*>(indices_ptr + indices_len);
int* order_ptr = reinterpret_cast<int*>(sorted_indices_ptr + indices_len);
int* is_insert = reinterpret_cast<int*>(order_ptr + indices_len);
int* origin_idx = reinterpret_cast<int*>(is_insert + outshape[axis]);
Tensor<xpu, 1, char> temp_storage(reinterpret_cast<char*>(origin_idx + outshape[axis]),
Shape1(temp_storage_bytes), s);
Tensor<xpu, 1, int64_t> indices(indices_ptr, Shape1(indices_len), s);
Tensor<xpu, 1, int64_t> sorted_indices(sorted_indices_ptr, Shape1(indices_len), s);
Tensor<xpu, 1, int> order(order_ptr, Shape1(indices_len), s);
int num_bits = common::ilog2ui(static_cast<unsigned int>(indices_len) - 1);
if (param.step.has_value()) {
Kernel<SliceToIndices, xpu>::Launch(s, indices_len, indices_ptr, start, step);
} else {
Kernel<ObjToIndices, xpu>::Launch(s, indices_len, indices_ptr, N,
Kernel<range_fwd, xpu>::Launch(s, indices_len, 1, 0, 1, kWriteTo, order_ptr);
mxnet::op::SortByKey(indices, order, true, &temp_storage, 0, num_bits, &sorted_indices);
Kernel<IndicesModify, xpu>::Launch(s, indices_len, indices_ptr, order_ptr);
mxnet_op::Kernel<mxnet_op::set_zero, xpu>::Launch(s, outshape[axis], is_insert);
Kernel<SetIsInsert, xpu>::Launch(s, indices_len, indices_ptr, is_insert);
Kernel<SetOriginValuesIdx, xpu>::Launch(s, indices_len, indices_ptr, origin_idx);
Kernel<SetOriginArrIdx, xpu>::Launch(s, outshape[axis], is_insert, origin_idx);
MXNET_NDIM_SWITCH(outshape.ndim(), ndim, {
InsertSequenceImpl<xpu, ndim>(s, outputs[out_pos], arr, values,
outshape.get<ndim>(), values.shape_.get<ndim>(),
is_insert, origin_idx, dtype, vtype, req[out_pos],
axis, outshape.Size());

} // namespace op
} // namespace mxnet

Expand Down
7 changes: 6 additions & 1 deletion src/operator/numpy/np_insert_op_scalar-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,16 @@

#include <vector>
#include <algorithm>
#include "./np_insert_op-inl.h"

namespace mxnet {
namespace op {

* Only supports a scalar index (the type of param 'obj' is scalar).
template<typename xpu>
void NumpyInsertScalarCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
Expand Down Expand Up @@ -152,4 +157,4 @@ void NumpyInsertScalarCompute(const nnvm::NodeAttrs& attrs,
} // namespace op
} // namespace mxnet

2 changes: 1 addition & 1 deletion src/operator/numpy/
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

* Copyright (c) 2019 by Contributors
* \file
* \file
* \brief CPU Implementation of numpy insert operations
#include "./np_insert_op-inl.h"
Expand Down
4 changes: 2 additions & 2 deletions src/operator/numpy/
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

* Copyright (c) 2019 by Contributors
* \file
* \file
* \brief GPU Implementation of numpy insert operations (insert by int index)

Expand All @@ -33,4 +33,4 @@ NNVM_REGISTER_OP(_npi_insert_scalar)
.set_attr<FCompute>("FCompute<gpu>", NumpyInsertScalarCompute<gpu>);


0 comments on commit fd91dfd

Please sign in to comment.