Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXNET-1426] Fix the wrong result of sum, mean, argmin, argmax when inputs contain inf or nan #16234

Merged
merged 21 commits into from
Nov 12, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 89 additions & 17 deletions 3rdparty/mshadow/mshadow/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,64 @@ struct divto {
typedef op::div OPType;
};
} // namespace sv

#ifndef __CUDA_ARCH__
using std::isnan;
using std::isinf;
#endif

/*!
 * \brief type-dispatched "is not-a-number" predicate.
 *  The generic template answers false; floating point types
 *  (float, double, long double, half_t) get dedicated specializations.
 */
namespace isnan_typed {
/*! \brief fallback for every type without a specialization below: never NaN */
template<typename DType>
MSHADOW_XINLINE bool IsNan(volatile DType /*value*/) {
  return false;
}
/*! \brief float: forwards to isnan */
template<>
MSHADOW_XINLINE bool IsNan<float>(volatile float value) {
  return isnan(value);
}
/*! \brief double: forwards to isnan */
template<>
MSHADOW_XINLINE bool IsNan<double>(volatile double value) {
  return isnan(value);
}
/*! \brief long double: forwards to isnan */
template<>
MSHADOW_XINLINE bool IsNan<long double>(volatile long double value) {
  return isnan(value);
}
/*! \brief half_t: decided from the raw 16-bit pattern */
template<>
MSHADOW_XINLINE bool IsNan<mshadow::half::half_t>(volatile mshadow::half::half_t value) {
  // With the sign bit cleared, anything strictly above the exponent mask
  // has all exponent bits set plus a non-zero mantissa.
  return MSHADOW_HALF_EXPONENT_BITS < (value.half_ & (~MSHADOW_HALF_SIGN_BIT));
}
}  // namespace isnan_typed

/*!
 * \brief type-dispatched "is positive or negative infinity" predicate.
 *  The generic template answers false; floating point types
 *  (float, double, long double, half_t) get dedicated specializations.
 */
namespace isinf_typed {
/*! \brief fallback for every type without a specialization below: never infinite */
template<typename DType>
MSHADOW_XINLINE bool IsInf(volatile DType /*value*/) {
  return false;
}
/*! \brief float: forwards to isinf */
template<>
MSHADOW_XINLINE bool IsInf<float>(volatile float value) {
  return isinf(value);
}
/*! \brief double: forwards to isinf */
template<>
MSHADOW_XINLINE bool IsInf<double>(volatile double value) {
  return isinf(value);
}
/*! \brief long double: forwards to isinf */
template<>
MSHADOW_XINLINE bool IsInf<long double>(volatile long double value) {
  return isinf(value);
}
/*! \brief half_t: decided from the raw 16-bit pattern */
template<>
MSHADOW_XINLINE bool IsInf<mshadow::half::half_t>(volatile mshadow::half::half_t value) {
  // With the sign bit cleared, an exact match with the exponent mask
  // means all exponent bits set and a zero mantissa, for either sign.
  return MSHADOW_HALF_EXPONENT_BITS == (value.half_ & (~MSHADOW_HALF_SIGN_BIT));
}
}  // namespace isinf_typed

wkcn marked this conversation as resolved.
Show resolved Hide resolved
/*! \brief namespace for potential reducer operations */
namespace red {
namespace limits {
Expand Down Expand Up @@ -674,6 +732,12 @@ template<>
MSHADOW_XINLINE double NegInfValue<double>(void) {
return -HUGE_VAL;
}
/*! \brief negative infinity value of float16, assembled from its raw bit pattern */
template<>
MSHADOW_XINLINE half::half_t NegInfValue<half::half_t>() {
  // all exponent bits set, mantissa zero, sign bit set
  return half::half_t::Binary(MSHADOW_HALF_EXPONENT_BITS | MSHADOW_HALF_SIGN_BIT);
}

/*!
* \brief maximum value of certain types
Expand Down Expand Up @@ -740,6 +804,11 @@ template<>
MSHADOW_XINLINE double PosInfValue<double>(void) {
return HUGE_VAL;
}
/*! \brief positive infinity value of float16, assembled from its raw bit pattern */
template<>
MSHADOW_XINLINE half::half_t PosInfValue<half::half_t>() {
  // all exponent bits set, mantissa zero, sign bit clear
  return half::half_t::Binary(MSHADOW_HALF_EXPONENT_BITS);
}

} // namespace limits

Expand All @@ -755,7 +824,11 @@ struct sum {
MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src, volatile DType& residual) { // NOLINT(*)
  // Kahan-compensated accumulation of src into dst:
  //   dst      - running sum, updated in place
  //   src      - value to add
  //   residual - running compensation term, updated in place
  DType y = src - residual;
  DType t = dst + y;
  if (isinf_typed::IsInf(t)) {
    // Once the sum is infinite, (t - dst) - y is no longer a finite
    // correction (it is inf or NaN); drop the compensation so the
    // infinite sum is kept as is by later additions.
    residual = 0;
  } else {
    residual = (t - dst) - y;
  }
  dst = t;
}
/*! \brief combine the results of two reducers */
Expand All @@ -767,10 +840,15 @@ struct sum {
template<typename DType>
MSHADOW_XINLINE static void Merge(volatile DType& dst_val, volatile DType& dst_residual, volatile DType& src_val, volatile DType& src_residual) { // NOLINT(*)
  // Combines two compensated partial sums (value + residual pairs);
  // the result is written to dst_val / dst_residual.
  DType t1 = dst_val + src_val;
  if (isinf_typed::IsInf(t1)) {
    // An infinite sum has no meaningful correction term: keep the
    // infinity and reset the residual instead of computing inf - inf.
    dst_val = t1;
    dst_residual = 0;
  } else {
    // e and t2 must be derived from the *original* dst_val, so nothing
    // may write to dst_val before this point.
    DType e = t1 - dst_val;
    DType t2 = ((src_val - e) + (dst_val - (t1 - e))) + dst_residual + src_residual;
    dst_val = t1 + t2;
    dst_residual = t2 - (dst_val - t1);
  }
}
/*! \brief finalize reduction */
template<typename DType>
Expand Down Expand Up @@ -807,12 +885,9 @@ struct maximum {
/*!
 * \brief do reduction into dst: keep the larger of dst and src, propagating NaN
 * \param dst running maximum, updated in place
 * \param src candidate value
 */
template<typename DType>
MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*)
  // A NaN already stored in dst is sticky: it is never replaced.
  if (!isnan_typed::IsNan(dst)) {
    // Written as !(dst >= src) rather than (dst < src) so that a NaN src,
    // for which every ordered comparison is false, is also taken.
    if (!(dst >= src)) dst = src;
  }
}
/*! \brief do reduction into dst */
template<typename DType>
Expand Down Expand Up @@ -863,12 +938,9 @@ struct minimum {
/*!
 * \brief do reduction into dst: keep the smaller of dst and src, propagating NaN
 * \param dst running minimum, updated in place
 * \param src candidate value
 */
template<typename DType>
MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*)
  // A NaN already stored in dst is sticky: it is never replaced.
  if (!isnan_typed::IsNan(dst)) {
    // Written as !(dst <= src) rather than (dst > src) so that a NaN src,
    // for which every ordered comparison is false, is also taken.
    if (!(dst <= src)) dst = src;
  }
}
/*! \brief do reduction into dst */
template<typename DType>
Expand Down
2 changes: 1 addition & 1 deletion 3rdparty/mshadow/mshadow/extension/reduce_with_axis.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ struct Plan<ReduceWithAxisExp<Reducer, SrcExp, DType, dimsrc, mask, dimdst>, DTy
index_t z = (x*size_+k)*trailing_+y;
DType tmp = res;
Reducer::Reduce(res, src_.Eval(z/last_, z%last_));
if (tmp != res) {
if (tmp != res && !isnan_typed::IsNan(tmp)) {
wkcn marked this conversation as resolved.
Show resolved Hide resolved
idx = k;
}
}
Expand Down
2 changes: 2 additions & 0 deletions 3rdparty/mshadow/mshadow/half.h
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,8 @@ MSHADOW_HALF_OPERATOR(bool, <=)

// Raw 16-bit patterns for half_t limits. Assuming the IEEE 754 binary16
// layout (1 sign bit, 5 exponent bits, 10 mantissa bits), 0xFBFF / 0x7BFF
// are the most negative / most positive finite values.
// NOTE(review): both macros expand with a trailing ';' - they can only be
// used at the end of a statement, not inside a larger expression.
#define MSHADOW_HALF_MIN mshadow::half::half_t::Binary(0xFBFF);
#define MSHADOW_HALF_MAX mshadow::half::half_t::Binary(0x7BFF);
// Mask for the sign bit of the 16-bit half pattern.
#define MSHADOW_HALF_SIGN_BIT 0x8000
// Mask for the exponent field of the 16-bit half pattern; on its own it is
// also the bit pattern used for positive infinity.
#define MSHADOW_HALF_EXPONENT_BITS 0x7c00
} // namespace half
} // namespace mshadow
#endif // MSHADOW_HALF_H_
6 changes: 2 additions & 4 deletions julia/src/ndarray/reduction.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ broadcasted(::typeof(min), x::NDArray{T}, y::NDArray{T}) where {T} =
"""
argmax(x::NDArray; dims) -> indices

Note that `NaN` is skipped during comparison.
This is different from Julia `Base.argmax`.
Note that `NaN` is treated as greater than all other values in `argmax`.

## Examples

Expand Down Expand Up @@ -77,8 +76,7 @@ Base.argmax(x::NDArray; dims = :) = _argmax(x, dims) .+ 1
"""
argmin(x::NDArray; dims) -> indices

Note that `NaN` is skipped during comparison.
This is different from Julia `Base.argmin`.
Note that `NaN` is treated as less than all other values in `argmin`.

## Examples

Expand Down
16 changes: 8 additions & 8 deletions julia/test/unittest/ndarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1515,8 +1515,8 @@ function test_argmax()
4 2 6]
x = NDArray(A)

@test copy(argmax(x, dims = 1)) == [2 1 2]
@test copy(argmax(x, dims = 2)) == reshape([2, 3], :, 1)
@test copy(argmax(x, dims = 1)) == [x[1] for x ∈ argmax(A, dims = 1)]
@test copy(argmax(x, dims = 2)) == [x[2] for x ∈ argmax(A, dims = 2)]
end

@info "NDArray::argmax::NaN"
Expand All @@ -1525,8 +1525,8 @@ function test_argmax()
NaN 2 6]
x = NDArray(A)

@test copy(argmax(x, dims = 1)) == [1 1 2]
@test copy(argmax(x, dims = 2)) == reshape([2, 3], :, 1)
@test copy(argmax(x, dims = 1)) == [x[1] for x ∈ argmax(A, dims = 1)]
@test copy(argmax(x, dims = 2)) == [x[2] for x ∈ argmax(A, dims = 2)]
end
end

Expand All @@ -1537,8 +1537,8 @@ function test_argmin()
4 2 6]
x = NDArray(A)

@test copy(argmin(x, dims = 1)) == [1 2 1]
@test copy(argmin(x, dims = 2)) == reshape([1, 2], :, 1)
@test copy(argmin(x, dims = 1)) == [x[1] for x ∈ argmin(A, dims = 1)]
@test copy(argmin(x, dims = 2)) == [x[2] for x ∈ argmin(A, dims = 2)]
end

@info "NDArray::argmin::NaN"
Expand All @@ -1547,8 +1547,8 @@ function test_argmin()
NaN 2 6]
x = NDArray(A)

@test copy(argmin(x, dims = 1)) == [1 2 1]
@test copy(argmin(x, dims = 2)) == reshape([1, 2], :, 1)
@test copy(argmin(x, dims = 1)) == [x[1] for x ∈ argmin(A, dims = 1)]
@test copy(argmin(x, dims = 2)) == [x[2] for x ∈ argmin(A, dims = 2)]
end
end

Expand Down
1 change: 1 addition & 0 deletions python/mxnet/ndarray/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -4935,6 +4935,7 @@ class DLDataType(ctypes.Structure):
"bool": (1, 1, 1),
"uint32": (1, 32, 1),
"uint64": (1, 64, 1),
'float16': (2, 16, 1),
"float32": (2, 32, 1),
"float64": (2, 64, 1),
}
Expand Down
2 changes: 1 addition & 1 deletion src/operator/contrib/allclose_op-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ inline bool AllCloseType(const nnvm::NodeAttrs& attrs,
return (*out_attrs)[0] != -1;
}

using namespace mshadow_op::isnan_typed;
using mshadow::isnan_typed::IsNan;

template<int req>
struct allclose_forward {
Expand Down
Loading