apache · wkcn · Nov 12, 2019 · Sep 22, 2019 · Sep 22, 2019 · Sep 22, 2019
diff --git a/3rdparty/mshadow/mshadow/base.h b/3rdparty/mshadow/mshadow/base.h
@@ -606,6 +606,64 @@ struct divto {
   typedef op::div OPType;
 };
 }  // namespace sv
+
+#ifndef __CUDA_ARCH__
+using std::isnan;  // lets the unqualified isnan() calls below resolve to std::isnan
+using std::isinf;  // lets the unqualified isinf() calls below resolve to std::isinf
+#endif  // __CUDA_ARCH__
+
+/*! \brief
+ *  determines if the given value is NaN (not a number);
+ *  types without a specialization below always report false */
+namespace isnan_typed {
+  template<typename DType>
+  MSHADOW_XINLINE bool IsNan(volatile DType val) {
+    return false;  // generic fallback: no NaN test is performed for this type
+  }
+  template<>
+  MSHADOW_XINLINE bool IsNan(volatile float val) {
+    return isnan(val);
+  }
+  template<>
+  MSHADOW_XINLINE bool IsNan(volatile double val) {
+    return isnan(val);
+  }
+  template<>
+  MSHADOW_XINLINE bool IsNan(volatile long double val) {
+    return isnan(val);
+  }
+  template<>  // half_t: tested on the raw bits; sign-masked value above the exponent mask = NaN
+  MSHADOW_XINLINE bool IsNan(volatile mshadow::half::half_t val) {
+    return (val.half_ & (~MSHADOW_HALF_SIGN_BIT)) > MSHADOW_HALF_EXPONENT_BITS;
+  }
+}  // namespace isnan_typed
+
+/*! \brief
+ *  determines if the given value is positive or negative infinity;
+ *  types without a specialization below always report false */
+namespace isinf_typed {
+  template<typename DType>
+  MSHADOW_XINLINE bool IsInf(volatile DType val) {
+    return false;  // generic fallback: no infinity test is performed for this type
+  }
+  template<>
+  MSHADOW_XINLINE bool IsInf(volatile float val) {
+    return isinf(val);
+  }
+  template<>
+  MSHADOW_XINLINE bool IsInf(volatile double val) {
+    return isinf(val);
+  }
+  template<>
+  MSHADOW_XINLINE bool IsInf(volatile long double val) {
+    return isinf(val);
+  }
+  template<>  // half_t: tested on the raw bits; sign-masked value equal to the exponent mask = Inf
+  MSHADOW_XINLINE bool IsInf(volatile mshadow::half::half_t val) {
+    return (val.half_ & (~MSHADOW_HALF_SIGN_BIT)) == MSHADOW_HALF_EXPONENT_BITS;
+  }
+}  // namespace isinf_typed
+
 /*! \brief namespace for potential reducer operations */
 namespace red {
 namespace limits {
@@ -674,6 +732,12 @@ template<>
 MSHADOW_XINLINE double NegInfValue<double>(void) {
   return -HUGE_VAL;
 }
+/*! \brief negative infinity value of float16: sign bit set plus the exponent mask */
+template<>
+MSHADOW_XINLINE half::half_t NegInfValue<half::half_t>(void) {
+  return half::half_t::Binary(
+      MSHADOW_HALF_SIGN_BIT | MSHADOW_HALF_EXPONENT_BITS);
+}
 
 /*!
  * \brief maximum value of certain types
@@ -740,6 +804,11 @@ template<>
 MSHADOW_XINLINE double PosInfValue<double>(void) {
   return HUGE_VAL;
 }
+/*! \brief positive infinity value of float16: the exponent mask with the sign bit clear */
+template<>
+MSHADOW_XINLINE half::half_t PosInfValue<half::half_t>(void) {
+  return half::half_t::Binary(MSHADOW_HALF_EXPONENT_BITS);
+}
 
 }  // namespace limits
 
@@ -755,7 +824,11 @@ struct sum {
   MSHADOW_XINLINE static void Reduce(volatile DType& dst,  volatile DType src, volatile DType& residual) { // NOLINT(*)
     DType y = src - residual;
     DType t = dst + y;
-    residual = (t - dst) - y;
+    if (isinf_typed::IsInf(t)) {
+      residual = 0;  // sum is already +/-inf: (t - dst) - y would not be a finite correction
+    } else {
+      residual = (t - dst) - y;
+    }
     dst = t;
   }
   /*! \brief combine the results of two reducers */
@@ -767,10 +840,15 @@ struct sum {
   template<typename DType>
   MSHADOW_XINLINE static void Merge(volatile DType& dst_val, volatile DType& dst_residual, volatile DType& src_val, volatile DType& src_residual) { // NOLINT(*)
     DType t1 = dst_val + src_val;
-    DType e = t1 - dst_val;
-    DType t2 = ((src_val - e) + (dst_val - (t1 - e))) + dst_residual + src_residual;
-    dst_val = t1 + t2;
-    dst_residual = t2 - (dst_val - t1);
+    if (isinf_typed::IsInf(t1)) {
+      dst_val = t1;  // merged sum is +/-inf: keep it and skip the error-term arithmetic
+      dst_residual = 0;
+    } else {
+      DType e = t1 - dst_val;
+      DType t2 = ((src_val - e) + (dst_val - (t1 - e))) + dst_residual + src_residual;
+      dst_val = t1 + t2;
+      dst_residual = t2 - (dst_val - t1);
+    }
   }
   /*! \brief finalize reduction */
   template<typename DType>
@@ -807,12 +885,9 @@ struct maximum {
   /*! \brief do reduction into dst */
   template<typename DType>
   MSHADOW_XINLINE static void Reduce(volatile DType& dst,  volatile DType src) { // NOLINT(*)
-    using namespace std;
-#ifdef __CUDACC__
-    dst = ::max(dst, src);
-#else
-    dst = max(dst, src);
-#endif  // __CUDACC__
+    if (!isnan_typed::IsNan(dst)) {  // a NaN already held in dst is kept as the result
+      if (!(dst >= src)) dst = src;  // negated >= also fires for a NaN src, so NaN is taken
+    }
   }
   /*! \brief do reduction into dst */
   template<typename DType>
@@ -863,12 +938,9 @@ struct minimum {
   /*! \brief do reduction into dst */
   template<typename DType>
   MSHADOW_XINLINE static void Reduce(volatile DType& dst,  volatile DType src) { // NOLINT(*)
-    using namespace std;
-#ifdef __CUDACC__
-    dst = ::min(dst, src);
-#else
-    dst = min(dst, src);
-#endif  // __CUDACC__
+    if (!isnan_typed::IsNan(dst)) {  // a NaN already held in dst is kept as the result
+      if (!(dst <= src)) dst = src;  // negated <= also fires for a NaN src, so NaN is taken
+    }
   }
   /*! \brief do reduction into dst */
   template<typename DType>

diff --git a/3rdparty/mshadow/mshadow/extension/reduce_with_axis.h b/3rdparty/mshadow/mshadow/extension/reduce_with_axis.h
@@ -112,7 +112,7 @@ struct Plan<ReduceWithAxisExp<Reducer, SrcExp, DType, dimsrc, mask, dimdst>, DTy
         index_t z = (x*size_+k)*trailing_+y;
         DType tmp = res;
         Reducer::Reduce(res, src_.Eval(z/last_, z%last_));
-        if (tmp != res) {
+        if (tmp != res && !isnan_typed::IsNan(tmp)) {
           idx = k;
         }
       }

diff --git a/3rdparty/mshadow/mshadow/half.h b/3rdparty/mshadow/mshadow/half.h
@@ -349,6 +349,8 @@ MSHADOW_HALF_OPERATOR(bool, <=)
 
 #define MSHADOW_HALF_MIN mshadow::half::half_t::Binary(0xFBFF);
 #define MSHADOW_HALF_MAX mshadow::half::half_t::Binary(0x7BFF);
+#define MSHADOW_HALF_SIGN_BIT 0x8000  // bit 15 of the 16-bit pattern
+#define MSHADOW_HALF_EXPONENT_BITS 0x7c00  // bits 10-14 of the 16-bit pattern
 }  // namespace half
 }  // namespace mshadow
 #endif  // MSHADOW_HALF_H_
diff --git a/julia/src/ndarray/reduction.jl b/julia/src/ndarray/reduction.jl
@@ -47,8 +47,7 @@ broadcasted(::typeof(min), x::NDArray{T}, y::NDArray{T}) where {T} =
 """
     argmax(x::NDArray; dims) -> indices
 
-Note that `NaN` is skipped during comparison.
-This is different from Julia `Base.argmax`.
+Note that `NaN` is treated as greater than all other values in `argmax`.
 
 ## Examples
 
@@ -77,8 +76,7 @@ Base.argmax(x::NDArray; dims = :) = _argmax(x, dims) .+ 1
 """
     argmin(x::NDArray; dims) -> indices
 
-Note that `NaN` is skipped during comparison.
-This is different from Julia `Base.argmin`.
+Note that `NaN` is treated as less than all other values in `argmin`.
 
 ## Examples
 

diff --git a/julia/test/unittest/ndarray.jl b/julia/test/unittest/ndarray.jl
@@ -1515,8 +1515,8 @@ function test_argmax()
          4 2 6]
     x = NDArray(A)
 
-    @test copy(argmax(x, dims = 1)) == [2 1 2]
-    @test copy(argmax(x, dims = 2)) == reshape([2, 3], :, 1)
+    @test copy(argmax(x, dims = 1)) == [x[1] for x ∈ argmax(A, dims = 1)]
+    @test copy(argmax(x, dims = 2)) == [x[2] for x ∈ argmax(A, dims = 2)]
   end
 
   @info "NDArray::argmax::NaN"
@@ -1525,8 +1525,8 @@ function test_argmax()
          NaN 2 6]
     x = NDArray(A)
 
-    @test copy(argmax(x, dims = 1)) == [1 1 2]
-    @test copy(argmax(x, dims = 2)) == reshape([2, 3], :, 1)
+    @test copy(argmax(x, dims = 1)) == [x[1] for x ∈ argmax(A, dims = 1)]
+    @test copy(argmax(x, dims = 2)) == [x[2] for x ∈ argmax(A, dims = 2)]
   end
 end
 
@@ -1537,8 +1537,8 @@ function test_argmin()
          4 2 6]
     x = NDArray(A)
 
-    @test copy(argmin(x, dims = 1)) == [1 2 1]
-    @test copy(argmin(x, dims = 2)) == reshape([1, 2], :, 1)
+    @test copy(argmin(x, dims = 1)) == [x[1] for x ∈ argmin(A, dims = 1)]
+    @test copy(argmin(x, dims = 2)) == [x[2] for x ∈ argmin(A, dims = 2)]
   end
 
   @info "NDArray::argmin::NaN"
@@ -1547,8 +1547,8 @@ function test_argmin()
          NaN 2 6]
     x = NDArray(A)
 
-    @test copy(argmin(x, dims = 1)) == [1 2 1]
-    @test copy(argmin(x, dims = 2)) == reshape([1, 2], :, 1)
+    @test copy(argmin(x, dims = 1)) == [x[1] for x ∈ argmin(A, dims = 1)]
+    @test copy(argmin(x, dims = 2)) == [x[2] for x ∈ argmin(A, dims = 2)]
   end
 end
 

@@ -4935,6 +4935,7 @@ class DLDataType(ctypes.Structure):
         "bool": (1, 1, 1),
         "uint32": (1, 32, 1),
         "uint64": (1, 64, 1),
+        'float16': (2, 16, 1),
         "float32": (2, 32, 1),
         "float64": (2, 64, 1),
     }

diff --git a/src/operator/contrib/allclose_op-inl.h b/src/operator/contrib/allclose_op-inl.h
@@ -84,7 +84,7 @@ inline bool AllCloseType(const nnvm::NodeAttrs& attrs,
   return (*out_attrs)[0] != -1;
 }
 
-using namespace mshadow_op::isnan_typed;
+using mshadow::isnan_typed::IsNan;
 
 template<int req>
 struct allclose_forward {