diff --git a/CHANGELOG.md b/CHANGELOG.md
index c532520884..e7f553afe9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@
 - PR #209: Simplify README.md, move build instructions to BUILD.md
 - PR #225: Support for generating random integers
 - PR #215: Refactored LinAlg::norm to Stats::rowNorm and added Stats::colNorm
+- PR #230: Refactored the cuda_utils header

 ## Bug Fixes
diff --git a/cuML/src/dbscan/labelling/pack.h b/cuML/src/dbscan/labelling/pack.h
index 833fd13599..eb3afeafea 100644
--- a/cuML/src/dbscan/labelling/pack.h
+++ b/cuML/src/dbscan/labelling/pack.h
@@ -15,6 +15,9 @@
  */

 #pragma once
+
+#include "utils.h"
+
 namespace Dbscan {
 namespace Label {
diff --git a/ml-prims/src/cuda_utils.h b/ml-prims/src/cuda_utils.h
index 736537834f..6c7f1b5b54 100644
--- a/ml-prims/src/cuda_utils.h
+++ b/ml-prims/src/cuda_utils.h
@@ -17,41 +17,10 @@
 #pragma once

 #include <cuda_runtime.h>
-#include <cstdio>
-#include <iostream>
-#include <stdexcept>
-#include <string>
+#include "utils.h"

 namespace MLCommon {

-/** macro to throw a c++ std::runtime_error */
-#define THROW(fmt, ...)                                                    \
-  do {                                                                     \
-    std::string msg;                                                       \
-    char errMsg[2048];                                                     \
-    std::sprintf(errMsg, "Exception occured! file=%s line=%d: ", __FILE__, \
-                 __LINE__);                                                \
-    msg += errMsg;                                                         \
-    std::sprintf(errMsg, fmt, ##__VA_ARGS__);                              \
-    msg += errMsg;                                                         \
-    throw std::runtime_error(msg);                                         \
-  } while (0)
-
-/** macro to check for a conditional and assert on failure */
-#define ASSERT(check, fmt, ...)  \
-  do {                           \
-    if (!(check))                \
-      THROW(fmt, ##__VA_ARGS__); \
-  } while (0)
-
-/** check for cuda runtime API errors and assert accordingly */
-#define CUDA_CHECK(call)                                                 \
-  do {                                                                   \
-    cudaError_t status = call;                                           \
-    ASSERT(status == cudaSuccess, "FAIL: call='%s'. Reason:%s\n", #call, \
-           cudaGetErrorString(status));                                  \
-  } while (0)
-
 /** helper macro for device inlined functions */
 #define DI inline __device__
 #define HDI inline __host__ __device__
@@ -102,60 +71,6 @@ constexpr HDI IntType log2(IntType num, IntType ret = IntType(0)) {
   return num <= IntType(1) ? ret : log2(num >> IntType(1), ++ret);
 }

-/** cuda malloc */
-template <typename Type>
-void allocate(Type *&ptr, size_t len, bool setZero = false) {
-  CUDA_CHECK(cudaMalloc((void **)&ptr, sizeof(Type) * len));
-  if (setZero)
-    CUDA_CHECK(cudaMemset(ptr, 0, sizeof(Type) * len));
-}
-
-/** Helper function to calculate need memory for allocate to store dense matrix.
-* @param rows number of rows in matrix
-* @param columns number of columns in matrix
-* @return need number of items to allocate via allocate()
-* @sa allocate()
-*/
-inline size_t allocLengthForMatrix(size_t rows, size_t columns) {
-  return rows * columns;
-}
-
-/** performs a host to device copy */
-template <typename Type>
-void updateDevice(Type *dPtr, const Type *hPtr, size_t len,
-                  cudaStream_t stream = 0) {
-  CUDA_CHECK(
-    cudaMemcpy(dPtr, hPtr, len * sizeof(Type), cudaMemcpyHostToDevice));
-}
-
-template <typename Type>
-void updateDeviceAsync(Type *dPtr, const Type *hPtr, size_t len,
-                       cudaStream_t stream) {
-  CUDA_CHECK(cudaMemcpyAsync(dPtr, hPtr, len * sizeof(Type),
-                             cudaMemcpyHostToDevice, stream));
-}
-
-/** performs a device to host copy */
-template <typename Type>
-void updateHost(Type *hPtr, const Type *dPtr, size_t len,
-                cudaStream_t stream = 0) {
-  CUDA_CHECK(
-    cudaMemcpy(hPtr, dPtr, len * sizeof(Type), cudaMemcpyDeviceToHost));
-}
-
-template <typename Type>
-void updateHostAsync(Type *hPtr, const Type *dPtr, size_t len,
-                     cudaStream_t stream) {
-  CUDA_CHECK(cudaMemcpyAsync(hPtr, dPtr, len * sizeof(Type),
-                             cudaMemcpyDeviceToHost, stream));
-}
-
-template <typename Type>
-void copy(Type* dPtr1, const Type* dPtr2, size_t len) {
-  CUDA_CHECK(cudaMemcpy(dPtr1, dPtr2, len*sizeof(Type),
-                        cudaMemcpyDeviceToDevice));
-}
-
 /** Device function to apply the input lambda across threads in the grid */
 template <int ItemsPerThread, typename L>
 DI void forEach(int num, L lambda) {
@@ -478,46 +393,4 @@ DI T shfl_xor(T val, int laneMask, int width = WarpSize,
 #endif
 }
-
-/**
- * @defgroup Debug utils for debug device code
- * @{
- */
-template <class T, class OutStream>
-void myPrintHostVector(const char * variableName, const T * hostMem, size_t componentsCount, OutStream& out)
-{
-  out << variableName << "=[";
-  for (size_t i = 0; i < componentsCount; ++i)
-  {
-    if (i != 0)
-      out << ",";
-    out << hostMem[i];
-  }
-  out << "];\n";
-}
-
-template <class T>
-void myPrintHostVector(const char * variableName, const T * hostMem, size_t componentsCount)
-{
-  myPrintHostVector(variableName, hostMem, componentsCount, std::cout);
-  std::cout.flush();
-}
-
-template <class T, class OutStream>
-void myPrintDevVector(const char * variableName, const T * devMem, size_t componentsCount, OutStream& out)
-{
-  T* hostMem = new T[componentsCount];
-  CUDA_CHECK(cudaMemcpy(hostMem, devMem, componentsCount * sizeof(T), cudaMemcpyDeviceToHost));
-  myPrintHostVector(variableName, hostMem, componentsCount, out);
-  delete []hostMem;
-}
-
-template <class T>
-void myPrintDevVector(const char * variableName, const T * devMem, size_t componentsCount)
-{
-  myPrintDevVector(variableName, devMem, componentsCount, std::cout);
-  std::cout.flush();
-}
-/** @} */
-
 } // namespace MLCommon
diff --git a/ml-prims/src/linalg/cublas_wrappers.h b/ml-prims/src/linalg/cublas_wrappers.h
index f3d747621e..8d9eb7e5d6 100644
--- a/ml-prims/src/linalg/cublas_wrappers.h
+++ b/ml-prims/src/linalg/cublas_wrappers.h
@@ -57,6 +57,9 @@ namespace LinAlg {
   }                                                                \
 }

+///@todo: add a similar CUBLAS_CHECK_NO_THROW
+/// (Ref: https://github.com/rapidsai/cuml/issues/229)
+
 /**
  * @defgroup Axpy cublas ax+y operations
  * @{
diff --git a/ml-prims/src/linalg/cusolver_wrappers.h b/ml-prims/src/linalg/cusolver_wrappers.h
index 93dfb1a5f0..1ca2c57541 100644
--- a/ml-prims/src/linalg/cusolver_wrappers.h
+++ b/ml-prims/src/linalg/cusolver_wrappers.h
@@ -70,6 +70,9 @@ namespace LinAlg {
   }                                                                \
 }

+///@todo: add a similar CUSOLVER_CHECK_NO_THROW
+/// (Ref: https://github.com/rapidsai/cuml/issues/229)
+
 /**
  * @defgroup Getrf cusolver getrf operations
  * @{
diff --git a/ml-prims/src/utils.h b/ml-prims/src/utils.h
new file mode 100644
index 0000000000..43f107b469
--- /dev/null
+++ b/ml-prims/src/utils.h
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cuda_runtime.h>
+#include <execinfo.h>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+#include <string>
+
+namespace MLCommon {
+
+/** base exception class for the cuML or ml-prims project */
+class Exception : public std::exception {
+public:
+  /** default ctor */
+  Exception() throw(): std::exception(), msg() {}
+
+  /** copy ctor */
+  Exception(const Exception& src) throw(): std::exception(), msg(src.what()) {
+    collectCallStack();
+  }
+
+  /** ctor from an input message */
+  Exception(const std::string& _msg) throw(): std::exception(), msg(_msg) {
+    collectCallStack();
+  }
+
+  /** dtor */
+  virtual ~Exception() throw() {}
+
+  /** get the message associated with this exception */
+  virtual const char* what() const throw() { return msg.c_str(); }
+
+private:
+  /** message associated with this exception */
+  std::string msg;
+
+  /** append call stack info to this exception's message for ease of debug */
+  // Courtesy: https://www.gnu.org/software/libc/manual/html_node/Backtraces.html
+  void collectCallStack() throw() {
+#ifdef __GNUC__
+    const int MaxStackDepth = 64;
+    void* stack[MaxStackDepth];
+    auto depth = backtrace(stack, MaxStackDepth);
+    std::ostringstream oss;
+    oss << std::endl << "Obtained " << depth << " stack frames" << std::endl;
+    char** strings = backtrace_symbols(stack, depth);
+    if (strings == nullptr) {
+      oss << "But no stack trace could be found!" << std::endl;
+      msg += oss.str();
+      return;
+    }
+    ///@todo: support for demangling of C++ symbol names
+    for (int i = 0; i < depth; ++i) {
+      oss << "#" << i << " in " << strings[i] << std::endl;
+    }
+    free(strings);
+    msg += oss.str();
+#endif // __GNUC__
+  }
+};
+
+/** macro to throw a runtime error */
+#define THROW(fmt, ...)                                                      \
+  do {                                                                       \
+    std::string msg;                                                         \
+    char errMsg[2048];                                                       \
+    std::sprintf(errMsg, "Exception occurred! file=%s line=%d: ", __FILE__,  \
+                 __LINE__);                                                  \
+    msg += errMsg;                                                           \
+    std::sprintf(errMsg, fmt, ##__VA_ARGS__);                                \
+    msg += errMsg;                                                           \
+    throw MLCommon::Exception(msg);                                          \
+  } while (0)
+
+/** macro to check for a conditional and assert on failure */
+#define ASSERT(check, fmt, ...)  \
+  do {                           \
+    if (!(check))                \
+      THROW(fmt, ##__VA_ARGS__); \
+  } while (0)
+
+/** check for cuda runtime API errors and assert accordingly */
+#define CUDA_CHECK(call)                                                     \
+  do {                                                                       \
+    cudaError_t status = call;                                               \
+    ASSERT(status == cudaSuccess, "FAIL: call='%s'. Reason:%s\n", #call,    \
+           cudaGetErrorString(status));                                     \
+  } while (0)
+
+
+///@todo: add a similar CUDA_CHECK_NO_THROW
+/// (Ref: https://github.com/rapidsai/cuml/issues/229)
+
+/**
+ * @defgroup Copy Copy methods
+ * @{
+ */
+/** performs a host to device copy */
+template <typename Type>
+void updateDevice(Type *dPtr, const Type *hPtr, size_t len,
+                  cudaStream_t stream = 0) {
+  CUDA_CHECK(cudaMemcpy(dPtr, hPtr, len * sizeof(Type),
+                        cudaMemcpyHostToDevice));
+}
+
+/** performs an async host to device copy */
+template <typename Type>
+void updateDeviceAsync(Type *dPtr, const Type *hPtr, size_t len,
+                       cudaStream_t stream) {
+  CUDA_CHECK(cudaMemcpyAsync(dPtr, hPtr, len * sizeof(Type),
+                             cudaMemcpyHostToDevice, stream));
+}
+
+/** performs a device to host copy */
+template <typename Type>
+void updateHost(Type *hPtr, const Type *dPtr, size_t len,
+                cudaStream_t stream = 0) {
+  CUDA_CHECK(cudaMemcpy(hPtr, dPtr, len * sizeof(Type),
+                        cudaMemcpyDeviceToHost));
+}
+
+/** performs an async device to host copy */
+template <typename Type>
+void updateHostAsync(Type *hPtr, const Type *dPtr, size_t len,
+                     cudaStream_t stream) {
+  CUDA_CHECK(cudaMemcpyAsync(hPtr, dPtr, len * sizeof(Type),
+                             cudaMemcpyDeviceToHost, stream));
+}
+
+/** performs a device to device copy */
+template <typename Type>
+void copy(Type* dPtr1, const Type* dPtr2, size_t len) {
+  CUDA_CHECK(cudaMemcpy(dPtr1, dPtr2, len*sizeof(Type),
+                        cudaMemcpyDeviceToDevice));
+}
+/** @} */
+
+/** Helper function to calculate the number of elements needed to store a dense matrix.
+* @param rows number of rows in matrix
+* @param columns number of columns in matrix
+* @return the number of items to allocate via allocate()
+* @sa allocate()
+*/
+inline size_t allocLengthForMatrix(size_t rows, size_t columns) {
+  return rows * columns;
+}
+
+/** cuda malloc */
+template <typename Type>
+void allocate(Type *&ptr, size_t len, bool setZero = false) {
+  CUDA_CHECK(cudaMalloc((void **)&ptr, sizeof(Type) * len));
+  if (setZero)
+    CUDA_CHECK(cudaMemset(ptr, 0, sizeof(Type) * len));
+}
+
+/**
+ * @defgroup Debug utils for debug device code
+ * @{
+ */
+template <class T, class OutStream>
+void myPrintHostVector(const char * variableName, const T * hostMem, size_t componentsCount, OutStream& out)
+{
+  out << variableName << "=[";
+  for (size_t i = 0; i < componentsCount; ++i)
+  {
+    if (i != 0)
+      out << ",";
+    out << hostMem[i];
+  }
+  out << "];\n";
+}
+
+template <class T>
+void myPrintHostVector(const char * variableName, const T * hostMem, size_t componentsCount)
+{
+  myPrintHostVector(variableName, hostMem, componentsCount, std::cout);
+  std::cout.flush();
+}
+
+template <class T, class OutStream>
+void myPrintDevVector(const char * variableName, const T * devMem, size_t componentsCount, OutStream& out)
+{
+  T* hostMem = new T[componentsCount];
+  CUDA_CHECK(cudaMemcpy(hostMem, devMem, componentsCount * sizeof(T), cudaMemcpyDeviceToHost));
+  myPrintHostVector(variableName, hostMem, componentsCount, out);
+  delete []hostMem;
+}
+
+template <class T>
+void myPrintDevVector(const char * variableName, const T * devMem, size_t componentsCount)
+{
+  myPrintDevVector(variableName, devMem, componentsCount, std::cout);
+  std::cout.flush();
+}
+/** @} */
+
+}; // end namespace MLCommon
diff --git a/ml-prims/test/cuda_utils.cu b/ml-prims/test/cuda_utils.cu
index 76be6b3426..95f32c8e70 100644
--- a/ml-prims/test/cuda_utils.cu
+++ b/ml-prims/test/cuda_utils.cu
@@ -22,7 +22,7 @@ namespace MLCommon {

 TEST(Utils, Assert) {
   ASSERT_NO_THROW(ASSERT(1 == 1, "Should not assert!"));
-  ASSERT_THROW(ASSERT(1 != 1, "Should assert!"), std::runtime_error);
+  ASSERT_THROW(ASSERT(1 != 1, "Should assert!"), Exception);
 }

 TEST(Utils, CudaCheck) {
   ASSERT_NO_THROW(CUDA_CHECK(cudaFree(nullptr)));
 }
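
For context, here is a minimal usage sketch of the helpers this diff moves into `ml-prims/src/utils.h`. It is illustrative only and not part of the PR: the file name, buffer size, and values are assumptions, while the signatures of `allocate`, `updateDevice`, `updateHost`, `myPrintHostVector`, and `CUDA_CHECK` come from the new header shown above.

```cpp
// usage_sketch.cu -- illustrative only; names and values here are assumed.
#include <vector>

#include "utils.h"  // pulls in cuda_runtime.h, CUDA_CHECK, and the MLCommon helpers

int main() {
  const size_t len = 8;                     // assumed toy buffer size
  std::vector<float> host(len, 1.5f);

  // cudaMalloc wrapped in CUDA_CHECK; the flag zero-fills the allocation
  float *dev = nullptr;
  MLCommon::allocate(dev, len, true);

  // synchronous host <-> device copies (the *Async variants take a stream)
  MLCommon::updateDevice(dev, host.data(), len);
  MLCommon::updateHost(host.data(), dev, len);

  // debug helper, prints: host=[1.5,1.5,...];
  MLCommon::myPrintHostVector("host", host.data(), len);

  // a failing runtime call would now surface as MLCommon::Exception
  CUDA_CHECK(cudaFree(dev));
  return 0;
}
```

Note the behavioral change this refactor carries: `THROW` (and therefore `ASSERT` and `CUDA_CHECK`) now raises `MLCommon::Exception`, which captures a backtrace on GNU toolchains, instead of `std::runtime_error`; that is why the test at the bottom of the diff switches its expected exception type.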