From f7cd601d0281de1a2c96b4979d727aa7828a0b6f Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Tue, 9 Feb 2021 20:38:53 -0500 Subject: [PATCH] Replace removed __forceinline__s with regular inlines. Still needed for ODR /facepalm. Bug 3255883 --- cub/util_device.cuh | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/cub/util_device.cuh b/cub/util_device.cuh index 5cf12596d1..7cceb59453 100644 --- a/cub/util_device.cuh +++ b/cub/util_device.cuh @@ -121,7 +121,7 @@ __global__ void EmptyKernel(void) { } /** * \brief Returns the current device or -1 if an error occurred. */ -CUB_RUNTIME_FUNCTION int CurrentDevice() +CUB_RUNTIME_FUNCTION inline int CurrentDevice() { #if defined(CUB_RUNTIME_ENABLED) // Host code or device code with the CUDA runtime. @@ -147,14 +147,14 @@ private: int const old_device; bool const needs_reset; public: - __host__ SwitchDevice(int new_device) + __host__ inline SwitchDevice(int new_device) : old_device(CurrentDevice()), needs_reset(old_device != new_device) { if (needs_reset) CubDebug(cudaSetDevice(new_device)); } - __host__ ~SwitchDevice() + __host__ inline ~SwitchDevice() { if (needs_reset) CubDebug(cudaSetDevice(old_device)); @@ -165,7 +165,7 @@ public: * \brief Returns the number of CUDA devices available or -1 if an error * occurred. */ -CUB_RUNTIME_FUNCTION int DeviceCountUncached() +CUB_RUNTIME_FUNCTION inline int DeviceCountUncached() { #if defined(CUB_RUNTIME_ENABLED) // Host code or device code with the CUDA runtime. @@ -198,7 +198,7 @@ struct ValueCache * \brief Call the nullary function to produce the value and construct the * cache. */ - __host__ ValueCache() : value(Function()) {} + __host__ inline ValueCache() : value(Function()) {} }; #endif @@ -207,7 +207,7 @@ struct ValueCache // Host code, only safely usable in C++11 or newer, where thread-safe // initialization of static locals is guaranteed. This is a separate function // to avoid defining a local static in a host/device function. -__host__ int DeviceCountCachedValue() +__host__ inline int DeviceCountCachedValue() { static ValueCache cache; return cache.value; @@ -221,7 +221,7 @@ __host__ int DeviceCountCachedValue() * * \note This function is thread safe. */ -CUB_RUNTIME_FUNCTION int DeviceCount() +CUB_RUNTIME_FUNCTION inline int DeviceCount() { int result = -1; if (CUB_IS_HOST_CODE) { @@ -281,7 +281,7 @@ public: /** * \brief Construct the cache. */ - __host__ PerDeviceAttributeCache() : entries_() + __host__ inline PerDeviceAttributeCache() : entries_() { assert(DeviceCount() <= CUB_MAX_DEVICES); } @@ -359,7 +359,7 @@ public: /** * \brief Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10). */ -CUB_RUNTIME_FUNCTION cudaError_t PtxVersionUncached(int& ptx_version) +CUB_RUNTIME_FUNCTION inline cudaError_t PtxVersionUncached(int& ptx_version) { // Instantiate `EmptyKernel` in both host and device code to ensure // it can be called. @@ -399,7 +399,7 @@ CUB_RUNTIME_FUNCTION cudaError_t PtxVersionUncached(int& ptx_version) /** * \brief Retrieves the PTX version that will be used on \p device (major * 100 + minor * 10). */ -__host__ cudaError_t PtxVersionUncached(int& ptx_version, int device) +__host__ inline cudaError_t PtxVersionUncached(int& ptx_version, int device) { SwitchDevice sd(device); return PtxVersionUncached(ptx_version); @@ -407,7 +407,7 @@ __host__ cudaError_t PtxVersionUncached(int& ptx_version, int device) #if CUB_CPP_DIALECT >= 2011 // C++11 and later. template -__host__ PerDeviceAttributeCache& GetPerDeviceAttributeCache() +__host__ inline PerDeviceAttributeCache& GetPerDeviceAttributeCache() { // C++11 guarantees that initialization of static locals is thread safe. static PerDeviceAttributeCache cache; @@ -425,7 +425,7 @@ struct SmVersionCacheTag {}; * * \note This function is thread safe. */ -__host__ cudaError_t PtxVersion(int& ptx_version, int device) +__host__ inline cudaError_t PtxVersion(int& ptx_version, int device) { #if CUB_CPP_DIALECT >= 2011 // C++11 and later. @@ -454,7 +454,7 @@ __host__ cudaError_t PtxVersion(int& ptx_version, int device) * * \note This function is thread safe. */ -CUB_RUNTIME_FUNCTION cudaError_t PtxVersion(int& ptx_version) +CUB_RUNTIME_FUNCTION inline cudaError_t PtxVersion(int& ptx_version) { cudaError_t result = cudaErrorUnknown; if (CUB_IS_HOST_CODE) { @@ -490,7 +490,7 @@ CUB_RUNTIME_FUNCTION cudaError_t PtxVersion(int& ptx_version) /** * \brief Retrieves the SM version of \p device (major * 100 + minor * 10) */ -CUB_RUNTIME_FUNCTION cudaError_t SmVersionUncached(int& sm_version, int device = CurrentDevice()) +CUB_RUNTIME_FUNCTION inline cudaError_t SmVersionUncached(int& sm_version, int device = CurrentDevice()) { #if defined(CUB_RUNTIME_ENABLED) // Host code or device code with the CUDA runtime. @@ -524,7 +524,7 @@ CUB_RUNTIME_FUNCTION cudaError_t SmVersionUncached(int& sm_version, int device = * * \note This function is thread safe. */ -CUB_RUNTIME_FUNCTION cudaError_t SmVersion(int& sm_version, int device = CurrentDevice()) +CUB_RUNTIME_FUNCTION inline cudaError_t SmVersion(int& sm_version, int device = CurrentDevice()) { cudaError_t result = cudaErrorUnknown; if (CUB_IS_HOST_CODE) { @@ -557,7 +557,7 @@ CUB_RUNTIME_FUNCTION cudaError_t SmVersion(int& sm_version, int device = Current /** * Synchronize the specified \p stream. */ -CUB_RUNTIME_FUNCTION cudaError_t SyncStream(cudaStream_t stream) +CUB_RUNTIME_FUNCTION inline cudaError_t SyncStream(cudaStream_t stream) { cudaError_t result = cudaErrorUnknown; if (CUB_IS_HOST_CODE) { @@ -613,7 +613,7 @@ CUB_RUNTIME_FUNCTION cudaError_t SyncStream(cudaStream_t stream) * */ template -CUB_RUNTIME_FUNCTION +CUB_RUNTIME_FUNCTION inline cudaError_t MaxSmOccupancy( int& max_sm_occupancy, ///< [out] maximum number of thread blocks that can reside on a single SM KernelPtr kernel_ptr, ///< [in] Kernel pointer for which to compute SM occupancy