Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Allow construction of cuda_async_memory_resource from existing pool #889

Merged
merged 10 commits into from
Mar 23, 2022
3 changes: 3 additions & 0 deletions include/rmm/detail/dynamic_load_runtime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ struct async_alloc {

using cudaFreeAsync_sig = cudart_sig<void*, cudaStream_t>;
RMM_CUDART_API_WRAPPER(cudaFreeAsync, cudaFreeAsync_sig);

using cudaDeviceGetDefaultMemPool_sig = cudart_sig<cudaMemPool_t*, int>;
RMM_CUDART_API_WRAPPER(cudaDeviceGetDefaultMemPool, cudaDeviceGetDefaultMemPool_sig);
};
#endif

Expand Down
25 changes: 12 additions & 13 deletions include/rmm/mr/device/cuda_async_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <rmm/detail/cuda_util.hpp>
#include <rmm/detail/dynamic_load_runtime.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/cuda_async_view_memory_resource.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

#include <thrust/optional.h>
Expand Down Expand Up @@ -72,7 +73,9 @@ class cuda_async_memory_resource final : public device_memory_resource {
pool_props.handleTypes = cudaMemHandleTypePosixFileDescriptor;
pool_props.location.type = cudaMemLocationTypeDevice;
pool_props.location.id = rmm::detail::current_device().value();
RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&cuda_pool_handle_, &pool_props));
cudaMemPool_t cuda_pool_handle{};
RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&cuda_pool_handle, &pool_props));
pool_ = cuda_async_view_memory_resource{cuda_pool_handle};

// CUDA drivers before 11.5 have known incompatibilities with the async allocator.
// We'll disable `cudaMemPoolReuseAllowOpportunistic` if cuda driver < 11.5.
Expand All @@ -83,15 +86,15 @@ class cuda_async_memory_resource final : public device_memory_resource {
if (driver_version < min_async_version) {
int disabled{0};
RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolSetAttribute(
cuda_pool_handle_, cudaMemPoolReuseAllowOpportunistic, &disabled));
pool_handle(), cudaMemPoolReuseAllowOpportunistic, &disabled));
}

auto const [free, total] = rmm::detail::available_device_memory();

// Need an l-value to take address to pass to cudaMemPoolSetAttribute
uint64_t threshold = release_threshold.value_or(total);
RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolSetAttribute(
cuda_pool_handle_, cudaMemPoolAttrReleaseThreshold, &threshold));
pool_handle(), cudaMemPoolAttrReleaseThreshold, &threshold));

// Allocate and immediately deallocate the initial_pool_size to prime the pool with the
// specified size
Expand All @@ -110,7 +113,7 @@ class cuda_async_memory_resource final : public device_memory_resource {
* @brief Returns the underlying native handle to the CUDA pool
*
*/
[[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; }
[[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return pool_.pool_handle(); }
#endif

~cuda_async_memory_resource() override
Expand Down Expand Up @@ -141,7 +144,7 @@ class cuda_async_memory_resource final : public device_memory_resource {

private:
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
cudaMemPool_t cuda_pool_handle_{};
cuda_async_view_memory_resource pool_{};
#endif

/**
Expand All @@ -158,10 +161,7 @@ class cuda_async_memory_resource final : public device_memory_resource {
{
void* ptr{nullptr};
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
if (bytes > 0) {
RMM_CUDA_TRY_ALLOC(rmm::detail::async_alloc::cudaMallocFromPoolAsync(
&ptr, bytes, pool_handle(), stream.value()));
}
ptr = pool_.allocate(bytes, stream);
#else
(void)bytes;
(void)stream;
Expand All @@ -176,14 +176,13 @@ class cuda_async_memory_resource final : public device_memory_resource {
*
* @param p Pointer to be deallocated
*/
void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view stream) override
void do_deallocate(void* ptr, std::size_t size, rmm::cuda_stream_view stream) override
{
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
if (ptr != nullptr) {
RMM_ASSERT_CUDA_SUCCESS(rmm::detail::async_alloc::cudaFreeAsync(ptr, stream.value()));
}
pool_.deallocate(ptr, size, stream);
#else
(void)ptr;
(void)size;
(void)stream;
#endif
}
Expand Down
177 changes: 177 additions & 0 deletions include/rmm/mr/device/cuda_async_view_memory_resource.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/cuda_util.hpp>
#include <rmm/detail/dynamic_load_runtime.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

#include <thrust/optional.h>

#include <cuda_runtime_api.h>

#include <cstddef>
#include <limits>

#if CUDART_VERSION >= 11020 // 11.2 introduced cudaMallocAsync
#define RMM_CUDA_MALLOC_ASYNC_SUPPORT
#endif

namespace rmm::mr {

/**
 * @brief `device_memory_resource` derived class that uses `cudaMallocAsync`/`cudaFreeAsync` for
 * allocation/deallocation, serving requests from an existing CUDA memory pool that it does NOT
 * own. The pool must outlive this resource.
 */
class cuda_async_view_memory_resource final : public device_memory_resource {
 public:
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
  /**
   * @brief Constructs a cuda_async_view_memory_resource which uses an existing CUDA memory pool.
   * The provided pool is not owned by cuda_async_view_memory_resource and must remain valid
   * during the lifetime of the memory resource.
   *
   * @throws rmm::logic_error if `valid_pool_handle` is null, or if the current device does not
   * support CUDA memory pools (`cudaDevAttrMemoryPoolsSupported`), i.e. the driver/runtime does
   * not support `cudaMallocAsync`.
   *
   * @param valid_pool_handle Handle to a CUDA memory pool which will be used to
   * serve allocation requests.
   */
  cuda_async_view_memory_resource(cudaMemPool_t valid_pool_handle)
    : cuda_pool_handle_{[valid_pool_handle]() {
        // Immediately-invoked lambda so the handle is validated before the member is initialized.
        RMM_EXPECTS(nullptr != valid_pool_handle, "Unexpected null pool handle.");
        return valid_pool_handle;
      }()}
  {
    // Check if cudaMallocAsync Memory pool supported
    auto const device = rmm::detail::current_device();
    int cuda_pool_supported{};
    auto result =
      cudaDeviceGetAttribute(&cuda_pool_supported, cudaDevAttrMemoryPoolsSupported, device.value());
    RMM_EXPECTS(result == cudaSuccess && cuda_pool_supported,
                "cudaMallocAsync not supported with this CUDA driver/runtime version");
  }
#endif

#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
  /**
   * @brief Returns the underlying native handle to the CUDA pool
   *
   * @return cudaMemPool_t The handle passed at construction (non-owning view).
   */
  [[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; }
#endif

  // View semantics: default-constructible (null pool) and freely copyable/movable; every copy
  // refers to the same underlying, externally-owned pool. No destructor is declared, so the
  // pool is never destroyed by this class (Rule of Zero).
  cuda_async_view_memory_resource() = default;
  cuda_async_view_memory_resource(cuda_async_view_memory_resource const&) = default;
  cuda_async_view_memory_resource(cuda_async_view_memory_resource&&) = default;
  cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource const&) = default;
  cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource&&) = default;

  /**
   * @brief Query whether the resource supports use of non-null CUDA streams for
   * allocation/deallocation. `cudaMallocFromPoolAsync`/`cudaFreeAsync` are stream-ordered, so
   * streams are supported.
   *
   * @returns bool true
   */
  [[nodiscard]] bool supports_streams() const noexcept override { return true; }

  /**
   * @brief Query whether the resource supports the get_mem_info API.
   *
   * @return false
   */
  [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; }

 private:
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
  // Non-owning handle to the pool that serves allocations; never destroyed by this class.
  cudaMemPool_t cuda_pool_handle_{};
#endif

  /**
   * @brief Allocates memory of size at least `bytes` from the viewed pool using
   * `cudaMallocFromPoolAsync`.
   *
   * The returned pointer has at least 256B alignment.
   *
   * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
   *
   * @param bytes The size, in bytes, of the allocation; a request of 0 bytes returns nullptr
   * @param stream Stream on which the allocation is stream-ordered
   * @return void* Pointer to the newly allocated memory (nullptr when async allocation is
   * compiled out or `bytes == 0`)
   */
  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
  {
    void* ptr{nullptr};
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
    if (bytes > 0) {
      RMM_CUDA_TRY_ALLOC(rmm::detail::async_alloc::cudaMallocFromPoolAsync(
        &ptr, bytes, pool_handle(), stream.value()));
    }
#else
    (void)bytes;
    (void)stream;
#endif
    return ptr;
  }

  /**
   * @brief Deallocate memory pointed to by \p ptr via `cudaFreeAsync`.
   *
   * @throws Nothing.
   *
   * @param ptr Pointer to be deallocated; nullptr is a no-op
   * @param stream Stream on which the deallocation is stream-ordered
   */
  void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view stream) override
  {
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
    if (ptr != nullptr) {
      RMM_ASSERT_CUDA_SUCCESS(rmm::detail::async_alloc::cudaFreeAsync(ptr, stream.value()));
    }
#else
    (void)ptr;
    (void)stream;
#endif
  }

  /**
   * @brief Compare this resource to another.
   *
   * Equality is based on dynamic type only: any two cuda_async_view_memory_resource instances
   * compare equal, even if they view different pools.
   *
   * @throws Nothing.
   *
   * @param other The other resource to compare to
   * @return true If the two resources are equivalent
   * @return false If the two resources are not equal
   */
  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
  {
    return dynamic_cast<cuda_async_view_memory_resource const*>(&other) != nullptr;
  }

  /**
   * @brief Get free and available memory for memory resource.
   *
   * This query is not supported (see `supports_get_mem_info()`); it never throws and always
   * reports zero.
   *
   * @return std::pair containing free_size and total_size of memory; always {0, 0}
   */
  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
    rmm::cuda_stream_view) const override
  {
    return std::make_pair(0, 0);
  }
};

} // namespace rmm::mr
1 change: 1 addition & 0 deletions tests/mr/device/cuda_async_mr_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* limitations under the License.
*/

#include <rmm/cuda_device.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/cuda_async_memory_resource.hpp>

Expand Down
83 changes: 83 additions & 0 deletions tests/mr/device/cuda_async_view_mr_tests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <rmm/cuda_device.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/cuda_async_view_memory_resource.hpp>

#include <gtest/gtest.h>

namespace rmm::test {
namespace {

using cuda_async_view_mr = rmm::mr::cuda_async_view_memory_resource;

#if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT)

// Verify that a view resource can serve an allocate/deallocate round trip from the
// device's default memory pool (obtained via cudaDeviceGetDefaultMemPool).
TEST(PoolTest, UsePool)
{
  cudaMemPool_t memPool{};
  RMM_CUDA_TRY(rmm::detail::async_alloc::cudaDeviceGetDefaultMemPool(
    &memPool, rmm::detail::current_device().value()));

  const auto pool_init_size{100};
  cuda_async_view_mr mr{memPool};
  void* ptr = mr.allocate(pool_init_size);
  mr.deallocate(ptr, pool_init_size);
  // Synchronize so the stream-ordered free completes before the test returns.
  RMM_CUDA_TRY(cudaDeviceSynchronize());
}

// Verify the view does NOT take ownership: after the view is destroyed (end of inner scope),
// the explicitly-created pool must still be alive, so cudaMemPoolDestroy must succeed.
TEST(PoolTest, NotTakingOwnershipOfPool)
{
  cudaMemPoolProps poolProps = {};
  poolProps.allocType = cudaMemAllocationTypePinned;
  poolProps.location.id = rmm::detail::current_device().value();
  poolProps.location.type = cudaMemLocationTypeDevice;

  cudaMemPool_t memPool{};

  RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&memPool, &poolProps));

  {
    const auto pool_init_size{100};
    cuda_async_view_mr mr{memPool};
    void* ptr = mr.allocate(pool_init_size);
    mr.deallocate(ptr, pool_init_size);
    RMM_CUDA_TRY(cudaDeviceSynchronize());
  }

  // If destroy fails here, the view wrapper must have (incorrectly) destroyed the pool already.
  auto destroy_valid_pool = [&]() {
    auto result = rmm::detail::async_alloc::cudaMemPoolDestroy(memPool);
    RMM_EXPECTS(result == cudaSuccess, "Pool wrapper did destroy pool");
  };

  EXPECT_NO_THROW(destroy_valid_pool());
}

// Verify the constructor rejects a null pool handle with rmm::logic_error (thrown by
// RMM_EXPECTS in the member-initializer lambda).
TEST(PoolTest, ThrowIfNullptrPool)
{
  auto construct_mr = []() {
    cudaMemPool_t memPool{nullptr};
    cuda_async_view_mr mr{memPool};
  };

  EXPECT_THROW(construct_mr(), rmm::logic_error);
}

#endif

}  // namespace
}  // namespace rmm::test