Skip to content

Commit

Permalink
Allow construction of cuda_async_memory_resource from existing pool (#…
Browse files Browse the repository at this point in the history
…889)

Adds a new MR type `cuda_async_view_memory_resource`, which has a constructor `cuda_async_view_memory_resource(cudaMemPool_t valid_pool_handle)`. The memory resource uses this pool for allocation and deallocation instead of managing its own pool.

Refactors `cuda_async_memory_resource` to have an instance of the above and create it with a `cudaMemPool_t` that it owns.

Authors:
  - https://github.com/fkallen

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Jake Hemstad (https://github.com/jrhemstad)
  - Rong Ou (https://github.com/rongou)

URL: #889
  • Loading branch information
fkallen authored Mar 23, 2022
1 parent 2715eea commit 220ba88
Show file tree
Hide file tree
Showing 5 changed files with 276 additions and 13 deletions.
3 changes: 3 additions & 0 deletions include/rmm/detail/dynamic_load_runtime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ struct async_alloc {

using cudaFreeAsync_sig = cudart_sig<void*, cudaStream_t>;
RMM_CUDART_API_WRAPPER(cudaFreeAsync, cudaFreeAsync_sig);

using cudaDeviceGetDefaultMemPool_sig = cudart_sig<cudaMemPool_t*, int>;
RMM_CUDART_API_WRAPPER(cudaDeviceGetDefaultMemPool, cudaDeviceGetDefaultMemPool_sig);
};
#endif

Expand Down
25 changes: 12 additions & 13 deletions include/rmm/mr/device/cuda_async_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <rmm/detail/cuda_util.hpp>
#include <rmm/detail/dynamic_load_runtime.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/cuda_async_view_memory_resource.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

#include <thrust/optional.h>
Expand Down Expand Up @@ -72,7 +73,9 @@ class cuda_async_memory_resource final : public device_memory_resource {
pool_props.handleTypes = cudaMemHandleTypePosixFileDescriptor;
pool_props.location.type = cudaMemLocationTypeDevice;
pool_props.location.id = rmm::detail::current_device().value();
RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&cuda_pool_handle_, &pool_props));
cudaMemPool_t cuda_pool_handle{};
RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&cuda_pool_handle, &pool_props));
pool_ = cuda_async_view_memory_resource{cuda_pool_handle};

// CUDA drivers before 11.5 have known incompatibilities with the async allocator.
// We'll disable `cudaMemPoolReuseAllowOpportunistic` if cuda driver < 11.5.
Expand All @@ -83,15 +86,15 @@ class cuda_async_memory_resource final : public device_memory_resource {
if (driver_version < min_async_version) {
int disabled{0};
RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolSetAttribute(
cuda_pool_handle_, cudaMemPoolReuseAllowOpportunistic, &disabled));
pool_handle(), cudaMemPoolReuseAllowOpportunistic, &disabled));
}

auto const [free, total] = rmm::detail::available_device_memory();

// Need an l-value to take address to pass to cudaMemPoolSetAttribute
uint64_t threshold = release_threshold.value_or(total);
RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolSetAttribute(
cuda_pool_handle_, cudaMemPoolAttrReleaseThreshold, &threshold));
pool_handle(), cudaMemPoolAttrReleaseThreshold, &threshold));

// Allocate and immediately deallocate the initial_pool_size to prime the pool with the
// specified size
Expand All @@ -110,7 +113,7 @@ class cuda_async_memory_resource final : public device_memory_resource {
* @brief Returns the underlying native handle to the CUDA pool
*
*/
[[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; }
[[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return pool_.pool_handle(); }
#endif

~cuda_async_memory_resource() override
Expand Down Expand Up @@ -141,7 +144,7 @@ class cuda_async_memory_resource final : public device_memory_resource {

private:
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
cudaMemPool_t cuda_pool_handle_{};
cuda_async_view_memory_resource pool_{};
#endif

/**
Expand All @@ -158,10 +161,7 @@ class cuda_async_memory_resource final : public device_memory_resource {
{
void* ptr{nullptr};
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
if (bytes > 0) {
RMM_CUDA_TRY_ALLOC(rmm::detail::async_alloc::cudaMallocFromPoolAsync(
&ptr, bytes, pool_handle(), stream.value()));
}
ptr = pool_.allocate(bytes, stream);
#else
(void)bytes;
(void)stream;
Expand All @@ -176,14 +176,13 @@ class cuda_async_memory_resource final : public device_memory_resource {
*
* @param p Pointer to be deallocated
*/
void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view stream) override
void do_deallocate(void* ptr, std::size_t size, rmm::cuda_stream_view stream) override
{
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
if (ptr != nullptr) {
RMM_ASSERT_CUDA_SUCCESS(rmm::detail::async_alloc::cudaFreeAsync(ptr, stream.value()));
}
pool_.deallocate(ptr, size, stream);
#else
(void)ptr;
(void)size;
(void)stream;
#endif
}
Expand Down
177 changes: 177 additions & 0 deletions include/rmm/mr/device/cuda_async_view_memory_resource.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/cuda_util.hpp>
#include <rmm/detail/dynamic_load_runtime.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

#include <thrust/optional.h>

#include <cuda_runtime_api.h>

#include <cstddef>
#include <limits>

#if CUDART_VERSION >= 11020 // 11.2 introduced cudaMallocAsync
#define RMM_CUDA_MALLOC_ASYNC_SUPPORT
#endif

namespace rmm::mr {

/**
 * @brief `device_memory_resource` derived class that uses `cudaMallocAsync`/`cudaFreeAsync` for
 * allocation/deallocation, serving requests from an existing CUDA memory pool that it does
 * not own.
 */
class cuda_async_view_memory_resource final : public device_memory_resource {
 public:
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
  /**
   * @brief Constructs a cuda_async_view_memory_resource which uses an existing CUDA memory pool.
   * The provided pool is not owned by cuda_async_view_memory_resource and must remain valid
   * during the lifetime of the memory resource.
   *
   * @throws rmm::logic_error if `valid_pool_handle` is null, or if the current device does not
   * support CUDA memory pools (`cudaDevAttrMemoryPoolsSupported`).
   *
   * @param valid_pool_handle Handle to a CUDA memory pool which will be used to
   * serve allocation requests.
   */
  cuda_async_view_memory_resource(cudaMemPool_t valid_pool_handle)
    : cuda_pool_handle_{[valid_pool_handle]() {
        // Validate inside the member initializer so a null handle is rejected before use.
        RMM_EXPECTS(nullptr != valid_pool_handle, "Unexpected null pool handle.");
        return valid_pool_handle;
      }()}
  {
    // Check if cudaMallocAsync Memory pool supported
    auto const device = rmm::detail::current_device();
    int cuda_pool_supported{};
    auto result =
      cudaDeviceGetAttribute(&cuda_pool_supported, cudaDevAttrMemoryPoolsSupported, device.value());
    RMM_EXPECTS(result == cudaSuccess && cuda_pool_supported,
                "cudaMallocAsync not supported with this CUDA driver/runtime version");
  }
#endif

#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
  /**
   * @brief Returns the underlying native handle to the CUDA pool
   *
   */
  [[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; }
#endif

  cuda_async_view_memory_resource() = default;
  cuda_async_view_memory_resource(cuda_async_view_memory_resource const&) = default;
  cuda_async_view_memory_resource(cuda_async_view_memory_resource&&)      = default;
  cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource const&) = default;
  cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource&&) = default;

  /**
   * @brief Query whether the resource supports use of non-null CUDA streams for
   * allocation/deallocation. `cuda_async_view_memory_resource` supports streams.
   *
   * @returns bool true
   */
  [[nodiscard]] bool supports_streams() const noexcept override { return true; }

  /**
   * @brief Query whether the resource supports the get_mem_info API.
   *
   * @return false
   */
  [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; }

 private:
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
  // Non-owning handle to the pool used to serve (de)allocation requests.
  cudaMemPool_t cuda_pool_handle_{};
#endif

  /**
   * @brief Allocates memory of size at least `bytes` from the wrapped pool using
   * `cudaMallocFromPoolAsync`, ordered on `stream`.
   *
   * The returned pointer has at least 256B alignment.
   *
   * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
   *
   * @param bytes The size, in bytes, of the allocation
   * @param stream Stream on which the allocation is ordered
   * @return void* Pointer to the newly allocated memory; nullptr when `bytes == 0` (and
   * when compiled without cudaMallocAsync support)
   */
  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
  {
    void* ptr{nullptr};
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
    if (bytes > 0) {
      RMM_CUDA_TRY_ALLOC(rmm::detail::async_alloc::cudaMallocFromPoolAsync(
        &ptr, bytes, pool_handle(), stream.value()));
    }
#else
    (void)bytes;
    (void)stream;
#endif
    return ptr;
  }

  /**
   * @brief Deallocate memory pointed to by \p ptr using `cudaFreeAsync`, ordered on `stream`.
   * No-op when \p ptr is null.
   *
   * @throws Nothing.
   *
   * @param ptr Pointer to be deallocated
   * @param stream Stream on which the deallocation is ordered
   */
  void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view stream) override
  {
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
    if (ptr != nullptr) {
      RMM_ASSERT_CUDA_SUCCESS(rmm::detail::async_alloc::cudaFreeAsync(ptr, stream.value()));
    }
#else
    (void)ptr;
    (void)stream;
#endif
  }

  /**
   * @brief Compare this resource to another.
   *
   * @throws Nothing.
   *
   * @param other The other resource to compare to
   * @return true If the two resources are equivalent
   * @return false If the two resources are not equal
   *
   * NOTE: equality is based on type only; two views of *different* pools still compare equal.
   */
  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
  {
    return dynamic_cast<cuda_async_view_memory_resource const*>(&other) != nullptr;
  }

  /**
   * @brief Get free and available memory for memory resource.
   *
   * Not supported by this resource: always returns zeros without querying the device.
   *
   * @return std::pair containing free_size and total_size of memory (always {0, 0})
   */
  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
    rmm::cuda_stream_view) const override
  {
    return std::make_pair(0, 0);
  }
};

} // namespace rmm::mr
1 change: 1 addition & 0 deletions tests/mr/device/cuda_async_mr_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* limitations under the License.
*/

#include <rmm/cuda_device.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/cuda_async_memory_resource.hpp>

Expand Down
83 changes: 83 additions & 0 deletions tests/mr/device/cuda_async_view_mr_tests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <rmm/cuda_device.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/cuda_async_view_memory_resource.hpp>

#include <gtest/gtest.h>

namespace rmm::test {
namespace {

using cuda_async_view_mr = rmm::mr::cuda_async_view_memory_resource;

#if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT)

// An allocate/deallocate round trip through a view of the device's default
// memory pool should complete without error.
TEST(PoolTest, UsePool)
{
  // Fetch the current device's default mempool; the view resource will not own it.
  cudaMemPool_t default_pool{};
  RMM_CUDA_TRY(rmm::detail::async_alloc::cudaDeviceGetDefaultMemPool(
    &default_pool, rmm::detail::current_device().value()));

  cuda_async_view_mr mr{default_pool};

  constexpr auto alloc_size{100};
  void* allocation = mr.allocate(alloc_size);
  mr.deallocate(allocation, alloc_size);

  // allocate/deallocate are stream-ordered; synchronize before the test ends.
  RMM_CUDA_TRY(cudaDeviceSynchronize());
}

// Verify that cuda_async_view_memory_resource does not take ownership of the
// pool it is given: after the resource is destroyed, the caller can still
// destroy the pool themselves.
TEST(PoolTest, NotTakingOwnershipOfPool)
{
  cudaMemPoolProps poolProps = {};
  poolProps.allocType = cudaMemAllocationTypePinned;
  poolProps.location.id = rmm::detail::current_device().value();
  poolProps.location.type = cudaMemLocationTypeDevice;

  cudaMemPool_t memPool{};

  // The test (not the MR) owns this explicitly created pool.
  RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&memPool, &poolProps));

  // Scope the view resource so it is destroyed before the pool is destroyed below.
  {
    const auto pool_init_size{100};
    cuda_async_view_mr mr{memPool};
    void* ptr = mr.allocate(pool_init_size);
    mr.deallocate(ptr, pool_init_size);
    RMM_CUDA_TRY(cudaDeviceSynchronize());
  }

  // If the view resource had (incorrectly) destroyed the pool, destroying it
  // here would fail and RMM_EXPECTS would throw.
  auto destroy_valid_pool = [&]() {
    auto result = rmm::detail::async_alloc::cudaMemPoolDestroy(memPool);
    RMM_EXPECTS(result == cudaSuccess, "Pool wrapper did destroy pool");
  };

  EXPECT_NO_THROW(destroy_valid_pool());
}

// Constructing a view resource from a null pool handle must be rejected.
TEST(PoolTest, ThrowIfNullptrPool)
{
  auto make_view_from_null = [] {
    cudaMemPool_t null_pool{nullptr};
    cuda_async_view_mr mr{null_pool};
  };

  // The constructor's RMM_EXPECTS reports the null handle as rmm::logic_error.
  EXPECT_THROW(make_view_from_null(), rmm::logic_error);
}

#endif

} // namespace
} // namespace rmm::test

0 comments on commit 220ba88

Please sign in to comment.