Skip to content

Commit

Permalink
Allow construction of cuda_async_memory_resource from existing pool (#…
Browse files Browse the repository at this point in the history
…889)

Adds a new MR type `cuda_async_view_memory_resource`, which has a constructor `cuda_async_view_memory_resource(cudaMemPool_t valid_pool_handle)`. The memory resource uses this pool for allocation and deallocation instead of managing its own pool.

Refactors `cuda_async_memory_resource` to have an instance of the above and create it with a `cudaMemPool_t` that it owns.

Authors:
  - https://github.com/fkallen

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Jake Hemstad (https://github.com/jrhemstad)
  - Rong Ou (https://github.com/rongou)

URL: #889
  • Loading branch information
fkallen authored Mar 23, 2022
1 parent 2715eea commit 220ba88
Show file tree
Hide file tree
Showing 5 changed files with 276 additions and 13 deletions.
3 changes: 3 additions & 0 deletions include/rmm/detail/dynamic_load_runtime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ struct async_alloc {

using cudaFreeAsync_sig = cudart_sig<void*, cudaStream_t>;
RMM_CUDART_API_WRAPPER(cudaFreeAsync, cudaFreeAsync_sig);

using cudaDeviceGetDefaultMemPool_sig = cudart_sig<cudaMemPool_t*, int>;
RMM_CUDART_API_WRAPPER(cudaDeviceGetDefaultMemPool, cudaDeviceGetDefaultMemPool_sig);
};
#endif

Expand Down
25 changes: 12 additions & 13 deletions include/rmm/mr/device/cuda_async_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <rmm/detail/cuda_util.hpp>
#include <rmm/detail/dynamic_load_runtime.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/cuda_async_view_memory_resource.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

#include <thrust/optional.h>
Expand Down Expand Up @@ -72,7 +73,9 @@ class cuda_async_memory_resource final : public device_memory_resource {
pool_props.handleTypes = cudaMemHandleTypePosixFileDescriptor;
pool_props.location.type = cudaMemLocationTypeDevice;
pool_props.location.id = rmm::detail::current_device().value();
RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&cuda_pool_handle_, &pool_props));
cudaMemPool_t cuda_pool_handle{};
RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&cuda_pool_handle, &pool_props));
pool_ = cuda_async_view_memory_resource{cuda_pool_handle};

// CUDA drivers before 11.5 have known incompatibilities with the async allocator.
// We'll disable `cudaMemPoolReuseAllowOpportunistic` if cuda driver < 11.5.
Expand All @@ -83,15 +86,15 @@ class cuda_async_memory_resource final : public device_memory_resource {
if (driver_version < min_async_version) {
int disabled{0};
RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolSetAttribute(
cuda_pool_handle_, cudaMemPoolReuseAllowOpportunistic, &disabled));
pool_handle(), cudaMemPoolReuseAllowOpportunistic, &disabled));
}

auto const [free, total] = rmm::detail::available_device_memory();

// Need an l-value to take address to pass to cudaMemPoolSetAttribute
uint64_t threshold = release_threshold.value_or(total);
RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolSetAttribute(
cuda_pool_handle_, cudaMemPoolAttrReleaseThreshold, &threshold));
pool_handle(), cudaMemPoolAttrReleaseThreshold, &threshold));

// Allocate and immediately deallocate the initial_pool_size to prime the pool with the
// specified size
Expand All @@ -110,7 +113,7 @@ class cuda_async_memory_resource final : public device_memory_resource {
* @brief Returns the underlying native handle to the CUDA pool
*
*/
[[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; }
[[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return pool_.pool_handle(); }
#endif

~cuda_async_memory_resource() override
Expand Down Expand Up @@ -141,7 +144,7 @@ class cuda_async_memory_resource final : public device_memory_resource {

private:
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
cudaMemPool_t cuda_pool_handle_{};
cuda_async_view_memory_resource pool_{};
#endif

/**
Expand All @@ -158,10 +161,7 @@ class cuda_async_memory_resource final : public device_memory_resource {
{
void* ptr{nullptr};
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
if (bytes > 0) {
RMM_CUDA_TRY_ALLOC(rmm::detail::async_alloc::cudaMallocFromPoolAsync(
&ptr, bytes, pool_handle(), stream.value()));
}
ptr = pool_.allocate(bytes, stream);
#else
(void)bytes;
(void)stream;
Expand All @@ -176,14 +176,13 @@ class cuda_async_memory_resource final : public device_memory_resource {
*
* @param p Pointer to be deallocated
*/
void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view stream) override
void do_deallocate(void* ptr, std::size_t size, rmm::cuda_stream_view stream) override
{
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
if (ptr != nullptr) {
RMM_ASSERT_CUDA_SUCCESS(rmm::detail::async_alloc::cudaFreeAsync(ptr, stream.value()));
}
pool_.deallocate(ptr, size, stream);
#else
(void)ptr;
(void)size;
(void)stream;
#endif
}
Expand Down
177 changes: 177 additions & 0 deletions include/rmm/mr/device/cuda_async_view_memory_resource.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/cuda_util.hpp>
#include <rmm/detail/dynamic_load_runtime.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

#include <thrust/optional.h>

#include <cuda_runtime_api.h>

#include <cstddef>
#include <limits>

#if CUDART_VERSION >= 11020 // 11.2 introduced cudaMallocAsync
#define RMM_CUDA_MALLOC_ASYNC_SUPPORT
#endif

namespace rmm::mr {

/**
 * @brief `device_memory_resource` derived class that uses `cudaMallocAsync`/`cudaFreeAsync` for
 * allocation/deallocation, serving requests from an existing CUDA memory pool that it does
 * not own.
 */
class cuda_async_view_memory_resource final : public device_memory_resource {
 public:
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
  /**
   * @brief Constructs a cuda_async_view_memory_resource which uses an existing CUDA memory pool.
   * The provided pool is not owned by cuda_async_view_memory_resource and must remain valid
   * during the lifetime of the memory resource.
   *
   * @throws rmm::logic_error if `valid_pool_handle` is null, or if the current device does not
   * support CUDA memory pools (`cudaDevAttrMemoryPoolsSupported`).
   *
   * @param valid_pool_handle Handle to a CUDA memory pool which will be used to
   * serve allocation requests.
   */
  cuda_async_view_memory_resource(cudaMemPool_t valid_pool_handle)
    : cuda_pool_handle_{[valid_pool_handle]() {
        // Validate inside the member initializer so a null handle is rejected before use.
        RMM_EXPECTS(nullptr != valid_pool_handle, "Unexpected null pool handle.");
        return valid_pool_handle;
      }()}
  {
    // Check if cudaMallocAsync Memory pool supported
    auto const device = rmm::detail::current_device();
    int cuda_pool_supported{};
    auto result =
      cudaDeviceGetAttribute(&cuda_pool_supported, cudaDevAttrMemoryPoolsSupported, device.value());
    RMM_EXPECTS(result == cudaSuccess && cuda_pool_supported,
                "cudaMallocAsync not supported with this CUDA driver/runtime version");
  }
#endif

#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
  /**
   * @brief Returns the underlying native handle to the CUDA pool
   *
   */
  [[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; }
#endif

  cuda_async_view_memory_resource() = default;
  cuda_async_view_memory_resource(cuda_async_view_memory_resource const&) = default;
  cuda_async_view_memory_resource(cuda_async_view_memory_resource&&)      = default;
  cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource const&) = default;
  cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource&&) = default;

  /**
   * @brief Query whether the resource supports use of non-null CUDA streams for
   * allocation/deallocation. `cuda_async_view_memory_resource` supports streams.
   *
   * @returns bool true
   */
  [[nodiscard]] bool supports_streams() const noexcept override { return true; }

  /**
   * @brief Query whether the resource supports the get_mem_info API.
   *
   * @return false
   */
  [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; }

 private:
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
  // Non-owning handle to the pool used to serve (de)allocation requests.
  cudaMemPool_t cuda_pool_handle_{};
#endif

  /**
   * @brief Allocates memory of size at least `bytes` from the wrapped pool using
   * `cudaMallocFromPoolAsync`, ordered on `stream`.
   *
   * The returned pointer has at least 256B alignment.
   *
   * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
   *
   * @param bytes The size, in bytes, of the allocation
   * @param stream Stream on which the allocation is ordered
   * @return void* Pointer to the newly allocated memory; nullptr when `bytes == 0` (and
   * when compiled without cudaMallocAsync support)
   */
  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
  {
    void* ptr{nullptr};
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
    if (bytes > 0) {
      RMM_CUDA_TRY_ALLOC(rmm::detail::async_alloc::cudaMallocFromPoolAsync(
        &ptr, bytes, pool_handle(), stream.value()));
    }
#else
    (void)bytes;
    (void)stream;
#endif
    return ptr;
  }

  /**
   * @brief Deallocate memory pointed to by \p ptr using `cudaFreeAsync`, ordered on `stream`.
   * No-op when \p ptr is null.
   *
   * @throws Nothing.
   *
   * @param ptr Pointer to be deallocated
   * @param stream Stream on which the deallocation is ordered
   */
  void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view stream) override
  {
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
    if (ptr != nullptr) {
      RMM_ASSERT_CUDA_SUCCESS(rmm::detail::async_alloc::cudaFreeAsync(ptr, stream.value()));
    }
#else
    (void)ptr;
    (void)stream;
#endif
  }

  /**
   * @brief Compare this resource to another.
   *
   * @throws Nothing.
   *
   * @param other The other resource to compare to
   * @return true If the two resources are equivalent
   * @return false If the two resources are not equal
   *
   * NOTE: equality is based on type only; two views of *different* pools still compare equal.
   */
  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
  {
    return dynamic_cast<cuda_async_view_memory_resource const*>(&other) != nullptr;
  }

  /**
   * @brief Get free and available memory for memory resource.
   *
   * Not supported by this resource: always returns zeros without querying the device.
   *
   * @return std::pair containing free_size and total_size of memory (always {0, 0})
   */
  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
    rmm::cuda_stream_view) const override
  {
    return std::make_pair(0, 0);
  }
};

} // namespace rmm::mr
1 change: 1 addition & 0 deletions tests/mr/device/cuda_async_mr_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* limitations under the License.
*/

#include <rmm/cuda_device.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/cuda_async_memory_resource.hpp>

Expand Down
83 changes: 83 additions & 0 deletions tests/mr/device/cuda_async_view_mr_tests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <rmm/cuda_device.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/cuda_async_view_memory_resource.hpp>

#include <gtest/gtest.h>

namespace rmm::test {
namespace {

using cuda_async_view_mr = rmm::mr::cuda_async_view_memory_resource;

#if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT)

// An allocate/deallocate round trip through a view of the device's default
// memory pool should complete without error.
TEST(PoolTest, UsePool)
{
  // Fetch the current device's default mempool; the view resource will not own it.
  cudaMemPool_t default_pool{};
  RMM_CUDA_TRY(rmm::detail::async_alloc::cudaDeviceGetDefaultMemPool(
    &default_pool, rmm::detail::current_device().value()));

  cuda_async_view_mr mr{default_pool};

  constexpr auto alloc_size{100};
  void* allocation = mr.allocate(alloc_size);
  mr.deallocate(allocation, alloc_size);

  // allocate/deallocate are stream-ordered; synchronize before the test ends.
  RMM_CUDA_TRY(cudaDeviceSynchronize());
}

// Verify that cuda_async_view_memory_resource does not take ownership of the
// pool it is given: after the resource is destroyed, the caller can still
// destroy the pool themselves.
TEST(PoolTest, NotTakingOwnershipOfPool)
{
  cudaMemPoolProps poolProps = {};
  poolProps.allocType = cudaMemAllocationTypePinned;
  poolProps.location.id = rmm::detail::current_device().value();
  poolProps.location.type = cudaMemLocationTypeDevice;

  cudaMemPool_t memPool{};

  // The test (not the MR) owns this explicitly created pool.
  RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&memPool, &poolProps));

  // Scope the view resource so it is destroyed before the pool is destroyed below.
  {
    const auto pool_init_size{100};
    cuda_async_view_mr mr{memPool};
    void* ptr = mr.allocate(pool_init_size);
    mr.deallocate(ptr, pool_init_size);
    RMM_CUDA_TRY(cudaDeviceSynchronize());
  }

  // If the view resource had (incorrectly) destroyed the pool, destroying it
  // here would fail and RMM_EXPECTS would throw.
  auto destroy_valid_pool = [&]() {
    auto result = rmm::detail::async_alloc::cudaMemPoolDestroy(memPool);
    RMM_EXPECTS(result == cudaSuccess, "Pool wrapper did destroy pool");
  };

  EXPECT_NO_THROW(destroy_valid_pool());
}

// Constructing a view resource from a null pool handle must be rejected.
TEST(PoolTest, ThrowIfNullptrPool)
{
  auto make_view_from_null = [] {
    cudaMemPool_t null_pool{nullptr};
    cuda_async_view_mr mr{null_pool};
  };

  // The constructor's RMM_EXPECTS reports the null handle as rmm::logic_error.
  EXPECT_THROW(make_view_from_null(), rmm::logic_error);
}

#endif

} // namespace
} // namespace rmm::test

0 comments on commit 220ba88

Please sign in to comment.