From 5d8d39b6614799f8da9bcd0df38af76a10f87726 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 6 Aug 2020 15:00:53 +1000 Subject: [PATCH 01/21] Add [[deprecated]] to cnmem resources --- include/rmm/mr/device/cnmem_managed_memory_resource.hpp | 9 +++++---- include/rmm/mr/device/cnmem_memory_resource.hpp | 6 ++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/rmm/mr/device/cnmem_managed_memory_resource.hpp b/include/rmm/mr/device/cnmem_managed_memory_resource.hpp index 9d6120934..cd91f7c8f 100644 --- a/include/rmm/mr/device/cnmem_managed_memory_resource.hpp +++ b/include/rmm/mr/device/cnmem_managed_memory_resource.hpp @@ -23,8 +23,9 @@ namespace rmm { namespace mr { /** * @brief Memory resource that allocates/deallocates managed device memory - (CUDA Unified Memory) using the cnmem pool sub-allocator. - * the cnmem pool sub-allocator for allocation/deallocation. + * (CUDA Unified Memory) using the cnmem pool sub-allocator. + * + * @note This class is deprecated as of RMM 0.15. Use pool_memory_resource. */ class cnmem_managed_memory_resource final : public cnmem_memory_resource { public: @@ -37,8 +38,8 @@ class cnmem_managed_memory_resource final : public cnmem_memory_resource { * @param initial_pool_size Size, in bytes, of the intial pool size. When * zero, an implementation defined pool size is used. */ - explicit cnmem_managed_memory_resource(std::size_t initial_pool_size = 0, - std::vector const& devices = {}) + [[deprecated]] explicit cnmem_managed_memory_resource(std::size_t initial_pool_size = 0, + std::vector const& devices = {}) : cnmem_memory_resource(initial_pool_size, devices, memory_kind::MANAGED) { } diff --git a/include/rmm/mr/device/cnmem_memory_resource.hpp b/include/rmm/mr/device/cnmem_memory_resource.hpp index da39756df..953d0fed8 100644 --- a/include/rmm/mr/device/cnmem_memory_resource.hpp +++ b/include/rmm/mr/device/cnmem_memory_resource.hpp @@ -57,6 +57,8 @@ namespace mr { /** * @brief Memory resource that allocates/deallocates using the cnmem pool * sub-allocator. + * + * @note This class is deprecated as of RMM 0.15. Use pool_memory_resource. */ class cnmem_memory_resource : public device_memory_resource { public: @@ -71,8 +73,8 @@ class cnmem_memory_resource : public device_memory_resource { * zero, an implementation defined pool size is used. * @param devices List of GPU device IDs to register with CNMEM */ - explicit cnmem_memory_resource(std::size_t initial_pool_size = 0, - std::vector const& devices = {}) + [[deprecated]] explicit cnmem_memory_resource(std::size_t initial_pool_size = 0, + std::vector const& devices = {}) : cnmem_memory_resource(initial_pool_size, devices, memory_kind::CUDA) { } From af97cbc7ccdcfa81b529a91433fc46d220e9ff08 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 6 Aug 2020 15:01:09 +1000 Subject: [PATCH 02/21] Deprecate cnmem in readme --- README.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index de54f85a3..0f9ae72dc 100644 --- a/README.md +++ b/README.md @@ -164,15 +164,16 @@ Allocates and frees device memory using `cudaMalloc` and `cudaFree`. Allocates and frees device memory using `cudaMallocManaged` and `cudaFree`. -#### `cnmem_(managed_)memory_resource` - -Uses the [CNMeM](https://github.com/NVIDIA/cnmem) pool sub-allocator to satisfy (de)allocations. - #### `pool_memory_resource` A coalescing, best-fit pool sub-allocator. 
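
[Editor's note, not part of the patch: a minimal sketch of the replacement pattern this series adopts, assuming the RMM 0.15 `pool_memory_resource` interface. Constructing the pool over an explicit CUDA upstream mirrors the benchmark changes in the later patches; the allocation calls are only illustrative.]

```c++
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

int main()
{
  // Upstream resource that performs the actual cudaMalloc/cudaFree calls.
  rmm::mr::cuda_memory_resource cuda_mr{};
  // Coalescing, best-fit pool that suballocates from `cuda_mr`.
  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{&cuda_mr};

  void* p = pool_mr.allocate(4096);  // served from the pool, not cudaMalloc
  pool_mr.deallocate(p, 4096);
  return 0;
}
```
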
-### `fixed_size_memory_resource`
+#### `cnmem_(managed_)memory_resource` [DEPRECATED]
+
+Uses the [CNMeM](https://github.com/NVIDIA/cnmem) pool sub-allocator to satisfy (de)allocations.
+These resources are deprecated as of RMM 0.15.
+
+#### `fixed_size_memory_resource`
 
 A memory resource that can only allocate a single fixed size.
 Average allocation and deallocation cost is constant.
@@ -208,7 +209,8 @@ Accessing and modifying the default resource is done through two functions:
 
 ```c++
 rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(); // Points to `cuda_memory_resource`
-rmm::mr::cnmem_memory_resource pool_mr{}; // Construct a resource that uses the CNMeM pool
+// Construct a resource that uses a coalescing best-fit pool allocator
+rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource> pool_mr{mr};
 rmm::mr::set_default_resource(&pool_mr); // Updates the default resource pointer to `pool_mr`
 rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(); // Points to `pool_mr`
 ```

From a5861d00b891ce9ad22ae7f4d16817c0a8db003f Mon Sep 17 00:00:00 2001
From: Mark Harris
Date: Thu, 6 Aug 2020 15:14:45 +1000
Subject: [PATCH 03/21] Replace cnmem with pool in device_uvector_bench.

---
 .../device_uvector/device_uvector_bench.cu | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/benchmarks/device_uvector/device_uvector_bench.cu b/benchmarks/device_uvector/device_uvector_bench.cu
index b10f9dfc4..ccdce0a14 100644
--- a/benchmarks/device_uvector/device_uvector_bench.cu
+++ b/benchmarks/device_uvector/device_uvector_bench.cu
@@ -19,18 +19,22 @@
 #include <benchmark/benchmark.h>
 #include <rmm/device_uvector.hpp>
 #include <rmm/device_vector.hpp>
-#include <rmm/mr/device/cnmem_memory_resource.hpp>
 #include <rmm/mr/device/default_memory_resource.hpp>
+#include <rmm/mr/device/pool_memory_resource.hpp>
+#include "rmm/mr/device/cuda_memory_resource.hpp"
 
 static void BM_UvectorSizeConstruction(benchmark::State& state)
 {
-  rmm::mr::cnmem_memory_resource mr{};
+  rmm::mr::cuda_memory_resource cuda_mr{};
+  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> mr{&cuda_mr};
   rmm::mr::set_default_resource(&mr);
 
   for (auto _ : state) {
-    rmm::device_uvector<int32_t>(state.range(0), cudaStream_t{0});
+    rmm::device_uvector<int32_t> vec(state.range(0), cudaStream_t{0});
     cudaDeviceSynchronize();
   }
+
+  rmm::mr::set_default_resource(nullptr);
 }
 BENCHMARK(BM_UvectorSizeConstruction)
   ->RangeMultiplier(10)
@@ -39,12 +43,16 @@ static void
BM_ThrustVectorSizeConstruction(benchmark::State& state) cudaDeviceSynchronize(); } + state.SetItemsProcessed(state.iterations()); + rmm::mr::set_default_resource(nullptr); } From 819c974f94bb8e3816304eac63ae5b989e470743 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 6 Aug 2020 15:21:25 +1000 Subject: [PATCH 05/21] Remove cnmem from random_allocations benchmark --- .../random_allocations/random_allocations.cpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index 469a3fae7..13ebfb9e1 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -17,7 +17,6 @@ #include #include -#include #include #include #include @@ -158,8 +157,6 @@ void uniform_random_allocations(rmm::mr::device_memory_resource& mr, /// MR factory functions inline auto make_cuda() { return std::make_shared(); } -inline auto make_cnmem() { return std::make_shared(); } - inline auto make_pool() { return rmm::mr::make_owning_wrapper(make_cuda()); @@ -238,8 +235,6 @@ void declare_benchmark(std::string name) BENCHMARK_CAPTURE(BM_RandomAllocations, binning_mr, &make_binning)->Apply(benchmark_range); else if (name == "pool") BENCHMARK_CAPTURE(BM_RandomAllocations, pool_mr, &make_pool)->Apply(benchmark_range); - else if (name == "cnmem") - BENCHMARK_CAPTURE(BM_RandomAllocations, cnmem_mr, &make_cnmem)->Apply(benchmark_range); else std::cout << "Error: invalid memory_resource name: " << name << "\n"; } @@ -284,10 +279,8 @@ int main(int argc, char** argv) max_size = args["maxsize"].as(); if (args.count("profile") > 0) { - std::map const funcs({{"binning", &make_binning}, - {"cnmem", &make_cnmem}, - {"cuda", &make_cuda}, - {"pool", &make_pool}}); + std::map const funcs( + {{"binning", &make_binning}, {"cuda", &make_cuda}, {"pool", &make_pool}}); auto resource = args["resource"].as(); num_allocations = num_allocations > 0 ? 
num_allocations : 1000; @@ -304,7 +297,7 @@ int main(int argc, char** argv) std::string mr_name = args["resource"].as(); declare_benchmark(mr_name); } else { - std::array mrs{"pool", "binning", "cnmem", "cuda"}; + std::array mrs{"pool", "binning", "cuda"}; std::for_each(std::cbegin(mrs), std::cend(mrs), [](auto const& s) { declare_benchmark(s); }); } ::benchmark::RunSpecifiedBenchmarks(); From bbe906806ff2163e3441d433bc4e4c40718ccf7a Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 6 Aug 2020 15:30:44 +1000 Subject: [PATCH 06/21] Remove cnmem and add pool and binning to replay benchmark --- benchmarks/replay/replay.cpp | 45 ++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp index de9e42769..9c0e14540 100644 --- a/benchmarks/replay/replay.cpp +++ b/benchmarks/replay/replay.cpp @@ -18,8 +18,10 @@ #include #include -#include +#include #include +#include +#include #include #include @@ -32,6 +34,27 @@ #include #include #include +#include "rmm/mr/device/device_memory_resource.hpp" + +/// MR factory functions +inline auto make_cuda() { return std::make_shared(); } + +inline auto make_pool() +{ + return rmm::mr::make_owning_wrapper(make_cuda()); +} + +inline auto make_binning() +{ + auto pool = make_pool(); + auto mr = rmm::mr::make_owning_wrapper(pool); + for (std::size_t i = 18; i <= 22; i++) { + mr->wrapped().add_bin(1 << i); + } + return mr; +} + +using MRFactoryFunc = std::function()>; /** * @brief Represents an allocation made during the replay @@ -51,9 +74,8 @@ struct allocation { * @tparam MR The type of the `device_memory_resource` to use for allocation * replay */ -template struct replay_benchmark { - std::unique_ptr mr_{}; + std::shared_ptr mr_{}; std::vector> const& events_{}; /** @@ -63,9 +85,9 @@ struct replay_benchmark { * @param events The set of allocation events to replay * @param args Variable number of arguments forward to the constructor of MR */ - template - replay_benchmark(std::vector> const& events, Args&&... 
args) - : mr_{new MR{std::forward(args)...}}, events_{events} + replay_benchmark(MRFactoryFunc factory, + std::vector> const& events) + : mr_{factory()}, events_{events} { } @@ -193,13 +215,16 @@ int main(int argc, char** argv) auto const num_threads = per_thread_events.size(); - benchmark::RegisterBenchmark("CUDA Resource", - replay_benchmark{per_thread_events}) + benchmark::RegisterBenchmark("CUDA Resource", replay_benchmark{&make_cuda, per_thread_events}) + ->Unit(benchmark::kMillisecond) + ->Threads(num_threads); + + benchmark::RegisterBenchmark("Pool Resource", replay_benchmark(&make_pool, per_thread_events)) ->Unit(benchmark::kMillisecond) ->Threads(num_threads); - benchmark::RegisterBenchmark( - "CNMEM Resource", replay_benchmark(per_thread_events, 0u)) + benchmark::RegisterBenchmark("Binning Resource", + replay_benchmark(&make_binning, per_thread_events)) ->Unit(benchmark::kMillisecond) ->Threads(num_threads); From a3e9731b50a4913d188ef003377f8c4f72a7b38a Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 6 Aug 2020 15:43:34 +1000 Subject: [PATCH 07/21] Remove cnmem from tests --- tests/device_buffer_tests.cu | 8 ++------ tests/mr/device/mr_multithreaded_tests.cpp | 16 +--------------- tests/mr/device/mr_test.hpp | 9 --------- tests/mr/device/mr_tests.cpp | 2 -- tests/mr/device/thrust_allocator_tests.cu | 10 ---------- 5 files changed, 3 insertions(+), 42 deletions(-) diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu index 337e0d400..1839e57b0 100644 --- a/tests/device_buffer_tests.cu +++ b/tests/device_buffer_tests.cu @@ -18,12 +18,11 @@ #include #include -#include -#include #include #include #include #include +#include #include #include @@ -51,10 +50,7 @@ struct DeviceBufferTest : public ::testing::Test { void TearDown() override { EXPECT_EQ(cudaSuccess, cudaStreamDestroy(stream)); }; }; -using resources = ::testing::Types; +using resources = ::testing::Types; TYPED_TEST_CASE(DeviceBufferTest, resources); diff --git a/tests/mr/device/mr_multithreaded_tests.cpp b/tests/mr/device/mr_multithreaded_tests.cpp index ac57b8895..0c5a90511 100644 --- a/tests/mr/device/mr_multithreaded_tests.cpp +++ b/tests/mr/device/mr_multithreaded_tests.cpp @@ -39,8 +39,6 @@ INSTANTIATE_TEST_CASE_P(MultiThreadResourceTests, ::testing::Values(mr_factory{"CUDA", &make_cuda}, mr_factory{"Managed", &make_managed}, mr_factory{"Pool", &make_pool}, - mr_factory{"CNMEM", &make_cnmem}, - mr_factory{"CNMEM_Managed", &make_cnmem_managed}, mr_factory{"Binning", &make_binning}), [](auto const& info) { return info.param.name; }); @@ -234,10 +232,7 @@ TEST_P(mr_test_mt, AllocFreeDifferentThreadsSameStream) test_allocate_free_different_threads(this->mr.get(), this->stream, this->stream); } -struct mr_test_different_stream_mt : public mr_test_mt { -}; - -TEST_P(mr_test_different_stream_mt, AllocFreeDifferentThreadsDifferentStream) +TEST_P(mr_test_mt, AllocFreeDifferentThreadsDifferentStream) { cudaStream_t streamB{}; EXPECT_EQ(cudaSuccess, cudaStreamCreate(&streamB)); @@ -246,15 +241,6 @@ TEST_P(mr_test_different_stream_mt, AllocFreeDifferentThreadsDifferentStream) EXPECT_EQ(cudaSuccess, cudaStreamDestroy(streamB)); } -// CNMeM doesn't allow allocating/freeing on different streams -INSTANTIATE_TEST_CASE_P(MultiThreadResourceTestsDifferentStreams, - mr_test_different_stream_mt, - ::testing::Values(mr_factory{"CUDA", &make_cuda}, - mr_factory{"Managed", &make_managed}, - mr_factory{"Pool", &make_pool}, - mr_factory{"Binning", &make_binning}), - [](auto const& info) { return info.param.name; }); - 
} // namespace } // namespace test } // namespace rmm diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index e63aba31b..76048363b 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -19,8 +19,6 @@ #include #include -#include -#include #include #include #include @@ -228,13 +226,6 @@ inline auto make_cuda() { return std::make_shared inline auto make_managed() { return std::make_shared(); } -inline auto make_cnmem() { return std::make_shared(); } - -inline auto make_cnmem_managed() -{ - return std::make_shared(); -} - inline auto make_pool() { return rmm::mr::make_owning_wrapper(make_cuda()); diff --git a/tests/mr/device/mr_tests.cpp b/tests/mr/device/mr_tests.cpp index 650260cb7..c47fd5b05 100644 --- a/tests/mr/device/mr_tests.cpp +++ b/tests/mr/device/mr_tests.cpp @@ -27,8 +27,6 @@ INSTANTIATE_TEST_CASE_P(ResourceTests, mr_test, ::testing::Values(mr_factory{"CUDA", &make_cuda}, mr_factory{"Managed", &make_managed}, - mr_factory{"CNMEM", &make_cnmem}, - mr_factory{"CNMEM_Managed", &make_cnmem_managed}, mr_factory{"Pool", &make_pool}, mr_factory{"Binning", &make_binning}), [](auto const& info) { return info.param.name; }); diff --git a/tests/mr/device/thrust_allocator_tests.cu b/tests/mr/device/thrust_allocator_tests.cu index 6824b2701..ec72fba72 100644 --- a/tests/mr/device/thrust_allocator_tests.cu +++ b/tests/mr/device/thrust_allocator_tests.cu @@ -16,14 +16,6 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include #include #include "mr_test.hpp" @@ -44,8 +36,6 @@ INSTANTIATE_TEST_CASE_P(ThrustAllocatorTests, allocator_test, ::testing::Values(mr_factory{"CUDA", &make_cuda}, mr_factory{"Managed", &make_managed}, - mr_factory{"CNMEM", &make_cnmem}, - mr_factory{"CNMEM_Managed", &make_cnmem_managed}, mr_factory{"Pool", &make_pool}, mr_factory{"Binning", &make_binning}), [](auto const& info) { return info.param.name; }); From 24d901fb05d6039d229f09d02ab3c06e0ca9899f Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 6 Aug 2020 16:01:31 +1000 Subject: [PATCH 08/21] Change Python to use pool_memory_resource instead of cnmem --- python/rmm/_lib/memory_resource.pxd | 22 ------ python/rmm/_lib/memory_resource.pyx | 73 +++----------------- python/rmm/_lib/memory_resource_wrappers.hpp | 30 -------- python/rmm/mr.py | 4 -- python/rmm/rmm.py | 4 -- 5 files changed, 9 insertions(+), 124 deletions(-) diff --git a/python/rmm/_lib/memory_resource.pxd b/python/rmm/_lib/memory_resource.pxd index fc05242ed..2bb930bdd 100644 --- a/python/rmm/_lib/memory_resource.pxd +++ b/python/rmm/_lib/memory_resource.pxd @@ -17,22 +17,6 @@ cdef extern from "memory_resource_wrappers.hpp" nogil: ): managed_memory_resource_wrapper() except + - cdef cppclass cnmem_memory_resource_wrapper( - device_memory_resource_wrapper - ): - cnmem_memory_resource_wrapper( - size_t initial_pool_size, - vector[int] devices - ) except + - - cdef cppclass cnmem_managed_memory_resource_wrapper( - device_memory_resource_wrapper - ): - cnmem_managed_memory_resource_wrapper( - size_t initial_pool_size, - vector[int] devices - ) except + - cdef cppclass pool_memory_resource_wrapper(device_memory_resource_wrapper): pool_memory_resource_wrapper( shared_ptr[device_memory_resource_wrapper] upstream_mr, @@ -93,12 +77,6 @@ cdef class CudaMemoryResource(MemoryResource): cdef class ManagedMemoryResource(MemoryResource): pass -cdef class CNMemMemoryResource(MemoryResource): - pass - -cdef class CNMemManagedMemoryResource(MemoryResource): - pass - cdef class 
PoolMemoryResource(MemoryResource): pass diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index 6b4e72903..82225aad5 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -34,53 +34,6 @@ cdef class ManagedMemoryResource(MemoryResource): pass -cdef class CNMemMemoryResource(MemoryResource): - def __cinit__(self, size_t initial_pool_size=0, vector[int] devices=()): - self.c_obj.reset( - new cnmem_memory_resource_wrapper( - initial_pool_size, - devices - ) - ) - - def __init__(self, size_t initial_pool_size=0, vector[int] devices=()): - """ - Memory resource that uses the cnmem pool sub-allocator. - - Parameters - ---------- - initial_pool_size : int, optional - Initial pool size in bytes. By default, an implementation defined - pool size is used. - devices : tuple of int, optional - List of GPU device IDs to register with CNMEM. - """ - pass - - -cdef class CNMemManagedMemoryResource(MemoryResource): - def __cinit__(self, size_t initial_pool_size=0, vector[int] devices=()): - self.c_obj.reset( - new cnmem_managed_memory_resource_wrapper( - initial_pool_size, - devices - ) - ) - - def __init__(self, size_t initial_pool_size=0, vector[int] devices=()): - """ - Memory resource that uses the cnmem pool sub-allocator for - allocating/deallocating managed device memory. - - Parameters - ---------- - initial_pool_size : int, optional - Initial pool size in bytes. By default, an implementation defined - pool size is used. - devices : list of int - List of GPU device IDs to register with CNMEM. - """ - pass cdef class PoolMemoryResource(MemoryResource): @@ -271,7 +224,6 @@ cpdef _initialize( bool pool_allocator=False, bool managed_memory=False, object initial_pool_size=None, - object devices=0, bool logging=False, object log_file_name=None, ): @@ -281,27 +233,20 @@ cpdef _initialize( global _mr _mr = MemoryResource() - if not pool_allocator: - if not managed_memory: - typ = CudaMemoryResource - else: - typ = ManagedMemoryResource - args = () + if managed_memory: + upstream = ManagedMemoryResource else: - if not managed_memory: - typ = CNMemMemoryResource - else: - typ = CNMemManagedMemoryResource + upstream = CudaMemoryResource + if pool_allocator: if initial_pool_size is None: initial_pool_size = 0 - if devices is None: - devices = [0] - elif isinstance(devices, int): - devices = [devices] - - args = (initial_pool_size, devices) + typ = PoolMemoryResource + args = (upstream(), initial_pool_size) + else: + typ = upstream + args = () cdef MemoryResource mr diff --git a/python/rmm/_lib/memory_resource_wrappers.hpp b/python/rmm/_lib/memory_resource_wrappers.hpp index c4f20b3cb..290ebcf55 100644 --- a/python/rmm/_lib/memory_resource_wrappers.hpp +++ b/python/rmm/_lib/memory_resource_wrappers.hpp @@ -2,8 +2,6 @@ #include #include -#include -#include #include #include #include @@ -47,34 +45,6 @@ class managed_memory_resource_wrapper : public device_memory_resource_wrapper { std::shared_ptr mr; }; -class cnmem_memory_resource_wrapper : public device_memory_resource_wrapper { - public: - cnmem_memory_resource_wrapper(std::size_t initial_pool_size = 0, - std::vector const& devices = {}) - : mr(std::make_shared(initial_pool_size, devices)) - { - } - - std::shared_ptr get_mr() { return mr; } - - private: - std::shared_ptr mr; -}; - -class cnmem_managed_memory_resource_wrapper : public device_memory_resource_wrapper { - public: - cnmem_managed_memory_resource_wrapper(std::size_t initial_pool_size = 0, - std::vector const& devices = {}) 
- : mr(std::make_shared(initial_pool_size, devices)) - { - } - - std::shared_ptr get_mr() { return mr; } - - private: - std::shared_ptr mr; -}; - class pool_memory_resource_wrapper : public device_memory_resource_wrapper { public: pool_memory_resource_wrapper(std::shared_ptr upstream_mr, diff --git a/python/rmm/mr.py b/python/rmm/mr.py index 39c655f1c..c019e043a 100644 --- a/python/rmm/mr.py +++ b/python/rmm/mr.py @@ -1,8 +1,6 @@ # Copyright (c) 2020, NVIDIA CORPORATION. from rmm._lib.memory_resource import ( BinningMemoryResource, - CNMemManagedMemoryResource, - CNMemMemoryResource, CudaMemoryResource, FixedSizeMemoryResource, LoggingResourceAdaptor, @@ -18,8 +16,6 @@ __all__ = [ "BinningMemoryResource", - "CNMemManagedMemoryResource", - "CNMemMemoryResource", "CudaMemoryResource", "FixedSizeMemoryResource", "LoggingResourceAdaptor", diff --git a/python/rmm/rmm.py b/python/rmm/rmm.py index 3ca216c65..ef1ef4aca 100644 --- a/python/rmm/rmm.py +++ b/python/rmm/rmm.py @@ -32,7 +32,6 @@ def reinitialize( pool_allocator=False, managed_memory=False, initial_pool_size=None, - devices=0, logging=False, log_file_name=None, ): @@ -52,8 +51,6 @@ def reinitialize( When `pool_allocator` is True, this indicates the initial pool size in bytes. None is used to indicate the default size of the underlying memorypool implementation, which currently is 1/2 total GPU memory. - devices : int or List[int], default 0 - GPU device IDs to register. By default registers only GPU 0. logging : bool, default False If True, enable run-time logging of all memory events (alloc, free, realloc). @@ -66,7 +63,6 @@ def reinitialize( pool_allocator=pool_allocator, managed_memory=managed_memory, initial_pool_size=initial_pool_size, - devices=devices, logging=logging, log_file_name=log_file_name, ) From 6567062da3e025b993b78045cfd9bdef732346cc Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 6 Aug 2020 16:01:37 +1000 Subject: [PATCH 09/21] Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0530d13e7..92c990c77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ - PR #457 New `binning_memory_resource` (replaces `hybrid_memory_resource` and `fixed_multisize_memory_resource`). - PR #458 Add `get/set_per_device_resource` to better support multi-GPU per process applications +- PR #466 Deprecate CNMeM. 
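
[Editor's note: for code migrating off CNMeM, the owning-wrapper factory helpers used in the benchmark patches above are a convenient replacement pattern. A sketch, assuming the RMM 0.15 headers this series uses; `make_owning_wrapper` ties the upstream's lifetime to the wrapping pool's.]

```c++
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/owning_wrapper.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

#include <memory>

// Upstream factory: plain cudaMalloc/cudaFree resource.
inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource>(); }

// Pool factory: the owning wrapper keeps the CUDA upstream alive for as long
// as the pool lives, replacing the self-contained cnmem_memory_resource.
inline auto make_pool()
{
  return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda());
}
```
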
## Improvements From 127d9776daeca59a5b5e170abf14022417869f5c Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 6 Aug 2020 16:47:50 +1000 Subject: [PATCH 10/21] Python style --- python/rmm/_lib/memory_resource.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index 82225aad5..f221d368d 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -34,8 +34,6 @@ cdef class ManagedMemoryResource(MemoryResource): pass - - cdef class PoolMemoryResource(MemoryResource): def __cinit__( From 67846bba74d71909c636e5220ece7107284eb17f Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Fri, 7 Aug 2020 11:05:58 +1000 Subject: [PATCH 11/21] Deprecate get/set_default_resource and update docs --- README.md | 22 ++++++++++++------- .../rmm/mr/device/default_memory_resource.hpp | 14 ++++++++++-- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 0f9ae72dc..309067040 100644 --- a/README.md +++ b/README.md @@ -194,16 +194,22 @@ To enable this use case, RMM provides the concept of a "default" `device_memory_ resource is used when another is not explicitly provided. Accessing and modifying the default resource is done through two functions: -- `device_memory_resource* get_default_resource()` - - Returns a pointer to the current default resource - - The initial default memory resource is an instance of `cuda_memory_resource` - - This function is thread safe - -- `device_memory_resource* set_default_resource(device_memory_resource* new_resource)` - - Updates the default memory resource pointer to `new_resource` +- `device_memory_resource* get_current_device_resource()` + - Returns a pointer to the default resource for the current CUDA device. + - The initial default memory resource is an instance of `cuda_memory_resource`. + - This function is thread safe with respect to concurrent calls to it and + `set_current_device_resource()`. + - For more explicit control, you can use `get_per_device_resource()`, which takes a device ID. + - Replaces the deprecated `get_default_resource()` + +- `device_memory_resource* set_current_device_resource(device_memory_resource* new_mr)` + - Updates the default memory resource pointer for the current CUDA device to `new_resource` - Returns the previous default resource pointer - If `new_resource` is `nullptr`, then resets the default resource to `cuda_memory_resource` - - This function is thread safe + - This function is thread safe with respect to concurrent calls to it and + `get_current_device_resource()` + - For more explicit control, you can use `set_per_device_resource()`, which takes a device ID. + - Replaces the deprecated `set_default_resource()` #### Example diff --git a/include/rmm/mr/device/default_memory_resource.hpp b/include/rmm/mr/device/default_memory_resource.hpp index 98d5f9592..e603b821c 100644 --- a/include/rmm/mr/device/default_memory_resource.hpp +++ b/include/rmm/mr/device/default_memory_resource.hpp @@ -47,6 +47,9 @@ inline std::atomic& get_default() /** * @brief Get the default device memory resource pointer. * + * Deprecated as of RMM v0.15. Please use get_current_device_resource() or + * get_per_device_resource(). + * * The default device memory resource is used when an explicit memory resource * is not supplied. The initial default memory resource is a * `cuda_memory_resource`. 
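
[Editor's note: a short sketch of the replacement calls described in the doc comments above, combining the per-device API from PR #458 with a pool resource. The signatures match those appearing elsewhere in this patch series.]

```c++
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

int main()
{
  rmm::mr::cuda_memory_resource cuda_mr{};
  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{&cuda_mr};

  // Replaces the deprecated set_default_resource(); returns the previous resource.
  rmm::mr::device_memory_resource* previous = rmm::mr::set_current_device_resource(&pool_mr);

  // Replaces the deprecated get_default_resource().
  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
  (void)mr;

  // Explicit per-device control is also available:
  rmm::mr::set_per_device_resource(rmm::cuda_device_id{0}, &pool_mr);

  // Restore the previous resource before `pool_mr` goes out of scope.
  rmm::mr::set_current_device_resource(previous);
  return 0;
}
```
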
@@ -56,11 +59,17 @@ inline std::atomic& get_default() * @return device_memory_resource* Pointer to the current default memory * resource */ -inline device_memory_resource* get_default_resource() { return detail::get_default().load(); } +[[deprecated]] inline device_memory_resource* get_default_resource() +{ + return detail::get_default().load(); +} /** * @brief Sets the default device memory resource pointer. * + * Deprecated as of RMM v0.15. Please use set_current_device_resource() or + * set_per_device_resource(). + * * If `new_resource` is not `nullptr`, sets the default device memory resource * pointer to `new_resource`. Otherwise, resets the default device memory * resource to the initial `cuda_memory_resource`. @@ -74,7 +83,8 @@ inline device_memory_resource* get_default_resource() { return detail::get_defau * default device memory resource * @return The previous value of the default device memory resource pointer */ -inline device_memory_resource* set_default_resource(device_memory_resource* new_resource) +[[deprecated]] inline device_memory_resource* set_default_resource( + device_memory_resource* new_resource) { new_resource = (new_resource == nullptr) ? detail::initial_resource() : new_resource; return detail::get_default().exchange(new_resource); From 31d238de9843145fe43a2f72efb678a2882ab067 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 11 Aug 2020 15:15:58 +1000 Subject: [PATCH 12/21] Make default_memory_resource a wrapper around per_device_memory_resource --- .../rmm/mr/device/default_memory_resource.hpp | 30 ++----------------- include/rmm/mr/device/per_device_resource.hpp | 18 +++++++++-- 2 files changed, 19 insertions(+), 29 deletions(-) diff --git a/include/rmm/mr/device/default_memory_resource.hpp b/include/rmm/mr/device/default_memory_resource.hpp index e603b821c..1a736d7d4 100644 --- a/include/rmm/mr/device/default_memory_resource.hpp +++ b/include/rmm/mr/device/default_memory_resource.hpp @@ -16,33 +16,10 @@ #pragma once -#include "cuda_memory_resource.hpp" -#include "device_memory_resource.hpp" +#include -#include namespace rmm { namespace mr { -namespace detail { -/** - * @brief Returns a pointer to the initial resource. - * - * Returns a global instance of a `cuda_memory_resource` as a function local static. - * - * @return Pointer to the static cuda_memory_resource used as the initial, default resource - */ -inline device_memory_resource* initial_resource() -{ - static cuda_memory_resource mr{}; - return &mr; -} - -// Use an atomic to guarantee thread safety -inline std::atomic& get_default() -{ - static std::atomic res{detail::initial_resource()}; - return res; -} -} // namespace detail /** * @brief Get the default device memory resource pointer. @@ -61,7 +38,7 @@ inline std::atomic& get_default() */ [[deprecated]] inline device_memory_resource* get_default_resource() { - return detail::get_default().load(); + return get_current_device_resource(); } /** @@ -86,8 +63,7 @@ inline std::atomic& get_default() [[deprecated]] inline device_memory_resource* set_default_resource( device_memory_resource* new_resource) { - new_resource = (new_resource == nullptr) ? 
detail::initial_resource() : new_resource; - return detail::get_default().exchange(new_resource); + return set_current_device_resource(new_resource); } } // namespace mr diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index b190ce341..65e5f4632 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -16,8 +16,8 @@ #pragma once -#include "default_memory_resource.hpp" -#include "device_memory_resource.hpp" +#include +#include #include #include @@ -75,6 +75,20 @@ struct cuda_device_id { namespace mr { namespace detail { + +/** + * @brief Returns a pointer to the initial resource. + * + * Returns a global instance of a `cuda_memory_resource` as a function local static. + * + * @return Pointer to the static cuda_memory_resource used as the initial, default resource + */ +inline device_memory_resource* initial_resource() +{ + static cuda_memory_resource mr{}; + return &mr; +} + inline std::mutex& map_lock() { static std::mutex map_lock; From dc5f7ad317d47ea77d1ff50f04893388e0e272f0 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 11 Aug 2020 15:37:44 +1000 Subject: [PATCH 13/21] Add devices list to RMM python initialization - Iterates over devices to create resource per device - Update to use set_per_device_resource rather than set_default_resource - Log filename appends device ID - Updated tests --- python/rmm/_lib/lib.pxd | 2 + python/rmm/_lib/memory_resource.pxd | 9 +- python/rmm/_lib/memory_resource.pyx | 181 ++++++++++++++++--- python/rmm/_lib/memory_resource_wrappers.hpp | 12 +- python/rmm/mr.py | 16 +- python/rmm/rmm.py | 5 +- python/rmm/tests/test_rmm.py | 27 ++- 7 files changed, 203 insertions(+), 49 deletions(-) diff --git a/python/rmm/_lib/lib.pxd b/python/rmm/_lib/lib.pxd index ccf0f7223..5baddfdac 100644 --- a/python/rmm/_lib/lib.pxd +++ b/python/rmm/_lib/lib.pxd @@ -39,3 +39,5 @@ cdef extern from * nogil: cudaError_t cudaMemcpyAsync(void* dst, const void* src, size_t count, cudaMemcpyKind kind, cudaStream_t stream) cudaError_t cudaStreamSynchronize(cudaStream_t stream) + cudaError_t cudaGetDevice(int* device) + cudaError_t cudaSetDevice(int device) diff --git a/python/rmm/_lib/memory_resource.pxd b/python/rmm/_lib/memory_resource.pxd index 39b5f522d..7344a8236 100644 --- a/python/rmm/_lib/memory_resource.pxd +++ b/python/rmm/_lib/memory_resource.pxd @@ -68,7 +68,8 @@ cdef extern from "memory_resource_wrappers.hpp" nogil: shared_ptr[device_memory_resource_wrapper] upstream_mr, ) except + - void set_default_resource( + void set_per_device_resource( + int device, shared_ptr[device_memory_resource_wrapper] new_resource ) except + @@ -92,8 +93,6 @@ cdef class BinningMemoryResource(MemoryResource): cpdef add_bin(self, size_t allocation_size, object bin_resource=*) cdef class LoggingResourceAdaptor(MemoryResource): + cdef object _log_file_name + cpdef get_file_name(self) cpdef flush(self) - -cpdef get_default_resource_type() - -cpdef is_initialized() diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index f3afdd9b0..e0e877897 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -6,6 +6,11 @@ from libcpp.cast cimport dynamic_cast from libcpp.memory cimport make_shared, make_unique, shared_ptr, unique_ptr from libcpp.string cimport string +from rmm._lib.lib cimport ( + cudaGetDevice, + cudaSetDevice, + cudaSuccess +) cdef class CudaMemoryResource(MemoryResource): def __cinit__(self): @@ -202,6 
+207,23 @@ cdef class BinningMemoryResource(MemoryResource): _bin_resource.c_obj ) +def _append_id(filename, id): + """ + Append ".dev" onto a filename before the extension + + Example: _append_id("hello.txt", 1) returns "hello.dev1.txt" + + Parameters + ---------- + filename : string + The filename, possibly with extension + id : int + The ID to append + """ + name, ext = os.path.splitext(filename) + return "{name}.dev{uid}{ext}".format(name=name, uid=id, ext=ext) + + cdef class LoggingResourceAdaptor(MemoryResource): def __cinit__(self, MemoryResource upstream, object log_file_name=None): if log_file_name is None: @@ -212,10 +234,15 @@ cdef class LoggingResourceAdaptor(MemoryResource): "log_file_name= argument or RMM_LOG_FILE " "environment variable" ) + # Append the device ID before the file extension + log_file_name = _append_id(log_file_name.decode(), get_current_device()) + print(log_file_name) + _log_file_name = log_file_name + self.c_obj.reset( new logging_resource_adaptor_wrapper( upstream.c_obj, - log_file_name + log_file_name.encode() ) ) @@ -236,24 +263,49 @@ cdef class LoggingResourceAdaptor(MemoryResource): cpdef flush(self): ((self.c_obj.get()))[0].flush() + cpdef get_file_name(self): + return self._log_file_name + +# Global per-device memory resources; dict of int:MemoryResource +cdef dict _per_device_mrs = {} + + +cpdef get_current_device(): + """ + Get the current CUDA device + """ + cdef int current_device + err = cudaGetDevice(¤t_device) + if err != cudaSuccess: + raise RuntimeError(f"Failed to get CUDA device with error: {err}") + return current_device + -# Global memory resource: -cdef MemoryResource _mr +cpdef set_current_device(int device): + """ + Set the current CUDA device + + Parameters + ---------- + device : int + The ID of the device to set as current + """ + err = cudaSetDevice(device) + if err != cudaSuccess: + raise RuntimeError(f"Failed to set CUDA device with error: {err}") cpdef _initialize( bool pool_allocator=False, bool managed_memory=False, object initial_pool_size=None, + object devices=0, bool logging=False, object log_file_name=None, ): """ Initializes RMM library using the options passed """ - global _mr - _mr = MemoryResource() - if managed_memory: upstream = ManagedMemoryResource else: @@ -271,43 +323,118 @@ cpdef _initialize( cdef MemoryResource mr - if logging: - mr = LoggingResourceAdaptor(typ(*args), log_file_name.encode()) - else: - mr = typ(*args) + # Save the current device so we can reset it + cdef int original_device = get_current_device() + + # reset any previously specified per device resources + global _per_device_mrs + _per_device_mrs.clear() - _set_default_resource( - mr - ) + if devices is None: + devices = [0] + elif isinstance(devices, int): + devices = [devices] + # create a memory resource per specified device + for device in devices: + set_current_device(device) + + if logging: + mr = LoggingResourceAdaptor(typ(*args), log_file_name.encode()) + else: + mr = typ(*args) -cpdef _set_default_resource(MemoryResource mr): + _set_per_device_resource(device, mr) + + # reset CUDA device to original + set_current_device(original_device) + + +cpdef get_per_device_resource(int device): """ - Set the memory resource to use for RMM device allocations. + Get the default memory resource for the specified device. Parameters ---------- + device : int + The ID of the device for which to get the memory resource. 
+ """ + global _per_device_mrs + return _per_device_mrs[device] + + +cpdef _set_per_device_resource(int device, MemoryResource mr): + """ + Set the default memory resource for the specified device. + + Parameters + ---------- + device : int + The ID of the device for which to get the memory resource. mr : MemoryResource - A MemoryResource object. See `rmm.mr` for the different - MemoryResource types available. + The memory resource to set. """ - global _mr - _mr = mr - set_default_resource(_mr.c_obj) + global _per_device_mrs + _per_device_mrs[device] = mr + _mr = mr # coerce Python object to C object + set_per_device_resource(device, _mr.c_obj) -cpdef get_default_resource_type(): +cpdef set_current_device_resource(MemoryResource mr): """ - Get the default memory resource type used for RMM device allocations. + Set the default memory resource for the current device. + + Parameters + ---------- + mr : MemoryResource + The memory resource to set. """ - return type(_mr) + _set_per_device_resource(get_current_device(), mr) + + +cpdef get_per_device_resource_type(int device): + """ + Get the memory resource type used for RMM device allocations on the + specified device. + + Parameters + ---------- + device : int + The device ID + """ + return type(get_per_device_resource(device)) + + +cpdef get_current_device_resource(): + return get_per_device_resource(get_current_device()) + + +cpdef get_current_device_resource_type(): + """ + Get the memory resource type used for RMM device allocations on the + current device. + """ + return type(get_current_device_resource()) cpdef is_initialized(): - global _mr - return _mr.c_obj.get() is not NULL + """ + Check whether RMM is initialized + """ + global _per_device_mrs + cdef MemoryResource each_mr + return all( + [each_mr.c_obj.get() is not NULL + for each_mr in _per_device_mrs.values()] + ) cpdef _flush_logs(): - global _mr - _mr.flush() + """ + Flush the logs of all currently initialized LoggingResourceAdaptor + memory resources + """ + global _per_device_mrs + cdef MemoryResource each_mr + [each_mr.flush() for each_mr in _per_device_mrs.values() + if type(each_mr) is LoggingResourceAdaptor] diff --git a/python/rmm/_lib/memory_resource_wrappers.hpp b/python/rmm/_lib/memory_resource_wrappers.hpp index 107653b91..3d3624787 100644 --- a/python/rmm/_lib/memory_resource_wrappers.hpp +++ b/python/rmm/_lib/memory_resource_wrappers.hpp @@ -12,6 +12,7 @@ #include #include #include +#include "rmm/mr/device/per_device_resource.hpp" // These are "owning" versions of the memory_resource classes // that help lift the responsibility of managing memory resource @@ -153,7 +154,14 @@ class thread_safe_resource_adaptor_wrapper : public device_memory_resource_wrapp std::shared_ptr> mr; }; -void set_default_resource(std::shared_ptr new_resource) +inline void set_per_device_resource(int device_id, + std::shared_ptr new_resource) { - rmm::mr::set_default_resource(new_resource->get_mr().get()); + rmm::mr::set_per_device_resource(rmm::cuda_device_id{device_id}, new_resource->get_mr().get()); +} + +inline void set_current_device_resource( + std::shared_ptr new_resource) +{ + rmm::mr::set_current_device_resource(new_resource->get_mr().get()); } diff --git a/python/rmm/mr.py b/python/rmm/mr.py index c019e043a..73bbb0eeb 100644 --- a/python/rmm/mr.py +++ b/python/rmm/mr.py @@ -9,8 +9,12 @@ PoolMemoryResource, _flush_logs, _initialize, - _set_default_resource as set_default_resource, - get_default_resource_type, + _set_per_device_resource as set_per_device_resource, + 
get_per_device_resource, + set_current_device_resource, + get_current_device_resource, + get_per_device_resource_type, + get_current_device_resource_type, is_initialized, ) @@ -24,7 +28,11 @@ "PoolMemoryResource", "_flush_logs", "_initialize", - "set_default_resource", - "get_default_resource_type", + "set_per_device_resource", + "get_per_device_resource", + "set_current_device_resource", + "get_current_device_resource", + "get_per_device_resource_type", + "get_current_device_resource_type", "is_initialized", ] diff --git a/python/rmm/rmm.py b/python/rmm/rmm.py index ef1ef4aca..5fb33cb23 100644 --- a/python/rmm/rmm.py +++ b/python/rmm/rmm.py @@ -20,7 +20,6 @@ import rmm from rmm import _lib as librmm - # Utility Functions class RMMError(Exception): def __init__(self, errcode, msg): @@ -32,6 +31,7 @@ def reinitialize( pool_allocator=False, managed_memory=False, initial_pool_size=None, + devices=0, logging=False, log_file_name=None, ): @@ -51,6 +51,8 @@ def reinitialize( When `pool_allocator` is True, this indicates the initial pool size in bytes. None is used to indicate the default size of the underlying memorypool implementation, which currently is 1/2 total GPU memory. + devices : int or List[int], default 0 + GPU device IDs to register. By default registers only GPU 0. logging : bool, default False If True, enable run-time logging of all memory events (alloc, free, realloc). @@ -63,6 +65,7 @@ def reinitialize( pool_allocator=pool_allocator, managed_memory=managed_memory, initial_pool_size=initial_pool_size, + devices=devices, logging=logging, log_file_name=log_file_name, ) diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index 29ad236d9..304bc6561 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -1,5 +1,6 @@ # Copyright (c) 2020, NVIDIA CORPORATION. 
+import os import sys import tempfile from itertools import product @@ -75,12 +76,18 @@ def test_rmm_modes(dtype, nelem, alloc, managed, pool): @pytest.mark.parametrize("nelem", _nelems) @pytest.mark.parametrize("alloc", _allocs) def test_rmm_csv_log(dtype, nelem, alloc): - with tempfile.NamedTemporaryFile() as fp: - rmm.reinitialize(logging=True, log_file_name=fp.name) + try: + filename = "/tmp/test_rmm_csv_log.csv" + rmm.reinitialize(logging=True, log_file_name=filename) array_tester(dtype, nelem, alloc) rmm.mr._flush_logs() - csv = fp.read() - assert csv.find(b"Time,Action,Pointer,Size,Stream") >= 0 + # Need to open separately because the device ID is appended to filename + filename = "/tmp/test_rmm_csv_log.dev0.csv" + with open(filename, 'rb') as f: + csv = f.read() + assert csv.find(b"Time,Action,Pointer,Size,Stream") >= 0 + finally: + os.remove(filename) rmm.reinitialize() @@ -286,8 +293,8 @@ def test_pool_memory_resource(dtype, nelem, alloc): initial_pool_size=1 << 22, maximum_pool_size=1 << 23, ) - rmm.mr.set_default_resource(mr) - assert rmm.mr.get_default_resource_type() is type(mr) + rmm.mr.set_current_device_resource(mr) + assert rmm.mr.get_current_device_resource_type() is type(mr) array_tester(dtype, nelem, alloc) rmm.reinitialize() @@ -306,8 +313,8 @@ def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream): mr = rmm.mr.FixedSizeMemoryResource( upstream(), block_size=1 << 20, blocks_to_preallocate=128 ) - rmm.mr.set_default_resource(mr) - assert rmm.mr.get_default_resource_type() is type(mr) + rmm.mr.set_current_device_resource(mr) + assert rmm.mr.get_current_device_resource_type() is type(mr) array_tester(dtype, nelem, alloc) rmm.reinitialize() @@ -337,7 +344,7 @@ def test_binning_memory_resource(dtype, nelem, alloc, upstream_mr): mr.add_bin(1 << 10, fixed_mr) # 1KiB bin mr.add_bin(1 << 23, cuda_mr) # 8MiB bin - rmm.mr.set_default_resource(mr) - assert rmm.mr.get_default_resource_type() is type(mr) + rmm.mr.set_current_device_resource(mr) + assert rmm.mr.get_current_device_resource_type() is type(mr) array_tester(dtype, nelem, alloc) rmm.reinitialize() From 7be1df5980e705367a341b5e1d26540f61d7ffca Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 11 Aug 2020 16:09:59 +1000 Subject: [PATCH 14/21] Remove more usage of get/set_default_resource, update readme. --- README.md | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 79e70a0e0..525fbda39 100644 --- a/README.md +++ b/README.md @@ -205,11 +205,11 @@ Accessing and modifying the default resource is done through two functions: #### Example ```c++ -rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(); // Points to `cuda_memory_resource` +rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); // Points to `cuda_memory_resource` // Construct a resource that uses a coalescing best-fit pool allocator rmm::mr::pool_memory_resource> pool_mr{mr}; rmm::mr::set_default_resource(&pool_mr); // Updates the default resource pointer to `pool_mr` -rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(); // Points to `pool_mr` +rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); // Points to `pool_mr` ``` ## Device Data Structures @@ -371,26 +371,31 @@ array([1., 2., 3.]) ### MemoryResources -MemoryResources are used to configure how device memory allocations are made by RMM. +MemoryResources are used to configure how device memory allocations are made by +RMM. 
By default, i.e., if you don't set a MemoryResource explicitly, RMM uses the `CudaMemoryResource`, which uses `cudaMalloc` for allocating device memory.
 
-The `rmm.mr.set_default_resource()` function can be used to set a
-different MemoryResource. For example, enabling the
-`ManagedMemoryResource` tells RMM to use `cudaMallocManaged` instead
-of `cudaMalloc` for allocating memory:
+`rmm.reinitialize()` provides an easy way to initialize RMM with specific
+memory resource options across multiple devices. See `help(rmm.reinitialize)` for
+full details.
+
+For lower-level control, the `rmm.mr.set_current_device_resource()` function can be
+used to set a different MemoryResource for the current CUDA device. For
+example, enabling the `ManagedMemoryResource` tells RMM to use
+`cudaMallocManaged` instead of `cudaMalloc` for allocating memory:
 
 ```python
 >>> import rmm
->>> rmm.mr.set_default_resource(rmm.mr.ManagedMemoryResource())
+>>> rmm.mr.set_current_device_resource(rmm.mr.ManagedMemoryResource())
 ```
 
-> :warning: The default resource must be set **before** allocating any
-> device memory. Setting or changing the default resource after
-> device allocations have been made can lead to unexpected behaviour
-> or crashes.
+> :warning: The default resource must be set for any device **before**
+> allocating any device memory on that device. Setting or changing the
+> resource after device allocations have been made can lead to unexpected
+> behaviour or crashes.
 
 As another example, `PoolMemoryResource` allows you to allocate a large "pool" of device memory up-front. Subsequent allocations will draw from this pool of already allocated memory. The example below shows how to construct a PoolMemoryResource with an initial size of 1 GiB and a maximum size of 4 GiB. The pool uses `cudaMalloc` underneath:
 
 ```python
 >>> import rmm
 >>> pool = rmm.mr.PoolMemoryResource(
 ...     initial_pool_size=2**30,
 ...     maximum_pool_size=2**32
 ... )
->>> rmm.mr.set_default_resource(pool)
+>>> rmm.mr.set_current_device_resource(pool)
 ```
 
 Other MemoryResources include:

From 5e592e1c04bd72aa355e70a97072f0cfa27d7d52 Mon Sep 17 00:00:00 2001
From: Mark Harris
Date: Tue, 11 Aug 2020 16:13:08 +1000
Subject: [PATCH 15/21] default --> current_device, python style

---
 .../device_uvector/device_uvector_bench.cu | 9 +++++----
 include/rmm/device_buffer.hpp | 14 +++++++-------
 include/rmm/device_scalar.hpp | 12 +++++++-----
 include/rmm/device_uvector.hpp | 14 ++++++++------
 .../rmm/mr/device/thrust_allocator_adaptor.hpp | 4 ++--
 python/rmm/_lib/memory_resource.pyx | 13 ++++++++-----
 python/rmm/mr.py | 6 +++---
 python/rmm/rmm.py | 1 +
 python/rmm/tests/test_rmm.py | 3 +--
 tests/device_buffer_tests.cu | 18 +++++++++---------
 tests/device_scalar_tests.cpp | 2 +-
 tests/mr/device/mr_multithreaded_tests.cpp | 6 +++---
 tests/mr/device/mr_test.hpp | 11 +++++++++++
 tests/mr/device/mr_tests.cpp | 2 ++
 tests/mr/device/pool_mr_tests.cpp | 6 +++---
 15 files changed, 71 insertions(+), 50 deletions(-)

diff --git a/benchmarks/device_uvector/device_uvector_bench.cu b/benchmarks/device_uvector/device_uvector_bench.cu
index 7e018a7c1..34f32c536 100644
--- a/benchmarks/device_uvector/device_uvector_bench.cu
+++ b/benchmarks/device_uvector/device_uvector_bench.cu
@@ -22,12 +22,13 @@
 #include
 #include
 #include "rmm/mr/device/cuda_memory_resource.hpp"
+#include "rmm/mr/device/per_device_resource.hpp"
 
 static void
BM_UvectorSizeConstruction(benchmark::State& state) state.SetItemsProcessed(state.iterations()); - rmm::mr::set_default_resource(nullptr); + rmm::mr::set_current_device_resource(nullptr); } BENCHMARK(BM_UvectorSizeConstruction) ->RangeMultiplier(10) @@ -47,7 +48,7 @@ static void BM_ThrustVectorSizeConstruction(benchmark::State& state) { rmm::mr::cuda_memory_resource cuda_mr{}; rmm::mr::pool_memory_resource mr{&cuda_mr}; - rmm::mr::set_default_resource(&mr); + rmm::mr::set_current_device_resource(&mr); for (auto _ : state) { rmm::device_vector vec(state.range(0)); @@ -56,7 +57,7 @@ static void BM_ThrustVectorSizeConstruction(benchmark::State& state) state.SetItemsProcessed(state.iterations()); - rmm::mr::set_default_resource(nullptr); + rmm::mr::set_current_device_resource(nullptr); } BENCHMARK(BM_ThrustVectorSizeConstruction) diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp index 09ac656f1..c8c5a52bd 100644 --- a/include/rmm/device_buffer.hpp +++ b/include/rmm/device_buffer.hpp @@ -31,7 +31,7 @@ namespace rmm { * * This class allocates untyped and *uninitialized* device memory using a * `device_memory_resource`. If not explicitly specified, the memory resource - * returned from `get_default_resource()` is used. + * returned from `get_current_device_resource()` is used. * * @note Unlike `std::vector` or `thrust::device_vector`, the device memory * allocated by a `device_buffer` is uninitialized. Therefore, it is undefined @@ -79,7 +79,7 @@ class device_buffer { // context of both host and device functions. Specifically, the `cudf::type_dispatcher` is a host- // device function. This causes warnings/errors because this ctor invokes host-only functions. device_buffer() - : _data{nullptr}, _size{}, _capacity{}, _stream{}, _mr{rmm::mr::get_default_resource()} + : _data{nullptr}, _size{}, _capacity{}, _stream{}, _mr{rmm::mr::get_current_device_resource()} { } @@ -95,7 +95,7 @@ class device_buffer { */ explicit device_buffer(std::size_t size, cudaStream_t stream = 0, - mr::device_memory_resource* mr = mr::get_default_resource()) + mr::device_memory_resource* mr = mr::get_current_device_resource()) : _stream{stream}, _mr{mr} { allocate(size); @@ -118,7 +118,7 @@ class device_buffer { device_buffer(void const* source_data, std::size_t size, cudaStream_t stream = 0, - mr::device_memory_resource* mr = mr::get_default_resource()) + mr::device_memory_resource* mr = mr::get_current_device_resource()) : _stream{stream}, _mr{mr} { allocate(size); @@ -143,7 +143,7 @@ class device_buffer { */ device_buffer(device_buffer const& other, cudaStream_t stream = 0, - rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) : device_buffer{other.data(), other.size(), stream, mr} { } @@ -388,8 +388,8 @@ class device_buffer { std::size_t _capacity{}; ///< The actual size of the device memory allocation cudaStream_t _stream{}; ///< Stream to use for device memory deallocation mr::device_memory_resource* _mr{ - mr::get_default_resource()}; ///< The memory resource used to - ///< allocate/deallocate device memory + mr::get_current_device_resource()}; ///< The memory resource used to + ///< allocate/deallocate device memory /** * @brief Allocates the specified amount of memory and updates the diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 64f4cb76d..0031e6b58 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -50,8 +50,9 @@ class 
device_scalar { * @param stream Stream on which to perform asynchronous allocation. * @param mr Optional, resource with which to allocate. */ - explicit device_scalar(cudaStream_t stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_default_resource()) + explicit device_scalar( + cudaStream_t stream, + rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) : buffer{sizeof(T), stream, mr} { } @@ -72,9 +73,10 @@ class device_scalar { * @param stream Optional, stream on which to perform allocation and copy. * @param mr Optional, resource with which to allocate. */ - explicit device_scalar(T const &initial_value, - cudaStream_t stream = 0, - rmm::mr::device_memory_resource *mr = rmm::mr::get_default_resource()) + explicit device_scalar( + T const &initial_value, + cudaStream_t stream = 0, + rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) : buffer{sizeof(T), stream, mr} { set_value(initial_value, stream); diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp index 784d45736..4fa3cc854 100644 --- a/include/rmm/device_uvector.hpp +++ b/include/rmm/device_uvector.hpp @@ -107,9 +107,10 @@ class device_uvector { * @param stream The stream on which to perform the allocation * @param mr The resource used to allocate the device storage */ - explicit device_uvector(std::size_t size, - cudaStream_t stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource()) + explicit device_uvector( + std::size_t size, + cudaStream_t stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) : _storage{elements_to_bytes(size), stream, mr} { } @@ -123,9 +124,10 @@ class device_uvector { * @param stream The stream on which to perform the copy * @param mr The resource used to allocate device memory for the new vector */ - explicit device_uvector(device_uvector const& other, - cudaStream_t stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource()) + explicit device_uvector( + device_uvector const& other, + cudaStream_t stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) : _storage{other.storage, stream, mr} { } diff --git a/include/rmm/mr/device/thrust_allocator_adaptor.hpp b/include/rmm/mr/device/thrust_allocator_adaptor.hpp index ec791af63..4333b3d6b 100644 --- a/include/rmm/mr/device/thrust_allocator_adaptor.hpp +++ b/include/rmm/mr/device/thrust_allocator_adaptor.hpp @@ -120,8 +120,8 @@ class thrust_allocator : public thrust::device_malloc_allocator { cudaStream_t stream() const noexcept { return _stream; } private: - device_memory_resource* _mr{rmm::mr::get_default_resource()}; + device_memory_resource* _mr{rmm::mr::get_current_device_resource()}; cudaStream_t _stream{0}; }; } // namespace mr -} // namespace rmm \ No newline at end of file +} // namespace rmm diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index 1e278b09f..a89ec795b 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -208,6 +208,7 @@ cdef class BinningMemoryResource(MemoryResource): _bin_resource.c_obj ) + def _append_id(filename, id): """ Append ".dev" onto a filename before the extension @@ -216,7 +217,7 @@ def _append_id(filename, id): Parameters ---------- - filename : string + filename : string The filename, possibly with extension id : int The ID to append @@ -236,7 +237,9 @@ cdef class LoggingResourceAdaptor(MemoryResource): "environment variable" ) # Append the device ID 
before the file extension - log_file_name = _append_id(log_file_name.decode(), get_current_device()) + log_file_name = _append_id( + log_file_name.decode(), get_current_device() + ) print(log_file_name) _log_file_name = log_file_name @@ -346,7 +349,7 @@ cpdef _initialize( mr = typ(*args) _set_per_device_resource(device, mr) - + # reset CUDA device to original set_current_device(original_device) @@ -377,7 +380,7 @@ cpdef _set_per_device_resource(int device, MemoryResource mr): """ global _per_device_mrs _per_device_mrs[device] = mr - _mr = mr # coerce Python object to C object + _mr = mr # coerce Python object to C object set_per_device_resource(device, _mr.c_obj) @@ -427,7 +430,7 @@ cpdef is_initialized(): return all( [each_mr.c_obj.get() is not NULL for each_mr in _per_device_mrs.values()] - ) + ) cpdef _flush_logs(): diff --git a/python/rmm/mr.py b/python/rmm/mr.py index 73bbb0eeb..e9964db4d 100644 --- a/python/rmm/mr.py +++ b/python/rmm/mr.py @@ -10,12 +10,12 @@ _flush_logs, _initialize, _set_per_device_resource as set_per_device_resource, - get_per_device_resource, - set_current_device_resource, get_current_device_resource, - get_per_device_resource_type, get_current_device_resource_type, + get_per_device_resource, + get_per_device_resource_type, is_initialized, + set_current_device_resource, ) __all__ = [ diff --git a/python/rmm/rmm.py b/python/rmm/rmm.py index 5fb33cb23..3ca216c65 100644 --- a/python/rmm/rmm.py +++ b/python/rmm/rmm.py @@ -20,6 +20,7 @@ import rmm from rmm import _lib as librmm + # Utility Functions class RMMError(Exception): def __init__(self, errcode, msg): diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index 304bc6561..4cb8c5539 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -2,7 +2,6 @@ import os import sys -import tempfile from itertools import product import numpy as np @@ -83,7 +82,7 @@ def test_rmm_csv_log(dtype, nelem, alloc): rmm.mr._flush_logs() # Need to open separately because the device ID is appended to filename filename = "/tmp/test_rmm_csv_log.dev0.csv" - with open(filename, 'rb') as f: + with open(filename, "rb") as f: csv = f.read() assert csv.find(b"Time,Action,Pointer,Size,Stream") >= 0 finally: diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu index 1839e57b0..209f97d15 100644 --- a/tests/device_buffer_tests.cu +++ b/tests/device_buffer_tests.cu @@ -60,7 +60,7 @@ TYPED_TEST(DeviceBufferTest, DefaultMemoryResource) EXPECT_NE(nullptr, buff.data()); EXPECT_EQ(this->size, buff.size()); EXPECT_EQ(this->size, buff.capacity()); - EXPECT_EQ(rmm::mr::get_default_resource(), buff.memory_resource()); + EXPECT_EQ(rmm::mr::get_current_device_resource(), buff.memory_resource()); EXPECT_EQ(0, buff.stream()); } @@ -71,7 +71,7 @@ TYPED_TEST(DeviceBufferTest, DefaultMemoryResourceStream) EXPECT_NE(nullptr, buff.data()); EXPECT_EQ(this->size, buff.size()); EXPECT_EQ(this->size, buff.capacity()); - EXPECT_EQ(rmm::mr::get_default_resource(), buff.memory_resource()); + EXPECT_EQ(rmm::mr::get_current_device_resource(), buff.memory_resource()); EXPECT_EQ(this->stream, buff.stream()); } @@ -106,7 +106,7 @@ TYPED_TEST(DeviceBufferTest, CopyFromRawDevicePointer) EXPECT_NE(nullptr, buff.data()); EXPECT_EQ(this->size, buff.size()); EXPECT_EQ(this->size, buff.capacity()); - EXPECT_EQ(rmm::mr::get_default_resource(), buff.memory_resource()); + EXPECT_EQ(rmm::mr::get_current_device_resource(), buff.memory_resource()); EXPECT_EQ(0, buff.stream()); // TODO check for equality between the contents of 
diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu
index 1839e57b0..209f97d15 100644
--- a/tests/device_buffer_tests.cu
+++ b/tests/device_buffer_tests.cu
@@ -60,7 +60,7 @@ TYPED_TEST(DeviceBufferTest, DefaultMemoryResource)
   EXPECT_NE(nullptr, buff.data());
   EXPECT_EQ(this->size, buff.size());
   EXPECT_EQ(this->size, buff.capacity());
-  EXPECT_EQ(rmm::mr::get_default_resource(), buff.memory_resource());
+  EXPECT_EQ(rmm::mr::get_current_device_resource(), buff.memory_resource());
   EXPECT_EQ(0, buff.stream());
 }
 
@@ -71,7 +71,7 @@ TYPED_TEST(DeviceBufferTest, DefaultMemoryResourceStream)
   EXPECT_NE(nullptr, buff.data());
   EXPECT_EQ(this->size, buff.size());
   EXPECT_EQ(this->size, buff.capacity());
-  EXPECT_EQ(rmm::mr::get_default_resource(), buff.memory_resource());
+  EXPECT_EQ(rmm::mr::get_current_device_resource(), buff.memory_resource());
   EXPECT_EQ(this->stream, buff.stream());
 }
 
@@ -106,7 +106,7 @@ TYPED_TEST(DeviceBufferTest, CopyFromRawDevicePointer)
   EXPECT_NE(nullptr, buff.data());
   EXPECT_EQ(this->size, buff.size());
   EXPECT_EQ(this->size, buff.capacity());
-  EXPECT_EQ(rmm::mr::get_default_resource(), buff.memory_resource());
+  EXPECT_EQ(rmm::mr::get_current_device_resource(), buff.memory_resource());
   EXPECT_EQ(0, buff.stream());
   // TODO check for equality between the contents of the two allocations
   EXPECT_EQ(cudaSuccess, cudaFree(device_memory));
@@ -119,7 +119,7 @@ TYPED_TEST(DeviceBufferTest, CopyFromRawHostPointer)
   EXPECT_NE(nullptr, buff.data());
   EXPECT_EQ(this->size, buff.size());
   EXPECT_EQ(this->size, buff.capacity());
-  EXPECT_EQ(rmm::mr::get_default_resource(), buff.memory_resource());
+  EXPECT_EQ(rmm::mr::get_current_device_resource(), buff.memory_resource());
   EXPECT_EQ(0, buff.stream());
   // TODO check for equality between the contents of the two allocations
 }
 
@@ -131,7 +131,7 @@ TYPED_TEST(DeviceBufferTest, CopyFromNullptr)
   EXPECT_EQ(nullptr, buff.data());
   EXPECT_EQ(0, buff.size());
   EXPECT_EQ(0, buff.capacity());
-  EXPECT_EQ(rmm::mr::get_default_resource(), buff.memory_resource());
+  EXPECT_EQ(rmm::mr::get_current_device_resource(), buff.memory_resource());
   EXPECT_EQ(0, buff.stream());
 }
 
@@ -156,8 +156,8 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor)
   EXPECT_NE(buff.data(), buff_copy.data());
   EXPECT_EQ(buff.size(), buff_copy.size());
   EXPECT_EQ(buff.capacity(), buff_copy.capacity());
-  EXPECT_EQ(buff_copy.memory_resource(), rmm::mr::get_default_resource());
-  EXPECT_TRUE(buff_copy.memory_resource()->is_equal(*rmm::mr::get_default_resource()));
+  EXPECT_EQ(buff_copy.memory_resource(), rmm::mr::get_current_device_resource());
+  EXPECT_TRUE(buff_copy.memory_resource()->is_equal(*rmm::mr::get_current_device_resource()));
   EXPECT_EQ(buff_copy.stream(), cudaStream_t{0});
 
   EXPECT_TRUE(thrust::equal(thrust::device,
@@ -196,8 +196,8 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSize)
   // The capacity of the copy should be equal to the `size()` of the original
   EXPECT_EQ(new_size, buff_copy.capacity());
 
-  EXPECT_EQ(buff_copy.memory_resource(), rmm::mr::get_default_resource());
-  EXPECT_TRUE(buff_copy.memory_resource()->is_equal(*rmm::mr::get_default_resource()));
+  EXPECT_EQ(buff_copy.memory_resource(), rmm::mr::get_current_device_resource());
+  EXPECT_TRUE(buff_copy.memory_resource()->is_equal(*rmm::mr::get_current_device_resource()));
   EXPECT_EQ(buff_copy.stream(), cudaStream_t{0});
 
   // EXPECT_TRUE(
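The copy tests above pin down the semantics worth remembering: copying a `device_buffer` deep-copies into a fresh allocation from `get_current_device_resource()` (or from a resource passed to the copy constructor), and the copy's capacity is trimmed to the original's `size()`. A hedged sketch of that behavior (the `per_device_resource.hpp` header path is an assumption):

```c++
#include <rmm/device_buffer.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

#include <cassert>

void copy_semantics_sketch()
{
  rmm::device_buffer original{1024};  // allocated from the current device resource

  // Deep copy: a new allocation from the current device resource, contents copied
  rmm::device_buffer copy{original};

  assert(copy.data() != original.data());
  assert(copy.size() == original.size());
  // The copy's capacity equals the original's size(), even when the
  // original's capacity was larger
  assert(copy.memory_resource()->is_equal(*rmm::mr::get_current_device_resource()));
}
```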
diff --git a/tests/device_scalar_tests.cpp b/tests/device_scalar_tests.cpp
index f4f9af29c..36661ee47 100644
--- a/tests/device_scalar_tests.cpp
+++ b/tests/device_scalar_tests.cpp
@@ -30,7 +30,7 @@ void sync_stream(cudaStream_t stream) { EXPECT_EQ(cudaSuccess, cudaStreamSynchro
 template <typename T>
 struct DeviceScalarTest : public ::testing::Test {
   cudaStream_t stream{};
-  rmm::mr::device_memory_resource* mr{rmm::mr::get_default_resource()};
+  rmm::mr::device_memory_resource* mr{rmm::mr::get_current_device_resource()};
   T value{};
   std::default_random_engine generator{};
   std::uniform_int_distribution<T> distribution{std::numeric_limits<T>::lowest(),
diff --git a/tests/mr/device/mr_multithreaded_tests.cpp b/tests/mr/device/mr_multithreaded_tests.cpp
index 0c5a90511..197f6a918 100644
--- a/tests/mr/device/mr_multithreaded_tests.cpp
+++ b/tests/mr/device/mr_multithreaded_tests.cpp
@@ -85,7 +85,7 @@ TEST_P(mr_test_mt, SetDefaultResource_mt)
   // single thread changes default resource, then multiple threads use it
 
   rmm::mr::device_memory_resource* old{nullptr};
-  EXPECT_NO_THROW(old = rmm::mr::set_default_resource(this->mr.get()));
+  EXPECT_NO_THROW(old = rmm::mr::set_current_device_resource(this->mr.get()));
   EXPECT_NE(nullptr, old);
 
   spawn([mr = this->mr.get()]() {
@@ -94,8 +94,8 @@ TEST_P(mr_test_mt, SetDefaultResource_mt)
   });
 
   // setting default resource w/ nullptr should reset to initial
-  EXPECT_NO_THROW(rmm::mr::set_default_resource(nullptr));
-  EXPECT_TRUE(old->is_equal(*rmm::mr::get_default_resource()));
+  EXPECT_NO_THROW(rmm::mr::set_current_device_resource(nullptr));
+  EXPECT_TRUE(old->is_equal(*rmm::mr::get_current_device_resource()));
 }
 
 TEST_P(mr_test_mt, SetCurrentDeviceResource_mt)
diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp
index ba5f90758..20ca9c941 100644
--- a/tests/mr/device/mr_test.hpp
+++ b/tests/mr/device/mr_test.hpp
@@ -84,6 +84,17 @@ inline void test_get_default_resource()
   EXPECT_NO_THROW(rmm::mr::get_default_resource()->deallocate(p, 1_MiB));
 }
 
+inline void test_get_current_device_resource()
+{
+  EXPECT_NE(nullptr, rmm::mr::get_current_device_resource());
+  void* p{nullptr};
+  EXPECT_NO_THROW(p = rmm::mr::get_current_device_resource()->allocate(1_MiB));
+  EXPECT_NE(nullptr, p);
+  EXPECT_TRUE(is_aligned(p));
+  EXPECT_TRUE(is_device_memory(p));
+  EXPECT_NO_THROW(rmm::mr::get_current_device_resource()->deallocate(p, 1_MiB));
+}
+
 inline void test_allocate(rmm::mr::device_memory_resource* mr,
                           std::size_t bytes,
                           cudaStream_t stream = 0)
diff --git a/tests/mr/device/mr_tests.cpp b/tests/mr/device/mr_tests.cpp
index c47fd5b05..79774cbd8 100644
--- a/tests/mr/device/mr_tests.cpp
+++ b/tests/mr/device/mr_tests.cpp
@@ -72,6 +72,8 @@ TEST_P(mr_test, SetCurrentDeviceResource)
   // current dev resource should equal this resource
   EXPECT_TRUE(this->mr->is_equal(*rmm::mr::get_current_device_resource()));
 
+  test_get_current_device_resource();
+
   // setting to `nullptr` should reset to initial cuda resource
   EXPECT_NO_THROW(rmm::mr::set_current_device_resource(nullptr));
   EXPECT_TRUE(rmm::mr::get_current_device_resource()->is_equal(rmm::mr::cuda_memory_resource{}));
diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp
index f3be0c39a..097124f27 100644
--- a/tests/mr/device/pool_mr_tests.cpp
+++ b/tests/mr/device/pool_mr_tests.cpp
@@ -34,7 +34,7 @@ TEST(PoolTest, ThrowOnNullUpstream)
 
 TEST(PoolTest, ThrowMaxLessThanInitial)
 {
-  auto max_less_than_initial = []() { Pool mr{rmm::mr::get_default_resource(), 100, 99}; };
+  auto max_less_than_initial = []() { Pool mr{rmm::mr::get_current_device_resource(), 100, 99}; };
   EXPECT_THROW(max_less_than_initial(), rmm::logic_error);
 }
 
@@ -43,14 +43,14 @@ TEST(PoolTest, AllocateNinetyPercent)
   auto allocate_ninety = []() {
     auto const ninety_percent_pool =
       static_cast<std::size_t>(rmm::mr::detail::available_device_memory() * 0.9);
-    Pool mr{rmm::mr::get_default_resource(), ninety_percent_pool};
+    Pool mr{rmm::mr::get_current_device_resource(), ninety_percent_pool};
   };
   EXPECT_NO_THROW(allocate_ninety());
 }
 
 TEST(PoolTest, ForceGrowth)
 {
-  Pool mr{rmm::mr::get_default_resource(), 0};
+  Pool mr{rmm::mr::get_current_device_resource(), 0};
   EXPECT_NO_THROW(mr.allocate(1000));
 }
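Before moving on to the Python-side patches, note the `pool_memory_resource` constructor contract these tests encode: an upstream resource plus optional initial and maximum pool sizes, growth on demand when the initial size is zero, and `rmm::logic_error` when the maximum is smaller than the initial size. A sketch with the tests' `Pool` alias spelled out (the exact template argument and header paths are assumptions):

```c++
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

// Assumed to match the alias used in pool_mr_tests.cpp
using Pool = rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource>;

void pool_sizing_sketch()
{
  auto* upstream = rmm::mr::get_current_device_resource();

  // Zero initial size: the pool grows from upstream as allocations demand
  Pool grows_on_demand{upstream, 0};
  void* p = grows_on_demand.allocate(1000);
  grows_on_demand.deallocate(p, 1000);

  // A maximum size (third argument) smaller than the initial size (second)
  // throws rmm::logic_error, as PoolTest.ThrowMaxLessThanInitial verifies:
  // Pool bad{upstream, 100, 99};
}
```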
From daef500baa45506106a2a17ecda6aa3d0727fbfa Mon Sep 17 00:00:00 2001
From: Mark Harris
Date: Tue, 11 Aug 2020 16:33:50 +1000
Subject: [PATCH 16/21] Review feedback

---
 python/rmm/_lib/memory_resource.pyx | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx
index edb37e605..4766d433f 100644
--- a/python/rmm/_lib/memory_resource.pyx
+++ b/python/rmm/_lib/memory_resource.pyx
@@ -223,7 +223,7 @@ def _append_id(filename, id):
         The ID to append
     """
     name, ext = os.path.splitext(filename)
-    return "{name}.dev{uid}{ext}".format(name=name, uid=id, ext=ext)
+    return f"{name}.dev{id}{ext}"
 
 
 cdef class LoggingResourceAdaptor(MemoryResource):
@@ -240,7 +240,7 @@ cdef class LoggingResourceAdaptor(MemoryResource):
         log_file_name = _append_id(
             log_file_name.decode(), get_current_device()
         )
-        print(log_file_name)
+
         _log_file_name = log_file_name
 
         self.c_obj.reset(
@@ -410,6 +410,10 @@ cpdef get_per_device_resource_type(int device):
 
 
 cpdef get_current_device_resource():
+    """
+    Get the memory resource used for RMM device allocations on the current
+    device.
+    """
     return get_per_device_resource(get_current_device())
 
 
@@ -440,5 +444,6 @@ cpdef _flush_logs():
     """
     global _per_device_mrs
     cdef MemoryResource each_mr
-    [each_mr.flush() for each_mr in _per_device_mrs.values()
-     if type(each_mr) is LoggingResourceAdaptor]
+    for each_mr in _per_device_mrs.values():
+        if isinstance(each_mr, LoggingResourceAdaptor):
+            each_mr.flush()

From e75fc42fc3af9fee434c3dc67e9dc0fca716dfa5 Mon Sep 17 00:00:00 2001
From: Mark Harris
Date: Tue, 11 Aug 2020 16:36:44 +1000
Subject: [PATCH 17/21] Explicit void return type

---
 python/rmm/_lib/memory_resource.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx
index 4766d433f..6981e2dfb 100644
--- a/python/rmm/_lib/memory_resource.pyx
+++ b/python/rmm/_lib/memory_resource.pyx
@@ -285,7 +285,7 @@ cpdef get_current_device():
     return current_device
 
 
-cpdef set_current_device(int device):
+cpdef void set_current_device(int device) except *:
     """
     Set the current CUDA device
 

From b5d446dc7474b61406a1553f05ca5de5997f5fcd Mon Sep 17 00:00:00 2001
From: Mark Harris
Date: Tue, 11 Aug 2020 16:38:29 +1000
Subject: [PATCH 18/21] Explicit int return type

---
 python/rmm/_lib/memory_resource.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx
index 6981e2dfb..011e25f91 100644
--- a/python/rmm/_lib/memory_resource.pyx
+++ b/python/rmm/_lib/memory_resource.pyx
@@ -274,7 +274,7 @@ cdef class LoggingResourceAdaptor(MemoryResource):
 
 cdef dict _per_device_mrs = {}
 
-cpdef get_current_device():
+cpdef int get_current_device() except -1:
     """
     Get the current CUDA device
     """

From 621d2948cbff3c31c1dca72d97413fed31d45059 Mon Sep 17 00:00:00 2001
From: Keith Kraus
Date: Tue, 11 Aug 2020 02:55:04 -0400
Subject: [PATCH 19/21] isort fix

---
 python/rmm/_lib/memory_resource.pyx | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx
index 011e25f91..b7558d21d 100644
--- a/python/rmm/_lib/memory_resource.pyx
+++ b/python/rmm/_lib/memory_resource.pyx
@@ -7,11 +7,8 @@ from libcpp.cast cimport dynamic_cast
 from libcpp.memory cimport make_shared, make_unique, shared_ptr, unique_ptr
 from libcpp.string cimport string
 
-from rmm._lib.lib cimport (
-    cudaGetDevice,
-    cudaSetDevice,
-    cudaSuccess
-)
+from rmm._lib.lib cimport cudaGetDevice, cudaSetDevice, cudaSuccess
+
 
 cdef class CudaMemoryResource(MemoryResource):
     def __cinit__(self):

From 48e46c1766d1e92a5bd74f4c4cce8e056020935f Mon Sep 17 00:00:00 2001
From: Mark Harris
Date: Wed, 12 Aug 2020 10:35:47 +1000
Subject: [PATCH 20/21] Warn when initialized with no devices present.

---
 python/rmm/_lib/memory_resource.pyx | 43 +++++++++++++++++------------
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx
index b7558d21d..f497beb7c 100644
--- a/python/rmm/_lib/memory_resource.pyx
+++ b/python/rmm/_lib/memory_resource.pyx
@@ -1,5 +1,6 @@
 # Copyright (c) 2020, NVIDIA CORPORATION.
import os +import warnings from libc.stdint cimport int8_t from libcpp cimport bool @@ -323,32 +324,38 @@ cpdef _initialize( args = () cdef MemoryResource mr + cdef int original_device # Save the current device so we can reset it - cdef int original_device = get_current_device() + try: + original_device = get_current_device() + except RuntimeError: + warnings.warn("No CUDA Device Found", ResourceWarning) + else: + # reset any previously specified per device resources + global _per_device_mrs + _per_device_mrs.clear() - # reset any previously specified per device resources - global _per_device_mrs - _per_device_mrs.clear() + if devices is None: + devices = [0] + elif isinstance(devices, int): + devices = [devices] - if devices is None: - devices = [0] - elif isinstance(devices, int): - devices = [devices] + # create a memory resource per specified device + for device in devices: + set_current_device(device) - # create a memory resource per specified device - for device in devices: - set_current_device(device) + if logging: + mr = LoggingResourceAdaptor(typ(*args), log_file_name.encode()) + else: + mr = typ(*args) - if logging: - mr = LoggingResourceAdaptor(typ(*args), log_file_name.encode()) - else: - mr = typ(*args) + _set_per_device_resource(device, mr) - _set_per_device_resource(device, mr) + # reset CUDA device to original + set_current_device(original_device) + - # reset CUDA device to original - set_current_device(original_device) cpdef get_per_device_resource(int device): From 50c19d95138fd824cb38759cb9e16060e470f71d Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 12 Aug 2020 10:37:49 +1000 Subject: [PATCH 21/21] Python style! --- python/rmm/_lib/memory_resource.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index f497beb7c..da0a2525a 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -354,8 +354,6 @@ cpdef _initialize( # reset CUDA device to original set_current_device(original_device) - - cpdef get_per_device_resource(int device):