From b04128685aad4e22bd5213a920f09f197985dbd7 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Thu, 7 Oct 2021 18:46:22 -0700 Subject: [PATCH 01/35] add some tests for arena mr --- include/rmm/detail/aligned.hpp | 36 ++++++++ .../rmm/mr/device/arena_memory_resource.hpp | 8 +- include/rmm/mr/device/detail/arena.hpp | 56 ++++--------- tests/CMakeLists.txt | 3 + tests/mr/device/arena_mr_tests.cpp | 83 +++++++++++++++++++ 5 files changed, 142 insertions(+), 44 deletions(-) create mode 100644 tests/mr/device/arena_mr_tests.cpp diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 321be53b5..19e69344d 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -62,6 +62,18 @@ constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcep return (value + (alignment - 1)) & ~(alignment - 1); } +/** + * @brief Align up to nearest multiple of the CUDA allocation alignment + * + * @param[in] v value to align + * + * @return Return the aligned value, as one would expect + */ +constexpr std::size_t align_up_cuda(std::size_t value) noexcept +{ + return align_up(value, CUDA_ALLOCATION_ALIGNMENT); +} + /** * @brief Align down to the nearest multiple of specified power of 2 * @@ -76,6 +88,18 @@ constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexc return value & ~(alignment - 1); } +/** + * @brief Align down to the nearest multiple of the CUDA allocation alignment + * + * @param[in] v value to align + * + * @return Return the aligned value, as one would expect + */ +constexpr std::size_t align_down_cuda(std::size_t value) noexcept +{ + return align_down(value, CUDA_ALLOCATION_ALIGNMENT); +} + /** * @brief Checks whether a value is aligned to a multiple of a specified power of 2 * @@ -90,6 +114,18 @@ constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept return value == align_down(value, alignment); } +/** + * @brief Checks whether a value is aligned to a multiple of the CUDA allocation alignment + * + * @param[in] v value to check for alignment + * + * @return true if aligned + */ +constexpr bool is_cuda_aligned(std::size_t value) noexcept +{ + return is_aligned(value, CUDA_ALLOCATION_ALIGNMENT); +} + inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index ce8737225..9b78d9207 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -89,8 +89,8 @@ class arena_memory_resource final : public device_memory_resource { * of the available memory on the current device. 
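   *
   * Example (illustrative; the sizes are arbitrary and `rmm::mr::cuda_memory_resource`
   * is assumed as the upstream resource, not mandated by this patch):
   * @code
   * rmm::mr::cuda_memory_resource upstream;
   * // Start with a 1 GiB arena that may grow to 4 GiB; both are multiples of 256 bytes.
   * rmm::mr::arena_memory_resource<rmm::mr::cuda_memory_resource> mr{
   *   &upstream, std::size_t{1} << 30U, std::size_t{4} << 30U};
   * void* ptr = mr.allocate(4096);
   * mr.deallocate(ptr, 4096);
   * @endcode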
*/ explicit arena_memory_resource(Upstream* upstream_mr, - std::size_t initial_size = global_arena::default_initial_size, - std::size_t maximum_size = global_arena::default_maximum_size, + std::optional initial_size = std::nullopt, + std::optional maximum_size = std::nullopt, bool dump_log_on_failure = false) : global_arena_{upstream_mr, initial_size, maximum_size}, dump_log_on_failure_{dump_log_on_failure} @@ -144,7 +144,7 @@ class arena_memory_resource final : public device_memory_resource { { if (bytes <= 0) { return nullptr; } - bytes = detail::arena::align_up(bytes); + bytes = rmm::detail::align_up_cuda(bytes); auto& arena = get_arena(stream); void* pointer = arena.allocate(bytes); @@ -173,7 +173,7 @@ class arena_memory_resource final : public device_memory_resource { { if (ptr == nullptr || bytes <= 0) { return; } - bytes = detail::arena::align_up(bytes); + bytes = rmm::detail::align_up_cuda(bytes); get_arena(stream).deallocate(ptr, bytes, stream); } diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 0d2bb319a..9cb691bd8 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -139,28 +140,6 @@ class block { inline bool block_size_compare(block lhs, block rhs) { return lhs.size() < rhs.size(); } -/** - * @brief Align up to the allocation alignment. - * - * @param[in] v value to align - * @return Return the aligned value - */ -constexpr std::size_t align_up(std::size_t value) noexcept -{ - return rmm::detail::align_up(value, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); -} - -/** - * @brief Align down to the allocation alignment. - * - * @param[in] v value to align - * @return Return the aligned value - */ -constexpr std::size_t align_down(std::size_t value) noexcept -{ - return rmm::detail::align_down(value, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); -} - /** * @brief Get the first free block of at least `size` bytes. * @@ -253,10 +232,6 @@ inline auto total_block_size(T const& blocks) template class global_arena final { public: - /// The default initial size for the global arena. - static constexpr std::size_t default_initial_size = std::numeric_limits::max(); - /// The default maximum size for the global arena. - static constexpr std::size_t default_maximum_size = std::numeric_limits::max(); /// Reserved memory that should not be allocated (64 MiB). static constexpr std::size_t reserved_size = 1U << 26U; @@ -275,29 +250,30 @@ class global_arena final { * @param maximum_size Maximum size, in bytes, that the global arena can grow to. Defaults to all * of the available memory on the current device. 
*/ - global_arena(Upstream* upstream_mr, std::size_t initial_size, std::size_t maximum_size) - : upstream_mr_{upstream_mr}, maximum_size_{maximum_size} + global_arena(Upstream* upstream_mr, + std::optional initial_size, + std::optional maximum_size) + : upstream_mr_{upstream_mr}, maximum_size_{maximum_size.value_or(0)} { RMM_EXPECTS(nullptr != upstream_mr_, "Unexpected null upstream pointer."); - RMM_EXPECTS(initial_size == default_initial_size || initial_size == align_up(initial_size), + RMM_EXPECTS(!initial_size || rmm::detail::is_cuda_aligned(initial_size.value()), "Error, Initial arena size required to be a multiple of 256 bytes"); - RMM_EXPECTS(maximum_size_ == default_maximum_size || maximum_size_ == align_up(maximum_size_), + RMM_EXPECTS(!maximum_size || rmm::detail::is_cuda_aligned(maximum_size.value()), "Error, Maximum arena size required to be a multiple of 256 bytes"); - if (initial_size == default_initial_size || maximum_size == default_maximum_size) { - std::size_t free{}; - std::size_t total{}; - RMM_CUDA_TRY(cudaMemGetInfo(&free, &total)); - if (initial_size == default_initial_size) { - initial_size = align_up(std::min(free, total / 2)); + auto init = initial_size.value_or(0); + if (!initial_size || !maximum_size) { + auto const [free, total] = rmm::detail::available_device_memory(); + if (!initial_size) { + init = rmm::detail::align_down_cuda(free) - reserved_size; } - if (maximum_size_ == default_maximum_size) { - maximum_size_ = align_down(free) - reserved_size; + if (!maximum_size) { + maximum_size_ = rmm::detail::align_down_cuda(free) - reserved_size; } } - RMM_EXPECTS(initial_size <= maximum_size_, "Initial arena size exceeds the maximum pool size!"); + RMM_EXPECTS(init <= maximum_size_, "Initial arena size exceeds the maximum pool size!"); - free_blocks_.emplace(expand_arena(initial_size)); + free_blocks_.emplace(expand_arena(init)); } // Disable copy (and move) semantics. diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 78c0c94a6..91e93bccf 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -64,6 +64,9 @@ ConfigureTest(DEVICE_MR_TEST mr/device/mr_tests.cpp mr/device/mr_multithreaded_t # pool mr tests ConfigureTest(POOL_MR_TEST mr/device/pool_mr_tests.cpp) +# arena mr tests +ConfigureTest(ARENA_MR_TEST mr/device/arena_mr_tests.cpp) + # cuda_async mr tests ConfigureTest(CUDA_ASYNC_MR_TEST mr/device/cuda_async_mr_tests.cpp) diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp new file mode 100644 index 000000000..e6609dda0 --- /dev/null +++ b/tests/mr/device/arena_mr_tests.cpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include + +namespace rmm::test { +namespace { +using arena_mr = rmm::mr::arena_memory_resource; + +TEST(ArenaTest, NullUpstream) +{ + EXPECT_THROW([]() { arena_mr mr{nullptr}; }(), rmm::logic_error); +} + +TEST(ArenaTest, UnalignedInitialSize) +{ + EXPECT_THROW([]() { arena_mr mr(rmm::mr::get_current_device_resource(), 255); }(), + rmm::logic_error); +} + +TEST(ArenaTest, UnalignedMaximumSize) +{ + EXPECT_THROW([]() { arena_mr mr(rmm::mr::get_current_device_resource(), 256, 257); }(), + rmm::logic_error); +} + +TEST(ArenaTest, MaxLessThanInitial) +{ + EXPECT_THROW([]() { arena_mr mr(rmm::mr::get_current_device_resource(), 512, 256); }(), + rmm::logic_error); +} + +TEST(ArenaTest, MaxEqualToInitial) +{ + EXPECT_NO_THROW([]() { arena_mr mr(rmm::mr::get_current_device_resource(), 512, 512); }()); +} + +TEST(ArenaTest, AllocateNinetyPercent) +{ + EXPECT_NO_THROW([]() { + auto const free = rmm::detail::available_device_memory().first; + auto const ninety_percent = + rmm::detail::align_up_cuda(static_cast(static_cast(free) * 0.9)); + arena_mr mr(rmm::mr::get_current_device_resource(), ninety_percent); + }()); +} + +TEST(ArenaTest, SmallMediumLarge) +{ + EXPECT_NO_THROW([]() { + arena_mr mr(rmm::mr::get_current_device_resource()); + auto* small = mr.allocate(256); + auto* medium = mr.allocate(1U << 26U); + auto const free = rmm::detail::available_device_memory().first; + auto* large = mr.allocate(free / 2); + mr.deallocate(small, 256); + mr.deallocate(medium, 1U << 26U); + mr.deallocate(large, free / 4); + }()); +} + +} // namespace +} // namespace rmm::test From 8bda94e7a4451b826a7ab59586063d2a45f762fa Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Mon, 8 Nov 2021 11:11:59 -0800 Subject: [PATCH 02/35] make superblocks persistent between different arenas --- .../rmm/mr/device/arena_memory_resource.hpp | 60 +- include/rmm/mr/device/detail/arena.hpp | 758 +++++++++++------- tests/mr/device/arena_mr_tests.cpp | 23 - 3 files changed, 519 insertions(+), 322 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index a8919def2..9b1073f85 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -77,23 +77,15 @@ class arena_memory_resource final : public device_memory_resource { * @brief Construct an `arena_memory_resource`. * * @throws rmm::logic_error if `upstream_mr == nullptr`. - * @throws rmm::logic_error if `initial_size` is neither the default nor aligned to a multiple of - * 256 bytes. - * @throws rmm::logic_error if `maximum_size` is neither the default nor aligned to a multiple of - * 256 bytes. * * @param upstream_mr The memory resource from which to allocate blocks for the pool - * @param initial_size Minimum size, in bytes, of the initial global arena. Defaults to half of - * the available memory on the current device. - * @param maximum_size Maximum size, in bytes, that the global arena can grow to. Defaults to all - * of the available memory on the current device. + * @param arena_size Size in bytes of the global arena. Defaults to all the available memory on + * the current device. 
*/ explicit arena_memory_resource(Upstream* upstream_mr, - std::optional initial_size = std::nullopt, - std::optional maximum_size = std::nullopt, - bool dump_log_on_failure = false) - : global_arena_{upstream_mr, initial_size, maximum_size}, - dump_log_on_failure_{dump_log_on_failure} + std::optional arena_size = std::nullopt, + bool dump_log_on_failure = false) + : global_arena_{upstream_mr, arena_size}, dump_log_on_failure_{dump_log_on_failure} { if (dump_log_on_failure_) { logger_ = spdlog::basic_logger_mt("arena_memory_dump", "rmm_arena_memory_dump.log"); @@ -124,8 +116,8 @@ class arena_memory_resource final : public device_memory_resource { bool supports_get_mem_info() const noexcept override { return false; } private: - using global_arena = detail::arena::global_arena; - using arena = detail::arena::arena; + using global_arena = rmm::mr::detail::arena::global_arena; + using arena = rmm::mr::detail::arena::arena; using read_lock = std::shared_lock; using write_lock = std::lock_guard; @@ -174,7 +166,43 @@ class arena_memory_resource final : public device_memory_resource { if (ptr == nullptr || bytes <= 0) { return; } bytes = rmm::detail::align_up_cuda(bytes); - get_arena(stream).deallocate(ptr, bytes, stream); + if (!get_arena(stream).deallocate(ptr, bytes, stream)) { + deallocate_from_other_arena(ptr, bytes, stream); + } + } + + /** + * @brief Deallocate memory pointed to by `ptr` that was allocated in a different arena. + * + * @param ptr Pointer to be deallocated. + * @param bytes The size in bytes of the allocation. This must be equal to the + * value of `bytes` that was passed to the `allocate` call that returned `ptr`. + * @param stream Stream on which to perform deallocation. + */ + void deallocate_from_other_arena(void* ptr, std::size_t bytes, cuda_stream_view stream) + { + stream.synchronize_no_throw(); + + read_lock lock(mtx_); + + if (use_per_thread_arena(stream)) { + auto const id = std::this_thread::get_id(); + for (auto& kv : thread_arenas_) { + // If the arena does not belong to the current thread, try to deallocate from it, and return + // if successful. + if (kv.first != id && kv.second->deallocate(ptr, bytes, stream)) { return; } + } + } else { + for (auto& kv : stream_arenas_) { + // If the arena does not belong to the current stream, try to deallocate from it, and return + // if successful. + if (stream.value() != kv.first && kv.second.deallocate(ptr, bytes, stream)) { return; } + } + } + + // The thread that originally allocated the block has terminated, deallocate directly in the + // global arena. + global_arena_.deallocate_from_other_arena(ptr, bytes); } /** diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 9cb691bd8..8966cb47f 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -38,82 +38,81 @@ namespace rmm::mr::detail::arena { -/// Minimum size of a superblock (256 KiB). -constexpr std::size_t minimum_superblock_size = 1U << 18U; - /** - * @brief Represents a chunk of memory that can be allocated and deallocated. - * - * A block bigger than a certain size is called a "superblock". + * @brief Represents a contiguous region of memory. */ -class block { +class memory_span { public: /** - * @brief Construct a default block. - */ - block() = default; - - /** - * @brief Construct a block given a pointer and size. - * - * @param pointer The address for the beginning of the block. - * @param size The size of the block. + * @brief Construct a default span. 
*/ - block(char* pointer, std::size_t size) : pointer_(pointer), size_(size) {} + memory_span() = default; /** - * @brief Construct a block given a void pointer and size. + * @brief Construct a span given a pointer and size. * - * @param pointer The address for the beginning of the block. - * @param size The size of the block. + * @param pointer The address for the beginning of the span. + * @param size The size of the span. */ - block(void* pointer, std::size_t size) : pointer_(static_cast(pointer)), size_(size) {} + memory_span(void* pointer, std::size_t size) : pointer_{static_cast(pointer)}, size_{size} + { + } /// Returns the underlying pointer. - [[nodiscard]] void* pointer() const { return pointer_; } + [[nodiscard]] char* pointer() const { return pointer_; } - /// Returns the size of the block. + /// Returns the size of the span. [[nodiscard]] std::size_t size() const { return size_; } - /// Returns true if this block is valid (non-null), false otherwise. + /// Returns true if this span is valid (non-null), false otherwise. [[nodiscard]] bool is_valid() const { return pointer_ != nullptr; } - /// Returns true if this block is a superblock, false otherwise. - [[nodiscard]] bool is_superblock() const { return size_ >= minimum_superblock_size; } + /// Used by std::set to compare spans. + bool operator<(memory_span const& s) const { return pointer_ < s.pointer_; } + + private: + char* pointer_{}; ///< Raw memory pointer. + std::size_t size_{}; ///< Size in bytes. +}; + +/** + * @brief Represents a chunk of memory that can be allocated and deallocated. + */ +class block final : public memory_span { + public: + using memory_span::memory_span; + + /** + * @brief Is this block large enough to fit `sz` bytes? + * + * @param sz The size in bytes to check for fit. + * @return true if this block is at least `sz` bytes. + */ + [[nodiscard]] bool fits(std::size_t sz) const { return size() >= sz; } /** * @brief Verifies whether this block can be merged to the beginning of block b. * * @param b The block to check for contiguity. - * @return true Returns true if this block's `pointer` + `size` == `b.ptr`, and `not b.is_head`, - false otherwise. + * @return true Returns true if this block's `pointer` + `size` == `b.pointer`. */ - [[nodiscard]] bool is_contiguous_before(block const& blk) const + [[nodiscard]] bool is_contiguous_before(block const& b) const { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - return pointer_ + size_ == blk.pointer_; + return pointer() + size() == b.pointer(); } - /** - * @brief Is this block large enough to fit `sz` bytes? - * - * @param size The size in bytes to check for fit. - * @return true if this block is at least `sz` bytes. - */ - [[nodiscard]] bool fits(std::size_t size) const { return size_ >= size; } - /** * @brief Split this block into two by the given size. * - * @param size The size in bytes of the first block. + * @param sz The size in bytes of the first block. * @return std::pair A pair of blocks split by sz. */ - [[nodiscard]] std::pair split(std::size_t size) const + [[nodiscard]] std::pair split(std::size_t sz) const { - RMM_LOGGING_ASSERT(size_ >= size); + RMM_LOGGING_ASSERT(size() >= sz); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - if (size_ > size) { return {{pointer_, size}, {pointer_ + size, size_ - size}}; } - return {*this, {}}; + return {{pointer(), sz}, {pointer() + sz, size() - sz}}; } /** @@ -124,102 +123,198 @@ class block { * @param b block to merge. * @return block The merged block. 
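   *
   * For example (illustrative, with `p` some 256-byte-aligned pointer),
   * `block{p, 1024}.split(256)` yields the pair `{p, 256}` and `{p + 256, 768}`,
   * and merging the first of those blocks with the second reconstructs `{p, 1024}`.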
*/ - [[nodiscard]] block merge(block const& blk) const + [[nodiscard]] block merge(block const& b) const { - RMM_LOGGING_ASSERT(is_contiguous_before(blk)); - return {pointer_, size_ + blk.size_}; + RMM_LOGGING_ASSERT(is_contiguous_before(b)); + return {pointer(), size() + b.size()}; } +}; - /// Used by std::set to compare blocks. - bool operator<(block const& blk) const { return pointer_ < blk.pointer_; } - - private: - char* pointer_{}; ///< Raw memory pointer. - std::size_t size_{}; ///< Size in bytes. +/// Comparison function for block sizes. +struct block_size_compare { + bool operator()(block const& lhs, block const& rhs) const { return lhs.size() < rhs.size(); } }; -inline bool block_size_compare(block lhs, block rhs) { return lhs.size() < rhs.size(); } +/// Calculate the total size of a collection of blocks. +template +inline auto total_block_size(T const& blocks) +{ + return std::accumulate( + blocks.cbegin(), blocks.cend(), std::size_t{}, [](auto const& lhs, auto const& rhs) { + return lhs + rhs.size(); + }); +} /** - * @brief Get the first free block of at least `size` bytes. - * - * Address-ordered first-fit has shown to perform slightly better than best-fit when it comes to - * memory fragmentation, and slightly cheaper to implement. It is also used by some popular - * allocators such as jemalloc. - * - * \see Johnstone, M. S., & Wilson, P. R. (1998). The memory fragmentation problem: Solved?. ACM - * Sigplan Notices, 34(3), 26-36. - * - * @param free_blocks The address-ordered set of free blocks. - * @param size The number of bytes to allocate. - * @return block A block of memory of at least `size` bytes, or an empty block if not found. + * @brief Represents a large chunk of memory that is exchanged between the global arena and + * per-thread arenas. */ -inline block first_fit(std::set& free_blocks, std::size_t size) -{ - auto const iter = std::find_if( - free_blocks.cbegin(), free_blocks.cend(), [size](auto const& blk) { return blk.fits(size); }); +class superblock final : public memory_span { + public: + /// Minimum size of a superblock (4 MiB). + static constexpr std::size_t minimum_size{1U << 22U}; - if (iter == free_blocks.cend()) { return {}; } - // Remove the block from the free_list. - auto const blk = *iter; - auto const next = free_blocks.erase(iter); + /** + * @brief Construct a default superblock. + */ + superblock() = default; - if (blk.size() > size) { - // Split the block and put the remainder back. - auto const split = blk.split(size); - free_blocks.insert(next, split.second); - return split.first; + /** + * @brief Construct a superblock given a pointer and size. + * + * @param pointer The address for the beginning of the superblock. + * @param size The size of the superblock. + */ + superblock(void* pointer, std::size_t size) : memory_span{pointer, size} + { + free_blocks_.emplace(pointer, size); } - return blk; -} -/** - * @brief Coalesce the given block with other free blocks. - * - * @param free_blocks The address-ordered set of free blocks. - * @param b The block to coalesce. - * @return block The coalesced block. - */ -inline block coalesce_block(std::set& free_blocks, block const& blk) -{ - if (!blk.is_valid()) { return blk; } - - // Find the right place (in ascending address order) to insert the block. - auto const next = free_blocks.lower_bound(blk); - auto const previous = next == free_blocks.cbegin() ? next : std::prev(next); - - // Coalesce with neighboring blocks. 
- bool const merge_prev = previous->is_contiguous_before(blk); - bool const merge_next = next != free_blocks.cend() && blk.is_contiguous_before(*next); - - block merged{}; - if (merge_prev && merge_next) { - merged = previous->merge(blk).merge(*next); - free_blocks.erase(previous); - auto const iter = free_blocks.erase(next); - free_blocks.insert(iter, merged); - } else if (merge_prev) { - merged = previous->merge(blk); - auto const iter = free_blocks.erase(previous); - free_blocks.insert(iter, merged); - } else if (merge_next) { - merged = blk.merge(*next); - auto const iter = free_blocks.erase(next); - free_blocks.insert(iter, merged); - } else { - free_blocks.emplace(blk); - merged = blk; - } - return merged; -} + // Disable copy semantics. + superblock(superblock const&) = delete; + superblock& operator=(superblock const&) = delete; + // Allow move semantics. + superblock(superblock&& s) noexcept = default; + superblock& operator=(superblock&&) noexcept = default; -template -inline auto total_block_size(T const& blocks) -{ - return std::accumulate(blocks.cbegin(), blocks.cend(), std::size_t{}, [](auto lhs, auto rhs) { - return lhs + rhs.size(); - }); -} + ~superblock() = default; + + /** + * @brief Is this superblock empty? + * + * @return true if this superblock is empty. + */ + [[nodiscard]] bool empty() const + { + return free_blocks_.size() == 1 && free_blocks_.cbegin()->size() == size(); + } + + /** + * @brief Whether this superblock contains the given block. + * + * @param b The block to search for. + * @return true if the given block belongs to this superblock. + */ + [[nodiscard]] bool contains(block const& b) const + { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return pointer() <= b.pointer() && pointer() + size() >= b.pointer() + b.size(); + } + + /** + * @brief Can this superblock fit `sz` bytes? + * + * @param sz The size in bytes to check for fit. + * @return true if this superblock can fit `sz` bytes. + */ + [[nodiscard]] bool fits(std::size_t sz) const + { + return std::any_of( + free_blocks_.cbegin(), free_blocks_.cend(), [sz](auto const& b) { return b.fits(sz); }); + } + + /** + * @brief Verifies whether this superblock can be merged to the beginning of superblock s. + * + * @param s The superblock to check for contiguity. + * @return true Returns true if both superblocks are empty and this superblock's + * `pointer` + `size` == `s.ptr`. + */ + [[nodiscard]] bool is_contiguous_before(superblock const& s) const + { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return empty() && s.empty() && pointer() + size() == s.pointer(); + } + + /** + * @brief Split this superblock into two by the given size. + * + * @param sz The size in bytes of the first block. + * @return superblock_pair A pair of superblocks split by sz. + */ + [[nodiscard]] std::pair split(std::size_t sz) const + { + RMM_LOGGING_ASSERT(empty() && sz >= minimum_size && size() - sz >= minimum_size); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return {superblock{pointer(), sz}, superblock{pointer() + sz, size() - sz}}; + } + + /** + * @brief Coalesce two contiguous superblocks into one. + * + * `this->is_contiguous_before(s)` must be true. + * + * @param s superblock to merge. + * @return block The merged block. 
+ */ + [[nodiscard]] superblock merge(superblock const& s) const + { + RMM_LOGGING_ASSERT(is_contiguous_before(s)); + return {pointer(), size() + s.size()}; + } + + /** + * @brief Get the first free block of at least `size` bytes. + * + * @param size The number of bytes to allocate. + * @return block A block of memory of at least `size` bytes, or an empty block if not found. + */ + block first_fit(std::size_t size) const + { + auto const iter = std::find_if( + free_blocks_.cbegin(), free_blocks_.cend(), [size](auto const& b) { return b.fits(size); }); + if (iter == free_blocks_.cend()) { return {}; } + + // Remove the block from the free list. + auto const b = *iter; + auto const next = free_blocks_.erase(iter); + + if (b.size() > size) { + // Split the block and put the remainder back. + auto const split = b.split(size); + free_blocks_.insert(next, split.second); + return split.first; + } + return b; + } + + /** + * @brief Coalesce the given block with other free blocks. + * + * @param b The block to coalesce. + */ + void coalesce(block const& b) const + { + // Find the right place (in ascending address order) to insert the block. + auto const next = free_blocks_.lower_bound(b); + auto const previous = next == free_blocks_.cbegin() ? next : std::prev(next); + + // Coalesce with neighboring blocks. + bool const merge_prev = previous->is_contiguous_before(b); + bool const merge_next = next != free_blocks_.cend() && b.is_contiguous_before(*next); + + if (merge_prev && merge_next) { + auto const merged = previous->merge(b).merge(*next); + free_blocks_.erase(previous); + auto const iter = free_blocks_.erase(next); + free_blocks_.insert(iter, merged); + } else if (merge_prev) { + auto const merged = previous->merge(b); + auto const iter = free_blocks_.erase(previous); + free_blocks_.insert(iter, merged); + } else if (merge_next) { + auto const merged = b.merge(*next); + auto const iter = free_blocks_.erase(next); + free_blocks_.insert(iter, merged); + } else { + free_blocks_.insert(next, b); + } + } + + private: + /// Address-ordered set of free blocks. + mutable std::set free_blocks_{}; +}; /** * @brief The global arena for allocating memory from the upstream memory resource. @@ -232,48 +327,21 @@ inline auto total_block_size(T const& blocks) template class global_arena final { public: - /// Reserved memory that should not be allocated (64 MiB). - static constexpr std::size_t reserved_size = 1U << 26U; - /** * @brief Construct a global arena. * * @throws rmm::logic_error if `upstream_mr == nullptr`. - * @throws rmm::logic_error if `initial_size` is neither the default nor aligned to a multiple of - * 256 bytes. - * @throws rmm::logic_error if `maximum_size` is neither the default nor aligned to a multiple of - * 256 bytes. * * @param upstream_mr The memory resource from which to allocate blocks for the pool - * @param initial_size Minimum size, in bytes, of the initial global arena. Defaults to half of - * the available memory on the current device. - * @param maximum_size Maximum size, in bytes, that the global arena can grow to. Defaults to all - * of the available memory on the current device. + * @param arena_size Size in bytes of the global arena. Defaults to all the available memory on + * the current device. 
*/ - global_arena(Upstream* upstream_mr, - std::optional initial_size, - std::optional maximum_size) - : upstream_mr_{upstream_mr}, maximum_size_{maximum_size.value_or(0)} + global_arena(Upstream* upstream_mr, std::optional arena_size) + : upstream_mr_{upstream_mr} { RMM_EXPECTS(nullptr != upstream_mr_, "Unexpected null upstream pointer."); - RMM_EXPECTS(!initial_size || rmm::detail::is_cuda_aligned(initial_size.value()), - "Error, Initial arena size required to be a multiple of 256 bytes"); - RMM_EXPECTS(!maximum_size || rmm::detail::is_cuda_aligned(maximum_size.value()), - "Error, Maximum arena size required to be a multiple of 256 bytes"); - - auto init = initial_size.value_or(0); - if (!initial_size || !maximum_size) { - auto const [free, total] = rmm::detail::available_device_memory(); - if (!initial_size) { - init = rmm::detail::align_down_cuda(free) - reserved_size; - } - if (!maximum_size) { - maximum_size_ = rmm::detail::align_down_cuda(free) - reserved_size; - } - } - RMM_EXPECTS(init <= maximum_size_, "Initial arena size exceeds the maximum pool size!"); - - free_blocks_.emplace(expand_arena(init)); + auto const size = rmm::detail::align_down_cuda(arena_size.value_or(default_size())); + initialize(size); } // Disable copy (and move) semantics. @@ -289,136 +357,237 @@ class global_arena final { ~global_arena() { lock_guard lock(mtx_); - for (auto const& blk : upstream_blocks_) { - upstream_mr_->deallocate(blk.pointer(), blk.size()); - } + upstream_mr_->deallocate(upstream_block_.pointer(), upstream_block_.size()); } /** - * @brief Allocates memory of size at least `bytes`. - * - * @throws `std::bad_alloc` if the requested allocation could not be fulfilled. + * @brief Acquire a superblock that can fit a block of the given size. * - * @param bytes The size in bytes of the allocation. - * @return void* Pointer to the newly allocated memory. + * @param size The size in bytes of the allocation. + * @return superblock The acquired superblock. */ - block allocate(std::size_t bytes) + superblock acquire(std::size_t size) { lock_guard lock(mtx_); - return get_block(bytes); + return first_fit(size); } /** - * @brief Deallocate memory pointed to by `blk`. + * @brief Release a superblock. * - * @param blk Block to be deallocated. + * @param s Superblock to be released. */ - void deallocate(block const& blk) + void release(superblock&& s) { lock_guard lock(mtx_); - coalesce_block(free_blocks_, blk); + coalesce(std::move(s)); } /** - * @brief Deallocate a set of free blocks from a dying arena. + * @brief Release a set of superblocks from a dying arena. * - * @param free_blocks The set of free blocks. + * @param superblocks The set of superblocks. */ - void deallocate(std::set const& free_blocks) + void release(std::set& superblocks) { lock_guard lock(mtx_); - for (auto const& blk : free_blocks) { - coalesce_block(free_blocks_, blk); + auto iter = superblocks.cbegin(); + while (iter != superblocks.cend()) { + auto s = std::move(superblocks.extract(iter).value()); + coalesce(std::move(s)); + ++iter; } } /** - * @brief Dump memory to log. + * @brief Allocate a large block directly. * - * @param logger the spdlog logger to use + * @param size The size in bytes of the allocation. + * @return void* Pointer to the newly allocated memory. 
*/ - void dump_memory_log(std::shared_ptr const& logger) const + void* allocate(std::size_t size) { - lock_guard lock(mtx_); + if (handles(size)) { + lock_guard lock(mtx_); + return first_fit(size).pointer(); + } + return nullptr; + } - logger->info(" Maximum size: {}", rmm::detail::bytes{maximum_size_}); - logger->info(" Current size: {}", rmm::detail::bytes{current_size_}); + /** + * @brief Deallocate memory pointed to by `ptr` directly. + * + * @param ptr Pointer to be deallocated. + * @param size The size in bytes of the allocation. This must be equal to the value of `size` + * that was passed to the `allocate` call that returned `p`. + * @param stream Stream on which to perform deallocation. + * @return bool true if the allocation is found, false otherwise. + */ + bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream) + { + if (handles(size)) { + stream.synchronize_no_throw(); - logger->info(" # free blocks: {}", free_blocks_.size()); - if (!free_blocks_.empty()) { - logger->info(" Total size of free blocks: {}", - rmm::detail::bytes{total_block_size(free_blocks_)}); - auto const largest_free = - *std::max_element(free_blocks_.begin(), free_blocks_.end(), block_size_compare); - logger->info(" Size of largest free block: {}", rmm::detail::bytes{largest_free.size()}); + lock_guard lock(mtx_); + superblock s{ptr, size}; + coalesce(std::move(s)); + return true; } + return false; + } + + /** + * @brief Deallocate memory pointed to by `ptr` that was allocated in a per-thread arena. + * + * @param ptr Pointer to be deallocated. + * @param bytes The size in bytes of the allocation. This must be equal to the + * value of `bytes` that was passed to the `allocate` call that returned `ptr`. + * @param stream Stream on which to perform deallocation. + */ + void deallocate_from_other_arena(void* ptr, std::size_t bytes) + { + lock_guard lock(mtx_); - logger->info(" # upstream blocks={}", upstream_blocks_.size()); - logger->info(" Total size of upstream blocks: {}", - rmm::detail::bytes{total_block_size(upstream_blocks_)}); + block const b{ptr, bytes}; + auto const iter = std::find_if( + superblocks_.cbegin(), superblocks_.cend(), [b](auto const& s) { return s.contains(b); }); + if (iter == superblocks_.cend()) { RMM_FAIL("allocation not found"); } + iter->coalesce(b); + } + + /** + * @brief Dump memory to log. + * + * @param logger the spdlog logger to use + */ + void dump_memory_log(std::shared_ptr const& logger) const + { + // lock_guard lock(mtx_); + // + // logger->info(" Maximum size: {}", rmm::detail::bytes{maximum_size_}); + // logger->info(" Current size: {}", rmm::detail::bytes{current_size_}); + // + // logger->info(" # free blocks: {}", free_blocks_.size()); + // if (!free_blocks_.empty()) { + // logger->info(" Total size of free blocks: {}", + // rmm::detail::bytes{total_block_size(free_blocks_)}); + // auto const largest_free = + // *std::max_element(free_blocks_.begin(), free_blocks_.end(), block_size_compare); + // logger->info(" Size of largest free block: {}", + // rmm::detail::bytes{largest_free.size()}); + // } + // + // logger->info(" # upstream blocks={}", upstream_blocks_.size()); + // logger->info(" Total size of upstream blocks: {}", + // rmm::detail::bytes{total_block_size(upstream_blocks_)}); } private: using lock_guard = std::lock_guard; + /// Reserved memory that should not be allocated (64 MiB). + static constexpr std::size_t reserved_size = 1U << 26U; + /** - * @brief Get an available memory block of at least `size` bytes. 
- * - * @param size The number of bytes to allocate. - * @return block A block of memory of at least `size` bytes. + * @brief Default size of the global arena if unspecified. + * @return the default global arena size. */ - block get_block(std::size_t size) + constexpr std::size_t default_size() const { - // Find the first-fit free block. - auto const blk = first_fit(free_blocks_, size); - if (blk.is_valid()) { return blk; } + auto const [free, total] = rmm::detail::available_device_memory(); + return free - reserved_size; + } - // No existing larger blocks available, so grow the arena. - auto const upstream_block = expand_arena(size_to_grow(size)); - coalesce_block(free_blocks_, upstream_block); - return first_fit(free_blocks_, size); + /** + * @brief Allocate space from upstream to initialize the arena. + * + * @param size The size to allocate. + */ + void initialize(std::size_t size) + { + RMM_LOGGING_ASSERT(size >= superblock::minimum_size); + upstream_block_ = {upstream_mr_->allocate(size), size}; + superblocks_.emplace(upstream_block_.pointer(), size); } /** - * @brief Get the size to grow the global arena given the requested `size` bytes. + * @brief Should allocation of `size` bytes be handled by the global arena directly? + * + * @param size The size in bytes of the allocation. + * @return bool True if the allocation should be handled by the global arena. + */ + bool handles(std::size_t size) { return size > superblock::minimum_size / 2; } + + /** + * @brief Get the first superblock that can fit a block of at least `size` bytes. + * + * Address-ordered first-fit has shown to perform slightly better than best-fit when it comes to + * memory fragmentation, and slightly cheaper to implement. It is also used by some popular + * allocators such as jemalloc. * - * This simply grows the global arena to the maximum size. + * \see Johnstone, M. S., & Wilson, P. R. (1998). The memory fragmentation problem: Solved?. ACM + * Sigplan Notices, 34(3), 26-36. * - * @param size The number of bytes required. - * @return size The size for the arena to grow, or 0 if no more memory. + * @param size The number of bytes to allocate. + * @return superblock A superblock that can fit at least `size` bytes, or empty if not found. */ - constexpr std::size_t size_to_grow(std::size_t size) const + superblock first_fit(std::size_t size) { - if (current_size_ + size > maximum_size_) { return 0; } - return maximum_size_ - current_size_; + auto const iter = std::find_if( + superblocks_.cbegin(), superblocks_.cend(), [size](auto const& s) { return s.fits(size); }); + if (iter == superblocks_.cend()) { return {}; } + + auto node_handle = superblocks_.extract(iter); + auto s = std::move(node_handle.value()); + auto const sz = std::max(size, superblock::minimum_size); + if (s.empty() && s.size() - sz >= superblock::minimum_size) { + // Split the superblock and put the remainder back. + auto [head, tail] = s.split(sz); + superblocks_.insert(std::move(tail)); + return std::move(head); + } + return s; } /** - * @brief Allocate space from upstream to supply the arena and return a sufficiently sized block. + * @brief Coalesce the given superblock with other empty superblocks. * - * @param size The minimum size to allocate. - * @return block A block of at least `size` bytes. + * @param s The superblock to coalesce. 
*/ - block expand_arena(std::size_t size) + void coalesce(superblock&& s) { - if (size > 0) { - upstream_blocks_.push_back({upstream_mr_->allocate(size), size}); - current_size_ += size; - return upstream_blocks_.back(); + // Find the right place (in ascending address order) to insert the block. + auto const next = superblocks_.lower_bound(s); + auto const previous = next == superblocks_.cbegin() ? next : std::prev(next); + + // Coalesce with neighboring blocks. + bool const merge_prev = previous->is_contiguous_before(s); + bool const merge_next = next != superblocks_.cend() && s.is_contiguous_before(*next); + + if (merge_prev && merge_next) { + auto p = std::move(superblocks_.extract(previous).value()); + auto n = std::move(superblocks_.extract(next).value()); + auto merged = p.merge(std::move(s)).merge(std::move(n)); + superblocks_.insert(std::move(merged)); + } else if (merge_prev) { + auto p = std::move(superblocks_.extract(previous).value()); + auto merged = p.merge(std::move(s)); + superblocks_.insert(std::move(merged)); + } else if (merge_next) { + auto n = std::move(superblocks_.extract(next).value()); + auto merged = s.merge(std::move(n)); + superblocks_.insert(std::move(merged)); + } else { + superblocks_.insert(std::move(s)); } - return {}; } /// The upstream resource to allocate memory from. Upstream* upstream_mr_; - /// The maximum size the global arena can grow to. - std::size_t maximum_size_; - /// The current size of the global arena. - std::size_t current_size_{}; - /// Address-ordered set of free blocks. - std::set free_blocks_; - /// Blocks allocated from upstream so that they can be quickly freed. - std::vector upstream_blocks_; + /// Block allocated from upstream so that it can be quickly freed. + block upstream_block_; + /// Address-ordered set of superblocks. + std::set superblocks_; /// Mutex for exclusive lock. mutable std::mutex mtx_; }; @@ -427,7 +596,7 @@ class global_arena final { * @brief An arena for allocating memory for a thread. * * An arena is a per-thread or per-non-default-stream memory pool. It allocates - * superblocks from the global arena, and return them when the superblocks become empty. + * superblocks from the global arena, and returns them when the superblocks become empty. * * @tparam Upstream Memory resource to use for allocating the global arena. Implements * rmm::mr::device_memory_resource interface. @@ -442,43 +611,44 @@ class arena { */ explicit arena(global_arena& global_arena) : global_arena_{global_arena} {} - ~arena() = default; - // Disable copy (and move) semantics. arena(arena const&) = delete; arena& operator=(arena const&) = delete; arena(arena&&) noexcept = delete; arena& operator=(arena&&) noexcept = delete; + ~arena() = default; + /** - * @brief Allocates memory of size at least `bytes`. - * - * @throws `std::bad_alloc` if the requested allocation could not be fulfilled. + * @brief Allocates memory of size at least `size` bytes. * - * @param bytes The size in bytes of the allocation. + * @param size The size in bytes of the allocation. * @return void* Pointer to the newly allocated memory. */ - void* allocate(std::size_t bytes) + void* allocate(std::size_t size) { + auto* ptr = global_arena_.allocate(size); + if (ptr != nullptr) { return ptr; } + lock_guard lock(mtx_); - auto const blk = get_block(bytes); - return blk.pointer(); + return get_block(size).pointer(); } /** * @brief Deallocate memory pointed to by `ptr`, and possibly return superblocks to upstream. * * @param ptr Pointer to be deallocated. 
- * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` + * @param size The size in bytes of the allocation. This must be equal to the value of `size` * that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation. + * @return bool true if the allocation is found, false otherwise. */ - void deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) + bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream) { + if (global_arena_.deallocate(ptr, size, stream)) { return true; } + lock_guard lock(mtx_); - block const blk{ptr, bytes}; - auto const merged = coalesce_block(free_blocks_, blk); - shrink_arena(merged, stream); + return deallocate_from_superblock({ptr, size}); } /** @@ -487,8 +657,7 @@ class arena { void clean() { lock_guard lock(mtx_); - global_arena_.deallocate(free_blocks_); - free_blocks_.clear(); + global_arena_.release(superblocks_); } /** @@ -498,21 +667,20 @@ class arena { */ void dump_memory_log(std::shared_ptr const& logger) const { - lock_guard lock(mtx_); - logger->info(" # free blocks: {}", free_blocks_.size()); - if (!free_blocks_.empty()) { - logger->info(" Total size of free blocks: {}", - rmm::detail::bytes{total_block_size(free_blocks_)}); - auto const largest_free = - *std::max_element(free_blocks_.begin(), free_blocks_.end(), block_size_compare); - logger->info(" Size of largest free block: {}", rmm::detail::bytes{largest_free.size()}); - } + // lock_guard lock(mtx_); + // logger->info(" # free blocks: {}", free_blocks_.size()); + // if (!free_blocks_.empty()) { + // logger->info(" Total size of free blocks: {}", + // rmm::detail::bytes{total_block_size(free_blocks_)}); + // auto const largest_free = + // *std::max_element(free_blocks_.begin(), free_blocks_.end(), block_size_compare); + // logger->info(" Size of largest free block: {}", + // rmm::detail::bytes{largest_free.size()}); + // } } private: using lock_guard = std::lock_guard; - /// Maximum number of free blocks to keep. - static constexpr int max_free_blocks = 16; /** * @brief Get an available memory block of at least `size` bytes. @@ -522,51 +690,75 @@ class arena { */ block get_block(std::size_t size) { - if (size < minimum_superblock_size) { - // Find the first-fit free block. - auto const blk = first_fit(free_blocks_, size); - if (blk.is_valid()) { return blk; } - } + // Find the first-fit free block. + auto const b = first_fit(size); + if (b.is_valid()) { return b; } // No existing larger blocks available, so grow the arena and obtain a superblock. - auto const superblock = expand_arena(size); - if (superblock.is_valid()) { - coalesce_block(free_blocks_, superblock); - return first_fit(free_blocks_, size); + return expand_arena(size); + } + + /** + * @brief Get the first free block of at least `size` bytes. + * + * Address-ordered first-fit has shown to perform slightly better than best-fit when it comes to + * memory fragmentation, and slightly cheaper to implement. It is also used by some popular + * allocators such as jemalloc. + * + * \see Johnstone, M. S., & Wilson, P. R. (1998). The memory fragmentation problem: Solved?. ACM + * Sigplan Notices, 34(3), 26-36. + * + * @param size The number of bytes to allocate. + * @return block A block of memory of at least `size` bytes, or an empty block if not found. 
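   *
   * For example (illustrative), if the held superblocks contain free blocks of
   * 1 MiB, 256 KiB and 512 KiB in ascending address order, a 300 KiB request is
   * served by splitting the 1 MiB block (the first block in address order that
   * fits) rather than the 512 KiB block that a best-fit policy would pick.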
+ */ + block first_fit(std::size_t size) + { + for (auto const& s : superblocks_) { + auto const b = s.first_fit(size); + if (b.is_valid()) { return b; } } - return superblock; + return {}; } /** - * @brief Allocate space from upstream to supply the arena and return a superblock. + * @brief Deallocate a block from the superblock it belongs to. * - * @return A superblock. + * @param b The block to deallocate. + * @return true if the block is found. */ - block expand_arena(std::size_t size) + bool deallocate_from_superblock(block b) { - auto const superblock_size = std::max(size, minimum_superblock_size); - return global_arena_.allocate(superblock_size); + auto const iter = std::find_if( + superblocks_.begin(), superblocks_.end(), [b](auto& s) { return s.contains(b); }); + if (iter == superblocks_.end()) { return false; } + + auto const& s = *iter; + s.coalesce(b); + if (s.empty()) { global_arena_.release(std::move(superblocks_.extract(iter).value())); } + return true; } /** - * @brief Shrink this arena by returning free superblocks to upstream. + * @brief Allocate space from upstream to supply the arena and return a block. * - * @param blk The block that can be used to shrink the arena. - * @param stream Stream on which to perform shrinking. + * @param size The number of bytes to allocate. + * @return block A block of memory of at least `size` bytes. */ - void shrink_arena(block const& blk, cuda_stream_view stream) + block expand_arena(std::size_t size) { - if (blk.is_superblock() || free_blocks_.size() > max_free_blocks) { - stream.synchronize_no_throw(); - global_arena_.deallocate(blk); - free_blocks_.erase(blk); + auto s = global_arena_.acquire(size); + if (s.is_valid()) { + auto const b = s.first_fit(size); + superblocks_.insert(std::move(s)); + return b; } + return {}; } /// The global arena to allocate superblocks from. global_arena& global_arena_; - /// Free blocks. - std::set free_blocks_; + /// Acquired superblocks. + std::set superblocks_; /// Mutex for exclusive lock. 
mutable std::mutex mtx_; }; diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index e6609dda0..17b001671 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -32,29 +32,6 @@ TEST(ArenaTest, NullUpstream) EXPECT_THROW([]() { arena_mr mr{nullptr}; }(), rmm::logic_error); } -TEST(ArenaTest, UnalignedInitialSize) -{ - EXPECT_THROW([]() { arena_mr mr(rmm::mr::get_current_device_resource(), 255); }(), - rmm::logic_error); -} - -TEST(ArenaTest, UnalignedMaximumSize) -{ - EXPECT_THROW([]() { arena_mr mr(rmm::mr::get_current_device_resource(), 256, 257); }(), - rmm::logic_error); -} - -TEST(ArenaTest, MaxLessThanInitial) -{ - EXPECT_THROW([]() { arena_mr mr(rmm::mr::get_current_device_resource(), 512, 256); }(), - rmm::logic_error); -} - -TEST(ArenaTest, MaxEqualToInitial) -{ - EXPECT_NO_THROW([]() { arena_mr mr(rmm::mr::get_current_device_resource(), 512, 512); }()); -} - TEST(ArenaTest, AllocateNinetyPercent) { EXPECT_NO_THROW([]() { From 5da4b59c6e84fd2608a22012c67e16a07b35f56c Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 9 Nov 2021 12:23:21 -0800 Subject: [PATCH 03/35] fix segfault --- .../rmm/mr/device/arena_memory_resource.hpp | 18 ++-- include/rmm/mr/device/detail/arena.hpp | 100 ++++++++---------- 2 files changed, 55 insertions(+), 63 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index bfd4993dd..b099a4c2a 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -78,7 +78,7 @@ class arena_memory_resource final : public device_memory_resource { * * @throws rmm::logic_error if `upstream_mr == nullptr`. * - * @param upstream_mr The memory resource from which to allocate blocks for the pool + * @param upstream_mr The memory resource from which to allocate blocks for the pool. * @param arena_size Size in bytes of the global arena. Defaults to all the available memory on * the current device. */ @@ -118,8 +118,8 @@ class arena_memory_resource final : public device_memory_resource { private: using global_arena = rmm::mr::detail::arena::global_arena; using arena = rmm::mr::detail::arena::arena; - using read_lock = std::shared_lock; - using write_lock = std::lock_guard; + using read_lock = std::shared_lock; + using write_lock = std::unique_lock; /** * @brief Allocates memory of size at least `bytes`. @@ -183,17 +183,17 @@ class arena_memory_resource final : public device_memory_resource { { stream.synchronize_no_throw(); - read_lock lock(mtx_); + write_lock lock(mtx_); if (use_per_thread_arena(stream)) { auto const id = std::this_thread::get_id(); - for (auto& kv : thread_arenas_) { + for (auto&& kv : thread_arenas_) { // If the arena does not belong to the current thread, try to deallocate from it, and return // if successful. if (kv.first != id && kv.second->deallocate(ptr, bytes, stream)) { return; } } } else { - for (auto& kv : stream_arenas_) { + for (auto&& kv : stream_arenas_) { // If the arena does not belong to the current stream, try to deallocate from it, and return // if successful. 
if (stream.value() != kv.first && kv.second.deallocate(ptr, bytes, stream)) { return; } @@ -211,10 +211,10 @@ class arena_memory_resource final : public device_memory_resource { void defragment() { RMM_CUDA_TRY(cudaDeviceSynchronize()); - for (auto& thread_arena : thread_arenas_) { + for (auto&& thread_arena : thread_arenas_) { thread_arena.second->clean(); } - for (auto& stream_arena : stream_arenas_) { + for (auto&& stream_arena : stream_arenas_) { stream_arena.second.clean(); } } @@ -334,7 +334,7 @@ class arena_memory_resource final : public device_memory_resource { /// The logger for memory dump. std::shared_ptr logger_{}; /// Mutex for read and write locks. - mutable std::shared_timed_mutex mtx_; + mutable std::shared_mutex mtx_; }; } // namespace rmm::mr diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 847b9dd77..515d77c16 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -174,7 +174,7 @@ class superblock final : public memory_span { superblock(superblock const&) = delete; superblock& operator=(superblock const&) = delete; // Allow move semantics. - superblock(superblock&& s) noexcept = default; + superblock(superblock&& sb) noexcept = default; superblock& operator=(superblock&&) noexcept = default; ~superblock() = default; @@ -220,10 +220,10 @@ class superblock final : public memory_span { * @return true Returns true if both superblocks are empty and this superblock's * `pointer` + `size` == `s.ptr`. */ - [[nodiscard]] bool is_contiguous_before(superblock const& s) const + [[nodiscard]] bool is_contiguous_before(superblock const& sb) const { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - return empty() && s.empty() && pointer() + size() == s.pointer(); + return empty() && sb.empty() && pointer() + size() == sb.pointer(); } /** @@ -247,10 +247,10 @@ class superblock final : public memory_span { * @param s superblock to merge. * @return block The merged block. */ - [[nodiscard]] superblock merge(superblock const& s) const + [[nodiscard]] superblock merge(superblock const& sb) const { - RMM_LOGGING_ASSERT(is_contiguous_before(s)); - return {pointer(), size() + s.size()}; + RMM_LOGGING_ASSERT(is_contiguous_before(sb)); + return {pointer(), size() + sb.size()}; } /** @@ -333,8 +333,8 @@ class global_arena final { * @throws rmm::logic_error if `upstream_mr == nullptr`. * * @param upstream_mr The memory resource from which to allocate blocks for the pool - * @param arena_size Size in bytes of the global arena. Defaults to all the available memory on - * the current device. + * @param arena_size Size in bytes of the global arena. Defaults to half of the available memory + * on the current device. */ global_arena(Upstream* upstream_mr, std::optional arena_size) : upstream_mr_{upstream_mr} @@ -377,10 +377,10 @@ class global_arena final { * * @param s Superblock to be released. 
*/ - void release(superblock&& s) + void release(superblock&& sb) { lock_guard lock(mtx_); - coalesce(std::move(s)); + coalesce(std::move(sb)); } /** @@ -391,11 +391,8 @@ class global_arena final { void release(std::set& superblocks) { lock_guard lock(mtx_); - auto iter = superblocks.cbegin(); - while (iter != superblocks.cend()) { - auto s = std::move(superblocks.extract(iter).value()); - coalesce(std::move(s)); - ++iter; + while (!superblocks.empty()) { + coalesce(std::move(superblocks.extract(superblocks.cbegin()).value())); } } @@ -429,8 +426,7 @@ class global_arena final { stream.synchronize_no_throw(); lock_guard lock(mtx_); - superblock s{ptr, size}; - coalesce(std::move(s)); + coalesce({ptr, size}); return true; } return false; @@ -450,7 +446,7 @@ class global_arena final { block const b{ptr, bytes}; auto const iter = std::find_if( - superblocks_.cbegin(), superblocks_.cend(), [b](auto const& s) { return s.contains(b); }); + superblocks_.cbegin(), superblocks_.cend(), [&](auto const& sb) { return sb.contains(b); }); if (iter == superblocks_.cend()) { RMM_FAIL("allocation not found"); } iter->coalesce(b); } @@ -485,9 +481,6 @@ class global_arena final { private: using lock_guard = std::lock_guard; - /// Reserved memory that should not be allocated (64 MiB). - static constexpr std::size_t reserved_size = 1U << 26U; - /** * @brief Default size of the global arena if unspecified. * @return the default global arena size. @@ -495,7 +488,7 @@ class global_arena final { constexpr std::size_t default_size() const { auto const [free, total] = rmm::detail::available_device_memory(); - return free - reserved_size; + return free / 2; } /** @@ -534,51 +527,50 @@ class global_arena final { superblock first_fit(std::size_t size) { auto const iter = std::find_if( - superblocks_.cbegin(), superblocks_.cend(), [size](auto const& s) { return s.fits(size); }); + superblocks_.cbegin(), superblocks_.cend(), [size](auto const& sb) { return sb.fits(size); }); if (iter == superblocks_.cend()) { return {}; } - auto node_handle = superblocks_.extract(iter); - auto s = std::move(node_handle.value()); - auto const sz = std::max(size, superblock::minimum_size); - if (s.empty() && s.size() - sz >= superblock::minimum_size) { + auto sb = std::move(superblocks_.extract(iter).value()); + auto const sz = std::max(size, superblock::minimum_size); + if (sb.empty() && sb.size() - sz >= superblock::minimum_size) { // Split the superblock and put the remainder back. - auto [head, tail] = s.split(sz); + auto [head, tail] = sb.split(sz); superblocks_.insert(std::move(tail)); return std::move(head); } - return s; + return sb; } /** * @brief Coalesce the given superblock with other empty superblocks. * - * @param s The superblock to coalesce. + * @param sb The superblock to coalesce. */ - void coalesce(superblock&& s) + void coalesce(superblock&& sb) { // Find the right place (in ascending address order) to insert the block. - auto const next = superblocks_.lower_bound(s); + auto const next = superblocks_.lower_bound(sb); auto const previous = next == superblocks_.cbegin() ? next : std::prev(next); // Coalesce with neighboring blocks. 
- bool const merge_prev = previous->is_contiguous_before(s); - bool const merge_next = next != superblocks_.cend() && s.is_contiguous_before(*next); + bool const merge_prev = previous->is_contiguous_before(sb); + bool const merge_next = next != superblocks_.cend() && sb.is_contiguous_before(*next); if (merge_prev && merge_next) { - auto p = std::move(superblocks_.extract(previous).value()); - auto n = std::move(superblocks_.extract(next).value()); - auto merged = p.merge(std::move(s)).merge(std::move(n)); - superblocks_.insert(std::move(merged)); + auto prev_sb = std::move(superblocks_.extract(previous).value()); + auto next_sb = std::move(superblocks_.extract(next).value()); + auto merged = prev_sb.merge(sb).merge(next_sb); + superblocks_.emplace(std::move(merged)); } else if (merge_prev) { - auto p = std::move(superblocks_.extract(previous).value()); - auto merged = p.merge(std::move(s)); - superblocks_.insert(std::move(merged)); + auto prev_sb = std::move(superblocks_.extract(previous).value()); + auto merged = prev_sb.merge(sb); + superblocks_.emplace(std::move(merged)); } else if (merge_next) { - auto n = std::move(superblocks_.extract(next).value()); - auto merged = s.merge(std::move(n)); - superblocks_.insert(std::move(merged)); + auto next_sb = std::move(superblocks_.extract(next).value()); + auto merged = sb.merge(next_sb); + superblocks_.emplace(std::move(merged)); } else { - superblocks_.insert(std::move(s)); + superblocks_.emplace(std::move(sb)); } } @@ -726,15 +718,15 @@ class arena { * @param b The block to deallocate. * @return true if the block is found. */ - bool deallocate_from_superblock(block b) + bool deallocate_from_superblock(block const& b) { auto const iter = std::find_if( - superblocks_.begin(), superblocks_.end(), [b](auto& s) { return s.contains(b); }); + superblocks_.begin(), superblocks_.end(), [&](auto const& sb) { return sb.contains(b); }); if (iter == superblocks_.end()) { return false; } - auto const& s = *iter; - s.coalesce(b); - if (s.empty()) { global_arena_.release(std::move(superblocks_.extract(iter).value())); } + auto const& sb = *iter; + sb.coalesce(b); + if (sb.empty()) { global_arena_.release(std::move(superblocks_.extract(iter).value())); } return true; } @@ -746,10 +738,10 @@ class arena { */ block expand_arena(std::size_t size) { - auto s = global_arena_.acquire(size); - if (s.is_valid()) { - auto const b = s.first_fit(size); - superblocks_.insert(std::move(s)); + auto sb = global_arena_.acquire(size); + if (sb.is_valid()) { + auto const b = sb.first_fit(size); + superblocks_.emplace(std::move(sb)); return b; } return {}; From 10ed42c666dcbfd4c5dc8165cd4a2ecdf9105ba9 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 9 Nov 2021 13:44:43 -0800 Subject: [PATCH 04/35] add back memory dump --- .../rmm/mr/device/arena_memory_resource.hpp | 45 +++++----- include/rmm/mr/device/detail/arena.hpp | 88 +++++++++++-------- 2 files changed, 71 insertions(+), 62 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index b099a4c2a..6bbbbecb5 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -92,6 +92,24 @@ class arena_memory_resource final : public device_memory_resource { } } + /** + * @brief Construct an `arena_memory_resource`. + * + * @throws rmm::logic_error if `upstream_mr == nullptr`. + * + * @param upstream_mr The memory resource from which to allocate blocks for the pool. 
+ * @param arena_size Size in bytes of the global arena. Defaults to all the available memory on + * the current device. + * @param max_size Unused. + * @deprecated Use the version without the max size. + */ + arena_memory_resource(Upstream* upstream_mr, + std::optional arena_size, + std::optional max_size) + : arena_memory_resource{upstream_mr, arena_size, false} + { + } + ~arena_memory_resource() override = default; // Disable copy (and move) semantics. @@ -141,13 +159,8 @@ class arena_memory_resource final : public device_memory_resource { void* pointer = arena.allocate(bytes); if (pointer == nullptr) { - write_lock lock(mtx_); - defragment(); - pointer = arena.allocate(bytes); - if (pointer == nullptr) { - if (dump_log_on_failure_) { dump_memory_log(bytes); } - RMM_FAIL("Maximum pool size exceeded", rmm::out_of_memory); - } + if (dump_log_on_failure_) { dump_memory_log(bytes); } + RMM_FAIL("Maximum pool size exceeded", rmm::out_of_memory); } return pointer; @@ -183,7 +196,7 @@ class arena_memory_resource final : public device_memory_resource { { stream.synchronize_no_throw(); - write_lock lock(mtx_); + read_lock lock(mtx_); if (use_per_thread_arena(stream)) { auto const id = std::this_thread::get_id(); @@ -205,20 +218,6 @@ class arena_memory_resource final : public device_memory_resource { global_arena_.deallocate_from_other_arena(ptr, bytes); } - /** - * @brief Defragment memory by returning all free blocks to the global arena. - */ - void defragment() - { - RMM_CUDA_TRY(cudaDeviceSynchronize()); - for (auto&& thread_arena : thread_arenas_) { - thread_arena.second->clean(); - } - for (auto&& stream_arena : stream_arenas_) { - stream_arena.second.clean(); - } - } - /** * @brief Get the arena associated with the current thread or the given stream. * @@ -330,7 +329,7 @@ class arena_memory_resource final : public device_memory_resource { /// Implementation note: for small sizes, map is more efficient than unordered_map. std::map stream_arenas_; /// If true, dump memory information to log on allocation failure. - bool dump_log_on_failure_; + bool dump_log_on_failure_{}; /// The logger for memory dump. std::shared_ptr logger_{}; /// Mutex for read and write locks. diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 515d77c16..3c025c50f 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -75,6 +75,16 @@ class memory_span { std::size_t size_{}; ///< Size in bytes. }; +/// Calculate the total size of a collection of memory spans. +template +inline auto total_memory_size(std::set const& spans) +{ + return std::accumulate( + spans.cbegin(), spans.cend(), std::size_t{}, [](auto const& lhs, auto const& rhs) { + return lhs + rhs.size(); + }); +} + /** * @brief Represents a chunk of memory that can be allocated and deallocated. */ @@ -131,18 +141,9 @@ class block final : public memory_span { }; /// Comparison function for block sizes. -struct block_size_compare { - bool operator()(block const& lhs, block const& rhs) const { return lhs.size() < rhs.size(); } -}; - -/// Calculate the total size of a collection of blocks. 
-template -inline auto total_block_size(T const& blocks) +inline bool block_size_compare(block const& lhs, block const& rhs) { - return std::accumulate( - blocks.cbegin(), blocks.cend(), std::size_t{}, [](auto const& lhs, auto const& rhs) { - return lhs + rhs.size(); - }); + return lhs.size() < rhs.size(); } /** @@ -311,11 +312,30 @@ class superblock final : public memory_span { } } + /** + * @brief Find the max free block. + * @return the max free block. + */ + block max_free() const + { + return *std::max_element(free_blocks_.cbegin(), free_blocks_.cend(), block_size_compare); + } + private: /// Address-ordered set of free blocks. mutable std::set free_blocks_{}; }; +/// Find the max free size from a set of superblocks. +inline auto max_free(std::set const& superblocks) +{ + std::size_t size{}; + for (auto const& sb : superblocks) { + size = std::max(size, sb.max_free().size()); + } + return size; +}; + /** * @brief The global arena for allocating memory from the upstream memory resource. * @@ -458,24 +478,16 @@ class global_arena final { */ void dump_memory_log(std::shared_ptr const& logger) const { - // lock_guard lock(mtx_); - // - // logger->info(" Maximum size: {}", rmm::detail::bytes{maximum_size_}); - // logger->info(" Current size: {}", rmm::detail::bytes{current_size_}); - // - // logger->info(" # free blocks: {}", free_blocks_.size()); - // if (!free_blocks_.empty()) { - // logger->info(" Total size of free blocks: {}", - // rmm::detail::bytes{total_block_size(free_blocks_)}); - // auto const largest_free = - // *std::max_element(free_blocks_.begin(), free_blocks_.end(), block_size_compare); - // logger->info(" Size of largest free block: {}", - // rmm::detail::bytes{largest_free.size()}); - // } - // - // logger->info(" # upstream blocks={}", upstream_blocks_.size()); - // logger->info(" Total size of upstream blocks: {}", - // rmm::detail::bytes{total_block_size(upstream_blocks_)}); + lock_guard lock(mtx_); + + logger->info(" Arena size: {}", rmm::detail::bytes{upstream_block_.size()}); + + logger->info(" # superblocks: {}", superblocks_.size()); + if (!superblocks_.empty()) { + logger->info(" Total size of superblocks: {}", + rmm::detail::bytes{total_memory_size(superblocks_)}); + logger->info(" Size of largest free block: {}", rmm::detail::bytes{max_free(superblocks_)}); + } } private: @@ -659,16 +671,14 @@ class arena { */ void dump_memory_log(std::shared_ptr const& logger) const { - // lock_guard lock(mtx_); - // logger->info(" # free blocks: {}", free_blocks_.size()); - // if (!free_blocks_.empty()) { - // logger->info(" Total size of free blocks: {}", - // rmm::detail::bytes{total_block_size(free_blocks_)}); - // auto const largest_free = - // *std::max_element(free_blocks_.begin(), free_blocks_.end(), block_size_compare); - // logger->info(" Size of largest free block: {}", - // rmm::detail::bytes{largest_free.size()}); - // } + lock_guard lock(mtx_); + logger->info(" # superblocks: {}", superblocks_.size()); + if (!superblocks_.empty()) { + logger->info(" Total size of superblocks: {}", + rmm::detail::bytes{total_memory_size(superblocks_)}); + logger->info(" Size of largest free block: {}", + rmm::detail::bytes{max_free(superblocks_)}); + } } private: From 3f5bf1e02e351e736e59221d19beb0644b5e8f11 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 10 Nov 2021 12:25:20 -0800 Subject: [PATCH 05/35] switch to map for superblocks --- include/rmm/mr/device/detail/arena.hpp | 93 ++++++++++++++------------ 1 file changed, 49 insertions(+), 44 deletions(-) diff --git 
a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 3c025c50f..ce7bc9733 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -30,9 +30,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -75,13 +77,13 @@ class memory_span { std::size_t size_{}; ///< Size in bytes. }; -/// Calculate the total size of a collection of memory spans. +/// Calculate the total size of a map of memory spans. template -inline auto total_memory_size(std::set const& spans) +inline auto total_memory_size(std::map const& spans) { return std::accumulate( spans.cbegin(), spans.cend(), std::size_t{}, [](auto const& lhs, auto const& rhs) { - return lhs + rhs.size(); + return lhs + rhs.second.size(); }); } @@ -260,7 +262,7 @@ class superblock final : public memory_span { * @param size The number of bytes to allocate. * @return block A block of memory of at least `size` bytes, or an empty block if not found. */ - block first_fit(std::size_t size) const + block first_fit(std::size_t size) { auto const iter = std::find_if( free_blocks_.cbegin(), free_blocks_.cend(), [size](auto const& b) { return b.fits(size); }); @@ -284,7 +286,7 @@ class superblock final : public memory_span { * * @param b The block to coalesce. */ - void coalesce(block const& b) const + void coalesce(block const& b) { // Find the right place (in ascending address order) to insert the block. auto const next = free_blocks_.lower_bound(b); @@ -316,22 +318,22 @@ class superblock final : public memory_span { * @brief Find the max free block. * @return the max free block. */ - block max_free() const + [[nodiscard]] block max_free() const { return *std::max_element(free_blocks_.cbegin(), free_blocks_.cend(), block_size_compare); } private: /// Address-ordered set of free blocks. - mutable std::set free_blocks_{}; + std::set free_blocks_{}; }; -/// Find the max free size from a set of superblocks. -inline auto max_free(std::set const& superblocks) +/// Find the max free size from a map of superblocks. +inline auto max_free(std::map const& superblocks) { std::size_t size{}; - for (auto const& sb : superblocks) { - size = std::max(size, sb.max_free().size()); + for (auto const& kv : superblocks) { + size = std::max(size, kv.second.max_free().size()); } return size; }; @@ -408,11 +410,11 @@ class global_arena final { * * @param superblocks The set of superblocks. 
*/ - void release(std::set& superblocks) + void release(std::map& superblocks) { lock_guard lock(mtx_); while (!superblocks.empty()) { - coalesce(std::move(superblocks.extract(superblocks.cbegin()).value())); + coalesce(std::move(superblocks.extract(superblocks.cbegin()).mapped())); } } @@ -465,10 +467,11 @@ class global_arena final { lock_guard lock(mtx_); block const b{ptr, bytes}; - auto const iter = std::find_if( - superblocks_.cbegin(), superblocks_.cend(), [&](auto const& sb) { return sb.contains(b); }); - if (iter == superblocks_.cend()) { RMM_FAIL("allocation not found"); } - iter->coalesce(b); + auto iter = std::find_if(superblocks_.begin(), superblocks_.end(), [&](auto const& kv) { + return kv.second.contains(b); + }); + if (iter == superblocks_.end()) { RMM_FAIL("allocation not found"); } + iter->second.coalesce(b); } /** @@ -512,7 +515,7 @@ class global_arena final { { RMM_LOGGING_ASSERT(size >= superblock::minimum_size); upstream_block_ = {upstream_mr_->allocate(size), size}; - superblocks_.emplace(upstream_block_.pointer(), size); + superblocks_.try_emplace(upstream_block_.pointer(), upstream_block_.pointer(), size); } /** @@ -521,7 +524,7 @@ class global_arena final { * @param size The size in bytes of the allocation. * @return bool True if the allocation should be handled by the global arena. */ - bool handles(std::size_t size) { return size > superblock::minimum_size / 2; } + bool handles(std::size_t size) const { return size > superblock::minimum_size / 2; } /** * @brief Get the first superblock that can fit a block of at least `size` bytes. @@ -538,16 +541,17 @@ class global_arena final { */ superblock first_fit(std::size_t size) { - auto const iter = std::find_if( - superblocks_.cbegin(), superblocks_.cend(), [size](auto const& sb) { return sb.fits(size); }); + auto const iter = std::find_if(superblocks_.cbegin(), + superblocks_.cend(), + [size](auto const& kv) { return kv.second.fits(size); }); if (iter == superblocks_.cend()) { return {}; } - auto sb = std::move(superblocks_.extract(iter).value()); + auto sb = std::move(superblocks_.extract(iter).mapped()); auto const sz = std::max(size, superblock::minimum_size); if (sb.empty() && sb.size() - sz >= superblock::minimum_size) { // Split the superblock and put the remainder back. auto [head, tail] = sb.split(sz); - superblocks_.insert(std::move(tail)); + superblocks_.try_emplace(tail.pointer(), std::move(tail)); return std::move(head); } return sb; @@ -561,28 +565,28 @@ class global_arena final { void coalesce(superblock&& sb) { // Find the right place (in ascending address order) to insert the block. - auto const next = superblocks_.lower_bound(sb); + auto const next = superblocks_.lower_bound(sb.pointer()); auto const previous = next == superblocks_.cbegin() ? next : std::prev(next); // Coalesce with neighboring blocks. 
- bool const merge_prev = previous->is_contiguous_before(sb); - bool const merge_next = next != superblocks_.cend() && sb.is_contiguous_before(*next); + bool const merge_prev = previous->second.is_contiguous_before(sb); + bool const merge_next = next != superblocks_.cend() && sb.is_contiguous_before(next->second); if (merge_prev && merge_next) { - auto prev_sb = std::move(superblocks_.extract(previous).value()); - auto next_sb = std::move(superblocks_.extract(next).value()); + auto prev_sb = std::move(superblocks_.extract(previous).mapped()); + auto next_sb = std::move(superblocks_.extract(next).mapped()); auto merged = prev_sb.merge(sb).merge(next_sb); - superblocks_.emplace(std::move(merged)); + superblocks_.try_emplace(merged.pointer(), std::move(merged)); } else if (merge_prev) { - auto prev_sb = std::move(superblocks_.extract(previous).value()); + auto prev_sb = std::move(superblocks_.extract(previous).mapped()); auto merged = prev_sb.merge(sb); - superblocks_.emplace(std::move(merged)); + superblocks_.try_emplace(merged.pointer(), std::move(merged)); } else if (merge_next) { - auto next_sb = std::move(superblocks_.extract(next).value()); + auto next_sb = std::move(superblocks_.extract(next).mapped()); auto merged = sb.merge(next_sb); - superblocks_.emplace(std::move(merged)); + superblocks_.try_emplace(merged.pointer(), std::move(merged)); } else { - superblocks_.emplace(std::move(sb)); + superblocks_.try_emplace(sb.pointer(), std::move(sb)); } } @@ -590,8 +594,8 @@ class global_arena final { Upstream* upstream_mr_; /// Block allocated from upstream so that it can be quickly freed. block upstream_block_; - /// Address-ordered set of superblocks. - std::set superblocks_; + /// Address-ordered map of superblocks. + std::map superblocks_; /// Mutex for exclusive lock. mutable std::mutex mtx_; }; @@ -715,8 +719,8 @@ class arena { */ block first_fit(std::size_t size) { - for (auto const& s : superblocks_) { - auto const b = s.first_fit(size); + for (auto&& kv : superblocks_) { + auto const b = kv.second.first_fit(size); if (b.is_valid()) { return b; } } return {}; @@ -730,13 +734,14 @@ class arena { */ bool deallocate_from_superblock(block const& b) { - auto const iter = std::find_if( - superblocks_.begin(), superblocks_.end(), [&](auto const& sb) { return sb.contains(b); }); + auto iter = std::find_if(superblocks_.begin(), superblocks_.end(), [&](auto const& kv) { + return kv.second.contains(b); + }); if (iter == superblocks_.end()) { return false; } - auto const& sb = *iter; + auto& sb = iter->second; sb.coalesce(b); - if (sb.empty()) { global_arena_.release(std::move(superblocks_.extract(iter).value())); } + if (sb.empty()) { global_arena_.release(std::move(superblocks_.extract(iter).mapped())); } return true; } @@ -751,7 +756,7 @@ class arena { auto sb = global_arena_.acquire(size); if (sb.is_valid()) { auto const b = sb.first_fit(size); - superblocks_.emplace(std::move(sb)); + superblocks_.try_emplace(sb.pointer(), std::move(sb)); return b; } return {}; @@ -760,7 +765,7 @@ class arena { /// The global arena to allocate superblocks from. global_arena& global_arena_; /// Acquired superblocks. - std::set superblocks_; + std::map superblocks_; /// Mutex for exclusive lock. 
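[Editor's sketch, not part of the patch] The coalescing hunks above are easier to follow in isolation. Below is a minimal, self-contained sketch of the same address-ordered scheme: spans are kept in a std::map keyed by their start pointer, neighbours are located with lower_bound, and map node handles (extract().mapped()) splice entries out before the merged result is re-inserted, mirroring how this series handles the move-only superblock. The `span` struct, the free `coalesce` function, and the byte sizes are illustrative stand-ins only.

  #include <cstddef>
  #include <iterator>
  #include <map>

  struct span {
    char* ptr{};
    std::size_t size{};
    bool is_contiguous_before(span const& other) const { return ptr + size == other.ptr; }
    span merge(span const& other) const { return {ptr, size + other.size}; }
  };

  // Insert `sp` into the address-ordered map, merging with adjacent free spans.
  void coalesce(std::map<char*, span>& spans, span sp)
  {
    auto const next     = spans.lower_bound(sp.ptr);
    auto const previous = next == spans.cbegin() ? next : std::prev(next);

    bool const merge_prev = previous != next && previous->second.is_contiguous_before(sp);
    bool const merge_next = next != spans.cend() && sp.is_contiguous_before(next->second);

    // Extract neighbours via node handles and fold them into `sp` before re-inserting.
    if (merge_prev) { sp = spans.extract(previous).mapped().merge(sp); }
    if (merge_next) { sp = sp.merge(spans.extract(next).mapped()); }
    spans.try_emplace(sp.ptr, sp);
  }

  int main()
  {
    std::map<char*, span> spans;
    char backing[192]{};
    coalesce(spans, {backing, 64});        // first span, nothing to merge with
    coalesce(spans, {backing + 128, 64});  // not adjacent, stays separate
    coalesce(spans, {backing + 64, 64});   // bridges both neighbours into one 192-byte span
    return spans.size() == 1 ? 0 : 1;
  }

Keying the map by the start pointer is what makes both neighbour checks O(log n): lower_bound finds the first span at or after the incoming one, and its predecessor is the only candidate for a merge on the left.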
mutable std::mutex mtx_; }; From d33b9a0af8377bcd62c3ecaaee7ba38aefe4b70d Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Thu, 11 Nov 2021 09:30:53 -0800 Subject: [PATCH 06/35] add some tests --- include/rmm/mr/device/detail/arena.hpp | 4 +- tests/mr/device/arena_mr_tests.cpp | 132 +++++++++++++++++++++++++ 2 files changed, 134 insertions(+), 2 deletions(-) diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index ce7bc9733..877073d83 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -70,7 +70,7 @@ class memory_span { [[nodiscard]] bool is_valid() const { return pointer_ != nullptr; } /// Used by std::set to compare spans. - bool operator<(memory_span const& s) const { return pointer_ < s.pointer_; } + bool operator<(memory_span const& ms) const { return pointer_ < ms.pointer_; } private: char* pointer_{}; ///< Raw memory pointer. @@ -247,7 +247,7 @@ class superblock final : public memory_span { * * `this->is_contiguous_before(s)` must be true. * - * @param s superblock to merge. + * @param sb superblock to merge. * @return block The merged block. */ [[nodiscard]] superblock merge(superblock const& sb) const diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 17b001671..c7ddf2f6f 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -25,8 +25,140 @@ namespace rmm::test { namespace { + +using memory_span = rmm::mr::detail::arena::memory_span; +using block = rmm::mr::detail::arena::block; +using superblock = rmm::mr::detail::arena::superblock; using arena_mr = rmm::mr::arena_memory_resource; +// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) +auto const fake_address = reinterpret_cast(1L << 10L); +// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) +auto const fake_address2 = reinterpret_cast(1L << 11L); +// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) +auto const fake_address3 = reinterpret_cast(1L << 22L); +// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) +auto const fake_address4 = reinterpret_cast(1L << 23L); + +TEST(ArenaTest, MemorySpan) +{ + memory_span const ms{}; + EXPECT_FALSE(ms.is_valid()); + memory_span const ms2{fake_address, 256}; + EXPECT_TRUE(ms2.is_valid()); +} + +TEST(ArenaTest, BlockFits) +{ + block const b{fake_address, 1024}; + EXPECT_TRUE(b.fits(1024)); + EXPECT_FALSE(b.fits(1025)); +} + +TEST(ArenaTest, BlockIsContiguousBefore) +{ + block const b{fake_address, 1024}; + block const b2{fake_address2, 256}; + EXPECT_TRUE(b.is_contiguous_before(b2)); + block const b3{fake_address, 512}; + block const b4{fake_address2, 1024}; + EXPECT_FALSE(b3.is_contiguous_before(b4)); +} + +TEST(ArenaTest, BlockSplit) +{ + block const b{fake_address, 2048}; + auto const [head, tail] = b.split(1024); + EXPECT_EQ(head.pointer(), fake_address); + EXPECT_EQ(head.size(), 1024); + EXPECT_EQ(tail.pointer(), fake_address2); + EXPECT_EQ(tail.size(), 1024); +} + +TEST(ArenaTest, BlockMerge) +{ + block const b{fake_address, 1024}; + block const b2{fake_address2, 1024}; + auto const merged = b.merge(b2); + EXPECT_EQ(merged.pointer(), fake_address); + EXPECT_EQ(merged.size(), 2048); +} + +TEST(ArenaTest, SuperblockEmpty) +{ + superblock sb{fake_address3, 4194304}; + EXPECT_TRUE(sb.empty()); + sb.first_fit(256); + EXPECT_FALSE(sb.empty()); +} + +TEST(ArenaTest, SuperblockContains) +{ + superblock const sb{fake_address3, 4194304}; + block const b{fake_address, 2048}; + 
EXPECT_FALSE(sb.contains(b)); + block const b2{fake_address3, 1024}; + EXPECT_TRUE(sb.contains(b2)); + block const b3{fake_address3, 4194305}; + EXPECT_FALSE(sb.contains(b3)); + block const b4{fake_address3, 4194304}; + EXPECT_TRUE(sb.contains(b4)); + block const b5{fake_address4, 256}; + EXPECT_FALSE(sb.contains(b5)); +} + +TEST(ArenaTest, SuperblockFits) +{ + superblock sb{fake_address3, 4194304}; + EXPECT_TRUE(sb.fits(4194304)); + EXPECT_FALSE(sb.fits(4194305)); + + auto const b = sb.first_fit(1048576); + sb.first_fit(1048576); + sb.coalesce(b); + EXPECT_TRUE(sb.fits(2097152)); + EXPECT_FALSE(sb.fits(2097153)); +} + +TEST(ArenaTest, SuperblockIsContiguousBefore) +{ + superblock sb{fake_address3, 4194304}; + superblock sb2{fake_address4, 4194304}; + EXPECT_TRUE(sb.is_contiguous_before(sb2)); + + auto const b = sb.first_fit(256); + EXPECT_FALSE(sb.is_contiguous_before(sb2)); + sb.coalesce(b); + EXPECT_TRUE(sb.is_contiguous_before(sb2)); + + auto const b2 = sb2.first_fit(1024); + EXPECT_FALSE(sb.is_contiguous_before(sb2)); + sb2.coalesce(b2); + EXPECT_TRUE(sb.is_contiguous_before(sb2)); +} + +TEST(ArenaTest, SuperblockSplit) +{ + superblock sb{fake_address3, 8388608}; + auto const [head, tail] = sb.split(4194304); + EXPECT_EQ(head.pointer(), fake_address3); + EXPECT_EQ(head.size(), 4194304); + EXPECT_TRUE(head.empty()); + EXPECT_EQ(tail.pointer(), fake_address4); + EXPECT_EQ(tail.size(), 4194304); + EXPECT_TRUE(tail.empty()); +} + +TEST(ArenaTest, SuperblockMerge) +{ + superblock sb{fake_address3, 4194304}; + superblock sb2{fake_address4, 4194304}; + auto const merged = sb.merge(sb2); + EXPECT_EQ(merged.pointer(), fake_address3); + EXPECT_EQ(merged.size(), 8388608); + EXPECT_TRUE(merged.empty()); +} + TEST(ArenaTest, NullUpstream) { EXPECT_THROW([]() { arena_mr mr{nullptr}; }(), rmm::logic_error); From b4a1d6a9aa597899c6a8403bf6576fbeee94f26c Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Thu, 11 Nov 2021 14:58:40 -0800 Subject: [PATCH 07/35] add more tests --- tests/mr/device/arena_mr_tests.cpp | 98 ++++++++++++++++++++++++++---- 1 file changed, 87 insertions(+), 11 deletions(-) diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index c7ddf2f6f..2a8d5d5fc 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -27,19 +27,20 @@ namespace rmm::test { namespace { using memory_span = rmm::mr::detail::arena::memory_span; -using block = rmm::mr::detail::arena::block; -using superblock = rmm::mr::detail::arena::superblock; -using arena_mr = rmm::mr::arena_memory_resource; +using block = rmm::mr::detail::arena::block; +using superblock = rmm::mr::detail::arena::superblock; +using arena_mr = rmm::mr::arena_memory_resource; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) -auto const fake_address = reinterpret_cast(1L << 10L); +auto const fake_address = reinterpret_cast(1024L); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) -auto const fake_address2 = reinterpret_cast(1L << 11L); +auto const fake_address2 = reinterpret_cast(2048L); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) -auto const fake_address3 = reinterpret_cast(1L << 22L); +auto const fake_address3 = reinterpret_cast(4194304L); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) -auto const fake_address4 = reinterpret_cast(1L << 23L); +auto const fake_address4 = reinterpret_cast(8388608L); +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) TEST(ArenaTest, MemorySpan) { memory_span const ms{}; @@ -48,6 
+49,7 @@ TEST(ArenaTest, MemorySpan) EXPECT_TRUE(ms2.is_valid()); } +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) TEST(ArenaTest, BlockFits) { block const b{fake_address, 1024}; @@ -55,6 +57,7 @@ TEST(ArenaTest, BlockFits) EXPECT_FALSE(b.fits(1025)); } +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) TEST(ArenaTest, BlockIsContiguousBefore) { block const b{fake_address, 1024}; @@ -65,6 +68,7 @@ TEST(ArenaTest, BlockIsContiguousBefore) EXPECT_FALSE(b3.is_contiguous_before(b4)); } +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) TEST(ArenaTest, BlockSplit) { block const b{fake_address, 2048}; @@ -75,6 +79,7 @@ TEST(ArenaTest, BlockSplit) EXPECT_EQ(tail.size(), 1024); } +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) TEST(ArenaTest, BlockMerge) { block const b{fake_address, 1024}; @@ -84,6 +89,7 @@ TEST(ArenaTest, BlockMerge) EXPECT_EQ(merged.size(), 2048); } +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) TEST(ArenaTest, SuperblockEmpty) { superblock sb{fake_address3, 4194304}; @@ -92,6 +98,7 @@ TEST(ArenaTest, SuperblockEmpty) EXPECT_FALSE(sb.empty()); } +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) TEST(ArenaTest, SuperblockContains) { superblock const sb{fake_address3, 4194304}; @@ -107,6 +114,7 @@ TEST(ArenaTest, SuperblockContains) EXPECT_FALSE(sb.contains(b5)); } +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) TEST(ArenaTest, SuperblockFits) { superblock sb{fake_address3, 4194304}; @@ -120,6 +128,7 @@ TEST(ArenaTest, SuperblockFits) EXPECT_FALSE(sb.fits(2097153)); } +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) TEST(ArenaTest, SuperblockIsContiguousBefore) { superblock sb{fake_address3, 4194304}; @@ -137,6 +146,7 @@ TEST(ArenaTest, SuperblockIsContiguousBefore) EXPECT_TRUE(sb.is_contiguous_before(sb2)); } +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) TEST(ArenaTest, SuperblockSplit) { superblock sb{fake_address3, 8388608}; @@ -149,6 +159,7 @@ TEST(ArenaTest, SuperblockSplit) EXPECT_TRUE(tail.empty()); } +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) TEST(ArenaTest, SuperblockMerge) { superblock sb{fake_address3, 4194304}; @@ -159,13 +170,76 @@ TEST(ArenaTest, SuperblockMerge) EXPECT_TRUE(merged.empty()); } +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) +TEST(ArenaTest, SuperblockFirstFit) +{ + superblock sb{fake_address3, 4194304}; + auto const b = sb.first_fit(1024); + EXPECT_EQ(b.pointer(), fake_address3); + EXPECT_EQ(b.size(), 1024); + auto const b2 = sb.first_fit(2048); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + EXPECT_EQ(b2.pointer(), static_cast(fake_address3) + 1024); + EXPECT_EQ(b2.size(), 2048); + sb.coalesce(b); + auto const b3 = sb.first_fit(512); + EXPECT_EQ(b3.pointer(), fake_address3); + EXPECT_EQ(b3.size(), 512); +} + +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) +TEST(ArenaTest, SuperblockCoalesceMergeNext) +{ + superblock sb{fake_address3, 4194304}; + auto const b = sb.first_fit(2097152); + sb.coalesce(b); + EXPECT_TRUE(sb.first_fit(4194304).is_valid()); +} + +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) +TEST(ArenaTest, SuperblockCoalesceMergePrevious) +{ + superblock sb{fake_address3, 4194304}; + auto const b = sb.first_fit(1024); + auto const b2 = sb.first_fit(1024); + sb.first_fit(1024); + sb.coalesce(b); + sb.coalesce(b2); + auto const b3 = sb.first_fit(2048); + EXPECT_EQ(b3.pointer(), fake_address3); +} + +// 
NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) +TEST(ArenaTest, SuperblockCoalesceMergePreviousAndNext) +{ + superblock sb{fake_address3, 4194304}; + auto const b = sb.first_fit(1024); + auto const b2 = sb.first_fit(1024); + sb.coalesce(b); + sb.coalesce(b2); + EXPECT_TRUE(sb.first_fit(4194304).is_valid()); +} + +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) +TEST(ArenaTest, SuperblockMaxFree) +{ + superblock sb{fake_address3, 4194304}; + sb.first_fit(2097152); + auto const b = sb.max_free(); + EXPECT_EQ(b.size(), 2097152); +} + +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) TEST(ArenaTest, NullUpstream) { + // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto) EXPECT_THROW([]() { arena_mr mr{nullptr}; }(), rmm::logic_error); } +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) TEST(ArenaTest, AllocateNinetyPercent) { + // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto) EXPECT_NO_THROW([]() { auto const free = rmm::detail::available_device_memory().first; auto const ninety_percent = @@ -174,17 +248,19 @@ TEST(ArenaTest, AllocateNinetyPercent) }()); } +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) TEST(ArenaTest, SmallMediumLarge) { + // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto) EXPECT_NO_THROW([]() { arena_mr mr(rmm::mr::get_current_device_resource()); - auto* small = mr.allocate(256); - auto* medium = mr.allocate(1U << 26U); + auto* small = mr.allocate(256); + auto* medium = mr.allocate(1U << 26U); auto const free = rmm::detail::available_device_memory().first; - auto* large = mr.allocate(free / 2); + auto* large = mr.allocate(free / 3); mr.deallocate(small, 256); mr.deallocate(medium, 1U << 26U); - mr.deallocate(large, free / 4); + mr.deallocate(large, free / 3); }()); } From d86d6b19675aac43604ed0d2d52c136f1ec9e09e Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Thu, 11 Nov 2021 15:13:35 -0800 Subject: [PATCH 08/35] fix clang tidy warnings in test --- tests/mr/device/arena_mr_tests.cpp | 63 ++++++++++-------------------- 1 file changed, 21 insertions(+), 42 deletions(-) diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 2a8d5d5fc..96baee083 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -40,8 +40,7 @@ auto const fake_address3 = reinterpret_cast(4194304L); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) auto const fake_address4 = reinterpret_cast(8388608L); -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, MemorySpan) +TEST(ArenaTest, MemorySpan) // NOLINT { memory_span const ms{}; EXPECT_FALSE(ms.is_valid()); @@ -49,16 +48,14 @@ TEST(ArenaTest, MemorySpan) EXPECT_TRUE(ms2.is_valid()); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, BlockFits) +TEST(ArenaTest, BlockFits) // NOLINT { block const b{fake_address, 1024}; EXPECT_TRUE(b.fits(1024)); EXPECT_FALSE(b.fits(1025)); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, BlockIsContiguousBefore) +TEST(ArenaTest, BlockIsContiguousBefore) // NOLINT { block const b{fake_address, 1024}; block const b2{fake_address2, 256}; @@ -68,8 +65,7 @@ TEST(ArenaTest, BlockIsContiguousBefore) EXPECT_FALSE(b3.is_contiguous_before(b4)); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, BlockSplit) +TEST(ArenaTest, BlockSplit) // NOLINT { block const b{fake_address, 2048}; auto const [head, tail] = b.split(1024); @@ -79,8 +75,7 @@ TEST(ArenaTest, BlockSplit) 
EXPECT_EQ(tail.size(), 1024); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, BlockMerge) +TEST(ArenaTest, BlockMerge) // NOLINT { block const b{fake_address, 1024}; block const b2{fake_address2, 1024}; @@ -89,8 +84,7 @@ TEST(ArenaTest, BlockMerge) EXPECT_EQ(merged.size(), 2048); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, SuperblockEmpty) +TEST(ArenaTest, SuperblockEmpty) // NOLINT { superblock sb{fake_address3, 4194304}; EXPECT_TRUE(sb.empty()); @@ -98,8 +92,7 @@ TEST(ArenaTest, SuperblockEmpty) EXPECT_FALSE(sb.empty()); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, SuperblockContains) +TEST(ArenaTest, SuperblockContains) // NOLINT { superblock const sb{fake_address3, 4194304}; block const b{fake_address, 2048}; @@ -114,8 +107,7 @@ TEST(ArenaTest, SuperblockContains) EXPECT_FALSE(sb.contains(b5)); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, SuperblockFits) +TEST(ArenaTest, SuperblockFits) // NOLINT { superblock sb{fake_address3, 4194304}; EXPECT_TRUE(sb.fits(4194304)); @@ -128,8 +120,7 @@ TEST(ArenaTest, SuperblockFits) EXPECT_FALSE(sb.fits(2097153)); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, SuperblockIsContiguousBefore) +TEST(ArenaTest, SuperblockIsContiguousBefore) // NOLINT { superblock sb{fake_address3, 4194304}; superblock sb2{fake_address4, 4194304}; @@ -146,8 +137,7 @@ TEST(ArenaTest, SuperblockIsContiguousBefore) EXPECT_TRUE(sb.is_contiguous_before(sb2)); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, SuperblockSplit) +TEST(ArenaTest, SuperblockSplit) // NOLINT { superblock sb{fake_address3, 8388608}; auto const [head, tail] = sb.split(4194304); @@ -159,8 +149,7 @@ TEST(ArenaTest, SuperblockSplit) EXPECT_TRUE(tail.empty()); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, SuperblockMerge) +TEST(ArenaTest, SuperblockMerge) // NOLINT { superblock sb{fake_address3, 4194304}; superblock sb2{fake_address4, 4194304}; @@ -170,8 +159,7 @@ TEST(ArenaTest, SuperblockMerge) EXPECT_TRUE(merged.empty()); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, SuperblockFirstFit) +TEST(ArenaTest, SuperblockFirstFit) // NOLINT { superblock sb{fake_address3, 4194304}; auto const b = sb.first_fit(1024); @@ -187,8 +175,7 @@ TEST(ArenaTest, SuperblockFirstFit) EXPECT_EQ(b3.size(), 512); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, SuperblockCoalesceMergeNext) +TEST(ArenaTest, SuperblockCoalesceMergeNext) // NOLINT { superblock sb{fake_address3, 4194304}; auto const b = sb.first_fit(2097152); @@ -196,8 +183,7 @@ TEST(ArenaTest, SuperblockCoalesceMergeNext) EXPECT_TRUE(sb.first_fit(4194304).is_valid()); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, SuperblockCoalesceMergePrevious) +TEST(ArenaTest, SuperblockCoalesceMergePrevious) // NOLINT { superblock sb{fake_address3, 4194304}; auto const b = sb.first_fit(1024); @@ -209,8 +195,7 @@ TEST(ArenaTest, SuperblockCoalesceMergePrevious) EXPECT_EQ(b3.pointer(), fake_address3); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, SuperblockCoalesceMergePreviousAndNext) +TEST(ArenaTest, SuperblockCoalesceMergePreviousAndNext) // NOLINT { superblock sb{fake_address3, 4194304}; auto const b = sb.first_fit(1024); @@ -220,8 +205,7 @@ TEST(ArenaTest, 
SuperblockCoalesceMergePreviousAndNext) EXPECT_TRUE(sb.first_fit(4194304).is_valid()); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, SuperblockMaxFree) +TEST(ArenaTest, SuperblockMaxFree) // NOLINT { superblock sb{fake_address3, 4194304}; sb.first_fit(2097152); @@ -229,18 +213,15 @@ TEST(ArenaTest, SuperblockMaxFree) EXPECT_EQ(b.size(), 2097152); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, NullUpstream) +TEST(ArenaTest, NullUpstream) // NOLINT { // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto) EXPECT_THROW([]() { arena_mr mr{nullptr}; }(), rmm::logic_error); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, AllocateNinetyPercent) +TEST(ArenaTest, AllocateNinetyPercent) // NOLINT { - // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto) - EXPECT_NO_THROW([]() { + EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) auto const free = rmm::detail::available_device_memory().first; auto const ninety_percent = rmm::detail::align_up_cuda(static_cast(static_cast(free) * 0.9)); @@ -248,11 +229,9 @@ TEST(ArenaTest, AllocateNinetyPercent) }()); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) -TEST(ArenaTest, SmallMediumLarge) +TEST(ArenaTest, SmallMediumLarge) // NOLINT { - // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto) - EXPECT_NO_THROW([]() { + EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) arena_mr mr(rmm::mr::get_current_device_resource()); auto* small = mr.allocate(256); auto* medium = mr.allocate(1U << 26U); From f87ba63c7ca7728fe7ef9f101932ca14e8cd5463 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Thu, 11 Nov 2021 19:57:45 -0800 Subject: [PATCH 09/35] add some logging asserts --- include/rmm/mr/device/detail/arena.hpp | 49 +++++++++++++++++-- tests/mr/device/arena_mr_tests.cpp | 65 ++++++++++++++++++++++++-- 2 files changed, 105 insertions(+), 9 deletions(-) diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 877073d83..e16af1c5a 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -58,6 +58,8 @@ class memory_span { */ memory_span(void* pointer, std::size_t size) : pointer_{static_cast(pointer)}, size_{size} { + RMM_LOGGING_ASSERT(pointer != nullptr); + RMM_LOGGING_ASSERT(size > 0); } /// Returns the underlying pointer. @@ -67,10 +69,14 @@ class memory_span { [[nodiscard]] std::size_t size() const { return size_; } /// Returns true if this span is valid (non-null), false otherwise. - [[nodiscard]] bool is_valid() const { return pointer_ != nullptr; } + [[nodiscard]] bool is_valid() const { return pointer_ != nullptr && size_ > 0; } /// Used by std::set to compare spans. - bool operator<(memory_span const& ms) const { return pointer_ < ms.pointer_; } + bool operator<(memory_span const& ms) const + { + RMM_LOGGING_ASSERT(ms.is_valid()); + return pointer_ < ms.pointer_; + } private: char* pointer_{}; ///< Raw memory pointer. @@ -100,7 +106,12 @@ class block final : public memory_span { * @param sz The size in bytes to check for fit. * @return true if this block is at least `sz` bytes. */ - [[nodiscard]] bool fits(std::size_t sz) const { return size() >= sz; } + [[nodiscard]] bool fits(std::size_t sz) const + { + RMM_LOGGING_ASSERT(is_valid()); + RMM_LOGGING_ASSERT(sz > 0); + return size() >= sz; + } /** * @brief Verifies whether this block can be merged to the beginning of block b. 
@@ -110,6 +121,8 @@ class block final : public memory_span { */ [[nodiscard]] bool is_contiguous_before(block const& b) const { + RMM_LOGGING_ASSERT(is_valid()); + RMM_LOGGING_ASSERT(b.is_valid()); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) return pointer() + size() == b.pointer(); } @@ -122,6 +135,7 @@ class block final : public memory_span { */ [[nodiscard]] std::pair split(std::size_t sz) const { + RMM_LOGGING_ASSERT(is_valid()); RMM_LOGGING_ASSERT(size() >= sz); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) return {{pointer(), sz}, {pointer() + sz, size() - sz}}; @@ -137,6 +151,8 @@ class block final : public memory_span { */ [[nodiscard]] block merge(block const& b) const { + RMM_LOGGING_ASSERT(is_valid()); + RMM_LOGGING_ASSERT(b.is_valid()); RMM_LOGGING_ASSERT(is_contiguous_before(b)); return {pointer(), size() + b.size()}; } @@ -145,6 +161,8 @@ class block final : public memory_span { /// Comparison function for block sizes. inline bool block_size_compare(block const& lhs, block const& rhs) { + RMM_LOGGING_ASSERT(lhs.is_valid()); + RMM_LOGGING_ASSERT(rhs.is_valid()); return lhs.size() < rhs.size(); } @@ -170,6 +188,7 @@ class superblock final : public memory_span { */ superblock(void* pointer, std::size_t size) : memory_span{pointer, size} { + RMM_LOGGING_ASSERT(size >= minimum_size); free_blocks_.emplace(pointer, size); } @@ -189,6 +208,7 @@ class superblock final : public memory_span { */ [[nodiscard]] bool empty() const { + RMM_LOGGING_ASSERT(is_valid()); return free_blocks_.size() == 1 && free_blocks_.cbegin()->size() == size(); } @@ -200,6 +220,8 @@ class superblock final : public memory_span { */ [[nodiscard]] bool contains(block const& b) const { + RMM_LOGGING_ASSERT(is_valid()); + RMM_LOGGING_ASSERT(b.is_valid()); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) return pointer() <= b.pointer() && pointer() + size() >= b.pointer() + b.size(); } @@ -212,6 +234,7 @@ class superblock final : public memory_span { */ [[nodiscard]] bool fits(std::size_t sz) const { + RMM_LOGGING_ASSERT(is_valid()); return std::any_of( free_blocks_.cbegin(), free_blocks_.cend(), [sz](auto const& b) { return b.fits(sz); }); } @@ -225,6 +248,8 @@ class superblock final : public memory_span { */ [[nodiscard]] bool is_contiguous_before(superblock const& sb) const { + RMM_LOGGING_ASSERT(is_valid()); + RMM_LOGGING_ASSERT(sb.is_valid()); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) return empty() && sb.empty() && pointer() + size() == sb.pointer(); } @@ -237,6 +262,7 @@ class superblock final : public memory_span { */ [[nodiscard]] std::pair split(std::size_t sz) const { + RMM_LOGGING_ASSERT(is_valid()); RMM_LOGGING_ASSERT(empty() && sz >= minimum_size && size() - sz >= minimum_size); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) return {superblock{pointer(), sz}, superblock{pointer() + sz, size() - sz}}; @@ -264,6 +290,9 @@ class superblock final : public memory_span { */ block first_fit(std::size_t size) { + RMM_LOGGING_ASSERT(is_valid()); + RMM_LOGGING_ASSERT(size > 0); + auto const iter = std::find_if( free_blocks_.cbegin(), free_blocks_.cend(), [size](auto const& b) { return b.fits(size); }); if (iter == free_blocks_.cend()) { return {}; } @@ -288,6 +317,10 @@ class superblock final : public memory_span { */ void coalesce(block const& b) { + RMM_LOGGING_ASSERT(is_valid()); + RMM_LOGGING_ASSERT(b.is_valid()); + RMM_LOGGING_ASSERT(contains(b)); + // Find the right place (in ascending address order) 
to insert the block. auto const next = free_blocks_.lower_bound(b); auto const previous = next == free_blocks_.cbegin() ? next : std::prev(next); @@ -390,6 +423,8 @@ class global_arena final { */ superblock acquire(std::size_t size) { + // Superblocks should only be acquired if the size is not directly handled by the global arena. + RMM_LOGGING_ASSERT(!handles(size)); lock_guard lock(mtx_); return first_fit(size); } @@ -401,6 +436,7 @@ class global_arena final { */ void release(superblock&& sb) { + RMM_LOGGING_ASSERT(sb.is_valid()); lock_guard lock(mtx_); coalesce(std::move(sb)); } @@ -414,7 +450,9 @@ class global_arena final { { lock_guard lock(mtx_); while (!superblocks.empty()) { - coalesce(std::move(superblocks.extract(superblocks.cbegin()).mapped())); + auto&& sb = std::move(superblocks.extract(superblocks.cbegin()).mapped()); + RMM_LOGGING_ASSERT(sb.is_valid()); + coalesce(std::move(sb)); } } @@ -484,7 +522,6 @@ class global_arena final { lock_guard lock(mtx_); logger->info(" Arena size: {}", rmm::detail::bytes{upstream_block_.size()}); - logger->info(" # superblocks: {}", superblocks_.size()); if (!superblocks_.empty()) { logger->info(" Total size of superblocks: {}", @@ -564,6 +601,8 @@ class global_arena final { */ void coalesce(superblock&& sb) { + RMM_LOGGING_ASSERT(sb.is_valid()); + // Find the right place (in ascending address order) to insert the block. auto const next = superblocks_.lower_bound(sb.pointer()); auto const previous = next == superblocks_.cbegin() ? next : std::prev(next); diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 96baee083..7e3622bb7 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -21,15 +21,25 @@ #include #include +#include +#include #include namespace rmm::test { namespace { -using memory_span = rmm::mr::detail::arena::memory_span; -using block = rmm::mr::detail::arena::block; -using superblock = rmm::mr::detail::arena::superblock; -using arena_mr = rmm::mr::arena_memory_resource; +class mock_memory_resource { + public: + MOCK_METHOD(void*, allocate, (std::size_t)); + MOCK_METHOD(void, deallocate, (void*, std::size_t)); +}; + +using memory_span = rmm::mr::detail::arena::memory_span; +using block = rmm::mr::detail::arena::block; +using superblock = rmm::mr::detail::arena::superblock; +using global_arena = rmm::mr::detail::arena::global_arena; +using arena_mr = rmm::mr::arena_memory_resource; +using ::testing::Return; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) auto const fake_address = reinterpret_cast(1024L); @@ -40,6 +50,10 @@ auto const fake_address3 = reinterpret_cast(4194304L); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) auto const fake_address4 = reinterpret_cast(8388608L); +/** + * Test memory_span. + */ + TEST(ArenaTest, MemorySpan) // NOLINT { memory_span const ms{}; @@ -48,6 +62,10 @@ TEST(ArenaTest, MemorySpan) // NOLINT EXPECT_TRUE(ms2.is_valid()); } +/** + * Test block. + */ + TEST(ArenaTest, BlockFits) // NOLINT { block const b{fake_address, 1024}; @@ -84,6 +102,10 @@ TEST(ArenaTest, BlockMerge) // NOLINT EXPECT_EQ(merged.size(), 2048); } +/** + * Test superblock. + */ + TEST(ArenaTest, SuperblockEmpty) // NOLINT { superblock sb{fake_address3, 4194304}; @@ -213,6 +235,41 @@ TEST(ArenaTest, SuperblockMaxFree) // NOLINT EXPECT_EQ(b.size(), 2097152); } +/** + * Test global_arena. 
+ */ + +TEST(ArenaTest, GlobalArenaNullUpstream) // NOLINT +{ + auto construct_nullptr = []() { global_arena ga{nullptr, std::nullopt}; }; + EXPECT_THROW(construct_nullptr(), rmm::logic_error); // NOLINT(cppcoreguidelines-avoid-goto) +} + +TEST(ArenaTest, GlobalArenaAcquire) // NOLINT +{ + mock_memory_resource mock; + EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); + + global_arena ga{&mock, 8388608}; + + auto const sb = ga.acquire(256); + EXPECT_EQ(sb.pointer(), fake_address3); + EXPECT_EQ(sb.size(), 4194304); + EXPECT_TRUE(sb.empty()); + + auto const sb2 = ga.acquire(1024); + EXPECT_EQ(sb2.pointer(), fake_address4); + EXPECT_EQ(sb2.size(), 4194304); + EXPECT_TRUE(sb2.empty()); + + EXPECT_FALSE(ga.acquire(512).is_valid()); +} + +/** + * Test arena_memory_resource. + */ + TEST(ArenaTest, NullUpstream) // NOLINT { // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto) From ce633f282a4de331987de1cb310be2d55a2bd5d3 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Fri, 12 Nov 2021 08:59:33 -0800 Subject: [PATCH 10/35] more tests for global arena --- include/rmm/mr/device/detail/arena.hpp | 2 +- tests/mr/device/arena_mr_tests.cpp | 49 ++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index e16af1c5a..f16002d83 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -442,7 +442,7 @@ class global_arena final { } /** - * @brief Release a set of superblocks from a dying arena. + * @brief Release a map of superblocks from a dying arena. * * @param superblocks The set of superblocks. */ diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 7e3622bb7..a214fee17 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -266,6 +266,55 @@ TEST(ArenaTest, GlobalArenaAcquire) // NOLINT EXPECT_FALSE(ga.acquire(512).is_valid()); } +TEST(ArenaTest, GlobalArenaReleaseMergeNext) // NOLINT +{ + mock_memory_resource mock; + EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); + + global_arena ga{&mock, 8388608}; + + auto sb = ga.acquire(256); + ga.release(std::move(sb)); + auto* p = ga.allocate(8388608); + EXPECT_EQ(p, fake_address3); +} + +TEST(ArenaTest, GlobalArenaReleaseMergePrevious) // NOLINT +{ + mock_memory_resource mock; + EXPECT_CALL(mock, allocate(16777216)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 16777216)); + + global_arena ga{&mock, 16777216}; + + auto sb = ga.acquire(256); + auto sb2 = ga.acquire(1024); + ga.acquire(512); + ga.release(std::move(sb)); + ga.release(std::move(sb2)); + auto* p = ga.allocate(8388608); + EXPECT_EQ(p, fake_address3); +} + +TEST(ArenaTest, GlobalArenaReleaseMergePreviousAndNext) // NOLINT +{ + mock_memory_resource mock; + EXPECT_CALL(mock, allocate(16777216)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 16777216)); + + global_arena ga{&mock, 16777216}; + + auto sb = ga.acquire(256); + auto sb2 = ga.acquire(1024); + auto sb3 = ga.acquire(512); + ga.release(std::move(sb)); + ga.release(std::move(sb3)); + ga.release(std::move(sb2)); + auto* p = ga.allocate(16777216); + EXPECT_EQ(p, fake_address3); +} + /** * Test arena_memory_resource. 
*/ From 23f679c3c4247997c4656d83a73e54c33686ee8b Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Fri, 12 Nov 2021 09:04:40 -0800 Subject: [PATCH 11/35] add back defrag --- .../rmm/mr/device/arena_memory_resource.hpp | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 6bbbbecb5..b1b65640c 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -159,8 +159,13 @@ class arena_memory_resource final : public device_memory_resource { void* pointer = arena.allocate(bytes); if (pointer == nullptr) { - if (dump_log_on_failure_) { dump_memory_log(bytes); } - RMM_FAIL("Maximum pool size exceeded", rmm::out_of_memory); + write_lock lock(mtx_); + defragment(); + pointer = arena.allocate(bytes); + if (pointer == nullptr) { + if (dump_log_on_failure_) { dump_memory_log(bytes); } + RMM_FAIL("Maximum pool size exceeded", rmm::out_of_memory); + } } return pointer; @@ -218,6 +223,20 @@ class arena_memory_resource final : public device_memory_resource { global_arena_.deallocate_from_other_arena(ptr, bytes); } + /** + * @brief Defragment memory by returning all free blocks to the global arena. + */ + void defragment() + { + RMM_CUDA_TRY(cudaDeviceSynchronize()); + for (auto& thread_arena : thread_arenas_) { + thread_arena.second->clean(); + } + for (auto& stream_arena : stream_arenas_) { + stream_arena.second.clean(); + } + } + /** * @brief Get the arena associated with the current thread or the given stream. * From a5a4881a10846359af4e5a17e0cdac96b7d0eee5 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Mon, 15 Nov 2021 18:21:52 -0800 Subject: [PATCH 12/35] more tests --- include/rmm/mr/device/detail/arena.hpp | 66 ++++++++++++-------------- tests/mr/device/arena_mr_tests.cpp | 64 +++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 35 deletions(-) diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index f16002d83..14ce6cbe0 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -108,7 +108,7 @@ class block final : public memory_span { */ [[nodiscard]] bool fits(std::size_t sz) const { - RMM_LOGGING_ASSERT(is_valid()); + if (!is_valid()) { RMM_LOGGING_ASSERT(is_valid()); } RMM_LOGGING_ASSERT(sz > 0); return size() >= sz; } @@ -188,7 +188,7 @@ class superblock final : public memory_span { */ superblock(void* pointer, std::size_t size) : memory_span{pointer, size} { - RMM_LOGGING_ASSERT(size >= minimum_size); + RMM_LOGGING_ASSERT(size >= minimum_size / 2); free_blocks_.emplace(pointer, size); } @@ -413,8 +413,17 @@ class global_arena final { { lock_guard lock(mtx_); upstream_mr_->deallocate(upstream_block_.pointer(), upstream_block_.size()); + superblocks_.clear(); } + /** + * @brief Should allocation of `size` bytes be handled by the global arena directly? + * + * @param size The size in bytes of the allocation. + * @return bool True if the allocation should be handled by the global arena. + */ + bool handles(std::size_t size) const { return size > superblock::minimum_size / 2; } + /** * @brief Acquire a superblock that can fit a block of the given size. 
* @@ -464,11 +473,9 @@ class global_arena final { */ void* allocate(std::size_t size) { - if (handles(size)) { - lock_guard lock(mtx_); - return first_fit(size).pointer(); - } - return nullptr; + RMM_LOGGING_ASSERT(handles(size)); + lock_guard lock(mtx_); + return first_fit(size).pointer(); } /** @@ -482,14 +489,11 @@ class global_arena final { */ bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream) { - if (handles(size)) { - stream.synchronize_no_throw(); - - lock_guard lock(mtx_); - coalesce({ptr, size}); - return true; - } - return false; + RMM_LOGGING_ASSERT(handles(size)); + stream.synchronize_no_throw(); + lock_guard lock(mtx_); + coalesce({ptr, size}); + return true; } /** @@ -510,6 +514,7 @@ class global_arena final { }); if (iter == superblocks_.end()) { RMM_FAIL("allocation not found"); } iter->second.coalesce(b); + if (iter->second.empty()) { coalesce(std::move(superblocks_.extract(iter).mapped())); } } /** @@ -552,17 +557,11 @@ class global_arena final { { RMM_LOGGING_ASSERT(size >= superblock::minimum_size); upstream_block_ = {upstream_mr_->allocate(size), size}; - superblocks_.try_emplace(upstream_block_.pointer(), upstream_block_.pointer(), size); + if (!upstream_block_.is_valid()) { RMM_FAIL("Failed to allocate memory from upstream"); } + superblocks_.insert( + std::make_pair(upstream_block_.pointer(), superblock(upstream_block_.pointer(), size))); } - /** - * @brief Should allocation of `size` bytes be handled by the global arena directly? - * - * @param size The size in bytes of the allocation. - * @return bool True if the allocation should be handled by the global arena. - */ - bool handles(std::size_t size) const { return size > superblock::minimum_size / 2; } - /** * @brief Get the first superblock that can fit a block of at least `size` bytes. * @@ -588,7 +587,7 @@ class global_arena final { if (sb.empty() && sb.size() - sz >= superblock::minimum_size) { // Split the superblock and put the remainder back. 
auto [head, tail] = sb.split(sz); - superblocks_.try_emplace(tail.pointer(), std::move(tail)); + superblocks_.insert(std::make_pair(tail.pointer(), std::move(tail))); return std::move(head); } return sb; @@ -615,17 +614,17 @@ class global_arena final { auto prev_sb = std::move(superblocks_.extract(previous).mapped()); auto next_sb = std::move(superblocks_.extract(next).mapped()); auto merged = prev_sb.merge(sb).merge(next_sb); - superblocks_.try_emplace(merged.pointer(), std::move(merged)); + superblocks_.insert(std::make_pair(merged.pointer(), std::move(merged))); } else if (merge_prev) { auto prev_sb = std::move(superblocks_.extract(previous).mapped()); auto merged = prev_sb.merge(sb); - superblocks_.try_emplace(merged.pointer(), std::move(merged)); + superblocks_.insert(std::make_pair(merged.pointer(), std::move(merged))); } else if (merge_next) { auto next_sb = std::move(superblocks_.extract(next).mapped()); auto merged = sb.merge(next_sb); - superblocks_.try_emplace(merged.pointer(), std::move(merged)); + superblocks_.insert(std::make_pair(merged.pointer(), std::move(merged))); } else { - superblocks_.try_emplace(sb.pointer(), std::move(sb)); + superblocks_.insert(std::make_pair(sb.pointer(), std::move(sb))); } } @@ -674,9 +673,7 @@ class arena { */ void* allocate(std::size_t size) { - auto* ptr = global_arena_.allocate(size); - if (ptr != nullptr) { return ptr; } - + if (global_arena_.handles(size)) { return global_arena_.allocate(size); } lock_guard lock(mtx_); return get_block(size).pointer(); } @@ -692,8 +689,7 @@ class arena { */ bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream) { - if (global_arena_.deallocate(ptr, size, stream)) { return true; } - + if (global_arena_.handles(size)) { return global_arena_.deallocate(ptr, size, stream); } lock_guard lock(mtx_); return deallocate_from_superblock({ptr, size}); } @@ -795,7 +791,7 @@ class arena { auto sb = global_arena_.acquire(size); if (sb.is_valid()) { auto const b = sb.first_fit(size); - superblocks_.try_emplace(sb.pointer(), std::move(sb)); + superblocks_.insert(std::make_pair(sb.pointer(), std::move(sb))); return b; } return {}; diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index a214fee17..c71aa2bc0 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -315,6 +315,70 @@ TEST(ArenaTest, GlobalArenaReleaseMergePreviousAndNext) // NOLINT EXPECT_EQ(p, fake_address3); } +TEST(ArenaTest, GlobalArenaReleaseMultiple) // NOLINT +{ + mock_memory_resource mock; + EXPECT_CALL(mock, allocate(16777216)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 16777216)); + + global_arena ga{&mock, 16777216}; + + std::map superblocks{}; + auto sb = ga.acquire(256); + superblocks.insert(std::make_pair(sb.pointer(), std::move(sb))); + auto sb2 = ga.acquire(1024); + superblocks.insert(std::make_pair(sb2.pointer(), std::move(sb2))); + auto sb3 = ga.acquire(512); + superblocks.insert(std::make_pair(sb3.pointer(), std::move(sb3))); + ga.release(superblocks); + auto* p = ga.allocate(16777216); + EXPECT_EQ(p, fake_address3); +} + +TEST(ArenaTest, GlobalArenaAllocate) // NOLINT +{ + mock_memory_resource mock; + EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); + + global_arena ga{&mock, 8388608}; + + auto* ptr = ga.allocate(4194304); + EXPECT_EQ(ptr, fake_address3); + auto* ptr2 = ga.allocate(4194304); + EXPECT_EQ(ptr2, fake_address4); +} + +TEST(ArenaTest, 
GlobalArenaDeallocate) // NOLINT +{ + mock_memory_resource mock; + EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); + + global_arena ga{&mock, 8388608}; + + auto* ptr = ga.allocate(4194304); + EXPECT_EQ(ptr, fake_address3); + EXPECT_TRUE(ga.deallocate(ptr, 4194304, {})); + ptr = ga.allocate(4194304); + EXPECT_EQ(ptr, fake_address3); +} + +TEST(ArenaTest, GlobalArenaDeallocateFromOtherArena) // NOLINT +{ + mock_memory_resource mock; + EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); + + global_arena ga{&mock, 8388608}; + + auto sb = ga.acquire(512); + auto const b = sb.first_fit(512); + ga.release(std::move(sb)); + ga.deallocate_from_other_arena(b.pointer(), b.size()); + EXPECT_EQ(ga.allocate(8388608), fake_address3); +} + /** * Test arena_memory_resource. */ From f77fb7e01875f8153b87639917ed6352b1baf7c9 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 16 Nov 2021 10:25:55 -0800 Subject: [PATCH 13/35] add tests for arena --- .../rmm/mr/device/arena_memory_resource.hpp | 2 +- include/rmm/mr/device/detail/arena.hpp | 6 +- tests/mr/device/arena_mr_tests.cpp | 85 ++++++++++++++++++- 3 files changed, 85 insertions(+), 8 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index b1b65640c..c6b8c20c2 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -201,7 +201,7 @@ class arena_memory_resource final : public device_memory_resource { { stream.synchronize_no_throw(); - read_lock lock(mtx_); + write_lock lock(mtx_); if (use_per_thread_arena(stream)) { auto const id = std::this_thread::get_id(); diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 14ce6cbe0..707c4eb19 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -108,7 +108,7 @@ class block final : public memory_span { */ [[nodiscard]] bool fits(std::size_t sz) const { - if (!is_valid()) { RMM_LOGGING_ASSERT(is_valid()); } + RMM_LOGGING_ASSERT(is_valid()); RMM_LOGGING_ASSERT(sz > 0); return size() >= sz; } @@ -557,9 +557,7 @@ class global_arena final { { RMM_LOGGING_ASSERT(size >= superblock::minimum_size); upstream_block_ = {upstream_mr_->allocate(size), size}; - if (!upstream_block_.is_valid()) { RMM_FAIL("Failed to allocate memory from upstream"); } - superblocks_.insert( - std::make_pair(upstream_block_.pointer(), superblock(upstream_block_.pointer(), size))); + superblocks_.try_emplace(upstream_block_.pointer(), upstream_block_.pointer(), size); } /** diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index c71aa2bc0..5d18ef747 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -38,6 +38,7 @@ using memory_span = rmm::mr::detail::arena::memory_span; using block = rmm::mr::detail::arena::block; using superblock = rmm::mr::detail::arena::superblock; using global_arena = rmm::mr::detail::arena::global_arena; +using arena = rmm::mr::detail::arena::arena; using arena_mr = rmm::mr::arena_memory_resource; using ::testing::Return; @@ -288,7 +289,7 @@ TEST(ArenaTest, GlobalArenaReleaseMergePrevious) // NOLINT global_arena ga{&mock, 16777216}; - auto sb = ga.acquire(256); + auto sb = ga.acquire(256); auto sb2 = ga.acquire(1024); ga.acquire(512); ga.release(std::move(sb)); @@ -305,7 +306,7 @@ 
TEST(ArenaTest, GlobalArenaReleaseMergePreviousAndNext) // NOLINT global_arena ga{&mock, 16777216}; - auto sb = ga.acquire(256); + auto sb = ga.acquire(256); auto sb2 = ga.acquire(1024); auto sb3 = ga.acquire(512); ga.release(std::move(sb)); @@ -372,13 +373,91 @@ TEST(ArenaTest, GlobalArenaDeallocateFromOtherArena) // NOLINT global_arena ga{&mock, 8388608}; - auto sb = ga.acquire(512); + auto sb = ga.acquire(512); auto const b = sb.first_fit(512); ga.release(std::move(sb)); ga.deallocate_from_other_arena(b.pointer(), b.size()); EXPECT_EQ(ga.allocate(8388608), fake_address3); } +/** + * Test arena. + */ + +TEST(ArenaTest, ArenaAllocate) // NOLINT +{ + mock_memory_resource mock; + EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); + global_arena ga{&mock, 8388608}; + arena a{ga}; + + EXPECT_EQ(a.allocate(4194304), fake_address3); + EXPECT_EQ(a.allocate(256), fake_address4); +} + +TEST(ArenaTest, ArenaDeallocate) // NOLINT +{ + mock_memory_resource mock; + EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); + global_arena ga{&mock, 8388608}; + arena a{ga}; + + auto* ptr = a.allocate(4194304); + a.deallocate(ptr, 4194304, {}); + auto* ptr2 = a.allocate(256); + a.deallocate(ptr2, 256, {}); + EXPECT_EQ(a.allocate(8388608), fake_address3); +} + +TEST(ArenaTest, ArenaDeallocateMergePrevious) // NOLINT +{ + mock_memory_resource mock; + EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); + global_arena ga{&mock, 8388608}; + arena a{ga}; + + auto* ptr = a.allocate(256); + auto* ptr2 = a.allocate(256); + a.allocate(256); + a.deallocate(ptr, 256, {}); + a.deallocate(ptr2, 256, {}); + EXPECT_EQ(a.allocate(512), fake_address3); +} + +TEST(ArenaTest, ArenaDeallocateMergeNext) // NOLINT +{ + mock_memory_resource mock; + EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); + global_arena ga{&mock, 8388608}; + arena a{ga}; + + auto* ptr = a.allocate(256); + auto* ptr2 = a.allocate(256); + a.allocate(256); + a.deallocate(ptr2, 256, {}); + a.deallocate(ptr, 256, {}); + EXPECT_EQ(a.allocate(512), fake_address3); +} + +TEST(ArenaTest, ArenaDeallocateMergePreviousAndNext) // NOLINT +{ + mock_memory_resource mock; + EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); + global_arena ga{&mock, 8388608}; + arena a{ga}; + + auto* ptr = a.allocate(256); + auto* ptr2 = a.allocate(256); + a.deallocate(ptr, 256, {}); + a.deallocate(ptr2, 256, {}); + EXPECT_EQ(a.allocate(2048), fake_address3); +} + /** * Test arena_memory_resource. 
*/ From dd86082292ac5341163e78c63c9cb040a9124bc8 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 16 Nov 2021 10:39:13 -0800 Subject: [PATCH 14/35] remove alignment changes --- include/rmm/detail/aligned.hpp | 36 ------------------- .../rmm/mr/device/arena_memory_resource.hpp | 4 +-- include/rmm/mr/device/detail/arena.hpp | 3 +- tests/mr/device/arena_mr_tests.cpp | 3 +- 4 files changed, 6 insertions(+), 40 deletions(-) diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 19e69344d..321be53b5 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -62,18 +62,6 @@ constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcep return (value + (alignment - 1)) & ~(alignment - 1); } -/** - * @brief Align up to nearest multiple of the CUDA allocation alignment - * - * @param[in] v value to align - * - * @return Return the aligned value, as one would expect - */ -constexpr std::size_t align_up_cuda(std::size_t value) noexcept -{ - return align_up(value, CUDA_ALLOCATION_ALIGNMENT); -} - /** * @brief Align down to the nearest multiple of specified power of 2 * @@ -88,18 +76,6 @@ constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexc return value & ~(alignment - 1); } -/** - * @brief Align down to the nearest multiple of the CUDA allocation alignment - * - * @param[in] v value to align - * - * @return Return the aligned value, as one would expect - */ -constexpr std::size_t align_down_cuda(std::size_t value) noexcept -{ - return align_down(value, CUDA_ALLOCATION_ALIGNMENT); -} - /** * @brief Checks whether a value is aligned to a multiple of a specified power of 2 * @@ -114,18 +90,6 @@ constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept return value == align_down(value, alignment); } -/** - * @brief Checks whether a value is aligned to a multiple of the CUDA allocation alignment - * - * @param[in] v value to check for alignment - * - * @return true if aligned - */ -constexpr bool is_cuda_aligned(std::size_t value) noexcept -{ - return is_aligned(value, CUDA_ALLOCATION_ALIGNMENT); -} - inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index c6b8c20c2..b0fc92139 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -154,7 +154,7 @@ class arena_memory_resource final : public device_memory_resource { { if (bytes <= 0) { return nullptr; } - bytes = rmm::detail::align_up_cuda(bytes); + bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); auto& arena = get_arena(stream); void* pointer = arena.allocate(bytes); @@ -183,7 +183,7 @@ class arena_memory_resource final : public device_memory_resource { { if (ptr == nullptr || bytes <= 0) { return; } - bytes = rmm::detail::align_up_cuda(bytes); + bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); if (!get_arena(stream).deallocate(ptr, bytes, stream)) { deallocate_from_other_arena(ptr, bytes, stream); } diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 707c4eb19..7e1f21238 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -395,7 +395,8 @@ class global_arena final { : upstream_mr_{upstream_mr} { RMM_EXPECTS(nullptr 
!= upstream_mr_, "Unexpected null upstream pointer."); - auto const size = rmm::detail::align_down_cuda(arena_size.value_or(default_size())); + auto const size = rmm::detail::align_down(arena_size.value_or(default_size()), + rmm::detail::CUDA_ALLOCATION_ALIGNMENT); initialize(size); } diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 5d18ef747..f39dc5f2e 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -473,7 +473,8 @@ TEST(ArenaTest, AllocateNinetyPercent) // NOLINT EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) auto const free = rmm::detail::available_device_memory().first; auto const ninety_percent = - rmm::detail::align_up_cuda(static_cast(static_cast(free) * 0.9)); + rmm::detail::align_up(static_cast(static_cast(free) * 0.9), + rmm::detail::CUDA_ALLOCATION_ALIGNMENT); arena_mr mr(rmm::mr::get_current_device_resource(), ninety_percent); }()); } From 29ae23b85be3292024c231ef012b51faad5279d5 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 16 Nov 2021 12:46:56 -0800 Subject: [PATCH 15/35] small fixes --- include/rmm/mr/device/detail/arena.hpp | 42 +++++++++++++++++++------- tests/mr/device/arena_mr_tests.cpp | 12 ++++++-- 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 7e1f21238..07deb49c7 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -188,7 +188,7 @@ class superblock final : public memory_span { */ superblock(void* pointer, std::size_t size) : memory_span{pointer, size} { - RMM_LOGGING_ASSERT(size >= minimum_size / 2); + RMM_LOGGING_ASSERT(size > minimum_size / 2); free_blocks_.emplace(pointer, size); } @@ -321,6 +321,11 @@ class superblock final : public memory_span { RMM_LOGGING_ASSERT(b.is_valid()); RMM_LOGGING_ASSERT(contains(b)); + if (free_blocks_.empty()) { + free_blocks_.insert(b); + return; + } + // Find the right place (in ascending address order) to insert the block. auto const next = free_blocks_.lower_bound(b); auto const previous = next == free_blocks_.cbegin() ? next : std::prev(next); @@ -348,12 +353,12 @@ class superblock final : public memory_span { } /** - * @brief Find the max free block. - * @return the max free block. + * @brief Find the max free block size. + * @return the max free block size. 
*/ - [[nodiscard]] block max_free() const + [[nodiscard]] std::size_t max_free() const { - return *std::max_element(free_blocks_.cbegin(), free_blocks_.cend(), block_size_compare); + return std::max_element(free_blocks_.cbegin(), free_blocks_.cend(), block_size_compare)->size(); } private: @@ -366,7 +371,7 @@ inline auto max_free(std::map const& superblocks) { std::size_t size{}; for (auto const& kv : superblocks) { - size = std::max(size, kv.second.max_free().size()); + size = std::max(size, kv.second.max_free()); } return size; }; @@ -460,7 +465,7 @@ class global_arena final { { lock_guard lock(mtx_); while (!superblocks.empty()) { - auto&& sb = std::move(superblocks.extract(superblocks.cbegin()).mapped()); + auto sb = std::move(superblocks.extract(superblocks.cbegin()).mapped()); RMM_LOGGING_ASSERT(sb.is_valid()); coalesce(std::move(sb)); } @@ -514,8 +519,14 @@ class global_arena final { return kv.second.contains(b); }); if (iter == superblocks_.end()) { RMM_FAIL("allocation not found"); } - iter->second.coalesce(b); - if (iter->second.empty()) { coalesce(std::move(superblocks_.extract(iter).mapped())); } + + auto sb = std::move(superblocks_.extract(iter).mapped()); + sb.coalesce(b); + if (sb.empty()) { + coalesce(std::move(sb)); + } else { + superblocks_.insert(std::make_pair(sb.pointer(), std::move(sb))); + } } /** @@ -601,6 +612,11 @@ class global_arena final { { RMM_LOGGING_ASSERT(sb.is_valid()); + if (superblocks_.empty()) { + superblocks_.insert(std::make_pair(sb.pointer(), std::move(sb))); + return; + } + // Find the right place (in ascending address order) to insert the block. auto const next = superblocks_.lower_bound(sb.pointer()); auto const previous = next == superblocks_.cbegin() ? next : std::prev(next); @@ -773,9 +789,13 @@ class arena { }); if (iter == superblocks_.end()) { return false; } - auto& sb = iter->second; + auto sb = std::move(superblocks_.extract(iter).mapped()); sb.coalesce(b); - if (sb.empty()) { global_arena_.release(std::move(superblocks_.extract(iter).mapped())); } + if (sb.empty()) { + global_arena_.release(std::move(sb)); + } else { + superblocks_.insert(std::make_pair(sb.pointer(), std::move(sb))); + } return true; } diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index f39dc5f2e..049e3a2bd 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -198,6 +198,15 @@ TEST(ArenaTest, SuperblockFirstFit) // NOLINT EXPECT_EQ(b3.size(), 512); } +TEST(ArenaTest, SuperblockCoalesceAfterFull) // NOLINT +{ + superblock sb{fake_address3, 4194304}; + auto const b = sb.first_fit(2097152); + sb.first_fit(2097152); + sb.coalesce(b); + EXPECT_TRUE(sb.first_fit(2097152).is_valid()); +} + TEST(ArenaTest, SuperblockCoalesceMergeNext) // NOLINT { superblock sb{fake_address3, 4194304}; @@ -232,8 +241,7 @@ TEST(ArenaTest, SuperblockMaxFree) // NOLINT { superblock sb{fake_address3, 4194304}; sb.first_fit(2097152); - auto const b = sb.max_free(); - EXPECT_EQ(b.size(), 2097152); + EXPECT_EQ(sb.max_free(), 2097152); } /** From abd72260e56b1e17681a16d5bb4af49be3b01a19 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 16 Nov 2021 18:21:35 -0800 Subject: [PATCH 16/35] switch back to set, fix tests --- .../rmm/mr/device/arena_memory_resource.hpp | 25 +---- include/rmm/mr/device/detail/arena.hpp | 100 +++++++++--------- tests/mr/device/arena_mr_tests.cpp | 8 +- 3 files changed, 56 insertions(+), 77 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp 
b/include/rmm/mr/device/arena_memory_resource.hpp index b0fc92139..03d0b23ce 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -159,13 +159,8 @@ class arena_memory_resource final : public device_memory_resource { void* pointer = arena.allocate(bytes); if (pointer == nullptr) { - write_lock lock(mtx_); - defragment(); - pointer = arena.allocate(bytes); - if (pointer == nullptr) { - if (dump_log_on_failure_) { dump_memory_log(bytes); } - RMM_FAIL("Maximum pool size exceeded", rmm::out_of_memory); - } + if (dump_log_on_failure_) { dump_memory_log(bytes); } + RMM_FAIL("Maximum pool size exceeded", rmm::out_of_memory); } return pointer; @@ -201,7 +196,7 @@ class arena_memory_resource final : public device_memory_resource { { stream.synchronize_no_throw(); - write_lock lock(mtx_); + read_lock lock(mtx_); if (use_per_thread_arena(stream)) { auto const id = std::this_thread::get_id(); @@ -223,20 +218,6 @@ class arena_memory_resource final : public device_memory_resource { global_arena_.deallocate_from_other_arena(ptr, bytes); } - /** - * @brief Defragment memory by returning all free blocks to the global arena. - */ - void defragment() - { - RMM_CUDA_TRY(cudaDeviceSynchronize()); - for (auto& thread_arena : thread_arenas_) { - thread_arena.second->clean(); - } - for (auto& stream_arena : stream_arenas_) { - stream_arena.second.clean(); - } - } - /** * @brief Get the arena associated with the current thread or the given stream. * diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 07deb49c7..00327ee44 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -30,13 +30,11 @@ #include #include #include -#include #include #include #include #include #include -#include namespace rmm::mr::detail::arena { @@ -83,13 +81,13 @@ class memory_span { std::size_t size_{}; ///< Size in bytes. }; -/// Calculate the total size of a map of memory spans. +/// Calculate the total size of a set of memory spans. template -inline auto total_memory_size(std::map const& spans) +inline auto total_memory_size(std::set const& spans) { return std::accumulate( spans.cbegin(), spans.cend(), std::size_t{}, [](auto const& lhs, auto const& rhs) { - return lhs + rhs.second.size(); + return lhs + rhs.size(); }); } @@ -366,12 +364,12 @@ class superblock final : public memory_span { std::set free_blocks_{}; }; -/// Find the max free size from a map of superblocks. -inline auto max_free(std::map const& superblocks) +/// Find the max free size from a set of superblocks. +inline auto max_free(std::set const& superblocks) { std::size_t size{}; - for (auto const& kv : superblocks) { - size = std::max(size, kv.second.max_free()); + for (auto const& sb : superblocks) { + size = std::max(size, sb.max_free()); } return size; }; @@ -457,15 +455,15 @@ class global_arena final { } /** - * @brief Release a map of superblocks from a dying arena. + * @brief Release a set of superblocks from a dying arena. * * @param superblocks The set of superblocks. 
*/ - void release(std::map& superblocks) + void release(std::set& superblocks) { lock_guard lock(mtx_); while (!superblocks.empty()) { - auto sb = std::move(superblocks.extract(superblocks.cbegin()).mapped()); + auto sb = std::move(superblocks.extract(superblocks.cbegin()).value()); RMM_LOGGING_ASSERT(sb.is_valid()); coalesce(std::move(sb)); } @@ -515,17 +513,16 @@ class global_arena final { lock_guard lock(mtx_); block const b{ptr, bytes}; - auto iter = std::find_if(superblocks_.begin(), superblocks_.end(), [&](auto const& kv) { - return kv.second.contains(b); - }); - if (iter == superblocks_.end()) { RMM_FAIL("allocation not found"); } + auto const iter = std::find_if( + superblocks_.cbegin(), superblocks_.cend(), [&](auto const& sb) { return sb.contains(b); }); + if (iter == superblocks_.cend()) { RMM_FAIL("allocation not found"); } - auto sb = std::move(superblocks_.extract(iter).mapped()); + auto sb = std::move(superblocks_.extract(iter).value()); sb.coalesce(b); if (sb.empty()) { coalesce(std::move(sb)); } else { - superblocks_.insert(std::make_pair(sb.pointer(), std::move(sb))); + superblocks_.insert(std::move(sb)); } } @@ -569,7 +566,7 @@ class global_arena final { { RMM_LOGGING_ASSERT(size >= superblock::minimum_size); upstream_block_ = {upstream_mr_->allocate(size), size}; - superblocks_.try_emplace(upstream_block_.pointer(), upstream_block_.pointer(), size); + superblocks_.emplace(upstream_block_.pointer(), size); } /** @@ -587,17 +584,16 @@ class global_arena final { */ superblock first_fit(std::size_t size) { - auto const iter = std::find_if(superblocks_.cbegin(), - superblocks_.cend(), - [size](auto const& kv) { return kv.second.fits(size); }); + auto const iter = std::find_if( + superblocks_.cbegin(), superblocks_.cend(), [size](auto const& sb) { return sb.fits(size); }); if (iter == superblocks_.cend()) { return {}; } - auto sb = std::move(superblocks_.extract(iter).mapped()); + auto sb = std::move(superblocks_.extract(iter).value()); auto const sz = std::max(size, superblock::minimum_size); if (sb.empty() && sb.size() - sz >= superblock::minimum_size) { // Split the superblock and put the remainder back. auto [head, tail] = sb.split(sz); - superblocks_.insert(std::make_pair(tail.pointer(), std::move(tail))); + superblocks_.insert(std::move(tail)); return std::move(head); } return sb; @@ -613,33 +609,33 @@ class global_arena final { RMM_LOGGING_ASSERT(sb.is_valid()); if (superblocks_.empty()) { - superblocks_.insert(std::make_pair(sb.pointer(), std::move(sb))); + superblocks_.insert(std::move(sb)); return; } // Find the right place (in ascending address order) to insert the block. - auto const next = superblocks_.lower_bound(sb.pointer()); + auto const next = superblocks_.lower_bound(sb); auto const previous = next == superblocks_.cbegin() ? next : std::prev(next); // Coalesce with neighboring blocks. 
- bool const merge_prev = previous->second.is_contiguous_before(sb); - bool const merge_next = next != superblocks_.cend() && sb.is_contiguous_before(next->second); + bool const merge_prev = previous->is_contiguous_before(sb); + bool const merge_next = next != superblocks_.cend() && sb.is_contiguous_before(*next); if (merge_prev && merge_next) { - auto prev_sb = std::move(superblocks_.extract(previous).mapped()); - auto next_sb = std::move(superblocks_.extract(next).mapped()); + auto prev_sb = std::move(superblocks_.extract(previous).value()); + auto next_sb = std::move(superblocks_.extract(next).value()); auto merged = prev_sb.merge(sb).merge(next_sb); - superblocks_.insert(std::make_pair(merged.pointer(), std::move(merged))); + superblocks_.insert(std::move(merged)); } else if (merge_prev) { - auto prev_sb = std::move(superblocks_.extract(previous).mapped()); + auto prev_sb = std::move(superblocks_.extract(previous).value()); auto merged = prev_sb.merge(sb); - superblocks_.insert(std::make_pair(merged.pointer(), std::move(merged))); + superblocks_.insert(std::move(merged)); } else if (merge_next) { - auto next_sb = std::move(superblocks_.extract(next).mapped()); + auto next_sb = std::move(superblocks_.extract(next).value()); auto merged = sb.merge(next_sb); - superblocks_.insert(std::make_pair(merged.pointer(), std::move(merged))); + superblocks_.insert(std::move(merged)); } else { - superblocks_.insert(std::make_pair(sb.pointer(), std::move(sb))); + superblocks_.insert(std::move(sb)); } } @@ -647,8 +643,8 @@ class global_arena final { Upstream* upstream_mr_; /// Block allocated from upstream so that it can be quickly freed. block upstream_block_; - /// Address-ordered map of superblocks. - std::map superblocks_; + /// Address-ordered set of superblocks. + std::set superblocks_; /// Mutex for exclusive lock. mutable std::mutex mtx_; }; @@ -769,11 +765,14 @@ class arena { */ block first_fit(std::size_t size) { - for (auto&& kv : superblocks_) { - auto const b = kv.second.first_fit(size); - if (b.is_valid()) { return b; } - } - return {}; + auto const iter = std::find_if( + superblocks_.cbegin(), superblocks_.cend(), [size](auto const& sb) { return sb.fits(size); }); + if (iter == superblocks_.cend()) { return {}; } + + auto sb = std::move(superblocks_.extract(iter).value()); + auto const b = sb.first_fit(size); + superblocks_.insert(std::move(sb)); + return b; } /** @@ -784,17 +783,16 @@ class arena { */ bool deallocate_from_superblock(block const& b) { - auto iter = std::find_if(superblocks_.begin(), superblocks_.end(), [&](auto const& kv) { - return kv.second.contains(b); - }); - if (iter == superblocks_.end()) { return false; } + auto const iter = std::find_if( + superblocks_.cbegin(), superblocks_.cend(), [&](auto const& sb) { return sb.contains(b); }); + if (iter == superblocks_.cend()) { return false; } - auto sb = std::move(superblocks_.extract(iter).mapped()); + auto sb = std::move(superblocks_.extract(iter).value()); sb.coalesce(b); if (sb.empty()) { global_arena_.release(std::move(sb)); } else { - superblocks_.insert(std::make_pair(sb.pointer(), std::move(sb))); + superblocks_.insert(std::move(sb)); } return true; } @@ -810,7 +808,7 @@ class arena { auto sb = global_arena_.acquire(size); if (sb.is_valid()) { auto const b = sb.first_fit(size); - superblocks_.insert(std::make_pair(sb.pointer(), std::move(sb))); + superblocks_.insert(std::move(sb)); return b; } return {}; @@ -819,7 +817,7 @@ class arena { /// The global arena to allocate superblocks from. 
global_arena& global_arena_; /// Acquired superblocks. - std::map superblocks_; + std::set superblocks_; /// Mutex for exclusive lock. mutable std::mutex mtx_; }; diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 049e3a2bd..85bfd5c83 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -332,13 +332,13 @@ TEST(ArenaTest, GlobalArenaReleaseMultiple) // NOLINT global_arena ga{&mock, 16777216}; - std::map superblocks{}; + std::set superblocks{}; auto sb = ga.acquire(256); - superblocks.insert(std::make_pair(sb.pointer(), std::move(sb))); + superblocks.insert(std::move(sb)); auto sb2 = ga.acquire(1024); - superblocks.insert(std::make_pair(sb2.pointer(), std::move(sb2))); + superblocks.insert(std::move(sb2)); auto sb3 = ga.acquire(512); - superblocks.insert(std::make_pair(sb3.pointer(), std::move(sb3))); + superblocks.insert(std::move(sb3)); ga.release(superblocks); auto* p = ga.allocate(16777216); EXPECT_EQ(p, fake_address3); From 10771f59fb54a67c6306dfe991e6b0b5ebf07882 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 17 Nov 2021 16:53:10 -0800 Subject: [PATCH 17/35] stream synchronize before releasing superblock --- .../rmm/mr/device/arena_memory_resource.hpp | 18 ------------------ include/rmm/mr/device/detail/arena.hpp | 11 +++++++---- tests/mr/device/arena_mr_tests.cpp | 14 +++++++------- 3 files changed, 14 insertions(+), 29 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 03d0b23ce..87c2a72db 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -92,24 +92,6 @@ class arena_memory_resource final : public device_memory_resource { } } - /** - * @brief Construct an `arena_memory_resource`. - * - * @throws rmm::logic_error if `upstream_mr == nullptr`. - * - * @param upstream_mr The memory resource from which to allocate blocks for the pool. - * @param arena_size Size in bytes of the global arena. Defaults to all the available memory on - * the current device. - * @param max_size Unused. - * @deprecated Use the version without the max size. - */ - arena_memory_resource(Upstream* upstream_mr, - std::optional arena_size, - std::optional max_size) - : arena_memory_resource{upstream_mr, arena_size, false} - { - } - ~arena_memory_resource() override = default; // Disable copy (and move) semantics. diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 00327ee44..c988efe16 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -446,10 +446,12 @@ class global_arena final { * @brief Release a superblock. * * @param s Superblock to be released. + * @param stream The stream to synchronize on before releasing. */ - void release(superblock&& sb) + void release(superblock&& sb, cuda_stream_view stream) { RMM_LOGGING_ASSERT(sb.is_valid()); + stream.synchronize_no_throw(); lock_guard lock(mtx_); coalesce(std::move(sb)); } @@ -702,7 +704,7 @@ class arena { { if (global_arena_.handles(size)) { return global_arena_.deallocate(ptr, size, stream); } lock_guard lock(mtx_); - return deallocate_from_superblock({ptr, size}); + return deallocate_from_superblock({ptr, size}, stream); } /** @@ -779,9 +781,10 @@ class arena { * @brief Deallocate a block from the superblock it belongs to. * * @param b The block to deallocate. + * @param stream The stream to use for deallocation. 
* @return true if the block is found. */ - bool deallocate_from_superblock(block const& b) + bool deallocate_from_superblock(block const& b, cuda_stream_view stream) { auto const iter = std::find_if( superblocks_.cbegin(), superblocks_.cend(), [&](auto const& sb) { return sb.contains(b); }); @@ -790,7 +793,7 @@ class arena { auto sb = std::move(superblocks_.extract(iter).value()); sb.coalesce(b); if (sb.empty()) { - global_arena_.release(std::move(sb)); + global_arena_.release(std::move(sb), stream); } else { superblocks_.insert(std::move(sb)); } diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 85bfd5c83..87acc2a67 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -284,7 +284,7 @@ TEST(ArenaTest, GlobalArenaReleaseMergeNext) // NOLINT global_arena ga{&mock, 8388608}; auto sb = ga.acquire(256); - ga.release(std::move(sb)); + ga.release(std::move(sb), {}); auto* p = ga.allocate(8388608); EXPECT_EQ(p, fake_address3); } @@ -300,8 +300,8 @@ TEST(ArenaTest, GlobalArenaReleaseMergePrevious) // NOLINT auto sb = ga.acquire(256); auto sb2 = ga.acquire(1024); ga.acquire(512); - ga.release(std::move(sb)); - ga.release(std::move(sb2)); + ga.release(std::move(sb), {}); + ga.release(std::move(sb2), {}); auto* p = ga.allocate(8388608); EXPECT_EQ(p, fake_address3); } @@ -317,9 +317,9 @@ TEST(ArenaTest, GlobalArenaReleaseMergePreviousAndNext) // NOLINT auto sb = ga.acquire(256); auto sb2 = ga.acquire(1024); auto sb3 = ga.acquire(512); - ga.release(std::move(sb)); - ga.release(std::move(sb3)); - ga.release(std::move(sb2)); + ga.release(std::move(sb), {}); + ga.release(std::move(sb3), {}); + ga.release(std::move(sb2), {}); auto* p = ga.allocate(16777216); EXPECT_EQ(p, fake_address3); } @@ -383,7 +383,7 @@ TEST(ArenaTest, GlobalArenaDeallocateFromOtherArena) // NOLINT auto sb = ga.acquire(512); auto const b = sb.first_fit(512); - ga.release(std::move(sb)); + ga.release(std::move(sb), {}); ga.deallocate_from_other_arena(b.pointer(), b.size()); EXPECT_EQ(ga.allocate(8388608), fake_address3); } From c16f026a0a3a7c703e9017d002f9a5d3e55addd8 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 17 Nov 2021 18:22:17 -0800 Subject: [PATCH 18/35] update docs --- .../rmm/mr/device/arena_memory_resource.hpp | 18 ++++++++++-------- include/rmm/mr/device/detail/arena.hpp | 16 +++++++--------- tests/mr/device/arena_mr_tests.cpp | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 87c2a72db..59ba968ff 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -78,9 +78,9 @@ class arena_memory_resource final : public device_memory_resource { * * @throws rmm::logic_error if `upstream_mr == nullptr`. * - * @param upstream_mr The memory resource from which to allocate blocks for the pool. - * @param arena_size Size in bytes of the global arena. Defaults to all the available memory on - * the current device. + * @param upstream_mr The memory resource from which to allocate blocks for the global arena. + * @param arena_size Size in bytes of the global arena. Defaults to half of the available memory + * on the current device. */ explicit arena_memory_resource(Upstream* upstream_mr, std::optional arena_size = std::nullopt, @@ -126,7 +126,7 @@ class arena_memory_resource final : public device_memory_resource { * * The returned pointer has at least 256-byte alignment. 
* - * @throws `std::bad_alloc` if the requested allocation could not be fulfilled. + * @throws `rmm::out_of_memory` if no more memory is available for the requested size. * * @param bytes The size in bytes of the allocation. * @param stream The stream to associate this allocation with. @@ -153,7 +153,7 @@ class arena_memory_resource final : public device_memory_resource { * * @param ptr Pointer to be deallocated. * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. + * value of `bytes` that was passed to the `allocate` call that returned `ptr`. * @param stream Stream on which to perform deallocation. */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override @@ -176,19 +176,21 @@ class arena_memory_resource final : public device_memory_resource { */ void deallocate_from_other_arena(void* ptr, std::size_t bytes, cuda_stream_view stream) { + // Since we are returning this memory to another stream, we need to make sure the current stream + // is caught up. stream.synchronize_no_throw(); - read_lock lock(mtx_); + write_lock lock(mtx_); if (use_per_thread_arena(stream)) { auto const id = std::this_thread::get_id(); - for (auto&& kv : thread_arenas_) { + for (auto const& kv : thread_arenas_) { // If the arena does not belong to the current thread, try to deallocate from it, and return // if successful. if (kv.first != id && kv.second->deallocate(ptr, bytes, stream)) { return; } } } else { - for (auto&& kv : stream_arenas_) { + for (auto& kv : stream_arenas_) { // If the arena does not belong to the current stream, try to deallocate from it, and return // if successful. if (stream.value() != kv.first && kv.second.deallocate(ptr, bytes, stream)) { return; } diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index c988efe16..5f2bfc501 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -134,7 +134,7 @@ class block final : public memory_span { [[nodiscard]] std::pair split(std::size_t sz) const { RMM_LOGGING_ASSERT(is_valid()); - RMM_LOGGING_ASSERT(size() >= sz); + RMM_LOGGING_ASSERT(size() > sz); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) return {{pointer(), sz}, {pointer() + sz, size() - sz}}; } @@ -149,8 +149,6 @@ class block final : public memory_span { */ [[nodiscard]] block merge(block const& b) const { - RMM_LOGGING_ASSERT(is_valid()); - RMM_LOGGING_ASSERT(b.is_valid()); RMM_LOGGING_ASSERT(is_contiguous_before(b)); return {pointer(), size() + b.size()}; } @@ -313,7 +311,7 @@ class superblock final : public memory_span { * * @param b The block to coalesce. */ - void coalesce(block const& b) + void coalesce(block const& b) // NOLINT(readability-function-cognitive-complexity) { RMM_LOGGING_ASSERT(is_valid()); RMM_LOGGING_ASSERT(b.is_valid()); @@ -417,7 +415,6 @@ class global_arena final { { lock_guard lock(mtx_); upstream_mr_->deallocate(upstream_block_.pointer(), upstream_block_.size()); - superblocks_.clear(); } /** @@ -491,15 +488,13 @@ class global_arena final { * @param size The size in bytes of the allocation. This must be equal to the value of `size` * that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation. - * @return bool true if the allocation is found, false otherwise. 
*/ - bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream) + void deallocate(void* ptr, std::size_t size, cuda_stream_view stream) { RMM_LOGGING_ASSERT(handles(size)); stream.synchronize_no_throw(); lock_guard lock(mtx_); coalesce({ptr, size}); - return true; } /** @@ -702,7 +697,10 @@ class arena { */ bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream) { - if (global_arena_.handles(size)) { return global_arena_.deallocate(ptr, size, stream); } + if (global_arena_.handles(size)) { + global_arena_.deallocate(ptr, size, stream); + return true; + } lock_guard lock(mtx_); return deallocate_from_superblock({ptr, size}, stream); } diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 87acc2a67..44aec7398 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -368,7 +368,7 @@ TEST(ArenaTest, GlobalArenaDeallocate) // NOLINT auto* ptr = ga.allocate(4194304); EXPECT_EQ(ptr, fake_address3); - EXPECT_TRUE(ga.deallocate(ptr, 4194304, {})); + ga.deallocate(ptr, 4194304, {}); ptr = ga.allocate(4194304); EXPECT_EQ(ptr, fake_address3); } From f3e687515880415aefe37a7e16f1042f7cf12efa Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Thu, 18 Nov 2021 09:35:26 -0800 Subject: [PATCH 19/35] use byte literals in tests --- tests/mr/device/arena_mr_tests.cpp | 253 +++++++++++++++-------------- 1 file changed, 127 insertions(+), 126 deletions(-) diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 44aec7398..a251fee08 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -20,6 +20,7 @@ #include #include #include +#include "../../byte_literals.hpp" #include #include @@ -43,13 +44,13 @@ using arena_mr = rmm::mr::arena_memory_resource(1024L); +auto const fake_address = reinterpret_cast(1_KiB); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) -auto const fake_address2 = reinterpret_cast(2048L); +auto const fake_address2 = reinterpret_cast(2_KiB); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) -auto const fake_address3 = reinterpret_cast(4194304L); +auto const fake_address3 = reinterpret_cast(4_MiB); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) -auto const fake_address4 = reinterpret_cast(8388608L); +auto const fake_address4 = reinterpret_cast(8_MiB); /** * Test memory_span. 
@@ -69,38 +70,38 @@ TEST(ArenaTest, MemorySpan) // NOLINT TEST(ArenaTest, BlockFits) // NOLINT { - block const b{fake_address, 1024}; - EXPECT_TRUE(b.fits(1024)); - EXPECT_FALSE(b.fits(1025)); + block const b{fake_address, 1_KiB}; + EXPECT_TRUE(b.fits(1_KiB)); + EXPECT_FALSE(b.fits(1_KiB + 1)); } TEST(ArenaTest, BlockIsContiguousBefore) // NOLINT { - block const b{fake_address, 1024}; + block const b{fake_address, 1_KiB}; block const b2{fake_address2, 256}; EXPECT_TRUE(b.is_contiguous_before(b2)); block const b3{fake_address, 512}; - block const b4{fake_address2, 1024}; + block const b4{fake_address2, 1_KiB}; EXPECT_FALSE(b3.is_contiguous_before(b4)); } TEST(ArenaTest, BlockSplit) // NOLINT { - block const b{fake_address, 2048}; - auto const [head, tail] = b.split(1024); + block const b{fake_address, 2_KiB}; + auto const [head, tail] = b.split(1_KiB); EXPECT_EQ(head.pointer(), fake_address); - EXPECT_EQ(head.size(), 1024); + EXPECT_EQ(head.size(), 1_KiB); EXPECT_EQ(tail.pointer(), fake_address2); - EXPECT_EQ(tail.size(), 1024); + EXPECT_EQ(tail.size(), 1_KiB); } TEST(ArenaTest, BlockMerge) // NOLINT { - block const b{fake_address, 1024}; - block const b2{fake_address2, 1024}; + block const b{fake_address, 1_KiB}; + block const b2{fake_address2, 1_KiB}; auto const merged = b.merge(b2); EXPECT_EQ(merged.pointer(), fake_address); - EXPECT_EQ(merged.size(), 2048); + EXPECT_EQ(merged.size(), 2_KiB); } /** @@ -109,7 +110,7 @@ TEST(ArenaTest, BlockMerge) // NOLINT TEST(ArenaTest, SuperblockEmpty) // NOLINT { - superblock sb{fake_address3, 4194304}; + superblock sb{fake_address3, 4_MiB}; EXPECT_TRUE(sb.empty()); sb.first_fit(256); EXPECT_FALSE(sb.empty()); @@ -117,14 +118,14 @@ TEST(ArenaTest, SuperblockEmpty) // NOLINT TEST(ArenaTest, SuperblockContains) // NOLINT { - superblock const sb{fake_address3, 4194304}; - block const b{fake_address, 2048}; + superblock const sb{fake_address3, 4_MiB}; + block const b{fake_address, 2_KiB}; EXPECT_FALSE(sb.contains(b)); - block const b2{fake_address3, 1024}; + block const b2{fake_address3, 1_KiB}; EXPECT_TRUE(sb.contains(b2)); - block const b3{fake_address3, 4194305}; + block const b3{fake_address3, 4_MiB + 1}; EXPECT_FALSE(sb.contains(b3)); - block const b4{fake_address3, 4194304}; + block const b4{fake_address3, 4_MiB}; EXPECT_TRUE(sb.contains(b4)); block const b5{fake_address4, 256}; EXPECT_FALSE(sb.contains(b5)); @@ -132,21 +133,21 @@ TEST(ArenaTest, SuperblockContains) // NOLINT TEST(ArenaTest, SuperblockFits) // NOLINT { - superblock sb{fake_address3, 4194304}; - EXPECT_TRUE(sb.fits(4194304)); - EXPECT_FALSE(sb.fits(4194305)); + superblock sb{fake_address3, 4_MiB}; + EXPECT_TRUE(sb.fits(4_MiB)); + EXPECT_FALSE(sb.fits(4_MiB + 1)); - auto const b = sb.first_fit(1048576); - sb.first_fit(1048576); + auto const b = sb.first_fit(1_MiB); + sb.first_fit(1_MiB); sb.coalesce(b); - EXPECT_TRUE(sb.fits(2097152)); - EXPECT_FALSE(sb.fits(2097153)); + EXPECT_TRUE(sb.fits(2_MiB)); + EXPECT_FALSE(sb.fits(2_MiB + 1)); } TEST(ArenaTest, SuperblockIsContiguousBefore) // NOLINT { - superblock sb{fake_address3, 4194304}; - superblock sb2{fake_address4, 4194304}; + superblock sb{fake_address3, 4_MiB}; + superblock sb2{fake_address4, 4_MiB}; EXPECT_TRUE(sb.is_contiguous_before(sb2)); auto const b = sb.first_fit(256); @@ -154,7 +155,7 @@ TEST(ArenaTest, SuperblockIsContiguousBefore) // NOLINT sb.coalesce(b); EXPECT_TRUE(sb.is_contiguous_before(sb2)); - auto const b2 = sb2.first_fit(1024); + auto const b2 = sb2.first_fit(1_KiB); EXPECT_FALSE(sb.is_contiguous_before(sb2)); 
sb2.coalesce(b2); EXPECT_TRUE(sb.is_contiguous_before(sb2)); @@ -162,36 +163,36 @@ TEST(ArenaTest, SuperblockIsContiguousBefore) // NOLINT TEST(ArenaTest, SuperblockSplit) // NOLINT { - superblock sb{fake_address3, 8388608}; - auto const [head, tail] = sb.split(4194304); + superblock sb{fake_address3, 8_MiB}; + auto const [head, tail] = sb.split(4_MiB); EXPECT_EQ(head.pointer(), fake_address3); - EXPECT_EQ(head.size(), 4194304); + EXPECT_EQ(head.size(), 4_MiB); EXPECT_TRUE(head.empty()); EXPECT_EQ(tail.pointer(), fake_address4); - EXPECT_EQ(tail.size(), 4194304); + EXPECT_EQ(tail.size(), 4_MiB); EXPECT_TRUE(tail.empty()); } TEST(ArenaTest, SuperblockMerge) // NOLINT { - superblock sb{fake_address3, 4194304}; - superblock sb2{fake_address4, 4194304}; + superblock sb{fake_address3, 4_MiB}; + superblock sb2{fake_address4, 4_MiB}; auto const merged = sb.merge(sb2); EXPECT_EQ(merged.pointer(), fake_address3); - EXPECT_EQ(merged.size(), 8388608); + EXPECT_EQ(merged.size(), 8_MiB); EXPECT_TRUE(merged.empty()); } TEST(ArenaTest, SuperblockFirstFit) // NOLINT { - superblock sb{fake_address3, 4194304}; - auto const b = sb.first_fit(1024); + superblock sb{fake_address3, 4_MiB}; + auto const b = sb.first_fit(1_KiB); EXPECT_EQ(b.pointer(), fake_address3); - EXPECT_EQ(b.size(), 1024); - auto const b2 = sb.first_fit(2048); + EXPECT_EQ(b.size(), 1_KiB); + auto const b2 = sb.first_fit(2_KiB); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - EXPECT_EQ(b2.pointer(), static_cast(fake_address3) + 1024); - EXPECT_EQ(b2.size(), 2048); + EXPECT_EQ(b2.pointer(), static_cast(fake_address3) + 1_KiB); + EXPECT_EQ(b2.size(), 2_KiB); sb.coalesce(b); auto const b3 = sb.first_fit(512); EXPECT_EQ(b3.pointer(), fake_address3); @@ -200,48 +201,48 @@ TEST(ArenaTest, SuperblockFirstFit) // NOLINT TEST(ArenaTest, SuperblockCoalesceAfterFull) // NOLINT { - superblock sb{fake_address3, 4194304}; - auto const b = sb.first_fit(2097152); - sb.first_fit(2097152); + superblock sb{fake_address3, 4_MiB}; + auto const b = sb.first_fit(2_MiB); + sb.first_fit(2_MiB); sb.coalesce(b); - EXPECT_TRUE(sb.first_fit(2097152).is_valid()); + EXPECT_TRUE(sb.first_fit(2_MiB).is_valid()); } TEST(ArenaTest, SuperblockCoalesceMergeNext) // NOLINT { - superblock sb{fake_address3, 4194304}; - auto const b = sb.first_fit(2097152); + superblock sb{fake_address3, 4_MiB}; + auto const b = sb.first_fit(2_MiB); sb.coalesce(b); - EXPECT_TRUE(sb.first_fit(4194304).is_valid()); + EXPECT_TRUE(sb.first_fit(4_MiB).is_valid()); } TEST(ArenaTest, SuperblockCoalesceMergePrevious) // NOLINT { - superblock sb{fake_address3, 4194304}; - auto const b = sb.first_fit(1024); - auto const b2 = sb.first_fit(1024); - sb.first_fit(1024); + superblock sb{fake_address3, 4_MiB}; + auto const b = sb.first_fit(1_KiB); + auto const b2 = sb.first_fit(1_KiB); + sb.first_fit(1_KiB); sb.coalesce(b); sb.coalesce(b2); - auto const b3 = sb.first_fit(2048); + auto const b3 = sb.first_fit(2_KiB); EXPECT_EQ(b3.pointer(), fake_address3); } TEST(ArenaTest, SuperblockCoalesceMergePreviousAndNext) // NOLINT { - superblock sb{fake_address3, 4194304}; - auto const b = sb.first_fit(1024); - auto const b2 = sb.first_fit(1024); + superblock sb{fake_address3, 4_MiB}; + auto const b = sb.first_fit(1_KiB); + auto const b2 = sb.first_fit(1_KiB); sb.coalesce(b); sb.coalesce(b2); - EXPECT_TRUE(sb.first_fit(4194304).is_valid()); + EXPECT_TRUE(sb.first_fit(4_MiB).is_valid()); } TEST(ArenaTest, SuperblockMaxFree) // NOLINT { - superblock sb{fake_address3, 4194304}; - sb.first_fit(2097152); - 
EXPECT_EQ(sb.max_free(), 2097152); + superblock sb{fake_address3, 4_MiB}; + sb.first_fit(2_MiB); + EXPECT_EQ(sb.max_free(), 2_MiB); } /** @@ -257,19 +258,19 @@ TEST(ArenaTest, GlobalArenaNullUpstream) // NOLINT TEST(ArenaTest, GlobalArenaAcquire) // NOLINT { mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); + EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); - global_arena ga{&mock, 8388608}; + global_arena ga{&mock, 8_MiB}; auto const sb = ga.acquire(256); EXPECT_EQ(sb.pointer(), fake_address3); - EXPECT_EQ(sb.size(), 4194304); + EXPECT_EQ(sb.size(), 4_MiB); EXPECT_TRUE(sb.empty()); - auto const sb2 = ga.acquire(1024); + auto const sb2 = ga.acquire(1_KiB); EXPECT_EQ(sb2.pointer(), fake_address4); - EXPECT_EQ(sb2.size(), 4194304); + EXPECT_EQ(sb2.size(), 4_MiB); EXPECT_TRUE(sb2.empty()); EXPECT_FALSE(ga.acquire(512).is_valid()); @@ -278,114 +279,114 @@ TEST(ArenaTest, GlobalArenaAcquire) // NOLINT TEST(ArenaTest, GlobalArenaReleaseMergeNext) // NOLINT { mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); + EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); - global_arena ga{&mock, 8388608}; + global_arena ga{&mock, 8_MiB}; auto sb = ga.acquire(256); ga.release(std::move(sb), {}); - auto* p = ga.allocate(8388608); + auto* p = ga.allocate(8_MiB); EXPECT_EQ(p, fake_address3); } TEST(ArenaTest, GlobalArenaReleaseMergePrevious) // NOLINT { mock_memory_resource mock; - EXPECT_CALL(mock, allocate(16777216)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 16777216)); + EXPECT_CALL(mock, allocate(16_MiB)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 16_MiB)); - global_arena ga{&mock, 16777216}; + global_arena ga{&mock, 16_MiB}; auto sb = ga.acquire(256); - auto sb2 = ga.acquire(1024); + auto sb2 = ga.acquire(1_KiB); ga.acquire(512); ga.release(std::move(sb), {}); ga.release(std::move(sb2), {}); - auto* p = ga.allocate(8388608); + auto* p = ga.allocate(8_MiB); EXPECT_EQ(p, fake_address3); } TEST(ArenaTest, GlobalArenaReleaseMergePreviousAndNext) // NOLINT { mock_memory_resource mock; - EXPECT_CALL(mock, allocate(16777216)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 16777216)); + EXPECT_CALL(mock, allocate(16_MiB)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 16_MiB)); - global_arena ga{&mock, 16777216}; + global_arena ga{&mock, 16_MiB}; auto sb = ga.acquire(256); - auto sb2 = ga.acquire(1024); + auto sb2 = ga.acquire(1_KiB); auto sb3 = ga.acquire(512); ga.release(std::move(sb), {}); ga.release(std::move(sb3), {}); ga.release(std::move(sb2), {}); - auto* p = ga.allocate(16777216); + auto* p = ga.allocate(16_MiB); EXPECT_EQ(p, fake_address3); } TEST(ArenaTest, GlobalArenaReleaseMultiple) // NOLINT { mock_memory_resource mock; - EXPECT_CALL(mock, allocate(16777216)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 16777216)); + EXPECT_CALL(mock, allocate(16_MiB)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 16_MiB)); - global_arena ga{&mock, 16777216}; + global_arena ga{&mock, 16_MiB}; std::set superblocks{}; auto sb = ga.acquire(256); 
superblocks.insert(std::move(sb)); - auto sb2 = ga.acquire(1024); + auto sb2 = ga.acquire(1_KiB); superblocks.insert(std::move(sb2)); auto sb3 = ga.acquire(512); superblocks.insert(std::move(sb3)); ga.release(superblocks); - auto* p = ga.allocate(16777216); + auto* p = ga.allocate(16_MiB); EXPECT_EQ(p, fake_address3); } TEST(ArenaTest, GlobalArenaAllocate) // NOLINT { mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); + EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); - global_arena ga{&mock, 8388608}; + global_arena ga{&mock, 8_MiB}; - auto* ptr = ga.allocate(4194304); + auto* ptr = ga.allocate(4_MiB); EXPECT_EQ(ptr, fake_address3); - auto* ptr2 = ga.allocate(4194304); + auto* ptr2 = ga.allocate(4_MiB); EXPECT_EQ(ptr2, fake_address4); } TEST(ArenaTest, GlobalArenaDeallocate) // NOLINT { mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); + EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); - global_arena ga{&mock, 8388608}; + global_arena ga{&mock, 8_MiB}; - auto* ptr = ga.allocate(4194304); + auto* ptr = ga.allocate(4_MiB); EXPECT_EQ(ptr, fake_address3); - ga.deallocate(ptr, 4194304, {}); - ptr = ga.allocate(4194304); + ga.deallocate(ptr, 4_MiB, {}); + ptr = ga.allocate(4_MiB); EXPECT_EQ(ptr, fake_address3); } TEST(ArenaTest, GlobalArenaDeallocateFromOtherArena) // NOLINT { mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); + EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); - global_arena ga{&mock, 8388608}; + global_arena ga{&mock, 8_MiB}; auto sb = ga.acquire(512); auto const b = sb.first_fit(512); ga.release(std::move(sb), {}); ga.deallocate_from_other_arena(b.pointer(), b.size()); - EXPECT_EQ(ga.allocate(8388608), fake_address3); + EXPECT_EQ(ga.allocate(8_MiB), fake_address3); } /** @@ -395,36 +396,36 @@ TEST(ArenaTest, GlobalArenaDeallocateFromOtherArena) // NOLINT TEST(ArenaTest, ArenaAllocate) // NOLINT { mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); - global_arena ga{&mock, 8388608}; + EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); + global_arena ga{&mock, 8_MiB}; arena a{ga}; - EXPECT_EQ(a.allocate(4194304), fake_address3); + EXPECT_EQ(a.allocate(4_MiB), fake_address3); EXPECT_EQ(a.allocate(256), fake_address4); } TEST(ArenaTest, ArenaDeallocate) // NOLINT { mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); - global_arena ga{&mock, 8388608}; + EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); + global_arena ga{&mock, 8_MiB}; arena a{ga}; - auto* ptr = a.allocate(4194304); - a.deallocate(ptr, 4194304, {}); + auto* ptr = a.allocate(4_MiB); + a.deallocate(ptr, 4_MiB, {}); auto* ptr2 = a.allocate(256); a.deallocate(ptr2, 256, {}); - EXPECT_EQ(a.allocate(8388608), fake_address3); + 
EXPECT_EQ(a.allocate(8_MiB), fake_address3); } TEST(ArenaTest, ArenaDeallocateMergePrevious) // NOLINT { mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); - global_arena ga{&mock, 8388608}; + EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); + global_arena ga{&mock, 8_MiB}; arena a{ga}; auto* ptr = a.allocate(256); @@ -438,9 +439,9 @@ TEST(ArenaTest, ArenaDeallocateMergePrevious) // NOLINT TEST(ArenaTest, ArenaDeallocateMergeNext) // NOLINT { mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); - global_arena ga{&mock, 8388608}; + EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); + global_arena ga{&mock, 8_MiB}; arena a{ga}; auto* ptr = a.allocate(256); @@ -454,16 +455,16 @@ TEST(ArenaTest, ArenaDeallocateMergeNext) // NOLINT TEST(ArenaTest, ArenaDeallocateMergePreviousAndNext) // NOLINT { mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8388608)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8388608)); - global_arena ga{&mock, 8388608}; + EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); + global_arena ga{&mock, 8_MiB}; arena a{ga}; auto* ptr = a.allocate(256); auto* ptr2 = a.allocate(256); a.deallocate(ptr, 256, {}); a.deallocate(ptr2, 256, {}); - EXPECT_EQ(a.allocate(2048), fake_address3); + EXPECT_EQ(a.allocate(2_KiB), fake_address3); } /** @@ -492,11 +493,11 @@ TEST(ArenaTest, SmallMediumLarge) // NOLINT EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) arena_mr mr(rmm::mr::get_current_device_resource()); auto* small = mr.allocate(256); - auto* medium = mr.allocate(1U << 26U); + auto* medium = mr.allocate(64_MiB); auto const free = rmm::detail::available_device_memory().first; auto* large = mr.allocate(free / 3); mr.deallocate(small, 256); - mr.deallocate(medium, 1U << 26U); + mr.deallocate(medium, 64_MiB); mr.deallocate(large, free / 3); }()); } From cb25f74088ed232358607571a1e81eec6957b402 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Thu, 18 Nov 2021 10:21:38 -0800 Subject: [PATCH 20/35] fix overflow bug --- include/rmm/mr/device/arena_memory_resource.hpp | 6 +++--- include/rmm/mr/device/detail/arena.hpp | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 59ba968ff..c37f6f19b 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -180,17 +180,17 @@ class arena_memory_resource final : public device_memory_resource { // is caught up. stream.synchronize_no_throw(); - write_lock lock(mtx_); + read_lock lock(mtx_); if (use_per_thread_arena(stream)) { auto const id = std::this_thread::get_id(); - for (auto const& kv : thread_arenas_) { + for (auto&& kv : thread_arenas_) { // If the arena does not belong to the current thread, try to deallocate from it, and return // if successful. 
if (kv.first != id && kv.second->deallocate(ptr, bytes, stream)) { return; } } } else { - for (auto& kv : stream_arenas_) { + for (auto&& kv : stream_arenas_) { // If the arena does not belong to the current stream, try to deallocate from it, and return // if successful. if (stream.value() != kv.first && kv.second.deallocate(ptr, bytes, stream)) { return; } diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 5f2bfc501..444eb3102 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -185,6 +185,7 @@ class superblock final : public memory_span { superblock(void* pointer, std::size_t size) : memory_span{pointer, size} { RMM_LOGGING_ASSERT(size > minimum_size / 2); + RMM_LOGGING_ASSERT(size < 1UL << 40UL); free_blocks_.emplace(pointer, size); } @@ -259,7 +260,7 @@ class superblock final : public memory_span { [[nodiscard]] std::pair split(std::size_t sz) const { RMM_LOGGING_ASSERT(is_valid()); - RMM_LOGGING_ASSERT(empty() && sz >= minimum_size && size() - sz >= minimum_size); + RMM_LOGGING_ASSERT(empty() && sz >= minimum_size && size() >= sz + minimum_size); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) return {superblock{pointer(), sz}, superblock{pointer() + sz, size() - sz}}; } @@ -587,7 +588,7 @@ class global_arena final { auto sb = std::move(superblocks_.extract(iter).value()); auto const sz = std::max(size, superblock::minimum_size); - if (sb.empty() && sb.size() - sz >= superblock::minimum_size) { + if (sb.empty() && sb.size() >= sz + superblock::minimum_size) { // Split the superblock and put the remainder back. auto [head, tail] = sb.split(sz); superblocks_.insert(std::move(tail)); From 6eb957f7f45678fc0e276893c6749c5957ff0c86 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Mon, 22 Nov 2021 18:39:55 -0800 Subject: [PATCH 21/35] more fixes --- .../rmm/mr/device/arena_memory_resource.hpp | 24 +- include/rmm/mr/device/detail/arena.hpp | 114 ++++- tests/mr/device/arena_mr_tests.cpp | 429 +++++++++--------- 3 files changed, 334 insertions(+), 233 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index c37f6f19b..440da5a0b 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -141,13 +141,32 @@ class arena_memory_resource final : public device_memory_resource { void* pointer = arena.allocate(bytes); if (pointer == nullptr) { - if (dump_log_on_failure_) { dump_memory_log(bytes); } - RMM_FAIL("Maximum pool size exceeded", rmm::out_of_memory); + write_lock lock(mtx_); + defragment(); + pointer = arena.allocate(bytes); + if (pointer == nullptr) { + if (dump_log_on_failure_) { dump_memory_log(bytes); } + RMM_FAIL("Maximum pool size exceeded", rmm::out_of_memory); + } } return pointer; } + /** + * @brief Defragment memory by returning all superblocks to the global arena. + */ + void defragment() + { + RMM_CUDA_TRY(cudaDeviceSynchronize()); + for (auto& thread_arena : thread_arenas_) { + thread_arena.second->defragment(); + } + for (auto& stream_arena : stream_arenas_) { + stream_arena.second.defragment(); + } + } + /** * @brief Deallocate memory pointed to by `ptr`. 
* @@ -291,6 +310,7 @@ class arena_memory_resource final : public device_memory_resource { stream_arena.second.dump_memory_log(logger_); } } + logger_->flush(); } /** diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 444eb3102..802ea269a 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -66,6 +66,12 @@ class memory_span { /// Returns the size of the span. [[nodiscard]] std::size_t size() const { return size_; } + /// Returns the end of the span. + [[nodiscard]] char* end() const + { + return pointer_ + size_; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + } + /// Returns true if this span is valid (non-null), false otherwise. [[nodiscard]] bool is_valid() const { return pointer_ != nullptr && size_ > 0; } @@ -168,8 +174,8 @@ inline bool block_size_compare(block const& lhs, block const& rhs) */ class superblock final : public memory_span { public: - /// Minimum size of a superblock (4 MiB). - static constexpr std::size_t minimum_size{1U << 22U}; + /// Minimum size of a superblock (64 MiB). + static constexpr std::size_t minimum_size{1U << 26U}; /** * @brief Construct a default superblock. @@ -184,7 +190,7 @@ class superblock final : public memory_span { */ superblock(void* pointer, std::size_t size) : memory_span{pointer, size} { - RMM_LOGGING_ASSERT(size > minimum_size / 2); + RMM_LOGGING_ASSERT(size >= minimum_size); RMM_LOGGING_ASSERT(size < 1UL << 40UL); free_blocks_.emplace(pointer, size); } @@ -209,6 +215,17 @@ class superblock final : public memory_span { return free_blocks_.size() == 1 && free_blocks_.cbegin()->size() == size(); } + /** + * @brief Return the number of free blocks. + * + * @return the number of free blocks. + */ + [[nodiscard]] std::size_t free_blocks() const + { + RMM_LOGGING_ASSERT(is_valid()); + return free_blocks_.size(); + } + /** * @brief Whether this superblock contains the given block. * @@ -355,6 +372,7 @@ class superblock final : public memory_span { */ [[nodiscard]] std::size_t max_free() const { + if (free_blocks_.empty()) { return 0; } return std::max_element(free_blocks_.cbegin(), free_blocks_.cend(), block_size_compare)->size(); } @@ -424,7 +442,7 @@ class global_arena final { * @param size The size in bytes of the allocation. * @return bool True if the allocation should be handled by the global arena. */ - bool handles(std::size_t size) const { return size > superblock::minimum_size / 2; } + bool handles(std::size_t size) const { return size > superblock::minimum_size; } /** * @brief Acquire a superblock that can fit a block of the given size. @@ -437,19 +455,17 @@ class global_arena final { // Superblocks should only be acquired if the size is not directly handled by the global arena. RMM_LOGGING_ASSERT(!handles(size)); lock_guard lock(mtx_); - return first_fit(size); + return first_fit(size, superblock::minimum_size); } /** * @brief Release a superblock. * * @param s Superblock to be released. - * @param stream The stream to synchronize on before releasing. 
*/ - void release(superblock&& sb, cuda_stream_view stream) + void release(superblock&& sb) { RMM_LOGGING_ASSERT(sb.is_valid()); - stream.synchronize_no_throw(); lock_guard lock(mtx_); coalesce(std::move(sb)); } @@ -479,7 +495,13 @@ class global_arena final { { RMM_LOGGING_ASSERT(handles(size)); lock_guard lock(mtx_); - return first_fit(size).pointer(); + auto const aligned = rmm::detail::align_up(size, superblock::minimum_size); + auto sb = first_fit(aligned, aligned); + if (sb.is_valid()) { + RMM_LOGGING_ASSERT(large_allocations_.find(sb.pointer()) == large_allocations_.cend()); + large_allocations_.emplace(sb.pointer(), sb.size()); + } + return sb.pointer(); } /** @@ -495,7 +517,9 @@ class global_arena final { RMM_LOGGING_ASSERT(handles(size)); stream.synchronize_no_throw(); lock_guard lock(mtx_); - coalesce({ptr, size}); + auto const allocated_size = large_allocations_.at(ptr); + large_allocations_.erase(ptr); + coalesce({ptr, allocated_size}); } /** @@ -539,6 +563,23 @@ class global_arena final { logger->info(" Total size of superblocks: {}", rmm::detail::bytes{total_memory_size(superblocks_)}); logger->info(" Size of largest free block: {}", rmm::detail::bytes{max_free(superblocks_)}); + logger->info(" # of outstanding large allocations: {}", large_allocations_.size()); + auto i = 0; + char* prev_end{}; + for (auto const& sb : superblocks_) { + if (prev_end == nullptr) { prev_end = sb.pointer(); } + logger->info( + " Superblock {}: start={}, end={}, size={}, empty={}, # free blocks={}, gap={}", + i, + fmt::ptr(sb.pointer()), + fmt::ptr(sb.end()), + rmm::detail::bytes{sb.size()}, + sb.empty(), + sb.free_blocks(), + rmm::detail::bytes{static_cast(sb.pointer() - prev_end)}); + prev_end = sb.end(); + i++; + } } } @@ -578,19 +619,20 @@ class global_arena final { * Sigplan Notices, 34(3), 26-36. * * @param size The number of bytes to allocate. + * @param minimum_size The minimum size of the superblock required. * @return superblock A superblock that can fit at least `size` bytes, or empty if not found. */ - superblock first_fit(std::size_t size) + superblock first_fit(std::size_t size, std::size_t minimum_size) { - auto const iter = std::find_if( - superblocks_.cbegin(), superblocks_.cend(), [size](auto const& sb) { return sb.fits(size); }); + auto const iter = std::find_if(superblocks_.cbegin(), superblocks_.cend(), [=](auto const& sb) { + return sb.fits(size) && sb.size() >= minimum_size; + }); if (iter == superblocks_.cend()) { return {}; } - auto sb = std::move(superblocks_.extract(iter).value()); - auto const sz = std::max(size, superblock::minimum_size); - if (sb.empty() && sb.size() >= sz + superblock::minimum_size) { + auto sb = std::move(superblocks_.extract(iter).value()); + if (sb.empty() && sb.size() >= minimum_size + superblock::minimum_size) { // Split the superblock and put the remainder back. - auto [head, tail] = sb.split(sz); + auto [head, tail] = sb.split(minimum_size); superblocks_.insert(std::move(tail)); return std::move(head); } @@ -643,6 +685,8 @@ class global_arena final { block upstream_block_; /// Address-ordered set of superblocks. std::set superblocks_; + /// Large allocations. + std::unordered_map large_allocations_; /// Mutex for exclusive lock. mutable std::mutex mtx_; }; @@ -707,7 +751,7 @@ class arena { } /** - * @brief Clean the arena and deallocate free blocks from the global arena. + * @brief Clean the arena and release all superblocks to the global arena. 
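+   *
+   * This is what the thread-local `arena_cleaner` held by `arena_memory_resource::get_arena` is
+   * assumed to call when its owning thread exits, so a finished thread's superblocks flow back to
+   * the global arena instead of being stranded (an inference about intent, not part of this diff).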
*/ void clean() { @@ -715,6 +759,20 @@ class arena { global_arena_.release(superblocks_); } + /** + * @brief Defragment the arena and release empty superblock to the global arena. + */ + void defragment() + { + lock_guard lock(mtx_); + while (true) { + auto const iter = std::find_if( + superblocks_.cbegin(), superblocks_.cend(), [](auto const& sb) { return sb.empty(); }); + if (iter == superblocks_.cend()) { return; } + global_arena_.release(std::move(superblocks_.extract(iter).value())); + } + } + /** * Dump memory to log. * @@ -729,6 +787,19 @@ class arena { rmm::detail::bytes{total_memory_size(superblocks_)}); logger->info(" Size of largest free block: {}", rmm::detail::bytes{max_free(superblocks_)}); + auto i = 0; + for (auto const& sb : superblocks_) { + logger->info( + " Superblock {}: start={}, end={}, size={}, empty={}, # free blocks={}, max free={}", + i, + fmt::ptr(sb.pointer()), + fmt::ptr(sb.end()), + rmm::detail::bytes{sb.size()}, + sb.empty(), + sb.free_blocks(), + rmm::detail::bytes{sb.max_free()}); + i++; + } } } @@ -791,11 +862,7 @@ class arena { auto sb = std::move(superblocks_.extract(iter).value()); sb.coalesce(b); - if (sb.empty()) { - global_arena_.release(std::move(sb), stream); - } else { - superblocks_.insert(std::move(sb)); - } + superblocks_.insert(std::move(sb)); return true; } @@ -809,6 +876,7 @@ class arena { { auto sb = global_arena_.acquire(size); if (sb.is_valid()) { + RMM_LOGGING_ASSERT(sb.size() >= superblock::minimum_size); auto const b = sb.first_fit(size); superblocks_.insert(std::move(sb)); return b; diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index a251fee08..15d8faef6 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include #include @@ -26,6 +27,8 @@ #include #include +#include + namespace rmm::test { namespace { @@ -43,20 +46,40 @@ using arena = rmm::mr::detail::arena::arena; using arena_mr = rmm::mr::arena_memory_resource; using ::testing::Return; -// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) +// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) auto const fake_address = reinterpret_cast(1_KiB); -// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) +// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) auto const fake_address2 = reinterpret_cast(2_KiB); -// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) -auto const fake_address3 = reinterpret_cast(4_MiB); -// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) -auto const fake_address4 = reinterpret_cast(8_MiB); +// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) +auto const fake_address3 = reinterpret_cast(superblock::minimum_size); +// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) +auto const fake_address4 = reinterpret_cast(superblock::minimum_size * 2); + +class ArenaTest : public ::testing::Test { + protected: + void SetUp() override + { + EXPECT_CALL(mock_, allocate(arena_size_)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock_, deallocate(fake_address3, arena_size_)); + ga_ = std::make_unique(&mock_, arena_size_); + a_ = std::make_unique(*ga_); + } + + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::size_t arena_size_{superblock::minimum_size * 4}; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + mock_memory_resource mock_{}; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::unique_ptr ga_{}; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::unique_ptr a_{}; +}; /** * Test memory_span. */ -TEST(ArenaTest, MemorySpan) // NOLINT +TEST_F(ArenaTest, MemorySpan) // NOLINT { memory_span const ms{}; EXPECT_FALSE(ms.is_valid()); @@ -68,14 +91,14 @@ TEST(ArenaTest, MemorySpan) // NOLINT * Test block. */ -TEST(ArenaTest, BlockFits) // NOLINT +TEST_F(ArenaTest, BlockFits) // NOLINT { block const b{fake_address, 1_KiB}; EXPECT_TRUE(b.fits(1_KiB)); EXPECT_FALSE(b.fits(1_KiB + 1)); } -TEST(ArenaTest, BlockIsContiguousBefore) // NOLINT +TEST_F(ArenaTest, BlockIsContiguousBefore) // NOLINT { block const b{fake_address, 1_KiB}; block const b2{fake_address2, 256}; @@ -85,7 +108,7 @@ TEST(ArenaTest, BlockIsContiguousBefore) // NOLINT EXPECT_FALSE(b3.is_contiguous_before(b4)); } -TEST(ArenaTest, BlockSplit) // NOLINT +TEST_F(ArenaTest, BlockSplit) // NOLINT { block const b{fake_address, 2_KiB}; auto const [head, tail] = b.split(1_KiB); @@ -95,7 +118,7 @@ TEST(ArenaTest, BlockSplit) // NOLINT EXPECT_EQ(tail.size(), 1_KiB); } -TEST(ArenaTest, BlockMerge) // NOLINT +TEST_F(ArenaTest, BlockMerge) // NOLINT { block const b{fake_address, 1_KiB}; block const b2{fake_address2, 1_KiB}; @@ -108,46 +131,46 @@ TEST(ArenaTest, BlockMerge) // NOLINT * Test superblock. 
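 *
 * A rough sketch of the lifecycle these cases exercise (for orientation only): a superblock hands
 * out blocks with first_fit() and takes them back with coalesce(), which merges neighbouring free
 * blocks so the space can be handed out again.
 *
 *   superblock sb{fake_address3, superblock::minimum_size};
 *   auto const blk = sb.first_fit(1_KiB);   // carve 1 KiB off the front of the superblock
 *   sb.coalesce(blk);                       // hand it back; adjacent free blocks are merged
 *   // sb.empty() is true again once everything carved out has been coalesced back.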
*/ -TEST(ArenaTest, SuperblockEmpty) // NOLINT +TEST_F(ArenaTest, SuperblockEmpty) // NOLINT { - superblock sb{fake_address3, 4_MiB}; + superblock sb{fake_address3, superblock::minimum_size}; EXPECT_TRUE(sb.empty()); sb.first_fit(256); EXPECT_FALSE(sb.empty()); } -TEST(ArenaTest, SuperblockContains) // NOLINT +TEST_F(ArenaTest, SuperblockContains) // NOLINT { - superblock const sb{fake_address3, 4_MiB}; + superblock const sb{fake_address3, superblock::minimum_size}; block const b{fake_address, 2_KiB}; EXPECT_FALSE(sb.contains(b)); block const b2{fake_address3, 1_KiB}; EXPECT_TRUE(sb.contains(b2)); - block const b3{fake_address3, 4_MiB + 1}; + block const b3{fake_address3, superblock::minimum_size + 1}; EXPECT_FALSE(sb.contains(b3)); - block const b4{fake_address3, 4_MiB}; + block const b4{fake_address3, superblock::minimum_size}; EXPECT_TRUE(sb.contains(b4)); block const b5{fake_address4, 256}; EXPECT_FALSE(sb.contains(b5)); } -TEST(ArenaTest, SuperblockFits) // NOLINT +TEST_F(ArenaTest, SuperblockFits) // NOLINT { - superblock sb{fake_address3, 4_MiB}; - EXPECT_TRUE(sb.fits(4_MiB)); - EXPECT_FALSE(sb.fits(4_MiB + 1)); + superblock sb{fake_address3, superblock::minimum_size}; + EXPECT_TRUE(sb.fits(superblock::minimum_size)); + EXPECT_FALSE(sb.fits(superblock::minimum_size + 1)); - auto const b = sb.first_fit(1_MiB); - sb.first_fit(1_MiB); + auto const b = sb.first_fit(superblock::minimum_size / 4); + sb.first_fit(superblock::minimum_size / 4); sb.coalesce(b); - EXPECT_TRUE(sb.fits(2_MiB)); - EXPECT_FALSE(sb.fits(2_MiB + 1)); + EXPECT_TRUE(sb.fits(superblock::minimum_size / 2)); + EXPECT_FALSE(sb.fits(superblock::minimum_size / 2 + 1)); } -TEST(ArenaTest, SuperblockIsContiguousBefore) // NOLINT +TEST_F(ArenaTest, SuperblockIsContiguousBefore) // NOLINT { - superblock sb{fake_address3, 4_MiB}; - superblock sb2{fake_address4, 4_MiB}; + superblock sb{fake_address3, superblock::minimum_size}; + superblock sb2{fake_address4, superblock::minimum_size}; EXPECT_TRUE(sb.is_contiguous_before(sb2)); auto const b = sb.first_fit(256); @@ -161,31 +184,31 @@ TEST(ArenaTest, SuperblockIsContiguousBefore) // NOLINT EXPECT_TRUE(sb.is_contiguous_before(sb2)); } -TEST(ArenaTest, SuperblockSplit) // NOLINT +TEST_F(ArenaTest, SuperblockSplit) // NOLINT { - superblock sb{fake_address3, 8_MiB}; - auto const [head, tail] = sb.split(4_MiB); + superblock sb{fake_address3, superblock::minimum_size * 2}; + auto const [head, tail] = sb.split(superblock::minimum_size); EXPECT_EQ(head.pointer(), fake_address3); - EXPECT_EQ(head.size(), 4_MiB); + EXPECT_EQ(head.size(), superblock::minimum_size); EXPECT_TRUE(head.empty()); EXPECT_EQ(tail.pointer(), fake_address4); - EXPECT_EQ(tail.size(), 4_MiB); + EXPECT_EQ(tail.size(), superblock::minimum_size); EXPECT_TRUE(tail.empty()); } -TEST(ArenaTest, SuperblockMerge) // NOLINT +TEST_F(ArenaTest, SuperblockMerge) // NOLINT { - superblock sb{fake_address3, 4_MiB}; - superblock sb2{fake_address4, 4_MiB}; + superblock sb{fake_address3, superblock::minimum_size}; + superblock sb2{fake_address4, superblock::minimum_size}; auto const merged = sb.merge(sb2); EXPECT_EQ(merged.pointer(), fake_address3); - EXPECT_EQ(merged.size(), 8_MiB); + EXPECT_EQ(merged.size(), superblock::minimum_size * 2); EXPECT_TRUE(merged.empty()); } -TEST(ArenaTest, SuperblockFirstFit) // NOLINT +TEST_F(ArenaTest, SuperblockFirstFit) // NOLINT { - superblock sb{fake_address3, 4_MiB}; + superblock sb{fake_address3, superblock::minimum_size}; auto const b = sb.first_fit(1_KiB); EXPECT_EQ(b.pointer(), fake_address3); 
EXPECT_EQ(b.size(), 1_KiB); @@ -199,26 +222,26 @@ TEST(ArenaTest, SuperblockFirstFit) // NOLINT EXPECT_EQ(b3.size(), 512); } -TEST(ArenaTest, SuperblockCoalesceAfterFull) // NOLINT +TEST_F(ArenaTest, SuperblockCoalesceAfterFull) // NOLINT { - superblock sb{fake_address3, 4_MiB}; - auto const b = sb.first_fit(2_MiB); - sb.first_fit(2_MiB); + superblock sb{fake_address3, superblock::minimum_size}; + auto const b = sb.first_fit(superblock::minimum_size / 2); + sb.first_fit(superblock::minimum_size / 2); sb.coalesce(b); - EXPECT_TRUE(sb.first_fit(2_MiB).is_valid()); + EXPECT_TRUE(sb.first_fit(superblock::minimum_size / 2).is_valid()); } -TEST(ArenaTest, SuperblockCoalesceMergeNext) // NOLINT +TEST_F(ArenaTest, SuperblockCoalesceMergeNext) // NOLINT { - superblock sb{fake_address3, 4_MiB}; - auto const b = sb.first_fit(2_MiB); + superblock sb{fake_address3, superblock::minimum_size}; + auto const b = sb.first_fit(superblock::minimum_size / 2); sb.coalesce(b); - EXPECT_TRUE(sb.first_fit(4_MiB).is_valid()); + EXPECT_TRUE(sb.first_fit(superblock::minimum_size).is_valid()); } -TEST(ArenaTest, SuperblockCoalesceMergePrevious) // NOLINT +TEST_F(ArenaTest, SuperblockCoalesceMergePrevious) // NOLINT { - superblock sb{fake_address3, 4_MiB}; + superblock sb{fake_address3, superblock::minimum_size}; auto const b = sb.first_fit(1_KiB); auto const b2 = sb.first_fit(1_KiB); sb.first_fit(1_KiB); @@ -228,256 +251,222 @@ TEST(ArenaTest, SuperblockCoalesceMergePrevious) // NOLINT EXPECT_EQ(b3.pointer(), fake_address3); } -TEST(ArenaTest, SuperblockCoalesceMergePreviousAndNext) // NOLINT +TEST_F(ArenaTest, SuperblockCoalesceMergePreviousAndNext) // NOLINT { - superblock sb{fake_address3, 4_MiB}; + superblock sb{fake_address3, superblock::minimum_size}; auto const b = sb.first_fit(1_KiB); auto const b2 = sb.first_fit(1_KiB); sb.coalesce(b); sb.coalesce(b2); - EXPECT_TRUE(sb.first_fit(4_MiB).is_valid()); + EXPECT_TRUE(sb.first_fit(superblock::minimum_size).is_valid()); +} + +TEST_F(ArenaTest, SuperblockMaxFree) // NOLINT +{ + superblock sb{fake_address3, superblock::minimum_size}; + sb.first_fit(superblock::minimum_size / 2); + EXPECT_EQ(sb.max_free(), superblock::minimum_size / 2); } -TEST(ArenaTest, SuperblockMaxFree) // NOLINT +TEST_F(ArenaTest, SuperblockMaxFreeWhenFull) // NOLINT { - superblock sb{fake_address3, 4_MiB}; - sb.first_fit(2_MiB); - EXPECT_EQ(sb.max_free(), 2_MiB); + superblock sb{fake_address3, superblock::minimum_size}; + sb.first_fit(superblock::minimum_size); + EXPECT_EQ(sb.max_free(), 0); } /** * Test global_arena. 
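 *
 * For orientation, roughly the flow these cases assume (fixture: a mock upstream and an
 * arena_size_ of four superblocks): the global arena carves superblocks out of a single upstream
 * allocation, hands them to arenas via acquire(), coalesces them back via release(), and serves
 * anything larger than a superblock directly through allocate()/deallocate().
 *
 *   auto sb = ga_->acquire(256);                               // superblock for a small request
 *   ga_->release(std::move(sb));                               // returned and coalesced
 *   void* big = ga_->allocate(superblock::minimum_size * 2);   // large request, served directly
 *   ga_->deallocate(big, superblock::minimum_size * 2, {});    // synchronized, then coalesced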
*/ -TEST(ArenaTest, GlobalArenaNullUpstream) // NOLINT +TEST_F(ArenaTest, GlobalArenaNullUpstream) // NOLINT { auto construct_nullptr = []() { global_arena ga{nullptr, std::nullopt}; }; EXPECT_THROW(construct_nullptr(), rmm::logic_error); // NOLINT(cppcoreguidelines-avoid-goto) } -TEST(ArenaTest, GlobalArenaAcquire) // NOLINT +TEST_F(ArenaTest, GlobalArenaAcquire) // NOLINT { - mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); - - global_arena ga{&mock, 8_MiB}; - - auto const sb = ga.acquire(256); + auto const sb = ga_->acquire(256); EXPECT_EQ(sb.pointer(), fake_address3); - EXPECT_EQ(sb.size(), 4_MiB); + EXPECT_EQ(sb.size(), superblock::minimum_size); EXPECT_TRUE(sb.empty()); - auto const sb2 = ga.acquire(1_KiB); + auto const sb2 = ga_->acquire(1_KiB); EXPECT_EQ(sb2.pointer(), fake_address4); - EXPECT_EQ(sb2.size(), 4_MiB); + EXPECT_EQ(sb2.size(), superblock::minimum_size); EXPECT_TRUE(sb2.empty()); - EXPECT_FALSE(ga.acquire(512).is_valid()); + ga_->acquire(512); + ga_->acquire(512); + EXPECT_FALSE(ga_->acquire(512).is_valid()); } -TEST(ArenaTest, GlobalArenaReleaseMergeNext) // NOLINT +TEST_F(ArenaTest, GlobalArenaReleaseMergeNext) // NOLINT { - mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); - - global_arena ga{&mock, 8_MiB}; - - auto sb = ga.acquire(256); - ga.release(std::move(sb), {}); - auto* p = ga.allocate(8_MiB); + auto sb = ga_->acquire(256); + ga_->release(std::move(sb)); + auto* p = ga_->allocate(arena_size_); EXPECT_EQ(p, fake_address3); } -TEST(ArenaTest, GlobalArenaReleaseMergePrevious) // NOLINT +TEST_F(ArenaTest, GlobalArenaReleaseMergePrevious) // NOLINT { - mock_memory_resource mock; - EXPECT_CALL(mock, allocate(16_MiB)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 16_MiB)); - - global_arena ga{&mock, 16_MiB}; - - auto sb = ga.acquire(256); - auto sb2 = ga.acquire(1_KiB); - ga.acquire(512); - ga.release(std::move(sb), {}); - ga.release(std::move(sb2), {}); - auto* p = ga.allocate(8_MiB); + auto sb = ga_->acquire(256); + auto sb2 = ga_->acquire(1_KiB); + ga_->acquire(512); + ga_->release(std::move(sb)); + ga_->release(std::move(sb2)); + auto* p = ga_->allocate(superblock::minimum_size * 2); EXPECT_EQ(p, fake_address3); } -TEST(ArenaTest, GlobalArenaReleaseMergePreviousAndNext) // NOLINT +TEST_F(ArenaTest, GlobalArenaReleaseMergePreviousAndNext) // NOLINT { - mock_memory_resource mock; - EXPECT_CALL(mock, allocate(16_MiB)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 16_MiB)); - - global_arena ga{&mock, 16_MiB}; - - auto sb = ga.acquire(256); - auto sb2 = ga.acquire(1_KiB); - auto sb3 = ga.acquire(512); - ga.release(std::move(sb), {}); - ga.release(std::move(sb3), {}); - ga.release(std::move(sb2), {}); - auto* p = ga.allocate(16_MiB); + auto sb = ga_->acquire(256); + auto sb2 = ga_->acquire(1_KiB); + auto sb3 = ga_->acquire(512); + ga_->release(std::move(sb)); + ga_->release(std::move(sb3)); + ga_->release(std::move(sb2)); + auto* p = ga_->allocate(arena_size_); EXPECT_EQ(p, fake_address3); } -TEST(ArenaTest, GlobalArenaReleaseMultiple) // NOLINT +TEST_F(ArenaTest, GlobalArenaReleaseMultiple) // NOLINT { - mock_memory_resource mock; - EXPECT_CALL(mock, allocate(16_MiB)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 16_MiB)); - - global_arena ga{&mock, 16_MiB}; - std::set 
superblocks{}; - auto sb = ga.acquire(256); + auto sb = ga_->acquire(256); superblocks.insert(std::move(sb)); - auto sb2 = ga.acquire(1_KiB); + auto sb2 = ga_->acquire(1_KiB); superblocks.insert(std::move(sb2)); - auto sb3 = ga.acquire(512); + auto sb3 = ga_->acquire(512); superblocks.insert(std::move(sb3)); - ga.release(superblocks); - auto* p = ga.allocate(16_MiB); + ga_->release(superblocks); + auto* p = ga_->allocate(arena_size_); EXPECT_EQ(p, fake_address3); } -TEST(ArenaTest, GlobalArenaAllocate) // NOLINT +TEST_F(ArenaTest, GlobalArenaAllocate) // NOLINT { - mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); - - global_arena ga{&mock, 8_MiB}; - - auto* ptr = ga.allocate(4_MiB); + auto* ptr = ga_->allocate(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); - auto* ptr2 = ga.allocate(4_MiB); - EXPECT_EQ(ptr2, fake_address4); } -TEST(ArenaTest, GlobalArenaDeallocate) // NOLINT +TEST_F(ArenaTest, GlobalArenaAllocateExtraLarge) // NOLINT { - mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); + EXPECT_EQ(ga_->allocate(1_PiB), nullptr); + EXPECT_EQ(ga_->allocate(1_PiB), nullptr); +} - global_arena ga{&mock, 8_MiB}; +TEST_F(ArenaTest, GlobalArenaAllocateAlignUp) // NOLINT +{ + ga_->allocate(superblock::minimum_size + 256); + ga_->allocate(superblock::minimum_size + 256); + EXPECT_EQ(ga_->allocate(superblock::minimum_size + 256), nullptr); +} - auto* ptr = ga.allocate(4_MiB); +TEST_F(ArenaTest, GlobalArenaDeallocate) // NOLINT +{ + auto* ptr = ga_->allocate(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); - ga.deallocate(ptr, 4_MiB, {}); - ptr = ga.allocate(4_MiB); + ga_->deallocate(ptr, superblock::minimum_size * 2, {}); + ptr = ga_->allocate(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); } -TEST(ArenaTest, GlobalArenaDeallocateFromOtherArena) // NOLINT +TEST_F(ArenaTest, GlobalArenaDeallocateAlignUp) // NOLINT { - mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); - - global_arena ga{&mock, 8_MiB}; + auto* ptr = ga_->allocate(superblock::minimum_size + 256); + auto* ptr2 = ga_->allocate(superblock::minimum_size + 512); + ga_->deallocate(ptr, superblock::minimum_size + 256, {}); + ga_->deallocate(ptr2, superblock::minimum_size + 512, {}); + EXPECT_EQ(ga_->allocate(arena_size_), fake_address3); +} - auto sb = ga.acquire(512); +TEST_F(ArenaTest, GlobalArenaDeallocateFromOtherArena) // NOLINT +{ + auto sb = ga_->acquire(512); auto const b = sb.first_fit(512); - ga.release(std::move(sb), {}); - ga.deallocate_from_other_arena(b.pointer(), b.size()); - EXPECT_EQ(ga.allocate(8_MiB), fake_address3); + ga_->release(std::move(sb)); + ga_->deallocate_from_other_arena(b.pointer(), b.size()); + EXPECT_EQ(ga_->allocate(arena_size_), fake_address3); } /** * Test arena. 
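 *
 * Sketch of the per-arena behaviour covered below: an arena serves small requests by carving
 * blocks out of superblocks it acquires from the global arena, coalesces blocks on deallocate,
 * and hands empty superblocks back when defragmented.
 *
 *   auto* ptr = a_->allocate(256);    // acquires a superblock, then first fit within it
 *   a_->deallocate(ptr, 256, {});     // the block is coalesced back into its superblock
 *   a_->defragment();                 // empty superblocks are released to ga_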
*/ -TEST(ArenaTest, ArenaAllocate) // NOLINT +TEST_F(ArenaTest, ArenaAllocate) // NOLINT { - mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); - global_arena ga{&mock, 8_MiB}; - arena a{ga}; - - EXPECT_EQ(a.allocate(4_MiB), fake_address3); - EXPECT_EQ(a.allocate(256), fake_address4); + EXPECT_EQ(a_->allocate(superblock::minimum_size), fake_address3); + EXPECT_EQ(a_->allocate(256), fake_address4); } -TEST(ArenaTest, ArenaDeallocate) // NOLINT +TEST_F(ArenaTest, ArenaDeallocate) // NOLINT { - mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); - global_arena ga{&mock, 8_MiB}; - arena a{ga}; - - auto* ptr = a.allocate(4_MiB); - a.deallocate(ptr, 4_MiB, {}); - auto* ptr2 = a.allocate(256); - a.deallocate(ptr2, 256, {}); - EXPECT_EQ(a.allocate(8_MiB), fake_address3); + auto* ptr = a_->allocate(superblock::minimum_size); + a_->deallocate(ptr, superblock::minimum_size, {}); + auto* ptr2 = a_->allocate(256); + a_->deallocate(ptr2, 256, {}); + EXPECT_EQ(a_->allocate(superblock::minimum_size), fake_address3); } -TEST(ArenaTest, ArenaDeallocateMergePrevious) // NOLINT +TEST_F(ArenaTest, ArenaDeallocateMergePrevious) // NOLINT { - mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); - global_arena ga{&mock, 8_MiB}; - arena a{ga}; - - auto* ptr = a.allocate(256); - auto* ptr2 = a.allocate(256); - a.allocate(256); - a.deallocate(ptr, 256, {}); - a.deallocate(ptr2, 256, {}); - EXPECT_EQ(a.allocate(512), fake_address3); + auto* ptr = a_->allocate(256); + auto* ptr2 = a_->allocate(256); + a_->allocate(256); + a_->deallocate(ptr, 256, {}); + a_->deallocate(ptr2, 256, {}); + EXPECT_EQ(a_->allocate(512), fake_address3); } -TEST(ArenaTest, ArenaDeallocateMergeNext) // NOLINT +TEST_F(ArenaTest, ArenaDeallocateMergeNext) // NOLINT { - mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); - global_arena ga{&mock, 8_MiB}; - arena a{ga}; - - auto* ptr = a.allocate(256); - auto* ptr2 = a.allocate(256); - a.allocate(256); - a.deallocate(ptr2, 256, {}); - a.deallocate(ptr, 256, {}); - EXPECT_EQ(a.allocate(512), fake_address3); + auto* ptr = a_->allocate(256); + auto* ptr2 = a_->allocate(256); + a_->allocate(256); + a_->deallocate(ptr2, 256, {}); + a_->deallocate(ptr, 256, {}); + EXPECT_EQ(a_->allocate(512), fake_address3); } -TEST(ArenaTest, ArenaDeallocateMergePreviousAndNext) // NOLINT +TEST_F(ArenaTest, ArenaDeallocateMergePreviousAndNext) // NOLINT { - mock_memory_resource mock; - EXPECT_CALL(mock, allocate(8_MiB)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock, deallocate(fake_address3, 8_MiB)); - global_arena ga{&mock, 8_MiB}; - arena a{ga}; + auto* ptr = a_->allocate(256); + auto* ptr2 = a_->allocate(256); + a_->deallocate(ptr, 256, {}); + a_->deallocate(ptr2, 256, {}); + EXPECT_EQ(a_->allocate(2_KiB), fake_address3); +} - auto* ptr = a.allocate(256); - auto* ptr2 = a.allocate(256); - a.deallocate(ptr, 256, {}); - a.deallocate(ptr2, 256, {}); - EXPECT_EQ(a.allocate(2_KiB), fake_address3); +TEST_F(ArenaTest, ArenaDefragment) // NOLINT +{ + std::vector pointers; + std::size_t num_pointers{4}; + for (std::size_t i = 0; i < num_pointers; i++) { + pointers.push_back(a_->allocate(superblock::minimum_size)); + } + for 
(auto* ptr : pointers) { + a_->deallocate(ptr, superblock::minimum_size, {}); + } + EXPECT_EQ(ga_->allocate(arena_size_), nullptr); + a_->defragment(); + EXPECT_EQ(ga_->allocate(arena_size_), fake_address3); } /** * Test arena_memory_resource. */ -TEST(ArenaTest, NullUpstream) // NOLINT +TEST_F(ArenaTest, NullUpstream) // NOLINT { // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto) EXPECT_THROW([]() { arena_mr mr{nullptr}; }(), rmm::logic_error); } -TEST(ArenaTest, AllocateNinetyPercent) // NOLINT +TEST_F(ArenaTest, AllocateNinetyPercent) // NOLINT { EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) auto const free = rmm::detail::available_device_memory().first; @@ -488,7 +477,7 @@ TEST(ArenaTest, AllocateNinetyPercent) // NOLINT }()); } -TEST(ArenaTest, SmallMediumLarge) // NOLINT +TEST_F(ArenaTest, SmallMediumLarge) // NOLINT { EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) arena_mr mr(rmm::mr::get_current_device_resource()); @@ -502,5 +491,29 @@ TEST(ArenaTest, SmallMediumLarge) // NOLINT }()); } +TEST_F(ArenaTest, Defragment) // NOLINT +{ + EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) + auto const arena_size = superblock::minimum_size * 4; + arena_mr mr(rmm::mr::get_current_device_resource(), arena_size); + std::vector threads; + std::size_t num_threads{4}; + threads.reserve(num_threads); + for (std::size_t i = 0; i < num_threads; ++i) { + threads.emplace_back(std::thread([&] { + cuda_stream stream{}; + void* ptr = mr.allocate(32_KiB, stream); + mr.deallocate(ptr, 32_KiB, stream); + })); + } + for (auto& thread : threads) { + thread.join(); + } + + auto* ptr = mr.allocate(arena_size); + mr.deallocate(ptr, arena_size); + }()); +} + } // namespace } // namespace rmm::test From 9a2e917b9107f2f56382b2a4d0438822b95d8d2f Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 30 Nov 2021 13:34:59 -0800 Subject: [PATCH 22/35] clean instead of defragment individual arenas --- include/rmm/mr/device/arena_memory_resource.hpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 440da5a0b..342180927 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -160,10 +160,10 @@ class arena_memory_resource final : public device_memory_resource { { RMM_CUDA_TRY(cudaDeviceSynchronize()); for (auto& thread_arena : thread_arenas_) { - thread_arena.second->defragment(); + thread_arena.second->clean(); } for (auto& stream_arena : stream_arenas_) { - stream_arena.second.defragment(); + stream_arena.second.clean(); } } @@ -199,20 +199,15 @@ class arena_memory_resource final : public device_memory_resource { // is caught up. stream.synchronize_no_throw(); - read_lock lock(mtx_); + write_lock lock(mtx_); if (use_per_thread_arena(stream)) { - auto const id = std::this_thread::get_id(); for (auto&& kv : thread_arenas_) { - // If the arena does not belong to the current thread, try to deallocate from it, and return - // if successful. - if (kv.first != id && kv.second->deallocate(ptr, bytes, stream)) { return; } + if (kv.second->deallocate(ptr, bytes, stream)) { return; } } } else { for (auto&& kv : stream_arenas_) { - // If the arena does not belong to the current stream, try to deallocate from it, and return - // if successful. 
- if (stream.value() != kv.first && kv.second.deallocate(ptr, bytes, stream)) { return; } + if (kv.second.deallocate(ptr, bytes, stream)) { return; } } } From fb1f193bf9cb98487548f3b291f3340775cd58f3 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 30 Nov 2021 13:37:02 -0800 Subject: [PATCH 23/35] lower superblock size to 1MB --- include/rmm/mr/device/detail/arena.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 802ea269a..86d9d3079 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -174,8 +174,8 @@ inline bool block_size_compare(block const& lhs, block const& rhs) */ class superblock final : public memory_span { public: - /// Minimum size of a superblock (64 MiB). - static constexpr std::size_t minimum_size{1U << 26U}; + /// Minimum size of a superblock (1 MiB). + static constexpr std::size_t minimum_size{1U << 20U}; /** * @brief Construct a default superblock. From 5148c51a8d7dad186014b3ec3b2215bef7ad3144 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Fri, 3 Dec 2021 18:26:50 -0800 Subject: [PATCH 24/35] align to size classes --- .../random_allocations/random_allocations.cpp | 4 +- .../rmm/mr/device/arena_memory_resource.hpp | 62 +++++----- include/rmm/mr/device/detail/arena.hpp | 114 +++++++++++++++--- tests/mr/device/arena_mr_tests.cpp | 38 +++--- 4 files changed, 154 insertions(+), 64 deletions(-) diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index 828561dd1..c236ed7bb 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -170,7 +170,9 @@ inline auto make_pool() inline auto make_arena() { - return rmm::mr::make_owning_wrapper(make_cuda()); + auto free = rmm::detail::available_device_memory().first; + auto reserve = 1UL << 26; + return rmm::mr::make_owning_wrapper(make_cuda(), free - reserve); } inline auto make_binning() diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 342180927..c55052c3b 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -118,8 +118,6 @@ class arena_memory_resource final : public device_memory_resource { private: using global_arena = rmm::mr::detail::arena::global_arena; using arena = rmm::mr::detail::arena::arena; - using read_lock = std::shared_lock; - using write_lock = std::unique_lock; /** * @brief Allocates memory of size at least `bytes`. 
@@ -135,22 +133,25 @@ class arena_memory_resource final : public device_memory_resource { void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { if (bytes <= 0) { return nullptr; } + bytes = rmm::mr::detail::arena::align_to_size_class(bytes); + auto& arena = get_arena(stream); - bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - auto& arena = get_arena(stream); - void* pointer = arena.allocate(bytes); + { + std::shared_lock lock(mtx_); + void* pointer = arena.allocate(bytes); + if (pointer != nullptr) { return pointer; } + } - if (pointer == nullptr) { - write_lock lock(mtx_); + { + std::unique_lock lock(mtx_); defragment(); - pointer = arena.allocate(bytes); + void* pointer = arena.allocate(bytes); if (pointer == nullptr) { if (dump_log_on_failure_) { dump_memory_log(bytes); } RMM_FAIL("Maximum pool size exceeded", rmm::out_of_memory); } + return pointer; } - - return pointer; } /** @@ -178,9 +179,20 @@ class arena_memory_resource final : public device_memory_resource { void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { if (ptr == nullptr || bytes <= 0) { return; } + bytes = rmm::mr::detail::arena::align_to_size_class(bytes); + auto& arena = get_arena(stream); - bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - if (!get_arena(stream).deallocate(ptr, bytes, stream)) { + { + std::shared_lock lock(mtx_); + if (arena.deallocate(ptr, bytes, stream)) { return; } + } + + { + // Since we are returning this memory to another stream, we need to make sure the current + // stream is caught up. + stream.synchronize_no_throw(); + + std::unique_lock lock(mtx_); deallocate_from_other_arena(ptr, bytes, stream); } } @@ -195,19 +207,13 @@ class arena_memory_resource final : public device_memory_resource { */ void deallocate_from_other_arena(void* ptr, std::size_t bytes, cuda_stream_view stream) { - // Since we are returning this memory to another stream, we need to make sure the current stream - // is caught up. 
- stream.synchronize_no_throw(); - - write_lock lock(mtx_); - if (use_per_thread_arena(stream)) { - for (auto&& kv : thread_arenas_) { - if (kv.second->deallocate(ptr, bytes, stream)) { return; } + for (auto const& thread_arena : thread_arenas_) { + if (thread_arena.second->deallocate(ptr, bytes, stream)) { return; } } } else { - for (auto&& kv : stream_arenas_) { - if (kv.second.deallocate(ptr, bytes, stream)) { return; } + for (auto& stream_arena : stream_arenas_) { + if (stream_arena.second.deallocate(ptr, bytes, stream)) { return; } } } @@ -237,12 +243,12 @@ class arena_memory_resource final : public device_memory_resource { { auto const thread_id = std::this_thread::get_id(); { - read_lock lock(mtx_); + std::shared_lock lock(map_mtx_); auto const iter = thread_arenas_.find(thread_id); if (iter != thread_arenas_.end()) { return *iter->second; } } { - write_lock lock(mtx_); + std::unique_lock lock(map_mtx_); auto thread_arena = std::make_shared(global_arena_); thread_arenas_.emplace(thread_id, thread_arena); thread_local detail::arena::arena_cleaner cleaner{thread_arena}; @@ -259,12 +265,12 @@ class arena_memory_resource final : public device_memory_resource { { RMM_LOGGING_ASSERT(!use_per_thread_arena(stream)); { - read_lock lock(mtx_); + std::shared_lock lock(map_mtx_); auto const iter = stream_arenas_.find(stream.value()); if (iter != stream_arenas_.end()) { return iter->second; } } { - write_lock lock(mtx_); + std::unique_lock lock(map_mtx_); stream_arenas_.emplace(stream.value(), global_arena_); return stream_arenas_.at(stream.value()); } @@ -331,7 +337,9 @@ class arena_memory_resource final : public device_memory_resource { bool dump_log_on_failure_{}; /// The logger for memory dump. std::shared_ptr logger_{}; - /// Mutex for read and write locks. + /// Mutex for read and write locks on arena maps. + mutable std::shared_mutex map_mtx_; + /// Mutex for shared and unique locks on the mr. mutable std::shared_mutex mtx_; }; diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 0423a4242..8e43a661d 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -38,6 +38,85 @@ namespace rmm::mr::detail::arena { +/** + * @brief Align up to nearest size class. + * + * @param[in] value value to align. + * @return Return the aligned value. + */ +inline std::size_t align_to_size_class(std::size_t value) noexcept +{ + // See http://jemalloc.net/jemalloc.3.html. 
+ // NOLINTBEGIN(readability-magic-numbers,cppcoreguidelines-avoid-magic-numbers) + static std::array size_classes{ + // clang-format off + // Spacing 256: + 256UL, 512UL, 768UL, 1024UL, 1280UL, 1536UL, 1792UL, 2048UL, + // Spacing 512: + 2560UL, 3072UL, 3584UL, 4096UL, + // Spacing 1 KiB: + 5UL << 10, 6UL << 10, 7UL << 10, 8UL << 10, + // Spacing 2 KiB: + 10UL << 10, 12UL << 10, 14UL << 10, 16UL << 10, + // Spacing 4 KiB: + 20UL << 10, 24UL << 10, 28UL << 10, 32UL << 10, + // Spacing 8 KiB: + 40UL << 10, 48UL << 10, 54UL << 10, 64UL << 10, + // Spacing 16 KiB: + 80UL << 10, 96UL << 10, 112UL << 10, 128UL << 10, + // Spacing 32 KiB: + 160UL << 10, 192UL << 10, 224UL << 10, 256UL << 10, + // Spacing 64 KiB: + 320UL << 10, 384UL << 10, 448UL << 10, 512UL << 10, + // Spacing 128 KiB: + 640UL << 10, 768UL << 10, 896UL << 10, 1UL << 20, + // Spacing 256 KiB: + 1280UL << 10, 1536UL << 10, 1792UL << 10, 2UL << 20, + // Spacing 512 KiB: + 2560UL << 10, 3UL << 20, 3584UL << 10, 4UL << 20, + // Spacing 1 MiB: + 5UL << 20, 6UL << 20, 7UL << 20, 8UL << 20, + // Spacing 2 MiB: + 10UL << 20, 12UL << 20, 14UL << 20, 16UL << 20, + // Spacing 4 MiB: + 20UL << 20, 24UL << 20, 28UL << 20, 32UL << 20, + // Spacing 8 MiB: + 40UL << 20, 48UL << 20, 56UL << 20, 64UL << 20, + // Spacing 16 MiB: + 80UL << 20, 96UL << 20, 112UL << 20, 128UL << 20, + // Spacing 32 MiB: + 160UL << 20, 192UL << 20, 224UL << 20, 256UL << 20, + // Spacing 64 MiB: + 320UL << 20, 384UL << 20, 448UL << 20, 512UL << 20, + // Spacing 128 MiB: + 640UL << 20, 768UL << 20, 896UL << 20, 1UL << 30, + // Spacing 256 MiB: + 1280UL << 20, 1536UL << 20, 1792UL << 20, 2UL << 30, + // Spacing 512 MiB: + 2560UL << 20, 3UL << 30, 3584UL << 20, 4UL << 30, + // Spacing 1 GiB: + 5UL << 30, 6UL << 30, 7UL << 30, 8UL << 30, + // Spacing 2 GiB: + 10UL << 30, 12UL << 30, 14UL << 30, 16UL << 30, + // Spacing 4 GiB: + 20UL << 30, 24UL << 30, 28UL << 30, 32UL << 30, + // Spacing 8 GiB: + 40UL << 30, 48UL << 30, 56UL << 30, 64UL << 30, + // Spacing 16 GiB: + 80UL << 30, 96UL << 30, 112UL << 30, 128UL << 30, + // Spacing 32 Gib: + 160UL << 30, 192UL << 30, 224UL << 30, 256UL << 30, + // Catch all: + std::numeric_limits::max() + // clang-format on + }; + // NOLINTEND(readability-magic-numbers,cppcoreguidelines-avoid-magic-numbers) + + auto* bound = std::lower_bound(size_classes.begin(), size_classes.end(), value); + RMM_LOGGING_ASSERT(bound != size_classes.end()); + return *bound; +} + /** * @brief Represents a contiguous region of memory. */ @@ -307,7 +386,7 @@ class superblock final : public memory_span { RMM_LOGGING_ASSERT(is_valid()); RMM_LOGGING_ASSERT(size > 0); - auto fits = [size](auto const& blk) { return blk.fits(size); }; + auto fits = [size](auto const& blk) { return blk.fits(size); }; auto const iter = std::find_if(free_blocks_.cbegin(), free_blocks_.cend(), fits); if (iter == free_blocks_.cend()) { return {}; } @@ -432,7 +511,7 @@ class global_arena final { */ ~global_arena() { - lock_guard lock(mtx_); + std::lock_guard lock(mtx_); upstream_mr_->deallocate(upstream_block_.pointer(), upstream_block_.size()); } @@ -454,7 +533,7 @@ class global_arena final { { // Superblocks should only be acquired if the size is not directly handled by the global arena. 
RMM_LOGGING_ASSERT(!handles(size)); - lock_guard lock(mtx_); + std::lock_guard lock(mtx_); return first_fit(size, superblock::minimum_size); } @@ -466,7 +545,7 @@ class global_arena final { void release(superblock&& sb) { RMM_LOGGING_ASSERT(sb.is_valid()); - lock_guard lock(mtx_); + std::lock_guard lock(mtx_); coalesce(std::move(sb)); } @@ -477,7 +556,7 @@ class global_arena final { */ void release(std::set& superblocks) { - lock_guard lock(mtx_); + std::lock_guard lock(mtx_); while (!superblocks.empty()) { auto sb = std::move(superblocks.extract(superblocks.cbegin()).value()); RMM_LOGGING_ASSERT(sb.is_valid()); @@ -494,9 +573,8 @@ class global_arena final { void* allocate(std::size_t size) { RMM_LOGGING_ASSERT(handles(size)); - lock_guard lock(mtx_); - auto const aligned = rmm::detail::align_up(size, superblock::minimum_size); - auto sb = first_fit(aligned, aligned); + std::lock_guard lock(mtx_); + auto sb = first_fit(size, size); if (sb.is_valid()) { RMM_LOGGING_ASSERT(large_allocations_.find(sb.pointer()) == large_allocations_.cend()); large_allocations_.emplace(sb.pointer(), sb.size()); @@ -516,7 +594,7 @@ class global_arena final { { RMM_LOGGING_ASSERT(handles(size)); stream.synchronize_no_throw(); - lock_guard lock(mtx_); + std::lock_guard lock(mtx_); auto const allocated_size = large_allocations_.at(ptr); large_allocations_.erase(ptr); coalesce({ptr, allocated_size}); @@ -532,7 +610,7 @@ class global_arena final { */ void deallocate_from_other_arena(void* ptr, std::size_t bytes) { - lock_guard lock(mtx_); + std::lock_guard lock(mtx_); block const b{ptr, bytes}; auto const iter = std::find_if( @@ -555,7 +633,7 @@ class global_arena final { */ void dump_memory_log(std::shared_ptr const& logger) const { - lock_guard lock(mtx_); + std::lock_guard lock(mtx_); logger->info(" Arena size: {}", rmm::detail::bytes{upstream_block_.size()}); logger->info(" # superblocks: {}", superblocks_.size()); @@ -584,8 +662,6 @@ class global_arena final { } private: - using lock_guard = std::lock_guard; - /** * @brief Default size of the global arena if unspecified. * @return the default global arena size. @@ -727,7 +803,7 @@ class arena { void* allocate(std::size_t size) { if (global_arena_.handles(size)) { return global_arena_.allocate(size); } - lock_guard lock(mtx_); + std::lock_guard lock(mtx_); return get_block(size).pointer(); } @@ -746,7 +822,7 @@ class arena { global_arena_.deallocate(ptr, size, stream); return true; } - lock_guard lock(mtx_); + std::lock_guard lock(mtx_); return deallocate_from_superblock({ptr, size}, stream); } @@ -755,7 +831,7 @@ class arena { */ void clean() { - lock_guard lock(mtx_); + std::lock_guard lock(mtx_); global_arena_.release(superblocks_); } @@ -764,7 +840,7 @@ class arena { */ void defragment() { - lock_guard lock(mtx_); + std::lock_guard lock(mtx_); while (true) { auto const iter = std::find_if( superblocks_.cbegin(), superblocks_.cend(), [](auto const& sb) { return sb.empty(); }); @@ -780,7 +856,7 @@ class arena { */ void dump_memory_log(std::shared_ptr const& logger) const { - lock_guard lock(mtx_); + std::lock_guard lock(mtx_); logger->info(" # superblocks: {}", superblocks_.size()); if (!superblocks_.empty()) { logger->info(" Total size of superblocks: {}", @@ -804,8 +880,6 @@ class arena { } private: - using lock_guard = std::lock_guard; - /** * @brief Get an available memory block of at least `size` bytes. 
* diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 0890544be..97db29748 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -46,14 +46,12 @@ using arena = rmm::mr::detail::arena::arena; using arena_mr = rmm::mr::arena_memory_resource; using ::testing::Return; -// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) -auto const fake_address = reinterpret_cast(1_KiB); -// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) +// NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) +auto const fake_address = reinterpret_cast(1_KiB); auto const fake_address2 = reinterpret_cast(2_KiB); -// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) auto const fake_address3 = reinterpret_cast(superblock::minimum_size); -// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) auto const fake_address4 = reinterpret_cast(superblock::minimum_size * 2); +// NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) class ArenaTest : public ::testing::Test { protected: @@ -65,16 +63,31 @@ class ArenaTest : public ::testing::Test { a_ = std::make_unique(*ga_); } - // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + // NOLINTBEGIN(cppcoreguidelines-non-private-member-variables-in-classes) std::size_t arena_size_{superblock::minimum_size * 4}; - // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) mock_memory_resource mock_{}; - // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) std::unique_ptr ga_{}; - // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) std::unique_ptr a_{}; + // NOLINTEND(cppcoreguidelines-non-private-member-variables-in-classes) }; +/** + * Test align_to_size_class. + */ +TEST_F(ArenaTest, AlignToSizeClass) // NOLINT +{ + using rmm::mr::detail::arena::align_to_size_class; + EXPECT_EQ(align_to_size_class(8), 256); + EXPECT_EQ(align_to_size_class(256), 256); + EXPECT_EQ(align_to_size_class(264), 512); + EXPECT_EQ(align_to_size_class(512), 512); + EXPECT_EQ(align_to_size_class(17_KiB), 20_KiB); + EXPECT_EQ(align_to_size_class(13_MiB), 14_MiB); + EXPECT_EQ(align_to_size_class(2500_MiB), 2560_MiB); + EXPECT_EQ(align_to_size_class(128_GiB), 128_GiB); + EXPECT_EQ(align_to_size_class(1_PiB), std::numeric_limits::max()); +} + /** * Test memory_span. 
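 *
 * memory_span is the pointer/size base shared by block and superblock; spans are ordered by
 * address so they can be kept in a std::set. A default-constructed span is invalid, while a span
 * built from a pointer and a non-zero size, e.g. memory_span{fake_address, 256}, reports
 * pointer() == fake_address and size() == 256.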
*/ @@ -359,13 +372,6 @@ TEST_F(ArenaTest, GlobalArenaAllocateExtraLarge) // NOLINT EXPECT_EQ(ga_->allocate(1_PiB), nullptr); } -TEST_F(ArenaTest, GlobalArenaAllocateAlignUp) // NOLINT -{ - ga_->allocate(superblock::minimum_size + 256); - ga_->allocate(superblock::minimum_size + 256); - EXPECT_EQ(ga_->allocate(superblock::minimum_size + 256), nullptr); -} - TEST_F(ArenaTest, GlobalArenaDeallocate) // NOLINT { auto* ptr = ga_->allocate(superblock::minimum_size * 2); From a13e8adac9275ce9cf450292ed9cf8cc05b9711a Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 7 Dec 2021 10:12:53 -0800 Subject: [PATCH 25/35] keep track of large allocations in superblocks --- .../rmm/mr/device/arena_memory_resource.hpp | 22 +- include/rmm/mr/device/detail/arena.hpp | 351 +++++++++--------- tests/mr/device/arena_mr_tests.cpp | 2 +- 3 files changed, 193 insertions(+), 182 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index c55052c3b..fd6874705 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -133,7 +133,11 @@ class arena_memory_resource final : public device_memory_resource { void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { if (bytes <= 0) { return nullptr; } - bytes = rmm::mr::detail::arena::align_to_size_class(bytes); +#ifdef RMM_ARENA_USE_SIZE_CLASSES + bytes = rmm::mr::detail::arena::align_to_size_class(bytes); +#else + bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); +#endif auto& arena = get_arena(stream); { @@ -179,7 +183,11 @@ class arena_memory_resource final : public device_memory_resource { void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { if (ptr == nullptr || bytes <= 0) { return; } - bytes = rmm::mr::detail::arena::align_to_size_class(bytes); +#ifdef RMM_ARENA_USE_SIZE_CLASSES + bytes = rmm::mr::detail::arena::align_to_size_class(bytes); +#else + bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); +#endif auto& arena = get_arena(stream); { @@ -209,17 +217,17 @@ class arena_memory_resource final : public device_memory_resource { { if (use_per_thread_arena(stream)) { for (auto const& thread_arena : thread_arenas_) { - if (thread_arena.second->deallocate(ptr, bytes, stream)) { return; } + if (thread_arena.second->deallocate(ptr, bytes)) { return; } } } else { for (auto& stream_arena : stream_arenas_) { - if (stream_arena.second.deallocate(ptr, bytes, stream)) { return; } + if (stream_arena.second.deallocate(ptr, bytes)) { return; } } } - // The thread that originally allocated the block has terminated, deallocate directly in the - // global arena. - global_arena_.deallocate_from_other_arena(ptr, bytes); + if (!global_arena_.deallocate(ptr, bytes)) { + RMM_FAIL("allocation not found"); + } } /** diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 8e43a661d..a2579471d 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -155,10 +155,10 @@ class memory_span { [[nodiscard]] bool is_valid() const { return pointer_ != nullptr && size_ > 0; } /// Used by std::set to compare spans. 
- bool operator<(memory_span const& ms) const + bool operator<(memory_span const& mem_span) const { - RMM_LOGGING_ASSERT(ms.is_valid()); - return pointer_ < ms.pointer_; + RMM_LOGGING_ASSERT(mem_span.is_valid()); + return pointer_ < mem_span.pointer_; } private: @@ -184,58 +184,58 @@ class block final : public memory_span { using memory_span::memory_span; /** - * @brief Is this block large enough to fit `sz` bytes? + * @brief Is this block large enough to fit `bytes` bytes? * - * @param sz The size in bytes to check for fit. - * @return true if this block is at least `sz` bytes. + * @param bytes The size in bytes to check for fit. + * @return true if this block is at least `bytes` bytes. */ - [[nodiscard]] bool fits(std::size_t sz) const + [[nodiscard]] bool fits(std::size_t bytes) const { RMM_LOGGING_ASSERT(is_valid()); - RMM_LOGGING_ASSERT(sz > 0); - return size() >= sz; + RMM_LOGGING_ASSERT(bytes > 0); + return size() >= bytes; } /** - * @brief Verifies whether this block can be merged to the beginning of block b. + * @brief Verifies whether this block can be merged to the beginning of block blk. * - * @param b The block to check for contiguity. - * @return true Returns true if this block's `pointer` + `size` == `b.pointer`. + * @param blk The block to check for contiguity. + * @return true Returns true if this block's `pointer` + `size` == `blk.pointer`. */ - [[nodiscard]] bool is_contiguous_before(block const& b) const + [[nodiscard]] bool is_contiguous_before(block const& blk) const { RMM_LOGGING_ASSERT(is_valid()); - RMM_LOGGING_ASSERT(b.is_valid()); + RMM_LOGGING_ASSERT(blk.is_valid()); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - return pointer() + size() == b.pointer(); + return pointer() + size() == blk.pointer(); } /** * @brief Split this block into two by the given size. * - * @param sz The size in bytes of the first block. - * @return std::pair A pair of blocks split by sz. + * @param bytes The size in bytes of the first block. + * @return std::pair A pair of blocks split by bytes. */ - [[nodiscard]] std::pair split(std::size_t sz) const + [[nodiscard]] std::pair split(std::size_t bytes) const { RMM_LOGGING_ASSERT(is_valid()); - RMM_LOGGING_ASSERT(size() > sz); + RMM_LOGGING_ASSERT(size() > bytes); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - return {{pointer(), sz}, {pointer() + sz, size() - sz}}; + return {{pointer(), bytes}, {pointer() + bytes, size() - bytes}}; } /** * @brief Coalesce two contiguous blocks into one. * - * `this->is_contiguous_before(b)` must be true. + * `this->is_contiguous_before(blk)` must be true. * - * @param b block to merge. + * @param blk block to merge. * @return block The merged block. */ - [[nodiscard]] block merge(block const& b) const + [[nodiscard]] block merge(block const& blk) const { - RMM_LOGGING_ASSERT(is_contiguous_before(b)); - return {pointer(), size() + b.size()}; + RMM_LOGGING_ASSERT(is_contiguous_before(blk)); + return {pointer(), size() + blk.size()}; } }; @@ -278,7 +278,7 @@ class superblock final : public memory_span { superblock(superblock const&) = delete; superblock& operator=(superblock const&) = delete; // Allow move semantics. - superblock(superblock&& sb) noexcept = default; + superblock(superblock&&) noexcept = default; superblock& operator=(superblock&&) noexcept = default; ~superblock() = default; @@ -308,28 +308,29 @@ class superblock final : public memory_span { /** * @brief Whether this superblock contains the given block. * - * @param b The block to search for. 
+ * @param blk The block to search for. * @return true if the given block belongs to this superblock. */ - [[nodiscard]] bool contains(block const& b) const + [[nodiscard]] bool contains(block const& blk) const { RMM_LOGGING_ASSERT(is_valid()); - RMM_LOGGING_ASSERT(b.is_valid()); + RMM_LOGGING_ASSERT(blk.is_valid()); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - return pointer() <= b.pointer() && pointer() + size() >= b.pointer() + b.size(); + return pointer() <= blk.pointer() && pointer() + size() >= blk.pointer() + blk.size(); } /** - * @brief Can this superblock fit `sz` bytes? + * @brief Can this superblock fit `bytes` bytes? * - * @param sz The size in bytes to check for fit. - * @return true if this superblock can fit `sz` bytes. + * @param bytes The size in bytes to check for fit. + * @return true if this superblock can fit `bytes` bytes. */ - [[nodiscard]] bool fits(std::size_t sz) const + [[nodiscard]] bool fits(std::size_t bytes) const { RMM_LOGGING_ASSERT(is_valid()); - return std::any_of( - free_blocks_.cbegin(), free_blocks_.cend(), [sz](auto const& b) { return b.fits(sz); }); + return std::any_of(free_blocks_.cbegin(), free_blocks_.cend(), [bytes](auto const& blk) { + return blk.fits(bytes); + }); } /** @@ -339,26 +340,26 @@ class superblock final : public memory_span { * @return true Returns true if both superblocks are empty and this superblock's * `pointer` + `size` == `s.ptr`. */ - [[nodiscard]] bool is_contiguous_before(superblock const& sb) const + [[nodiscard]] bool is_contiguous_before(superblock const& sblk) const { RMM_LOGGING_ASSERT(is_valid()); - RMM_LOGGING_ASSERT(sb.is_valid()); + RMM_LOGGING_ASSERT(sblk.is_valid()); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - return empty() && sb.empty() && pointer() + size() == sb.pointer(); + return empty() && sblk.empty() && pointer() + size() == sblk.pointer(); } /** * @brief Split this superblock into two by the given size. * - * @param sz The size in bytes of the first block. - * @return superblock_pair A pair of superblocks split by sz. + * @param bytes The size in bytes of the first block. + * @return superblock_pair A pair of superblocks split by bytes. */ - [[nodiscard]] std::pair split(std::size_t sz) const + [[nodiscard]] std::pair split(std::size_t bytes) const { RMM_LOGGING_ASSERT(is_valid()); - RMM_LOGGING_ASSERT(empty() && sz >= minimum_size && size() >= sz + minimum_size); + RMM_LOGGING_ASSERT(empty() && bytes >= minimum_size && size() >= bytes + minimum_size); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - return {superblock{pointer(), sz}, superblock{pointer() + sz, size() - sz}}; + return {superblock{pointer(), bytes}, superblock{pointer() + bytes, size() - bytes}}; } /** @@ -366,13 +367,13 @@ class superblock final : public memory_span { * * `this->is_contiguous_before(s)` must be true. * - * @param sb superblock to merge. + * @param sblk superblock to merge. * @return block The merged block. */ - [[nodiscard]] superblock merge(superblock const& sb) const + [[nodiscard]] superblock merge(superblock const& sblk) const { - RMM_LOGGING_ASSERT(is_contiguous_before(sb)); - return {pointer(), size() + sb.size()}; + RMM_LOGGING_ASSERT(is_contiguous_before(sblk)); + return {pointer(), size() + sblk.size()}; } /** @@ -391,57 +392,52 @@ class superblock final : public memory_span { if (iter == free_blocks_.cend()) { return {}; } // Remove the block from the free list. 
- auto const b = *iter; + auto const blk = *iter; auto const next = free_blocks_.erase(iter); - if (b.size() > size) { + if (blk.size() > size) { // Split the block and put the remainder back. - auto const split = b.split(size); + auto const split = blk.split(size); free_blocks_.insert(next, split.second); return split.first; } - return b; + return blk; } /** * @brief Coalesce the given block with other free blocks. * - * @param b The block to coalesce. + * @param blk The block to coalesce. */ - void coalesce(block const& b) // NOLINT(readability-function-cognitive-complexity) + void coalesce(block const& blk) // NOLINT(readability-function-cognitive-complexity) { RMM_LOGGING_ASSERT(is_valid()); - RMM_LOGGING_ASSERT(b.is_valid()); - RMM_LOGGING_ASSERT(contains(b)); - - if (free_blocks_.empty()) { - free_blocks_.insert(b); - return; - } + RMM_LOGGING_ASSERT(blk.is_valid()); + RMM_LOGGING_ASSERT(contains(blk)); // Find the right place (in ascending address order) to insert the block. - auto const next = free_blocks_.lower_bound(b); + auto const next = free_blocks_.lower_bound(blk); auto const previous = next == free_blocks_.cbegin() ? next : std::prev(next); // Coalesce with neighboring blocks. - bool const merge_prev = previous->is_contiguous_before(b); - bool const merge_next = next != free_blocks_.cend() && b.is_contiguous_before(*next); + bool const merge_prev = previous != free_blocks_.cend() && previous->is_contiguous_before(blk); + bool const merge_next = next != free_blocks_.cend() && blk.is_contiguous_before(*next); if (merge_prev && merge_next) { - auto const merged = previous->merge(b).merge(*next); + auto const merged = previous->merge(blk).merge(*next); free_blocks_.erase(previous); auto const iter = free_blocks_.erase(next); free_blocks_.insert(iter, merged); } else if (merge_prev) { - auto const merged = previous->merge(b); + auto const merged = previous->merge(blk); auto const iter = free_blocks_.erase(previous); free_blocks_.insert(iter, merged); } else if (merge_next) { - auto const merged = b.merge(*next); + auto const merged = blk.merge(*next); auto const iter = free_blocks_.erase(next); free_blocks_.insert(iter, merged); } else { - free_blocks_.insert(next, b); + free_blocks_.insert(next, blk); } } @@ -464,8 +460,8 @@ class superblock final : public memory_span { inline auto max_free(std::set const& superblocks) { std::size_t size{}; - for (auto const& sb : superblocks) { - size = std::max(size, sb.max_free()); + for (auto const& sblk : superblocks) { + size = std::max(size, sblk.max_free()); } return size; }; @@ -534,7 +530,7 @@ class global_arena final { // Superblocks should only be acquired if the size is not directly handled by the global arena. RMM_LOGGING_ASSERT(!handles(size)); std::lock_guard lock(mtx_); - return first_fit(size, superblock::minimum_size); + return first_fit(size); } /** @@ -542,11 +538,11 @@ class global_arena final { * * @param s Superblock to be released. 
*/ - void release(superblock&& sb) + void release(superblock&& sblk) { - RMM_LOGGING_ASSERT(sb.is_valid()); + RMM_LOGGING_ASSERT(sblk.is_valid()); std::lock_guard lock(mtx_); - coalesce(std::move(sb)); + coalesce(std::move(sblk)); } /** @@ -558,9 +554,9 @@ class global_arena final { { std::lock_guard lock(mtx_); while (!superblocks.empty()) { - auto sb = std::move(superblocks.extract(superblocks.cbegin()).value()); - RMM_LOGGING_ASSERT(sb.is_valid()); - coalesce(std::move(sb)); + auto sblk = std::move(superblocks.extract(superblocks.cbegin()).value()); + RMM_LOGGING_ASSERT(sblk.is_valid()); + coalesce(std::move(sblk)); } } @@ -574,56 +570,57 @@ class global_arena final { { RMM_LOGGING_ASSERT(handles(size)); std::lock_guard lock(mtx_); - auto sb = first_fit(size, size); - if (sb.is_valid()) { - RMM_LOGGING_ASSERT(large_allocations_.find(sb.pointer()) == large_allocations_.cend()); - large_allocations_.emplace(sb.pointer(), sb.size()); + auto sblk = first_fit(size); + if (sblk.is_valid()) { + auto blk = sblk.first_fit(size); + superblocks_.insert(std::move(sblk)); + return blk.pointer(); } - return sb.pointer(); + return nullptr; } /** - * @brief Deallocate memory pointed to by `ptr` directly. + * @brief Deallocate memory pointed to by `ptr`. * * @param ptr Pointer to be deallocated. * @param size The size in bytes of the allocation. This must be equal to the value of `size` * that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation. + * @return bool true if the allocation is found, false otherwise. */ - void deallocate(void* ptr, std::size_t size, cuda_stream_view stream) + bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream) { RMM_LOGGING_ASSERT(handles(size)); stream.synchronize_no_throw(); - std::lock_guard lock(mtx_); - auto const allocated_size = large_allocations_.at(ptr); - large_allocations_.erase(ptr); - coalesce({ptr, allocated_size}); + return deallocate(ptr, size); } /** - * @brief Deallocate memory pointed to by `ptr` that was allocated in a per-thread arena. + * @brief Deallocate memory pointed to by `ptr`. * * @param ptr Pointer to be deallocated. * @param bytes The size in bytes of the allocation. This must be equal to the * value of `bytes` that was passed to the `allocate` call that returned `ptr`. - * @param stream Stream on which to perform deallocation. + * @return bool true if the allocation is found, false otherwise. 
*/ - void deallocate_from_other_arena(void* ptr, std::size_t bytes) + bool deallocate(void* ptr, std::size_t bytes) { std::lock_guard lock(mtx_); - block const b{ptr, bytes}; - auto const iter = std::find_if( - superblocks_.cbegin(), superblocks_.cend(), [&](auto const& sb) { return sb.contains(b); }); - if (iter == superblocks_.cend()) { RMM_FAIL("allocation not found"); } + block const blk{ptr, bytes}; + auto const iter = std::find_if(superblocks_.cbegin(), + superblocks_.cend(), + [&](auto const& sblk) { return sblk.contains(blk); }); + if (iter == superblocks_.cend()) { return false; } - auto sb = std::move(superblocks_.extract(iter).value()); - sb.coalesce(b); - if (sb.empty()) { - coalesce(std::move(sb)); + auto sblk = std::move(superblocks_.extract(iter).value()); + sblk.coalesce(blk); + if (sblk.empty()) { + coalesce(std::move(sblk)); } else { - superblocks_.insert(std::move(sb)); + superblocks_.insert(std::move(sblk)); } + return true; } /** @@ -641,22 +638,21 @@ class global_arena final { logger->info(" Total size of superblocks: {}", rmm::detail::bytes{total_memory_size(superblocks_)}); logger->info(" Size of largest free block: {}", rmm::detail::bytes{max_free(superblocks_)}); - logger->info(" # of outstanding large allocations: {}", large_allocations_.size()); - auto i = 0; + auto index = 0; char* prev_end{}; - for (auto const& sb : superblocks_) { - if (prev_end == nullptr) { prev_end = sb.pointer(); } + for (auto const& sblk : superblocks_) { + if (prev_end == nullptr) { prev_end = sblk.pointer(); } logger->info( " Superblock {}: start={}, end={}, size={}, empty={}, # free blocks={}, gap={}", - i, - fmt::ptr(sb.pointer()), - fmt::ptr(sb.end()), - rmm::detail::bytes{sb.size()}, - sb.empty(), - sb.free_blocks(), - rmm::detail::bytes{static_cast(sb.pointer() - prev_end)}); - prev_end = sb.end(); - i++; + index, + fmt::ptr(sblk.pointer()), + fmt::ptr(sblk.end()), + rmm::detail::bytes{sblk.size()}, + sblk.empty(), + sblk.free_blocks(), + rmm::detail::bytes{static_cast(sblk.pointer() - prev_end)}); + prev_end = sblk.end(); + index++; } } } @@ -698,60 +694,56 @@ class global_arena final { * @param minimum_size The minimum size of the superblock required. * @return superblock A superblock that can fit at least `size` bytes, or empty if not found. */ - superblock first_fit(std::size_t size, std::size_t minimum_size) + superblock first_fit(std::size_t size) { - auto const iter = std::find_if(superblocks_.cbegin(), superblocks_.cend(), [=](auto const& sb) { - return sb.fits(size) && sb.size() >= minimum_size; + auto iter = std::find_if(superblocks_.cbegin(), superblocks_.cend(), [=](auto const& sblk) { + return sblk.fits(size); }); if (iter == superblocks_.cend()) { return {}; } - auto sb = std::move(superblocks_.extract(iter).value()); - if (sb.empty() && sb.size() >= minimum_size + superblock::minimum_size) { + auto sblk = std::move(superblocks_.extract(iter).value()); + auto const min_size = std::max(superblock::minimum_size, size); + if (sblk.empty() && sblk.size() >= min_size + superblock::minimum_size) { // Split the superblock and put the remainder back. - auto [head, tail] = sb.split(minimum_size); + auto [head, tail] = sblk.split(min_size); superblocks_.insert(std::move(tail)); return std::move(head); } - return sb; + return sblk; } /** * @brief Coalesce the given superblock with other empty superblocks. * - * @param sb The superblock to coalesce. + * @param sblk The superblock to coalesce. 
*/ - void coalesce(superblock&& sb) + void coalesce(superblock&& sblk) { - RMM_LOGGING_ASSERT(sb.is_valid()); - - if (superblocks_.empty()) { - superblocks_.insert(std::move(sb)); - return; - } + RMM_LOGGING_ASSERT(sblk.is_valid()); // Find the right place (in ascending address order) to insert the block. - auto const next = superblocks_.lower_bound(sb); + auto const next = superblocks_.lower_bound(sblk); auto const previous = next == superblocks_.cbegin() ? next : std::prev(next); // Coalesce with neighboring blocks. - bool const merge_prev = previous->is_contiguous_before(sb); - bool const merge_next = next != superblocks_.cend() && sb.is_contiguous_before(*next); + bool const merge_prev = previous != superblocks_.cend() && previous->is_contiguous_before(sblk); + bool const merge_next = next != superblocks_.cend() && sblk.is_contiguous_before(*next); if (merge_prev && merge_next) { auto prev_sb = std::move(superblocks_.extract(previous).value()); auto next_sb = std::move(superblocks_.extract(next).value()); - auto merged = prev_sb.merge(sb).merge(next_sb); + auto merged = prev_sb.merge(sblk).merge(next_sb); superblocks_.insert(std::move(merged)); } else if (merge_prev) { auto prev_sb = std::move(superblocks_.extract(previous).value()); - auto merged = prev_sb.merge(sb); + auto merged = prev_sb.merge(sblk); superblocks_.insert(std::move(merged)); } else if (merge_next) { auto next_sb = std::move(superblocks_.extract(next).value()); - auto merged = sb.merge(next_sb); + auto merged = sblk.merge(next_sb); superblocks_.insert(std::move(merged)); } else { - superblocks_.insert(std::move(sb)); + superblocks_.insert(std::move(sblk)); } } @@ -761,8 +753,6 @@ class global_arena final { block upstream_block_; /// Address-ordered set of superblocks. std::set superblocks_; - /// Large allocations. - std::unordered_map large_allocations_; /// Mutex for exclusive lock. mutable std::mutex mtx_; }; @@ -818,12 +808,22 @@ class arena { */ bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream) { - if (global_arena_.handles(size)) { - global_arena_.deallocate(ptr, size, stream); - return true; - } + if (global_arena_.handles(size) && global_arena_.deallocate(ptr, size, stream)) { return true; } + return deallocate(ptr, size); + } + + /** + * @brief Deallocate memory pointed to by `ptr`, and possibly return superblocks to upstream. + * + * @param ptr Pointer to be deallocated. + * @param size The size in bytes of the allocation. This must be equal to the value of `size` + * that was passed to the `allocate` call that returned `p`. + * @return bool true if the allocation is found, false otherwise. 
+ */ + bool deallocate(void* ptr, std::size_t size) + { std::lock_guard lock(mtx_); - return deallocate_from_superblock({ptr, size}, stream); + return deallocate_from_superblock({ptr, size}); } /** @@ -833,6 +833,7 @@ class arena { { std::lock_guard lock(mtx_); global_arena_.release(superblocks_); + superblocks_.clear(); } /** @@ -843,7 +844,7 @@ class arena { std::lock_guard lock(mtx_); while (true) { auto const iter = std::find_if( - superblocks_.cbegin(), superblocks_.cend(), [](auto const& sb) { return sb.empty(); }); + superblocks_.cbegin(), superblocks_.cend(), [](auto const& sblk) { return sblk.empty(); }); if (iter == superblocks_.cend()) { return; } global_arena_.release(std::move(superblocks_.extract(iter).value())); } @@ -863,18 +864,18 @@ class arena { rmm::detail::bytes{total_memory_size(superblocks_)}); logger->info(" Size of largest free block: {}", rmm::detail::bytes{max_free(superblocks_)}); - auto i = 0; - for (auto const& sb : superblocks_) { + auto index = 0; + for (auto const& sblk : superblocks_) { logger->info( " Superblock {}: start={}, end={}, size={}, empty={}, # free blocks={}, max free={}", - i, - fmt::ptr(sb.pointer()), - fmt::ptr(sb.end()), - rmm::detail::bytes{sb.size()}, - sb.empty(), - sb.free_blocks(), - rmm::detail::bytes{sb.max_free()}); - i++; + index, + fmt::ptr(sblk.pointer()), + fmt::ptr(sblk.end()), + rmm::detail::bytes{sblk.size()}, + sblk.empty(), + sblk.free_blocks(), + rmm::detail::bytes{sblk.max_free()}); + index++; } } } @@ -889,8 +890,8 @@ class arena { block get_block(std::size_t size) { // Find the first-fit free block. - auto const b = first_fit(size); - if (b.is_valid()) { return b; } + auto const blk = first_fit(size); + if (blk.is_valid()) { return blk; } // No existing larger blocks available, so grow the arena and obtain a superblock. return expand_arena(size); @@ -911,32 +912,34 @@ class arena { */ block first_fit(std::size_t size) { - auto const iter = std::find_if( - superblocks_.cbegin(), superblocks_.cend(), [size](auto const& sb) { return sb.fits(size); }); + auto const iter = std::find_if(superblocks_.cbegin(), + superblocks_.cend(), + [size](auto const& sblk) { return sblk.fits(size); }); if (iter == superblocks_.cend()) { return {}; } - auto sb = std::move(superblocks_.extract(iter).value()); - auto const b = sb.first_fit(size); - superblocks_.insert(std::move(sb)); - return b; + auto sblk = std::move(superblocks_.extract(iter).value()); + auto const blk = sblk.first_fit(size); + superblocks_.insert(std::move(sblk)); + return blk; } /** * @brief Deallocate a block from the superblock it belongs to. * - * @param b The block to deallocate. + * @param blk The block to deallocate. * @param stream The stream to use for deallocation. * @return true if the block is found. 
*/ - bool deallocate_from_superblock(block const& b, cuda_stream_view stream) + bool deallocate_from_superblock(block const& blk) { - auto const iter = std::find_if( - superblocks_.cbegin(), superblocks_.cend(), [&](auto const& sb) { return sb.contains(b); }); + auto const iter = std::find_if(superblocks_.cbegin(), + superblocks_.cend(), + [&](auto const& sblk) { return sblk.contains(blk); }); if (iter == superblocks_.cend()) { return false; } - auto sb = std::move(superblocks_.extract(iter).value()); - sb.coalesce(b); - superblocks_.insert(std::move(sb)); + auto sblk = std::move(superblocks_.extract(iter).value()); + sblk.coalesce(blk); + superblocks_.insert(std::move(sblk)); return true; } @@ -948,12 +951,12 @@ class arena { */ block expand_arena(std::size_t size) { - auto sb = global_arena_.acquire(size); - if (sb.is_valid()) { - RMM_LOGGING_ASSERT(sb.size() >= superblock::minimum_size); - auto const b = sb.first_fit(size); - superblocks_.insert(std::move(sb)); - return b; + auto sblk = global_arena_.acquire(size); + if (sblk.is_valid()) { + RMM_LOGGING_ASSERT(sblk.size() >= superblock::minimum_size); + auto const blk = sblk.first_fit(size); + superblocks_.insert(std::move(sblk)); + return blk; } return {}; } diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 97db29748..7ed63ec2e 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -395,7 +395,7 @@ TEST_F(ArenaTest, GlobalArenaDeallocateFromOtherArena) // NOLINT auto sb = ga_->acquire(512); auto const b = sb.first_fit(512); ga_->release(std::move(sb)); - ga_->deallocate_from_other_arena(b.pointer(), b.size()); + ga_->deallocate(b.pointer(), b.size()); EXPECT_EQ(ga_->allocate(arena_size_), fake_address3); } From fb9ce95066850e5152c77cb3dff1f778567c98d2 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 7 Dec 2021 12:01:54 -0800 Subject: [PATCH 26/35] log max free in superblock --- include/rmm/mr/device/detail/arena.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index a2579471d..9b1f6869e 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -643,13 +643,15 @@ class global_arena final { for (auto const& sblk : superblocks_) { if (prev_end == nullptr) { prev_end = sblk.pointer(); } logger->info( - " Superblock {}: start={}, end={}, size={}, empty={}, # free blocks={}, gap={}", + " Superblock {}: start={}, end={}, size={}, empty={}, # free blocks={}, max free={}, " + "gap={}", index, fmt::ptr(sblk.pointer()), fmt::ptr(sblk.end()), rmm::detail::bytes{sblk.size()}, sblk.empty(), sblk.free_blocks(), + rmm::detail::bytes{sblk.max_free()}, rmm::detail::bytes{static_cast(sblk.pointer() - prev_end)}); prev_end = sblk.end(); index++; From 65742cbffd5d82a6825a7da185a729f1b477b2ab Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 7 Dec 2021 18:41:25 -0800 Subject: [PATCH 27/35] log fragmentation percentage --- .../rmm/mr/device/arena_memory_resource.hpp | 12 +- include/rmm/mr/device/detail/arena.hpp | 51 ++- tests/mr/device/arena_mr_tests.cpp | 358 +++++++++--------- 3 files changed, 220 insertions(+), 201 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index fd6874705..84e1dd73b 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -225,9 +225,7 @@ class arena_memory_resource final : 
public device_memory_resource { } } - if (!global_arena_.deallocate(ptr, bytes)) { - RMM_FAIL("allocation not found"); - } + if (!global_arena_.deallocate(ptr, bytes)) { RMM_FAIL("allocation not found"); } } /** @@ -307,15 +305,15 @@ class arena_memory_resource final : public device_memory_resource { logger_->info("**************************************************"); logger_->info("Global arena:"); global_arena_.dump_memory_log(logger_); - logger_->info("Per-thread arenas:"); + logger_->debug("Per-thread arenas:"); for (auto const& thread_arena : thread_arenas_) { - logger_->info(" Thread {}:", thread_arena.first); + logger_->debug(" Thread {}:", thread_arena.first); thread_arena.second->dump_memory_log(logger_); } if (!stream_arenas_.empty()) { - logger_->info("Per-stream arenas:"); + logger_->debug("Per-stream arenas:"); for (auto const& stream_arena : stream_arenas_) { - logger_->info(" Stream {}:", static_cast(stream_arena.first)); + logger_->debug(" Stream {}:", static_cast(stream_arena.first)); stream_arena.second.dump_memory_log(logger_); } } diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 9b1f6869e..2c823becf 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -441,11 +441,17 @@ class superblock final : public memory_span { } } + /** + * @brief Find the total free block size. + * @return the total free block size. + */ + [[nodiscard]] std::size_t total_free_size() const { return total_memory_size(free_blocks_); } + /** * @brief Find the max free block size. * @return the max free block size. */ - [[nodiscard]] std::size_t max_free() const + [[nodiscard]] std::size_t max_free_size() const { if (free_blocks_.empty()) { return 0; } return std::max_element(free_blocks_.cbegin(), free_blocks_.cend(), block_size_compare)->size(); @@ -456,12 +462,21 @@ class superblock final : public memory_span { std::set free_blocks_{}; }; +/// Calculate the total free size of a set of superblocks. +inline auto total_free_size(std::set const& superblocks) +{ + return std::accumulate( + superblocks.cbegin(), superblocks.cend(), std::size_t{}, [](auto const& lhs, auto const& rhs) { + return lhs + rhs.total_free_size(); + }); +} + /// Find the max free size from a set of superblocks. 
-inline auto max_free(std::set const& superblocks) +inline auto max_free_size(std::set const& superblocks) { std::size_t size{}; for (auto const& sblk : superblocks) { - size = std::max(size, sblk.max_free()); + size = std::max(size, sblk.max_free_size()); } return size; }; @@ -635,14 +650,20 @@ class global_arena final { logger->info(" Arena size: {}", rmm::detail::bytes{upstream_block_.size()}); logger->info(" # superblocks: {}", superblocks_.size()); if (!superblocks_.empty()) { - logger->info(" Total size of superblocks: {}", - rmm::detail::bytes{total_memory_size(superblocks_)}); - logger->info(" Size of largest free block: {}", rmm::detail::bytes{max_free(superblocks_)}); + logger->debug(" Total size of superblocks: {}", + rmm::detail::bytes{total_memory_size(superblocks_)}); + auto const total_free = total_free_size(superblocks_); + auto const max_free = max_free_size(superblocks_); + auto const fragmentation = (1 - max_free / static_cast(total_free)) * 100; + logger->info(" Total free memory: {}", rmm::detail::bytes{total_free}); + logger->info(" Largest block of free memory: {}", rmm::detail::bytes{max_free}); + logger->info(" Fragmentation: {:.2f}%", fragmentation); + auto index = 0; char* prev_end{}; for (auto const& sblk : superblocks_) { if (prev_end == nullptr) { prev_end = sblk.pointer(); } - logger->info( + logger->debug( " Superblock {}: start={}, end={}, size={}, empty={}, # free blocks={}, max free={}, " "gap={}", index, @@ -651,7 +672,7 @@ class global_arena final { rmm::detail::bytes{sblk.size()}, sblk.empty(), sblk.free_blocks(), - rmm::detail::bytes{sblk.max_free()}, + rmm::detail::bytes{sblk.max_free_size()}, rmm::detail::bytes{static_cast(sblk.pointer() - prev_end)}); prev_end = sblk.end(); index++; @@ -860,15 +881,15 @@ class arena { void dump_memory_log(std::shared_ptr const& logger) const { std::lock_guard lock(mtx_); - logger->info(" # superblocks: {}", superblocks_.size()); + logger->debug(" # superblocks: {}", superblocks_.size()); if (!superblocks_.empty()) { - logger->info(" Total size of superblocks: {}", - rmm::detail::bytes{total_memory_size(superblocks_)}); - logger->info(" Size of largest free block: {}", - rmm::detail::bytes{max_free(superblocks_)}); + logger->debug(" Total size of superblocks: {}", + rmm::detail::bytes{total_memory_size(superblocks_)}); + logger->debug(" Size of largest free block: {}", + rmm::detail::bytes{max_free_size(superblocks_)}); auto index = 0; for (auto const& sblk : superblocks_) { - logger->info( + logger->debug( " Superblock {}: start={}, end={}, size={}, empty={}, # free blocks={}, max free={}", index, fmt::ptr(sblk.pointer()), @@ -876,7 +897,7 @@ class arena { rmm::detail::bytes{sblk.size()}, sblk.empty(), sblk.free_blocks(), - rmm::detail::bytes{sblk.max_free()}); + rmm::detail::bytes{sblk.max_free_size()}); index++; } } diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 7ed63ec2e..bd6d81f2c 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -59,15 +59,15 @@ class ArenaTest : public ::testing::Test { { EXPECT_CALL(mock_, allocate(arena_size_)).WillOnce(Return(fake_address3)); EXPECT_CALL(mock_, deallocate(fake_address3, arena_size_)); - ga_ = std::make_unique(&mock_, arena_size_); - a_ = std::make_unique(*ga_); + global_arena_ = std::make_unique(&mock_, arena_size_); + arena_ = std::make_unique(*global_arena_); } // NOLINTBEGIN(cppcoreguidelines-non-private-member-variables-in-classes) std::size_t arena_size_{superblock::minimum_size * 
4}; mock_memory_resource mock_{}; - std::unique_ptr ga_{}; - std::unique_ptr a_{}; + std::unique_ptr global_arena_{}; + std::unique_ptr arena_{}; // NOLINTEND(cppcoreguidelines-non-private-member-variables-in-classes) }; @@ -94,8 +94,8 @@ TEST_F(ArenaTest, AlignToSizeClass) // NOLINT TEST_F(ArenaTest, MemorySpan) // NOLINT { - memory_span const ms{}; - EXPECT_FALSE(ms.is_valid()); + memory_span const mem_span{}; + EXPECT_FALSE(mem_span.is_valid()); memory_span const ms2{fake_address, 256}; EXPECT_TRUE(ms2.is_valid()); } @@ -106,25 +106,25 @@ TEST_F(ArenaTest, MemorySpan) // NOLINT TEST_F(ArenaTest, BlockFits) // NOLINT { - block const b{fake_address, 1_KiB}; - EXPECT_TRUE(b.fits(1_KiB)); - EXPECT_FALSE(b.fits(1_KiB + 1)); + block const blk{fake_address, 1_KiB}; + EXPECT_TRUE(blk.fits(1_KiB)); + EXPECT_FALSE(blk.fits(1_KiB + 1)); } TEST_F(ArenaTest, BlockIsContiguousBefore) // NOLINT { - block const b{fake_address, 1_KiB}; - block const b2{fake_address2, 256}; - EXPECT_TRUE(b.is_contiguous_before(b2)); - block const b3{fake_address, 512}; - block const b4{fake_address2, 1_KiB}; - EXPECT_FALSE(b3.is_contiguous_before(b4)); + block const blk{fake_address, 1_KiB}; + block const blk2{fake_address2, 256}; + EXPECT_TRUE(blk.is_contiguous_before(blk2)); + block const blk3{fake_address, 512}; + block const blk4{fake_address2, 1_KiB}; + EXPECT_FALSE(blk3.is_contiguous_before(blk4)); } TEST_F(ArenaTest, BlockSplit) // NOLINT { - block const b{fake_address, 2_KiB}; - auto const [head, tail] = b.split(1_KiB); + block const blk{fake_address, 2_KiB}; + auto const [head, tail] = blk.split(1_KiB); EXPECT_EQ(head.pointer(), fake_address); EXPECT_EQ(head.size(), 1_KiB); EXPECT_EQ(tail.pointer(), fake_address2); @@ -133,9 +133,9 @@ TEST_F(ArenaTest, BlockSplit) // NOLINT TEST_F(ArenaTest, BlockMerge) // NOLINT { - block const b{fake_address, 1_KiB}; - block const b2{fake_address2, 1_KiB}; - auto const merged = b.merge(b2); + block const blk{fake_address, 1_KiB}; + block const blk2{fake_address2, 1_KiB}; + auto const merged = blk.merge(blk2); EXPECT_EQ(merged.pointer(), fake_address); EXPECT_EQ(merged.size(), 2_KiB); } @@ -146,61 +146,61 @@ TEST_F(ArenaTest, BlockMerge) // NOLINT TEST_F(ArenaTest, SuperblockEmpty) // NOLINT { - superblock sb{fake_address3, superblock::minimum_size}; - EXPECT_TRUE(sb.empty()); - sb.first_fit(256); - EXPECT_FALSE(sb.empty()); + superblock sblk{fake_address3, superblock::minimum_size}; + EXPECT_TRUE(sblk.empty()); + sblk.first_fit(256); + EXPECT_FALSE(sblk.empty()); } TEST_F(ArenaTest, SuperblockContains) // NOLINT { - superblock const sb{fake_address3, superblock::minimum_size}; - block const b{fake_address, 2_KiB}; - EXPECT_FALSE(sb.contains(b)); - block const b2{fake_address3, 1_KiB}; - EXPECT_TRUE(sb.contains(b2)); - block const b3{fake_address3, superblock::minimum_size + 1}; - EXPECT_FALSE(sb.contains(b3)); - block const b4{fake_address3, superblock::minimum_size}; - EXPECT_TRUE(sb.contains(b4)); - block const b5{fake_address4, 256}; - EXPECT_FALSE(sb.contains(b5)); + superblock const sblk{fake_address3, superblock::minimum_size}; + block const blk{fake_address, 2_KiB}; + EXPECT_FALSE(sblk.contains(blk)); + block const blk2{fake_address3, 1_KiB}; + EXPECT_TRUE(sblk.contains(blk2)); + block const blk3{fake_address3, superblock::minimum_size + 1}; + EXPECT_FALSE(sblk.contains(blk3)); + block const blk4{fake_address3, superblock::minimum_size}; + EXPECT_TRUE(sblk.contains(blk4)); + block const blk5{fake_address4, 256}; + EXPECT_FALSE(sblk.contains(blk5)); } TEST_F(ArenaTest, 
SuperblockFits) // NOLINT { - superblock sb{fake_address3, superblock::minimum_size}; - EXPECT_TRUE(sb.fits(superblock::minimum_size)); - EXPECT_FALSE(sb.fits(superblock::minimum_size + 1)); + superblock sblk{fake_address3, superblock::minimum_size}; + EXPECT_TRUE(sblk.fits(superblock::minimum_size)); + EXPECT_FALSE(sblk.fits(superblock::minimum_size + 1)); - auto const b = sb.first_fit(superblock::minimum_size / 4); - sb.first_fit(superblock::minimum_size / 4); - sb.coalesce(b); - EXPECT_TRUE(sb.fits(superblock::minimum_size / 2)); - EXPECT_FALSE(sb.fits(superblock::minimum_size / 2 + 1)); + auto const blk = sblk.first_fit(superblock::minimum_size / 4); + sblk.first_fit(superblock::minimum_size / 4); + sblk.coalesce(blk); + EXPECT_TRUE(sblk.fits(superblock::minimum_size / 2)); + EXPECT_FALSE(sblk.fits(superblock::minimum_size / 2 + 1)); } TEST_F(ArenaTest, SuperblockIsContiguousBefore) // NOLINT { - superblock sb{fake_address3, superblock::minimum_size}; + superblock sblk{fake_address3, superblock::minimum_size}; superblock sb2{fake_address4, superblock::minimum_size}; - EXPECT_TRUE(sb.is_contiguous_before(sb2)); + EXPECT_TRUE(sblk.is_contiguous_before(sb2)); - auto const b = sb.first_fit(256); - EXPECT_FALSE(sb.is_contiguous_before(sb2)); - sb.coalesce(b); - EXPECT_TRUE(sb.is_contiguous_before(sb2)); + auto const blk = sblk.first_fit(256); + EXPECT_FALSE(sblk.is_contiguous_before(sb2)); + sblk.coalesce(blk); + EXPECT_TRUE(sblk.is_contiguous_before(sb2)); - auto const b2 = sb2.first_fit(1_KiB); - EXPECT_FALSE(sb.is_contiguous_before(sb2)); - sb2.coalesce(b2); - EXPECT_TRUE(sb.is_contiguous_before(sb2)); + auto const blk2 = sb2.first_fit(1_KiB); + EXPECT_FALSE(sblk.is_contiguous_before(sb2)); + sb2.coalesce(blk2); + EXPECT_TRUE(sblk.is_contiguous_before(sb2)); } TEST_F(ArenaTest, SuperblockSplit) // NOLINT { - superblock sb{fake_address3, superblock::minimum_size * 2}; - auto const [head, tail] = sb.split(superblock::minimum_size); + superblock sblk{fake_address3, superblock::minimum_size * 2}; + auto const [head, tail] = sblk.split(superblock::minimum_size); EXPECT_EQ(head.pointer(), fake_address3); EXPECT_EQ(head.size(), superblock::minimum_size); EXPECT_TRUE(head.empty()); @@ -211,9 +211,9 @@ TEST_F(ArenaTest, SuperblockSplit) // NOLINT TEST_F(ArenaTest, SuperblockMerge) // NOLINT { - superblock sb{fake_address3, superblock::minimum_size}; + superblock sblk{fake_address3, superblock::minimum_size}; superblock sb2{fake_address4, superblock::minimum_size}; - auto const merged = sb.merge(sb2); + auto const merged = sblk.merge(sb2); EXPECT_EQ(merged.pointer(), fake_address3); EXPECT_EQ(merged.size(), superblock::minimum_size * 2); EXPECT_TRUE(merged.empty()); @@ -221,71 +221,71 @@ TEST_F(ArenaTest, SuperblockMerge) // NOLINT TEST_F(ArenaTest, SuperblockFirstFit) // NOLINT { - superblock sb{fake_address3, superblock::minimum_size}; - auto const b = sb.first_fit(1_KiB); - EXPECT_EQ(b.pointer(), fake_address3); - EXPECT_EQ(b.size(), 1_KiB); - auto const b2 = sb.first_fit(2_KiB); + superblock sblk{fake_address3, superblock::minimum_size}; + auto const blk = sblk.first_fit(1_KiB); + EXPECT_EQ(blk.pointer(), fake_address3); + EXPECT_EQ(blk.size(), 1_KiB); + auto const blk2 = sblk.first_fit(2_KiB); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - EXPECT_EQ(b2.pointer(), static_cast(fake_address3) + 1_KiB); - EXPECT_EQ(b2.size(), 2_KiB); - sb.coalesce(b); - auto const b3 = sb.first_fit(512); - EXPECT_EQ(b3.pointer(), fake_address3); - EXPECT_EQ(b3.size(), 512); + 
EXPECT_EQ(blk2.pointer(), static_cast(fake_address3) + 1_KiB); + EXPECT_EQ(blk2.size(), 2_KiB); + sblk.coalesce(blk); + auto const blk3 = sblk.first_fit(512); + EXPECT_EQ(blk3.pointer(), fake_address3); + EXPECT_EQ(blk3.size(), 512); } TEST_F(ArenaTest, SuperblockCoalesceAfterFull) // NOLINT { - superblock sb{fake_address3, superblock::minimum_size}; - auto const b = sb.first_fit(superblock::minimum_size / 2); - sb.first_fit(superblock::minimum_size / 2); - sb.coalesce(b); - EXPECT_TRUE(sb.first_fit(superblock::minimum_size / 2).is_valid()); + superblock sblk{fake_address3, superblock::minimum_size}; + auto const blk = sblk.first_fit(superblock::minimum_size / 2); + sblk.first_fit(superblock::minimum_size / 2); + sblk.coalesce(blk); + EXPECT_TRUE(sblk.first_fit(superblock::minimum_size / 2).is_valid()); } TEST_F(ArenaTest, SuperblockCoalesceMergeNext) // NOLINT { - superblock sb{fake_address3, superblock::minimum_size}; - auto const b = sb.first_fit(superblock::minimum_size / 2); - sb.coalesce(b); - EXPECT_TRUE(sb.first_fit(superblock::minimum_size).is_valid()); + superblock sblk{fake_address3, superblock::minimum_size}; + auto const blk = sblk.first_fit(superblock::minimum_size / 2); + sblk.coalesce(blk); + EXPECT_TRUE(sblk.first_fit(superblock::minimum_size).is_valid()); } TEST_F(ArenaTest, SuperblockCoalesceMergePrevious) // NOLINT { - superblock sb{fake_address3, superblock::minimum_size}; - auto const b = sb.first_fit(1_KiB); - auto const b2 = sb.first_fit(1_KiB); - sb.first_fit(1_KiB); - sb.coalesce(b); - sb.coalesce(b2); - auto const b3 = sb.first_fit(2_KiB); - EXPECT_EQ(b3.pointer(), fake_address3); + superblock sblk{fake_address3, superblock::minimum_size}; + auto const blk = sblk.first_fit(1_KiB); + auto const blk2 = sblk.first_fit(1_KiB); + sblk.first_fit(1_KiB); + sblk.coalesce(blk); + sblk.coalesce(blk2); + auto const blk3 = sblk.first_fit(2_KiB); + EXPECT_EQ(blk3.pointer(), fake_address3); } TEST_F(ArenaTest, SuperblockCoalesceMergePreviousAndNext) // NOLINT { - superblock sb{fake_address3, superblock::minimum_size}; - auto const b = sb.first_fit(1_KiB); - auto const b2 = sb.first_fit(1_KiB); - sb.coalesce(b); - sb.coalesce(b2); - EXPECT_TRUE(sb.first_fit(superblock::minimum_size).is_valid()); + superblock sblk{fake_address3, superblock::minimum_size}; + auto const blk = sblk.first_fit(1_KiB); + auto const blk2 = sblk.first_fit(1_KiB); + sblk.coalesce(blk); + sblk.coalesce(blk2); + EXPECT_TRUE(sblk.first_fit(superblock::minimum_size).is_valid()); } -TEST_F(ArenaTest, SuperblockMaxFree) // NOLINT +TEST_F(ArenaTest, SuperblockMaxFreeSize) // NOLINT { - superblock sb{fake_address3, superblock::minimum_size}; - sb.first_fit(superblock::minimum_size / 2); - EXPECT_EQ(sb.max_free(), superblock::minimum_size / 2); + superblock sblk{fake_address3, superblock::minimum_size}; + sblk.first_fit(superblock::minimum_size / 2); + EXPECT_EQ(sblk.max_free_size(), superblock::minimum_size / 2); } -TEST_F(ArenaTest, SuperblockMaxFreeWhenFull) // NOLINT +TEST_F(ArenaTest, SuperblockMaxFreeSizeWhenFull) // NOLINT { - superblock sb{fake_address3, superblock::minimum_size}; - sb.first_fit(superblock::minimum_size); - EXPECT_EQ(sb.max_free(), 0); + superblock sblk{fake_address3, superblock::minimum_size}; + sblk.first_fit(superblock::minimum_size); + EXPECT_EQ(sblk.max_free_size(), 0); } /** @@ -294,109 +294,109 @@ TEST_F(ArenaTest, SuperblockMaxFreeWhenFull) // NOLINT TEST_F(ArenaTest, GlobalArenaNullUpstream) // NOLINT { - auto construct_nullptr = []() { global_arena ga{nullptr, std::nullopt}; }; 
+ auto construct_nullptr = []() { global_arena global{nullptr, std::nullopt}; }; EXPECT_THROW(construct_nullptr(), rmm::logic_error); // NOLINT(cppcoreguidelines-avoid-goto) } TEST_F(ArenaTest, GlobalArenaAcquire) // NOLINT { - auto const sb = ga_->acquire(256); - EXPECT_EQ(sb.pointer(), fake_address3); - EXPECT_EQ(sb.size(), superblock::minimum_size); - EXPECT_TRUE(sb.empty()); + auto const sblk = global_arena_->acquire(256); + EXPECT_EQ(sblk.pointer(), fake_address3); + EXPECT_EQ(sblk.size(), superblock::minimum_size); + EXPECT_TRUE(sblk.empty()); - auto const sb2 = ga_->acquire(1_KiB); + auto const sb2 = global_arena_->acquire(1_KiB); EXPECT_EQ(sb2.pointer(), fake_address4); EXPECT_EQ(sb2.size(), superblock::minimum_size); EXPECT_TRUE(sb2.empty()); - ga_->acquire(512); - ga_->acquire(512); - EXPECT_FALSE(ga_->acquire(512).is_valid()); + global_arena_->acquire(512); + global_arena_->acquire(512); + EXPECT_FALSE(global_arena_->acquire(512).is_valid()); } TEST_F(ArenaTest, GlobalArenaReleaseMergeNext) // NOLINT { - auto sb = ga_->acquire(256); - ga_->release(std::move(sb)); - auto* p = ga_->allocate(arena_size_); - EXPECT_EQ(p, fake_address3); + auto sblk = global_arena_->acquire(256); + global_arena_->release(std::move(sblk)); + auto* ptr = global_arena_->allocate(arena_size_); + EXPECT_EQ(ptr, fake_address3); } TEST_F(ArenaTest, GlobalArenaReleaseMergePrevious) // NOLINT { - auto sb = ga_->acquire(256); - auto sb2 = ga_->acquire(1_KiB); - ga_->acquire(512); - ga_->release(std::move(sb)); - ga_->release(std::move(sb2)); - auto* p = ga_->allocate(superblock::minimum_size * 2); - EXPECT_EQ(p, fake_address3); + auto sblk = global_arena_->acquire(256); + auto sb2 = global_arena_->acquire(1_KiB); + global_arena_->acquire(512); + global_arena_->release(std::move(sblk)); + global_arena_->release(std::move(sb2)); + auto* ptr = global_arena_->allocate(superblock::minimum_size * 2); + EXPECT_EQ(ptr, fake_address3); } TEST_F(ArenaTest, GlobalArenaReleaseMergePreviousAndNext) // NOLINT { - auto sb = ga_->acquire(256); - auto sb2 = ga_->acquire(1_KiB); - auto sb3 = ga_->acquire(512); - ga_->release(std::move(sb)); - ga_->release(std::move(sb3)); - ga_->release(std::move(sb2)); - auto* p = ga_->allocate(arena_size_); - EXPECT_EQ(p, fake_address3); + auto sblk = global_arena_->acquire(256); + auto sb2 = global_arena_->acquire(1_KiB); + auto sb3 = global_arena_->acquire(512); + global_arena_->release(std::move(sblk)); + global_arena_->release(std::move(sb3)); + global_arena_->release(std::move(sb2)); + auto* ptr = global_arena_->allocate(arena_size_); + EXPECT_EQ(ptr, fake_address3); } TEST_F(ArenaTest, GlobalArenaReleaseMultiple) // NOLINT { std::set superblocks{}; - auto sb = ga_->acquire(256); - superblocks.insert(std::move(sb)); - auto sb2 = ga_->acquire(1_KiB); + auto sblk = global_arena_->acquire(256); + superblocks.insert(std::move(sblk)); + auto sb2 = global_arena_->acquire(1_KiB); superblocks.insert(std::move(sb2)); - auto sb3 = ga_->acquire(512); + auto sb3 = global_arena_->acquire(512); superblocks.insert(std::move(sb3)); - ga_->release(superblocks); - auto* p = ga_->allocate(arena_size_); - EXPECT_EQ(p, fake_address3); + global_arena_->release(superblocks); + auto* ptr = global_arena_->allocate(arena_size_); + EXPECT_EQ(ptr, fake_address3); } TEST_F(ArenaTest, GlobalArenaAllocate) // NOLINT { - auto* ptr = ga_->allocate(superblock::minimum_size * 2); + auto* ptr = global_arena_->allocate(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); } TEST_F(ArenaTest, 
GlobalArenaAllocateExtraLarge) // NOLINT { - EXPECT_EQ(ga_->allocate(1_PiB), nullptr); - EXPECT_EQ(ga_->allocate(1_PiB), nullptr); + EXPECT_EQ(global_arena_->allocate(1_PiB), nullptr); + EXPECT_EQ(global_arena_->allocate(1_PiB), nullptr); } TEST_F(ArenaTest, GlobalArenaDeallocate) // NOLINT { - auto* ptr = ga_->allocate(superblock::minimum_size * 2); + auto* ptr = global_arena_->allocate(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); - ga_->deallocate(ptr, superblock::minimum_size * 2, {}); - ptr = ga_->allocate(superblock::minimum_size * 2); + global_arena_->deallocate(ptr, superblock::minimum_size * 2, {}); + ptr = global_arena_->allocate(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); } TEST_F(ArenaTest, GlobalArenaDeallocateAlignUp) // NOLINT { - auto* ptr = ga_->allocate(superblock::minimum_size + 256); - auto* ptr2 = ga_->allocate(superblock::minimum_size + 512); - ga_->deallocate(ptr, superblock::minimum_size + 256, {}); - ga_->deallocate(ptr2, superblock::minimum_size + 512, {}); - EXPECT_EQ(ga_->allocate(arena_size_), fake_address3); + auto* ptr = global_arena_->allocate(superblock::minimum_size + 256); + auto* ptr2 = global_arena_->allocate(superblock::minimum_size + 512); + global_arena_->deallocate(ptr, superblock::minimum_size + 256, {}); + global_arena_->deallocate(ptr2, superblock::minimum_size + 512, {}); + EXPECT_EQ(global_arena_->allocate(arena_size_), fake_address3); } TEST_F(ArenaTest, GlobalArenaDeallocateFromOtherArena) // NOLINT { - auto sb = ga_->acquire(512); - auto const b = sb.first_fit(512); - ga_->release(std::move(sb)); - ga_->deallocate(b.pointer(), b.size()); - EXPECT_EQ(ga_->allocate(arena_size_), fake_address3); + auto sblk = global_arena_->acquire(512); + auto const blk = sblk.first_fit(512); + global_arena_->release(std::move(sblk)); + global_arena_->deallocate(blk.pointer(), blk.size()); + EXPECT_EQ(global_arena_->allocate(arena_size_), fake_address3); } /** @@ -405,46 +405,46 @@ TEST_F(ArenaTest, GlobalArenaDeallocateFromOtherArena) // NOLINT TEST_F(ArenaTest, ArenaAllocate) // NOLINT { - EXPECT_EQ(a_->allocate(superblock::minimum_size), fake_address3); - EXPECT_EQ(a_->allocate(256), fake_address4); + EXPECT_EQ(arena_->allocate(superblock::minimum_size), fake_address3); + EXPECT_EQ(arena_->allocate(256), fake_address4); } TEST_F(ArenaTest, ArenaDeallocate) // NOLINT { - auto* ptr = a_->allocate(superblock::minimum_size); - a_->deallocate(ptr, superblock::minimum_size, {}); - auto* ptr2 = a_->allocate(256); - a_->deallocate(ptr2, 256, {}); - EXPECT_EQ(a_->allocate(superblock::minimum_size), fake_address3); + auto* ptr = arena_->allocate(superblock::minimum_size); + arena_->deallocate(ptr, superblock::minimum_size, {}); + auto* ptr2 = arena_->allocate(256); + arena_->deallocate(ptr2, 256, {}); + EXPECT_EQ(arena_->allocate(superblock::minimum_size), fake_address3); } TEST_F(ArenaTest, ArenaDeallocateMergePrevious) // NOLINT { - auto* ptr = a_->allocate(256); - auto* ptr2 = a_->allocate(256); - a_->allocate(256); - a_->deallocate(ptr, 256, {}); - a_->deallocate(ptr2, 256, {}); - EXPECT_EQ(a_->allocate(512), fake_address3); + auto* ptr = arena_->allocate(256); + auto* ptr2 = arena_->allocate(256); + arena_->allocate(256); + arena_->deallocate(ptr, 256, {}); + arena_->deallocate(ptr2, 256, {}); + EXPECT_EQ(arena_->allocate(512), fake_address3); } TEST_F(ArenaTest, ArenaDeallocateMergeNext) // NOLINT { - auto* ptr = a_->allocate(256); - auto* ptr2 = a_->allocate(256); - a_->allocate(256); - a_->deallocate(ptr2, 256, {}); - 
a_->deallocate(ptr, 256, {}); - EXPECT_EQ(a_->allocate(512), fake_address3); + auto* ptr = arena_->allocate(256); + auto* ptr2 = arena_->allocate(256); + arena_->allocate(256); + arena_->deallocate(ptr2, 256, {}); + arena_->deallocate(ptr, 256, {}); + EXPECT_EQ(arena_->allocate(512), fake_address3); } TEST_F(ArenaTest, ArenaDeallocateMergePreviousAndNext) // NOLINT { - auto* ptr = a_->allocate(256); - auto* ptr2 = a_->allocate(256); - a_->deallocate(ptr, 256, {}); - a_->deallocate(ptr2, 256, {}); - EXPECT_EQ(a_->allocate(2_KiB), fake_address3); + auto* ptr = arena_->allocate(256); + auto* ptr2 = arena_->allocate(256); + arena_->deallocate(ptr, 256, {}); + arena_->deallocate(ptr2, 256, {}); + EXPECT_EQ(arena_->allocate(2_KiB), fake_address3); } TEST_F(ArenaTest, ArenaDefragment) // NOLINT @@ -452,14 +452,14 @@ TEST_F(ArenaTest, ArenaDefragment) // NOLINT std::vector pointers; std::size_t num_pointers{4}; for (std::size_t i = 0; i < num_pointers; i++) { - pointers.push_back(a_->allocate(superblock::minimum_size)); + pointers.push_back(arena_->allocate(superblock::minimum_size)); } for (auto* ptr : pointers) { - a_->deallocate(ptr, superblock::minimum_size, {}); + arena_->deallocate(ptr, superblock::minimum_size, {}); } - EXPECT_EQ(ga_->allocate(arena_size_), nullptr); - a_->defragment(); - EXPECT_EQ(ga_->allocate(arena_size_), fake_address3); + EXPECT_EQ(global_arena_->allocate(arena_size_), nullptr); + arena_->defragment(); + EXPECT_EQ(global_arena_->allocate(arena_size_), fake_address3); } /** From b92c9eb6418bb109bc5f40361edad6dcf899e25a Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 8 Dec 2021 15:21:02 -0800 Subject: [PATCH 28/35] minor fix --- include/rmm/mr/device/arena_memory_resource.hpp | 2 ++ include/rmm/mr/device/detail/arena.hpp | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 84e1dd73b..b01dec56d 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -89,6 +89,8 @@ class arena_memory_resource final : public device_memory_resource { { if (dump_log_on_failure_) { logger_ = spdlog::basic_logger_mt("arena_memory_dump", "rmm_arena_memory_dump.log"); + // Set the level to `debug` for more detailed output. 
+ logger_->set_level(spdlog::level::info); } } diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 2c823becf..61611e091 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -719,9 +719,9 @@ class global_arena final { */ superblock first_fit(std::size_t size) { - auto iter = std::find_if(superblocks_.cbegin(), superblocks_.cend(), [=](auto const& sblk) { - return sblk.fits(size); - }); + auto const iter = std::find_if(superblocks_.cbegin(), + superblocks_.cend(), + [=](auto const& sblk) { return sblk.fits(size); }); if (iter == superblocks_.cend()) { return {}; } auto sblk = std::move(superblocks_.extract(iter).value()); From 5452b82557ad51552b3df970682bb68ce0f2cc25 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Thu, 9 Dec 2021 09:17:22 -0800 Subject: [PATCH 29/35] clang format --- tests/mr/device/arena_mr_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index bd6d81f2c..f2369d189 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include "../../byte_literals.hpp" #include #include #include @@ -22,12 +21,13 @@ #include #include #include +#include "../../byte_literals.hpp" #include #include -#include #include +#include namespace rmm::test { namespace { From c782893763421a8f3dd3f3e7cd3067c2ed59e1dc Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Thu, 9 Dec 2021 15:06:39 -0800 Subject: [PATCH 30/35] address review comments --- .../rmm/mr/device/arena_memory_resource.hpp | 1 + include/rmm/mr/device/detail/arena.hpp | 28 ++++++++++--------- tests/mr/device/arena_mr_tests.cpp | 18 ++++++------ 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index b01dec56d..4d20f7cfa 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -81,6 +81,7 @@ class arena_memory_resource final : public device_memory_resource { * @param upstream_mr The memory resource from which to allocate blocks for the global arena. * @param arena_size Size in bytes of the global arena. Defaults to half of the available memory * on the current device. + * @param dump_log_on_failure If true, dump memory log when running out of memory. */ explicit arena_memory_resource(Upstream* upstream_mr, std::optional arena_size = std::nullopt, diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 61611e091..c22834474 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -120,12 +120,12 @@ inline std::size_t align_to_size_class(std::size_t value) noexcept /** * @brief Represents a contiguous region of memory. */ -class memory_span { +class byte_span { public: /** * @brief Construct a default span. */ - memory_span() = default; + byte_span() = default; /** * @brief Construct a span given a pointer and size. @@ -133,7 +133,7 @@ class memory_span { * @param pointer The address for the beginning of the span. * @param size The size of the span. 
*/ - memory_span(void* pointer, std::size_t size) : pointer_{static_cast(pointer)}, size_{size} + byte_span(void* pointer, std::size_t size) : pointer_{static_cast(pointer)}, size_{size} { RMM_LOGGING_ASSERT(pointer != nullptr); RMM_LOGGING_ASSERT(size > 0); @@ -155,10 +155,10 @@ class memory_span { [[nodiscard]] bool is_valid() const { return pointer_ != nullptr && size_ > 0; } /// Used by std::set to compare spans. - bool operator<(memory_span const& mem_span) const + bool operator<(byte_span const& span) const { - RMM_LOGGING_ASSERT(mem_span.is_valid()); - return pointer_ < mem_span.pointer_; + RMM_LOGGING_ASSERT(span.is_valid()); + return pointer_ < span.pointer_; } private: @@ -166,7 +166,7 @@ class memory_span { std::size_t size_{}; ///< Size in bytes. }; -/// Calculate the total size of a set of memory spans. +/// Calculate the total size of a set of spans. template inline auto total_memory_size(std::set const& spans) { @@ -179,9 +179,9 @@ inline auto total_memory_size(std::set const& spans) /** * @brief Represents a chunk of memory that can be allocated and deallocated. */ -class block final : public memory_span { +class block final : public byte_span { public: - using memory_span::memory_span; + using byte_span::byte_span; /** * @brief Is this block large enough to fit `bytes` bytes? @@ -251,10 +251,12 @@ inline bool block_size_compare(block const& lhs, block const& rhs) * @brief Represents a large chunk of memory that is exchanged between the global arena and * per-thread arenas. */ -class superblock final : public memory_span { +class superblock final : public byte_span { public: /// Minimum size of a superblock (1 MiB). - static constexpr std::size_t minimum_size{1U << 20U}; + static constexpr std::size_t minimum_size{1UL << 20}; + /// Maximum size of a superblock (1 TiB), as a sanity check. + static constexpr std::size_t maximum_size{1UL << 40}; /** * @brief Construct a default superblock. @@ -267,10 +269,10 @@ class superblock final : public memory_span { * @param pointer The address for the beginning of the superblock. * @param size The size of the superblock. */ - superblock(void* pointer, std::size_t size) : memory_span{pointer, size} + superblock(void* pointer, std::size_t size) : byte_span{pointer, size} { RMM_LOGGING_ASSERT(size >= minimum_size); - RMM_LOGGING_ASSERT(size < 1UL << 40UL); + RMM_LOGGING_ASSERT(size <= maximum_size); free_blocks_.emplace(pointer, size); } diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index f2369d189..b856050bb 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -38,9 +38,9 @@ class mock_memory_resource { MOCK_METHOD(void, deallocate, (void*, std::size_t)); }; -using memory_span = rmm::mr::detail::arena::memory_span; -using block = rmm::mr::detail::arena::block; -using superblock = rmm::mr::detail::arena::superblock; +using rmm::mr::detail::arena::block; +using rmm::mr::detail::arena::byte_span; +using rmm::mr::detail::arena::superblock; using global_arena = rmm::mr::detail::arena::global_arena; using arena = rmm::mr::detail::arena::arena; using arena_mr = rmm::mr::arena_memory_resource; @@ -89,15 +89,15 @@ TEST_F(ArenaTest, AlignToSizeClass) // NOLINT } /** - * Test memory_span. + * Test byte_span. 
*/ -TEST_F(ArenaTest, MemorySpan) // NOLINT +TEST_F(ArenaTest, ByteSpan) // NOLINT { - memory_span const mem_span{}; - EXPECT_FALSE(mem_span.is_valid()); - memory_span const ms2{fake_address, 256}; - EXPECT_TRUE(ms2.is_valid()); + byte_span const span{}; + EXPECT_FALSE(span.is_valid()); + byte_span const span2{fake_address, 256}; + EXPECT_TRUE(span2.is_valid()); } /** From 0fd715e42b1e28e370de18dc6bacb2debc4c721f Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 15 Dec 2021 09:37:35 -0800 Subject: [PATCH 31/35] clang format --- tests/mr/device/arena_mr_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index b856050bb..b6e69ce0d 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "../../byte_literals.hpp" #include #include #include @@ -21,13 +22,12 @@ #include #include #include -#include "../../byte_literals.hpp" #include #include -#include #include +#include namespace rmm::test { namespace { From c42a4d45139f0f1d4767b2be1d7063813c3b6bee Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 11 Jan 2022 16:56:36 -0800 Subject: [PATCH 32/35] review feedback --- .../random_allocations/random_allocations.cpp | 6 +- .../rmm/mr/device/arena_memory_resource.hpp | 3 +- include/rmm/mr/device/detail/arena.hpp | 5 +- tests/mr/device/arena_mr_tests.cpp | 170 +++++++++--------- 4 files changed, 95 insertions(+), 89 deletions(-) diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index c236ed7bb..470442830 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -170,8 +170,8 @@ inline auto make_pool() inline auto make_arena() { - auto free = rmm::detail::available_device_memory().first; - auto reserve = 1UL << 26; + auto free = rmm::detail::available_device_memory().first; + constexpr auto reserve{64UL << 20}; // Leave some space for CUDA overhead. return rmm::mr::make_owning_wrapper(make_cuda(), free - reserve); } diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 0039d57f6..0fa77b896 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -196,6 +196,7 @@ class arena_memory_resource final : public device_memory_resource { { std::shared_lock lock(mtx_); + // If the memory being freed does not belong to the arena, the following will return false. if (arena.deallocate(ptr, bytes, stream)) { return; } } diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index c22834474..1dd1fbc6d 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -509,6 +509,8 @@ class global_arena final { RMM_EXPECTS(nullptr != upstream_mr_, "Unexpected null upstream pointer."); auto const size = rmm::detail::align_down(arena_size.value_or(default_size()), rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + RMM_EXPECTS(size >= superblock::minimum_size, + "Arena size smaller than minimum superblock size."); initialize(size); } @@ -700,7 +702,6 @@ class global_arena final { */ void initialize(std::size_t size) { - RMM_LOGGING_ASSERT(size >= superblock::minimum_size); upstream_block_ = {upstream_mr_->allocate(size), size}; superblocks_.emplace(upstream_block_.pointer(), size); } diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index c6b281cab..c9e9e5e37 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,22 +56,19 @@ auto const fake_address3 = reinterpret_cast(superblock::minimum_size); auto const fake_address4 = reinterpret_cast(superblock::minimum_size * 2); // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) -class ArenaTest : public ::testing::Test { - protected: +struct ArenaTest : public ::testing::Test { void SetUp() override { - EXPECT_CALL(mock_, allocate(arena_size_)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock_, deallocate(fake_address3, arena_size_)); - global_arena_ = std::make_unique(&mock_, arena_size_); - arena_ = std::make_unique(*global_arena_); + EXPECT_CALL(mock_mr, allocate(arena_size)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock_mr, deallocate(fake_address3, arena_size)); + global = std::make_unique(&mock_mr, arena_size); + per_thread = std::make_unique(*global); } - // NOLINTBEGIN(cppcoreguidelines-non-private-member-variables-in-classes) - std::size_t arena_size_{superblock::minimum_size * 4}; - mock_memory_resource mock_{}; - std::unique_ptr global_arena_{}; - std::unique_ptr arena_{}; - // NOLINTEND(cppcoreguidelines-non-private-member-variables-in-classes) + std::size_t arena_size{superblock::minimum_size * 4}; + mock_memory_resource mock_mr{}; + std::unique_ptr global{}; + std::unique_ptr per_thread{}; }; /** @@ -303,103 +300,103 @@ TEST_F(ArenaTest, GlobalArenaNullUpstream) // NOLINT TEST_F(ArenaTest, GlobalArenaAcquire) // NOLINT { - auto const sblk = global_arena_->acquire(256); + auto const sblk = global->acquire(256); EXPECT_EQ(sblk.pointer(), fake_address3); EXPECT_EQ(sblk.size(), superblock::minimum_size); EXPECT_TRUE(sblk.empty()); - auto const sb2 = global_arena_->acquire(1_KiB); + auto const sb2 = global->acquire(1_KiB); EXPECT_EQ(sb2.pointer(), fake_address4); EXPECT_EQ(sb2.size(), superblock::minimum_size); EXPECT_TRUE(sb2.empty()); - global_arena_->acquire(512); - global_arena_->acquire(512); - EXPECT_FALSE(global_arena_->acquire(512).is_valid()); + global->acquire(512); + global->acquire(512); + EXPECT_FALSE(global->acquire(512).is_valid()); } TEST_F(ArenaTest, GlobalArenaReleaseMergeNext) // NOLINT { - auto sblk = global_arena_->acquire(256); - global_arena_->release(std::move(sblk)); - auto* ptr = global_arena_->allocate(arena_size_); + auto sblk = global->acquire(256); + global->release(std::move(sblk)); + auto* 
ptr = global->allocate(arena_size); EXPECT_EQ(ptr, fake_address3); } TEST_F(ArenaTest, GlobalArenaReleaseMergePrevious) // NOLINT { - auto sblk = global_arena_->acquire(256); - auto sb2 = global_arena_->acquire(1_KiB); - global_arena_->acquire(512); - global_arena_->release(std::move(sblk)); - global_arena_->release(std::move(sb2)); - auto* ptr = global_arena_->allocate(superblock::minimum_size * 2); + auto sblk = global->acquire(256); + auto sb2 = global->acquire(1_KiB); + global->acquire(512); + global->release(std::move(sblk)); + global->release(std::move(sb2)); + auto* ptr = global->allocate(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); } TEST_F(ArenaTest, GlobalArenaReleaseMergePreviousAndNext) // NOLINT { - auto sblk = global_arena_->acquire(256); - auto sb2 = global_arena_->acquire(1_KiB); - auto sb3 = global_arena_->acquire(512); - global_arena_->release(std::move(sblk)); - global_arena_->release(std::move(sb3)); - global_arena_->release(std::move(sb2)); - auto* ptr = global_arena_->allocate(arena_size_); + auto sblk = global->acquire(256); + auto sb2 = global->acquire(1_KiB); + auto sb3 = global->acquire(512); + global->release(std::move(sblk)); + global->release(std::move(sb3)); + global->release(std::move(sb2)); + auto* ptr = global->allocate(arena_size); EXPECT_EQ(ptr, fake_address3); } TEST_F(ArenaTest, GlobalArenaReleaseMultiple) // NOLINT { std::set superblocks{}; - auto sblk = global_arena_->acquire(256); + auto sblk = global->acquire(256); superblocks.insert(std::move(sblk)); - auto sb2 = global_arena_->acquire(1_KiB); + auto sb2 = global->acquire(1_KiB); superblocks.insert(std::move(sb2)); - auto sb3 = global_arena_->acquire(512); + auto sb3 = global->acquire(512); superblocks.insert(std::move(sb3)); - global_arena_->release(superblocks); - auto* ptr = global_arena_->allocate(arena_size_); + global->release(superblocks); + auto* ptr = global->allocate(arena_size); EXPECT_EQ(ptr, fake_address3); } TEST_F(ArenaTest, GlobalArenaAllocate) // NOLINT { - auto* ptr = global_arena_->allocate(superblock::minimum_size * 2); + auto* ptr = global->allocate(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); } TEST_F(ArenaTest, GlobalArenaAllocateExtraLarge) // NOLINT { - EXPECT_EQ(global_arena_->allocate(1_PiB), nullptr); - EXPECT_EQ(global_arena_->allocate(1_PiB), nullptr); + EXPECT_EQ(global->allocate(1_PiB), nullptr); + EXPECT_EQ(global->allocate(1_PiB), nullptr); } TEST_F(ArenaTest, GlobalArenaDeallocate) // NOLINT { - auto* ptr = global_arena_->allocate(superblock::minimum_size * 2); + auto* ptr = global->allocate(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); - global_arena_->deallocate(ptr, superblock::minimum_size * 2, {}); - ptr = global_arena_->allocate(superblock::minimum_size * 2); + global->deallocate(ptr, superblock::minimum_size * 2, {}); + ptr = global->allocate(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); } TEST_F(ArenaTest, GlobalArenaDeallocateAlignUp) // NOLINT { - auto* ptr = global_arena_->allocate(superblock::minimum_size + 256); - auto* ptr2 = global_arena_->allocate(superblock::minimum_size + 512); - global_arena_->deallocate(ptr, superblock::minimum_size + 256, {}); - global_arena_->deallocate(ptr2, superblock::minimum_size + 512, {}); - EXPECT_EQ(global_arena_->allocate(arena_size_), fake_address3); + auto* ptr = global->allocate(superblock::minimum_size + 256); + auto* ptr2 = global->allocate(superblock::minimum_size + 512); + global->deallocate(ptr, superblock::minimum_size + 256, {}); + 
global->deallocate(ptr2, superblock::minimum_size + 512, {}); + EXPECT_EQ(global->allocate(arena_size), fake_address3); } TEST_F(ArenaTest, GlobalArenaDeallocateFromOtherArena) // NOLINT { - auto sblk = global_arena_->acquire(512); + auto sblk = global->acquire(512); auto const blk = sblk.first_fit(512); - global_arena_->release(std::move(sblk)); - global_arena_->deallocate(blk.pointer(), blk.size()); - EXPECT_EQ(global_arena_->allocate(arena_size_), fake_address3); + global->release(std::move(sblk)); + global->deallocate(blk.pointer(), blk.size()); + EXPECT_EQ(global->allocate(arena_size), fake_address3); } /** @@ -408,46 +405,46 @@ TEST_F(ArenaTest, GlobalArenaDeallocateFromOtherArena) // NOLINT TEST_F(ArenaTest, ArenaAllocate) // NOLINT { - EXPECT_EQ(arena_->allocate(superblock::minimum_size), fake_address3); - EXPECT_EQ(arena_->allocate(256), fake_address4); + EXPECT_EQ(per_thread->allocate(superblock::minimum_size), fake_address3); + EXPECT_EQ(per_thread->allocate(256), fake_address4); } TEST_F(ArenaTest, ArenaDeallocate) // NOLINT { - auto* ptr = arena_->allocate(superblock::minimum_size); - arena_->deallocate(ptr, superblock::minimum_size, {}); - auto* ptr2 = arena_->allocate(256); - arena_->deallocate(ptr2, 256, {}); - EXPECT_EQ(arena_->allocate(superblock::minimum_size), fake_address3); + auto* ptr = per_thread->allocate(superblock::minimum_size); + per_thread->deallocate(ptr, superblock::minimum_size, {}); + auto* ptr2 = per_thread->allocate(256); + per_thread->deallocate(ptr2, 256, {}); + EXPECT_EQ(per_thread->allocate(superblock::minimum_size), fake_address3); } TEST_F(ArenaTest, ArenaDeallocateMergePrevious) // NOLINT { - auto* ptr = arena_->allocate(256); - auto* ptr2 = arena_->allocate(256); - arena_->allocate(256); - arena_->deallocate(ptr, 256, {}); - arena_->deallocate(ptr2, 256, {}); - EXPECT_EQ(arena_->allocate(512), fake_address3); + auto* ptr = per_thread->allocate(256); + auto* ptr2 = per_thread->allocate(256); + per_thread->allocate(256); + per_thread->deallocate(ptr, 256, {}); + per_thread->deallocate(ptr2, 256, {}); + EXPECT_EQ(per_thread->allocate(512), fake_address3); } TEST_F(ArenaTest, ArenaDeallocateMergeNext) // NOLINT { - auto* ptr = arena_->allocate(256); - auto* ptr2 = arena_->allocate(256); - arena_->allocate(256); - arena_->deallocate(ptr2, 256, {}); - arena_->deallocate(ptr, 256, {}); - EXPECT_EQ(arena_->allocate(512), fake_address3); + auto* ptr = per_thread->allocate(256); + auto* ptr2 = per_thread->allocate(256); + per_thread->allocate(256); + per_thread->deallocate(ptr2, 256, {}); + per_thread->deallocate(ptr, 256, {}); + EXPECT_EQ(per_thread->allocate(512), fake_address3); } TEST_F(ArenaTest, ArenaDeallocateMergePreviousAndNext) // NOLINT { - auto* ptr = arena_->allocate(256); - auto* ptr2 = arena_->allocate(256); - arena_->deallocate(ptr, 256, {}); - arena_->deallocate(ptr2, 256, {}); - EXPECT_EQ(arena_->allocate(2_KiB), fake_address3); + auto* ptr = per_thread->allocate(256); + auto* ptr2 = per_thread->allocate(256); + per_thread->deallocate(ptr, 256, {}); + per_thread->deallocate(ptr2, 256, {}); + EXPECT_EQ(per_thread->allocate(2_KiB), fake_address3); } TEST_F(ArenaTest, ArenaDefragment) // NOLINT @@ -455,14 +452,14 @@ TEST_F(ArenaTest, ArenaDefragment) // NOLINT std::vector pointers; std::size_t num_pointers{4}; for (std::size_t i = 0; i < num_pointers; i++) { - pointers.push_back(arena_->allocate(superblock::minimum_size)); + pointers.push_back(per_thread->allocate(superblock::minimum_size)); } for (auto* ptr : pointers) { - 
arena_->deallocate(ptr, superblock::minimum_size, {}); + per_thread->deallocate(ptr, superblock::minimum_size, {}); } - EXPECT_EQ(global_arena_->allocate(arena_size_), nullptr); - arena_->defragment(); - EXPECT_EQ(global_arena_->allocate(arena_size_), fake_address3); + EXPECT_EQ(global->allocate(arena_size), nullptr); + per_thread->defragment(); + EXPECT_EQ(global->allocate(arena_size), fake_address3); } /** @@ -476,6 +473,13 @@ TEST_F(ArenaTest, ThrowOnNullUpstream) // NOLINT EXPECT_THROW(construct_nullptr(), rmm::logic_error); } +TEST_F(ArenaTest, SizeSmallerThanSuperblockSize) // NOLINT +{ + auto construct_small = []() { arena_mr mr{rmm::mr::get_current_device_resource(), 256}; }; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto) + EXPECT_THROW(construct_small(), rmm::logic_error); +} + TEST_F(ArenaTest, AllocateNinetyPercent) // NOLINT { EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) From 96c976b795e45ed680dbd0156ca926333cfa3689 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 11 Jan 2022 17:15:06 -0800 Subject: [PATCH 33/35] clang format --- tests/mr/device/arena_mr_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index c9e9e5e37..3541ffbfc 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -61,8 +61,8 @@ struct ArenaTest : public ::testing::Test { { EXPECT_CALL(mock_mr, allocate(arena_size)).WillOnce(Return(fake_address3)); EXPECT_CALL(mock_mr, deallocate(fake_address3, arena_size)); - global = std::make_unique(&mock_mr, arena_size); - per_thread = std::make_unique(*global); + global = std::make_unique(&mock_mr, arena_size); + per_thread = std::make_unique(*global); } std::size_t arena_size{superblock::minimum_size * 4}; From a97565dd9efd6a27eb2d20328c5f3f4d56d1fd32 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 11 Jan 2022 19:02:37 -0800 Subject: [PATCH 34/35] increase test coverage --- .../rmm/mr/device/arena_memory_resource.hpp | 12 -------- include/rmm/mr/device/detail/arena.hpp | 30 ------------------- tests/mr/device/arena_mr_tests.cpp | 6 +++- 3 files changed, 5 insertions(+), 43 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 0fa77b896..f1b4e40c4 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -310,18 +310,6 @@ class arena_memory_resource final : public device_memory_resource { logger_->info("**************************************************"); logger_->info("Global arena:"); global_arena_.dump_memory_log(logger_); - logger_->debug("Per-thread arenas:"); - for (auto const& thread_arena : thread_arenas_) { - logger_->debug(" Thread {}:", thread_arena.first); - thread_arena.second->dump_memory_log(logger_); - } - if (!stream_arenas_.empty()) { - logger_->debug("Per-stream arenas:"); - for (auto const& stream_arena : stream_arenas_) { - logger_->debug(" Stream {}:", static_cast(stream_arena.first)); - stream_arena.second.dump_memory_log(logger_); - } - } logger_->flush(); } diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 1dd1fbc6d..c0e5df377 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -876,36 +876,6 @@ class arena { } } - /** - * Dump memory to log. 
- * - * @param logger the spdlog logger to use - */ - void dump_memory_log(std::shared_ptr const& logger) const - { - std::lock_guard lock(mtx_); - logger->debug(" # superblocks: {}", superblocks_.size()); - if (!superblocks_.empty()) { - logger->debug(" Total size of superblocks: {}", - rmm::detail::bytes{total_memory_size(superblocks_)}); - logger->debug(" Size of largest free block: {}", - rmm::detail::bytes{max_free_size(superblocks_)}); - auto index = 0; - for (auto const& sblk : superblocks_) { - logger->debug( - " Superblock {}: start={}, end={}, size={}, empty={}, # free blocks={}, max free={}", - index, - fmt::ptr(sblk.pointer()), - fmt::ptr(sblk.end()), - rmm::detail::bytes{sblk.size()}, - sblk.empty(), - sblk.free_blocks(), - rmm::detail::bytes{sblk.max_free_size()}); - index++; - } - } - } - private: /** * @brief Get an available memory block of at least `size` bytes. diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 3541ffbfc..c7c7f578c 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -277,7 +277,9 @@ TEST_F(ArenaTest, SuperblockCoalesceMergePreviousAndNext) // NOLINT TEST_F(ArenaTest, SuperblockMaxFreeSize) // NOLINT { superblock sblk{fake_address3, superblock::minimum_size}; - sblk.first_fit(superblock::minimum_size / 2); + auto const blk = sblk.first_fit(superblock::minimum_size / 4); + sblk.first_fit(superblock::minimum_size / 4); + sblk.coalesce(blk); EXPECT_EQ(sblk.max_free_size(), superblock::minimum_size / 2); } @@ -394,8 +396,10 @@ TEST_F(ArenaTest, GlobalArenaDeallocateFromOtherArena) // NOLINT { auto sblk = global->acquire(512); auto const blk = sblk.first_fit(512); + auto const blk2 = sblk.first_fit(1024); global->release(std::move(sblk)); global->deallocate(blk.pointer(), blk.size()); + global->deallocate(blk2.pointer(), blk2.size()); EXPECT_EQ(global->allocate(arena_size), fake_address3); } From 5cf9360b68700c8713853146289538eeb7cd7a23 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 11 Jan 2022 19:04:41 -0800 Subject: [PATCH 35/35] clang format --- tests/mr/device/arena_mr_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index c7c7f578c..b86e2457c 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -394,8 +394,8 @@ TEST_F(ArenaTest, GlobalArenaDeallocateAlignUp) // NOLINT TEST_F(ArenaTest, GlobalArenaDeallocateFromOtherArena) // NOLINT { - auto sblk = global->acquire(512); - auto const blk = sblk.first_fit(512); + auto sblk = global->acquire(512); + auto const blk = sblk.first_fit(512); auto const blk2 = sblk.first_fit(1024); global->release(std::move(sblk)); global->deallocate(blk.pointer(), blk.size());
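Taken together, the RMM_EXPECTS check added to global_arena's constructor and the new SizeSmallerThanSuperblockSize test change the failure mode for undersized arenas from a debug-only RMM_LOGGING_ASSERT to an exception that release builds also see. A minimal sketch of how that surfaces to callers of arena_memory_resource, assuming an arena_mr alias like the one in arena_mr_tests.cpp (the alias and the error handling shown here are illustrative, not part of these patches):

#include <rmm/mr/device/arena_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

#include <iostream>
#include <stdexcept>

// Illustrative alias; arena_mr_tests.cpp defines a similar one.
using arena_mr = rmm::mr::arena_memory_resource<rmm::mr::device_memory_resource>;

int main()
{
  try {
    // 256 bytes is below superblock::minimum_size, so construction is
    // rejected up front instead of tripping a logging assert later.
    arena_mr mr{rmm::mr::get_current_device_resource(), 256};
  } catch (std::logic_error const& err) {  // rmm::logic_error derives from std::logic_error
    std::cerr << "arena construction rejected: " << err.what() << '\n';
  }
  return 0;
}

Catching std::logic_error is sufficient here because the rmm::logic_error thrown by RMM_EXPECTS inherits from it; the tests in this series catch the derived type directly via EXPECT_THROW.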