Skip to content

Commit

Permalink
Add option to change the thread scope when copying a container
Browse files Browse the repository at this point in the history
  • Loading branch information
sleeepyjack committed Dec 18, 2023
1 parent e45ae4e commit 58a6c60
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ class open_addressing_ref_impl {

static constexpr auto cg_size = probing_scheme_type::cg_size; ///< Cooperative group size
static constexpr auto window_size =
storage_ref_type::window_size; ///< Number of elements handled per window
storage_ref_type::window_size; ///< Number of elements handled per window
static constexpr auto thread_scope = Scope; ///< CUDA thread scope
static constexpr auto has_payload =
not std::is_same_v<key_type, value_type>; ///< Determines if the container is a key/value or
///< key-only store
Expand Down
10 changes: 6 additions & 4 deletions include/cuco/detail/static_map/static_map_ref.inl
Original file line number Diff line number Diff line change
Expand Up @@ -221,19 +221,21 @@ template <typename Key,
typename ProbingScheme,
typename StorageRef,
typename... Operators>
template <typename CG>
template <typename CG, cuda::thread_scope NewScope>
__device__ constexpr auto
static_map_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>::make_copy(
CG const& tile, window_type* const memory_to_use) const noexcept
CG const& tile,
window_type* const memory_to_use,
cuda_thread_scope<NewScope> scope) const noexcept
{
this->impl_.make_copy(tile, memory_to_use);
return static_map_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>{
return static_map_ref<Key, T, NewScope, KeyEqual, ProbingScheme, StorageRef, Operators...>{
cuco::empty_key<Key>{this->empty_key_sentinel()},
cuco::empty_value<T>{this->empty_value_sentinel()},
cuco::erased_key<Key>{this->erased_key_sentinel()},
this->key_eq(),
this->impl_.probing_scheme(),
{},
scope,
storage_ref_type{this->window_extent(), memory_to_use}};
}

Expand Down
10 changes: 6 additions & 4 deletions include/cuco/detail/static_set/static_set_ref.inl
Original file line number Diff line number Diff line change
Expand Up @@ -183,18 +183,20 @@ template <typename Key,
typename ProbingScheme,
typename StorageRef,
typename... Operators>
template <typename CG>
template <typename CG, cuda::thread_scope NewScope>
__device__ constexpr auto
static_set_ref<Key, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>::make_copy(
CG const& tile, window_type* const memory_to_use) const noexcept
CG const& tile,
window_type* const memory_to_use,
cuda_thread_scope<NewScope> scope) const noexcept
{
this->impl_.make_copy(tile, memory_to_use);
return static_set_ref<Key, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>{
return static_set_ref<Key, NewScope, KeyEqual, ProbingScheme, StorageRef, Operators...>{
cuco::empty_key<Key>{this->empty_key_sentinel()},
cuco::erased_key<Key>{this->erased_key_sentinel()},
this->key_eq(),
this->impl_.probing_scheme(),
{},
scope,
storage_ref_type{this->window_extent(), memory_to_use}};
}

Expand Down
10 changes: 8 additions & 2 deletions include/cuco/static_map_ref.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ class static_map_ref
static constexpr auto cg_size = probing_scheme_type::cg_size; ///< Cooperative group size
static constexpr auto window_size =
storage_ref_type::window_size; ///< Number of elements handled per window
static constexpr auto thread_scope = impl_type::thread_scope; ///< CUDA thread scope

/**
* @brief Constructs static_map_ref.
Expand Down Expand Up @@ -209,18 +210,23 @@ class static_map_ref
* although global memory can be used as well.
*
* @note This function synchronizes the group `tile`.
* @note By default, the thread scope of the copy will be the same as the scope of the parent ref.
*
* @tparam CG The type of the cooperative thread group
* @tparam NewScope The thread scope of the newly created device ref
*
* @param tile The cooperative thread group used to copy the data structure
* @param memory_to_use Array large enough to support `capacity` elements. Object does not take
* the ownership of the memory
* @param scope The thread scope of the newly created device ref
*
* @return Copy of the current device ref
*/
template <typename CG>
template <typename CG, cuda::thread_scope NewScope = thread_scope>
[[nodiscard]] __device__ constexpr auto make_copy(
CG const& tile, window_type* const memory_to_use) const noexcept;
CG const& tile,
window_type* const memory_to_use,
cuda_thread_scope<NewScope> scope = {}) const noexcept;

/**
* @brief Initializes the map storage using the threads in the group `tile`.
Expand Down
10 changes: 8 additions & 2 deletions include/cuco/static_set_ref.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ class static_set_ref
static constexpr auto cg_size = probing_scheme_type::cg_size; ///< Cooperative group size
static constexpr auto window_size =
storage_ref_type::window_size; ///< Number of elements handled per window
static constexpr auto thread_scope = impl_type::thread_scope; ///< CUDA thread scope

/**
* @brief Constructs static_set_ref.
Expand Down Expand Up @@ -187,18 +188,23 @@ class static_set_ref
* although global memory can be used as well.
*
* @note This function synchronizes the group `tile`.
* @note By default, the thread scope of the copy will be the same as the scope of the parent ref.
*
* @tparam CG The type of the cooperative thread group
* @tparam NewScope The thread scope of the newly created device ref
*
* @param tile The cooperative thread group used to copy the data structure
* @param memory_to_use Array large enough to support `capacity` elements. Object does not take
* the ownership of the memory
* @param scope The thread scope of the newly created device ref
*
* @return Copy of the current device ref
*/
template <typename CG>
template <typename CG, cuda::thread_scope NewScope = thread_scope>
[[nodiscard]] __device__ constexpr auto make_copy(
CG const& tile, window_type* const memory_to_use) const noexcept;
CG const& tile,
window_type* const memory_to_use,
cuda_thread_scope<NewScope> scope = {}) const noexcept;

/**
* @brief Initializes the set storage using the threads in the group `tile`.
Expand Down

0 comments on commit 58a6c60

Please sign in to comment.