From ceceb7deeb4caf0f911e3a7c853ef0af42b902e6 Mon Sep 17 00:00:00 2001 From: Daniel Juenger <2955913+sleeepyjack@users.noreply.github.com> Date: Tue, 19 Dec 2023 17:39:58 +0000 Subject: [PATCH] Add static_set shmem test --- tests/CMakeLists.txt | 1 + tests/static_map/shared_memory_test.cu | 1 - tests/static_set/shared_memory_test.cu | 203 +++++++++++++++++++++++++ 3 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 tests/static_set/shared_memory_test.cu diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 916e0ea42..e09efddb3 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -65,6 +65,7 @@ ConfigureTest(STATIC_SET_TEST static_set/retrieve_all_test.cu static_set/rehash_test.cu static_set/size_test.cu + static_set/shared_memory_test.cu static_set/unique_sequence_test.cu) ################################################################################################### diff --git a/tests/static_map/shared_memory_test.cu b/tests/static_map/shared_memory_test.cu index ac77ae8b1..d2444c23a 100644 --- a/tests/static_map/shared_memory_test.cu +++ b/tests/static_map/shared_memory_test.cu @@ -181,7 +181,6 @@ __global__ void shared_memory_hash_table_kernel(bool* key_found) using extent_type = cuco::experimental::extent; using storage_ref_type = cuco::experimental::aow_storage_ref; - // CTAD doesn't work for container ref types auto raw_ref = cuco::experimental::static_map_ref{ cuco::empty_key{-1}, cuco::empty_value{-1}, diff --git a/tests/static_set/shared_memory_test.cu b/tests/static_set/shared_memory_test.cu new file mode 100644 index 000000000..34f3b8ec1 --- /dev/null +++ b/tests/static_set/shared_memory_test.cu @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include + +template +__global__ void shared_memory_test_kernel(Ref* sets, + typename Ref::key_type const* const insterted_keys, + size_t number_of_elements, + bool* const keys_exist, + bool* const keys_correct) +{ + // Each block processes one set + const size_t set_id = blockIdx.x; + const size_t offset = set_id * number_of_elements; + + __shared__ typename Ref::window_type sm_buffer[NumWindows]; + + auto g = cuco::test::cg::this_thread_block(); + auto insert_ref = sets[set_id].make_copy(g, sm_buffer, cuco::experimental::thread_scope_block); + auto find_ref = std::move(insert_ref).with(cuco::experimental::op::find); + + for (int i = g.thread_rank(); i < number_of_elements; i += g.size()) { + auto found_it = find_ref.find(insterted_keys[offset + i]); + + if (found_it != find_ref.end()) { + keys_exist[offset + i] = true; + if (*found_it == insterted_keys[offset + i]) { + keys_correct[offset + i] = true; + } else { + keys_correct[offset + i] = false; + } + } else { + keys_exist[offset + i] = false; + keys_correct[offset + i] = true; + } + } +} + +TEMPLATE_TEST_CASE_SIG("Shared memory static set", "", ((typename Key), Key), (int32_t), (int64_t)) +{ + constexpr std::size_t number_of_sets = 1000; + constexpr std::size_t elements_in_set = 500; + constexpr std::size_t set_capacity = 2 * elements_in_set; + + using extent_type = cuco::experimental::extent; + using set_type = cuco::experimental::static_set< + Key, + extent_type, + cuda::thread_scope_device, + thrust::equal_to, + cuco::experimental::linear_probing<1, cuco::default_hash_function>>; + + // one array for all sets, first elements_in_set element belong to set 0, second to set 1 and so + // on + thrust::device_vector d_keys(number_of_sets * elements_in_set); + + thrust::sequence(thrust::device, d_keys.begin(), d_keys.end()); + + // using std::unique_ptr because static_set does not have copy/move constructor/assignment + // operator yet + std::vector> sets; + for (std::size_t set_id = 0; set_id < number_of_sets; ++set_id) { + sets.push_back(std::make_unique(extent_type{}, cuco::empty_key{-1})); + } + + thrust::device_vector d_keys_exist(number_of_sets * elements_in_set); + thrust::device_vector d_keys_correct(number_of_sets * elements_in_set); + + using ref_type = typename set_type::ref_type; + + SECTION("Keys are all found after insertion.") + { + std::vector h_refs; + for (std::size_t set_id = 0; set_id < number_of_sets; ++set_id) { + const std::size_t offset = set_id * elements_in_set; + + set_type* set = sets[set_id].get(); + set->insert(d_keys.begin() + offset, d_keys.begin() + offset + elements_in_set); + h_refs.push_back(set->ref(cuco::experimental::op::insert)); + } + thrust::device_vector d_refs(h_refs); + + auto constexpr num_windows = cuco::experimental::make_window_extent(extent_type{}); + + shared_memory_test_kernel + <<>>(d_refs.data().get(), + d_keys.data().get(), + elements_in_set, + d_keys_exist.data().get(), + d_keys_correct.data().get()); + + REQUIRE(d_keys_exist.size() == d_keys_correct.size()); + auto zip = + thrust::make_zip_iterator(thrust::make_tuple(d_keys_exist.begin(), d_keys_correct.begin())); + + REQUIRE(cuco::test::all_of(zip, + zip + d_keys_exist.size(), + cuda::proclaim_return_type([] __device__(auto const& z) { + return thrust::get<0>(z) and thrust::get<1>(z); + }))); + } + + SECTION("No key is found before insertion.") + { + std::vector h_refs; + for (std::size_t set_id = 0; set_id < number_of_sets; ++set_id) { + h_refs.push_back(sets[set_id].get()->ref(cuco::experimental::op::insert)); + } + thrust::device_vector d_refs(h_refs); + + auto constexpr num_windows = cuco::experimental::make_window_extent(extent_type{}); + + shared_memory_test_kernel + <<>>(d_refs.data().get(), + d_keys.data().get(), + elements_in_set, + d_keys_exist.data().get(), + d_keys_correct.data().get()); + + REQUIRE(cuco::test::none_of(d_keys_exist.begin(), d_keys_exist.end(), thrust::identity{})); + } +} + +auto constexpr cg_size = 1; +auto constexpr window_size = 1; + +template +__global__ void shared_memory_hash_set_kernel(bool* key_found) +{ + using Key = int32_t; + using slot_type = Key; + + __shared__ cuco::experimental::window set[NumWindows]; + + using extent_type = cuco::experimental::extent; + using storage_ref_type = cuco::experimental::aow_storage_ref; + + auto raw_ref = cuco::experimental::static_set_ref{ + cuco::empty_key{-1}, + thrust::equal_to{}, + cuco::experimental::linear_probing>{}, + cuco::experimental::thread_scope_block, + storage_ref_type{extent_type{}, set}}; + + auto const block = cooperative_groups::this_thread_block(); + raw_ref.initialize(block); + + auto const index = threadIdx.x + blockIdx.x * blockDim.x; + auto const rank = block.thread_rank(); + + // insert {thread_rank, thread_rank} for each thread in thread-block + auto insert_ref = std::move(raw_ref).with(cuco::experimental::op::insert); + insert_ref.insert(rank); + block.sync(); + + auto find_ref = std::move(insert_ref).with(cuco::experimental::op::find); + auto const retrieved_it = find_ref.find(rank); + block.sync(); + + if (retrieved_it != find_ref.end() && *retrieved_it == rank) { key_found[index] = true; } +} + +TEST_CASE("static set shared memory slots.", "") +{ + constexpr std::size_t N = 256; + auto constexpr num_windows = cuco::experimental::make_window_extent( + cuco::experimental::extent{}); + + thrust::device_vector key_found(N, false); + shared_memory_hash_set_kernel<<<8, 32>>>(key_found.data().get()); + CUCO_CUDA_TRY(cudaDeviceSynchronize()); + + REQUIRE(cuco::test::all_of(key_found.begin(), key_found.end(), thrust::identity{})); +}