From c7d52a257f6b705a77bdad4af9afa4c6b3cd30e2 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Mon, 20 Nov 2023 15:43:26 -0800 Subject: [PATCH 1/2] Migrate tests to excercise the new map (#393) This PR migrates static map tests to test the new static map: - Large type tests are not migrated since the new map doesn't support keys larger than 8 bytes - Shared memory tests require related functions to be added into ref code thus would be in a separate PR --- tests/static_map/custom_type_test.cu | 59 ++++--- tests/static_map/duplicate_keys_test.cu | 59 +++++-- tests/static_map/heterogeneous_lookup_test.cu | 7 +- tests/static_map/insert_and_find_test.cu | 9 +- tests/static_map/insert_or_assign_test.cu | 15 +- tests/static_map/key_sentinel_test.cu | 53 ++++--- tests/static_map/shared_memory_test.cu | 15 +- tests/static_map/stream_test.cu | 46 +++--- tests/static_map/unique_sequence_test.cu | 148 +++--------------- 9 files changed, 187 insertions(+), 224 deletions(-) diff --git a/tests/static_map/custom_type_test.cu b/tests/static_map/custom_type_test.cu index e23216ca3..536c83194 100644 --- a/tests/static_map/custom_type_test.cu +++ b/tests/static_map/custom_type_test.cu @@ -27,6 +27,8 @@ #include +#include + #include // User-defined key type @@ -123,17 +125,18 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type", thrust::counting_iterator(0), thrust::counting_iterator(num), insert_keys.begin(), - [] __device__(auto i) { return Key{i}; }); + cuda::proclaim_return_type([] __device__(auto i) { return Key{i}; })); thrust::transform(thrust::device, thrust::counting_iterator(0), thrust::counting_iterator(num), insert_values.begin(), - [] __device__(auto i) { return Value{i}; }); + cuda::proclaim_return_type([] __device__(auto i) { return Value{i}; })); - auto insert_pairs = - thrust::make_transform_iterator(thrust::make_counting_iterator(0), - [] __device__(auto i) { return cuco::pair(i, i); }); + auto insert_pairs = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type>( + [] __device__(auto i) { return cuco::pair(i, i); })); SECTION("All inserted keys-value pairs should be correctly recovered during find") { @@ -151,9 +154,9 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type", REQUIRE(cuco::test::equal(insert_values.begin(), insert_values.end(), found_values.begin(), - [] __device__(Value lhs, Value rhs) { + cuda::proclaim_return_type([] __device__(Value lhs, Value rhs) { return std::tie(lhs.f, lhs.s) == std::tie(rhs.f, rhs.s); - })); + }))); } SECTION("All inserted keys-value pairs should be contained") @@ -175,7 +178,7 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type", insert_pairs, insert_pairs + num, thrust::counting_iterator(0), - [] __device__(auto const& key) { return (key % 2) == 0; }, + cuda::proclaim_return_type([] __device__(auto const& key) { return (key % 2) == 0; }), hash_custom_key{}, custom_key_equals{}); @@ -187,12 +190,13 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type", hash_custom_key{}, custom_key_equals{}); - REQUIRE(cuco::test::equal(contained.begin(), - contained.end(), - thrust::counting_iterator(0), - [] __device__(auto const& idx_contained, auto const& idx) { - return ((idx % 2) == 0) == idx_contained; - })); + REQUIRE(cuco::test::equal( + contained.begin(), + contained.end(), + thrust::counting_iterator(0), + cuda::proclaim_return_type([] __device__(auto const& idx_contained, auto const& idx) { + return ((idx % 2) == 0) == idx_contained; + }))); } SECTION("Non-inserted keys-value pairs should not be contained") @@ -212,9 +216,11 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type", map.insert(insert_pairs, insert_pairs + num, hash_custom_key{}, custom_key_equals{}); auto view = map.get_device_view(); REQUIRE(cuco::test::all_of( - insert_pairs, insert_pairs + num, [view] __device__(cuco::pair const& pair) { + insert_pairs, + insert_pairs + num, + cuda::proclaim_return_type([view] __device__(cuco::pair const& pair) { return view.contains(pair.first, hash_custom_key{}, custom_key_equals{}); - })); + }))); } SECTION("Inserting unique keys should return insert success.") @@ -222,9 +228,11 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type", auto m_view = map.get_device_mutable_view(); REQUIRE(cuco::test::all_of(insert_pairs, insert_pairs + num, - [m_view] __device__(cuco::pair const& pair) mutable { - return m_view.insert(pair, hash_custom_key{}, custom_key_equals{}); - })); + cuda::proclaim_return_type( + [m_view] __device__(cuco::pair const& pair) mutable { + return m_view.insert( + pair, hash_custom_key{}, custom_key_equals{}); + }))); } SECTION("Cannot find any key in an empty hash map") @@ -235,18 +243,21 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type", REQUIRE(cuco::test::all_of( insert_pairs, insert_pairs + num, - [view] __device__(cuco::pair const& pair) mutable { - return view.find(pair.first, hash_custom_key{}, custom_key_equals{}) == view.end(); - })); + cuda::proclaim_return_type( + [view] __device__(cuco::pair const& pair) mutable { + return view.find(pair.first, hash_custom_key{}, custom_key_equals{}) == view.end(); + }))); } SECTION("const view") { auto const view = map.get_device_view(); REQUIRE(cuco::test::all_of( - insert_pairs, insert_pairs + num, [view] __device__(cuco::pair const& pair) { + insert_pairs, + insert_pairs + num, + cuda::proclaim_return_type([view] __device__(cuco::pair const& pair) { return view.find(pair.first, hash_custom_key{}, custom_key_equals{}) == view.end(); - })); + }))); } } } diff --git a/tests/static_map/duplicate_keys_test.cu b/tests/static_map/duplicate_keys_test.cu index 5620fa4e9..e17ec3af8 100644 --- a/tests/static_map/duplicate_keys_test.cu +++ b/tests/static_map/duplicate_keys_test.cu @@ -29,16 +29,52 @@ #include -TEMPLATE_TEST_CASE_SIG("Duplicate keys", - "", - ((typename Key, typename Value), Key, Value), - (int32_t, int32_t), - (int32_t, int64_t), - (int64_t, int32_t), - (int64_t, int64_t)) +#include + +using size_type = std::size_t; + +TEMPLATE_TEST_CASE_SIG( + "static_map duplicate keys", + "", + ((typename Key, typename Value, cuco::test::probe_sequence Probe, int CGSize), + Key, + Value, + Probe, + CGSize), + (int32_t, int32_t, cuco::test::probe_sequence::double_hashing, 1), + (int32_t, int64_t, cuco::test::probe_sequence::double_hashing, 1), + (int32_t, int32_t, cuco::test::probe_sequence::double_hashing, 2), + (int32_t, int64_t, cuco::test::probe_sequence::double_hashing, 2), + (int64_t, int32_t, cuco::test::probe_sequence::double_hashing, 1), + (int64_t, int64_t, cuco::test::probe_sequence::double_hashing, 1), + (int64_t, int32_t, cuco::test::probe_sequence::double_hashing, 2), + (int64_t, int64_t, cuco::test::probe_sequence::double_hashing, 2), + (int32_t, int32_t, cuco::test::probe_sequence::linear_probing, 1), + (int32_t, int64_t, cuco::test::probe_sequence::linear_probing, 1), + (int32_t, int32_t, cuco::test::probe_sequence::linear_probing, 2), + (int32_t, int64_t, cuco::test::probe_sequence::linear_probing, 2), + (int64_t, int32_t, cuco::test::probe_sequence::linear_probing, 1), + (int64_t, int64_t, cuco::test::probe_sequence::linear_probing, 1), + (int64_t, int32_t, cuco::test::probe_sequence::linear_probing, 2), + (int64_t, int64_t, cuco::test::probe_sequence::linear_probing, 2)) { - constexpr std::size_t num_keys{500'000}; - cuco::static_map map{ + constexpr size_type num_keys{500'000}; + + using probe = + std::conditional_t>, + cuco::experimental::double_hashing, + cuco::murmurhash3_32>>; + + auto map = cuco::experimental::static_map, + cuda::thread_scope_device, + thrust::equal_to, + probe, + cuco::cuda_allocator, + cuco::experimental::storage<2>>{ num_keys * 2, cuco::empty_key{-1}, cuco::empty_value{-1}}; thrust::device_vector d_keys(num_keys); @@ -49,7 +85,8 @@ TEMPLATE_TEST_CASE_SIG("Duplicate keys", auto pairs_begin = thrust::make_transform_iterator( thrust::make_counting_iterator(0), - [] __device__(auto i) { return cuco::pair(i / 2, i / 2); }); + cuda::proclaim_return_type>( + [] __device__(auto i) { return cuco::pair(i / 2, i / 2); })); thrust::device_vector d_results(num_keys); thrust::device_vector d_contained(num_keys); @@ -68,7 +105,7 @@ TEMPLATE_TEST_CASE_SIG("Duplicate keys", map.insert(pairs_begin, pairs_begin + num_keys); - auto const num_entries = map.get_size(); + auto const num_entries = map.size(); REQUIRE(num_entries == gold); auto [key_out_end, value_out_end] = diff --git a/tests/static_map/heterogeneous_lookup_test.cu b/tests/static_map/heterogeneous_lookup_test.cu index ed1ace9bd..f386d96a5 100644 --- a/tests/static_map/heterogeneous_lookup_test.cu +++ b/tests/static_map/heterogeneous_lookup_test.cu @@ -27,6 +27,8 @@ #include +#include + #include // insert key type @@ -115,8 +117,9 @@ TEMPLATE_TEST_CASE_SIG("Heterogeneous lookup", auto insert_pairs = thrust::make_transform_iterator( thrust::counting_iterator(0), [] __device__(auto i) { return cuco::pair(i, i); }); - auto probe_keys = thrust::make_transform_iterator(thrust::counting_iterator(0), - [] __device__(auto i) { return ProbeKey(i); }); + auto probe_keys = thrust::make_transform_iterator( + thrust::counting_iterator(0), + cuda::proclaim_return_type([] __device__(auto i) { return ProbeKey{i}; })); SECTION("All inserted keys-value pairs should be contained") { diff --git a/tests/static_map/insert_and_find_test.cu b/tests/static_map/insert_and_find_test.cu index 3afc27b9a..9941e46a6 100644 --- a/tests/static_map/insert_and_find_test.cu +++ b/tests/static_map/insert_and_find_test.cu @@ -26,6 +26,8 @@ #include +#include + static constexpr int Iters = 10'000; template @@ -129,7 +131,8 @@ TEMPLATE_TEST_CASE_SIG( thrust::sequence(thrust::device, d_keys.begin(), d_keys.end()); map.find(d_keys.begin(), d_keys.end(), d_values.begin()); - REQUIRE(cuco::test::all_of(d_values.begin(), d_values.end(), [] __device__(Value v) { - return v == (Blocks * Threads) / CGSize; - })); + REQUIRE(cuco::test::all_of( + d_values.begin(), d_values.end(), cuda::proclaim_return_type([] __device__(Value v) { + return v == (Blocks * Threads) / CGSize; + }))); } diff --git a/tests/static_map/insert_or_assign_test.cu b/tests/static_map/insert_or_assign_test.cu index 90c6553ce..4bca776f7 100644 --- a/tests/static_map/insert_or_assign_test.cu +++ b/tests/static_map/insert_or_assign_test.cu @@ -27,6 +27,8 @@ #include +#include + using size_type = std::size_t; template @@ -36,9 +38,11 @@ __inline__ void test_insert_or_assign(Map& map, size_type num_keys) using Value = typename Map::mapped_type; // Insert pairs - auto pairs_begin = - thrust::make_transform_iterator(thrust::counting_iterator(0), - [] __device__(auto i) { return cuco::pair(i, i); }); + auto pairs_begin = thrust::make_transform_iterator( + thrust::counting_iterator(0), + cuda::proclaim_return_type>([] __device__(auto i) { + return cuco::pair{i, i}; + })); auto const initial_size = map.insert(pairs_begin, pairs_begin + num_keys); REQUIRE(initial_size == num_keys); // all keys should be inserted @@ -58,8 +62,9 @@ __inline__ void test_insert_or_assign(Map& map, size_type num_keys) thrust::device_vector d_values(num_keys); map.retrieve_all(d_keys.begin(), d_values.begin()); - auto gold_values_begin = thrust::make_transform_iterator(thrust::counting_iterator(0), - [] __device__(auto i) { return i * 2; }); + auto gold_values_begin = thrust::make_transform_iterator( + thrust::counting_iterator(0), + cuda::proclaim_return_type([] __device__(auto i) { return i * 2; })); thrust::sort(thrust::device, d_values.begin(), d_values.end()); REQUIRE(cuco::test::equal( diff --git a/tests/static_map/key_sentinel_test.cu b/tests/static_map/key_sentinel_test.cu index 74a1badd1..dceaf6ec4 100644 --- a/tests/static_map/key_sentinel_test.cu +++ b/tests/static_map/key_sentinel_test.cu @@ -24,12 +24,14 @@ #include +#include + #define SIZE 10 __device__ int A[SIZE]; template struct custom_equals { - __device__ bool operator()(T lhs, T rhs) { return A[lhs] == A[rhs]; } + __device__ bool operator()(T lhs, T rhs) const { return A[lhs] == A[rhs]; } }; TEMPLATE_TEST_CASE_SIG( @@ -39,11 +41,15 @@ TEMPLATE_TEST_CASE_SIG( using Value = T; constexpr std::size_t num_keys{SIZE}; - cuco::static_map map{ - SIZE * 2, cuco::empty_key{-1}, cuco::empty_value{-1}}; + auto map = cuco::experimental::static_map{ + SIZE * 2, + cuco::empty_key{-1}, + cuco::empty_value{-1}, + custom_equals{}, + cuco::experimental::linear_probing<1, cuco::default_hash_function>{}}; - auto m_view = map.get_device_mutable_view(); - auto view = map.get_device_view(); + auto insert_ref = map.ref(cuco::experimental::op::insert); + auto find_ref = map.ref(cuco::experimental::op::find); int h_A[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -51,34 +57,35 @@ TEMPLATE_TEST_CASE_SIG( } CUCO_CUDA_TRY(cudaMemcpyToSymbol(A, h_A, SIZE * sizeof(int))); - auto pairs_begin = - thrust::make_transform_iterator(thrust::make_counting_iterator(0), - [] __device__(auto i) { return cuco::pair(i, i); }); + auto pairs_begin = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type>( + [] __device__(auto i) { return cuco::pair(i, i); })); SECTION( "Tests of non-CG insert: The custom `key_equal` can never be used to compare against sentinel") { - REQUIRE(cuco::test::all_of(pairs_begin, - pairs_begin + num_keys, - [m_view] __device__(cuco::pair const& pair) mutable { - return m_view.insert( - pair, cuco::default_hash_function{}, custom_equals{}); - })); + REQUIRE( + cuco::test::all_of(pairs_begin, + pairs_begin + num_keys, + cuda::proclaim_return_type( + [insert_ref] __device__(cuco::pair const& pair) mutable { + return insert_ref.insert(pair); + }))); } SECTION( "Tests of CG insert: The custom `key_equal` can never be used to compare against sentinel") { - map.insert(pairs_begin, - pairs_begin + num_keys, - cuco::default_hash_function{}, - custom_equals{}); + map.insert(pairs_begin, pairs_begin + num_keys); // All keys inserted via custom `key_equal` should be found REQUIRE(cuco::test::all_of( - pairs_begin, pairs_begin + num_keys, [view] __device__(cuco::pair const& pair) { - auto const found = view.find(pair.first); - return (found != view.end()) and - (found->first.load() == pair.first and found->second.load() == pair.second); - })); + pairs_begin, + pairs_begin + num_keys, + cuda::proclaim_return_type([find_ref] __device__(cuco::pair const& pair) { + auto const found = find_ref.find(pair.first); + return (found != find_ref.end()) and + (found->first == pair.first and found->second == pair.second); + }))); } } diff --git a/tests/static_map/shared_memory_test.cu b/tests/static_map/shared_memory_test.cu index 444f1c7e7..70e2def8d 100644 --- a/tests/static_map/shared_memory_test.cu +++ b/tests/static_map/shared_memory_test.cu @@ -27,6 +27,8 @@ #include +#include + #include template @@ -74,9 +76,8 @@ TEMPLATE_TEST_CASE_SIG("Shared memory static map", (int64_t, int32_t), (int64_t, int64_t)) { - using MapType = cuco::static_map; - using DeviceViewType = typename MapType::device_view; - using DeviceViewIteratorType = typename DeviceViewType::iterator; + using MapType = cuco::static_map; + using DeviceViewType = typename MapType::device_view; constexpr std::size_t number_of_maps = 1000; constexpr std::size_t elements_in_map = 500; @@ -127,9 +128,11 @@ TEMPLATE_TEST_CASE_SIG("Shared memory static map", auto zip = thrust::make_zip_iterator( thrust::make_tuple(d_keys_exist.begin(), d_keys_and_values_correct.begin())); - REQUIRE(cuco::test::all_of(zip, zip + d_keys_exist.size(), [] __device__(auto const& z) { - return thrust::get<0>(z) and thrust::get<1>(z); - })); + REQUIRE(cuco::test::all_of(zip, + zip + d_keys_exist.size(), + cuda::proclaim_return_type([] __device__(auto const& z) { + return thrust::get<0>(z) and thrust::get<1>(z); + }))); } SECTION("No key is found before insertion.") diff --git a/tests/static_map/stream_test.cu b/tests/static_map/stream_test.cu index 6121cbd62..fe1b2ac65 100644 --- a/tests/static_map/stream_test.cu +++ b/tests/static_map/stream_test.cu @@ -29,7 +29,9 @@ #include -TEMPLATE_TEST_CASE_SIG("Unique sequence of keys on given stream", +#include + +TEMPLATE_TEST_CASE_SIG("static_map: unique sequence of keys on given stream", "", ((typename Key, typename Value), Key, Value), (int32_t, int32_t), @@ -41,11 +43,14 @@ TEMPLATE_TEST_CASE_SIG("Unique sequence of keys on given stream", CUCO_CUDA_TRY(cudaStreamCreate(&stream)); constexpr std::size_t num_keys{500'000}; - cuco::static_map map{1'000'000, - cuco::empty_key{-1}, - cuco::empty_value{-1}, - cuco::cuda_allocator{}, - stream}; + auto map = cuco::experimental::static_map{ + num_keys * 2, + cuco::empty_key{-1}, + cuco::empty_value{-1}, + thrust::equal_to{}, + cuco::experimental::linear_probing<1, cuco::default_hash_function>{}, + cuco::cuda_allocator{}, + stream}; thrust::device_vector d_keys(num_keys); thrust::device_vector d_values(num_keys); @@ -53,35 +58,34 @@ TEMPLATE_TEST_CASE_SIG("Unique sequence of keys on given stream", thrust::sequence(thrust::device, d_keys.begin(), d_keys.end()); thrust::sequence(thrust::device, d_values.begin(), d_values.end()); - auto pairs_begin = - thrust::make_transform_iterator(thrust::make_counting_iterator(0), - [] __device__(auto i) { return cuco::pair(i, i); }); - - auto hash_fn = cuco::default_hash_function{}; - auto equal_fn = thrust::equal_to{}; + auto pairs_begin = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type>( + [] __device__(auto i) { return cuco::pair(i, i); })); // bulk function test cases SECTION("All inserted keys-value pairs should be correctly recovered during find") { thrust::device_vector d_results(num_keys); - map.insert(pairs_begin, pairs_begin + num_keys, hash_fn, equal_fn, stream); - map.find(d_keys.begin(), d_keys.end(), d_results.begin(), hash_fn, equal_fn, stream); + map.insert(pairs_begin, pairs_begin + num_keys, stream); + map.find(d_keys.begin(), d_keys.end(), d_results.begin(), stream); auto zip = thrust::make_zip_iterator(thrust::make_tuple(d_results.begin(), d_values.begin())); - REQUIRE(cuco::test::all_of( - zip, - zip + num_keys, - [] __device__(auto const& p) { return thrust::get<0>(p) == thrust::get<1>(p); }, - stream)); + REQUIRE(cuco::test::all_of(zip, + zip + num_keys, + cuda::proclaim_return_type([] __device__(auto const& p) { + return thrust::get<0>(p) == thrust::get<1>(p); + }), + stream)); } SECTION("All inserted keys-value pairs should be contained") { thrust::device_vector d_contained(num_keys); - map.insert(pairs_begin, pairs_begin + num_keys, hash_fn, equal_fn, stream); - map.contains(d_keys.begin(), d_keys.end(), d_contained.begin(), hash_fn, equal_fn, stream); + map.insert(pairs_begin, pairs_begin + num_keys, stream); + map.contains(d_keys.begin(), d_keys.end(), d_contained.begin(), stream); REQUIRE(cuco::test::all_of(d_contained.begin(), d_contained.end(), thrust::identity{}, stream)); } diff --git a/tests/static_map/unique_sequence_test.cu b/tests/static_map/unique_sequence_test.cu index 6a0165cc2..69fa69fb0 100644 --- a/tests/static_map/unique_sequence_test.cu +++ b/tests/static_map/unique_sequence_test.cu @@ -31,122 +31,7 @@ #include -TEMPLATE_TEST_CASE_SIG("Unique sequence of keys", - "", - ((typename Key, typename Value), Key, Value), - (int32_t, int32_t), - (int32_t, int64_t), - (int64_t, int32_t), - (int64_t, int64_t)) -{ - constexpr std::size_t num_keys{500'000}; - cuco::static_map map{ - 1'000'000, cuco::empty_key{-1}, cuco::empty_value{-1}}; - - auto m_view = map.get_device_mutable_view(); - auto view = map.get_device_view(); - - thrust::device_vector d_keys(num_keys); - thrust::device_vector d_values(num_keys); - - thrust::sequence(thrust::device, d_keys.begin(), d_keys.end()); - thrust::sequence(thrust::device, d_values.begin(), d_values.end()); - - auto pairs_begin = - thrust::make_transform_iterator(thrust::make_counting_iterator(0), - [] __device__(auto i) { return cuco::pair(i, i); }); - - thrust::device_vector d_results(num_keys); - thrust::device_vector d_contained(num_keys); - - // bulk function test cases - SECTION("All inserted keys-value pairs should be correctly recovered during find") - { - map.insert(pairs_begin, pairs_begin + num_keys); - map.find(d_keys.begin(), d_keys.end(), d_results.begin()); - auto zip = thrust::make_zip_iterator(thrust::make_tuple(d_results.begin(), d_values.begin())); - - REQUIRE(cuco::test::all_of(zip, zip + num_keys, [] __device__(auto const& p) { - return thrust::get<0>(p) == thrust::get<1>(p); - })); - } - - SECTION("All inserted keys-value pairs should be contained") - { - map.insert(pairs_begin, pairs_begin + num_keys); - map.contains(d_keys.begin(), d_keys.end(), d_contained.begin()); - - REQUIRE(cuco::test::all_of(d_contained.begin(), d_contained.end(), thrust::identity{})); - } - - SECTION("Non-inserted keys-value pairs should not be contained") - { - map.contains(d_keys.begin(), d_keys.end(), d_contained.begin()); - - REQUIRE(cuco::test::none_of(d_contained.begin(), d_contained.end(), thrust::identity{})); - } - - SECTION("Inserting unique keys should return insert success.") - { - REQUIRE(cuco::test::all_of(pairs_begin, - pairs_begin + num_keys, - [m_view] __device__(cuco::pair const& pair) mutable { - return m_view.insert(pair); - })); - } - - SECTION("Cannot find any key in an empty hash map with non-const view") - { - SECTION("non-const view") - { - REQUIRE(cuco::test::all_of(pairs_begin, - pairs_begin + num_keys, - [view] __device__(cuco::pair const& pair) mutable { - return view.find(pair.first) == view.end(); - })); - } - SECTION("const view") - { - REQUIRE(cuco::test::all_of( - pairs_begin, pairs_begin + num_keys, [view] __device__(cuco::pair const& pair) { - return view.find(pair.first) == view.end(); - })); - } - } - - SECTION("Keys are all found after inserting many keys.") - { - // Bulk insert keys - thrust::for_each( - thrust::device, - pairs_begin, - pairs_begin + num_keys, - [m_view] __device__(cuco::pair const& pair) mutable { m_view.insert(pair); }); - - SECTION("non-const view") - { - // All keys should be found - REQUIRE(cuco::test::all_of(pairs_begin, - pairs_begin + num_keys, - [view] __device__(cuco::pair const& pair) mutable { - auto const found = view.find(pair.first); - return (found != view.end()) and - (found->first.load() == pair.first and - found->second.load() == pair.second); - })); - } - SECTION("const view") - { - // All keys should be found - REQUIRE(cuco::test::all_of( - pairs_begin, pairs_begin + num_keys, [view] __device__(cuco::pair const& pair) { - auto const found = view.find(pair.first); - return (found != view.end()) and - (found->first.load() == pair.first and found->second.load() == pair.second); - })); - } - } -} +#include using size_type = int32_t; @@ -160,14 +45,18 @@ __inline__ void test_unique_sequence(Map& map, size_type num_keys) thrust::sequence(thrust::device, d_keys.begin(), d_keys.end()); - auto keys_begin = d_keys.begin(); - auto pairs_begin = - thrust::make_transform_iterator(thrust::make_counting_iterator(0), - [] __device__(auto i) { return cuco::pair(i, i); }); + auto keys_begin = d_keys.begin(); + auto pairs_begin = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type>([] __device__(auto i) { + return cuco::pair{i, i}; + })); thrust::device_vector d_contained(num_keys); - auto zip_equal = [] __device__(auto const& p) { return thrust::get<0>(p) == thrust::get<1>(p); }; - auto is_even = [] __device__(auto const& i) { return i % 2 == 0; }; + auto zip_equal = cuda::proclaim_return_type( + [] __device__(auto const& p) { return thrust::get<0>(p) == thrust::get<1>(p); }); + auto is_even = + cuda::proclaim_return_type([] __device__(auto const& i) { return i % 2 == 0; }); SECTION("Non-inserted keys should not be contained.") { @@ -196,12 +85,13 @@ __inline__ void test_unique_sequence(Map& map, size_type num_keys) REQUIRE(map.size() == num_keys / 2); map.contains(keys_begin, keys_begin + num_keys, d_contained.begin()); - REQUIRE(cuco::test::equal(d_contained.begin(), - d_contained.end(), - thrust::counting_iterator(0), - [] __device__(auto const& idx_contained, auto const& idx) { - return ((idx % 2) == 0) == idx_contained; - })); + REQUIRE(cuco::test::equal( + d_contained.begin(), + d_contained.end(), + thrust::counting_iterator(0), + cuda::proclaim_return_type([] __device__(auto const& idx_contained, auto const& idx) { + return ((idx % 2) == 0) == idx_contained; + }))); } map.insert(pairs_begin, pairs_begin + num_keys); @@ -253,7 +143,7 @@ __inline__ void test_unique_sequence(Map& map, size_type num_keys) } TEMPLATE_TEST_CASE_SIG( - "Unique sequence", + "static_map: unique sequence", "", ((typename Key, typename Value, cuco::test::probe_sequence Probe, int CGSize), Key, From e41bf459a8ac61f7ac7f9fbb8a6666fd94c5a952 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20J=C3=BCnger?= <2955913+sleeepyjack@users.noreply.github.com> Date: Tue, 21 Nov 2023 02:09:59 +0100 Subject: [PATCH 2/2] Rework build script (#395) This PR updates the `ci/build.sh` script to be usable both interactively and in CI. --- .github/workflows/dispatch-build-and-test.yml | 4 +- README.md | 21 +- ci/build.sh | 231 +++++++++++++----- 3 files changed, 182 insertions(+), 74 deletions(-) diff --git a/.github/workflows/dispatch-build-and-test.yml b/.github/workflows/dispatch-build-and-test.yml index 9f2e083ed..ab7d73619 100644 --- a/.github/workflows/dispatch-build-and-test.yml +++ b/.github/workflows/dispatch-build-and-test.yml @@ -35,7 +35,7 @@ jobs: with: cpu: ${{ matrix.cpu }} test_name: ${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}} - build_script: "./ci/build.sh ${{matrix.compiler.exe}} ${{matrix.std}} ${{matrix.gpu_build_archs}}" - test_script: "./ci/test.sh ${{matrix.compiler.exe}} ${{matrix.std}} ${{matrix.gpu_build_archs}}" + build_script: "./ci/build.sh --cxx ${{matrix.compiler.exe}} --std ${{matrix.std}} --arch ${{matrix.gpu_build_archs}} --infix ${{matrix.cpu}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}" + test_script: "./ci/test.sh --tests --cxx ${{matrix.compiler.exe}} --std ${{matrix.std}} --arch ${{matrix.gpu_build_archs}} --infix ${{matrix.cpu}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}" container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} run_tests: ${{ contains(matrix.jobs, 'test') && !contains(github.event.head_commit.message, 'skip-tests') }} diff --git a/README.md b/README.md index 93ac04027..163e43f2f 100644 --- a/README.md +++ b/README.md @@ -59,8 +59,7 @@ This will take care of downloading `cuCollections` from GitHub and making the he `cuCollections` depends on the following libraries: -- [libcu++](https://github.com/NVIDIA/libcudacxx) -- [CUB](https://github.com/thrust/cub) +- [CUDA C++ Core Libraries (CCCL)](https://github.com/NVIDIA/cccl) No action is required from the user to satisfy these dependencies. `cuCollections`'s CMake script is configured to first search the system for these libraries, and if they are not found, to automatically fetch them from GitHub. @@ -75,14 +74,26 @@ To build the tests, benchmarks, and examples: cd $CUCO_ROOT mkdir -p build cd build -cmake .. -make +cmake .. # configure +make # build +ctest --test-dir tests # run tests ``` Binaries will be built into: - `build/tests/` -- `build/gbenchmarks/` +- `build/benchmarks/` - `build/examples/` +### Build Script: + +Alternatively, you can use the build script located at `ci/build.sh`. Calling this script with no arguments will trigger a full build which will be located at `build/local`. + +```bash +cd $CUCO_ROOT +ci/build.sh # configure and build +ctest --test-dir build/local/tests # run tests +``` + +For a comprehensive list of all available options along with descriptions and examples, you can use the option `ci/build.sh -h`. ## Code Formatting By default, `cuCollections` uses [`pre-commit.ci`](https://pre-commit.ci/) along with [`mirrors-clang-format`](https://github.com/pre-commit/mirrors-clang-format) to automatically format the C++/CUDA files in a pull request. diff --git a/ci/build.sh b/ci/build.sh index eb31d9382..2d8eb9ece 100755 --- a/ci/build.sh +++ b/ci/build.sh @@ -16,108 +16,205 @@ set -eo pipefail +ORIGINAL_DIR=$(pwd) + +resolve_path() { + local input_path=$1 + # Check if the input is an absolute path + if [[ "$input_path" = /* ]]; then + echo "$input_path" + else + # Treat as a relative path or executable name + # Check if it's in the PATH + if command -v "$input_path" >/dev/null 2>&1; then + echo "$input_path" + else + echo "$ORIGINAL_DIR/$input_path" + fi + fi +} + # Ensure the script is being executed in its containing directory cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"; # Script defaults -CUDA_COMPILER=nvcc +BUILD_TESTS=${BUILD_TESTS:-OFF} +BUILD_EXAMPLES=${BUILD_EXAMPLES:-OFF} +BUILD_BENCHMARKS=${BUILD_BENCHMARKS:-OFF} +CLEAN_BUILD=0 # Re-use existing artifacts by-default +BUILD_PREFIX=../build # /build +BUILD_INFIX=local # /build/local +DEBUG_BUILD=0 # Default build type is Release +PARALLEL_LEVEL=${PARALLEL_LEVEL:-$(nproc)} # defaults to number of cores in the system +CUDA_COMPILER=${CUDACXX:-nvcc} # $CUDACXX if set, otherwise `nvcc` +HOST_COMPILER=${CXX:-g++} # $CXX if set, otherwise `g++` +CUDA_ARCHS=native # detect system's GPU architectures +CXX_STANDARD=17 -# Check if the correct number of arguments has been provided function usage { - echo "Usage: $0 [OPTIONS] " - echo "The PARALLEL_LEVEL environment variable controls the amount of build parallelism. Default is the number of cores." - echo "Example: PARALLEL_LEVEL=8 $0 g++-8 14 \"70\" " - echo "Example: $0 clang++-8 17 \"70;75;80-virtual\" " - echo "Possible options: " - echo " -nvcc: path/to/nvcc" - echo " -v/--verbose: enable shell echo for debugging" + echo "cuCollections build script" + echo "Usage: $0 [OPTIONS]" + echo "Options:" + echo " -t/--tests: Build tests" + echo " -e/--examples: Build examples" + echo " -b/--benchmarks: Build benchmarks" + echo " -c/--clean: Clean (re-)build" + echo " --prefix: Build directory prefix (Defaults to /build)" + echo " -i/--infix: Build directory infix (Defaults to local)" + echo " -d/--debug: Debug build" + echo " -p/--parallel: Build parallelism (Defaults to \$PARALLEL_LEVEL if set, otherwise the system's number of CPU cores)" + echo " --cuda: CUDA compiler (Defaults to \$CUDACXX if set, otherwise nvcc)" + echo " --cxx: Host compiler (Defaults to \$CXX if set, otherwise g++)" + echo " --arch: Target CUDA arches, e.g. \"60-real;70;80-virtual\" (Defaults to the system's native GPU archs)" + echo " --std: CUDA/C++ standard (Defaults to 17)" + echo " -v/-verbose/--verbose: Enable shell echo for debugging" + echo " -h/-help/--help: Show this usage message" + echo + echo "Examples:" + echo " Basic Build:" + echo " $ $0" + echo " Runs a basic build with default settings, i.e., builds tests, examples, and benchmarks." + echo " Build files will be written to /build/local and symlinked to /build/latest." + echo + echo " Custom Build Infix Directory:" + echo " $ $0 -i my_build" + echo " Build files will be written to the /build/my_build directory and symlinked to /build/latest." + echo + echo " Parallel Build with Specific CUDA Architecture and CUDA Compiler:" + echo " $ PARALLEL_LEVEL=8 $0 --cuda /my_cuda_compiler/nvcc --arch 70;80" + echo " $ $0 -p 8 --cuda /my_cuda_compiler/nvcc --arch 70;80" + echo " Specifies parallel build level of 8 and CUDA architecture 70 and 80 with the specified CUDA compiler." + echo " Build files will be written to /build/local and symlinked to /build/latest." + echo + echo " Debug Build with Tests and Examples:" + echo " $ CXX=g++-9 $0 -t -e -d" + echo " $ $0 --cxx g++-9 -t -e -d" + echo " Sets the host compiler to g++-9, builds tests and examples, and enables debug mode." + echo " Build files will be written to /build/local and symlinked to /build/latest." + echo + echo " Custom Build Directory with Benchmarks:" + echo " $ BUILD_BENCHMARKS=ON $0 --prefix /custom/build --infix my_build" + echo " $ $0 --prefix /custom/build --infix my_build -b" + echo " Builds benchmarks only." + echo " Build files will be written to /custom/build/my_build and symlinked to /custom/build/latest." + echo + echo " Verbose Mode for Debugging:" + echo " $ $0 -v --std 17" + echo " Enables verbose mode for detailed output and builds with C++17 standard." + echo " Build files will be written to /build/local and symlinked to /build/latest." + echo exit 1 } -# Check for extra options -# While there are more than 3 arguments, parse switches/options -while [ "$#" -gt 3 ] -do - case "${1}" in - -h) usage ;; - -help) usage ;; - --help) usage ;; - --verbose) VERBOSE=1; shift ;; - -v) VERBOSE=1; shift ;; - -nvcc) CUDA_COMPILER="${2}"; shift 2;; - *) usage ;; - esac +# Parse options + +# Copy the args into a temporary array, since we will modify them and +# the parent script may still need them. +args=("$@") +while [ "${#args[@]}" -ne 0 ]; do + case "${args[0]}" in + -t | --tests) BUILD_TESTS=ON; args=("${args[@]:1}");; + -e | --examples) BUILD_EXAMPLES=ON; args=("${args[@]:1}");; + -b | --benchmarks) BUILD_BENCHMARKS=ON; args=("${args[@]:1}");; + -c | --clean) CLEAN_BUILD=1; args=("${args[@]:1}");; + --prefix) BUILD_PREFIX=$(resolve_path "${args[1]}"); args=("${args[@]:2}");; + -i | --infix) BUILD_INFIX="${args[1]}"; args=("${args[@]:2}");; + -d | --debug) DEBUG_BUILD=1; args=("${args[@]:1}");; + -p | --parallel) PARALLEL_LEVEL="${args[1]}"; args=("${args[@]:2}");; + --cuda) CUDA_COMPILER=$(resolve_path "${args[1]}"); args=("${args[@]:2}");; + --cxx) HOST_COMPILER=$(resolve_path "${args[1]}"); args=("${args[@]:2}");; + --arch) CUDA_ARCHS="${args[1]}"; args=("${args[@]:2}");; + --std) CXX_STANDARD="${args[1]}"; args=("${args[@]:2}");; + -v | -verbose | --verbose) VERBOSE=1; args=("${args[@]:1}");; + -h | -help | --help) usage ;; + *) echo "Unrecognized option: ${args[0]}"; usage ;; + esac done +# Convert to full paths: +HOST_COMPILER=$(which ${HOST_COMPILER}) +CUDA_COMPILER=$(which ${CUDA_COMPILER}) +# Make CUDA arch list compatible with cmake +CUDA_ARCHS=$(echo "$CUDA_ARCHS" | tr ' ,' ';;') + if [ $VERBOSE ]; then set -x fi -if [ "$#" -ne 3 ]; then - echo "Invalid number of arguments" - usage -fi - # Begin processing unsets after option parsing set -u -# Assign command line arguments to variables -readonly HOST_COMPILER=$(which $1) -readonly CXX_STANDARD=$2 - -# Replace spaces, commas and semicolons with semicolons for CMake list -readonly GPU_ARCHS=$(echo $3 | tr ' ,' ';') +if [ "$BUILD_INFIX" = "latest" ] || [ -z "$BUILD_INFIX" ]; then + echo "Error: BUILD_INFIX cannot be empty or 'latest'" >&2 + exit 1 +fi -readonly PARALLEL_LEVEL=${PARALLEL_LEVEL:=$(nproc)} -readonly NVCC_VERSION=$($CUDA_COMPILER --version | grep release | awk '{print $6}' | cut -c2-) +# If no build target is specified, build all targets +if [ "$BUILD_TESTS" == "OFF" ] && [ "$BUILD_EXAMPLES" == "OFF" ] && [ "$BUILD_BENCHMARKS" == "OFF" ]; then + BUILD_TESTS=ON + BUILD_EXAMPLES=ON + BUILD_BENCHMARKS=ON +fi -if [ -z ${DEVCONTAINER_NAME+x} ]; then - BUILD_DIR=../build/local -else - BUILD_DIR=../build/${DEVCONTAINER_NAME} +# Trigger clean (re-)build +if [ "$CLEAN_BUILD" -eq 1 ]; then + rm -rf BUILD_DIR fi -# The most recent build will always be symlinked to cuCollections/build/latest +BUILD_DIR="$BUILD_PREFIX/$BUILD_INFIX" mkdir -p $BUILD_DIR -rm -f ../build/latest -ln -sf $BUILD_DIR ../build/latest -export BUILD_DIR -echo $BUILD_DIR +export BUILD_DIR # TODO remove + +# The most recent build will be symlinked to cuCollections/build/latest +rm -f $BUILD_PREFIX/latest +ln -sf $BUILD_DIR $BUILD_PREFIX/latest + +# Now that BUILD_DIR exists, use readlink to canonicalize the path: +BUILD_DIR=$(readlink -f "${BUILD_DIR}") + +BUILD_TYPE=$( [ "$DEBUG_BUILD" -eq 1 ] && echo "Debug" || echo "Release" ) CMAKE_OPTIONS=" - -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DCMAKE_CXX_STANDARD=${CXX_STANDARD} \ -DCMAKE_CUDA_STANDARD=${CXX_STANDARD} \ -DCMAKE_CXX_COMPILER=${HOST_COMPILER} \ -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} \ -DCMAKE_CUDA_HOST_COMPILER=${HOST_COMPILER} \ - -DCMAKE_CUDA_ARCHITECTURES=${GPU_ARCHS} \ + -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHS} \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + -DBUILD_TESTS=${BUILD_TESTS} \ + -DBUILD_EXAMPLES=${BUILD_EXAMPLES} \ + -DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} \ " echo "========================================" -echo "Begin build" -echo "pwd=$(pwd)" -echo "NVCC_VERSION=$NVCC_VERSION" -echo "HOST_COMPILER=$HOST_COMPILER" -echo "CXX_STANDARD=$CXX_STANDARD" -echo "GPU_ARCHS=$GPU_ARCHS" -echo "PARALLEL_LEVEL=$PARALLEL_LEVEL" -echo "BUILD_DIR=$BUILD_DIR" -echo "Current commit is:" -git log -1 || echo "Not a repository" +echo "-- START: $(date)" +echo "-- GIT_SHA: $(git rev-parse HEAD 2>/dev/null || echo 'Not a repository')" +echo "-- PWD: $(pwd)" +echo "-- BUILD_DIR: ${BUILD_DIR}" +echo "-- BUILD_TYPE: ${BUILD_TYPE}" +echo "-- PARALLEL_LEVEL: ${PARALLEL_LEVEL}" +echo "-- CUDA_ARCHS: ${CUDA_ARCHS}" +echo "-- BUILD_TESTS: ${BUILD_TESTS}" +echo "-- BUILD_EXAMPLES: ${BUILD_EXAMPLES}" +echo "-- BUILD_BENCHMARKS: ${BUILD_BENCHMARKS}" + +# configure +cmake -S .. -B $BUILD_DIR $CMAKE_OPTIONS echo "========================================" -function configure(){ - cmake -S .. -B $BUILD_DIR $CMAKE_OPTIONS -} - -function build(){ +if command -v sccache >/dev/null; then source "./sccache_stats.sh" start - cmake --build $BUILD_DIR --parallel $PARALLEL_LEVEL - echo "Build complete" - source "./sccache_stats.sh" end -} +fi + +#build +cmake --build $BUILD_DIR --parallel $PARALLEL_LEVEL +echo "========================================" +echo "Build complete" -configure -build \ No newline at end of file +if command -v sccache >/dev/null; then + source "./sccache_stats.sh" end +else + echo "sccache stats: N/A" +fi \ No newline at end of file