From c7d52a257f6b705a77bdad4af9afa4c6b3cd30e2 Mon Sep 17 00:00:00 2001
From: Yunsong Wang <yunsongw@nvidia.com>
Date: Mon, 20 Nov 2023 15:43:26 -0800
Subject: [PATCH 1/2] Migrate tests to excercise the new map (#393)

This PR migrates static map tests to test the new static map:

- Large type tests are not migrated since the new map doesn't support
keys larger than 8 bytes
- Shared memory tests require related functions to be added into ref
code thus would be in a separate PR
---
 tests/static_map/custom_type_test.cu          |  59 ++++---
 tests/static_map/duplicate_keys_test.cu       |  59 +++++--
 tests/static_map/heterogeneous_lookup_test.cu |   7 +-
 tests/static_map/insert_and_find_test.cu      |   9 +-
 tests/static_map/insert_or_assign_test.cu     |  15 +-
 tests/static_map/key_sentinel_test.cu         |  53 ++++---
 tests/static_map/shared_memory_test.cu        |  15 +-
 tests/static_map/stream_test.cu               |  46 +++---
 tests/static_map/unique_sequence_test.cu      | 148 +++---------------
 9 files changed, 187 insertions(+), 224 deletions(-)
diff --git a/tests/static_map/custom_type_test.cu b/tests/static_map/custom_type_test.cu
index e23216ca3..536c83194 100644
--- a/tests/static_map/custom_type_test.cu
+++ b/tests/static_map/custom_type_test.cu
@@ -27,6 +27,8 @@
 
 #include <catch2/catch_template_test_macros.hpp>
 
+#include <cuda/functional>
+
 #include <tuple>
 
 // User-defined key type
@@ -123,17 +125,18 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type",
                     thrust::counting_iterator<int>(0),
                     thrust::counting_iterator<int>(num),
                     insert_keys.begin(),
-                    [] __device__(auto i) { return Key{i}; });
+                    cuda::proclaim_return_type<Key>([] __device__(auto i) { return Key{i}; }));
 
   thrust::transform(thrust::device,
                     thrust::counting_iterator<int>(0),
                     thrust::counting_iterator<int>(num),
                     insert_values.begin(),
-                    [] __device__(auto i) { return Value{i}; });
+                    cuda::proclaim_return_type<Value>([] __device__(auto i) { return Value{i}; }));
 
-  auto insert_pairs =
-    thrust::make_transform_iterator(thrust::make_counting_iterator<int>(0),
-                                    [] __device__(auto i) { return cuco::pair<Key, Value>(i, i); });
+  auto insert_pairs = thrust::make_transform_iterator(
+    thrust::make_counting_iterator<int>(0),
+    cuda::proclaim_return_type<cuco::pair<Key, Value>>(
+      [] __device__(auto i) { return cuco::pair<Key, Value>(i, i); }));
 
   SECTION("All inserted keys-value pairs should be correctly recovered during find")
   {
@@ -151,9 +154,9 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type",
     REQUIRE(cuco::test::equal(insert_values.begin(),
                               insert_values.end(),
                               found_values.begin(),
-                              [] __device__(Value lhs, Value rhs) {
+                              cuda::proclaim_return_type<bool>([] __device__(Value lhs, Value rhs) {
                                 return std::tie(lhs.f, lhs.s) == std::tie(rhs.f, rhs.s);
-                              }));
+                              })));
   }
 
   SECTION("All inserted keys-value pairs should be contained")
@@ -175,7 +178,7 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type",
       insert_pairs,
       insert_pairs + num,
       thrust::counting_iterator<int>(0),
-      [] __device__(auto const& key) { return (key % 2) == 0; },
+      cuda::proclaim_return_type<bool>([] __device__(auto const& key) { return (key % 2) == 0; }),
       hash_custom_key{},
       custom_key_equals{});
 
@@ -187,12 +190,13 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type",
                  hash_custom_key{},
                  custom_key_equals{});
 
-    REQUIRE(cuco::test::equal(contained.begin(),
-                              contained.end(),
-                              thrust::counting_iterator<int>(0),
-                              [] __device__(auto const& idx_contained, auto const& idx) {
-                                return ((idx % 2) == 0) == idx_contained;
-                              }));
+    REQUIRE(cuco::test::equal(
+      contained.begin(),
+      contained.end(),
+      thrust::counting_iterator<int>(0),
+      cuda::proclaim_return_type<bool>([] __device__(auto const& idx_contained, auto const& idx) {
+        return ((idx % 2) == 0) == idx_contained;
+      })));
   }
 
   SECTION("Non-inserted keys-value pairs should not be contained")
@@ -212,9 +216,11 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type",
     map.insert(insert_pairs, insert_pairs + num, hash_custom_key{}, custom_key_equals{});
     auto view = map.get_device_view();
     REQUIRE(cuco::test::all_of(
-      insert_pairs, insert_pairs + num, [view] __device__(cuco::pair<Key, Value> const& pair) {
+      insert_pairs,
+      insert_pairs + num,
+      cuda::proclaim_return_type<bool>([view] __device__(cuco::pair<Key, Value> const& pair) {
         return view.contains(pair.first, hash_custom_key{}, custom_key_equals{});
-      }));
+      })));
   }
 
   SECTION("Inserting unique keys should return insert success.")
@@ -222,9 +228,11 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type",
     auto m_view = map.get_device_mutable_view();
     REQUIRE(cuco::test::all_of(insert_pairs,
                                insert_pairs + num,
-                               [m_view] __device__(cuco::pair<Key, Value> const& pair) mutable {
-                                 return m_view.insert(pair, hash_custom_key{}, custom_key_equals{});
-                               }));
+                               cuda::proclaim_return_type<bool>(
+                                 [m_view] __device__(cuco::pair<Key, Value> const& pair) mutable {
+                                   return m_view.insert(
+                                     pair, hash_custom_key{}, custom_key_equals{});
+                                 })));
   }
 
   SECTION("Cannot find any key in an empty hash map")
@@ -235,18 +243,21 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type",
       REQUIRE(cuco::test::all_of(
         insert_pairs,
         insert_pairs + num,
-        [view] __device__(cuco::pair<Key, Value> const& pair) mutable {
-          return view.find(pair.first, hash_custom_key{}, custom_key_equals{}) == view.end();
-        }));
+        cuda::proclaim_return_type<bool>(
+          [view] __device__(cuco::pair<Key, Value> const& pair) mutable {
+            return view.find(pair.first, hash_custom_key{}, custom_key_equals{}) == view.end();
+          })));
     }
 
     SECTION("const view")
     {
       auto const view = map.get_device_view();
       REQUIRE(cuco::test::all_of(
-        insert_pairs, insert_pairs + num, [view] __device__(cuco::pair<Key, Value> const& pair) {
+        insert_pairs,
+        insert_pairs + num,
+        cuda::proclaim_return_type<bool>([view] __device__(cuco::pair<Key, Value> const& pair) {
           return view.find(pair.first, hash_custom_key{}, custom_key_equals{}) == view.end();
-        }));
+        })));
     }
   }
 }
diff --git a/tests/static_map/duplicate_keys_test.cu b/tests/static_map/duplicate_keys_test.cu
index 5620fa4e9..e17ec3af8 100644
--- a/tests/static_map/duplicate_keys_test.cu
+++ b/tests/static_map/duplicate_keys_test.cu
@@ -29,16 +29,52 @@
 
 #include <catch2/catch_template_test_macros.hpp>
 
-TEMPLATE_TEST_CASE_SIG("Duplicate keys",
-                       "",
-                       ((typename Key, typename Value), Key, Value),
-                       (int32_t, int32_t),
-                       (int32_t, int64_t),
-                       (int64_t, int32_t),
-                       (int64_t, int64_t))
+#include <cuda/functional>
+
+using size_type = std::size_t;
+
+TEMPLATE_TEST_CASE_SIG(
+  "static_map duplicate keys",
+  "",
+  ((typename Key, typename Value, cuco::test::probe_sequence Probe, int CGSize),
+   Key,
+   Value,
+   Probe,
+   CGSize),
+  (int32_t, int32_t, cuco::test::probe_sequence::double_hashing, 1),
+  (int32_t, int64_t, cuco::test::probe_sequence::double_hashing, 1),
+  (int32_t, int32_t, cuco::test::probe_sequence::double_hashing, 2),
+  (int32_t, int64_t, cuco::test::probe_sequence::double_hashing, 2),
+  (int64_t, int32_t, cuco::test::probe_sequence::double_hashing, 1),
+  (int64_t, int64_t, cuco::test::probe_sequence::double_hashing, 1),
+  (int64_t, int32_t, cuco::test::probe_sequence::double_hashing, 2),
+  (int64_t, int64_t, cuco::test::probe_sequence::double_hashing, 2),
+  (int32_t, int32_t, cuco::test::probe_sequence::linear_probing, 1),
+  (int32_t, int64_t, cuco::test::probe_sequence::linear_probing, 1),
+  (int32_t, int32_t, cuco::test::probe_sequence::linear_probing, 2),
+  (int32_t, int64_t, cuco::test::probe_sequence::linear_probing, 2),
+  (int64_t, int32_t, cuco::test::probe_sequence::linear_probing, 1),
+  (int64_t, int64_t, cuco::test::probe_sequence::linear_probing, 1),
+  (int64_t, int32_t, cuco::test::probe_sequence::linear_probing, 2),
+  (int64_t, int64_t, cuco::test::probe_sequence::linear_probing, 2))
 {
-  constexpr std::size_t num_keys{500'000};
-  cuco::static_map<Key, Value> map{
+  constexpr size_type num_keys{500'000};
+
+  using probe =
+    std::conditional_t<Probe == cuco::test::probe_sequence::linear_probing,
+                       cuco::experimental::linear_probing<CGSize, cuco::murmurhash3_32<Key>>,
+                       cuco::experimental::double_hashing<CGSize,
+                                                          cuco::murmurhash3_32<Key>,
+                                                          cuco::murmurhash3_32<Key>>>;
+
+  auto map = cuco::experimental::static_map<Key,
+                                            Value,
+                                            cuco::experimental::extent<size_type>,
+                                            cuda::thread_scope_device,
+                                            thrust::equal_to<Key>,
+                                            probe,
+                                            cuco::cuda_allocator<std::byte>,
+                                            cuco::experimental::storage<2>>{
     num_keys * 2, cuco::empty_key<Key>{-1}, cuco::empty_value<Value>{-1}};
 
   thrust::device_vector<Key> d_keys(num_keys);
@@ -49,7 +85,8 @@ TEMPLATE_TEST_CASE_SIG("Duplicate keys",
 
   auto pairs_begin = thrust::make_transform_iterator(
     thrust::make_counting_iterator<int>(0),
-    [] __device__(auto i) { return cuco::pair<Key, Value>(i / 2, i / 2); });
+    cuda::proclaim_return_type<cuco::pair<Key, Value>>(
+      [] __device__(auto i) { return cuco::pair<Key, Value>(i / 2, i / 2); }));
 
   thrust::device_vector<Value> d_results(num_keys);
   thrust::device_vector<bool> d_contained(num_keys);
@@ -68,7 +105,7 @@ TEMPLATE_TEST_CASE_SIG("Duplicate keys",
 
     map.insert(pairs_begin, pairs_begin + num_keys);
 
-    auto const num_entries = map.get_size();
+    auto const num_entries = map.size();
     REQUIRE(num_entries == gold);
 
     auto [key_out_end, value_out_end] =
diff --git a/tests/static_map/heterogeneous_lookup_test.cu b/tests/static_map/heterogeneous_lookup_test.cu
index ed1ace9bd..f386d96a5 100644
--- a/tests/static_map/heterogeneous_lookup_test.cu
+++ b/tests/static_map/heterogeneous_lookup_test.cu
@@ -27,6 +27,8 @@
 
 #include <catch2/catch_template_test_macros.hpp>
 
+#include <cuda/functional>
+
 #include <tuple>
 
 // insert key type
@@ -115,8 +117,9 @@ TEMPLATE_TEST_CASE_SIG("Heterogeneous lookup",
   auto insert_pairs = thrust::make_transform_iterator(
     thrust::counting_iterator<int>(0),
     [] __device__(auto i) { return cuco::pair<InsertKey, Value>(i, i); });
-  auto probe_keys = thrust::make_transform_iterator(thrust::counting_iterator<int>(0),
-                                                    [] __device__(auto i) { return ProbeKey(i); });
+  auto probe_keys = thrust::make_transform_iterator(
+    thrust::counting_iterator<int>(0),
+    cuda::proclaim_return_type<ProbeKey>([] __device__(auto i) { return ProbeKey{i}; }));
 
   SECTION("All inserted keys-value pairs should be contained")
   {
diff --git a/tests/static_map/insert_and_find_test.cu b/tests/static_map/insert_and_find_test.cu
index 3afc27b9a..9941e46a6 100644
--- a/tests/static_map/insert_and_find_test.cu
+++ b/tests/static_map/insert_and_find_test.cu
@@ -26,6 +26,8 @@
 
 #include <catch2/catch_template_test_macros.hpp>
 
+#include <cuda/functional>
+
 static constexpr int Iters = 10'000;
 
 template <typename Ref>
@@ -129,7 +131,8 @@ TEMPLATE_TEST_CASE_SIG(
   thrust::sequence(thrust::device, d_keys.begin(), d_keys.end());
   map.find(d_keys.begin(), d_keys.end(), d_values.begin());
 
-  REQUIRE(cuco::test::all_of(d_values.begin(), d_values.end(), [] __device__(Value v) {
-    return v == (Blocks * Threads) / CGSize;
-  }));
+  REQUIRE(cuco::test::all_of(
+    d_values.begin(), d_values.end(), cuda::proclaim_return_type<bool>([] __device__(Value v) {
+      return v == (Blocks * Threads) / CGSize;
+    })));
 }
diff --git a/tests/static_map/insert_or_assign_test.cu b/tests/static_map/insert_or_assign_test.cu
index 90c6553ce..4bca776f7 100644
--- a/tests/static_map/insert_or_assign_test.cu
+++ b/tests/static_map/insert_or_assign_test.cu
@@ -27,6 +27,8 @@
 
 #include <catch2/catch_template_test_macros.hpp>
 
+#include <cuda/functional>
+
 using size_type = std::size_t;
 
 template <typename Map>
@@ -36,9 +38,11 @@ __inline__ void test_insert_or_assign(Map& map, size_type num_keys)
   using Value = typename Map::mapped_type;
 
   // Insert pairs
-  auto pairs_begin =
-    thrust::make_transform_iterator(thrust::counting_iterator<size_type>(0),
-                                    [] __device__(auto i) { return cuco::pair<Key, Value>(i, i); });
+  auto pairs_begin = thrust::make_transform_iterator(
+    thrust::counting_iterator<size_type>(0),
+    cuda::proclaim_return_type<cuco::pair<Key, Value>>([] __device__(auto i) {
+      return cuco::pair<Key, Value>{i, i};
+    }));
 
   auto const initial_size = map.insert(pairs_begin, pairs_begin + num_keys);
   REQUIRE(initial_size == num_keys);  // all keys should be inserted
@@ -58,8 +62,9 @@ __inline__ void test_insert_or_assign(Map& map, size_type num_keys)
   thrust::device_vector<Key> d_values(num_keys);
   map.retrieve_all(d_keys.begin(), d_values.begin());
 
-  auto gold_values_begin = thrust::make_transform_iterator(thrust::counting_iterator<size_type>(0),
-                                                           [] __device__(auto i) { return i * 2; });
+  auto gold_values_begin = thrust::make_transform_iterator(
+    thrust::counting_iterator<size_type>(0),
+    cuda::proclaim_return_type<size_type>([] __device__(auto i) { return i * 2; }));
 
   thrust::sort(thrust::device, d_values.begin(), d_values.end());
   REQUIRE(cuco::test::equal(
diff --git a/tests/static_map/key_sentinel_test.cu b/tests/static_map/key_sentinel_test.cu
index 74a1badd1..dceaf6ec4 100644
--- a/tests/static_map/key_sentinel_test.cu
+++ b/tests/static_map/key_sentinel_test.cu
@@ -24,12 +24,14 @@
 
 #include <catch2/catch_template_test_macros.hpp>
 
+#include <cuda/functional>
+
 #define SIZE 10
 __device__ int A[SIZE];
 
 template <typename T>
 struct custom_equals {
-  __device__ bool operator()(T lhs, T rhs) { return A[lhs] == A[rhs]; }
+  __device__ bool operator()(T lhs, T rhs) const { return A[lhs] == A[rhs]; }
 };
 
 TEMPLATE_TEST_CASE_SIG(
@@ -39,11 +41,15 @@ TEMPLATE_TEST_CASE_SIG(
   using Value = T;
 
   constexpr std::size_t num_keys{SIZE};
-  cuco::static_map<Key, Value> map{
-    SIZE * 2, cuco::empty_key<Key>{-1}, cuco::empty_value<Value>{-1}};
+  auto map = cuco::experimental::static_map{
+    SIZE * 2,
+    cuco::empty_key<Key>{-1},
+    cuco::empty_value<Value>{-1},
+    custom_equals<Key>{},
+    cuco::experimental::linear_probing<1, cuco::default_hash_function<Key>>{}};
 
-  auto m_view = map.get_device_mutable_view();
-  auto view   = map.get_device_view();
+  auto insert_ref = map.ref(cuco::experimental::op::insert);
+  auto find_ref   = map.ref(cuco::experimental::op::find);
 
   int h_A[SIZE];
   for (int i = 0; i < SIZE; i++) {
@@ -51,34 +57,35 @@ TEMPLATE_TEST_CASE_SIG(
   }
   CUCO_CUDA_TRY(cudaMemcpyToSymbol(A, h_A, SIZE * sizeof(int)));
 
-  auto pairs_begin =
-    thrust::make_transform_iterator(thrust::make_counting_iterator<T>(0),
-                                    [] __device__(auto i) { return cuco::pair<Key, Value>(i, i); });
+  auto pairs_begin = thrust::make_transform_iterator(
+    thrust::make_counting_iterator<T>(0),
+    cuda::proclaim_return_type<cuco::pair<Key, Value>>(
+      [] __device__(auto i) { return cuco::pair<Key, Value>(i, i); }));
 
   SECTION(
     "Tests of non-CG insert: The custom `key_equal` can never be used to compare against sentinel")
   {
-    REQUIRE(cuco::test::all_of(pairs_begin,
-                               pairs_begin + num_keys,
-                               [m_view] __device__(cuco::pair<Key, Value> const& pair) mutable {
-                                 return m_view.insert(
-                                   pair, cuco::default_hash_function<Key>{}, custom_equals<Key>{});
-                               }));
+    REQUIRE(
+      cuco::test::all_of(pairs_begin,
+                         pairs_begin + num_keys,
+                         cuda::proclaim_return_type<bool>(
+                           [insert_ref] __device__(cuco::pair<Key, Value> const& pair) mutable {
+                             return insert_ref.insert(pair);
+                           })));
   }
 
   SECTION(
     "Tests of CG insert: The custom `key_equal` can never be used to compare against sentinel")
   {
-    map.insert(pairs_begin,
-               pairs_begin + num_keys,
-               cuco::default_hash_function<Key>{},
-               custom_equals<Key>{});
+    map.insert(pairs_begin, pairs_begin + num_keys);
     // All keys inserted via custom `key_equal` should be found
     REQUIRE(cuco::test::all_of(
-      pairs_begin, pairs_begin + num_keys, [view] __device__(cuco::pair<Key, Value> const& pair) {
-        auto const found = view.find(pair.first);
-        return (found != view.end()) and
-               (found->first.load() == pair.first and found->second.load() == pair.second);
-      }));
+      pairs_begin,
+      pairs_begin + num_keys,
+      cuda::proclaim_return_type<bool>([find_ref] __device__(cuco::pair<Key, Value> const& pair) {
+        auto const found = find_ref.find(pair.first);
+        return (found != find_ref.end()) and
+               (found->first == pair.first and found->second == pair.second);
+      })));
   }
 }
diff --git a/tests/static_map/shared_memory_test.cu b/tests/static_map/shared_memory_test.cu
index 444f1c7e7..70e2def8d 100644
--- a/tests/static_map/shared_memory_test.cu
+++ b/tests/static_map/shared_memory_test.cu
@@ -27,6 +27,8 @@
 
 #include <catch2/catch_template_test_macros.hpp>
 
+#include <cuda/functional>
+
 #include <limits>
 
 template <typename MapType, int CAPACITY>
@@ -74,9 +76,8 @@ TEMPLATE_TEST_CASE_SIG("Shared memory static map",
                        (int64_t, int32_t),
                        (int64_t, int64_t))
 {
-  using MapType                = cuco::static_map<Key, Value>;
-  using DeviceViewType         = typename MapType::device_view;
-  using DeviceViewIteratorType = typename DeviceViewType::iterator;
+  using MapType        = cuco::static_map<Key, Value>;
+  using DeviceViewType = typename MapType::device_view;
 
   constexpr std::size_t number_of_maps  = 1000;
   constexpr std::size_t elements_in_map = 500;
@@ -127,9 +128,11 @@ TEMPLATE_TEST_CASE_SIG("Shared memory static map",
     auto zip = thrust::make_zip_iterator(
       thrust::make_tuple(d_keys_exist.begin(), d_keys_and_values_correct.begin()));
 
-    REQUIRE(cuco::test::all_of(zip, zip + d_keys_exist.size(), [] __device__(auto const& z) {
-      return thrust::get<0>(z) and thrust::get<1>(z);
-    }));
+    REQUIRE(cuco::test::all_of(zip,
+                               zip + d_keys_exist.size(),
+                               cuda::proclaim_return_type<bool>([] __device__(auto const& z) {
+                                 return thrust::get<0>(z) and thrust::get<1>(z);
+                               })));
   }
 
   SECTION("No key is found before insertion.")
diff --git a/tests/static_map/stream_test.cu b/tests/static_map/stream_test.cu
index 6121cbd62..fe1b2ac65 100644
--- a/tests/static_map/stream_test.cu
+++ b/tests/static_map/stream_test.cu
@@ -29,7 +29,9 @@
 
 #include <catch2/catch_template_test_macros.hpp>
 
-TEMPLATE_TEST_CASE_SIG("Unique sequence of keys on given stream",
+#include <cuda/functional>
+
+TEMPLATE_TEST_CASE_SIG("static_map: unique sequence of keys on given stream",
                        "",
                        ((typename Key, typename Value), Key, Value),
                        (int32_t, int32_t),
@@ -41,11 +43,14 @@ TEMPLATE_TEST_CASE_SIG("Unique sequence of keys on given stream",
   CUCO_CUDA_TRY(cudaStreamCreate(&stream));
 
   constexpr std::size_t num_keys{500'000};
-  cuco::static_map<Key, Value> map{1'000'000,
-                                   cuco::empty_key<Key>{-1},
-                                   cuco::empty_value<Value>{-1},
-                                   cuco::cuda_allocator<char>{},
-                                   stream};
+  auto map = cuco::experimental::static_map{
+    num_keys * 2,
+    cuco::empty_key<Key>{-1},
+    cuco::empty_value<Value>{-1},
+    thrust::equal_to<Key>{},
+    cuco::experimental::linear_probing<1, cuco::default_hash_function<Key>>{},
+    cuco::cuda_allocator<char>{},
+    stream};
 
   thrust::device_vector<Key> d_keys(num_keys);
   thrust::device_vector<Value> d_values(num_keys);
@@ -53,35 +58,34 @@ TEMPLATE_TEST_CASE_SIG("Unique sequence of keys on given stream",
   thrust::sequence(thrust::device, d_keys.begin(), d_keys.end());
   thrust::sequence(thrust::device, d_values.begin(), d_values.end());
 
-  auto pairs_begin =
-    thrust::make_transform_iterator(thrust::make_counting_iterator<int>(0),
-                                    [] __device__(auto i) { return cuco::pair<Key, Value>(i, i); });
-
-  auto hash_fn  = cuco::default_hash_function<Key>{};
-  auto equal_fn = thrust::equal_to<Value>{};
+  auto pairs_begin = thrust::make_transform_iterator(
+    thrust::make_counting_iterator<int>(0),
+    cuda::proclaim_return_type<cuco::pair<Key, Value>>(
+      [] __device__(auto i) { return cuco::pair<Key, Value>(i, i); }));
 
   // bulk function test cases
   SECTION("All inserted keys-value pairs should be correctly recovered during find")
   {
     thrust::device_vector<Value> d_results(num_keys);
 
-    map.insert(pairs_begin, pairs_begin + num_keys, hash_fn, equal_fn, stream);
-    map.find(d_keys.begin(), d_keys.end(), d_results.begin(), hash_fn, equal_fn, stream);
+    map.insert(pairs_begin, pairs_begin + num_keys, stream);
+    map.find(d_keys.begin(), d_keys.end(), d_results.begin(), stream);
     auto zip = thrust::make_zip_iterator(thrust::make_tuple(d_results.begin(), d_values.begin()));
 
-    REQUIRE(cuco::test::all_of(
-      zip,
-      zip + num_keys,
-      [] __device__(auto const& p) { return thrust::get<0>(p) == thrust::get<1>(p); },
-      stream));
+    REQUIRE(cuco::test::all_of(zip,
+                               zip + num_keys,
+                               cuda::proclaim_return_type<bool>([] __device__(auto const& p) {
+                                 return thrust::get<0>(p) == thrust::get<1>(p);
+                               }),
+                               stream));
   }
 
   SECTION("All inserted keys-value pairs should be contained")
   {
     thrust::device_vector<bool> d_contained(num_keys);
 
-    map.insert(pairs_begin, pairs_begin + num_keys, hash_fn, equal_fn, stream);
-    map.contains(d_keys.begin(), d_keys.end(), d_contained.begin(), hash_fn, equal_fn, stream);
+    map.insert(pairs_begin, pairs_begin + num_keys, stream);
+    map.contains(d_keys.begin(), d_keys.end(), d_contained.begin(), stream);
 
     REQUIRE(cuco::test::all_of(d_contained.begin(), d_contained.end(), thrust::identity{}, stream));
   }
diff --git a/tests/static_map/unique_sequence_test.cu b/tests/static_map/unique_sequence_test.cu
index 6a0165cc2..69fa69fb0 100644
--- a/tests/static_map/unique_sequence_test.cu
+++ b/tests/static_map/unique_sequence_test.cu
@@ -31,122 +31,7 @@
 
 #include <catch2/catch_template_test_macros.hpp>
 
-TEMPLATE_TEST_CASE_SIG("Unique sequence of keys",
-                       "",
-                       ((typename Key, typename Value), Key, Value),
-                       (int32_t, int32_t),
-                       (int32_t, int64_t),
-                       (int64_t, int32_t),
-                       (int64_t, int64_t))
-{
-  constexpr std::size_t num_keys{500'000};
-  cuco::static_map<Key, Value> map{
-    1'000'000, cuco::empty_key<Key>{-1}, cuco::empty_value<Value>{-1}};
-
-  auto m_view = map.get_device_mutable_view();
-  auto view   = map.get_device_view();
-
-  thrust::device_vector<Key> d_keys(num_keys);
-  thrust::device_vector<Value> d_values(num_keys);
-
-  thrust::sequence(thrust::device, d_keys.begin(), d_keys.end());
-  thrust::sequence(thrust::device, d_values.begin(), d_values.end());
-
-  auto pairs_begin =
-    thrust::make_transform_iterator(thrust::make_counting_iterator<int>(0),
-                                    [] __device__(auto i) { return cuco::pair<Key, Value>(i, i); });
-
-  thrust::device_vector<Value> d_results(num_keys);
-  thrust::device_vector<bool> d_contained(num_keys);
-
-  // bulk function test cases
-  SECTION("All inserted keys-value pairs should be correctly recovered during find")
-  {
-    map.insert(pairs_begin, pairs_begin + num_keys);
-    map.find(d_keys.begin(), d_keys.end(), d_results.begin());
-    auto zip = thrust::make_zip_iterator(thrust::make_tuple(d_results.begin(), d_values.begin()));
-
-    REQUIRE(cuco::test::all_of(zip, zip + num_keys, [] __device__(auto const& p) {
-      return thrust::get<0>(p) == thrust::get<1>(p);
-    }));
-  }
-
-  SECTION("All inserted keys-value pairs should be contained")
-  {
-    map.insert(pairs_begin, pairs_begin + num_keys);
-    map.contains(d_keys.begin(), d_keys.end(), d_contained.begin());
-
-    REQUIRE(cuco::test::all_of(d_contained.begin(), d_contained.end(), thrust::identity{}));
-  }
-
-  SECTION("Non-inserted keys-value pairs should not be contained")
-  {
-    map.contains(d_keys.begin(), d_keys.end(), d_contained.begin());
-
-    REQUIRE(cuco::test::none_of(d_contained.begin(), d_contained.end(), thrust::identity{}));
-  }
-
-  SECTION("Inserting unique keys should return insert success.")
-  {
-    REQUIRE(cuco::test::all_of(pairs_begin,
-                               pairs_begin + num_keys,
-                               [m_view] __device__(cuco::pair<Key, Value> const& pair) mutable {
-                                 return m_view.insert(pair);
-                               }));
-  }
-
-  SECTION("Cannot find any key in an empty hash map with non-const view")
-  {
-    SECTION("non-const view")
-    {
-      REQUIRE(cuco::test::all_of(pairs_begin,
-                                 pairs_begin + num_keys,
-                                 [view] __device__(cuco::pair<Key, Value> const& pair) mutable {
-                                   return view.find(pair.first) == view.end();
-                                 }));
-    }
-    SECTION("const view")
-    {
-      REQUIRE(cuco::test::all_of(
-        pairs_begin, pairs_begin + num_keys, [view] __device__(cuco::pair<Key, Value> const& pair) {
-          return view.find(pair.first) == view.end();
-        }));
-    }
-  }
-
-  SECTION("Keys are all found after inserting many keys.")
-  {
-    // Bulk insert keys
-    thrust::for_each(
-      thrust::device,
-      pairs_begin,
-      pairs_begin + num_keys,
-      [m_view] __device__(cuco::pair<Key, Value> const& pair) mutable { m_view.insert(pair); });
-
-    SECTION("non-const view")
-    {
-      // All keys should be found
-      REQUIRE(cuco::test::all_of(pairs_begin,
-                                 pairs_begin + num_keys,
-                                 [view] __device__(cuco::pair<Key, Value> const& pair) mutable {
-                                   auto const found = view.find(pair.first);
-                                   return (found != view.end()) and
-                                          (found->first.load() == pair.first and
-                                           found->second.load() == pair.second);
-                                 }));
-    }
-    SECTION("const view")
-    {
-      // All keys should be found
-      REQUIRE(cuco::test::all_of(
-        pairs_begin, pairs_begin + num_keys, [view] __device__(cuco::pair<Key, Value> const& pair) {
-          auto const found = view.find(pair.first);
-          return (found != view.end()) and
-                 (found->first.load() == pair.first and found->second.load() == pair.second);
-        }));
-    }
-  }
-}
+#include <cuda/functional>
 
 using size_type = int32_t;
 
@@ -160,14 +45,18 @@ __inline__ void test_unique_sequence(Map& map, size_type num_keys)
 
   thrust::sequence(thrust::device, d_keys.begin(), d_keys.end());
 
-  auto keys_begin = d_keys.begin();
-  auto pairs_begin =
-    thrust::make_transform_iterator(thrust::make_counting_iterator<size_type>(0),
-                                    [] __device__(auto i) { return cuco::pair<Key, Value>(i, i); });
+  auto keys_begin  = d_keys.begin();
+  auto pairs_begin = thrust::make_transform_iterator(
+    thrust::make_counting_iterator<size_type>(0),
+    cuda::proclaim_return_type<cuco::pair<Key, Value>>([] __device__(auto i) {
+      return cuco::pair<Key, Value>{i, i};
+    }));
   thrust::device_vector<bool> d_contained(num_keys);
 
-  auto zip_equal = [] __device__(auto const& p) { return thrust::get<0>(p) == thrust::get<1>(p); };
-  auto is_even   = [] __device__(auto const& i) { return i % 2 == 0; };
+  auto zip_equal = cuda::proclaim_return_type<bool>(
+    [] __device__(auto const& p) { return thrust::get<0>(p) == thrust::get<1>(p); });
+  auto is_even =
+    cuda::proclaim_return_type<bool>([] __device__(auto const& i) { return i % 2 == 0; });
 
   SECTION("Non-inserted keys should not be contained.")
   {
@@ -196,12 +85,13 @@ __inline__ void test_unique_sequence(Map& map, size_type num_keys)
     REQUIRE(map.size() == num_keys / 2);
 
     map.contains(keys_begin, keys_begin + num_keys, d_contained.begin());
-    REQUIRE(cuco::test::equal(d_contained.begin(),
-                              d_contained.end(),
-                              thrust::counting_iterator<std::size_t>(0),
-                              [] __device__(auto const& idx_contained, auto const& idx) {
-                                return ((idx % 2) == 0) == idx_contained;
-                              }));
+    REQUIRE(cuco::test::equal(
+      d_contained.begin(),
+      d_contained.end(),
+      thrust::counting_iterator<std::size_t>(0),
+      cuda::proclaim_return_type<bool>([] __device__(auto const& idx_contained, auto const& idx) {
+        return ((idx % 2) == 0) == idx_contained;
+      })));
   }
 
   map.insert(pairs_begin, pairs_begin + num_keys);
@@ -253,7 +143,7 @@ __inline__ void test_unique_sequence(Map& map, size_type num_keys)
 }
 
 TEMPLATE_TEST_CASE_SIG(
-  "Unique sequence",
+  "static_map: unique sequence",
   "",
   ((typename Key, typename Value, cuco::test::probe_sequence Probe, int CGSize),
    Key,

From e41bf459a8ac61f7ac7f9fbb8a6666fd94c5a952 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20J=C3=BCnger?=
 <2955913+sleeepyjack@users.noreply.github.com>
Date: Tue, 21 Nov 2023 02:09:59 +0100
Subject: [PATCH 2/2] Rework build script (#395)

This PR updates the `ci/build.sh` script to be usable both interactively
and in CI.
---
 .github/workflows/dispatch-build-and-test.yml |   4 +-
 README.md                                     |  21 +-
 ci/build.sh                                   | 231 +++++++++++++-----
 3 files changed, 182 insertions(+), 74 deletions(-)

diff --git a/.github/workflows/dispatch-build-and-test.yml b/.github/workflows/dispatch-build-and-test.yml
index 9f2e083ed..ab7d73619 100644
--- a/.github/workflows/dispatch-build-and-test.yml
+++ b/.github/workflows/dispatch-build-and-test.yml
@@ -35,7 +35,7 @@ jobs:
     with:
       cpu: ${{ matrix.cpu }}
       test_name: ${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}}
-      build_script: "./ci/build.sh ${{matrix.compiler.exe}} ${{matrix.std}} ${{matrix.gpu_build_archs}}"
-      test_script:  "./ci/test.sh  ${{matrix.compiler.exe}} ${{matrix.std}} ${{matrix.gpu_build_archs}}"
+      build_script: "./ci/build.sh --cxx ${{matrix.compiler.exe}} --std ${{matrix.std}} --arch ${{matrix.gpu_build_archs}} --infix ${{matrix.cpu}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}"
+      test_script:  "./ci/test.sh --tests --cxx ${{matrix.compiler.exe}} --std ${{matrix.std}} --arch ${{matrix.gpu_build_archs}} --infix ${{matrix.cpu}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}"
       container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}}
       run_tests: ${{ contains(matrix.jobs, 'test') && !contains(github.event.head_commit.message, 'skip-tests') }}
diff --git a/README.md b/README.md
index 93ac04027..163e43f2f 100644
--- a/README.md
+++ b/README.md
@@ -59,8 +59,7 @@ This will take care of downloading `cuCollections` from GitHub and making the he
 
 `cuCollections` depends on the following libraries:
 
-- [libcu++](https://github.com/NVIDIA/libcudacxx)
-- [CUB](https://github.com/thrust/cub)
+- [CUDA C++ Core Libraries (CCCL)](https://github.com/NVIDIA/cccl)
 
 No action is required from the user to satisfy these dependencies. `cuCollections`'s CMake script is configured to first search the system for these libraries, and if they are not found, to automatically fetch them from GitHub.
 
@@ -75,14 +74,26 @@ To build the tests, benchmarks, and examples:
 cd $CUCO_ROOT
 mkdir -p build
 cd build
-cmake ..
-make
+cmake .. # configure
+make # build
+ctest --test-dir tests # run tests
 ```
 Binaries will be built into:
 - `build/tests/`
-- `build/gbenchmarks/`
+- `build/benchmarks/`
 - `build/examples/`
 
+### Build Script:
+
+Alternatively, you can use the build script located at `ci/build.sh`. Calling this script with no arguments will trigger a full build which will be located at `build/local`.
+
+```bash
+cd $CUCO_ROOT
+ci/build.sh # configure and build
+ctest --test-dir build/local/tests # run tests
+```
+
+For a comprehensive list of all available options along with descriptions and examples, you can use the option `ci/build.sh -h`.
 
 ## Code Formatting
 By default, `cuCollections` uses [`pre-commit.ci`](https://pre-commit.ci/) along with [`mirrors-clang-format`](https://github.com/pre-commit/mirrors-clang-format) to automatically format the C++/CUDA files in a pull request.
diff --git a/ci/build.sh b/ci/build.sh
index eb31d9382..2d8eb9ece 100755
--- a/ci/build.sh
+++ b/ci/build.sh
@@ -16,108 +16,205 @@
 
 set -eo pipefail
 
+ORIGINAL_DIR=$(pwd)
+
+resolve_path() {
+    local input_path=$1
+    # Check if the input is an absolute path
+    if [[ "$input_path" = /* ]]; then
+        echo "$input_path"
+    else
+        # Treat as a relative path or executable name
+        # Check if it's in the PATH
+        if command -v "$input_path" >/dev/null 2>&1; then
+            echo "$input_path"
+        else
+            echo "$ORIGINAL_DIR/$input_path"
+        fi
+    fi
+}
+
 # Ensure the script is being executed in its containing directory
 cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )";
 
 # Script defaults
-CUDA_COMPILER=nvcc
+BUILD_TESTS=${BUILD_TESTS:-OFF}
+BUILD_EXAMPLES=${BUILD_EXAMPLES:-OFF}
+BUILD_BENCHMARKS=${BUILD_BENCHMARKS:-OFF}
+CLEAN_BUILD=0 # Re-use existing artifacts by-default
+BUILD_PREFIX=../build # <repo_root>/build
+BUILD_INFIX=local # <repo_root>/build/local
+DEBUG_BUILD=0 # Default build type is Release
+PARALLEL_LEVEL=${PARALLEL_LEVEL:-$(nproc)} # defaults to number of cores in the system
+CUDA_COMPILER=${CUDACXX:-nvcc} # $CUDACXX if set, otherwise `nvcc`
+HOST_COMPILER=${CXX:-g++} # $CXX if set, otherwise `g++`
+CUDA_ARCHS=native # detect system's GPU architectures
+CXX_STANDARD=17
 
-# Check if the correct number of arguments has been provided
 function usage {
-    echo "Usage: $0 [OPTIONS] <HOST_COMPILER> <CXX_STANDARD> <GPU_ARCHS>"
-    echo "The PARALLEL_LEVEL environment variable controls the amount of build parallelism. Default is the number of cores."
-    echo "Example: PARALLEL_LEVEL=8 $0 g++-8 14 \"70\" "
-    echo "Example: $0 clang++-8 17 \"70;75;80-virtual\" "
-    echo "Possible options: "
-    echo "  -nvcc: path/to/nvcc"
-    echo "  -v/--verbose: enable shell echo for debugging"
+    echo "cuCollections build script"
+    echo "Usage: $0 [OPTIONS]"
+    echo "Options:"
+    echo "  -t/--tests: Build tests"
+    echo "  -e/--examples: Build examples"
+    echo "  -b/--benchmarks: Build benchmarks"
+    echo "  -c/--clean: Clean (re-)build"
+    echo "  --prefix: Build directory prefix (Defaults to <repo_root>/build)"
+    echo "  -i/--infix: Build directory infix (Defaults to local)"
+    echo "  -d/--debug: Debug build"
+    echo "  -p/--parallel: Build parallelism (Defaults to \$PARALLEL_LEVEL if set, otherwise the system's number of CPU cores)"
+    echo "  --cuda: CUDA compiler (Defaults to \$CUDACXX if set, otherwise nvcc)"
+    echo "  --cxx: Host compiler (Defaults to \$CXX if set, otherwise g++)"
+    echo "  --arch: Target CUDA arches, e.g. \"60-real;70;80-virtual\" (Defaults to the system's native GPU archs)"
+    echo "  --std: CUDA/C++ standard (Defaults to 17)"
+    echo "  -v/-verbose/--verbose: Enable shell echo for debugging"
+    echo "  -h/-help/--help: Show this usage message"
+    echo
+    echo "Examples:"
+    echo "  Basic Build:"
+    echo "    $ $0"
+    echo "    Runs a basic build with default settings, i.e., builds tests, examples, and benchmarks."
+    echo "    Build files will be written to <repo_root>/build/local and symlinked to <repo_root>/build/latest."
+    echo
+    echo "  Custom Build Infix Directory:"
+    echo "    $ $0 -i my_build"
+    echo "    Build files will be written to the <repo_root>/build/my_build directory and symlinked to <repo_root>/build/latest."
+    echo
+    echo "  Parallel Build with Specific CUDA Architecture and CUDA Compiler:"
+    echo "    $ PARALLEL_LEVEL=8 $0 --cuda /my_cuda_compiler/nvcc --arch 70;80"
+    echo "    $ $0 -p 8 --cuda /my_cuda_compiler/nvcc --arch 70;80"
+    echo "    Specifies parallel build level of 8 and CUDA architecture 70 and 80 with the specified CUDA compiler."
+    echo "    Build files will be written to <repo_root>/build/local and symlinked to <repo_root>/build/latest."
+    echo
+    echo "  Debug Build with Tests and Examples:"
+    echo "    $ CXX=g++-9 $0 -t -e -d"
+    echo "    $ $0 --cxx g++-9 -t -e -d"
+    echo "    Sets the host compiler to g++-9, builds tests and examples, and enables debug mode."
+    echo "    Build files will be written to <repo_root>/build/local and symlinked to <repo_root>/build/latest."
+    echo
+    echo "  Custom Build Directory with Benchmarks:"
+    echo "    $ BUILD_BENCHMARKS=ON $0 --prefix /custom/build --infix my_build"
+    echo "    $ $0 --prefix /custom/build --infix my_build -b"
+    echo "    Builds benchmarks only."
+    echo "    Build files will be written to /custom/build/my_build and symlinked to /custom/build/latest."
+    echo
+    echo "  Verbose Mode for Debugging:"
+    echo "    $ $0 -v --std 17"
+    echo "    Enables verbose mode for detailed output and builds with C++17 standard."
+    echo "    Build files will be written to <repo_root>/build/local and symlinked to <repo_root>/build/latest."
+    echo
     exit 1
 }
 
-# Check for extra options
-# While there are more than 3 arguments, parse switches/options
-while [ "$#" -gt 3 ]
-do
-  case "${1}" in
-  -h)     usage ;;
-  -help)  usage ;;
-  --help) usage ;;
-  --verbose)           VERBOSE=1; shift ;;
-  -v)                  VERBOSE=1; shift ;;
-  -nvcc)               CUDA_COMPILER="${2}"; shift 2;;
-  *) usage ;;
-  esac
+# Parse options
+
+# Copy the args into a temporary array, since we will modify them and
+# the parent script may still need them.
+args=("$@")
+while [ "${#args[@]}" -ne 0 ]; do
+    case "${args[0]}" in
+    -t | --tests) BUILD_TESTS=ON; args=("${args[@]:1}");;
+    -e | --examples) BUILD_EXAMPLES=ON; args=("${args[@]:1}");;
+    -b | --benchmarks) BUILD_BENCHMARKS=ON; args=("${args[@]:1}");;
+    -c | --clean) CLEAN_BUILD=1; args=("${args[@]:1}");;
+    --prefix) BUILD_PREFIX=$(resolve_path "${args[1]}"); args=("${args[@]:2}");;
+    -i | --infix) BUILD_INFIX="${args[1]}"; args=("${args[@]:2}");;
+    -d | --debug) DEBUG_BUILD=1; args=("${args[@]:1}");;
+    -p | --parallel)  PARALLEL_LEVEL="${args[1]}";  args=("${args[@]:2}");;
+    --cuda) CUDA_COMPILER=$(resolve_path "${args[1]}"); args=("${args[@]:2}");;
+    --cxx)  HOST_COMPILER=$(resolve_path "${args[1]}"); args=("${args[@]:2}");;
+    --arch) CUDA_ARCHS="${args[1]}";    args=("${args[@]:2}");;
+    --std)  CXX_STANDARD="${args[1]}";  args=("${args[@]:2}");;
+    -v | -verbose | --verbose) VERBOSE=1; args=("${args[@]:1}");;
+    -h | -help | --help) usage ;;
+    *) echo "Unrecognized option: ${args[0]}"; usage ;;
+    esac
 done
 
+# Convert to full paths:
+HOST_COMPILER=$(which ${HOST_COMPILER})
+CUDA_COMPILER=$(which ${CUDA_COMPILER})
+# Make CUDA arch list compatible with cmake
+CUDA_ARCHS=$(echo "$CUDA_ARCHS" | tr ' ,' ';;')
+
 if [ $VERBOSE ]; then
     set -x
 fi
 
-if [ "$#" -ne 3 ]; then
-    echo "Invalid number of arguments"
-    usage
-fi
-
 # Begin processing unsets after option parsing
 set -u
 
-# Assign command line arguments to variables
-readonly HOST_COMPILER=$(which $1)
-readonly CXX_STANDARD=$2
-
-# Replace spaces, commas and semicolons with semicolons for CMake list
-readonly GPU_ARCHS=$(echo $3 | tr ' ,' ';')
+if [ "$BUILD_INFIX" = "latest" ] || [ -z "$BUILD_INFIX" ]; then
+    echo "Error: BUILD_INFIX cannot be empty or 'latest'" >&2
+    exit 1
+fi
 
-readonly PARALLEL_LEVEL=${PARALLEL_LEVEL:=$(nproc)}
-readonly NVCC_VERSION=$($CUDA_COMPILER --version | grep release | awk '{print $6}' | cut -c2-)
+# If no build target is specified, build all targets
+if [ "$BUILD_TESTS" == "OFF" ] && [ "$BUILD_EXAMPLES" == "OFF" ] && [ "$BUILD_BENCHMARKS" == "OFF" ]; then
+    BUILD_TESTS=ON
+    BUILD_EXAMPLES=ON
+    BUILD_BENCHMARKS=ON
+fi
 
-if [ -z ${DEVCONTAINER_NAME+x} ]; then
-    BUILD_DIR=../build/local
-else
-    BUILD_DIR=../build/${DEVCONTAINER_NAME}
+# Trigger clean (re-)build
+if [ "$CLEAN_BUILD" -eq 1 ]; then
+    rm -rf BUILD_DIR
 fi
 
-# The most recent build will always be symlinked to cuCollections/build/latest
+BUILD_DIR="$BUILD_PREFIX/$BUILD_INFIX"
 mkdir -p $BUILD_DIR
-rm -f ../build/latest
-ln -sf $BUILD_DIR ../build/latest
-export BUILD_DIR
-echo $BUILD_DIR
+export BUILD_DIR # TODO remove
+
+# The most recent build will be symlinked to cuCollections/build/latest
+rm -f $BUILD_PREFIX/latest
+ln -sf $BUILD_DIR $BUILD_PREFIX/latest
+
+# Now that BUILD_DIR exists, use readlink to canonicalize the path:
+BUILD_DIR=$(readlink -f "${BUILD_DIR}")
+
+BUILD_TYPE=$( [ "$DEBUG_BUILD" -eq 1 ] && echo "Debug" || echo "Release" )
 
 CMAKE_OPTIONS="
-    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
     -DCMAKE_CXX_STANDARD=${CXX_STANDARD} \
     -DCMAKE_CUDA_STANDARD=${CXX_STANDARD} \
     -DCMAKE_CXX_COMPILER=${HOST_COMPILER} \
     -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} \
     -DCMAKE_CUDA_HOST_COMPILER=${HOST_COMPILER} \
-    -DCMAKE_CUDA_ARCHITECTURES=${GPU_ARCHS} \
+    -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHS} \
     -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
+    -DBUILD_TESTS=${BUILD_TESTS} \
+    -DBUILD_EXAMPLES=${BUILD_EXAMPLES} \
+    -DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} \
 "
 
 echo "========================================"
-echo "Begin build"
-echo "pwd=$(pwd)"
-echo "NVCC_VERSION=$NVCC_VERSION"
-echo "HOST_COMPILER=$HOST_COMPILER"
-echo "CXX_STANDARD=$CXX_STANDARD"
-echo "GPU_ARCHS=$GPU_ARCHS"
-echo "PARALLEL_LEVEL=$PARALLEL_LEVEL"
-echo "BUILD_DIR=$BUILD_DIR"
-echo "Current commit is:"
-git log -1 || echo "Not a repository"
+echo "-- START: $(date)"
+echo "-- GIT_SHA: $(git rev-parse HEAD 2>/dev/null || echo 'Not a repository')"
+echo "-- PWD: $(pwd)"
+echo "-- BUILD_DIR: ${BUILD_DIR}"
+echo "-- BUILD_TYPE: ${BUILD_TYPE}"
+echo "-- PARALLEL_LEVEL: ${PARALLEL_LEVEL}"
+echo "-- CUDA_ARCHS: ${CUDA_ARCHS}"
+echo "-- BUILD_TESTS: ${BUILD_TESTS}"
+echo "-- BUILD_EXAMPLES: ${BUILD_EXAMPLES}"
+echo "-- BUILD_BENCHMARKS: ${BUILD_BENCHMARKS}"
+
+# configure
+cmake -S .. -B $BUILD_DIR $CMAKE_OPTIONS
 echo "========================================"
 
-function configure(){
-    cmake -S .. -B $BUILD_DIR $CMAKE_OPTIONS
-}
-
-function build(){
+if command -v sccache >/dev/null; then
     source "./sccache_stats.sh" start
-    cmake --build $BUILD_DIR --parallel $PARALLEL_LEVEL
-    echo "Build complete"
-    source "./sccache_stats.sh" end
-}
+fi
+
+#build
+cmake --build $BUILD_DIR --parallel $PARALLEL_LEVEL
+echo "========================================"
+echo "Build complete"
 
-configure
-build
\ No newline at end of file
+if command -v sccache >/dev/null; then
+    source "./sccache_stats.sh" end
+else
+    echo "sccache stats: N/A"
+fi
\ No newline at end of file