diff --git a/benchmarks/hash_bench.cu b/benchmarks/hash_bench.cu
index 58c6ee770..973f6976d 100644
--- a/benchmarks/hash_bench.cu
+++ b/benchmarks/hash_bench.cu
@@ -77,7 +77,7 @@ void hash_eval(nvbench::state& state, nvbench::type_list<Hash>)
 
   state.add_element_count(num_keys);
 
-  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+  state.exec([&](nvbench::launch& launch) {
     hash_bench_kernel<block_size><<<grid_size, block_size, 0, launch.get_stream()>>>(
       Hash{}, num_keys, hash_values.begin(), materialize_result);
   });
diff --git a/include/cuco/detail/hash_functions/murmurhash3.cuh b/include/cuco/detail/hash_functions/murmurhash3.cuh
index ce5ab9d56..a12143523 100644
--- a/include/cuco/detail/hash_functions/murmurhash3.cuh
+++ b/include/cuco/detail/hash_functions/murmurhash3.cuh
@@ -16,7 +16,12 @@
 
 #pragma once
 
+#include <cuco/detail/hash_functions/utils.cuh>
+#include <cuco/extent.cuh>
+
+#include <cstddef>
 #include <cstdint>
+#include <type_traits>
 
 namespace cuco::detail {
 
@@ -31,15 +36,15 @@ template <typename Key>
 struct MurmurHash3_fmix32 {
   static_assert(sizeof(Key) == 4, "Key type must be 4 bytes in size.");
 
-  using argument_type = Key;       ///< The type of the values taken as argument
-  using result_type   = uint32_t;  ///< The type of the hash values produced
+  using argument_type = Key;            ///< The type of the values taken as argument
+  using result_type   = std::uint32_t;  ///< The type of the hash values produced
 
   /**
    * @brief Constructs a MurmurHash3_fmix32 hash function with the given `seed`.
    *
    * @param seed A custom number to randomize the resulting hash value
    */
-  __host__ __device__ constexpr MurmurHash3_fmix32(uint32_t seed = 0) : seed_{seed} {}
+  __host__ __device__ constexpr MurmurHash3_fmix32(std::uint32_t seed = 0) : seed_{seed} {}
 
   /**
    * @brief Returns a hash value for its argument, as a value of type `result_type`.
@@ -49,7 +54,7 @@ struct MurmurHash3_fmix32 {
    */
   constexpr result_type __host__ __device__ operator()(Key const& key) const noexcept
   {
-    uint32_t h = static_cast<uint32_t>(key) ^ seed_;
+    std::uint32_t h = static_cast<std::uint32_t>(key) ^ seed_;
     h ^= h >> 16;
     h *= 0x85ebca6b;
     h ^= h >> 13;
@@ -59,7 +64,7 @@ struct MurmurHash3_fmix32 {
   }
 
  private:
-  uint32_t seed_;
+  std::uint32_t seed_;
 };
 
 /**
@@ -73,15 +78,15 @@ template <typename Key>
 struct MurmurHash3_fmix64 {
   static_assert(sizeof(Key) == 8, "Key type must be 8 bytes in size.");
 
-  using argument_type = Key;       ///< The type of the values taken as argument
-  using result_type   = uint64_t;  ///< The type of the hash values produced
+  using argument_type = Key;            ///< The type of the values taken as argument
+  using result_type   = std::uint64_t;  ///< The type of the hash values produced
 
   /**
    * @brief Constructs a MurmurHash3_fmix64 hash function with the given `seed`.
    *
    * @param seed A custom number to randomize the resulting hash value
    */
-  __host__ __device__ constexpr MurmurHash3_fmix64(uint64_t seed = 0) : seed_{seed} {}
+  __host__ __device__ constexpr MurmurHash3_fmix64(std::uint64_t seed = 0) : seed_{seed} {}
 
   /**
    * @brief Returns a hash value for its argument, as a value of type `result_type`.
@@ -91,7 +96,7 @@ struct MurmurHash3_fmix64 {
    */
   constexpr result_type __host__ __device__ operator()(Key const& key) const noexcept
   {
-    uint64_t h = static_cast<uint64_t>(key) ^ seed_;
+    std::uint64_t h = static_cast<std::uint64_t>(key) ^ seed_;
     h ^= h >> 33;
     h *= 0xff51afd7ed558ccd;
     h ^= h >> 33;
@@ -101,7 +106,7 @@ struct MurmurHash3_fmix64 {
   }
 
  private:
-  uint64_t seed_;
+  std::uint64_t seed_;
 };
 
 /**
@@ -121,36 +126,50 @@ struct MurmurHash3_fmix64 {
  */
 template <typename Key>
 struct MurmurHash3_32 {
-  using argument_type = Key;       ///< The type of the values taken as argument
-  using result_type   = uint32_t;  ///< The type of the hash values produced
+  using argument_type = Key;            ///< The type of the values taken as argument
+  using result_type   = std::uint32_t;  ///< The type of the hash values produced
 
   /**
    * @brief Constructs a MurmurHash3_32 hash function with the given `seed`.
    *
    * @param seed A custom number to randomize the resulting hash value
    */
-  __host__ __device__ constexpr MurmurHash3_32(uint32_t seed = 0) : fmix32_{0}, seed_{seed} {}
+  __host__ __device__ constexpr MurmurHash3_32(std::uint32_t seed = 0) : fmix32_{0}, seed_{seed} {}
 
   /**
    * @brief Returns a hash value for its argument, as a value of type `result_type`.
    *
    * @param key The input argument to hash
-   * @return A resulting hash value for `key`
+   * @return The resulting hash value for `key`
    */
   constexpr result_type __host__ __device__ operator()(Key const& key) const noexcept
   {
-    constexpr int len         = sizeof(argument_type);
-    const uint8_t* const data = (const uint8_t*)&key;
-    constexpr int nblocks     = len / 4;
+    return compute_hash(reinterpret_cast<std::byte const*>(&key),
+                        cuco::experimental::extent<std::size_t, sizeof(Key)>{});
+  }
 
-    uint32_t h1           = seed_;
-    constexpr uint32_t c1 = 0xcc9e2d51;
-    constexpr uint32_t c2 = 0x1b873593;
+  /**
+   * @brief Returns a hash value for its argument, as a value of type `result_type`.
+   *
+   * @tparam Extent The extent type
+   *
+   * @param bytes The input argument to hash
+   * @param size The extent of the data in bytes
+   * @return The resulting hash value
+   */
+  template <typename Extent>
+  constexpr result_type __host__ __device__ compute_hash(std::byte const* bytes,
+                                                         Extent size) const noexcept
+  {
+    auto const nblocks = size / 4;
+
+    std::uint32_t h1           = seed_;
+    constexpr std::uint32_t c1 = 0xcc9e2d51;
+    constexpr std::uint32_t c2 = 0x1b873593;
     //----------
     // body
-    const uint32_t* const blocks = (const uint32_t*)(data + nblocks * 4);
-    for (int i = -nblocks; i; i++) {
-      uint32_t k1 = blocks[i];  // getblock32(blocks,i);
+    for (std::remove_const_t<decltype(nblocks)> i = 0; size >= 4 && i < nblocks; i++) {
+      std::uint32_t k1 = load_chunk<std::uint32_t>(bytes, i);
       k1 *= c1;
       k1 = rotl32(k1, 15);
       k1 *= c2;
@@ -160,13 +179,12 @@ struct MurmurHash3_32 {
     }
     //----------
     // tail
-    const uint8_t* tail = (const uint8_t*)(data + nblocks * 4);
-    uint32_t k1         = 0;
-    switch (len & 3) {
-      case 3: k1 ^= tail[2] << 16;
-      case 2: k1 ^= tail[1] << 8;
+    std::uint32_t k1 = 0;
+    switch (size & 3) {
+      case 3: k1 ^= std::to_integer<std::uint32_t>(bytes[nblocks * 4 + 2]) << 16; [[fallthrough]];
+      case 2: k1 ^= std::to_integer<std::uint32_t>(bytes[nblocks * 4 + 1]) << 8; [[fallthrough]];
       case 1:
-        k1 ^= tail[0];
+        k1 ^= std::to_integer<std::uint32_t>(bytes[nblocks * 4 + 0]);
         k1 *= c1;
         k1 = rotl32(k1, 15);
         k1 *= c2;
@@ -174,19 +192,18 @@ struct MurmurHash3_32 {
     };
     //----------
     // finalization
-    h1 ^= len;
+    h1 ^= size;
     h1 = fmix32_(h1);
     return h1;
   }
 
  private:
-  constexpr __host__ __device__ uint32_t rotl32(uint32_t x, int8_t r) const noexcept
+  constexpr __host__ __device__ std::uint32_t rotl32(std::uint32_t x, std::int8_t r) const noexcept
   {
     return (x << r) | (x >> (32 - r));
   }
 
-  MurmurHash3_fmix32<uint32_t> fmix32_;
-  uint32_t seed_;
+  MurmurHash3_fmix32<std::uint32_t> fmix32_;
+  std::uint32_t seed_;
 };
-
 }  //  namespace cuco::detail
\ No newline at end of file
diff --git a/include/cuco/detail/hash_functions/utils.cuh b/include/cuco/detail/hash_functions/utils.cuh
new file mode 100644
index 000000000..a50779f23
--- /dev/null
+++ b/include/cuco/detail/hash_functions/utils.cuh
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+namespace cuco::detail {
+
+template <typename T, typename U, typename Extent>
+constexpr __host__ __device__ T load_chunk(U const* const data, Extent index) noexcept
+{
+  auto const chunks = reinterpret_cast<T const*>(data);
+  return chunks[index];
+}
+
+};  // namespace cuco::detail
\ No newline at end of file
diff --git a/include/cuco/detail/hash_functions/xxhash.cuh b/include/cuco/detail/hash_functions/xxhash.cuh
index 2ea67fd8a..c686f3b82 100644
--- a/include/cuco/detail/hash_functions/xxhash.cuh
+++ b/include/cuco/detail/hash_functions/xxhash.cuh
@@ -16,6 +16,10 @@
 
 #pragma once
 
+#include <cuco/detail/hash_functions/utils.cuh>
+#include <cuco/extent.cuh>
+
+#include <cstddef>
 #include <cstdint>
 
 namespace cuco::detail {
@@ -81,40 +85,51 @@ struct XXHash_32 {
    * @brief Returns a hash value for its argument, as a value of type `result_type`.
    *
    * @param key The input argument to hash
-   * @return A resulting hash value for `key`
+   * @return The resulting hash value for `key`
    */
   constexpr result_type __host__ __device__ operator()(Key const& key) const noexcept
   {
-    // TODO do we need to add checks/hints for alignment?
-    constexpr auto nbytes             = sizeof(Key);
-    [[maybe_unused]] auto const bytes = reinterpret_cast<char const*>(&key);  ///< per-byte access
-    [[maybe_unused]] auto const blocks =
-      reinterpret_cast<std::uint32_t const*>(&key);  ///< 4-byte word access
+    return compute_hash(reinterpret_cast<std::byte const*>(&key),
+                        cuco::experimental::extent<std::size_t, sizeof(Key)>{});
+  }
 
+  /**
+   * @brief Returns a hash value for its argument, as a value of type `result_type`.
+   *
+   * @tparam Extent The extent type
+   *
+   * @param bytes The input argument to hash
+   * @param size The extent of the data in bytes
+   * @return The resulting hash value
+   */
+  template <typename Extent>
+  constexpr result_type __host__ __device__ compute_hash(std::byte const* bytes,
+                                                         Extent size) const noexcept
+  {
     std::size_t offset = 0;
     std::uint32_t h32;
 
     // data can be processed in 16-byte chunks
-    if constexpr (nbytes >= 16) {
-      constexpr auto limit = nbytes - 16;
-      std::uint32_t v1     = seed_ + prime1 + prime2;
-      std::uint32_t v2     = seed_ + prime2;
-      std::uint32_t v3     = seed_;
-      std::uint32_t v4     = seed_ - prime1;
+    if (size >= 16) {
+      auto const limit = size - 16;
+      std::uint32_t v1 = seed_ + prime1 + prime2;
+      std::uint32_t v2 = seed_ + prime2;
+      std::uint32_t v3 = seed_;
+      std::uint32_t v4 = seed_ - prime1;
 
       do {
         // pipeline 4*4byte computations
         auto const pipeline_offset = offset / 4;
-        v1 += blocks[pipeline_offset] * prime2;
+        v1 += load_chunk<std::uint32_t>(bytes, pipeline_offset + 0) * prime2;
         v1 = rotl(v1, 13);
         v1 *= prime1;
-        v2 += blocks[pipeline_offset + 1] * prime2;
+        v2 += load_chunk<std::uint32_t>(bytes, pipeline_offset + 1) * prime2;
         v2 = rotl(v2, 13);
         v2 *= prime1;
-        v3 += blocks[pipeline_offset + 2] * prime2;
+        v3 += load_chunk<std::uint32_t>(bytes, pipeline_offset + 2) * prime2;
         v3 = rotl(v3, 13);
         v3 *= prime1;
-        v4 += blocks[pipeline_offset + 3] * prime2;
+        v4 += load_chunk<std::uint32_t>(bytes, pipeline_offset + 3) * prime2;
         v4 = rotl(v4, 13);
         v4 *= prime1;
         offset += 16;
@@ -125,20 +140,20 @@ struct XXHash_32 {
       h32 = seed_ + prime5;
     }
 
-    h32 += nbytes;
+    h32 += size;
 
     // remaining data can be processed in 4-byte chunks
-    if constexpr ((nbytes % 16) >= 4) {
-      for (; offset <= nbytes - 4; offset += 4) {
-        h32 += blocks[offset / 4] * prime3;
+    if ((size % 16) >= 4) {
+      for (; offset <= size - 4; offset += 4) {
+        h32 += load_chunk<std::uint32_t>(bytes, offset / 4) * prime3;
         h32 = rotl(h32, 17) * prime4;
       }
     }
 
     // the following loop is only needed if the size of the key is not a multiple of the block size
-    if constexpr (nbytes % 4) {
-      while (offset < nbytes) {
-        h32 += (bytes[offset] & 255) * prime5;
+    if (size % 4) {
+      while (offset < size) {
+        h32 += (std::to_integer<std::uint32_t>(bytes[offset]) & 255) * prime5;
         h32 = rotl(h32, 11) * prime1;
         ++offset;
       }
@@ -232,42 +247,51 @@ struct XXHash_64 {
    * @brief Returns a hash value for its argument, as a value of type `result_type`.
    *
    * @param key The input argument to hash
-   * @return A resulting hash value for `key`
+   * @return The resulting hash value for `key`
    */
   constexpr result_type __host__ __device__ operator()(Key const& key) const noexcept
   {
-    // TODO do we need to add checks/hints for alignment?
-    constexpr auto nbytes             = sizeof(Key);
-    [[maybe_unused]] auto const bytes = reinterpret_cast<char const*>(&key);  ///< per-byte access
-    [[maybe_unused]] auto const blocks4 =
-      reinterpret_cast<std::uint32_t const*>(&key);  ///< 4-byte word access
-    [[maybe_unused]] auto const blocks8 =
-      reinterpret_cast<std::uint64_t const*>(&key);  ///< 8-byte word access
+    return compute_hash(reinterpret_cast<std::byte const*>(&key),
+                        cuco::experimental::extent<std::size_t, sizeof(Key)>{});
+  }
 
+  /**
+   * @brief Returns a hash value for its argument, as a value of type `result_type`.
+   *
+   * @tparam Extent The extent type
+   *
+   * @param bytes The input argument to hash
+   * @param size The extent of the data in bytes
+   * @return The resulting hash value
+   */
+  template <typename Extent>
+  constexpr result_type __host__ __device__ compute_hash(std::byte const* bytes,
+                                                         Extent size) const noexcept
+  {
     std::size_t offset = 0;
     std::uint64_t h64;
 
     // data can be processed in 32-byte chunks
-    if constexpr (nbytes >= 32) {
-      constexpr auto limit = nbytes - 32;
-      std::uint64_t v1     = seed_ + prime1 + prime2;
-      std::uint64_t v2     = seed_ + prime2;
-      std::uint64_t v3     = seed_;
-      std::uint64_t v4     = seed_ - prime1;
+    if (size >= 32) {
+      auto const limit = size - 32;
+      std::uint64_t v1 = seed_ + prime1 + prime2;
+      std::uint64_t v2 = seed_ + prime2;
+      std::uint64_t v3 = seed_;
+      std::uint64_t v4 = seed_ - prime1;
 
       do {
         // pipeline 4*8byte computations
         auto const pipeline_offset = offset / 8;
-        v1 += blocks8[pipeline_offset] * prime2;
+        v1 += load_chunk<std::uint64_t>(bytes, pipeline_offset + 0) * prime2;
         v1 = rotl(v1, 31);
         v1 *= prime1;
-        v2 += blocks8[pipeline_offset + 1] * prime2;
+        v2 += load_chunk<std::uint64_t>(bytes, pipeline_offset + 1) * prime2;
         v2 = rotl(v2, 31);
         v2 *= prime1;
-        v3 += blocks8[pipeline_offset + 2] * prime2;
+        v3 += load_chunk<std::uint64_t>(bytes, pipeline_offset + 2) * prime2;
         v3 = rotl(v3, 31);
         v3 *= prime1;
-        v4 += blocks8[pipeline_offset + 3] * prime2;
+        v4 += load_chunk<std::uint64_t>(bytes, pipeline_offset + 3) * prime2;
         v4 = rotl(v4, 31);
         v4 *= prime1;
         offset += 32;
@@ -302,12 +326,12 @@ struct XXHash_64 {
       h64 = seed_ + prime5;
     }
 
-    h64 += nbytes;
+    h64 += size;
 
     // remaining data can be processed in 8-byte chunks
-    if constexpr ((nbytes % 32) >= 8) {
-      for (; offset <= nbytes - 8; offset += 8) {
-        std::uint64_t k1 = blocks8[offset / 8] * prime2;
+    if ((size % 32) >= 8) {
+      for (; offset <= size - 8; offset += 8) {
+        std::uint64_t k1 = load_chunk<std::uint64_t>(bytes, offset / 8) * prime2;
         k1               = rotl(k1, 31) * prime1;
         h64 ^= k1;
         h64 = rotl(h64, 27) * prime1 + prime4;
@@ -315,18 +339,18 @@ struct XXHash_64 {
     }
 
     // remaining data can be processed in 4-byte chunks
-    if constexpr (((nbytes % 32) % 8) >= 4) {
-      for (; offset <= nbytes - 4; offset += 4) {
-        h64 ^= (blocks4[offset / 4] & 0xffffffffull) * prime1;
+    if (((size % 32) % 8) >= 4) {
+      for (; offset <= size - 4; offset += 4) {
+        h64 ^= (load_chunk<std::uint32_t>(bytes, offset / 4) & 0xffffffffull) * prime1;
         h64 = rotl(h64, 23) * prime2 + prime3;
       }
     }
 
     // the following loop is only needed if the size of the key is not a multiple of a previous
     // block size
-    if constexpr (nbytes % 4) {
-      while (offset < nbytes) {
-        h64 ^= (bytes[offset] & 0xff) * prime5;
+    if (size % 4) {
+      while (offset < size) {
+        h64 ^= (std::to_integer<std::uint32_t>(bytes[offset]) & 0xff) * prime5;
         h64 = rotl(h64, 11) * prime1;
         ++offset;
       }
diff --git a/tests/utility/hash_test.cu b/tests/utility/hash_test.cu
index 5e518669e..3e8880860 100644
--- a/tests/utility/hash_test.cu
+++ b/tests/utility/hash_test.cu
@@ -21,8 +21,11 @@
 
 #include <thrust/device_vector.h>
 
+#include <catch2/catch_template_test_macros.hpp>
 #include <catch2/catch_test_macros.hpp>
 
+#include <cstddef>
+
 template <int32_t Words>
 struct large_key {
   constexpr __host__ __device__ large_key(int32_t value) noexcept
@@ -170,4 +173,26 @@ TEST_CASE("Test cuco::xxhash_32", "")
 
     CHECK(cuco::test::all_of(result.begin(), result.end(), [] __device__(bool v) { return v; }));
   }
+}
+
+TEMPLATE_TEST_CASE_SIG("Static vs. dynamic key hash test",
+                       "",
+                       ((typename Hash), Hash),
+                       (cuco::murmurhash3_32<char>),
+                       (cuco::murmurhash3_32<int32_t>),
+                       (cuco::xxhash_32<char>),
+                       (cuco::xxhash_32<int32_t>),
+                       (cuco::xxhash_64<char>),
+                       (cuco::xxhash_64<int32_t>))
+{
+  using key_type = typename Hash::argument_type;
+
+  Hash hash;
+  key_type key = 42;
+
+  SECTION("Identical keys with static and dynamic key size should have the same hash value.")
+  {
+    CHECK(hash(key) ==
+          hash.compute_hash(reinterpret_cast<std::byte const*>(&key), sizeof(key_type)));
+  }
 }
\ No newline at end of file