From cb8dffc54d1acd60a97e215d6a753da16cbc1ade Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 23 Jan 2025 15:44:40 +0100 Subject: [PATCH 01/18] Import the UMASH hashing library We use it for vectorized hash grouping. For now, add the library separately to figure out the required CMake. --- scripts/clang_format_all.sh | 2 +- tsl/CMakeLists.txt | 60 ++ tsl/src/import/CMakeLists.txt | 17 +- tsl/src/import/umash.c | 1568 +++++++++++++++++++++++++++++++++ tsl/src/import/umash.h | 329 +++++++ 5 files changed, 1973 insertions(+), 3 deletions(-) create mode 100644 tsl/src/import/umash.c create mode 100644 tsl/src/import/umash.h diff --git a/scripts/clang_format_all.sh b/scripts/clang_format_all.sh index ea1bb03e8b8..5828d39b855 100755 --- a/scripts/clang_format_all.sh +++ b/scripts/clang_format_all.sh @@ -5,5 +5,5 @@ SCRIPT_DIR=$(cd "$(dirname $0)" || exit; pwd) BASE_DIR=$(dirname $SCRIPT_DIR) find ${BASE_DIR} \( -path "${BASE_DIR}/src/*" -or -path "${BASE_DIR}/test/*" -or -path "${BASE_DIR}/tsl/*" \) \ - -and -not \( -path "*/.*" -or -path "*CMake*" \) \ + -and -not \( -path "*/.*" -or -path "*CMake*" -or -path "${BASE_DIR}/tsl/src/import/*" \) \ -and \( -name '*.c' -or -name '*.h' \) -print0 | xargs -0 ${SCRIPT_DIR}/clang_format_wrapper.sh -style=file -i diff --git a/tsl/CMakeLists.txt b/tsl/CMakeLists.txt index 9058a14625a..9734e5df10b 100644 --- a/tsl/CMakeLists.txt +++ b/tsl/CMakeLists.txt @@ -4,5 +4,65 @@ if(COMPRESSION_FUZZING) add_compile_definitions(TS_COMPRESSION_FUZZING=1) endif() +# We use the UMASH library for hashing in vectorized grouping. Detect if we can +# compile it on this platform. It is not tested on Windows and leads to a weird +# CI freeze, so don't even try for now. +if(NOT WIN32) + # Check whether we can enable the pclmul instruction required for the UMASH + # hashing on amd64. 
+  # Shouldn't be done if the user has manually specified the
+  # target architecture, no idea how to detect this, but at least we shouldn't
+  # do this when cross-compiling.
+  if(NOT CMAKE_CROSSCOMPILING)
+    check_c_compiler_flag(-mpclmul CC_PCLMUL)
+    if(CC_PCLMUL)
+      add_compile_options(-mpclmul)
+      # The "C source compiles" check below doesn't use the global compilation
+      # flags, so we have to modify its flags separately.
+      set(CMAKE_REQUIRED_FLAGS -mpclmul)
+    endif()
+  endif()
+
+  set(CMAKE_REQUIRED_FLAGS
+      "${CMAKE_REQUIRED_FLAGS} -Werror=implicit-function-declaration")
+  set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+  check_c_source_compiles(
+    "
+#if defined(__PCLMUL__)
+#include <stdint.h>
+#include <immintrin.h>
+/*
+ * For some reason, this doesn't compile on our i386 CI, but I also can't detect
+ * it using the standard condition of defined(__x86_64__) && !defined(__ILP32__),
+ * as described at https://wiki.debian.org/X32Port .
+ */
+static void test() { (void) _mm_cvtsi64_si128((uint64_t) 0); }
+#elif defined(__ARM_FEATURE_CRYPTO)
+/* OK */
+#else
+#error Unsupported platform for UMASH
+#endif
+"
+    UMASH_SUPPORTED)
+  unset(CMAKE_REQUIRED_FLAGS)
+  unset(CMAKE_TRY_COMPILE_TARGET_TYPE)
+else()
+  set(UMASH_SUPPORTED OFF)
+endif()
+
+option(USE_UMASH
+       "Use the UMASH hash for string and multi-column vectorized grouping"
+       ${UMASH_SUPPORTED})
+
+if(USE_UMASH)
+  if(NOT UMASH_SUPPORTED)
+    message(
+      FATAL_ERROR
+        "UMASH use is requested, but it is not supported in the current configuration"
+    )
+  endif()
+  add_compile_definitions(TS_USE_UMASH)
+endif()
+
+# Add the subdirectories
 add_subdirectory(test)
 add_subdirectory(src)
diff --git a/tsl/src/import/CMakeLists.txt b/tsl/src/import/CMakeLists.txt
index ccac900c3ee..b938decf792 100644
--- a/tsl/src/import/CMakeLists.txt
+++ b/tsl/src/import/CMakeLists.txt
@@ -1,2 +1,15 @@
-set(SOURCES "")
-target_sources(${PROJECT_NAME} PRIVATE ${SOURCES})
+set(SOURCES)
+
+if(USE_UMASH)
+  list(APPEND SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/umash.c)
+endif()
+
+if(SOURCES)
+  # Disable clang-tidy for imported code
+  add_library(target_no_static_code_analysis OBJECT ${SOURCES})
+  set_target_properties(target_no_static_code_analysis PROPERTIES C_CLANG_TIDY
+                                                                  "")
+
+  target_link_libraries(${TSL_LIBRARY_NAME}
+                        $<TARGET_OBJECTS:target_no_static_code_analysis>)
+endif()
diff --git a/tsl/src/import/umash.c b/tsl/src/import/umash.c
new file mode 100644
index 00000000000..eebcf8e2ced
--- /dev/null
+++ b/tsl/src/import/umash.c
@@ -0,0 +1,1568 @@
+/*
+ * This file and its contents are licensed under the Timescale License.
+ * Please see the included NOTICE for copyright information and
+ * LICENSE-TIMESCALE for a copy of the license.
+ */
+
+/*
+ * This file contains source code that was copied and/or modified from
+ * the UMASH hash implementation at https://github.com/backtrace-labs/umash.
+ *
+ * This is a copy of umash.c, git commit sha
+ * fc4c5b6ca1f06c308e96c43aa080bd766238e092.
+ */
+
+#include "umash.h"
+
+/*
+ * UMASH is distributed under the MIT license.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright 2020-2022 Backtrace I/O, Inc.
+ * Copyright 2022 Paul Khuong
+ * Copyright 2022 Dougall Johnson
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(UMASH_TEST_ONLY) && !defined(NDEBUG)
+#define NDEBUG
+#endif
+
+/**
+ * -DUMASH_LONG_INPUTS=0 to disable the routine specialised for long
+ * inputs, and -DUMASH_LONG_INPUTS=1 to enable it. If the variable
+ * isn't defined, we try to probe for `umash_long.inc`: that's where
+ * the long input routines are defined.
+ */
+#ifndef UMASH_LONG_INPUTS
+#ifdef __has_include
+#if __has_include("umash_long.inc")
+#define UMASH_LONG_INPUTS 1
+#endif /* __has_include() */
+#endif /* __has_include */
+
+#ifndef UMASH_LONG_INPUTS
+#define UMASH_LONG_INPUTS 0
+#endif /* !UMASH_LONG_INPUTS */
+#endif /* !UMASH_LONG_INPUTS */
+
+/*
+ * Default to dynamically dispatching implementations on x86-64
+ * (there's nothing to dispatch on aarch64).
+ */
+#ifndef UMASH_DYNAMIC_DISPATCH
+#ifdef __x86_64__
+#define UMASH_DYNAMIC_DISPATCH 1
+#else
+#define UMASH_DYNAMIC_DISPATCH 0
+#endif
+#endif
+
+/*
+ * Enable inline assembly by default when building with recent GCC or
+ * compatible compilers. It should always be safe to disable this
+ * option, although there may be a performance cost.
+ */
+#ifndef UMASH_INLINE_ASM
+
+#if defined(__clang__)
+/*
+ * We need clang 8+ for output flags, and 10+ for relaxed vector
+ * constraints.
+ */
+#if __clang_major__ >= 10
+#define UMASH_INLINE_ASM 1
+#else
+#define UMASH_INLINE_ASM 0
+#endif /* __clang_major__ */
+
+#elif defined(__GNUC__)
+#if __GNUC__ >= 6
+#define UMASH_INLINE_ASM 1
+#else
+#define UMASH_INLINE_ASM 0
+#endif /* __GNUC__ */
+
+#else
+#define UMASH_INLINE_ASM 0
+#endif
+
+#endif
+
+#include <assert.h>
+#include <string.h>
+
+#ifdef __PCLMUL__
+/* If we have access to x86 PCLMUL (and some basic SSE).
*/
+#include <immintrin.h>
+
+/* We only use 128-bit vector, as pairs of 64-bit integers. */
+typedef __m128i v128;
+
+#define V128_ZERO { 0 };
+
+static inline v128
+v128_create(uint64_t lo, uint64_t hi)
+{
+	return _mm_set_epi64x(hi, lo);
+}
+
+/* Shift each 64-bit lane left by one bit. */
+static inline v128
+v128_shift(v128 x)
+{
+	return _mm_add_epi64(x, x);
+}
+
+/* Computes the 128-bit carryless product of x and y. */
+static inline v128
+v128_clmul(uint64_t x, uint64_t y)
+{
+	return _mm_clmulepi64_si128(_mm_cvtsi64_si128(x), _mm_cvtsi64_si128(y), 0);
+}
+
+/* Computes the 128-bit carryless product of the high and low halves of x. */
+static inline v128
+v128_clmul_cross(v128 x)
+{
+	return _mm_clmulepi64_si128(x, x, 1);
+}
+
+#elif defined(__ARM_FEATURE_CRYPTO)
+
+#include <arm_neon.h>
+
+typedef uint64x2_t v128;
+
+#define V128_ZERO { 0 };
+
+static inline v128
+v128_create(uint64_t lo, uint64_t hi)
+{
+	return vcombine_u64(vcreate_u64(lo), vcreate_u64(hi));
+}
+
+static inline v128
+v128_shift(v128 x)
+{
+	return vshlq_n_u64(x, 1);
+}
+
+static inline v128
+v128_clmul(uint64_t x, uint64_t y)
+{
+	return vreinterpretq_u64_p128(vmull_p64(x, y));
+}
+
+static inline v128
+v128_clmul_cross(v128 x)
+{
+	v128 swapped = vextq_u64(x, x, 1);
+#if UMASH_INLINE_ASM
+	/* Keep the result out of GPRs. */
+	__asm__("" : "+w"(swapped));
+#endif
+
+	return v128_clmul(vgetq_lane_u64(x, 0), vgetq_lane_u64(swapped, 0));
+}
+
+#else
+
+#error \
+    "Unsupported platform: umash requires CLMUL (-mpclmul) on x86-64, or crypto (-march=...+crypto) extensions on aarch64."
+#endif
+
+/*
+ * #define UMASH_STAP_PROBE=1 to insert probe points in public UMASH
+ * functions.
+ *
+ * This functionality depends on Systemtap's SDT header file.
+ */ +#if defined(UMASH_STAP_PROBE) && UMASH_STAP_PROBE +#include +#else +#define DTRACE_PROBE1(lib, name, a0) +#define DTRACE_PROBE2(lib, name, a0, a1) +#define DTRACE_PROBE3(lib, name, a0, a1, a2) +#define DTRACE_PROBE4(lib, name, a0, a1, a2, a3) +#endif + +/* + * #define UMASH_SECTION="special_section" to emit all UMASH symbols + * in the `special_section` ELF section. + */ +#if defined(UMASH_SECTION) && defined(__GNUC__) +#define FN __attribute__((__section__(UMASH_SECTION))) +#else +#define FN +#endif + +/* + * Defining UMASH_TEST_ONLY switches to a debug build with internal + * symbols exposed. + */ +#ifdef UMASH_TEST_ONLY +#define TEST_DEF FN +#include "t/umash_test_only.h" +#else +#define TEST_DEF static FN +#endif + +#ifdef __GNUC__ +#define LIKELY(X) __builtin_expect(!!(X), 1) +#define UNLIKELY(X) __builtin_expect(!!(X), 0) +#define HOT __attribute__((__hot__)) +#define COLD __attribute__((__cold__)) +#else +#define LIKELY(X) X +#define UNLIKELY(X) X +#define HOT +#define COLD +#endif + +#define ARRAY_SIZE(ARR) (sizeof(ARR) / sizeof(ARR[0])) + +#define BLOCK_SIZE (sizeof(uint64_t) * UMASH_OH_PARAM_COUNT) + +/* + * We derive independent short hashes by offsetting the constant array + * by four u64s. In theory, any positive even number works, but this + * is the constant we used in an earlier incarnation, and it works. + */ +#define OH_SHORT_HASH_SHIFT 4 + +/* Incremental UMASH consumes 16 bytes at a time. */ +#define INCREMENTAL_GRANULARITY 16 + +/** + * Modular arithmetic utilities. + * + * The code below uses GCC extensions. It should be possible to add + * support for other compilers. 
+ */ + +#if !defined(__x86_64__) || !UMASH_INLINE_ASM +static inline void +mul128(uint64_t x, uint64_t y, uint64_t *hi, uint64_t *lo) +{ + __uint128_t product = x; + + product *= y; + *hi = product >> 64; + *lo = product; + return; +} +#else +static inline void +mul128(uint64_t x, uint64_t y, uint64_t *hi, uint64_t *lo) +{ + uint64_t mulhi, mullo; + + __asm__("mul %3" : "=a"(mullo), "=d"(mulhi) : "%a"(x), "r"(y) : "cc"); + *hi = mulhi; + *lo = mullo; + return; +} +#endif + +TEST_DEF inline uint64_t +add_mod_fast(uint64_t x, uint64_t y) +{ + unsigned long long sum; + + /* If `sum` overflows, `sum + 8` does not. */ + return (__builtin_uaddll_overflow(x, y, &sum) ? sum + 8 : sum); +} + +static FN COLD uint64_t +add_mod_slow_slow_path(uint64_t sum, uint64_t fixup) +{ + /* Reduce sum, mod 2**64 - 8. */ + sum = (sum >= (uint64_t)-8) ? sum + 8 : sum; + /* sum < 2**64 - 8, so this doesn't overflow. */ + sum += fixup; + /* Reduce again. */ + sum = (sum >= (uint64_t)-8) ? sum + 8 : sum; + return sum; +} + +TEST_DEF inline uint64_t +add_mod_slow(uint64_t x, uint64_t y) +{ + unsigned long long sum; + uint64_t fixup = 0; + + /* x + y \equiv sum + fixup */ + if (__builtin_uaddll_overflow(x, y, &sum)) + fixup = 8; + + /* + * We must ensure `sum + fixup < 2**64 - 8`. + * + * We want a conditional branch here, but not in the + * overflowing add: overflows happen roughly half the time on + * pseudorandom inputs, but `sum < 2**64 - 16` is almost + * always true, for pseudorandom `sum`. + */ + if (LIKELY(sum < (uint64_t)-16)) + return sum + fixup; + +#ifdef UMASH_INLINE_ASM + /* + * Some compilers like to compile the likely branch above with + * conditional moves or predication. Insert a compiler barrier + * in the slow path here to force a branch. 
+ */ + __asm__("" : "+r"(sum)); +#endif + return add_mod_slow_slow_path(sum, fixup); +} + +TEST_DEF inline uint64_t +mul_mod_fast(uint64_t m, uint64_t x) +{ + uint64_t hi, lo; + + mul128(m, x, &hi, &lo); + return add_mod_fast(lo, 8 * hi); +} + +TEST_DEF inline uint64_t +horner_double_update(uint64_t acc, uint64_t m0, uint64_t m1, uint64_t x, uint64_t y) +{ + + acc = add_mod_fast(acc, x); + return add_mod_slow(mul_mod_fast(m0, acc), mul_mod_fast(m1, y)); +} + +/** + * Salsa20 stream generator, used to derive struct umash_param. + * + * Slightly prettified version of D. J. Bernstein's public domain NaCL + * (version 20110121), without paying any attention to constant time + * execution or any other side-channel. + */ +static inline uint32_t +rotate(uint32_t u, int c) +{ + + return (u << c) | (u >> (32 - c)); +} + +static inline uint32_t +load_littleendian(const void *buf) +{ + uint32_t ret = 0; + uint8_t x[4]; + + memcpy(x, buf, sizeof(x)); + for (size_t i = 0; i < 4; i++) + ret |= (uint32_t)x[i] << (8 * i); + + return ret; +} + +static inline void +store_littleendian(void *dst, uint32_t u) +{ + + for (size_t i = 0; i < 4; i++) { + uint8_t lo = u; + + memcpy(dst, &lo, 1); + u >>= 8; + dst = (char *)dst + 1; + } + + return; +} + +static FN void +core_salsa20(char *out, const uint8_t in[static 16], const uint8_t key[static 32], + const uint8_t constant[16]) +{ + enum { ROUNDS = 20 }; + uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; + + j0 = x0 = load_littleendian(constant + 0); + j1 = x1 = load_littleendian(key + 0); + j2 = x2 = load_littleendian(key + 4); + j3 = x3 = load_littleendian(key + 8); + j4 = x4 = load_littleendian(key + 12); + j5 = x5 = load_littleendian(constant + 4); + j6 = x6 = load_littleendian(in + 0); + j7 = x7 = load_littleendian(in + 4); + j8 = x8 = load_littleendian(in + 8); + j9 = x9 = load_littleendian(in + 12); + j10 = x10 = 
load_littleendian(constant + 8); + j11 = x11 = load_littleendian(key + 16); + j12 = x12 = load_littleendian(key + 20); + j13 = x13 = load_littleendian(key + 24); + j14 = x14 = load_littleendian(key + 28); + j15 = x15 = load_littleendian(constant + 12); + + for (size_t i = 0; i < ROUNDS; i += 2) { + x4 ^= rotate(x0 + x12, 7); + x8 ^= rotate(x4 + x0, 9); + x12 ^= rotate(x8 + x4, 13); + x0 ^= rotate(x12 + x8, 18); + x9 ^= rotate(x5 + x1, 7); + x13 ^= rotate(x9 + x5, 9); + x1 ^= rotate(x13 + x9, 13); + x5 ^= rotate(x1 + x13, 18); + x14 ^= rotate(x10 + x6, 7); + x2 ^= rotate(x14 + x10, 9); + x6 ^= rotate(x2 + x14, 13); + x10 ^= rotate(x6 + x2, 18); + x3 ^= rotate(x15 + x11, 7); + x7 ^= rotate(x3 + x15, 9); + x11 ^= rotate(x7 + x3, 13); + x15 ^= rotate(x11 + x7, 18); + x1 ^= rotate(x0 + x3, 7); + x2 ^= rotate(x1 + x0, 9); + x3 ^= rotate(x2 + x1, 13); + x0 ^= rotate(x3 + x2, 18); + x6 ^= rotate(x5 + x4, 7); + x7 ^= rotate(x6 + x5, 9); + x4 ^= rotate(x7 + x6, 13); + x5 ^= rotate(x4 + x7, 18); + x11 ^= rotate(x10 + x9, 7); + x8 ^= rotate(x11 + x10, 9); + x9 ^= rotate(x8 + x11, 13); + x10 ^= rotate(x9 + x8, 18); + x12 ^= rotate(x15 + x14, 7); + x13 ^= rotate(x12 + x15, 9); + x14 ^= rotate(x13 + x12, 13); + x15 ^= rotate(x14 + x13, 18); + } + + x0 += j0; + x1 += j1; + x2 += j2; + x3 += j3; + x4 += j4; + x5 += j5; + x6 += j6; + x7 += j7; + x8 += j8; + x9 += j9; + x10 += j10; + x11 += j11; + x12 += j12; + x13 += j13; + x14 += j14; + x15 += j15; + + store_littleendian(out + 0, x0); + store_littleendian(out + 4, x1); + store_littleendian(out + 8, x2); + store_littleendian(out + 12, x3); + store_littleendian(out + 16, x4); + store_littleendian(out + 20, x5); + store_littleendian(out + 24, x6); + store_littleendian(out + 28, x7); + store_littleendian(out + 32, x8); + store_littleendian(out + 36, x9); + store_littleendian(out + 40, x10); + store_littleendian(out + 44, x11); + store_littleendian(out + 48, x12); + store_littleendian(out + 52, x13); + store_littleendian(out + 56, x14); 
+ store_littleendian(out + 60, x15); + return; +} + +TEST_DEF void +salsa20_stream( + void *dst, size_t len, const uint8_t nonce[static 8], const uint8_t key[static 32]) +{ + static const uint8_t sigma[16] = "expand 32-byte k"; + uint8_t in[16]; + + if (len == 0) + return; + + memcpy(in, nonce, 8); + memset(in + 8, 0, 8); + + while (len >= 64) { + unsigned int u; + + core_salsa20(dst, in, key, sigma); + u = 1; + for (size_t i = 8; i < 16; i++) { + u += in[i]; + in[i] = u; + u >>= 8; + } + + dst = (char *)dst + 64; + len -= 64; + } + + if (len > 0) { + char block[64]; + + core_salsa20(block, in, key, sigma); + memcpy(dst, block, len); + } + + return; +} + +#if defined(UMASH_TEST_ONLY) || UMASH_LONG_INPUTS +#include "umash_long.inc" +#endif + +/** + * OH block compression. + */ +TEST_DEF struct umash_oh +oh_varblock(const uint64_t *params, uint64_t tag, const void *block, size_t n_bytes) +{ + struct umash_oh ret; + v128 acc = V128_ZERO; + + /* The final block processes `remaining > 0` bytes. */ + size_t remaining = 1 + ((n_bytes - 1) % sizeof(v128)); + size_t end_full_pairs = (n_bytes - remaining) / sizeof(uint64_t); + const void *last_ptr = (const char *)block + n_bytes - sizeof(v128); + size_t i; + + for (i = 0; i < end_full_pairs; i += 2) { + v128 x, k; + + memcpy(&x, block, sizeof(x)); + block = (const char *)block + sizeof(x); + + memcpy(&k, ¶ms[i], sizeof(k)); + x ^= k; + acc ^= v128_clmul_cross(x); + } + + memcpy(&ret, &acc, sizeof(ret)); + + /* Compress the final (potentially partial) pair. 
*/ + { + uint64_t x, y, enh_hi, enh_lo; + + memcpy(&x, last_ptr, sizeof(x)); + last_ptr = (const char *)last_ptr + sizeof(x); + memcpy(&y, last_ptr, sizeof(y)); + + x += params[i]; + y += params[i + 1]; + mul128(x, y, &enh_hi, &enh_lo); + enh_hi += tag; + + ret.bits[0] ^= enh_lo; + ret.bits[1] ^= enh_hi ^ enh_lo; + } + + return ret; +} + +TEST_DEF void +oh_varblock_fprint(struct umash_oh dst[static restrict 2], + const uint64_t *restrict params, uint64_t tag, const void *restrict block, + size_t n_bytes) +{ + v128 acc = V128_ZERO; /* Base umash */ + v128 acc_shifted = V128_ZERO; /* Accumulates shifted values */ + v128 lrc; + /* The final block processes `remaining > 0` bytes. */ + size_t remaining = 1 + ((n_bytes - 1) % sizeof(v128)); + size_t end_full_pairs = (n_bytes - remaining) / sizeof(uint64_t); + const void *last_ptr = (const char *)block + n_bytes - sizeof(v128); + size_t i; + + lrc = v128_create(params[UMASH_OH_PARAM_COUNT], params[UMASH_OH_PARAM_COUNT + 1]); + for (i = 0; i < end_full_pairs; i += 2) { + v128 x, k; + + memcpy(&x, block, sizeof(x)); + block = (const char *)block + sizeof(x); + + memcpy(&k, ¶ms[i], sizeof(k)); + + x ^= k; + lrc ^= x; + + x = v128_clmul_cross(x); + + acc ^= x; + if (i + 2 >= end_full_pairs) + break; + + acc_shifted ^= x; + acc_shifted = v128_shift(acc_shifted); + } + + /* + * Update the LRC for the last chunk before treating it + * specially. 
+ */ + { + v128 x, k; + + memcpy(&x, last_ptr, sizeof(x)); + memcpy(&k, ¶ms[end_full_pairs], sizeof(k)); + + lrc ^= x ^ k; + } + + acc_shifted ^= acc; + acc_shifted = v128_shift(acc_shifted); + + acc_shifted ^= v128_clmul_cross(lrc); + + memcpy(&dst[0], &acc, sizeof(dst[0])); + memcpy(&dst[1], &acc_shifted, sizeof(dst[1])); + + { + uint64_t x, y, kx, ky, enh_hi, enh_lo; + + memcpy(&x, last_ptr, sizeof(x)); + last_ptr = (const char *)last_ptr + sizeof(x); + memcpy(&y, last_ptr, sizeof(y)); + + kx = x + params[end_full_pairs]; + ky = y + params[end_full_pairs + 1]; + + mul128(kx, ky, &enh_hi, &enh_lo); + enh_hi += tag; + + enh_hi ^= enh_lo; + dst[0].bits[0] ^= enh_lo; + dst[0].bits[1] ^= enh_hi; + + dst[1].bits[0] ^= enh_lo; + dst[1].bits[1] ^= enh_hi; + } + + return; +} + +/** + * Returns `then` if `cond` is true, `otherwise` if false. + * + * This noise helps compiler emit conditional moves. + */ +static inline const void * +select_ptr(bool cond, const void *then, const void *otherwise) +{ + const char *ret; + +#if UMASH_INLINE_ASM + /* Force strict evaluation of both arguments. */ + __asm__("" ::"r"(then), "r"(otherwise)); +#endif + + ret = (cond) ? then : otherwise; + +#if UMASH_INLINE_ASM + /* And also force the result to be materialised with a blackhole. */ + __asm__("" : "+r"(ret)); +#endif + return ret; +} + +/** + * Short UMASH (<= 8 bytes). + */ +TEST_DEF inline uint64_t +vec_to_u64(const void *data, size_t n_bytes) +{ + const char zeros[2] = { 0 }; + uint32_t hi, lo; + + /* + * If there are at least 4 bytes to read, read the first 4 in + * `lo`, and the last 4 in `hi`. This covers the whole range, + * since `n_bytes` is at most 8. + */ + if (LIKELY(n_bytes >= sizeof(lo))) { + memcpy(&lo, data, sizeof(lo)); + memcpy(&hi, (const char *)data + n_bytes - sizeof(hi), sizeof(hi)); + } else { + /* 0 <= n_bytes < 4. Decode the size in binary. 
*/ + uint16_t word; + uint8_t byte; + + /* + * If the size is odd, load the first byte in `byte`; + * otherwise, load in a zero. + */ + memcpy(&byte, select_ptr(n_bytes & 1, data, zeros), 1); + lo = byte; + + /* + * If the size is 2 or 3, load the last two bytes in `word`; + * otherwise, load in a zero. + */ + memcpy(&word, + select_ptr(n_bytes & 2, (const char *)data + n_bytes - 2, zeros), 2); + /* + * We have now read `bytes[0 ... n_bytes - 1]` + * exactly once without overwriting any data. + */ + hi = word; + } + + /* + * Mix `hi` with the `lo` bits: SplitMix64 seems to have + * trouble with the top 4 bits. + */ + return ((uint64_t)hi << 32) | (lo + hi); +} + +TEST_DEF uint64_t +umash_short(const uint64_t *params, uint64_t seed, const void *data, size_t n_bytes) +{ + uint64_t h; + + seed += params[n_bytes]; + h = vec_to_u64(data, n_bytes); + h ^= h >> 30; + h *= 0xbf58476d1ce4e5b9ULL; + h = (h ^ seed) ^ (h >> 27); + h *= 0x94d049bb133111ebULL; + h ^= h >> 31; + return h; +} + +static FN struct umash_fp +umash_fp_short(const uint64_t *params, uint64_t seed, const void *data, size_t n_bytes) +{ + struct umash_fp ret; + uint64_t h; + + ret.hash[0] = seed + params[n_bytes]; + ret.hash[1] = seed + params[n_bytes + OH_SHORT_HASH_SHIFT]; + + h = vec_to_u64(data, n_bytes); + h ^= h >> 30; + h *= 0xbf58476d1ce4e5b9ULL; + h ^= h >> 27; + +#define TAIL(i) \ + do { \ + ret.hash[i] ^= h; \ + ret.hash[i] *= 0x94d049bb133111ebULL; \ + ret.hash[i] ^= ret.hash[i] >> 31; \ + } while (0) + + TAIL(0); + TAIL(1); +#undef TAIL + + return ret; +} + +/** + * Rotates `x` left by `n` bits. 
+ */ +static inline uint64_t +rotl64(uint64_t x, int n) +{ + + return (x << n) | (x >> (64 - n)); +} + +TEST_DEF inline uint64_t +finalize(uint64_t x) +{ + + return (x ^ rotl64(x, 8)) ^ rotl64(x, 33); +} + +TEST_DEF uint64_t +umash_medium(const uint64_t multipliers[static 2], const uint64_t *oh, uint64_t seed, + const void *data, size_t n_bytes) +{ + uint64_t enh_hi, enh_lo; + + { + uint64_t x, y; + + memcpy(&x, data, sizeof(x)); + memcpy(&y, (const char *)data + n_bytes - sizeof(y), sizeof(y)); + x += oh[0]; + y += oh[1]; + + mul128(x, y, &enh_hi, &enh_lo); + enh_hi += seed ^ n_bytes; + } + + enh_hi ^= enh_lo; + return finalize(horner_double_update( + /*acc=*/0, multipliers[0], multipliers[1], enh_lo, enh_hi)); +} + +static FN struct umash_fp +umash_fp_medium(const uint64_t multipliers[static 2][2], const uint64_t *oh, + uint64_t seed, const void *data, size_t n_bytes) +{ + struct umash_fp ret; + const uint64_t offset = seed ^ n_bytes; + uint64_t enh_hi, enh_lo; + union { + v128 v; + uint64_t u64[2]; + } mixed_lrc; + uint64_t lrc[2] = { oh[UMASH_OH_PARAM_COUNT], oh[UMASH_OH_PARAM_COUNT + 1] }; + uint64_t x, y; + uint64_t a, b; + + /* Expand the 9-16 bytes to 16. 
*/ + memcpy(&x, data, sizeof(x)); + memcpy(&y, (const char *)data + n_bytes - sizeof(y), sizeof(y)); + + a = oh[0]; + b = oh[1]; + + lrc[0] ^= x ^ a; + lrc[1] ^= y ^ b; + mixed_lrc.v = v128_clmul(lrc[0], lrc[1]); + + a += x; + b += y; + + mul128(a, b, &enh_hi, &enh_lo); + enh_hi += offset; + enh_hi ^= enh_lo; + + ret.hash[0] = finalize(horner_double_update( + /*acc=*/0, multipliers[0][0], multipliers[0][1], enh_lo, enh_hi)); + + ret.hash[1] = finalize(horner_double_update(/*acc=*/0, multipliers[1][0], + multipliers[1][1], enh_lo ^ mixed_lrc.u64[0], enh_hi ^ mixed_lrc.u64[1])); + + return ret; +} + +TEST_DEF uint64_t +umash_long(const uint64_t multipliers[static 2], const uint64_t *oh, uint64_t seed, + const void *data, size_t n_bytes) +{ + uint64_t acc = 0; + + /* + * umash_long.inc defines this variable when the long input + * routine is enabled. + */ +#ifdef UMASH_MULTIPLE_BLOCKS_THRESHOLD + if (UNLIKELY(n_bytes >= UMASH_MULTIPLE_BLOCKS_THRESHOLD)) { + size_t n_block = n_bytes / BLOCK_SIZE; + const void *remaining; + + n_bytes %= BLOCK_SIZE; + remaining = (const char *)data + (n_block * BLOCK_SIZE); + acc = umash_multiple_blocks(acc, multipliers, oh, seed, data, n_block); + + data = remaining; + if (n_bytes == 0) + goto finalize; + + goto last_block; + } +#else + /* Avoid warnings about the unused labels. */ + if (0) { + goto last_block; + goto finalize; + } +#endif + + while (n_bytes > BLOCK_SIZE) { + struct umash_oh compressed; + + compressed = oh_varblock(oh, seed, data, BLOCK_SIZE); + data = (const char *)data + BLOCK_SIZE; + n_bytes -= BLOCK_SIZE; + + acc = horner_double_update(acc, multipliers[0], multipliers[1], + compressed.bits[0], compressed.bits[1]); + } + +last_block: + /* Do the final block. 
*/ + { + struct umash_oh compressed; + + seed ^= (uint8_t)n_bytes; + compressed = oh_varblock(oh, seed, data, n_bytes); + acc = horner_double_update(acc, multipliers[0], multipliers[1], + compressed.bits[0], compressed.bits[1]); + } + +finalize: + return finalize(acc); +} + +TEST_DEF struct umash_fp +umash_fp_long(const uint64_t multipliers[static 2][2], const uint64_t *oh, uint64_t seed, + const void *data, size_t n_bytes) +{ + struct umash_oh compressed[2]; + struct umash_fp ret; + uint64_t acc[2] = { 0, 0 }; + +#ifdef UMASH_MULTIPLE_BLOCKS_THRESHOLD + if (UNLIKELY(n_bytes >= UMASH_MULTIPLE_BLOCKS_THRESHOLD)) { + struct umash_fp poly = { .hash = { 0, 0 } }; + size_t n_block = n_bytes / BLOCK_SIZE; + const void *remaining; + + n_bytes %= BLOCK_SIZE; + remaining = (const char *)data + (n_block * BLOCK_SIZE); + poly = umash_fprint_multiple_blocks( + poly, multipliers, oh, seed, data, n_block); + + acc[0] = poly.hash[0]; + acc[1] = poly.hash[1]; + + data = remaining; + if (n_bytes == 0) + goto finalize; + + goto last_block; + } +#else + /* Avoid warnings about the unused labels. 
*/ + if (0) { + goto last_block; + goto finalize; + } +#endif + + while (n_bytes > BLOCK_SIZE) { + oh_varblock_fprint(compressed, oh, seed, data, BLOCK_SIZE); + +#define UPDATE(i) \ + acc[i] = horner_double_update(acc[i], multipliers[i][0], multipliers[i][1], \ + compressed[i].bits[0], compressed[i].bits[1]) + + UPDATE(0); + UPDATE(1); +#undef UPDATE + + data = (const char *)data + BLOCK_SIZE; + n_bytes -= BLOCK_SIZE; + } + +last_block: + oh_varblock_fprint(compressed, oh, seed ^ (uint8_t)n_bytes, data, n_bytes); + +#define FINAL(i) \ + do { \ + acc[i] = horner_double_update(acc[i], multipliers[i][0], \ + multipliers[i][1], compressed[i].bits[0], compressed[i].bits[1]); \ + } while (0) + + FINAL(0); + FINAL(1); +#undef FINAL + +finalize: + ret.hash[0] = finalize(acc[0]); + ret.hash[1] = finalize(acc[1]); + return ret; +} + +static FN bool +value_is_repeated(const uint64_t *values, size_t n, uint64_t needle) +{ + + for (size_t i = 0; i < n; i++) { + if (values[i] == needle) + return true; + } + + return false; +} + +FN bool +umash_params_prepare(struct umash_params *params) +{ + static const uint64_t modulo = (1UL << 61) - 1; + /* + * The polynomial parameters have two redundant fields (for + * the pre-squared multipliers). Use them as our source of + * extra entropy if needed. + */ + uint64_t buf[] = { params->poly[0][0], params->poly[1][0] }; + size_t buf_idx = 0; + +#define GET_RANDOM(DST) \ + do { \ + if (buf_idx >= ARRAY_SIZE(buf)) \ + return false; \ + \ + (DST) = buf[buf_idx++]; \ + } while (0) + + /* Check the polynomial multipliers: we don't want 0s. */ + for (size_t i = 0; i < ARRAY_SIZE(params->poly); i++) { + uint64_t f = params->poly[i][1]; + + while (true) { + /* + * Zero out bits and use rejection sampling to + * guarantee uniformity. + */ + f &= (1UL << 61) - 1; + if (f != 0 && f < modulo) + break; + + GET_RANDOM(f); + } + + /* We can work in 2**64 - 8 and reduce after the fact. 
*/ + params->poly[i][0] = mul_mod_fast(f, f) % modulo; + params->poly[i][1] = f; + } + + /* Avoid repeated OH noise values. */ + for (size_t i = 0; i < ARRAY_SIZE(params->oh); i++) { + while (value_is_repeated(params->oh, i, params->oh[i])) + GET_RANDOM(params->oh[i]); + } + + return true; +} + +FN void +umash_params_derive(struct umash_params *params, uint64_t bits, const void *key) +{ + uint8_t umash_key[32] = "Do not use UMASH VS adversaries."; + + if (key != NULL) + memcpy(umash_key, key, sizeof(umash_key)); + + while (true) { + uint8_t nonce[8]; + + for (size_t i = 0; i < 8; i++) + nonce[i] = bits >> (8 * i); + + salsa20_stream(params, sizeof(*params), nonce, umash_key); + if (umash_params_prepare(params)) + return; + + /* + * This should practically never fail, so really + * shouldn't happen multiple times. If it does, an + * infinite loop is as good as anything else. + */ + bits++; + } +} + +/* + * Updates the polynomial state at the end of a block. + */ +static FN void +sink_update_poly(struct umash_sink *sink) +{ + uint64_t oh0, oh1; + + oh0 = sink->oh_acc.bits[0]; + oh1 = sink->oh_acc.bits[1]; + sink->poly_state[0].acc = horner_double_update(sink->poly_state[0].acc, + sink->poly_state[0].mul[0], sink->poly_state[0].mul[1], oh0, oh1); + + sink->oh_acc = (struct umash_oh) { .bits = { 0 } }; + if (sink->hash_wanted == 0) + return; + + oh0 = sink->oh_twisted.acc.bits[0]; + oh1 = sink->oh_twisted.acc.bits[1]; + sink->poly_state[1].acc = horner_double_update(sink->poly_state[1].acc, + sink->poly_state[1].mul[0], sink->poly_state[1].mul[1], oh0, oh1); + + sink->oh_twisted = + (struct umash_twisted_oh) { .lrc = { sink->oh[UMASH_OH_PARAM_COUNT], + sink->oh[UMASH_OH_PARAM_COUNT + 1] } }; + return; +} + +/* + * Updates the OH state with 16 bytes of data. If `final` is true, we + * are definitely consuming the last chunk in the input. 
+ */ +static FN void +sink_consume_buf( + struct umash_sink *sink, const char buf[static INCREMENTAL_GRANULARITY], bool final) +{ + const size_t buf_begin = sizeof(sink->buf) - INCREMENTAL_GRANULARITY; + const size_t param = sink->oh_iter; + const uint64_t k0 = sink->oh[param]; + const uint64_t k1 = sink->oh[param + 1]; + uint64_t x, y; + + /* Use GPR loads to avoid forwarding stalls. */ + memcpy(&x, buf, sizeof(x)); + memcpy(&y, buf + sizeof(x), sizeof(y)); + + /* All but the last 16-byte chunk of each block goes through PH. */ + if (sink->oh_iter < UMASH_OH_PARAM_COUNT - 2 && !final) { + v128 acc, h, twisted_acc, prev; + uint64_t m0, m1; + + m0 = x ^ k0; + m1 = y ^ k1; + + memcpy(&acc, &sink->oh_acc, sizeof(acc)); + h = v128_clmul(m0, m1); + acc ^= h; + memcpy(&sink->oh_acc, &acc, sizeof(acc)); + + if (sink->hash_wanted == 0) + goto next; + + sink->oh_twisted.lrc[0] ^= m0; + sink->oh_twisted.lrc[1] ^= m1; + + memcpy(&twisted_acc, &sink->oh_twisted.acc, sizeof(twisted_acc)); + memcpy(&prev, sink->oh_twisted.prev, sizeof(prev)); + + twisted_acc ^= prev; + twisted_acc = v128_shift(twisted_acc); + memcpy(&sink->oh_twisted.acc, &twisted_acc, sizeof(twisted_acc)); + memcpy(&sink->oh_twisted.prev, &h, sizeof(h)); + } else { + /* The last chunk is combined with the size tag with ENH. 
*/ + uint64_t tag = sink->seed ^ (uint8_t)(sink->block_size + sink->bufsz); + uint64_t enh_hi, enh_lo; + + mul128(x + k0, y + k1, &enh_hi, &enh_lo); + enh_hi += tag; + enh_hi ^= enh_lo; + + if (sink->hash_wanted != 0) { + union { + v128 vec; + uint64_t h[2]; + } lrc_hash; + uint64_t lrc0, lrc1; + uint64_t oh0, oh1; + uint64_t oh_twisted0, oh_twisted1; + + lrc0 = sink->oh_twisted.lrc[0] ^ x ^ k0; + lrc1 = sink->oh_twisted.lrc[1] ^ y ^ k1; + lrc_hash.vec = v128_clmul(lrc0, lrc1); + + oh_twisted0 = sink->oh_twisted.acc.bits[0]; + oh_twisted1 = sink->oh_twisted.acc.bits[1]; + + oh0 = sink->oh_acc.bits[0]; + oh1 = sink->oh_acc.bits[1]; + oh0 ^= oh_twisted0; + oh0 <<= 1; + oh1 ^= oh_twisted1; + oh1 <<= 1; + + oh0 ^= lrc_hash.h[0]; + oh1 ^= lrc_hash.h[1]; + sink->oh_twisted.acc.bits[0] = oh0 ^ enh_lo; + sink->oh_twisted.acc.bits[1] = oh1 ^ enh_hi; + } + + sink->oh_acc.bits[0] ^= enh_lo; + sink->oh_acc.bits[1] ^= enh_hi; + } + +next: + memmove(&sink->buf, buf, buf_begin); + sink->block_size += sink->bufsz; + sink->bufsz = 0; + sink->oh_iter += 2; + + if (sink->oh_iter == UMASH_OH_PARAM_COUNT || final) { + sink_update_poly(sink); + sink->block_size = 0; + sink->oh_iter = 0; + } + + return; +} + +/** + * Hashes full 256-byte blocks into a sink that just dumped its OH + * state in the toplevel polynomial hash and reset the block state. + */ +static FN size_t +block_sink_update(struct umash_sink *sink, const void *data, size_t n_bytes) +{ + size_t consumed = 0; + + assert(n_bytes >= BLOCK_SIZE); + assert(sink->bufsz == 0); + assert(sink->block_size == 0); + assert(sink->oh_iter == 0); + +#ifdef UMASH_MULTIPLE_BLOCKS_THRESHOLD + if (UNLIKELY(n_bytes > UMASH_MULTIPLE_BLOCKS_THRESHOLD)) { + /* + * We leave the last block (partial or not) for the + * caller: incremental hashing must save some state + * at the end of a block. 
+ */ + size_t n_blocks = (n_bytes - 1) / BLOCK_SIZE; + + if (sink->hash_wanted != 0) { + const uint64_t multipliers[2][2] = { + [0][0] = sink->poly_state[0].mul[0], + [0][1] = sink->poly_state[0].mul[1], + [1][0] = sink->poly_state[1].mul[0], + [1][1] = sink->poly_state[1].mul[1], + }; + struct umash_fp poly = { + .hash[0] = sink->poly_state[0].acc, + .hash[1] = sink->poly_state[1].acc, + }; + + poly = umash_fprint_multiple_blocks( + poly, multipliers, sink->oh, sink->seed, data, n_blocks); + + sink->poly_state[0].acc = poly.hash[0]; + sink->poly_state[1].acc = poly.hash[1]; + } else { + sink->poly_state[0].acc = umash_multiple_blocks( + sink->poly_state[0].acc, sink->poly_state[0].mul, sink->oh, + sink->seed, data, n_blocks); + } + + return n_blocks * BLOCK_SIZE; + } +#endif + + while (n_bytes > BLOCK_SIZE) { + /* + * Is this worth unswitching? Not obviously, given + * the amount of work in one OH block. + */ + if (sink->hash_wanted != 0) { + struct umash_oh hashes[2]; + + oh_varblock_fprint( + hashes, sink->oh, sink->seed, data, BLOCK_SIZE); + sink->oh_acc = hashes[0]; + sink->oh_twisted.acc = hashes[1]; + } else { + sink->oh_acc = + oh_varblock(sink->oh, sink->seed, data, BLOCK_SIZE); + } + + sink_update_poly(sink); + consumed += BLOCK_SIZE; + data = (const char *)data + BLOCK_SIZE; + n_bytes -= BLOCK_SIZE; + } + + return consumed; +} + +FN void +umash_sink_update(struct umash_sink *sink, const void *data, size_t n_bytes) +{ + const size_t buf_begin = sizeof(sink->buf) - INCREMENTAL_GRANULARITY; + size_t remaining = INCREMENTAL_GRANULARITY - sink->bufsz; + + DTRACE_PROBE4(libumash, umash_sink_update, sink, remaining, data, n_bytes); + + if (n_bytes < remaining) { + memcpy(&sink->buf[buf_begin + sink->bufsz], data, n_bytes); + sink->bufsz += n_bytes; + return; + } + + memcpy(&sink->buf[buf_begin + sink->bufsz], data, remaining); + data = (const char *)data + remaining; + n_bytes -= remaining; + /* We know we're hashing at least 16 bytes. 
*/ + sink->large_umash = true; + sink->bufsz = INCREMENTAL_GRANULARITY; + + /* + * We can't compress a 16-byte buffer until we know whether + * data is coming: the last 16-byte chunk goes to `NH` instead + * of `PH`. We could try to detect when the buffer is the + * last chunk in a block and immediately go to `NH`, but it + * seems more robust to always let the stores settle before we + * read them, just in case the combination is bad for forwarding. + */ + if (n_bytes == 0) + return; + + sink_consume_buf(sink, sink->buf + buf_begin, /*final=*/false); + + while (n_bytes > INCREMENTAL_GRANULARITY) { + size_t consumed; + + if (sink->oh_iter == 0 && n_bytes > BLOCK_SIZE) { + consumed = block_sink_update(sink, data, n_bytes); + assert(consumed >= BLOCK_SIZE); + + /* + * Save the tail of the data we just consumed + * in `sink->buf[0 ... buf_begin - 1]`: the + * final digest may need those bytes for its + * redundant read. + */ + memcpy(sink->buf, + (const char *)data + (consumed - INCREMENTAL_GRANULARITY), + buf_begin); + } else { + consumed = INCREMENTAL_GRANULARITY; + sink->bufsz = INCREMENTAL_GRANULARITY; + sink_consume_buf(sink, data, /*final=*/false); + } + + n_bytes -= consumed; + data = (const char *)data + consumed; + } + + memcpy(&sink->buf[buf_begin], data, n_bytes); + sink->bufsz = n_bytes; + return; +} + +FN uint64_t +umash_full(const struct umash_params *params, uint64_t seed, int which, const void *data, + size_t n_bytes) +{ + + DTRACE_PROBE4(libumash, umash_full, params, which, data, n_bytes); + + /* + * We don't (yet) implement code that only evaluates the + * second hash. We don't currently use that logic, and it's + * about to become a bit more complex, so let's just go for a + * full fingerprint and take what we need. + * + * umash_full is also rarely used that way: usually we want + * either the main hash, or the full fingerprint. 
+ */ + if (UNLIKELY(which != 0)) { + struct umash_fp fp; + + fp = umash_fprint(params, seed, data, n_bytes); + return fp.hash[1]; + } + + /* + * It's not that short inputs are necessarily more likely, but + * we want to make sure they fall through correctly to + * minimise latency. + */ + if (LIKELY(n_bytes <= sizeof(v128))) { + if (LIKELY(n_bytes <= sizeof(uint64_t))) + return umash_short(params->oh, seed, data, n_bytes); + + return umash_medium(params->poly[0], params->oh, seed, data, n_bytes); + } + + return umash_long(params->poly[0], params->oh, seed, data, n_bytes); +} + +FN struct umash_fp +umash_fprint( + const struct umash_params *params, uint64_t seed, const void *data, size_t n_bytes) +{ + + DTRACE_PROBE3(libumash, umash_fprint, params, data, n_bytes); + if (LIKELY(n_bytes <= sizeof(v128))) { + if (LIKELY(n_bytes <= sizeof(uint64_t))) + return umash_fp_short(params->oh, seed, data, n_bytes); + + return umash_fp_medium(params->poly, params->oh, seed, data, n_bytes); + } + + return umash_fp_long(params->poly, params->oh, seed, data, n_bytes); +} + +FN void +umash_init(struct umash_state *state, const struct umash_params *params, uint64_t seed, + int which) +{ + + which = (which == 0) ? 
0 : 1; + DTRACE_PROBE3(libumash, umash_init, state, params, which); + + state->sink = (struct umash_sink) { + .poly_state[0] = { + .mul = { + params->poly[0][0], + params->poly[0][1], + }, + }, + .poly_state[1]= { + .mul = { + params->poly[1][0], + params->poly[1][1], + }, + }, + .oh = params->oh, + .hash_wanted = which, + .oh_twisted.lrc = { params->oh[UMASH_OH_PARAM_COUNT], + params->oh[UMASH_OH_PARAM_COUNT + 1] }, + .seed = seed, + }; + + return; +} + +FN void +umash_fp_init( + struct umash_fp_state *state, const struct umash_params *params, uint64_t seed) +{ + + DTRACE_PROBE2(libumash, umash_fp_init, state, params); + + state->sink = (struct umash_sink) { + .poly_state[0] = { + .mul = { + params->poly[0][0], + params->poly[0][1], + }, + }, + .poly_state[1]= { + .mul = { + params->poly[1][0], + params->poly[1][1], + }, + }, + .oh = params->oh, + .hash_wanted = 2, + .oh_twisted.lrc = { params->oh[UMASH_OH_PARAM_COUNT], + params->oh[UMASH_OH_PARAM_COUNT + 1] }, + .seed = seed, + }; + + return; +} + +/** + * Pumps any last block out of the incremental state. + */ +static FN void +digest_flush(struct umash_sink *sink) +{ + + if (sink->bufsz > 0) + sink_consume_buf(sink, &sink->buf[sink->bufsz], /*final=*/true); + return; +} + +/** + * Finalizes a digest out of `sink`'s current state. + * + * The `sink` must be `digest_flush`ed if it is a `large_umash`. + * + * @param index 0 to return the first (only, if hashing) value, 1 for the + * second independent value for fingerprinting. + */ +static FN uint64_t +digest(const struct umash_sink *sink, int index) +{ + const size_t buf_begin = sizeof(sink->buf) - INCREMENTAL_GRANULARITY; + const size_t shift = (index == 0) ? 
0 : OH_SHORT_HASH_SHIFT; + + if (sink->large_umash) + return finalize(sink->poly_state[index].acc); + + if (sink->bufsz <= sizeof(uint64_t)) + return umash_short( + &sink->oh[shift], sink->seed, &sink->buf[buf_begin], sink->bufsz); + + return umash_medium(sink->poly_state[index].mul, sink->oh, sink->seed, + &sink->buf[buf_begin], sink->bufsz); +} + +static FN struct umash_fp +fp_digest_sink(const struct umash_sink *sink) +{ + struct umash_sink copy; + struct umash_fp ret; + const size_t buf_begin = sizeof(sink->buf) - INCREMENTAL_GRANULARITY; + + if (sink->large_umash) { + copy = *sink; + digest_flush(©); + sink = © + } else if (sink->bufsz <= sizeof(uint64_t)) { + return umash_fp_short( + sink->oh, sink->seed, &sink->buf[buf_begin], sink->bufsz); + } else { + const struct umash_params *params; + + /* + * Back out the params struct from our pointer to its + * `oh` member. + */ + params = (const void *)((const char *)sink->oh - + __builtin_offsetof(struct umash_params, oh)); + return umash_fp_medium(params->poly, sink->oh, sink->seed, + &sink->buf[buf_begin], sink->bufsz); + } + + for (size_t i = 0; i < ARRAY_SIZE(ret.hash); i++) + ret.hash[i] = digest(sink, i); + + return ret; +} + +FN uint64_t +umash_digest(const struct umash_state *state) +{ + struct umash_sink copy; + const struct umash_sink *sink = &state->sink; + + DTRACE_PROBE1(libumash, umash_digest, state); + + if (sink->hash_wanted == 1) { + struct umash_fp fp; + + fp = fp_digest_sink(sink); + return fp.hash[1]; + } + + if (sink->large_umash) { + copy = *sink; + digest_flush(©); + sink = © + } + + return digest(sink, 0); +} + +FN struct umash_fp +umash_fp_digest(const struct umash_fp_state *state) +{ + + DTRACE_PROBE1(libumash, umash_fp_digest, state); + return fp_digest_sink(&state->sink); +} diff --git a/tsl/src/import/umash.h b/tsl/src/import/umash.h new file mode 100644 index 00000000000..9f055c2ad00 --- /dev/null +++ b/tsl/src/import/umash.h @@ -0,0 +1,329 @@ +/* + * This file and its contents are 
licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ + +/* + * This file contains source code that was copied and/or modified from + * the UMASH hash implementation at https://github.com/backtrace-labs/umash. + * + * This is a copy of umash.h, git commit sha + * fc4c5b6ca1f06c308e96c43aa080bd766238e092. + */ + +/* + * UMASH is distributed under the MIT license. + * + * SPDX-License-Identifier: MIT + * + * Copyright 2020-2022 Backtrace I/O, Inc. + * Copyright 2022 Paul Khuong + * Copyright 2022 Dougall Johnson + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef UMASH_H +#define UMASH_H +#include +#include +#include + +/** + * # UMASH: a non-cryptographic hash function with collision bounds + * + * SPDX-License-Identifier: MIT + * Copyright 2020-2022 Backtrace I/O, Inc. 
+ * Copyright 2022 Paul Khuong + * + * UMASH is a fast (9-22 ns latency for inputs of 1-64 bytes and 22 + * GB/s peak throughput, on a 2.5 GHz Intel 8175M) 64-bit hash + * function with mathematically proven collision bounds: it is + * [ceil(s / 4096) * 2^{-55}]-almost-universal for inputs of s or + * fewer bytes. + * + * When that's not enough, UMASH can also generate a pair of 64-bit + * hashes in a single traversal. The resulting fingerprint reduces + * the collision probability to less than [ceil(s / 2^{26})^2 * 2^{-83}]; + * the probability that two distinct inputs receive the same + * fingerprint is less 2^{-83} for inputs up to 64 MB, and less than + * 2^{-70} as long as the inputs are shorter than 5 GB each. This + * expectation is taken over the randomly generated `umash_params`. + * If an attacker can infer the contents of these parameters, the + * bounds do not apply. + * + * ## Initialisation + * + * In order to use `UMASH`, one must first generate a `struct + * umash_params`; each such param defines a distinct `UMASH` function + * (a pair of such functions, in fact). Ideally, one would fill + * a struct with random bytes and call`umash_params_prepare`. + * + * - `umash_params_prepare`: attempts to convert the contents of + * randomly filled `struct umash_params` into a valid UMASH + * parameter struct (key). When the input consists of uniformly + * generated random bytes, the probability of failure is + * astronomically small. + * + * - `umash_params_derive`: deterministically constructs a `struct + * umash_params` from a 64-bit seed and an optional 32-byte secret. + * The seed and secret are expanded into random bytes with Salsa20; + * the resulting `umash_params` should be practically random, as + * long the seed or secret are unknown. + * + * ## Batch hashing and fingerprinting + * + * Once we have a `struct umash_params`, we can use `umash_full` or + * `umash_fprint` like regular hash functions. 
+ * + * - `umash_full` can compute either of the two UMASH functions + * described by a `struct umash_params`. Its `seed` argument will + * change the output, but is not associated with any collision + * bound. + * + * - `umash_fprint` computes both `UMASH` functions described by a + * `struct umash_params`. `umash_fp::hash[0]` corresponds to + * calling `umash_full` with the same arguments and `which = 0`; + * `umash_fp::hash[1]` corresponds to `which = 1`. + * + * ## Incremental hashing and fingerprinting + * + * We can also compute UMASH values by feeding bytes incrementally. + * The result is guaranteed to the same as if we had buffered all the + * bytes and called `umash_full` or `umash_fprint`. + * + * - `umash_init` initialises a `struct umash_state` with the same + * parameters one would pass to `umash_full`. + * + * - `umash_digest` computes the value `umash_full` would return + * were it passed the arguments that were given to `umash_init`, + * and the bytes "fed" into the `umash_state`. + * + * - `umash_fp_init` initialises a `struct umash_fp_state` with the + * same parameters one would pass to `umash_fprint`. + * + * - `umash_fp_digest` computes the value `umash_fprint` would return + * for the bytes "fed" into the `umash_fp_state`. + * + * In both cases, one passes a pointer to `struct umash_state::sink` + * or `struct umash_fp_state::sink` to callees that wish to feed bytes + * into the `umash_state` or `umash_fp_state`. + * + * - `umash_sink_update` feeds a byte range to the `umash_sink` + * initialised by calling `umash_init` or `umash_fp_init`. The sink + * does not take ownership of anything and the input bytes may be + * overwritten or freed as soon as `umash_sink_update` returns. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +enum { UMASH_OH_PARAM_COUNT = 32, UMASH_OH_TWISTING_COUNT = 2 }; + +/** + * A single UMASH params struct stores the parameters for a pair of + * independent `UMASH` functions. 
+ */ +struct umash_params { + /* + * Each uint64_t[2] array consists of {f^2, f}, where f is a + * random multiplier in mod 2**61 - 1. + */ + uint64_t poly[2][2]; + /* + * The second (twisted) OH function uses an additional + * 128-bit constant stored in the last two elements. + */ + uint64_t oh[UMASH_OH_PARAM_COUNT + UMASH_OH_TWISTING_COUNT]; +}; + +/** + * A fingerprint consists of two independent `UMASH` hash values. + */ +struct umash_fp { + uint64_t hash[2]; +}; + +/** + * This struct holds the state for incremental UMASH hashing or + * fingerprinting. + * + * A sink owns no allocation, and simply borrows a pointer to its + * `umash_params`. It can be byte-copied to snapshot its state. + * + * The layout works best with alignment to 64 bytes, but does not + * require it. + */ +struct umash_sink { + /* + * We incrementally maintain two states when fingerprinting. + * When hashing, only the first `poly_state` and `oh_acc` + * entries are active. + */ + struct { + uint64_t mul[2]; /* Multiplier, and multiplier^2. */ + uint64_t acc; /* Current Horner accumulator. */ + } poly_state[2]; + + /* + * We write new bytes to the second half, and keep the previous + * 16 byte chunk in the first half. + * + * We may temporarily have a full 16-byte buffer in the second half: + * we must know if the first 16 byte chunk is the first of many, or + * the whole input. + */ + char buf[2 * 16]; + + /* The next 64 bytes are accessed in the `OH` inner loop. */ + + /* key->oh. */ + const uint64_t *oh; + + /* oh_iter tracks where we are in the inner loop, times 2. */ + uint32_t oh_iter; + uint8_t bufsz; /* Write pointer in `buf + 16`. */ + uint8_t block_size; /* Current OH block size, excluding `bufsz`. */ + bool large_umash; /* True once we definitely have >= 16 bytes. */ + /* + * 0 if we're computing the first umash, 1 for the second, and + * 2 for a fingerprint. 
+ * + * In practice, we treat 1 and 2 the same (always compute a + * full fingerprint), and return only the second half if we + * only want that half. + */ + uint8_t hash_wanted; + + /* Accumulators for the current OH value. */ + struct umash_oh { + uint64_t bits[2]; + } oh_acc; + struct umash_twisted_oh { + uint64_t lrc[2]; + uint64_t prev[2]; + struct umash_oh acc; + } oh_twisted; + + uint64_t seed; +}; + +/** + * The `umash_state` struct wraps a sink in a type-safe interface: we + * don't want to try and extract a fingerprint from a sink configured + * for hashing. + */ +struct umash_state { + struct umash_sink sink; +}; + +/** + * Similarly, the `umash_fp_state` struct wraps a sink from which we + * should extract a fingerprint. + */ +struct umash_fp_state { + struct umash_sink sink; +}; + +/** + * Converts a `umash_params` struct filled with random values into + * something usable by the UMASH functions below. + * + * When it succeeds, this function is idempotent. Failure happens + * with probability < 2**-110 is `params` is filled with uniformly + * distributed random bits. That's an astronomically unlikely event, + * and most likely signals an issue with the caller's (pseudo-)random + * number generator. + * + * @return false on failure, probably because the input was not random. + */ +bool umash_params_prepare(struct umash_params *params); + +/** + * Deterministically derives a `umash_params` struct from `bits` and + * `key`. The `bits` values do not have to be particularly well + * distributed, and can be generated sequentially. + * + * @param key a pointer to exactly 32 secret bytes. NULL will be + * replaced with "Do not use UMASH VS adversaries.", the default + * UMASH secret. + */ +void umash_params_derive(struct umash_params *, uint64_t bits, const void *key); + +/** + * Updates a `umash_sink` to take into account `data[0 ... n_bytes)`. 
+ */ +void umash_sink_update(struct umash_sink *, const void *data, size_t n_bytes); + +/** + * Computes the UMASH hash of `data[0 ... n_bytes)`. + * + * Randomly generated `param` lead to independent UMASH values and + * associated worst-case collision bounds; changing the `seed` comes + * with no guarantee. + * + * @param which 0 to compute the first UMASH defined by `params`, 1 + * for the second. + */ +uint64_t umash_full(const struct umash_params *params, uint64_t seed, int which, + const void *data, size_t n_bytes); + +/** + * Computes the UMASH fingerprint of `data[0 ... n_bytes)`. + * + * Randomly generated `param` lead to independent UMASH values and + * associated worst-case collision bounds; changing the `seed` comes + * with no guarantee. + */ +struct umash_fp umash_fprint( + const struct umash_params *params, uint64_t seed, const void *data, size_t n_bytes); + +/** + * Prepares a `umash_state` for computing the `which`th UMASH function in + * `params`. + */ +void umash_init( + struct umash_state *, const struct umash_params *params, uint64_t seed, int which); + +/** + * Returns the UMASH value for the bytes that have been + * `umash_sink_update`d into the state. + */ +uint64_t umash_digest(const struct umash_state *); + +/** + * Prepares a `umash_fp_state` for computing the UMASH fingerprint in + * `params`. + */ +void umash_fp_init( + struct umash_fp_state *, const struct umash_params *params, uint64_t seed); + +/** + * Returns the UMASH fingerprint for the bytes that have been + * `umash_sink_update`d into the state. 
+ */ +struct umash_fp umash_fp_digest(const struct umash_fp_state *); + +#ifdef __cplusplus +} +#endif +#endif /* !UMASH_H */ From af505d4ec6d662fc7bfa8c64a84dc4a3d372a369 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 23 Jan 2025 15:49:49 +0100 Subject: [PATCH 02/18] more fixes --- tsl/CMakeLists.txt | 9 +++++---- tsl/src/import/umash.h | 12 ++++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/tsl/CMakeLists.txt b/tsl/CMakeLists.txt index 9734e5df10b..ffcde553703 100644 --- a/tsl/CMakeLists.txt +++ b/tsl/CMakeLists.txt @@ -4,10 +4,11 @@ if(COMPRESSION_FUZZING) add_compile_definitions(TS_COMPRESSION_FUZZING=1) endif() -# We use the UMASH library for hashing in vectorized grouping. Detect if we can -# compile it on this platform. It is not tested on Windows and leads to a weird -# CI freeze, so don't even try for now. -if(NOT WIN32) +# We use the UMASH library for hashing in vectorized grouping. If it was not +# explicitly disabled already, detect if we can compile it on this platform. It +# is not tested on Windows and leads to a weird CI freeze, so don't even try for +# now. +if((NOT WIN32) AND ((NOT DEFINED USE_UMASH) OR USE_UMASH)) # Check whether we can enable the pclmul instruction required for the UMASH # hashing on amd64. Shouldn't be done if the user has manually specified the # target architecture, no idea how to detect this, but at least we shouldn't diff --git a/tsl/src/import/umash.h b/tsl/src/import/umash.h index 9f055c2ad00..3ad9460b46a 100644 --- a/tsl/src/import/umash.h +++ b/tsl/src/import/umash.h @@ -47,6 +47,10 @@ #include #include +#ifndef TS_USE_UMASH +#error "UMASH usage is disabled, but the header is included" +#endif + /** * # UMASH: a non-cryptographic hash function with collision bounds * @@ -285,7 +289,7 @@ void umash_sink_update(struct umash_sink *, const void *data, size_t n_bytes); * for the second. 
*/ uint64_t umash_full(const struct umash_params *params, uint64_t seed, int which, - const void *data, size_t n_bytes); + const void *data, size_t n_bytes); /** * Computes the UMASH fingerprint of `data[0 ... n_bytes)`. @@ -295,14 +299,14 @@ uint64_t umash_full(const struct umash_params *params, uint64_t seed, int which, * with no guarantee. */ struct umash_fp umash_fprint( - const struct umash_params *params, uint64_t seed, const void *data, size_t n_bytes); + const struct umash_params *params, uint64_t seed, const void *data, size_t n_bytes); /** * Prepares a `umash_state` for computing the `which`th UMASH function in * `params`. */ void umash_init( - struct umash_state *, const struct umash_params *params, uint64_t seed, int which); + struct umash_state *, const struct umash_params *params, uint64_t seed, int which); /** * Returns the UMASH value for the bytes that have been @@ -315,7 +319,7 @@ uint64_t umash_digest(const struct umash_state *); * `params`. */ void umash_fp_init( - struct umash_fp_state *, const struct umash_params *params, uint64_t seed); + struct umash_fp_state *, const struct umash_params *params, uint64_t seed); /** * Returns the UMASH fingerprint for the bytes that have been From d325a30ff76291b1c3284e6fce9ff2fbd2c753ca Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 23 Jan 2025 16:38:54 +0100 Subject: [PATCH 03/18] the hell is going on there --- .github/workflows/windows-build-and-test.yaml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/.github/workflows/windows-build-and-test.yaml b/.github/workflows/windows-build-and-test.yaml index 85b123c3f09..7c2bf17edc4 100644 --- a/.github/workflows/windows-build-and-test.yaml +++ b/.github/workflows/windows-build-and-test.yaml @@ -189,7 +189,7 @@ jobs: run: | export TEST_TABLESPACE1_PREFIX='${{ env.TABLESPACE1 }}' export TEST_TABLESPACE2_PREFIX='${{ env.TABLESPACE2 }}' - cmake -B build_wsl 
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DTEST_PGPORT_LOCAL=${{ env.PGPORT }} + cmake --trace -B build_wsl -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DTEST_PGPORT_LOCAL=${{ env.PGPORT }} make -C build_wsl isolationchecklocal | tee -a installcheck.log make -C build_wsl regresschecklocal IGNORES="${{ matrix.ignores }}" | tee -a installcheck.log @@ -245,6 +245,23 @@ jobs: name: PostgreSQL ${{ matrix.pg }} log ${{ matrix.os }} ${{ matrix.build_type }} Build path: ${{ env.PGDATA }}\log\postmaster.log + - name: Upload CMake Logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: CMake Logs ${{ matrix.pg }} ${{ matrix.os }} ${{ matrix.build_type }} + path: | + build_win/CMakeCache.txt + build_win/CMakeFiles/CMakeConfigureLog.yaml + build_win/CMakeFiles/CMakeError.log + build_win/CMakeFiles/CMakeOutput.log + build_win/compile_commands.json + build_wsl/CMakeCache.txt + build_wsl/CMakeFiles/CMakeConfigureLog.yaml + build_wsl/CMakeFiles/CMakeError.log + build_wsl/CMakeFiles/CMakeOutput.log + build_wsl/compile_commands.json + - name: Upload test results to the database if: always() shell: wsl-bash {0} From 8f9cdb2cd452c81e9441cafbd813f06b30e3ad0d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 23 Jan 2025 16:43:10 +0100 Subject: [PATCH 04/18] reinstall? 
--- .github/workflows/windows-build-and-test.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/windows-build-and-test.yaml b/.github/workflows/windows-build-and-test.yaml index 7c2bf17edc4..42402f3cdb5 100644 --- a/.github/workflows/windows-build-and-test.yaml +++ b/.github/workflows/windows-build-and-test.yaml @@ -184,6 +184,14 @@ jobs: yes | /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh || true apt-get install -y --force-yes postgresql-server-dev-${{ matrix.pg }} + - name: FIXME reinstall cmake + shell: wsl-bash {0} + run: | + cmake -version + apt-get purge cmake + apt-get install cmake + cmake -version + - name: Run tests shell: wsl-bash {0} run: | From 15085f0014aa086c0369ab08bce124d6745463d9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 23 Jan 2025 17:24:27 +0100 Subject: [PATCH 05/18] yes --- .github/workflows/windows-build-and-test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/windows-build-and-test.yaml b/.github/workflows/windows-build-and-test.yaml index 42402f3cdb5..9c7e5e1197d 100644 --- a/.github/workflows/windows-build-and-test.yaml +++ b/.github/workflows/windows-build-and-test.yaml @@ -188,8 +188,8 @@ jobs: shell: wsl-bash {0} run: | cmake -version - apt-get purge cmake - apt-get install cmake + apt-get purge -y cmake + apt-get install -y cmake cmake -version - name: Run tests From 6d5f19ba57f1e6b5f1fbf476c2d3b7f811dab939 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 23 Jan 2025 17:26:32 +0100 Subject: [PATCH 06/18] timeout --- .github/workflows/windows-build-and-test.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/windows-build-and-test.yaml b/.github/workflows/windows-build-and-test.yaml index 9c7e5e1197d..7d78a054650 100644 --- a/.github/workflows/windows-build-and-test.yaml +++ 
b/.github/workflows/windows-build-and-test.yaml @@ -197,7 +197,8 @@ jobs: run: | export TEST_TABLESPACE1_PREFIX='${{ env.TABLESPACE1 }}' export TEST_TABLESPACE2_PREFIX='${{ env.TABLESPACE2 }}' - cmake --trace -B build_wsl -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DTEST_PGPORT_LOCAL=${{ env.PGPORT }} + timeout 60 cmake --trace -B build_wsl -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DTEST_PGPORT_LOCAL=${{ env.PGPORT }} + echo $? make -C build_wsl isolationchecklocal | tee -a installcheck.log make -C build_wsl regresschecklocal IGNORES="${{ matrix.ignores }}" | tee -a installcheck.log From 810221e8a9a3067988eb7749d9896c47d6744529 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 23 Jan 2025 17:47:09 +0100 Subject: [PATCH 07/18] why doesn't it work --- .github/workflows/windows-build-and-test.yaml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/workflows/windows-build-and-test.yaml b/.github/workflows/windows-build-and-test.yaml index 7d78a054650..b7bc33a1b1d 100644 --- a/.github/workflows/windows-build-and-test.yaml +++ b/.github/workflows/windows-build-and-test.yaml @@ -197,11 +197,21 @@ jobs: run: | export TEST_TABLESPACE1_PREFIX='${{ env.TABLESPACE1 }}' export TEST_TABLESPACE2_PREFIX='${{ env.TABLESPACE2 }}' - timeout 60 cmake --trace -B build_wsl -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DTEST_PGPORT_LOCAL=${{ env.PGPORT }} + sleep 300 && killall -9 cmake & + timeout 60 cmake --trace -B build_wsl -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DTEST_PGPORT_LOCAL=${{ env.PGPORT }} 2>&1 > cmake.log echo $? 
- make -C build_wsl isolationchecklocal | tee -a installcheck.log - make -C build_wsl regresschecklocal IGNORES="${{ matrix.ignores }}" | tee -a installcheck.log +# make -C build_wsl isolationchecklocal | tee -a installcheck.log +# make -C build_wsl regresschecklocal IGNORES="${{ matrix.ignores }}" | tee -a installcheck.log + exit 1 + + - name: Upload CMake Logs (FIXME) + if: always() + uses: actions/upload-artifact@v4 + with: + name: CMake (FIXME) Logs ${{ matrix.pg }} ${{ matrix.os }} ${{ matrix.build_type }} + path: | + cmake.log - name: Setup postgres cluster for TSL tests run: | From 7c1b0d15ce5c317c780f6e7a67e53291be436d80 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 24 Jan 2025 11:25:13 +0100 Subject: [PATCH 08/18] yaml --- .github/workflows/windows-build-and-test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/windows-build-and-test.yaml b/.github/workflows/windows-build-and-test.yaml index b7bc33a1b1d..6759638a950 100644 --- a/.github/workflows/windows-build-and-test.yaml +++ b/.github/workflows/windows-build-and-test.yaml @@ -201,8 +201,8 @@ jobs: timeout 60 cmake --trace -B build_wsl -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DTEST_PGPORT_LOCAL=${{ env.PGPORT }} 2>&1 > cmake.log echo $? 
-# make -C build_wsl isolationchecklocal | tee -a installcheck.log -# make -C build_wsl regresschecklocal IGNORES="${{ matrix.ignores }}" | tee -a installcheck.log + #make -C build_wsl isolationchecklocal | tee -a installcheck.log + #make -C build_wsl regresschecklocal IGNORES="${{ matrix.ignores }}" | tee -a installcheck.log exit 1 - name: Upload CMake Logs (FIXME) From e669acc0006af933575559a0b8fbcb6c9cd73164 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 24 Jan 2025 11:37:58 +0100 Subject: [PATCH 09/18] fix redirect --- .github/workflows/windows-build-and-test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/windows-build-and-test.yaml b/.github/workflows/windows-build-and-test.yaml index 6759638a950..1a702cefb44 100644 --- a/.github/workflows/windows-build-and-test.yaml +++ b/.github/workflows/windows-build-and-test.yaml @@ -197,8 +197,8 @@ jobs: run: | export TEST_TABLESPACE1_PREFIX='${{ env.TABLESPACE1 }}' export TEST_TABLESPACE2_PREFIX='${{ env.TABLESPACE2 }}' - sleep 300 && killall -9 cmake & - timeout 60 cmake --trace -B build_wsl -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DTEST_PGPORT_LOCAL=${{ env.PGPORT }} 2>&1 > cmake.log + sleep 60 && killall -9 cmake & + timeout 60 cmake --trace -B build_wsl -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DTEST_PGPORT_LOCAL=${{ env.PGPORT }} > cmake.log 2>&1 echo $? 
#make -C build_wsl isolationchecklocal | tee -a installcheck.log From 694867bf6861dda918ae69755a3bb6d2419eff76 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 24 Jan 2025 12:08:17 +0100 Subject: [PATCH 10/18] disable on wsl --- tsl/CMakeLists.txt | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tsl/CMakeLists.txt b/tsl/CMakeLists.txt index ffcde553703..b3beae85098 100644 --- a/tsl/CMakeLists.txt +++ b/tsl/CMakeLists.txt @@ -5,10 +5,18 @@ if(COMPRESSION_FUZZING) endif() # We use the UMASH library for hashing in vectorized grouping. If it was not -# explicitly disabled already, detect if we can compile it on this platform. It -# is not tested on Windows and leads to a weird CI freeze, so don't even try for -# now. -if((NOT WIN32) AND ((NOT DEFINED USE_UMASH) OR USE_UMASH)) +# explicitly disabled already, detect if we can compile it on this platform. +# +# It is not tested on Windows and leads to a weird CI freeze, so don't even try +# for now. +# +# In WSL, it somehow freezes the CMake process and the entire GitHub action, +# with no possibility to get the logs. I was unable to debug it after a couple +# of days, so on WSL it's disabled as well. We don't really build for this +# configuration anyway, and just use it to generate and run the tests. +if((NOT WIN32) + AND (NOT (CMAKE_HOST_SYSTEM_VERSION MATCHES "microsoft-standard-WSL2$")) + AND ((NOT DEFINED USE_UMASH) OR USE_UMASH)) # Check whether we can enable the pclmul instruction required for the UMASH # hashing on amd64. 
Shouldn't be done if the user has manually specified the # target architecture, no idea how to detect this, but at least we shouldn't From bc5df6ad4c9d8c9aa7b37b9a12370805538b944e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 24 Jan 2025 12:33:18 +0100 Subject: [PATCH 11/18] try to disable the import part --- tsl/src/import/CMakeLists.txt | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tsl/src/import/CMakeLists.txt b/tsl/src/import/CMakeLists.txt index b938decf792..1810ff1f467 100644 --- a/tsl/src/import/CMakeLists.txt +++ b/tsl/src/import/CMakeLists.txt @@ -1,15 +1,15 @@ -set(SOURCES) +#set(SOURCES) -if(USE_UMASH) - list(APPEND SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/umash.c) -endif() +#if(USE_UMASH) +# list(APPEND SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/umash.c) +#endif() -if(SOURCES) - # Disable clang-tidy for imported code - add_library(target_no_static_code_analysis OBJECT ${SOURCES}) - set_target_properties(target_no_static_code_analysis PROPERTIES C_CLANG_TIDY - "") +#if(SOURCES) +# # Disable clang-tidy for imported code +# add_library(target_no_static_code_analysis OBJECT ${SOURCES}) +# set_target_properties(target_no_static_code_analysis PROPERTIES C_CLANG_TIDY +# "") - target_link_libraries(${TSL_LIBRARY_NAME} - $<TARGET_OBJECTS:target_no_static_code_analysis>) -endif() +# target_link_libraries(${TSL_LIBRARY_NAME} +# $<TARGET_OBJECTS:target_no_static_code_analysis>) +#endif() From f22dda28d1e2bf4e3dcc7fc04cc02bc4b175ea93 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 24 Jan 2025 12:34:14 +0100 Subject: [PATCH 12/18] try to disable the pclmul part --- tsl/CMakeLists.txt | 26 +++++++++++++------------- tsl/src/import/CMakeLists.txt | 24 ++++++++++++------------ 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tsl/CMakeLists.txt b/tsl/CMakeLists.txt index b3beae85098..86cfe06ff05 100644 --- a/tsl/CMakeLists.txt +++ b/tsl/CMakeLists.txt @@ -17,19 +17,19 @@ if((NOT WIN32) AND (NOT
(CMAKE_HOST_SYSTEM_VERSION MATCHES "microsoft-standard-WSL2$")) AND ((NOT DEFINED USE_UMASH) OR USE_UMASH)) - # Check whether we can enable the pclmul instruction required for the UMASH - # hashing on amd64. Shouldn't be done if the user has manually specified the - # target architecture, no idea how to detect this, but at least we shouldn't - # do this when cross-compiling. - if(NOT CMAKE_CROSSCOMPILING) - check_c_compiler_flag(-mpclmul CC_PCLMUL) - if(CC_PCLMUL) - add_compile_options(-mpclmul) - # The "C source compiles" check below doesn't use the global compilation - # flags, so we have to modify its flags separately. - set(CMAKE_REQUIRED_FLAGS -mpclmul) - endif() - endif() +# # Check whether we can enable the pclmul instruction required for the UMASH +# # hashing on amd64. Shouldn't be done if the user has manually specified the +# # target architecture, no idea how to detect this, but at least we shouldn't +# # do this when cross-compiling. +# if(NOT CMAKE_CROSSCOMPILING) +# check_c_compiler_flag(-mpclmul CC_PCLMUL) +# if(CC_PCLMUL) +# add_compile_options(-mpclmul) +# # The "C source compiles" check below doesn't use the global compilation +# # flags, so we have to modify its flags separately. 
+# set(CMAKE_REQUIRED_FLAGS -mpclmul) +# endif() +# endif() set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=implicit-function-declaration") diff --git a/tsl/src/import/CMakeLists.txt b/tsl/src/import/CMakeLists.txt index 1810ff1f467..b938decf792 100644 --- a/tsl/src/import/CMakeLists.txt +++ b/tsl/src/import/CMakeLists.txt @@ -1,15 +1,15 @@ -#set(SOURCES) +set(SOURCES) -#if(USE_UMASH) -# list(APPEND SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/umash.c) -#endif() +if(USE_UMASH) + list(APPEND SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/umash.c) +endif() -#if(SOURCES) -# # Disable clang-tidy for imported code -# add_library(target_no_static_code_analysis OBJECT ${SOURCES}) -# set_target_properties(target_no_static_code_analysis PROPERTIES C_CLANG_TIDY -# "") +if(SOURCES) + # Disable clang-tidy for imported code + add_library(target_no_static_code_analysis OBJECT ${SOURCES}) + set_target_properties(target_no_static_code_analysis PROPERTIES C_CLANG_TIDY + "") -# target_link_libraries(${TSL_LIBRARY_NAME} -# $) -#endif() + target_link_libraries(${TSL_LIBRARY_NAME} + $) +endif() From f2b402445f73c6e131332542dca4f15b2c94386d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 24 Jan 2025 12:34:53 +0100 Subject: [PATCH 13/18] try to disable the check c source part --- tsl/CMakeLists.txt | 68 +++++++++++++++++------------------ tsl/src/import/CMakeLists.txt | 24 ++++++------- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/tsl/CMakeLists.txt b/tsl/CMakeLists.txt index 86cfe06ff05..4958c923783 100644 --- a/tsl/CMakeLists.txt +++ b/tsl/CMakeLists.txt @@ -17,41 +17,41 @@ endif() if((NOT WIN32) AND (NOT (CMAKE_HOST_SYSTEM_VERSION MATCHES "microsoft-standard-WSL2$")) AND ((NOT DEFINED USE_UMASH) OR USE_UMASH)) -# # Check whether we can enable the pclmul instruction required for the UMASH -# # hashing on amd64. 
Shouldn't be done if the user has manually specified the -# # target architecture, no idea how to detect this, but at least we shouldn't -# # do this when cross-compiling. -# if(NOT CMAKE_CROSSCOMPILING) -# check_c_compiler_flag(-mpclmul CC_PCLMUL) -# if(CC_PCLMUL) -# add_compile_options(-mpclmul) -# # The "C source compiles" check below doesn't use the global compilation -# # flags, so we have to modify its flags separately. -# set(CMAKE_REQUIRED_FLAGS -mpclmul) -# endif() -# endif() + # Check whether we can enable the pclmul instruction required for the UMASH + # hashing on amd64. Shouldn't be done if the user has manually specified the + # target architecture, no idea how to detect this, but at least we shouldn't + # do this when cross-compiling. + if(NOT CMAKE_CROSSCOMPILING) + check_c_compiler_flag(-mpclmul CC_PCLMUL) + if(CC_PCLMUL) + add_compile_options(-mpclmul) + # The "C source compiles" check below doesn't use the global compilation + # flags, so we have to modify its flags separately. + set(CMAKE_REQUIRED_FLAGS -mpclmul) + endif() + endif() - set(CMAKE_REQUIRED_FLAGS - "${CMAKE_REQUIRED_FLAGS} -Werror=implicit-function-declaration") - set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) - check_c_source_compiles( - " -#if defined(__PCLMUL__) -#include <immintrin.h> -#include <stdint.h> -/* - * For some reason, this doesn't compile on our i386 CI, but I also can't detect - * it using the standard condition of defined(__x86_64__) && !defined(__ILP32__), - * as described at https://wiki.debian.org/X32Port .
- */ -static void test() { (void) _mm_cvtsi64_si128((uint64_t) 0); } -#elif defined(__ARM_FEATURE_CRYPTO) -/* OK */ -#else -#error Unsupported platform for UMASH -#endif -" - UMASH_SUPPORTED) +# set(CMAKE_REQUIRED_FLAGS +# "${CMAKE_REQUIRED_FLAGS} -Werror=implicit-function-declaration") +# set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +# check_c_source_compiles( +# " +##if defined(__PCLMUL__) +##include +##include +#/* +# * For some reason, this doesn't compile on our i386 CI, but I also can't detect +# * it using the standard condition of defined(__x86_64__) && !defined(__ILP32__), +# * as described at https://wiki.debian.org/X32Port . +# */ +#static void test() { (void) _mm_cvtsi64_si128((uint64_t) 0); } +##elif defined(__ARM_FEATURE_CRYPTO) +#/* OK */ +##else +##error Unsupported platform for UMASH +##endif +#" +# UMASH_SUPPORTED) unset(CMAKE_REQUIRED_FLAGS) unset(CMAKE_TRY_COMPILE_TARGET_TYPE) else() diff --git a/tsl/src/import/CMakeLists.txt b/tsl/src/import/CMakeLists.txt index b938decf792..1810ff1f467 100644 --- a/tsl/src/import/CMakeLists.txt +++ b/tsl/src/import/CMakeLists.txt @@ -1,15 +1,15 @@ -set(SOURCES) +#set(SOURCES) -if(USE_UMASH) - list(APPEND SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/umash.c) -endif() +#if(USE_UMASH) +# list(APPEND SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/umash.c) +#endif() -if(SOURCES) - # Disable clang-tidy for imported code - add_library(target_no_static_code_analysis OBJECT ${SOURCES}) - set_target_properties(target_no_static_code_analysis PROPERTIES C_CLANG_TIDY - "") +#if(SOURCES) +# # Disable clang-tidy for imported code +# add_library(target_no_static_code_analysis OBJECT ${SOURCES}) +# set_target_properties(target_no_static_code_analysis PROPERTIES C_CLANG_TIDY +# "") - target_link_libraries(${TSL_LIBRARY_NAME} - $) -endif() +# target_link_libraries(${TSL_LIBRARY_NAME} +# $) +#endif() From 24df521a680459430db47675fd2594ae9a09564d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: 
Fri, 24 Jan 2025 12:36:47 +0100 Subject: [PATCH 14/18] try to disable all detection --- tsl/CMakeLists.txt | 62 +++++++++++++++++------------------ tsl/src/import/CMakeLists.txt | 24 +++++++------- 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/tsl/CMakeLists.txt b/tsl/CMakeLists.txt index 4958c923783..770ceeab25c 100644 --- a/tsl/CMakeLists.txt +++ b/tsl/CMakeLists.txt @@ -4,32 +4,32 @@ if(COMPRESSION_FUZZING) add_compile_definitions(TS_COMPRESSION_FUZZING=1) endif() -# We use the UMASH library for hashing in vectorized grouping. If it was not -# explicitly disabled already, detect if we can compile it on this platform. -# -# It is not tested on Windows and leads to a weird CI freeze, so don't even try -# for now. -# -# In WSL, it somehow freezes the CMake process and the entire GitHub action, -# with no possibility to get the logs. I was unable to debug it after a couple -# of days, so on WSL it's disabled as well. We don't really build for this -# configuration anyway, and just use it to generate and run the tests. -if((NOT WIN32) - AND (NOT (CMAKE_HOST_SYSTEM_VERSION MATCHES "microsoft-standard-WSL2$")) - AND ((NOT DEFINED USE_UMASH) OR USE_UMASH)) - # Check whether we can enable the pclmul instruction required for the UMASH - # hashing on amd64. Shouldn't be done if the user has manually specified the - # target architecture, no idea how to detect this, but at least we shouldn't - # do this when cross-compiling. - if(NOT CMAKE_CROSSCOMPILING) - check_c_compiler_flag(-mpclmul CC_PCLMUL) - if(CC_PCLMUL) - add_compile_options(-mpclmul) - # The "C source compiles" check below doesn't use the global compilation - # flags, so we have to modify its flags separately. - set(CMAKE_REQUIRED_FLAGS -mpclmul) - endif() - endif() +## We use the UMASH library for hashing in vectorized grouping. If it was not +## explicitly disabled already, detect if we can compile it on this platform. 
+## +## It is not tested on Windows and leads to a weird CI freeze, so don't even try +## for now. +## +## In WSL, it somehow freezes the CMake process and the entire GitHub action, +## with no possibility to get the logs. I was unable to debug it after a couple +## of days, so on WSL it's disabled as well. We don't really build for this +## configuration anyway, and just use it to generate and run the tests. +#if((NOT WIN32) +# AND (NOT (CMAKE_HOST_SYSTEM_VERSION MATCHES "microsoft-standard-WSL2$")) +# AND ((NOT DEFINED USE_UMASH) OR USE_UMASH)) +# # Check whether we can enable the pclmul instruction required for the UMASH +# # hashing on amd64. Shouldn't be done if the user has manually specified the +# # target architecture, no idea how to detect this, but at least we shouldn't +# # do this when cross-compiling. +# if(NOT CMAKE_CROSSCOMPILING) +# check_c_compiler_flag(-mpclmul CC_PCLMUL) +# if(CC_PCLMUL) +# add_compile_options(-mpclmul) +# # The "C source compiles" check below doesn't use the global compilation +# # flags, so we have to modify its flags separately. 
+# set(CMAKE_REQUIRED_FLAGS -mpclmul) +# endif() +# endif() # set(CMAKE_REQUIRED_FLAGS # "${CMAKE_REQUIRED_FLAGS} -Werror=implicit-function-declaration") @@ -52,11 +52,11 @@ if((NOT WIN32) ##endif #" # UMASH_SUPPORTED) - unset(CMAKE_REQUIRED_FLAGS) - unset(CMAKE_TRY_COMPILE_TARGET_TYPE) -else() - set(UMASH_SUPPORTED OFF) -endif() +# unset(CMAKE_REQUIRED_FLAGS) +# unset(CMAKE_TRY_COMPILE_TARGET_TYPE) +#else() +# set(UMASH_SUPPORTED OFF) +#endif() option(USE_UMASH "Use the UMASH hash for string and multi-column vectorized grouping" diff --git a/tsl/src/import/CMakeLists.txt b/tsl/src/import/CMakeLists.txt index 1810ff1f467..b938decf792 100644 --- a/tsl/src/import/CMakeLists.txt +++ b/tsl/src/import/CMakeLists.txt @@ -1,15 +1,15 @@ -#set(SOURCES) +set(SOURCES) -#if(USE_UMASH) -# list(APPEND SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/umash.c) -#endif() +if(USE_UMASH) + list(APPEND SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/umash.c) +endif() -#if(SOURCES) -# # Disable clang-tidy for imported code -# add_library(target_no_static_code_analysis OBJECT ${SOURCES}) -# set_target_properties(target_no_static_code_analysis PROPERTIES C_CLANG_TIDY -# "") +if(SOURCES) + # Disable clang-tidy for imported code + add_library(target_no_static_code_analysis OBJECT ${SOURCES}) + set_target_properties(target_no_static_code_analysis PROPERTIES C_CLANG_TIDY + "") -# target_link_libraries(${TSL_LIBRARY_NAME} -# $) -#endif() + target_link_libraries(${TSL_LIBRARY_NAME} + $) +endif() From 034a74325a2c5968ba80d7699a92f669c0819545 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 24 Jan 2025 13:02:22 +0100 Subject: [PATCH 15/18] no c compilation check but with import --- tsl/CMakeLists.txt | 62 +++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/tsl/CMakeLists.txt b/tsl/CMakeLists.txt index 770ceeab25c..4958c923783 100644 --- a/tsl/CMakeLists.txt +++ b/tsl/CMakeLists.txt @@ -4,32 +4,32 @@ 
if(COMPRESSION_FUZZING) add_compile_definitions(TS_COMPRESSION_FUZZING=1) endif() -## We use the UMASH library for hashing in vectorized grouping. If it was not -## explicitly disabled already, detect if we can compile it on this platform. -## -## It is not tested on Windows and leads to a weird CI freeze, so don't even try -## for now. -## -## In WSL, it somehow freezes the CMake process and the entire GitHub action, -## with no possibility to get the logs. I was unable to debug it after a couple -## of days, so on WSL it's disabled as well. We don't really build for this -## configuration anyway, and just use it to generate and run the tests. -#if((NOT WIN32) -# AND (NOT (CMAKE_HOST_SYSTEM_VERSION MATCHES "microsoft-standard-WSL2$")) -# AND ((NOT DEFINED USE_UMASH) OR USE_UMASH)) -# # Check whether we can enable the pclmul instruction required for the UMASH -# # hashing on amd64. Shouldn't be done if the user has manually specified the -# # target architecture, no idea how to detect this, but at least we shouldn't -# # do this when cross-compiling. -# if(NOT CMAKE_CROSSCOMPILING) -# check_c_compiler_flag(-mpclmul CC_PCLMUL) -# if(CC_PCLMUL) -# add_compile_options(-mpclmul) -# # The "C source compiles" check below doesn't use the global compilation -# # flags, so we have to modify its flags separately. -# set(CMAKE_REQUIRED_FLAGS -mpclmul) -# endif() -# endif() +# We use the UMASH library for hashing in vectorized grouping. If it was not +# explicitly disabled already, detect if we can compile it on this platform. +# +# It is not tested on Windows and leads to a weird CI freeze, so don't even try +# for now. +# +# In WSL, it somehow freezes the CMake process and the entire GitHub action, +# with no possibility to get the logs. I was unable to debug it after a couple +# of days, so on WSL it's disabled as well. We don't really build for this +# configuration anyway, and just use it to generate and run the tests. 
+if((NOT WIN32) + AND (NOT (CMAKE_HOST_SYSTEM_VERSION MATCHES "microsoft-standard-WSL2$")) + AND ((NOT DEFINED USE_UMASH) OR USE_UMASH)) + # Check whether we can enable the pclmul instruction required for the UMASH + # hashing on amd64. Shouldn't be done if the user has manually specified the + # target architecture, no idea how to detect this, but at least we shouldn't + # do this when cross-compiling. + if(NOT CMAKE_CROSSCOMPILING) + check_c_compiler_flag(-mpclmul CC_PCLMUL) + if(CC_PCLMUL) + add_compile_options(-mpclmul) + # The "C source compiles" check below doesn't use the global compilation + # flags, so we have to modify its flags separately. + set(CMAKE_REQUIRED_FLAGS -mpclmul) + endif() + endif() # set(CMAKE_REQUIRED_FLAGS # "${CMAKE_REQUIRED_FLAGS} -Werror=implicit-function-declaration") @@ -52,11 +52,11 @@ endif() ##endif #" # UMASH_SUPPORTED) -# unset(CMAKE_REQUIRED_FLAGS) -# unset(CMAKE_TRY_COMPILE_TARGET_TYPE) -#else() -# set(UMASH_SUPPORTED OFF) -#endif() + unset(CMAKE_REQUIRED_FLAGS) + unset(CMAKE_TRY_COMPILE_TARGET_TYPE) +else() + set(UMASH_SUPPORTED OFF) +endif() option(USE_UMASH "Use the UMASH hash for string and multi-column vectorized grouping" From 865ed77fed7b27c191bb1322944d8bf544b37a78 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 24 Jan 2025 13:04:16 +0100 Subject: [PATCH 16/18] compile an executable for test --- tsl/CMakeLists.txt | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/tsl/CMakeLists.txt b/tsl/CMakeLists.txt index 4958c923783..579281e66a6 100644 --- a/tsl/CMakeLists.txt +++ b/tsl/CMakeLists.txt @@ -31,27 +31,27 @@ if((NOT WIN32) endif() endif() -# set(CMAKE_REQUIRED_FLAGS -# "${CMAKE_REQUIRED_FLAGS} -Werror=implicit-function-declaration") -# set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) -# check_c_source_compiles( -# " -##if defined(__PCLMUL__) -##include -##include -#/* -# * For some reason, this doesn't 
compile on our i386 CI, but I also can't detect -# * it using the standard condition of defined(__x86_64__) && !defined(__ILP32__), -# * as described at https://wiki.debian.org/X32Port . -# */ -#static void test() { (void) _mm_cvtsi64_si128((uint64_t) 0); } -##elif defined(__ARM_FEATURE_CRYPTO) -#/* OK */ -##else -##error Unsupported platform for UMASH -##endif -#" -# UMASH_SUPPORTED) + set(CMAKE_REQUIRED_FLAGS + "${CMAKE_REQUIRED_FLAGS} -Werror=implicit-function-declaration") + check_c_source_compiles( + " +#if defined(__PCLMUL__) +#include <immintrin.h> +#include <stdint.h> +/* + * For some reason, this doesn't compile on our i386 CI, but I also can't detect + * it using the standard condition of defined(__x86_64__) && !defined(__ILP32__), + * as described at https://wiki.debian.org/X32Port . + */ +static void test() { (void) _mm_cvtsi64_si128((uint64_t) 0); } +#elif defined(__ARM_FEATURE_CRYPTO) +/* OK */ +#else +#error Unsupported platform for UMASH +#endif +void main(void) {}; +" + UMASH_SUPPORTED) unset(CMAKE_REQUIRED_FLAGS) unset(CMAKE_TRY_COMPILE_TARGET_TYPE) else() set(UMASH_SUPPORTED OFF) endif() option(USE_UMASH "Use the UMASH hash for string and multi-column vectorized grouping" From e9abfcbc6312d8c0636b4901015a2cd20cff2285 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 24 Jan 2025 13:16:24 +0100 Subject: [PATCH 17/18] cleanup --- .github/workflows/windows-build-and-test.yaml | 25 +++---------------- tsl/CMakeLists.txt | 1 - 2 files changed, 3 insertions(+), 23 deletions(-) diff --git a/.github/workflows/windows-build-and-test.yaml b/.github/workflows/windows-build-and-test.yaml index 1a702cefb44..fab95ef71ed 100644 --- a/.github/workflows/windows-build-and-test.yaml +++ b/.github/workflows/windows-build-and-test.yaml @@ -184,34 +184,15 @@ jobs: yes | /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh || true apt-get install -y --force-yes postgresql-server-dev-${{ matrix.pg }} - - name: FIXME reinstall cmake - shell: wsl-bash {0} - run: | - cmake -version - apt-get purge -y cmake - apt-get install -y cmake - cmake -version - - name:
Run tests shell: wsl-bash {0} run: | export TEST_TABLESPACE1_PREFIX='${{ env.TABLESPACE1 }}' export TEST_TABLESPACE2_PREFIX='${{ env.TABLESPACE2 }}' - sleep 60 && killall -9 cmake & - timeout 60 cmake --trace -B build_wsl -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DTEST_PGPORT_LOCAL=${{ env.PGPORT }} > cmake.log 2>&1 - echo $? - - #make -C build_wsl isolationchecklocal | tee -a installcheck.log - #make -C build_wsl regresschecklocal IGNORES="${{ matrix.ignores }}" | tee -a installcheck.log - exit 1 + cmake -B build_wsl -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DTEST_PGPORT_LOCAL=${{ env.PGPORT }} - - name: Upload CMake Logs (FIXME) - if: always() - uses: actions/upload-artifact@v4 - with: - name: CMake (FIXME) Logs ${{ matrix.pg }} ${{ matrix.os }} ${{ matrix.build_type }} - path: | - cmake.log + make -C build_wsl isolationchecklocal | tee -a installcheck.log + make -C build_wsl regresschecklocal IGNORES="${{ matrix.ignores }}" | tee -a installcheck.log - name: Setup postgres cluster for TSL tests run: | diff --git a/tsl/CMakeLists.txt b/tsl/CMakeLists.txt index 579281e66a6..83d7c546f92 100644 --- a/tsl/CMakeLists.txt +++ b/tsl/CMakeLists.txt @@ -53,7 +53,6 @@ void main(void) {}; " UMASH_SUPPORTED) unset(CMAKE_REQUIRED_FLAGS) - unset(CMAKE_TRY_COMPILE_TARGET_TYPE) else() set(UMASH_SUPPORTED OFF) endif() From 1a4b7975c7c8a4188752c99b9bd83eda4275f015 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 24 Jan 2025 13:18:58 +0100 Subject: [PATCH 18/18] simplify? --- tsl/CMakeLists.txt | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/tsl/CMakeLists.txt b/tsl/CMakeLists.txt index 83d7c546f92..4dde58522d1 100644 --- a/tsl/CMakeLists.txt +++ b/tsl/CMakeLists.txt @@ -6,17 +6,7 @@ endif() # We use the UMASH library for hashing in vectorized grouping. If it was not # explicitly disabled already, detect if we can compile it on this platform. 
-# -# It is not tested on Windows and leads to a weird CI freeze, so don't even try -# for now. -# -# In WSL, it somehow freezes the CMake process and the entire GitHub action, -# with no possibility to get the logs. I was unable to debug it after a couple -# of days, so on WSL it's disabled as well. We don't really build for this -# configuration anyway, and just use it to generate and run the tests. -if((NOT WIN32) - AND (NOT (CMAKE_HOST_SYSTEM_VERSION MATCHES "microsoft-standard-WSL2$")) - AND ((NOT DEFINED USE_UMASH) OR USE_UMASH)) +if((NOT DEFINED USE_UMASH) OR USE_UMASH) # Check whether we can enable the pclmul instruction required for the UMASH # hashing on amd64. Shouldn't be done if the user has manually specified the # target architecture, no idea how to detect this, but at least we shouldn't