From 16e20715d5e8842f8dd4ecdbf2e5b9c0e9cc2fda Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Sun, 26 Jan 2025 17:23:24 +0100 Subject: [PATCH] Explicitly verify dependency between architectures (like sse3 implies sse2) Also add CI to make sure the assumptions we make are correct on all architectures supported by GCC. Fix #1070 --- .github/workflows/arch-consistency-check.yml | 15 +++ include/xsimd/types/xsimd_avx2_register.hpp | 5 + .../xsimd/types/xsimd_avx512bw_register.hpp | 4 + .../xsimd/types/xsimd_avx512cd_register.hpp | 4 + .../xsimd/types/xsimd_avx512dq_register.hpp | 4 + .../xsimd/types/xsimd_avx512er_register.hpp | 4 + .../xsimd/types/xsimd_avx512f_register.hpp | 4 + .../xsimd/types/xsimd_avx512ifma_register.hpp | 4 + .../xsimd/types/xsimd_avx512pf_register.hpp | 4 + .../xsimd/types/xsimd_avx512vbmi_register.hpp | 4 + .../xsimd_avx512vnni_avx512bw_register.hpp | 4 + .../xsimd_avx512vnni_avx512vbmi_register.hpp | 4 + include/xsimd/types/xsimd_avx_register.hpp | 4 + .../xsimd/types/xsimd_avxvnni_register.hpp | 5 + .../xsimd/types/xsimd_fma3_avx2_register.hpp | 5 + .../xsimd/types/xsimd_fma3_avx_register.hpp | 5 + .../xsimd/types/xsimd_fma3_sse_register.hpp | 5 + include/xsimd/types/xsimd_fma4_register.hpp | 5 + .../types/xsimd_i8mm_neon64_register.hpp | 5 + include/xsimd/types/xsimd_neon64_register.hpp | 4 + include/xsimd/types/xsimd_sse3_register.hpp | 5 + include/xsimd/types/xsimd_sse4_1_register.hpp | 5 + include/xsimd/types/xsimd_sse4_2_register.hpp | 5 + include/xsimd/types/xsimd_ssse3_register.hpp | 5 + test/check_arch.sh | 123 ++++++++++++++++++ 25 files changed, 241 insertions(+) create mode 100644 .github/workflows/arch-consistency-check.yml create mode 100644 test/check_arch.sh diff --git a/.github/workflows/arch-consistency-check.yml b/.github/workflows/arch-consistency-check.yml new file mode 100644 index 000000000..dc57879dc --- /dev/null +++ b/.github/workflows/arch-consistency-check.yml @@ -0,0 +1,15 @@ +name: Arch consistency check
+on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.job }}-${{ github.ref }} + cancel-in-progress: true +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout xsimd + uses: actions/checkout@v3 + - name: Install dependencies + run: sudo apt install g++ + - name: Check architecture consistency + run: cd test && sh ./check_arch.sh diff --git a/include/xsimd/types/xsimd_avx2_register.hpp b/include/xsimd/types/xsimd_avx2_register.hpp index 264b7c3ed..a53132b94 100644 --- a/include/xsimd/types/xsimd_avx2_register.hpp +++ b/include/xsimd/types/xsimd_avx2_register.hpp @@ -29,6 +29,11 @@ namespace xsimd }; #if XSIMD_WITH_AVX2 + +#if !XSIMD_WITH_AVX +#error "architecture inconsistency: avx2 requires avx" +#endif + namespace types { XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx2, avx); diff --git a/include/xsimd/types/xsimd_avx512bw_register.hpp b/include/xsimd/types/xsimd_avx512bw_register.hpp index 9d4d33b64..1aacc8f0e 100644 --- a/include/xsimd/types/xsimd_avx512bw_register.hpp +++ b/include/xsimd/types/xsimd_avx512bw_register.hpp @@ -31,6 +31,10 @@ namespace xsimd #if XSIMD_WITH_AVX512BW +#if !XSIMD_WITH_AVX512DQ +#error "architecture inconsistency: avx512bw requires avx512dq" +#endif + namespace types { template diff --git a/include/xsimd/types/xsimd_avx512cd_register.hpp b/include/xsimd/types/xsimd_avx512cd_register.hpp index cf0601395..7c32d5faa 100644 --- a/include/xsimd/types/xsimd_avx512cd_register.hpp +++ b/include/xsimd/types/xsimd_avx512cd_register.hpp @@ -31,6 +31,10 @@ namespace xsimd #if XSIMD_WITH_AVX512CD +#if !XSIMD_WITH_AVX512F +#error "architecture inconsistency: avx512cd requires avx512f" +#endif + namespace types { template diff --git a/include/xsimd/types/xsimd_avx512dq_register.hpp b/include/xsimd/types/xsimd_avx512dq_register.hpp index f8a8dc543..833ff8c7a 100644 --- a/include/xsimd/types/xsimd_avx512dq_register.hpp +++ b/include/xsimd/types/xsimd_avx512dq_register.hpp @@ -31,6 +31,10 @@ namespace xsimd #if
XSIMD_WITH_AVX512DQ +#if !XSIMD_WITH_AVX512CD +#error "architecture inconsistency: avx512dq requires avx512cd" +#endif + namespace types { template diff --git a/include/xsimd/types/xsimd_avx512er_register.hpp b/include/xsimd/types/xsimd_avx512er_register.hpp index a52bd0064..aa7d3f9a0 100644 --- a/include/xsimd/types/xsimd_avx512er_register.hpp +++ b/include/xsimd/types/xsimd_avx512er_register.hpp @@ -31,6 +31,10 @@ namespace xsimd #if XSIMD_WITH_AVX512ER +#if !XSIMD_WITH_AVX512CD +#error "architecture inconsistency: avx512er requires avx512cd" +#endif + namespace types { template diff --git a/include/xsimd/types/xsimd_avx512f_register.hpp b/include/xsimd/types/xsimd_avx512f_register.hpp index 1a11b6c92..7f4bda929 100644 --- a/include/xsimd/types/xsimd_avx512f_register.hpp +++ b/include/xsimd/types/xsimd_avx512f_register.hpp @@ -33,6 +33,10 @@ namespace xsimd #if XSIMD_WITH_AVX512F +#if !XSIMD_WITH_AVX2 +#error "architecture inconsistency: avx512f requires avx2" +#endif + namespace types { template diff --git a/include/xsimd/types/xsimd_avx512ifma_register.hpp b/include/xsimd/types/xsimd_avx512ifma_register.hpp index a8bc8885f..9c0953ca5 100644 --- a/include/xsimd/types/xsimd_avx512ifma_register.hpp +++ b/include/xsimd/types/xsimd_avx512ifma_register.hpp @@ -31,6 +31,10 @@ namespace xsimd #if XSIMD_WITH_AVX512IFMA +#if !XSIMD_WITH_AVX512BW +#error "architecture inconsistency: avx512ifma requires avx512bw" +#endif + namespace types { template diff --git a/include/xsimd/types/xsimd_avx512pf_register.hpp b/include/xsimd/types/xsimd_avx512pf_register.hpp index 4838a8a46..c2df20982 100644 --- a/include/xsimd/types/xsimd_avx512pf_register.hpp +++ b/include/xsimd/types/xsimd_avx512pf_register.hpp @@ -31,6 +31,10 @@ namespace xsimd #if XSIMD_WITH_AVX512PF +#if !XSIMD_WITH_AVX512ER +#error "architecture inconsistency: avx512pf requires avx512er" +#endif + namespace types { template diff --git a/include/xsimd/types/xsimd_avx512vbmi_register.hpp 
b/include/xsimd/types/xsimd_avx512vbmi_register.hpp index 40f51e9b1..1ebe48a4f 100644 --- a/include/xsimd/types/xsimd_avx512vbmi_register.hpp +++ b/include/xsimd/types/xsimd_avx512vbmi_register.hpp @@ -31,6 +31,10 @@ namespace xsimd #if XSIMD_WITH_AVX512VBMI +#if !XSIMD_WITH_AVX512IFMA +#error "architecture inconsistency: avx512vbmi requires avx512ifma" +#endif + namespace types { template diff --git a/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp b/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp index a19b949f8..9fdb88219 100644 --- a/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +++ b/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp @@ -34,6 +34,10 @@ namespace xsimd #if XSIMD_WITH_AVX512VNNI_AVX512BW +#if !XSIMD_WITH_AVX512BW +#error "architecture inconsistency: avx512vnni+avx512bw requires avx512bw" +#endif + namespace types { template diff --git a/include/xsimd/types/xsimd_avx512vnni_avx512vbmi_register.hpp b/include/xsimd/types/xsimd_avx512vnni_avx512vbmi_register.hpp index 0a6b45f76..adfaff5b2 100644 --- a/include/xsimd/types/xsimd_avx512vnni_avx512vbmi_register.hpp +++ b/include/xsimd/types/xsimd_avx512vnni_avx512vbmi_register.hpp @@ -34,6 +34,10 @@ namespace xsimd #if XSIMD_WITH_AVX512VNNI_AVX512VBMI +#if !XSIMD_WITH_AVX512VBMI +#error "architecture inconsistency: avx512vnni+avx512vbmi requires avx512vbmi" +#endif + namespace types { template diff --git a/include/xsimd/types/xsimd_avx_register.hpp b/include/xsimd/types/xsimd_avx_register.hpp index 7357304d5..0cd423473 100644 --- a/include/xsimd/types/xsimd_avx_register.hpp +++ b/include/xsimd/types/xsimd_avx_register.hpp @@ -34,6 +34,10 @@ namespace xsimd #if XSIMD_WITH_AVX +#if !XSIMD_WITH_SSE4_2 +#error "architecture inconsistency: avx requires sse4.2" +#endif + #include namespace xsimd diff --git a/include/xsimd/types/xsimd_avxvnni_register.hpp b/include/xsimd/types/xsimd_avxvnni_register.hpp index 419547b1c..e1e1ee964 100644 --- 
a/include/xsimd/types/xsimd_avxvnni_register.hpp +++ b/include/xsimd/types/xsimd_avxvnni_register.hpp @@ -29,6 +29,11 @@ namespace xsimd }; #if XSIMD_WITH_AVXVNNI + +#if !XSIMD_WITH_AVX2 +#error "architecture inconsistency: avxvnni requires avx2" +#endif + namespace types { XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avxvnni, avx2); diff --git a/include/xsimd/types/xsimd_fma3_avx2_register.hpp b/include/xsimd/types/xsimd_fma3_avx2_register.hpp index cf3e26d08..081a4c2ac 100644 --- a/include/xsimd/types/xsimd_fma3_avx2_register.hpp +++ b/include/xsimd/types/xsimd_fma3_avx2_register.hpp @@ -33,6 +33,11 @@ namespace xsimd }; #if XSIMD_WITH_FMA3_AVX2 + +#if !XSIMD_WITH_AVX2 +#error "architecture inconsistency: fma3+avx2 requires avx2" +#endif + namespace types { diff --git a/include/xsimd/types/xsimd_fma3_avx_register.hpp b/include/xsimd/types/xsimd_fma3_avx_register.hpp index 5012d25a0..c03430cec 100644 --- a/include/xsimd/types/xsimd_fma3_avx_register.hpp +++ b/include/xsimd/types/xsimd_fma3_avx_register.hpp @@ -33,6 +33,11 @@ namespace xsimd }; #if XSIMD_WITH_FMA3_AVX + +#if !XSIMD_WITH_AVX +#error "architecture inconsistency: fma3+avx requires avx" +#endif + namespace types { diff --git a/include/xsimd/types/xsimd_fma3_sse_register.hpp b/include/xsimd/types/xsimd_fma3_sse_register.hpp index 87ebc27b5..f13039b75 100644 --- a/include/xsimd/types/xsimd_fma3_sse_register.hpp +++ b/include/xsimd/types/xsimd_fma3_sse_register.hpp @@ -33,6 +33,11 @@ namespace xsimd }; #if XSIMD_WITH_FMA3_SSE + +#if !XSIMD_WITH_SSE4_2 +#error "architecture inconsistency: fma3+sse4.2 requires sse4.2" +#endif + namespace types { diff --git a/include/xsimd/types/xsimd_fma4_register.hpp b/include/xsimd/types/xsimd_fma4_register.hpp index 70f889abb..6c649cb2a 100644 --- a/include/xsimd/types/xsimd_fma4_register.hpp +++ b/include/xsimd/types/xsimd_fma4_register.hpp @@ -33,6 +33,11 @@ namespace xsimd }; #if XSIMD_WITH_FMA4 + +#if !XSIMD_WITH_SSE4_2 +#error "architecture inconsistency: fma4 requires sse4.2" 
+#endif + namespace types { diff --git a/include/xsimd/types/xsimd_i8mm_neon64_register.hpp b/include/xsimd/types/xsimd_i8mm_neon64_register.hpp index 0e2b42d8e..39945eae7 100644 --- a/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +++ b/include/xsimd/types/xsimd_i8mm_neon64_register.hpp @@ -33,6 +33,11 @@ namespace xsimd }; #if XSIMD_WITH_I8MM_NEON64 + +#if !XSIMD_WITH_NEON64 +#error "architecture inconsistency: i8mm+neon64 requires neon64" +#endif + namespace types { diff --git a/include/xsimd/types/xsimd_neon64_register.hpp b/include/xsimd/types/xsimd_neon64_register.hpp index 709f601a3..7fa0b2ce5 100644 --- a/include/xsimd/types/xsimd_neon64_register.hpp +++ b/include/xsimd/types/xsimd_neon64_register.hpp @@ -32,6 +32,10 @@ namespace xsimd #if XSIMD_WITH_NEON64 +#if !XSIMD_WITH_NEON +#error "architecture inconsistency: neon64 requires neon" +#endif + namespace types { XSIMD_DECLARE_SIMD_REGISTER_ALIAS(neon64, neon); diff --git a/include/xsimd/types/xsimd_sse3_register.hpp b/include/xsimd/types/xsimd_sse3_register.hpp index 6f216bb81..9a0f7ef0e 100644 --- a/include/xsimd/types/xsimd_sse3_register.hpp +++ b/include/xsimd/types/xsimd_sse3_register.hpp @@ -33,6 +33,11 @@ namespace xsimd }; #if XSIMD_WITH_SSE3 + +#if !XSIMD_WITH_SSE2 +#error "architecture inconsistency: sse3 requires sse2" +#endif + namespace types { diff --git a/include/xsimd/types/xsimd_sse4_1_register.hpp b/include/xsimd/types/xsimd_sse4_1_register.hpp index f7f6c0657..e6c9a69d2 100644 --- a/include/xsimd/types/xsimd_sse4_1_register.hpp +++ b/include/xsimd/types/xsimd_sse4_1_register.hpp @@ -33,6 +33,11 @@ namespace xsimd }; #if XSIMD_WITH_SSE4_1 + +#if !XSIMD_WITH_SSSE3 +#error "architecture inconsistency: sse4.1 requires ssse3" +#endif + namespace types { XSIMD_DECLARE_SIMD_REGISTER_ALIAS(sse4_1, ssse3); diff --git a/include/xsimd/types/xsimd_sse4_2_register.hpp b/include/xsimd/types/xsimd_sse4_2_register.hpp index e92e49872..97006e236 100644 --- 
a/include/xsimd/types/xsimd_sse4_2_register.hpp +++ b/include/xsimd/types/xsimd_sse4_2_register.hpp @@ -33,6 +33,11 @@ namespace xsimd }; #if XSIMD_WITH_SSE4_2 + +#if !XSIMD_WITH_SSE4_1 +#error "architecture inconsistency: sse4.2 requires sse4.1" +#endif + namespace types { XSIMD_DECLARE_SIMD_REGISTER_ALIAS(sse4_2, sse4_1); diff --git a/include/xsimd/types/xsimd_ssse3_register.hpp b/include/xsimd/types/xsimd_ssse3_register.hpp index fc1c0f82d..8c72ebe1b 100644 --- a/include/xsimd/types/xsimd_ssse3_register.hpp +++ b/include/xsimd/types/xsimd_ssse3_register.hpp @@ -33,6 +33,11 @@ namespace xsimd }; #if XSIMD_WITH_SSSE3 + +#if !XSIMD_WITH_SSE3 +#error "architecture inconsistency: ssse3 requires sse3" +#endif + namespace types { XSIMD_DECLARE_SIMD_REGISTER_ALIAS(ssse3, sse3); diff --git a/test/check_arch.sh b/test/check_arch.sh new file mode 100644 index 000000000..524abcbe6 --- /dev/null +++ b/test/check_arch.sh @@ -0,0 +1,123 @@ +#!/bin/sh +set -e +CXX=g++ +printf "int main() { return 0;}" > sanity_check.cpp +printf "#include <xsimd/xsimd.hpp>\nint main() { return 0;}" > xsimd_check.cpp + +sed -n '/x86[-]64/,$ p' "$0" | \ + while read -r arch; do \ + if echo "$arch" | grep -q '#' ; then continue; fi ; \ + echo "# $arch" ; \ + $CXX -w -march="$arch" sanity_check.cpp -fsyntax-only ; \ + $CXX -w -I../include -march="$arch" xsimd_check.cpp -fsyntax-only ; \ + done + +rm sanity_check.cpp xsimd_check.cpp + +exit 0 +# The commented archs below are not x86 64 variant and thus are ignored + +x86-64 +x86-64 +x86-64-v2 +x86-64-v3 +x86-64-v4 +#i386 +#i486 +#i586 +#pentium +#lakemont +#pentium-mmx +#pentiumpro +#i686 +#pentium2 +#pentium3 +#pentium3m +#pentium-m +#pentium4 +#pentium4m +#prescott +nocona +core2 +nehalem +westmere +sandybridge +ivybridge +haswell +broadwell +skylake +bonnell +silvermont +goldmont +goldmont-plus +tremont +sierraforest +grandridge +clearwaterforest +knl +knm +skylake-avx512 +cannonlake +icelake-client +icelake-server +cascadelake +cooperlake +tigerlake +sapphirerapids
+alderlake +raptorlake +meteorlake +rocketlake +graniterapids +graniterapids-d +arrowlake +arrowlake-s +lunarlake +pantherlake +#k6 +#k6-2 +#k6-3 +#athlon +#athlon-tbird +#athlon-4 +#athlon-xp +#athlon-mp +k8 +opteron +athlon64 +athlon-fx +k8-sse3 +opteron-sse3 +athlon64-sse3 +amdfam10 +barcelona +bdver1 +bdver2 +bdver3 +bdver4 +znver1 +znver2 +znver3 +znver4 +znver5 +btver1 +btver2 +#winchip-c6 +#winchip2 +#c3 +#c3-2 +#c7 +#samuel-2 +#nehemiah +#esther +eden-x2 +eden-x4 +nano +nano-1000 +nano-2000 +nano-3000 +nano-x2 +nano-x4 +lujiazui +yongfeng +#geode