From dd81ecb3d3b29fadc920725a8f1cffc67f6b0d0d Mon Sep 17 00:00:00 2001 From: Damien L-G Date: Mon, 19 Jun 2023 14:42:05 -0400 Subject: [PATCH] Merge pull request #6223 from masterleinad/fix_simd_on_gpus Fix SIMD support on GPUs --- cmake/kokkos_arch.cmake | 21 ++++++--- simd/src/Kokkos_SIMD_AVX2.hpp | 54 +++++++++++++++-------- simd/src/Kokkos_SIMD_AVX512.hpp | 78 +++++++++++++++++++++------------ simd/src/Kokkos_SIMD_Common.hpp | 38 +++++----------- simd/src/Kokkos_SIMD_NEON.hpp | 56 ++++++++++++++--------- simd/src/Kokkos_SIMD_Scalar.hpp | 63 +++++++++++++++++++++----- simd/unit_tests/TestSIMD.cpp | 9 ++-- 7 files changed, 206 insertions(+), 113 deletions(-) diff --git a/cmake/kokkos_arch.cmake b/cmake/kokkos_arch.cmake index 6aadc9cbeb..d64726307b 100644 --- a/cmake/kokkos_arch.cmake +++ b/cmake/kokkos_arch.cmake @@ -406,8 +406,10 @@ IF (KOKKOS_ARCH_SKL) ENDIF() IF (KOKKOS_ARCH_SKX) - #avx512-xeon - SET(KOKKOS_ARCH_AVX512XEON ON) + # FIXME_NVHPC nvc++ doesn't seem to support AVX512. + IF (NOT KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC) + SET(KOKKOS_ARCH_AVX512XEON ON) + ENDIF() COMPILER_SPECIFIC_FLAGS( COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID Cray NO-VALUE-SPECIFIED @@ -419,7 +421,10 @@ IF (KOKKOS_ARCH_SKX) ENDIF() IF (KOKKOS_ARCH_ICL) - SET(KOKKOS_ARCH_AVX512XEON ON) + # FIXME_NVHPC nvc++ doesn't seem to support AVX512. + IF (NOT KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC) + SET(KOKKOS_ARCH_AVX512XEON ON) + ENDIF() COMPILER_SPECIFIC_FLAGS( COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID MSVC /arch:AVX512 @@ -428,7 +433,10 @@ IF (KOKKOS_ARCH_ICL) ENDIF() IF (KOKKOS_ARCH_ICX) - SET(KOKKOS_ARCH_AVX512XEON ON) + # FIXME_NVHPC nvc++ doesn't seem to support AVX512. + IF (NOT KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC) + SET(KOKKOS_ARCH_AVX512XEON ON) + ENDIF() COMPILER_SPECIFIC_FLAGS( COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID MSVC /arch:AVX512 @@ -437,7 +445,10 @@ IF (KOKKOS_ARCH_ICX) ENDIF() IF (KOKKOS_ARCH_SPR) - SET(KOKKOS_ARCH_AVX512XEON ON) + # FIXME_NVHPC nvc++ doesn't seem to support AVX512. + IF (NOT KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC) + SET(KOKKOS_ARCH_AVX512XEON ON) + ENDIF() COMPILER_SPECIFIC_FLAGS( COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID MSVC /arch:AVX512 diff --git a/simd/src/Kokkos_SIMD_AVX2.hpp b/simd/src/Kokkos_SIMD_AVX2.hpp index 8174d27490..1c4ff4765d 100644 --- a/simd/src/Kokkos_SIMD_AVX2.hpp +++ b/simd/src/Kokkos_SIMD_AVX2.hpp @@ -589,7 +589,7 @@ class simd> { std::is_invocable_r_v>, bool> = false> - KOKKOS_FORCEINLINE_FUNCTION simd(G&& gen) + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(G&& gen) : m_value(_mm_setr_epi32(gen(std::integral_constant()), gen(std::integral_constant()), gen(std::integral_constant()), @@ -700,7 +700,7 @@ class simd> { std::is_invocable_r_v>, bool> = false> - KOKKOS_FORCEINLINE_FUNCTION simd(G&& gen) + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(G&& gen) : m_value(_mm256_setr_epi64x( gen(std::integral_constant()), gen(std::integral_constant()), @@ -822,7 +822,7 @@ class simd> { std::is_invocable_r_v>, bool> = false> - KOKKOS_FORCEINLINE_FUNCTION simd(G&& gen) + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(G&& gen) : m_value(_mm256_setr_epi64x( gen(std::integral_constant()), gen(std::integral_constant()), @@ -958,11 +958,15 @@ class const_where_expression>, } } - friend constexpr auto const& Impl::mask( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } - friend constexpr auto const& Impl::value( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } }; template <> @@ -1026,11 +1030,15 @@ class const_where_expression< static_cast<__m128i>(m_value)); } - friend constexpr auto const& Impl::mask( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } - friend constexpr auto const& Impl::value( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } }; template <> @@ -1088,11 +1096,15 @@ class const_where_expression< static_cast<__m256i>(m_value)); } - friend constexpr auto const& Impl::mask( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } - friend constexpr auto const& Impl::value( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } }; template <> @@ -1152,11 +1164,15 @@ class const_where_expression< static_cast<__m256i>(m_value)); } - friend constexpr auto const& Impl::mask( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } - friend constexpr auto const& Impl::value( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } }; template <> diff --git a/simd/src/Kokkos_SIMD_AVX512.hpp b/simd/src/Kokkos_SIMD_AVX512.hpp index b43d926929..240314a8c1 100644 --- a/simd/src/Kokkos_SIMD_AVX512.hpp +++ b/simd/src/Kokkos_SIMD_AVX512.hpp @@ -145,7 +145,7 @@ class simd> { std::is_invocable_r_v>, bool> = false> - KOKKOS_FORCEINLINE_FUNCTION simd(G&& gen) + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(G&& gen) : m_value( _mm256_setr_epi32(gen(std::integral_constant()), gen(std::integral_constant()), @@ -854,11 +854,15 @@ class const_where_expression>, static_cast<__m512d>(m_value), 8); } - friend constexpr auto const& Impl::mask( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } - friend constexpr auto const& Impl::value( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } }; template <> @@ -922,11 +926,15 @@ class const_where_expression< static_cast<__m256i>(m_value)); } - friend constexpr auto const& Impl::mask( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } - friend constexpr auto const& Impl::value( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } }; template <> @@ -984,11 +992,15 @@ class const_where_expression< static_cast<__m256i>(m_value)); } - friend constexpr auto const& Impl::mask( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } - friend constexpr auto const& Impl::value( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } }; template <> @@ -1046,11 +1058,15 @@ class const_where_expression< static_cast<__m512i>(m_value)); } - friend constexpr auto const& Impl::mask( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } - friend constexpr auto const& Impl::value( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } }; template <> @@ -1108,11 +1124,15 @@ class const_where_expression< static_cast<__m512i>(m_value)); } - friend constexpr auto const& Impl::mask( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } - friend constexpr auto const& Impl::value( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } }; template <> @@ -1152,16 +1172,16 @@ class where_expression>, simd_mask>, simd>> const& x) { return _mm512_mask_reduce_max_epi32( - static_cast<__mmask8>(Impl::mask(x)), - _mm512_castsi256_si512(static_cast<__m256i>(Impl::value(x)))); + static_cast<__mmask8>(x.impl_get_mask()), + _mm512_castsi256_si512(static_cast<__m256i>(x.impl_get_value()))); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION double hmin( const_where_expression>, simd>> const& x) { - return _mm512_mask_reduce_min_pd(static_cast<__mmask8>(Impl::mask(x)), - static_cast<__m512d>(Impl::value(x))); + return _mm512_mask_reduce_min_pd(static_cast<__mmask8>(x.impl_get_mask()), + static_cast<__m512d>(x.impl_get_value())); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION std::int64_t reduce( @@ -1169,8 +1189,8 @@ class where_expression>, simd_mask>, simd>> const& x, std::int64_t, std::plus<>) { - return _mm512_mask_reduce_add_epi64(static_cast<__mmask8>(Impl::mask(x)), - static_cast<__m512i>(Impl::value(x))); + return _mm512_mask_reduce_add_epi64(static_cast<__mmask8>(x.impl_get_mask()), + static_cast<__m512i>(x.impl_get_value())); } [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION double reduce( @@ -1178,8 +1198,8 @@ class where_expression>, simd>> const& x, double, std::plus<>) { - return _mm512_mask_reduce_add_pd(static_cast<__mmask8>(Impl::mask(x)), - static_cast<__m512d>(Impl::value(x))); + return _mm512_mask_reduce_add_pd(static_cast<__mmask8>(x.impl_get_mask()), + static_cast<__m512d>(x.impl_get_value())); } } // namespace Experimental diff --git a/simd/src/Kokkos_SIMD_Common.hpp b/simd/src/Kokkos_SIMD_Common.hpp index 9731669450..de11640430 100644 --- a/simd/src/Kokkos_SIMD_Common.hpp +++ b/simd/src/Kokkos_SIMD_Common.hpp @@ -92,14 +92,14 @@ class where_expression : public const_where_expression { }; template -[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION where_expression, simd> where(typename simd::mask_type const& mask, simd& value) { return where_expression(mask, value); } template -[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION const_where_expression, simd> where(typename simd::mask_type const& mask, simd const& value) { @@ -308,44 +308,28 @@ KOKKOS_FORCEINLINE_FUNCTION where_expression& operator/=( // fallback implementations of reductions across simd_mask: template -[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION bool all_of( +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool all_of( simd_mask const& a) { return a == simd_mask(true); } template -[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION bool any_of( +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool any_of( simd_mask const& a) { return a != simd_mask(false); } template -[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION bool none_of( +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool none_of( simd_mask const& a) { return a == simd_mask(false); } -namespace Impl { - -template -[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr auto const& mask( - const_where_expression, simd> const& x) { - return x.m_mask; -} - -template -[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr auto const& value( - const_where_expression, simd> const& x) { - return x.m_value; -} - -} // namespace Impl - template [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION T hmin(const_where_expression, simd> const& x) { - auto const& v = Impl::value(x); - auto const& m = Impl::mask(x); + auto const& v = x.impl_get_value(); + auto const& m = x.impl_get_mask(); auto result = Kokkos::reduction_identity::min(); for (std::size_t i = 0; i < v.size(); ++i) { if (m[i]) result = Kokkos::min(result, v[i]); @@ -356,8 +340,8 @@ hmin(const_where_expression, simd> const& x) { template [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION T hmax(const_where_expression, simd> const& x) { - auto const& v = Impl::value(x); - auto const& m = Impl::mask(x); + auto const& v = x.impl_get_value(); + auto const& m = x.impl_get_mask(); auto result = Kokkos::reduction_identity::max(); for (std::size_t i = 0; i < v.size(); ++i) { if (m[i]) result = Kokkos::max(result, v[i]); @@ -369,8 +353,8 @@ template [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION T reduce(const_where_expression, simd> const& x, T, std::plus<>) { - auto const& v = Impl::value(x); - auto const& m = Impl::mask(x); + auto const& v = x.impl_get_value(); + auto const& m = x.impl_get_mask(); auto result = Kokkos::reduction_identity::sum(); for (std::size_t i = 0; i < v.size(); ++i) { if (m[i]) result += v[i]; diff --git a/simd/src/Kokkos_SIMD_NEON.hpp b/simd/src/Kokkos_SIMD_NEON.hpp index 5bf99a17b1..612137a060 100644 --- a/simd/src/Kokkos_SIMD_NEON.hpp +++ b/simd/src/Kokkos_SIMD_NEON.hpp @@ -299,7 +299,7 @@ class simd> { std::is_invocable_r_v>, bool> = false> - KOKKOS_FORCEINLINE_FUNCTION simd(G&& gen) { + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(G&& gen) { m_value = vsetq_lane_f64(gen(std::integral_constant()), m_value, 0); m_value = vsetq_lane_f64(gen(std::integral_constant()), @@ -502,7 +502,7 @@ class simd> { std::is_invocable_r_v>, bool> = false> - KOKKOS_FORCEINLINE_FUNCTION simd(G&& gen) { + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(G&& gen) { m_value = vset_lane_s32(gen(std::integral_constant()), m_value, 0); m_value = vset_lane_s32(gen(std::integral_constant()), @@ -640,7 +640,7 @@ class simd> { std::is_invocable_r_v>, bool> = false> - KOKKOS_FORCEINLINE_FUNCTION simd(G&& gen) { + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(G&& gen) { m_value = vsetq_lane_s64(gen(std::integral_constant()), m_value, 0); m_value = vsetq_lane_s64(gen(std::integral_constant()), @@ -778,7 +778,7 @@ class simd> { std::is_invocable_r_v>, bool> = false> - KOKKOS_FORCEINLINE_FUNCTION simd(G&& gen) { + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(G&& gen) { m_value = vsetq_lane_u64(gen(std::integral_constant()), m_value, 0); m_value = vsetq_lane_u64(gen(std::integral_constant()), @@ -898,11 +898,15 @@ class const_where_expression>, if (m_mask[1]) mem[index[1]] = m_value[1]; } - friend constexpr auto const& Impl::mask( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } - friend constexpr auto const& Impl::value( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } }; template <> @@ -966,11 +970,15 @@ class const_where_expression< if (m_mask[1]) mem[1] = m_value[1]; } - friend constexpr auto const& Impl::mask( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } - friend constexpr auto const& Impl::value( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } }; template <> @@ -1028,11 +1036,15 @@ class const_where_expression< if (m_mask[1]) mem[1] = m_value[1]; } - friend constexpr auto const& Impl::mask( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } - friend constexpr auto const& Impl::value( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } }; template <> @@ -1090,11 +1102,15 @@ class const_where_expression< if (m_mask[1]) mem[1] = m_value[1]; } - friend constexpr auto const& Impl::mask( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } - friend constexpr auto const& Impl::value( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } }; template <> diff --git a/simd/src/Kokkos_SIMD_Scalar.hpp b/simd/src/Kokkos_SIMD_Scalar.hpp index 2fd8d9d68b..8803417447 100644 --- a/simd/src/Kokkos_SIMD_Scalar.hpp +++ b/simd/src/Kokkos_SIMD_Scalar.hpp @@ -256,11 +256,15 @@ class const_where_expression, mem[static_cast(index)] = static_cast(m_value); } - friend KOKKOS_FUNCTION constexpr auto const& Impl::mask( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } - friend KOKKOS_FUNCTION constexpr auto const& Impl::value( - const_where_expression const& x); + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } }; template @@ -297,21 +301,60 @@ class where_expression, } }; +template +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION + where_expression, + simd> + where(typename simd< + T, Kokkos::Experimental::simd_abi::scalar>::mask_type const& mask, + simd& value) { + return where_expression(mask, value); +} + +template +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION + const_where_expression, + simd> + where(typename simd< + T, Kokkos::Experimental::simd_abi::scalar>::mask_type const& mask, + simd const& value) { + return const_where_expression(mask, value); +} + +template +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION bool all_of( + simd_mask const& a) { + return a == simd_mask(true); +} + +template +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION bool any_of( + simd_mask const& a) { + return a != simd_mask(false); +} + +template +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION bool none_of( + simd_mask const& a) { + return a == simd_mask(false); +} + template [[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION T reduce(const_where_expression, simd> const& x, T identity_element, std::plus<>) { - return static_cast(Impl::mask(x)) ? static_cast(Impl::value(x)) - : identity_element; + return static_cast(x.impl_get_mask()) + ? static_cast(x.impl_get_value()) + : identity_element; } template [[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION T hmax(const_where_expression, simd> const& x) { - return static_cast(Impl::mask(x)) - ? static_cast(Impl::value(x)) + return static_cast(x.impl_get_mask()) + ? static_cast(x.impl_get_value()) : Kokkos::reduction_identity::max(); } @@ -319,8 +362,8 @@ template [[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION T hmin(const_where_expression, simd> const& x) { - return static_cast(Impl::mask(x)) - ? static_cast(Impl::value(x)) + return static_cast(x.impl_get_mask()) + ? static_cast(x.impl_get_value()) : Kokkos::reduction_identity::min(); } diff --git a/simd/unit_tests/TestSIMD.cpp b/simd/unit_tests/TestSIMD.cpp index fc02e0b1e0..cf00cc36cb 100644 --- a/simd/unit_tests/TestSIMD.cpp +++ b/simd/unit_tests/TestSIMD.cpp @@ -168,9 +168,12 @@ void host_check_binary_op_one_loader(BinaryOp binary_op, std::size_t n, loader.host_load(second_args + i, nlanes, second_arg); if (!(loaded_first_arg && loaded_second_arg)) continue; simd_type expected_result; - for (std::size_t lane = 0; lane < nlanes; ++lane) { - expected_result[lane] = - binary_op.on_host(T(first_arg[lane]), T(second_arg[lane])); + // gcc 8.4.0 warns if using nlanes as upper bound about first_arg and/or + // second_arg being uninitialized + for (std::size_t lane = 0; lane < simd_type::size(); ++lane) { + if (lane < nlanes) + expected_result[lane] = + binary_op.on_host(T(first_arg[lane]), T(second_arg[lane])); } simd_type const computed_result = binary_op.on_host(first_arg, second_arg); host_check_equality(expected_result, computed_result, nlanes);