From 033aaac6533a14bc431f2825a808b7d7d8d48a60 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 6 Feb 2023 06:15:28 -0800 Subject: [PATCH 1/4] [SYCL] Add reducer class member aliases and constexpr value This commit adds the value_type and binary_operation member aliases and the dimensions value to the reducer class. Signed-off-by: Larsen, Steffen --- sycl/include/sycl/reduction.hpp | 23 +++++-- .../basic_tests/reduction/reducer_aliases.cpp | 61 +++++++++++++++++++ 2 files changed, 79 insertions(+), 5 deletions(-) create mode 100644 sycl/test/basic_tests/reduction/reducer_aliases.cpp diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 456f9004d2981..8a5084661103d 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -320,6 +320,14 @@ template class combiner { ReduVarPtr, [](auto &&Ref, auto Val) { return Ref.fetch_max(Val); }); } }; + +template class reducer_common { +public: + using value_type = T; + using binary_operation = BinaryOperation; + static constexpr int dimensions = Dims; +}; + } // namespace detail /// Specialization of the generic class 'reducer'. It is used for reductions @@ -336,7 +344,8 @@ class reducer< reducer::value>>> { + !detail::IsKnownIdentityOp::value>>>, + public detail::reducer_common { public: reducer(const T &Identity, BinaryOperation BOp) : MValue(Identity), MIdentity(Identity), MBinaryOp(BOp) {} @@ -371,7 +380,8 @@ class reducer< reducer::value>>> { + detail::IsKnownIdentityOp::value>>>, + public detail::reducer_common { public: reducer() : MValue(getIdentity()) {} reducer(const T & /* Identity */, BinaryOperation) : MValue(getIdentity()) {} @@ -398,7 +408,8 @@ class reducer> : public detail::combiner< reducer>> { + std::enable_if_t>>, + public detail::reducer_common { public: reducer(T &Ref, BinaryOperation BOp) : MElement(Ref), MBinaryOp(BOp) {} @@ -423,7 +434,8 @@ class reducer< reducer::value>>> { + !detail::IsKnownIdentityOp::value>>>, + public detail::reducer_common { public: reducer(const T &Identity, BinaryOperation BOp) : MValue(Identity), MIdentity(Identity), MBinaryOp(BOp) {} @@ -453,7 +465,8 @@ class reducer< reducer::value>>> { + detail::IsKnownIdentityOp::value>>>, + public detail::reducer_common { public: reducer() : MValue(getIdentity()) {} reducer(const T & /* Identity */, BinaryOperation) : MValue(getIdentity()) {} diff --git a/sycl/test/basic_tests/reduction/reducer_aliases.cpp b/sycl/test/basic_tests/reduction/reducer_aliases.cpp new file mode 100644 index 0000000000000..edd6763cf8431 --- /dev/null +++ b/sycl/test/basic_tests/reduction/reducer_aliases.cpp @@ -0,0 +1,61 @@ +// RUN: %clangxx -fsycl -fsyntax-only -sycl-std=2020 %s + +// Tests the member aliases on the reducer class. + +#include + +#include + +template class Kernel; + +template +void CheckReducerAliases() { + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(ReducerT::dimensions == Dims); +} + +template void CheckAllReducers(sycl::queue &Q) { + T *Vals = sycl::malloc_device(4, Q); + sycl::span SpanVal(Vals, 4); + + auto CustomOp = [](const T &LHS, const T &RHS) { return LHS + RHS; }; + + auto ValReduction1 = sycl::reduction(Vals, sycl::plus<>()); + auto ValReduction2 = sycl::reduction(Vals, T{}, sycl::plus<>()); + auto ValReduction3 = sycl::reduction(Vals, T{}, CustomOp); + auto SpanReduction1 = sycl::reduction(SpanVal, sycl::plus<>()); + auto SpanReduction2 = sycl::reduction(SpanVal, T{}, sycl::plus<>()); + auto SpanReduction3 = sycl::reduction(SpanVal, T{}, CustomOp); + // TODO: Add cases with identityless reductions when supported. + Q.parallel_for>( + sycl::range<1>{10}, ValReduction1, ValReduction2, ValReduction3, + SpanReduction1, SpanReduction2, SpanReduction3, + [=](sycl::id<1>, auto &ValRedu1, auto &ValRedu2, auto &ValRedu3, + auto &SpanRedu1, auto &SpanRedu2, auto &SpanRedu3) { + CheckReducerAliases, T, + sycl::plus<>, 0>(); + CheckReducerAliases, T, + sycl::plus<>, 0>(); + CheckReducerAliases, T, + decltype(CustomOp), 0>(); + CheckReducerAliases, T, + sycl::plus<>, 1>(); + CheckReducerAliases, T, + sycl::plus<>, 1>(); + CheckReducerAliases, T, + decltype(CustomOp), 1>(); + }); +} + +int main() { + sycl::queue Q; + CheckAllReducers(Q); + CheckAllReducers(Q); + CheckAllReducers(Q); + CheckAllReducers(Q); + CheckAllReducers(Q); + CheckAllReducers(Q); + CheckAllReducers(Q); + return 0; +} From b032eda29f1f1aec4330501ee88a578ae35ddc7f Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 6 Feb 2023 09:09:45 -0800 Subject: [PATCH 2/4] [SYCL][Reduction] Hide reducer non-standard members and add identity This commit hides the members in reducer that are not mentioned in the SYCL 2020 specification and introduces the identity member function. Signed-off-by: Larsen, Steffen --- sycl/include/sycl/reduction.hpp | 114 ++++++++++++++++++++++++-------- 1 file changed, 85 insertions(+), 29 deletions(-) diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 8a5084661103d..010497baf91b3 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -159,6 +159,40 @@ struct ReducerTraits> { static constexpr size_t extent = Extent; }; +/// Helper class for accessing internal reducer member functions. +template class ReducerAccess { +public: + ReducerAccess(ReducerT &ReducerRef) : MReducerRef(ReducerRef) {} + + template auto &getElement(size_t E) { + return MReducerRef.getElement(E); + } + + template + enable_if_t< + IsKnownIdentityOp::value, + typename ReducerRelayT::value_type> static constexpr getIdentity() { + return ReducerT::getIdentity(); + } + + template + enable_if_t< + !IsKnownIdentityOp::value, + typename ReducerRelayT::value_type> + getIdentity() { + return MReducerRef.identity(); + } + +private: + ReducerT &MReducerRef; +}; + +// Deduction guide to simplify the use of ReducerAccess. +template +ReducerAccess(ReducerT &) -> ReducerAccess; + /// Use CRTP to avoid redefining shorthand operators in terms of combine /// /// Also, for many types with known identity the operation 'atomic_combine()' @@ -238,7 +272,7 @@ template class combiner { auto AtomicRef = sycl::atomic_ref(), Space>( address_space_cast(ReduVarPtr)[E]); - Functor(std::move(AtomicRef), reducer->getElement(E)); + Functor(std::move(AtomicRef), ReducerAccess{*reducer}.getElement(E)); } } @@ -355,13 +389,15 @@ class reducer< return *this; } - T getIdentity() const { return MIdentity; } + T identity() const { return MIdentity; } + +private: + template friend class detail::ReducerAccess; T &getElement(size_t) { return MValue; } const T &getElement(size_t) const { return MValue; } - T MValue; -private: + T MValue; const T MIdentity; BinaryOperation MBinaryOp; }; @@ -392,7 +428,14 @@ class reducer< return *this; } - static T getIdentity() { + T identity() const { + return getIdentity(); + } + +private: + template friend class detail::ReducerAccess; + + static constexpr T getIdentity() { return detail::known_identity_impl::value; } @@ -419,6 +462,8 @@ class reducer friend class detail::ReducerAccess; + T &MElement; BinaryOperation MBinaryOp; }; @@ -444,11 +489,14 @@ class reducer< return {MValue[Index], MBinaryOp}; } - T getIdentity() const { return MIdentity; } + T identity() const { return MIdentity; } + +private: + template friend class detail::ReducerAccess; + T &getElement(size_t E) { return MValue[E]; } const T &getElement(size_t E) const { return MValue[E]; } -private: marray MValue; const T MIdentity; BinaryOperation MBinaryOp; @@ -477,14 +525,20 @@ class reducer< return {MValue[Index], BinaryOperation()}; } - static T getIdentity() { + T identity() const { + return getIdentity(); + } + +private: + template friend class detail::ReducerAccess; + + static constexpr T getIdentity() { return detail::known_identity_impl::value; } T &getElement(size_t E) { return MValue[E]; } const T &getElement(size_t E) const { return MValue[E]; } -private: marray MValue; }; @@ -769,8 +823,7 @@ class reduction_impl // list of known operations does not break the existing programs. if constexpr (is_known_identity) { (void)Identity; - return reducer_type::getIdentity(); - + return ReducerAccess::getIdentity(); } else { return Identity; } @@ -788,7 +841,7 @@ class reduction_impl template * = nullptr> reduction_impl(RedOutVar Var, bool InitializeToIdentity = false) - : algo(reducer_type::getIdentity(), BinaryOperation(), + : algo(ReducerAccess::getIdentity(), BinaryOperation(), InitializeToIdentity, Var) { if constexpr (!is_usm) if (Var.size() != 1) @@ -896,7 +949,7 @@ struct NDRangeReduction { // Work-group cooperates to initialize multiple reduction variables auto LID = NDId.get_local_id(0); for (size_t E = LID; E < NElements; E += NDId.get_local_range(0)) { - GroupSum[E] = Reducer.getIdentity(); + GroupSum[E] = ReducerAccess(Reducer).getIdentity(); } workGroupBarrier(); @@ -909,7 +962,7 @@ struct NDRangeReduction { workGroupBarrier(); if (LID == 0) { for (size_t E = 0; E < NElements; ++E) { - Reducer.getElement(E) = GroupSum[E]; + ReducerAccess{Reducer}.getElement(E) = GroupSum[E]; } Reducer.template atomic_combine(&Out[0]); } @@ -959,7 +1012,7 @@ struct NDRangeReduction< // reduce_over_group is only defined for each T, not for span size_t LID = NDId.get_local_id(0); for (int E = 0; E < NElements; ++E) { - auto &RedElem = Reducer.getElement(E); + auto &RedElem = ReducerAccess{Reducer}.getElement(E); RedElem = reduce_over_group(Group, RedElem, BOp); if (LID == 0) { if (NWorkGroups == 1) { @@ -970,7 +1023,7 @@ struct NDRangeReduction< Out[E] = RedElem; } else { PartialSums[NDId.get_group_linear_id() * NElements + E] = - Reducer.getElement(E); + ReducerAccess{Reducer}.getElement(E); } } } @@ -993,7 +1046,7 @@ struct NDRangeReduction< // Reduce each result separately // TODO: Opportunity to parallelize across elements. for (int E = 0; E < NElements; ++E) { - auto LocalSum = Reducer.getIdentity(); + auto LocalSum = ReducerAccess{Reducer}.getIdentity(); for (size_t I = LID; I < NWorkGroups; I += WGSize) LocalSum = BOp(LocalSum, PartialSums[I * NElements + E]); auto Result = reduce_over_group(Group, LocalSum, BOp); @@ -1083,7 +1136,7 @@ template <> struct NDRangeReduction { for (int E = 0; E < NElements; ++E) { // Copy the element to local memory to prepare it for tree-reduction. - LocalReds[LID] = Reducer.getElement(E); + LocalReds[LID] = ReducerAccess{Reducer}.getElement(E); doTreeReduction(WGSize, LID, false, Identity, LocalReds, BOp, [&]() { workGroupBarrier(); }); @@ -1158,8 +1211,8 @@ struct NDRangeReduction { typename Reduction::binary_operation BOp; for (int E = 0; E < NElements; ++E) { - Reducer.getElement(E) = - reduce_over_group(NDIt.get_group(), Reducer.getElement(E), BOp); + ReducerAccess{Reducer}.getElement(E) = reduce_over_group( + NDIt.get_group(), ReducerAccess{Reducer}.getElement(E), BOp); } if (NDIt.get_local_linear_id() == 0) Reducer.atomic_combine(&Out[0]); @@ -1207,14 +1260,15 @@ struct NDRangeReduction< for (int E = 0; E < NElements; ++E) { // Copy the element to local memory to prepare it for tree-reduction. - LocalReds[LID] = Reducer.getElement(E); + LocalReds[LID] = ReducerAccess{Reducer}.getElement(E); typename Reduction::binary_operation BOp; - doTreeReduction(WGSize, LID, IsPow2WG, Reducer.getIdentity(), - LocalReds, BOp, [&]() { NDIt.barrier(); }); + doTreeReduction(WGSize, LID, IsPow2WG, + ReducerAccess{Reducer}.getIdentity(), LocalReds, BOp, + [&]() { NDIt.barrier(); }); if (LID == 0) { - Reducer.getElement(E) = + ReducerAccess{Reducer}.getElement(E) = IsPow2WG ? LocalReds[0] : BOp(LocalReds[0], LocalReds[WGSize]); } @@ -1282,7 +1336,7 @@ struct NDRangeReduction< typename Reduction::binary_operation BOp; for (int E = 0; E < NElements; ++E) { typename Reduction::result_type PSum; - PSum = Reducer.getElement(E); + PSum = ReducerAccess{Reducer}.getElement(E); PSum = reduce_over_group(NDIt.get_group(), PSum, BOp); if (NDIt.get_local_linear_id() == 0) { if (IsUpdateOfUserVar) @@ -1346,7 +1400,8 @@ struct NDRangeReduction< typename Reduction::result_type PSum = (HasUniformWG || (GID < NWorkItems)) ? In[GID * NElements + E] - : Reduction::reducer_type::getIdentity(); + : ReducerAccess< + typename Reduction::reducer_type>::getIdentity(); PSum = reduce_over_group(NDIt.get_group(), PSum, BOp); if (NDIt.get_local_linear_id() == 0) { if (IsUpdateOfUserVar) @@ -1420,7 +1475,7 @@ template <> struct NDRangeReduction { for (int E = 0; E < NElements; ++E) { // Copy the element to local memory to prepare it for tree-reduction. - LocalReds[LID] = Reducer.getElement(E); + LocalReds[LID] = ReducerAccess{Reducer}.getElement(E); doTreeReduction(WGSize, LID, IsPow2WG, ReduIdentity, LocalReds, BOp, [&]() { NDIt.barrier(); }); @@ -1693,7 +1748,8 @@ void reduCGFuncImplScalar( size_t WGSize = NDIt.get_local_range().size(); size_t LID = NDIt.get_local_linear_id(); - ((std::get(LocalAccsTuple)[LID] = std::get(ReducersTuple).MValue), + ((std::get(LocalAccsTuple)[LID] = + ReducerAccess{std::get(ReducersTuple)}.getElement(0)), ...); // For work-groups, which size is not power of two, local accessors have @@ -1744,7 +1800,7 @@ void reduCGFuncImplArrayHelper(bool Pow2WG, bool IsOneWG, nd_item NDIt, for (size_t E = 0; E < NElements; ++E) { // Copy the element to local memory to prepare it for tree-reduction. - LocalReds[LID] = Reducer.getElement(E); + LocalReds[LID] = ReducerAccess{Reducer}.getElement(E); doTreeReduction(WGSize, LID, Pow2WG, Identity, LocalReds, BOp, [&]() { NDIt.barrier(); }); From 77bf7afd37ede898f037df062efb6ab441fe8af5 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 7 Feb 2023 05:44:03 -0800 Subject: [PATCH 3/4] Fix formatting Signed-off-by: Larsen, Steffen --- sycl/include/sycl/reduction.hpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 010497baf91b3..a86016cd82673 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -428,9 +428,7 @@ class reducer< return *this; } - T identity() const { - return getIdentity(); - } + T identity() const { return getIdentity(); } private: template friend class detail::ReducerAccess; @@ -525,9 +523,7 @@ class reducer< return {MValue[Index], BinaryOperation()}; } - T identity() const { - return getIdentity(); - } + T identity() const { return getIdentity(); } private: template friend class detail::ReducerAccess; From 81afc4501ff24f5d8e35c4fff51724dabfd182be Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Wed, 8 Feb 2023 02:07:03 -0800 Subject: [PATCH 4/4] Address MSVC error Signed-off-by: Larsen, Steffen --- sycl/include/sycl/reduction.hpp | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index a86016cd82673..2901c2112fba2 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -172,8 +172,8 @@ template class ReducerAccess { enable_if_t< IsKnownIdentityOp::value, - typename ReducerRelayT::value_type> static constexpr getIdentity() { - return ReducerT::getIdentity(); + typename ReducerRelayT::value_type> constexpr getIdentity() { + return getIdentityStatic(); } template @@ -185,6 +185,17 @@ template class ReducerAccess { return MReducerRef.identity(); } + // MSVC does not like static overloads of non-static functions, even if they + // are made mutually exclusive through SFINAE. Instead we use a new static + // function to be used when a static function is needed. + template + enable_if_t< + IsKnownIdentityOp::value, + typename ReducerRelayT::value_type> static constexpr getIdentityStatic() { + return ReducerT::getIdentity(); + } + private: ReducerT &MReducerRef; }; @@ -819,7 +830,7 @@ class reduction_impl // list of known operations does not break the existing programs. if constexpr (is_known_identity) { (void)Identity; - return ReducerAccess::getIdentity(); + return ReducerAccess::getIdentityStatic(); } else { return Identity; } @@ -837,8 +848,8 @@ class reduction_impl template * = nullptr> reduction_impl(RedOutVar Var, bool InitializeToIdentity = false) - : algo(ReducerAccess::getIdentity(), BinaryOperation(), - InitializeToIdentity, Var) { + : algo(ReducerAccess::getIdentityStatic(), + BinaryOperation(), InitializeToIdentity, Var) { if constexpr (!is_usm) if (Var.size() != 1) throw sycl::runtime_error(errc::invalid, @@ -1396,8 +1407,8 @@ struct NDRangeReduction< typename Reduction::result_type PSum = (HasUniformWG || (GID < NWorkItems)) ? In[GID * NElements + E] - : ReducerAccess< - typename Reduction::reducer_type>::getIdentity(); + : ReducerAccess:: + getIdentityStatic(); PSum = reduce_over_group(NDIt.get_group(), PSum, BOp); if (NDIt.get_local_linear_id() == 0) { if (IsUpdateOfUserVar)