From cbbf94c522a32d24c491caa4ac3c3e7a677bd7d6 Mon Sep 17 00:00:00 2001 From: "James J. Elliott" Date: Sat, 24 Apr 2021 06:53:39 -0700 Subject: [PATCH] When compiling with IBM Clang 11 + Cuda, Zoltan2 MJ captures 'this' inside lambdas This patch * Marks 1 function static, which was a const member function (but used no class variables). * Addresses this-> being used inside a nested team lambda (which clang doesn't like). To remove this, I moved sEpsilon to a parameter of the function being called and was able to mark the function static as well (removing its use of this->sEpsilon). This entails creating a function-local copy of sEpsilon so that it may be captured by the default [=] capture. Both changes entail adding a `using` statement within the function so that the now static class functions can be called (e.g., AlgMJ< ... >::). --- .../partition/Zoltan2_AlgMultiJagged.hpp | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgMultiJagged.hpp b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgMultiJagged.hpp index e83b6436de36..a12d5e1c8d39 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgMultiJagged.hpp +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgMultiJagged.hpp @@ -645,8 +645,9 @@ class AlgMJ * \param achieved balance we achieved. * \param expected balance expected. */ + static KOKKOS_INLINE_FUNCTION - double calculate_imbalance(mj_scalar_t achieved, mj_scalar_t expected) const { + double calculate_imbalance(mj_scalar_t achieved, mj_scalar_t expected) { return static_cast(achieved) / static_cast(expected) - 1.0; } @@ -715,6 +716,7 @@ class AlgMJ * the left of the cut line. 
* \param new_cut_position DOCWORK: Documentation */ + static KOKKOS_INLINE_FUNCTION void mj_calculate_new_cut_position ( mj_scalar_t cut_upper_bound, @@ -722,7 +724,8 @@ class AlgMJ mj_scalar_t cut_upper_weight, mj_scalar_t cut_lower_weight, mj_scalar_t expected_weight, - mj_scalar_t &new_cut_position); + mj_scalar_t &new_cut_position, + mj_scalar_t sEpsilon); /*! \brief Function checks if should do migration or not. * It returns true to point that migration should be done when @@ -4386,19 +4389,21 @@ void AlgMJ:: */ template +KOKKOS_INLINE_FUNCTION void AlgMJ::mj_calculate_new_cut_position(mj_scalar_t cut_upper_bound, mj_scalar_t cut_lower_bound, mj_scalar_t cut_upper_weight, mj_scalar_t cut_lower_weight, mj_scalar_t expected_weight, - mj_scalar_t &new_cut_position) { + mj_scalar_t &new_cut_position, + mj_scalar_t sEpsilon) { - if(std::abs(cut_upper_bound - cut_lower_bound) < this->sEpsilon) { + if(std::abs(cut_upper_bound - cut_lower_bound) < sEpsilon) { new_cut_position = cut_upper_bound; //or lower bound does not matter. } - if(std::abs(cut_upper_weight - cut_lower_weight) < this->sEpsilon) { + if(std::abs(cut_upper_weight - cut_lower_weight) < sEpsilon) { new_cut_position = cut_lower_bound; } @@ -5113,6 +5118,7 @@ void AlgMJsEpsilon; // Note for a 22 part system I tried removing the outer loop // and doing each sub loop as a simple parallel_for over num_cuts. // But that was about twice as slow (10ms) as the current form (5ms) @@ -5154,6 +5160,8 @@ void AlgMJ; // seen weight in the part mj_scalar_t seen_weight_in_part = 0; // expected weight for part. @@ -5180,12 +5188,12 @@ void AlgMJmj_calculate_new_cut_position( + algMJ_t::mj_calculate_new_cut_position( current_cut_upper_bounds(i), current_cut_lower_bounds(i), current_cut_upper_weights(i), current_cut_lower_bound_weights(i), - expected_weight_in_part, new_cut_position); + expected_weight_in_part, new_cut_position, + _sEpsilon); // if cut line does not move significantly. // then finalize the search. 
@@ -5382,13 +5391,14 @@ void AlgMJmj_calculate_new_cut_position( + algMJ_t::mj_calculate_new_cut_position( current_cut_upper_bounds(i), current_cut_lower_bounds(i), current_cut_upper_weights(i), current_cut_lower_bound_weights(i), expected_weight_in_part, - new_cut_position); + new_cut_position, + _sEpsilon); // if cut line does not move significantly. if(std::abs(current_cut_coordinates(i) -