Skip to content

Commit 3ed0f5a

Browse files
committed
Merge branch 'ambrad/hommexx/sl-fix-threading' (PR #7012)
Hommexx/SL: Fix a threading issue. This fixes a threading error that occurs in EAMxx simulations on CPU when each element is assigned more than one thread. Add some team_barriers, and rearrange a section of code so that additional team_barriers can be placed between its phases. Clean up some compiler warnings. [BFB]
2 parents 8e96857 + 35277e9 commit 3ed0f5a

File tree

3 files changed

+21
-13
lines changed

3 files changed

+21
-13
lines changed

components/homme/src/share/cxx/ComposeTransportImpl.hpp

+14-8
Original file line numberDiff line numberDiff line change
@@ -356,9 +356,6 @@ struct ComposeTransportImpl {
356356
RNlev yp(pack2real(yps));
357357
const auto f = [&] (const int i, const int j, const int k) {
358358
if (k == 0) return;
359-
const auto& xkm1 = x(i,j,k-1);
360-
const auto& xk = x(i,j,k ); // also the interpolation point
361-
const auto& xkp1 = x(i,j,k+1);
362359
yp(i,j,k) = approx_derivative(x(i,j,k-1), x(i,j,k), x(i,j,k+1),
363360
y(i,j,k-1), y(i,j,k), y(i,j,k+1));
364361
};
@@ -377,14 +374,19 @@ struct ComposeTransportImpl {
377374
{
378375
const auto ttr = Kokkos::TeamThreadRange(kv.team, NP*NP);
379376
const auto tvr = Kokkos::ThreadVectorRange(kv.team, NUM_LEV);
380-
const auto f = [&] (const int idx) {
377+
const auto f1 = [&] (const int idx) {
381378
const int i = idx / NP, j = idx % NP;
382379
const auto r = [&] (const int k, Real& dps, const bool final) {
383380
assert(k != 0 || dps == 0);
384381
if (final) edds(i,j,k) = dps;
385382
dps += divdps(i,j,k);
386383
};
387384
Dispatch<>::parallel_scan(kv.team, num_phys_lev, r);
385+
};
386+
Kokkos::parallel_for(ttr, f1);
387+
kv.team_barrier();
388+
const auto f2 = [&] (const int idx) {
389+
const int i = idx / NP, j = idx % NP;
388390
const int kend = num_phys_lev - 1;
389391
const Real dps = edds(i,j,kend) + divdps(i,j,kend);
390392
assert(hybrid_bi(0)[0] == 0);
@@ -393,11 +395,15 @@ struct ComposeTransportImpl {
393395
if (kp == 0) edd(i,j,kp)[0] = 0;
394396
};
395397
Kokkos::parallel_for(tvr, s);
396-
assert(edds(i,j,0) == 0);
397-
const int bottom = num_phys_lev;
398-
edds(i,j,bottom) = 0; // benign write race
399398
};
400-
Kokkos::parallel_for(ttr, f);
399+
Kokkos::parallel_for(ttr, f2);
400+
kv.team_barrier();
401+
const int bottom = num_phys_lev;
402+
const auto f3 = [&] (const int idx) {
403+
const int i = idx / NP, j = idx % NP;
404+
Kokkos::single(Kokkos::PerThread(kv.team), [&] () { edds(i,j,bottom) = 0; });
405+
};
406+
Kokkos::parallel_for(ttr, f3);
401407
}
402408
};
403409

components/homme/src/share/cxx/ComposeTransportImplEnhancedTrajectoryTests.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@ get_test_team_policy (const int nelem, const int nlev, const int ncol=NP*NP) {
2525
}
2626

2727
struct TestData {
28+
const ComposeTransportImpl& cti;
2829
std::mt19937_64 engine;
2930
static const Real eps;
30-
const ComposeTransportImpl& cti;
3131

3232
TestData (const CTI& cti_, const int seed = 0)
3333
: cti(cti_), engine(seed == 0 ? std::random_device()() : seed)
@@ -103,7 +103,7 @@ void fillcols (const int n, const Real* const h, const RelnV::HostMirror& a) {
103103
assert(n <= a.extent_int(2));
104104
for (int i = 0; i < a.extent_int(0); ++i)
105105
for (int j = 0; j < a.extent_int(1); ++j)
106-
for (size_t k = 0; k < n; ++k)
106+
for (int k = 0; k < n; ++k)
107107
a(i,j,k) = h[k];
108108
}
109109

components/homme/src/share/cxx/ComposeTransportImplTrajectory.cpp

+5-3
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ reconstruct_and_limit_dp (const KernelVariables& kv, const CSNlev& dprefp, const
7575
};
7676
Kokkos::Real2 sums;
7777
Dispatch<>::parallel_reduce(kv.team, tvr, g1, sums);
78-
kv.team_barrier();
7978
const Real nmass = sums.v[0];
8079
if (nmass == 0) return;
8180
// Compensate for clipping.
@@ -157,7 +156,6 @@ KOKKOS_FUNCTION static void calc_vertically_lagrangian_levels (
157156
assert(hybrid_bi(0)[0] == 0);
158157

159158
const auto ttr = TeamThreadRange(kv.team, NP*NP);
160-
const auto tvr = ThreadVectorRange(kv.team, NUM_LEV);
161159

162160
// Reconstruct an approximation to endpoint eta_dot_dpdn on Eulerian levels.
163161
const auto& divdp = wrk1a;
@@ -183,6 +181,7 @@ KOKKOS_FUNCTION static void calc_vertically_lagrangian_levels (
183181
kv.team_barrier();
184182
RNlevp edds(cti::pack2real(edd)), divdps(cti::pack2real(divdp));
185183
cti::calc_eta_dot_dpdn(kv, hybrid_bi, divdps, edd, edds);
184+
kv.team_barrier();
186185
}
187186

188187
// Use p0 as the reference coordinate system. p0 differs from p1 by B(eta)
@@ -203,6 +202,7 @@ KOKKOS_FUNCTION static void calc_vertically_lagrangian_levels (
203202
// time.
204203
const auto& ptp0 = dprecon;
205204
cti::approx_derivative(kv, pref, *eta_dot_dpdn[1], ptp0);
205+
kv.team_barrier();
206206

207207
{
208208
const auto& edd = *eta_dot_dpdn[0];
@@ -240,14 +240,16 @@ KOKKOS_FUNCTION static void calc_vertically_lagrangian_levels (
240240
if (static_cast<int>(cti::num_lev_pack) ==
241241
static_cast<int>(cti::max_num_lev_pack)) {
242242
// Re-zero eta_dot_dpdn at bottom.
243+
kv.team_barrier();
243244
RNlevp edds(cti::pack2real(edd));
244245
const auto f = [&] (const int idx) {
245246
const int i = idx / NP, j = idx % NP;
246247
const int bottom = cti::num_phys_lev;
247-
edds(i,j,bottom) = 0;
248+
Kokkos::single(Kokkos::PerThread(kv.team), [&] () { edds(i,j,bottom) = 0; });
248249
};
249250
parallel_for(ttr, f);
250251
}
252+
kv.team_barrier();
251253
}
252254

253255
reconstruct_and_limit_dp(kv, dp3d, dt, dp_tol, *eta_dot_dpdn[0], dprecon);

0 commit comments

Comments
 (0)