diff --git a/Src/AmrCore/AMReX_MFInterpolater.cpp b/Src/AmrCore/AMReX_MFInterpolater.cpp index 21e99e760f..1fa5d44214 100644 --- a/Src/AmrCore/AMReX_MFInterpolater.cpp +++ b/Src/AmrCore/AMReX_MFInterpolater.cpp @@ -52,7 +52,9 @@ MFPCInterp::interp (MultiFab const& crsemf, int ccomp, MultiFab& finemf, int fco fine[box_no](i,j,k,n+fcomp) = crse[box_no](ic,jc,kc,n+ccomp);) } }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { @@ -202,7 +204,9 @@ MFCellConsLinInterp::interp (MultiFab const& crsemf, int ccomp, MultiFab& finemf }); } - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { @@ -373,7 +377,9 @@ MFCellConsLinMinmaxLimitInterp::interp (MultiFab const& crsemf, int ccomp, Multi } }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { @@ -457,7 +463,9 @@ MFCellBilinear::interp (MultiFab const& crsemf, int ccomp, MultiFab& finemf, int mf_cell_bilin_interp(i,j,k,n, fine[box_no], fcomp, crse[box_no], ccomp, ratio); } }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { @@ -512,7 +520,9 @@ MFNodeBilinear::interp (MultiFab const& crsemf, int ccomp, MultiFab& finemf, int mf_nodebilin_interp(i,j,k,n, fine[box_no], fcomp, crse[box_no], ccomp, ratio); } }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { diff --git a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H index 5fcdb85eab..547811fe2b 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H @@ -841,7 +841,9 @@ MLABecLaplacianT::Fapply (int amrlev, int mglev, MF& out, const MF& in) cons dxinv, ascalar, bscalar); }); } - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { @@ -1031,7 +1033,9 @@ MLABecLaplacianT::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, in }); } } - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { @@ -1271,7 +1275,9 @@ MLABecLaplacianT::normalize (int amrlev, int mglev, MF& mf) const AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]), dxinv, ascalar, bscalar); }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { @@ -1383,7 +1389,9 @@ MLABecLaplacianT::makeNLinOp (int /*grid_size*/) const ama[box_no](i,j,k,n) = huge_alpha; } }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { @@ -1434,7 +1442,9 @@ MLABecLaplacianT::copyNSolveSolution (MF& dst, MF const& src) const dstma[box_no](i,j,k,n) = RT(0.0); } }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H index 970cf48fc2..d81357e8bd 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H @@ -206,7 +206,9 @@ MLCellABecLapT::define (const Vector& a_geom, { coarsen_overset_mask(i,j,k, crsema[box_no], finema[box_no]); }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { @@ -263,7 +265,9 @@ MLCellABecLapT::setDirichletNodesToZero (int amrlev, int mglev, MF& mf) cons { if (mskma[bno](i,j,k) == 0) { ma[bno](i,j,k,n) = RT(0.0); } }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } } @@ -639,7 +643,9 @@ MLCellABecLapT::applyOverset (int amrlev, MF& rhs) const rhsa[box_no](i,j,k,n) = RT(0.0); } }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp index 3e500351c9..1b2e6c5d8a 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp @@ -193,7 +193,9 @@ void MLCurlCurl::restriction (int amrlev, int cmglev, MF& crse, MF& fine) const { mlcurlcurl_restriction(idim,i,j,k,crsema[bno],finema[bno],dinfo); }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } if (need_parallel_copy) { crse[idim].ParallelCopy(cfine); @@ -228,7 +230,9 @@ void MLCurlCurl::interpolation (int amrlev, int fmglev, MF& fine, } }); } - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } void @@ -377,7 +381,9 @@ void MLCurlCurl::smooth1D (int amrlev, int mglev, MF& sol, MF const& rhs, bcx[bno],bcy[bno],bcz[bno], adxinv,color,dinfo); }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else { ParallelFor( nmf, [=] AMREX_GPU_DEVICE(int bno, int i, int j, int k) { @@ -385,7 +391,9 @@ void MLCurlCurl::smooth1D (int amrlev, int mglev, MF& sol, MF const& rhs, rhsx[bno],rhsy[bno],rhsz[bno], b,adxinv,color,dinfo); }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } } #endif @@ -450,7 +458,9 @@ void MLCurlCurl::smooth4 (int amrlev, int mglev, MF& sol, MF const& rhs, }); } } - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.cpp index 0fda51eac1..9715419d0f 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.cpp @@ -109,7 +109,9 @@ MLNodeABecLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa yarr_ma[box_no](i,j,k) = (dmskarr_ma[box_no](i,j,k)) ? Real(0.0) : alpha*acoef_ma[box_no](i,j,k)*xarr_ma[box_no](i,j,k) - beta*lap; }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } void @@ -145,7 +147,9 @@ MLNodeABecLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF acoef_ma[box_no], bcoef_ma[box_no], dmskarr_ma[box_no], dxinvarr); }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } if (m_smooth_num_sweeps > 1) { nodalSync(amrlev, mglev, sol); } } #else @@ -193,7 +197,9 @@ MLNodeABecLaplacian::restriction (int amrlev, int cmglev, MultiFab& crse, MultiF { mlndlap_restriction(i,j,k,pcrse_ma[box_no],fine_ma[box_no],msk_ma[box_no]); }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } if (need_parallel_copy) { crse.ParallelCopy(cfine); @@ -225,7 +231,9 @@ MLNodeABecLaplacian::interpolation (int amrlev, int fmglev, MultiFab& fine, cons mlndlap_interpadd_aa(i, j, k, fine_ma[box_no], crse_ma[box_no], sig_ma[box_no], msk_ma[box_no]); }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } void @@ -280,7 +288,9 @@ MLNodeABecLaplacian::fixUpResidualMask (int amrlev, iMultiFab& resmsk) { if (fmsk[bno](i,j,k) == nodelap_detail::crse_fine_node) { rmsk[bno](i,j,k) = 1; } }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } void diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp index 3cb18a97ac..7a0e5600d5 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp @@ -367,7 +367,9 @@ MLNodeLaplacian::fixSolvabilityByOffset (int amrlev, int mglev, MultiFab& rhs, rhs_ma[box_no](i,j,k) -= offset * scale; }); } - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else { rhs.plus(-offset, 0, 1); } @@ -532,7 +534,9 @@ MLNodeLaplacian::restriction (int amrlev, int cmglev, MultiFab& crse, MultiFab& mlndlap_restriction_rap(i,j,k,pcrse_ma[box_no],fine_ma[box_no],st_ma[box_no],msk_ma[box_no]); }); } - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { @@ -665,7 +669,9 @@ MLNodeLaplacian::interpolation (int amrlev, int fmglev, MultiFab& fine, const Mu mlndlap_semi_interpadd_aa(i, j, k, fine_ma[box_no], crse_ma[box_no], sig_ma[box_no], msk_ma[box_no], idir); }); } - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { @@ -883,7 +889,9 @@ MLNodeLaplacian::normalize (int amrlev, int mglev, MultiFab& mf) const mlndlap_normalize_aa(i,j,k,ma[box_no],sx_ma[box_no],dmsk_ma[box_no],dxinv); }); } - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp index 8e490f3034..c7efa2f771 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp @@ -265,7 +265,9 @@ MLNodeLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& i #endif }); } - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { @@ -558,7 +560,9 @@ MLNodeLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& } } - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } nodalSync(amrlev, mglev, sol); } else @@ -701,7 +705,9 @@ MLNodeLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& } } - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp index 9719ac3d3e..d98b38e835 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp @@ -470,7 +470,9 @@ MLNodeLinOp::setDirichletNodesToZero (int amrlev, int mglev, MultiFab& mf) const { if (maskma[bno](i,j,k)) { ma[bno](i,j,k,n) = RT(0.0); } }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } #ifdef AMREX_USE_EB EB_set_covered(mf, 0, ncomp, 0, RT(0.0)); #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeTensorLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeTensorLaplacian.cpp index e8135aeba1..1a02e06ff3 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeTensorLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeTensorLaplacian.cpp @@ -219,7 +219,9 @@ MLNodeTensorLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const Multi { mlndtslap_adotx(i,j,k, out_a[box_no], in_a[box_no], dmsk_a[box_no], s); }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } #endif } @@ -261,7 +263,9 @@ MLNodeTensorLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const Mult mlndtslap_gauss_seidel(i, j, k, sol_a[box_no], rhs_a[box_no], dmsk_a[box_no], s); } }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } #endif } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H b/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H index 679a1c9e58..d2a75e3d77 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H @@ -235,7 +235,9 @@ MLPoissonT::Fapply (int amrlev, int mglev, MF& out, const MF& in) const }); } } - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif { @@ -333,7 +335,9 @@ MLPoissonT::normalize (int amrlev, int mglev, MF& mf) const { mlpoisson_normalize(i,j,k, ma[box_no], AMREX_D_DECL(dhx,dhy,dhz), dx, probxlo); }); - Gpu::streamSynchronize(); + if (!Gpu::inNoSyncRegion()) { + Gpu::streamSynchronize(); + } } else #endif {