From 40fd5748d96ef8f72829889449844b654263bac8 Mon Sep 17 00:00:00 2001 From: isriva Date: Fri, 22 Sep 2023 11:18:05 -0700 Subject: [PATCH 001/151] added code for heffte usage in fhdex; needs testing --- src_analysis/Make.package | 2 + src_analysis/TurbSpectra.H | 83 ++ src_analysis/TurbSpectra.cpp | 1319 +++++++++++++++++++++++++ src_compressible_stag/main_driver.cpp | 85 +- 4 files changed, 1417 insertions(+), 72 deletions(-) create mode 100644 src_analysis/TurbSpectra.H create mode 100644 src_analysis/TurbSpectra.cpp diff --git a/src_analysis/Make.package b/src_analysis/Make.package index e9d284030..98d27de23 100644 --- a/src_analysis/Make.package +++ b/src_analysis/Make.package @@ -1,3 +1,5 @@ CEXE_sources += StructFact.cpp +CEXE_sources += TurbSpectra.cpp CEXE_headers += StructFact.H +CEXE_headers += TurbSpectra.H diff --git a/src_analysis/TurbSpectra.H b/src_analysis/TurbSpectra.H new file mode 100644 index 000000000..bb304f97e --- /dev/null +++ b/src_analysis/TurbSpectra.H @@ -0,0 +1,83 @@ +#ifndef _TurbSpectra_H_ +#define _TurbSpectra_H_ + +#include +#include +#include +#include + +// These are for heFFTe / FFTW / cuFFT / rocFFT + +#if defined(USE_HEFFTE) +#include +#else +#ifdef AMREX_USE_CUDA +#include +#elif AMREX_USE_HIP +# if __has_include() // ROCm 5.3+ +# include +# else +# include +# endif +#else +#include +#include +#endif +#endif + +#include + +#include + +#include "common_functions.H" + +#define ALIGN 16 + +using namespace amrex; + +#if defined(USE_HEFFTE) +#else +#ifdef AMREX_USE_CUDA +std::string cufftError (const cufftResult& err); +#endif +#ifdef AMREX_USE_HIP +std::string rocfftError (const rocfft_status err); +void rocfft_status (std::string const& name, rocfft_status status); +#endif +#endif + +#if defined(USE_HEFFTE) +void IntegrateKScalar(BaseFab >& spectral_field, + const std::string& name, const Real& scaling, + const Box& c_local_box, + const Real& sqrtnpts); +void IntegrateKVelocity(BaseFab >& spectral_field, + const std::string& name, const Real& scaling, + const Box& c_local_box); +#else +void IntegrateKScalar(Vector > > >& spectral_field, + const MultiFab& variables_onegrid, + const std::string& name, + const Real& scaling, + const Real& sqrtnpts); +void IntegrateKVelocity(Vector > > >& spectral_field, + const MultiFab& vel_onegrid, + const std::string& name, + const Real& scaling); +#endif + +void TurbSpectrumScalar(const MultiFab& variables, + const amrex::Geometry& geom, + const int& step, + const amrex::Vector& var_scaling, + const amrex::Vector< std::string >& var_names); + +void TurbSpectrumVelDecomp(const MultiFab& vel, + MultiFab& vel_decomp, + const amrex::Geometry& geom, + const int& step, + const amrex::Real& var_scaling, + const amrex::Vector< std::string >& var_names); + + +#endif diff --git a/src_analysis/TurbSpectra.cpp b/src_analysis/TurbSpectra.cpp new file mode 100644 index 000000000..58c5aef22 --- /dev/null +++ b/src_analysis/TurbSpectra.cpp @@ -0,0 +1,1319 @@ +#include "common_functions.H" +#include "TurbSpectra.H" + +#include +#include "AMReX_PlotFileUtil.H" +#include "AMReX_BoxArray.H" + +#if defined(USE_HEFFTE) +#else +#ifdef AMREX_USE_CUDA +std::string cufftError (const cufftResult& err) +{ + switch (err) { + case CUFFT_SUCCESS: return "CUFFT_SUCCESS"; + case CUFFT_INVALID_PLAN: return "CUFFT_INVALID_PLAN"; + case CUFFT_ALLOC_FAILED: return "CUFFT_ALLOC_FAILED"; + case CUFFT_INVALID_TYPE: return "CUFFT_INVALID_TYPE"; + case CUFFT_INVALID_VALUE: return "CUFFT_INVALID_VALUE"; + case CUFFT_INTERNAL_ERROR: return "CUFFT_INTERNAL_ERROR"; 
+ case CUFFT_EXEC_FAILED: return "CUFFT_EXEC_FAILED"; + case CUFFT_SETUP_FAILED: return "CUFFT_SETUP_FAILED"; + case CUFFT_INVALID_SIZE: return "CUFFT_INVALID_SIZE"; + case CUFFT_UNALIGNED_DATA: return "CUFFT_UNALIGNED_DATA"; + default: return std::to_string(err) + " (unknown error code)"; + } +} +#endif + +#ifdef AMREX_USE_HIP +std::string rocfftError (const rocfft_status err) +{ + if (err == rocfft_status_success) { + return std::string("rocfft_status_success"); + } else if (err == rocfft_status_failure) { + return std::string("rocfft_status_failure"); + } else if (err == rocfft_status_invalid_arg_value) { + return std::string("rocfft_status_invalid_arg_value"); + } else if (err == rocfft_status_invalid_dimensions) { + return std::string("rocfft_status_invalid_dimensions"); + } else if (err == rocfft_status_invalid_array_type) { + return std::string("rocfft_status_invalid_array_type"); + } else if (err == rocfft_status_invalid_strides) { + return std::string("rocfft_status_invalid_strides"); + } else if (err == rocfft_status_invalid_distance) { + return std::string("rocfft_status_invalid_distance"); + } else if (err == rocfft_status_invalid_offset) { + return std::string("rocfft_status_invalid_offset"); + } else { + return std::to_string(err) + " (unknown error code)"; + } +} + +void rocfft_status (std::string const& name, rocfft_status status) +{ + if (status != rocfft_status_success) { + amrex::AllPrint() << name + " failed! Error: " + rocfftErrorToString(status) << "\n";; + } +} +#endif +#endif + +#if defined(USE_HEFFTE) +void TurbSpectrumScalar(const MultiFab& variables, + const amrex::Geometry& geom, + const int& step, + const amrex::Vector& var_scaling, + const amrex::Vector< std::string >& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_scaling.size(), "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.local_size() == 1, "TurbSpectrumScalar: Must have one Box per MPI process when using heFFTe"); + + int ncomp = MFTurbVel.nComp(); + + long npts; + Box domain = geom.Domain(); + npts = (domain.length(0)*domain.length(1)*domain.length(2)); + Real sqrtnpts = std::sqrt(npts); + + // get box array and distribution map of variables + DistributionMapping dm = variables.DistributionMap(); + BoxArray ba = variables.boxArray(); + + // since there is 1 MPI rank per box, each MPI rank obtains its local box and the associated boxid + Box local_box; + int local_boxid; + { + for (int i = 0; i < ba.size(); ++i) { + Box b = ba[i]; + // each MPI rank has its own local_box Box and local_boxid ID + if (ParallelDescriptor::MyProc() == dm[i]) { + local_box = b; + local_boxid = i; + } + } + } + + // now each MPI rank works on its own box + // for real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset + + // start by coarsening each box by 2 in the x-direction + Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1))); + + // if the coarsened box's high-x index is even, we shrink the size in 1 in x + // this avoids overlap between coarsened boxes + if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) { + c_local_box.setBig(0,c_local_box.bigEnd(0)-1); + } + // for any boxes that touch the hi-x domain we + // increase the size of boxes 
by 1 in x + // this makes the overall fft dataset have size (Nx/2+1 x Ny x Nz) + if (local_box.bigEnd(0) == geom.Domain().bigEnd(0)) { + c_local_box.growHi(0,1); + } + + // each MPI rank gets storage for its piece of the fft + BaseFab > spectral_field(c_local_box, ncomp, The_Device_Arena()); + + // we only need to build the plan once; track whether we did + heffte_plan fplan; + int r2c_direction = 0; +#ifdef AMREX_USE_CUDA + int status = heffte_plan_create_r2c(heffte::backend::cufft, +#elif AMREX_USE_HIP + int status = heffte_plan_create_r2c(heffte::backend::rocfft, +#else + int status = heffte_plan_create_r2c(heffte::backend::fftw, +#endif + {local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}, + NULL, + {c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}, + NULL, r2c_direction, ParallelDescriptor::Communicator(), &fplan); + + if (status != Heffte_SUCCESS) amrex::Abort("Failed at heffte_plan_create() with error code"); + + for (int comp=0; comp::type; + heffte_forward_d2z(fplan, variables[local_boxid].dataPtr(comp), + reinterpret_cast spectral_field.dataPtr(comp), + Heffte_SCALE_NONE); + + ParallelDescriptor::Barrier(); + + // Integrate spectra over k-shells + IntegrateKScalar(spectral_field,var_names[comp],var_scaling[comp],c_local_box,sqrtnpts); + + } + + heffte_plan_destroy(fplan); +} + +void IntegrateKScalar(BaseFab >& spectral_field, + const std::string& name, const Real& scaling, + const Box& c_local_box, + const Real& sqrtnpts) + +{ + int npts = n_cells[0]/2; + + Gpu::DeviceVector phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); + Gpu::HostVector phisum_host(npts); + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + phisum_ptr[d] = 0.; + phicnt_ptr[d] = 0; + }); + + Array4< GpuComplex > spectral = spectral_field.array(); + ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) + { + if (i <= n_cells[0]/2) { // only half of kx-domain + int ki = i; + int kj = j; + int kk = k; + if (j >= n_cells[1]/2) kj = n_cells[1]-j; + if (k >= n_cells[2]/2) kk = n_cells[2]-k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + Real real = spectral(i,j,k).real(); + Real imag = spectral(i,j,k).imag(); + Real cov = scaling*(1.0/(sqrtnpts*sqrtnpts))*(real*real + imag*imag); + amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); + amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); + } + } + }); + + ParallelDescriptor::Barrier(); + + for (int d=1; d& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.local_size() == 1, "TurbSpectrumVelDecomp: Must have one Box per MPI process when using heFFTe"); + + const GpuArray dx = geom.CellSizeArray(); + + int ncomp = MFTurbVel.nComp(); + + long npts; + Box domain = geom.Domain(); + npts = (domain.length(0)*domain.length(1)*domain.length(2)); + Real sqrtnpts = 
std::sqrt(npts); + + // get box array and distribution map of vel + DistributionMapping dm = vel.DistributionMap(); + BoxArray ba = vel.boxArray(); + + // since there is 1 MPI rank per box, each MPI rank obtains its local box and the associated boxid + Box local_box; + int local_boxid; + { + for (int i = 0; i < ba.size(); ++i) { + Box b = ba[i]; + // each MPI rank has its own local_box Box and local_boxid ID + if (ParallelDescriptor::MyProc() == dm[i]) { + local_box = b; + local_boxid = i; + } + } + } + + // now each MPI rank works on its own box + // for real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset + + // start by coarsening each box by 2 in the x-direction + Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1))); + + // if the coarsened box's high-x index is even, we shrink the size in 1 in x + // this avoids overlap between coarsened boxes + if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) { + c_local_box.setBig(0,c_local_box.bigEnd(0)-1); + } + // for any boxes that touch the hi-x domain we + // increase the size of boxes by 1 in x + // this makes the overall fft dataset have size (Nx/2+1 x Ny x Nz) + if (local_box.bigEnd(0) == geom.Domain().bigEnd(0)) { + c_local_box.growHi(0,1); + } + + // each MPI rank gets storage for its piece of the fft + BaseFab > spectral_field_T(c_local_box, 3, The_Device_Arena()); // total + BaseFab > spectral_field_S(c_local_box, 3, The_Device_Arena()); // solenoidal + BaseFab > spectral_field_D(c_local_box, 3, The_Device_Arena()); // dilatational + + heffte_plan fplan; + + int r2c_direction = 0; +#ifdef AMREX_USE_CUDA + int status = heffte_plan_create_r2c(heffte::backend::cufft, +#elif AMREX_USE_HIP + int status = heffte_plan_create_r2c(heffte::backend::rocfft, +#else + int status = heffte_plan_create_r2c(heffte::backend::fftw, +#endif + {local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}, + NULL, + {c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}, + NULL, r2c_direction, ParallelDescriptor::Communicator(), &fplan); + + if (status != Heffte_SUCCESS) amrex::Abort("Failed at heffte_plan_create() with error code"); + + // ForwardTransform + using heffte_complex = typename heffte::fft_output::type; + + heffte_forward_d2z(fplan, vel[local_boxid].dataPtr(0), + reinterpret_cast spectral_field_T.dataPtr(0), + Heffte_SCALE_NONE); + heffte_forward_d2z(fplan, vel[local_boxid].dataPtr(1), + reinterpret_cast spectral_field_T.dataPtr(1), + Heffte_SCALE_NONE); + heffte_forward_d2z(fplan, vel[local_boxid].dataPtr(2), + reinterpret_cast spectral_field_T.dataPtr(2), + Heffte_SCALE_NONE); + + heffte_plan_destroy(fplan); + + // Decompose velocity field into solenoidal and dilatational + Array4< GpuComplex > spectral_tx = spectral_field_T.array(0,1); + Array4< GpuComplex > spectral_ty = spectral_field_T.array(1,1); + Array4< GpuComplex > spectral_tz = spectral_field_T.array(2,1); + Array4< GpuComplex > spectral_sx = spectral_field_S.array(0,1); + Array4< GpuComplex > spectral_sy = spectral_field_S.array(1,1); + Array4< GpuComplex > spectral_sz = spectral_field_S.array(2,1); + Array4< GpuComplex > spectral_dx = spectral_field_D.array(0,1); + Array4< GpuComplex > spectral_dy = spectral_field_D.array(1,1); + Array4< GpuComplex > spectral_dz = spectral_field_D.array(2,1); + ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) + { + + int nx 
= n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + + Real GxR, GxC, GyR, GyC, GzR, GzC; + + if (i <= nx/2) { + // Gradient Operators + GxR = (cos(2.0*M_PI*i/nx)-1.0)/dx[0]; + GxC = (sin(2.0*M_PI*i/nx)-0.0)/dx[0]; + GyR = (cos(2.0*M_PI*j/ny)-1.0)/dx[1]; + GyC = (sin(2.0*M_PI*j/ny)-0.0)/dx[1]; + GzR = (cos(2.0*M_PI*k/nz)-1.0)/dx[2]; + GzC = (sin(2.0*M_PI*k/nz)-0.0)/dx[2]; + } + else { // conjugate + amrex::Abort("check the code; i should not go beyond bx.length(0)/2"); + } + + // Scale Total velocity FFT components + spectral_tx(i,j,k).real() *= (1.0/sqrtnpts); + spectral_ty(i,j,k).real() *= (1.0/sqrtnpts); + spectral_tz(i,j,k).real() *= (1.0/sqrtnpts); + spectral_tx(i,j,k).imag() *= (1.0/sqrtnpts); + spectral_ty(i,j,k).imag() *= (1.0/sqrtnpts); + spectral_tz(i,j,k).imag() *= (1.0/sqrtnpts); + + // Inverse Laplacian + Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; + + // Divergence of vel + Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + + spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + + spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; + Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + + spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + + spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; + + if (Lap < 1.0e-12) { // zero mode for no bulk motion + spectral_dx(i,j,k).real() = 0.0; + spectral_dy(i,j,k).real() = 0.0; + spectral_dz(i,j,k).real() = 0.0; + spectral_dx(i,j,k).imag() = 0.0; + spectral_dy(i,j,k).imag() = 0.0; + spectral_dz(i,j,k).imag() = 0.0; + } + else { + + // Dilatational velocity + spectral_dx(i,j,k).real() = (divR*GxR + divC*GxC) / Lap; + spectral_dy(i,j,k).real() = (divR*GyR + divC*GyC) / Lap; + spectral_dz(i,j,k).real() = (divR*GzR + divC*GzC) / Lap; + spectral_dx(i,j,k).imag() = (divC*GxR - divR*GxC) / Lap; + spectral_dy(i,j,k).imag() = (divC*GyR - divR*GyC) / Lap; + spectral_dz(i,j,k).imag() = (divC*GzR - divR*GzC) / Lap; + + // Solenoidal velocity + spectral_sx(i,j,k).real() = spectral_tx(i,j,k).real() - spectral_dx(i,j,k).real(); + spectral_sy(i,j,k).real() = spectral_ty(i,j,k).real() - spectral_dy(i,j,k).real(); + spectral_sz(i,j,k).real() = spectral_tz(i,j,k).real() - spectral_dz(i,j,k).real(); + spectral_sx(i,j,k).imag() = spectral_tx(i,j,k).imag() - spectral_dx(i,j,k).imag(); + spectral_sy(i,j,k).imag() = spectral_ty(i,j,k).imag() - spectral_dy(i,j,k).imag(); + spectral_sz(i,j,k).imag() = spectral_tz(i,j,k).imag() - spectral_dz(i,j,k).imag(); + } + + }); + + ParallelDescriptor::Barrier(); + + // Integrate K spectrum for velocities + IntegrateKVelocity(spectral_field_T,"turb_total", scaling,c_local_box); + IntegrateKVelocity(spectral_field_S,"turb_solenoidal",scaling,c_local_box); + IntegrateKVelocity(spectral_field_D,"turb_dilational",scaling,c_local_box); + + // setup plan for inverse FFT + heffte_plan fplanR; +#ifdef AMREX_USE_CUDA + status = heffte_plan_create(heffte::backend::cufft, +#elif AMREX_USE_HIP + status = heffte_plan_create(heffte::backend::rocfft, +#else + status = heffte_plan_create(heffte::backend::fftw, +#endif + {c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}, + NULL, + {local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}, + NULL, ParallelDescriptor::Communicator(), NULL, &fplanR); + + if (status != Heffte_SUCCESS) 
amrex::Abort("Failed at heffte_plan_create() with error code"); + + // inverse Fourier transform solenoidal and dilatational components + heffte_forward_z2d(fplanR, reinterpret_cast spectral_field_S.dataPtr(0), + vel_decomp[local_boxid].dataPtr(0), Heffte_SCALE_NONE); + heffte_forward_z2d(fplanR, reinterpret_cast spectral_field_S.dataPtr(1), + vel_decomp[local_boxid].dataPtr(1), Heffte_SCALE_NONE); + heffte_forward_z2d(fplanR, reinterpret_cast spectral_field_S.dataPtr(2), + vel_decomp[local_boxid].dataPtr(2), Heffte_SCALE_NONE); + heffte_forward_z2d(fplanR, reinterpret_cast spectral_field_D.dataPtr(0), + vel_decomp[local_boxid].dataPtr(3), Heffte_SCALE_NONE); + heffte_forward_z2d(fplanR, reinterpret_cast spectral_field_D.dataPtr(1), + vel_decomp[local_boxid].dataPtr(4), Heffte_SCALE_NONE); + heffte_forward_z2d(fplanR, reinterpret_cast spectral_field_D.dataPtr(2), + vel_decomp[local_boxid].dataPtr(5), Heffte_SCALE_NONE); + + heffte_plan_destroy(fplanR); + + vel_decomp.mult(1.0/sqrtnpts); + +} + +void IntegrateKVelocity(BaseFab >& spectral_field, + const std::string& name, const Real& scaling, + const Box& c_local_box) + +{ + int npts = n_cells[0]/2; + + Gpu::DeviceVector phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); + Gpu::HostVector phisum_host(npts); + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + phisum_ptr[d] = 0.; + phicnt_ptr[d] = 0; + }); + + Array4< GpuComplex > spectralx = spectral_field.array(0,1); + Array4< GpuComplex > spectraly = spectral_field.array(1,1); + Array4< GpuComplex > spectralz = spectral_field.array(2,1); + ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) + { + if (i <= n_cells[0]/2) { // only half of kx-domain + int ki = i; + int kj = j; + int kk = k; + if (j >= n_cells[1]/2) kj = n_cells[1]-j; + if (k >= n_cells[2]/2) kk = n_cells[2]-k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + Real real, imag, cov_x, cov_y, cov_z, cov; + real = spectralx(i,j,k).real(); + imag = spectralx(i,j,k).imag(); + cov_x = scaling*(real*real + imag*imag); + real = spectraly(i,j,k).real(); + imag = spectraly(i,j,k).imag(); + cov_y = scaling*(real*real + imag*imag); + real = spectralz(i,j,k).real(); + imag = spectralz(i,j,k).imag(); + cov_z = scaling*(real*real + imag*imag); + cov = cov_x + cov_y + cov_z; + amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); + amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); + } + } + }); + + ParallelDescriptor::Barrier(); + + for (int d=1; d& var_scaling, + const amrex::Vector< std::string >& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_scaling.size(), "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); + + int ncomp = MFTurbVel.nComp(); + + long npts; + + // Initialize the boxarray "ba_onegrid" from the single box "domain" + BoxArray ba_onegrid; + { + Box domain = geom.Domain(); + ba_onegrid.define(domain); + npts = (domain.length(0)*domain.length(1)*domain.length(2)); + } + Real sqrtnpts = std::sqrt(npts); + DistributionMapping dmap_onegrid(ba_onegrid); + 
MultiFab variables_onegrid; + variables_onegrid.define(ba_onegrid, dmap_onegrid, ncomp, 0); + variables_onegrid.ParallelCopy(vel,0,0,ncomp); + +#ifdef AMREX_USE_CUDA + using FFTplan = cufftHandle; + using FFTcomplex = cuDoubleComplex; +#elif AMREX_USE_HIP + using FFTplan = rocfft_plan; + using FFTcomplex = double2; +#else + using FFTplan = fftw_plan; + using FFTcomplex = fftw_complex; +#endif + + // contain to store FFT - note it is shrunk by "half" in x + Vector > > > spectral_field; + for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) { + + // grab a single box including ghost cell range + Box realspace_bx = mfi.fabbox(); + + // size of box including ghost cell range + IntVect fft_size = realspace_bx.length(); // This will be different for hybrid FFT + + // this is the size of the box, except the 0th component is 'halved plus 1' + IntVect spectral_bx_size = fft_size; + spectral_bx_size[0] = fft_size[0]/2 + 1; + + // spectral box + Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); + + spectral_field.emplace_back(new BaseFab >(spectral_bx,ncomp, + The_Device_Arena())); + spectral_field.back()->setVal(0.0); // touch the memory + } + + // for CUDA builds we only need to build the plan once; track whether we did + Vector forward_plan; + bool built_plan = false; + for (int comp=0; comp + (spectral_field.back()->dataPtr(comp)), + FFTW_ESTIMATE); +#endif + forward_plan.push_back(fplan); + } + } + + ParallelDescriptor::Barrier(); + + // ForwardTransform + for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) { + int i = mfi.LocalIndex(); +#ifdef AMREX_USE_CUDA + cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); + cufftResult result = cufftExecD2Z(forward_plan[i], + variables_onegrid[mfi].dataPtr(comp), + reinterpret_cast + (spectral_field[i]->dataPtr(comp))); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " + << cufftErrorToString(result) << "\n"; + } +#elif AMREX_USE_HIP + rocfft_execution_info execinfo = nullptr; + rocfft_status result = rocfft_execution_info_create(&execinfo); + assert_rocfft_status("rocfft_execution_info_create", result); + + std::size_t buffersize = 0; + result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); + assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + + void* buffer = amrex::The_Arena()->alloc(buffersize); + result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); + assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + + result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); + assert_rocfft_status("rocfft_execution_info_set_stream", result); + + amrex::Real* variables_onegrid_ptr = variables_onegrid[mfi].dataPtr(comp); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr(comp)); + result = rocfft_execute(forward_plan[i], + (void**) &variables_onegrid_ptr, // in + (void**) &spectral_field_ptr, // out + execinfo); + assert_rocfft_status("rocfft_execute", result); + amrex::Gpu::streamSynchronize(); + amrex::The_Arena()->free(buffer); + result = rocfft_execution_info_destroy(execinfo); + assert_rocfft_status("rocfft_execution_info_destroy", result); +#else + fftw_execute(forward_plan[i]); +#endif + } + + // Integrate spectra over k-shells + IntegrateKScalar(spectral_field,variables_onegrid,var_names[comp],var_scaling[comp],sqrtnpts); + } + + // destroy fft plan + for (int i = 0; i < forward_plan.size(); ++i) { +#ifdef AMREX_USE_CUDA + cufftDestroy(forward_plan[i]); +#elif AMREX_USE_HIP + rocfft_plan_destroy(forward_plan[i]); +#else + fftw_destroy_plan(forward_plan[i]); +#endif + } + +} + +void IntegrateKScalar(Vector > > >& spectral_field, + const MultiFab& variables_onegrid, + const std::string& name, + const Real& scaling, + const Real& sqrtnpts) + +{ + int npts = n_cells[0]/2; + Gpu::DeviceVector phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); + + Gpu::HostVector phisum_host(npts); + + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + // Integrate spectra over k-shells + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + phisum_ptr[d] = 0.; + phicnt_ptr[d] = 0; + }); + + for ( MFIter mfi(variables_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.tilebox(); + + Array4< GpuComplex > spectral = (*spectral_field[0]).array(); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + if (i <= bx.length(0)/2) { // only half of kx-domain + int ki = i; + int kj = j; + int kk = k; + if (j >= bx.length(1)/2) kj = bx.length(1)-j; + if (k >= bx.length(2)/2) kk = bx.length(2)-k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + Real real = spectral(i,j,k).real(); + Real imag = spectral(i,j,k).imag(); + Real cov = scaling*(1.0/(sqrtnpts*sqrtnpts))*(real*real + imag*imag); + amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); + amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); + } + } + }); + } + + for (int d=1; d& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, 
"TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); + + const GpuArray dx = geom.CellSizeArray(); + + long npts; + + // Initialize the boxarray "ba_onegrid" from the single box "domain" + BoxArray ba_onegrid; + { + Box domain = geom.Domain(); + ba_onegrid.define(domain); + npts = (domain.length(0)*domain.length(1)*domain.length(2)); + } + Real sqrtnpts = std::sqrt(npts); + DistributionMapping dmap_onegrid(ba_onegrid); + MultiFab vel_onegrid; + vel_onegrid.define(ba_onegrid, dmap_onegrid, 3, 0); + vel_onegrid.ParallelCopy(vel,0,0,3); + +#ifdef AMREX_USE_CUDA + using FFTplan = cufftHandle; + using FFTcomplex = cuDoubleComplex; +#elif AMREX_USE_HIP + using FFTplan = rocfft_plan; + using FFTcomplex = double2; +#else + using FFTplan = fftw_plan; + using FFTcomplex = fftw_complex; +#endif + + // contain to store FFT - note it is shrunk by "half" in x + Vector > > > spectral_field; + Vector > > > spectral_field_S; + Vector > > > spectral_field_D; + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { + + // grab a single box including ghost cell range + Box realspace_bx = mfi.fabbox(); + + // size of box including ghost cell range + IntVect fft_size = realspace_bx.length(); // This will be different for hybrid FFT + + // this is the size of the box, except the 0th component is 'halved plus 1' + IntVect spectral_bx_size = fft_size; + spectral_bx_size[0] = fft_size[0]/2 + 1; + + // spectral box + Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); + + spectral_field.emplace_back(new BaseFab >(spectral_bx,3, + The_Device_Arena())); + spectral_field.back()->setVal(0.0); // touch the memory + + spectral_field_S.emplace_back(new BaseFab >(spectral_bx,3, + The_Device_Arena())); + spectral_field_S.back()->setVal(0.0); // touch the memory + + spectral_field_D.emplace_back(new BaseFab >(spectral_bx,3, + The_Device_Arena())); + spectral_field_D.back()->setVal(0.0); // touch the memory + } + + // for CUDA builds we only need to build the plan once; track whether we did + Vector forward_plan; + bool built_plan = false; + for (int comp=0; comp<3; comp++) { + if (!built_plan) { + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { + FFTplan fplan; + +#ifdef AMREX_USE_CUDA // CUDA + cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " cufftplan3d forward failed! 
Error: " + << cufftErrorToString(result) << "\n"; + } + built_plan = true; +#elif AMREX_USE_HIP // HIP + const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; + rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, + rocfft_transform_type_real_forward, rocfft_precision_double, + 3, lengths, 1, nullptr); + assert_rocfft_status("rocfft_plan_create", result); + built_plan = true; +#else // host + fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], + vel_onegrid[mfi].dataPtr(comp), + reinterpret_cast + (spectral_field.back()->dataPtr(comp)), + FFTW_ESTIMATE); +#endif + forward_plan.push_back(fplan); + } + } + + ParallelDescriptor::Barrier(); + + // ForwardTransform + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { + int i = mfi.LocalIndex(); +#ifdef AMREX_USE_CUDA + cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); + cufftResult result = cufftExecD2Z(forward_plan[i], + vel_onegrid[mfi].dataPtr(comp), + reinterpret_cast + (spectral_field[i]->dataPtr(comp))); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " forward transform using cufftExec failed! Error: " + << cufftErrorToString(result) << "\n"; + } +#elif AMREX_USE_HIP + rocfft_execution_info execinfo = nullptr; + rocfft_status result = rocfft_execution_info_create(&execinfo); + assert_rocfft_status("rocfft_execution_info_create", result); + + std::size_t buffersize = 0; + result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); + assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + + void* buffer = amrex::The_Arena()->alloc(buffersize); + result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); + assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + + result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); + assert_rocfft_status("rocfft_execution_info_set_stream", result); + + amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(comp); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr(comp)); + result = rocfft_execute(forward_plan[i], + (void**) &vel_onegrid_ptr, // in + (void**) &spectral_field_ptr, // out + execinfo); + assert_rocfft_status("rocfft_execute", result); + amrex::Gpu::streamSynchronize(); + amrex::The_Arena()->free(buffer); + result = rocfft_execution_info_destroy(execinfo); + assert_rocfft_status("rocfft_execution_info_destroy", result); +#else + fftw_execute(forward_plan[i]); +#endif + } + } + + // destroy fft plan + for (int i = 0; i < forward_plan.size(); ++i) { +#ifdef AMREX_USE_CUDA + cufftDestroy(forward_plan[i]); +#elif AMREX_USE_HIP + rocfft_plan_destroy(forward_plan[i]); +#else + fftw_destroy_plan(forward_plan[i]); +#endif + } + + // Decompose velocity field into solenoidal and dilatational + for ( MFIter mfi(vel_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.tilebox(); + Array4< GpuComplex > spectral_tx = (*spectral_field[0]) .array(0,1); + Array4< GpuComplex > spectral_ty = (*spectral_field[0]) .array(1,1); + Array4< GpuComplex > spectral_tz = (*spectral_field[0]) .array(2,1); + Array4< GpuComplex > spectral_sx = (*spectral_field_S[0]).array(0,1); + Array4< GpuComplex > spectral_sy = (*spectral_field_S[0]).array(1,1); + Array4< GpuComplex > spectral_sz = (*spectral_field_S[0]).array(2,1); + Array4< GpuComplex > spectral_dx = (*spectral_field_D[0]).array(0,1); + Array4< GpuComplex > spectral_dy = (*spectral_field_D[0]).array(1,1); + Array4< 
GpuComplex > spectral_dz = (*spectral_field_D[0]).array(2,1); + + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + + Real GxR, GxC, GyR, GyC, GzR, GzC; + + if (i <= nx/2) { + // Gradient Operators + GxR = (cos(2.0*M_PI*i/nx)-1.0)/dx[0]; + GxC = (sin(2.0*M_PI*i/nx)-0.0)/dx[0]; + GyR = (cos(2.0*M_PI*j/ny)-1.0)/dx[1]; + GyC = (sin(2.0*M_PI*j/ny)-0.0)/dx[1]; + GzR = (cos(2.0*M_PI*k/nz)-1.0)/dx[2]; + GzC = (sin(2.0*M_PI*k/nz)-0.0)/dx[2]; + } + else { // conjugate + amrex::Abort("check the code; i should not go beyond bx.length(0)/2"); + } + + // Scale Total velocity FFT components + spectral_tx(i,j,k).real() *= (1.0/sqrtnpts); + spectral_ty(i,j,k).real() *= (1.0/sqrtnpts); + spectral_tz(i,j,k).real() *= (1.0/sqrtnpts); + spectral_tx(i,j,k).imag() *= (1.0/sqrtnpts); + spectral_ty(i,j,k).imag() *= (1.0/sqrtnpts); + spectral_tz(i,j,k).imag() *= (1.0/sqrtnpts); + + // Inverse Laplacian + Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; + + // Divergence of vel + Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + + spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + + spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; + Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + + spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + + spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; + + if (Lap < 1.0e-12) { // zero mode for no bulk motion + spectral_dx(i,j,k).real() = 0.0; + spectral_dy(i,j,k).real() = 0.0; + spectral_dz(i,j,k).real() = 0.0; + spectral_dx(i,j,k).imag() = 0.0; + spectral_dy(i,j,k).imag() = 0.0; + spectral_dz(i,j,k).imag() = 0.0; + } + else { + // Dilatational velocity + spectral_dx(i,j,k).real() = (divR*GxR + divC*GxC) / Lap; + spectral_dy(i,j,k).real() = (divR*GyR + divC*GyC) / Lap; + spectral_dz(i,j,k).real() = (divR*GzR + divC*GzC) / Lap; + spectral_dx(i,j,k).imag() = (divC*GxR - divR*GxC) / Lap; + spectral_dy(i,j,k).imag() = (divC*GyR - divR*GyC) / Lap; + spectral_dz(i,j,k).imag() = (divC*GzR - divR*GzC) / Lap; + + // Solenoidal velocity + spectral_sx(i,j,k).real() = spectral_tx(i,j,k).real() - spectral_dx(i,j,k).real(); + spectral_sy(i,j,k).real() = spectral_ty(i,j,k).real() - spectral_dy(i,j,k).real(); + spectral_sz(i,j,k).real() = spectral_tz(i,j,k).real() - spectral_dz(i,j,k).real(); + spectral_sx(i,j,k).imag() = spectral_tx(i,j,k).imag() - spectral_dx(i,j,k).imag(); + spectral_sy(i,j,k).imag() = spectral_ty(i,j,k).imag() - spectral_dy(i,j,k).imag(); + spectral_sz(i,j,k).imag() = spectral_tz(i,j,k).imag() - spectral_dz(i,j,k).imag(); + } + }); + } + + ParallelDescriptor::Barrier(); + + // Integrate K spectrum for velocities + IntegrateKVelocity(spectral_field , vel_onegrid, "turb_total" ,scaling); + IntegrateKVelocity(spectral_field_S, vel_onegrid, "turb_solenoidal",scaling); + IntegrateKVelocity(spectral_field_D, vel_onegrid, "turb_dilational",scaling); + + // Create one-grid array to store IFFT velocities + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 6, 0); + vel_decomp_onegrid.setVal(0.0);; + + // Inverse FFT to get solenoidal velocity + Vector backward_planS; + built_plan = false; + for (int comp=0; comp<3; comp++) { + if (!built_plan) { + for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { + FFTplan fplan; + +#ifdef AMREX_USE_CUDA // CUDA + cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], 
fft_size[0], CUFFT_Z2D); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " cufftplan3d forward failed! Error: " + << cufftErrorToString(result) << "\n"; + } + built_plan = true; +#elif AMREX_USE_HIP // HIP + const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; + rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, + rocfft_transform_type_real_inverse, rocfft_precision_double, + 3, lengths, 1, nullptr); + assert_rocfft_status("rocfft_plan_create", result); + built_plan = true; +#else // host + fplan = fftw_plan_dft_c2r_3d(fft_size[2], fft_size[1], fft_size[0], + reinterpret_cast + (spectral_field_S.back()->dataPtr(comp)), + vel_decomp_onegrid[mfi].dataPtr(comp), + FFTW_ESTIMATE); +#endif + backward_planS.push_back(fplan); + } + } + + ParallelDescriptor::Barrier(); + + // Backward Transform + for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { + int i = mfi.LocalIndex(); +#ifdef AMREX_USE_CUDA + cufftSetStream(backward_planS[i], amrex::Gpu::gpuStream()); + cufftResult result = cufftExecZ2D(backward_planS[i], + reinterpret_cast + (spectral_field_S[i]->dataPtr(comp)), + vel_decomp_onegrid[mfi].dataPtr(comp)); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " forward transform using cufftExec failed! Error: " + << cufftErrorToString(result) << "\n"; + } +#elif AMREX_USE_HIP + rocfft_execution_info execinfo = nullptr; + rocfft_status result = rocfft_execution_info_create(&execinfo); + assert_rocfft_status("rocfft_execution_info_create", result); + + std::size_t buffersize = 0; + result = rocfft_plan_get_work_buffer_size(backward_planS[i], &buffersize); + assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + + void* buffer = amrex::The_Arena()->alloc(buffersize); + result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); + assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + + result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); + assert_rocfft_status("rocfft_execution_info_set_stream", result); + + amrex::Real* vel_onegrid_ptr = vel_decomp_onegrid[mfi].dataPtr(comp); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field_S[i]->dataPtr(comp)); + result = rocfft_execute(backward_planS[i], + (void**) &vel_onegrid_ptr, // in + (void**) &spectral_field_ptr, // out + execinfo); + assert_rocfft_status("rocfft_execute", result); + amrex::Gpu::streamSynchronize(); + amrex::The_Arena()->free(buffer); + result = rocfft_execution_info_destroy(execinfo); + assert_rocfft_status("rocfft_execution_info_destroy", result); +#else + fftw_execute(backward_planS[i]); +#endif + } + } + + // destroy fft plan + for (int i = 0; i < backward_planS.size(); ++i) { +#ifdef AMREX_USE_CUDA + cufftDestroy(backward_planS[i]); +#elif AMREX_USE_HIP + rocfft_plan_destroy(backward_planS[i]); +#else + fftw_destroy_plan(backward_planS[i]); +#endif + } + + + // Inverse FFT to get dilatational velocity + Vector backward_planD; + built_plan = false; + for (int comp=0; comp<3; comp++) { + if (!built_plan) { + for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { + FFTplan fplan; + +#ifdef AMREX_USE_CUDA // CUDA + cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " cufftplan3d forward failed! 
Error: " + << cufftErrorToString(result) << "\n"; + } + built_plan = true; +#elif AMREX_USE_HIP // HIP + const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; + rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, + rocfft_transform_type_real_inverse, rocfft_precision_double, + 3, lengths, 1, nullptr); + assert_rocfft_status("rocfft_plan_create", result); + built_plan = true; +#else // host + fplan = fftw_plan_dft_c2r_3d(fft_size[2], fft_size[1], fft_size[0], + reinterpret_cast + (spectral_field_D.back()->dataPtr(comp)), + vel_decomp_onegrid[mfi].dataPtr(comp+3), + FFTW_ESTIMATE); +#endif + backward_planD.push_back(fplan); + } + } + + ParallelDescriptor::Barrier(); + + // Backward Transform + for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { + int i = mfi.LocalIndex(); +#ifdef AMREX_USE_CUDA + cufftSetStream(backward_planD[i], amrex::Gpu::gpuStream()); + cufftResult result = cufftExecZ2D(backward_planD[i], + reinterpret_cast + (spectral_field_D[i]->dataPtr(comp)), + vel_decomp_onegrid[mfi].dataPtr(comp+3)); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " forward transform using cufftExec failed! Error: " + << cufftErrorToString(result) << "\n"; + } +#elif AMREX_USE_HIP + rocfft_execution_info execinfo = nullptr; + rocfft_status result = rocfft_execution_info_create(&execinfo); + assert_rocfft_status("rocfft_execution_info_create", result); + + std::size_t buffersize = 0; + result = rocfft_plan_get_work_buffer_size(backward_planD[i], &buffersize); + assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + + void* buffer = amrex::The_Arena()->alloc(buffersize); + result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); + assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + + result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); + assert_rocfft_status("rocfft_execution_info_set_stream", result); + + amrex::Real* vel_onegrid_ptr = vel_decomp_onegrid[mfi].dataPtr(comp+3); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field_D[i]->dataPtr(comp)); + result = rocfft_execute(backward_planD[i], + (void**) &vel_onegrid_ptr, // in + (void**) &spectral_field_ptr, // out + execinfo); + assert_rocfft_status("rocfft_execute", result); + amrex::Gpu::streamSynchronize(); + amrex::The_Arena()->free(buffer); + result = rocfft_execution_info_destroy(execinfo); + assert_rocfft_status("rocfft_execution_info_destroy", result); +#else + fftw_execute(backward_planD[i]); +#endif + } + } + + // destroy fft plan + for (int i = 0; i < backward_planD.size(); ++i) { +#ifdef AMREX_USE_CUDA + cufftDestroy(backward_planD[i]); +#elif AMREX_USE_HIP + rocfft_plan_destroy(backward_planD[i]); +#else + fftw_destroy_plan(backward_planD[i]); +#endif + } + + // copy into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,0,6); + vel_decomp.mult(1.0/sqrtnpts); +} + +void IntegrateKVelocity(Vector > > >& spectral_field, + const MultiFab& vel_onegrid, + const std::string& name, + const Real& scaling) +{ + int npts = n_cells[0]/2; + + Gpu::DeviceVector phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); + Gpu::HostVector phisum_host(npts); + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + // Integrate spectra over k-shells + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + phisum_ptr[d] = 0.; + phicnt_ptr[d] = 0; + }); + + 
for ( MFIter mfi(vel_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.tilebox(); + + Array4< GpuComplex > spectralx = (*spectral_field[0]).array(0,1); + Array4< GpuComplex > spectraly = (*spectral_field[0]).array(1,1); + Array4< GpuComplex > spectralz = (*spectral_field[0]).array(2,1); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + if (i <= bx.length(0)/2) { // only half of kx-domain + int ki = i; + int kj = j; + int kk = k; + if (j >= bx.length(1)/2) kj = bx.length(1)-j; + if (k >= bx.length(2)/2) kk = bx.length(2)-k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + Real real, imag, cov_x, cov_y, cov_z, cov; + real = spectralx(i,j,k).real(); + imag = spectralx(i,j,k).imag(); + cov_x = scaling*(real*real + imag*imag); + real = spectraly(i,j,k).real(); + imag = spectraly(i,j,k).imag(); + cov_y = scaling*(real*real + imag*imag); + real = spectralz(i,j,k).real(); + imag = spectralz(i,j,k).imag(); + cov_z = scaling*(real*real + imag*imag); + cov = cov_x + cov_y + cov_z; + amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); + amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); + } + } + }); + } + + for (int d=1; d var_names_turbVelTotal{"ux","uy","uz"}; - Vector var_scaling_turbVelTotal(3, dVolinv); - amrex::Vector< int > s_pairA_turbVelTotal(3); - amrex::Vector< int > s_pairB_turbVelTotal(3); - for (int d=0; d<3; ++d) { - s_pairA_turbVelTotal[d] = d; - s_pairB_turbVelTotal[d] = d; - } - - Vector var_scaling_turbVelDecomp(6, dVolinv); - - Vector< std::string > var_names_turbScalar{"rho","tenp","press"}; - Vector var_scaling_turbScalar(3, dVolinv); - amrex::Vector< int > s_pairA_turbScalar(3); - amrex::Vector< int > s_pairB_turbScalar(3); - for (int d=0; d<3; ++d) { - s_pairA_turbScalar[d] = d; - s_pairB_turbScalar[d] = d; - } -#endif - // object for turbulence forcing TurbForcingComp turbforce; @@ -893,21 +853,10 @@ void main_driver(const char* argv) #if defined(TURB) if (turbForcing >= 1) { - - structFactMFTurbVel.define(ba, dmap, 3, 0); - structFactMFTurbScalar.define(ba, dmap, 6, 0); + MFTurbVel.define(ba, dmap, 3, 0); + MFTurbScalar.define(ba, dmap, 6, 0); vel_decomp.define(ba, dmap, 6, 0); vel_decomp.setVal(0.0); - - turbStructFactVelTotal.define(ba,dmap, - var_names_turbVelTotal,var_scaling_turbVelTotal, - s_pairA_turbVelTotal,s_pairB_turbVelTotal); - turbStructFactScalar.define(ba,dmap, - var_names_turbScalar,var_scaling_turbScalar, - s_pairA_turbScalar,s_pairB_turbScalar); - turbStructFactVelDecomp.defineDecomp(ba,dmap, - var_names_turbVelTotal,var_scaling_turbVelDecomp, - s_pairA_turbVelTotal,s_pairB_turbVelTotal); } #endif @@ -1178,27 +1127,19 @@ void main_driver(const char* argv) // copy velocities into structFactMFTurb for(int d=0; d var_names_turbVel{"vel_total","vel_solenoidal","vel_dilation"}; + TurbSpectrumVelDecomp(MFTurbVel, vel_decomp, geom, step, var_names_turbVel); - // scalars - turbStructFactScalar.FortStructure(structFactMFTurbScalar,geom,1); - turbStructFactScalar.CallFinalize(geom); - turbStructFactScalar.IntegratekShellsScalar(step,geom,var_names_turbScalar); + // scalars + Vector< std::string > var_names_turbScalar{"rho","tenp","press"}; + TurbSpectrumScalar(MFTurbScalar, geom, step, var_names_turbScalar); } #endif } From 4d6a4b2c13fa468a0a68e459c5ff22780c21a613 Mon Sep 17 00:00:00 2001 From: isriva Date: Wed, 27 Sep 2023 08:07:59 -0700 Subject: [PATCH 002/151] heffte compiles fine both on host 
and cuda device --- exec/compressible_stag/GNUmakefile | 13 ++- src_analysis/TurbSpectra.H | 15 +-- src_analysis/TurbSpectra.cpp | 157 +++++++++++++------------- src_compressible_stag/main_driver.cpp | 14 ++- 4 files changed, 110 insertions(+), 89 deletions(-) diff --git a/exec/compressible_stag/GNUmakefile b/exec/compressible_stag/GNUmakefile index bb25a44fe..3f6ff4485 100644 --- a/exec/compressible_stag/GNUmakefile +++ b/exec/compressible_stag/GNUmakefile @@ -14,6 +14,7 @@ MAX_SPEC = 8 USE_PARTICLES = FALSE DO_TURB = FALSE +USE_HEFFTE = FALSE include $(AMREX_HOME)/Tools/GNUMake/Make.defs @@ -53,8 +54,16 @@ INCLUDE_LOCATIONS += ../../src_analysis/ include $(AMREX_HOME)/Tools/GNUMake/Make.rules -ifeq ($(findstring cgpu, $(HOST)), cgpu) - CXXFLAGS += $(FFTW) +ifeq ($(USE_HEFFTE),TRUE) + ifeq ($(USE_CUDA),TRUE) + HEFFTE_HOME ?= ../../../heffte/build_gpu + else + HEFFTE_HOME ?= ../../../heffte/build + endif + libraries += -lheffte + VPATH_LOCATIONS += $(HEFFTE_HOME)/include + INCLUDE_LOCATIONS += $(HEFFTE_HOME)/include + LIBRARY_LOCATIONS += $(HEFFTE_HOME)/lib endif ifeq ($(USE_CUDA),TRUE) diff --git a/src_analysis/TurbSpectra.H b/src_analysis/TurbSpectra.H index bb304f97e..3952330b4 100644 --- a/src_analysis/TurbSpectra.H +++ b/src_analysis/TurbSpectra.H @@ -35,8 +35,6 @@ using namespace amrex; -#if defined(USE_HEFFTE) -#else #ifdef AMREX_USE_CUDA std::string cufftError (const cufftResult& err); #endif @@ -44,26 +42,29 @@ std::string cufftError (const cufftResult& err); std::string rocfftError (const rocfft_status err); void rocfft_status (std::string const& name, rocfft_status status); #endif -#endif #if defined(USE_HEFFTE) void IntegrateKScalar(BaseFab >& spectral_field, const std::string& name, const Real& scaling, const Box& c_local_box, - const Real& sqrtnpts); + const Real& sqrtnpts, + const int& step); void IntegrateKVelocity(BaseFab >& spectral_field, const std::string& name, const Real& scaling, - const Box& c_local_box); + const Box& c_local_box, + const int& step); #else void IntegrateKScalar(Vector > > >& spectral_field, const MultiFab& variables_onegrid, const std::string& name, const Real& scaling, - const Real& sqrtnpts); + const Real& sqrtnpts, + const int& step); void IntegrateKVelocity(Vector > > >& spectral_field, const MultiFab& vel_onegrid, const std::string& name, - const Real& scaling); + const Real& scaling, + const int& step); #endif void TurbSpectrumScalar(const MultiFab& variables, diff --git a/src_analysis/TurbSpectra.cpp b/src_analysis/TurbSpectra.cpp index 58c5aef22..bd3d3774e 100644 --- a/src_analysis/TurbSpectra.cpp +++ b/src_analysis/TurbSpectra.cpp @@ -5,8 +5,6 @@ #include "AMReX_PlotFileUtil.H" #include "AMReX_BoxArray.H" -#if defined(USE_HEFFTE) -#else #ifdef AMREX_USE_CUDA std::string cufftError (const cufftResult& err) { @@ -53,22 +51,21 @@ std::string rocfftError (const rocfft_status err) void rocfft_status (std::string const& name, rocfft_status status) { if (status != rocfft_status_success) { - amrex::AllPrint() << name + " failed! Error: " + rocfftErrorToString(status) << "\n";; + amrex::AllPrint() << name + " failed! 
Error: " + rocfftError(status) << "\n";; } } #endif -#endif #if defined(USE_HEFFTE) void TurbSpectrumScalar(const MultiFab& variables, const amrex::Geometry& geom, const int& step, - const amrex::Vector& var_scaling, + const amrex::Vector& scaling, const amrex::Vector< std::string >& var_names) { BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_scaling.size(), "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.local_size() == 1, "TurbSpectrumScalar: Must have one Box per MPI process when using heFFTe"); int ncomp = MFTurbVel.nComp(); @@ -146,7 +143,7 @@ void TurbSpectrumScalar(const MultiFab& variables, ParallelDescriptor::Barrier(); // Integrate spectra over k-shells - IntegrateKScalar(spectral_field,var_names[comp],var_scaling[comp],c_local_box,sqrtnpts); + IntegrateKScalar(spectral_field,var_names[comp],scaling[comp],c_local_box,sqrtnpts,step); } @@ -156,7 +153,8 @@ void TurbSpectrumScalar(const MultiFab& variables, void IntegrateKScalar(BaseFab >& spectral_field, const std::string& name, const Real& scaling, const Box& c_local_box, - const Real& sqrtnpts) + const Real& sqrtnpts, + const int& step) { int npts = n_cells[0]/2; @@ -233,7 +231,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, MultiFab& vel_decomp, const amrex::Geometry& geom, const int& step, - const amrex::Real& var_scaling, + const amrex::Real& scaling, const amrex::Vector< std::string >& var_names) { BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); @@ -377,30 +375,29 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; if (Lap < 1.0e-12) { // zero mode for no bulk motion - spectral_dx(i,j,k).real() = 0.0; - spectral_dy(i,j,k).real() = 0.0; - spectral_dz(i,j,k).real() = 0.0; - spectral_dx(i,j,k).imag() = 0.0; - spectral_dy(i,j,k).imag() = 0.0; - spectral_dz(i,j,k).imag() = 0.0; + spectral_dx(i,j,k) *= 0.0; + spectral_dy(i,j,k) *= 0.0; + spectral_dz(i,j,k) *= 0.0; } else { // Dilatational velocity - spectral_dx(i,j,k).real() = (divR*GxR + divC*GxC) / Lap; - spectral_dy(i,j,k).real() = (divR*GyR + divC*GyC) / Lap; - spectral_dz(i,j,k).real() = (divR*GzR + divC*GzC) / Lap; - spectral_dx(i,j,k).imag() = (divC*GxR - divR*GxC) / Lap; - spectral_dy(i,j,k).imag() = (divC*GyR - divR*GyC) / Lap; - spectral_dz(i,j,k).imag() = (divC*GzR - divR*GzC) / Lap; + GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, + (divC*GxR - divR*GxC) / Lap); + spectral_dx(i,j,k) = copy_dx; + + GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, + (divC*GyR - divR*GyC) / Lap); + spectral_dy(i,j,k) = copy_dy; + + GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, + (divC*GzR - divR*GzC) / Lap); + spectral_dz(i,j,k) = copy_dz; // Solenoidal velocity - spectral_sx(i,j,k).real() = spectral_tx(i,j,k).real() - spectral_dx(i,j,k).real(); - spectral_sy(i,j,k).real() = spectral_ty(i,j,k).real() - spectral_dy(i,j,k).real(); - spectral_sz(i,j,k).real() = spectral_tz(i,j,k).real() - spectral_dz(i,j,k).real(); - spectral_sx(i,j,k).imag() = spectral_tx(i,j,k).imag() - spectral_dx(i,j,k).imag(); - 
spectral_sy(i,j,k).imag() = spectral_ty(i,j,k).imag() - spectral_dy(i,j,k).imag(); - spectral_sz(i,j,k).imag() = spectral_tz(i,j,k).imag() - spectral_dz(i,j,k).imag(); + spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); + spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); + spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); } }); @@ -408,9 +405,9 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, ParallelDescriptor::Barrier(); // Integrate K spectrum for velocities - IntegrateKVelocity(spectral_field_T,"turb_total", scaling,c_local_box); - IntegrateKVelocity(spectral_field_S,"turb_solenoidal",scaling,c_local_box); - IntegrateKVelocity(spectral_field_D,"turb_dilational",scaling,c_local_box); + IntegrateKVelocity(spectral_field_T,"turb_total", scaling,c_local_box,step); + IntegrateKVelocity(spectral_field_S,"turb_solenoidal",scaling,c_local_box,step); + IntegrateKVelocity(spectral_field_D,"turb_dilational",scaling,c_local_box,step); // setup plan for inverse FFT heffte_plan fplanR; @@ -452,7 +449,8 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, void IntegrateKVelocity(BaseFab >& spectral_field, const std::string& name, const Real& scaling, - const Box& c_local_box) + const Box& c_local_box, + const int& step) { int npts = n_cells[0]/2; @@ -540,14 +538,14 @@ void IntegrateKVelocity(BaseFab >& spectral_field, void TurbSpectrumScalar(const MultiFab& variables, const amrex::Geometry& geom, const int& step, - const amrex::Vector& var_scaling, + const amrex::Vector& scaling, const amrex::Vector< std::string >& var_names) { BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_scaling.size(), "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); - int ncomp = MFTurbVel.nComp(); + int ncomp = variables.nComp(); long npts; @@ -562,7 +560,7 @@ void TurbSpectrumScalar(const MultiFab& variables, DistributionMapping dmap_onegrid(ba_onegrid); MultiFab variables_onegrid; variables_onegrid.define(ba_onegrid, dmap_onegrid, ncomp, 0); - variables_onegrid.ParallelCopy(vel,0,0,ncomp); + variables_onegrid.ParallelCopy(variables,0,0,ncomp); #ifdef AMREX_USE_CUDA using FFTplan = cufftHandle; @@ -575,15 +573,19 @@ void TurbSpectrumScalar(const MultiFab& variables, using FFTcomplex = fftw_complex; #endif + + // size of box including ghost cell range + IntVect fft_size; + // contain to store FFT - note it is shrunk by "half" in x Vector > > > spectral_field; for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) { // grab a single box including ghost cell range Box realspace_bx = mfi.fabbox(); - + // size of box including ghost cell range - IntVect fft_size = realspace_bx.length(); // This will be different for hybrid FFT + fft_size = realspace_bx.length(); // This will be different for hybrid FFT // this is the size of the box, except the 0th component is 'halved plus 1' IntVect spectral_bx_size = fft_size; @@ -609,7 +611,7 @@ void TurbSpectrumScalar(const MultiFab& variables, cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); if (result != CUFFT_SUCCESS) { amrex::AllPrint() << " cufftplan3d forward 
failed! Error: " - << cufftErrorToString(result) << "\n"; + << cufftError(result) << "\n"; } built_plan = true; #elif AMREX_USE_HIP // HIP @@ -643,7 +645,7 @@ void TurbSpectrumScalar(const MultiFab& variables, (spectral_field[i]->dataPtr(comp))); if (result != CUFFT_SUCCESS) { amrex::AllPrint() << " forward transform using cufftExec failed! Error: " - << cufftErrorToString(result) << "\n"; + << cufftError(result) << "\n"; } #elif AMREX_USE_HIP rocfft_execution_info execinfo = nullptr; @@ -678,7 +680,7 @@ void TurbSpectrumScalar(const MultiFab& variables, } // Integrate spectra over k-shells - IntegrateKScalar(spectral_field,variables_onegrid,var_names[comp],var_scaling[comp],sqrtnpts); + IntegrateKScalar(spectral_field,variables_onegrid,var_names[comp],scaling[comp],sqrtnpts,step); } // destroy fft plan @@ -698,7 +700,8 @@ void IntegrateKScalar(Vector > > >& spe const MultiFab& variables_onegrid, const std::string& name, const Real& scaling, - const Real& sqrtnpts) + const Real& sqrtnpts, + const int& step) { int npts = n_cells[0]/2; @@ -781,7 +784,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, MultiFab& vel_decomp, const amrex::Geometry& geom, const int& step, - const amrex::Real& var_scaling, + const amrex::Real& scaling, const amrex::Vector< std::string >& var_names) { BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); @@ -816,6 +819,9 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, using FFTcomplex = fftw_complex; #endif + // size of box including ghost cell range + IntVect fft_size; + // contain to store FFT - note it is shrunk by "half" in x Vector > > > spectral_field; Vector > > > spectral_field_S; @@ -826,7 +832,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, Box realspace_bx = mfi.fabbox(); // size of box including ghost cell range - IntVect fft_size = realspace_bx.length(); // This will be different for hybrid FFT + fft_size = realspace_bx.length(); // This will be different for hybrid FFT // this is the size of the box, except the 0th component is 'halved plus 1' IntVect spectral_bx_size = fft_size; @@ -860,7 +866,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); if (result != CUFFT_SUCCESS) { amrex::AllPrint() << " cufftplan3d forward failed! Error: " - << cufftErrorToString(result) << "\n"; + << cufftError(result) << "\n"; } built_plan = true; #elif AMREX_USE_HIP // HIP @@ -894,7 +900,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, (spectral_field[i]->dataPtr(comp))); if (result != CUFFT_SUCCESS) { amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " - << cufftErrorToString(result) << "\n"; + << cufftError(result) << "\n"; } #elif AMREX_USE_HIP rocfft_execution_info execinfo = nullptr; @@ -977,12 +983,9 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, } // Scale Total velocity FFT components - spectral_tx(i,j,k).real() *= (1.0/sqrtnpts); - spectral_ty(i,j,k).real() *= (1.0/sqrtnpts); - spectral_tz(i,j,k).real() *= (1.0/sqrtnpts); - spectral_tx(i,j,k).imag() *= (1.0/sqrtnpts); - spectral_ty(i,j,k).imag() *= (1.0/sqrtnpts); - spectral_tz(i,j,k).imag() *= (1.0/sqrtnpts); + spectral_tx(i,j,k) *= (1.0/sqrtnpts); + spectral_ty(i,j,k) *= (1.0/sqrtnpts); + spectral_tz(i,j,k) *= (1.0/sqrtnpts); // Inverse Laplacian Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; @@ -996,29 +999,28 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; if (Lap < 1.0e-12) { // zero mode for no bulk motion - spectral_dx(i,j,k).real() = 0.0; - spectral_dy(i,j,k).real() = 0.0; - spectral_dz(i,j,k).real() = 0.0; - spectral_dx(i,j,k).imag() = 0.0; - spectral_dy(i,j,k).imag() = 0.0; - spectral_dz(i,j,k).imag() = 0.0; + spectral_dx(i,j,k) *= 0.0; + spectral_dy(i,j,k) *= 0.0; + spectral_dz(i,j,k) *= 0.0; } else { // Dilatational velocity - spectral_dx(i,j,k).real() = (divR*GxR + divC*GxC) / Lap; - spectral_dy(i,j,k).real() = (divR*GyR + divC*GyC) / Lap; - spectral_dz(i,j,k).real() = (divR*GzR + divC*GzC) / Lap; - spectral_dx(i,j,k).imag() = (divC*GxR - divR*GxC) / Lap; - spectral_dy(i,j,k).imag() = (divC*GyR - divR*GyC) / Lap; - spectral_dz(i,j,k).imag() = (divC*GzR - divR*GzC) / Lap; + GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, + (divC*GxR - divR*GxC) / Lap); + spectral_dx(i,j,k) = copy_dx; + + GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, + (divC*GyR - divR*GyC) / Lap); + spectral_dy(i,j,k) = copy_dy; + + GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, + (divC*GzR - divR*GzC) / Lap); + spectral_dz(i,j,k) = copy_dz; // Solenoidal velocity - spectral_sx(i,j,k).real() = spectral_tx(i,j,k).real() - spectral_dx(i,j,k).real(); - spectral_sy(i,j,k).real() = spectral_ty(i,j,k).real() - spectral_dy(i,j,k).real(); - spectral_sz(i,j,k).real() = spectral_tz(i,j,k).real() - spectral_dz(i,j,k).real(); - spectral_sx(i,j,k).imag() = spectral_tx(i,j,k).imag() - spectral_dx(i,j,k).imag(); - spectral_sy(i,j,k).imag() = spectral_ty(i,j,k).imag() - spectral_dy(i,j,k).imag(); - spectral_sz(i,j,k).imag() = spectral_tz(i,j,k).imag() - spectral_dz(i,j,k).imag(); + spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); + spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); + spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); } }); } @@ -1026,9 +1028,9 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, ParallelDescriptor::Barrier(); // Integrate K spectrum for velocities - IntegrateKVelocity(spectral_field , vel_onegrid, "turb_total" ,scaling); - IntegrateKVelocity(spectral_field_S, vel_onegrid, "turb_solenoidal",scaling); - IntegrateKVelocity(spectral_field_D, vel_onegrid, "turb_dilational",scaling); + IntegrateKVelocity(spectral_field , vel_onegrid, "turb_total" ,scaling,step); + IntegrateKVelocity(spectral_field_S, vel_onegrid, "turb_solenoidal",scaling,step); + IntegrateKVelocity(spectral_field_D, vel_onegrid, "turb_dilational",scaling,step); // Create one-grid array to store IFFT velocities MultiFab vel_decomp_onegrid; @@ -1047,7 +1049,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], 
CUFFT_Z2D); if (result != CUFFT_SUCCESS) { amrex::AllPrint() << " cufftplan3d forward failed! Error: " - << cufftErrorToString(result) << "\n"; + << cufftError(result) << "\n"; } built_plan = true; #elif AMREX_USE_HIP // HIP @@ -1081,7 +1083,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, vel_decomp_onegrid[mfi].dataPtr(comp)); if (result != CUFFT_SUCCESS) { amrex::AllPrint() << " forward transform using cufftExec failed! Error: " - << cufftErrorToString(result) << "\n"; + << cufftError(result) << "\n"; } #elif AMREX_USE_HIP rocfft_execution_info execinfo = nullptr; @@ -1140,7 +1142,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D); if (result != CUFFT_SUCCESS) { amrex::AllPrint() << " cufftplan3d forward failed! Error: " - << cufftErrorToString(result) << "\n"; + << cufftError(result) << "\n"; } built_plan = true; #elif AMREX_USE_HIP // HIP @@ -1174,7 +1176,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, vel_decomp_onegrid[mfi].dataPtr(comp+3)); if (result != CUFFT_SUCCESS) { amrex::AllPrint() << " forward transform using cufftExec failed! Error: " - << cufftErrorToString(result) << "\n"; + << cufftError(result) << "\n"; } #elif AMREX_USE_HIP rocfft_execution_info execinfo = nullptr; @@ -1228,7 +1230,8 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, void IntegrateKVelocity(Vector > > >& spectral_field, const MultiFab& vel_onegrid, const std::string& name, - const Real& scaling) + const Real& scaling, + const int& step) { int npts = n_cells[0]/2; diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp index 092b63cbd..af016478b 100644 --- a/src_compressible_stag/main_driver.cpp +++ b/src_compressible_stag/main_driver.cpp @@ -38,7 +38,7 @@ void main_driver(const char* argv) std::string inputs_file = argv; - amrex::AllPrint() << "Compiled with support for maximum species = " << MAX_SPECIES << "\n"; + amrex::Print() << "Compiled with support for maximum species = " << MAX_SPECIES << "\n"; // copy contents of F90 modules to C++ namespaces InitializeCommonNamespace(); @@ -260,10 +260,16 @@ void main_driver(const char* argv) #if defined(TURB) // data structure for turbulence diagnostics + MultiFab MFTurbScalar; + MultiFab MFTurbVel; + MultiFab vel_decomp; std::string turbfilename = "turbstats"; std::ofstream turboutfile; std::string turbfilenamedecomp = "turbstatsdecomp"; std::ofstream turboutfiledecomp; + // need to use dVol for scaling + Real dVol = (AMREX_SPACEDIM==2) ? 
dx[0]*dx[1]*cell_depth : dx[0]*dx[1]*dx[2]; + Real dVolinv = 1.0/dVol; #endif ///////////////////////////////////////////// @@ -1135,11 +1141,13 @@ void main_driver(const char* argv) // decomposed velocities Vector< std::string > var_names_turbVel{"vel_total","vel_solenoidal","vel_dilation"}; - TurbSpectrumVelDecomp(MFTurbVel, vel_decomp, geom, step, var_names_turbVel); + Real scaling_turb_veldecomp = dVolinv; + TurbSpectrumVelDecomp(MFTurbVel, vel_decomp, geom, step, scaling_turb_veldecomp, var_names_turbVel); // scalars Vector< std::string > var_names_turbScalar{"rho","tenp","press"}; - TurbSpectrumScalar(MFTurbScalar, geom, step, var_names_turbScalar); + Vector scaling_turb_scalar(3, dVolinv); + TurbSpectrumScalar(MFTurbScalar, geom, step, scaling_turb_scalar, var_names_turbScalar); } #endif } From 965d88967ffa2f3de627ddc80cfe48ee0d671114 Mon Sep 17 00:00:00 2001 From: isriva Date: Thu, 28 Sep 2023 18:39:18 -0700 Subject: [PATCH 003/151] heffte compiles and runs fine, needs longer run testing --- exec/compressible_stag/GNUmakefile | 6 +- src_analysis/TurbSpectra.H | 7 +- src_analysis/TurbSpectra.cpp | 948 +++++++++++------------- src_compressible_stag/DeriveVelProp.cpp | 21 +- src_compressible_stag/main_driver.cpp | 25 +- 5 files changed, 483 insertions(+), 524 deletions(-) diff --git a/exec/compressible_stag/GNUmakefile b/exec/compressible_stag/GNUmakefile index 3f6ff4485..208570aa9 100644 --- a/exec/compressible_stag/GNUmakefile +++ b/exec/compressible_stag/GNUmakefile @@ -74,12 +74,16 @@ else ifeq ($(USE_HIP),TRUE) LIBRARY_LOCATIONS += $(ROC_PATH)/rocfft/lib LIBRARIES += -L$(ROC_PATH)/rocfft/lib -lrocfft else - LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 + LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f endif ifeq ($(DO_TURB), TRUE) DEFINES += -DTURB endif +ifeq ($(USE_HEFFTE), TRUE) + DEFINES += -DHEFFTE +endif + MAXSPECIES := $(strip $(MAX_SPEC)) DEFINES += -DMAX_SPECIES=$(MAXSPECIES) diff --git a/src_analysis/TurbSpectra.H b/src_analysis/TurbSpectra.H index 3952330b4..4893b47b3 100644 --- a/src_analysis/TurbSpectra.H +++ b/src_analysis/TurbSpectra.H @@ -8,9 +8,9 @@ // These are for heFFTe / FFTW / cuFFT / rocFFT -#if defined(USE_HEFFTE) +#if defined(HEFFTE) #include -#else +#endif #ifdef AMREX_USE_CUDA #include #elif AMREX_USE_HIP @@ -23,7 +23,6 @@ #include #include #endif -#endif #include @@ -43,7 +42,7 @@ std::string rocfftError (const rocfft_status err); void rocfft_status (std::string const& name, rocfft_status status); #endif -#if defined(USE_HEFFTE) +#if defined(HEFFTE) void IntegrateKScalar(BaseFab >& spectral_field, const std::string& name, const Real& scaling, const Box& c_local_box, diff --git a/src_analysis/TurbSpectra.cpp b/src_analysis/TurbSpectra.cpp index bd3d3774e..ea4c4adde 100644 --- a/src_analysis/TurbSpectra.cpp +++ b/src_analysis/TurbSpectra.cpp @@ -56,7 +56,6 @@ void rocfft_status (std::string const& name, rocfft_status status) } #endif -#if defined(USE_HEFFTE) void TurbSpectrumScalar(const MultiFab& variables, const amrex::Geometry& geom, const int& step, @@ -66,9 +65,10 @@ void TurbSpectrumScalar(const MultiFab& variables, BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); +#if defined(HEFFTE) 
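A note on the heFFTe call pattern adopted from this patch onward: each MPI rank describes the real-space index box it owns and the matching complex-space box (the x extent shrinks to nx/2+1 for a real-to-complex transform), constructs one heffte::fft3d_r2c object over the communicator, and calls forward() with raw pointers, one component at a time. The following is an illustrative sketch only, not code from the repository; it assumes the FFTW backend, an initialized MPI environment, and a single 32^3 box owned by one rank.

    // Input box is the full 32^3 domain (inclusive indices); output box has kx length 32/2+1 = 17.
    heffte::fft3d_r2c<heffte::backend::fftw> fft(
        {{0,0,0}, {31,31,31}},   // real-space (input) box
        {{0,0,0}, {16,31,31}},   // complex-space (output) box, halved in x
        0,                       // r2c_direction: the dimension that gets halved
        MPI_COMM_WORLD);
    std::vector<double> u(fft.size_inbox(), 1.0);               // placeholder field data
    std::vector<std::complex<double>> uhat(fft.size_outbox());
    fft.forward(u.data(), uhat.data());                         // unnormalized forward transform

The transform is unnormalized, which is why the spectra computed below carry explicit 1/sqrt(npts) and dVol scalings.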
AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.local_size() == 1, "TurbSpectrumScalar: Must have one Box per MPI process when using heFFTe"); - int ncomp = MFTurbVel.nComp(); + int ncomp = variables.nComp(); long npts; Box domain = geom.Domain(); @@ -114,117 +114,180 @@ void TurbSpectrumScalar(const MultiFab& variables, // each MPI rank gets storage for its piece of the fft BaseFab > spectral_field(c_local_box, ncomp, The_Device_Arena()); - // we only need to build the plan once; track whether we did - heffte_plan fplan; int r2c_direction = 0; #ifdef AMREX_USE_CUDA - int status = heffte_plan_create_r2c(heffte::backend::cufft, + heffte::fft3d_r2c fft #elif AMREX_USE_HIP - int status = heffte_plan_create_r2c(heffte::backend::rocfft, + heffte::fft3d_r2c fft #else - int status = heffte_plan_create_r2c(heffte::backend::fftw, + heffte::fft3d_r2c fft #endif - {local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}, - NULL, - {c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}, - NULL, r2c_direction, ParallelDescriptor::Communicator(), &fplan); - - if (status != Heffte_SUCCESS) amrex::Abort("Failed at heffte_plan_create() with error code"); + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + using heffte_complex = typename heffte::fft_output::type; for (int comp=0; comp::type; - heffte_forward_d2z(fplan, variables[local_boxid].dataPtr(comp), - reinterpret_cast spectral_field.dataPtr(comp), - Heffte_SCALE_NONE); + heffte_complex* spectral_data = (heffte_complex*) spectral_field.dataPtr(comp); + fft.forward(variables[local_boxid].dataPtr(comp),spectral_data); ParallelDescriptor::Barrier(); // Integrate spectra over k-shells IntegrateKScalar(spectral_field,var_names[comp],scaling[comp],c_local_box,sqrtnpts,step); - } - - heffte_plan_destroy(fplan); -} +#else // not heFFTe + int ncomp = variables.nComp(); -void IntegrateKScalar(BaseFab >& spectral_field, - const std::string& name, const Real& scaling, - const Box& c_local_box, - const Real& sqrtnpts, - const int& step) + long npts; -{ - int npts = n_cells[0]/2; - - Gpu::DeviceVector phisum_device(npts); - Gpu::DeviceVector phicnt_device(npts); - Gpu::HostVector phisum_host(npts); - Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data - int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data - - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + // Initialize the boxarray "ba_onegrid" from the single box "domain" + BoxArray ba_onegrid; { - phisum_ptr[d] = 0.; - phicnt_ptr[d] = 0; - }); + Box domain = geom.Domain(); + ba_onegrid.define(domain); + npts = (domain.length(0)*domain.length(1)*domain.length(2)); + } + Real sqrtnpts = std::sqrt(npts); + DistributionMapping dmap_onegrid(ba_onegrid); + MultiFab variables_onegrid; + variables_onegrid.define(ba_onegrid, dmap_onegrid, ncomp, 0); + variables_onegrid.ParallelCopy(variables,0,0,ncomp); - Array4< GpuComplex > spectral = spectral_field.array(); - ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) - { - if (i <= n_cells[0]/2) { // only half of kx-domain - int ki = i; - int kj = j; - int kk = k; - if 
(j >= n_cells[1]/2) kj = n_cells[1]-j; - if (k >= n_cells[2]/2) kk = n_cells[2]-k; +#ifdef AMREX_USE_CUDA + using FFTplan = cufftHandle; + using FFTcomplex = cuDoubleComplex; +#elif AMREX_USE_HIP + using FFTplan = rocfft_plan; + using FFTcomplex = double2; +#else + using FFTplan = fftw_plan; + using FFTcomplex = fftw_complex; +#endif - Real dist = (ki*ki + kj*kj + kk*kk); - dist = std::sqrt(dist); - - if ( dist <= n_cells[0]/2-0.5) { - dist = dist+0.5; - int cell = int(dist); - Real real = spectral(i,j,k).real(); - Real imag = spectral(i,j,k).imag(); - Real cov = scaling*(1.0/(sqrtnpts*sqrtnpts))*(real*real + imag*imag); - amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); - amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); - } - } - }); + + // size of box including ghost cell range + IntVect fft_size; - ParallelDescriptor::Barrier(); + // contain to store FFT - note it is shrunk by "half" in x + Vector > > > spectral_field; + for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) { + + // grab a single box including ghost cell range + Box realspace_bx = mfi.fabbox(); - for (int d=1; d >(spectral_bx,ncomp, + The_Device_Arena())); + spectral_field.back()->setVal(0.0); // touch the memory } - - Real dk = 1.; - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - if (d != 0) { - phisum_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d]; + + // for CUDA builds we only need to build the plan once; track whether we did + Vector forward_plan; + bool built_plan = false; + for (int comp=0; comp + (spectral_field.back()->dataPtr(comp)), + FFTW_ESTIMATE); +#endif + forward_plan.push_back(fplan); + } } - }); - - Gpu::copy(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); - - if (ParallelDescriptor::IOProcessor()) { - std::ofstream turb; - std::string turbBaseName = "turb_"+name; - std::string turbName = Concatenate(turbBaseName,step,7); - turbName += ".txt"; - turb.open(turbName); - for (int d=1; d + (spectral_field[i]->dataPtr(comp))); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP + rocfft_execution_info execinfo = nullptr; + rocfft_status result = rocfft_execution_info_create(&execinfo); + assert_rocfft_status("rocfft_execution_info_create", result); + + std::size_t buffersize = 0; + result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); + assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + + void* buffer = amrex::The_Arena()->alloc(buffersize); + result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); + assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + + result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); + assert_rocfft_status("rocfft_execution_info_set_stream", result); + + amrex::Real* variables_onegrid_ptr = variables_onegrid[mfi].dataPtr(comp); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr(comp)); + result = rocfft_execute(forward_plan[i], + (void**) &variables_onegrid_ptr, // in + (void**) &spectral_field_ptr, // out + execinfo); + assert_rocfft_status("rocfft_execute", result); + amrex::Gpu::streamSynchronize(); + amrex::The_Arena()->free(buffer); + result = rocfft_execution_info_destroy(execinfo); + assert_rocfft_status("rocfft_execution_info_destroy", result); +#else + fftw_execute(forward_plan[i]); +#endif } - turb.close(); + + // Integrate spectra over k-shells + IntegrateKScalar(spectral_field,variables_onegrid,var_names[comp],scaling[comp],sqrtnpts,step); + } + + // destroy fft plan + for (int i = 0; i < forward_plan.size(); ++i) { +#ifdef AMREX_USE_CUDA + cufftDestroy(forward_plan[i]); +#elif AMREX_USE_HIP + rocfft_plan_destroy(forward_plan[i]); +#else + fftw_destroy_plan(forward_plan[i]); +#endif } +#endif // end heFFTE } void TurbSpectrumVelDecomp(const MultiFab& vel, @@ -237,12 +300,11 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); +#if defined(HEFFTE) AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.local_size() == 1, "TurbSpectrumVelDecomp: Must have one Box per MPI process when using heFFTe"); const GpuArray dx = geom.CellSizeArray(); - int ncomp = MFTurbVel.nComp(); - long npts; Box domain = geom.Domain(); npts = (domain.length(0)*domain.length(1)*domain.length(2)); @@ -289,39 +351,35 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, BaseFab > spectral_field_S(c_local_box, 3, The_Device_Arena()); // solenoidal BaseFab > spectral_field_D(c_local_box, 3, The_Device_Arena()); // dilatational - heffte_plan fplan; int r2c_direction = 0; #ifdef AMREX_USE_CUDA - int status = heffte_plan_create_r2c(heffte::backend::cufft, + heffte::fft3d_r2c fft #elif AMREX_USE_HIP - int status = heffte_plan_create_r2c(heffte::backend::rocfft, + heffte::fft3d_r2c fft #else - int status = heffte_plan_create_r2c(heffte::backend::fftw, + heffte::fft3d_r2c fft #endif - {local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}, - NULL, - {c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}, - NULL, r2c_direction, ParallelDescriptor::Communicator(), &fplan); - - if (status 
!= Heffte_SUCCESS) amrex::Abort("Failed at heffte_plan_create() with error code"); + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); // ForwardTransform using heffte_complex = typename heffte::fft_output::type; - - heffte_forward_d2z(fplan, vel[local_boxid].dataPtr(0), - reinterpret_cast spectral_field_T.dataPtr(0), - Heffte_SCALE_NONE); - heffte_forward_d2z(fplan, vel[local_boxid].dataPtr(1), - reinterpret_cast spectral_field_T.dataPtr(1), - Heffte_SCALE_NONE); - heffte_forward_d2z(fplan, vel[local_boxid].dataPtr(2), - reinterpret_cast spectral_field_T.dataPtr(2), - Heffte_SCALE_NONE); - - heffte_plan_destroy(fplan); + { + heffte_complex* spectral_data = (heffte_complex*) spectral_field_T.dataPtr(0); + fft.forward(vel[local_boxid].dataPtr(0),spectral_data); + } + { + heffte_complex* spectral_data = (heffte_complex*) spectral_field_T.dataPtr(1); + fft.forward(vel[local_boxid].dataPtr(1),spectral_data); + } + { + heffte_complex* spectral_data = (heffte_complex*) spectral_field_T.dataPtr(2); + fft.forward(vel[local_boxid].dataPtr(2),spectral_data); + } // Decompose velocity field into solenoidal and dilatational Array4< GpuComplex > spectral_tx = spectral_field_T.array(0,1); @@ -356,12 +414,9 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, } // Scale Total velocity FFT components - spectral_tx(i,j,k).real() *= (1.0/sqrtnpts); - spectral_ty(i,j,k).real() *= (1.0/sqrtnpts); - spectral_tz(i,j,k).real() *= (1.0/sqrtnpts); - spectral_tx(i,j,k).imag() *= (1.0/sqrtnpts); - spectral_ty(i,j,k).imag() *= (1.0/sqrtnpts); - spectral_tz(i,j,k).imag() *= (1.0/sqrtnpts); + spectral_tx(i,j,k) *= (1.0/sqrtnpts); + spectral_ty(i,j,k) *= (1.0/sqrtnpts); + spectral_tz(i,j,k) *= (1.0/sqrtnpts); // Inverse Laplacian Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; @@ -409,388 +464,40 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, IntegrateKVelocity(spectral_field_S,"turb_solenoidal",scaling,c_local_box,step); IntegrateKVelocity(spectral_field_D,"turb_dilational",scaling,c_local_box,step); - // setup plan for inverse FFT - heffte_plan fplanR; -#ifdef AMREX_USE_CUDA - status = heffte_plan_create(heffte::backend::cufft, -#elif AMREX_USE_HIP - status = heffte_plan_create(heffte::backend::rocfft, -#else - status = heffte_plan_create(heffte::backend::fftw, -#endif - {c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}, - NULL, - {local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}, - NULL, ParallelDescriptor::Communicator(), NULL, &fplanR); - - if (status != Heffte_SUCCESS) amrex::Abort("Failed at heffte_plan_create() with error code"); - // inverse Fourier transform solenoidal and dilatational components - heffte_forward_z2d(fplanR, reinterpret_cast spectral_field_S.dataPtr(0), - vel_decomp[local_boxid].dataPtr(0), Heffte_SCALE_NONE); - heffte_forward_z2d(fplanR, reinterpret_cast spectral_field_S.dataPtr(1), - vel_decomp[local_boxid].dataPtr(1), Heffte_SCALE_NONE); - heffte_forward_z2d(fplanR, reinterpret_cast spectral_field_S.dataPtr(2), - vel_decomp[local_boxid].dataPtr(2), Heffte_SCALE_NONE); - heffte_forward_z2d(fplanR, 
reinterpret_cast spectral_field_D.dataPtr(0), - vel_decomp[local_boxid].dataPtr(3), Heffte_SCALE_NONE); - heffte_forward_z2d(fplanR, reinterpret_cast spectral_field_D.dataPtr(1), - vel_decomp[local_boxid].dataPtr(4), Heffte_SCALE_NONE); - heffte_forward_z2d(fplanR, reinterpret_cast spectral_field_D.dataPtr(2), - vel_decomp[local_boxid].dataPtr(5), Heffte_SCALE_NONE); - - heffte_plan_destroy(fplanR); - - vel_decomp.mult(1.0/sqrtnpts); + { + heffte_complex* spectral_data = (heffte_complex*) spectral_field_S.dataPtr(0); + fft.backward(spectral_data, vel_decomp[local_boxid].dataPtr(0)); -} + } + { + heffte_complex* spectral_data = (heffte_complex*) spectral_field_S.dataPtr(1); + fft.backward(spectral_data, vel_decomp[local_boxid].dataPtr(1)); -void IntegrateKVelocity(BaseFab >& spectral_field, - const std::string& name, const Real& scaling, - const Box& c_local_box, - const int& step) + } + { + heffte_complex* spectral_data = (heffte_complex*) spectral_field_S.dataPtr(2); + fft.backward(spectral_data, vel_decomp[local_boxid].dataPtr(2)); -{ - int npts = n_cells[0]/2; - - Gpu::DeviceVector phisum_device(npts); - Gpu::DeviceVector phicnt_device(npts); - Gpu::HostVector phisum_host(npts); - Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data - int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data - - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + } { - phisum_ptr[d] = 0.; - phicnt_ptr[d] = 0; - }); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_D.dataPtr(0); + fft.backward(spectral_data, vel_decomp[local_boxid].dataPtr(3)); - Array4< GpuComplex > spectralx = spectral_field.array(0,1); - Array4< GpuComplex > spectraly = spectral_field.array(1,1); - Array4< GpuComplex > spectralz = spectral_field.array(2,1); - ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) + } { - if (i <= n_cells[0]/2) { // only half of kx-domain - int ki = i; - int kj = j; - int kk = k; - if (j >= n_cells[1]/2) kj = n_cells[1]-j; - if (k >= n_cells[2]/2) kk = n_cells[2]-k; - - Real dist = (ki*ki + kj*kj + kk*kk); - dist = std::sqrt(dist); - - if ( dist <= n_cells[0]/2-0.5) { - dist = dist+0.5; - int cell = int(dist); - Real real, imag, cov_x, cov_y, cov_z, cov; - real = spectralx(i,j,k).real(); - imag = spectralx(i,j,k).imag(); - cov_x = scaling*(real*real + imag*imag); - real = spectraly(i,j,k).real(); - imag = spectraly(i,j,k).imag(); - cov_y = scaling*(real*real + imag*imag); - real = spectralz(i,j,k).real(); - imag = spectralz(i,j,k).imag(); - cov_z = scaling*(real*real + imag*imag); - cov = cov_x + cov_y + cov_z; - amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); - amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); - } - } - }); - - ParallelDescriptor::Barrier(); - - for (int d=1; d& scaling, - const amrex::Vector< std::string >& var_names) -{ - BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); - - int ncomp = variables.nComp(); - - long npts; - - // Initialize the boxarray "ba_onegrid" from the single box "domain" - BoxArray ba_onegrid; - { - Box domain = geom.Domain(); - ba_onegrid.define(domain); - npts = (domain.length(0)*domain.length(1)*domain.length(2)); - } - Real sqrtnpts = std::sqrt(npts); 
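For reference, the non-heFFTe branch that this hunk reorganizes keeps the whole domain on a single box (via ParallelCopy onto a one-grid BoxArray) and runs a serial 3-D real-to-complex transform per component with FFTW on the host, or cuFFT/rocFFT on GPU. A minimal standalone sketch of the host-side FFTW pattern, using a hypothetical 64^3 field; this is illustrative only and not code from the repository.

    #include <fftw3.h>
    #include <cstddef>
    int main() {
        const int nx = 64, ny = 64, nz = 64;
        double* in = fftw_alloc_real((std::size_t)nx*ny*nz);
        fftw_complex* out = fftw_alloc_complex((std::size_t)(nx/2+1)*ny*nz); // x is halved plus 1
        // FFTW expects the slowest-varying dimension first, hence (nz, ny, nx)
        fftw_plan p = fftw_plan_dft_r2c_3d(nz, ny, nx, in, out, FFTW_ESTIMATE);
        for (std::size_t n = 0; n < (std::size_t)nx*ny*nz; ++n) in[n] = 0.0;  // placeholder data
        fftw_execute(p);                  // unnormalized forward transform
        fftw_destroy_plan(p);
        fftw_free(in); fftw_free(out);
        return 0;
    }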
- DistributionMapping dmap_onegrid(ba_onegrid); - MultiFab variables_onegrid; - variables_onegrid.define(ba_onegrid, dmap_onegrid, ncomp, 0); - variables_onegrid.ParallelCopy(variables,0,0,ncomp); - -#ifdef AMREX_USE_CUDA - using FFTplan = cufftHandle; - using FFTcomplex = cuDoubleComplex; -#elif AMREX_USE_HIP - using FFTplan = rocfft_plan; - using FFTcomplex = double2; -#else - using FFTplan = fftw_plan; - using FFTcomplex = fftw_complex; -#endif - - - // size of box including ghost cell range - IntVect fft_size; - - // contain to store FFT - note it is shrunk by "half" in x - Vector > > > spectral_field; - for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) { - - // grab a single box including ghost cell range - Box realspace_bx = mfi.fabbox(); - - // size of box including ghost cell range - fft_size = realspace_bx.length(); // This will be different for hybrid FFT - - // this is the size of the box, except the 0th component is 'halved plus 1' - IntVect spectral_bx_size = fft_size; - spectral_bx_size[0] = fft_size[0]/2 + 1; - - // spectral box - Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); - - spectral_field.emplace_back(new BaseFab >(spectral_bx,ncomp, - The_Device_Arena())); - spectral_field.back()->setVal(0.0); // touch the memory - } - - // for CUDA builds we only need to build the plan once; track whether we did - Vector forward_plan; - bool built_plan = false; - for (int comp=0; comp - (spectral_field.back()->dataPtr(comp)), - FFTW_ESTIMATE); -#endif - forward_plan.push_back(fplan); - } - } - - ParallelDescriptor::Barrier(); - - // ForwardTransform - for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) { - int i = mfi.LocalIndex(); -#ifdef AMREX_USE_CUDA - cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); - cufftResult result = cufftExecD2Z(forward_plan[i], - variables_onegrid[mfi].dataPtr(comp), - reinterpret_cast - (spectral_field[i]->dataPtr(comp))); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP - rocfft_execution_info execinfo = nullptr; - rocfft_status result = rocfft_execution_info_create(&execinfo); - assert_rocfft_status("rocfft_execution_info_create", result); - - std::size_t buffersize = 0; - result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); - assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); - - void* buffer = amrex::The_Arena()->alloc(buffersize); - result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); - - result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - assert_rocfft_status("rocfft_execution_info_set_stream", result); - - amrex::Real* variables_onegrid_ptr = variables_onegrid[mfi].dataPtr(comp); - FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr(comp)); - result = rocfft_execute(forward_plan[i], - (void**) &variables_onegrid_ptr, // in - (void**) &spectral_field_ptr, // out - execinfo); - assert_rocfft_status("rocfft_execute", result); - amrex::Gpu::streamSynchronize(); - amrex::The_Arena()->free(buffer); - result = rocfft_execution_info_destroy(execinfo); - assert_rocfft_status("rocfft_execution_info_destroy", result); -#else - fftw_execute(forward_plan[i]); -#endif - } - - // Integrate spectra over k-shells - IntegrateKScalar(spectral_field,variables_onegrid,var_names[comp],scaling[comp],sqrtnpts,step); - } - - // destroy fft plan - for (int i = 0; i < forward_plan.size(); ++i) { -#ifdef AMREX_USE_CUDA - cufftDestroy(forward_plan[i]); -#elif AMREX_USE_HIP - rocfft_plan_destroy(forward_plan[i]); -#else - fftw_destroy_plan(forward_plan[i]); -#endif - } - -} - -void IntegrateKScalar(Vector > > >& spectral_field, - const MultiFab& variables_onegrid, - const std::string& name, - const Real& scaling, - const Real& sqrtnpts, - const int& step) - -{ - int npts = n_cells[0]/2; - Gpu::DeviceVector phisum_device(npts); - Gpu::DeviceVector phicnt_device(npts); - - Gpu::HostVector phisum_host(npts); - - Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data - int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data - - // Integrate spectra over k-shells - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - phisum_ptr[d] = 0.; - phicnt_ptr[d] = 0; - }); - - for ( MFIter mfi(variables_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { - - const Box& bx = mfi.tilebox(); - - Array4< GpuComplex > spectral = (*spectral_field[0]).array(); - - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - if (i <= bx.length(0)/2) { // only half of kx-domain - int ki = i; - int kj = j; - int kk = k; - if (j >= bx.length(1)/2) kj = bx.length(1)-j; - if (k >= bx.length(2)/2) kk = bx.length(2)-k; - - Real dist = (ki*ki + kj*kj + kk*kk); - dist = std::sqrt(dist); - - if ( dist <= n_cells[0]/2-0.5) { - dist = dist+0.5; - int cell = int(dist); - Real real = spectral(i,j,k).real(); - Real imag = spectral(i,j,k).imag(); - Real cov = scaling*(1.0/(sqrtnpts*sqrtnpts))*(real*real + imag*imag); - amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); - amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); - } - } - }); - } - - for (int d=1; d& var_names) -{ - BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); - 
AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_D.dataPtr(1); + fft.backward(spectral_data, vel_decomp[local_boxid].dataPtr(4)); + } + { + heffte_complex* spectral_data = (heffte_complex*) spectral_field_D.dataPtr(2); + fft.backward(spectral_data, vel_decomp[local_boxid].dataPtr(5)); + + } + + vel_decomp.mult(1.0/sqrtnpts); +#else // not heFFTe const GpuArray dx = geom.CellSizeArray(); long npts; @@ -1225,8 +932,259 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, // copy into external multifab vel_decomp.ParallelCopy(vel_decomp_onegrid,0,0,6); vel_decomp.mult(1.0/sqrtnpts); +#endif // end heFFTe +} + +#if defined(HEFFTE) +void IntegrateKScalar(BaseFab >& spectral_field, + const std::string& name, const Real& scaling, + const Box& c_local_box, + const Real& sqrtnpts, + const int& step) + +{ + int npts = n_cells[0]/2; + + Gpu::DeviceVector phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); + Gpu::HostVector phisum_host(npts); + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + phisum_ptr[d] = 0.; + phicnt_ptr[d] = 0; + }); + + Array4< GpuComplex > spectral = spectral_field.array(); + ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) + { + if (i <= n_cells[0]/2) { // only half of kx-domain + int ki = i; + int kj = j; + int kk = k; + if (j >= n_cells[1]/2) kj = n_cells[1]-j; + if (k >= n_cells[2]/2) kk = n_cells[2]-k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + Real real = spectral(i,j,k).real(); + Real imag = spectral(i,j,k).imag(); + Real cov = scaling*(1.0/(sqrtnpts*sqrtnpts))*(real*real + imag*imag); + amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); + amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); + } + } + }); + + ParallelDescriptor::Barrier(); + + for (int d=1; d > > >& spectral_field, + const MultiFab& variables_onegrid, + const std::string& name, + const Real& scaling, + const Real& sqrtnpts, + const int& step) + +{ + int npts = n_cells[0]/2; + Gpu::DeviceVector phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); + + Gpu::HostVector phisum_host(npts); + + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + // Integrate spectra over k-shells + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + phisum_ptr[d] = 0.; + phicnt_ptr[d] = 0; + }); + + for ( MFIter mfi(variables_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.tilebox(); + + Array4< GpuComplex > spectral = (*spectral_field[0]).array(); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + if (i <= bx.length(0)/2) { // only half of kx-domain + int ki = i; + int kj = j; + int kk = k; + if (j >= bx.length(1)/2) kj = bx.length(1)-j; + if (k >= bx.length(2)/2) kk = bx.length(2)-k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + Real real = spectral(i,j,k).real(); + Real imag = spectral(i,j,k).imag(); + Real cov = scaling*(1.0/(sqrtnpts*sqrtnpts))*(real*real + imag*imag); + 
amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); + amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); + } + } + }); + } + + for (int d=1; d >& spectral_field, + const std::string& name, const Real& scaling, + const Box& c_local_box, + const int& step) + +{ + int npts = n_cells[0]/2; + + Gpu::DeviceVector phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); + Gpu::HostVector phisum_host(npts); + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + phisum_ptr[d] = 0.; + phicnt_ptr[d] = 0; + }); + + Array4< GpuComplex > spectralx = spectral_field.array(0,1); + Array4< GpuComplex > spectraly = spectral_field.array(1,1); + Array4< GpuComplex > spectralz = spectral_field.array(2,1); + ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) + { + if (i <= n_cells[0]/2) { // only half of kx-domain + int ki = i; + int kj = j; + int kk = k; + if (j >= n_cells[1]/2) kj = n_cells[1]-j; + if (k >= n_cells[2]/2) kk = n_cells[2]-k; + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + Real real, imag, cov_x, cov_y, cov_z, cov; + real = spectralx(i,j,k).real(); + imag = spectralx(i,j,k).imag(); + cov_x = scaling*(real*real + imag*imag); + real = spectraly(i,j,k).real(); + imag = spectraly(i,j,k).imag(); + cov_y = scaling*(real*real + imag*imag); + real = spectralz(i,j,k).real(); + imag = spectralz(i,j,k).imag(); + cov_z = scaling*(real*real + imag*imag); + cov = cov_x + cov_y + cov_z; + amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); + amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); + } + } + }); + + ParallelDescriptor::Barrier(); + + for (int d=1; d > > >& spectral_field, const MultiFab& vel_onegrid, const std::string& name, @@ -1317,6 +1275,4 @@ void IntegrateKVelocity(Vector > > >& s turb.close(); } } - #endif - diff --git a/src_compressible_stag/DeriveVelProp.cpp b/src_compressible_stag/DeriveVelProp.cpp index 87f3232d9..27c6f05bf 100644 --- a/src_compressible_stag/DeriveVelProp.cpp +++ b/src_compressible_stag/DeriveVelProp.cpp @@ -485,9 +485,9 @@ void EvaluateWritePlotFileVelGrad(int step, const Box& bx = mfi.tilebox(); - const Array4 out = output.array(mfi); + const Array4< Real>& out = output.array(mfi); - const Array4 v_decomp = vel_decomp.array(mfi); + const Array4& v_decomp = vel_decomp.array(mfi); AMREX_D_TERM(Array4 const& velx = vel[0].array(mfi);, Array4 const& vely = vel[1].array(mfi);, @@ -495,11 +495,16 @@ void EvaluateWritePlotFileVelGrad(int step, amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - for (int n=0;n<6;++n) { - out(i,j,k,n) = v_decomp(i,j,k,n); - } - out(i,j,k,6) = sqrt(out(i,j,k,0)*out(i,j,k,0) + out(i,j,k,1)*out(i,j,k,1) + out(i,j,k,2)*out(i,j,k,2)); // mag solenoidal - out(i,j,k,7) = sqrt(out(i,j,k,3)*out(i,j,k,3) + out(i,j,k,4)*out(i,j,k,4) + out(i,j,k,5)*out(i,j,k,5)); // mag dilatational + + out(i,j,k,0) = v_decomp(i,j,k,0); + out(i,j,k,1) = v_decomp(i,j,k,1); + out(i,j,k,2) = v_decomp(i,j,k,2); + out(i,j,k,3) = v_decomp(i,j,k,3); + out(i,j,k,4) = v_decomp(i,j,k,4); + out(i,j,k,5) = v_decomp(i,j,k,5); + + out(i,j,k,6) = std::sqrt(out(i,j,k,0)*out(i,j,k,0) + out(i,j,k,1)*out(i,j,k,1) + out(i,j,k,2)*out(i,j,k,2)); // mag solenoidal + out(i,j,k,7) = std::sqrt(out(i,j,k,3)*out(i,j,k,3) + out(i,j,k,4)*out(i,j,k,4) + out(i,j,k,5)*out(i,j,k,5)); // mag 
dilatational // divergence out(i,j,k,8) = (velx(i+1,j,k) - velx(i,j,k))/dx[0] + @@ -549,7 +554,7 @@ void EvaluateWritePlotFileVelGrad(int step, Real w3_pp = u32_pp - u23_pp; // vorticity magnitude: sqrt(w1*w1 + w2*w2 + w3*w3) - out(i,j,k,9) = sqrt(0.25*(w1_mm*w1_mm + w1_mp*w1_mp + w1_pm*w1_pm + w1_pp*w1_pp + + out(i,j,k,9) = std::sqrt(0.25*(w1_mm*w1_mm + w1_mp*w1_mp + w1_pm*w1_pm + w1_pp*w1_pp + w2_mm*w2_mm + w2_mp*w2_mp + w2_pm*w2_pm + w2_pp*w2_pp + w3_mm*w3_mm + w3_mp*w3_mp + w3_pm*w3_pm + w3_pp*w3_pp)); }); diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp index af016478b..68f6eee27 100644 --- a/src_compressible_stag/main_driver.cpp +++ b/src_compressible_stag/main_driver.cpp @@ -684,11 +684,6 @@ void main_driver(const char* argv) if (plot_int > 0) { WritePlotFileStag(0, 0.0, geom, cu, cuMeans, cuVars, cumom, cumomMeans, cumomVars, prim, primMeans, primVars, vel, velMeans, velVars, coVars, surfcov, surfcovMeans, surfcovVars, eta, kappa); -#if defined(TURB) - if (turbForcing > 0) { - EvaluateWritePlotFileVelGrad(0, 0.0, geom, vel, vel_decomp); - } -#endif if (plot_cross) { if (do_1D) { @@ -720,6 +715,15 @@ void main_driver(const char* argv) } // end t=0 setup + +#if defined(TURB) + if (turbForcing >= 1) { + MFTurbVel.define(ba, dmap, 3, 0); + MFTurbScalar.define(ba, dmap, 3, 0); + vel_decomp.define(ba, dmap, 6, 0); + vel_decomp.setVal(0.0); + } +#endif /////////////////////////////////////////// // Setup Structure factor @@ -856,15 +860,6 @@ void main_driver(const char* argv) } } - -#if defined(TURB) - if (turbForcing >= 1) { - MFTurbVel.define(ba, dmap, 3, 0); - MFTurbScalar.define(ba, dmap, 6, 0); - vel_decomp.define(ba, dmap, 6, 0); - vel_decomp.setVal(0.0); - } -#endif ///////////////////////////////////////////////// // Initialize Fluxes and Sources @@ -1145,7 +1140,7 @@ void main_driver(const char* argv) TurbSpectrumVelDecomp(MFTurbVel, vel_decomp, geom, step, scaling_turb_veldecomp, var_names_turbVel); // scalars - Vector< std::string > var_names_turbScalar{"rho","tenp","press"}; + Vector< std::string > var_names_turbScalar{"rho","temp","press"}; Vector scaling_turb_scalar(3, dVolinv); TurbSpectrumScalar(MFTurbScalar, geom, step, scaling_turb_scalar, var_names_turbScalar); } From 40a635d0abab73c208f2fd1c859d748afc21b211 Mon Sep 17 00:00:00 2001 From: isriva Date: Thu, 5 Oct 2023 11:10:59 -0700 Subject: [PATCH 004/151] heffte works well. 
needs tests on multiple cuda and amd gpus --- src_analysis/TurbSpectra.H | 18 +- src_analysis/TurbSpectra.cpp | 832 +++++++++++++++--------- src_compressible_stag/DeriveVelProp.cpp | 4 +- 3 files changed, 534 insertions(+), 320 deletions(-) diff --git a/src_analysis/TurbSpectra.H b/src_analysis/TurbSpectra.H index 4893b47b3..ebf6b96bc 100644 --- a/src_analysis/TurbSpectra.H +++ b/src_analysis/TurbSpectra.H @@ -43,27 +43,33 @@ void rocfft_status (std::string const& name, rocfft_status status); #endif #if defined(HEFFTE) -void IntegrateKScalar(BaseFab >& spectral_field, +void IntegrateKScalar(const BaseFab >& spectral_field, const std::string& name, const Real& scaling, const Box& c_local_box, const Real& sqrtnpts, - const int& step); -void IntegrateKVelocity(BaseFab >& spectral_field, + const int& step, + const int& comp); +void IntegrateKVelocity(const BaseFab >& spectral_field, const std::string& name, const Real& scaling, const Box& c_local_box, const int& step); #else -void IntegrateKScalar(Vector > > >& spectral_field, +void IntegrateKScalar(const Vector > > >& spectral_field, const MultiFab& variables_onegrid, const std::string& name, const Real& scaling, const Real& sqrtnpts, - const int& step); -void IntegrateKVelocity(Vector > > >& spectral_field, + const int& step, + const int& comp); +void IntegrateKVelocity(const Vector > > >& spectral_fieldx, + const Vector > > >& spectral_fieldy, + const Vector > > >& spectral_fieldz, const MultiFab& vel_onegrid, const std::string& name, const Real& scaling, const int& step); +void InverseFFTVel(Vector > > >& spectral_field, + MultiFab& vel_decomp_onegrid, const IntVect& fft_size); #endif void TurbSpectrumScalar(const MultiFab& variables, diff --git a/src_analysis/TurbSpectra.cpp b/src_analysis/TurbSpectra.cpp index ea4c4adde..95e453f69 100644 --- a/src_analysis/TurbSpectra.cpp +++ b/src_analysis/TurbSpectra.cpp @@ -136,7 +136,7 @@ void TurbSpectrumScalar(const MultiFab& variables, ParallelDescriptor::Barrier(); // Integrate spectra over k-shells - IntegrateKScalar(spectral_field,var_names[comp],scaling[comp],c_local_box,sqrtnpts,step); + IntegrateKScalar(spectral_field,var_names[comp],scaling[comp],c_local_box,sqrtnpts,step,comp); } #else // not heFFTe int ncomp = variables.nComp(); @@ -153,8 +153,7 @@ void TurbSpectrumScalar(const MultiFab& variables, Real sqrtnpts = std::sqrt(npts); DistributionMapping dmap_onegrid(ba_onegrid); MultiFab variables_onegrid; - variables_onegrid.define(ba_onegrid, dmap_onegrid, ncomp, 0); - variables_onegrid.ParallelCopy(variables,0,0,ncomp); + variables_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); #ifdef AMREX_USE_CUDA using FFTplan = cufftHandle; @@ -173,32 +172,33 @@ void TurbSpectrumScalar(const MultiFab& variables, // contain to store FFT - note it is shrunk by "half" in x Vector > > > spectral_field; - for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) { - - // grab a single box including ghost cell range - Box realspace_bx = mfi.fabbox(); - - // size of box including ghost cell range - fft_size = realspace_bx.length(); // This will be different for hybrid FFT - - // this is the size of the box, except the 0th component is 'halved plus 1' - IntVect spectral_bx_size = fft_size; - spectral_bx_size[0] = fft_size[0]/2 + 1; - - // spectral box - Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); - - spectral_field.emplace_back(new BaseFab >(spectral_bx,ncomp, - The_Device_Arena())); - spectral_field.back()->setVal(0.0); // touch the memory - } - - // for CUDA builds we only need to 
build the plan once; track whether we did Vector forward_plan; bool built_plan = false; + + // for CUDA builds we only need to build the plan once; track whether we did for (int comp=0; comp >(spectral_bx,1, + The_Device_Arena())); + spectral_field.back()->setVal(0.0); // touch the memory FFTplan fplan; #ifdef AMREX_USE_CUDA // CUDA @@ -207,23 +207,23 @@ void TurbSpectrumScalar(const MultiFab& variables, amrex::AllPrint() << " cufftplan3d forward failed! Error: " << cufftError(result) << "\n"; } - built_plan = true; #elif AMREX_USE_HIP // HIP const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, rocfft_transform_type_real_forward, rocfft_precision_double, 3, lengths, 1, nullptr); assert_rocfft_status("rocfft_plan_create", result); - built_plan = true; #else // host fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], - variables_onegrid[mfi].dataPtr(comp), + variables_onegrid[mfi].dataPtr(), reinterpret_cast - (spectral_field.back()->dataPtr(comp)), + (spectral_field.back()->dataPtr()), FFTW_ESTIMATE); #endif forward_plan.push_back(fplan); } + + built_plan = true; } ParallelDescriptor::Barrier(); @@ -234,13 +234,13 @@ void TurbSpectrumScalar(const MultiFab& variables, #ifdef AMREX_USE_CUDA cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); cufftResult result = cufftExecD2Z(forward_plan[i], - variables_onegrid[mfi].dataPtr(comp), + variables_onegrid[mfi].dataPtr(), reinterpret_cast - (spectral_field[i]->dataPtr(comp))); + (spectral_field[i]->dataPtr())); if (result != CUFFT_SUCCESS) { amrex::AllPrint() << " forward transform using cufftExec failed! Error: " << cufftError(result) << "\n"; - } + } #elif AMREX_USE_HIP rocfft_execution_info execinfo = nullptr; rocfft_status result = rocfft_execution_info_create(&execinfo); @@ -257,8 +257,8 @@ void TurbSpectrumScalar(const MultiFab& variables, result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); assert_rocfft_status("rocfft_execution_info_set_stream", result); - amrex::Real* variables_onegrid_ptr = variables_onegrid[mfi].dataPtr(comp); - FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr(comp)); + amrex::Real* variables_onegrid_ptr = variables_onegrid[mfi].dataPtr(); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr()); result = rocfft_execute(forward_plan[i], (void**) &variables_onegrid_ptr, // in (void**) &spectral_field_ptr, // out @@ -274,7 +274,7 @@ void TurbSpectrumScalar(const MultiFab& variables, } // Integrate spectra over k-shells - IntegrateKScalar(spectral_field,variables_onegrid,var_names[comp],scaling[comp],sqrtnpts,step); + IntegrateKScalar(spectral_field,variables_onegrid,var_names[comp],scaling[comp],sqrtnpts,step,comp); } // destroy fft plan @@ -460,9 +460,9 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, ParallelDescriptor::Barrier(); // Integrate K spectrum for velocities - IntegrateKVelocity(spectral_field_T,"turb_total", scaling,c_local_box,step); - IntegrateKVelocity(spectral_field_S,"turb_solenoidal",scaling,c_local_box,step); - IntegrateKVelocity(spectral_field_D,"turb_dilational",scaling,c_local_box,step); + IntegrateKVelocity(spectral_field_T,"vel_total", scaling,c_local_box,step); + IntegrateKVelocity(spectral_field_S,"vel_solenoidal",scaling,c_local_box,step); + IntegrateKVelocity(spectral_field_D,"vel_dilational",scaling,c_local_box,step); // inverse Fourier transform solenoidal and 
dilatational components { @@ -512,8 +512,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, Real sqrtnpts = std::sqrt(npts); DistributionMapping dmap_onegrid(ba_onegrid); MultiFab vel_onegrid; - vel_onegrid.define(ba_onegrid, dmap_onegrid, 3, 0); - vel_onegrid.ParallelCopy(vel,0,0,3); + vel_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); #ifdef AMREX_USE_CUDA using FFTplan = cufftHandle; @@ -530,81 +529,83 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, IntVect fft_size; // contain to store FFT - note it is shrunk by "half" in x - Vector > > > spectral_field; - Vector > > > spectral_field_S; - Vector > > > spectral_field_D; - for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { - - // grab a single box including ghost cell range - Box realspace_bx = mfi.fabbox(); + Vector > > > spectral_fieldx; + Vector > > > spectral_fieldy; + Vector > > > spectral_fieldz; + Vector > > > spectral_field_Sx; + Vector > > > spectral_field_Sy; + Vector > > > spectral_field_Sz; + Vector > > > spectral_field_Dx; + Vector > > > spectral_field_Dy; + Vector > > > spectral_field_Dz; + + // x-velocity + { + Vector forward_plan; + vel_onegrid.ParallelCopy(vel,0,0,1); + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { + + // grab a single box including ghost cell range + Box realspace_bx = mfi.fabbox(); - // size of box including ghost cell range - fft_size = realspace_bx.length(); // This will be different for hybrid FFT + // size of box including ghost cell range + fft_size = realspace_bx.length(); // This will be different for hybrid FFT - // this is the size of the box, except the 0th component is 'halved plus 1' - IntVect spectral_bx_size = fft_size; - spectral_bx_size[0] = fft_size[0]/2 + 1; + // this is the size of the box, except the 0th component is 'halved plus 1' + IntVect spectral_bx_size = fft_size; + spectral_bx_size[0] = fft_size[0]/2 + 1; - // spectral box - Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); + // spectral box + Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); - spectral_field.emplace_back(new BaseFab >(spectral_bx,3, - The_Device_Arena())); - spectral_field.back()->setVal(0.0); // touch the memory + spectral_fieldx.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_fieldx.back()->setVal(0.0); // touch the memory - spectral_field_S.emplace_back(new BaseFab >(spectral_bx,3, - The_Device_Arena())); - spectral_field_S.back()->setVal(0.0); // touch the memory + spectral_field_Sx.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_field_Sx.back()->setVal(0.0); // touch the memory - spectral_field_D.emplace_back(new BaseFab >(spectral_bx,3, - The_Device_Arena())); - spectral_field_D.back()->setVal(0.0); // touch the memory - } + spectral_field_Dx.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_field_Dx.back()->setVal(0.0); // touch the memory - // for CUDA builds we only need to build the plan once; track whether we did - Vector forward_plan; - bool built_plan = false; - for (int comp=0; comp<3; comp++) { - if (!built_plan) { - for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { - FFTplan fplan; + FFTplan fplan; #ifdef AMREX_USE_CUDA // CUDA - cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan3d forward failed! 
Error: " - << cufftError(result) << "\n"; - } - built_plan = true; + cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " cufftplan3d forward failed! Error: " + << cufftError(result) << "\n"; + } #elif AMREX_USE_HIP // HIP - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; - rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, - rocfft_transform_type_real_forward, rocfft_precision_double, - 3, lengths, 1, nullptr); - assert_rocfft_status("rocfft_plan_create", result); - built_plan = true; + const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; + rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, + rocfft_transform_type_real_forward, rocfft_precision_double, + 3, lengths, 1, nullptr); + assert_rocfft_status("rocfft_plan_create", result); + built_plan = true; #else // host - fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], - vel_onegrid[mfi].dataPtr(comp), - reinterpret_cast - (spectral_field.back()->dataPtr(comp)), - FFTW_ESTIMATE); + fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], + vel_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_fieldx.back()->dataPtr()), + FFTW_ESTIMATE); #endif - forward_plan.push_back(fplan); - } + forward_plan.push_back(fplan); } ParallelDescriptor::Barrier(); - + // ForwardTransform for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { int i = mfi.LocalIndex(); #ifdef AMREX_USE_CUDA cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); cufftResult result = cufftExecD2Z(forward_plan[i], - vel_onegrid[mfi].dataPtr(comp), + vel_onegrid[mfi].dataPtr(), reinterpret_cast - (spectral_field[i]->dataPtr(comp))); + (spectral_fieldx[i]->dataPtr())); if (result != CUFFT_SUCCESS) { amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " << cufftError(result) << "\n"; @@ -625,8 +626,8 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); assert_rocfft_status("rocfft_execution_info_set_stream", result); - amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(comp); - FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr(comp)); + amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldx[i]->dataPtr()); result = rocfft_execute(forward_plan[i], (void**) &vel_onegrid_ptr, // in (void**) &spectral_field_ptr, // out @@ -640,154 +641,87 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, fftw_execute(forward_plan[i]); #endif } - } - - // destroy fft plan - for (int i = 0; i < forward_plan.size(); ++i) { + + // destroy fft plan + for (int i = 0; i < forward_plan.size(); ++i) { #ifdef AMREX_USE_CUDA - cufftDestroy(forward_plan[i]); + cufftDestroy(forward_plan[i]); #elif AMREX_USE_HIP - rocfft_plan_destroy(forward_plan[i]); + rocfft_plan_destroy(forward_plan[i]); #else - fftw_destroy_plan(forward_plan[i]); + fftw_destroy_plan(forward_plan[i]); #endif - } - - // Decompose velocity field into solenoidal and dilatational - for ( MFIter mfi(vel_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + } + + } // end x-vel - const Box& bx = mfi.tilebox(); - Array4< GpuComplex > spectral_tx = (*spectral_field[0]) .array(0,1); - Array4< GpuComplex > spectral_ty = (*spectral_field[0]) .array(1,1); - Array4< GpuComplex > spectral_tz = (*spectral_field[0]) .array(2,1); - Array4< GpuComplex > spectral_sx = (*spectral_field_S[0]).array(0,1); - Array4< GpuComplex > spectral_sy = (*spectral_field_S[0]).array(1,1); - Array4< GpuComplex > spectral_sz = (*spectral_field_S[0]).array(2,1); - Array4< GpuComplex > spectral_dx = (*spectral_field_D[0]).array(0,1); - Array4< GpuComplex > spectral_dy = (*spectral_field_D[0]).array(1,1); - Array4< GpuComplex > spectral_dz = (*spectral_field_D[0]).array(2,1); + // y-velocity + { + Vector forward_plan; + vel_onegrid.ParallelCopy(vel,1,0,1); + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { - amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - int nx = n_cells[0]; - int ny = n_cells[1]; - int nz = n_cells[2]; + // grab a single box including ghost cell range + Box realspace_bx = mfi.fabbox(); - Real GxR, GxC, GyR, GyC, GzR, GzC; - - if (i <= nx/2) { - // Gradient Operators - GxR = (cos(2.0*M_PI*i/nx)-1.0)/dx[0]; - GxC = (sin(2.0*M_PI*i/nx)-0.0)/dx[0]; - GyR = (cos(2.0*M_PI*j/ny)-1.0)/dx[1]; - GyC = (sin(2.0*M_PI*j/ny)-0.0)/dx[1]; - GzR = (cos(2.0*M_PI*k/nz)-1.0)/dx[2]; - GzC = (sin(2.0*M_PI*k/nz)-0.0)/dx[2]; - } - else { // conjugate - amrex::Abort("check the code; i should not go beyond bx.length(0)/2"); - } + // size of box including ghost cell range + fft_size = realspace_bx.length(); // This will be different for hybrid FFT - // Scale Total velocity FFT components - spectral_tx(i,j,k) *= (1.0/sqrtnpts); - spectral_ty(i,j,k) *= (1.0/sqrtnpts); - spectral_tz(i,j,k) *= (1.0/sqrtnpts); - - // Inverse Laplacian - Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; - - // Divergence of vel - Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + - spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + - spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; - Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + - 
spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + - spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; - - if (Lap < 1.0e-12) { // zero mode for no bulk motion - spectral_dx(i,j,k) *= 0.0; - spectral_dy(i,j,k) *= 0.0; - spectral_dz(i,j,k) *= 0.0; - } - else { - // Dilatational velocity - GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, - (divC*GxR - divR*GxC) / Lap); - spectral_dx(i,j,k) = copy_dx; - - GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, - (divC*GyR - divR*GyC) / Lap); - spectral_dy(i,j,k) = copy_dy; - - GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, - (divC*GzR - divR*GzC) / Lap); - spectral_dz(i,j,k) = copy_dz; - - // Solenoidal velocity - spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); - spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); - spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); - } - }); - } + // this is the size of the box, except the 0th component is 'halved plus 1' + IntVect spectral_bx_size = fft_size; + spectral_bx_size[0] = fft_size[0]/2 + 1; - ParallelDescriptor::Barrier(); + // spectral box + Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); - // Integrate K spectrum for velocities - IntegrateKVelocity(spectral_field , vel_onegrid, "turb_total" ,scaling,step); - IntegrateKVelocity(spectral_field_S, vel_onegrid, "turb_solenoidal",scaling,step); - IntegrateKVelocity(spectral_field_D, vel_onegrid, "turb_dilational",scaling,step); - - // Create one-grid array to store IFFT velocities - MultiFab vel_decomp_onegrid; - vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 6, 0); - vel_decomp_onegrid.setVal(0.0);; - - // Inverse FFT to get solenoidal velocity - Vector backward_planS; - built_plan = false; - for (int comp=0; comp<3; comp++) { - if (!built_plan) { - for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { - FFTplan fplan; + spectral_fieldy.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_fieldy.back()->setVal(0.0); // touch the memory + + spectral_field_Sy.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_field_Sy.back()->setVal(0.0); // touch the memory + + spectral_field_Dy.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_field_Dy.back()->setVal(0.0); // touch the memory + + FFTplan fplan; #ifdef AMREX_USE_CUDA // CUDA - cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan3d forward failed! Error: " - << cufftError(result) << "\n"; - } - built_plan = true; + cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " cufftplan3d forward failed! 
Error: " + << cufftError(result) << "\n"; + } #elif AMREX_USE_HIP // HIP - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; - rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, - rocfft_transform_type_real_inverse, rocfft_precision_double, - 3, lengths, 1, nullptr); - assert_rocfft_status("rocfft_plan_create", result); - built_plan = true; + const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; + rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, + rocfft_transform_type_real_forward, rocfft_precision_double, + 3, lengths, 1, nullptr); + assert_rocfft_status("rocfft_plan_create", result); + built_plan = true; #else // host - fplan = fftw_plan_dft_c2r_3d(fft_size[2], fft_size[1], fft_size[0], - reinterpret_cast - (spectral_field_S.back()->dataPtr(comp)), - vel_decomp_onegrid[mfi].dataPtr(comp), - FFTW_ESTIMATE); + fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], + vel_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_fieldy.back()->dataPtr()), + FFTW_ESTIMATE); #endif - backward_planS.push_back(fplan); - } + forward_plan.push_back(fplan); } ParallelDescriptor::Barrier(); - - // Backward Transform - for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { + + // ForwardTransform + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { int i = mfi.LocalIndex(); #ifdef AMREX_USE_CUDA - cufftSetStream(backward_planS[i], amrex::Gpu::gpuStream()); - cufftResult result = cufftExecZ2D(backward_planS[i], + cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); + cufftResult result = cufftExecD2Z(forward_plan[i], + vel_onegrid[mfi].dataPtr(), reinterpret_cast - (spectral_field_S[i]->dataPtr(comp)), - vel_decomp_onegrid[mfi].dataPtr(comp)); + (spectral_fieldy[i]->dataPtr())); if (result != CUFFT_SUCCESS) { amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " << cufftError(result) << "\n"; @@ -798,7 +732,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, assert_rocfft_status("rocfft_execution_info_create", result); std::size_t buffersize = 0; - result = rocfft_plan_get_work_buffer_size(backward_planS[i], &buffersize); + result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); void* buffer = amrex::The_Arena()->alloc(buffersize); @@ -808,9 +742,9 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); assert_rocfft_status("rocfft_execution_info_set_stream", result); - amrex::Real* vel_onegrid_ptr = vel_decomp_onegrid[mfi].dataPtr(comp); - FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field_S[i]->dataPtr(comp)); - result = rocfft_execute(backward_planS[i], + amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldy[i]->dataPtr()); + result = rocfft_execute(forward_plan[i], (void**) &vel_onegrid_ptr, // in (void**) &spectral_field_ptr, // out execinfo); @@ -820,67 +754,90 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, result = rocfft_execution_info_destroy(execinfo); assert_rocfft_status("rocfft_execution_info_destroy", result); #else - fftw_execute(backward_planS[i]); + fftw_execute(forward_plan[i]); #endif } - } - - // destroy fft plan - for (int i = 0; i < backward_planS.size(); ++i) { + + // destroy fft plan + for (int i = 0; i < forward_plan.size(); ++i) { #ifdef AMREX_USE_CUDA - cufftDestroy(backward_planS[i]); + cufftDestroy(forward_plan[i]); #elif AMREX_USE_HIP - rocfft_plan_destroy(backward_planS[i]); + rocfft_plan_destroy(forward_plan[i]); #else - fftw_destroy_plan(backward_planS[i]); + fftw_destroy_plan(forward_plan[i]); #endif - } - + } - // Inverse FFT to get dilatational velocity - Vector backward_planD; - built_plan = false; - for (int comp=0; comp<3; comp++) { - if (!built_plan) { - for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { - FFTplan fplan; + } // end y-vel + + // z-velocity + { + Vector forward_plan; + vel_onegrid.ParallelCopy(vel,2,0,1); + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { + + // grab a single box including ghost cell range + Box realspace_bx = mfi.fabbox(); + + // size of box including ghost cell range + fft_size = realspace_bx.length(); // This will be different for hybrid FFT + + // this is the size of the box, except the 0th component is 'halved plus 1' + IntVect spectral_bx_size = fft_size; + spectral_bx_size[0] = fft_size[0]/2 + 1; + + // spectral box + Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); + + spectral_fieldz.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_fieldz.back()->setVal(0.0); // touch the memory + + spectral_field_Sz.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_field_Sz.back()->setVal(0.0); // touch the memory + + spectral_field_Dz.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_field_Dz.back()->setVal(0.0); // touch the memory + + FFTplan fplan; #ifdef AMREX_USE_CUDA // CUDA - cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan3d forward failed! 
Error: " - << cufftError(result) << "\n"; - } - built_plan = true; + cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " cufftplan3d forward failed! Error: " + << cufftError(result) << "\n"; + } #elif AMREX_USE_HIP // HIP - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; - rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, - rocfft_transform_type_real_inverse, rocfft_precision_double, - 3, lengths, 1, nullptr); - assert_rocfft_status("rocfft_plan_create", result); - built_plan = true; + const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; + rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, + rocfft_transform_type_real_forward, rocfft_precision_double, + 3, lengths, 1, nullptr); + assert_rocfft_status("rocfft_plan_create", result); + built_plan = true; #else // host - fplan = fftw_plan_dft_c2r_3d(fft_size[2], fft_size[1], fft_size[0], - reinterpret_cast - (spectral_field_D.back()->dataPtr(comp)), - vel_decomp_onegrid[mfi].dataPtr(comp+3), - FFTW_ESTIMATE); + fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], + vel_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_fieldz.back()->dataPtr()), + FFTW_ESTIMATE); #endif - backward_planD.push_back(fplan); - } + forward_plan.push_back(fplan); } ParallelDescriptor::Barrier(); - - // Backward Transform - for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { + + // ForwardTransform + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { int i = mfi.LocalIndex(); #ifdef AMREX_USE_CUDA - cufftSetStream(backward_planD[i], amrex::Gpu::gpuStream()); - cufftResult result = cufftExecZ2D(backward_planD[i], + cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); + cufftResult result = cufftExecD2Z(forward_plan[i], + vel_onegrid[mfi].dataPtr(), reinterpret_cast - (spectral_field_D[i]->dataPtr(comp)), - vel_decomp_onegrid[mfi].dataPtr(comp+3)); + (spectral_fieldz[i]->dataPtr())); if (result != CUFFT_SUCCESS) { amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " << cufftError(result) << "\n"; @@ -891,7 +848,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, assert_rocfft_status("rocfft_execution_info_create", result); std::size_t buffersize = 0; - result = rocfft_plan_get_work_buffer_size(backward_planD[i], &buffersize); + result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); void* buffer = amrex::The_Arena()->alloc(buffersize); @@ -901,9 +858,9 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); assert_rocfft_status("rocfft_execution_info_set_stream", result); - amrex::Real* vel_onegrid_ptr = vel_decomp_onegrid[mfi].dataPtr(comp+3); - FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field_D[i]->dataPtr(comp)); - result = rocfft_execute(backward_planD[i], + amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldz[i]->dataPtr()); + result = rocfft_execute(forward_plan[i], (void**) &vel_onegrid_ptr, // in (void**) &spectral_field_ptr, // out execinfo); @@ -913,34 +870,170 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, result = rocfft_execution_info_destroy(execinfo); assert_rocfft_status("rocfft_execution_info_destroy", result); #else - fftw_execute(backward_planD[i]); + fftw_execute(forward_plan[i]); #endif } - } - - // destroy fft plan - for (int i = 0; i < backward_planD.size(); ++i) { + + // destroy fft plan + for (int i = 0; i < forward_plan.size(); ++i) { #ifdef AMREX_USE_CUDA - cufftDestroy(backward_planD[i]); + cufftDestroy(forward_plan[i]); #elif AMREX_USE_HIP - rocfft_plan_destroy(backward_planD[i]); + rocfft_plan_destroy(forward_plan[i]); #else - fftw_destroy_plan(backward_planD[i]); + fftw_destroy_plan(forward_plan[i]); #endif + } + + } // end x-vel + + + // Decompose velocity field into solenoidal and dilatational + for ( MFIter mfi(vel_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.tilebox(); + Array4< GpuComplex > spectral_tx = (*spectral_fieldx[0]) .array(); + Array4< GpuComplex > spectral_ty = (*spectral_fieldy[0]) .array(); + Array4< GpuComplex > spectral_tz = (*spectral_fieldz[0]) .array(); + Array4< GpuComplex > spectral_sx = (*spectral_field_Sx[0]).array(); + Array4< GpuComplex > spectral_sy = (*spectral_field_Sy[0]).array(); + Array4< GpuComplex > spectral_sz = (*spectral_field_Sz[0]).array(); + Array4< GpuComplex > spectral_dx = (*spectral_field_Dx[0]).array(); + Array4< GpuComplex > spectral_dy = (*spectral_field_Dy[0]).array(); + Array4< GpuComplex > spectral_dz = (*spectral_field_Dz[0]).array(); + + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + + Real GxR, GxC, GyR, GyC, GzR, GzC; + + if (i <= nx/2) { + // Gradient Operators + GxR = (cos(2.0*M_PI*i/nx)-1.0)/dx[0]; + GxC = (sin(2.0*M_PI*i/nx)-0.0)/dx[0]; + GyR = (cos(2.0*M_PI*j/ny)-1.0)/dx[1]; + GyC = (sin(2.0*M_PI*j/ny)-0.0)/dx[1]; + GzR = (cos(2.0*M_PI*k/nz)-1.0)/dx[2]; + GzC = (sin(2.0*M_PI*k/nz)-0.0)/dx[2]; + + // Scale Total velocity FFT components + spectral_tx(i,j,k) *= (1.0/sqrtnpts); + spectral_ty(i,j,k) *= (1.0/sqrtnpts); + spectral_tz(i,j,k) *= (1.0/sqrtnpts); + + // Inverse Laplacian + Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; + + // Divergence of vel + Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + + 
spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + + spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; + Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + + spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + + spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; + + if (Lap < 1.0e-12) { // zero mode for no bulk motion + spectral_dx(i,j,k) *= 0.0; + spectral_dy(i,j,k) *= 0.0; + spectral_dz(i,j,k) *= 0.0; + } + else { + // Dilatational velocity + GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, + (divC*GxR - divR*GxC) / Lap); + spectral_dx(i,j,k) = copy_dx; + + GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, + (divC*GyR - divR*GyC) / Lap); + spectral_dy(i,j,k) = copy_dy; + + GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, + (divC*GzR - divR*GzC) / Lap); + spectral_dz(i,j,k) = copy_dz; + + // Solenoidal velocity + spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); + spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); + spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); + } + } + }); } - // copy into external multifab - vel_decomp.ParallelCopy(vel_decomp_onegrid,0,0,6); + ParallelDescriptor::Barrier(); + + // Integrate K spectrum for velocities + IntegrateKVelocity(spectral_fieldx, spectral_fieldy, spectral_fieldz, vel_onegrid, "vel_total" ,scaling,step); + IntegrateKVelocity(spectral_field_Sx, spectral_field_Sy, spectral_field_Sz, vel_onegrid, "vel_solenoidal",scaling,step); + IntegrateKVelocity(spectral_field_Dx, spectral_field_Dy, spectral_field_Dz, vel_onegrid, "vel_dilatational",scaling,step); + + + // Inverse Solenoidal and Dilatational Velocity Components + { // solenoidal x + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Sx, vel_decomp_onegrid,fft_size); + // copy into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,0,1); + } + { // solenoidal y + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Sy, vel_decomp_onegrid,fft_size); + // copy into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,1,1); + } + { // solenoidal z + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Sz, vel_decomp_onegrid,fft_size); + // copy into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,2,1); + } + { // dilatational x + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Dx, vel_decomp_onegrid,fft_size); + // copy into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,3,1); + } + { // dilatational y + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Dy, vel_decomp_onegrid,fft_size); + // copy into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,4,1); + } + { // dilatational z + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Dz, vel_decomp_onegrid,fft_size); + // copy into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,5,1); + } vel_decomp.mult(1.0/sqrtnpts); + + #endif // end 
heFFTe } #if defined(HEFFTE) -void IntegrateKScalar(BaseFab >& spectral_field, +void IntegrateKScalar(const BaseFab >& spectral_field, const std::string& name, const Real& scaling, const Box& c_local_box, const Real& sqrtnpts, - const int& step) + const int& step, + const int& comp) { int npts = n_cells[0]/2; @@ -957,15 +1050,15 @@ void IntegrateKScalar(BaseFab >& spectral_field, phicnt_ptr[d] = 0; }); - Array4< GpuComplex > spectral = spectral_field.array(); + const Array4< const GpuComplex > spectral = spectral_field.const_array(comp,1); ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) { if (i <= n_cells[0]/2) { // only half of kx-domain int ki = i; int kj = j; int kk = k; - if (j >= n_cells[1]/2) kj = n_cells[1]-j; - if (k >= n_cells[2]/2) kk = n_cells[2]-k; +// if (j >= n_cells[1]/2) kj = n_cells[1]-j; +// if (k >= n_cells[2]/2) kk = n_cells[2]-k; Real dist = (ki*ki + kj*kj + kk*kk); dist = std::sqrt(dist); @@ -975,11 +1068,14 @@ void IntegrateKScalar(BaseFab >& spectral_field, int cell = int(dist); Real real = spectral(i,j,k).real(); Real imag = spectral(i,j,k).imag(); - Real cov = scaling*(1.0/(sqrtnpts*sqrtnpts))*(real*real + imag*imag); + Real cov = (1.0/(sqrtnpts*sqrtnpts*scaling))*(real*real + imag*imag); amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); } } + else { + amrex::Abort("i should not exceed n_cells[0]/2"); + } }); ParallelDescriptor::Barrier(); @@ -1013,12 +1109,13 @@ void IntegrateKScalar(BaseFab >& spectral_field, } } #else -void IntegrateKScalar(Vector > > >& spectral_field, +void IntegrateKScalar(const Vector > > >& spectral_field, const MultiFab& variables_onegrid, const std::string& name, const Real& scaling, const Real& sqrtnpts, - const int& step) + const int& step, + const int& comp) { int npts = n_cells[0]/2; @@ -1041,7 +1138,7 @@ void IntegrateKScalar(Vector > > >& spe const Box& bx = mfi.tilebox(); - Array4< GpuComplex > spectral = (*spectral_field[0]).array(); + const Array4 > spectral = (*spectral_field[0]).const_array(); amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { @@ -1049,8 +1146,8 @@ void IntegrateKScalar(Vector > > >& spe int ki = i; int kj = j; int kk = k; - if (j >= bx.length(1)/2) kj = bx.length(1)-j; - if (k >= bx.length(2)/2) kk = bx.length(2)-k; +// if (j >= bx.length(1)/2) kj = bx.length(1)-j; +// if (k >= bx.length(2)/2) kk = bx.length(2)-k; Real dist = (ki*ki + kj*kj + kk*kk); dist = std::sqrt(dist); @@ -1060,7 +1157,7 @@ void IntegrateKScalar(Vector > > >& spe int cell = int(dist); Real real = spectral(i,j,k).real(); Real imag = spectral(i,j,k).imag(); - Real cov = scaling*(1.0/(sqrtnpts*sqrtnpts))*(real*real + imag*imag); + Real cov = (1.0/(scaling*sqrtnpts*sqrtnpts))*(real*real + imag*imag); amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); } @@ -1099,7 +1196,7 @@ void IntegrateKScalar(Vector > > >& spe #endif #if defined(HEFFTE) -void IntegrateKVelocity(BaseFab >& spectral_field, +void IntegrateKVelocity(const BaseFab >& spectral_field, const std::string& name, const Real& scaling, const Box& c_local_box, const int& step) @@ -1119,17 +1216,17 @@ void IntegrateKVelocity(BaseFab >& spectral_field, phicnt_ptr[d] = 0; }); - Array4< GpuComplex > spectralx = spectral_field.array(0,1); - Array4< GpuComplex > spectraly = spectral_field.array(1,1); - Array4< GpuComplex > spectralz = spectral_field.array(2,1); + const Array4 > spectralx = spectral_field.const_array(0,1); + 
const Array4 > spectraly = spectral_field.const_array(1,1); + const Array4 > spectralz = spectral_field.const_array(2,1); ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) { if (i <= n_cells[0]/2) { // only half of kx-domain int ki = i; int kj = j; int kk = k; - if (j >= n_cells[1]/2) kj = n_cells[1]-j; - if (k >= n_cells[2]/2) kk = n_cells[2]-k; +// if (j >= n_cells[1]/2) kj = n_cells[1]-j; +// if (k >= n_cells[2]/2) kk = n_cells[2]-k; Real dist = (ki*ki + kj*kj + kk*kk); dist = std::sqrt(dist); @@ -1140,18 +1237,21 @@ void IntegrateKVelocity(BaseFab >& spectral_field, Real real, imag, cov_x, cov_y, cov_z, cov; real = spectralx(i,j,k).real(); imag = spectralx(i,j,k).imag(); - cov_x = scaling*(real*real + imag*imag); + cov_x = (1.0/scaling)*(real*real + imag*imag); real = spectraly(i,j,k).real(); imag = spectraly(i,j,k).imag(); - cov_y = scaling*(real*real + imag*imag); + cov_y = (1.0/scaling)*(real*real + imag*imag); real = spectralz(i,j,k).real(); imag = spectralz(i,j,k).imag(); - cov_z = scaling*(real*real + imag*imag); + cov_z = (1.0/scaling)*(real*real + imag*imag); cov = cov_x + cov_y + cov_z; amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); } } + else { + amrex::Abort("i should not exceed n_cells[0]/2"); + } }); ParallelDescriptor::Barrier(); @@ -1185,7 +1285,9 @@ void IntegrateKVelocity(BaseFab >& spectral_field, } } #else -void IntegrateKVelocity(Vector > > >& spectral_field, +void IntegrateKVelocity(const Vector > > >& spectral_fieldx, + const Vector > > >& spectral_fieldy, + const Vector > > >& spectral_fieldz, const MultiFab& vel_onegrid, const std::string& name, const Real& scaling, @@ -1210,9 +1312,9 @@ void IntegrateKVelocity(Vector > > >& s const Box& bx = mfi.tilebox(); - Array4< GpuComplex > spectralx = (*spectral_field[0]).array(0,1); - Array4< GpuComplex > spectraly = (*spectral_field[0]).array(1,1); - Array4< GpuComplex > spectralz = (*spectral_field[0]).array(2,1); + const Array4 > spectralx = (*spectral_fieldx[0]).const_array(); + const Array4 > spectraly = (*spectral_fieldy[0]).const_array(); + const Array4 > spectralz = (*spectral_fieldz[0]).const_array(); amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { @@ -1220,8 +1322,8 @@ void IntegrateKVelocity(Vector > > >& s int ki = i; int kj = j; int kk = k; - if (j >= bx.length(1)/2) kj = bx.length(1)-j; - if (k >= bx.length(2)/2) kk = bx.length(2)-k; +// if (j >= bx.length(1)/2) kj = bx.length(1)-j; +// if (k >= bx.length(2)/2) kk = bx.length(2)-k; Real dist = (ki*ki + kj*kj + kk*kk); dist = std::sqrt(dist); @@ -1232,13 +1334,13 @@ void IntegrateKVelocity(Vector > > >& s Real real, imag, cov_x, cov_y, cov_z, cov; real = spectralx(i,j,k).real(); imag = spectralx(i,j,k).imag(); - cov_x = scaling*(real*real + imag*imag); + cov_x = (1.0/scaling)*(real*real + imag*imag); real = spectraly(i,j,k).real(); imag = spectraly(i,j,k).imag(); - cov_y = scaling*(real*real + imag*imag); + cov_y = (1.0/scaling)*(real*real + imag*imag); real = spectralz(i,j,k).real(); imag = spectralz(i,j,k).imag(); - cov_z = scaling*(real*real + imag*imag); + cov_z = (1.0/scaling)*(real*real + imag*imag); cov = cov_x + cov_y + cov_z; amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); @@ -1276,3 +1378,109 @@ void IntegrateKVelocity(Vector > > >& s } } #endif + +#if defined(HEFFTE) +// this function not needed for HEFFTE +#else +void InverseFFTVel(Vector > > >& spectral_field, + MultiFab& 
vel_decomp_onegrid, const IntVect& fft_size) +{ + +#ifdef AMREX_USE_CUDA + using FFTplan = cufftHandle; + using FFTcomplex = cuDoubleComplex; +#elif AMREX_USE_HIP + using FFTplan = rocfft_plan; + using FFTcomplex = double2; +#else + using FFTplan = fftw_plan; + using FFTcomplex = fftw_complex; +#endif + + Vector backward_plan; + + for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { + FFTplan fplan; +#ifdef AMREX_USE_CUDA // CUDA + cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " cufftplan3d forward failed! Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP // HIP + const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; + rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, + rocfft_transform_type_real_inverse, rocfft_precision_double, + 3, lengths, 1, nullptr); + assert_rocfft_status("rocfft_plan_create", result); + built_plan = true; +#else // host + fplan = fftw_plan_dft_c2r_3d(fft_size[2], fft_size[1], fft_size[0], + reinterpret_cast + (spectral_field.back()->dataPtr()), + vel_decomp_onegrid[mfi].dataPtr(), + FFTW_ESTIMATE); +#endif + backward_plan.push_back(fplan); + } + + ParallelDescriptor::Barrier(); + + // Backward Transform + for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { + int i = mfi.LocalIndex(); +#ifdef AMREX_USE_CUDA + cufftSetStream(backward_plan[i], amrex::Gpu::gpuStream()); + cufftResult result = cufftExecZ2D(backward_plan[i], + reinterpret_cast + (spectral_field[i]->dataPtr()), + vel_decomp_onegrid[mfi].dataPtr()); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " forward transform using cufftExec failed! Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP + rocfft_execution_info execinfo = nullptr; + rocfft_status result = rocfft_execution_info_create(&execinfo); + assert_rocfft_status("rocfft_execution_info_create", result); + + std::size_t buffersize = 0; + result = rocfft_plan_get_work_buffer_size(backward_plan[i], &buffersize); + assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + + void* buffer = amrex::The_Arena()->alloc(buffersize); + result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); + assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + + result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); + assert_rocfft_status("rocfft_execution_info_set_stream", result); + + amrex::Real* vel_onegrid_ptr = vel_decomp_onegrid[mfi].dataPtr(); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr()); + result = rocfft_execute(backward_plan[i], + (void**) &vel_onegrid_ptr, // in + (void**) &spectral_field_ptr, // out + execinfo); + assert_rocfft_status("rocfft_execute", result); + amrex::Gpu::streamSynchronize(); + amrex::The_Arena()->free(buffer); + result = rocfft_execution_info_destroy(execinfo); + assert_rocfft_status("rocfft_execution_info_destroy", result); +#else + fftw_execute(backward_plan[i]); +#endif + } + + // destroy fft plan + for (int i = 0; i < backward_plan.size(); ++i) { +#ifdef AMREX_USE_CUDA + cufftDestroy(backward_plan[i]); +#elif AMREX_USE_HIP + rocfft_plan_destroy(backward_plan[i]); +#else + fftw_destroy_plan(backward_plan[i]); +#endif + } + +} +#endif diff --git a/src_compressible_stag/DeriveVelProp.cpp b/src_compressible_stag/DeriveVelProp.cpp index 27c6f05bf..e80f3ea92 100644 --- 
a/src_compressible_stag/DeriveVelProp.cpp +++ b/src_compressible_stag/DeriveVelProp.cpp @@ -567,8 +567,8 @@ void EvaluateWritePlotFileVelGrad(int step, varNames[1] = "uy_s"; varNames[2] = "uz_s"; varNames[3] = "ux_d"; - varNames[4] = "ux_d"; - varNames[5] = "uy_d"; + varNames[4] = "uy_d"; + varNames[5] = "uz_d"; varNames[6] = "umag_s"; varNames[7] = "umag_d"; varNames[8] = "div"; From 48bca93f3f454e4522f794ff21832fa4a38c413d Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Mon, 9 Oct 2023 09:41:28 -0700 Subject: [PATCH 005/151] flag to restart without advancing time. also more changes to heffte --- src_analysis/TurbSpectra.cpp | 6 +++-- src_compressible/compressible_functions.cpp | 10 ++++++++- src_compressible/compressible_namespace.H | 1 + src_compressible_stag/Checkpoint.cpp | 4 ++-- src_compressible_stag/main_driver.cpp | 25 ++++++++++++--------- 5 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src_analysis/TurbSpectra.cpp b/src_analysis/TurbSpectra.cpp index 95e453f69..394bad6ee 100644 --- a/src_analysis/TurbSpectra.cpp +++ b/src_analysis/TurbSpectra.cpp @@ -1093,7 +1093,8 @@ void IntegrateKScalar(const BaseFab >& spectral_field, } }); - Gpu::copy(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); + Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); + Gpu::streamSynchronize(); if (ParallelDescriptor::IOProcessor()) { std::ofstream turb; @@ -1269,7 +1270,8 @@ void IntegrateKVelocity(const BaseFab >& spectral_field, } }); - Gpu::copy(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); + Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); + Gpu::streamSynchronize(); if (ParallelDescriptor::IOProcessor()) { std::ofstream turb; diff --git a/src_compressible/compressible_functions.cpp b/src_compressible/compressible_functions.cpp index 9b6b7116b..07d0fa3a9 100644 --- a/src_compressible/compressible_functions.cpp +++ b/src_compressible/compressible_functions.cpp @@ -8,6 +8,7 @@ AMREX_GPU_MANAGED int compressible::do_1D; AMREX_GPU_MANAGED int compressible::do_2D; AMREX_GPU_MANAGED int compressible::all_correl; AMREX_GPU_MANAGED int compressible::nspec_surfcov = 0; +AMREX_GPU_MANAGED int compressible::turbRestartRun = 1; void InitializeCompressibleNamespace() { @@ -62,7 +63,14 @@ void InitializeCompressibleNamespace() all_correl = 0; pp.query("all_correl",all_correl); - + // restart for turbulence + // if 1: will advance time, if 0: only stats no advance time + pp.query("turbRestartRun",turbRestartRun); + if (turbRestartRun == 0) { + if (restart <= 0) amrex::Abort("turbRestartRun requires restarting from a checkpoint, restart > 0"); + if (max_step != restart+1) amrex::Abort("this is a single step run; max_step should be equal to restart+1"); + } + return; } diff --git a/src_compressible/compressible_namespace.H b/src_compressible/compressible_namespace.H index d5fa22abe..3357067cb 100644 --- a/src_compressible/compressible_namespace.H +++ b/src_compressible/compressible_namespace.H @@ -7,6 +7,7 @@ namespace compressible { extern AMREX_GPU_MANAGED int do_2D; extern AMREX_GPU_MANAGED int all_correl; extern AMREX_GPU_MANAGED int nspec_surfcov; + extern AMREX_GPU_MANAGED int turbRestartRun; } diff --git a/src_compressible_stag/Checkpoint.cpp b/src_compressible_stag/Checkpoint.cpp index f03802ba9..e298af6ce 100644 --- a/src_compressible_stag/Checkpoint.cpp +++ b/src_compressible_stag/Checkpoint.cpp @@ -639,7 +639,7 @@ 
void ReadCheckPoint3D(int& step, dmap.define(ba, ParallelDescriptor::NProcs()); #if defined(TURB) - if (turbForcing > 1) { + if ((turbForcing > 1) and (turbRestartRun)) { turbforce.define(ba,dmap,turb_a,turb_b,turb_c,turb_d,turb_alpha); } #endif @@ -690,7 +690,7 @@ void ReadCheckPoint3D(int& step, #if defined(TURB) // Read in turbulent forcing - if (turbForcing > 1) { + if ((turbForcing > 1) and (turbRestartRun)) { Real fs_temp; Real fc_temp; for (int i=0; i<132; ++i) { diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp index 68f6eee27..10cb72a56 100644 --- a/src_compressible_stag/main_driver.cpp +++ b/src_compressible_stag/main_driver.cpp @@ -918,7 +918,7 @@ void main_driver(const char* argv) #if defined(TURB) // Initialize Turbulence Forcing Object - if (turbForcing > 1) { + if ((turbForcing > 1) and (turbRestartRun)) { turbforce.Initialize(geom); } #endif @@ -943,8 +943,10 @@ void main_driver(const char* argv) if (n_ads_spec>0) sample_MFsurfchem(cu, prim, surfcov, dNadsdes, geom, dt); // FHD - RK3stepStag(cu, cumom, prim, vel, source, eta, zeta, kappa, chi, D, - faceflux, edgeflux_x, edgeflux_y, edgeflux_z, cenflux, ranchem, geom, dt, step, turbforce); + if (turbRestartRun) { + RK3stepStag(cu, cumom, prim, vel, source, eta, zeta, kappa, chi, D, + faceflux, edgeflux_x, edgeflux_y, edgeflux_z, cenflux, ranchem, geom, dt, step, turbforce); + } // update surface chemistry (via either surfchem_mui or MFsurfchem) #if defined(MUI) || defined(USE_AMREX_MPMD) @@ -1097,12 +1099,6 @@ void main_driver(const char* argv) WritePlotFileStag(step, time, geom, cu, cuMeans, cuVars, cumom, cumomMeans, cumomVars, prim, primMeans, primVars, vel, velMeans, velVars, coVars, surfcov, surfcovMeans, surfcovVars, eta, kappa); -#if defined(TURB) - if (turbForcing > 0) { - EvaluateWritePlotFileVelGrad(step, time, geom, vel, vel_decomp); - } -#endif - if (plot_cross) { if (do_1D) { WriteSpatialCross1D(spatialCross1D, step, geom, ncross); @@ -1144,13 +1140,18 @@ void main_driver(const char* argv) Vector scaling_turb_scalar(3, dVolinv); TurbSpectrumScalar(MFTurbScalar, geom, step, scaling_turb_scalar, var_names_turbScalar); } + + if (turbForcing > 0) { + EvaluateWritePlotFileVelGrad(step, time, geom, vel, vel_decomp); + } #endif } #if defined(TURB) // turbulence outputs - if ((turbForcing >= 1) and (step%1000 == 0)) { + if (((turbForcing >= 1) and (step%1000 == 0)) or + ((turbForcing >= 1) and (turbRestartRun == 0))) { Real turbKE, c_speed, u_rms, taylor_len, taylor_Re, taylor_Ma, skew, kurt, eps_s, eps_d, eps_ratio, kolm_s, kolm_d, kolm_t; @@ -1184,7 +1185,9 @@ void main_driver(const char* argv) turboutfile << std::endl; } - if ((turbForcing >= 1) and (writePlt)) { + if (((turbForcing >= 1) and (writePlt)) or + ((turbForcing >= 1) and (turbRestartRun == 0))) { + Real turbKE_s, turbKE_d, delta_turbKE; Real u_rms_s, u_rms_d, delta_u_rms; Real taylor_Ma_d; From b625dc4538b1222825a0a5e6005028a206680656 Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Wed, 11 Oct 2023 19:17:40 -0400 Subject: [PATCH 006/151] more compilation mods to get heffte working on frontier --- exec/compressible_stag/GNUmakefile | 34 +- exec/compressible_stag/build_frontier.sh | 15 +- src_analysis/TurbSpectra.H | 61 ++- src_analysis/TurbSpectra.cpp | 538 +++++++++++++++-------- src_compressible_stag/main_driver.cpp | 10 + 5 files changed, 433 insertions(+), 225 deletions(-) diff --git a/exec/compressible_stag/GNUmakefile b/exec/compressible_stag/GNUmakefile index 208570aa9..8124a8ee1 100644 --- 
a/exec/compressible_stag/GNUmakefile +++ b/exec/compressible_stag/GNUmakefile @@ -1,11 +1,12 @@ # AMREX_HOME defines the directory in which we will find all the AMReX code. # If you set AMREX_HOME as an environment variable, this line will be ignored AMREX_HOME ?= ../../../amrex/ +HEFFTE_HOME ?= ../../../heffte/ DEBUG = FALSE USE_MPI = TRUE USE_OMP = FALSE -USE_CUDA = TRUE +USE_CUDA = FALSE USE_HIP = FALSE COMP = gnu DIM = 3 @@ -13,8 +14,10 @@ TINY_PROFILE = FALSE MAX_SPEC = 8 USE_PARTICLES = FALSE -DO_TURB = FALSE -USE_HEFFTE = FALSE +DO_TURB = FALSE +USE_HEFFTE_FFTW = FALSE +USE_HEFFTE_CUFFT = FALSE +USE_HEFFTE_ROCFFT = FALSE include $(AMREX_HOME)/Tools/GNUMake/Make.defs @@ -41,31 +44,20 @@ include ../../src_rng/Make.package VPATH_LOCATIONS += ../../src_rng/ INCLUDE_LOCATIONS += ../../src_rng/ - include ../../src_common/Make.package VPATH_LOCATIONS += ../../src_common/ INCLUDE_LOCATIONS += ../../src_common/ +include $(HEFFTE_HOME)/src/Make.package + include $(AMREX_HOME)/Src/Base/Make.package include ../../src_analysis/Make.package VPATH_LOCATIONS += ../../src_analysis/ INCLUDE_LOCATIONS += ../../src_analysis/ - + include $(AMREX_HOME)/Tools/GNUMake/Make.rules -ifeq ($(USE_HEFFTE),TRUE) - ifeq ($(USE_CUDA),TRUE) - HEFFTE_HOME ?= ../../../heffte/build_gpu - else - HEFFTE_HOME ?= ../../../heffte/build - endif - libraries += -lheffte - VPATH_LOCATIONS += $(HEFFTE_HOME)/include - INCLUDE_LOCATIONS += $(HEFFTE_HOME)/include - LIBRARY_LOCATIONS += $(HEFFTE_HOME)/lib -endif - ifeq ($(USE_CUDA),TRUE) LIBRARIES += -lcufft else ifeq ($(USE_HIP),TRUE) @@ -81,8 +73,12 @@ ifeq ($(DO_TURB), TRUE) DEFINES += -DTURB endif -ifeq ($(USE_HEFFTE), TRUE) - DEFINES += -DHEFFTE +ifeq ($(USE_HEFFTE_FFTW),TRUE) + DEFINES += -DHEFFTE_FFTW +else ifeq ($(USE_HEFFTE_CUFFT),TRUE) + DEFINES += -DHEFFTE_CUFFT +else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) + DEFINES += -DHEFFTE_ROCFFT endif MAXSPECIES := $(strip $(MAX_SPEC)) diff --git a/exec/compressible_stag/build_frontier.sh b/exec/compressible_stag/build_frontier.sh index d89d820bf..3b02bbf8b 100755 --- a/exec/compressible_stag/build_frontier.sh +++ b/exec/compressible_stag/build_frontier.sh @@ -1,7 +1,6 @@ #!/usr/bin/bash ## load necessary modules -module load cmake/3.23.2 module load craype-accel-amd-gfx90a module load rocm/5.2.0 # waiting for 5.6 for next bump module load cray-mpich @@ -14,11 +13,11 @@ export MPICH_GPU_SUPPORT_ENABLED=1 export AMREX_AMD_ARCH=gfx90a # compiler environment hints -export CC=$(which hipcc) -export CXX=$(which hipcc) -export FC=$(which ftn) -export CFLAGS="-I${ROCM_PATH}/include" -export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed" -export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64 ${PE_MPICH_GTL_DIR_amd_gfx90a} -lmpi_gtl_hsa" +##export CC=$(which hipcc) +##export CXX=$(which hipcc) +##export FC=$(which ftn) +##export CFLAGS="-I${ROCM_PATH}/include" +##export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed" +##export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64 ${PE_MPICH_GTL_DIR_amd_gfx90a} -lmpi_gtl_hsa" -make -j18 USE_CUDA=FALSE USE_HIP=TRUE DO_TURB=TRUE MAX_SPEC=2 +make -j10 USE_CUDA=FALSE USE_HIP=TRUE DO_TURB=TRUE MAX_SPEC=2 USE_HEFFTE_ROCFFT=TRUE diff --git a/src_analysis/TurbSpectra.H b/src_analysis/TurbSpectra.H index ebf6b96bc..4abf0bfcc 100644 --- a/src_analysis/TurbSpectra.H +++ b/src_analysis/TurbSpectra.H @@ -6,11 +6,13 @@ #include #include -// These are for heFFTe / FFTW / cuFFT / rocFFT -#if defined(HEFFTE) +// HEFFTE +#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) #include #endif + +// non-HEFFTE 
#ifdef AMREX_USE_CUDA #include #elif AMREX_USE_HIP @@ -34,33 +36,48 @@ using namespace amrex; +#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) #ifdef AMREX_USE_CUDA std::string cufftError (const cufftResult& err); #endif #ifdef AMREX_USE_HIP std::string rocfftError (const rocfft_status err); -void rocfft_status (std::string const& name, rocfft_status status); +void Assert_rocfft_status (std::string const& name, rocfft_status status); +#endif #endif -#if defined(HEFFTE) -void IntegrateKScalar(const BaseFab >& spectral_field, - const std::string& name, const Real& scaling, - const Box& c_local_box, - const Real& sqrtnpts, - const int& step, - const int& comp); -void IntegrateKVelocity(const BaseFab >& spectral_field, - const std::string& name, const Real& scaling, - const Box& c_local_box, - const int& step); -#else +#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) +void IntegrateKScalarHeffte(const BaseFab >& spectral_field, + const std::string& name, const Real& scaling, + const Box& c_local_box, + const Real& sqrtnpts, + const int& step); +void IntegrateKVelocityHeffte(const BaseFab >& spectral_fieldx, + const BaseFab >& spectral_fieldy, + const BaseFab >& spectral_fieldz, + const std::string& name, const Real& scaling, + const Box& c_local_box, + const int& step); +void TurbSpectrumScalarHeffte(const MultiFab& variables, + const amrex::Geometry& geom, + const int& step, + const amrex::Vector& var_scaling, + const amrex::Vector< std::string >& var_names); +void TurbSpectrumVelDecompHeffte(const MultiFab& vel, + MultiFab& vel_decomp, + const amrex::Geometry& geom, + const int& step, + const amrex::Real& var_scaling, + const amrex::Vector< std::string >& var_names); +#endif + +#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) void IntegrateKScalar(const Vector > > >& spectral_field, const MultiFab& variables_onegrid, const std::string& name, const Real& scaling, const Real& sqrtnpts, - const int& step, - const int& comp); + const int& step); void IntegrateKVelocity(const Vector > > >& spectral_fieldx, const Vector > > >& spectral_fieldy, const Vector > > >& spectral_fieldz, @@ -68,22 +85,18 @@ void IntegrateKVelocity(const Vector > const std::string& name, const Real& scaling, const int& step); -void InverseFFTVel(Vector > > >& spectral_field, - MultiFab& vel_decomp_onegrid, const IntVect& fft_size); -#endif - void TurbSpectrumScalar(const MultiFab& variables, const amrex::Geometry& geom, const int& step, const amrex::Vector& var_scaling, const amrex::Vector< std::string >& var_names); - void TurbSpectrumVelDecomp(const MultiFab& vel, MultiFab& vel_decomp, const amrex::Geometry& geom, const int& step, const amrex::Real& var_scaling, const amrex::Vector< std::string >& var_names); - - +void InverseFFTVel(Vector > > >& spectral_field, + MultiFab& vel_decomp_onegrid, const IntVect& fft_size); +#endif #endif diff --git a/src_analysis/TurbSpectra.cpp b/src_analysis/TurbSpectra.cpp index 394bad6ee..f2f308b1c 100644 --- a/src_analysis/TurbSpectra.cpp +++ b/src_analysis/TurbSpectra.cpp @@ -5,6 +5,7 @@ #include "AMReX_PlotFileUtil.H" #include "AMReX_BoxArray.H" +#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) #ifdef AMREX_USE_CUDA std::string cufftError (const cufftResult& err) { @@ -48,24 +49,25 @@ std::string rocfftError (const rocfft_status err) } } -void rocfft_status (std::string const& name, rocfft_status status) +void Assert_rocfft_status (std::string const& name, 
rocfft_status status) { if (status != rocfft_status_success) { amrex::AllPrint() << name + " failed! Error: " + rocfftError(status) << "\n";; } } #endif +#endif -void TurbSpectrumScalar(const MultiFab& variables, - const amrex::Geometry& geom, - const int& step, - const amrex::Vector& scaling, - const amrex::Vector< std::string >& var_names) +#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) // heffte +void TurbSpectrumScalarHeffte(const MultiFab& variables, + const amrex::Geometry& geom, + const int& step, + const amrex::Vector& scaling, + const amrex::Vector< std::string >& var_names) { BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); -#if defined(HEFFTE) AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.local_size() == 1, "TurbSpectrumScalar: Must have one Box per MPI process when using heFFTe"); int ncomp = variables.nComp(); @@ -111,34 +113,51 @@ void TurbSpectrumScalar(const MultiFab& variables, c_local_box.growHi(0,1); } - // each MPI rank gets storage for its piece of the fft - BaseFab > spectral_field(c_local_box, ncomp, The_Device_Arena()); - - int r2c_direction = 0; -#ifdef AMREX_USE_CUDA - heffte::fft3d_r2c fft -#elif AMREX_USE_HIP - heffte::fft3d_r2c fft -#else - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - using heffte_complex = typename heffte::fft_output::type; for (int comp=0; comp > spectral_field(c_local_box, 1, The_Device_Arena()); + + int r2c_direction = 0; +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + using heffte_complex = typename heffte::fft_output::type; + heffte_complex* spectral_data = (heffte_complex*) spectral_field.dataPtr(); + + MultiFab variables_single(ba, dm, 1, 0); + variables_single.ParallelCopy(variables,comp,0,1); + + fft.forward(variables_single[local_boxid].dataPtr(),spectral_data); ParallelDescriptor::Barrier(); // Integrate spectra over k-shells - IntegrateKScalar(spectral_field,var_names[comp],scaling[comp],c_local_box,sqrtnpts,step,comp); + IntegrateKScalarHeffte(spectral_field,var_names[comp],scaling[comp],c_local_box,sqrtnpts,step); } -#else // not heFFTe +} +#endif + +#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) +void TurbSpectrumScalar(const MultiFab& variables, + const amrex::Geometry& geom, + const int& step, + const amrex::Vector& var_scaling, + const amrex::Vector< std::string >& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); + 
AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); int ncomp = variables.nComp(); long npts; @@ -166,7 +185,6 @@ void TurbSpectrumScalar(const MultiFab& variables, using FFTcomplex = fftw_complex; #endif - // size of box including ghost cell range IntVect fft_size; @@ -212,7 +230,7 @@ void TurbSpectrumScalar(const MultiFab& variables, rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, rocfft_transform_type_real_forward, rocfft_precision_double, 3, lengths, 1, nullptr); - assert_rocfft_status("rocfft_plan_create", result); + Assert_rocfft_status("rocfft_plan_create", result); #else // host fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], variables_onegrid[mfi].dataPtr(), @@ -244,18 +262,18 @@ void TurbSpectrumScalar(const MultiFab& variables, #elif AMREX_USE_HIP rocfft_execution_info execinfo = nullptr; rocfft_status result = rocfft_execution_info_create(&execinfo); - assert_rocfft_status("rocfft_execution_info_create", result); + Assert_rocfft_status("rocfft_execution_info_create", result); std::size_t buffersize = 0; result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); - assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); void* buffer = amrex::The_Arena()->alloc(buffersize); result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - assert_rocfft_status("rocfft_execution_info_set_stream", result); + Assert_rocfft_status("rocfft_execution_info_set_stream", result); amrex::Real* variables_onegrid_ptr = variables_onegrid[mfi].dataPtr(); FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr()); @@ -263,18 +281,18 @@ void TurbSpectrumScalar(const MultiFab& variables, (void**) &variables_onegrid_ptr, // in (void**) &spectral_field_ptr, // out execinfo); - assert_rocfft_status("rocfft_execute", result); + Assert_rocfft_status("rocfft_execute", result); amrex::Gpu::streamSynchronize(); amrex::The_Arena()->free(buffer); result = rocfft_execution_info_destroy(execinfo); - assert_rocfft_status("rocfft_execution_info_destroy", result); + Assert_rocfft_status("rocfft_execution_info_destroy", result); #else fftw_execute(forward_plan[i]); #endif } // Integrate spectra over k-shells - IntegrateKScalar(spectral_field,variables_onegrid,var_names[comp],scaling[comp],sqrtnpts,step,comp); + IntegrateKScalar(spectral_field,variables_onegrid,var_names[comp],scaling[comp],sqrtnpts,step); } // destroy fft plan @@ -287,20 +305,20 @@ void TurbSpectrumScalar(const MultiFab& variables, fftw_destroy_plan(forward_plan[i]); #endif } -#endif // end heFFTE } - -void TurbSpectrumVelDecomp(const MultiFab& vel, - MultiFab& vel_decomp, - const amrex::Geometry& geom, - const int& step, - const amrex::Real& scaling, - const amrex::Vector< std::string >& var_names) +#endif // end not-heFFTE + +#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) // heffte +void TurbSpectrumVelDecompHeffte(const MultiFab& vel, + MultiFab& 
vel_decomp, + const amrex::Geometry& geom, + const int& step, + const amrex::Real& scaling, + const amrex::Vector< std::string >& var_names) { BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); -#if defined(HEFFTE) AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.local_size() == 1, "TurbSpectrumVelDecomp: Must have one Box per MPI process when using heFFTe"); const GpuArray dx = geom.CellSizeArray(); @@ -347,50 +365,107 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, } // each MPI rank gets storage for its piece of the fft - BaseFab > spectral_field_T(c_local_box, 3, The_Device_Arena()); // total - BaseFab > spectral_field_S(c_local_box, 3, The_Device_Arena()); // solenoidal - BaseFab > spectral_field_D(c_local_box, 3, The_Device_Arena()); // dilatational - - - int r2c_direction = 0; -#ifdef AMREX_USE_CUDA - heffte::fft3d_r2c fft -#elif AMREX_USE_HIP - heffte::fft3d_r2c fft -#else - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); + BaseFab > spectral_field_Tx(c_local_box, 1, The_Device_Arena()); // totalx + BaseFab > spectral_field_Ty(c_local_box, 1, The_Device_Arena()); // totaly + BaseFab > spectral_field_Tz(c_local_box, 1, The_Device_Arena()); // totalz + BaseFab > spectral_field_Sx(c_local_box, 1, The_Device_Arena()); // solenoidalx + BaseFab > spectral_field_Sy(c_local_box, 1, The_Device_Arena()); // solenoidaly + BaseFab > spectral_field_Sz(c_local_box, 1, The_Device_Arena()); // solenoidalz + BaseFab > spectral_field_Dx(c_local_box, 1, The_Device_Arena()); // dilatationalx + BaseFab > spectral_field_Dy(c_local_box, 1, The_Device_Arena()); // dilatationaly + BaseFab > spectral_field_Dz(c_local_box, 1, The_Device_Arena()); // dilatationalz + spectral_field_Tx.setVal(0.0); + spectral_field_Ty.setVal(0.0); + spectral_field_Tz.setVal(0.0); + spectral_field_Sx.setVal(0.0); + spectral_field_Sy.setVal(0.0); + spectral_field_Sz.setVal(0.0); + spectral_field_Dx.setVal(0.0); + spectral_field_Dy.setVal(0.0); + spectral_field_Dz.setVal(0.0); // ForwardTransform + // X using heffte_complex = typename heffte::fft_output::type; { - heffte_complex* spectral_data = (heffte_complex*) spectral_field_T.dataPtr(0); - fft.forward(vel[local_boxid].dataPtr(0),spectral_data); + int r2c_direction = 0; +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + MultiFab vel_single(ba, dm, 1, 0); + vel_single.ParallelCopy(vel, 0, 0, 1); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tx.dataPtr(); + 
fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); + ParallelDescriptor::Barrier(); } + // Y { - heffte_complex* spectral_data = (heffte_complex*) spectral_field_T.dataPtr(1); - fft.forward(vel[local_boxid].dataPtr(1),spectral_data); + int r2c_direction = 0; +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + MultiFab vel_single(ba, dm, 1, 0); + vel_single.ParallelCopy(vel, 1, 0, 1); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Ty.dataPtr(); + fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); + ParallelDescriptor::Barrier(); } + // Z { - heffte_complex* spectral_data = (heffte_complex*) spectral_field_T.dataPtr(2); - fft.forward(vel[local_boxid].dataPtr(2),spectral_data); + int r2c_direction = 0; +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + MultiFab vel_single(ba, dm, 1, 0); + vel_single.ParallelCopy(vel, 2, 0, 1); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tz.dataPtr(); + fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); + ParallelDescriptor::Barrier(); } // Decompose velocity field into solenoidal and dilatational - Array4< GpuComplex > spectral_tx = spectral_field_T.array(0,1); - Array4< GpuComplex > spectral_ty = spectral_field_T.array(1,1); - Array4< GpuComplex > spectral_tz = spectral_field_T.array(2,1); - Array4< GpuComplex > spectral_sx = spectral_field_S.array(0,1); - Array4< GpuComplex > spectral_sy = spectral_field_S.array(1,1); - Array4< GpuComplex > spectral_sz = spectral_field_S.array(2,1); - Array4< GpuComplex > spectral_dx = spectral_field_D.array(0,1); - Array4< GpuComplex > spectral_dy = spectral_field_D.array(1,1); - Array4< GpuComplex > spectral_dz = spectral_field_D.array(2,1); + Array4< GpuComplex > spectral_tx = spectral_field_Tx.array(); + Array4< GpuComplex > spectral_ty = spectral_field_Ty.array(); + Array4< GpuComplex > spectral_tz = spectral_field_Tz.array(); + Array4< GpuComplex > spectral_sx = spectral_field_Sx.array(); + Array4< GpuComplex > spectral_sy = spectral_field_Sy.array(); + Array4< GpuComplex > spectral_sz = spectral_field_Sz.array(); + Array4< GpuComplex > spectral_dx = spectral_field_Dx.array(); + Array4< GpuComplex > spectral_dy = spectral_field_Dy.array(); + Array4< GpuComplex > spectral_dz = spectral_field_Dz.array(); ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) { @@ -398,7 +473,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, int ny = n_cells[1]; int nz = n_cells[2]; - Real GxR, GxC, GyR, GyC, GzR, GzC; + Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0; if (i <= nx/2) { // Gradient Operators @@ 
-448,56 +523,183 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, (divC*GzR - divR*GzC) / Lap); spectral_dz(i,j,k) = copy_dz; - - // Solenoidal velocity - spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); - spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); - spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); } + + // Solenoidal velocity + spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); + spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); + spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); }); ParallelDescriptor::Barrier(); // Integrate K spectrum for velocities - IntegrateKVelocity(spectral_field_T,"vel_total", scaling,c_local_box,step); - IntegrateKVelocity(spectral_field_S,"vel_solenoidal",scaling,c_local_box,step); - IntegrateKVelocity(spectral_field_D,"vel_dilational",scaling,c_local_box,step); + IntegrateKVelocityHeffte(spectral_field_Tx,spectral_field_Ty,spectral_field_Tz,"vel_total" ,scaling,c_local_box,step); + IntegrateKVelocityHeffte(spectral_field_Sx,spectral_field_Sy,spectral_field_Sz,"vel_solenoidal",scaling,c_local_box,step); + IntegrateKVelocityHeffte(spectral_field_Dx,spectral_field_Dy,spectral_field_Dz,"vel_dilational",scaling,c_local_box,step); // inverse Fourier transform solenoidal and dilatational components { - heffte_complex* spectral_data = (heffte_complex*) spectral_field_S.dataPtr(0); - fft.backward(spectral_data, vel_decomp[local_boxid].dataPtr(0)); + int r2c_direction = 0; +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + MultiFab vel_decomp_single(ba, dm, 1, 0); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sx.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + + ParallelDescriptor::Barrier(); + + vel_decomp.ParallelCopy(vel_decomp_single, 0, 0, 1); } { - heffte_complex* spectral_data = (heffte_complex*) spectral_field_S.dataPtr(1); - fft.backward(spectral_data, vel_decomp[local_boxid].dataPtr(1)); + int r2c_direction = 0; +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + MultiFab vel_decomp_single(ba, dm, 1, 0); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sy.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + + ParallelDescriptor::Barrier(); + + vel_decomp.ParallelCopy(vel_decomp_single, 0, 1, 1); } { - heffte_complex* spectral_data = (heffte_complex*) spectral_field_S.dataPtr(2); - fft.backward(spectral_data, vel_decomp[local_boxid].dataPtr(2)); + int r2c_direction = 0; +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c 
fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + MultiFab vel_decomp_single(ba, dm, 1, 0); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sz.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + + ParallelDescriptor::Barrier(); + + vel_decomp.ParallelCopy(vel_decomp_single, 0, 2, 1); } { - heffte_complex* spectral_data = (heffte_complex*) spectral_field_D.dataPtr(0); - fft.backward(spectral_data, vel_decomp[local_boxid].dataPtr(3)); + int r2c_direction = 0; +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + MultiFab vel_decomp_single(ba, dm, 1, 0); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dx.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + + ParallelDescriptor::Barrier(); + + vel_decomp.ParallelCopy(vel_decomp_single, 0, 3, 1); } { - heffte_complex* spectral_data = (heffte_complex*) spectral_field_D.dataPtr(1); - fft.backward(spectral_data, vel_decomp[local_boxid].dataPtr(4)); + int r2c_direction = 0; +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + MultiFab vel_decomp_single(ba, dm, 1, 0); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dy.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + + ParallelDescriptor::Barrier(); + + vel_decomp.ParallelCopy(vel_decomp_single, 0, 4, 1); } { - heffte_complex* spectral_data = (heffte_complex*) spectral_field_D.dataPtr(2); - fft.backward(spectral_data, vel_decomp[local_boxid].dataPtr(5)); + int r2c_direction = 0; +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + MultiFab vel_decomp_single(ba, dm, 1, 0); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dz.dataPtr(); + 
fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + + ParallelDescriptor::Barrier(); + + vel_decomp.ParallelCopy(vel_decomp_single, 0, 5, 1); } vel_decomp.mult(1.0/sqrtnpts); -#else // not heFFTe +} +#endif + +#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) +void TurbSpectrumVelDecomp(const MultiFab& vel, + MultiFab& vel_decomp, + const amrex::Geometry& geom, + const int& step, + const amrex::Real& var_scaling, + const amrex::Vector< std::string >& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); const GpuArray dx = geom.CellSizeArray(); long npts; @@ -583,8 +785,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, rocfft_transform_type_real_forward, rocfft_precision_double, 3, lengths, 1, nullptr); - assert_rocfft_status("rocfft_plan_create", result); - built_plan = true; + Assert_rocfft_status("rocfft_plan_create", result); #else // host fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], vel_onegrid[mfi].dataPtr(), @@ -613,18 +814,18 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, #elif AMREX_USE_HIP rocfft_execution_info execinfo = nullptr; rocfft_status result = rocfft_execution_info_create(&execinfo); - assert_rocfft_status("rocfft_execution_info_create", result); + Assert_rocfft_status("rocfft_execution_info_create", result); std::size_t buffersize = 0; result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); - assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); void* buffer = amrex::The_Arena()->alloc(buffersize); result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - assert_rocfft_status("rocfft_execution_info_set_stream", result); + Assert_rocfft_status("rocfft_execution_info_set_stream", result); amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldx[i]->dataPtr()); @@ -632,11 +833,11 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, (void**) &vel_onegrid_ptr, // in (void**) &spectral_field_ptr, // out execinfo); - assert_rocfft_status("rocfft_execute", result); + Assert_rocfft_status("rocfft_execute", result); amrex::Gpu::streamSynchronize(); amrex::The_Arena()->free(buffer); result = rocfft_execution_info_destroy(execinfo); - assert_rocfft_status("rocfft_execution_info_destroy", result); + Assert_rocfft_status("rocfft_execution_info_destroy", result); #else fftw_execute(forward_plan[i]); #endif @@ -699,8 +900,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, rocfft_transform_type_real_forward, rocfft_precision_double, 3, lengths, 1, nullptr); - assert_rocfft_status("rocfft_plan_create", result); - built_plan = true; + Assert_rocfft_status("rocfft_plan_create", result); #else // host fplan = fftw_plan_dft_r2c_3d(fft_size[2], 
fft_size[1], fft_size[0], vel_onegrid[mfi].dataPtr(), @@ -729,18 +929,18 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, #elif AMREX_USE_HIP rocfft_execution_info execinfo = nullptr; rocfft_status result = rocfft_execution_info_create(&execinfo); - assert_rocfft_status("rocfft_execution_info_create", result); + Assert_rocfft_status("rocfft_execution_info_create", result); std::size_t buffersize = 0; result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); - assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); void* buffer = amrex::The_Arena()->alloc(buffersize); result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - assert_rocfft_status("rocfft_execution_info_set_stream", result); + Assert_rocfft_status("rocfft_execution_info_set_stream", result); amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldy[i]->dataPtr()); @@ -748,11 +948,11 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, (void**) &vel_onegrid_ptr, // in (void**) &spectral_field_ptr, // out execinfo); - assert_rocfft_status("rocfft_execute", result); + Assert_rocfft_status("rocfft_execute", result); amrex::Gpu::streamSynchronize(); amrex::The_Arena()->free(buffer); result = rocfft_execution_info_destroy(execinfo); - assert_rocfft_status("rocfft_execution_info_destroy", result); + Assert_rocfft_status("rocfft_execution_info_destroy", result); #else fftw_execute(forward_plan[i]); #endif @@ -815,8 +1015,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, rocfft_transform_type_real_forward, rocfft_precision_double, 3, lengths, 1, nullptr); - assert_rocfft_status("rocfft_plan_create", result); - built_plan = true; + Assert_rocfft_status("rocfft_plan_create", result); #else // host fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], vel_onegrid[mfi].dataPtr(), @@ -845,18 +1044,18 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, #elif AMREX_USE_HIP rocfft_execution_info execinfo = nullptr; rocfft_status result = rocfft_execution_info_create(&execinfo); - assert_rocfft_status("rocfft_execution_info_create", result); + Assert_rocfft_status("rocfft_execution_info_create", result); std::size_t buffersize = 0; result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); - assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); void* buffer = amrex::The_Arena()->alloc(buffersize); result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - assert_rocfft_status("rocfft_execution_info_set_stream", result); + Assert_rocfft_status("rocfft_execution_info_set_stream", result); amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldz[i]->dataPtr()); @@ -864,11 +1063,11 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, (void**) 
&vel_onegrid_ptr, // in (void**) &spectral_field_ptr, // out execinfo); - assert_rocfft_status("rocfft_execute", result); + Assert_rocfft_status("rocfft_execute", result); amrex::Gpu::streamSynchronize(); amrex::The_Arena()->free(buffer); result = rocfft_execution_info_destroy(execinfo); - assert_rocfft_status("rocfft_execution_info_destroy", result); + Assert_rocfft_status("rocfft_execution_info_destroy", result); #else fftw_execute(forward_plan[i]); #endif @@ -909,7 +1108,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, int ny = n_cells[1]; int nz = n_cells[2]; - Real GxR, GxC, GyR, GyC, GzR, GzC; + Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0; if (i <= nx/2) { // Gradient Operators @@ -954,12 +1153,12 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, (divC*GzR - divR*GzC) / Lap); spectral_dz(i,j,k) = copy_dz; - - // Solenoidal velocity - spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); - spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); - spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); } + + // Solenoidal velocity + spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); + spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); + spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); } }); } @@ -1022,18 +1221,15 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, vel_decomp.ParallelCopy(vel_decomp_onegrid,0,5,1); } vel_decomp.mult(1.0/sqrtnpts); - - -#endif // end heFFTe } +#endif // end heFFTe -#if defined(HEFFTE) -void IntegrateKScalar(const BaseFab >& spectral_field, - const std::string& name, const Real& scaling, - const Box& c_local_box, - const Real& sqrtnpts, - const int& step, - const int& comp) +#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) +void IntegrateKScalarHeffte(const BaseFab >& spectral_field, + const std::string& name, const Real& scaling, + const Box& c_local_box, + const Real& sqrtnpts, + const int& step) { int npts = n_cells[0]/2; @@ -1050,15 +1246,13 @@ void IntegrateKScalar(const BaseFab >& spectral_field, phicnt_ptr[d] = 0; }); - const Array4< const GpuComplex > spectral = spectral_field.const_array(comp,1); + const Array4< const GpuComplex > spectral = spectral_field.const_array(); ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) { if (i <= n_cells[0]/2) { // only half of kx-domain int ki = i; int kj = j; int kk = k; -// if (j >= n_cells[1]/2) kj = n_cells[1]-j; -// if (k >= n_cells[2]/2) kk = n_cells[2]-k; Real dist = (ki*ki + kj*kj + kk*kk); dist = std::sqrt(dist); @@ -1109,14 +1303,15 @@ void IntegrateKScalar(const BaseFab >& spectral_field, turb.close(); } } -#else +#endif + +#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) void IntegrateKScalar(const Vector > > >& spectral_field, const MultiFab& variables_onegrid, const std::string& name, const Real& scaling, const Real& sqrtnpts, - const int& step, - const int& comp) + const int& step) { int npts = n_cells[0]/2; @@ -1147,8 +1342,6 @@ void IntegrateKScalar(const Vector > > int ki = i; int kj = j; int kk = k; -// if (j >= bx.length(1)/2) kj = bx.length(1)-j; -// if (k >= bx.length(2)/2) kk = bx.length(2)-k; Real dist = (ki*ki + kj*kj + kk*kk); dist = std::sqrt(dist); @@ -1196,11 +1389,13 @@ void IntegrateKScalar(const Vector > > } #endif -#if defined(HEFFTE) -void IntegrateKVelocity(const BaseFab >& spectral_field, - const std::string& name, const Real& scaling, - const Box& c_local_box, - const 
int& step) +#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) +void IntegrateKVelocityHeffte(const BaseFab >& spectral_fieldx, + const BaseFab >& spectral_fieldy, + const BaseFab >& spectral_fieldz, + const std::string& name, const Real& scaling, + const Box& c_local_box, + const int& step) { int npts = n_cells[0]/2; @@ -1217,17 +1412,15 @@ void IntegrateKVelocity(const BaseFab >& spectral_field, phicnt_ptr[d] = 0; }); - const Array4 > spectralx = spectral_field.const_array(0,1); - const Array4 > spectraly = spectral_field.const_array(1,1); - const Array4 > spectralz = spectral_field.const_array(2,1); + const Array4 > spectralx = spectral_fieldx.const_array(); + const Array4 > spectraly = spectral_fieldy.const_array(); + const Array4 > spectralz = spectral_fieldz.const_array(); ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) { if (i <= n_cells[0]/2) { // only half of kx-domain int ki = i; int kj = j; int kk = k; -// if (j >= n_cells[1]/2) kj = n_cells[1]-j; -// if (k >= n_cells[2]/2) kk = n_cells[2]-k; Real dist = (ki*ki + kj*kj + kk*kk); dist = std::sqrt(dist); @@ -1286,7 +1479,9 @@ void IntegrateKVelocity(const BaseFab >& spectral_field, turb.close(); } } -#else +#endif + +#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) void IntegrateKVelocity(const Vector > > >& spectral_fieldx, const Vector > > >& spectral_fieldy, const Vector > > >& spectral_fieldz, @@ -1324,8 +1519,6 @@ void IntegrateKVelocity(const Vector > int ki = i; int kj = j; int kk = k; -// if (j >= bx.length(1)/2) kj = bx.length(1)-j; -// if (k >= bx.length(2)/2) kk = bx.length(2)-k; Real dist = (ki*ki + kj*kj + kk*kk); dist = std::sqrt(dist); @@ -1381,9 +1574,7 @@ void IntegrateKVelocity(const Vector > } #endif -#if defined(HEFFTE) -// this function not needed for HEFFTE -#else +#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) void InverseFFTVel(Vector > > >& spectral_field, MultiFab& vel_decomp_onegrid, const IntVect& fft_size) { @@ -1414,8 +1605,7 @@ void InverseFFTVel(Vector > > >& spectr rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, rocfft_transform_type_real_inverse, rocfft_precision_double, 3, lengths, 1, nullptr); - assert_rocfft_status("rocfft_plan_create", result); - built_plan = true; + Assert_rocfft_status("rocfft_plan_create", result); #else // host fplan = fftw_plan_dft_c2r_3d(fft_size[2], fft_size[1], fft_size[0], reinterpret_cast @@ -1444,18 +1634,18 @@ void InverseFFTVel(Vector > > >& spectr #elif AMREX_USE_HIP rocfft_execution_info execinfo = nullptr; rocfft_status result = rocfft_execution_info_create(&execinfo); - assert_rocfft_status("rocfft_execution_info_create", result); + Assert_rocfft_status("rocfft_execution_info_create", result); std::size_t buffersize = 0; result = rocfft_plan_get_work_buffer_size(backward_plan[i], &buffersize); - assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); void* buffer = amrex::The_Arena()->alloc(buffersize); result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - assert_rocfft_status("rocfft_execution_info_set_stream", result); + Assert_rocfft_status("rocfft_execution_info_set_stream", result); amrex::Real* 
vel_onegrid_ptr = vel_decomp_onegrid[mfi].dataPtr(); FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr()); @@ -1463,11 +1653,11 @@ void InverseFFTVel(Vector > > >& spectr (void**) &vel_onegrid_ptr, // in (void**) &spectral_field_ptr, // out execinfo); - assert_rocfft_status("rocfft_execute", result); + Assert_rocfft_status("rocfft_execute", result); amrex::Gpu::streamSynchronize(); amrex::The_Arena()->free(buffer); result = rocfft_execution_info_destroy(execinfo); - assert_rocfft_status("rocfft_execution_info_destroy", result); + Assert_rocfft_status("rocfft_execution_info_destroy", result); #else fftw_execute(backward_plan[i]); #endif diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp index 10cb72a56..fb3f912dc 100644 --- a/src_compressible_stag/main_driver.cpp +++ b/src_compressible_stag/main_driver.cpp @@ -1133,12 +1133,22 @@ void main_driver(const char* argv) // decomposed velocities Vector< std::string > var_names_turbVel{"vel_total","vel_solenoidal","vel_dilation"}; Real scaling_turb_veldecomp = dVolinv; +#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) // heffte + TurbSpectrumVelDecompHeffte(MFTurbVel, vel_decomp, geom, step, scaling_turb_veldecomp, var_names_turbVel); +#endif +#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) TurbSpectrumVelDecomp(MFTurbVel, vel_decomp, geom, step, scaling_turb_veldecomp, var_names_turbVel); +#endif // scalars Vector< std::string > var_names_turbScalar{"rho","temp","press"}; Vector scaling_turb_scalar(3, dVolinv); +#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) // heffte + TurbSpectrumScalarHeffte(MFTurbScalar, geom, step, scaling_turb_scalar, var_names_turbScalar); +#endif +#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) TurbSpectrumScalar(MFTurbScalar, geom, step, scaling_turb_scalar, var_names_turbScalar); +#endif } if (turbForcing > 0) { From d2e0f807ade3e351c851725dae9882e9b28b2e94 Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Thu, 12 Oct 2023 11:23:22 -0700 Subject: [PATCH 007/151] minor change --- src_analysis/TurbSpectra.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src_analysis/TurbSpectra.cpp b/src_analysis/TurbSpectra.cpp index f2f308b1c..881f4d4e5 100644 --- a/src_analysis/TurbSpectra.cpp +++ b/src_analysis/TurbSpectra.cpp @@ -152,7 +152,7 @@ void TurbSpectrumScalarHeffte(const MultiFab& variables, void TurbSpectrumScalar(const MultiFab& variables, const amrex::Geometry& geom, const int& step, - const amrex::Vector& var_scaling, + const amrex::Vector& scaling, const amrex::Vector< std::string >& var_names) { BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); @@ -694,7 +694,7 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, MultiFab& vel_decomp, const amrex::Geometry& geom, const int& step, - const amrex::Real& var_scaling, + const amrex::Real& scaling, const amrex::Vector< std::string >& var_names) { BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); From 5b7ad925256dd606e8c296343f8e75fe1de0cf00 Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Thu, 12 Oct 2023 13:16:06 -0700 Subject: [PATCH 008/151] more changes --- src_analysis/TurbSpectra.cpp | 275 ++++++++--------------------------- 1 file changed, 60 insertions(+), 215 deletions(-) diff --git a/src_analysis/TurbSpectra.cpp b/src_analysis/TurbSpectra.cpp index 881f4d4e5..8086cc8b9 100644 --- a/src_analysis/TurbSpectra.cpp +++ 
b/src_analysis/TurbSpectra.cpp @@ -113,10 +113,11 @@ void TurbSpectrumScalarHeffte(const MultiFab& variables, c_local_box.growHi(0,1); } - for (int comp=0; comp > spectral_field(c_local_box, 1, The_Device_Arena()); + MultiFab variables_single(ba, dm, 1, 0); + using heffte_complex = typename heffte::fft_output::type; int r2c_direction = 0; #if defined(HEFFTE_CUFFT) @@ -132,19 +133,14 @@ void TurbSpectrumScalarHeffte(const MultiFab& variables, {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, r2c_direction, ParallelDescriptor::Communicator()); - using heffte_complex = typename heffte::fft_output::type; - heffte_complex* spectral_data = (heffte_complex*) spectral_field.dataPtr(); - - MultiFab variables_single(ba, dm, 1, 0); - variables_single.ParallelCopy(variables,comp,0,1); - - fft.forward(variables_single[local_boxid].dataPtr(),spectral_data); - - ParallelDescriptor::Barrier(); - - // Integrate spectra over k-shells - IntegrateKScalarHeffte(spectral_field,var_names[comp],scaling[comp],c_local_box,sqrtnpts,step); - } + for (int comp=0; comp > spectral_field_Dx(c_local_box, 1, The_Device_Arena()); // dilatationalx BaseFab > spectral_field_Dy(c_local_box, 1, The_Device_Arena()); // dilatationaly BaseFab > spectral_field_Dz(c_local_box, 1, The_Device_Arena()); // dilatationalz - spectral_field_Tx.setVal(0.0); - spectral_field_Ty.setVal(0.0); - spectral_field_Tz.setVal(0.0); - spectral_field_Sx.setVal(0.0); - spectral_field_Sy.setVal(0.0); - spectral_field_Sz.setVal(0.0); - spectral_field_Dx.setVal(0.0); - spectral_field_Dy.setVal(0.0); - spectral_field_Dz.setVal(0.0); + MultiFab vel_single(ba, dm, 1, 0); + + int r2c_direction = 0; +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); // ForwardTransform // X using heffte_complex = typename heffte::fft_output::type; { - int r2c_direction = 0; -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - MultiFab vel_single(ba, dm, 1, 0); - vel_single.ParallelCopy(vel, 0, 0, 1); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tx.dataPtr(); - fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); - ParallelDescriptor::Barrier(); + vel_single.ParallelCopy(vel, 0, 0, 1); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tx.dataPtr(); + fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); } // Y { - int r2c_direction = 0; -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) 
,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - MultiFab vel_single(ba, dm, 1, 0); - vel_single.ParallelCopy(vel, 1, 0, 1); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Ty.dataPtr(); - fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); - ParallelDescriptor::Barrier(); + vel_single.ParallelCopy(vel, 1, 0, 1); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Ty.dataPtr(); + fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); } // Z { - int r2c_direction = 0; -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - MultiFab vel_single(ba, dm, 1, 0); - vel_single.ParallelCopy(vel, 2, 0, 1); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tz.dataPtr(); - fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); - ParallelDescriptor::Barrier(); + vel_single.ParallelCopy(vel, 2, 0, 1); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tz.dataPtr(); + fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); } // Decompose velocity field into solenoidal and dilatational @@ -539,153 +490,47 @@ void TurbSpectrumVelDecompHeffte(const MultiFab& vel, IntegrateKVelocityHeffte(spectral_field_Sx,spectral_field_Sy,spectral_field_Sz,"vel_solenoidal",scaling,c_local_box,step); IntegrateKVelocityHeffte(spectral_field_Dx,spectral_field_Dy,spectral_field_Dz,"vel_dilational",scaling,c_local_box,step); + MultiFab vel_decomp_single(ba, dm, 1, 0); // inverse Fourier transform solenoidal and dilatational components { - int r2c_direction = 0; -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - MultiFab vel_decomp_single(ba, dm, 1, 0); - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sx.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - ParallelDescriptor::Barrier(); - - vel_decomp.ParallelCopy(vel_decomp_single, 0, 0, 1); - + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sx.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + ParallelDescriptor::Barrier(); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 0, 1); } { - int r2c_direction = 0; -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) 
,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - MultiFab vel_decomp_single(ba, dm, 1, 0); - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sy.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - ParallelDescriptor::Barrier(); - - vel_decomp.ParallelCopy(vel_decomp_single, 0, 1, 1); - + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sy.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + ParallelDescriptor::Barrier(); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 1, 1); } { - int r2c_direction = 0; -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - MultiFab vel_decomp_single(ba, dm, 1, 0); - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sz.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - ParallelDescriptor::Barrier(); - - vel_decomp.ParallelCopy(vel_decomp_single, 0, 2, 1); - + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sz.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + ParallelDescriptor::Barrier(); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 2, 1); } { - int r2c_direction = 0; -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - MultiFab vel_decomp_single(ba, dm, 1, 0); - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dx.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - ParallelDescriptor::Barrier(); - - vel_decomp.ParallelCopy(vel_decomp_single, 0, 3, 1); - + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dx.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + ParallelDescriptor::Barrier(); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 3, 1); } { - int r2c_direction = 0; -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - MultiFab vel_decomp_single(ba, dm, 1, 0); - heffte_complex* 
spectral_data = (heffte_complex*) spectral_field_Dy.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - ParallelDescriptor::Barrier(); - - vel_decomp.ParallelCopy(vel_decomp_single, 0, 4, 1); - + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dy.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + ParallelDescriptor::Barrier(); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 4, 1); } { - int r2c_direction = 0; -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - MultiFab vel_decomp_single(ba, dm, 1, 0); - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dz.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - ParallelDescriptor::Barrier(); - - vel_decomp.ParallelCopy(vel_decomp_single, 0, 5, 1); - + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dz.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + ParallelDescriptor::Barrier(); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 5, 1); } vel_decomp.mult(1.0/sqrtnpts); + } #endif From c6e97baa99f42aed59d6989891769ca80395e268 Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Thu, 26 Oct 2023 21:27:28 -0700 Subject: [PATCH 009/151] heffte seems to be working now --- src_analysis/TurbSpectra.cpp | 201 +++++++++++++++++++++++++++-------- 1 file changed, 156 insertions(+), 45 deletions(-) diff --git a/src_analysis/TurbSpectra.cpp b/src_analysis/TurbSpectra.cpp index 8086cc8b9..e7ca97431 100644 --- a/src_analysis/TurbSpectra.cpp +++ b/src_analysis/TurbSpectra.cpp @@ -120,6 +120,7 @@ void TurbSpectrumScalarHeffte(const MultiFab& variables, using heffte_complex = typename heffte::fft_output::type; int r2c_direction = 0; + for (int comp=0; comp fft #elif defined(HEFFTE_ROCFFT) @@ -127,19 +128,19 @@ void TurbSpectrumScalarHeffte(const MultiFab& variables, #elif defined(HEFFTE_FFTW) heffte::fft3d_r2c fft #endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - for (int comp=0; comp fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); // ForwardTransform // X using heffte_complex = typename heffte::fft_output::type; { - vel_single.ParallelCopy(vel, 0, 0, 1); +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft 
+#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + vel_single.ParallelCopy(vel, 0, 0, 1); heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tx.dataPtr(); fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); } // Y { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + vel_single.ParallelCopy(vel, 1, 0, 1); heffte_complex* spectral_data = (heffte_complex*) spectral_field_Ty.dataPtr(); fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); } // Z { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + vel_single.ParallelCopy(vel, 2, 0, 1); heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tz.dataPtr(); fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); } + Gpu::streamSynchronize(); + // Decompose velocity field into solenoidal and dilatational Array4< GpuComplex > spectral_tx = spectral_field_Tx.array(); Array4< GpuComplex > spectral_ty = spectral_field_Ty.array(); @@ -483,7 +513,7 @@ void TurbSpectrumVelDecompHeffte(const MultiFab& vel, }); - ParallelDescriptor::Barrier(); + Gpu::streamSynchronize(); // Integrate K spectrum for velocities IntegrateKVelocityHeffte(spectral_field_Tx,spectral_field_Ty,spectral_field_Tz,"vel_total" ,scaling,c_local_box,step); @@ -493,42 +523,127 @@ void TurbSpectrumVelDecompHeffte(const MultiFab& vel, MultiFab vel_decomp_single(ba, dm, 1, 0); // inverse Fourier transform solenoidal and dilatational components { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sx.dataPtr(); fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - ParallelDescriptor::Barrier(); + + Gpu::streamSynchronize(); vel_decomp.ParallelCopy(vel_decomp_single, 0, 0, 1); } { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif 
defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sy.dataPtr(); fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - ParallelDescriptor::Barrier(); + + Gpu::streamSynchronize(); vel_decomp.ParallelCopy(vel_decomp_single, 0, 1, 1); } { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sz.dataPtr(); fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - ParallelDescriptor::Barrier(); + + Gpu::streamSynchronize(); vel_decomp.ParallelCopy(vel_decomp_single, 0, 2, 1); } { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dx.dataPtr(); fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - ParallelDescriptor::Barrier(); + + Gpu::streamSynchronize(); vel_decomp.ParallelCopy(vel_decomp_single, 0, 3, 1); } { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dy.dataPtr(); fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - ParallelDescriptor::Barrier(); + + Gpu::streamSynchronize(); vel_decomp.ParallelCopy(vel_decomp_single, 0, 4, 1); } { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + heffte_complex* 
spectral_data = (heffte_complex*) spectral_field_Dz.dataPtr(); fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - ParallelDescriptor::Barrier(); + + Gpu::streamSynchronize(); vel_decomp.ParallelCopy(vel_decomp_single, 0, 5, 1); } + vel_decomp.mult(1.0/sqrtnpts); } @@ -1108,8 +1223,8 @@ void IntegrateKScalarHeffte(const BaseFab >& spectral_field, Real real = spectral(i,j,k).real(); Real imag = spectral(i,j,k).imag(); Real cov = (1.0/(sqrtnpts*sqrtnpts*scaling))*(real*real + imag*imag); - amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); - amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); + amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov); + amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); } } else { @@ -1117,12 +1232,10 @@ void IntegrateKScalarHeffte(const BaseFab >& spectral_field, } }); - ParallelDescriptor::Barrier(); + Gpu::streamSynchronize(); - for (int d=1; d >& spectral_fieldx, } }); - ParallelDescriptor::Barrier(); + Gpu::streamSynchronize(); - for (int d=1; d Date: Sat, 28 Oct 2023 18:23:38 -0400 Subject: [PATCH 010/151] get heffte and k-space integration working on frontier --- src_analysis/TurbSpectra.cpp | 204 +++++++++++++++++++++++++---------- 1 file changed, 150 insertions(+), 54 deletions(-) diff --git a/src_analysis/TurbSpectra.cpp b/src_analysis/TurbSpectra.cpp index e7ca97431..9265a5e99 100644 --- a/src_analysis/TurbSpectra.cpp +++ b/src_analysis/TurbSpectra.cpp @@ -1194,21 +1194,57 @@ void IntegrateKScalarHeffte(const BaseFab >& spectral_field, { int npts = n_cells[0]/2; - Gpu::DeviceVector phisum_device(npts); - Gpu::DeviceVector phicnt_device(npts); +// Gpu::DeviceVector phisum_device(npts); +// Gpu::DeviceVector phicnt_device(npts); Gpu::HostVector phisum_host(npts); - Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data - int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + Gpu::HostVector phicnt_host(npts); +// Gpu::HostVector phisum_host(npts); +// Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data +// int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - phisum_ptr[d] = 0.; - phicnt_ptr[d] = 0; - }); +// amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept +// { +// phisum_ptr[d] = 0.; +// phicnt_ptr[d] = 0; +// }); + for (int d=0; d > spectral = spectral_field.const_array(); - ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) - { +// ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) +// { +// if (i <= n_cells[0]/2) { // only half of kx-domain +// int ki = i; +// int kj = j; +// int kk = k; +// +// Real dist = (ki*ki + kj*kj + kk*kk); +// dist = std::sqrt(dist); +// +// if ( dist <= n_cells[0]/2-0.5) { +// dist = dist+0.5; +// int cell = int(dist); +// Real real = spectral(i,j,k).real(); +// Real imag = spectral(i,j,k).imag(); +// Real cov = (1.0/(sqrtnpts*sqrtnpts*scaling))*(real*real + imag*imag); +// amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov); +// amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); +// } +// } +// else { +// amrex::Abort("i should not exceed n_cells[0]/2"); +// } +// }); + + // Gpu::streamSynchronize(); + + const auto lo = amrex::lbound(c_local_box); + const auto hi = amrex::ubound(c_local_box); + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { if (i <= n_cells[0]/2) { // only half of kx-domain int ki = i; int kj = j; @@ -1223,30 +1259,38 @@ void IntegrateKScalarHeffte(const BaseFab 
>& spectral_field, Real real = spectral(i,j,k).real(); Real imag = spectral(i,j,k).imag(); Real cov = (1.0/(sqrtnpts*sqrtnpts*scaling))*(real*real + imag*imag); - amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov); - amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); + amrex::HostDevice::Atomic::Add(&(phisum_host[cell]), cov); + amrex::HostDevice::Atomic::Add(&(phicnt_host[cell]),1); } - } - else { - amrex::Abort("i should not exceed n_cells[0]/2"); - } - }); + } + else { + amrex::Abort("i should not exceed n_cells[0]/2"); + } + } + } + } - Gpu::streamSynchronize(); + ParallelDescriptor::Barrier(); - ParallelDescriptor::ReduceRealSum(phisum_device.dataPtr(),npts); - ParallelDescriptor::ReduceIntSum(phicnt_device.dataPtr(),npts); + ParallelDescriptor::ReduceRealSum(phisum_host.dataPtr(),npts); + ParallelDescriptor::ReduceIntSum(phicnt_host.dataPtr(),npts); Real dk = 1.; - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { +// amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept +// { +// if (d != 0) { +// phisum_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d]; +// } +// }); + + for (int d=0; d >& spectral_fieldx, { int npts = n_cells[0]/2; - Gpu::DeviceVector phisum_device(npts); - Gpu::DeviceVector phicnt_device(npts); +// Gpu::DeviceVector phisum_device(npts); +// Gpu::DeviceVector phicnt_device(npts); Gpu::HostVector phisum_host(npts); - Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data - int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + Gpu::HostVector phicnt_host(npts); +// Gpu::HostVector phisum_host(npts); +// Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data +// int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - phisum_ptr[d] = 0.; - phicnt_ptr[d] = 0; - }); +// amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept +// { +// phisum_ptr[d] = 0.; +// phicnt_ptr[d] = 0; +// }); + for (int d=0; d > spectralx = spectral_fieldx.const_array(); const Array4 > spectraly = spectral_fieldy.const_array(); const Array4 > spectralz = spectral_fieldz.const_array(); - ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) - { +// ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) +// { +// if (i <= n_cells[0]/2) { // only half of kx-domain +// int ki = i; +// int kj = j; +// int kk = k; +// +// Real dist = (ki*ki + kj*kj + kk*kk); +// dist = std::sqrt(dist); +// +// if ( dist <= n_cells[0]/2-0.5) { +// dist = dist+0.5; +// int cell = int(dist); +// Real real, imag, cov_x, cov_y, cov_z, cov; +// real = spectralx(i,j,k).real(); +// imag = spectralx(i,j,k).imag(); +// cov_x = (1.0/scaling)*(real*real + imag*imag); +// real = spectraly(i,j,k).real(); +// imag = spectraly(i,j,k).imag(); +// cov_y = (1.0/scaling)*(real*real + imag*imag); +// real = spectralz(i,j,k).real(); +// imag = spectralz(i,j,k).imag(); +// cov_z = (1.0/scaling)*(real*real + imag*imag); +// cov = cov_x + cov_y + cov_z; +// amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov); +// amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); +// } +// } +// else { +// amrex::Abort("i should not exceed n_cells[0]/2"); +// } +// }); +// +// Gpu::streamSynchronize(); + + const auto lo = amrex::lbound(c_local_box); + const auto hi = amrex::ubound(c_local_box); + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { if (i <= n_cells[0]/2) { // only half of kx-domain int ki = i; int kj = j; @@ -1397,30 
+1485,38 @@ void IntegrateKVelocityHeffte(const BaseFab >& spectral_fieldx, imag = spectralz(i,j,k).imag(); cov_z = (1.0/scaling)*(real*real + imag*imag); cov = cov_x + cov_y + cov_z; - amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); - amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); + amrex::HostDevice::Atomic::Add(&(phisum_host[cell]), cov); + amrex::HostDevice::Atomic::Add(&(phicnt_host[cell]),1); } - } - else { - amrex::Abort("i should not exceed n_cells[0]/2"); - } - }); + } + else { + amrex::Abort("i should not exceed n_cells[0]/2"); + } + } + } + } - Gpu::streamSynchronize(); + ParallelDescriptor::Barrier(); - ParallelDescriptor::ReduceRealSum(phisum_device.dataPtr(),npts); - ParallelDescriptor::ReduceIntSum(phicnt_device.dataPtr(),npts); + ParallelDescriptor::ReduceRealSum(phisum_host.dataPtr(),npts); + ParallelDescriptor::ReduceIntSum(phicnt_host.dataPtr(),npts); Real dk = 1.; - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { +// amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept +// { +// if (d != 0) { +// phisum_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d]; +// } +// }); + + for (int d=0; d Date: Thu, 2 Nov 2023 18:04:16 -0400 Subject: [PATCH 011/151] GPU-aware MPI compilation on Frontier -- ongoing tests --- exec/compressible_stag/build_frontier.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/exec/compressible_stag/build_frontier.sh b/exec/compressible_stag/build_frontier.sh index 3b02bbf8b..a56f6ab31 100755 --- a/exec/compressible_stag/build_frontier.sh +++ b/exec/compressible_stag/build_frontier.sh @@ -2,8 +2,9 @@ ## load necessary modules module load craype-accel-amd-gfx90a -module load rocm/5.2.0 # waiting for 5.6 for next bump -module load cray-mpich +module load amd-mixed +#module load rocm/5.2.0 # waiting for 5.6 for next bump +module load cray-mpich/8.1.23 module load cce/15.0.0 # must be loaded after rocm # GPU-aware MPI @@ -19,5 +20,8 @@ export AMREX_AMD_ARCH=gfx90a ##export CFLAGS="-I${ROCM_PATH}/include" ##export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed" ##export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64 ${PE_MPICH_GTL_DIR_amd_gfx90a} -lmpi_gtl_hsa" +export LDFLAGS="-L${MPICH_DIR}/lib -lmpi ${CRAY_XPMEM_POST_LINK_OPTS} -lxpmem ${PE_MPICH_GTL_DIR_amd_gfx90a} ${PE_MPICH_GTL_LIBS_amd_gfx90a}" +export CXXFLAGS="-I${MPICH_DIR}/include" +export HIPFLAGS="--amdgpu-target=gfx90a" -make -j10 USE_CUDA=FALSE USE_HIP=TRUE DO_TURB=TRUE MAX_SPEC=2 USE_HEFFTE_ROCFFT=TRUE +make -j10 USE_CUDA=FALSE USE_HIP=TRUE DO_TURB=TRUE MAX_SPEC=2 USE_HEFFTE_ROCFFT=TRUE USE_ASSERTION=TRUE From ddf50e995cb0abc09ed5e395f5ac02771992a42a Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Thu, 2 Nov 2023 18:04:42 -0400 Subject: [PATCH 012/151] use native amrex functions for MFab reduction --- src_compressible_stag/DeriveVelProp.cpp | 76 ++++++++++++++++--------- 1 file changed, 50 insertions(+), 26 deletions(-) diff --git a/src_compressible_stag/DeriveVelProp.cpp b/src_compressible_stag/DeriveVelProp.cpp index e80f3ea92..9de3bf82d 100644 --- a/src_compressible_stag/DeriveVelProp.cpp +++ b/src_compressible_stag/DeriveVelProp.cpp @@ -30,7 +30,6 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel, MultiFab ccTemp; MultiFab ccTempA; MultiFab ccTempDiv; - MultiFab eta_kin; // kinematic viscosity std::array< MultiFab, NUM_EDGE > curlU; std::array< MultiFab, NUM_EDGE > eta_edge; std::array< MultiFab, NUM_EDGE > curlUtemp; @@ -42,7 +41,6 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel, 
ccTemp.define(prim.boxArray(),prim.DistributionMap(),1,0); ccTempA.define(prim.boxArray(),prim.DistributionMap(),1,0); ccTempDiv.define(prim.boxArray(),prim.DistributionMap(),1,0); - eta_kin.define(prim.boxArray(),prim.DistributionMap(),1,ngc); #if (AMREX_SPACEDIM == 3) curlU[0].define(convert(prim.boxArray(),nodal_flag_xy), prim.DistributionMap(), 1, 0); curlU[1].define(convert(prim.boxArray(),nodal_flag_xz), prim.DistributionMap(), 1, 0); @@ -63,19 +61,6 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel, curlUtemp[0].define(convert(prim.boxArray(),nodal_flag_xy), prim.DistributionMap(), 1, 0); #endif - // Get Kinematic Viscosity - for ( MFIter mfi(eta,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { - // grow the box by ngc - const Box& bx = amrex::grow(mfi.tilebox(), ngc); - const Array4 & eta_kin_fab = eta_kin.array(mfi); - const Array4& eta_fab = eta.array(mfi); - const Array4& prim_fab = prim.array(mfi); - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - eta_kin_fab(i,j,k) = eta_fab(i,j,k) / prim_fab(i,j,k,0); - }); - } - // Setup temp variables Real temp; Vector tempvec(3); @@ -87,14 +72,38 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel, Vector eps_s_vec(3); // solenoidal dissipation // turbulent kinetic energy - StagInnerProd(cumom,0,vel,0,macTemp,rhouu); +// StagInnerProd(cumom,0,vel,0,macTemp,rhouu); + { + auto mask = cumom[0].OwnerMask(geom.periodicity()); + rhouu[0] = MultiFab::Dot(cumom[0],0,vel[0],0,1,0); + } + { + auto mask = cumom[1].OwnerMask(geom.periodicity()); + rhouu[1] = MultiFab::Dot(cumom[1],0,vel[1],0,1,0); + } + { + auto mask = cumom[2].OwnerMask(geom.periodicity()); + rhouu[2] = MultiFab::Dot(cumom[2],0,vel[2],0,1,0); + } rhouu[0] /= (n_cells[0]+1)*n_cells[1]*n_cells[2]; rhouu[1] /= (n_cells[1]+1)*n_cells[2]*n_cells[0]; rhouu[2] /= (n_cells[2]+1)*n_cells[0]*n_cells[1]; turbKE = 0.5*( rhouu[0] + rhouu[1] + rhouu[2] ); // RMS velocity - StagInnerProd(vel,0,vel,0,macTemp,uu); +// StagInnerProd(vel,0,vel,0,macTemp,uu); + { + auto mask = vel[0].OwnerMask(geom.periodicity()); + uu[0] = MultiFab::Dot(vel[0],0,vel[0],0,1,0); + } + { + auto mask = vel[1].OwnerMask(geom.periodicity()); + uu[1] = MultiFab::Dot(vel[1],0,vel[1],0,1,0); + } + { + auto mask = vel[2].OwnerMask(geom.periodicity()); + uu[2] = MultiFab::Dot(vel[2],0,vel[2],0,1,0); + } uu[0] /= (n_cells[0]+1)*n_cells[1]*n_cells[2]; uu[1] /= (n_cells[1]+1)*n_cells[2]*n_cells[0]; uu[2] /= (n_cells[2]+1)*n_cells[0]*n_cells[1]; @@ -167,17 +176,30 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel, // Compute \omega (curl) ComputeCurlFaceToEdge(vel,curlU,geom); - // Solenoidal dissipation: / - AverageCCToEdge(eta_kin,eta_edge,0,1,SPEC_BC_COMP,geom); + // Solenoidal dissipation: + AverageCCToEdge(eta,eta_edge,0,1,SPEC_BC_COMP,geom); EdgeInnerProd(curlU,0,curlU,0,curlUtemp,tempvec); - EdgeInnerProd(curlUtemp,0,eta_edge,0,curlU,eps_s_vec); +// EdgeInnerProd(curlUtemp,0,eta_edge,0,curlU,eps_s_vec); + { + auto mask = curlUtemp[0].OwnerMask(geom.periodicity()); + eps_s_vec[0] = MultiFab::Dot(curlUtemp[0],0,eta_edge[0],0,1,0); + } + { + auto mask = curlUtemp[1].OwnerMask(geom.periodicity()); + eps_s_vec[1] = MultiFab::Dot(curlUtemp[1],0,eta_edge[1],0,1,0); + } + { + auto mask = curlUtemp[2].OwnerMask(geom.periodicity()); + eps_s_vec[2] = MultiFab::Dot(curlUtemp[2],0,eta_edge[2],0,1,0); + } eps_s_vec[0] /= (n_cells[0]+1)*(n_cells[1]+1)*n_cells[2]; eps_s_vec[1] /= (n_cells[0]+1)*(n_cells[2]+1)*n_cells[1]; eps_s_vec[2] /= (n_cells[1]+1)*(n_cells[2]+1)*n_cells[0]; 
eps_s = (eps_s_vec[0] + eps_s_vec[1] + eps_s_vec[2]); - // Dilational dissipation (4/3)*/ - CCInnerProd(ccTempDiv,0,eta_kin,0,ccTemp,eps_d); + // Dilational dissipation (4/3)* +// CCInnerProd(ccTempDiv,0,eta,0,ccTemp,eps_d); + eps_d = MultiFab::Dot(eta, 0, ccTempDiv, 0, 1, 0); eps_d *= dProb*(4.0/3.0); // Ratio of Dilational to Solenoidal dissipation @@ -185,10 +207,12 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel, Real eps_t = eps_s + eps_d; // Kolmogorov scales - Real eta_kin_avg = ComputeSpatialMean(eta_kin, 0); - kolm_s = pow((eta_kin_avg*eta_kin_avg*eta_kin_avg/eps_s),0.25); - kolm_d = pow((eta_kin_avg*eta_kin_avg*eta_kin_avg/eps_d),0.25); - kolm_t = pow((eta_kin_avg*eta_kin_avg*eta_kin_avg/eps_t),0.25); + kolm_s = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*eps_s)),0.25); + kolm_s = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*eps_d)),0.25); + kolm_s = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*eps_t)),0.25); +// kolm_s = pow((eta_avg*eta_avg*eta_avg/eps_s),0.25); +// kolm_d = pow((eta_avg*eta_avg*eta_avg/eps_d),0.25); +// kolm_t = pow((eta_avg*eta_avg*eta_avg/eps_t),0.25); } #endif From 0abe9cd69db30b61f5f91d4b9199827a94c8fbd1 Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Thu, 2 Nov 2023 18:05:15 -0400 Subject: [PATCH 013/151] reading checkpoint does not require old box array --- src_compressible_stag/Checkpoint.cpp | 41 ++++++++++++++-------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/src_compressible_stag/Checkpoint.cpp b/src_compressible_stag/Checkpoint.cpp index e298af6ce..900b60299 100644 --- a/src_compressible_stag/Checkpoint.cpp +++ b/src_compressible_stag/Checkpoint.cpp @@ -1384,28 +1384,29 @@ void Read_Copy_MF_Checkpoint(amrex::MultiFab& mf, std::string mf_name, const std BoxArray& ba_old, DistributionMapping& dmap_old, int NVARS, int ghost, int nodal_flag) { - // define temporary MF - MultiFab mf_temp; - if (nodal_flag < 0) { - if (ghost) { - mf_temp.define(ba_old,dmap_old,NVARS,ngc); - } - else { - mf_temp.define(ba_old,dmap_old,NVARS,0); - } - - } - else { - if (ghost) { - mf_temp.define(convert(ba_old,nodal_flag_dir[nodal_flag]),dmap_old,NVARS,ngc); - } - else { - mf_temp.define(convert(ba_old,nodal_flag_dir[nodal_flag]),dmap_old,NVARS,0); - } - - } + //// define temporary MF + //MultiFab mf_temp; + //if (nodal_flag < 0) { + // if (ghost) { + // mf_temp.define(ba_old,dmap_old,NVARS,ngc); + // } + // else { + // mf_temp.define(ba_old,dmap_old,NVARS,0); + // } + + //} + //else { + // if (ghost) { + // mf_temp.define(convert(ba_old,nodal_flag_dir[nodal_flag]),dmap_old,NVARS,ngc); + // } + // else { + // mf_temp.define(convert(ba_old,nodal_flag_dir[nodal_flag]),dmap_old,NVARS,0); + // } + + //} // Read into temporary MF from file + MultiFab mf_temp; VisMF::Read(mf_temp,amrex::MultiFabFileFullPrefix(0, checkpointname, "Level_", mf_name)); // Copy temporary MF into the new MF From 124e761d974f7fd67bd49f3253da54011f429e16 Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Thu, 2 Nov 2023 18:16:34 -0400 Subject: [PATCH 014/151] fix typo --- src_compressible_stag/DeriveVelProp.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src_compressible_stag/DeriveVelProp.cpp b/src_compressible_stag/DeriveVelProp.cpp index 9de3bf82d..34a0c1ab6 100644 --- a/src_compressible_stag/DeriveVelProp.cpp +++ b/src_compressible_stag/DeriveVelProp.cpp @@ -208,8 +208,8 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel, // Kolmogorov scales kolm_s = 
pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*eps_s)),0.25); - kolm_s = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*eps_d)),0.25); - kolm_s = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*eps_t)),0.25); + kolm_d = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*eps_d)),0.25); + kolm_t = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*eps_t)),0.25); // kolm_s = pow((eta_avg*eta_avg*eta_avg/eps_s),0.25); // kolm_d = pow((eta_avg*eta_avg*eta_avg/eps_d),0.25); // kolm_t = pow((eta_avg*eta_avg*eta_avg/eps_t),0.25); From 37bd3c3adc6cdbbc1979600426de3a246a0e60f3 Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Fri, 3 Nov 2023 16:54:55 -0400 Subject: [PATCH 015/151] recompute transport coeff. when restarting for turbulent FFTs --- src_compressible_stag/main_driver.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp index fb3f912dc..139ca23ab 100644 --- a/src_compressible_stag/main_driver.cpp +++ b/src_compressible_stag/main_driver.cpp @@ -947,6 +947,9 @@ void main_driver(const char* argv) RK3stepStag(cu, cumom, prim, vel, source, eta, zeta, kappa, chi, D, faceflux, edgeflux_x, edgeflux_y, edgeflux_z, cenflux, ranchem, geom, dt, step, turbforce); } + else { + calculateTransportCoeffs(prim, eta, zeta, kappa, chi, D); + } // update surface chemistry (via either surfchem_mui or MFsurfchem) #if defined(MUI) || defined(USE_AMREX_MPMD) From a7d900485ff0b0b85051a2d5a590bbc977dd147e Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Sun, 5 Nov 2023 12:45:24 -0800 Subject: [PATCH 016/151] do reductions on MFab and not BaseFab --- src_analysis/TurbSpectra.H | 30 +- src_analysis/TurbSpectra.cpp | 534 ++++++++++++++++++++++------------- 2 files changed, 350 insertions(+), 214 deletions(-) diff --git a/src_analysis/TurbSpectra.H b/src_analysis/TurbSpectra.H index 4abf0bfcc..69e2cc77b 100644 --- a/src_analysis/TurbSpectra.H +++ b/src_analysis/TurbSpectra.H @@ -47,17 +47,25 @@ void Assert_rocfft_status (std::string const& name, rocfft_status status); #endif #if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) -void IntegrateKScalarHeffte(const BaseFab >& spectral_field, - const std::string& name, const Real& scaling, - const Box& c_local_box, - const Real& sqrtnpts, - const int& step); -void IntegrateKVelocityHeffte(const BaseFab >& spectral_fieldx, - const BaseFab >& spectral_fieldy, - const BaseFab >& spectral_fieldz, - const std::string& name, const Real& scaling, - const Box& c_local_box, - const int& step); +void IntegrateKScalarHeffte(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp); +//void IntegrateKScalarHeffte(const BaseFab >& spectral_field, +// const std::string& name, const Real& scaling, +// const Box& c_local_box, +// const Real& sqrtnpts, +// const int& step); +void IntegrateKVelocityHeffte(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp); +//void IntegrateKVelocityHeffte(const BaseFab >& spectral_fieldx, +// const BaseFab >& spectral_fieldy, +// const BaseFab >& spectral_fieldz, +// const std::string& name, const Real& scaling, +// const Box& c_local_box, +// const int& step); void TurbSpectrumScalarHeffte(const MultiFab& variables, const amrex::Geometry& geom, const int& step, diff --git a/src_analysis/TurbSpectra.cpp b/src_analysis/TurbSpectra.cpp index 9265a5e99..3e6689fab 100644 --- a/src_analysis/TurbSpectra.cpp +++ b/src_analysis/TurbSpectra.cpp @@ -66,9 +66,12 @@ void TurbSpectrumScalarHeffte(const 
MultiFab& variables, const amrex::Vector< std::string >& var_names) { BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.local_size() == 1, "TurbSpectrumScalar: Must have one Box per MPI process when using heFFTe"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), + "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), + "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.local_size() == 1, + "TurbSpectrumScalar: Must have one Box per MPI process when using heFFTe"); int ncomp = variables.nComp(); @@ -113,35 +116,84 @@ void TurbSpectrumScalarHeffte(const MultiFab& variables, c_local_box.growHi(0,1); } + // BOX ARRAY TO STORE COVARIANCE MATRIX IN A MFAB + // create a BoxArray containing the fft boxes + // by construction, these boxes correlate to the associated spectral_data + // this we can copy the spectral data into this multifab since we know they are owned by the same MPI rank + BoxArray fft_ba; + { + BoxList bl; + bl.reserve(ba.size()); + + for (int i = 0; i < ba.size(); ++i) { + Box b = ba[i]; + + Box r_box = b; + Box c_box = amrex::coarsen(r_box, IntVect(AMREX_D_DECL(2,1,1))); + + // this avoids overlap for the cases when one or more r_box's + // have an even cell index in the hi-x cell + if (c_box.bigEnd(0) * 2 == r_box.bigEnd(0)) { + c_box.setBig(0,c_box.bigEnd(0)-1); + } + + // increase the size of boxes touching the hi-x domain by 1 in x + // this is an (Nx x Ny x Nz) -> (Nx/2+1 x Ny x Nz) real-to-complex sizing + if (b.bigEnd(0) == geom.Domain().bigEnd(0)) { + c_box.growHi(0,1); + } + bl.push_back(c_box); + + } + fft_ba.define(std::move(bl)); + } + MultiFab cov(fft_ba, dm, ncomp, 0); - // each MPI rank gets storage for its piece of the fft - BaseFab > spectral_field(c_local_box, 1, The_Device_Arena()); - MultiFab variables_single(ba, dm, 1, 0); - using heffte_complex = typename heffte::fft_output::type; - - int r2c_direction = 0; - for (int comp=0; comp > spectral_field(c_local_box, 1, The_Device_Arena()); + MultiFab variables_single(ba, dm, 1, 0); + using heffte_complex = typename heffte::fft_output::type; + + int r2c_direction = 0; + for (int comp=0; comp fft + heffte::fft3d_r2c fft #elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft + heffte::fft3d_r2c fft #elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft + heffte::fft3d_r2c fft #endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field.dataPtr(); - variables_single.ParallelCopy(variables,comp,0,1); - fft.forward(variables_single[local_boxid].dataPtr(),spectral_data); - Gpu::streamSynchronize(); - - // Integrate spectra over k-shells - 
IntegrateKScalarHeffte(spectral_field,var_names[comp],scaling[comp],c_local_box,sqrtnpts,step); - } + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field.dataPtr(); + variables_single.ParallelCopy(variables,comp,0,1); + fft.forward(variables_single[local_boxid].dataPtr(),spectral_data); + Gpu::streamSynchronize(); + + // Fill in the covariance multifab + int comp_gpu = comp; + Real sqrtnpts_gpu = sqrtnpts; + Real scaling_i_gpu = scaling[comp]; + std::string name_gpu = var_names[comp]; + for (MFIter mfi(cov); mfi.isValid(); ++mfi) { + Array4 const& data = cov.array(mfi); + Array4 > spectral = spectral_field.const_array(); + const Box& bx = mfi.fabbox(); + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + Real re = spectral(i,j,k).real(); + Real im = spectral(i,j,k).imag(); + data(i,j,k,comp_gpu) = (re*re + im*im)/(sqrtnpts_gpu*sqrtnpts_gpu*scaling_i_gpu); + }); + } + + // Integrate spectra over k-shells + IntegrateKScalarHeffte(cov,name_gpu,step,comp_gpu); + } } #endif @@ -314,9 +366,12 @@ void TurbSpectrumVelDecompHeffte(const MultiFab& vel, const amrex::Vector< std::string >& var_names) { BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.local_size() == 1, "TurbSpectrumVelDecomp: Must have one Box per MPI process when using heFFTe"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, + "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, + "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.local_size() == 1, + "TurbSpectrumVelDecomp: Must have one Box per MPI process when using heFFTe"); const GpuArray dx = geom.CellSizeArray(); @@ -514,11 +569,93 @@ void TurbSpectrumVelDecompHeffte(const MultiFab& vel, }); Gpu::streamSynchronize(); + + // BOX ARRAY TO STORE COVARIANCE MATRIX IN A MFAB + // create a BoxArray containing the fft boxes + // by construction, these boxes correlate to the associated spectral_data + // this we can copy the spectral data into this multifab since we know they are owned by the same MPI rank + BoxArray fft_ba; + { + BoxList bl; + bl.reserve(ba.size()); + + for (int i = 0; i < ba.size(); ++i) { + Box b = ba[i]; + + Box r_box = b; + Box c_box = amrex::coarsen(r_box, IntVect(AMREX_D_DECL(2,1,1))); + + // this avoids overlap for the cases when one or more r_box's + // have an even cell index in the hi-x cell + if (c_box.bigEnd(0) * 2 == r_box.bigEnd(0)) { + c_box.setBig(0,c_box.bigEnd(0)-1); + } + + // increase the size of boxes touching the hi-x domain by 1 in x + // this is an (Nx x Ny x Nz) -> (Nx/2+1 x Ny x Nz) real-to-complex sizing + if (b.bigEnd(0) == geom.Domain().bigEnd(0)) { + c_box.growHi(0,1); + } + bl.push_back(c_box); + + } + fft_ba.define(std::move(bl)); + } + MultiFab 
cov(fft_ba, dm, 3, 0); // total, solenoidal, dilatational + + // Fill in the covariance multifab + Real sqrtnpts_gpu = sqrtnpts; + Real scaling_gpu = scaling; + for (MFIter mfi(cov); mfi.isValid(); ++mfi) { + Array4 const& data = cov.array(mfi); + Array4 > spec_tx = spectral_field_Tx.const_array(); + Array4 > spec_ty = spectral_field_Ty.const_array(); + Array4 > spec_tz = spectral_field_Tz.const_array(); + Array4 > spec_sx = spectral_field_Sx.const_array(); + Array4 > spec_sy = spectral_field_Sy.const_array(); + Array4 > spec_sz = spectral_field_Sz.const_array(); + Array4 > spec_dx = spectral_field_Dx.const_array(); + Array4 > spec_dy = spectral_field_Dy.const_array(); + Array4 > spec_dz = spectral_field_Dz.const_array(); + const Box& bx = mfi.fabbox(); + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + Real re_x, re_y, re_z, im_x, im_y, im_z; + + re_x = spec_tx(i,j,k).real(); + im_x = spec_tx(i,j,k).imag(); + re_y = spec_ty(i,j,k).real(); + im_y = spec_ty(i,j,k).imag(); + re_z = spec_tz(i,j,k).real(); + im_z = spec_tz(i,j,k).imag(); + data(i,j,k,0) = (re_x*re_x + im_x*im_x + + re_y*re_y + im_y*im_y + + re_z*re_z + im_z*im_z)/(scaling_gpu); + re_x = spec_sx(i,j,k).real(); + im_x = spec_sx(i,j,k).imag(); + re_y = spec_sy(i,j,k).real(); + im_y = spec_sy(i,j,k).imag(); + re_z = spec_sz(i,j,k).real(); + im_z = spec_sz(i,j,k).imag(); + data(i,j,k,1) = (re_x*re_x + im_x*im_x + + re_y*re_y + im_y*im_y + + re_z*re_z + im_z*im_z)/(scaling_gpu); + re_x = spec_dx(i,j,k).real(); + im_x = spec_dx(i,j,k).imag(); + re_y = spec_dy(i,j,k).real(); + im_y = spec_dy(i,j,k).imag(); + re_z = spec_dz(i,j,k).real(); + im_z = spec_dz(i,j,k).imag(); + data(i,j,k,2) = (re_x*re_x + im_x*im_x + + re_y*re_y + im_y*im_y + + re_z*re_z + im_z*im_z)/(scaling_gpu); + }); + } // Integrate K spectrum for velocities - IntegrateKVelocityHeffte(spectral_field_Tx,spectral_field_Ty,spectral_field_Tz,"vel_total" ,scaling,c_local_box,step); - IntegrateKVelocityHeffte(spectral_field_Sx,spectral_field_Sy,spectral_field_Sz,"vel_solenoidal",scaling,c_local_box,step); - IntegrateKVelocityHeffte(spectral_field_Dx,spectral_field_Dy,spectral_field_Dz,"vel_dilational",scaling,c_local_box,step); + IntegrateKVelocityHeffte(cov,"vel_total" ,step,0); + IntegrateKVelocityHeffte(cov,"vel_solenoidal",step,1); + IntegrateKVelocityHeffte(cov,"vel_dilational",step,2); MultiFab vel_decomp_single(ba, dm, 1, 0); // inverse Fourier transform solenoidal and dilatational components @@ -658,8 +795,10 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, const amrex::Vector< std::string >& var_names) { BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, + "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, + "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); const GpuArray dx = geom.CellSizeArray(); long npts; @@ -1185,36 +1324,66 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, #endif // end heFFTe #if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) -void IntegrateKScalarHeffte(const BaseFab >& spectral_field, - const std::string& 
name, const Real& scaling, - const Box& c_local_box, - const Real& sqrtnpts, - const int& step) +void IntegrateKScalarHeffte(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp) { int npts = n_cells[0]/2; -// Gpu::DeviceVector phisum_device(npts); -// Gpu::DeviceVector phicnt_device(npts); - Gpu::HostVector phisum_host(npts); - Gpu::HostVector phicnt_host(npts); + Gpu::DeviceVector phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); // Gpu::HostVector phisum_host(npts); -// Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data -// int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data +// Gpu::HostVector phicnt_host(npts); + + Gpu::HostVector phisum_host(npts); -// amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept -// { -// phisum_ptr[d] = 0.; -// phicnt_ptr[d] = 0; -// }); - for (int d=0; d & cov = cov_mag.const_array(mfi); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + int ki = i; + int kj = j; + int kk = k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); + amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); + } + }); } + + Gpu::streamSynchronize(); - const Array4< const GpuComplex > spectral = spectral_field.const_array(); -// ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) -// { +// const auto lo = amrex::lbound(c_local_box); +// const auto hi = amrex::ubound(c_local_box); +// for (auto k = lo.z; k <= hi.z; ++k) { +// for (auto j = lo.y; j <= hi.y; ++j) { +// for (auto i = lo.x; i <= hi.x; ++i) { // if (i <= n_cells[0]/2) { // only half of kx-domain // int ki = i; // int kj = j; @@ -1229,68 +1398,38 @@ void IntegrateKScalarHeffte(const BaseFab >& spectral_field, // Real real = spectral(i,j,k).real(); // Real imag = spectral(i,j,k).imag(); // Real cov = (1.0/(sqrtnpts*sqrtnpts*scaling))*(real*real + imag*imag); -// amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov); -// amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); +// amrex::HostDevice::Atomic::Add(&(phisum_host[cell]), cov); +// amrex::HostDevice::Atomic::Add(&(phicnt_host[cell]),1); // } -// } -// else { -// amrex::Abort("i should not exceed n_cells[0]/2"); -// } -// }); - - // Gpu::streamSynchronize(); - - const auto lo = amrex::lbound(c_local_box); - const auto hi = amrex::ubound(c_local_box); - for (auto k = lo.z; k <= hi.z; ++k) { - for (auto j = lo.y; j <= hi.y; ++j) { - for (auto i = lo.x; i <= hi.x; ++i) { - if (i <= n_cells[0]/2) { // only half of kx-domain - int ki = i; - int kj = j; - int kk = k; - - Real dist = (ki*ki + kj*kj + kk*kk); - dist = std::sqrt(dist); - - if ( dist <= n_cells[0]/2-0.5) { - dist = dist+0.5; - int cell = int(dist); - Real real = spectral(i,j,k).real(); - Real imag = spectral(i,j,k).imag(); - Real cov = (1.0/(sqrtnpts*sqrtnpts*scaling))*(real*real + imag*imag); - amrex::HostDevice::Atomic::Add(&(phisum_host[cell]), cov); - amrex::HostDevice::Atomic::Add(&(phicnt_host[cell]),1); - } - } - else { - amrex::Abort("i should not exceed n_cells[0]/2"); - } - } - } - } - - ParallelDescriptor::Barrier(); +// } +// else { +// amrex::Abort("i should not exceed n_cells[0]/2"); +// } +// } +// } +// } +// +// ParallelDescriptor::Barrier(); - ParallelDescriptor::ReduceRealSum(phisum_host.dataPtr(),npts); - ParallelDescriptor::ReduceIntSum(phicnt_host.dataPtr(),npts); + 
ParallelDescriptor::ReduceRealSum(phisum_device.dataPtr(),npts); + ParallelDescriptor::ReduceIntSum(phicnt_device.dataPtr(),npts); Real dk = 1.; -// amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept -// { -// if (d != 0) { -// phisum_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d]; -// } -// }); - - for (int d=0; d > > const Box& bx = mfi.tilebox(); - const Array4 > spectral = (*spectral_field[0]).const_array(); + const Array4 > spectral = (*spectral_field[0]).const_array(mfi); amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { @@ -1392,39 +1531,66 @@ void IntegrateKScalar(const Vector > > #endif #if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) -void IntegrateKVelocityHeffte(const BaseFab >& spectral_fieldx, - const BaseFab >& spectral_fieldy, - const BaseFab >& spectral_fieldz, - const std::string& name, const Real& scaling, - const Box& c_local_box, - const int& step) +void IntegrateKVelocityHeffte(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp) { int npts = n_cells[0]/2; -// Gpu::DeviceVector phisum_device(npts); -// Gpu::DeviceVector phicnt_device(npts); - Gpu::HostVector phisum_host(npts); - Gpu::HostVector phicnt_host(npts); + Gpu::DeviceVector phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); // Gpu::HostVector phisum_host(npts); -// Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data -// int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data +// Gpu::HostVector phicnt_host(npts); + + Gpu::HostVector phisum_host(npts); + + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data -// amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept -// { -// phisum_ptr[d] = 0.; -// phicnt_ptr[d] = 0; -// }); - for (int d=0; d & cov = cov_mag.const_array(mfi); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + int ki = i; + int kj = j; + int kk = k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); + amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); + } + }); } + + Gpu::streamSynchronize(); - const Array4 > spectralx = spectral_fieldx.const_array(); - const Array4 > spectraly = spectral_fieldy.const_array(); - const Array4 > spectralz = spectral_fieldz.const_array(); -// ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) -// { +// const auto lo = amrex::lbound(c_local_box); +// const auto hi = amrex::ubound(c_local_box); +// for (auto k = lo.z; k <= hi.z; ++k) { +// for (auto j = lo.y; j <= hi.y; ++j) { +// for (auto i = lo.x; i <= hi.x; ++i) { // if (i <= n_cells[0]/2) { // only half of kx-domain // int ki = i; // int kj = j; @@ -1447,76 +1613,38 @@ void IntegrateKVelocityHeffte(const BaseFab >& spectral_fieldx, // imag = spectralz(i,j,k).imag(); // cov_z = (1.0/scaling)*(real*real + imag*imag); // cov = cov_x + cov_y + cov_z; -// amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov); -// amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); +// amrex::HostDevice::Atomic::Add(&(phisum_host[cell]), cov); +// amrex::HostDevice::Atomic::Add(&(phicnt_host[cell]),1); // } -// } -// else { -// amrex::Abort("i should not exceed n_cells[0]/2"); -// } -// }); +// } +// else { +// amrex::Abort("i should not exceed n_cells[0]/2"); +// } +// } +// } +// } // -// Gpu::streamSynchronize(); 
- - const auto lo = amrex::lbound(c_local_box); - const auto hi = amrex::ubound(c_local_box); - for (auto k = lo.z; k <= hi.z; ++k) { - for (auto j = lo.y; j <= hi.y; ++j) { - for (auto i = lo.x; i <= hi.x; ++i) { - if (i <= n_cells[0]/2) { // only half of kx-domain - int ki = i; - int kj = j; - int kk = k; - - Real dist = (ki*ki + kj*kj + kk*kk); - dist = std::sqrt(dist); +// ParallelDescriptor::Barrier(); - if ( dist <= n_cells[0]/2-0.5) { - dist = dist+0.5; - int cell = int(dist); - Real real, imag, cov_x, cov_y, cov_z, cov; - real = spectralx(i,j,k).real(); - imag = spectralx(i,j,k).imag(); - cov_x = (1.0/scaling)*(real*real + imag*imag); - real = spectraly(i,j,k).real(); - imag = spectraly(i,j,k).imag(); - cov_y = (1.0/scaling)*(real*real + imag*imag); - real = spectralz(i,j,k).real(); - imag = spectralz(i,j,k).imag(); - cov_z = (1.0/scaling)*(real*real + imag*imag); - cov = cov_x + cov_y + cov_z; - amrex::HostDevice::Atomic::Add(&(phisum_host[cell]), cov); - amrex::HostDevice::Atomic::Add(&(phicnt_host[cell]),1); - } - } - else { - amrex::Abort("i should not exceed n_cells[0]/2"); - } - } - } - } - - ParallelDescriptor::Barrier(); - - ParallelDescriptor::ReduceRealSum(phisum_host.dataPtr(),npts); - ParallelDescriptor::ReduceIntSum(phicnt_host.dataPtr(),npts); + ParallelDescriptor::ReduceRealSum(phisum_device.dataPtr(),npts); + ParallelDescriptor::ReduceIntSum(phicnt_device.dataPtr(),npts); Real dk = 1.; -// amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept -// { -// if (d != 0) { -// phisum_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d]; -// } -// }); - - for (int d=0; d > const Box& bx = mfi.tilebox(); - const Array4 > spectralx = (*spectral_fieldx[0]).const_array(); - const Array4 > spectraly = (*spectral_fieldy[0]).const_array(); - const Array4 > spectralz = (*spectral_fieldz[0]).const_array(); + const Array4 > spectralx = (*spectral_fieldx[0]).const_array(mfi); + const Array4 > spectraly = (*spectral_fieldy[0]).const_array(mfi); + const Array4 > spectralz = (*spectral_fieldz[0]).const_array(mfi); amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { From 2a39ddb9f125bcddf71bc9c9d6f1dcba039fa298 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Mon, 6 Nov 2023 17:10:39 -0800 Subject: [PATCH 017/151] disjoining pressure --- exec/thinFilm/main_driver.cpp | 42 +++++++++++++++++++++++----- exec/thinFilm/thinfilm_functions.cpp | 4 ++- exec/thinFilm/thinfilm_namespace.H | 1 + 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/exec/thinFilm/main_driver.cpp b/exec/thinFilm/main_driver.cpp index df1150845..614cc4ce8 100644 --- a/exec/thinFilm/main_driver.cpp +++ b/exec/thinFilm/main_driver.cpp @@ -135,9 +135,11 @@ void main_driver(const char* argv) // define DistributionMapping dmap.define(ba); - MultiFab height(ba, dmap, 1, 1); - MultiFab Laph (ba, dmap, 1, 1); + MultiFab height (ba, dmap, 1, 1); + MultiFab Laph (ba, dmap, 1, 1); + MultiFab disjoining(ba, dmap, 1, 1); Laph.setVal(0.); // prevent intermediate NaN calculations behind physical boundaries + disjoining.setVal(0.); // for statsitics @@ -169,6 +171,11 @@ void main_driver(const char* argv) gradLaph[1].define(convert(ba,nodal_flag_y), dmap, 1, 0);, gradLaph[2].define(convert(ba,nodal_flag_z), dmap, 1, 0);); + std::array< MultiFab, AMREX_SPACEDIM > gradDisjoining; + AMREX_D_TERM(gradDisjoining[0].define(convert(ba,nodal_flag_x), dmap, 1, 0);, + gradDisjoining[1].define(convert(ba,nodal_flag_y), dmap, 1, 0);, + gradDisjoining[2].define(convert(ba,nodal_flag_z), dmap, 1, 0);); + 
std::array< MultiFab, AMREX_SPACEDIM > flux; AMREX_D_TERM(flux[0] .define(convert(ba,nodal_flag_x), dmap, 1, 0);, flux[1] .define(convert(ba,nodal_flag_y), dmap, 1, 0);, @@ -205,6 +212,7 @@ void main_driver(const char* argv) // constant factor in noise term Real ConstNoise = 2.*k_B*T_init[0] / (3.*visc_coef); Real Const3dx = thinfilm_gamma / (3.*visc_coef); + Real Const3dx_nogamma = 1. / (3.*visc_coef); Real time = 0.; @@ -301,7 +309,7 @@ void main_driver(const char* argv) AMREX_D_TERM(const Array4 & gradhx = gradh[0].array(mfi);, const Array4 & gradhy = gradh[1].array(mfi);, const Array4 & gradhz = gradh[2].array(mfi);); - + const Array4 & h = height.array(mfi); amrex::ParallelFor(bx_x, bx_y, @@ -365,12 +373,16 @@ void main_driver(const char* argv) } - // compute Laph + // compute Laph and disjoining for ( MFIter mfi(Laph,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { const Box& bx = mfi.tilebox(); const Array4 & L = Laph.array(mfi); + + const Array4 & h = height.array(mfi); + + const Array4 & Disjoining = disjoining.array(mfi); AMREX_D_TERM(const Array4 & gradhx = gradh[0].array(mfi);, const Array4 & gradhy = gradh[1].array(mfi);, @@ -380,11 +392,13 @@ void main_driver(const char* argv) { L(i,j,k) = x_flux_fac * (gradhx(i+1,j,k) - gradhx(i,j,k)) / dx[0] + y_flux_fac * (gradhy(i,j+1,k) - gradhy(i,j,k)) / dx[1]; + + Disjoining(i,j,k) = thinfilm_hamaker / (6.*M_PI*std::pow(h(i,j,k),3.)); }); } Laph.FillBoundary(geom.periodicity()); - // compute gradLaph + // compute gradLaph and gradDisjoining for ( MFIter mfi(height,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { AMREX_D_TERM(const Box & bx_x = mfi.nodaltilebox(0);, @@ -394,17 +408,25 @@ void main_driver(const char* argv) AMREX_D_TERM(const Array4 & gradLaphx = gradLaph[0].array(mfi);, const Array4 & gradLaphy = gradLaph[1].array(mfi);, const Array4 & gradLaphz = gradLaph[2].array(mfi);); + + AMREX_D_TERM(const Array4 & gradDisjoiningx = gradDisjoining[0].array(mfi);, + const Array4 & gradDisjoiningy = gradDisjoining[1].array(mfi);, + const Array4 & gradDisjoiningz = gradDisjoining[2].array(mfi);); const Array4 & L = Laph.array(mfi); + const Array4 & Disjoining = disjoining.array(mfi); + amrex::ParallelFor(bx_x, bx_y, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { gradLaphx(i,j,k) = ( L(i,j,k) - L(i-1,j,k) ) / dx[0]; + gradDisjoiningx(i,j,k) = ( Disjoining(i,j,k) -Disjoining(i-1,j,k) ) / dx[0]; }, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { gradLaphy(i,j,k) = ( L(i,j,k) - L(i,j-1,k) ) / dx[1]; + gradDisjoiningy(i,j,k) = ( Disjoining(i,j,k) -Disjoining(i,j-1,k) ) / dx[1]; }); } @@ -431,18 +453,24 @@ void main_driver(const char* argv) const Array4 & randfacey = randface[1].array(mfi);, const Array4 & randfacez = randface[2].array(mfi);); + AMREX_D_TERM(const Array4 & gradDisjoiningx = gradDisjoining[0].array(mfi);, + const Array4 & gradDisjoiningy = gradDisjoining[1].array(mfi);, + const Array4 & gradDisjoiningz = gradDisjoining[2].array(mfi);); + amrex::ParallelFor(bx_x, bx_y, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { fluxx(i,j,k) = x_flux_fac * ( std::sqrt(ConstNoise*std::pow(hfacex(i,j,k),3.) / (dt*dVol)) * randfacex(i,j,k) - + Const3dx * std::pow(hfacex(i,j,k),3.)*gradLaphx(i,j,k) ); + + Const3dx * std::pow(hfacex(i,j,k),3.)*gradLaphx(i,j,k) + + Const3dx_nogamma * std::pow(hfacex(i,j,k),3.)*gradDisjoiningx(i,j,k)); }, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { fluxy(i,j,k) = y_flux_fac * ( std::sqrt(ConstNoise*std::pow(hfacey(i,j,k),3.) 
/ (dt*dVol)) * randfacey(i,j,k) - + Const3dx * std::pow(hfacey(i,j,k),3.)*gradLaphy(i,j,k) ); + + Const3dx * std::pow(hfacey(i,j,k),3.)*gradLaphy(i,j,k) + + Const3dx_nogamma * std::pow(hfacex(i,j,k),3.)*gradDisjoiningy(i,j,k) ); }); // lo x-faces diff --git a/exec/thinFilm/thinfilm_functions.cpp b/exec/thinFilm/thinfilm_functions.cpp index 3a4917d20..2c466f321 100644 --- a/exec/thinFilm/thinfilm_functions.cpp +++ b/exec/thinFilm/thinfilm_functions.cpp @@ -5,6 +5,7 @@ AMREX_GPU_MANAGED amrex::Real thinfilm::thinfilm_h0; AMREX_GPU_MANAGED amrex::Real thinfilm::thinfilm_gamma; AMREX_GPU_MANAGED amrex::Real thinfilm::thinfilm_pertamp; +AMREX_GPU_MANAGED amrex::Real thinfilm::thinfilm_hamaker; AMREX_GPU_MANAGED int thinfilm::thinfilm_icorr; AMREX_GPU_MANAGED int thinfilm::thinfilm_jcorr; @@ -19,6 +20,7 @@ void InitializeThinfilmNamespace() { thinfilm_icorr = 0; thinfilm_jcorr = 0; thinfilm_pertamp = 0.; + thinfilm_hamaker = 0.; do_fft_diag = 1; @@ -26,8 +28,8 @@ void InitializeThinfilmNamespace() { pp.get("thinfilm_h0",thinfilm_h0); pp.get("thinfilm_gamma",thinfilm_gamma); - pp.query("thinfilm_pertamp",thinfilm_pertamp); + pp.query("thinfilm_hamaker",thinfilm_hamaker); pp.query("thinfilm_icorr",thinfilm_icorr); pp.query("thinfilm_jcorr",thinfilm_jcorr); diff --git a/exec/thinFilm/thinfilm_namespace.H b/exec/thinFilm/thinfilm_namespace.H index 59091542f..16f3afefe 100644 --- a/exec/thinFilm/thinfilm_namespace.H +++ b/exec/thinFilm/thinfilm_namespace.H @@ -3,6 +3,7 @@ namespace thinfilm { extern AMREX_GPU_MANAGED amrex::Real thinfilm_h0; extern AMREX_GPU_MANAGED amrex::Real thinfilm_gamma; extern AMREX_GPU_MANAGED amrex::Real thinfilm_pertamp; + extern AMREX_GPU_MANAGED amrex::Real thinfilm_hamaker; extern AMREX_GPU_MANAGED int thinfilm_icorr; extern AMREX_GPU_MANAGED int thinfilm_jcorr; From 9dff1494157e7dade625d33cf5b5e4fa7a0d05de Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Mon, 6 Nov 2023 17:13:44 -0800 Subject: [PATCH 018/151] readability --- exec/thinFilm/main_driver.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/exec/thinFilm/main_driver.cpp b/exec/thinFilm/main_driver.cpp index 614cc4ce8..f6b78432c 100644 --- a/exec/thinFilm/main_driver.cpp +++ b/exec/thinFilm/main_driver.cpp @@ -135,8 +135,8 @@ void main_driver(const char* argv) // define DistributionMapping dmap.define(ba); - MultiFab height (ba, dmap, 1, 1); - MultiFab Laph (ba, dmap, 1, 1); + MultiFab height(ba, dmap, 1, 1); + MultiFab Laph(ba, dmap, 1, 1); MultiFab disjoining(ba, dmap, 1, 1); Laph.setVal(0.); // prevent intermediate NaN calculations behind physical boundaries disjoining.setVal(0.); @@ -462,14 +462,14 @@ void main_driver(const char* argv) { fluxx(i,j,k) = x_flux_fac * ( std::sqrt(ConstNoise*std::pow(hfacex(i,j,k),3.) / (dt*dVol)) * randfacex(i,j,k) - + Const3dx * std::pow(hfacex(i,j,k),3.)*gradLaphx(i,j,k) + + Const3dx * std::pow(hfacex(i,j,k),3.)*gradLaphx(i,j,k) + Const3dx_nogamma * std::pow(hfacex(i,j,k),3.)*gradDisjoiningx(i,j,k)); }, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { fluxy(i,j,k) = y_flux_fac * ( std::sqrt(ConstNoise*std::pow(hfacey(i,j,k),3.) 
/ (dt*dVol)) * randfacey(i,j,k) - + Const3dx * std::pow(hfacey(i,j,k),3.)*gradLaphy(i,j,k) + + Const3dx * std::pow(hfacey(i,j,k),3.)*gradLaphy(i,j,k) + Const3dx_nogamma * std::pow(hfacex(i,j,k),3.)*gradDisjoiningy(i,j,k) ); }); From f7bc077ac88102a790cc602d9606dd6d0a79c9e2 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Mon, 6 Nov 2023 17:15:17 -0800 Subject: [PATCH 019/151] more readability --- exec/thinFilm/main_driver.cpp | 2 +- exec/thinFilm/thinfilm_functions.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/exec/thinFilm/main_driver.cpp b/exec/thinFilm/main_driver.cpp index f6b78432c..d2b2e4929 100644 --- a/exec/thinFilm/main_driver.cpp +++ b/exec/thinFilm/main_driver.cpp @@ -309,7 +309,7 @@ void main_driver(const char* argv) AMREX_D_TERM(const Array4 & gradhx = gradh[0].array(mfi);, const Array4 & gradhy = gradh[1].array(mfi);, const Array4 & gradhz = gradh[2].array(mfi);); - + const Array4 & h = height.array(mfi); amrex::ParallelFor(bx_x, bx_y, diff --git a/exec/thinFilm/thinfilm_functions.cpp b/exec/thinFilm/thinfilm_functions.cpp index 2c466f321..dde70c0b9 100644 --- a/exec/thinFilm/thinfilm_functions.cpp +++ b/exec/thinFilm/thinfilm_functions.cpp @@ -28,6 +28,7 @@ void InitializeThinfilmNamespace() { pp.get("thinfilm_h0",thinfilm_h0); pp.get("thinfilm_gamma",thinfilm_gamma); + pp.query("thinfilm_pertamp",thinfilm_pertamp); pp.query("thinfilm_hamaker",thinfilm_hamaker); From 9cfa60320dc0d3c73de667dc63ac259cf1ae1a1a Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Mon, 6 Nov 2023 17:16:25 -0800 Subject: [PATCH 020/151] final cleanup - some testing now required --- exec/thinFilm/main_driver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exec/thinFilm/main_driver.cpp b/exec/thinFilm/main_driver.cpp index d2b2e4929..56cf91b57 100644 --- a/exec/thinFilm/main_driver.cpp +++ b/exec/thinFilm/main_driver.cpp @@ -136,7 +136,7 @@ void main_driver(const char* argv) dmap.define(ba); MultiFab height(ba, dmap, 1, 1); - MultiFab Laph(ba, dmap, 1, 1); + MultiFab Laph (ba, dmap, 1, 1); MultiFab disjoining(ba, dmap, 1, 1); Laph.setVal(0.); // prevent intermediate NaN calculations behind physical boundaries disjoining.setVal(0.); From 8d4d230f2e83e2443b2276bc75c3a67c31cf41c4 Mon Sep 17 00:00:00 2001 From: jbb Date: Thu, 9 Nov 2023 13:56:08 -0800 Subject: [PATCH 021/151] fixed bug in disjoining pressure --- exec/thinFilm/main_driver.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/exec/thinFilm/main_driver.cpp b/exec/thinFilm/main_driver.cpp index 56cf91b57..997c17835 100644 --- a/exec/thinFilm/main_driver.cpp +++ b/exec/thinFilm/main_driver.cpp @@ -397,6 +397,7 @@ void main_driver(const char* argv) }); } Laph.FillBoundary(geom.periodicity()); + disjoining.FillBoundary(geom.periodicity()); // compute gradLaph and gradDisjoining for ( MFIter mfi(height,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { @@ -517,8 +518,11 @@ void main_driver(const char* argv) const Array4 & h = height.array(mfi); + // amrex::Print{} << "HEIGHT " << time << " " << h(0,0,0) << " " << h(31,0,0) << std::endl; + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { + h(i,j,k) -= dt * ( (fluxx(i+1,j,k) - fluxx(i,j,k)) / dx[0] +(fluxy(i,j+1,k) - fluxy(i,j,k)) / dx[1]); }); @@ -539,6 +543,8 @@ void main_driver(const char* argv) // copy distributed data into 1D data height_onegrid.ParallelCopy(height, 0, 0, 1); + amrex::Real sumh = 0.; + for ( MFIter mfi(height_onegrid,false); mfi.isValid(); ++mfi ) { std::ofstream hstream; @@ -558,9 +564,14 @@ void 
main_driver(const char* argv) for (auto j = lo.y; j <= hi.y; ++j) { for (auto i = lo.x; i <= hi.x; ++i) { hstream << std::setprecision(15) << mfdata(i,j,0) << " "; + sumh += mfdata(i,j,0); + if(j==0 && i==0){ + amrex::Print{} << "HEIGHT " << time << " " << mfdata(0,0,0) << " " << mfdata(31,0,0) << std::endl; + } } hstream << "\n"; } + amrex::Print{} << "SUM " << sumh << std::endl; } // end MFIter From 63e790467646c4ef5f6e6a1bb6111b4bdbf875f8 Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Wed, 29 Nov 2023 10:26:27 -0800 Subject: [PATCH 022/151] use bulk viscosity in bulk dissipation --- src_compressible_stag/DeriveVelProp.cpp | 17 +++++++++++++++-- .../compressible_functions_stag.H | 1 + src_compressible_stag/main_driver.cpp | 8 ++++---- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src_compressible_stag/DeriveVelProp.cpp b/src_compressible_stag/DeriveVelProp.cpp index 34a0c1ab6..2b1c39d56 100644 --- a/src_compressible_stag/DeriveVelProp.cpp +++ b/src_compressible_stag/DeriveVelProp.cpp @@ -8,6 +8,7 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel, std::array< MultiFab, AMREX_SPACEDIM >& cumom, MultiFab& prim, MultiFab& eta, + MultiFab& zeta, const amrex::Geometry& geom, Real& turbKE, Real& c_speed, Real& u_rms, @@ -26,6 +27,7 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel, // Setup temp MultiFabs std::array< MultiFab, AMREX_SPACEDIM > macTemp; MultiFab gradU; + MultiFab eta_bulk_diss; MultiFab sound_speed; MultiFab ccTemp; MultiFab ccTempA; @@ -41,6 +43,7 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel, ccTemp.define(prim.boxArray(),prim.DistributionMap(),1,0); ccTempA.define(prim.boxArray(),prim.DistributionMap(),1,0); ccTempDiv.define(prim.boxArray(),prim.DistributionMap(),1,0); + if (visc_type == 3) eta_bulk_diss.define(prim.boxArray(),prim.DistributionMap(),1,0); #if (AMREX_SPACEDIM == 3) curlU[0].define(convert(prim.boxArray(),nodal_flag_xy), prim.DistributionMap(), 1, 0); curlU[1].define(convert(prim.boxArray(),nodal_flag_xz), prim.DistributionMap(), 1, 0); @@ -199,8 +202,18 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel, // Dilational dissipation (4/3)* // CCInnerProd(ccTempDiv,0,eta,0,ccTemp,eps_d); - eps_d = MultiFab::Dot(eta, 0, ccTempDiv, 0, 1, 0); - eps_d *= dProb*(4.0/3.0); + if (visc_type == 3) { + // get eta_bulk_diss = kappa + 4/3 eta + MultiFab::LinComb(eta_bulk_diss, 1.0, zeta, 0, + 1.3333333333, eta, 0, + 0, 1, 0); + eps_d = MultiFab::Dot(eta_bulk_diss, 0, ccTempDiv, 0, 1, 0); + eps_d *= dProb; + } + else { + eps_d = MultiFab::Dot(eta, 0, ccTempDiv, 0, 1, 0); + eps_d *= dProb*(4.0/3.0); + } // Ratio of Dilational to Solenoidal dissipation eps_ratio = eps_d/eps_s; diff --git a/src_compressible_stag/compressible_functions_stag.H b/src_compressible_stag/compressible_functions_stag.H index 1ebaeca26..27476fbc2 100644 --- a/src_compressible_stag/compressible_functions_stag.H +++ b/src_compressible_stag/compressible_functions_stag.H @@ -431,6 +431,7 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel, std::array< MultiFab, AMREX_SPACEDIM >& cumom, MultiFab& prim, MultiFab& eta, + MultiFab& zeta, const amrex::Geometry& geom, Real& turbKE, Real& c_speed, Real& u_rms, diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp index 07f592533..68789a4fe 100644 --- a/src_compressible_stag/main_driver.cpp +++ b/src_compressible_stag/main_driver.cpp @@ -952,9 +952,9 @@ void main_driver(const char* argv) RK3stepStag(cu, cumom, prim, vel, source, eta, zeta, 
kappa, chi, D, faceflux, edgeflux_x, edgeflux_y, edgeflux_z, cenflux, ranchem, geom, dt, step, turbforce); } - else { - calculateTransportCoeffs(prim, eta, zeta, kappa, chi, D); - } + else { + calculateTransportCoeffs(prim, eta, zeta, kappa, chi, D); + } // update surface chemistry (via either surfchem_mui or MFsurfchem) #if defined(MUI) || defined(USE_AMREX_MPMD) @@ -1177,7 +1177,7 @@ void main_driver(const char* argv) vel[i].FillBoundary(geom.periodicity()); cumom[i].FillBoundary(geom.periodicity()); } - GetTurbQty(vel, cumom, prim, eta, geom, + GetTurbQty(vel, cumom, prim, eta, zeta, geom, turbKE, c_speed, u_rms, taylor_len, taylor_Re, taylor_Ma, skew, kurt, From 71549d8cd2b87cf5cb287e089316f16eb1691aa3 Mon Sep 17 00:00:00 2001 From: isriva Date: Wed, 6 Dec 2023 10:29:41 -0800 Subject: [PATCH 023/151] PDFs for turbulence fields: velocity, pressure, density, temperature --- exec/compressible_stag/TURB_PDFS/GNUmakefile | 33 ++ exec/compressible_stag/TURB_PDFS/Make.package | 13 + exec/compressible_stag/TURB_PDFS/main.cpp | 407 ++++++++++++++++++ 3 files changed, 453 insertions(+) create mode 100644 exec/compressible_stag/TURB_PDFS/GNUmakefile create mode 100644 exec/compressible_stag/TURB_PDFS/Make.package create mode 100644 exec/compressible_stag/TURB_PDFS/main.cpp diff --git a/exec/compressible_stag/TURB_PDFS/GNUmakefile b/exec/compressible_stag/TURB_PDFS/GNUmakefile new file mode 100644 index 000000000..f7f8319c2 --- /dev/null +++ b/exec/compressible_stag/TURB_PDFS/GNUmakefile @@ -0,0 +1,33 @@ +AMREX_HOME ?= ../../../../amrex/ + +DEBUG = TRUE +DEBUG = FALSE + +DIM = 3 + +COMP = gcc + +PRECISION = DOUBLE + +USE_MPI = TRUE +USE_OMP = FALSE +USE_CUDA = FALSE + +TINY_PROFILE = FALSE + +################################################### + +include $(AMREX_HOME)/Tools/GNUMake/Make.defs + +include ./Make.package +include $(AMREX_HOME)/Src/Base/Make.package + +vpath %.c : . $(vpathdir) +vpath %.h : . $(vpathdir) +vpath %.cpp : . $(vpathdir) +vpath %.H : . $(vpathdir) +vpath %.F : . $(vpathdir) +vpath %.f : . $(vpathdir) +vpath %.f90 : . 
$(vpathdir) + +include $(AMREX_HOME)/Tools/GNUMake/Make.rules diff --git a/exec/compressible_stag/TURB_PDFS/Make.package b/exec/compressible_stag/TURB_PDFS/Make.package new file mode 100644 index 000000000..01353cad8 --- /dev/null +++ b/exec/compressible_stag/TURB_PDFS/Make.package @@ -0,0 +1,13 @@ +CEXE_sources += main.cpp + +INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/Base +include $(AMREX_HOME)/Src/Base/Make.package +vpathdir += $(AMREX_HOME)/Src/Base + +INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/Extern/amrdata +include $(AMREX_HOME)/Src/Extern/amrdata/Make.package +vpathdir += $(AMREX_HOME)/Src/Extern/amrdata + +#INCLUDE_LOCATIONS += $(AMREX_HOME)/Tools/C_util +#include $(AMREX_HOME)/Tools/C_util/Make.package +#vpathdir += $(AMREX_HOME)/Tools/C_util diff --git a/exec/compressible_stag/TURB_PDFS/main.cpp b/exec/compressible_stag/TURB_PDFS/main.cpp new file mode 100644 index 000000000..2600b8dc7 --- /dev/null +++ b/exec/compressible_stag/TURB_PDFS/main.cpp @@ -0,0 +1,407 @@ +#include +#include + +#include +#include + +using namespace amrex; +using namespace std; + +static +void +PrintUsage (const char* progName) +{ + Print() << std::endl + << "This utility computes PDF of scalars, and various powers of Laplacian of velocity field," << std::endl; + + Print() << "Usage:" << '\n'; + Print() << progName << " " << std::endl + << "OR" << std::endl + << progName << std::endl + << " step=" << std::endl + << " nbins= " << std::endl + << " range= " << std::endl + << std::endl; + + exit(1); +} + + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); + + { + + if (argc == 1) { + PrintUsage(argv[0]); + } + + ParmParse pp; + + int step; + pp.query("step",step); + + std::string iFile = amrex::Concatenate("plt",step,9); + + Vector scalar_out(3); + scalar_out[0] = amrex::Concatenate("rho_pdf",step,9); + scalar_out[1] = amrex::Concatenate("press_pdf",step,9); + scalar_out[2] = amrex::Concatenate("temp_pdf",step,9); + Vector Lap_out(5); + Lap_out[0] = amrex::Concatenate("L0_pdf",step,9); + Lap_out[1] = amrex::Concatenate("L1_pdf",step,9); + Lap_out[2] = amrex::Concatenate("L2_pdf",step,9); + Lap_out[3] = amrex::Concatenate("L3_pdf",step,9); + Lap_out[4] = amrex::Concatenate("L4_pdf",step,9); + + int nbins; + pp.get("nbins", nbins); + + Real range; + pp.get("range",range); + + amrex::Print() << "Reading from plotfile " << iFile << "\n"; + + // for the Header + std::string iFile2 = iFile; + iFile2 += "/Header"; + + // open header + ifstream x; + x.open(iFile2.c_str(), ios::in); + + // read in first line of header (typically "HyperCLaw-V1.1" or similar) + std::string str; + x >> str; + + // read in number of components from header + int ncomp; + x >> ncomp; + + // read in variable names from header + int flag = 0; + int rho_ind, press_ind, temp_ind, velx_ind; + for (int n=0; n> str; + if (str == "rhoInstant") rho_ind = flag; + if (str == "pInstant") press_ind = flag; + if (str == "tInstant") temp_ind = flag; + if (str == "uxInstantFACE") velx_ind = flag; + flag ++; + } + + // read in dimensionality from header + int dim; + x >> dim; + + // read in time + Real time; + x >> time; + + // read in finest level + int finest_level; + x >> finest_level; + + // read in prob_lo and prob_hi + amrex::GpuArray prob_lo, prob_hi; + for (int i=0; i<3; ++i) { + x >> prob_lo[i]; + } + for (int i=0; i<3; ++i) { + x >> prob_hi[i]; + } + + // now read in the plotfile data + // check to see whether the user pointed to the plotfile base directory + // or the data itself + if (amrex::FileExists(iFile+"/Level_0/Cell_H")) { + 
iFile += "/Level_0/Cell"; + } + if (amrex::FileExists(iFile+"/Level_00/Cell_H")) { + iFile += "/Level_00/Cell"; + } + + // storage for the input coarse and fine MultiFabs + MultiFab mf; + + // read in plotfile mf to MultiFab + VisMF::Read(mf, iFile); + + // get BoxArray and DistributionMapping + BoxArray ba = mf.boxArray(); + DistributionMapping dmap = mf.DistributionMap(); + + // physical dimensions of problem + RealBox real_box({AMREX_D_DECL(prob_lo[0],prob_lo[1],prob_lo[2])}, + {AMREX_D_DECL(prob_hi[0],prob_hi[1],prob_hi[2])}); + + // single box with the enire domain + Box domain = ba.minimalBox().enclosedCells(); + + Real ncells = (double) domain.numPts(); + + // set to 1 (periodic) + Vector is_periodic(3,1); + + Geometry geom(domain,&real_box,CoordSys::cartesian,is_periodic.data()); + + const Real* dx = geom.CellSize(); + + //////////////////////////////////////////////////////////////////////// + ////////////// velocity Laplacian PDFs ///////////////////////////////// + //////////////////////////////////////////////////////////////////////// + MultiFab vel_grown(ba,dmap,3,1); + MultiFab laplacian(ba,dmap,3,1); + + // copy shifted velocity components from mf into vel_grown + Copy(vel_grown,mf,velx_ind,0,3,0); + + // fill ghost cells of vel_grown + vel_grown.FillBoundary(geom.periodicity()); + + for (int m=0; m<5; ++m) { + + for ( MFIter mfi(vel_grown,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& vel = vel_grown.array(mfi); + const Array4& lap = laplacian.array(mfi); + + for (auto n=0; n<3; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + lap(i,j,k,n) = -(vel(i+1,j,k,n) - 2.*vel(i,j,k,n) + vel(i-1,j,k,n)) / (dx[0]*dx[0]) + -(vel(i,j+1,k,n) - 2.*vel(i,j,k,n) + vel(i,j-1,k,n)) / (dx[1]*dx[1]) + -(vel(i,j,k+1,n) - 2.*vel(i,j,k,n) + vel(i,j,k+1,n)) / (dx[2]*dx[2]); + } + } + } + } + + } // end MFIter + + // copy lap into vel_grown + Copy(vel_grown,laplacian,0,0,3,0); + + // fill ghost cells of vel_grown + vel_grown.FillBoundary(geom.periodicity()); + + Vector L2(3,0.); + for (int i=0; i<3; i++) + L2[i]=0.; + + for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& lap = laplacian.array(mfi); + + for (auto n=0; n<3; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + L2[n] += lap(i,j,k,n)*lap(i,j,k,n); + + } + } + } + } + + } // end MFIter + + ParallelDescriptor::ReduceRealSum(L2.dataPtr(),3); + amrex::Long totpts = domain.numPts(); + L2[0] = sqrt(L2[0]/totpts); + L2[1] = sqrt(L2[1]/totpts); + L2[2] = sqrt(L2[2]/totpts); + Print() << "L2 norm of Laplacian to power " << m << " is " << L2[0] + << " " << L2[1] << " " << L2[2] << " " << std::endl; + + for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& lap = laplacian.array(mfi); + + for (auto n=0; n<3; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + lap(i,j,k,n) = lap(i,j,k,n)/L2[n]; + + } + } + } + } + + } // end MFIter + + Vector bins(nbins+1,0.); + + int halfbin = nbins/2; + Real hbinwidth = range/nbins; + Real binwidth = 
2.*range/nbins; + amrex::Long count=0; + amrex::Long totbin=0; + for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0; + + for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& lap = laplacian.array(mfi); + + for (auto n=0; n<3; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + int index = floor((lap(i,j,k,n) + hbinwidth)/binwidth); + index += halfbin; + + if( index >=0 && index <= nbins) { + bins[index] += 1; + totbin++; + } + + count++; + + } + } + } + } + + } // end MFIter + + ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1); + ParallelDescriptor::ReduceLongSum(count); + ParallelDescriptor::ReduceLongSum(totbin); + Print() << "Points outside of range "<< count - totbin << " " << + (double)(count-totbin)/count << std::endl; + + // print out contents of bins to the screen + for (int i=0; i bins(nbins+1,0.); + + int halfbin = nbins/2; + Real hbinwidth = range/nbins; + Real binwidth = 2.*range/nbins; + amrex::Long count=0; + amrex::Long totbin=0; + for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0; + + for ( MFIter mfi(scalar,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& sca = scalar.array(mfi); + + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + int index = floor((sca(i,j,k,m) + hbinwidth)/binwidth); + index += halfbin; + + if( index >=0 && index <= nbins) { + bins[index] += 1; + totbin++; + } + + count++; + + } + } + } + + } // end MFIter + + ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1); + ParallelDescriptor::ReduceLongSum(count); + ParallelDescriptor::ReduceLongSum(totbin); + Print() << "Points outside of range "<< count - totbin << " " << + (double)(count-totbin)/count << std::endl; + + // print out contents of bins to the screen + for (int i=0; i Date: Wed, 6 Dec 2023 18:01:42 -0800 Subject: [PATCH 024/151] minor change to PDF calculation --- exec/compressible_stag/TURB_PDFS/main.cpp | 63 ++++++++++++----------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/exec/compressible_stag/TURB_PDFS/main.cpp b/exec/compressible_stag/TURB_PDFS/main.cpp index 2600b8dc7..2a62e5ce9 100644 --- a/exec/compressible_stag/TURB_PDFS/main.cpp +++ b/exec/compressible_stag/TURB_PDFS/main.cpp @@ -156,41 +156,13 @@ int main (int argc, char* argv[]) // copy shifted velocity components from mf into vel_grown Copy(vel_grown,mf,velx_ind,0,3,0); + Copy(laplacian,mf,velx_ind,0,3,0); // fill ghost cells of vel_grown vel_grown.FillBoundary(geom.periodicity()); + laplacian.FillBoundary(geom.periodicity()); for (int m=0; m<5; ++m) { - - for ( MFIter mfi(vel_grown,false); mfi.isValid(); ++mfi ) { - - const Box& bx = mfi.validbox(); - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); - - const Array4& vel = vel_grown.array(mfi); - const Array4& lap = laplacian.array(mfi); - - for (auto n=0; n<3; ++n) { - for (auto k = lo.z; k <= hi.z; ++k) { - for (auto j = lo.y; j <= hi.y; ++j) { - for (auto i = lo.x; i <= hi.x; ++i) { - - lap(i,j,k,n) = -(vel(i+1,j,k,n) - 2.*vel(i,j,k,n) + vel(i-1,j,k,n)) / (dx[0]*dx[0]) - -(vel(i,j+1,k,n) - 2.*vel(i,j,k,n) + vel(i,j-1,k,n)) / (dx[1]*dx[1]) - -(vel(i,j,k+1,n) - 2.*vel(i,j,k,n) + vel(i,j,k+1,n)) / (dx[2]*dx[2]); - } - } - } - } - - 
} // end MFIter - - // copy lap into vel_grown - Copy(vel_grown,laplacian,0,0,3,0); - - // fill ghost cells of vel_grown - vel_grown.FillBoundary(geom.periodicity()); Vector L2(3,0.); for (int i=0; i<3; i++) @@ -306,6 +278,37 @@ int main (int argc, char* argv[]) } outfile.close(); } + + for ( MFIter mfi(vel_grown,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& vel = vel_grown.array(mfi); + const Array4& lap = laplacian.array(mfi); + + for (auto n=0; n<3; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + lap(i,j,k,n) = -(vel(i+1,j,k,n) - 2.*vel(i,j,k,n) + vel(i-1,j,k,n)) / (dx[0]*dx[0]) + -(vel(i,j+1,k,n) - 2.*vel(i,j,k,n) + vel(i,j-1,k,n)) / (dx[1]*dx[1]) + -(vel(i,j,k+1,n) - 2.*vel(i,j,k,n) + vel(i,j,k+1,n)) / (dx[2]*dx[2]); + } + } + } + } + + } // end MFIter + + // copy lap into vel_grown + Copy(vel_grown,laplacian,0,0,3,0); + + // fill ghost cells of vel_grown + vel_grown.FillBoundary(geom.periodicity()); + } // end loop //////////////////////////////////////////////////////////////////////// ////////////// velocity Laplacian PDFs ///////////////////////////////// From 27a019e22f864046ec9b171d67c252afbde91314 Mon Sep 17 00:00:00 2001 From: isriva Date: Thu, 7 Dec 2023 09:47:27 -0800 Subject: [PATCH 025/151] PDFs of decomposed velocities --- exec/compressible_stag/TURB_PDFS/GNUmakefile | 3 + exec/compressible_stag/TURB_PDFS/Make.package | 2 +- .../TURB_PDFS/main_decomp.cpp | 455 ++++++++++++++++++ 3 files changed, 459 insertions(+), 1 deletion(-) create mode 100644 exec/compressible_stag/TURB_PDFS/main_decomp.cpp diff --git a/exec/compressible_stag/TURB_PDFS/GNUmakefile b/exec/compressible_stag/TURB_PDFS/GNUmakefile index f7f8319c2..c9c864458 100644 --- a/exec/compressible_stag/TURB_PDFS/GNUmakefile +++ b/exec/compressible_stag/TURB_PDFS/GNUmakefile @@ -17,6 +17,9 @@ TINY_PROFILE = FALSE ################################################### +#EBASE = PDF +EBASE = main_decomp + include $(AMREX_HOME)/Tools/GNUMake/Make.defs include ./Make.package diff --git a/exec/compressible_stag/TURB_PDFS/Make.package b/exec/compressible_stag/TURB_PDFS/Make.package index 01353cad8..ddbf6fb70 100644 --- a/exec/compressible_stag/TURB_PDFS/Make.package +++ b/exec/compressible_stag/TURB_PDFS/Make.package @@ -1,4 +1,4 @@ -CEXE_sources += main.cpp +CEXE_sources += ${EBASE}.cpp INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/Base include $(AMREX_HOME)/Src/Base/Make.package diff --git a/exec/compressible_stag/TURB_PDFS/main_decomp.cpp b/exec/compressible_stag/TURB_PDFS/main_decomp.cpp new file mode 100644 index 000000000..61eab81ce --- /dev/null +++ b/exec/compressible_stag/TURB_PDFS/main_decomp.cpp @@ -0,0 +1,455 @@ +#include +#include + +#include +#include + +using namespace amrex; +using namespace std; + +static +void +PrintUsage (const char* progName) +{ + Print() << std::endl + << "This utility computes PDF of vorticity and divergence, and various powers of Laplacian of solenoidal and dilatational velocity field," << std::endl; + + Print() << "Usage:" << '\n'; + Print() << progName << " " << std::endl + << "OR" << std::endl + << progName << std::endl + << " step=" << std::endl + << " nbins= " << std::endl + << " range= " << std::endl + << std::endl; + + exit(1); +} + + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); + + { + + if (argc == 1) { + PrintUsage(argv[0]); + } + + ParmParse pp; + + int step; + 
pp.query("step",step); + + std::string iFile = amrex::Concatenate("vel_grad_decomp",step,9); + + Vector scalar_out(2); + scalar_out[0] = amrex::Concatenate("vort_pdf",step,9); + scalar_out[1] = amrex::Concatenate("div_pdf",step,9); + Vector Lap_out_sol(5); + Lap_out_sol[0] = amrex::Concatenate("L0_pdf_sol",step,9); + Lap_out_sol[1] = amrex::Concatenate("L1_pdf_sol",step,9); + Lap_out_sol[2] = amrex::Concatenate("L2_pdf_sol",step,9); + Lap_out_sol[3] = amrex::Concatenate("L3_pdf_sol",step,9); + Lap_out_sol[4] = amrex::Concatenate("L4_pdf_sol",step,9); + Vector Lap_out_dil(5); + Lap_out_dil[0] = amrex::Concatenate("L0_pdf_dil",step,9); + Lap_out_dil[1] = amrex::Concatenate("L1_pdf_dil",step,9); + Lap_out_dil[2] = amrex::Concatenate("L2_pdf_dil",step,9); + Lap_out_dil[3] = amrex::Concatenate("L3_pdf_dil",step,9); + Lap_out_dil[4] = amrex::Concatenate("L4_pdf_dil",step,9); + + int nbins; + pp.get("nbins", nbins); + + Real range; + pp.get("range",range); + + amrex::Print() << "Reading from vel_grad_decomp plotfile " << iFile << "\n"; + + // for the Header + std::string iFile2 = iFile; + iFile2 += "/Header"; + + // open header + ifstream x; + x.open(iFile2.c_str(), ios::in); + + // read in first line of header (typically "HyperCLaw-V1.1" or similar) + std::string str; + x >> str; + + // read in number of components from header + int ncomp; + x >> ncomp; + + // read in variable names from header + int flag = 0; + int vort_ind, div_ind, velx_sol_ind, velx_dil_ind; + for (int n=0; n> str; + if (str == "vort") vort_ind = flag; + if (str == "div") div_ind = flag; + if (str == "ux_s") velx_sol_ind = flag; + if (str == "ux_d") velx_dil_ind = flag; + flag ++; + } + + // read in dimensionality from header + int dim; + x >> dim; + + // read in time + Real time; + x >> time; + + // read in finest level + int finest_level; + x >> finest_level; + + // read in prob_lo and prob_hi + amrex::GpuArray prob_lo, prob_hi; + for (int i=0; i<3; ++i) { + x >> prob_lo[i]; + } + for (int i=0; i<3; ++i) { + x >> prob_hi[i]; + } + + // now read in the plotfile data + // check to see whether the user pointed to the plotfile base directory + // or the data itself + if (amrex::FileExists(iFile+"/Level_0/Cell_H")) { + iFile += "/Level_0/Cell"; + } + if (amrex::FileExists(iFile+"/Level_00/Cell_H")) { + iFile += "/Level_00/Cell"; + } + + // storage for the input coarse and fine MultiFabs + MultiFab mf; + + // read in plotfile mf to MultiFab + VisMF::Read(mf, iFile); + + // get BoxArray and DistributionMapping + BoxArray ba = mf.boxArray(); + DistributionMapping dmap = mf.DistributionMap(); + + // physical dimensions of problem + RealBox real_box({AMREX_D_DECL(prob_lo[0],prob_lo[1],prob_lo[2])}, + {AMREX_D_DECL(prob_hi[0],prob_hi[1],prob_hi[2])}); + + // single box with the enire domain + Box domain = ba.minimalBox().enclosedCells(); + + Real ncells = (double) domain.numPts(); + + // set to 1 (periodic) + Vector is_periodic(3,1); + + Geometry geom(domain,&real_box,CoordSys::cartesian,is_periodic.data()); + + const Real* dx = geom.CellSize(); + + //////////////////////////////////////////////////////////////////////// + ////////////// velocity Laplacian PDFs///////////// //////////////////// + //////////////////////////////////////////////////////////////////////// + MultiFab vel_grown(ba,dmap,6,1); + MultiFab laplacian(ba,dmap,6,1); + + // copy shifted velocity components from mf into vel_grown + Copy(vel_grown,mf,velx_sol_ind,0,3,0); // sol + Copy(laplacian,mf,velx_sol_ind,0,3,0); // sol + Copy(vel_grown,mf,velx_dil_ind,3,3,0); 
// dil + Copy(laplacian,mf,velx_dil_ind,3,3,0); // dil + + // fill ghost cells of vel_grown + vel_grown.FillBoundary(geom.periodicity()); + laplacian.FillBoundary(geom.periodicity()); + + for (int m=0; m<5; ++m) { + + Vector L2(6,0.); + for (int i=0; i<6; i++) + L2[i]=0.; + + for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& lap = laplacian.array(mfi); + + for (auto n=0; n<6; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + L2[n] += lap(i,j,k,n)*lap(i,j,k,n); + + } + } + } + } + + } // end MFIter + + ParallelDescriptor::ReduceRealSum(L2.dataPtr(),6); + amrex::Long totpts = domain.numPts(); + L2[0] = sqrt(L2[0]/totpts); + L2[1] = sqrt(L2[1]/totpts); + L2[2] = sqrt(L2[2]/totpts); + L2[3] = sqrt(L2[3]/totpts); + L2[4] = sqrt(L2[4]/totpts); + L2[5] = sqrt(L2[5]/totpts); + Print() << "L2 norm of Laplacian (solenoidal) to power " << m << " is " << L2[0] + << " " << L2[1] << " " << L2[2] << " " << std::endl; + Print() << "L2 norm of Laplacian (dilational) to power " << m << " is " << L2[3] + << " " << L2[4] << " " << L2[5] << " " << std::endl; + + for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& lap = laplacian.array(mfi); + + for (auto n=0; n<6; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + lap(i,j,k,n) = lap(i,j,k,n)/L2[n]; + + } + } + } + } + + } // end MFIter + + Vector bins_sol(nbins+1,0.); + Vector bins_dil(nbins+1,0.); + + int halfbin = nbins/2; + Real hbinwidth = range/nbins; + Real binwidth = 2.*range/nbins; + amrex::Long count_sol=0; + amrex::Long totbin_sol=0; + amrex::Long count_dil=0; + amrex::Long totbin_dil=0; + for (int ind=0 ; ind < nbins+1; ind++) bins_sol[ind]=0; + for (int ind=0 ; ind < nbins+1; ind++) bins_dil[ind]=0; + + for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& lap = laplacian.array(mfi); + + for (auto n=0; n<3; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + int index = floor((lap(i,j,k,n) + hbinwidth)/binwidth); + index += halfbin; + + if( index >=0 && index <= nbins) { + bins_sol[index] += 1; + totbin_sol++; + } + + count_sol++; + + } + } + } + } + + for (auto n=3; n<6; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + int index = floor((lap(i,j,k,n) + hbinwidth)/binwidth); + index += halfbin; + + if( index >=0 && index <= nbins) { + bins_dil[index] += 1; + totbin_dil++; + } + + count_dil++; + + } + } + } + } + + } // end MFIter + + ParallelDescriptor::ReduceRealSum(bins_sol.dataPtr(),nbins+1); + ParallelDescriptor::ReduceLongSum(count_sol); + ParallelDescriptor::ReduceLongSum(totbin_sol); + ParallelDescriptor::ReduceRealSum(bins_dil.dataPtr(),nbins+1); + ParallelDescriptor::ReduceLongSum(count_dil); + ParallelDescriptor::ReduceLongSum(totbin_dil); + Print() << "Points outside of range (solenoidal) "<< count_sol - totbin_sol << " " << + (double)(count_sol-totbin_sol)/count_sol << std::endl; + Print() << "Points outside of range 
(dilational) "<< count_dil - totbin_dil << " " << + (double)(count_dil-totbin_dil)/count_dil << std::endl; + + // print out contents of bins to the screen + for (int i=0; i& vel = vel_grown.array(mfi); + const Array4& lap = laplacian.array(mfi); + + for (auto n=0; n<6; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + lap(i,j,k,n) = -(vel(i+1,j,k,n) - 2.*vel(i,j,k,n) + vel(i-1,j,k,n)) / (dx[0]*dx[0]) + -(vel(i,j+1,k,n) - 2.*vel(i,j,k,n) + vel(i,j-1,k,n)) / (dx[1]*dx[1]) + -(vel(i,j,k+1,n) - 2.*vel(i,j,k,n) + vel(i,j,k+1,n)) / (dx[2]*dx[2]); + } + } + } + } + + } // end MFIter + + // copy lap into vel_grown + Copy(vel_grown,laplacian,0,0,6,0); + + // fill ghost cells of vel_grown + vel_grown.FillBoundary(geom.periodicity()); + + } // end loop + //////////////////////////////////////////////////////////////////////// + ////////////// velocity Laplacian PDFs //////////// //////////////////// + //////////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////////// + ///////////////////////// scalar PDFs ///////////////////////////////// + //////////////////////////////////////////////////////////////////////// + MultiFab scalar(ba,dmap,2,0); + Copy(scalar,mf,vort_ind,0,1,0); + Copy(scalar,mf,div_ind,1,1,0); + + // compute spatial mean + Real mean_vort = scalar.sum(0) / (ncells); + Real mean_div = scalar.sum(1) / (ncells); + + // get fluctuations + scalar.plus(-1.0*mean_vort, 0, 1); + scalar.plus(-1.0*mean_div, 1, 1); + + // get rms + Real rms_vort = scalar.norm2(0) / sqrt(ncells); + Real rms_div = scalar.norm2(1) / sqrt(ncells); + + // scale by rms + scalar.mult(1.0/rms_vort, 0, 1); + scalar.mult(1.0/rms_div, 1, 1); + + // now compute pdfs + for (int m = 0; m < 2; ++m) { + + Vector bins(nbins+1,0.); + + int halfbin = nbins/2; + Real hbinwidth = range/nbins; + Real binwidth = 2.*range/nbins; + amrex::Long count=0; + amrex::Long totbin=0; + for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0; + + for ( MFIter mfi(scalar,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& sca = scalar.array(mfi); + + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + int index = floor((sca(i,j,k,m) + hbinwidth)/binwidth); + index += halfbin; + + if( index >=0 && index <= nbins) { + bins[index] += 1; + totbin++; + } + + count++; + + } + } + } + + } // end MFIter + + ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1); + ParallelDescriptor::ReduceLongSum(count); + ParallelDescriptor::ReduceLongSum(totbin); + Print() << "Points outside of range "<< count - totbin << " " << + (double)(count-totbin)/count << std::endl; + + // print out contents of bins to the screen + for (int i=0; i Date: Thu, 7 Dec 2023 22:39:27 -0800 Subject: [PATCH 026/151] compute individual vorticity components and their PDFs --- .../TURB_PDFS/main_decomp.cpp | 104 +++++++++++++++--- 1 file changed, 88 insertions(+), 16 deletions(-) diff --git a/exec/compressible_stag/TURB_PDFS/main_decomp.cpp b/exec/compressible_stag/TURB_PDFS/main_decomp.cpp index 61eab81ce..cf7be5727 100644 --- a/exec/compressible_stag/TURB_PDFS/main_decomp.cpp +++ b/exec/compressible_stag/TURB_PDFS/main_decomp.cpp @@ -3,6 +3,7 @@ #include #include +#include using namespace amrex; using namespace std; @@ -44,9 +45,12 @@ int main (int 
argc, char* argv[]) std::string iFile = amrex::Concatenate("vel_grad_decomp",step,9); - Vector scalar_out(2); + Vector scalar_out(5); scalar_out[0] = amrex::Concatenate("vort_pdf",step,9); scalar_out[1] = amrex::Concatenate("div_pdf",step,9); + scalar_out[2] = amrex::Concatenate("vortx_pdf",step,9); + scalar_out[3] = amrex::Concatenate("vorty_pdf",step,9); + scalar_out[4] = amrex::Concatenate("vortz_pdf",step,9); Vector Lap_out_sol(5); Lap_out_sol[0] = amrex::Concatenate("L0_pdf_sol",step,9); Lap_out_sol[1] = amrex::Concatenate("L1_pdf_sol",step,9); @@ -86,13 +90,17 @@ int main (int argc, char* argv[]) // read in variable names from header int flag = 0; - int vort_ind, div_ind, velx_sol_ind, velx_dil_ind; + int vort_ind, div_ind, velx_sol_ind, vely_sol_ind, velz_sol_ind, velx_dil_ind, vely_dil_ind, velz_dil_ind; for (int n=0; n> str; if (str == "vort") vort_ind = flag; if (str == "div") div_ind = flag; if (str == "ux_s") velx_sol_ind = flag; + if (str == "uy_s") vely_sol_ind = flag; + if (str == "uz_s") velz_sol_ind = flag; if (str == "ux_d") velx_dil_ind = flag; + if (str == "uy_d") vely_dil_ind = flag; + if (str == "uz_d") velz_dil_ind = flag; flag ++; } @@ -157,17 +165,34 @@ int main (int argc, char* argv[]) ////////////// velocity Laplacian PDFs///////////// //////////////////// //////////////////////////////////////////////////////////////////////// MultiFab vel_grown(ba,dmap,6,1); + MultiFab vel_sol (ba,dmap,3,1); MultiFab laplacian(ba,dmap,6,1); // copy shifted velocity components from mf into vel_grown - Copy(vel_grown,mf,velx_sol_ind,0,3,0); // sol - Copy(laplacian,mf,velx_sol_ind,0,3,0); // sol - Copy(vel_grown,mf,velx_dil_ind,3,3,0); // dil - Copy(laplacian,mf,velx_dil_ind,3,3,0); // dil + Copy(vel_grown,mf,velx_sol_ind,0,1,0); // sol + Copy(vel_grown,mf,vely_sol_ind,1,1,0); // sol + Copy(vel_grown,mf,velz_sol_ind,2,1,0); // sol + + Copy(laplacian,mf,velx_sol_ind,0,1,0); // sol + Copy(laplacian,mf,vely_sol_ind,1,1,0); // sol + Copy(laplacian,mf,velz_sol_ind,2,1,0); // sol + + Copy(vel_grown,mf,velx_dil_ind,3,1,0); // dil + Copy(vel_grown,mf,vely_dil_ind,4,1,0); // dil + Copy(vel_grown,mf,velz_dil_ind,5,1,0); // dil + + Copy(laplacian,mf,velx_dil_ind,3,1,0); // dil + Copy(laplacian,mf,vely_dil_ind,4,1,0); // dil + Copy(laplacian,mf,velz_dil_ind,5,1,0); // dil + + Copy(vel_sol,mf,velx_sol_ind,0,1,0); // sol + Copy(vel_sol,mf,vely_sol_ind,1,1,0); // sol + Copy(vel_sol,mf,velz_sol_ind,2,1,0); // sol // fill ghost cells of vel_grown vel_grown.FillBoundary(geom.periodicity()); laplacian.FillBoundary(geom.periodicity()); + vel_sol .FillBoundary(geom.periodicity()); for (int m=0; m<5; ++m) { @@ -367,28 +392,74 @@ int main (int argc, char* argv[]) //////////////////////////////////////////////////////////////////////// ///////////////////////// scalar PDFs ///////////////////////////////// //////////////////////////////////////////////////////////////////////// - MultiFab scalar(ba,dmap,2,0); + MultiFab scalar(ba,dmap,5,0); // vort_mag, div, vort_x, vort_y, vort_z + scalar.setVal(0.0); Copy(scalar,mf,vort_ind,0,1,0); Copy(scalar,mf,div_ind,1,1,0); + // Compute vorticity components and store in scalar + for ( MFIter mfi(vel_sol,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + Array4 const& sol = vel_sol.array(mfi); + Array4 const& sca = scalar .array(mfi); + + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + // dw/dy - 
dv/dz + sca(i,j,k,2) = + (sol(i,j+1,k,velz_sol_ind) - sol(i,j-1,k,velz_sol_ind)) / (2.*dx[1]) - + (sol(i,j,k+1,vely_sol_ind) - sol(i,j,k-1,vely_sol_ind)) / (2.*dx[2]); + + // dv/dx - du/dy + sca(i,j,k,4) = + (sol(i+1,j,k,vely_sol_ind) - sol(i-1,j,k,vely_sol_ind)) / (2.*dx[0]) - + (sol(i,j+1,k,velx_sol_ind) - sol(i,j-1,k,velx_sol_ind)) / (2.*dx[1]); + + // du/dz - dw/dx + sca(i,j,k,3) = + (sol(i,j,k+1,velx_sol_ind) - sol(i,j,k-1,velx_sol_ind)) / (2.*dx[2]) - + (sol(i+1,j,k,velz_sol_ind) - sol(i-1,j,k,velz_sol_ind)) / (2.*dx[0]); + + } + } + } + } + // compute spatial mean - Real mean_vort = scalar.sum(0) / (ncells); - Real mean_div = scalar.sum(1) / (ncells); + Real mean_vort = scalar.sum(0) / (ncells); + Real mean_div = scalar.sum(1) / (ncells); + Real mean_vortx = scalar.sum(2) / (ncells); + Real mean_vorty = scalar.sum(3) / (ncells); + Real mean_vortz = scalar.sum(4) / (ncells); // get fluctuations - scalar.plus(-1.0*mean_vort, 0, 1); - scalar.plus(-1.0*mean_div, 1, 1); + scalar.plus(-1.0*mean_vort, 0, 1); + scalar.plus(-1.0*mean_div, 1, 1); + scalar.plus(-1.0*mean_vortx, 2, 1); + scalar.plus(-1.0*mean_vorty, 3, 1); + scalar.plus(-1.0*mean_vortz, 4, 1); // get rms - Real rms_vort = scalar.norm2(0) / sqrt(ncells); - Real rms_div = scalar.norm2(1) / sqrt(ncells); + Real rms_vort = scalar.norm2(0) / sqrt(ncells); + Real rms_div = scalar.norm2(1) / sqrt(ncells); + Real rms_vortx = scalar.norm2(2) / sqrt(ncells); + Real rms_vorty = scalar.norm2(3) / sqrt(ncells); + Real rms_vortz = scalar.norm2(4) / sqrt(ncells); // scale by rms - scalar.mult(1.0/rms_vort, 0, 1); - scalar.mult(1.0/rms_div, 1, 1); + scalar.mult(1.0/rms_vort, 0, 1); + scalar.mult(1.0/rms_div, 1, 1); + scalar.mult(1.0/rms_vortx, 2, 1); + scalar.mult(1.0/rms_vorty, 3, 1); + scalar.mult(1.0/rms_vortz, 4, 1); // now compute pdfs - for (int m = 0; m < 2; ++m) { + for (int m = 0; m < 5; ++m) { Vector bins(nbins+1,0.); @@ -453,3 +524,4 @@ int main (int argc, char* argv[]) } + From fada4dfe10f6716a0504d2389bb038746e48d751 Mon Sep 17 00:00:00 2001 From: isriva Date: Fri, 8 Dec 2023 16:09:00 -0800 Subject: [PATCH 027/151] include FFTW libraries for heffte fftw backend --- exec/compressible_stag/GNUmakefile | 1 + 1 file changed, 1 insertion(+) diff --git a/exec/compressible_stag/GNUmakefile b/exec/compressible_stag/GNUmakefile index 8124a8ee1..4d0d6bd30 100644 --- a/exec/compressible_stag/GNUmakefile +++ b/exec/compressible_stag/GNUmakefile @@ -75,6 +75,7 @@ endif ifeq ($(USE_HEFFTE_FFTW),TRUE) DEFINES += -DHEFFTE_FFTW + LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f else ifeq ($(USE_HEFFTE_CUFFT),TRUE) DEFINES += -DHEFFTE_CUFFT else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) From af68571df19d3da024eead34517bda19d6dc501f Mon Sep 17 00:00:00 2001 From: isriva Date: Sun, 10 Dec 2023 17:08:35 -0800 Subject: [PATCH 028/151] enable cross-platform compilation --- exec/compressible_stag/GNUmakefile | 18 ++++- src_analysis/TurbSpectra.cpp | 104 ++--------------------------- 2 files changed, 23 insertions(+), 99 deletions(-) diff --git a/exec/compressible_stag/GNUmakefile b/exec/compressible_stag/GNUmakefile index 4d0d6bd30..a887cc599 100644 --- a/exec/compressible_stag/GNUmakefile +++ b/exec/compressible_stag/GNUmakefile @@ -1,7 +1,6 @@ # AMREX_HOME defines the directory in which we will find all the AMReX code. 
# If you set AMREX_HOME as an environment variable, this line will be ignored AMREX_HOME ?= ../../../amrex/ -HEFFTE_HOME ?= ../../../heffte/ DEBUG = FALSE USE_MPI = TRUE @@ -15,10 +14,19 @@ MAX_SPEC = 8 USE_PARTICLES = FALSE DO_TURB = FALSE + USE_HEFFTE_FFTW = FALSE USE_HEFFTE_CUFFT = FALSE USE_HEFFTE_ROCFFT = FALSE +ifeq ($(USE_HEFFTE_FFTW),TRUE) + HEFFTE_HOME ?= ../../../heffte/ +else ifeq ($(USE_HEFFTE_CUFFT),TRUE) + HEFFTE_HOME ?= ../../../heffte/ +else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) + HEFFTE_HOME ?= ../../../heffte/ +endif + include $(AMREX_HOME)/Tools/GNUMake/Make.defs VPATH_LOCATIONS += . @@ -48,7 +56,13 @@ include ../../src_common/Make.package VPATH_LOCATIONS += ../../src_common/ INCLUDE_LOCATIONS += ../../src_common/ -include $(HEFFTE_HOME)/src/Make.package +ifeq ($(USE_HEFFTE_FFTW),TRUE) + include $(HEFFTE_HOME)/src/Make.package +else ifeq ($(USE_HEFFTE_CUFFT),TRUE) + include $(HEFFTE_HOME)/src/Make.package +else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) + include $(HEFFTE_HOME)/src/Make.package +endif include $(AMREX_HOME)/Src/Base/Make.package diff --git a/src_analysis/TurbSpectra.cpp b/src_analysis/TurbSpectra.cpp index 3e6689fab..b90ad67f8 100644 --- a/src_analysis/TurbSpectra.cpp +++ b/src_analysis/TurbSpectra.cpp @@ -1378,38 +1378,6 @@ void IntegrateKScalarHeffte(const MultiFab& cov_mag, } Gpu::streamSynchronize(); - -// const auto lo = amrex::lbound(c_local_box); -// const auto hi = amrex::ubound(c_local_box); -// for (auto k = lo.z; k <= hi.z; ++k) { -// for (auto j = lo.y; j <= hi.y; ++j) { -// for (auto i = lo.x; i <= hi.x; ++i) { -// if (i <= n_cells[0]/2) { // only half of kx-domain -// int ki = i; -// int kj = j; -// int kk = k; -// -// Real dist = (ki*ki + kj*kj + kk*kk); -// dist = std::sqrt(dist); -// -// if ( dist <= n_cells[0]/2-0.5) { -// dist = dist+0.5; -// int cell = int(dist); -// Real real = spectral(i,j,k).real(); -// Real imag = spectral(i,j,k).imag(); -// Real cov = (1.0/(sqrtnpts*sqrtnpts*scaling))*(real*real + imag*imag); -// amrex::HostDevice::Atomic::Add(&(phisum_host[cell]), cov); -// amrex::HostDevice::Atomic::Add(&(phicnt_host[cell]),1); -// } -// } -// else { -// amrex::Abort("i should not exceed n_cells[0]/2"); -// } -// } -// } -// } -// -// ParallelDescriptor::Barrier(); ParallelDescriptor::ReduceRealSum(phisum_device.dataPtr(),npts); ParallelDescriptor::ReduceIntSum(phicnt_device.dataPtr(),npts); @@ -1422,12 +1390,6 @@ void IntegrateKScalarHeffte(const MultiFab& cov_mag, } }); -// for (int d=0; d > > for ( MFIter mfi(variables_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { - const Box& bx = mfi.tilebox(); + const Box& bx = mfi.fabbox(); - const Array4 > spectral = (*spectral_field[0]).const_array(mfi); + const Array4 > spectral = (*spectral_field[0]).const_array(); amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { @@ -1541,8 +1503,6 @@ void IntegrateKVelocityHeffte(const MultiFab& cov_mag, Gpu::DeviceVector phisum_device(npts); Gpu::DeviceVector phicnt_device(npts); -// Gpu::HostVector phisum_host(npts); -// Gpu::HostVector phicnt_host(npts); Gpu::HostVector phisum_host(npts); @@ -1554,11 +1514,7 @@ void IntegrateKVelocityHeffte(const MultiFab& cov_mag, phisum_ptr[d] = 0.; phicnt_ptr[d] = 0; }); -// for (int d=0; d > for ( MFIter mfi(vel_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { - const Box& bx = mfi.tilebox(); + const Box& bx = mfi.fabbox(); - const Array4 > spectralx = (*spectral_fieldx[0]).const_array(mfi); - const Array4 > spectraly = (*spectral_fieldy[0]).const_array(mfi); - const Array4 > spectralz = 
(*spectral_fieldz[0]).const_array(mfi); + const Array4 > spectralx = (*spectral_fieldx[0]).const_array(); + const Array4 > spectraly = (*spectral_fieldy[0]).const_array(); + const Array4 > spectralz = (*spectral_fieldz[0]).const_array(); amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { From ef07931517e75034e389742b12622c62e72a6422 Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Fri, 26 Jan 2024 14:19:12 -0800 Subject: [PATCH 029/151] get heffte to compile on perlmutter and more PDF diagnostics --- exec/compressible_stag/TURB_PDFS/GNUmakefile | 2 +- .../TURB_PDFS/main_decomp.cpp | 293 ++++++++++++++++-- src_analysis/TurbSpectra.H | 11 +- src_analysis/TurbSpectra.cpp | 2 +- src_compressible_stag/main_driver.cpp | 2 +- 5 files changed, 268 insertions(+), 42 deletions(-) diff --git a/exec/compressible_stag/TURB_PDFS/GNUmakefile b/exec/compressible_stag/TURB_PDFS/GNUmakefile index c9c864458..e1fcfec48 100644 --- a/exec/compressible_stag/TURB_PDFS/GNUmakefile +++ b/exec/compressible_stag/TURB_PDFS/GNUmakefile @@ -17,7 +17,7 @@ TINY_PROFILE = FALSE ################################################### -#EBASE = PDF +#EBASE = main EBASE = main_decomp include $(AMREX_HOME)/Tools/GNUMake/Make.defs diff --git a/exec/compressible_stag/TURB_PDFS/main_decomp.cpp b/exec/compressible_stag/TURB_PDFS/main_decomp.cpp index cf7be5727..8a892b33b 100644 --- a/exec/compressible_stag/TURB_PDFS/main_decomp.cpp +++ b/exec/compressible_stag/TURB_PDFS/main_decomp.cpp @@ -46,11 +46,11 @@ int main (int argc, char* argv[]) std::string iFile = amrex::Concatenate("vel_grad_decomp",step,9); Vector scalar_out(5); - scalar_out[0] = amrex::Concatenate("vort_pdf",step,9); - scalar_out[1] = amrex::Concatenate("div_pdf",step,9); - scalar_out[2] = amrex::Concatenate("vortx_pdf",step,9); - scalar_out[3] = amrex::Concatenate("vorty_pdf",step,9); - scalar_out[4] = amrex::Concatenate("vortz_pdf",step,9); + scalar_out[0] = amrex::Concatenate("div_pdf",step,9); + scalar_out[1] = amrex::Concatenate("vortx_pdf",step,9); + scalar_out[2] = amrex::Concatenate("vorty_pdf",step,9); + scalar_out[3] = amrex::Concatenate("vortz_pdf",step,9); + scalar_out[4] = amrex::Concatenate("vort_pdf",step,9); Vector Lap_out_sol(5); Lap_out_sol[0] = amrex::Concatenate("L0_pdf_sol",step,9); Lap_out_sol[1] = amrex::Concatenate("L1_pdf_sol",step,9); @@ -392,10 +392,9 @@ int main (int argc, char* argv[]) //////////////////////////////////////////////////////////////////////// ///////////////////////// scalar PDFs ///////////////////////////////// //////////////////////////////////////////////////////////////////////// - MultiFab scalar(ba,dmap,5,0); // vort_mag, div, vort_x, vort_y, vort_z + MultiFab scalar(ba,dmap,4,0); // vort_mag, div, vort_x, vort_y, vort_z scalar.setVal(0.0); - Copy(scalar,mf,vort_ind,0,1,0); - Copy(scalar,mf,div_ind,1,1,0); + Copy(scalar,mf,div_ind,0,1,0); // Compute vorticity components and store in scalar for ( MFIter mfi(vel_sol,false); mfi.isValid(); ++mfi ) { @@ -404,19 +403,19 @@ int main (int argc, char* argv[]) const auto lo = amrex::lbound(bx); const auto hi = amrex::ubound(bx); - Array4 const& sol = vel_sol.array(mfi); - Array4 const& sca = scalar .array(mfi); + Array4 const& sol = vel_sol .array(mfi); + Array4 const& sca = scalar .array(mfi); for (auto k = lo.z; k <= hi.z; ++k) { for (auto j = lo.y; j <= hi.y; ++j) { for (auto i = lo.x; i <= hi.x; ++i) { // dw/dy - dv/dz - sca(i,j,k,2) = + sca(i,j,k,1) = (sol(i,j+1,k,velz_sol_ind) - sol(i,j-1,k,velz_sol_ind)) / (2.*dx[1]) - 
(sol(i,j,k+1,vely_sol_ind) - sol(i,j,k-1,vely_sol_ind)) / (2.*dx[2]); // dv/dx - du/dy - sca(i,j,k,4) = + sca(i,j,k,2) = (sol(i+1,j,k,vely_sol_ind) - sol(i-1,j,k,vely_sol_ind)) / (2.*dx[0]) - (sol(i,j+1,k,velx_sol_ind) - sol(i,j-1,k,velx_sol_ind)) / (2.*dx[1]); @@ -431,35 +430,31 @@ int main (int argc, char* argv[]) } // compute spatial mean - Real mean_vort = scalar.sum(0) / (ncells); - Real mean_div = scalar.sum(1) / (ncells); - Real mean_vortx = scalar.sum(2) / (ncells); - Real mean_vorty = scalar.sum(3) / (ncells); - Real mean_vortz = scalar.sum(4) / (ncells); + Real mean_div = scalar.sum(0) / (ncells); + Real mean_vortx = scalar.sum(1) / (ncells); + Real mean_vorty = scalar.sum(2) / (ncells); + Real mean_vortz = scalar.sum(3) / (ncells); // get fluctuations - scalar.plus(-1.0*mean_vort, 0, 1); - scalar.plus(-1.0*mean_div, 1, 1); - scalar.plus(-1.0*mean_vortx, 2, 1); - scalar.plus(-1.0*mean_vorty, 3, 1); - scalar.plus(-1.0*mean_vortz, 4, 1); + scalar.plus(-1.0*mean_div, 0, 1); + scalar.plus(-1.0*mean_vortx, 1, 1); + scalar.plus(-1.0*mean_vorty, 2, 1); + scalar.plus(-1.0*mean_vortz, 3, 1); // get rms - Real rms_vort = scalar.norm2(0) / sqrt(ncells); - Real rms_div = scalar.norm2(1) / sqrt(ncells); - Real rms_vortx = scalar.norm2(2) / sqrt(ncells); - Real rms_vorty = scalar.norm2(3) / sqrt(ncells); - Real rms_vortz = scalar.norm2(4) / sqrt(ncells); + Real rms_div = scalar.norm2(0) / sqrt(ncells); + Real rms_vortx = scalar.norm2(1) / sqrt(ncells); + Real rms_vorty = scalar.norm2(2) / sqrt(ncells); + Real rms_vortz = scalar.norm2(3) / sqrt(ncells); // scale by rms - scalar.mult(1.0/rms_vort, 0, 1); - scalar.mult(1.0/rms_div, 1, 1); - scalar.mult(1.0/rms_vortx, 2, 1); - scalar.mult(1.0/rms_vorty, 3, 1); - scalar.mult(1.0/rms_vortz, 4, 1); + scalar.mult(1.0/rms_div, 0, 1); + scalar.mult(1.0/rms_vortx, 1, 1); + scalar.mult(1.0/rms_vorty, 2, 1); + scalar.mult(1.0/rms_vortz, 3, 1); // now compute pdfs - for (int m = 0; m < 5; ++m) { + for (int m = 0; m < 4; ++m) { Vector bins(nbins+1,0.); @@ -518,6 +513,238 @@ int main (int argc, char* argv[]) outfile.close(); } } + + // total vorticity PDF + { + Vector bins(nbins+1,0.); + + int halfbin = nbins/2; + Real hbinwidth = range/nbins; + Real binwidth = 2.*range/nbins; + amrex::Long count=0; + amrex::Long totbin=0; + for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0; + + for ( MFIter mfi(scalar,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& sca = scalar.array(mfi); + + for (auto n = 1; n < 4; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + int index = floor((sca(i,j,k,n) + hbinwidth)/binwidth); + index += halfbin; + + if( index >=0 && index <= nbins) { + bins[index] += 1; + totbin++; + } + + count++; + + } + } + } + } + + } // end MFIter + + ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1); + ParallelDescriptor::ReduceLongSum(count); + ParallelDescriptor::ReduceLongSum(totbin); + Print() << "Points outside of range "<< count - totbin << " " << + (double)(count-totbin)/count << std::endl; + + // print out contents of bins to the screen + for (int i=0; i bins(nbins+1,0.); + + int halfbin = nbins/2; + Real hbinwidth = range/nbins; + Real binwidth = 2.*range/nbins; + amrex::Long count=0; + amrex::Long totbin=0; + for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0; + + for ( MFIter mfi(vel_decomp,false); mfi.isValid(); ++mfi ) { + + const Box& bx = 
mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& vel = vel_decomp.array(mfi); + + for (auto n = 0; n < 3; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + int index = floor((vel(i,j,k,n) + hbinwidth)/binwidth); + index += halfbin; + + if( index >=0 && index <= nbins) { + bins[index] += 1; + totbin++; + } + + count++; + + } + } + } + } + + } // end MFIter + + ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1); + ParallelDescriptor::ReduceLongSum(count); + ParallelDescriptor::ReduceLongSum(totbin); + Print() << "Points outside of range "<< count - totbin << " " << + (double)(count-totbin)/count << std::endl; + + // print out contents of bins to the screen + for (int i=0; i bins(nbins+1,0.); + + int halfbin = nbins/2; + Real hbinwidth = range/nbins; + Real binwidth = 2.*range/nbins; + amrex::Long count=0; + amrex::Long totbin=0; + for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0; + + for ( MFIter mfi(vel_decomp,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& vel = vel_decomp.array(mfi); + + for (auto n = 3; n < 6; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + int index = floor((vel(i,j,k,n) + hbinwidth)/binwidth); + index += halfbin; + + if( index >=0 && index <= nbins) { + bins[index] += 1; + totbin++; + } + + count++; + + } + } + } + } + + } // end MFIter + + ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1); + ParallelDescriptor::ReduceLongSum(count); + ParallelDescriptor::ReduceLongSum(totbin); + Print() << "Points outside of range "<< count - totbin << " " << + (double)(count-totbin)/count << std::endl; + + // print out contents of bins to the screen + for (int i=0; i -#include -#include -#include - - // HEFFTE #if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) #include #endif +#include +#include +#include +#include + // non-HEFFTE #ifdef AMREX_USE_CUDA #include diff --git a/src_analysis/TurbSpectra.cpp b/src_analysis/TurbSpectra.cpp index b90ad67f8..3060a4d4e 100644 --- a/src_analysis/TurbSpectra.cpp +++ b/src_analysis/TurbSpectra.cpp @@ -1,5 +1,5 @@ -#include "common_functions.H" #include "TurbSpectra.H" +#include "common_functions.H" #include #include "AMReX_PlotFileUtil.H" diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp index 68789a4fe..61071bbf2 100644 --- a/src_compressible_stag/main_driver.cpp +++ b/src_compressible_stag/main_driver.cpp @@ -1,3 +1,4 @@ +#include "TurbSpectra.H" #include "common_functions.H" #include "compressible_functions.H" #include "compressible_functions_stag.H" @@ -8,7 +9,6 @@ #include "rng_functions.H" #include "StructFact.H" -#include "TurbSpectra.H" #if defined(TURB) #include "TurbForcingComp.H" From f67154937f457369a5ce9006f72a5ad9ef7e1b89 Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Fri, 26 Jan 2024 14:19:42 -0800 Subject: [PATCH 030/151] spectral calculations for turbulence outupt --- .../SPECTRAL_FILTER/GNUmakefile | 65 ++ .../SPECTRAL_FILTER/Make.package | 5 + .../SPECTRAL_FILTER/build_perlmutter.sh | 30 + .../SPECTRAL_FILTER/main.cpp | 28 + .../SPECTRAL_FILTER/main_driver.cpp | 358 ++++++ .../SPECTRAL_FILTER/spectral_functions.H | 100 ++ .../SPECTRAL_FILTER/spectral_functions.cpp | 1005 +++++++++++++++++ 
exec/compressible_stag/build_perlmutter.sh | 30 + 8 files changed, 1621 insertions(+) create mode 100644 exec/compressible_stag/SPECTRAL_FILTER/GNUmakefile create mode 100644 exec/compressible_stag/SPECTRAL_FILTER/Make.package create mode 100755 exec/compressible_stag/SPECTRAL_FILTER/build_perlmutter.sh create mode 100644 exec/compressible_stag/SPECTRAL_FILTER/main.cpp create mode 100644 exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp create mode 100644 exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H create mode 100644 exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp create mode 100755 exec/compressible_stag/build_perlmutter.sh diff --git a/exec/compressible_stag/SPECTRAL_FILTER/GNUmakefile b/exec/compressible_stag/SPECTRAL_FILTER/GNUmakefile new file mode 100644 index 000000000..141c3a621 --- /dev/null +++ b/exec/compressible_stag/SPECTRAL_FILTER/GNUmakefile @@ -0,0 +1,65 @@ +AMREX_HOME ?= ../../../../amrex/ + +DEBUG = FALSE +USE_MPI = TRUE +USE_OMP = FALSE +USE_CUDA = FALSE +USE_HIP = FALSE +COMP = gnu +DIM = 3 +TINY_PROFILE = FALSE + +USE_HEFFTE_FFTW = FALSE +USE_HEFFTE_CUFFT = FALSE +USE_HEFFTE_ROCFFT = FALSE + +ifeq ($(USE_HEFFTE_FFTW),TRUE) + HEFFTE_HOME ?= ../../../../heffte/ +else ifeq ($(USE_HEFFTE_CUFFT),TRUE) + HEFFTE_HOME ?= ../../../../heffte/ +else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) + HEFFTE_HOME ?= ../../../../heffte/ +endif + +include $(AMREX_HOME)/Tools/GNUMake/Make.defs + +VPATH_LOCATIONS += . +INCLUDE_LOCATIONS += . + +ifeq ($(USE_HEFFTE_FFTW),TRUE) + include $(HEFFTE_HOME)/src/Make.package +else ifeq ($(USE_HEFFTE_CUFFT),TRUE) + include $(HEFFTE_HOME)/src/Make.package +else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) + include $(HEFFTE_HOME)/src/Make.package +endif + +include ./Make.package + +include $(AMREX_HOME)/Src/Base/Make.package + +include $(AMREX_HOME)/Tools/GNUMake/Make.rules + +ifeq ($(USE_CUDA),TRUE) + LIBRARIES += -lcufft +else ifeq ($(USE_HIP),TRUE) + # Use rocFFT. 
ROC_PATH is defined in amrex + INCLUDE_LOCATIONS += $(ROC_PATH)/rocfft/include + LIBRARY_LOCATIONS += $(ROC_PATH)/rocfft/lib + LIBRARIES += -L$(ROC_PATH)/rocfft/lib -lrocfft +else + LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f +endif + +ifeq ($(DO_TURB), TRUE) + DEFINES += -DTURB +endif + +ifeq ($(USE_HEFFTE_FFTW),TRUE) + DEFINES += -DHEFFTE_FFTW + LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f +else ifeq ($(USE_HEFFTE_CUFFT),TRUE) + DEFINES += -DHEFFTE_CUFFT +else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) + DEFINES += -DHEFFTE_ROCFFT +endif diff --git a/exec/compressible_stag/SPECTRAL_FILTER/Make.package b/exec/compressible_stag/SPECTRAL_FILTER/Make.package new file mode 100644 index 000000000..e0391922a --- /dev/null +++ b/exec/compressible_stag/SPECTRAL_FILTER/Make.package @@ -0,0 +1,5 @@ +CEXE_sources += main.cpp +CEXE_sources += main_driver.cpp +CEXE_sources += spectral_functions.cpp + +CEXE_headers += spectral_functions.H diff --git a/exec/compressible_stag/SPECTRAL_FILTER/build_perlmutter.sh b/exec/compressible_stag/SPECTRAL_FILTER/build_perlmutter.sh new file mode 100755 index 000000000..f6becf08a --- /dev/null +++ b/exec/compressible_stag/SPECTRAL_FILTER/build_perlmutter.sh @@ -0,0 +1,30 @@ +#!/usr/bin/bash + +# required dependencies +module load gpu +module load PrgEnv-gnu +module load craype +module load craype-x86-milan +module load craype-accel-nvidia80 +module load cudatoolkit +module load cmake/3.24.3 + +# necessary to use CUDA-Aware MPI and run a job +export CRAY_ACCEL_TARGET=nvidia80 + +# optimize CUDA compilation for A100 +export AMREX_CUDA_ARCH=8.0 + +# optimize CPU microarchitecture for AMD EPYC 3rd Gen (Milan/Zen3) +# note: the cc/CC/ftn wrappers below add those +export CXXFLAGS="-march=znver3" +export CFLAGS="-march=znver3" + +# compiler environment hints +export CC=cc +export CXX=CC +export FC=ftn +export CUDACXX=$(which nvcc) +export CUDAHOSTCXX=CC + +make -j10 USE_CUDA=TRUE USE_HEFFTE_CUFFT=TRUE USE_ASSERTION=TRUE diff --git a/exec/compressible_stag/SPECTRAL_FILTER/main.cpp b/exec/compressible_stag/SPECTRAL_FILTER/main.cpp new file mode 100644 index 000000000..95c149e5a --- /dev/null +++ b/exec/compressible_stag/SPECTRAL_FILTER/main.cpp @@ -0,0 +1,28 @@ +#include +//#include + +// function declaration +void main_driver (const char* argv); + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); + +// this specific part has been moved to Flagellum/main_driver.cpp +// { +// amrex::ParmParse pp("particles"); +//#ifdef AMREX_USE_GPU +// bool particles_do_tiling = true; +//#else +// bool particles_do_tiling = false; +//#endif +// pp.queryAdd("do_tiling", particles_do_tiling); +// } + + // argv[1] contains the name of the inputs file entered at the command line + main_driver(argv[1]); + + amrex::Finalize(); + + return 0; +} diff --git a/exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp b/exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp new file mode 100644 index 000000000..2c8c7fab4 --- /dev/null +++ b/exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp @@ -0,0 +1,358 @@ +#include "spectral_functions.H" +#include +#include +#include "AMReX_ParmParse.H" + +#include "chrono" + +using namespace std::chrono; +using namespace amrex; + +// argv contains the name of the inputs file entered at the command line +void main_driver(const char* argv) +{ + BL_PROFILE_VAR("main_driver()",main_driver); + + amrex::Vector nodal_flag_dir; + amrex::IntVect nodal_flag_x; + amrex::IntVect nodal_flag_y; + amrex::IntVect nodal_flag_z; + 
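+    // The nodal flags built here are AMReX index types: nodal_flag_x = (1,0,0),
+    // nodal_flag_y = (0,1,0), nodal_flag_z = (0,0,1). They are used further down
+    // with convert(prim.boxArray(), nodal_flag_*) to turn the cell-centered
+    // BoxArray into the face-centered BoxArrays that hold gradU and faceTemp.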
nodal_flag_dir.resize(3); + + for (int i=0; i<3; ++i) { + nodal_flag_x[i] = int(i==0); + nodal_flag_y[i] = int(i==1); + nodal_flag_z[i] = int(i==2); + AMREX_D_TERM(nodal_flag_dir[0][i] = nodal_flag_x[i];, + nodal_flag_dir[1][i] = nodal_flag_y[i];, + nodal_flag_dir[2][i] = nodal_flag_z[i];); + } + + // timer + Real ts1 = ParallelDescriptor::second(); + + std::string inputs_file = argv; + + ParmParse pp; + amrex::Vector temp_real(3,0.); + amrex::Vector temp_int (3,0 ); + + amrex::Vector max_grid_size(3,1 ); + amrex::Vector n_cells(3,0 ); + amrex::Vector prob_lo(3,0 ); + amrex::Vector prob_hi(3,0 ); + + if (pp.queryarr("n_cells",temp_int,0,3)) { + for (int i=0; i<3; ++i) { + n_cells[i] = temp_int[i]; + } + } + int npts = n_cells[0]*n_cells[1]*n_cells[2]; + if (pp.queryarr("prob_lo",temp_real,0,3)) { + for (int i=0; i<3; ++i) { + prob_lo[i] = temp_real[i]; + } + } + if (pp.queryarr("prob_hi",temp_real,0,3)) { + for (int i=0; i<3; ++i) { + prob_hi[i] = temp_real[i]; + } + } + pp.queryarr("max_grid_size",max_grid_size,0,3); + + int restart; + pp.query("restart",restart); + + int nprimvars; + pp.query("nprimvars",nprimvars); + + amrex::IntVect ngc; + for (int i=0; i<3; ++i) { + ngc[i] = 1; // number of ghost cells + } + if (pp.queryarr("ngc",temp_int,0,3)) { + for (int i=0; i<3; ++i) { + ngc[i] = temp_int[i]; + } + } + + amrex::Real kmin; + pp.query("kmin",kmin); + + amrex::Real kmax; + pp.query("kmax",kmax); + + std::array< MultiFab, 3 > vel; + MultiFab prim; + + // make BoxArray and Geometry + BoxArray ba; + Geometry geom; + DistributionMapping dmap; + + IntVect dom_lo(AMREX_D_DECL( 0, 0, 0)); + IntVect dom_hi(AMREX_D_DECL(n_cells[0]-1, n_cells[1]-1, n_cells[2]-1)); + Box domain(dom_lo, dom_hi); + + // This defines the physical box, [-1,1] in each direction. 
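+    // (In this driver the extents are whatever prob_lo/prob_hi were read from the
+    // inputs above, so the domain is [prob_lo, prob_hi] in each direction rather
+    // than necessarily [-1,1].)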
+ RealBox real_box({AMREX_D_DECL(prob_lo[0],prob_lo[1],prob_lo[2])}, + {AMREX_D_DECL(prob_hi[0],prob_hi[1],prob_hi[2])}); + + // This defines a Geometry object + Vector is_periodic(3,1); // force to be periodic -- can change later + geom.define(domain,&real_box,CoordSys::cartesian,is_periodic.data()); + + const Real* dx = geom.CellSize(); + const RealBox& realDomain = geom.ProbDomain(); + + SpectralReadCheckPoint(geom, domain, prim, vel, ba, dmap, n_cells, nprimvars, max_grid_size, ngc, restart); + + MultiFab MFTurbScalar; + MultiFab MFTurbVel; + MultiFab vel_decomp_filter; + MultiFab scalar_filter; + MFTurbVel.define(ba, dmap, 3, 0); + MFTurbScalar.define(ba, dmap, 1, 0); + vel_decomp_filter.define(ba, dmap, 9, 0); + scalar_filter.define(ba, dmap, 1, 0); + vel_decomp_filter.setVal(0.0); + scalar_filter.setVal(0.0); + + // Set BC: 1) fill boundary 2) physical + for (int d=0; d<3; d++) { + vel[d].FillBoundary(geom.periodicity()); + } + prim.FillBoundary(geom.periodicity()); + + for(int d=0; d<3; d++) { + ShiftFaceToCC(vel[d], 0, MFTurbVel, d, 1); + } + MultiFab::Copy(MFTurbScalar, prim, 0, 0, 1, 0); + + SpectralVelDecomp(MFTurbVel, vel_decomp_filter, kmin, kmax, geom, n_cells); + SpectralScalarDecomp(MFTurbScalar, scalar_filter, kmin, kmax, geom, n_cells); + + SpectralWritePlotFile(restart, kmin, kmax, geom, vel_decomp_filter, scalar_filter); + + // Turbulence Diagnostics + Real u_rms, u_rms_s, u_rms_d, delta_u_rms; + Real taylor_len, taylor_Re_eta; + Real skew, skew_s, skew_d, kurt, kurt_s, kurt_d; + { + vel_decomp_filter.FillBoundary(geom.periodicity()); + + Vector dProb(3); + dProb[0] = 1.0/((n_cells[0]+1)*n_cells[1]*n_cells[2]); + dProb[1] = 1.0/((n_cells[1]+1)*n_cells[2]*n_cells[0]); + dProb[2] = 1.0/((n_cells[2]+1)*n_cells[0]*n_cells[1]); + + // Setup temp MultiFabs + std::array< MultiFab, AMREX_SPACEDIM > gradU; + std::array< MultiFab, AMREX_SPACEDIM > faceTemp; + MultiFab sound_speed; + MultiFab ccTemp; + MultiFab ccTempA; + AMREX_D_TERM(gradU[0].define(convert(prim.boxArray(),nodal_flag_x), prim.DistributionMap(), 6, 0);, + gradU[1].define(convert(prim.boxArray(),nodal_flag_y), prim.DistributionMap(), 6, 0);, + gradU[2].define(convert(prim.boxArray(),nodal_flag_z), prim.DistributionMap(), 6, 0);); + AMREX_D_TERM(faceTemp[0].define(convert(prim.boxArray(),nodal_flag_x), prim.DistributionMap(), 1, 0);, + faceTemp[1].define(convert(prim.boxArray(),nodal_flag_y), prim.DistributionMap(), 1, 0);, + faceTemp[2].define(convert(prim.boxArray(),nodal_flag_z), prim.DistributionMap(), 1, 0);); + sound_speed.define(prim.boxArray(),prim.DistributionMap(),1,0); + ccTemp.define(prim.boxArray(),prim.DistributionMap(),1,0); + ccTempA.define(prim.boxArray(),prim.DistributionMap(),1,0); + + // Setup temp variables + Vector gradU2_temp(3); + Vector gradU2(3); + Vector gradU3(3); + Vector gradU4(3); + Vector gradU2_s(3); + Vector gradU3_s(3); + Vector gradU4_s(3); + Vector gradU2_d(3); + Vector gradU3_d(3); + Vector gradU4_d(3); + + Vector comps {0,1,2}; + Vector comps_s{3,4,5}; + Vector comps_d{6,7,8}; + + // turbulent kinetic energy (total) + ccTemp.setVal(0.0); + MultiFab::AddProduct(ccTemp,vel_decomp_filter,0,vel_decomp_filter,0,0,1,0); //uu + MultiFab::AddProduct(ccTemp,vel_decomp_filter,1,vel_decomp_filter,1,0,1,0); //vv + MultiFab::AddProduct(ccTemp,vel_decomp_filter,2,vel_decomp_filter,2,0,1,0); //ww + u_rms = ccTemp.sum(0)/npts; + u_rms = sqrt(u_rms/3.0); + MultiFab::Multiply(ccTemp,prim,0,0,1,0); // rho*(uu+vv+ww) + + // turbulent kinetic energy (solenoidal) + ccTemp.setVal(0.0); + 
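+        // As in the total-KE block above, ccTemp accumulates pointwise products of
+        // velocity components and u_rms_s = sqrt( sum(ccTemp)/npts / 3 ), computed
+        // before the multiply by rho. Here the solenoidal components (3,4,5) are
+        // paired with the total-velocity components (0,1,2); the dilatational block
+        // below pairs (6,7,8) with (0,1,2) in the same way.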
MultiFab::AddProduct(ccTemp,vel_decomp_filter,3,vel_decomp_filter,0,0,1,0); //uu + MultiFab::AddProduct(ccTemp,vel_decomp_filter,4,vel_decomp_filter,1,0,1,0); //vv + MultiFab::AddProduct(ccTemp,vel_decomp_filter,5,vel_decomp_filter,2,0,1,0); //ww + u_rms_s = ccTemp.sum(0)/npts; + u_rms_s = sqrt(u_rms_s/3.0); + MultiFab::Multiply(ccTemp,prim,0,0,1,0); // rho*(uu+vv+ww) + + // turbulent kinetic energy (dilatational) + ccTemp.setVal(0.0); + MultiFab::AddProduct(ccTemp,vel_decomp_filter,6,vel_decomp_filter,0,0,1,0); //uu + MultiFab::AddProduct(ccTemp,vel_decomp_filter,7,vel_decomp_filter,1,0,1,0); //vv + MultiFab::AddProduct(ccTemp,vel_decomp_filter,8,vel_decomp_filter,2,0,1,0); //ww + u_rms_d = ccTemp.sum(0)/npts; + u_rms_d = sqrt(u_rms_d/3.0); + MultiFab::Multiply(ccTemp,prim,0,0,1,0); // rho*(uu+vv+ww) + + // ratio of turbulent kinetic energies + delta_u_rms = u_rms_d/u_rms_s; + + // compute gradU = [du/dx dv/dy dw/dz] at cell-centers + ComputeGrad(vel_decomp_filter,gradU,0,0,9,-1,geom,0); + + // Compute Velocity gradient moment sum + // 2nd moment (total) + FCMoments(gradU,comps,faceTemp,2,gradU2_temp); + gradU2[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity())); + gradU2[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity())); + gradU2[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity())); + ccTemp.setVal(0.0); + ccTempA.setVal(0.0); + ShiftFaceToCC(faceTemp[0],0,ccTempA,0,1); + MultiFab::Add(ccTemp,ccTempA,0,0,1,0); + ShiftFaceToCC(faceTemp[1],0,ccTempA,0,1); + MultiFab::Add(ccTemp,ccTempA,0,0,1,0); + ShiftFaceToCC(faceTemp[2],0,ccTempA,0,1); + MultiFab::Add(ccTemp,ccTempA,0,0,1,0); + Real avg_mom2 = ccTemp.sum(0)/npts; + // 2nd moment (solenoidal) + FCMoments(gradU,comps_s,faceTemp,2,gradU2_temp); + gradU2_s[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity())); + gradU2_s[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity())); + gradU2_s[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity())); + // 2nd moment (dilatational) + FCMoments(gradU,comps_d,faceTemp,2,gradU2_temp); + gradU2_d[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity())); + gradU2_d[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity())); + gradU2_d[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity())); + + // Taylor Mach + //ComputeSoundSpeed(sound_speed,prim,2); + //Real c_speed = sound_speed.sum(0)/npts; + Real rho_avg = prim.sum(0)/npts; + // Taylor Ma + //taylor_Ma = sqrt(3.0)*u_rms/c_speed; + // Taylor Microscale + taylor_len = sqrt(3.0)*u_rms/sqrt(avg_mom2); // from Wang et al., JFM, 2012 + taylor_Re_eta = rho_avg*taylor_len*u_rms; // from from John, Donzis, Sreenivasan, PRL 2019 + + // Compute Velocity gradient moment sum + // 3rd moment (total) + FCMoments(gradU,comps,faceTemp,3,gradU2_temp); + gradU3[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity())); + gradU3[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity())); + gradU3[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity())); + // 3rd moment (solenoidal) + FCMoments(gradU,comps_s,faceTemp,3,gradU2_temp); + gradU3_s[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity())); + gradU3_s[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity())); + gradU3_s[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity())); + // 3rd moment (dilatational) + FCMoments(gradU,comps_d,faceTemp,3,gradU2_temp); + gradU3_d[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity())); + 
gradU3_d[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity())); + gradU3_d[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity())); + + // Compute Velocity gradient moment sum + // 4th moment (total) + FCMoments(gradU,comps,faceTemp,4,gradU2_temp); + gradU4[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity())); + gradU4[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity())); + gradU4[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity())); + // 4th moment (solenoidal) + FCMoments(gradU,comps_s,faceTemp,4,gradU2_temp); + gradU4_s[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity())); + gradU4_s[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity())); + gradU4_s[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity())); + // 4th moment (dilatational) + FCMoments(gradU,comps_d,faceTemp,4,gradU2_temp); + gradU4_d[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity())); + gradU4_d[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity())); + gradU4_d[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity())); + + // Skewness + // <\sum_i (du_i/dx_i)^3> / (\sum_i <(du_i/dx_i)^2>^1.5) + skew = (gradU3[0] + gradU3[1] + gradU3[2])/ + (pow(gradU2[0],1.5) + pow(gradU2[1],1.5) + pow(gradU2[2],1.5)); + skew_s = (gradU3_s[0] + gradU3_s[1] + gradU3_s[2])/ + (pow(gradU2_s[0],1.5) + pow(gradU2_s[1],1.5) + pow(gradU2_s[2],1.5)); + skew_d = (gradU3_d[0] + gradU3_d[1] + gradU3_d[2])/ + (pow(gradU2_d[0],1.5) + pow(gradU2_d[1],1.5) + pow(gradU2_d[2],1.5)); + + // Kurtosis + // <\sum_i (du_i/dx_i)^4> / (\sum_i <(du_i/dx_i)^2>^2) + kurt = (gradU4[0] + gradU4[1] + gradU4[2])/ + (pow(gradU2[0],2.0) + pow(gradU2[1],2.0) + pow(gradU2[2],2.0)); + kurt_s = (gradU4_s[0] + gradU4_s[1] + gradU4_s[2])/ + (pow(gradU2_s[0],2.0) + pow(gradU2_s[1],2.0) + pow(gradU2_s[2],2.0)); + kurt_d = (gradU4_d[0] + gradU4_d[1] + gradU4_d[2])/ + (pow(gradU2_d[0],2.0) + pow(gradU2_d[1],2.0) + pow(gradU2_d[2],2.0)); + } + std::string turbfilename = "turbstats_"; + std::ostringstream os; + os << std::setprecision(3) << kmin; + turbfilename += os.str();; + std::ostringstream oss; + oss << std::setprecision(3) << kmax; + turbfilename += oss.str(); + + std::ofstream turboutfile; + turboutfile.open(turbfilename, std::ios::app); + turboutfile << "u_rms " << "u_rms_s " << "u_rms_d " << "delta_u_rms " + << "TaylorLen " << "TaylorRe*Eta " + << "skew " << "skew_s " << "skew_d " + << "kurt " << "kurt_s " << "kurt_d " + << std::endl; + turboutfile << u_rms << " "; + turboutfile << u_rms_s << " "; + turboutfile << u_rms_d << " "; + turboutfile << delta_u_rms << " "; + turboutfile << taylor_len << " "; + turboutfile << taylor_Re_eta << " "; + turboutfile << skew << " "; + turboutfile << skew_s << " "; + turboutfile << skew_d << " "; + turboutfile << kurt << " "; + turboutfile << kurt_s << " "; + turboutfile << kurt_d << " "; + turboutfile << std::endl; + + // timer + Real ts2 = ParallelDescriptor::second() - ts1; + ParallelDescriptor::ReduceRealMax(ts2, ParallelDescriptor::IOProcessorNumber()); + amrex::Print() << "Time (spectral filtering) " << ts2 << " seconds\n"; + + // MultiFab memory usage + const int IOProc = ParallelDescriptor::IOProcessorNumber(); + + amrex::Long min_fab_megabytes = amrex::TotalBytesAllocatedInFabsHWM()/1048576; + amrex::Long max_fab_megabytes = min_fab_megabytes; + + ParallelDescriptor::ReduceLongMin(min_fab_megabytes, IOProc); + ParallelDescriptor::ReduceLongMax(max_fab_megabytes, IOProc); + + amrex::Print() << 
"High-water FAB megabyte spread across MPI nodes: [" + << min_fab_megabytes << " ... " << max_fab_megabytes << "]\n"; + + min_fab_megabytes = amrex::TotalBytesAllocatedInFabs()/1048576; + max_fab_megabytes = min_fab_megabytes; + + ParallelDescriptor::ReduceLongMin(min_fab_megabytes, IOProc); + ParallelDescriptor::ReduceLongMax(max_fab_megabytes, IOProc); + + amrex::Print() << "Curent FAB megabyte spread across MPI nodes: [" + << min_fab_megabytes << " ... " << max_fab_megabytes << "]\n"; + + turboutfile.close(); +} diff --git a/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H new file mode 100644 index 000000000..5cd30f678 --- /dev/null +++ b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H @@ -0,0 +1,100 @@ +#ifndef _spectral_functions_stag_H_ +#define _spectral_functions_stag_H_ + +#include +#include +#include +#include + +#include +#include + + +#ifdef AMREX_USE_CUDA +#include +#elif AMREX_USE_HIP +# if __has_include() // ROCm 5.3+ +# include +# else +# include +# endif +#else +#include +#include +#endif + +#include + +#include + +#define ALIGN 16 + +using namespace amrex; + +#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) +#ifdef AMREX_USE_CUDA +std::string cufftError (const cufftResult& err); +#endif +#ifdef AMREX_USE_HIP +std::string rocfftError (const rocfft_status err); +void Assert_rocfft_status (std::string const& name, rocfft_status status); +#endif +#endif + +void SpectralReadCheckPoint(amrex::Geometry& geom, + const amrex::Box& domain, + amrex::MultiFab& prim, + std::array& vel, + BoxArray& ba, DistributionMapping& dmap, + const amrex::Vector n_cells, + const int nprimvars, + const amrex::Vector max_grid_size, + const amrex::IntVect ngc, + const int restart); + +void SpectralVelDecomp(const MultiFab& vel, + MultiFab& vel_decomp_filter, + const amrex::Real kmin, + const amrex::Real kmax, + const amrex::Geometry& geom, + const amrex::Vector n_cells); + +void SpectralScalarDecomp(const MultiFab& scalar, + MultiFab& scalar_filter, + const amrex::Real kmin, + const amrex::Real kmax, + const amrex::Geometry& geom, + const amrex::Vector n_cells); + +void SpectralWritePlotFile(const int step, + const amrex::Real& kmin, + const amrex::Real& kmax, + const amrex::Geometry& geom, + const amrex::MultiFab& vel_decomp_in, + const amrex::MultiFab& scalar_in); + +void Read_Copy_MF_Checkpoint(amrex::MultiFab& mf, std::string mf_name, + const std::string& checkpointname, + BoxArray& ba_old, DistributionMapping& dmap_old, + int NVARS, int NGC, const amrex::IntVect ngc, + int nodal_flag=-1); + +void ShiftFaceToCC(const MultiFab& face_in, int face_in_comp, + MultiFab& cc_in, int cc_in_comp, + int ncomp); + +void ComputeGrad(const MultiFab & phi_in, std::array & gphi, + int start_incomp, int start_outcomp, int ncomp, int bccomp, const Geometry & geom, + int increment); + +void SumStag(const std::array& m1, + amrex::Vector& sum); + +void FCMoments(const std::array& m1, + const amrex::Vector& comps, + std::array& mscr, + const int& power, + amrex::Vector& prod_val); + +#endif + diff --git a/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp new file mode 100644 index 000000000..1569b4e4f --- /dev/null +++ b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp @@ -0,0 +1,1005 @@ +#include "spectral_functions.H" +#include "AMReX_PlotFileUtil.H" +#include "AMReX_PlotFileDataImpl.H" + +#include + +#include 
"chrono" +#include +#include "AMReX_PlotFileUtil.H" +#include "AMReX_BoxArray.H" + +using namespace std::chrono; + +namespace { + void GotoNextLine (std::istream& is) + { + constexpr std::streamsize bl_ignore_max { 100000 }; + is.ignore(bl_ignore_max, '\n'); + } +} + +void SpectralReadCheckPoint(amrex::Geometry& geom, + const amrex::Box& domain, + amrex::MultiFab& prim, + std::array& vel, + BoxArray& ba, DistributionMapping& dmap, + const amrex::Vector n_cells, + const int nprimvars, + const amrex::Vector max_grid_size, + const amrex::IntVect ngc, + const int restart) +{ + // timer for profiling + BL_PROFILE_VAR("SpectralReadCheckPoint()",SpectralReadCheckPoint); + + // checkpoint file name, e.g., chk0000010 + const std::string& checkpointname = amrex::Concatenate("chk",restart,9); + + amrex::Print() << "Restart from checkpoint " << checkpointname << "\n"; + + VisMF::IO_Buffer io_buffer(VisMF::GetIOBufferSize()); + + std::string line, word; + + // read in old boxarray, and create old distribution map (this is to read in MFabs) + BoxArray ba_old; + DistributionMapping dmap_old; + + // initialize new boxarray + ba.define(domain); + ba.maxSize(IntVect(max_grid_size)); + dmap.define(ba, ParallelDescriptor::NProcs()); + + amrex::Vector nodal_flag_dir; + amrex::IntVect nodal_flag_x; + amrex::IntVect nodal_flag_y; + amrex::IntVect nodal_flag_z; + nodal_flag_dir.resize(3); + + for (int i=0; i<3; ++i) { + nodal_flag_x[i] = int(i==0); + nodal_flag_y[i] = int(i==1); + nodal_flag_z[i] = int(i==2); + AMREX_D_TERM(nodal_flag_dir[0][i] = nodal_flag_x[i];, + nodal_flag_dir[1][i] = nodal_flag_y[i];, + nodal_flag_dir[2][i] = nodal_flag_z[i];); + } + + // Header + { + std::string File(checkpointname + "/Header"); + Vector fileCharPtr; + ParallelDescriptor::ReadAndBcastFile(File, fileCharPtr); + std::string fileCharPtrString(fileCharPtr.dataPtr()); + std::istringstream is(fileCharPtrString, std::istringstream::in); + + // read in title line + std::getline(is, line); + + // read in time step number + int step; + is >> step; + GotoNextLine(is); + + // read in time + Real time; + is >> time; + GotoNextLine(is); + + // read in statsCount + int statsCount; + is >> statsCount; + GotoNextLine(is); + + // read in BoxArray (fluid) from Header + ba_old.readFrom(is); + GotoNextLine(is); + + // create old distribution mapping + dmap_old.define(ba_old, ParallelDescriptor::NProcs()); + + prim.define(ba,dmap,nprimvars,ngc); + // velocity and momentum (instantaneous, means, variances) + for (int d=0; d n_cells) +{ + BL_PROFILE_VAR("SpectralVelDecomp()",SpectralVelDecomp); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, + "SpectralVelDecomp: must have 3 components of input vel MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.local_size() == 1, + "SpectralVelDecomp: Must have one Box per MPI process when using heFFTe"); + + const GpuArray dx = geom.CellSizeArray(); + + long npts; + Box domain = geom.Domain(); + npts = (domain.length(0)*domain.length(1)*domain.length(2)); + Real sqrtnpts = std::sqrt(npts); + + // get box array and distribution map of vel + DistributionMapping dm = vel.DistributionMap(); + BoxArray ba = vel.boxArray(); + + // since there is 1 MPI rank per box, each MPI rank obtains its local box and the associated boxid + Box local_box; + int local_boxid; + { + for (int i = 0; i < ba.size(); ++i) { + Box b = ba[i]; + // each MPI rank has its own local_box Box and local_boxid ID + if (ParallelDescriptor::MyProc() == dm[i]) { + local_box = b; + local_boxid = i; + } + } + } + + // now each MPI rank works on 
its own box + // for real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset + + // start by coarsening each box by 2 in the x-direction + Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1))); + + // if the coarsened box's high-x index is even, we shrink the size in 1 in x + // this avoids overlap between coarsened boxes + if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) { + c_local_box.setBig(0,c_local_box.bigEnd(0)-1); + } + // for any boxes that touch the hi-x domain we + // increase the size of boxes by 1 in x + // this makes the overall fft dataset have size (Nx/2+1 x Ny x Nz) + if (local_box.bigEnd(0) == geom.Domain().bigEnd(0)) { + c_local_box.growHi(0,1); + } + + // each MPI rank gets storage for its piece of the fft + BaseFab > spectral_field_Tx(c_local_box, 1, The_Device_Arena()); // totalx + BaseFab > spectral_field_Ty(c_local_box, 1, The_Device_Arena()); // totaly + BaseFab > spectral_field_Tz(c_local_box, 1, The_Device_Arena()); // totalz + BaseFab > spectral_field_Sx(c_local_box, 1, The_Device_Arena()); // solenoidalx + BaseFab > spectral_field_Sy(c_local_box, 1, The_Device_Arena()); // solenoidaly + BaseFab > spectral_field_Sz(c_local_box, 1, The_Device_Arena()); // solenoidalz + BaseFab > spectral_field_Dx(c_local_box, 1, The_Device_Arena()); // dilatationalx + BaseFab > spectral_field_Dy(c_local_box, 1, The_Device_Arena()); // dilatationaly + BaseFab > spectral_field_Dz(c_local_box, 1, The_Device_Arena()); // dilatationalz + MultiFab vel_single(ba, dm, 1, 0); + + int r2c_direction = 0; + + // ForwardTransform + // X + using heffte_complex = typename heffte::fft_output::type; + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + vel_single.ParallelCopy(vel, 0, 0, 1); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tx.dataPtr(); + fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); + } + // Y + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + vel_single.ParallelCopy(vel, 1, 0, 1); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Ty.dataPtr(); + fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); + } + // Z + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + 
r2c_direction, ParallelDescriptor::Communicator()); + + vel_single.ParallelCopy(vel, 2, 0, 1); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tz.dataPtr(); + fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); + } + + Gpu::streamSynchronize(); + + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + + // Decompose velocity field into solenoidal and dilatational + Array4< GpuComplex > spectral_tx = spectral_field_Tx.array(); + Array4< GpuComplex > spectral_ty = spectral_field_Ty.array(); + Array4< GpuComplex > spectral_tz = spectral_field_Tz.array(); + Array4< GpuComplex > spectral_sx = spectral_field_Sx.array(); + Array4< GpuComplex > spectral_sy = spectral_field_Sy.array(); + Array4< GpuComplex > spectral_sz = spectral_field_Sz.array(); + Array4< GpuComplex > spectral_dx = spectral_field_Dx.array(); + Array4< GpuComplex > spectral_dy = spectral_field_Dy.array(); + Array4< GpuComplex > spectral_dz = spectral_field_Dz.array(); + ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) + { + + Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0; + + if (i <= nx/2) { + // Gradient Operators + GxR = (cos(2.0*M_PI*i/nx)-1.0)/dx[0]; + GxC = (sin(2.0*M_PI*i/nx)-0.0)/dx[0]; + GyR = (cos(2.0*M_PI*j/ny)-1.0)/dx[1]; + GyC = (sin(2.0*M_PI*j/ny)-0.0)/dx[1]; + GzR = (cos(2.0*M_PI*k/nz)-1.0)/dx[2]; + GzC = (sin(2.0*M_PI*k/nz)-0.0)/dx[2]; + } + else { // conjugate + amrex::Abort("check the code; i should not go beyond bx.length(0)/2"); + } + + // Get the wavenumber + int ki = i; + int kj = j; + int kk = k; + Real knum = (ki*ki + kj*kj + kk*kk); + knum = std::sqrt(knum); + + // Scale Total velocity FFT components with Filtering + if ((knum >= kmin) and (knum <= kmax)) { + spectral_tx(i,j,k) *= (1.0/sqrtnpts); + spectral_ty(i,j,k) *= (1.0/sqrtnpts); + spectral_tz(i,j,k) *= (1.0/sqrtnpts); + } + else { + spectral_tx(i,j,k) *= 0.0; + spectral_ty(i,j,k) *= 0.0; + spectral_tz(i,j,k) *= 0.0; + } + + // Inverse Laplacian + Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; + + // Divergence of vel + Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + + spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + + spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; + Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + + spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + + spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; + + if (Lap < 1.0e-12) { // zero mode for no bulk motion + spectral_dx(i,j,k) *= 0.0; + spectral_dy(i,j,k) *= 0.0; + spectral_dz(i,j,k) *= 0.0; + } + else { + + // Dilatational velocity + GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, + (divC*GxR - divR*GxC) / Lap); + spectral_dx(i,j,k) = copy_dx; + + GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, + (divC*GyR - divR*GyC) / Lap); + spectral_dy(i,j,k) = copy_dy; + + GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, + (divC*GzR - divR*GzC) / Lap); + spectral_dz(i,j,k) = copy_dz; + } + + // Solenoidal velocity + spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); + spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); + spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); + + }); + + Gpu::streamSynchronize(); + + MultiFab vel_decomp_filter_single(ba, dm, 1, 0); + // inverse Fourier transform filtered total velocity + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif 
defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tx.dataPtr(); + fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 0, 1); + } + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Ty.dataPtr(); + fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 1, 1); + } + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tz.dataPtr(); + fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 2, 1); + } + // inverse Fourier transform filtered solenoidal and dilatational components + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sx.dataPtr(); + fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 3, 1); + } + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + 
r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sy.dataPtr(); + fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 4, 1); + } + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sz.dataPtr(); + fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 5, 1); + } + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dx.dataPtr(); + fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 6, 1); + } + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dy.dataPtr(); + fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 7, 1); + } + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dz.dataPtr(); + fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 8, 1); + } + + + vel_decomp_filter.mult(1.0/sqrtnpts); + +} + + +void SpectralScalarDecomp(const MultiFab& scalar, + 
MultiFab& scalar_filter, + const amrex::Real kmin, + const amrex::Real kmax, + const amrex::Geometry& geom, + const amrex::Vector n_cells) +{ + BL_PROFILE_VAR("SpectralScalarDecomp()",SpectralScalarDecomp); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(scalar.nComp() == 1, + "SpectralScalarDecomp: must have 1 components of input scalar MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(scalar.local_size() == 1, + "SpectralScalarDecomp: Must have one Box per MPI process when using heFFTe"); + + const GpuArray dx = geom.CellSizeArray(); + + long npts; + Box domain = geom.Domain(); + npts = (domain.length(0)*domain.length(1)*domain.length(2)); + Real sqrtnpts = std::sqrt(npts); + + // get box array and distribution map of vel + DistributionMapping dm = scalar.DistributionMap(); + BoxArray ba = scalar.boxArray(); + + // since there is 1 MPI rank per box, each MPI rank obtains its local box and the associated boxid + Box local_box; + int local_boxid; + { + for (int i = 0; i < ba.size(); ++i) { + Box b = ba[i]; + // each MPI rank has its own local_box Box and local_boxid ID + if (ParallelDescriptor::MyProc() == dm[i]) { + local_box = b; + local_boxid = i; + } + } + } + + // now each MPI rank works on its own box + // for real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset + + // start by coarsening each box by 2 in the x-direction + Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1))); + + // if the coarsened box's high-x index is even, we shrink the size in 1 in x + // this avoids overlap between coarsened boxes + if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) { + c_local_box.setBig(0,c_local_box.bigEnd(0)-1); + } + // for any boxes that touch the hi-x domain we + // increase the size of boxes by 1 in x + // this makes the overall fft dataset have size (Nx/2+1 x Ny x Nz) + if (local_box.bigEnd(0) == geom.Domain().bigEnd(0)) { + c_local_box.growHi(0,1); + } + + // each MPI rank gets storage for its piece of the fft + BaseFab > spectral_field(c_local_box, 1, The_Device_Arena()); + MultiFab scalar_single(ba, dm, 1, 0); + + int r2c_direction = 0; + + // ForwardTransform + using heffte_complex = typename heffte::fft_output::type; + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + scalar_single.ParallelCopy(scalar, 0, 0, 1); + heffte_complex* spectral_data = (heffte_complex*) spectral_field.dataPtr(); + fft.forward(scalar_single[local_boxid].dataPtr(),spectral_data); + } + + Gpu::streamSynchronize(); + + // filtering + Array4< GpuComplex > spectral = spectral_field.array(); + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) + { + + if (i <= nx/2) { + } + else { // conjugate + amrex::Abort("check the code; i should not go beyond bx.length(0)/2"); + } + + // Get the wavenumber + int ki = i; + int kj = j; + int kk = k; + Real knum = (ki*ki + kj*kj + kk*kk); + knum = std::sqrt(knum); + + // Scale Scalar FFT components with Filtering + if ((knum >= kmin) and (knum <= kmax)) { + spectral(i,j,k) *= (1.0/sqrtnpts); + 
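A note on the 1/sqrtnpts factors: the heFFTe transforms are used here without a scaling argument, so a forward plus backward pass multiplies the data by npts; scaling the retained modes by 1/sqrt(npts) in this loop and the back-transformed field by 1/sqrt(npts) again (the mult() near the end of the routine) restores the original amplitude. A tiny standalone check with a naive DFT (not part of the patch; N is made up):

#include <algorithm>
#include <cmath>
#include <complex>
#include <cstdio>
#include <vector>

int main ()
{
    const int N = 8;                                   // stands in for npts
    const double pi = std::acos(-1.0);
    std::vector<std::complex<double>> x(N), X(N), y(N);
    for (int n = 0; n < N; ++n) x[n] = std::cos(2.0*pi*n/N) + 0.25;  // made-up data

    for (int k = 0; k < N; ++k)                        // unnormalized forward DFT
        for (int n = 0; n < N; ++n)
            X[k] += x[n]*std::polar(1.0, -2.0*pi*k*n/N);

    for (auto& Xk : X) Xk /= std::sqrt(double(N));     // first 1/sqrt(N), as in the filter

    for (int n = 0; n < N; ++n)                        // unnormalized backward DFT
        for (int k = 0; k < N; ++k)
            y[n] += X[k]*std::polar(1.0, +2.0*pi*k*n/N);

    double err = 0.;
    for (int n = 0; n < N; ++n)                        // second 1/sqrt(N), as in mult()
        err = std::max(err, std::abs(y[n]/std::sqrt(double(N)) - x[n]));
    std::printf("max round-trip error = %.3e\n", err);
    return 0;
}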
spectral(i,j,k) *= (1.0/sqrtnpts); + spectral(i,j,k) *= (1.0/sqrtnpts); + } + else { + spectral(i,j,k) *= 0.0; + spectral(i,j,k) *= 0.0; + spectral(i,j,k) *= 0.0; + } + }); + + Gpu::streamSynchronize(); + + MultiFab scalar_filter_single(ba, dm, 1, 0); + // inverse Fourier transform filtered scalar + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field.dataPtr(); + fft.backward(spectral_data, scalar_filter_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + scalar_filter.ParallelCopy(scalar_filter_single, 0, 0, 1); + } + + scalar_filter.mult(1.0/sqrtnpts); + +} + +void SpectralWritePlotFile(const int step, + const amrex::Real& kmin, + const amrex::Real& kmax, + const amrex::Geometry& geom, + const amrex::MultiFab& vel_decomp_in, + const amrex::MultiFab& scalar_in) +{ + + MultiFab output; + + // Cell-Centered Velocity Gradient Stats (1,2,3 are directions) + // 0: ux + // 1: uy + // 2: uz + // 3: ux_s + // 4: uy_s + // 5: uz_s + // 6: ux_d + // 7: uy_d + // 8: uz_d + // 9: umag + // 10: umag_s + // 11: umag_d + // 12: scalar + // 13: divergence = u_1,1 + u_2,2 + u_3,3 + // 14: vorticity w1 + // 15: vorticity w2 + // 16: vorticity w3 + // 17: vorticity mag: sqrt(w1**2 + w2**2 + w3**2) + output.define(vel_decomp_in.boxArray(), vel_decomp_in.DistributionMap(), 18, 0); + output.setVal(0.0); + + const GpuArray dx = geom.CellSizeArray(); + + for ( MFIter mfi(output,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.tilebox(); + + const Array4< Real>& out = output.array(mfi); + + const Array4& v_decomp = vel_decomp_in.array(mfi); + + const Array4& sca = scalar_in.array(mfi); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + + out(i,j,k,0) = v_decomp(i,j,k,0); + out(i,j,k,1) = v_decomp(i,j,k,1); + out(i,j,k,2) = v_decomp(i,j,k,2); + out(i,j,k,3) = v_decomp(i,j,k,3); + out(i,j,k,4) = v_decomp(i,j,k,4); + out(i,j,k,5) = v_decomp(i,j,k,5); + out(i,j,k,6) = v_decomp(i,j,k,6); + out(i,j,k,7) = v_decomp(i,j,k,7); + out(i,j,k,8) = v_decomp(i,j,k,8); + + out(i,j,k,9) = std::sqrt(out(i,j,k,0)*out(i,j,k,0) + out(i,j,k,1)*out(i,j,k,1) + out(i,j,k,2)*out(i,j,k,2)); // mag + out(i,j,k,10) = std::sqrt(out(i,j,k,3)*out(i,j,k,3) + out(i,j,k,4)*out(i,j,k,4) + out(i,j,k,5)*out(i,j,k,5)); // mag solednoidal + out(i,j,k,11) = std::sqrt(out(i,j,k,6)*out(i,j,k,6) + out(i,j,k,7)*out(i,j,k,7) + out(i,j,k,8)*out(i,j,k,8)); // mag solednoidal + + out(i,j,k,12) = sca(i,j,k,0); + + // divergence + out(i,j,k,13) = 0.5*( (v_decomp(i+1,j,k,0) - v_decomp(i-1,j,k,0))/dx[0] + + (v_decomp(i,j+1,k,1) - v_decomp(i,j-1,k,1))/dx[1] + + (v_decomp(i,j,k+1,2) - v_decomp(i,j,k-1,2))/dx[2] ); + + // curl w1 = u_2,1 - u_1,2 + out(i,j,k,14) = 0.5*( (v_decomp(i+1,j,k,1) - v_decomp(i-1,j,k,1))/dx[0] - + (v_decomp(i,j+1,k,0) - v_decomp(i,j-1,k,0))/dx[1] ); + + // curl w2 = u_1,3 - u_3,1 + out(i,j,k,15) = 0.5*( (v_decomp(i,j,k+1,0) - v_decomp(i,j,k-1,0))/dx[2] - + (v_decomp(i+1,j,k,2) - v_decomp(i-1,j,k,2))/dx[0] ); + + // curl w2 = u_3,2 - u_2,3 + out(i,j,k,16) = 
0.5*( (v_decomp(i,j+1,k,2) - v_decomp(i,j-1,k,2))/dx[1] - + (v_decomp(i,j,k+1,1) - v_decomp(i,j,k-1,1))/dx[2] ); + + // vorticity magnitude: sqrt(w1*w1 + w2*w2 + w3*w3) + out(i,j,k,17) = std::sqrt( out(i,j,k,14)*out(i,j,k,14) + out(i,j,k,15)*out(i,j,k,15) + out(i,j,k,16)*out(i,j,k,16) ); + }); + } + + // Write on a plotfile + std::string plotfilename = amrex::Concatenate("filtered_",step,9); + std::ostringstream os; + os << std::setprecision(3) << kmin; + plotfilename += os.str();; + std::ostringstream oss; + oss << std::setprecision(3) << kmax; + plotfilename += oss.str(); + + amrex::Vector varNames(18); + varNames[0] = "ux"; + varNames[1] = "uy"; + varNames[2] = "uz"; + varNames[3] = "ux_s"; + varNames[4] = "uy_s"; + varNames[5] = "uz_s"; + varNames[6] = "ux_d"; + varNames[7] = "uy_d"; + varNames[8] = "uz_d"; + varNames[9] = "umag"; + varNames[10] = "umag_s"; + varNames[11] = "umag_d"; + varNames[12] = "rho"; + varNames[13] = "div"; + varNames[14] = "w1"; + varNames[15] = "w2"; + varNames[16] = "w3"; + varNames[17] = "vort"; + WriteSingleLevelPlotfile(plotfilename,output,varNames,geom,0.0,step); +} + +void Read_Copy_MF_Checkpoint(amrex::MultiFab& mf, std::string mf_name, const std::string& checkpointname, + BoxArray& ba_old, DistributionMapping& dmap_old, + int NVARS, int ghost, const amrex::IntVect ngc, + int nodal_flag) +{ + // Read into temporary MF from file + MultiFab mf_temp; + VisMF::Read(mf_temp,amrex::MultiFabFileFullPrefix(0, checkpointname, "Level_", mf_name)); + + // Copy temporary MF into the new MF + if (ghost) { + mf.ParallelCopy(mf_temp, 0, 0, NVARS, ngc, ngc); + } + else { + mf.ParallelCopy(mf_temp, 0, 0, NVARS, 0, 0); + } +} + +void ShiftFaceToCC(const MultiFab& face_in, int face_comp, + MultiFab& cc_in, int cc_comp, int ncomp) +{ + + BL_PROFILE_VAR("ShiftFaceToCC()",ShiftFaceToCC); + + if (!face_in.is_nodal(0) && !face_in.is_nodal(1) && !face_in.is_nodal(2)) { + Abort("ShiftFaceToCC requires a face-centered MultiFab"); + } + + // Loop over boxes (note that mfi takes a cell-centered multifab as an argument) + for (MFIter mfi(cc_in,TilingIfNotGPU()); mfi.isValid(); ++mfi) { + + const Box& bx = mfi.tilebox(); + + Array4 const& face = face_in.array(mfi); + + Array4 const& cc = cc_in.array(mfi); + + amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + cc(i,j,k,cc_comp+n) = face(i,j,k,face_comp+n); + }); + } +} + +void ComputeGrad(const MultiFab & phi_in, std::array & gphi, + int start_incomp, int start_outcomp, int ncomp, int bccomp, const Geometry & geom, + int increment) +{ + BL_PROFILE_VAR("ComputeGrad()",ComputeGrad); + + // Physical Domain + Box dom(geom.Domain()); + + const GpuArray dx = geom.CellSizeArray(); + + // if not incrementing, initialize data to zero + if (increment == 0) { + for (int dir=0; dir & phi = phi_in.array(mfi); + + AMREX_D_TERM(const Array4 & gphix = gphi[0].array(mfi);, + const Array4 & gphiy = gphi[1].array(mfi);, + const Array4 & gphiz = gphi[2].array(mfi);); + + AMREX_D_TERM(const Box & bx_x = mfi.nodaltilebox(0);, + const Box & bx_y = mfi.nodaltilebox(1);, + const Box & bx_z = mfi.nodaltilebox(2);); + + amrex::ParallelFor(bx_x, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + gphix(i,j,k,start_outcomp+n) += (phi(i,j,k,start_incomp+n)-phi(i-1,j,k,start_incomp+n)) / dx[0]; + }, + bx_y, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + gphiy(i,j,k,start_outcomp+n) += (phi(i,j,k,start_incomp+n)-phi(i,j-1,k,start_incomp+n)) / dx[1]; + } + , bx_z, ncomp, [=] 
AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + gphiz(i,j,k,start_outcomp+n) += (phi(i,j,k,start_incomp+n)-phi(i,j,k-1,start_incomp+n)) / dx[2]; + } + ); + + } // end MFIter +} + +void FCMoments(const std::array& m1, + const amrex::Vector& comps, + std::array& mscr, + const int& power, + amrex::Vector& prod_val) +{ + + BL_PROFILE_VAR("FCMoments()",FCMoments); + + for (int d=0; d<3; ++d) { + MultiFab::Copy(mscr[d],m1[d],comps[d],0,1,0); + for(int i=1; i& m1, + amrex::Vector& sum) +{ + BL_PROFILE_VAR("SumStag()",SumStag); + + // Initialize to zero + std::fill(sum.begin(), sum.end(), 0.); + + ReduceOps reduce_op; + + //////// x-faces + + ReduceData reduce_datax(reduce_op); + using ReduceTuple = typename decltype(reduce_datax)::Type; + + for (MFIter mfi(m1[0],TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + const Box& bx_grid = mfi.validbox(); + + auto const& fab = m1[0].array(mfi); + + int xlo = bx_grid.smallEnd(0); + int xhi = bx_grid.bigEnd(0); + + reduce_op.eval(bx, reduce_datax, + [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple + { + Real weight = (i>xlo && i(reduce_datax.value()); + ParallelDescriptor::ReduceRealSum(sum[0]); + + //////// y-faces + + ReduceData reduce_datay(reduce_op); + + for (MFIter mfi(m1[1],TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + const Box& bx_grid = mfi.validbox(); + + auto const& fab = m1[1].array(mfi); + + int ylo = bx_grid.smallEnd(1); + int yhi = bx_grid.bigEnd(1); + + reduce_op.eval(bx, reduce_datay, + [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple + { + Real weight = (j>ylo && j(reduce_datay.value()); + ParallelDescriptor::ReduceRealSum(sum[1]); + + //////// z-faces + + ReduceData reduce_dataz(reduce_op); + + for (MFIter mfi(m1[2],TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + const Box& bx_grid = mfi.validbox(); + + auto const& fab = m1[2].array(mfi); + + int zlo = bx_grid.smallEnd(2); + int zhi = bx_grid.bigEnd(2); + + reduce_op.eval(bx, reduce_dataz, + [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple + { + Real weight = (k>zlo && k(reduce_dataz.value()); + ParallelDescriptor::ReduceRealSum(sum[2]); +} + diff --git a/exec/compressible_stag/build_perlmutter.sh b/exec/compressible_stag/build_perlmutter.sh new file mode 100755 index 000000000..2118c7059 --- /dev/null +++ b/exec/compressible_stag/build_perlmutter.sh @@ -0,0 +1,30 @@ +#!/usr/bin/bash + +# required dependencies +module load gpu +module load PrgEnv-gnu +module load craype +module load craype-x86-milan +module load craype-accel-nvidia80 +module load cudatoolkit +module load cmake/3.24.3 + +# necessary to use CUDA-Aware MPI and run a job +export CRAY_ACCEL_TARGET=nvidia80 + +# optimize CUDA compilation for A100 +export AMREX_CUDA_ARCH=8.0 + +# optimize CPU microarchitecture for AMD EPYC 3rd Gen (Milan/Zen3) +# note: the cc/CC/ftn wrappers below add those +export CXXFLAGS="-march=znver3" +export CFLAGS="-march=znver3" + +# compiler environment hints +export CC=cc +export CXX=CC +export FC=ftn +export CUDACXX=$(which nvcc) +export CUDAHOSTCXX=CC + +make -j10 USE_CUDA=TRUE DO_TURB=TRUE MAX_SPEC=2 USE_HEFFTE_CUFFT=TRUE USE_ASSERTION=TRUE From b819acf9cdf2208ab1240824395c3fb546c684e6 Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Wed, 31 Jan 2024 09:11:26 -0800 Subject: [PATCH 031/151] fix spectra calculations --- src_analysis/TurbSpectra.cpp | 48 +++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git 
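In SumStag above, faces at the ends of each grid get weight 1/2, which appears to be so that a face shared by two grids, or identified by the periodic wrap, is counted exactly once in the sum. A one-dimensional standalone sketch of that weighting (not part of the patch; the toy grids and values are made up):

#include <cstdio>
#include <vector>

int main ()
{
    // Two 1D "grids" of cells, [0,3] and [4,7], with face-centered data on each.
    // Each grid stores faces lo..hi; face 4 is stored by both grids.
    auto weighted_sum = [](const std::vector<double>& f, int lo, int hi) {
        double s = 0.;
        for (int i = lo; i <= hi; ++i) {
            double w = (i > lo && i < hi) ? 1.0 : 0.5;   // same weighting as SumStag
            s += w*f[i - lo];
        }
        return s;
    };

    std::vector<double> grid0 = {1., 1., 1., 1., 1.};    // faces 0..4
    std::vector<double> grid1 = {1., 1., 1., 1., 1.};    // faces 4..8
    double total = weighted_sum(grid0, 0, 4) + weighted_sum(grid1, 4, 8);
    // prints 8: the shared face gets 0.5 from each grid, and under periodicity
    // the two domain-end faces are the same face, so every face counts once
    std::printf("weighted face sum = %g\n", total);
    return 0;
}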
a/src_analysis/TurbSpectra.cpp b/src_analysis/TurbSpectra.cpp index 3060a4d4e..532b85954 100644 --- a/src_analysis/TurbSpectra.cpp +++ b/src_analysis/TurbSpectra.cpp @@ -512,13 +512,21 @@ void TurbSpectrumVelDecompHeffte(const MultiFab& vel, Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0; if (i <= nx/2) { + + // Get the wavevector + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + // Gradient Operators - GxR = (cos(2.0*M_PI*i/nx)-1.0)/dx[0]; - GxC = (sin(2.0*M_PI*i/nx)-0.0)/dx[0]; - GyR = (cos(2.0*M_PI*j/ny)-1.0)/dx[1]; - GyC = (sin(2.0*M_PI*j/ny)-0.0)/dx[1]; - GzR = (cos(2.0*M_PI*k/nz)-1.0)/dx[2]; - GzC = (sin(2.0*M_PI*k/nz)-0.0)/dx[2]; + GxR = (cos(2.0*M_PI*ki/nx)-1.0)/dx[0]; + GxC = (sin(2.0*M_PI*ki/nx)-0.0)/dx[0]; + GyR = (cos(2.0*M_PI*kj/ny)-1.0)/dx[1]; + GyC = (sin(2.0*M_PI*kj/ny)-0.0)/dx[1]; + GzR = (cos(2.0*M_PI*kk/nz)-1.0)/dx[2]; + GzC = (sin(2.0*M_PI*kk/nz)-0.0)/dx[2]; } else { // conjugate amrex::Abort("check the code; i should not go beyond bx.length(0)/2"); @@ -1210,6 +1218,14 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0; if (i <= nx/2) { + + // Get the wavevector + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + // Gradient Operators GxR = (cos(2.0*M_PI*i/nx)-1.0)/dx[0]; GxC = (sin(2.0*M_PI*i/nx)-0.0)/dx[0]; @@ -1353,6 +1369,9 @@ void IntegrateKScalarHeffte(const MultiFab& cov_mag, // } int comp_gpu = comp; + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; for ( MFIter mfi(cov_mag,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { const Box& bx = mfi.tilebox(); @@ -1363,7 +1382,9 @@ void IntegrateKScalarHeffte(const MultiFab& cov_mag, { int ki = i; int kj = j; + if (j >= ny/2) kj = ny - j; int kk = k; + if (k >= nz/2) kk = nz - k; Real dist = (ki*ki + kj*kj + kk*kk); dist = std::sqrt(dist); @@ -1433,6 +1454,9 @@ void IntegrateKScalar(const Vector > > phicnt_ptr[d] = 0; }); + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; for ( MFIter mfi(variables_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { const Box& bx = mfi.fabbox(); @@ -1444,7 +1468,9 @@ void IntegrateKScalar(const Vector > > if (i <= bx.length(0)/2) { // only half of kx-domain int ki = i; int kj = j; + if (j >= ny/2) kj = ny - j; int kk = k; + if (k >= nz/2) kk = nz - k; Real dist = (ki*ki + kj*kj + kk*kk); dist = std::sqrt(dist); @@ -1516,6 +1542,9 @@ void IntegrateKVelocityHeffte(const MultiFab& cov_mag, }); int comp_gpu = comp; + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; for ( MFIter mfi(cov_mag,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { const Box& bx = mfi.tilebox(); @@ -1526,7 +1555,9 @@ void IntegrateKVelocityHeffte(const MultiFab& cov_mag, { int ki = i; int kj = j; + if (j >= ny/2) kj = ny - j; int kk = k; + if (k >= nz/2) kk = nz - k; Real dist = (ki*ki + kj*kj + kk*kk); dist = std::sqrt(dist); @@ -1595,6 +1626,9 @@ void IntegrateKVelocity(const Vector > phicnt_ptr[d] = 0; }); + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; for ( MFIter mfi(vel_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { const Box& bx = mfi.fabbox(); @@ -1608,7 +1642,9 @@ void IntegrateKVelocity(const Vector > if (i <= bx.length(0)/2) { // only half of kx-domain int ki = i; int kj = j; + if (j >= ny/2) kj = ny - j; int kk = k; + if (k >= nz/2) kk = nz - k; Real dist = (ki*ki + kj*kj + kk*kk); dist = std::sqrt(dist); From 
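The change in this patch replaces the raw FFT indices j and k with folded wavenumbers, so modes in the upper half of the index range contribute with the magnitude of their negative frequency. A standalone sketch of the mapping (not part of the patch; grid sizes and indices are made up):

#include <cmath>
#include <cstdio>

// Fold an FFT index into a non-negative wavenumber magnitude, as done above
// for the j (ny) and k (nz) directions; i is already restricted to 0..nx/2.
int fold (int idx, int n) { return (idx >= n/2) ? n - idx : idx; }

int main ()
{
    const int ny = 16, nz = 16;          // assumed cell counts
    // e.g. j = 13 on a 16-point grid represents k_y = -3, so |k_y| = 3
    std::printf("j=13 -> kj=%d\n", fold(13, ny));
    // shell radius used for binning a mode with indices (i,j,k) = (2,13,15)
    int ki = 2, kj = fold(13, ny), kk = fold(15, nz);
    double dist = std::sqrt(double(ki*ki + kj*kj + kk*kk));
    std::printf("shell radius = %.3f\n", dist);
    return 0;
}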
bb8695531a51f081dbe0269e52548d6d7bacf7fd Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Wed, 31 Jan 2024 09:11:47 -0800 Subject: [PATCH 032/151] fixed spectral filter calculations --- .../SPECTRAL_FILTER/main_driver.cpp | 117 ++++++++------ .../SPECTRAL_FILTER/spectral_functions.H | 10 +- .../SPECTRAL_FILTER/spectral_functions.cpp | 148 +++++++++++------- 3 files changed, 170 insertions(+), 105 deletions(-) diff --git a/exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp b/exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp index 2c8c7fab4..e9ee078b2 100644 --- a/exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp +++ b/exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp @@ -109,14 +109,14 @@ void main_driver(const char* argv) MultiFab MFTurbScalar; MultiFab MFTurbVel; - MultiFab vel_decomp_filter; - MultiFab scalar_filter; + MultiFab vel_decomp_filter_heffte; + MultiFab scalar_filter_heffte; MFTurbVel.define(ba, dmap, 3, 0); MFTurbScalar.define(ba, dmap, 1, 0); - vel_decomp_filter.define(ba, dmap, 9, 0); - scalar_filter.define(ba, dmap, 1, 0); - vel_decomp_filter.setVal(0.0); - scalar_filter.setVal(0.0); + vel_decomp_filter_heffte.define(ba, dmap, 9, 0); + scalar_filter_heffte.define(ba, dmap, 1, 0); + vel_decomp_filter_heffte.setVal(0.0); + scalar_filter_heffte.setVal(0.0); // Set BC: 1) fill boundary 2) physical for (int d=0; d<3; d++) { @@ -129,32 +129,40 @@ void main_driver(const char* argv) } MultiFab::Copy(MFTurbScalar, prim, 0, 0, 1, 0); - SpectralVelDecomp(MFTurbVel, vel_decomp_filter, kmin, kmax, geom, n_cells); - SpectralScalarDecomp(MFTurbScalar, scalar_filter, kmin, kmax, geom, n_cells); + SpectralVelDecomp(MFTurbVel, vel_decomp_filter_heffte, kmin, kmax, geom, n_cells); + SpectralScalarDecomp(MFTurbScalar, scalar_filter_heffte, kmin, kmax, geom, n_cells); + + MultiFab vel_decomp_filter; + MultiFab scalar_filter; + vel_decomp_filter.define(ba, dmap, 9, 2); + scalar_filter.define(ba, dmap, 1, 2); + MultiFab::Copy(vel_decomp_filter,vel_decomp_filter_heffte,0,0,9,0); + MultiFab::Copy(scalar_filter,scalar_filter_heffte,0,0,1,0); + vel_decomp_filter.FillBoundary(geom.periodicity()); + scalar_filter.FillBoundary(geom.periodicity()); - SpectralWritePlotFile(restart, kmin, kmax, geom, vel_decomp_filter, scalar_filter); + SpectralWritePlotFile(restart, kmin, kmax, geom, vel_decomp_filter, scalar_filter, MFTurbVel, MFTurbScalar); // Turbulence Diagnostics Real u_rms, u_rms_s, u_rms_d, delta_u_rms; Real taylor_len, taylor_Re_eta; Real skew, skew_s, skew_d, kurt, kurt_s, kurt_d; + Vector var(9, 0.0); { - vel_decomp_filter.FillBoundary(geom.periodicity()); - Vector dProb(3); dProb[0] = 1.0/((n_cells[0]+1)*n_cells[1]*n_cells[2]); dProb[1] = 1.0/((n_cells[1]+1)*n_cells[2]*n_cells[0]); dProb[2] = 1.0/((n_cells[2]+1)*n_cells[0]*n_cells[1]); // Setup temp MultiFabs - std::array< MultiFab, AMREX_SPACEDIM > gradU; - std::array< MultiFab, AMREX_SPACEDIM > faceTemp; + std::array< MultiFab, 3 > gradU; + std::array< MultiFab, 3 > faceTemp; MultiFab sound_speed; MultiFab ccTemp; MultiFab ccTempA; - AMREX_D_TERM(gradU[0].define(convert(prim.boxArray(),nodal_flag_x), prim.DistributionMap(), 6, 0);, - gradU[1].define(convert(prim.boxArray(),nodal_flag_y), prim.DistributionMap(), 6, 0);, - gradU[2].define(convert(prim.boxArray(),nodal_flag_z), prim.DistributionMap(), 6, 0);); + AMREX_D_TERM(gradU[0].define(convert(prim.boxArray(),nodal_flag_x), prim.DistributionMap(), 9, 0);, + gradU[1].define(convert(prim.boxArray(),nodal_flag_y), prim.DistributionMap(), 9, 0);, + 
gradU[2].define(convert(prim.boxArray(),nodal_flag_z), prim.DistributionMap(), 9, 0);); AMREX_D_TERM(faceTemp[0].define(convert(prim.boxArray(),nodal_flag_x), prim.DistributionMap(), 1, 0);, faceTemp[1].define(convert(prim.boxArray(),nodal_flag_y), prim.DistributionMap(), 1, 0);, faceTemp[2].define(convert(prim.boxArray(),nodal_flag_z), prim.DistributionMap(), 1, 0);); @@ -189,18 +197,18 @@ void main_driver(const char* argv) // turbulent kinetic energy (solenoidal) ccTemp.setVal(0.0); - MultiFab::AddProduct(ccTemp,vel_decomp_filter,3,vel_decomp_filter,0,0,1,0); //uu - MultiFab::AddProduct(ccTemp,vel_decomp_filter,4,vel_decomp_filter,1,0,1,0); //vv - MultiFab::AddProduct(ccTemp,vel_decomp_filter,5,vel_decomp_filter,2,0,1,0); //ww + MultiFab::AddProduct(ccTemp,vel_decomp_filter,3,vel_decomp_filter,3,0,1,0); //uu + MultiFab::AddProduct(ccTemp,vel_decomp_filter,4,vel_decomp_filter,4,0,1,0); //vv + MultiFab::AddProduct(ccTemp,vel_decomp_filter,5,vel_decomp_filter,5,0,1,0); //ww u_rms_s = ccTemp.sum(0)/npts; u_rms_s = sqrt(u_rms_s/3.0); MultiFab::Multiply(ccTemp,prim,0,0,1,0); // rho*(uu+vv+ww) // turbulent kinetic energy (dilatational) ccTemp.setVal(0.0); - MultiFab::AddProduct(ccTemp,vel_decomp_filter,6,vel_decomp_filter,0,0,1,0); //uu - MultiFab::AddProduct(ccTemp,vel_decomp_filter,7,vel_decomp_filter,1,0,1,0); //vv - MultiFab::AddProduct(ccTemp,vel_decomp_filter,8,vel_decomp_filter,2,0,1,0); //ww + MultiFab::AddProduct(ccTemp,vel_decomp_filter,6,vel_decomp_filter,6,0,1,0); //uu + MultiFab::AddProduct(ccTemp,vel_decomp_filter,7,vel_decomp_filter,7,0,1,0); //vv + MultiFab::AddProduct(ccTemp,vel_decomp_filter,8,vel_decomp_filter,8,0,1,0); //ww u_rms_d = ccTemp.sum(0)/npts; u_rms_d = sqrt(u_rms_d/3.0); MultiFab::Multiply(ccTemp,prim,0,0,1,0); // rho*(uu+vv+ww) @@ -298,36 +306,57 @@ void main_driver(const char* argv) (pow(gradU2_s[0],2.0) + pow(gradU2_s[1],2.0) + pow(gradU2_s[2],2.0)); kurt_d = (gradU4_d[0] + gradU4_d[1] + gradU4_d[2])/ (pow(gradU2_d[0],2.0) + pow(gradU2_d[1],2.0) + pow(gradU2_d[2],2.0)); + + // velocity variances + for (int i=0;i<9;++i) { + ccTemp.setVal(0.0); + MultiFab::AddProduct(ccTemp,vel_decomp_filter,i,vel_decomp_filter,i,0,1,0); + Real mean = vel_decomp_filter.sum(i)/npts; + Real mean2 = ccTemp.sum(0)/npts; + var[i] = mean2 - mean*mean; + } + } std::string turbfilename = "turbstats_"; std::ostringstream os; os << std::setprecision(3) << kmin; - turbfilename += os.str();; + turbfilename += os.str(); + turbfilename += "_"; std::ostringstream oss; oss << std::setprecision(3) << kmax; turbfilename += oss.str(); std::ofstream turboutfile; - turboutfile.open(turbfilename, std::ios::app); - turboutfile << "u_rms " << "u_rms_s " << "u_rms_d " << "delta_u_rms " - << "TaylorLen " << "TaylorRe*Eta " - << "skew " << "skew_s " << "skew_d " - << "kurt " << "kurt_s " << "kurt_d " - << std::endl; - turboutfile << u_rms << " "; - turboutfile << u_rms_s << " "; - turboutfile << u_rms_d << " "; - turboutfile << delta_u_rms << " "; - turboutfile << taylor_len << " "; - turboutfile << taylor_Re_eta << " "; - turboutfile << skew << " "; - turboutfile << skew_s << " "; - turboutfile << skew_d << " "; - turboutfile << kurt << " "; - turboutfile << kurt_s << " "; - turboutfile << kurt_d << " "; - turboutfile << std::endl; - + if (ParallelDescriptor::IOProcessor()) { + turboutfile.open(turbfilename, std::ios::app); + } + if (ParallelDescriptor::IOProcessor()) { + turboutfile << "u_rms " << "u_rms_s " << "u_rms_d " << "delta_u_rms " + << "TaylorLen " << "TaylorRe*Eta " + << "skew " << "skew_s " << 
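The statistics written out below are ordinary sample moments: u_rms comes from the mean of uu+vv+ww over the domain, and each new var entry is the per-component variance, the mean of the squared component minus the square of its mean. A small standalone illustration of those formulas (not part of the patch; the data are made up):

#include <cmath>
#include <cstdio>
#include <vector>

int main ()
{
    std::vector<double> u = {0.9, -1.1, 0.2, 1.4};   // one velocity component, made up
    double m = 0., m2 = 0.;
    for (double v : u) { m += v; m2 += v*v; }
    m  /= u.size();
    m2 /= u.size();
    double rms = std::sqrt(m2);                      // this component's contribution to u_rms
    double var = m2 - m*m;                           // same formula as the var[i] loop above
    std::printf("rms = %.4f  var = %.4f\n", rms, var);
    return 0;
}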
"skew_d " + << "kurt " << "kurt_s " << "kurt_d " + << "var ux " << "var uy " << "var uz " + << "var uxs " << "var uys " << "var uzs " + << "var uxd " << "var uyd " << "var uzd " + << std::endl; + + turboutfile << u_rms << " "; + turboutfile << u_rms_s << " "; + turboutfile << u_rms_d << " "; + turboutfile << delta_u_rms << " "; + turboutfile << taylor_len << " "; + turboutfile << taylor_Re_eta << " "; + turboutfile << skew << " "; + turboutfile << skew_s << " "; + turboutfile << skew_d << " "; + turboutfile << kurt << " "; + turboutfile << kurt_s << " "; + turboutfile << kurt_d << " "; + for (int i=0;i<9;++i) { + turboutfile << var[i] << " "; + } + turboutfile << std::endl; + } // timer Real ts2 = ParallelDescriptor::second() - ts1; ParallelDescriptor::ReduceRealMax(ts2, ParallelDescriptor::IOProcessorNumber()); @@ -354,5 +383,5 @@ void main_driver(const char* argv) amrex::Print() << "Curent FAB megabyte spread across MPI nodes: [" << min_fab_megabytes << " ... " << max_fab_megabytes << "]\n"; - turboutfile.close(); + if (ParallelDescriptor::IOProcessor()) turboutfile.close(); } diff --git a/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H index 5cd30f678..12a72200f 100644 --- a/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H +++ b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H @@ -44,7 +44,7 @@ void Assert_rocfft_status (std::string const& name, rocfft_status status); void SpectralReadCheckPoint(amrex::Geometry& geom, const amrex::Box& domain, amrex::MultiFab& prim, - std::array& vel, + std::array& vel, BoxArray& ba, DistributionMapping& dmap, const amrex::Vector n_cells, const int nprimvars, @@ -71,7 +71,9 @@ void SpectralWritePlotFile(const int step, const amrex::Real& kmax, const amrex::Geometry& geom, const amrex::MultiFab& vel_decomp_in, - const amrex::MultiFab& scalar_in); + const amrex::MultiFab& scalar_in, + const amrex::MultiFab& vel_total, + const amrex::MultiFab& scalar_total); void Read_Copy_MF_Checkpoint(amrex::MultiFab& mf, std::string mf_name, const std::string& checkpointname, @@ -83,11 +85,11 @@ void ShiftFaceToCC(const MultiFab& face_in, int face_in_comp, MultiFab& cc_in, int cc_in_comp, int ncomp); -void ComputeGrad(const MultiFab & phi_in, std::array & gphi, +void ComputeGrad(const MultiFab & phi_in, std::array & gphi, int start_incomp, int start_outcomp, int ncomp, int bccomp, const Geometry & geom, int increment); -void SumStag(const std::array& m1, +void SumStag(const std::array& m1, amrex::Vector& sum); void FCMoments(const std::array& m1, diff --git a/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp index 1569b4e4f..aa144bef6 100644 --- a/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp +++ b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp @@ -22,7 +22,7 @@ namespace { void SpectralReadCheckPoint(amrex::Geometry& geom, const amrex::Box& domain, amrex::MultiFab& prim, - std::array& vel, + std::array& vel, BoxArray& ba, DistributionMapping& dmap, const amrex::Vector n_cells, const int nprimvars, @@ -101,7 +101,7 @@ void SpectralReadCheckPoint(amrex::Geometry& geom, prim.define(ba,dmap,nprimvars,ngc); // velocity and momentum (instantaneous, means, variances) - for (int d=0; d dx = geom.CellSizeArray(); + const GpuArray dx = geom.CellSizeArray(); long npts; Box domain = geom.Domain(); @@ -280,13 +280,21 @@ void SpectralVelDecomp(const MultiFab& vel, Real GxR = 0.0, GxC 
= 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0; if (i <= nx/2) { + + // Get the wavevector + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + // Gradient Operators - GxR = (cos(2.0*M_PI*i/nx)-1.0)/dx[0]; - GxC = (sin(2.0*M_PI*i/nx)-0.0)/dx[0]; - GyR = (cos(2.0*M_PI*j/ny)-1.0)/dx[1]; - GyC = (sin(2.0*M_PI*j/ny)-0.0)/dx[1]; - GzR = (cos(2.0*M_PI*k/nz)-1.0)/dx[2]; - GzC = (sin(2.0*M_PI*k/nz)-0.0)/dx[2]; + GxR = (cos(2.0*M_PI*ki/nx)-1.0)/dx[0]; + GxC = (sin(2.0*M_PI*ki/nx)-0.0)/dx[0]; + GyR = (cos(2.0*M_PI*kj/ny)-1.0)/dx[1]; + GyC = (sin(2.0*M_PI*kj/ny)-0.0)/dx[1]; + GzR = (cos(2.0*M_PI*kk/nz)-1.0)/dx[2]; + GzC = (sin(2.0*M_PI*kk/nz)-0.0)/dx[2]; } else { // conjugate amrex::Abort("check the code; i should not go beyond bx.length(0)/2"); @@ -295,58 +303,67 @@ void SpectralVelDecomp(const MultiFab& vel, // Get the wavenumber int ki = i; int kj = j; + if (j >= ny/2) kj = ny - j; int kk = k; + if (k >= nz/2) kk = nz - k; Real knum = (ki*ki + kj*kj + kk*kk); knum = std::sqrt(knum); // Scale Total velocity FFT components with Filtering if ((knum >= kmin) and (knum <= kmax)) { + spectral_tx(i,j,k) *= (1.0/sqrtnpts); spectral_ty(i,j,k) *= (1.0/sqrtnpts); spectral_tz(i,j,k) *= (1.0/sqrtnpts); - } - else { - spectral_tx(i,j,k) *= 0.0; - spectral_ty(i,j,k) *= 0.0; - spectral_tz(i,j,k) *= 0.0; - } - // Inverse Laplacian - Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; - - // Divergence of vel - Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + - spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + - spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; - Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + - spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + - spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; - - if (Lap < 1.0e-12) { // zero mode for no bulk motion - spectral_dx(i,j,k) *= 0.0; - spectral_dy(i,j,k) *= 0.0; - spectral_dz(i,j,k) *= 0.0; + // Inverse Laplacian + Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; + + // Divergence of vel + Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + + spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + + spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; + Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + + spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + + spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; + + if (Lap < 1.0e-12) { // zero mode for no bulk motion + spectral_dx(i,j,k) *= 0.0; + spectral_dy(i,j,k) *= 0.0; + spectral_dz(i,j,k) *= 0.0; + } + else { + + // Dilatational velocity + GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, + (divC*GxR - divR*GxC) / Lap); + spectral_dx(i,j,k) = copy_dx; + + GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, + (divC*GyR - divR*GyC) / Lap); + spectral_dy(i,j,k) = copy_dy; + + GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, + (divC*GzR - divR*GzC) / Lap); + spectral_dz(i,j,k) = copy_dz; + } + + // Solenoidal velocity + spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); + spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); + spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); } else { - - // Dilatational velocity - GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, - (divC*GxR - divR*GxC) / Lap); - spectral_dx(i,j,k) = copy_dx; - - GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, - (divC*GyR 
- divR*GyC) / Lap); - spectral_dy(i,j,k) = copy_dy; - - GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, - (divC*GzR - divR*GzC) / Lap); - spectral_dz(i,j,k) = copy_dz; + spectral_tx(i,j,k) = 0.0; + spectral_ty(i,j,k) = 0.0; + spectral_tz(i,j,k) = 0.0; + spectral_sx(i,j,k) = 0.0; + spectral_sy(i,j,k) = 0.0; + spectral_sz(i,j,k) = 0.0; + spectral_dx(i,j,k) = 0.0; + spectral_dy(i,j,k) = 0.0; + spectral_dz(i,j,k) = 0.0; } - - // Solenoidal velocity - spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); - spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); - spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); }); @@ -555,7 +572,7 @@ void SpectralScalarDecomp(const MultiFab& scalar, AMREX_ALWAYS_ASSERT_WITH_MESSAGE(scalar.local_size() == 1, "SpectralScalarDecomp: Must have one Box per MPI process when using heFFTe"); - const GpuArray dx = geom.CellSizeArray(); + const GpuArray dx = geom.CellSizeArray(); long npts; Box domain = geom.Domain(); @@ -695,7 +712,9 @@ void SpectralWritePlotFile(const int step, const amrex::Real& kmax, const amrex::Geometry& geom, const amrex::MultiFab& vel_decomp_in, - const amrex::MultiFab& scalar_in) + const amrex::MultiFab& scalar_in, + const amrex::MultiFab& vel_total, + const amrex::MultiFab& scalar_total) { MultiFab output; @@ -719,10 +738,12 @@ void SpectralWritePlotFile(const int step, // 15: vorticity w2 // 16: vorticity w3 // 17: vorticity mag: sqrt(w1**2 + w2**2 + w3**2) - output.define(vel_decomp_in.boxArray(), vel_decomp_in.DistributionMap(), 18, 0); + // 18: ux_org + // 19: scalar_org + output.define(vel_decomp_in.boxArray(), vel_decomp_in.DistributionMap(), 20, 0); output.setVal(0.0); - const GpuArray dx = geom.CellSizeArray(); + const GpuArray dx = geom.CellSizeArray(); for ( MFIter mfi(output,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { @@ -733,6 +754,10 @@ void SpectralWritePlotFile(const int step, const Array4& v_decomp = vel_decomp_in.array(mfi); const Array4& sca = scalar_in.array(mfi); + + const Array4& v_tot = vel_total.array(mfi); + + const Array4& sca_tot = scalar_total.array(mfi); amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { @@ -772,6 +797,12 @@ void SpectralWritePlotFile(const int step, // vorticity magnitude: sqrt(w1*w1 + w2*w2 + w3*w3) out(i,j,k,17) = std::sqrt( out(i,j,k,14)*out(i,j,k,14) + out(i,j,k,15)*out(i,j,k,15) + out(i,j,k,16)*out(i,j,k,16) ); + + // original velx + out(i,j,k,18) = v_tot(i,j,k,0); + + // original scalar + out(i,j,k,19) = sca_tot(i,j,k,0); }); } @@ -780,11 +811,12 @@ void SpectralWritePlotFile(const int step, std::ostringstream os; os << std::setprecision(3) << kmin; plotfilename += os.str();; + plotfilename += "_"; std::ostringstream oss; oss << std::setprecision(3) << kmax; plotfilename += oss.str(); - amrex::Vector varNames(18); + amrex::Vector varNames(20); varNames[0] = "ux"; varNames[1] = "uy"; varNames[2] = "uz"; @@ -803,6 +835,8 @@ void SpectralWritePlotFile(const int step, varNames[15] = "w2"; varNames[16] = "w3"; varNames[17] = "vort"; + varNames[18] = "ux_org"; + varNames[19] = "rho_org"; WriteSingleLevelPlotfile(plotfilename,output,varNames,geom,0.0,step); } @@ -850,7 +884,7 @@ void ShiftFaceToCC(const MultiFab& face_in, int face_comp, } } -void ComputeGrad(const MultiFab & phi_in, std::array & gphi, +void ComputeGrad(const MultiFab & phi_in, std::array & gphi, int start_incomp, int start_outcomp, int ncomp, int bccomp, const Geometry & geom, int increment) { @@ -859,11 +893,11 @@ void ComputeGrad(const MultiFab & phi_in, std::array & // Physical 
Domain Box dom(geom.Domain()); - const GpuArray dx = geom.CellSizeArray(); + const GpuArray dx = geom.CellSizeArray(); // if not incrementing, initialize data to zero if (increment == 0) { - for (int dir=0; dir& m1, SumStag(mscr,prod_val); } -void SumStag(const std::array& m1, +void SumStag(const std::array& m1, amrex::Vector& sum) { BL_PROFILE_VAR("SumStag()",SumStag); From fd7dddc5361dd756abf4c164de0c75d139cfda6b Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Wed, 31 Jan 2024 12:23:21 -0500 Subject: [PATCH 033/151] build script for frontier --- .../TURB_PDFS/build_frontier.sh | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100755 exec/compressible_stag/TURB_PDFS/build_frontier.sh diff --git a/exec/compressible_stag/TURB_PDFS/build_frontier.sh b/exec/compressible_stag/TURB_PDFS/build_frontier.sh new file mode 100755 index 000000000..9eb971164 --- /dev/null +++ b/exec/compressible_stag/TURB_PDFS/build_frontier.sh @@ -0,0 +1,27 @@ +#!/usr/bin/bash + +## load necessary modules +module load craype-accel-amd-gfx90a +module load amd-mixed +#module load rocm/5.2.0 # waiting for 5.6 for next bump +module load cray-mpich/8.1.23 +module load cce/15.0.0 # must be loaded after rocm + +# GPU-aware MPI +export MPICH_GPU_SUPPORT_ENABLED=1 + +# optimize CUDA compilation for MI250X +export AMREX_AMD_ARCH=gfx90a + +# compiler environment hints +##export CC=$(which hipcc) +##export CXX=$(which hipcc) +##export FC=$(which ftn) +##export CFLAGS="-I${ROCM_PATH}/include" +##export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed" +##export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64 ${PE_MPICH_GTL_DIR_amd_gfx90a} -lmpi_gtl_hsa" +export LDFLAGS="-L${MPICH_DIR}/lib -lmpi ${CRAY_XPMEM_POST_LINK_OPTS} -lxpmem ${PE_MPICH_GTL_DIR_amd_gfx90a} ${PE_MPICH_GTL_LIBS_amd_gfx90a}" +export CXXFLAGS="-I${MPICH_DIR}/include" +export HIPFLAGS="--amdgpu-target=gfx90a" + +make -j10 USE_CUDA=FALSE USE_HIP=TRUE USE_ASSERTION=TRUE From fba95df4861669b45dbe6cf1066b0bb09bb72d5c Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Wed, 21 Aug 2024 12:12:08 -0700 Subject: [PATCH 034/151] reactDiff shell --- exec/reactDiff/GNUmakefile | 56 ++++++++++++++ exec/reactDiff/main_driver.cpp | 133 +++++++++++++++++++++++++++++++++ 2 files changed, 189 insertions(+) create mode 100644 exec/reactDiff/GNUmakefile create mode 100644 exec/reactDiff/main_driver.cpp diff --git a/exec/reactDiff/GNUmakefile b/exec/reactDiff/GNUmakefile new file mode 100644 index 000000000..16102b888 --- /dev/null +++ b/exec/reactDiff/GNUmakefile @@ -0,0 +1,56 @@ +# AMREX_HOME defines the directory in which we will find all the AMReX code. +# If you set AMREX_HOME as an environment variable, this line will be ignored +AMREX_HOME ?= ../../../amrex/ + +DEBUG = FALSE +USE_MPI = TRUE +USE_OMP = FALSE +USE_CUDA = FALSE +COMP = gnu +DIM = 2 +MAX_SPEC = 8 + +TINY_PROFILE = FALSE + +include $(AMREX_HOME)/Tools/GNUMake/Make.defs + +# add this back on if we add any local files +#include ./Make.package +#VPATH_LOCATIONS += . +#INCLUDE_LOCATIONS += . 
+ +#include ../../src_reactDiff/Make.package +#VPATH_LOCATIONS += ../../src_reactDiff/ +#INCLUDE_LOCATIONS += ../../src_reactDiff/ + +include ../../src_analysis/Make.package +VPATH_LOCATIONS += ../../src_analysis/ +INCLUDE_LOCATIONS += ../../src_analysis/ + +include ../../src_rng/Make.package +VPATH_LOCATIONS += ../../src_rng/ +INCLUDE_LOCATIONS += ../../src_rng/ + +include ../../src_common/Make.package +VPATH_LOCATIONS += ../../src_common/ +INCLUDE_LOCATIONS += ../../src_common/ + +include $(AMREX_HOME)/Src/Base/Make.package +include $(AMREX_HOME)/Src/Boundary/Make.package +include $(AMREX_HOME)/Src/LinearSolvers/MLMG/Make.package + +include $(AMREX_HOME)/Tools/GNUMake/Make.rules + +ifeq ($(findstring cgpu, $(HOST)), cgpu) + CXXFLAGS += $(FFTW) +endif + +ifeq ($(USE_CUDA),TRUE) + LIBRARIES += -lcufft +else + LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 +endif + +MAXSPECIES := $(strip $(MAX_SPEC)) +DEFINES += -DMAX_SPECIES=$(MAXSPECIES) + diff --git a/exec/reactDiff/main_driver.cpp b/exec/reactDiff/main_driver.cpp new file mode 100644 index 000000000..9e3021b82 --- /dev/null +++ b/exec/reactDiff/main_driver.cpp @@ -0,0 +1,133 @@ + +#include "common_functions.H" + +#include +#include +#include +#include + +#include "chrono" + +using namespace std::chrono; +using namespace amrex; + +// argv contains the name of the inputs file entered at the command line +void main_driver(const char* argv) +{ + + BL_PROFILE_VAR("main_driver()",main_driver); + + // store the current time so we can later compute total run time. + Real strt_time = ParallelDescriptor::second(); + + std::string inputs_file = argv; + + // Initialize variables in namespaces + InitializeCommonNamespace(); + + // is the problem periodic? + Vector is_periodic(AMREX_SPACEDIM,0); // set to 0 (not periodic) by default + for (int i=0; i 0) { + // initializes the seed for C++ random number calls + InitRandom(seed+ParallelDescriptor::MyProc(), + ParallelDescriptor::NProcs(), + seed+ParallelDescriptor::MyProc()); + } else if (seed == 0) { + // initializes the seed for C++ random number calls based on the clock + auto now = time_point_cast(system_clock::now()); + int randSeed = now.time_since_epoch().count(); + // broadcast the same root seed to all processors + ParallelDescriptor::Bcast(&randSeed,1,ParallelDescriptor::IOProcessorNumber()); + InitRandom(randSeed+ParallelDescriptor::MyProc(), + ParallelDescriptor::NProcs(), + randSeed+ParallelDescriptor::MyProc()); + } else { + Abort("Must supply non-negative seed"); + } + + } + + int step_start; + amrex::Real time; + + // Initialize the boxarray "ba" from the single box "bx" + ba.define(domain); + + // Break up boxarray "ba" into chunks no larger than "max_grid_size" along a direction + // note we are converting "Vector max_grid_size" to an IntVect + ba.maxSize(IntVect(max_grid_size)); + + dmap.define(ba); + + + + + + + + /////////////////////////////////////////// + + // time step loop + for(int step=step_start;step<=max_step;++step) { + + + // MultiFab memory usage + const int IOProc = ParallelDescriptor::IOProcessorNumber(); + + amrex::Long min_fab_megabytes = amrex::TotalBytesAllocatedInFabsHWM()/1048576; + amrex::Long max_fab_megabytes = min_fab_megabytes; + + ParallelDescriptor::ReduceLongMin(min_fab_megabytes, IOProc); + ParallelDescriptor::ReduceLongMax(max_fab_megabytes, IOProc); + + amrex::Print() << "High-water FAB megabyte spread across MPI nodes: [" + << min_fab_megabytes << " ... 
" << max_fab_megabytes << "]\n"; + + min_fab_megabytes = amrex::TotalBytesAllocatedInFabs()/1048576; + max_fab_megabytes = min_fab_megabytes; + + ParallelDescriptor::ReduceLongMin(min_fab_megabytes, IOProc); + ParallelDescriptor::ReduceLongMax(max_fab_megabytes, IOProc); + + amrex::Print() << "Curent FAB megabyte spread across MPI nodes: [" + << min_fab_megabytes << " ... " << max_fab_megabytes << "]\n"; + + } + + // Call the timer again and compute the maximum difference between the start time + // and stop time over all processors + Real stop_time = ParallelDescriptor::second() - strt_time; + ParallelDescriptor::ReduceRealMax(stop_time); + amrex::Print() << "Run time = " << stop_time << std::endl; + +} From c8befe2eaf452b0a7eb8d59c364edbda25560e5f Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Wed, 21 Aug 2024 15:58:50 -0700 Subject: [PATCH 035/151] build chemistry namespace --- src_chemistry/chemistry_functions.cpp | 43 ++++++++++++++++++++++----- src_chemistry/chemistry_namespace.H | 39 +++++++++++++++++++----- 2 files changed, 68 insertions(+), 14 deletions(-) diff --git a/src_chemistry/chemistry_functions.cpp b/src_chemistry/chemistry_functions.cpp index 1bcd46524..91213b395 100644 --- a/src_chemistry/chemistry_functions.cpp +++ b/src_chemistry/chemistry_functions.cpp @@ -3,8 +3,6 @@ AMREX_GPU_MANAGED int chemistry::nreaction; -AMREX_GPU_MANAGED GpuArray chemistry::rate_const; - // from the fortran code, stoich_coeffs_R = stoichiometric_factors(spec,1,reac) // from the fortran code, stoich_coeffs_P = stoichiometric_factors(spec,2,reac) // stoich_coeffs_PR = stoich_coeffs_P - stoich_coeffs_R @@ -12,10 +10,29 @@ AMREX_GPU_MANAGED Array2D chemistry::stoich_ AMREX_GPU_MANAGED Array2D chemistry::stoich_coeffs_P; AMREX_GPU_MANAGED Array2D chemistry::stoich_coeffs_PR; +// reaction rate constant for each reaction (assuming Law of Mass Action holds) +// using rate_multiplier, reaction rates can be changed by the same factor +// if include_discrete_LMA_correction, n^2 and n^3 in rate expressions become +// n*(n-1/dv) and n*(n-1/dv)*(n-2/dv). +AMREX_GPU_MANAGED GpuArray chemistry::rate_const; +AMREX_GPU_MANAGED amrex::Real chemistry::rate_multiplier; +AMREX_GPU_MANAGED int chemistry::include_discrete_LMA_correction; + +// if n is positive, exclude species n (=solvent) when computing reaction rates +// in this case, the concentration of the solvent is assumed to be constant, +// which should be reflected on rate constants. +// if 0, no species is excluded +// e.g. 
U + S -> 2U, if exclude_solvent_comput_rates=0, rate=k*n_U*n_S +// if exclude_solvent_comput_rates=2, rate=k_new*n_U where k_new=k*n_S +AMREX_GPU_MANAGED int chemistry::exclude_solvent_comput_rates; + // from the fortran code this was use_Poisson_rng (0=CLE; 1=tau leaping; -1=deterministic; 2=SSA) // here it's being used as reaction_type (0=deterministic; 1=CLE; 2=SSA; 3=tau leap) AMREX_GPU_MANAGED int chemistry::reaction_type; +// use mole fraction based LMA +AMREX_GPU_MANAGED int chemistry::use_mole_frac_LMA; + // specific to compressible codes AMREX_GPU_MANAGED GpuArray chemistry::alpha_param; AMREX_GPU_MANAGED GpuArray chemistry::beta_param; @@ -37,11 +54,6 @@ void InitializeChemistryNamespace() // if nreaction is set to zero or not defined in the inputs file, quit the routine if (nreaction==0) return; - // get rate constants - std::vector k_tmp(MAX_REACTION); - pp.getarr("rate_const",k_tmp,0,nreaction); - for (int m=0; m k_tmp(MAX_REACTION); + pp.getarr("rate_const",k_tmp,0,nreaction); + for (int m=0; m alpha_tmp(MAX_REACTION); pp.queryarr("alpha_param",alpha_tmp,0,nreaction); diff --git a/src_chemistry/chemistry_namespace.H b/src_chemistry/chemistry_namespace.H index df42071c2..66ed7853e 100644 --- a/src_chemistry/chemistry_namespace.H +++ b/src_chemistry/chemistry_namespace.H @@ -1,14 +1,39 @@ namespace chemistry { + extern AMREX_GPU_MANAGED int nreaction; - extern AMREX_GPU_MANAGED GpuArray rate_const; - extern AMREX_GPU_MANAGED GpuArray alpha_param; - extern AMREX_GPU_MANAGED GpuArray beta_param; - extern AMREX_GPU_MANAGED amrex::Real T0_chem; + // from the fortran code, stoich_coeffs_R = stoichiometric_factors(spec,1,reac) + // from the fortran code, stoich_coeffs_P = stoichiometric_factors(spec,2,reac) + // stoich_coeffs_PR = stoich_coeffs_P - stoich_coeffs_R + extern AMREX_GPU_MANAGED Array2D stoich_coeffs_R; + extern AMREX_GPU_MANAGED Array2D stoich_coeffs_P; + extern AMREX_GPU_MANAGED Array2D stoich_coeffs_PR; + + // reaction rate constant for each reaction (assuming Law of Mass Action holds) + // using rate_multiplier, reaction rates can be changed by the same factor + // if include_discrete_LMA_correction, n^2 and n^3 in rate expressions become + // n*(n-1/dv) and n*(n-1/dv)*(n-2/dv). + extern AMREX_GPU_MANAGED GpuArray rate_const; + extern AMREX_GPU_MANAGED amrex::Real rate_multiplier; + extern AMREX_GPU_MANAGED int include_discrete_LMA_correction; - extern AMREX_GPU_MANAGED Array2D stoich_coeffs_R; - extern AMREX_GPU_MANAGED Array2D stoich_coeffs_P; - extern AMREX_GPU_MANAGED Array2D stoich_coeffs_PR; + // if n is positive, exclude species n (=solvent) when computing reaction rates + // in this case, the concentration of the solvent is assumed to be constant, + // which should be reflected on rate constants. + // if 0, no species is excluded + // e.g. 
U + S -> 2U, if exclude_solvent_comput_rates=0, rate=k*n_U*n_S + // if exclude_solvent_comput_rates=2, rate=k_new*n_U where k_new=k*n_S + extern AMREX_GPU_MANAGED int exclude_solvent_comput_rates; + // from the fortran code this was use_Poisson_rng (0=CLE; 1=tau leaping; -1=deterministic; 2=SSA) + // here it's being used as reaction_type (0=deterministic; 1=CLE; 2=SSA; 3=tau leap) extern AMREX_GPU_MANAGED int reaction_type; + + // use mole fraction based LMA + extern AMREX_GPU_MANAGED int use_mole_frac_LMA; + + // specific to compressible codes + extern AMREX_GPU_MANAGED GpuArray alpha_param; + extern AMREX_GPU_MANAGED GpuArray beta_param; + extern AMREX_GPU_MANAGED amrex::Real T0_chem; } From 4e59a7a3b1219316f648c44498ca6a85165b1ba9 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Wed, 21 Aug 2024 20:20:32 -0700 Subject: [PATCH 036/151] chemistry functions --- src_chemistry/chemistry_functions.H | 8 ++++++ src_chemistry/chemistry_functions.cpp | 39 +++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/src_chemistry/chemistry_functions.H b/src_chemistry/chemistry_functions.H index 41a5703f7..7033431e6 100644 --- a/src_chemistry/chemistry_functions.H +++ b/src_chemistry/chemistry_functions.H @@ -19,4 +19,12 @@ void InitializeChemistryNamespace(); // used in compressible code only void compute_compressible_chemistry_source_CLE(amrex::Real dt, amrex::Real dV, MultiFab& prim, MultiFab& source, MultiFab& ranchem); + +void chemical_rates(const MultiFab& n_cc, MultiFab& chem_rate, amrex::Geometry geom, amrex::Real dt, + const MultiFab& n_interm, Vector lin_comb_coef_in); + +AMREX_GPU_HOST_DEVICE void compute_reaction_rates(GpuArray& n_in, + GpuArray& reaction_rates, + const amrex::Real& dv); + #endif diff --git a/src_chemistry/chemistry_functions.cpp b/src_chemistry/chemistry_functions.cpp index 91213b395..7f335b9e9 100644 --- a/src_chemistry/chemistry_functions.cpp +++ b/src_chemistry/chemistry_functions.cpp @@ -191,3 +191,42 @@ void compute_compressible_chemistry_source_CLE(amrex::Real dt, amrex::Real dV, }); } } + + +void chemical_rates(const MultiFab& n_cc, MultiFab& chem_rate, amrex::Geometry geom, amrex::Real dt, + const MultiFab& n_interm, Vector lin_comb_coef_in) +{ + int lin_comb_avg_react_rate = 1; + if (lin_comb_coef_in[0] == 1. && lin_comb_coef_in[1] == 0.) { + lin_comb_avg_react_rate = 0; + } + + GpuArray lin_comb_coef; + lin_comb_coef[0] = lin_comb_coef_in[0]; + lin_comb_coef[1] = lin_comb_coef_in[1]; + + const Real* dx = geom.CellSize(); + + Real dv = (AMREX_SPACEDIM == 3) ? 
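For reference, the mass-action rate described by the chemistry namespace comments above is, per cell and per reaction, rate_multiplier times the rate constant times a product over the reactant species, with n^2 and n^3 replaced by n*(n-1/dv) and n*(n-1/dv)*(n-2/dv) when include_discrete_LMA_correction is on. Below is a standalone sketch under those assumptions; propensity() is a hypothetical helper, not the FHDeX interface, and the example coefficients are made up:

#include <algorithm>
#include <cstdio>
#include <vector>

// Propensity of one reaction in one cell under the Law of Mass Action, with the
// optional discrete correction described above. stoich_R holds the reactant
// coefficients, n holds number densities, and dv is the cell volume.
double propensity (const std::vector<int>& stoich_R, const std::vector<double>& n,
                   double k, double rate_multiplier, double dv,
                   bool discrete_correction)
{
    double rate = rate_multiplier*k;
    for (std::size_t s = 0; s < n.size(); ++s) {
        for (int m = 0; m < stoich_R[s]; ++m) {
            double factor = discrete_correction ? std::max(n[s] - m/dv, 0.0) : n[s];
            rate *= factor;                // n, then n*(n-1/dv), then n*(n-1/dv)*(n-2/dv), ...
        }
    }
    return rate;
}

int main ()
{
    // made-up example: 2A + B -> products, k = 0.5, dv = 10
    double r = propensity({2,1}, {3.0, 1.5}, 0.5, 1.0, 10.0, true);
    std::printf("rate = %g\n", r);         // 0.5 * 3.0*(3.0-0.1) * 1.5 = 6.525
    return 0;
}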
dx[0]*dx[1]*dx[2] : dx[0]*dx[1]*cell_depth; + + + +} + +AMREX_GPU_HOST_DEVICE void compute_reaction_rates(GpuArray& n_in, + GpuArray& reaction_rates, + const amrex::Real& dv) +{ + GpuArray n_nonneg; + + Real n_sum = 0.; + + for (int i=0; i Date: Thu, 22 Aug 2024 10:43:47 -0700 Subject: [PATCH 037/151] namelist --- exec/reactDiff/GNUmakefile | 14 +++++++++++--- src_reactDiff/Make.package | 3 +++ src_reactDiff/reactDiff_functions.H | 16 ++++++++++++++++ src_reactDiff/reactDiff_functions.cpp | 22 ++++++++++++++++++++++ src_reactDiff/reactDiff_namespace.H | 4 ++++ 5 files changed, 56 insertions(+), 3 deletions(-) create mode 100644 src_reactDiff/Make.package create mode 100644 src_reactDiff/reactDiff_functions.H create mode 100644 src_reactDiff/reactDiff_functions.cpp create mode 100644 src_reactDiff/reactDiff_namespace.H diff --git a/exec/reactDiff/GNUmakefile b/exec/reactDiff/GNUmakefile index 16102b888..24812ca73 100644 --- a/exec/reactDiff/GNUmakefile +++ b/exec/reactDiff/GNUmakefile @@ -9,6 +9,7 @@ USE_CUDA = FALSE COMP = gnu DIM = 2 MAX_SPEC = 8 +MAX_REAC = 5 TINY_PROFILE = FALSE @@ -19,9 +20,13 @@ include $(AMREX_HOME)/Tools/GNUMake/Make.defs #VPATH_LOCATIONS += . #INCLUDE_LOCATIONS += . -#include ../../src_reactDiff/Make.package -#VPATH_LOCATIONS += ../../src_reactDiff/ -#INCLUDE_LOCATIONS += ../../src_reactDiff/ +include ../../src_reactDiff/Make.package +VPATH_LOCATIONS += ../../src_reactDiff/ +INCLUDE_LOCATIONS += ../../src_reactDiff/ + +include ../../src_chemistry/Make.package +VPATH_LOCATIONS += ../../src_chemistry/ +INCLUDE_LOCATIONS += ../../src_chemistry/ include ../../src_analysis/Make.package VPATH_LOCATIONS += ../../src_analysis/ @@ -54,3 +59,6 @@ endif MAXSPECIES := $(strip $(MAX_SPEC)) DEFINES += -DMAX_SPECIES=$(MAXSPECIES) +MAXREACTION := $(strip $(MAX_REAC)) +DEFINES += -DMAX_REACTION=$(MAXREACTION) + diff --git a/src_reactDiff/Make.package b/src_reactDiff/Make.package new file mode 100644 index 000000000..a2258eb76 --- /dev/null +++ b/src_reactDiff/Make.package @@ -0,0 +1,3 @@ +CEXE_sources += reactDiff_functions.cpp +CEXE_headers += reactDiff_functions.H +CEXE_headers += reactDiff_namespace.H diff --git a/src_reactDiff/reactDiff_functions.H b/src_reactDiff/reactDiff_functions.H new file mode 100644 index 000000000..1c63e846e --- /dev/null +++ b/src_reactDiff/reactDiff_functions.H @@ -0,0 +1,16 @@ +#ifndef _reactdiff_functions_H_ +#define _reactdiff_functions_H_ + +#include + +#include "common_functions.H" +#include "common_namespace.H" +#include "reactDiff_namespace.H" + +using namespace reactDiff; +using namespace amrex; +using namespace common; + +void InitializeReactDiffNamespace(); + +#endif diff --git a/src_reactDiff/reactDiff_functions.cpp b/src_reactDiff/reactDiff_functions.cpp new file mode 100644 index 000000000..20f910869 --- /dev/null +++ b/src_reactDiff/reactDiff_functions.cpp @@ -0,0 +1,22 @@ +#include "reactDiff_functions.H" +#include "AMReX_ParmParse.H" + +// only used for split schemes (temporal_integrator>=0) +// 0=explicit trapezoidal predictor/corrector +// 1=Crank-Nicolson semi-implicit +// 2=explicit midpoint +// 3=multinomial diffusion +// 4=forward Euler + +AMREX_GPU_MANAGED int reactDiff::diffusion_type; + +void InitializeReactDiffNamespace() +{ + // extract inputs parameters + ParmParse pp; + + diffusion_type = 0; + pp.query("diffusion_type",diffusion_type); + + return; +} diff --git a/src_reactDiff/reactDiff_namespace.H b/src_reactDiff/reactDiff_namespace.H new file mode 100644 index 000000000..83fa5db0a --- /dev/null +++ 
b/src_reactDiff/reactDiff_namespace.H @@ -0,0 +1,4 @@ +namespace reactDiff { + + extern AMREX_GPU_MANAGED int diffusion_type; +} From 1e2cc296e9b7e185181a397bb62624f8d8e2072d Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Thu, 22 Aug 2024 13:14:56 -0700 Subject: [PATCH 038/151] reactDiff namespace --- src_reactDiff/reactDiff_functions.cpp | 122 +++++++++++++++++++++++++- src_reactDiff/reactDiff_namespace.H | 68 +++++++++++++- 2 files changed, 186 insertions(+), 4 deletions(-) diff --git a/src_reactDiff/reactDiff_functions.cpp b/src_reactDiff/reactDiff_functions.cpp index 20f910869..b000989fa 100644 --- a/src_reactDiff/reactDiff_functions.cpp +++ b/src_reactDiff/reactDiff_functions.cpp @@ -1,22 +1,138 @@ #include "reactDiff_functions.H" #include "AMReX_ParmParse.H" +// 0=D + R (first-order splitting) +// 1=(1/2)R + D + (1/2)R (Strang option 1) +// 2=(1/2)D + R + (1/2)D (Strang option 2) +// -1=unsplit forward Euler +// -2=unsplit explicit midpoint +// -3=unsplit multinomial diffusion +// -4=unsplit implicit midpoint +AMREX_GPU_MANAGED int reactDiff::temporal_integrator; + // only used for split schemes (temporal_integrator>=0) // 0=explicit trapezoidal predictor/corrector // 1=Crank-Nicolson semi-implicit // 2=explicit midpoint // 3=multinomial diffusion // 4=forward Euler +AMREX_GPU_MANAGED int reactDiff::reactDiff_diffusion_type; + +// only used for split schemes (temporal_integrator>=0) +// 0=first-order (deterministic, tau leaping, CLE, or SSA) +// 1=second-order (determinisitc, tau leaping, or CLE only) +AMREX_GPU_MANAGED int reactDiff::reactDiff_reaction_type; + +// only used for midpoint diffusion schemes (split as well as unsplit) +// corrector formulation of noise +// 1 = K(nold) * W1 + K(nold) * W2 +// 2 = K(nold) * W1 + K(npred) * W2 +// 3 = K(nold) * W1 + K(2*npred-nold) * W2 +AMREX_GPU_MANAGED int reactDiff::midpoint_stoch_flux_type; + +// how to compute n on faces for stochastic weighting +// 1=arithmetic (with C0-Heaviside), 2=geometric, 3=harmonic +// 10=arithmetic average with discontinuous Heaviside function +// 11=arithmetic average with C1-smoothed Heaviside function +// 12=arithmetic average with C2-smoothed Heaviside function +AMREX_GPU_MANAGED int reactDiff::avg_type; + +// use the Einkemmer boundary condition fix (split schemes only) +AMREX_GPU_MANAGED int reactDiff::inhomogeneous_bc_fix; + +// volume multiplier (dv = product(dx(1:MAX_SPACEDIM))*volume_factor) +// only really intended for 3D since in 2D one can control the cell depth +AMREX_GPU_MANAGED amrex::Real reactDiff::volume_factor; + +// initial values to be used in init_n.f90 +AMREX_GPU_MANAGED Array2D reactDiff::n_init_in; -AMREX_GPU_MANAGED int reactDiff::diffusion_type; +// initialize with all number of molecules strictly integer +AMREX_GPU_MANAGED int reactDiff::integer_populations; + +// Fickian diffusion coeffs +AMREX_GPU_MANAGED amrex::GpuArray reactDiff::D_fick; + +// diffusion boundary stencil order +AMREX_GPU_MANAGED int reactDiff::diffusion_stencil_order; + +// implicit diffusion solve verbosity +AMREX_GPU_MANAGED int reactDiff::diffusion_verbose; + +// implicit diffusion solve bottom solver verbosity +AMREX_GPU_MANAGED int reactDiff::diffusion_bottom_verbose; + +// relative eps for implicit diffusion solve +AMREX_GPU_MANAGED amrex::Real reactDiff::implicit_diffusion_rel_eps; + +// absolute eps for implicit diffusion solve +AMREX_GPU_MANAGED amrex::Real reactDiff::implicit_diffusion_abs_eps; void InitializeReactDiffNamespace() { // extract inputs parameters ParmParse pp; - diffusion_type = 0; - 
pp.query("diffusion_type",diffusion_type); + int temp_max = std::max(MAX_SPECIES,MAX_REACTION); + + amrex::Vector temp (temp_max,0.); + amrex::Vector temp_int(temp_max,0 ); + + temporal_integrator = 0; + pp.query("temporal_integrator",temporal_integrator); + + reactDiff_diffusion_type = 0; + pp.query("reactDiff_diffusion_type",reactDiff_diffusion_type); + + reactDiff_reaction_type = 0; + pp.query("reactDiff_reaction_type",reactDiff_reaction_type); + + midpoint_stoch_flux_type = 1; + pp.query("midpoint_stoch_flux_type",midpoint_stoch_flux_type); + + avg_type = 1; + pp.query("avg_type",avg_type); + + inhomogeneous_bc_fix = 0; + pp.query("inhomogeneous_bc_fix",inhomogeneous_bc_fix); + + volume_factor = 1.; + pp.query("volume_factor",volume_factor); + + if (pp.queryarr("n_init_in_1",temp)) { + for (int i=0; i=0) + // 0=explicit trapezoidal predictor/corrector + // 1=Crank-Nicolson semi-implicit + // 2=explicit midpoint + // 3=multinomial diffusion + // 4=forward Euler + extern AMREX_GPU_MANAGED int reactDiff_diffusion_type; + + // only used for split schemes (temporal_integrator>=0) + // 0=first-order (deterministic, tau leaping, CLE, or SSA) + // 1=second-order (determinisitc, tau leaping, or CLE only) + extern AMREX_GPU_MANAGED int reactDiff_reaction_type; + + // only used for midpoint diffusion schemes (split as well as unsplit) + // corrector formulation of noise + // 1 = K(nold) * W1 + K(nold) * W2 + // 2 = K(nold) * W1 + K(npred) * W2 + // 3 = K(nold) * W1 + K(2*npred-nold) * W2 + extern AMREX_GPU_MANAGED int midpoint_stoch_flux_type; + + // how to compute n on faces for stochastic weighting + // 1=arithmetic (with C0-Heaviside), 2=geometric, 3=harmonic + // 10=arithmetic average with discontinuous Heaviside function + // 11=arithmetic average with C1-smoothed Heaviside function + // 12=arithmetic average with C2-smoothed Heaviside function + extern AMREX_GPU_MANAGED int avg_type; + + // use the Einkemmer boundary condition fix (split schemes only) + extern AMREX_GPU_MANAGED int inhomogeneous_bc_fix; + + // volume multiplier (dv = product(dx(1:MAX_SPACEDIM))*volume_factor) + // only really intended for 3D since in 2D one can control the cell depth + extern AMREX_GPU_MANAGED amrex::Real volume_factor; + + // initial values to be used in init_n.f90 + extern AMREX_GPU_MANAGED Array2D n_init_in; + + // initialize with all number of molecules strictly integer + extern AMREX_GPU_MANAGED int integer_populations; + + // Fickian diffusion coeffs + extern AMREX_GPU_MANAGED amrex::GpuArray D_fick; + + // diffusion boundary stencil order + extern AMREX_GPU_MANAGED int diffusion_stencil_order; + + // implicit diffusion solve verbosity + extern AMREX_GPU_MANAGED int diffusion_verbose; + + // implicit diffusion solve bottom solver verbosity + extern AMREX_GPU_MANAGED int diffusion_bottom_verbose; + + // relative eps for implicit diffusion solve + extern AMREX_GPU_MANAGED amrex::Real implicit_diffusion_rel_eps; + + // absolute eps for implicit diffusion solve + extern AMREX_GPU_MANAGED amrex::Real implicit_diffusion_abs_eps; + } From 9576543ea7143033ad382c9c87ea33e6998e67fa Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 23 Aug 2024 12:34:17 -0700 Subject: [PATCH 039/151] setup preliminaries --- exec/reactDiff/inputs_2d | 28 +++++++++ exec/reactDiff/inputs_3d | 28 +++++++++ exec/reactDiff/main_driver.cpp | 90 +++++++++++++++++++++++---- src_reactDiff/Make.package | 1 + src_reactDiff/WritePlotFile.cpp | 32 ++++++++++ src_reactDiff/reactDiff_functions.H | 4 ++ src_reactDiff/reactDiff_functions.cpp | 
12 +++- src_reactDiff/reactDiff_namespace.H | 5 +- 8 files changed, 183 insertions(+), 17 deletions(-) create mode 100644 exec/reactDiff/inputs_2d create mode 100644 exec/reactDiff/inputs_3d create mode 100644 src_reactDiff/WritePlotFile.cpp diff --git a/exec/reactDiff/inputs_2d b/exec/reactDiff/inputs_2d new file mode 100644 index 000000000..83d665b3b --- /dev/null +++ b/exec/reactDiff/inputs_2d @@ -0,0 +1,28 @@ + # Problem specification + prob_lo = 0.0 0.0 # physical lo coordinate + prob_hi = 1.0 1.0 # physical hi coordinate + + # number of cells in domain + n_cells = 32 32 + # max number of cells in a box + max_grid_size = 16 16 + + # Time-step control + fixed_dt = 0.1 + + # Controls for number of steps between actions + max_step = 10 + plot_int = 1 + + seed = 1 + + nspecies = 2 + + # Boundary conditions + # ---------------------- + # BC specifications: + # -1 = periodic + bc_spec_lo = -1 -1 + bc_spec_hi = -1 -1 + + stats_int = -1 diff --git a/exec/reactDiff/inputs_3d b/exec/reactDiff/inputs_3d new file mode 100644 index 000000000..05adb9bba --- /dev/null +++ b/exec/reactDiff/inputs_3d @@ -0,0 +1,28 @@ + # Problem specification + prob_lo = 0.0 0.0 0.0 # physical lo coordinate + prob_hi = 1.0 1.0 1.0 # physical hi coordinate + + # number of cells in domain + n_cells = 32 32 32 + # max number of cells in a box + max_grid_size = 16 16 16 + + # Time-step control + fixed_dt = 0.1 + + # Controls for number of steps between actions + max_step = 10 + plot_int = 1 + + seed = 1 + + nspecies = 2 + + # Boundary conditions + # ---------------------- + # BC specifications: + # -1 = periodic + bc_spec_lo = -1 -1 -1 + bc_spec_hi = -1 -1 -1 + + stats_int = -1 diff --git a/exec/reactDiff/main_driver.cpp b/exec/reactDiff/main_driver.cpp index 9e3021b82..4d7a9f4f3 100644 --- a/exec/reactDiff/main_driver.cpp +++ b/exec/reactDiff/main_driver.cpp @@ -1,5 +1,7 @@ #include "common_functions.H" +#include "chemistry_functions.H" +#include "reactDiff_functions.H" #include #include @@ -10,6 +12,8 @@ using namespace std::chrono; using namespace amrex; +using namespace common; +using namespace chemistry; // argv contains the name of the inputs file entered at the command line void main_driver(const char* argv) @@ -24,6 +28,8 @@ void main_driver(const char* argv) // Initialize variables in namespaces InitializeCommonNamespace(); + InitializeChemistryNamespace(); + InitializeReactDiffNamespace(); // is the problem periodic? 
Vector is_periodic(AMREX_SPACEDIM,0); // set to 0 (not periodic) by default @@ -42,14 +48,7 @@ void main_driver(const char* argv) Box domain(dom_lo, dom_hi); Geometry geom(domain,&real_box,CoordSys::cartesian,is_periodic.data()); - - // BoxArray - BoxArray ba; - - // how boxes are distrubuted among MPI processes - DistributionMapping dmap; - Real dt = fixed_dt; const Real* dx = geom.CellSize(); ///////////////////////////////////////// @@ -76,25 +75,90 @@ void main_driver(const char* argv) } } + + BoxArray ba; + DistributionMapping dmap; int step_start; amrex::Real time; - // Initialize the boxarray "ba" from the single box "bx" - ba.define(domain); + if (restart < 0) { + + step_start = 1; + time = 0.; + + // Initialize the boxarray "ba" from the single box "bx" + ba.define(domain); + + // Break up boxarray "ba" into chunks no larger than "max_grid_size" along a direction + // note we are converting "Vector max_grid_size" to an IntVect + ba.maxSize(IntVect(max_grid_size)); - // Break up boxarray "ba" into chunks no larger than "max_grid_size" along a direction - // note we are converting "Vector max_grid_size" to an IntVect - ba.maxSize(IntVect(max_grid_size)); + dmap.define(ba); + + } else { + + // checkpoint restart + + } - dmap.define(ba); + MultiFab n_old(ba,dmap,nspecies,1); + MultiFab n_new(ba,dmap,nspecies,1); + + if (model_file_init) { + Abort("model_file_init not supported yet"); + } else { + // Initialize n + // Init(); + n_old.setVal(0.); + } + if (std::abs(initial_variance_mass) > 0.) { + Abort("initial_variance_mass not supported yet"); + // add_init_n_fluctuations() + } + Real dt; + if (fixed_dt > 0.) { + dt = fixed_dt; + Print() << "Setting dt using fixed_dt = " << dt << std::endl; + } else { + Real D_Fick_max = 0.; + for (int i=0; i 0 yet"); + // compute_n_steady() + } + if (temporal_integrator < 0) { // unsplit schemes + // Donev: The code will work for a single cell also but may not be the most efficient, so issue warning: + if (n_cells[0] == 1 && n_cells[1] == 1) { + Print() << "WARNING in advance_reaction_diffusion: use splitting based schemes (temporal_integrator>=0) for single cell" << std::endl; + } + if (nreaction < 1) { + Print() << "WARNING in advance_reaction_diffusion: use splitting based schemes (temporal_integrator>=0) for diffusion only" << std::endl; + } + } + if (stats_int > 0) { + Abort("Structure factor not implemented yet"); + } + + int istep = (restart < 0) ? 
0 : restart; + WritePlotFile(istep,time,geom,n_old); + /////////////////////////////////////////// // time step loop diff --git a/src_reactDiff/Make.package b/src_reactDiff/Make.package index a2258eb76..365e4872e 100644 --- a/src_reactDiff/Make.package +++ b/src_reactDiff/Make.package @@ -1,3 +1,4 @@ CEXE_sources += reactDiff_functions.cpp +CEXE_sources += WritePlotFile.cpp CEXE_headers += reactDiff_functions.H CEXE_headers += reactDiff_namespace.H diff --git a/src_reactDiff/WritePlotFile.cpp b/src_reactDiff/WritePlotFile.cpp new file mode 100644 index 000000000..3021df00c --- /dev/null +++ b/src_reactDiff/WritePlotFile.cpp @@ -0,0 +1,32 @@ +#include "AMReX_PlotFileUtil.H" +#include "reactDiff_functions.H" + +void WritePlotFile(int step, + const amrex::Real time, + const amrex::Geometry& geom, + const MultiFab& n_in) +{ + + BL_PROFILE_VAR("WritePlotFile()",WritePlotFile); + + std::string plotfilename = Concatenate(plot_base_name,step,7); + + amrex::Print() << "Writing plotfile " << plotfilename << "\n"; + + BoxArray ba = n_in.boxArray(); + DistributionMapping dmap = n_in.DistributionMap(); + + Vector varNames(nspecies); + + // keep a counter for plotfile variables + int cnt = 0; + + for (int i=0; i reactDiff::n_init_in; +// initialize from model file +AMREX_GPU_MANAGED int reactDiff::model_file_init; + // initialize with all number of molecules strictly integer AMREX_GPU_MANAGED int reactDiff::integer_populations; // Fickian diffusion coeffs -AMREX_GPU_MANAGED amrex::GpuArray reactDiff::D_fick; +AMREX_GPU_MANAGED amrex::GpuArray reactDiff::D_Fick; // diffusion boundary stencil order AMREX_GPU_MANAGED int reactDiff::diffusion_stencil_order; @@ -110,12 +113,15 @@ void InitializeReactDiffNamespace() } } + model_file_init = 0; + pp.query("model_file_init",model_file_init); + integer_populations = 0; pp.query("integer_populations",integer_populations); - if (pp.queryarr("D_fick",temp)) { + if (pp.queryarr("D_Fick",temp)) { for (int i=0; i n_init_in; + // initialize from model file + extern AMREX_GPU_MANAGED int model_file_init; + // initialize with all number of molecules strictly integer extern AMREX_GPU_MANAGED int integer_populations; // Fickian diffusion coeffs - extern AMREX_GPU_MANAGED amrex::GpuArray D_fick; + extern AMREX_GPU_MANAGED amrex::GpuArray D_Fick; // diffusion boundary stencil order extern AMREX_GPU_MANAGED int diffusion_stencil_order; From 9bf6d2c8a2fee122883639c8bd1c5136654a6253 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 23 Aug 2024 13:46:49 -0700 Subject: [PATCH 040/151] diffusion outline --- exec/reactDiff/main_driver.cpp | 47 +++++-- src_reactDiff/AdvanceDiffusion.cpp | 188 ++++++++++++++++++++++++++++ src_reactDiff/AdvanceTimestep.cpp | 66 ++++++++++ src_reactDiff/Make.package | 2 + src_reactDiff/reactDiff_functions.H | 27 ++++ 5 files changed, 317 insertions(+), 13 deletions(-) create mode 100644 src_reactDiff/AdvanceDiffusion.cpp create mode 100644 src_reactDiff/AdvanceTimestep.cpp diff --git a/exec/reactDiff/main_driver.cpp b/exec/reactDiff/main_driver.cpp index 4d7a9f4f3..69561bdeb 100644 --- a/exec/reactDiff/main_driver.cpp +++ b/exec/reactDiff/main_driver.cpp @@ -52,28 +52,30 @@ void main_driver(const char* argv) const Real* dx = geom.CellSize(); ///////////////////////////////////////// - //Initialise rngs + // Initialize seeds for random number generator ///////////////////////////////////////// if (restart < 0) { + int mySeed; + if (seed > 0) { - // initializes the seed for C++ random number calls - InitRandom(seed+ParallelDescriptor::MyProc(), - 
ParallelDescriptor::NProcs(), - seed+ParallelDescriptor::MyProc()); + // initializes the seed for C++ random number calls with a specified root seed + mySeed = seed; } else if (seed == 0) { - // initializes the seed for C++ random number calls based on the clock + // initializes the root seed for C++ random number calls based on the clock auto now = time_point_cast(system_clock::now()); - int randSeed = now.time_since_epoch().count(); + int mySeed = now.time_since_epoch().count(); // broadcast the same root seed to all processors - ParallelDescriptor::Bcast(&randSeed,1,ParallelDescriptor::IOProcessorNumber()); - InitRandom(randSeed+ParallelDescriptor::MyProc(), - ParallelDescriptor::NProcs(), - randSeed+ParallelDescriptor::MyProc()); + ParallelDescriptor::Bcast(&mySeed,1,ParallelDescriptor::IOProcessorNumber()); } else { Abort("Must supply non-negative seed"); } + // MPI ranks > 0 get a seed inremented by the rank + InitRandom(mySeed+ParallelDescriptor::MyProc(), + ParallelDescriptor::NProcs(), + mySeed+ParallelDescriptor::MyProc()); + } BoxArray ba; @@ -157,13 +159,32 @@ void main_driver(const char* argv) int istep = (restart < 0) ? 0 : restart; WritePlotFile(istep,time,geom,n_old); - - /////////////////////////////////////////// // time step loop for(int step=step_start;step<=max_step;++step) { + AdvanceTimestep(n_old,n_new,dt,time,geom); + + time += dt; + MultiFab::Copy(n_new,n_old,0,0,nspecies,1); + + if (stats_int > 0 && step%stats_int == 0 && step > n_steps_skip) { + Abort("fix structure factor snapshot"); + } + + if (plot_int > 0 && step%plot_int == 0) { + + WritePlotFile(step,time,geom,n_new); + + if (stats_int > 0 && step > n_steps_skip) { + Abort("fix structure factor plotfile write"); + } + } + + if (chk_int > 0 && step%chk_int == 0) { + Abort("fix checkpoint write"); + } // MultiFab memory usage const int IOProc = ParallelDescriptor::IOProcessorNumber(); diff --git a/src_reactDiff/AdvanceDiffusion.cpp b/src_reactDiff/AdvanceDiffusion.cpp new file mode 100644 index 000000000..3dd9faaad --- /dev/null +++ b/src_reactDiff/AdvanceDiffusion.cpp @@ -0,0 +1,188 @@ +#include "reactDiff_functions.H" +#include "chemistry_functions.H" + +void AdvanceDiffusion(const MultiFab& n_old, + MultiFab& n_new, + const MultiFab& ext_src, + const Real& dt, + const Real& time, + const Geometry& geom) { + + BoxArray ba = n_old.boxArray(); + DistributionMapping dmap = n_old.DistributionMap(); + + // store for one component of D_Fick + std::array< MultiFab, AMREX_SPACEDIM > diff_coef_face; + AMREX_D_TERM(diff_coef_face[0].define(convert(ba,nodal_flag_x), dmap, 1, 0);, + diff_coef_face[1].define(convert(ba,nodal_flag_y), dmap, 1, 0);, + diff_coef_face[2].define(convert(ba,nodal_flag_z), dmap, 1, 0);); + + // do not do diffusion if only one cell (well-mixed system) + // there is no restriction on the number of cells + // but we can shortcut the single cell case anyway for simplicity + if (n_cells[0] == 0 && n_cells[1] == 0) { + Abort("AdvanceDiffusion() - fix one cell case"); + } + + if (reactDiff_diffusion_type == 3) { + Abort("AdvanceDiffusion() - write multinomial case"); + return; + } + + MultiFab diff_fluxdiv (ba,dmap,nspecies,0); + MultiFab stoch_fluxdiv(ba,dmap,nspecies,0); + + Abort("Write DiffusiveNFluxdiv()"); + // DiffusiveNFluxdiv(); + + if (variance_coef_mass > 0.) 
{ + Abort("AdvanceDiffusion() - write stochastic case"); + } else { + stoch_fluxdiv.setVal(0.); + } + + if (reactDiff_diffusion_type == 0 || reactDiff_diffusion_type == 4) { + // explicit trapezoidal predictor-corrector OR forward Euler + + // forward Euler predictor + // n_k^{n+1,*} = n_k^n + dt div (D_k grad n_k)^n + // + dt div (sqrt(2 D_k n_k / dt) Z)^n + // + dt ext_src + MultiFab::Copy(n_new,n_old,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt,diff_fluxdiv ,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt,stoch_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt,ext_src ,0,0,nspecies,0); + n_new.FillBoundary(geom.periodicity()); + MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time); + + if (reactDiff_diffusion_type == 4) { + Abort("AdvanceDiffusion() - write trapezoidal corrector"); + + /* + + ! Trapezoidal corrector: + ! n_k^{n+1} = n_k^n + (dt/2) div (D_k grad n_k)^n + ! + (dt/2) div (D_k grad n_k)^{n+1,*} + ! + dt div (sqrt(2 D_k n_k / dt) Z)^n + ! + dt ext_src + ! This is the same as stepping to time t+2*dt and then averaging with the state at time t: + ! n_new = 1/2 * (n_old + n_new + dt*div (D grad n_new) + div (sqrt(2 D_k n_k dt) Z)^n) + ! which is what we use below + + ! compute diffusive flux divergence + call diffusive_n_fluxdiv(mla,n_new,diff_coef_face,diff_fluxdiv,dx,the_bc_tower) + + do n=1,nlevs + call multifab_plus_plus_c(n_new(n),1,n_old(n),1,nspecies,0) + call multifab_saxpy_3(n_new(n),dt,diff_fluxdiv(n)) + call multifab_saxpy_3(n_new(n),dt,stoch_fluxdiv(n)) + call multifab_saxpy_3(n_new(n),dt,ext_src(n)) + call multifab_mult_mult_s_c(n_new(n),1,0.5d0,nspecies,0) + call multifab_fill_boundary(n_new(n)) + call multifab_physbc(n_new(n),1,scal_bc_comp,nspecies, & + the_bc_tower%bc_tower_array(n),dx_in=dx(n,:)) + end do + + */ + } + + } else if (reactDiff_diffusion_type == 1) { + Abort("AdvanceDiffusion() - write Crank-Nicolson"); + + /* + ! Crank-Nicolson + ! n_k^{n+1} = n_k^n + (dt/2)(div D_k grad n_k)^n + ! + (dt/2)(div D_k grad n_k)^n+1 + ! + dt div (sqrt(2 D_k n_k / dt) Z)^n + ! + dt ext_src + ! + ! in delta formulation: + ! + ! (I - div (dt/2) D_k grad) delta n_k = dt div (D_k grad n_k^n) + ! + dt div (sqrt(2 D_k n_k / dt) Z)^n + ! + dt ext_src + ! + ! we combine the entire rhs into stoch_fluxdiv + do n=1,nlevs + call multifab_plus_plus(stoch_fluxdiv(n),ext_src(n),0) + call multifab_plus_plus(stoch_fluxdiv(n),diff_fluxdiv(n),0) + call multifab_mult_mult_s(stoch_fluxdiv(n),dt) + end do + call implicit_diffusion(mla,n_old,n_new,stoch_fluxdiv,diff_coef_face,dx,dt,the_bc_tower) + */ + } else if (reactDiff_diffusion_type == 2) { + Abort("AdvanceDiffusion() - write explicit midpoint scheme"); + + /* +! explicit midpoint scheme + + ! n_k^{n+1/2} = n_k^n + (dt/2) div (D_k grad n_k)^n + ! + (dt/2) div (sqrt(2 D_k n_k / (dt/2) ) Z_1)^n + ! + (dt/2) ext_src + do n=1,nlevs + call multifab_copy_c(n_new(n),1,n_old(n),1,nspecies,0) + call multifab_saxpy_3(n_new(n),dt/2.d0 ,diff_fluxdiv(n)) + call multifab_saxpy_3(n_new(n),dt/sqrt(2.d0),stoch_fluxdiv(n)) + call multifab_saxpy_3(n_new(n),dt/2.d0 ,ext_src(n)) + call multifab_fill_boundary(n_new(n)) + call multifab_physbc(n_new(n),1,scal_bc_comp,nspecies, & + the_bc_tower%bc_tower_array(n),dx_in=dx(n,:)) + end do + + ! compute diffusive flux divergence at t^{n+1/2} + call diffusive_n_fluxdiv(mla,n_new,diff_coef_face,diff_fluxdiv,dx,the_bc_tower) + + if (variance_coef_mass .gt. 0.d0) then + ! fill random flux multifabs with new random numbers + call fill_mass_stochastic(mla,the_bc_tower%bc_tower_array) + + ! 
compute second-stage stochastic flux divergence and + ! add to first-stage stochastic flux divergence + select case (midpoint_stoch_flux_type) + case (1) + ! use n_old + call stochastic_n_fluxdiv(mla,n_old,diff_coef_face,stoch_fluxdiv,dx,dt, & + the_bc_tower,increment_in=.true.) + case (2) + ! use n_pred + call stochastic_n_fluxdiv(mla,n_new,diff_coef_face,stoch_fluxdiv,dx,dt, & + the_bc_tower,increment_in=.true.) + case (3) + ! We use n_new=2*n_pred-n_old here as temporary storage since we will overwrite it shortly + do n=1,nlevs + call multifab_mult_mult_s_c(n_new(n),1,2.d0,nspecies,n_new(n)%ng) + call multifab_sub_sub_c(n_new(n),1,n_old(n),1,nspecies,n_new(n)%ng) + end do + ! use n_new=2*n_pred-n_old + call stochastic_n_fluxdiv(mla,n_new,diff_coef_face,stoch_fluxdiv,dx,dt, & + the_bc_tower,increment_in=.true.) + case default + call bl_error("advance_diffusion: invalid midpoint_stoch_flux_type") + end select + end if + + ! n_k^{n+1} = n_k^n + dt div (D_k grad n_k)^{n+1/2} + ! + dt div (sqrt(2 D_k n_k^n dt) Z_1 / sqrt(2) ) + ! + dt div (sqrt(2 D_k n_k^? dt) Z_2 / sqrt(2) ) + ! + dt ext_src + ! where + ! n_k^? = n_k^n (midpoint_stoch_flux_type=1) + ! = n_k^pred (midpoint_stoch_flux_type=2) + ! = 2*n_k^pred - n_k^n (midpoint_stoch_flux_type=3) + do n=1,nlevs + call multifab_copy_c(n_new(n),1,n_old(n),1,nspecies,0) + call multifab_saxpy_3(n_new(n),dt ,diff_fluxdiv(n)) + call multifab_saxpy_3(n_new(n),dt/sqrt(2.d0),stoch_fluxdiv(n)) + call multifab_saxpy_3(n_new(n),dt ,ext_src(n)) + call multifab_fill_boundary(n_new(n)) + call multifab_physbc(n_new(n),1,scal_bc_comp,nspecies, & + the_bc_tower%bc_tower_array(n),dx_in=dx(n,:)) + end do + */ + + } else { + Abort("AdvanceDiffusion() - invalid reactDiff_diffusion_type"); + } + + +} diff --git a/src_reactDiff/AdvanceTimestep.cpp b/src_reactDiff/AdvanceTimestep.cpp new file mode 100644 index 000000000..3062f970c --- /dev/null +++ b/src_reactDiff/AdvanceTimestep.cpp @@ -0,0 +1,66 @@ +#include "reactDiff_functions.H" +#include "chemistry_functions.H" + +void AdvanceTimestep(const MultiFab& n_old, + MultiFab& n_new, + const Real& dt, + const Real& time, + const Geometry& geom) { + + if (temporal_integrator > 0 && reactDiff_reaction_type != 0) { + if (reaction_type == 2) { + Abort("SSA (reaction_type==2) requires reactDiff_reaction_type=0 for split schemes"); + } + } + + // external source term for diffusion/reaction solvers for inhomogeneous bc algorithm + MultiFab Rn_steady(n_old.boxArray(), n_old.DistributionMap(), nspecies, 0); + + if (temporal_integrator < 0) { + // unsplit schemes + + + + } else { + + if (inhomogeneous_bc_fix) { + Abort("inhomogeneous_bc_fix not implemented yet"); + } else { + Rn_steady.setVal(0.); + } + + if (temporal_integrator == 0) { + // D + R +/* + call advance_diffusion(mla,n_old,n_new,dx,dt,the_bc_tower,Rn_steady) + do n=1,nlevs + call multifab_copy_c(n_old(n),1,n_new(n),1,nspecies,n_new(n)%ng) + end do + call advance_reaction (mla,n_new,n_old,dx,dt,the_bc_tower,Rn_steady) +*/ + + } else if (temporal_integrator == 1) { + // (1/2)R + D + (1/2)R +/* + call advance_reaction (mla,n_old,n_new,dx,0.5d0*dt,the_bc_tower,Rn_steady) + ! swap n_new/n_old to avoid calling copy() + call advance_diffusion(mla,n_new,n_old,dx,dt ,the_bc_tower,Rn_steady) + call advance_reaction (mla,n_old,n_new,dx,0.5d0*dt,the_bc_tower,Rn_steady) +*/ + + } else if (temporal_integrator == 2) { + // (1/2)D + R + (1/2)D +/* + call advance_diffusion(mla,n_old,n_new,dx,0.5d0*dt,the_bc_tower,Rn_steady) + ! 
swap n_new/n_old to avoid calling copy() + call advance_reaction (mla,n_new,n_old,dx,dt ,the_bc_tower,Rn_steady) + call advance_diffusion(mla,n_old,n_new,dx,0.5d0*dt,the_bc_tower,Rn_steady) +*/ + + } else { + Abort("AdvanceTimestep(): invalid temporal_integrator"); + } + + } + +} diff --git a/src_reactDiff/Make.package b/src_reactDiff/Make.package index 365e4872e..461c519ad 100644 --- a/src_reactDiff/Make.package +++ b/src_reactDiff/Make.package @@ -1,3 +1,5 @@ +CEXE_sources += AdvanceDiffusion.cpp +CEXE_sources += AdvanceTimestep.cpp CEXE_sources += reactDiff_functions.cpp CEXE_sources += WritePlotFile.cpp CEXE_headers += reactDiff_functions.H diff --git a/src_reactDiff/reactDiff_functions.H b/src_reactDiff/reactDiff_functions.H index bb17aecf5..abdc49440 100644 --- a/src_reactDiff/reactDiff_functions.H +++ b/src_reactDiff/reactDiff_functions.H @@ -11,10 +11,37 @@ using namespace reactDiff; using namespace amrex; using namespace common; +//////////////////////// +// In reactDiff_functions.cpp +//////////////////////// void InitializeReactDiffNamespace(); +//////////////////////// +// In AdvanceDiffusion.cpp +//////////////////////// +void AdvanceDiffusion(const MultiFab& n_old, + MultiFab& n_new, + const MultiFab& ext_src, + const Real& dt, + const Real& time, + const Geometry& geom); + +//////////////////////// +// In AdvanceTimestep.cpp +//////////////////////// +void AdvanceTimestep(const MultiFab& n_old, + MultiFab& n_new, + const Real& dt, + const Real& time, + const Geometry& geom); + +//////////////////////// +// In WritePlotFile.cpp +//////////////////////// + void WritePlotFile(int step, const amrex::Real time, const amrex::Geometry& geom, const MultiFab& n_in); + #endif From 3f4d752c0ef28cb22f2efa2ff3240c7b1836faeb Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 23 Aug 2024 17:47:50 -0700 Subject: [PATCH 041/151] build diffusion operator for explicit --- src_reactDiff/AdvanceDiffusion.cpp | 92 +++++++++++++++++++++++++++++ src_reactDiff/reactDiff_functions.H | 5 ++ src_reactDiff/reactDiff_namespace.H | 55 +---------------- 3 files changed, 98 insertions(+), 54 deletions(-) diff --git a/src_reactDiff/AdvanceDiffusion.cpp b/src_reactDiff/AdvanceDiffusion.cpp index 3dd9faaad..95fce7123 100644 --- a/src_reactDiff/AdvanceDiffusion.cpp +++ b/src_reactDiff/AdvanceDiffusion.cpp @@ -1,6 +1,9 @@ #include "reactDiff_functions.H" #include "chemistry_functions.H" +#include "AMReX_MLMG.H" +#include + void AdvanceDiffusion(const MultiFab& n_old, MultiFab& n_new, const MultiFab& ext_src, @@ -184,5 +187,94 @@ void AdvanceDiffusion(const MultiFab& n_old, Abort("AdvanceDiffusion() - invalid reactDiff_diffusion_type"); } +} + + +void DiffusiveNFluxdiv(MultiFab& n_in, + MultiFab& diff_fluxdiv, + const Geometry& geom, + const Real& time) { + + // fill n ghost cells + n_in.FillBoundary(geom.periodicity()); + MultiFabPhysBC(n_in, geom, 0, nspecies, SPEC_BC_COMP, time); + + BoxArray ba = n_in.boxArray(); + DistributionMapping dmap = n_in.DistributionMap(); + + // don't need to set much here for explicit evaluations + LPInfo info; + + // operator of the form (ascalar * acoef - bscalar div bcoef grad) phi + MLABecLaplacian mlabec({geom}, {ba}, {dmap}, info); + mlabec.setMaxOrder(2); + + // store one component at a time and take L(phi) one component at a time + MultiFab phi (ba,dmap,1,1); + MultiFab Lphi(ba,dmap,1,0); + + MultiFab acoef(ba,dmap,1,0); + std::array< MultiFab, AMREX_SPACEDIM > bcoef; + AMREX_D_TERM(bcoef[0].define(convert(ba,nodal_flag_x), dmap, 1, 0);, + 
bcoef[1].define(convert(ba,nodal_flag_y), dmap, 1, 0);, + bcoef[2].define(convert(ba,nodal_flag_z), dmap, 1, 0);); + + // build array of boundary conditions needed by MLABecLaplacian + std::array lo_mlmg_bc; + std::array hi_mlmg_bc; + + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) + { + if (bc_spec_lo[idim] == -1 || bc_spec_hi[idim] == -1) { + if ( !(bc_spec_lo[idim] == -1 && bc_spec_hi[idim] == -1) ) { + Abort("Both bc_spec_lo and bc_spec_hi must be periodic in a given direction if the other one is"); + } + lo_mlmg_bc[idim] = LinOpBCType::Periodic; + hi_mlmg_bc[idim] = LinOpBCType::Periodic; + } + + if (bc_spec_lo[idim] == 0) { + lo_mlmg_bc[idim] = LinOpBCType::inhomogNeumann; + } else if (bc_spec_lo[idim] == 1) { + lo_mlmg_bc[idim] = LinOpBCType::Dirichlet; + } else if (bc_spec_lo[idim] != -1) { + Abort("Invalid bc_spec_lo"); + } + + if (bc_spec_hi[idim] == 0) { + hi_mlmg_bc[idim] = LinOpBCType::inhomogNeumann; + } else if (bc_spec_hi[idim] == 1) { + hi_mlmg_bc[idim] = LinOpBCType::Dirichlet; + } else if (bc_spec_hi[idim] != -1) { + Abort("Invalid bc_spec_hi"); + } + } + + mlabec.setDomainBC(lo_mlmg_bc,hi_mlmg_bc); + + // set acoeff to 0and bcoeff to -1 + mlabec.setScalars(0., -1.); + + acoef.setVal(0.); + mlabec.setACoeffs(0, acoef); + + for (int i=0; i=0) - // 0=explicit trapezoidal predictor/corrector - // 1=Crank-Nicolson semi-implicit - // 2=explicit midpoint - // 3=multinomial diffusion - // 4=forward Euler extern AMREX_GPU_MANAGED int reactDiff_diffusion_type; - - // only used for split schemes (temporal_integrator>=0) - // 0=first-order (deterministic, tau leaping, CLE, or SSA) - // 1=second-order (determinisitc, tau leaping, or CLE only) extern AMREX_GPU_MANAGED int reactDiff_reaction_type; - - // only used for midpoint diffusion schemes (split as well as unsplit) - // corrector formulation of noise - // 1 = K(nold) * W1 + K(nold) * W2 - // 2 = K(nold) * W1 + K(npred) * W2 - // 3 = K(nold) * W1 + K(2*npred-nold) * W2 extern AMREX_GPU_MANAGED int midpoint_stoch_flux_type; - - // how to compute n on faces for stochastic weighting - // 1=arithmetic (with C0-Heaviside), 2=geometric, 3=harmonic - // 10=arithmetic average with discontinuous Heaviside function - // 11=arithmetic average with C1-smoothed Heaviside function - // 12=arithmetic average with C2-smoothed Heaviside function extern AMREX_GPU_MANAGED int avg_type; - - // use the Einkemmer boundary condition fix (split schemes only) extern AMREX_GPU_MANAGED int inhomogeneous_bc_fix; - - // volume multiplier (dv = product(dx(1:MAX_SPACEDIM))*volume_factor) - // only really intended for 3D since in 2D one can control the cell depth extern AMREX_GPU_MANAGED amrex::Real volume_factor; - - // initial values to be used in init_n.f90 extern AMREX_GPU_MANAGED Array2D n_init_in; - - // initialize from model file extern AMREX_GPU_MANAGED int model_file_init; - - // initialize with all number of molecules strictly integer extern AMREX_GPU_MANAGED int integer_populations; - - // Fickian diffusion coeffs extern AMREX_GPU_MANAGED amrex::GpuArray D_Fick; - - // diffusion boundary stencil order extern AMREX_GPU_MANAGED int diffusion_stencil_order; - - // implicit diffusion solve verbosity extern AMREX_GPU_MANAGED int diffusion_verbose; - - // implicit diffusion solve bottom solver verbosity extern AMREX_GPU_MANAGED int diffusion_bottom_verbose; - - // relative eps for implicit diffusion solve extern AMREX_GPU_MANAGED amrex::Real implicit_diffusion_rel_eps; - - // absolute eps for implicit diffusion solve extern AMREX_GPU_MANAGED 
amrex::Real implicit_diffusion_abs_eps; - } From fcbe57b64e665d3b082b5fb28b7220f30b327586 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Mon, 26 Aug 2024 09:28:00 -0700 Subject: [PATCH 042/151] more work --- src_reactDiff/InitN.cpp | 9 +++++++++ {exec/reactDiff => src_reactDiff}/main_driver.cpp | 0 2 files changed, 9 insertions(+) create mode 100644 src_reactDiff/InitN.cpp rename {exec/reactDiff => src_reactDiff}/main_driver.cpp (100%) diff --git a/src_reactDiff/InitN.cpp b/src_reactDiff/InitN.cpp new file mode 100644 index 000000000..0eacba081 --- /dev/null +++ b/src_reactDiff/InitN.cpp @@ -0,0 +1,9 @@ +#include "reactDiff_functions.H" + +void InitN(MultiFab& n_in, + const Geometry& geom) { + + const GpuArray dx = geom.CellSizeArray(); + + +} diff --git a/exec/reactDiff/main_driver.cpp b/src_reactDiff/main_driver.cpp similarity index 100% rename from exec/reactDiff/main_driver.cpp rename to src_reactDiff/main_driver.cpp From 497c984b50a313be1ead013fcca0fbc42e7f0f8d Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Mon, 26 Aug 2024 09:28:18 -0700 Subject: [PATCH 043/151] more work --- exec/reactDiff/inputs_2d | 13 +++++++--- exec/reactDiff/inputs_3d | 16 ++++++++++--- src_reactDiff/AdvanceDiffusion.cpp | 22 ++++++++--------- src_reactDiff/InitN.cpp | 23 +++++++++++++++++- src_reactDiff/Make.package | 1 + src_reactDiff/main_driver.cpp | 37 ++++++++++++++++------------- src_reactDiff/reactDiff_functions.H | 7 ++++++ 7 files changed, 84 insertions(+), 35 deletions(-) diff --git a/exec/reactDiff/inputs_2d b/exec/reactDiff/inputs_2d index 83d665b3b..ec33f345a 100644 --- a/exec/reactDiff/inputs_2d +++ b/exec/reactDiff/inputs_2d @@ -13,6 +13,7 @@ # Controls for number of steps between actions max_step = 10 plot_int = 1 + stats_int = -1 seed = 1 @@ -22,7 +23,13 @@ # ---------------------- # BC specifications: # -1 = periodic - bc_spec_lo = -1 -1 - bc_spec_hi = -1 -1 + # 1 = wall (Neumann) + # 2 = reservoir (Dirichlet) + bc_mass_lo = -1 -1 + bc_mass_hi = -1 -1 - stats_int = -1 + # if wall/reservoir, these are the numerical Neumann/Dirichlet values + bc_Yk_x_lo = 0. 0. + bc_Yk_x_hi = 0. 0. + bc_Yk_y_lo = 0. 0. + bc_Yk_y_hi = 0. 0. diff --git a/exec/reactDiff/inputs_3d b/exec/reactDiff/inputs_3d index 05adb9bba..6b3b0eb43 100644 --- a/exec/reactDiff/inputs_3d +++ b/exec/reactDiff/inputs_3d @@ -13,6 +13,7 @@ # Controls for number of steps between actions max_step = 10 plot_int = 1 + stats_int = -1 seed = 1 @@ -22,7 +23,16 @@ # ---------------------- # BC specifications: # -1 = periodic - bc_spec_lo = -1 -1 -1 - bc_spec_hi = -1 -1 -1 + # 1 = wall (Neumann) + # 2 = reservoir (Dirichlet) + bc_mass_lo = -1 -1 -1 + bc_mass_hi = -1 -1 -1 + + # if wall/reservoir, these are the numerical Neumann/Dirichlet values + bc_Yk_x_lo = 0. 0. + bc_Yk_x_hi = 0. 0. + bc_Yk_y_lo = 0. 0. + bc_Yk_y_hi = 0. 0. + bc_Yk_z_lo = 0. 0. + bc_Yk_z_hi = 0. 0. 
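The split-scheme branches that are still Fortran comments in AdvanceTimestep.cpp above (temporal_integrator == 1 and 2) follow the same pattern as the implemented D + R branch, swapping n_old and n_new between stages instead of copying. A minimal C++ sketch of those two branches, assuming the AdvanceDiffusion/AdvanceReaction signatures and the Rn_steady source term already used in AdvanceTimestep.cpp; this is a translation of the commented Fortran, not a tested implementation:

    } else if (temporal_integrator == 1) {
        // (1/2)R + D + (1/2)R; swap n_new/n_old to avoid an extra copy
        AdvanceReaction (n_old, n_new, Rn_steady, 0.5*dt, time, geom);
        AdvanceDiffusion(n_new, n_old, Rn_steady,     dt, time, geom);
        AdvanceReaction (n_old, n_new, Rn_steady, 0.5*dt, time, geom);
    } else if (temporal_integrator == 2) {
        // (1/2)D + R + (1/2)D; swap n_new/n_old to avoid an extra copy
        AdvanceDiffusion(n_old, n_new, Rn_steady, 0.5*dt, time, geom);
        AdvanceReaction (n_new, n_old, Rn_steady,     dt, time, geom);
        AdvanceDiffusion(n_old, n_new, Rn_steady, 0.5*dt, time, geom);
    }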
- stats_int = -1 diff --git a/src_reactDiff/AdvanceDiffusion.cpp b/src_reactDiff/AdvanceDiffusion.cpp index 95fce7123..771424f24 100644 --- a/src_reactDiff/AdvanceDiffusion.cpp +++ b/src_reactDiff/AdvanceDiffusion.cpp @@ -225,28 +225,28 @@ void DiffusiveNFluxdiv(MultiFab& n_in, for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - if (bc_spec_lo[idim] == -1 || bc_spec_hi[idim] == -1) { - if ( !(bc_spec_lo[idim] == -1 && bc_spec_hi[idim] == -1) ) { - Abort("Both bc_spec_lo and bc_spec_hi must be periodic in a given direction if the other one is"); + if (bc_mass_lo[idim] == -1 || bc_mass_hi[idim] == -1) { + if ( !(bc_mass_lo[idim] == -1 && bc_mass_hi[idim] == -1) ) { + Abort("Both bc_mass_lo and bc_mass_hi must be periodic in a given direction if the other one is"); } lo_mlmg_bc[idim] = LinOpBCType::Periodic; hi_mlmg_bc[idim] = LinOpBCType::Periodic; } - if (bc_spec_lo[idim] == 0) { + if (bc_mass_lo[idim] == 0) { lo_mlmg_bc[idim] = LinOpBCType::inhomogNeumann; - } else if (bc_spec_lo[idim] == 1) { + } else if (bc_mass_lo[idim] == 1) { lo_mlmg_bc[idim] = LinOpBCType::Dirichlet; - } else if (bc_spec_lo[idim] != -1) { - Abort("Invalid bc_spec_lo"); + } else if (bc_mass_lo[idim] != -1) { + Abort("Invalid bc_mass_lo"); } - if (bc_spec_hi[idim] == 0) { + if (bc_mass_hi[idim] == 0) { hi_mlmg_bc[idim] = LinOpBCType::inhomogNeumann; - } else if (bc_spec_hi[idim] == 1) { + } else if (bc_mass_hi[idim] == 1) { hi_mlmg_bc[idim] = LinOpBCType::Dirichlet; - } else if (bc_spec_hi[idim] != -1) { - Abort("Invalid bc_spec_hi"); + } else if (bc_mass_hi[idim] != -1) { + Abort("Invalid bc_mass_hi"); } } diff --git a/src_reactDiff/InitN.cpp b/src_reactDiff/InitN.cpp index 0eacba081..596bbfa82 100644 --- a/src_reactDiff/InitN.cpp +++ b/src_reactDiff/InitN.cpp @@ -1,9 +1,30 @@ #include "reactDiff_functions.H" void InitN(MultiFab& n_in, - const Geometry& geom) { + const Geometry& geom, + const Real& time) { const GpuArray dx = geom.CellSizeArray(); + for ( MFIter mfi(n_in,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.tilebox(); + + const Array4 & ninit = n_in.array(mfi); + + if (prob_type == 0) { + //============================================================ + // Thermodynamic equilibrium + //============================================================ + + amrex::ParallelFor(bx, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + ninit(i,j,k,n) = 0.; + }); + + } + } + n_in.FillBoundary(geom.periodicity()); + MultiFabPhysBC(n_in, geom, 0, nspecies, SPEC_BC_COMP, time); } diff --git a/src_reactDiff/Make.package b/src_reactDiff/Make.package index 461c519ad..ed434aa37 100644 --- a/src_reactDiff/Make.package +++ b/src_reactDiff/Make.package @@ -1,5 +1,6 @@ CEXE_sources += AdvanceDiffusion.cpp CEXE_sources += AdvanceTimestep.cpp +CEXE_sources += InitN.cpp CEXE_sources += reactDiff_functions.cpp CEXE_sources += WritePlotFile.cpp CEXE_headers += reactDiff_functions.H diff --git a/src_reactDiff/main_driver.cpp b/src_reactDiff/main_driver.cpp index 69561bdeb..8952e3e1f 100644 --- a/src_reactDiff/main_driver.cpp +++ b/src_reactDiff/main_driver.cpp @@ -34,7 +34,7 @@ void main_driver(const char* argv) // is the problem periodic? Vector is_periodic(AMREX_SPACEDIM,0); // set to 0 (not periodic) by default for (int i=0; i 0.) 
{ + Abort("initial_variance_mass not supported yet"); + // add_init_n_fluctuations() + } - MultiFab n_old(ba,dmap,nspecies,1); - MultiFab n_new(ba,dmap,nspecies,1); - - if (model_file_init) { - Abort("model_file_init not supported yet"); } else { - // Initialize n - // Init(); - n_old.setVal(0.); - } - if (std::abs(initial_variance_mass) > 0.) { - Abort("initial_variance_mass not supported yet"); - // add_init_n_fluctuations() + // checkpoint restart + } Real dt; diff --git a/src_reactDiff/reactDiff_functions.H b/src_reactDiff/reactDiff_functions.H index f33e7f9e9..72fa1ff89 100644 --- a/src_reactDiff/reactDiff_functions.H +++ b/src_reactDiff/reactDiff_functions.H @@ -40,6 +40,13 @@ void AdvanceTimestep(const MultiFab& n_old, const Real& time, const Geometry& geom); +//////////////////////// +// In InitN.cpp +//////////////////////// +void InitN(MultiFab& n_in, + const Geometry& geom, + const Real& time); + //////////////////////// // In WritePlotFile.cpp //////////////////////// From 99b752ff37730be0b06e0caf9a1cf82affc83d2b Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Mon, 26 Aug 2024 09:29:52 -0700 Subject: [PATCH 044/151] remove bc_spec_lo/hi - not being used (bc_mass_lo/hi is implemented) --- exec/DSMC/inputs_conc | 3 --- exec/DSMC/inputs_conc_gpu | 3 --- src_common/common_functions.cpp | 16 ---------------- src_common/common_namespace.H | 8 -------- 4 files changed, 30 deletions(-) diff --git a/exec/DSMC/inputs_conc b/exec/DSMC/inputs_conc index 7a0eb560f..94ff31381 100644 --- a/exec/DSMC/inputs_conc +++ b/exec/DSMC/inputs_conc @@ -71,9 +71,6 @@ bc_mass_lo = -1 -1 -1 bc_mass_hi = -1 -1 -1 - bc_spec_lo = 1 -1 -1 - bc_spec_hi = 0 -1 -1 - # Temperature if thermal BC specified t_hi = 300 300 300 t_lo = 300 300 300 diff --git a/exec/DSMC/inputs_conc_gpu b/exec/DSMC/inputs_conc_gpu index aa310bdfa..b9cdadc5a 100644 --- a/exec/DSMC/inputs_conc_gpu +++ b/exec/DSMC/inputs_conc_gpu @@ -73,9 +73,6 @@ bc_mass_lo = -1 -1 -1 bc_mass_hi = -1 -1 -1 - bc_spec_lo = 1 -1 -1 - bc_spec_hi = 0 -1 -1 - # Temperature if thermal BC specified #t_hi = 519 300 300 #t_lo = 273 300 300 diff --git a/src_common/common_functions.cpp b/src_common/common_functions.cpp index 2c2db9421..e9ca0ad07 100644 --- a/src_common/common_functions.cpp +++ b/src_common/common_functions.cpp @@ -97,10 +97,6 @@ AMREX_GPU_MANAGED amrex::GpuArray common::bc_mass_l AMREX_GPU_MANAGED amrex::GpuArray common::bc_mass_hi; AMREX_GPU_MANAGED amrex::GpuArray common::bc_therm_lo; AMREX_GPU_MANAGED amrex::GpuArray common::bc_therm_hi; -AMREX_GPU_MANAGED amrex::GpuArray common::bc_spec_lo; -AMREX_GPU_MANAGED amrex::GpuArray common::bc_spec_hi; - - AMREX_GPU_MANAGED amrex::GpuArray common::p_lo; AMREX_GPU_MANAGED amrex::GpuArray common::p_hi; @@ -483,8 +479,6 @@ void InitializeCommonNamespace() { bc_mass_hi[i] = 0; bc_therm_lo[i] = 0; bc_therm_hi[i] = 0; - bc_spec_lo[i] = -1; - bc_spec_hi[i] = -1; // Pressure drop are periodic inflow/outflow walls (bc_[hi,lo]=-2). 
p_lo[i] = 0.; @@ -827,16 +821,6 @@ void InitializeCommonNamespace() { bc_mass_hi[i] = temp_int[i]; } } - if (pp.queryarr("bc_spec_lo",temp_int,0,AMREX_SPACEDIM)) { - for (int i=0; i bc_therm_lo; extern AMREX_GPU_MANAGED amrex::GpuArray bc_therm_hi; - // BC specifications: - // -1 = don't change species, else change species to number - // 1 = wall - // 2 = reservoir (Dirichlet values must be suppled by other means) - extern AMREX_GPU_MANAGED amrex::GpuArray bc_spec_lo; - extern AMREX_GPU_MANAGED amrex::GpuArray bc_spec_hi; - - // Pressure drop are periodic inflow/outflow walls (bc_[hi,lo]=-2). extern AMREX_GPU_MANAGED amrex::GpuArray p_lo; extern AMREX_GPU_MANAGED amrex::GpuArray p_hi; From ff6d8e15deec5fc43a918cb0e3b41161e2fe7bfb Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Mon, 26 Aug 2024 09:57:32 -0700 Subject: [PATCH 045/151] lots of more work --- exec/reactDiff/GNUmakefile | 2 +- exec/reactDiff/inputs_2d | 35 ----------- exec/reactDiff/inputs_3d | 38 ------------ exec/reactDiff/inputs_paper_BPM_2d | 75 +++++++++++++++++++++++ exec/reactDiff/inputs_paper_Lemarchand_3d | 43 +++++++++++++ src_reactDiff/InitN.cpp | 40 +++++++++++- 6 files changed, 157 insertions(+), 76 deletions(-) delete mode 100644 exec/reactDiff/inputs_2d delete mode 100644 exec/reactDiff/inputs_3d create mode 100644 exec/reactDiff/inputs_paper_BPM_2d create mode 100644 exec/reactDiff/inputs_paper_Lemarchand_3d diff --git a/exec/reactDiff/GNUmakefile b/exec/reactDiff/GNUmakefile index 24812ca73..916832401 100644 --- a/exec/reactDiff/GNUmakefile +++ b/exec/reactDiff/GNUmakefile @@ -9,7 +9,7 @@ USE_CUDA = FALSE COMP = gnu DIM = 2 MAX_SPEC = 8 -MAX_REAC = 5 +MAX_REAC = 7 TINY_PROFILE = FALSE diff --git a/exec/reactDiff/inputs_2d b/exec/reactDiff/inputs_2d deleted file mode 100644 index ec33f345a..000000000 --- a/exec/reactDiff/inputs_2d +++ /dev/null @@ -1,35 +0,0 @@ - # Problem specification - prob_lo = 0.0 0.0 # physical lo coordinate - prob_hi = 1.0 1.0 # physical hi coordinate - - # number of cells in domain - n_cells = 32 32 - # max number of cells in a box - max_grid_size = 16 16 - - # Time-step control - fixed_dt = 0.1 - - # Controls for number of steps between actions - max_step = 10 - plot_int = 1 - stats_int = -1 - - seed = 1 - - nspecies = 2 - - # Boundary conditions - # ---------------------- - # BC specifications: - # -1 = periodic - # 1 = wall (Neumann) - # 2 = reservoir (Dirichlet) - bc_mass_lo = -1 -1 - bc_mass_hi = -1 -1 - - # if wall/reservoir, these are the numerical Neumann/Dirichlet values - bc_Yk_x_lo = 0. 0. - bc_Yk_x_hi = 0. 0. - bc_Yk_y_lo = 0. 0. - bc_Yk_y_hi = 0. 0. diff --git a/exec/reactDiff/inputs_3d b/exec/reactDiff/inputs_3d deleted file mode 100644 index 6b3b0eb43..000000000 --- a/exec/reactDiff/inputs_3d +++ /dev/null @@ -1,38 +0,0 @@ - # Problem specification - prob_lo = 0.0 0.0 0.0 # physical lo coordinate - prob_hi = 1.0 1.0 1.0 # physical hi coordinate - - # number of cells in domain - n_cells = 32 32 32 - # max number of cells in a box - max_grid_size = 16 16 16 - - # Time-step control - fixed_dt = 0.1 - - # Controls for number of steps between actions - max_step = 10 - plot_int = 1 - stats_int = -1 - - seed = 1 - - nspecies = 2 - - # Boundary conditions - # ---------------------- - # BC specifications: - # -1 = periodic - # 1 = wall (Neumann) - # 2 = reservoir (Dirichlet) - bc_mass_lo = -1 -1 -1 - bc_mass_hi = -1 -1 -1 - - # if wall/reservoir, these are the numerical Neumann/Dirichlet values - bc_Yk_x_lo = 0. 0. - bc_Yk_x_hi = 0. 0. - bc_Yk_y_lo = 0. 0. - bc_Yk_y_hi = 0. 0. 
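The bc_mass_lo/hi handling updated in AdvanceDiffusion.cpp above repeats the same integer-code-to-LinOpBCType translation for the lo and hi faces. A minimal standalone sketch of that translation as a shared helper, assuming the convention actually checked in DiffusiveNFluxdiv (-1 periodic, 0 inhomogeneous Neumann, 1 Dirichlet); the helper name BCCodeToLinOp is illustrative only and not part of the patches:

    #include <AMReX.H>
    #include <AMReX_LO_BCTYPES.H>

    // Map an FHDeX bc_mass code to the boundary type expected by MLABecLaplacian.
    amrex::LinOpBCType BCCodeToLinOp (int bc_code)
    {
        if (bc_code == -1) return amrex::LinOpBCType::Periodic;
        if (bc_code ==  0) return amrex::LinOpBCType::inhomogNeumann;
        if (bc_code ==  1) return amrex::LinOpBCType::Dirichlet;
        amrex::Abort("BCCodeToLinOp: invalid bc_mass code");
        return amrex::LinOpBCType::Periodic; // unreachable after Abort
    }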
- bc_Yk_z_lo = 0. 0. - bc_Yk_z_hi = 0. 0. - diff --git a/exec/reactDiff/inputs_paper_BPM_2d b/exec/reactDiff/inputs_paper_BPM_2d new file mode 100644 index 000000000..9127a64f0 --- /dev/null +++ b/exec/reactDiff/inputs_paper_BPM_2d @@ -0,0 +1,75 @@ +# Problem specification +prob_lo = 0.0 0.0 # physical lo coordinate +prob_hi = 32.0 32.0 # physical hi coordinate + +cell_depth = 10. + +# number of cells in domain +n_cells = 64 64 +# max number of cells in a box +max_grid_size = 32 32 + +# Time-step control +fixed_dt = 0.5 + +# Controls for number of steps between actions +max_step = 2 +plot_int = 1 +stats_int = -1 + +seed = 1 + +nspecies = 3 + +prob_type = 0 + +n_init_in_1 = 1685.8 533.5 56.38 # Start on the limit cycle + +D_Fick = 0.1 0.01 0.01 + +temporal_integrator = 0 +reactDiff_diffusion_type = 4 +reactDiff_reaction_type = 0 + +avg_type = 1 + +nreactions = 7 +rate_const = 0.0002 0.0002 1. 0.03666663 4.44444555555 0.00333333 16.66665 + + # BPM model is: + # (1) U + W --> V + W + # (2) V + V --> W + # (3) W --> V + V + # (4) V --> 0 + # (5) 0 --> V + # (6) U --> 0 + # (7) 0 --> U +stoich_1R = 1 0 1 +stoich_1P = 0 1 1 +stoich_2R = 0 2 0 +stoich_2P = 0 0 1 +stoich_3R = 0 0 1 +stoich_3P = 0 2 0 +stoich_4R = 0 1 0 +stoich_4P = 0 0 0 +stoich_5R = 0 0 0 +stoich_5P = 0 1 0 +stoich_6R = 1 0 0 +stoich_6P = 0 0 0 +stoich_7R = 0 0 0 +stoich_7P = 1 0 0 + +rate_multiplier = 1. + +include_discrete_LMA_correction = 1 + +reaction_type = 0 + +# Boundary conditions +# ---------------------- +# BC specifications: +# -1 = periodic +# 1 = wall (Neumann) +# 2 = reservoir (Dirichlet) +bc_mass_lo = -1 -1 +bc_mass_hi = -1 -1 diff --git a/exec/reactDiff/inputs_paper_Lemarchand_3d b/exec/reactDiff/inputs_paper_Lemarchand_3d new file mode 100644 index 000000000..592c69219 --- /dev/null +++ b/exec/reactDiff/inputs_paper_Lemarchand_3d @@ -0,0 +1,43 @@ +# Problem specification +prob_lo = 0.0 0.0 0.0 # physical lo coordinate +prob_hi = 512.0 512.0 512.0 # physical hi coordinate + +# number of cells in domain +n_cells = 64 64 64 +# max number of cells in a box +max_grid_size = 32 32 32 + +# Time-step control +fixed_dt = 0.1 + +# Controls for number of steps between actions +max_step = 10 +plot_int = 1 +stats_int = -1 + +seed = 1 + +nspecies = 2 + +prob_type = 5 + +n_init_in_1 = 2.16245 1.35018 +n_init_in_2 = 0. 10. + +# Boundary conditions +# ---------------------- +# BC specifications: +# -1 = periodic +# 1 = wall (Neumann) +# 2 = reservoir (Dirichlet) +bc_mass_lo = -1 -1 -1 +bc_mass_hi = -1 -1 -1 + +# if wall/reservoir, these are the numerical Neumann/Dirichlet values +bc_Yk_x_lo = 0. 0. +bc_Yk_x_hi = 0. 0. +bc_Yk_y_lo = 0. 0. +bc_Yk_y_hi = 0. 0. +bc_Yk_z_lo = 0. 0. +bc_Yk_z_hi = 0. 0. 
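For the 2D BPM input above, the reaction cell volume that chemical_rates() computes (dv = dx*dy*cell_depth in 2D, times the default volume_factor = 1) works out to:

    dx = dy = (prob_hi - prob_lo) / n_cells = 32.0 / 64 = 0.5
    dv = 0.5 * 0.5 * 10. = 2.5

so the initial U number density of 1685.8 (first entry of n_init_in_1) corresponds to roughly 1685.8 * 2.5 ≈ 4215 molecules per cell, which is the per-cell population that the discrete LMA correction and the stochastic reaction sampling act on.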
+ diff --git a/src_reactDiff/InitN.cpp b/src_reactDiff/InitN.cpp index 596bbfa82..0b0575c83 100644 --- a/src_reactDiff/InitN.cpp +++ b/src_reactDiff/InitN.cpp @@ -10,7 +10,7 @@ void InitN(MultiFab& n_in, const Box& bx = mfi.tilebox(); - const Array4 & ninit = n_in.array(mfi); + const Array4 & n_init = n_in.array(mfi); if (prob_type == 0) { //============================================================ @@ -19,10 +19,46 @@ void InitN(MultiFab& n_in, amrex::ParallelFor(bx, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { - ninit(i,j,k,n) = 0.; + n_init(i,j,k,n) = n_init_in(0,n); }); + } else if (prob_type == 5) { + //================================================================= + // bubble having radius = 0.5*perturb_width*dx(1) + // n_init = n_init_in(1,:) inside, n_init = n_init_in (2,:) outside + // can be discontinous or smooth depending on smoothing_width + //================================================================= + + Real rad = 0.5*perturb_width*dx[0]; + + amrex::ParallelFor(bx, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + Real x = prob_lo[0] + (i+0.5)*dx[0] - 0.5*(prob_lo[0]+prob_hi[0]); + Real y = prob_lo[1] + (j+0.5)*dx[1] - 0.5*(prob_lo[1]+prob_hi[1]); + Real r = std::sqrt(x*x + y*y); +#if (AMREX_SPACEDIM == 3) + Real z = prob_lo[2] + (k+0.5)*dx[2] - 0.5*(prob_lo[2]+prob_hi[2]); + r = std::sqrt(x*x + y*y + z*z); +#endif + + if (smoothing_width == 0.) { + // discontinuous interface + if (r < rad) { + n_init(i,j,k,n) = n_init_in(0,n); + } else { + n_init(i,j,k,n) = n_init_in(1,n); + } + } else { + // smooth interface + n_init(i,j,k,n) = n_init_in(0,n) + (n_init_in(1,n) - n_init_in(0,n))* 0.5*(1. + std::tanh((r-rad)/(smoothing_width*dx[0]))); + } + + }); + + } else { + Abort("prob_type not implemented yet"); } + } n_in.FillBoundary(geom.periodicity()); From 9c3c7fe4528ab49013fc105313389a6a05f654d6 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Mon, 26 Aug 2024 16:43:29 -0700 Subject: [PATCH 046/151] outline for reactions --- exec/reactDiff/inputs_paper_BPM_2d | 2 +- src_reactDiff/AdvanceDiffusion.cpp | 5 ++- src_reactDiff/AdvanceReaction.cpp | 60 +++++++++++++++++++++++++++++ src_reactDiff/AdvanceTimestep.cpp | 12 ++---- src_reactDiff/Make.package | 1 + src_reactDiff/reactDiff_functions.H | 14 ++++++- 6 files changed, 81 insertions(+), 13 deletions(-) create mode 100644 src_reactDiff/AdvanceReaction.cpp diff --git a/exec/reactDiff/inputs_paper_BPM_2d b/exec/reactDiff/inputs_paper_BPM_2d index 9127a64f0..c83b4ed25 100644 --- a/exec/reactDiff/inputs_paper_BPM_2d +++ b/exec/reactDiff/inputs_paper_BPM_2d @@ -33,7 +33,7 @@ reactDiff_reaction_type = 0 avg_type = 1 -nreactions = 7 +nreaction = 7 rate_const = 0.0002 0.0002 1. 
0.03666663 4.44444555555 0.00333333 16.66665 # BPM model is: diff --git a/src_reactDiff/AdvanceDiffusion.cpp b/src_reactDiff/AdvanceDiffusion.cpp index 771424f24..391dc3b6e 100644 --- a/src_reactDiff/AdvanceDiffusion.cpp +++ b/src_reactDiff/AdvanceDiffusion.cpp @@ -1,10 +1,11 @@ #include "reactDiff_functions.H" -#include "chemistry_functions.H" #include "AMReX_MLMG.H" #include -void AdvanceDiffusion(const MultiFab& n_old, +// Solves n_t = div ( D grad (n)) + div (sqrt(2*variance*D*n)*W) + g +// where g is a constant in time external source +void AdvanceDiffusion(MultiFab& n_old, MultiFab& n_new, const MultiFab& ext_src, const Real& dt, diff --git a/src_reactDiff/AdvanceReaction.cpp b/src_reactDiff/AdvanceReaction.cpp new file mode 100644 index 000000000..850189fc0 --- /dev/null +++ b/src_reactDiff/AdvanceReaction.cpp @@ -0,0 +1,60 @@ +#include "reactDiff_functions.H" +#include "chemistry_functions.H" + +// this solves dn/dt = f(n) - g (note the minus sign for g) +// where f(n) are the chemical production rates (deterministic or stochastic) +// and g=ext_src is a constant (in time) *deterministic* source term. +// to model stochastic particle production (sources) include g in the definition of f instead. +// or add it as a reaction 0->products +void AdvanceReaction(MultiFab& n_old, + MultiFab& n_new, + const MultiFab& ext_src, + const Real& dt, + const Real& time, + const Geometry& geom) { + + BoxArray ba = n_old.boxArray(); + DistributionMapping dmap = n_old.DistributionMap(); + + // if there are no reactions to process, copy n_old to n_new, + // account for ext_src and return + if (nreaction < 1) { + MultiFab::LinComb(n_new,1,n_old,0,-dt,ext_src,0,0,nspecies,0); + n_new.FillBoundary(geom.periodicity()); + MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time); + return; + } + + MultiFab rate(ba,dmap,nspecies,0); + + if (reactDiff_reaction_type == 0) { // first-order det/tau-leaping/CLE, or SSA + + // ChemicalRates(); + + MultiFab::LinComb(n_new,1,n_old,0,-dt,rate,0,0,nspecies,0); + MultiFab::Saxpy(n_new,-dt,ext_src,0,0,nspecies,0); +/* + ! calculate rates + ! rates could be deterministic or stochastic depending on use_Poisson_rng + call chemical_rates(mla,n_old,rate,dx,dt,vol_fac_in=volume_factor) + + ! update + do n=1,nlevs + call multifab_copy_c(n_new(n),1,n_old(n),1,nspecies,0) + call multifab_saxpy_3(n_new(n),dt,rate(n)) + call multifab_saxpy_3(n_new(n),-dt,ext_src(n)) ! 
note the negative sign + + call multifab_fill_boundary(n_new(n)) + call multifab_physbc(n_new(n),1,scal_bc_comp,nspecies, & + the_bc_tower%bc_tower_array(n),dx_in=dx(n,:)) + end do +*/ + } else if (reactDiff_reaction_type == 1) { // second-order det/tau-leaping/CLE + + } else { + Abort("AdvanceReaction() - invalid reactDiff_reaction_type"); + } + + n_new.FillBoundary(geom.periodicity()); + MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time); +} diff --git a/src_reactDiff/AdvanceTimestep.cpp b/src_reactDiff/AdvanceTimestep.cpp index 3062f970c..5c575201f 100644 --- a/src_reactDiff/AdvanceTimestep.cpp +++ b/src_reactDiff/AdvanceTimestep.cpp @@ -1,7 +1,7 @@ #include "reactDiff_functions.H" #include "chemistry_functions.H" -void AdvanceTimestep(const MultiFab& n_old, +void AdvanceTimestep(MultiFab& n_old, MultiFab& n_new, const Real& dt, const Real& time, @@ -31,13 +31,9 @@ void AdvanceTimestep(const MultiFab& n_old, if (temporal_integrator == 0) { // D + R -/* - call advance_diffusion(mla,n_old,n_new,dx,dt,the_bc_tower,Rn_steady) - do n=1,nlevs - call multifab_copy_c(n_old(n),1,n_new(n),1,nspecies,n_new(n)%ng) - end do - call advance_reaction (mla,n_new,n_old,dx,dt,the_bc_tower,Rn_steady) -*/ + AdvanceDiffusion(n_old,n_new,Rn_steady,dt,time,geom); + MultiFab::Copy(n_old,n_new,0,0,nspecies,1); + AdvanceReaction(n_old,n_new,Rn_steady,dt,time,geom); } else if (temporal_integrator == 1) { // (1/2)R + D + (1/2)R diff --git a/src_reactDiff/Make.package b/src_reactDiff/Make.package index ed434aa37..17a399309 100644 --- a/src_reactDiff/Make.package +++ b/src_reactDiff/Make.package @@ -1,3 +1,4 @@ +CEXE_sources += AdvanceReaction.cpp CEXE_sources += AdvanceDiffusion.cpp CEXE_sources += AdvanceTimestep.cpp CEXE_sources += InitN.cpp diff --git a/src_reactDiff/reactDiff_functions.H b/src_reactDiff/reactDiff_functions.H index 72fa1ff89..1aae99522 100644 --- a/src_reactDiff/reactDiff_functions.H +++ b/src_reactDiff/reactDiff_functions.H @@ -19,7 +19,7 @@ void InitializeReactDiffNamespace(); //////////////////////// // In AdvanceDiffusion.cpp //////////////////////// -void AdvanceDiffusion(const MultiFab& n_old, +void AdvanceDiffusion(MultiFab& n_old, MultiFab& n_new, const MultiFab& ext_src, const Real& dt, @@ -31,10 +31,20 @@ void DiffusiveNFluxdiv(MultiFab& n_in, const Geometry& geom, const Real& time); +//////////////////////// +// In AdvanceReaction.cpp +//////////////////////// +void AdvanceReaction(MultiFab& n_old, + MultiFab& n_new, + const MultiFab& ext_src, + const Real& dt, + const Real& time, + const Geometry& geom); + //////////////////////// // In AdvanceTimestep.cpp //////////////////////// -void AdvanceTimestep(const MultiFab& n_old, +void AdvanceTimestep(MultiFab& n_old, MultiFab& n_new, const Real& dt, const Real& time, From 62f6d3dfe328eadc585b180c9fd1f6cec299b0c5 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Mon, 26 Aug 2024 20:43:58 -0700 Subject: [PATCH 047/151] more chemistry routines --- src_chemistry/chemistry_functions.H | 2 +- src_chemistry/chemistry_functions.cpp | 104 +++++++++++++++++++++++--- 2 files changed, 95 insertions(+), 11 deletions(-) diff --git a/src_chemistry/chemistry_functions.H b/src_chemistry/chemistry_functions.H index 7033431e6..9a5ebe065 100644 --- a/src_chemistry/chemistry_functions.H +++ b/src_chemistry/chemistry_functions.H @@ -21,7 +21,7 @@ void compute_compressible_chemistry_source_CLE(amrex::Real dt, amrex::Real dV, MultiFab& prim, MultiFab& source, MultiFab& ranchem); void chemical_rates(const MultiFab& n_cc, MultiFab& chem_rate, 
amrex::Geometry geom, amrex::Real dt, - const MultiFab& n_interm, Vector lin_comb_coef_in); + const MultiFab& n_interm, Vector lin_comb_coef_in, Real volume_factor_in=1.); AMREX_GPU_HOST_DEVICE void compute_reaction_rates(GpuArray& n_in, GpuArray& reaction_rates, diff --git a/src_chemistry/chemistry_functions.cpp b/src_chemistry/chemistry_functions.cpp index 7f335b9e9..59310da62 100644 --- a/src_chemistry/chemistry_functions.cpp +++ b/src_chemistry/chemistry_functions.cpp @@ -95,10 +95,10 @@ void InitializeChemistryNamespace() rate_multiplier = 1.; pp.query("rate_multiplier",rate_multiplier); - include_discrete_LMA_correction = 1; + include_discrete_LMA_correction = 0; pp.query("include_discrete_LMA_correction",include_discrete_LMA_correction); - exclude_solvent_comput_rates = 0; + exclude_solvent_comput_rates = -1; pp.query("exclude_solvent_comput_rates",exclude_solvent_comput_rates); // get reaction type (0=deterministic; 1=CLE; 2=SSA; 3=tau leap) @@ -194,8 +194,13 @@ void compute_compressible_chemistry_source_CLE(amrex::Real dt, amrex::Real dV, void chemical_rates(const MultiFab& n_cc, MultiFab& chem_rate, amrex::Geometry geom, amrex::Real dt, - const MultiFab& n_interm, Vector lin_comb_coef_in) + const MultiFab& n_interm, Vector lin_comb_coef_in, Real volume_factor_in) { + if (nreaction == 1) { + chem_rate.setVal(0.); + return; + } + int lin_comb_avg_react_rate = 1; if (lin_comb_coef_in[0] == 1. && lin_comb_coef_in[1] == 0.) { lin_comb_avg_react_rate = 0; @@ -208,9 +213,53 @@ void chemical_rates(const MultiFab& n_cc, MultiFab& chem_rate, amrex::Geometry g const Real* dx = geom.CellSize(); Real dv = (AMREX_SPACEDIM == 3) ? dx[0]*dx[1]*dx[2] : dx[0]*dx[1]*cell_depth; - + dv *= volume_factor_in; + for (MFIter mfi(n_cc); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.validbox(); + + const Array4& n_arr = n_cc.array(mfi); + const Array4& n_int = n_interm.array(mfi); + const Array4& rate = chem_rate.array(mfi); + + if (reaction_type == 2) { // SSA + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept + { + Abort("chemical_rates() - SSA not supported"); + }); + } else { + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept + { + if (lin_comb_avg_react_rate == 1) { + Abort("chemical_rates(); lin_comb_avg_react_rate == 1 not supported"); + } else { + + GpuArray n_in; + GpuArray avg_reaction_rate; + GpuArray avg_num_reactions; + GpuArray num_reactions; + + for (int n=0; n& n_in, @@ -218,15 +267,50 @@ AMREX_GPU_HOST_DEVICE void compute_reaction_rates(GpuArray& n_ const amrex::Real& dv) { GpuArray n_nonneg; - Real n_sum = 0.; - - for (int i=0; i& n_in, + GpuArray& num_reactions, + GpuArray& avg_num_reactions) +{ + if (reaction_type == -1) { // deterministic + for (int n=0; n Date: Mon, 26 Aug 2024 20:45:17 -0700 Subject: [PATCH 048/151] more chemistry routines --- src_chemistry/chemistry_functions.H | 3 +++ src_chemistry/chemistry_functions.cpp | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src_chemistry/chemistry_functions.H b/src_chemistry/chemistry_functions.H index 9a5ebe065..33171b376 100644 --- a/src_chemistry/chemistry_functions.H +++ b/src_chemistry/chemistry_functions.H @@ -27,4 +27,7 @@ AMREX_GPU_HOST_DEVICE void compute_reaction_rates(GpuArray& n_ GpuArray& reaction_rates, const amrex::Real& dv); +AMREX_GPU_HOST_DEVICE void sample_num_reactions(GpuArray& n_in, + GpuArray& num_reactions, + GpuArray& avg_num_reactions); #endif diff --git a/src_chemistry/chemistry_functions.cpp b/src_chemistry/chemistry_functions.cpp index 
59310da62..1fc9bf015 100644 --- a/src_chemistry/chemistry_functions.cpp +++ b/src_chemistry/chemistry_functions.cpp @@ -251,7 +251,7 @@ void chemical_rates(const MultiFab& n_cc, MultiFab& chem_rate, amrex::Geometry g } for (int r=0; r Date: Tue, 27 Aug 2024 08:10:58 -0700 Subject: [PATCH 049/151] starting the testing/debugging --- exec/reactDiff/inputs_paper_BPM_2d | 71 +++++++++++++++++++++--------- src_reactDiff/AdvanceDiffusion.cpp | 5 +-- 2 files changed, 52 insertions(+), 24 deletions(-) diff --git a/exec/reactDiff/inputs_paper_BPM_2d b/exec/reactDiff/inputs_paper_BPM_2d index c83b4ed25..7e3386ba6 100644 --- a/exec/reactDiff/inputs_paper_BPM_2d +++ b/exec/reactDiff/inputs_paper_BPM_2d @@ -2,13 +2,13 @@ prob_lo = 0.0 0.0 # physical lo coordinate prob_hi = 32.0 32.0 # physical hi coordinate -cell_depth = 10. - -# number of cells in domain +# number of cells in domain and maximum number of cells in a box n_cells = 64 64 -# max number of cells in a box max_grid_size = 32 32 +# to compute cell volume in 2D problems +cell_depth = 10. + # Time-step control fixed_dt = 0.5 @@ -20,30 +20,57 @@ stats_int = -1 seed = 1 nspecies = 3 +nreaction = 7 prob_type = 0 n_init_in_1 = 1685.8 533.5 56.38 # Start on the limit cycle -D_Fick = 0.1 0.01 0.01 - +# 0=D+R (first-order splitting) +# 1=(1/2)R + D + (1/2)R (Strang option 1) +# 2=(1/2)D + R + (1/2)D (Strang option 2) +# -1=unsplit forward Euler +# -2=unsplit explicit midpoint +# -3=unsplit multinomial diffusion +# -4=unsplit implicit midpoint temporal_integrator = 0 + +# only used for split schemes (temporal_integrator>=0) +# 0=explicit trapezoidal predictor/corrector +# 1=Crank-Nicolson semi-implicit +# 2=explicit midpoint +# 3=multinomial diffusion +# 4=forward Euler reactDiff_diffusion_type = 4 -reactDiff_reaction_type = 0 +# Fickian diffusion coeffs +D_Fick = 0.1 0.01 0.01 + +variance_coef_mass = 0. + +# how to compute n on faces for stochastic weighting +# 1=arithmetic (with C0-Heaviside), 2=geometric, 3=harmonic +# 10=arithmetic average with discontinuous Heaviside function +# 11=arithmetic average with C1-smoothed Heaviside function +# 12=arithmetic average with C2-smoothed Heaviside function avg_type = 1 -nreaction = 7 -rate_const = 0.0002 0.0002 1. 0.03666663 4.44444555555 0.00333333 16.66665 +# only used for split schemes (temporal_integrator>=0) +# 0=first-order (deterministic, tau leaping, CLE, or SSA) +# 1=second-order (determinisitc, tau leaping, or CLE only) +reactDiff_reaction_type = 0 + +# 0=deterministic; 1=CLE; 2=SSA; 3=tau leap +reaction_type = 0 - # BPM model is: - # (1) U + W --> V + W - # (2) V + V --> W - # (3) W --> V + V - # (4) V --> 0 - # (5) 0 --> V - # (6) U --> 0 - # (7) 0 --> U +# BPM model is: +# (1) U + W --> V + W +# (2) V + V --> W +# (3) W --> V + V +# (4) V --> 0 +# (5) 0 --> V +# (6) U --> 0 +# (7) 0 --> U stoich_1R = 1 0 1 stoich_1P = 0 1 1 stoich_2R = 0 2 0 @@ -59,11 +86,13 @@ stoich_6P = 0 0 0 stoich_7R = 0 0 0 stoich_7P = 1 0 0 +# reaction rate constant for each reaction (assuming Law of Mass Action holds) +# using rate_multiplier, reaction rates can be changed by the same factor +# if include_discrete_LMA_correction, n^2 and n^3 in rate expressions become +# n*(n-1/dv) and n*(n-1/dv)*(n-2/dv). +rate_const = 0.0002 0.0002 1. 0.03666663 4.44444555555 0.00333333 16.66665 rate_multiplier = 1. 
- -include_discrete_LMA_correction = 1 - -reaction_type = 0 +include_discrete_LMA_correction = 0 # Boundary conditions # ---------------------- diff --git a/src_reactDiff/AdvanceDiffusion.cpp b/src_reactDiff/AdvanceDiffusion.cpp index 391dc3b6e..d22346af4 100644 --- a/src_reactDiff/AdvanceDiffusion.cpp +++ b/src_reactDiff/AdvanceDiffusion.cpp @@ -36,8 +36,7 @@ void AdvanceDiffusion(MultiFab& n_old, MultiFab diff_fluxdiv (ba,dmap,nspecies,0); MultiFab stoch_fluxdiv(ba,dmap,nspecies,0); - Abort("Write DiffusiveNFluxdiv()"); - // DiffusiveNFluxdiv(); + DiffusiveNFluxdiv(n_old,diff_fluxdiv,geom,time); if (variance_coef_mass > 0.) { Abort("AdvanceDiffusion() - write stochastic case"); @@ -59,7 +58,7 @@ void AdvanceDiffusion(MultiFab& n_old, n_new.FillBoundary(geom.periodicity()); MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time); - if (reactDiff_diffusion_type == 4) { + if (reactDiff_diffusion_type == 0) { Abort("AdvanceDiffusion() - write trapezoidal corrector"); /* From 5dcd786121a87d1ca902706431dbbd27d20a2e22 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Tue, 27 Aug 2024 08:22:26 -0700 Subject: [PATCH 050/151] more debugging --- src_chemistry/chemistry_functions.H | 4 ++-- src_chemistry/chemistry_functions.cpp | 12 ++++++------ src_reactDiff/AdvanceReaction.cpp | 9 +++++++-- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src_chemistry/chemistry_functions.H b/src_chemistry/chemistry_functions.H index 33171b376..bdacbeae6 100644 --- a/src_chemistry/chemistry_functions.H +++ b/src_chemistry/chemistry_functions.H @@ -20,8 +20,8 @@ void InitializeChemistryNamespace(); void compute_compressible_chemistry_source_CLE(amrex::Real dt, amrex::Real dV, MultiFab& prim, MultiFab& source, MultiFab& ranchem); -void chemical_rates(const MultiFab& n_cc, MultiFab& chem_rate, amrex::Geometry geom, amrex::Real dt, - const MultiFab& n_interm, Vector lin_comb_coef_in, Real volume_factor_in=1.); +void ChemicalRates(const MultiFab& n_cc, MultiFab& chem_rate, const amrex::Geometry& geom, const amrex::Real& dt, + const MultiFab& n_interm, Vector& lin_comb_coef_in, Real volume_factor_in=1.); AMREX_GPU_HOST_DEVICE void compute_reaction_rates(GpuArray& n_in, GpuArray& reaction_rates, diff --git a/src_chemistry/chemistry_functions.cpp b/src_chemistry/chemistry_functions.cpp index 1fc9bf015..3f17ae188 100644 --- a/src_chemistry/chemistry_functions.cpp +++ b/src_chemistry/chemistry_functions.cpp @@ -193,8 +193,8 @@ void compute_compressible_chemistry_source_CLE(amrex::Real dt, amrex::Real dV, } -void chemical_rates(const MultiFab& n_cc, MultiFab& chem_rate, amrex::Geometry geom, amrex::Real dt, - const MultiFab& n_interm, Vector lin_comb_coef_in, Real volume_factor_in) +void ChemicalRates(const MultiFab& n_cc, MultiFab& chem_rate, const amrex::Geometry& geom, const amrex::Real& dt, + const MultiFab& n_interm, Vector& lin_comb_coef_in, Real volume_factor_in) { if (nreaction == 1) { chem_rate.setVal(0.); @@ -227,13 +227,13 @@ void chemical_rates(const MultiFab& n_cc, MultiFab& chem_rate, amrex::Geometry g if (reaction_type == 2) { // SSA amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { - Abort("chemical_rates() - SSA not supported"); + Abort("ChemicalRates() - SSA not supported"); }); } else { amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { if (lin_comb_avg_react_rate == 1) { - Abort("chemical_rates(); lin_comb_avg_react_rate == 1 not supported"); + Abort("ChemicalRates(); lin_comb_avg_react_rate == 1 not supported"); } else { GpuArray n_in; 
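As an aside on the law-of-mass-action rates exercised by these patches: the inputs files state that with include_discrete_LMA_correction the factors n^2 and n^3 in the rate expressions become n*(n-1/dv) and n*(n-1/dv)*(n-2/dv). A minimal standalone sketch of that correction for a single reactant is given below; the function name, its arguments, and the clamping of negative densities are illustrative only and are not part of the FHDeX sources or of this patch.

    #include <algorithm>  // std::max
    #include <cmath>      // std::pow

    // Illustrative sketch (not FHDeX code): propensity contribution of one
    // reactant with stoichiometric coefficient 'order' in a cell of volume dv,
    // under the law of mass action.
    // Without the discrete correction this is k * n^order; with it, each extra
    // power of n is replaced by (n - m/dv), clamped at zero for small n.
    double lma_propensity(double k, double n, int order, double dv,
                          bool discrete_correction)
    {
        double n_nonneg = std::max(0.0, n);          // discard negative densities
        if (!discrete_correction) {
            return k * std::pow(n_nonneg, order);    // classical LMA
        }
        double rate = k;
        for (int m = 0; m < order; ++m) {
            rate *= std::max(0.0, n_nonneg - m/dv);  // n, n-1/dv, n-2/dv, ...
        }
        return rate;
    }

For a binary reaction (order 2) this reproduces the n*(n-1/dv) factor quoted in the inputs files; dv itself is the per-cell volume, dx[0]*dx[1]*cell_depth in 2D, as computed in ChemicalRates().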
@@ -268,7 +268,7 @@ AMREX_GPU_HOST_DEVICE void compute_reaction_rates(GpuArray& n_ { GpuArray n_nonneg; Real n_sum = 0.; - + for (int n=0; n& n_in GpuArray& num_reactions, GpuArray& avg_num_reactions) { - if (reaction_type == -1) { // deterministic + if (reaction_type == 0) { // deterministic for (int n=0; n mattingly_lin_comb_coef(2); + mattingly_lin_comb_coef[0] = 1.; + mattingly_lin_comb_coef[1] = 0.; + if (reactDiff_reaction_type == 0) { // first-order det/tau-leaping/CLE, or SSA - // ChemicalRates(); + ChemicalRates(n_old,rate,geom,dt,n_old,mattingly_lin_comb_coef,volume_factor); MultiFab::LinComb(n_new,1,n_old,0,-dt,rate,0,0,nspecies,0); - MultiFab::Saxpy(n_new,-dt,ext_src,0,0,nspecies,0); + MultiFab::Saxpy(n_new,-dt,ext_src,0,0,nspecies,0); //note the negative sign /* ! calculate rates ! rates could be deterministic or stochastic depending on use_Poisson_rng From a3d7bb2dd4023c18c61cd664cb58ee8a967014ff Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Tue, 27 Aug 2024 08:49:25 -0700 Subject: [PATCH 051/151] bugfix --- src_reactDiff/main_driver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src_reactDiff/main_driver.cpp b/src_reactDiff/main_driver.cpp index 8952e3e1f..7b8dc911d 100644 --- a/src_reactDiff/main_driver.cpp +++ b/src_reactDiff/main_driver.cpp @@ -170,7 +170,7 @@ void main_driver(const char* argv) AdvanceTimestep(n_old,n_new,dt,time,geom); time += dt; - MultiFab::Copy(n_new,n_old,0,0,nspecies,1); + MultiFab::Copy(n_old,n_new,0,0,nspecies,1); if (stats_int > 0 && step%stats_int == 0 && step > n_steps_skip) { Abort("fix structure factor snapshot"); From 786028eb0080a2a820247a66c5d39e79d5a2c730 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Tue, 27 Aug 2024 16:52:31 -0700 Subject: [PATCH 052/151] chemistry loop bugfix --- src_chemistry/chemistry_functions.cpp | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src_chemistry/chemistry_functions.cpp b/src_chemistry/chemistry_functions.cpp index 3f17ae188..16e77fcb3 100644 --- a/src_chemistry/chemistry_functions.cpp +++ b/src_chemistry/chemistry_functions.cpp @@ -246,15 +246,15 @@ void ChemicalRates(const MultiFab& n_cc, MultiFab& chem_rate, const amrex::Geome n_in[n] = n_arr(i,j,k,n); } compute_reaction_rates(n_in, avg_reaction_rate, dv); - for (int n=0; n& n_ if (use_mole_frac_LMA && include_discrete_LMA_correction) { Abort("compute_reaction_rates() - use_mole_frac_LMA && include_discrete_LMA_correction not supported"); - } else if (include_discrete_LMA_correction == 0 && exclude_solvent_comput_rates == 0) { - Abort("compute_reaction_rates() -include_discrete_LMA_correction == 0 && exclude_solvent_comput_rates == 0 not supported"); + } else if (include_discrete_LMA_correction == 0 && exclude_solvent_comput_rates == -1) { + + if (use_mole_frac_LMA) { + Abort("compute_reaction_rates() - use_mole_frac_LMA not supported"); + } + + for (int r=0; r Date: Tue, 27 Aug 2024 16:54:04 -0700 Subject: [PATCH 053/151] sign fix --- src_reactDiff/AdvanceReaction.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src_reactDiff/AdvanceReaction.cpp b/src_reactDiff/AdvanceReaction.cpp index 62050a2c0..d1dc97b35 100644 --- a/src_reactDiff/AdvanceReaction.cpp +++ b/src_reactDiff/AdvanceReaction.cpp @@ -36,7 +36,7 @@ void AdvanceReaction(MultiFab& n_old, ChemicalRates(n_old,rate,geom,dt,n_old,mattingly_lin_comb_coef,volume_factor); - MultiFab::LinComb(n_new,1,n_old,0,-dt,rate,0,0,nspecies,0); + MultiFab::LinComb(n_new,1,n_old,0,dt,rate,0,0,nspecies,0); 
MultiFab::Saxpy(n_new,-dt,ext_src,0,0,nspecies,0); //note the negative sign /* ! calculate rates From 020e76e64dcd973004f7787dc3c3af8c7bd454c9 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Wed, 28 Aug 2024 11:48:40 -0700 Subject: [PATCH 054/151] cleanup, added several reactDiff_diffusion_types --- exec/reactDiff/inputs_paper_BPM_2d | 4 +- src_reactDiff/AdvanceDiffusion.cpp | 115 +++++++++++++++------------- src_reactDiff/reactDiff_functions.H | 1 + 3 files changed, 64 insertions(+), 56 deletions(-) diff --git a/exec/reactDiff/inputs_paper_BPM_2d b/exec/reactDiff/inputs_paper_BPM_2d index 7e3386ba6..fb7ff1e2c 100644 --- a/exec/reactDiff/inputs_paper_BPM_2d +++ b/exec/reactDiff/inputs_paper_BPM_2d @@ -13,8 +13,8 @@ cell_depth = 10. fixed_dt = 0.5 # Controls for number of steps between actions -max_step = 2 -plot_int = 1 +max_step = 20000 +plot_int = 200 stats_int = -1 seed = 1 diff --git a/src_reactDiff/AdvanceDiffusion.cpp b/src_reactDiff/AdvanceDiffusion.cpp index d22346af4..5d988b3ec 100644 --- a/src_reactDiff/AdvanceDiffusion.cpp +++ b/src_reactDiff/AdvanceDiffusion.cpp @@ -17,10 +17,17 @@ void AdvanceDiffusion(MultiFab& n_old, // store for one component of D_Fick std::array< MultiFab, AMREX_SPACEDIM > diff_coef_face; - AMREX_D_TERM(diff_coef_face[0].define(convert(ba,nodal_flag_x), dmap, 1, 0);, - diff_coef_face[1].define(convert(ba,nodal_flag_y), dmap, 1, 0);, - diff_coef_face[2].define(convert(ba,nodal_flag_z), dmap, 1, 0);); + AMREX_D_TERM(diff_coef_face[0].define(convert(ba,nodal_flag_x), dmap, nspecies, 0);, + diff_coef_face[1].define(convert(ba,nodal_flag_y), dmap, nspecies, 0);, + diff_coef_face[2].define(convert(ba,nodal_flag_z), dmap, nspecies, 0);); + for (int i=0; i 0.) { Abort("AdvanceDiffusion() - write stochastic case"); @@ -59,34 +66,27 @@ void AdvanceDiffusion(MultiFab& n_old, MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time); if (reactDiff_diffusion_type == 0) { - Abort("AdvanceDiffusion() - write trapezoidal corrector"); - /* + ! Trapezoidal corrector: + ! n_k^{n+1} = n_k^n + (dt/2) div (D_k grad n_k)^n + ! + (dt/2) div (D_k grad n_k)^{n+1,*} + ! + dt div (sqrt(2 D_k n_k / dt) Z)^n + ! + dt ext_src + ! This is the same as stepping to time t+2*dt and then averaging with the state at time t: + ! n_new = 1/2 * (n_old + n_new + dt*div (D grad n_new) + div (sqrt(2 D_k n_k dt) Z)^n) + ! which is what we use below + */ - ! Trapezoidal corrector: - ! n_k^{n+1} = n_k^n + (dt/2) div (D_k grad n_k)^n - ! + (dt/2) div (D_k grad n_k)^{n+1,*} - ! + dt div (sqrt(2 D_k n_k / dt) Z)^n - ! + dt ext_src - ! This is the same as stepping to time t+2*dt and then averaging with the state at time t: - ! n_new = 1/2 * (n_old + n_new + dt*div (D grad n_new) + div (sqrt(2 D_k n_k dt) Z)^n) - ! which is what we use below - - ! 
compute diffusive flux divergence - call diffusive_n_fluxdiv(mla,n_new,diff_coef_face,diff_fluxdiv,dx,the_bc_tower) - - do n=1,nlevs - call multifab_plus_plus_c(n_new(n),1,n_old(n),1,nspecies,0) - call multifab_saxpy_3(n_new(n),dt,diff_fluxdiv(n)) - call multifab_saxpy_3(n_new(n),dt,stoch_fluxdiv(n)) - call multifab_saxpy_3(n_new(n),dt,ext_src(n)) - call multifab_mult_mult_s_c(n_new(n),1,0.5d0,nspecies,0) - call multifab_fill_boundary(n_new(n)) - call multifab_physbc(n_new(n),1,scal_bc_comp,nspecies, & - the_bc_tower%bc_tower_array(n),dx_in=dx(n,:)) - end do + // compute diffusive flux divergence + DiffusiveNFluxdiv(n_new,diff_fluxdiv,diff_coef_face,geom,time); - */ + MultiFab::Saxpy(n_new,1.,n_old,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt,diff_fluxdiv ,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt,stoch_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt,ext_src ,0,0,nspecies,0); + n_new.mult(0.5); + n_new.FillBoundary(geom.periodicity()); + MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time); } } else if (reactDiff_diffusion_type == 1) { @@ -114,27 +114,33 @@ void AdvanceDiffusion(MultiFab& n_old, call implicit_diffusion(mla,n_old,n_new,stoch_fluxdiv,diff_coef_face,dx,dt,the_bc_tower) */ } else if (reactDiff_diffusion_type == 2) { - Abort("AdvanceDiffusion() - write explicit midpoint scheme"); + if (variance_coef_mass > 0.) { + Abort("AdvanceDiffusion() - write stochastic part of explicit midpoint scheme"); + } + /* -! explicit midpoint scheme + ! explicit midpoint scheme ! n_k^{n+1/2} = n_k^n + (dt/2) div (D_k grad n_k)^n ! + (dt/2) div (sqrt(2 D_k n_k / (dt/2) ) Z_1)^n ! + (dt/2) ext_src - do n=1,nlevs - call multifab_copy_c(n_new(n),1,n_old(n),1,nspecies,0) - call multifab_saxpy_3(n_new(n),dt/2.d0 ,diff_fluxdiv(n)) - call multifab_saxpy_3(n_new(n),dt/sqrt(2.d0),stoch_fluxdiv(n)) - call multifab_saxpy_3(n_new(n),dt/2.d0 ,ext_src(n)) - call multifab_fill_boundary(n_new(n)) - call multifab_physbc(n_new(n),1,scal_bc_comp,nspecies, & - the_bc_tower%bc_tower_array(n),dx_in=dx(n,:)) - end do + */ - ! compute diffusive flux divergence at t^{n+1/2} - call diffusive_n_fluxdiv(mla,n_new,diff_coef_face,diff_fluxdiv,dx,the_bc_tower) + MultiFab::Copy(n_new,n_old,0,0,nspecies,0); + MultiFab::Saxpy(n_new,0.5*dt,diff_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(n_new,0.5*dt,ext_src,0,0,nspecies,0); + n_new.FillBoundary(geom.periodicity()); + MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time); + // compute diffusive flux divergence at t^{n+1/2} + DiffusiveNFluxdiv(n_new,diff_fluxdiv,diff_coef_face,geom,time); + + if (variance_coef_mass > 0.) { + Abort("AdvanceDiffusion() - write stochastic part of explicit midpoint scheme"); + } + /* if (variance_coef_mass .gt. 0.d0) then ! fill random flux multifabs with new random numbers call fill_mass_stochastic(mla,the_bc_tower%bc_tower_array) @@ -163,7 +169,9 @@ void AdvanceDiffusion(MultiFab& n_old, call bl_error("advance_diffusion: invalid midpoint_stoch_flux_type") end select end if - + */ + + /* ! n_k^{n+1} = n_k^n + dt div (D_k grad n_k)^{n+1/2} ! + dt div (sqrt(2 D_k n_k^n dt) Z_1 / sqrt(2) ) ! + dt div (sqrt(2 D_k n_k^? dt) Z_2 / sqrt(2) ) @@ -172,16 +180,14 @@ void AdvanceDiffusion(MultiFab& n_old, ! n_k^? = n_k^n (midpoint_stoch_flux_type=1) ! = n_k^pred (midpoint_stoch_flux_type=2) ! 
= 2*n_k^pred - n_k^n (midpoint_stoch_flux_type=3) - do n=1,nlevs - call multifab_copy_c(n_new(n),1,n_old(n),1,nspecies,0) - call multifab_saxpy_3(n_new(n),dt ,diff_fluxdiv(n)) - call multifab_saxpy_3(n_new(n),dt/sqrt(2.d0),stoch_fluxdiv(n)) - call multifab_saxpy_3(n_new(n),dt ,ext_src(n)) - call multifab_fill_boundary(n_new(n)) - call multifab_physbc(n_new(n),1,scal_bc_comp,nspecies, & - the_bc_tower%bc_tower_array(n),dx_in=dx(n,:)) - end do - */ + */ + + MultiFab::Copy(n_new,n_old,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt,diff_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt,ext_src,0,0,nspecies,0); + n_new.FillBoundary(geom.periodicity()); + MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time); } else { Abort("AdvanceDiffusion() - invalid reactDiff_diffusion_type"); @@ -192,6 +198,7 @@ void AdvanceDiffusion(MultiFab& n_old, void DiffusiveNFluxdiv(MultiFab& n_in, MultiFab& diff_fluxdiv, + const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face, const Geometry& geom, const Real& time) { @@ -265,7 +272,7 @@ void DiffusiveNFluxdiv(MultiFab& n_in, // load D_fick for species i into bcoef for (int d=0; d& diff_coef_face, const Geometry& geom, const Real& time); From 0b86555b514945a4722fb7e80eaa86354779e243 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Wed, 28 Aug 2024 12:26:06 -0700 Subject: [PATCH 055/151] second-order split scheme chemistry --- src_chemistry/chemistry_functions.cpp | 45 ++++++++++-------- src_reactDiff/AdvanceReaction.cpp | 66 +++++++++++++++++++-------- 2 files changed, 73 insertions(+), 38 deletions(-) diff --git a/src_chemistry/chemistry_functions.cpp b/src_chemistry/chemistry_functions.cpp index 16e77fcb3..0d4f1a11c 100644 --- a/src_chemistry/chemistry_functions.cpp +++ b/src_chemistry/chemistry_functions.cpp @@ -232,29 +232,36 @@ void ChemicalRates(const MultiFab& n_cc, MultiFab& chem_rate, const amrex::Geome } else { amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { + GpuArray n_in; + GpuArray n_int_in; + GpuArray avg_reaction_rate; + GpuArray avg_reaction_rate_interm; + GpuArray avg_num_reactions; + GpuArray num_reactions; + + for (int n=0; n n_in; - GpuArray avg_reaction_rate; - GpuArray avg_num_reactions; - GpuArray num_reactions; - + for (int r=0; r Date: Wed, 28 Aug 2024 12:54:54 -0700 Subject: [PATCH 056/151] implemented both strang splitting schemes --- src_reactDiff/AdvanceTimestep.cpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src_reactDiff/AdvanceTimestep.cpp b/src_reactDiff/AdvanceTimestep.cpp index 5c575201f..8094dcd1b 100644 --- a/src_reactDiff/AdvanceTimestep.cpp +++ b/src_reactDiff/AdvanceTimestep.cpp @@ -37,21 +37,17 @@ void AdvanceTimestep(MultiFab& n_old, } else if (temporal_integrator == 1) { // (1/2)R + D + (1/2)R -/* - call advance_reaction (mla,n_old,n_new,dx,0.5d0*dt,the_bc_tower,Rn_steady) - ! swap n_new/n_old to avoid calling copy() - call advance_diffusion(mla,n_new,n_old,dx,dt ,the_bc_tower,Rn_steady) - call advance_reaction (mla,n_old,n_new,dx,0.5d0*dt,the_bc_tower,Rn_steady) -*/ + AdvanceReaction(n_old,n_new,Rn_steady,0.5*dt,time,geom); + // swap n_new/n_old to avoid calling copy() + AdvanceDiffusion(n_new,n_old,Rn_steady,dt,time,geom); + AdvanceReaction(n_old,n_new,Rn_steady,0.5*dt,time,geom); } else if (temporal_integrator == 2) { // (1/2)D + R + (1/2)D -/* - call advance_diffusion(mla,n_old,n_new,dx,0.5d0*dt,the_bc_tower,Rn_steady) - ! 
swap n_new/n_old to avoid calling copy() - call advance_reaction (mla,n_new,n_old,dx,dt ,the_bc_tower,Rn_steady) - call advance_diffusion(mla,n_old,n_new,dx,0.5d0*dt,the_bc_tower,Rn_steady) -*/ + AdvanceDiffusion(n_old,n_new,Rn_steady,0.5*dt,time,geom); + // swap n_new/n_old to avoid calling copy() + AdvanceReaction(n_new,n_old,Rn_steady,dt,time,geom); + AdvanceDiffusion(n_old,n_new,Rn_steady,0.5*dt,time,geom); } else { Abort("AdvanceTimestep(): invalid temporal_integrator"); From efcd0816abc48ece89beb02aee1d29e75a18cb5a Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Wed, 28 Aug 2024 15:14:44 -0700 Subject: [PATCH 057/151] incorporate include_discrete_LMA_correction option --- src_chemistry/chemistry_functions.cpp | 69 +++++++++++++++++++++++++-- src_reactDiff/main_driver.cpp | 55 ++++++++++++--------- 2 files changed, 98 insertions(+), 26 deletions(-) diff --git a/src_chemistry/chemistry_functions.cpp b/src_chemistry/chemistry_functions.cpp index 0d4f1a11c..a96d5fb91 100644 --- a/src_chemistry/chemistry_functions.cpp +++ b/src_chemistry/chemistry_functions.cpp @@ -274,22 +274,69 @@ AMREX_GPU_HOST_DEVICE void compute_reaction_rates(GpuArray& n_ const amrex::Real& dv) { GpuArray n_nonneg; + Real n_sum = 0.; - for (int n=0; n=1) then + ! rate ~ N/N_sum + if(n_nonneg(species)>0.0d0) then ! This species is present in this cell + reaction_rates(reaction) = reaction_rates(reaction) * n_nonneg(species)/n_sum + else + reaction_rates(reaction) = 0.0d0 + end if + end if + if(stoichiometric_factors(species,1,reaction)>=2) then + ! rate ~ (N/N_sum)*((N-1)/(N_sum-1)) + ! Donev: Avoid division by zero or negative rates + if(n_nonneg(species)>1.0d0/dv) then ! There is at least one molecule of this species in this cell + reaction_rates(reaction) = reaction_rates(reaction) * (n_nonneg(species)-1.0d0/dv)/(n_sum-1.0d0/dv) + else + reaction_rates(reaction) = 0.0d0 + end if + end if + if(stoichiometric_factors(species,1,reaction)>=3) then ! Donev added ternary reactions here + ! rate ~ (N/N_sum)*((N-1)/(N_sum-1))*((N-2)/(N_sum-2)) + if(n_nonneg(species)>2.0d0/dv) then ! There is at least two molecules of this species in this cell + reaction_rates(reaction) = reaction_rates(reaction) * (n_nonneg(species)-2.0d0/dv)/(n_sum-2.0d0/dv) + else + reaction_rates(reaction) = 0.0d0 + end if + end if + if(stoichiometric_factors(species,1,reaction)>=4) then + ! 
This is essentially impossible in practice and won't happen + call bl_error("Stochiometric coefficients larger then 3 not supported") + end if + end do + end do +*/ + } else if (include_discrete_LMA_correction == 0 && exclude_solvent_comput_rates == -1) { if (use_mole_frac_LMA) { - Abort("compute_reaction_rates() - use_mole_frac_LMA not supported"); + for (int n=0; n& n_ continue; } if (include_discrete_LMA_correction) { - Abort("compute_reaction_rates() - include_discrete_LMA_correction == 1 not supported"); + + int coef = stoich_coeffs_R(r,n); + if (coef == 0) { + // Species doe not participate in reaction + } else if (coef == 1) { + reaction_rates[r] *= n_nonneg[n]; + } else if (coef == 2) { + reaction_rates[r] *= n_nonneg[n]*std::max(0.,n_nonneg[n]-1./dv); + } else if (coef == 3) { + reaction_rates[r] *= n_nonneg[n]*std::max(0.,n_nonneg[n]-1./dv)*std::max(0.,n_nonneg[n]-2./dv); + } else { + // This is essentially impossible in practice and won't happen + Abort("Stochiometric coefficients larger then 3 not supported"); + } + } else { reaction_rates[r] *= std::pow(n_nonneg[n],stoich_coeffs_R(r,n)); } diff --git a/src_reactDiff/main_driver.cpp b/src_reactDiff/main_driver.cpp index 7b8dc911d..4f8122ee7 100644 --- a/src_reactDiff/main_driver.cpp +++ b/src_reactDiff/main_driver.cpp @@ -31,26 +31,6 @@ void main_driver(const char* argv) InitializeChemistryNamespace(); InitializeReactDiffNamespace(); - // is the problem periodic? - Vector is_periodic(AMREX_SPACEDIM,0); // set to 0 (not periodic) by default - for (int i=0; i 0 && use_mole_frac_LMA) { + if (include_discrete_LMA_correction == 1) { + Abort("Error: currently use_mole_frac_LMA can be used only with include_discrete_LMA_correction=0"); + } + if (exclude_solvent_comput_rates != -1) { + Abort("Error: currently use_mole_frac_LMA can be used only with exclude_solvent_comput_rates=-1"); + } + } + + // is the problem periodic? + Vector is_periodic(AMREX_SPACEDIM,0); // set to 0 (not periodic) by default + for (int i=0; i 0.) 
{ - Abort("initial_variance_mass not supported yet"); - // add_init_n_fluctuations() + if (integer_populations == 0) { + Abort("add_init_n_fluctuations not supported yet"); + // add_init_n_fluctuations() + } } } else { From 17be21cf80c1628ca7f5c96bd84de0b06f32b541 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Wed, 28 Aug 2024 17:52:02 -0700 Subject: [PATCH 058/151] stochastic diffusion work --- src_reactDiff/AdvanceDiffusion.cpp | 96 +------------------- src_reactDiff/DiffusiveNFluxdiv.cpp | 101 +++++++++++++++++++++ src_reactDiff/Make.package | 2 + src_reactDiff/StochasticNFluxdiv.cpp | 126 +++++++++++++++++++++++++++ src_reactDiff/reactDiff_functions.H | 30 +++++-- 5 files changed, 254 insertions(+), 101 deletions(-) create mode 100644 src_reactDiff/DiffusiveNFluxdiv.cpp create mode 100644 src_reactDiff/StochasticNFluxdiv.cpp diff --git a/src_reactDiff/AdvanceDiffusion.cpp b/src_reactDiff/AdvanceDiffusion.cpp index 5d988b3ec..bdd8ca506 100644 --- a/src_reactDiff/AdvanceDiffusion.cpp +++ b/src_reactDiff/AdvanceDiffusion.cpp @@ -1,8 +1,5 @@ #include "reactDiff_functions.H" -#include "AMReX_MLMG.H" -#include - // Solves n_t = div ( D grad (n)) + div (sqrt(2*variance*D*n)*W) + g // where g is a constant in time external source void AdvanceDiffusion(MultiFab& n_old, @@ -15,7 +12,7 @@ void AdvanceDiffusion(MultiFab& n_old, BoxArray ba = n_old.boxArray(); DistributionMapping dmap = n_old.DistributionMap(); - // store for one component of D_Fick + // store D_Fick on faces std::array< MultiFab, AMREX_SPACEDIM > diff_coef_face; AMREX_D_TERM(diff_coef_face[0].define(convert(ba,nodal_flag_x), dmap, nspecies, 0);, diff_coef_face[1].define(convert(ba,nodal_flag_y), dmap, nspecies, 0);, @@ -194,94 +191,3 @@ void AdvanceDiffusion(MultiFab& n_old, } } - - -void DiffusiveNFluxdiv(MultiFab& n_in, - MultiFab& diff_fluxdiv, - const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face, - const Geometry& geom, - const Real& time) { - - // fill n ghost cells - n_in.FillBoundary(geom.periodicity()); - MultiFabPhysBC(n_in, geom, 0, nspecies, SPEC_BC_COMP, time); - - BoxArray ba = n_in.boxArray(); - DistributionMapping dmap = n_in.DistributionMap(); - - // don't need to set much here for explicit evaluations - LPInfo info; - - // operator of the form (ascalar * acoef - bscalar div bcoef grad) phi - MLABecLaplacian mlabec({geom}, {ba}, {dmap}, info); - mlabec.setMaxOrder(2); - - // store one component at a time and take L(phi) one component at a time - MultiFab phi (ba,dmap,1,1); - MultiFab Lphi(ba,dmap,1,0); - - MultiFab acoef(ba,dmap,1,0); - std::array< MultiFab, AMREX_SPACEDIM > bcoef; - AMREX_D_TERM(bcoef[0].define(convert(ba,nodal_flag_x), dmap, 1, 0);, - bcoef[1].define(convert(ba,nodal_flag_y), dmap, 1, 0);, - bcoef[2].define(convert(ba,nodal_flag_z), dmap, 1, 0);); - - // build array of boundary conditions needed by MLABecLaplacian - std::array lo_mlmg_bc; - std::array hi_mlmg_bc; - - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) - { - if (bc_mass_lo[idim] == -1 || bc_mass_hi[idim] == -1) { - if ( !(bc_mass_lo[idim] == -1 && bc_mass_hi[idim] == -1) ) { - Abort("Both bc_mass_lo and bc_mass_hi must be periodic in a given direction if the other one is"); - } - lo_mlmg_bc[idim] = LinOpBCType::Periodic; - hi_mlmg_bc[idim] = LinOpBCType::Periodic; - } - - if (bc_mass_lo[idim] == 0) { - lo_mlmg_bc[idim] = LinOpBCType::inhomogNeumann; - } else if (bc_mass_lo[idim] == 1) { - lo_mlmg_bc[idim] = LinOpBCType::Dirichlet; - } else if (bc_mass_lo[idim] != -1) { - Abort("Invalid bc_mass_lo"); - } - - if 
(bc_mass_hi[idim] == 0) { - hi_mlmg_bc[idim] = LinOpBCType::inhomogNeumann; - } else if (bc_mass_hi[idim] == 1) { - hi_mlmg_bc[idim] = LinOpBCType::Dirichlet; - } else if (bc_mass_hi[idim] != -1) { - Abort("Invalid bc_mass_hi"); - } - } - - mlabec.setDomainBC(lo_mlmg_bc,hi_mlmg_bc); - - // set acoeff to 0and bcoeff to -1 - mlabec.setScalars(0., -1.); - - acoef.setVal(0.); - mlabec.setACoeffs(0, acoef); - - for (int i=0; i + +void DiffusiveNFluxdiv(MultiFab& n_in, + MultiFab& diff_fluxdiv, + const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face, + const Geometry& geom, + const Real& time) { + + // single cell case set diffusive mass fluxdiv to zero and return + long cell_count = (AMREX_SPACEDIM==2) ? n_cells[0]*n_cells[1] : n_cells[0]*n_cells[1]*n_cells[2]; + if (cell_count == 1) { + diff_fluxdiv.setVal(0.); + return; + } + + // fill n ghost cells + n_in.FillBoundary(geom.periodicity()); + MultiFabPhysBC(n_in, geom, 0, nspecies, SPEC_BC_COMP, time); + + BoxArray ba = n_in.boxArray(); + DistributionMapping dmap = n_in.DistributionMap(); + + // don't need to set much here for explicit evaluations + LPInfo info; + + // operator of the form (ascalar * acoef - bscalar div bcoef grad) phi + MLABecLaplacian mlabec({geom}, {ba}, {dmap}, info); + mlabec.setMaxOrder(2); + + // store one component at a time and take L(phi) one component at a time + MultiFab phi (ba,dmap,1,1); + MultiFab Lphi(ba,dmap,1,0); + + MultiFab acoef(ba,dmap,1,0); + std::array< MultiFab, AMREX_SPACEDIM > bcoef; + AMREX_D_TERM(bcoef[0].define(convert(ba,nodal_flag_x), dmap, 1, 0);, + bcoef[1].define(convert(ba,nodal_flag_y), dmap, 1, 0);, + bcoef[2].define(convert(ba,nodal_flag_z), dmap, 1, 0);); + + // build array of boundary conditions needed by MLABecLaplacian + std::array lo_mlmg_bc; + std::array hi_mlmg_bc; + + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) + { + if (bc_mass_lo[idim] == -1 || bc_mass_hi[idim] == -1) { + if ( !(bc_mass_lo[idim] == -1 && bc_mass_hi[idim] == -1) ) { + Abort("Both bc_mass_lo and bc_mass_hi must be periodic in a given direction if the other one is"); + } + lo_mlmg_bc[idim] = LinOpBCType::Periodic; + hi_mlmg_bc[idim] = LinOpBCType::Periodic; + } + + if (bc_mass_lo[idim] == 0) { + lo_mlmg_bc[idim] = LinOpBCType::inhomogNeumann; + } else if (bc_mass_lo[idim] == 1) { + lo_mlmg_bc[idim] = LinOpBCType::Dirichlet; + } else if (bc_mass_lo[idim] != -1) { + Abort("Invalid bc_mass_lo"); + } + + if (bc_mass_hi[idim] == 0) { + hi_mlmg_bc[idim] = LinOpBCType::inhomogNeumann; + } else if (bc_mass_hi[idim] == 1) { + hi_mlmg_bc[idim] = LinOpBCType::Dirichlet; + } else if (bc_mass_hi[idim] != -1) { + Abort("Invalid bc_mass_hi"); + } + } + + mlabec.setDomainBC(lo_mlmg_bc,hi_mlmg_bc); + + // set acoeff to 0and bcoeff to -1 + mlabec.setScalars(0., -1.); + + acoef.setVal(0.); + mlabec.setACoeffs(0, acoef); + + for (int i=0; i& diff_coef_face, + const Geometry& geom, + const Real& dt, + const Real& time, + int increment_div) { + + // single cell case set stochastic mass fluxdiv to zero + // (or its increment if increment_in=T) and return + long cell_count = (AMREX_SPACEDIM==2) ? 
n_cells[0]*n_cells[1] : n_cells[0]*n_cells[1]*n_cells[2]; + if (cell_count == 1 && increment_div==0) { + stoch_fluxdiv.setVal(0.); + return; + } + + BoxArray ba = n_in.boxArray(); + DistributionMapping dmap = n_in.DistributionMap(); + + std::array< MultiFab, AMREX_SPACEDIM > flux; + AMREX_D_TERM(flux[0].define(convert(ba,nodal_flag_x), dmap, nspecies, 0);, + flux[1].define(convert(ba,nodal_flag_y), dmap, nspecies, 0);, + flux[2].define(convert(ba,nodal_flag_z), dmap, nspecies, 0);); + + const Real* dx = geom.CellSize(); + + Real dv = (AMREX_SPACEDIM == 3) ? dx[0]*dx[1]*dx[2] : dx[0]*dx[1]*cell_depth; + + // average n_in to faces, store in flux + for (MFIter mfi(n_in); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.validbox(); + + const Array4& n_arr = n_in.array(mfi); + + AMREX_D_TERM(const Array4 & fluxx = flux[0].array(mfi);, + const Array4 & fluxy = flux[1].array(mfi);, + const Array4 & fluxz = flux[2].array(mfi);); + + AMREX_D_TERM(const Box & bx_x = mfi.nodaltilebox(0);, + const Box & bx_y = mfi.nodaltilebox(1);, + const Box & bx_z = mfi.nodaltilebox(2);); + + amrex::ParallelFor(bx_x, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + fluxx(i,j,k,n) = average_to_faces(n_arr(i-1,j,k,n),n_arr(i,j,k,n),dv); + }, + bx_y, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + fluxy(i,j,k,n) = average_to_faces(n_arr(i,j-1,k,n),n_arr(i,j,k,n),dv); + } +#if (AMREX_SPACEDIM == 3) + , bx_z, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + fluxz(i,j,k,n) = average_to_faces(n_arr(i,j,k-1,n),n_arr(i,j,k,n),dv); + } +#endif + ); + } + + // assemble_stoch_n_fluxes + // + // + // + + for (int i=0; i& diff_coef_face, - const Geometry& geom, - const Real& time); - //////////////////////// // In AdvanceReaction.cpp //////////////////////// @@ -51,6 +45,15 @@ void AdvanceTimestep(MultiFab& n_old, const Real& time, const Geometry& geom); +//////////////////////// +// In DiffusiveNFluxdiv.cpp +//////////////////////// +void DiffusiveNFluxdiv(MultiFab& n_in, + MultiFab& diff_fluxdiv, + const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face, + const Geometry& geom, + const Real& time); + //////////////////////// // In InitN.cpp //////////////////////// @@ -58,6 +61,21 @@ void InitN(MultiFab& n_in, const Geometry& geom, const Real& time); +//////////////////////// +// In StochasticNFluxdiv.cpp +//////////////////////// +void StochasticNFluxdiv(MultiFab& n_in, + MultiFab& stoch_fluxdiv, + const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face, + const Geometry& geom, + const Real& dt, + const Real& time, + int increment_div=0); + +AMREX_GPU_HOST_DEVICE Real average_to_faces(const Real& value1, + const Real& value2, + const Real& dv); + //////////////////////// // In WritePlotFile.cpp //////////////////////// From 1193194ed8de45a49ef3d14e87df584e2c8852ae Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Wed, 28 Aug 2024 19:21:03 -0700 Subject: [PATCH 059/151] unsplit integrator shell --- src_reactDiff/AdvanceReactionDiffusion.cpp | 100 +++++++++++++++++++++ src_reactDiff/AdvanceTimestep.cpp | 4 +- src_reactDiff/Make.package | 1 + src_reactDiff/reactDiff_functions.H | 10 +++ 4 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 src_reactDiff/AdvanceReactionDiffusion.cpp diff --git a/src_reactDiff/AdvanceReactionDiffusion.cpp b/src_reactDiff/AdvanceReactionDiffusion.cpp new file mode 100644 index 000000000..04c88c421 --- /dev/null +++ b/src_reactDiff/AdvanceReactionDiffusion.cpp @@ -0,0 +1,100 @@ +#include 
"reactDiff_functions.H" +#include "chemistry_functions.H" + +void AdvanceReactionDiffusion(MultiFab& n_old, + MultiFab& n_new, + const MultiFab& ext_src, + const Real& dt, + const Real& time, + const Geometry& geom) { + + BoxArray ba = n_old.boxArray(); + DistributionMapping dmap = n_old.DistributionMap(); + + // store D_Fick on faces + std::array< MultiFab, AMREX_SPACEDIM > diff_coef_face; + AMREX_D_TERM(diff_coef_face[0].define(convert(ba,nodal_flag_x), dmap, nspecies, 0);, + diff_coef_face[1].define(convert(ba,nodal_flag_y), dmap, nspecies, 0);, + diff_coef_face[2].define(convert(ba,nodal_flag_z), dmap, nspecies, 0);); + + for (int i=0; i mattingly_lin_comb_coef(2); + mattingly_lin_comb_coef[0] = 1.; + mattingly_lin_comb_coef[1] = 0.; + + MultiFab diff_fluxdiv (ba,dmap,nspecies,0); + MultiFab stoch_fluxdiv(ba,dmap,nspecies,0); + + DiffusiveNFluxdiv(n_old,diff_fluxdiv,diff_coef_face,geom,time); + + if (variance_coef_mass > 0.) { + Abort("AdvanceReactionDiffusion() - write stochastic case"); + } else { + stoch_fluxdiv.setVal(0.); + } + + //!!!!!!!!!!!!!!! + // time advance ! + //!!!!!!!!!!!!!!! + + if (temporal_integrator == -1) { // forward Euler + + // calculate rates + // rates could be deterministic or stochastic depending on use_Poisson_rng + ChemicalRates(n_old,rate1,geom,dt,n_old,mattingly_lin_comb_coef,volume_factor); + + // n_k^{n+1} = n_k^n + dt div (D_k grad n_k)^n + // + dt div (sqrt(2 D_k n_k^n dt) Z) ! Gaussian noise + // + 1/dV * P( f(n_k)*dt*dV ) ! Poisson noise + // + dt ext_src + MultiFab::LinComb(n_new,1,n_old,0,dt,diff_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt,stoch_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt,rate1,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt,ext_src,0,0,nspecies,0); + n_new.FillBoundary(geom.periodicity()); + MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time); + + } else if (temporal_integrator == -2) { // explicit midpoint + + // temporary storage for second rate + MultiFab rate2(ba,dmap,nspecies,0); + + if (reaction_type == 2) { // explicit midpoint with SSA + + Abort("AdvanceReactionDiffusion() - temporal_integrator=-2 (SSA) not written yet"); + + } else { + + Abort("AdvanceReactionDiffusion() - temporal_integrator=-2 (non-SSA) not written yet"); + + } // explicit midpoint for det/tau/CLE + + } else if (temporal_integrator == -4) { // implicit midpoint + + if (reaction_type == 2) { // implicit midpoint with SSA + + Abort("AdvanceReactionDiffusion() - temporal_integrator=-4 (SSA) not written yet"); + + } else { + + Abort("AdvanceReactionDiffusion() - temporal_integrator=-4 (non-SSA) not written yet"); + + } + } else { + + Abort("AdvanceReactionDiffusion() - invalid temporal_integrator"); + + } +} diff --git a/src_reactDiff/AdvanceTimestep.cpp b/src_reactDiff/AdvanceTimestep.cpp index 8094dcd1b..c74f4f994 100644 --- a/src_reactDiff/AdvanceTimestep.cpp +++ b/src_reactDiff/AdvanceTimestep.cpp @@ -17,9 +17,9 @@ void AdvanceTimestep(MultiFab& n_old, MultiFab Rn_steady(n_old.boxArray(), n_old.DistributionMap(), nspecies, 0); if (temporal_integrator < 0) { - // unsplit schemes - + // unsplit schemes + AdvanceReactionDiffusion(n_old,n_new,Rn_steady,dt,time,geom); } else { diff --git a/src_reactDiff/Make.package b/src_reactDiff/Make.package index 87bd380cc..693dcff07 100644 --- a/src_reactDiff/Make.package +++ b/src_reactDiff/Make.package @@ -1,4 +1,5 @@ CEXE_sources += AdvanceReaction.cpp +CEXE_sources += AdvanceReactionDiffusion.cpp CEXE_sources += AdvanceDiffusion.cpp CEXE_sources += AdvanceTimestep.cpp CEXE_sources += 
DiffusiveNFluxdiv.cpp diff --git a/src_reactDiff/reactDiff_functions.H b/src_reactDiff/reactDiff_functions.H index 74f5daf65..3dc5cfd17 100644 --- a/src_reactDiff/reactDiff_functions.H +++ b/src_reactDiff/reactDiff_functions.H @@ -36,6 +36,16 @@ void AdvanceReaction(MultiFab& n_old, const Real& time, const Geometry& geom); +//////////////////////// +// In AdvanceReactionDiffusion.cpp +//////////////////////// +void AdvanceReactionDiffusion(MultiFab& n_old, + MultiFab& n_new, + const MultiFab& ext_src, + const Real& dt, + const Real& time, + const Geometry& geom); + //////////////////////// // In AdvanceTimestep.cpp //////////////////////// From 1c46839d828c28640fa2c649d3d61892ddf0b9c5 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Wed, 28 Aug 2024 19:30:13 -0700 Subject: [PATCH 060/151] fix first order unsplit integrator --- src_reactDiff/AdvanceTimestep.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src_reactDiff/AdvanceTimestep.cpp b/src_reactDiff/AdvanceTimestep.cpp index c74f4f994..fc5e5f507 100644 --- a/src_reactDiff/AdvanceTimestep.cpp +++ b/src_reactDiff/AdvanceTimestep.cpp @@ -18,6 +18,8 @@ void AdvanceTimestep(MultiFab& n_old, if (temporal_integrator < 0) { + Rn_steady.setVal(0.); + // unsplit schemes AdvanceReactionDiffusion(n_old,n_new,Rn_steady,dt,time,geom); From ce9414bbf8023f3cd93282f26bb04e194785570b Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Thu, 29 Aug 2024 09:41:37 -0700 Subject: [PATCH 061/151] diffusive noise --- exec/reactDiff/inputs_paper_BPM_2d | 4 +- src_reactDiff/AdvanceDiffusion.cpp | 2 +- src_reactDiff/InitN.cpp | 4 ++ src_reactDiff/StochasticNFluxdiv.cpp | 60 ++++++++++++++++++++++++++-- 4 files changed, 64 insertions(+), 6 deletions(-) diff --git a/exec/reactDiff/inputs_paper_BPM_2d b/exec/reactDiff/inputs_paper_BPM_2d index fb7ff1e2c..0b5d99a33 100644 --- a/exec/reactDiff/inputs_paper_BPM_2d +++ b/exec/reactDiff/inputs_paper_BPM_2d @@ -46,7 +46,7 @@ reactDiff_diffusion_type = 4 # Fickian diffusion coeffs D_Fick = 0.1 0.01 0.01 -variance_coef_mass = 0. +variance_coef_mass = 1. # how to compute n on faces for stochastic weighting # 1=arithmetic (with C0-Heaviside), 2=geometric, 3=harmonic @@ -92,7 +92,7 @@ stoich_7P = 1 0 0 # n*(n-1/dv) and n*(n-1/dv)*(n-2/dv). rate_const = 0.0002 0.0002 1. 0.03666663 4.44444555555 0.00333333 16.66665 rate_multiplier = 1. -include_discrete_LMA_correction = 0 +include_discrete_LMA_correction = 1 # Boundary conditions # ---------------------- diff --git a/src_reactDiff/AdvanceDiffusion.cpp b/src_reactDiff/AdvanceDiffusion.cpp index bdd8ca506..08c7c3f27 100644 --- a/src_reactDiff/AdvanceDiffusion.cpp +++ b/src_reactDiff/AdvanceDiffusion.cpp @@ -43,7 +43,7 @@ void AdvanceDiffusion(MultiFab& n_old, DiffusiveNFluxdiv(n_old,diff_fluxdiv,diff_coef_face,geom,time); if (variance_coef_mass > 0.) 
{ - Abort("AdvanceDiffusion() - write stochastic case"); + StochasticNFluxdiv(n_old,stoch_fluxdiv,diff_coef_face,geom,dt,time,0); } else { stoch_fluxdiv.setVal(0.); } diff --git a/src_reactDiff/InitN.cpp b/src_reactDiff/InitN.cpp index 0b0575c83..fb6bf5a05 100644 --- a/src_reactDiff/InitN.cpp +++ b/src_reactDiff/InitN.cpp @@ -58,6 +58,10 @@ void InitN(MultiFab& n_in, } else { Abort("prob_type not implemented yet"); } + + if (integer_populations == 1) { + Abort("InitN() - integer_populations not supported yet"); + } } diff --git a/src_reactDiff/StochasticNFluxdiv.cpp b/src_reactDiff/StochasticNFluxdiv.cpp index 2941a315b..5d389b74e 100644 --- a/src_reactDiff/StochasticNFluxdiv.cpp +++ b/src_reactDiff/StochasticNFluxdiv.cpp @@ -1,3 +1,4 @@ +#include "rng_functions.H" #include "reactDiff_functions.H" void StochasticNFluxdiv(MultiFab& n_in, @@ -24,6 +25,11 @@ void StochasticNFluxdiv(MultiFab& n_in, flux[1].define(convert(ba,nodal_flag_y), dmap, nspecies, 0);, flux[2].define(convert(ba,nodal_flag_z), dmap, nspecies, 0);); + std::array< MultiFab, AMREX_SPACEDIM > rand; + AMREX_D_TERM(rand[0].define(convert(ba,nodal_flag_x), dmap, nspecies, 0);, + rand[1].define(convert(ba,nodal_flag_y), dmap, nspecies, 0);, + rand[2].define(convert(ba,nodal_flag_z), dmap, nspecies, 0);); + const Real* dx = geom.CellSize(); Real dv = (AMREX_SPACEDIM == 3) ? dx[0]*dx[1]*dx[2] : dx[0]*dx[1]*cell_depth; @@ -60,11 +66,59 @@ void StochasticNFluxdiv(MultiFab& n_in, ); } + // generate random numbers + for (int i=0; i& n_arr = n_in.array(mfi); + + AMREX_D_TERM(const Array4 & fluxx = flux[0].array(mfi);, + const Array4 & fluxy = flux[1].array(mfi);, + const Array4 & fluxz = flux[2].array(mfi);); + + AMREX_D_TERM(const Array4 & randx = rand[0].array(mfi);, + const Array4 & randy = rand[1].array(mfi);, + const Array4 & randz = rand[2].array(mfi);); + + AMREX_D_TERM(const Array4 & coefx = diff_coef_face[0].array(mfi);, + const Array4 & coefy = diff_coef_face[1].array(mfi);, + const Array4 & coefz = diff_coef_face[2].array(mfi);); + + AMREX_D_TERM(const Box & bx_x = mfi.nodaltilebox(0);, + const Box & bx_y = mfi.nodaltilebox(1);, + const Box & bx_z = mfi.nodaltilebox(2);); + + amrex::ParallelFor(bx_x, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + fluxx(i,j,k,n) = std::sqrt(coefx(i,j,k,n)*fluxx(i,j,k,n)) * randx(i,j,k,n); + }, + bx_y, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + fluxy(i,j,k,n) = std::sqrt(coefy(i,j,k,n)*fluxy(i,j,k,n)) * randy(i,j,k,n); + } +#if (AMREX_SPACEDIM == 3) + , bx_z, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + fluxz(i,j,k,n) = std::sqrt(coefz(i,j,k,n)*fluxz(i,j,k,n)) * randz(i,j,k,n); + } +#endif + ); + } + for (int i=0; i Date: Thu, 29 Aug 2024 09:54:30 -0700 Subject: [PATCH 062/151] paper setup --- exec/reactDiff/inputs_paper_Lemarchand_3d | 83 +++++++++++++++++++---- 1 file changed, 68 insertions(+), 15 deletions(-) diff --git a/exec/reactDiff/inputs_paper_Lemarchand_3d b/exec/reactDiff/inputs_paper_Lemarchand_3d index 592c69219..8a68e80ef 100644 --- a/exec/reactDiff/inputs_paper_Lemarchand_3d +++ b/exec/reactDiff/inputs_paper_Lemarchand_3d @@ -2,28 +2,89 @@ prob_lo = 0.0 0.0 0.0 # physical lo coordinate prob_hi = 512.0 512.0 512.0 # physical hi coordinate -# number of cells in domain -n_cells = 64 64 64 -# max number of cells in a box -max_grid_size = 32 32 32 +# number of cells in domain and maximum number of cells in a box +n_cells = 256 256 256 +max_grid_size = 128 128 128 # Time-step control -fixed_dt = 
0.1 +fixed_dt = 0.25 # Controls for number of steps between actions -max_step = 10 -plot_int = 1 +max_step = 1000 +plot_int = 20 stats_int = -1 seed = 1 nspecies = 2 +nreaction = 4 prob_type = 5 n_init_in_1 = 2.16245 1.35018 n_init_in_2 = 0. 10. +# 0=D+R (first-order splitting) +# 1=(1/2)R + D + (1/2)R (Strang option 1) +# 2=(1/2)D + R + (1/2)D (Strang option 2) +# -1=unsplit forward Euler +# -2=unsplit explicit midpoint +# -3=unsplit multinomial diffusion +# -4=unsplit implicit midpoint +temporal_integrator = 0 + +# only used for split schemes (temporal_integrator>=0) +# 0=explicit trapezoidal predictor/corrector +# 1=Crank-Nicolson semi-implicit +# 2=explicit midpoint +# 3=multinomial diffusion +# 4=forward Euler +reactDiff_diffusion_type = 4 + +# Fickian diffusion coeffs +D_Fick = 1. 10. + +perturb_width = 16. +smoothing_width = 2. +variance_coef_mass = 1. + +# how to compute n on faces for stochastic weighting +# 1=arithmetic (with C0-Heaviside), 2=geometric, 3=harmonic +# 10=arithmetic average with discontinuous Heaviside function +# 11=arithmetic average with C1-smoothed Heaviside function +# 12=arithmetic average with C2-smoothed Heaviside function +avg_type = 1 + +# only used for split schemes (temporal_integrator>=0) +# 0=first-order (deterministic, tau leaping, CLE, or SSA) +# 1=second-order (determinisitc, tau leaping, or CLE only) +reactDiff_reaction_type = 0 + +# 0=deterministic; 1=CLE; 2=SSA; 3=tau leap +reaction_type = 0 + +# model is: +# (1) A -> 0 +# (2) 2A + B --> 3A +# (3) B --> 0 +# (4) 0 --> B +stoich_1R = 1 0 +stoich_1P = 0 0 +stoich_2R = 2 1 +stoich_2P = 3 0 +stoich_3R = 0 1 +stoich_3P = 0 0 +stoich_4R = 0 0 +stoich_4P = 0 1 + +# reaction rate constant for each reaction (assuming Law of Mass Action holds) +# using rate_multiplier, reaction rates can be changed by the same factor +# if include_discrete_LMA_correction, n^2 and n^3 in rate expressions become +# n*(n-1/dv) and n*(n-1/dv)*(n-2/dv). +rate_const = 4. 1.37 1. 10. +rate_multiplier = 0.1 +include_discrete_LMA_correction = 1 + # Boundary conditions # ---------------------- # BC specifications: @@ -33,11 +94,3 @@ n_init_in_2 = 0. 10. bc_mass_lo = -1 -1 -1 bc_mass_hi = -1 -1 -1 -# if wall/reservoir, these are the numerical Neumann/Dirichlet values -bc_Yk_x_lo = 0. 0. -bc_Yk_x_hi = 0. 0. -bc_Yk_y_lo = 0. 0. -bc_Yk_y_hi = 0. 0. -bc_Yk_z_lo = 0. 0. -bc_Yk_z_hi = 0. 0. - From c9076f3dff01fa49825cb0389841f742bc1afa89 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Wed, 4 Sep 2024 11:27:59 -0700 Subject: [PATCH 063/151] paper setups --- exec/reactDiff/inputs_paper_BPM_2d | 13 ++++++++ exec/reactDiff/inputs_paper_Lemarchand_3d | 37 +++++++++++++++-------- 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/exec/reactDiff/inputs_paper_BPM_2d b/exec/reactDiff/inputs_paper_BPM_2d index 0b5d99a33..f32aa0911 100644 --- a/exec/reactDiff/inputs_paper_BPM_2d +++ b/exec/reactDiff/inputs_paper_BPM_2d @@ -1,3 +1,16 @@ +# This inputs file is used for generating +# - Figures 5, 6, 7 (Section V.B) +# in Paper by C. Kim et al. "Stochastic simulation of reaction-diffusion +# systems: A fluctuating-hydrodynamics approach" +# J. Chem. Phys. 146, 124110 (2017). +# You can change some relevant parameters such as +# - cell_depth +# - n_cells (64^2 or 256^2) and max_grid_size +# - fixed_dt, max_step +# - plot_int (plot files) +# - temporal_integrator, reaction_type (numerical scheme) +# and run this inputs file. 
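Since this header suggests varying cell_depth, n_cells, fixed_dt, max_step, plot_int, temporal_integrator and reaction_type between runs, note that AMReX's ParmParse also accepts name=value pairs on the command line after the inputs file, and those take precedence over the values set in the file. A hypothetical invocation (the executable name here is illustrative and depends on the dimension and build options) might look like

    ./main2d.gnu.MPI.ex inputs_paper_BPM_2d max_step=2000 plot_int=100 temporal_integrator=1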
+ # Problem specification prob_lo = 0.0 0.0 # physical lo coordinate prob_hi = 32.0 32.0 # physical hi coordinate diff --git a/exec/reactDiff/inputs_paper_Lemarchand_3d b/exec/reactDiff/inputs_paper_Lemarchand_3d index 8a68e80ef..adfffc1fc 100644 --- a/exec/reactDiff/inputs_paper_Lemarchand_3d +++ b/exec/reactDiff/inputs_paper_Lemarchand_3d @@ -1,16 +1,30 @@ +# This inputs file is used for generating +# - Figure 8 (Section V.C) +# in Paper by C. Kim et al. "Stochastic simulation of reaction-diffusion +# systems: A fluctuating-hydrodynamics approach" +# J. Chem. Phys. 146, 124110 (2017). +# You can change some relevant parameters such as +# - initial_variance_mass: 0 (smooth initial condition) 1 (with fluctuations): +# - variance_coef_mass: 0 (deterministic diffusion) 1 (stochastic) +# - reaction_type: 0=deterministic; 1=CLE; 2=SSA; 3=tau leap +# and run this inputs file. + +# ** in the paper, cell_depth = 1000 is used +# ** but in the current code, cross_section cannot be set independently + # Problem specification -prob_lo = 0.0 0.0 0.0 # physical lo coordinate -prob_hi = 512.0 512.0 512.0 # physical hi coordinate +prob_lo = 0.0 0.0 0.0 # physical lo coordinate +prob_hi = 512.0 512.0 512.0 # physical hi coordinate # number of cells in domain and maximum number of cells in a box n_cells = 256 256 256 -max_grid_size = 128 128 128 +max_grid_size = 64 64 64 # Time-step control fixed_dt = 0.25 # Controls for number of steps between actions -max_step = 1000 +max_step = 2000 plot_int = 20 stats_int = -1 @@ -20,9 +34,10 @@ nspecies = 2 nreaction = 4 prob_type = 5 +perturb_width = 16. # scale factor for perturbed part in initial profile (for prob_type=4,5) +smoothing_width = 1. # scale factor for smoothing initial profile (for prob_type=4,5) -n_init_in_1 = 2.16245 1.35018 -n_init_in_2 = 0. 10. +n_init_in_1 = 1685.8 533.5 56.38 # Start on the limit cycle # 0=D+R (first-order splitting) # 1=(1/2)R + D + (1/2)R (Strang option 1) @@ -44,8 +59,6 @@ reactDiff_diffusion_type = 4 # Fickian diffusion coeffs D_Fick = 1. 10. -perturb_width = 16. -smoothing_width = 2. variance_coef_mass = 1. # how to compute n on faces for stochastic weighting @@ -63,7 +76,6 @@ reactDiff_reaction_type = 0 # 0=deterministic; 1=CLE; 2=SSA; 3=tau leap reaction_type = 0 -# model is: # (1) A -> 0 # (2) 2A + B --> 3A # (3) B --> 0 @@ -81,7 +93,7 @@ stoich_4P = 0 1 # using rate_multiplier, reaction rates can be changed by the same factor # if include_discrete_LMA_correction, n^2 and n^3 in rate expressions become # n*(n-1/dv) and n*(n-1/dv)*(n-2/dv). -rate_const = 4. 1.37 1. 10. +rate_const = 4. 1.37 1. 10. rate_multiplier = 0.1 include_discrete_LMA_correction = 1 @@ -91,6 +103,5 @@ include_discrete_LMA_correction = 1 # -1 = periodic # 1 = wall (Neumann) # 2 = reservoir (Dirichlet) -bc_mass_lo = -1 -1 -1 -bc_mass_hi = -1 -1 -1 - +bc_mass_lo = -1 -1 +bc_mass_hi = -1 -1 From a018d364861b105ea37f93d6ee0066ccd3564440 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Wed, 4 Sep 2024 11:57:46 -0700 Subject: [PATCH 064/151] fine tuning of 3d setup. 
still needs work for larger time steps (implicit options) --- exec/reactDiff/inputs_paper_Lemarchand_3d | 15 ++++++++------- src_reactDiff/main_driver.cpp | 11 ++++++++++- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/exec/reactDiff/inputs_paper_Lemarchand_3d b/exec/reactDiff/inputs_paper_Lemarchand_3d index adfffc1fc..9f8d20f03 100644 --- a/exec/reactDiff/inputs_paper_Lemarchand_3d +++ b/exec/reactDiff/inputs_paper_Lemarchand_3d @@ -21,11 +21,11 @@ n_cells = 256 256 256 max_grid_size = 64 64 64 # Time-step control -fixed_dt = 0.25 +fixed_dt = 0.01 # Controls for number of steps between actions -max_step = 2000 -plot_int = 20 +max_step = 20000 +plot_int = 200 stats_int = -1 seed = 1 @@ -37,7 +37,8 @@ prob_type = 5 perturb_width = 16. # scale factor for perturbed part in initial profile (for prob_type=4,5) smoothing_width = 1. # scale factor for smoothing initial profile (for prob_type=4,5) -n_init_in_1 = 1685.8 533.5 56.38 # Start on the limit cycle +n_init_in_1 = 2.16245 1.35018 +n_init_in_2 = 0. 10. # 0=D+R (first-order splitting) # 1=(1/2)R + D + (1/2)R (Strang option 1) @@ -59,7 +60,7 @@ reactDiff_diffusion_type = 4 # Fickian diffusion coeffs D_Fick = 1. 10. -variance_coef_mass = 1. +variance_coef_mass = 1.e-3 # how to compute n on faces for stochastic weighting # 1=arithmetic (with C0-Heaviside), 2=geometric, 3=harmonic @@ -103,5 +104,5 @@ include_discrete_LMA_correction = 1 # -1 = periodic # 1 = wall (Neumann) # 2 = reservoir (Dirichlet) -bc_mass_lo = -1 -1 -bc_mass_hi = -1 -1 +bc_mass_lo = -1 -1 -1 +bc_mass_hi = -1 -1 -1 diff --git a/src_reactDiff/main_driver.cpp b/src_reactDiff/main_driver.cpp index 4f8122ee7..81db63d8e 100644 --- a/src_reactDiff/main_driver.cpp +++ b/src_reactDiff/main_driver.cpp @@ -177,12 +177,21 @@ void main_driver(const char* argv) // time step loop for(int step=step_start;step<=max_step;++step) { + + // store the current time so we can later compute total run time. 
+ Real step_strt_time = ParallelDescriptor::second(); AdvanceTimestep(n_old,n_new,dt,time,geom); time += dt; MultiFab::Copy(n_old,n_new,0,0,nspecies,1); - + + // Call the timer again and compute the maximum difference between the start time + // and stop time over all processors + Real step_stop_time = ParallelDescriptor::second() - step_strt_time; + ParallelDescriptor::ReduceRealMax(step_stop_time); + amrex::Print() << "Time step " << step << " complted in " << step_stop_time << " seconds\n"; + if (stats_int > 0 && step%stats_int == 0 && step > n_steps_skip) { Abort("fix structure factor snapshot"); } From 4468fb4ad0c422b40933ffa25ac910a3a4bf8468 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Wed, 4 Sep 2024 16:37:03 -0700 Subject: [PATCH 065/151] implicit diffusion module, incorporated crank-nicolson strang option note this does NOT solve in delta formulation like the previous code this is because I am coding up generalized physical bc support, which the previous code didn't have (only periodic) --- src_reactDiff/AdvanceDiffusion.cpp | 26 +++---- src_reactDiff/ImplicitDiffusion.cpp | 117 ++++++++++++++++++++++++++++ src_reactDiff/Make.package | 1 + src_reactDiff/reactDiff_functions.H | 13 +++- 4 files changed, 142 insertions(+), 15 deletions(-) create mode 100644 src_reactDiff/ImplicitDiffusion.cpp diff --git a/src_reactDiff/AdvanceDiffusion.cpp b/src_reactDiff/AdvanceDiffusion.cpp index 08c7c3f27..4cefc40ff 100644 --- a/src_reactDiff/AdvanceDiffusion.cpp +++ b/src_reactDiff/AdvanceDiffusion.cpp @@ -87,8 +87,6 @@ void AdvanceDiffusion(MultiFab& n_old, } } else if (reactDiff_diffusion_type == 1) { - Abort("AdvanceDiffusion() - write Crank-Nicolson"); - /* ! Crank-Nicolson ! n_k^{n+1} = n_k^n + (dt/2)(div D_k grad n_k)^n @@ -96,20 +94,20 @@ void AdvanceDiffusion(MultiFab& n_old, ! + dt div (sqrt(2 D_k n_k / dt) Z)^n ! + dt ext_src ! - ! in delta formulation: - ! - ! (I - div (dt/2) D_k grad) delta n_k = dt div (D_k grad n_k^n) - ! + dt div (sqrt(2 D_k n_k / dt) Z)^n - ! + dt ext_src - ! + ! ( I- (dt/2) div D_k grad) n_k^n+1 = n_k^n + ! + (dt/2)(div D_k grad n_k)^n + ! + dt div (sqrt(2 D_k n_k / dt) Z)^n + ! + dt ext_src ! we combine the entire rhs into stoch_fluxdiv - do n=1,nlevs - call multifab_plus_plus(stoch_fluxdiv(n),ext_src(n),0) - call multifab_plus_plus(stoch_fluxdiv(n),diff_fluxdiv(n),0) - call multifab_mult_mult_s(stoch_fluxdiv(n),dt) - end do - call implicit_diffusion(mla,n_old,n_new,stoch_fluxdiv,diff_coef_face,dx,dt,the_bc_tower) */ + + MultiFab::Saxpy(stoch_fluxdiv,1.,ext_src,0,0,nspecies,0); + MultiFab::Saxpy(stoch_fluxdiv,0.5,diff_fluxdiv,0,0,nspecies,0); + stoch_fluxdiv.mult(dt); + MultiFab::Saxpy(stoch_fluxdiv,1.,n_old,0,0,nspecies,0); + + ImplicitDiffusion(n_old, n_new, stoch_fluxdiv, diff_coef_face, geom, 0.5*dt, time); + } else if (reactDiff_diffusion_type == 2) { if (variance_coef_mass > 0.) 
{ diff --git a/src_reactDiff/ImplicitDiffusion.cpp b/src_reactDiff/ImplicitDiffusion.cpp new file mode 100644 index 000000000..bd4ba70b3 --- /dev/null +++ b/src_reactDiff/ImplicitDiffusion.cpp @@ -0,0 +1,117 @@ +#include "reactDiff_functions.H" + +#include "AMReX_MLMG.H" +#include + +// (I - (dt_fac) div D_k grad) n = rhs + +void ImplicitDiffusion(MultiFab& n_old, + MultiFab& n_new, + const MultiFab& rhs, + const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face, + const Geometry& geom, + const Real& dt_fac, + const Real& time) { + + BoxArray ba = n_old.boxArray(); + DistributionMapping dmap = n_old.DistributionMap(); + + // fill n ghost cells + n_old.FillBoundary(geom.periodicity()); + MultiFabPhysBC(n_old, geom, 0, nspecies, SPEC_BC_COMP, time); + + LPInfo info; + + // operator of the form (ascalar * acoef - bscalar div bcoef grad) phi + MLABecLaplacian mlabec({geom}, {ba}, {dmap}, info); + mlabec.setMaxOrder(2); + + // store one component at a time and take L(phi) one component at a time + MultiFab phi (ba,dmap,1,1); + MultiFab rhs_comp(ba,dmap,1,0); + + // build array of boundary conditions needed by MLABecLaplacian + std::array lo_mlmg_bc; + std::array hi_mlmg_bc; + + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) + { + if (bc_mass_lo[idim] == -1 || bc_mass_hi[idim] == -1) { + if ( !(bc_mass_lo[idim] == -1 && bc_mass_hi[idim] == -1) ) { + Abort("Both bc_mass_lo and bc_mass_hi must be periodic in a given direction if the other one is"); + } + lo_mlmg_bc[idim] = LinOpBCType::Periodic; + hi_mlmg_bc[idim] = LinOpBCType::Periodic; + } + + if (bc_mass_lo[idim] == 0) { + lo_mlmg_bc[idim] = LinOpBCType::inhomogNeumann; + } else if (bc_mass_lo[idim] == 1) { + lo_mlmg_bc[idim] = LinOpBCType::Dirichlet; + } else if (bc_mass_lo[idim] != -1) { + Abort("Invalid bc_mass_lo"); + } + + if (bc_mass_hi[idim] == 0) { + hi_mlmg_bc[idim] = LinOpBCType::inhomogNeumann; + } else if (bc_mass_hi[idim] == 1) { + hi_mlmg_bc[idim] = LinOpBCType::Dirichlet; + } else if (bc_mass_hi[idim] != -1) { + Abort("Invalid bc_mass_hi"); + } + } + + mlabec.setDomainBC(lo_mlmg_bc,hi_mlmg_bc); + + // storage for acoeff and bcoeff in + // (ascalar * acoeff I - bscalar div bcoeff grad) phi = rhs + MultiFab acoef(ba,dmap,1,0); + std::array< MultiFab, AMREX_SPACEDIM > bcoef; + AMREX_D_TERM(bcoef[0].define(convert(ba,nodal_flag_x), dmap, 1, 0);, + bcoef[1].define(convert(ba,nodal_flag_y), dmap, 1, 0);, + bcoef[2].define(convert(ba,nodal_flag_z), dmap, 1, 0);); + + // set ascalar and bscalar to 1 + mlabec.setScalars(1., 1.); + + // acoeff = 1 + acoef.setVal(1.); + mlabec.setACoeffs(0, acoef); + + // set bcoeff to dt_fac * D_i + for (int i=0; i& diff_coef_face, + const Geometry& geom, + const Real& dt_fac, + const Real& time); + //////////////////////// // In InitN.cpp //////////////////////// From 8b6539db8ca8806ee7fe3aeefae74dc26d143951 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Wed, 4 Sep 2024 16:57:46 -0700 Subject: [PATCH 066/151] add integer populations option --- exec/reactDiff/inputs_paper_BPM_2d | 3 +++ src_reactDiff/InitN.cpp | 32 ++++++++++++++++++++++++++---- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/exec/reactDiff/inputs_paper_BPM_2d b/exec/reactDiff/inputs_paper_BPM_2d index f32aa0911..16d1c0dd7 100644 --- a/exec/reactDiff/inputs_paper_BPM_2d +++ b/exec/reactDiff/inputs_paper_BPM_2d @@ -39,6 +39,8 @@ prob_type = 0 n_init_in_1 = 1685.8 533.5 56.38 # Start on the limit cycle +integer_populations = 1 + # 0=D+R (first-order splitting) # 1=(1/2)R + D + (1/2)R (Strang option 1) # 2=(1/2)D + R 
+ (1/2)D (Strang option 2) @@ -60,6 +62,7 @@ reactDiff_diffusion_type = 4 D_Fick = 0.1 0.01 0.01 variance_coef_mass = 1. +initial_variance_mass = 1. # how to compute n on faces for stochastic weighting # 1=arithmetic (with C0-Heaviside), 2=geometric, 3=harmonic diff --git a/src_reactDiff/InitN.cpp b/src_reactDiff/InitN.cpp index fb6bf5a05..25b9ffff8 100644 --- a/src_reactDiff/InitN.cpp +++ b/src_reactDiff/InitN.cpp @@ -59,12 +59,36 @@ void InitN(MultiFab& n_in, Abort("prob_type not implemented yet"); } - if (integer_populations == 1) { - Abort("InitN() - integer_populations not supported yet"); - } + } + + if (integer_populations == 1) { // Ensure that the initial number of molecules are integers + + Real dv = (AMREX_SPACEDIM == 3) ? dx[0]*dx[1]*dx[2] : dx[0]*dx[1]*cell_depth; + + if (initial_variance_mass < 0.) { // Distribute the particles on the box using a multinomial sampler + + Abort("integer_populations=1 with initial_variance_mass < 0. not supported yet"); + + } else { // Make the number of molecules in each cell Poisson distributed with desired mean + + for ( MFIter mfi(n_in,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + const Box& bx = mfi.tilebox(); + + const Array4 & n_init = n_in.array(mfi); + + amrex::ParallelForRNG(bx, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n, amrex::RandomEngine const& engine) noexcept + { + // Generate the initial fluctuations using a Poisson random number generator + // This assumes that the distribution of initial conditions is a product Poisson measure + int nparticles = RandomPoisson(n_init(i,j,k,n)*dv, engine); + n_init(i,j,k,n) = nparticles / dv; + }); + } + + } } - + n_in.FillBoundary(geom.periodicity()); MultiFabPhysBC(n_in, geom, 0, nspecies, SPEC_BC_COMP, time); } From 5aad36003ed3ca8bd659be4a330c7a4aa40205ba Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Wed, 4 Sep 2024 20:38:42 -0700 Subject: [PATCH 067/151] CLE and tau leaping implemented --- exec/reactDiff/inputs_paper_BPM_2d | 2 +- src_chemistry/chemistry_functions.H | 3 ++- src_chemistry/chemistry_functions.cpp | 16 +++++++++++++--- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/exec/reactDiff/inputs_paper_BPM_2d b/exec/reactDiff/inputs_paper_BPM_2d index 16d1c0dd7..250952597 100644 --- a/exec/reactDiff/inputs_paper_BPM_2d +++ b/exec/reactDiff/inputs_paper_BPM_2d @@ -77,7 +77,7 @@ avg_type = 1 reactDiff_reaction_type = 0 # 0=deterministic; 1=CLE; 2=SSA; 3=tau leap -reaction_type = 0 +reaction_type = 1 # BPM model is: # (1) U + W --> V + W diff --git a/src_chemistry/chemistry_functions.H b/src_chemistry/chemistry_functions.H index bdacbeae6..a918400a3 100644 --- a/src_chemistry/chemistry_functions.H +++ b/src_chemistry/chemistry_functions.H @@ -29,5 +29,6 @@ AMREX_GPU_HOST_DEVICE void compute_reaction_rates(GpuArray& n_ AMREX_GPU_HOST_DEVICE void sample_num_reactions(GpuArray& n_in, GpuArray& num_reactions, - GpuArray& avg_num_reactions); + GpuArray& avg_num_reactions, + const amrex::RandomEngine& engine); #endif diff --git a/src_chemistry/chemistry_functions.cpp b/src_chemistry/chemistry_functions.cpp index a96d5fb91..9ed78fdbf 100644 --- a/src_chemistry/chemistry_functions.cpp +++ b/src_chemistry/chemistry_functions.cpp @@ -230,7 +230,7 @@ void ChemicalRates(const MultiFab& n_cc, MultiFab& chem_rate, const amrex::Geome Abort("ChemicalRates() - SSA not supported"); }); } else { - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept + amrex::ParallelForRNG(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k, amrex::RandomEngine const& engine) 
noexcept { GpuArray n_in; GpuArray n_int_in; @@ -258,7 +258,7 @@ void ChemicalRates(const MultiFab& n_cc, MultiFab& chem_rate, const amrex::Geome for (int r=0; r& n_ AMREX_GPU_HOST_DEVICE void sample_num_reactions(GpuArray& n_in, GpuArray& num_reactions, - GpuArray& avg_num_reactions) + GpuArray& avg_num_reactions, + const amrex::RandomEngine& engine) { if (reaction_type == 0) { // deterministic for (int n=0; n Date: Fri, 6 Sep 2024 16:06:28 -0700 Subject: [PATCH 068/151] implement midpoint_stoch_flux_type for split integrators --- src_reactDiff/AdvanceDiffusion.cpp | 62 +++++++++++++----------------- 1 file changed, 27 insertions(+), 35 deletions(-) diff --git a/src_reactDiff/AdvanceDiffusion.cpp b/src_reactDiff/AdvanceDiffusion.cpp index 4cefc40ff..0a7606c4d 100644 --- a/src_reactDiff/AdvanceDiffusion.cpp +++ b/src_reactDiff/AdvanceDiffusion.cpp @@ -109,10 +109,6 @@ void AdvanceDiffusion(MultiFab& n_old, ImplicitDiffusion(n_old, n_new, stoch_fluxdiv, diff_coef_face, geom, 0.5*dt, time); } else if (reactDiff_diffusion_type == 2) { - - if (variance_coef_mass > 0.) { - Abort("AdvanceDiffusion() - write stochastic part of explicit midpoint scheme"); - } /* ! explicit midpoint scheme @@ -133,38 +129,34 @@ void AdvanceDiffusion(MultiFab& n_old, DiffusiveNFluxdiv(n_new,diff_fluxdiv,diff_coef_face,geom,time); if (variance_coef_mass > 0.) { - Abort("AdvanceDiffusion() - write stochastic part of explicit midpoint scheme"); + + // fill random flux multifabs with new random numbers and + // compute second-stage stochastic flux divergence and + // add to first-stage stochastic flux divergence + if (midpoint_stoch_flux_type == 1) { + + // use n_old + StochasticNFluxdiv(n_old,stoch_fluxdiv,diff_coef_face,geom,dt,time,1); + + } else if (midpoint_stoch_flux_type == 2) { + + // use n_pred + StochasticNFluxdiv(n_new,stoch_fluxdiv,diff_coef_face,geom,dt,time,1); + + } else if (midpoint_stoch_flux_type == 3) { + + // We use n_new=2*n_pred-n_old here as temporary storage since we will overwrite it shortly + n_new.mult(2.); + MultiFab::Subtract(n_new,n_old,0,0,nspecies,1); + + // use n_new=2*n_pred-n_old + StochasticNFluxdiv(n_new,stoch_fluxdiv,diff_coef_face,geom,dt,time,1); + + } else { + Abort("AdvanceDiffusion() - invalid midpoint_stoch_flux_type"); + } } - /* - if (variance_coef_mass .gt. 0.d0) then - ! fill random flux multifabs with new random numbers - call fill_mass_stochastic(mla,the_bc_tower%bc_tower_array) - - ! compute second-stage stochastic flux divergence and - ! add to first-stage stochastic flux divergence - select case (midpoint_stoch_flux_type) - case (1) - ! use n_old - call stochastic_n_fluxdiv(mla,n_old,diff_coef_face,stoch_fluxdiv,dx,dt, & - the_bc_tower,increment_in=.true.) - case (2) - ! use n_pred - call stochastic_n_fluxdiv(mla,n_new,diff_coef_face,stoch_fluxdiv,dx,dt, & - the_bc_tower,increment_in=.true.) - case (3) - ! We use n_new=2*n_pred-n_old here as temporary storage since we will overwrite it shortly - do n=1,nlevs - call multifab_mult_mult_s_c(n_new(n),1,2.d0,nspecies,n_new(n)%ng) - call multifab_sub_sub_c(n_new(n),1,n_old(n),1,nspecies,n_new(n)%ng) - end do - ! use n_new=2*n_pred-n_old - call stochastic_n_fluxdiv(mla,n_new,diff_coef_face,stoch_fluxdiv,dx,dt, & - the_bc_tower,increment_in=.true.) - case default - call bl_error("advance_diffusion: invalid midpoint_stoch_flux_type") - end select - end if - */ + /* ! 
n_k^{n+1} = n_k^n + dt div (D_k grad n_k)^{n+1/2} From b13a58ed7da7cdfdf8758b66bcb699c82738a62b Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 6 Sep 2024 16:12:51 -0700 Subject: [PATCH 069/151] split out GenerateStochasticFluxdivCorrector for split and unsplit algorithms to use --- src_reactDiff/AdvanceDiffusion.cpp | 63 ++++++++++++++++------------- src_reactDiff/reactDiff_functions.H | 8 ++++ 2 files changed, 44 insertions(+), 27 deletions(-) diff --git a/src_reactDiff/AdvanceDiffusion.cpp b/src_reactDiff/AdvanceDiffusion.cpp index 0a7606c4d..becbe5bac 100644 --- a/src_reactDiff/AdvanceDiffusion.cpp +++ b/src_reactDiff/AdvanceDiffusion.cpp @@ -129,35 +129,9 @@ void AdvanceDiffusion(MultiFab& n_old, DiffusiveNFluxdiv(n_new,diff_fluxdiv,diff_coef_face,geom,time); if (variance_coef_mass > 0.) { - - // fill random flux multifabs with new random numbers and - // compute second-stage stochastic flux divergence and - // add to first-stage stochastic flux divergence - if (midpoint_stoch_flux_type == 1) { - - // use n_old - StochasticNFluxdiv(n_old,stoch_fluxdiv,diff_coef_face,geom,dt,time,1); - - } else if (midpoint_stoch_flux_type == 2) { - - // use n_pred - StochasticNFluxdiv(n_new,stoch_fluxdiv,diff_coef_face,geom,dt,time,1); - - } else if (midpoint_stoch_flux_type == 3) { - - // We use n_new=2*n_pred-n_old here as temporary storage since we will overwrite it shortly - n_new.mult(2.); - MultiFab::Subtract(n_new,n_old,0,0,nspecies,1); - - // use n_new=2*n_pred-n_old - StochasticNFluxdiv(n_new,stoch_fluxdiv,diff_coef_face,geom,dt,time,1); - - } else { - Abort("AdvanceDiffusion() - invalid midpoint_stoch_flux_type"); - } + GenerateStochasticFluxdivCorrector(n_old,n_new,stoch_fluxdiv,diff_coef_face,dt,time,geom); } - /* ! n_k^{n+1} = n_k^n + dt div (D_k grad n_k)^{n+1/2} ! 
+ dt div (sqrt(2 D_k n_k^n dt) Z_1 / sqrt(2) ) @@ -181,3 +155,38 @@ void AdvanceDiffusion(MultiFab& n_old, } } + +void GenerateStochasticFluxdivCorrector(MultiFab& n_old, + MultiFab& n_new, + MultiFab& stoch_fluxdiv, + const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face, + const Real& dt, + const Real& time, + const Geometry& geom) { + + // fill random flux multifabs with new random numbers and + // compute second-stage stochastic flux divergence and + // add to first-stage stochastic flux divergence + if (midpoint_stoch_flux_type == 1) { + + // use n_old + StochasticNFluxdiv(n_old,stoch_fluxdiv,diff_coef_face,geom,dt,time,1); + + } else if (midpoint_stoch_flux_type == 2) { + + // use n_pred + StochasticNFluxdiv(n_new,stoch_fluxdiv,diff_coef_face,geom,dt,time,1); + + } else if (midpoint_stoch_flux_type == 3) { + + // We use n_new=2*n_pred-n_old here as temporary storage since we will overwrite it shortly + n_new.mult(2.); + MultiFab::Subtract(n_new,n_old,0,0,nspecies,1); + + // use n_new=2*n_pred-n_old + StochasticNFluxdiv(n_new,stoch_fluxdiv,diff_coef_face,geom,dt,time,1); + + } else { + Abort("GenerateStochasticFluxdivCorrector() - invalid midpoint_stoch_flux_type"); + } +} diff --git a/src_reactDiff/reactDiff_functions.H b/src_reactDiff/reactDiff_functions.H index 53b40892a..56cff865a 100644 --- a/src_reactDiff/reactDiff_functions.H +++ b/src_reactDiff/reactDiff_functions.H @@ -26,6 +26,14 @@ void AdvanceDiffusion(MultiFab& n_old, const Real& time, const Geometry& geom); +void GenerateStochasticFluxdivCorrector(MultiFab& n_old, + MultiFab& n_new, + MultiFab& stoch_fluxdiv, + const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face, + const Real& dt, + const Real& time, + const Geometry& geom); + //////////////////////// // In AdvanceReaction.cpp //////////////////////// From 8ae192da59cb9c999794e7ddb6e6af74b7f2a011 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 6 Sep 2024 16:51:34 -0700 Subject: [PATCH 070/151] explicit midpoint (non-SSA) written --- src_reactDiff/AdvanceReactionDiffusion.cpp | 159 +++++++++++++++++++-- 1 file changed, 151 insertions(+), 8 deletions(-) diff --git a/src_reactDiff/AdvanceReactionDiffusion.cpp b/src_reactDiff/AdvanceReactionDiffusion.cpp index 04c88c421..bb203bd91 100644 --- a/src_reactDiff/AdvanceReactionDiffusion.cpp +++ b/src_reactDiff/AdvanceReactionDiffusion.cpp @@ -40,7 +40,7 @@ void AdvanceReactionDiffusion(MultiFab& n_old, DiffusiveNFluxdiv(n_old,diff_fluxdiv,diff_coef_face,geom,time); if (variance_coef_mass > 0.) { - Abort("AdvanceReactionDiffusion() - write stochastic case"); + StochasticNFluxdiv(n_old,stoch_fluxdiv,diff_coef_face,geom,dt,time,0); } else { stoch_fluxdiv.setVal(0.); } @@ -75,10 +75,74 @@ void AdvanceReactionDiffusion(MultiFab& n_old, Abort("AdvanceReactionDiffusion() - temporal_integrator=-2 (SSA) not written yet"); - } else { - - Abort("AdvanceReactionDiffusion() - temporal_integrator=-2 (non-SSA) not written yet"); - + } else { // explicit midpoint for det/tau/CLE + + //!!!!!!!!!!!!!! + // predictor ! + //!!!!!!!!!!!!!! + + // calculate rates from a(n_old) + ChemicalRates(n_old,rate1,geom,0.5*dt,n_old,mattingly_lin_comb_coef,volume_factor); + + /* + ! n_k^{n+1/2} = n_k^n + (dt/2) div (D_k grad n_k)^n + ! + (dt/sqrt(2)) div sqrt(2 D_k n_k^n / (dt*dV)) Z_1 ! Gaussian noise + ! + 1/dV * P_1( f(n_k)*(dt/2)*dV ) ! Poisson noise + ! 
+ (dt/2) ext_src + */ + MultiFab::Copy(n_new,n_old,0,0,nspecies,0); + MultiFab::Saxpy(n_new,0.5*dt,diff_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(n_new,0.5*dt,rate1,0,0,nspecies,0); + MultiFab::Saxpy(n_new,0.5*dt,ext_src,0,0,nspecies,0); + n_new.FillBoundary(geom.periodicity()); + MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time); + + //!!!!!!!!!!!!!! + // corrector ! + //!!!!!!!!!!!!!! + + // Here we do not write this in the form that Mattingly et al do + // where we just continue the second half of the time step from where we left + // Rather, we compute terms at the midpoint and then add contributions from both + // halves of the time step to n_old + // This works simpler with diffusion but we have to store both rates1 and rates2 + + // compute diffusive flux divergence + DiffusiveNFluxdiv(n_new,diff_fluxdiv,diff_coef_face,geom,time); + + // calculate rates from 2*a(n_pred)-a(n_old) + mattingly_lin_comb_coef[0] = -1.; + mattingly_lin_comb_coef[1] = 2.; + ChemicalRates(n_old,rate2,geom,0.5*dt,n_new,mattingly_lin_comb_coef,volume_factor); + + //compute stochastic flux divergence and add to the ones from the predictor stage + if (variance_coef_mass > 0.) { + GenerateStochasticFluxdivCorrector(n_old,n_new,stoch_fluxdiv,diff_coef_face,dt,time,geom); + } + + /* + ! n_k^{n+1} = n_k^n + dt div (D_k grad n_k)^{n+1/2} + ! + dt div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1 / sqrt(2) ) ! Gaussian noise + ! + dt div (sqrt(2 D_k n_k^? / (dt*dV)) Z_2 / sqrt(2) ) ! Gaussian noise + ! + 1/dV * P_1( f(n_k)*(dt/2)*dV ) ! Poisson noise + ! + 1/dV * P_2( (2*f(n_k^pred)-f(n_k))*(dt/2)*dV ) ! Poisson noise + ! + dt ext_src + ! where + ! n_k^? = n_k^n (midpoint_stoch_flux_type=1) + ! = n_k^pred (midpoint_stoch_flux_type=2) + ! = 2*n_k^pred - n_k^n (midpoint_stoch_flux_type=3) + */ + + MultiFab::Copy(n_new,n_old,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt,diff_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(n_new,0.5*dt,rate1,0,0,nspecies,0); + MultiFab::Saxpy(n_new,0.5*dt,rate2,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt,ext_src,0,0,nspecies,0); + n_new.FillBoundary(geom.periodicity()); + MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time); + } // explicit midpoint for det/tau/CLE } else if (temporal_integrator == -4) { // implicit midpoint @@ -87,9 +151,88 @@ void AdvanceReactionDiffusion(MultiFab& n_old, Abort("AdvanceReactionDiffusion() - temporal_integrator=-4 (SSA) not written yet"); - } else { - - Abort("AdvanceReactionDiffusion() - temporal_integrator=-4 (non-SSA) not written yet"); + } else { // implicit midpoint for det/tau/CLE + +/* + ! backward Euler predictor to half-time + ! n_k^{n+1/2} = n_k^n + (dt/2) div (D_k grad n_k)^{n+1/2} + ! + (dt/sqrt(2)) div sqrt(2 D_k n_k^n / (dt*dV)) Z_1 ! Gaussian noise + ! + 1/dV * P_1( f(n_k)*(dt/2)*dV ) ! Poisson noise + ! + (dt/2) ext_src + ! + ! in delta form + ! + ! (I - div (dt/2) D_k grad) delta n_k = (dt/2) div (D_k grad n_k^n) + ! + (dt/sqrt(2)) div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1 + ! + 1/dV * P_1( f(n_k)*(dt/2)*dV ) + ! + (dt/2) ext_src + + do n=1,nlevs + call multifab_build(rhs(n),mla%la(n),nspecies,0) + call multifab_build(rate2(n),mla%la(n),nspecies,0) + end do + + ! calculate rates + ! 
rates could be deterministic or stochastic depending on use_Poisson_rng + call chemical_rates(mla,n_old,rate1,dx,dt/2.d0,vol_fac_in=volume_factor) + + do n=1,nlevs + call multifab_setval(rhs(n),0.d0) + call multifab_saxpy_3(rhs(n),dt/2.d0,diff_fluxdiv(n)) + call multifab_saxpy_3(rhs(n),dt/sqrt(2.d0),stoch_fluxdiv(n)) + call multifab_saxpy_3(rhs(n),dt/2.d0,rate1(n)) + if(present(ext_src)) call multifab_saxpy_3(rhs(n),dt/2.d0,ext_src(n)) + end do + + call implicit_diffusion(mla,n_old,n_new,rhs,diff_coef_face,dx,dt,the_bc_tower) + + ! corrector + + ! calculate rates from 2*a(n_pred)-a(n_old) + call chemical_rates(mla,n_old,rate2,dx,dt/2.d0,n_new,mattingly_lin_comb_coef,vol_fac_in=volume_factor) + + ! compute stochastic flux divergence and add to the ones from the predictor stage + if (variance_coef_mass .gt. 0.d0) then + + ! first, fill random flux multifabs with new random numbers + call fill_mass_stochastic(mla,the_bc_tower%bc_tower_array) + + ! compute n on faces to use in the stochastic flux in the corrector + ! three possibilities + call generate_stochastic_fluxdiv_corrector() + + end if + + ! Crank-Nicolson + ! n_k^{n+1} = n_k^n + (dt/2) div (D_k grad n_k)^n + ! + (dt/2) div (D_k grad n_k)^{n+1} + ! + dt div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1 / sqrt(2) ) ! Gaussian noise + ! + dt div (sqrt(2 D_k n_k^? / (dt*dV)) Z_2 / sqrt(2) ) ! Gaussian noise + ! + 1/dV * P_1( f(n_k)*(dt/2)*dV ) ! Poisson noise + ! + 1/dV * P_2( (2*f(n_k^pred)-f(n_k))*(dt/2)*dV ) ! Poisson noise + ! + dt ext_src + ! + ! in delta form + ! + ! (I - div (dt/2) D_k grad) delta n_k = dt div (D_k grad n_k^n) + ! + dt div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1 / sqrt(2) ) ! Gaussian noise + ! + dt div (sqrt(2 D_k n_k^? / (dt*dV)) Z_2 / sqrt(2) ) ! Gaussian noise + ! + 1/dV * P_1( f(n_k)*(dt/2)*dV ) ! Poisson noise + ! + 1/dV * P_2( (2*f(n_k^pred)-f(n_k))*(dt/2)*dV ) ! Poisson noise + ! 
+ dt ext_src + + do n=1,nlevs + call multifab_setval(rhs(n),0.d0) + call multifab_saxpy_3(rhs(n),dt,diff_fluxdiv(n)) + call multifab_saxpy_3(rhs(n),dt/sqrt(2.d0),stoch_fluxdiv(n)) + call multifab_saxpy_3(rhs(n),dt/2.d0,rate1(n)) + call multifab_saxpy_3(rhs(n),dt/2.d0,rate2(n)) + if(present(ext_src)) call multifab_saxpy_3(rhs(n),dt,ext_src(n)) + end do + + call implicit_diffusion(mla,n_old,n_new,rhs,diff_coef_face,dx,dt,the_bc_tower) +*/ + Abort("AdvanceReactionDiffusion() - temporal_integrator=-4 (non-SSA) not written yet"); } } else { From eb8b966101a69ee919b7321c55f6be3350d8684c Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 6 Sep 2024 19:57:12 -0700 Subject: [PATCH 071/151] implicit midpoint (non-SSA) unsplit implement volume scaling in 3d --- exec/reactDiff/inputs_paper_BPM_2d | 2 +- exec/reactDiff/inputs_paper_Lemarchand_3d | 14 ++-- src_chemistry/chemistry_functions.cpp | 2 +- src_reactDiff/AdvanceReactionDiffusion.cpp | 89 ++++++++++------------ src_reactDiff/InitN.cpp | 2 +- src_reactDiff/StochasticNFluxdiv.cpp | 2 +- 6 files changed, 50 insertions(+), 61 deletions(-) diff --git a/exec/reactDiff/inputs_paper_BPM_2d b/exec/reactDiff/inputs_paper_BPM_2d index 250952597..1f9f0a321 100644 --- a/exec/reactDiff/inputs_paper_BPM_2d +++ b/exec/reactDiff/inputs_paper_BPM_2d @@ -48,7 +48,7 @@ integer_populations = 1 # -2=unsplit explicit midpoint # -3=unsplit multinomial diffusion # -4=unsplit implicit midpoint -temporal_integrator = 0 +temporal_integrator = -4 # only used for split schemes (temporal_integrator>=0) # 0=explicit trapezoidal predictor/corrector diff --git a/exec/reactDiff/inputs_paper_Lemarchand_3d b/exec/reactDiff/inputs_paper_Lemarchand_3d index 9f8d20f03..00ffa705d 100644 --- a/exec/reactDiff/inputs_paper_Lemarchand_3d +++ b/exec/reactDiff/inputs_paper_Lemarchand_3d @@ -9,9 +9,6 @@ # - reaction_type: 0=deterministic; 1=CLE; 2=SSA; 3=tau leap # and run this inputs file. -# ** in the paper, cell_depth = 1000 is used -# ** but in the current code, cross_section cannot be set independently - # Problem specification prob_lo = 0.0 0.0 0.0 # physical lo coordinate prob_hi = 512.0 512.0 512.0 # physical hi coordinate @@ -20,8 +17,11 @@ prob_hi = 512.0 512.0 512.0 # physical hi coordinate n_cells = 256 256 256 max_grid_size = 64 64 64 +# volume scale factor in 3D problems +cell_depth = 1000. + # Time-step control -fixed_dt = 0.01 +fixed_dt = 0.25 # Controls for number of steps between actions max_step = 20000 @@ -47,7 +47,7 @@ n_init_in_2 = 0. 10. # -2=unsplit explicit midpoint # -3=unsplit multinomial diffusion # -4=unsplit implicit midpoint -temporal_integrator = 0 +temporal_integrator = -4 # only used for split schemes (temporal_integrator>=0) # 0=explicit trapezoidal predictor/corrector @@ -60,7 +60,7 @@ reactDiff_diffusion_type = 4 # Fickian diffusion coeffs D_Fick = 1. 10. -variance_coef_mass = 1.e-3 +variance_coef_mass = 1. 
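# For reference, a worked instance of the volume scaling introduced in this patch,
# using the dv formula from ChemicalRates below: with prob_hi = 512.0 and
# n_cells = 256 in each direction, dx = 512/256 = 2, so the cell volume entering
# the stochastic fluxes and the reaction sampling is
#   dv = dx*dx*dx*cell_depth = 2*2*2*1000 = 8000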
# how to compute n on faces for stochastic weighting # 1=arithmetic (with C0-Heaviside), 2=geometric, 3=harmonic @@ -75,7 +75,7 @@ avg_type = 1 reactDiff_reaction_type = 0 # 0=deterministic; 1=CLE; 2=SSA; 3=tau leap -reaction_type = 0 +reaction_type = 1 # (1) A -> 0 # (2) 2A + B --> 3A diff --git a/src_chemistry/chemistry_functions.cpp b/src_chemistry/chemistry_functions.cpp index 9ed78fdbf..7ba682cb6 100644 --- a/src_chemistry/chemistry_functions.cpp +++ b/src_chemistry/chemistry_functions.cpp @@ -212,7 +212,7 @@ void ChemicalRates(const MultiFab& n_cc, MultiFab& chem_rate, const amrex::Geome const Real* dx = geom.CellSize(); - Real dv = (AMREX_SPACEDIM == 3) ? dx[0]*dx[1]*dx[2] : dx[0]*dx[1]*cell_depth; + Real dv = (AMREX_SPACEDIM == 3) ? dx[0]*dx[1]*dx[2]*cell_depth : dx[0]*dx[1]*cell_depth; dv *= volume_factor_in; for (MFIter mfi(n_cc); mfi.isValid(); ++mfi) diff --git a/src_reactDiff/AdvanceReactionDiffusion.cpp b/src_reactDiff/AdvanceReactionDiffusion.cpp index bb203bd91..b53133f11 100644 --- a/src_reactDiff/AdvanceReactionDiffusion.cpp +++ b/src_reactDiff/AdvanceReactionDiffusion.cpp @@ -160,49 +160,41 @@ void AdvanceReactionDiffusion(MultiFab& n_old, ! + 1/dV * P_1( f(n_k)*(dt/2)*dV ) ! Poisson noise ! + (dt/2) ext_src ! - ! in delta form - ! - ! (I - div (dt/2) D_k grad) delta n_k = (dt/2) div (D_k grad n_k^n) + ! (I - div (dt/2) D_k grad) n_k^{n+1/2} = n_k^n ! + (dt/sqrt(2)) div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1 ! + 1/dV * P_1( f(n_k)*(dt/2)*dV ) ! + (dt/2) ext_src - - do n=1,nlevs - call multifab_build(rhs(n),mla%la(n),nspecies,0) - call multifab_build(rate2(n),mla%la(n),nspecies,0) - end do +*/ - ! calculate rates - ! rates could be deterministic or stochastic depending on use_Poisson_rng - call chemical_rates(mla,n_old,rate1,dx,dt/2.d0,vol_fac_in=volume_factor) + MultiFab rhs (ba,dmap,nspecies,0); + MultiFab rate2(ba,dmap,nspecies,0); - do n=1,nlevs - call multifab_setval(rhs(n),0.d0) - call multifab_saxpy_3(rhs(n),dt/2.d0,diff_fluxdiv(n)) - call multifab_saxpy_3(rhs(n),dt/sqrt(2.d0),stoch_fluxdiv(n)) - call multifab_saxpy_3(rhs(n),dt/2.d0,rate1(n)) - if(present(ext_src)) call multifab_saxpy_3(rhs(n),dt/2.d0,ext_src(n)) - end do + // calculate rates + // rates could be deterministic or stochastic depending on use_Poisson_rng + ChemicalRates(n_old,rate1,geom,0.5*dt,n_old,mattingly_lin_comb_coef,volume_factor); - call implicit_diffusion(mla,n_old,n_new,rhs,diff_coef_face,dx,dt,the_bc_tower) + MultiFab::Copy(rhs,n_old,0,0,nspecies,0); + MultiFab::Saxpy(rhs,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(rhs,0.5*dt,rate1,0,0,nspecies,0); + MultiFab::Saxpy(rhs,0.5*dt,ext_src,0,0,nspecies,0); - ! corrector + ImplicitDiffusion(n_old, n_new, rhs, diff_coef_face, geom, 0.5*dt, time); - ! calculate rates from 2*a(n_pred)-a(n_old) - call chemical_rates(mla,n_old,rate2,dx,dt/2.d0,n_new,mattingly_lin_comb_coef,vol_fac_in=volume_factor) + // corrector - ! compute stochastic flux divergence and add to the ones from the predictor stage - if (variance_coef_mass .gt. 0.d0) then + // calculate rates from 2*a(n_pred)-a(n_old) + mattingly_lin_comb_coef[0] = -1.; + mattingly_lin_comb_coef[1] = 2.; + ChemicalRates(n_old,rate2,geom,0.5*dt,n_new,mattingly_lin_comb_coef,volume_factor); - ! first, fill random flux multifabs with new random numbers - call fill_mass_stochastic(mla,the_bc_tower%bc_tower_array) - - ! compute n on faces to use in the stochastic flux in the corrector - ! 
three possibilities - call generate_stochastic_fluxdiv_corrector() - - end if + // compute stochastic flux divergence and add to the ones from the predictor stage + if (variance_coef_mass > 0.) { + // compute n on faces to use in the stochastic flux in the corrector + // three possibilities + GenerateStochasticFluxdivCorrector(n_old,n_new,stoch_fluxdiv,diff_coef_face,dt,time,geom); + } +/* ! Crank-Nicolson ! n_k^{n+1} = n_k^n + (dt/2) div (D_k grad n_k)^n ! + (dt/2) div (D_k grad n_k)^{n+1} @@ -214,25 +206,22 @@ void AdvanceReactionDiffusion(MultiFab& n_old, ! ! in delta form ! - ! (I - div (dt/2) D_k grad) delta n_k = dt div (D_k grad n_k^n) - ! + dt div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1 / sqrt(2) ) ! Gaussian noise - ! + dt div (sqrt(2 D_k n_k^? / (dt*dV)) Z_2 / sqrt(2) ) ! Gaussian noise - ! + 1/dV * P_1( f(n_k)*(dt/2)*dV ) ! Poisson noise - ! + 1/dV * P_2( (2*f(n_k^pred)-f(n_k))*(dt/2)*dV ) ! Poisson noise - ! + dt ext_src - - do n=1,nlevs - call multifab_setval(rhs(n),0.d0) - call multifab_saxpy_3(rhs(n),dt,diff_fluxdiv(n)) - call multifab_saxpy_3(rhs(n),dt/sqrt(2.d0),stoch_fluxdiv(n)) - call multifab_saxpy_3(rhs(n),dt/2.d0,rate1(n)) - call multifab_saxpy_3(rhs(n),dt/2.d0,rate2(n)) - if(present(ext_src)) call multifab_saxpy_3(rhs(n),dt,ext_src(n)) - end do - - call implicit_diffusion(mla,n_old,n_new,rhs,diff_coef_face,dx,dt,the_bc_tower) + ! (I - div (dt/2) D_k grad) n_k^{n+1} = n_k^n + ! + (dt/2) div (D_k grad n_k^n) + ! + dt div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1 / sqrt(2) ) ! Gaussian noise + ! + dt div (sqrt(2 D_k n_k^? / (dt*dV)) Z_2 / sqrt(2) ) ! Gaussian noise + ! + 1/dV * P_1( f(n_k)*(dt/2)*dV ) ! Poisson noise + ! + 1/dV * P_2( (2*f(n_k^pred)-f(n_k))*(dt/2)*dV ) ! Poisson noise + ! + dt ext_src */ - Abort("AdvanceReactionDiffusion() - temporal_integrator=-4 (non-SSA) not written yet"); + MultiFab::Copy(rhs,n_old,0,0,nspecies,0); + MultiFab::Saxpy(rhs,0.5*dt,diff_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(rhs,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(rhs,0.5*dt,rate1,0,0,nspecies,0); + MultiFab::Saxpy(rhs,0.5*dt,rate2,0,0,nspecies,0); + MultiFab::Saxpy(rhs,dt,ext_src,0,0,nspecies,0); + + ImplicitDiffusion(n_old, n_new, rhs, diff_coef_face, geom, 0.5*dt, time); } } else { diff --git a/src_reactDiff/InitN.cpp b/src_reactDiff/InitN.cpp index 25b9ffff8..95b5c0dbc 100644 --- a/src_reactDiff/InitN.cpp +++ b/src_reactDiff/InitN.cpp @@ -63,7 +63,7 @@ void InitN(MultiFab& n_in, if (integer_populations == 1) { // Ensure that the initial number of molecules are integers - Real dv = (AMREX_SPACEDIM == 3) ? dx[0]*dx[1]*dx[2] : dx[0]*dx[1]*cell_depth; + Real dv = (AMREX_SPACEDIM == 3) ? dx[0]*dx[1]*dx[2]*cell_depth : dx[0]*dx[1]*cell_depth; if (initial_variance_mass < 0.) { // Distribute the particles on the box using a multinomial sampler diff --git a/src_reactDiff/StochasticNFluxdiv.cpp b/src_reactDiff/StochasticNFluxdiv.cpp index 5d389b74e..6075a1bae 100644 --- a/src_reactDiff/StochasticNFluxdiv.cpp +++ b/src_reactDiff/StochasticNFluxdiv.cpp @@ -32,7 +32,7 @@ void StochasticNFluxdiv(MultiFab& n_in, const Real* dx = geom.CellSize(); - Real dv = (AMREX_SPACEDIM == 3) ? dx[0]*dx[1]*dx[2] : dx[0]*dx[1]*cell_depth; + Real dv = (AMREX_SPACEDIM == 3) ? 
dx[0]*dx[1]*dx[2]*cell_depth : dx[0]*dx[1]*cell_depth; // average n_in to faces, store in flux for (MFIter mfi(n_in); mfi.isValid(); ++mfi) From b249fcc6d50fff30317445ca99189df0348e9d06 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 6 Sep 2024 20:47:12 -0700 Subject: [PATCH 072/151] match paper --- exec/reactDiff/inputs_paper_Lemarchand_3d | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/exec/reactDiff/inputs_paper_Lemarchand_3d b/exec/reactDiff/inputs_paper_Lemarchand_3d index 00ffa705d..9b1056f71 100644 --- a/exec/reactDiff/inputs_paper_Lemarchand_3d +++ b/exec/reactDiff/inputs_paper_Lemarchand_3d @@ -15,7 +15,7 @@ prob_hi = 512.0 512.0 512.0 # physical hi coordinate # number of cells in domain and maximum number of cells in a box n_cells = 256 256 256 -max_grid_size = 64 64 64 +max_grid_size = 256 256 256 # volume scale factor in 3D problems cell_depth = 1000. @@ -24,8 +24,8 @@ cell_depth = 1000. fixed_dt = 0.25 # Controls for number of steps between actions -max_step = 20000 -plot_int = 200 +max_step = 800 +plot_int = 10 stats_int = -1 seed = 1 @@ -75,7 +75,7 @@ avg_type = 1 reactDiff_reaction_type = 0 # 0=deterministic; 1=CLE; 2=SSA; 3=tau leap -reaction_type = 1 +reaction_type = 3 # (1) A -> 0 # (2) 2A + B --> 3A From 32f6395d7abc55e69d1575307a0e0466ca090c41 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Mon, 9 Sep 2024 08:02:18 -0700 Subject: [PATCH 073/151] SSA attempt --- src_chemistry/chemistry_functions.cpp | 54 ++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/src_chemistry/chemistry_functions.cpp b/src_chemistry/chemistry_functions.cpp index 7ba682cb6..737615ff7 100644 --- a/src_chemistry/chemistry_functions.cpp +++ b/src_chemistry/chemistry_functions.cpp @@ -225,10 +225,60 @@ void ChemicalRates(const MultiFab& n_cc, MultiFab& chem_rate, const amrex::Geome const Array4& rate = chem_rate.array(mfi); if (reaction_type == 2) { // SSA - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept + + amrex::ParallelForRNG(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k, amrex::RandomEngine const& engine) noexcept { - Abort("ChemicalRates() - SSA not supported"); + GpuArray n_old; + GpuArray n_new; + GpuArray avg_reaction_rate; + + Real t_local = 0.; + + for (int n=0; n dt) break; + + Real u2 = amrex::Random(engine); + u2 *= rTotal; + + // find which reaction has occured + int which_reaction=0; + Real rSum = 0.; + for (int m=0; m= u2) break; + } + + // update number densities for the reaction that has occured + for (int n=0; n Date: Mon, 9 Sep 2024 08:05:45 -0700 Subject: [PATCH 074/151] ssa bugfix --- src_chemistry/chemistry_functions.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src_chemistry/chemistry_functions.cpp b/src_chemistry/chemistry_functions.cpp index 737615ff7..46f93c02d 100644 --- a/src_chemistry/chemistry_functions.cpp +++ b/src_chemistry/chemistry_functions.cpp @@ -277,6 +277,10 @@ void ChemicalRates(const MultiFab& n_cc, MultiFab& chem_rate, const amrex::Geome n_new[n] += stoich_coeffs_PR(which_reaction,n)/dv; } } + + for (int n=0; n Date: Mon, 9 Sep 2024 09:15:09 -0700 Subject: [PATCH 075/151] unsplit SSA schemes --- src_reactDiff/AdvanceReactionDiffusion.cpp | 113 ++++++++++++++++++++- 1 file changed, 111 insertions(+), 2 deletions(-) diff --git a/src_reactDiff/AdvanceReactionDiffusion.cpp b/src_reactDiff/AdvanceReactionDiffusion.cpp index b53133f11..7824afa30 100644 --- a/src_reactDiff/AdvanceReactionDiffusion.cpp +++ b/src_reactDiff/AdvanceReactionDiffusion.cpp @@ 
-73,8 +73,65 @@ void AdvanceReactionDiffusion(MultiFab& n_old, if (reaction_type == 2) { // explicit midpoint with SSA - Abort("AdvanceReactionDiffusion() - temporal_integrator=-2 (SSA) not written yet"); + //!!!!!!!!!!!!!! + // predictor ! + //!!!!!!!!!!!!!! + + /* + ! n_k^{**} = n_k^n + (dt/2) div (D_k grad n_k)^n + ! + (dt/sqrt(2)) div sqrt(2 D_k n_k^n / (dt*dV)) Z_1 ! Gaussian noise + ! + (dt/2) ext_src + */ + MultiFab::Copy(n_new,n_old,0,0,nspecies,0); + MultiFab::Saxpy(n_new,0.5*dt,diff_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(n_new,0.5*dt,ext_src,0,0,nspecies,0); + + // computing rate1 = R(n^{**},dt/2) / (dt/2) + ChemicalRates(n_new,rate1,geom,0.5*dt,n_old,mattingly_lin_comb_coef,volume_factor); + // n_k^* = n_k^{**} + R(n^{**},dt/2) + MultiFab::Saxpy(n_new,0.5*dt,rate1,0,0,nspecies,0); + n_new.FillBoundary(geom.periodicity()); + MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time); + + //!!!!!!!!!!!!!! + // corrector ! + //!!!!!!!!!!!!!! + + // compute diffusive flux divergence + DiffusiveNFluxdiv(n_new,diff_fluxdiv,diff_coef_face,geom,time); + + // computing rate2 = R(n^*,dt/2) / (dt/2) + ChemicalRates(n_new,rate2,geom,0.5*dt,n_old,mattingly_lin_comb_coef,volume_factor); + + // compute stochastic flux divergence and add to the ones from the predictor stage + if (variance_coef_mass > 0.) { + GenerateStochasticFluxdivCorrector(n_old,n_new,stoch_fluxdiv,diff_coef_face,dt,time,geom); + } + + /* + ! n_k^{n+1} = n_k^n + dt div (D_k grad n_k)^* + ! + dt div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1 / sqrt(2) ) ! Gaussian noise + ! + dt div (sqrt(2 D_k n_k^? / (dt*dV)) Z_2 / sqrt(2) ) ! Gaussian noise + ! + R(n^{**},dt/2) + ! + R(n^{*},dt/2) + ! + dt ext_src + ! where + ! n_k^? = n_k^n (midpoint_stoch_flux_type=1) + ! = n_k^pred (midpoint_stoch_flux_type=2) + ! = 2*n_k^pred - n_k^n (midpoint_stoch_flux_type=3) + */ + + MultiFab::Copy(n_new,n_old,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt,diff_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(n_new,0.5*dt,rate1,0,0,nspecies,0); + MultiFab::Saxpy(n_new,0.5*dt,rate2,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt,ext_src,0,0,nspecies,0); + n_new.FillBoundary(geom.periodicity()); + MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time); + } else { // explicit midpoint for det/tau/CLE //!!!!!!!!!!!!!! @@ -149,7 +206,59 @@ void AdvanceReactionDiffusion(MultiFab& n_old, if (reaction_type == 2) { // implicit midpoint with SSA - Abort("AdvanceReactionDiffusion() - temporal_integrator=-4 (SSA) not written yet"); + /* + ! backward Euler predictor to half-time + ! n_k^* = n_k^n + (dt/2) div (D_k grad n_k)^* + ! + (dt/sqrt(2)) div sqrt(2 D_k n_k^n / (dt*dV)) Z_1 ! Gaussian noise + ! + (dt/2) ext_src + ! + ! (I - div (dt/2) D_k grad) n_k^* = n_k^n + ! + (dt/sqrt(2)) div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1 + ! + (dt/2) ext_src + */ + + MultiFab rhs(ba,dmap,nspecies,0); + + MultiFab::Copy(rhs,n_old,0,0,nspecies,0); + MultiFab::Saxpy(rhs,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(rhs,0.5*dt,ext_src,0,0,nspecies,0); + + ImplicitDiffusion(n_old, n_new, rhs, diff_coef_face, geom, 0.5*dt, time); + + // corrector + + // compute R(n^*,dt) / dt + ChemicalRates(n_new,rate1,geom,dt,n_new,mattingly_lin_comb_coef,volume_factor); + + // compute stochastic flux divergence and add to the ones from the predictor stage + if (variance_coef_mass > 0.) 
{ + GenerateStochasticFluxdivCorrector(n_old,n_new,stoch_fluxdiv,diff_coef_face,dt,time,geom); + } + + /* + ! Crank-Nicolson + ! n_k^{n+1} = n_k^n + (dt/2) div (D_k grad n_k)^n + ! + (dt/2) div (D_k grad n_k)^{n+1} + ! + dt div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1 / sqrt(2) ) ! Gaussian noise + ! + dt div (sqrt(2 D_k n_k^? / (dt*dV)) Z_2 / sqrt(2) ) ! Gaussian noise + ! + R(n^*,dt) + ! + dt ext_src + ! + ! (I - div (dt/2) D_k grad) n_k^{n+1} = n_k^n + + (dt/2) div (D_k grad n_k^n) + ! + dt div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1 / sqrt(2) ) ! Gaussian noise + ! + dt div (sqrt(2 D_k n_k^? / (dt*dV)) Z_2 / sqrt(2) ) ! Gaussian noise + ! + R(n^*,dt) + ! + dt ext_src + */ + + MultiFab::Copy(rhs,n_old,0,0,nspecies,0); + MultiFab::Saxpy(rhs,0.5*dt,diff_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(rhs,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0); + MultiFab::Saxpy(rhs,dt,rate1,0,0,nspecies,0); + MultiFab::Saxpy(rhs,dt,ext_src,0,0,nspecies,0); + + ImplicitDiffusion(n_old, n_new, rhs, diff_coef_face, geom, 0.5*dt, time); } else { // implicit midpoint for det/tau/CLE From ddcf0bf93fc1157d9afa9a220da14edcd48b1bc7 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Mon, 9 Sep 2024 09:32:16 -0700 Subject: [PATCH 076/151] structure factor implemented --- exec/reactDiff/inputs_paper_BPM_2d | 5 +- exec/reactDiff/inputs_paper_Lemarchand_3d | 3 +- src_reactDiff/main_driver.cpp | 72 ++++++++++++++++++++--- 3 files changed, 68 insertions(+), 12 deletions(-) diff --git a/exec/reactDiff/inputs_paper_BPM_2d b/exec/reactDiff/inputs_paper_BPM_2d index 1f9f0a321..9a728aad2 100644 --- a/exec/reactDiff/inputs_paper_BPM_2d +++ b/exec/reactDiff/inputs_paper_BPM_2d @@ -28,7 +28,8 @@ fixed_dt = 0.5 # Controls for number of steps between actions max_step = 20000 plot_int = 200 -stats_int = -1 +struct_fact_int = -1 +n_steps_skip = 2000 seed = 1 @@ -77,7 +78,7 @@ avg_type = 1 reactDiff_reaction_type = 0 # 0=deterministic; 1=CLE; 2=SSA; 3=tau leap -reaction_type = 1 +reaction_type = 3 # BPM model is: # (1) U + W --> V + W diff --git a/exec/reactDiff/inputs_paper_Lemarchand_3d b/exec/reactDiff/inputs_paper_Lemarchand_3d index 9b1056f71..0d8bec99d 100644 --- a/exec/reactDiff/inputs_paper_Lemarchand_3d +++ b/exec/reactDiff/inputs_paper_Lemarchand_3d @@ -26,7 +26,8 @@ fixed_dt = 0.25 # Controls for number of steps between actions max_step = 800 plot_int = 10 -stats_int = -1 +struct_fact_int = -1 +n_steps_skip = 200 seed = 1 diff --git a/src_reactDiff/main_driver.cpp b/src_reactDiff/main_driver.cpp index 81db63d8e..005579e67 100644 --- a/src_reactDiff/main_driver.cpp +++ b/src_reactDiff/main_driver.cpp @@ -2,6 +2,7 @@ #include "common_functions.H" #include "chemistry_functions.H" #include "reactDiff_functions.H" +#include "StructFact.H" #include #include @@ -95,7 +96,39 @@ void main_driver(const char* argv) MultiFab n_old; MultiFab n_new; + + /////////////////////////////////////////// + // Initialize structure factor object for analysis + /////////////////////////////////////////// + + Vector< std::string > var_names; + var_names.resize(nspecies); + + int cnt = 0; + std::string x; + + // n0, n1, ... 
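        // A sketch of the intent of the structure factor setup below, as far as it
        // can be read from this patch: one variable per species, named n0, n1, ...,
        // with only the diagonal pairs (n0,n0) and (n1,n1) requested, i.e. the
        // self-correlation spectra of the number densities.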
+ for (int d=0; d var_scaling(nPairs); + for (int d=0; d s_pairA(nPairs); + amrex::Vector< int > s_pairB(nPairs); + + // Select which variable pairs to include in structure factor: + s_pairA[0] = 0; + s_pairB[0] = 0; + s_pairA[1] = 1; + s_pairB[1] = 1; + + structFact.define(ba,dmap,var_names,var_scaling,s_pairA,s_pairB); + } + } else { // checkpoint restart + Abort("checkpoint read not implemented yet"); } @@ -165,10 +218,6 @@ void main_driver(const char* argv) Print() << "WARNING in advance_reaction_diffusion: use splitting based schemes (temporal_integrator>=0) for diffusion only" << std::endl; } } - - if (stats_int > 0) { - Abort("Structure factor not implemented yet"); - } int istep = (restart < 0) ? 0 : restart; WritePlotFile(istep,time,geom,n_old); @@ -192,21 +241,26 @@ void main_driver(const char* argv) ParallelDescriptor::ReduceRealMax(step_stop_time); amrex::Print() << "Time step " << step << " complted in " << step_stop_time << " seconds\n"; - if (stats_int > 0 && step%stats_int == 0 && step > n_steps_skip) { - Abort("fix structure factor snapshot"); + // add a snapshot to the structure factor + if (step > n_steps_skip && struct_fact_int > 0 && (step-n_steps_skip)%struct_fact_int == 0) { + + // add this snapshot to the average in the structure factor + structFact.FortStructure(n_new,geom); + } if (plot_int > 0 && step%plot_int == 0) { WritePlotFile(step,time,geom,n_new); - if (stats_int > 0 && step > n_steps_skip) { - Abort("fix structure factor plotfile write"); + // write out structure factor to plotfile + if (step > n_steps_skip && struct_fact_int > 0) { + structFact.WritePlotFile(step,time,geom,"plt_SF"); } } if (chk_int > 0 && step%chk_int == 0) { - Abort("fix checkpoint write"); + Abort("checkpoint write not implemented yet"); } // MultiFab memory usage From 19dfab0f61bd6413f1ddfa9f1a6c11c9fb370c4e Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Wed, 11 Sep 2024 14:53:40 -0700 Subject: [PATCH 077/151] fix abort logic --- src_reactDiff/AdvanceTimestep.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src_reactDiff/AdvanceTimestep.cpp b/src_reactDiff/AdvanceTimestep.cpp index fc5e5f507..6a8a451a3 100644 --- a/src_reactDiff/AdvanceTimestep.cpp +++ b/src_reactDiff/AdvanceTimestep.cpp @@ -7,7 +7,7 @@ void AdvanceTimestep(MultiFab& n_old, const Real& time, const Geometry& geom) { - if (temporal_integrator > 0 && reactDiff_reaction_type != 0) { + if (temporal_integrator >0 0 && reactDiff_reaction_type != 0) { if (reaction_type == 2) { Abort("SSA (reaction_type==2) requires reactDiff_reaction_type=0 for split schemes"); } From 3098d67fd232a34856f661181de49ee98b1b4854 Mon Sep 17 00:00:00 2001 From: Matthew Blomquist Date: Tue, 17 Sep 2024 11:57:44 -0700 Subject: [PATCH 078/151] Correct type in comparator for temporal integrator. 
--- src_reactDiff/AdvanceTimestep.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src_reactDiff/AdvanceTimestep.cpp b/src_reactDiff/AdvanceTimestep.cpp index 6a8a451a3..d271c1701 100644 --- a/src_reactDiff/AdvanceTimestep.cpp +++ b/src_reactDiff/AdvanceTimestep.cpp @@ -7,7 +7,7 @@ void AdvanceTimestep(MultiFab& n_old, const Real& time, const Geometry& geom) { - if (temporal_integrator >0 0 && reactDiff_reaction_type != 0) { + if (temporal_integrator >= 0 && reactDiff_reaction_type != 0) { if (reaction_type == 2) { Abort("SSA (reaction_type==2) requires reactDiff_reaction_type=0 for split schemes"); } From e390e7dae4a7b5f67a79397edb643c02c5a4e14b Mon Sep 17 00:00:00 2001 From: Daniel Ladiges Date: Tue, 17 Sep 2024 13:41:19 -0700 Subject: [PATCH 079/151] syncing --- exec/DSMC/DsmcCollide.cpp | 2 +- exec/DSMC/inputs_conc | 4 ++-- exec/DSMC/main_driver.cpp | 13 ++++++++----- src_geometry/paramPlane.cpp | 2 ++ src_particles/DsmcParticleContainer.cpp | 4 ++++ 5 files changed, 17 insertions(+), 8 deletions(-) diff --git a/exec/DSMC/DsmcCollide.cpp b/exec/DSMC/DsmcCollide.cpp index 98e4e86db..0d27b410c 100644 --- a/exec/DSMC/DsmcCollide.cpp +++ b/exec/DSMC/DsmcCollide.cpp @@ -124,7 +124,7 @@ void FhdParticleContainer::CalcSelections(Real dt) crossSection = csx[ij_spec]; //crossSection = 0; if(i_spec==j_spec) {np_j = np_i-1;} - NSel = particle_neff_tmp*np_i*np_j*crossSection*vrmax*ocollisionCellVolTmp*dt; + NSel = particle_neff_tmp*np_i*np_j*crossSection*vrmax*ocollisionCellVolTmp*dt*2; if(i_spec==j_spec) {NSel = NSel*0.5;} arrselect(i,j,k,ij_spec) = std::floor(NSel + amrex::Random(engine)); diff --git a/exec/DSMC/inputs_conc b/exec/DSMC/inputs_conc index 7a0eb560f..151e80d9e 100644 --- a/exec/DSMC/inputs_conc +++ b/exec/DSMC/inputs_conc @@ -86,8 +86,8 @@ # Xk and Yk at the wall for Dirichlet (concentrations) - set one to zero # Ordering: (species 1, x-dir), (species 2, x-dir), ... (species 1, y-dir), ... 
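# For example, with the values set below, species 1 takes 0.49375 and species 2
# takes 0.50625 at the lo-x boundary, following the ordering given above.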
- bc_Yk_x_lo = 0.45 0.55 # lo BC - bc_Yk_x_hi = 0.55 0.45 # hi BC + bc_Yk_x_lo = 0.49375 0.50625 # lo BC + bc_Yk_x_hi = 0.50625 0.49375 # hi BC bc_Yk_y_lo = 1.0 1.0 # lo BC bc_Yk_y_hi = 1.0 1.0 # hi BC bc_Yk_z_lo = 1.0 1.0 # lo BC diff --git a/exec/DSMC/main_driver.cpp b/exec/DSMC/main_driver.cpp index 473d0e3de..ef6ed217d 100644 --- a/exec/DSMC/main_driver.cpp +++ b/exec/DSMC/main_driver.cpp @@ -334,7 +334,8 @@ void main_driver(const char* argv) particles.CalcSelections(dt); particles.CollideParticles(dt); - if(istep%2!=0) +// if(istep%2!=0) + if(false) { particles.EvaluateStats(cuInst,cuMeans,cuVars,primInst,primMeans,primVars, cvlInst,cvlMeans,QMeans,coVars,spatialCross1D,statsCount++,time); @@ -355,17 +356,19 @@ void main_driver(const char* argv) //PrintMF(structFactPrimMF,0,-1); //PrintMF(primInst,1,1); - structFactPrim.FortStructure(structFactPrimMF,geom); + //structFactPrim.FortStructure(structFactPrimMF,geom); } } - particles.Source(dt, paramPlaneList, paramPlaneCount, cuInst); + //particles.externalForce(dt); + particles.Source(dt, paramPlaneList, paramPlaneCount, cuInst); particles.MoveParticlesCPP(dt, paramPlaneList, paramPlaneCount); //particles.updateTimeStep(geom,dt); //reduceMassFlux(paramPlaneList, paramPlaneCount); - if(istep%2==0) + if(true) +// if(istep%2==0) { particles.EvaluateStats(cuInst,cuMeans,cuVars,primInst,primMeans,primVars, cvlInst,cvlMeans,QMeans,coVars,spatialCross1D,statsCount++,time); @@ -386,7 +389,7 @@ void main_driver(const char* argv) //PrintMF(structFactPrimMF,0,-1); //PrintMF(primInst,1,1); - structFactPrim.FortStructure(structFactPrimMF,geom); + //structFactPrim.FortStructure(structFactPrimMF,geom); } } diff --git a/src_geometry/paramPlane.cpp b/src_geometry/paramPlane.cpp index c0f50d08e..773cc0672 100644 --- a/src_geometry/paramPlane.cpp +++ b/src_geometry/paramPlane.cpp @@ -57,6 +57,7 @@ void BuildParamplanes(paramPlane* paramPlaneList, const int paramplanes, const R for(int l=0; l=0; j--) { Real density = paramPlaneList[i].densityRight[j]*rho_lo[0]/properties[j].mass; + //cout << "n: " << density << endl; //Real density = paramPlaneList[i].densityRight[j]; Real xMom = paramPlaneList[i].xMomFluxRight[j]; From 4e52116dc1adcb22f47f7cec5183ab9929f4c78c Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Tue, 17 Sep 2024 17:09:02 -0700 Subject: [PATCH 080/151] start of MN diffusion - WIP, host only --- src_reactDiff/Make.package | 1 + src_reactDiff/MultinomialDiffusion.cpp | 81 ++++++++++++++++++++++++++ src_reactDiff/reactDiff_functions.H | 24 ++++++-- 3 files changed, 100 insertions(+), 6 deletions(-) create mode 100644 src_reactDiff/MultinomialDiffusion.cpp diff --git a/src_reactDiff/Make.package b/src_reactDiff/Make.package index 778b6cb04..33c156374 100644 --- a/src_reactDiff/Make.package +++ b/src_reactDiff/Make.package @@ -5,6 +5,7 @@ CEXE_sources += AdvanceTimestep.cpp CEXE_sources += DiffusiveNFluxdiv.cpp CEXE_sources += ImplicitDiffusion.cpp CEXE_sources += InitN.cpp +CEXE_sources += MultinomialDiffusion.cpp CEXE_sources += reactDiff_functions.cpp CEXE_sources += StochasticNFluxdiv.cpp CEXE_sources += WritePlotFile.cpp diff --git a/src_reactDiff/MultinomialDiffusion.cpp b/src_reactDiff/MultinomialDiffusion.cpp new file mode 100644 index 000000000..fd82313f9 --- /dev/null +++ b/src_reactDiff/MultinomialDiffusion.cpp @@ -0,0 +1,81 @@ +#include "reactDiff_functions.H" + +#include "AMReX_MLMG.H" +#include + +#include + +void MultinomialDiffusion(MultiFab& n_in, + const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face, + const Geometry& geom, 
+ const Real& dt) { + + const GpuArray dx = geom.CellSizeArray(); + + Real dv = (AMREX_SPACEDIM==2) ? dx[0]*dx[1]*cell_depth : dx[0]*dx[1]*dx[2]*cell_depth; + + for (MFIter mfi(n_in); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.validbox(); + + const Array4 & n_arr = n_in.array(mfi); + + AMREX_D_TERM(const Array4 & diffx = diff_coef_face[0].array(mfi);, + const Array4 & diffy = diff_coef_face[1].array(mfi);, + const Array4 & diffz = diff_coef_face[2].array(mfi);); + + amrex::ParallelFor(bx, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + + GpuArray p; + GpuArray fluxes; + + p[0] = diffx(i,j,k,n)*dt/(dx[0]*dx[0]); + p[1] = diffx(i+1,j,k,n)*dt/(dx[0]*dx[0]); + p[2] = diffy(i,j,k,n)*dt/(dx[0]*dx[1]); + p[3] = diffy(i,j+1,k,n)*dt/(dx[0]*dx[1]); +#if (AMREX_SPACEDIM == 3) + p[4] = diffz(i,j,k,n)*dt/(dx[0]*dx[2]); + p[5] = diffz(i,j,k+1,n)*dt/(dx[0]*dx[2]); +#endif + + int N = std::max(0., n_arr(i,j,k,n)*dv); + + multinomial_rng(fluxes, N, p); + + }); + } +} + +AMREX_GPU_HOST_DEVICE void multinomial_rng(GpuArray& samples, + const int& N, + GpuArray& p) +{ +#if (AMREX_USE_CUDA) + Abort("MultinomialRNG not supported for CUDA"); +#else + + Real sum_p = 0; + for (int sample=0; sample<2*AMREX_SPACEDIM; ++sample) { + sum_p += p[sample]; + } + if (sum_p > 1.) { + Abort("multinomial_rng: probabilities must sum to 1 or less"); + } + + std::default_random_engine generator; + generator.seed(std::chrono::system_clock::now().time_since_epoch().count()); + + sum_p = 0.; + int sum_n = 0; + + for (int sample=0; sample<2*AMREX_SPACEDIM; ++sample) { + std::binomial_distribution distribution(N-sum_n, p[sample]/(1.-sum_p)); + samples[sample] = distribution(generator); + + sum_n += samples[sample]; + sum_p += p[sample]; + } + +#endif +} diff --git a/src_reactDiff/reactDiff_functions.H b/src_reactDiff/reactDiff_functions.H index 56cff865a..eb9b36416 100644 --- a/src_reactDiff/reactDiff_functions.H +++ b/src_reactDiff/reactDiff_functions.H @@ -66,21 +66,21 @@ void AdvanceTimestep(MultiFab& n_old, //////////////////////// // In ImplicitDiffusion.cpp //////////////////////// -void DiffusiveNFluxdiv(MultiFab& n_in, - MultiFab& diff_fluxdiv, +void ImplicitDiffusion(MultiFab& n_old, + MultiFab& n_new, + const MultiFab& rhs, const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face, const Geometry& geom, + const Real& dt_fac, const Real& time); //////////////////////// // In DiffusiveNFluxdiv.cpp //////////////////////// -void ImplicitDiffusion(MultiFab& n_old, - MultiFab& n_new, - const MultiFab& rhs, +void DiffusiveNFluxdiv(MultiFab& n_in, + MultiFab& diff_fluxdiv, const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face, const Geometry& geom, - const Real& dt_fac, const Real& time); //////////////////////// @@ -90,6 +90,18 @@ void InitN(MultiFab& n_in, const Geometry& geom, const Real& time); +//////////////////////// +// In MultinomialDiffusion.cpp +//////////////////////// +void MultinomialDiffusion(MultiFab& n_in, + const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face, + const Geometry& geom, + const Real& dt); + +AMREX_GPU_HOST_DEVICE void multinomial_rng(GpuArray& samples, + const int& N, + GpuArray& p); + //////////////////////// // In StochasticNFluxdiv.cpp //////////////////////// From 9221dbc437f414b940af2f30ebba4f81d82d3aac Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Tue, 17 Sep 2024 17:58:35 -0700 Subject: [PATCH 081/151] multinomial WIP --- src_reactDiff/AdvanceDiffusion.cpp | 2 +- src_reactDiff/MultinomialDiffusion.cpp | 66 ++++++++++++++++++++++++-- 
src_reactDiff/reactDiff_functions.H | 6 ++- 3 files changed, 67 insertions(+), 7 deletions(-) diff --git a/src_reactDiff/AdvanceDiffusion.cpp b/src_reactDiff/AdvanceDiffusion.cpp index becbe5bac..a37601fe2 100644 --- a/src_reactDiff/AdvanceDiffusion.cpp +++ b/src_reactDiff/AdvanceDiffusion.cpp @@ -33,7 +33,7 @@ void AdvanceDiffusion(MultiFab& n_old, } if (reactDiff_diffusion_type == 3) { - Abort("AdvanceDiffusion() - write multinomial case"); + MultinomialDiffusion(n_old,n_new,diff_coef_face,geom,dt,time); return; } diff --git a/src_reactDiff/MultinomialDiffusion.cpp b/src_reactDiff/MultinomialDiffusion.cpp index fd82313f9..e8da7a55e 100644 --- a/src_reactDiff/MultinomialDiffusion.cpp +++ b/src_reactDiff/MultinomialDiffusion.cpp @@ -5,20 +5,35 @@ #include -void MultinomialDiffusion(MultiFab& n_in, +void MultinomialDiffusion(MultiFab& n_old, + MultiFab& n_new, const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face, const Geometry& geom, - const Real& dt) { + const Real& dt, + const Real& time) +{ + BoxArray ba = n_old.boxArray(); + DistributionMapping dmap = n_old.DistributionMap(); + + MultiFab cell_update(ba, dmap, nspecies, 1); + + // set new state to zero everywhere, including ghost cells + n_new.setVal(0.); + + // copy old state into new in valid region only + MultiFab::Copy(n_new,n_old,0,0,nspecies,0); const GpuArray dx = geom.CellSizeArray(); Real dv = (AMREX_SPACEDIM==2) ? dx[0]*dx[1]*cell_depth : dx[0]*dx[1]*dx[2]*cell_depth; - for (MFIter mfi(n_in); mfi.isValid(); ++mfi) + for (MFIter mfi(n_new); mfi.isValid(); ++mfi) { const Box& bx = mfi.validbox(); - const Array4 & n_arr = n_in.array(mfi); + const Array4 & n_arr = n_new.array(mfi); + + const Array4 & update = cell_update.array(mfi); AMREX_D_TERM(const Array4 & diffx = diff_coef_face[0].array(mfi);, const Array4 & diffy = diff_coef_face[1].array(mfi);, @@ -43,8 +58,51 @@ void MultinomialDiffusion(MultiFab& n_in, multinomial_rng(fluxes, N, p); + // lo-x face + update(i ,j,k,n) -= fluxes[0]; + update(i-1,j,k,n) += fluxes[0]; + + // hi-x face + update(i ,j,k,n) -= fluxes[1]; + update(i+1,j,k,n) += fluxes[1]; + + // lo-y face + update(i,j, k,n) -= fluxes[2]; + update(i,j-1,k,n) += fluxes[2]; + + // hi-y face + update(i,j ,k,n) -= fluxes[3]; + update(i,j+1,k,n) += fluxes[3]; + +#if (AMREX_SPACEDIM == 3) + // lo-z face + update(i,j,k, n) -= fluxes[4]; + update(i,j,k-1,n) += fluxes[4]; + + // hi-z face + update(i,j,k, n) -= fluxes[5]; + update(i,j,k+1,n) += fluxes[5]; +#endif }); } + + for (MFIter mfi(n_new); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.growntilebox(1); + + const Array4 & n_arr = n_new.array(mfi); + + const Array4 & update = cell_update.array(mfi); + + amrex::ParallelFor(bx, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + n_arr(i,j,k,n) += update(i,j,k,n) / dv; + }); + } + + n_new.SumBoundary(geom.periodicity()); + n_new.FillBoundary(geom.periodicity()); + MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time); } AMREX_GPU_HOST_DEVICE void multinomial_rng(GpuArray& samples, diff --git a/src_reactDiff/reactDiff_functions.H b/src_reactDiff/reactDiff_functions.H index eb9b36416..3315bc3c5 100644 --- a/src_reactDiff/reactDiff_functions.H +++ b/src_reactDiff/reactDiff_functions.H @@ -93,10 +93,12 @@ void InitN(MultiFab& n_in, //////////////////////// // In MultinomialDiffusion.cpp //////////////////////// -void MultinomialDiffusion(MultiFab& n_in, +void MultinomialDiffusion(MultiFab& n_old, + MultiFab& n_new, const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face, const 
Geometry& geom, - const Real& dt); + const Real& dt, + const Real& time); AMREX_GPU_HOST_DEVICE void multinomial_rng(GpuArray& samples, const int& N, From dc9a4e16c0f9c32628f1d4b27eb786b7e1adc0a1 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Tue, 17 Sep 2024 18:10:02 -0700 Subject: [PATCH 082/151] MN diffusion bug fixes turn off initial fluctuations if initial_variance_mass=0. --- src_reactDiff/InitN.cpp | 2 +- src_reactDiff/MultinomialDiffusion.cpp | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src_reactDiff/InitN.cpp b/src_reactDiff/InitN.cpp index 95b5c0dbc..307b035f0 100644 --- a/src_reactDiff/InitN.cpp +++ b/src_reactDiff/InitN.cpp @@ -69,7 +69,7 @@ void InitN(MultiFab& n_in, Abort("integer_populations=1 with initial_variance_mass < 0. not supported yet"); - } else { // Make the number of molecules in each cell Poisson distributed with desired mean + } else if (initial_variance_mass > 0.) { // Make the number of molecules in each cell Poisson distributed with desired mean for ( MFIter mfi(n_in,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { diff --git a/src_reactDiff/MultinomialDiffusion.cpp b/src_reactDiff/MultinomialDiffusion.cpp index e8da7a55e..493c15df1 100644 --- a/src_reactDiff/MultinomialDiffusion.cpp +++ b/src_reactDiff/MultinomialDiffusion.cpp @@ -47,14 +47,14 @@ void MultinomialDiffusion(MultiFab& n_old, p[0] = diffx(i,j,k,n)*dt/(dx[0]*dx[0]); p[1] = diffx(i+1,j,k,n)*dt/(dx[0]*dx[0]); - p[2] = diffy(i,j,k,n)*dt/(dx[0]*dx[1]); - p[3] = diffy(i,j+1,k,n)*dt/(dx[0]*dx[1]); + p[2] = diffy(i,j,k,n)*dt/(dx[1]*dx[1]); + p[3] = diffy(i,j+1,k,n)*dt/(dx[1]*dx[1]); #if (AMREX_SPACEDIM == 3) - p[4] = diffz(i,j,k,n)*dt/(dx[0]*dx[2]); - p[5] = diffz(i,j,k+1,n)*dt/(dx[0]*dx[2]); + p[4] = diffz(i,j,k,n)*dt/(dx[2]*dx[2]); + p[5] = diffz(i,j,k+1,n)*dt/(dx[2]*dx[2]); #endif - int N = std::max(0., n_arr(i,j,k,n)*dv); + int N = std::max(0., std::round(n_arr(i,j,k,n)*dv)); multinomial_rng(fluxes, N, p); From 9e722e70664e96c3feaf97995553772e875c0cad Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Tue, 17 Sep 2024 18:22:26 -0700 Subject: [PATCH 083/151] multinomial diffusion fix --- src_reactDiff/MultinomialDiffusion.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src_reactDiff/MultinomialDiffusion.cpp b/src_reactDiff/MultinomialDiffusion.cpp index 493c15df1..7b296efe7 100644 --- a/src_reactDiff/MultinomialDiffusion.cpp +++ b/src_reactDiff/MultinomialDiffusion.cpp @@ -16,6 +16,7 @@ void MultinomialDiffusion(MultiFab& n_old, DistributionMapping dmap = n_old.DistributionMap(); MultiFab cell_update(ba, dmap, nspecies, 1); + cell_update.setVal(0.); // set new state to zero everywhere, including ghost cells n_new.setVal(0.); @@ -118,6 +119,7 @@ AMREX_GPU_HOST_DEVICE void multinomial_rng(GpuArray& samp sum_p += p[sample]; } if (sum_p > 1.) 
{ + Print() << "sum_p = " << sum_p << std::endl; Abort("multinomial_rng: probabilities must sum to 1 or less"); } From 2645e22e806f3b94efb41235bb5c5ec9178e673e Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Tue, 17 Sep 2024 18:26:57 -0700 Subject: [PATCH 084/151] multinomial unsplit case --- src_reactDiff/AdvanceReactionDiffusion.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src_reactDiff/AdvanceReactionDiffusion.cpp b/src_reactDiff/AdvanceReactionDiffusion.cpp index 7824afa30..f3519c4ca 100644 --- a/src_reactDiff/AdvanceReactionDiffusion.cpp +++ b/src_reactDiff/AdvanceReactionDiffusion.cpp @@ -26,14 +26,28 @@ void AdvanceReactionDiffusion(MultiFab& n_old, MultiFab rate1(ba,dmap,nspecies,0); - if (temporal_integrator == -3) { // multinomial diffusion - Abort("AdvanceReactionDiffusion() - temporal_integrator=-3 not supported yet"); - } - Vector mattingly_lin_comb_coef(2); mattingly_lin_comb_coef[0] = 1.; mattingly_lin_comb_coef[1] = 0.; + if (temporal_integrator == -3) { // multinomial diffusion + + // calculate rates + // rates could be deterministic or stochastic depending on use_Poisson_rng + ChemicalRates(n_old,rate1,geom,dt,n_old,mattingly_lin_comb_coef,volume_factor); + + // advance multinomial diffusion + MultinomialDiffusion(n_old,n_new,diff_coef_face,geom,dt,time); + + // add reaction contribution and external source + MultiFab::Saxpy(n_new,dt,rate1,0,0,nspecies,0); + MultiFab::Saxpy(n_new,dt,ext_src,0,0,nspecies,0); + n_new.FillBoundary(geom.periodicity()); + MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time); + return; + + } + MultiFab diff_fluxdiv (ba,dmap,nspecies,0); MultiFab stoch_fluxdiv(ba,dmap,nspecies,0); From 74af2ad6e5fae781dd758bb958c42fd28144abf4 Mon Sep 17 00:00:00 2001 From: Daniel Ladiges Date: Wed, 18 Sep 2024 07:22:01 -0700 Subject: [PATCH 085/151] Initial code for including stochastic bounary interscetions in phonon code --- exec/phononDSMC/main_driver.cpp | 4 ++++ src_geometry/paramPlane.H | 2 ++ src_geometry/paramPlane.cpp | 5 +++++ 3 files changed, 11 insertions(+) diff --git a/exec/phononDSMC/main_driver.cpp b/exec/phononDSMC/main_driver.cpp index ff31f5ba9..89959e6b6 100644 --- a/exec/phononDSMC/main_driver.cpp +++ b/exec/phononDSMC/main_driver.cpp @@ -33,6 +33,8 @@ void main_driver(const char* argv) Real time = 0.; int statsCount = 1; + iMultiFab bCell; + MultiFab cuInst, cuMeans, cuVars; if (seed > 0) @@ -99,6 +101,8 @@ void main_driver(const char* argv) cuInst.define(ba, dmap, ncon, 0); cuInst.setVal(0.); cuMeans.define(ba, dmap, ncon, 0); cuMeans.setVal(0.); cuVars.define(ba,dmap, ncon, 0); cuVars.setVal(0.); + + bCell.define(ba, dmap, 2, 1); bCell.setVal(0); } diff --git a/src_geometry/paramPlane.H b/src_geometry/paramPlane.H index e3661d8e0..106540a8e 100644 --- a/src_geometry/paramPlane.H +++ b/src_geometry/paramPlane.H @@ -147,6 +147,8 @@ typedef struct { void BuildParamplanes(paramPlane* paramPlaneList, const int paramplanes, const Real* domainLo, const Real* domainHi); void BuildParamplanesPhonon(paramPlane* paramPlaneList, const int paramplanes, const Real* domainLo, const Real* domainHi); +void SetBoundaryCells(paramPlane* paramPlaneList, const int paramplanes, const Real* domainLo, const Real* domainHi, iMultiFab& bCell); + double getTheta(double nx, double ny, double nz); double getPhi(double nx, double ny, double nz); diff --git a/src_geometry/paramPlane.cpp b/src_geometry/paramPlane.cpp index 773cc0672..e99d1f144 100644 --- a/src_geometry/paramPlane.cpp +++ 
b/src_geometry/paramPlane.cpp @@ -1989,3 +1989,8 @@ void BuildParamplanesPhonon(paramPlane* paramPlaneList, const int paramplanes, c } planeFile.close(); } + +void SetBoundaryCells(paramPlane* paramPlaneList, const int paramplanes, const Real* domainLo, const Real* domainHi, iMultiFab& bCell) +{ + +} From 318bbec541ca72297b8110a535a0a2dcce1ccede Mon Sep 17 00:00:00 2001 From: Matthew Blomquist Date: Wed, 18 Sep 2024 17:41:02 -0700 Subject: [PATCH 086/151] Print avg_n to file averageDensity.txt --- src_reactDiff/main_driver.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src_reactDiff/main_driver.cpp b/src_reactDiff/main_driver.cpp index 005579e67..7609fa4af 100644 --- a/src_reactDiff/main_driver.cpp +++ b/src_reactDiff/main_driver.cpp @@ -223,6 +223,14 @@ void main_driver(const char* argv) WritePlotFile(istep,time,geom,n_old); /////////////////////////////////////////// + + // Create output file for averaged density + std::ofstream outputFile("averagedDensity.txt"); + outputFile << "time "; + + for (int comp = 0; comp < nspecies; ++comp) { + outputFile << "comp_" << comp << " "; + } // time step loop for(int step=step_start;step<=max_step;++step) { @@ -235,6 +243,19 @@ void main_driver(const char* argv) time += dt; MultiFab::Copy(n_old,n_new,0,0,nspecies,1); + // Compute average n for each species, print to file? + for (int comp = 0; comp < nspecies; ++comp) { + if (comp == 0) { + outputFile << "\n" << time << " "; + } + + amrex::Real n_sum = n_old.sum(comp); // or n_new depending on where you call this + amrex::Real n_avg = n_sum / (n_cells[0]*n_cells[1]); // for 3D you need n_cells[2] also + amrex::Print() << "time = " << time << " comp " << comp << " n_avg = " << n_avg << std::endl; + + outputFile << n_avg << " "; + } + // Call the timer again and compute the maximum difference between the start time // and stop time over all processors Real step_stop_time = ParallelDescriptor::second() - step_strt_time; @@ -286,6 +307,8 @@ void main_driver(const char* argv) } + outputFile.close(); + // Call the timer again and compute the maximum difference between the start time // and stop time over all processors Real stop_time = ParallelDescriptor::second() - strt_time; From c84bee988e816a96df0324e517d5aae09d23b364 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Thu, 19 Sep 2024 07:15:35 -0700 Subject: [PATCH 087/151] small tweaks to output format --- src_reactDiff/main_driver.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src_reactDiff/main_driver.cpp b/src_reactDiff/main_driver.cpp index 7609fa4af..454b11bc0 100644 --- a/src_reactDiff/main_driver.cpp +++ b/src_reactDiff/main_driver.cpp @@ -84,6 +84,8 @@ void main_driver(const char* argv) IntVect dom_hi(AMREX_D_DECL(n_cells[0]-1, n_cells[1]-1, n_cells[2]-1)); Box domain(dom_lo, dom_hi); + long cell_count = (AMREX_SPACEDIM==2) ? 
n_cells[0]*n_cells[1] : n_cells[0]*n_cells[1]*n_cells[2]; + Geometry geom(domain,&real_box,CoordSys::cartesian,is_periodic.data()); const Real* dx = geom.CellSize(); @@ -231,6 +233,7 @@ void main_driver(const char* argv) for (int comp = 0; comp < nspecies; ++comp) { outputFile << "comp_" << comp << " "; } + outputFile << std::endl; // time step loop for(int step=step_start;step<=max_step;++step) { @@ -243,18 +246,19 @@ void main_driver(const char* argv) time += dt; MultiFab::Copy(n_old,n_new,0,0,nspecies,1); + outputFile << std::setprecision(12) << time << " "; + amrex::Print() << "time = " << time << " n_avg = "; + // Compute average n for each species, print to file? for (int comp = 0; comp < nspecies; ++comp) { - if (comp == 0) { - outputFile << "\n" << time << " "; - } - amrex::Real n_sum = n_old.sum(comp); // or n_new depending on where you call this - amrex::Real n_avg = n_sum / (n_cells[0]*n_cells[1]); // for 3D you need n_cells[2] also - amrex::Print() << "time = " << time << " comp " << comp << " n_avg = " << n_avg << std::endl; - + amrex::Real n_sum = n_old.sum(comp); + amrex::Real n_avg = n_sum / cell_count; + amrex::Print() << n_avg << " "; outputFile << n_avg << " "; } + amrex::Print() << std::endl; + outputFile << std::endl; // Call the timer again and compute the maximum difference between the start time // and stop time over all processors From bfe97969bdc1345268328238c52fff2a1987e45f Mon Sep 17 00:00:00 2001 From: HyuntaeJung Date: Sun, 22 Sep 2024 19:22:08 -0700 Subject: [PATCH 088/151] MFsurfchem - change number density to pressure --- .../test_MFsurfchem_COAr_eq/inputs_fhd_stag | 2 +- .../test_MFsurfchem_COAr_eq/params_COAr_eq.py | 4 ++-- src_MFsurfchem/MFsurfchem_functions.cpp | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/exec/compressible_stag/test_MFsurfchem_COAr_eq/inputs_fhd_stag b/exec/compressible_stag/test_MFsurfchem_COAr_eq/inputs_fhd_stag index 19d706270..3e5d36209 100644 --- a/exec/compressible_stag/test_MFsurfchem_COAr_eq/inputs_fhd_stag +++ b/exec/compressible_stag/test_MFsurfchem_COAr_eq/inputs_fhd_stag @@ -122,7 +122,7 @@ surf_site_num_dens = 1.027285e+15 # adsorption rate = ads_rate_const * num_dens # desoprtion rate = des_rate -ads_rate_const = 1.770226e-11 +ads_rate_const = 1.831671e+02 des_rate = 3.702336e+07 # e_beta = 0 # no additional energy update diff --git a/exec/compressible_stag/test_MFsurfchem_COAr_eq/params_COAr_eq.py b/exec/compressible_stag/test_MFsurfchem_COAr_eq/params_COAr_eq.py index c2550d8c3..a16240e1a 100644 --- a/exec/compressible_stag/test_MFsurfchem_COAr_eq/params_COAr_eq.py +++ b/exec/compressible_stag/test_MFsurfchem_COAr_eq/params_COAr_eq.py @@ -152,7 +152,7 @@ sprob = 1. 
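# Note on units (assuming the ideal-gas relation p1 = n1*kB*temp in CGS): expressing the
# adsorption rate constant per unit pressure rather than per unit number density means
# kads1 (per pressure) = kads1 (per number density)/(kB*temp). The matching change of
# ads_rate_const in inputs_fhd_stag from 1.770226e-11 to 1.831671e+02 is consistent with
# this, since their ratio of ~1.03e13 equals 1/(kB*temp) for kB = 1.380649e-16 erg/K and
# temp ~ 700 K.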
rads1 = sprob*rcol1 -kads1 = rads1/n1 +kads1 = rads1/p1 kdes1 = rads1*math.exp((-delta_mu1+E_bind)*eV_cgs/(kB*temp)) @@ -162,7 +162,7 @@ print("- sticking prob = %f" % sprob) print("- rads1 = %e (rate)" % rads1) -print("- kads1 = rads1/n1 (rate const) = %e" % kads1) +print("- kads1 = rads1/p1 (rate const) = %e" % kads1) print("- kdes1 = %e" % kdes1) diff --git a/src_MFsurfchem/MFsurfchem_functions.cpp b/src_MFsurfchem/MFsurfchem_functions.cpp index b7805a1df..f608cda60 100644 --- a/src_MFsurfchem/MFsurfchem_functions.cpp +++ b/src_MFsurfchem/MFsurfchem_functions.cpp @@ -66,7 +66,7 @@ void InitializeMFSurfchemNamespace() stoch_MFsurfchem = 1; // default value pp.query("stoch_MFsurfchem",stoch_MFsurfchem); - k_beta = 0.5; // default value + k_beta = -0.5; // default value pp.query("k_beta",k_beta); e_beta = 0.5; // default value @@ -153,8 +153,8 @@ void sample_MFsurfchem(MultiFab& cu, MultiFab& prim, MultiFab& surfcov, MultiFab amrex:: Real tempratio = prim_arr(i,j,k,4)/T_init[0]; for (int m=0;m Date: Mon, 23 Sep 2024 12:57:15 -0700 Subject: [PATCH 089/151] scale noise appropriately in reactdiff diffusion module --- src_reactDiff/StochasticNFluxdiv.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src_reactDiff/StochasticNFluxdiv.cpp b/src_reactDiff/StochasticNFluxdiv.cpp index 6075a1bae..74dfff514 100644 --- a/src_reactDiff/StochasticNFluxdiv.cpp +++ b/src_reactDiff/StochasticNFluxdiv.cpp @@ -120,7 +120,7 @@ void StochasticNFluxdiv(MultiFab& n_in, } for (int i=0; i Date: Tue, 24 Sep 2024 16:05:57 +0900 Subject: [PATCH 090/151] renname dens to pres --- src_MFsurfchem/MFsurfchem_functions.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src_MFsurfchem/MFsurfchem_functions.cpp b/src_MFsurfchem/MFsurfchem_functions.cpp index f608cda60..c3502d798 100644 --- a/src_MFsurfchem/MFsurfchem_functions.cpp +++ b/src_MFsurfchem/MFsurfchem_functions.cpp @@ -153,8 +153,8 @@ void sample_MFsurfchem(MultiFab& cu, MultiFab& prim, MultiFab& surfcov, MultiFab amrex:: Real tempratio = prim_arr(i,j,k,4)/T_init[0]; for (int m=0;m Date: Tue, 24 Sep 2024 16:12:27 +0900 Subject: [PATCH 091/151] dens to pres in Nads --- src_MFsurfchem/MFsurfchem_functions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src_MFsurfchem/MFsurfchem_functions.cpp b/src_MFsurfchem/MFsurfchem_functions.cpp index c3502d798..4892dae9f 100644 --- a/src_MFsurfchem/MFsurfchem_functions.cpp +++ b/src_MFsurfchem/MFsurfchem_functions.cpp @@ -158,7 +158,7 @@ void sample_MFsurfchem(MultiFab& cu, MultiFab& prim, MultiFab& surfcov, MultiFab amrex::Real theta = surfcov_arr(i,j,k,m); - amrex::Real meanNads = ads_rate_const[m]*dens*(1-sumtheta)*Ntot*dt*pow(tempratio,k_beta); + amrex::Real meanNads = ads_rate_const[m]*pres*(1-sumtheta)*Ntot*dt*pow(tempratio,k_beta); amrex::Real meanNdes = des_rate[m]*theta*Ntot*dt; amrex::Real Nads; From 8b15a8e7f1ca12814e491b26c33870feefdef82c Mon Sep 17 00:00:00 2001 From: Daniel Ladiges Date: Tue, 24 Sep 2024 06:52:05 -0700 Subject: [PATCH 092/151] continued work on phonon BC code --- exec/phononDSMC/test_inputs/input_test | 12 ++++++------ src_geometry/paramplane_functions_K.H | 5 ++++- src_particles/DsmcParticleContainer.cpp | 17 ++++++++++++++--- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/exec/phononDSMC/test_inputs/input_test b/exec/phononDSMC/test_inputs/input_test index 03ef991e2..8913daab1 100644 --- a/exec/phononDSMC/test_inputs/input_test +++ b/exec/phononDSMC/test_inputs/input_test @@ -1,13 +1,13 @@ # Problem specification - 
prob_lo = 0.0 0.0 0.0 # physical lo coordinate - prob_hi = 0.08 0.08 0.01 # physical hi coordinate (cm) + prob_lo = -0.000597375 -0.00020999999999999998 0.0 # physical lo coordinate + prob_hi = 0.000597375 0.00020999999999999998 145e-7 # physical hi coordinate (cm) - n_cells = 80 80 10 # keep as powers of two - max_grid_size = 80 80 10 - max_particle_tile_size = 256 256 256 + n_cells = 80 80 4 # keep as powers of two + max_grid_size = 20 20 4 + max_particle_tile_size = 256 256 4 # Time-step control - fixed_dt = 1e-9 + fixed_dt = 2e-12 # Controls for number of steps between actions max_step = 10000 diff --git a/src_geometry/paramplane_functions_K.H b/src_geometry/paramplane_functions_K.H index fc0ad4372..dc99b2ad1 100644 --- a/src_geometry/paramplane_functions_K.H +++ b/src_geometry/paramplane_functions_K.H @@ -52,13 +52,15 @@ void find_inter_gpu(FhdParticleContainer::ParticleType& part, const Real delt, c *intsurf = -1; Real uval, vval, tval; - pre_check_gpu(part, delt, paramplanes, ns, &flag, phi, plo, inttime); + //pre_check_gpu(part, delt, paramplanes, ns, &flag, phi, plo, inttime); //Complete if(flag == 0) { for(int s=1;s<=ns;s++) { + //if((s != 2) && (s != 3)) + { const paramPlane* surf = ¶mplanes[s-1]; Real denominv = 1.0/(part.rdata(FHD_realData::velz)*surf->uy*surf->vx - part.rdata(FHD_realData::vely)*surf->uz*surf->vx - part.rdata(FHD_realData::velz)*surf->ux*surf->vy + part.rdata(FHD_realData::velx)*surf->uz*surf->vy + part.rdata(FHD_realData::vely)*surf->ux*surf->vz - part.rdata(FHD_realData::velx)*surf->uy*surf->vz); @@ -89,6 +91,7 @@ void find_inter_gpu(FhdParticleContainer::ParticleType& part, const Real delt, c *intside = 0; //0 for lhs } } + } } } diff --git a/src_particles/DsmcParticleContainer.cpp b/src_particles/DsmcParticleContainer.cpp index 39fd9a34a..1a50581b4 100644 --- a/src_particles/DsmcParticleContainer.cpp +++ b/src_particles/DsmcParticleContainer.cpp @@ -420,8 +420,11 @@ void FhdParticleContainer::MovePhononsCPP(const Real dt, paramPlane* paramPlaneL //Print() << "Pre " << part.id() << ": " << part.rdata(FHD_realData::velx + 0) << ", " << part.rdata(FHD_realData::velx + 1) << ", " << part.rdata(FHD_realData::velx + 2) << endl; // printf("DT: %e\n", dt); // cout << "DT: " << dt << endl; - find_inter_gpu(part, runtime, paramPlaneListPtr, paramPlaneCount, - &intsurf, &inttime, &intside, AMREX_ZFILL(plo), AMREX_ZFILL(phi)); + for(int ii = 0;ii<100;ii++) + { + find_inter_gpu(part, runtime, paramPlaneListPtr, paramPlaneCount, + &intsurf, &inttime, &intside, AMREX_ZFILL(plo), AMREX_ZFILL(phi)); + } Real tauImpurityInv = pow(part.rdata(FHD_realData::omega),4)/tau_i_p; Real tauTAInv = part.rdata(FHD_realData::omega)*pow(T_init[0],4)/tau_ta_p; @@ -455,7 +458,7 @@ void FhdParticleContainer::MovePhononsCPP(const Real dt, paramPlane* paramPlaneL } app_bc_phonon_gpu(&surf, part, intside, pdomsize, &push, &runtime, step, countPtr, specCountPtr, engine); - // app_bc_gpu(&surf, part, intside, pdomsize, &push, &runtime, dummy, engine); + //app_bc_gpu(&surf, part, intside, pdomsize, &push, &runtime, dummy, engine); //Print() << "Post " << part.id() << ": " << part.rdata(FHD_realData::velx + 0) << ", " << part.rdata(FHD_realData::velx + 1) << ", " << part.rdata(FHD_realData::velx + 2) << endl; if(part.id() == -1) { @@ -494,6 +497,10 @@ void FhdParticleContainer::MovePhononsCPP(const Real dt, paramPlane* paramPlaneL part.rdata(FHD_realData::timeFrac) = 1.0; +// if(step%2==0) +// { +// part.rdata(FHD_realData::velz) = -part.rdata(FHD_realData::velz); +// } 
if(part.idata(FHD_intData::newSpecies) != -1) { @@ -503,6 +510,10 @@ void FhdParticleContainer::MovePhononsCPP(const Real dt, paramPlane* paramPlaneL }); + + + + //Print() << "Pre buffer size: " << paramPlaneList[1].recCountRight << endl; for (int i = 0; i < np; i++) { From 14a6ed7d3563e2dca4b77318df17c17f66168076 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Thu, 26 Sep 2024 08:10:34 -0700 Subject: [PATCH 093/151] more precision in n_avg diagnostic --- src_reactDiff/main_driver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src_reactDiff/main_driver.cpp b/src_reactDiff/main_driver.cpp index 454b11bc0..3f7909cb3 100644 --- a/src_reactDiff/main_driver.cpp +++ b/src_reactDiff/main_driver.cpp @@ -255,7 +255,7 @@ void main_driver(const char* argv) amrex::Real n_sum = n_old.sum(comp); amrex::Real n_avg = n_sum / cell_count; amrex::Print() << n_avg << " "; - outputFile << n_avg << " "; + outputFile << std::setprecision(15) << n_avg << " "; } amrex::Print() << std::endl; outputFile << std::endl; From 0252d1039109ae7ec73b502751e41d4d76b7298b Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Thu, 26 Sep 2024 08:49:03 -0700 Subject: [PATCH 094/151] brute force binomial/multinomial (slow but seems to work) --- src_reactDiff/MultinomialDiffusion.cpp | 45 +++++++++++++++++--------- src_reactDiff/reactDiff_functions.H | 3 +- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src_reactDiff/MultinomialDiffusion.cpp b/src_reactDiff/MultinomialDiffusion.cpp index 7b296efe7..f485c1e7e 100644 --- a/src_reactDiff/MultinomialDiffusion.cpp +++ b/src_reactDiff/MultinomialDiffusion.cpp @@ -40,24 +40,24 @@ void MultinomialDiffusion(MultiFab& n_old, const Array4 & diffy = diff_coef_face[1].array(mfi);, const Array4 & diffz = diff_coef_face[2].array(mfi);); - amrex::ParallelFor(bx, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + amrex::ParallelForRNG(bx, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n, amrex::RandomEngine const& engine) noexcept { GpuArray p; GpuArray fluxes; - p[0] = diffx(i,j,k,n)*dt/(dx[0]*dx[0]); - p[1] = diffx(i+1,j,k,n)*dt/(dx[0]*dx[0]); - p[2] = diffy(i,j,k,n)*dt/(dx[1]*dx[1]); - p[3] = diffy(i,j+1,k,n)*dt/(dx[1]*dx[1]); + p[0] = diffx(i ,j ,k,n)*dt/(dx[0]*dx[0]); + p[1] = diffx(i+1,j ,k,n)*dt/(dx[0]*dx[0]); + p[2] = diffy(i ,j ,k,n)*dt/(dx[1]*dx[1]); + p[3] = diffy(i ,j+1,k,n)*dt/(dx[1]*dx[1]); #if (AMREX_SPACEDIM == 3) - p[4] = diffz(i,j,k,n)*dt/(dx[2]*dx[2]); - p[5] = diffz(i,j,k+1,n)*dt/(dx[2]*dx[2]); + p[4] = diffz(i ,j ,k ,n)*dt/(dx[2]*dx[2]); + p[5] = diffz(i ,j ,k+1,n)*dt/(dx[2]*dx[2]); #endif int N = std::max(0., std::round(n_arr(i,j,k,n)*dv)); - multinomial_rng(fluxes, N, p); + multinomial_rng(fluxes, N, p, engine); // lo-x face update(i ,j,k,n) -= fluxes[0]; @@ -108,34 +108,47 @@ void MultinomialDiffusion(MultiFab& n_old, AMREX_GPU_HOST_DEVICE void multinomial_rng(GpuArray& samples, const int& N, - GpuArray& p) + GpuArray& p, + const amrex::RandomEngine& engine) { -#if (AMREX_USE_CUDA) - Abort("MultinomialRNG not supported for CUDA"); -#else - Real sum_p = 0; for (int sample=0; sample<2*AMREX_SPACEDIM; ++sample) { sum_p += p[sample]; } if (sum_p > 1.) 
{ - Print() << "sum_p = " << sum_p << std::endl; + printf("sum_p = %f",sum_p); Abort("multinomial_rng: probabilities must sum to 1 or less"); } +#if 0 + // not sure why std:: binomial_distribition gives grid artifacts std::default_random_engine generator; generator.seed(std::chrono::system_clock::now().time_since_epoch().count()); +#endif sum_p = 0.; int sum_n = 0; for (int sample=0; sample<2*AMREX_SPACEDIM; ++sample) { +#if 0 + // not sure why std:: binomial_distribition gives grid artifacts +#if (AMREX_USE_CUDA) + Abort("std::binomial_distribution not supported for CUDA"); +#endif std::binomial_distribution distribution(N-sum_n, p[sample]/(1.-sum_p)); samples[sample] = distribution(generator); +#else + // brute force binomial distribution + int success = 0; + Real prob = p[sample]/(1.-sum_p); + for (int n=0; n& samples, const int& N, - GpuArray& p); + GpuArray& p, + const amrex::RandomEngine& engine); //////////////////////// // In StochasticNFluxdiv.cpp From a0104d9fe673cc8f6ade45ccec6446637b719385 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Thu, 26 Sep 2024 09:00:48 -0700 Subject: [PATCH 095/151] MN diffusion doesn't work on GPU (need sum reductions still) --- src_reactDiff/MultinomialDiffusion.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src_reactDiff/MultinomialDiffusion.cpp b/src_reactDiff/MultinomialDiffusion.cpp index f485c1e7e..971ae4036 100644 --- a/src_reactDiff/MultinomialDiffusion.cpp +++ b/src_reactDiff/MultinomialDiffusion.cpp @@ -12,6 +12,10 @@ void MultinomialDiffusion(MultiFab& n_old, const Real& dt, const Real& time) { +#if (AMREX_USE_CUDA) + Abort("std::MultinomailDiffusion not supported for CUDA (need sum reductions)"); +#endif + BoxArray ba = n_old.boxArray(); DistributionMapping dmap = n_old.DistributionMap(); @@ -132,9 +136,6 @@ AMREX_GPU_HOST_DEVICE void multinomial_rng(GpuArray& samp for (int sample=0; sample<2*AMREX_SPACEDIM; ++sample) { #if 0 // not sure why std:: binomial_distribition gives grid artifacts -#if (AMREX_USE_CUDA) - Abort("std::binomial_distribution not supported for CUDA"); -#endif std::binomial_distribution distribution(N-sum_n, p[sample]/(1.-sum_p)); samples[sample] = distribution(generator); #else From 985bcd04f8abf9e49cbb6f4a52e9cbf014102dba Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Thu, 26 Sep 2024 09:34:59 -0700 Subject: [PATCH 096/151] more efficient brute force multinomial --- src_reactDiff/MultinomialDiffusion.cpp | 32 +++++++++++++++----------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src_reactDiff/MultinomialDiffusion.cpp b/src_reactDiff/MultinomialDiffusion.cpp index 971ae4036..b2d702e80 100644 --- a/src_reactDiff/MultinomialDiffusion.cpp +++ b/src_reactDiff/MultinomialDiffusion.cpp @@ -124,32 +124,38 @@ AMREX_GPU_HOST_DEVICE void multinomial_rng(GpuArray& samp Abort("multinomial_rng: probabilities must sum to 1 or less"); } + // brute force multinomial + for (int sample=0; sample<2*AMREX_SPACEDIM; ++sample) { + samples[sample] = 0.; + } + for (int n=0; n distribution(N-sum_n, p[sample]/(1.-sum_p)); samples[sample] = distribution(generator); -#else - // brute force binomial distribution - int success = 0; - Real prob = p[sample]/(1.-sum_p); - for (int n=0; n Date: Thu, 26 Sep 2024 10:21:58 -0700 Subject: [PATCH 097/151] Paper setup SSA/2 + MN + SSA/2 --- exec/reactDiff/inputs_paper_BPM_2d | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/exec/reactDiff/inputs_paper_BPM_2d b/exec/reactDiff/inputs_paper_BPM_2d index 9a728aad2..d04c29768 
100644 --- a/exec/reactDiff/inputs_paper_BPM_2d +++ b/exec/reactDiff/inputs_paper_BPM_2d @@ -17,17 +17,17 @@ prob_hi = 32.0 32.0 # physical hi coordinate # number of cells in domain and maximum number of cells in a box n_cells = 64 64 -max_grid_size = 32 32 +max_grid_size = 16 16 # to compute cell volume in 2D problems -cell_depth = 10. +cell_depth = 1. # Time-step control -fixed_dt = 0.5 +fixed_dt = 0.01 # Controls for number of steps between actions -max_step = 20000 -plot_int = 200 +max_step = 2000000 +plot_int = 20000 struct_fact_int = -1 n_steps_skip = 2000 @@ -49,7 +49,7 @@ integer_populations = 1 # -2=unsplit explicit midpoint # -3=unsplit multinomial diffusion # -4=unsplit implicit midpoint -temporal_integrator = -4 +temporal_integrator = 1 # only used for split schemes (temporal_integrator>=0) # 0=explicit trapezoidal predictor/corrector @@ -57,7 +57,7 @@ temporal_integrator = -4 # 2=explicit midpoint # 3=multinomial diffusion # 4=forward Euler -reactDiff_diffusion_type = 4 +reactDiff_diffusion_type = 3 # Fickian diffusion coeffs D_Fick = 0.1 0.01 0.01 @@ -78,7 +78,7 @@ avg_type = 1 reactDiff_reaction_type = 0 # 0=deterministic; 1=CLE; 2=SSA; 3=tau leap -reaction_type = 3 +reaction_type = 2 # BPM model is: # (1) U + W --> V + W From b3f3ae12af8e70a43f4ff2d8a969445bbd75186d Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Thu, 26 Sep 2024 11:39:30 -0700 Subject: [PATCH 098/151] surface chemistry structure factor --- src_compressible_stag/main_driver.cpp | 127 ++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp index 512657bf1..98267a1e4 100644 --- a/src_compressible_stag/main_driver.cpp +++ b/src_compressible_stag/main_driver.cpp @@ -300,6 +300,13 @@ void main_driver(const char* argv) StructFact turbStructFactVelDecomp; // decomposed velocity StructFact turbStructFactScalar; // scalars #endif + + // surface coverage structure factor + StructFact surfcovStructFact; + MultiFab surfcovFlattenedRotMaster; + Geometry surfcov_geom_flat; + BoxArray surfcov_ba_flat; + DistributionMapping surfcov_dmap_flat; Geometry geom_flat; Geometry geom_flat_2D; @@ -914,6 +921,108 @@ void main_driver(const char* argv) } #endif + if (n_ads_spec>0) { + + MultiFab Flattened; // flattened multifab defined below + + // we are only calling ExtractSlice here to obtain + // a built version of Flattened so can obtain what we need to build the + // structure factor and geometry objects for flattened data + // assume surface covered is stored in the "k" direction in the k=0 coordinate. 
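+        // (this block only builds the flattened layout; the per-step sampling further
+        // below extracts this k=0 slice of surfcov, rotates it so its normal points in
+        // the z-direction as the StructFact class expects for flattened data, copies it
+        // onto this layout, and calls FortStructure on it)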
+ int surfcov_dir = 2; + int surfcov_plane = 0; + int surfcov_structVars = n_ads_spec; + int surfcov_nPairs = surfcov_structVars*(surfcov_structVars+1)/2; + + Vector< std::string > surfcov_var_names; + surfcov_var_names.resize(surfcov_structVars); + for (int d=0; d surfcov_var_scaling(surfcov_nPairs); + for (int d=0; d projected_hi(AMREX_SPACEDIM); + + // yes you could simplify this code but for now + // these are written out fully to better understand what is happening + // we wanted projected_hi[AMREX_SPACEDIM-1] to be equal to dx[projected_dir] + // and need to transmute the other indices depending on surfcov_dir +#if (AMREX_SPACEDIM == 2) + if (surfcov_dir == 0) { + projected_hi[0] = prob_hi[1]; + } else if (surfcov_dir == 1) { + projected_hi[0] = prob_hi[0]; + } + projected_hi[1] = prob_hi[surfcov_dir] / n_cells[surfcov_dir]; +#elif (AMREX_SPACEDIM == 3) + if (surfcov_dir == 0) { + projected_hi[0] = prob_hi[1]; + projected_hi[1] = prob_hi[2]; + } else if (surfcov_dir == 1) { + projected_hi[0] = prob_hi[0]; + projected_hi[1] = prob_hi[2]; + } else if (surfcov_dir == 2) { + projected_hi[0] = prob_hi[0]; + projected_hi[1] = prob_hi[1]; + } + projected_hi[2] = prob_hi[surfcov_dir] / n_cells[surfcov_dir]; +#endif + + RealBox real_box({AMREX_D_DECL( prob_lo[0], prob_lo[1], prob_lo[2])}, + {AMREX_D_DECL(projected_hi[0],projected_hi[1],projected_hi[2])}); + + // This defines a Geometry object + surfcov_geom_flat.define(domain,&real_box,CoordSys::cartesian,is_periodic.data()); + } + + surfcovStructFact.define(surfcov_surfcov_ba_flat,dmap_flat,surfcov_var_names,surfcov_var_scaling); + } + ///////////////////////////////////////////////// // Initialize Fluxes and Sources ///////////////////////////////////////////////// @@ -1439,6 +1548,20 @@ void main_driver(const char* argv) } } + + if (n_ads_spec > 0) { + int surfcov_dir = 2; + int surfcov_plane = 0; + int surfcov_structVars = n_ads_spec; + MultiFab Flattened; // flattened multifab defined below + ExtractSlice(surfcov, Flattened, geom, surfcov_dir, surfcov_plane, 0, surfcov_structVars); + // we rotate this flattened MultiFab to have normal in the z-direction since + // our structure factor class assumes this for flattened + MultiFab FlattenedRot = RotateFlattenedMF(Flattened); + surfcovFlattenedRotMaster.ParallelCopy(FlattenedRot,0,0,surfcov_structVars); + surfcovStructFact.FortStructure(surfcovFlattenedRotMaster,surfcov_geom_flat); + } + } // write out structure factor @@ -1495,6 +1618,10 @@ void main_driver(const char* argv) structFactConsArray[0].get_names(),"plt_SF_cons_2D"); } + + if (n_ads_spec > 0) { + surfcovStructFact.WritePlotFile(step,time,surfcov_geom_flat,"plt_SF_surfcov"); + } } // write checkpoint file From 6b6057759821b76990b3bc08066c67d3c9c35585 Mon Sep 17 00:00:00 2001 From: isriva Date: Fri, 25 Oct 2024 09:57:51 -0700 Subject: [PATCH 099/151] setup APIs for AMREX_FFT --- exec/compressible_stag/GNUmakefile | 1 + src_analysis/Make.package | 11 +- src_analysis/TurbSpectra.H | 113 +- src_analysis/TurbSpectra.cpp | 1804 ---------------------- src_analysis/TurbSpectra_distributed.H | 52 + src_analysis/TurbSpectra_distributed.cpp | 486 ++++++ src_analysis/TurbSpectra_heffte.H | 53 + src_analysis/TurbSpectra_heffte.cpp | 749 +++++++++ src_analysis/TurbSpectra_single.H | 68 + src_analysis/TurbSpectra_single.cpp | 1043 +++++++++++++ src_compressible_stag/main_driver.cpp | 10 - 11 files changed, 2469 insertions(+), 1921 deletions(-) create mode 100644 src_analysis/TurbSpectra_distributed.H create mode 100644 
src_analysis/TurbSpectra_distributed.cpp create mode 100644 src_analysis/TurbSpectra_heffte.H create mode 100644 src_analysis/TurbSpectra_heffte.cpp create mode 100644 src_analysis/TurbSpectra_single.H create mode 100644 src_analysis/TurbSpectra_single.cpp diff --git a/exec/compressible_stag/GNUmakefile b/exec/compressible_stag/GNUmakefile index 1a5db02f5..b2d2a70f0 100644 --- a/exec/compressible_stag/GNUmakefile +++ b/exec/compressible_stag/GNUmakefile @@ -19,6 +19,7 @@ DO_TURB = FALSE USE_HEFFTE_FFTW = FALSE USE_HEFFTE_CUFFT = FALSE USE_HEFFTE_ROCFFT = FALSE +USE_DISTRIBUTED_FFT = TRUE ifeq ($(USE_HEFFTE_FFTW),TRUE) HEFFTE_HOME ?= ../../../heffte/ diff --git a/src_analysis/Make.package b/src_analysis/Make.package index 98d27de23..5607d4de9 100644 --- a/src_analysis/Make.package +++ b/src_analysis/Make.package @@ -1,5 +1,12 @@ +CEXE_headers += StructFact.H CEXE_sources += StructFact.cpp -CEXE_sources += TurbSpectra.cpp -CEXE_headers += StructFact.H CEXE_headers += TurbSpectra.H +CEXE_headers += TurbSpectra_distributed.H +CEXE_sources += TurbSpectra_distributed.cpp + +#CEXE_sources += TurbSpectra_single.cpp +#CEXE_sources += TurbSpectra_heffte.cpp + +#CEXE_headers += TurbSpectra_single.H +#CEXE_headers += TurbSpectra_heffte.H diff --git a/src_analysis/TurbSpectra.H b/src_analysis/TurbSpectra.H index 6761e5d19..c12214552 100644 --- a/src_analysis/TurbSpectra.H +++ b/src_analysis/TurbSpectra.H @@ -1,109 +1,12 @@ #ifndef _TurbSpectra_H_ #define _TurbSpectra_H_ -// HEFFTE -#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) -#include -#endif - -#include -#include -#include -#include - -// non-HEFFTE -#ifdef AMREX_USE_CUDA -#include -#elif AMREX_USE_HIP -# if __has_include() // ROCm 5.3+ -# include -# else -# include -# endif -#else -#include -#include -#endif - -#include - -#include - -#include "common_functions.H" - -#define ALIGN 16 - -using namespace amrex; - -#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) -#ifdef AMREX_USE_CUDA -std::string cufftError (const cufftResult& err); -#endif -#ifdef AMREX_USE_HIP -std::string rocfftError (const rocfft_status err); -void Assert_rocfft_status (std::string const& name, rocfft_status status); -#endif -#endif - -#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) -void IntegrateKScalarHeffte(const MultiFab& cov_mag, - const std::string& name, - const int& step, - const int& comp); -//void IntegrateKScalarHeffte(const BaseFab >& spectral_field, -// const std::string& name, const Real& scaling, -// const Box& c_local_box, -// const Real& sqrtnpts, -// const int& step); -void IntegrateKVelocityHeffte(const MultiFab& cov_mag, - const std::string& name, - const int& step, - const int& comp); -//void IntegrateKVelocityHeffte(const BaseFab >& spectral_fieldx, -// const BaseFab >& spectral_fieldy, -// const BaseFab >& spectral_fieldz, -// const std::string& name, const Real& scaling, -// const Box& c_local_box, -// const int& step); -void TurbSpectrumScalarHeffte(const MultiFab& variables, - const amrex::Geometry& geom, - const int& step, - const amrex::Vector& var_scaling, - const amrex::Vector< std::string >& var_names); -void TurbSpectrumVelDecompHeffte(const MultiFab& vel, - MultiFab& vel_decomp, - const amrex::Geometry& geom, - const int& step, - const amrex::Real& var_scaling, - const amrex::Vector< std::string >& var_names); -#endif - -#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) -void IntegrateKScalar(const Vector > > >& spectral_field, - 
const MultiFab& variables_onegrid, - const std::string& name, - const Real& scaling, - const Real& sqrtnpts, - const int& step); -void IntegrateKVelocity(const Vector > > >& spectral_fieldx, - const Vector > > >& spectral_fieldy, - const Vector > > >& spectral_fieldz, - const MultiFab& vel_onegrid, - const std::string& name, - const Real& scaling, - const int& step); -void TurbSpectrumScalar(const MultiFab& variables, - const amrex::Geometry& geom, - const int& step, - const amrex::Vector& var_scaling, - const amrex::Vector< std::string >& var_names); -void TurbSpectrumVelDecomp(const MultiFab& vel, - MultiFab& vel_decomp, - const amrex::Geometry& geom, - const int& step, - const amrex::Real& var_scaling, - const amrex::Vector< std::string >& var_names); -void InverseFFTVel(Vector > > >& spectral_field, - MultiFab& vel_decomp_onegrid, const IntVect& fft_size); -#endif +#include +//#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) // use heFFTe +//#include +//#elif defined(USE_DISTRIBUTED_FFT) // use single grid FFT +//#include +//#else // use single grid FFT +//#include +//#endif #endif diff --git a/src_analysis/TurbSpectra.cpp b/src_analysis/TurbSpectra.cpp index 532b85954..e69de29bb 100644 --- a/src_analysis/TurbSpectra.cpp +++ b/src_analysis/TurbSpectra.cpp @@ -1,1804 +0,0 @@ -#include "TurbSpectra.H" -#include "common_functions.H" - -#include -#include "AMReX_PlotFileUtil.H" -#include "AMReX_BoxArray.H" - -#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) -#ifdef AMREX_USE_CUDA -std::string cufftError (const cufftResult& err) -{ - switch (err) { - case CUFFT_SUCCESS: return "CUFFT_SUCCESS"; - case CUFFT_INVALID_PLAN: return "CUFFT_INVALID_PLAN"; - case CUFFT_ALLOC_FAILED: return "CUFFT_ALLOC_FAILED"; - case CUFFT_INVALID_TYPE: return "CUFFT_INVALID_TYPE"; - case CUFFT_INVALID_VALUE: return "CUFFT_INVALID_VALUE"; - case CUFFT_INTERNAL_ERROR: return "CUFFT_INTERNAL_ERROR"; - case CUFFT_EXEC_FAILED: return "CUFFT_EXEC_FAILED"; - case CUFFT_SETUP_FAILED: return "CUFFT_SETUP_FAILED"; - case CUFFT_INVALID_SIZE: return "CUFFT_INVALID_SIZE"; - case CUFFT_UNALIGNED_DATA: return "CUFFT_UNALIGNED_DATA"; - default: return std::to_string(err) + " (unknown error code)"; - } -} -#endif - -#ifdef AMREX_USE_HIP -std::string rocfftError (const rocfft_status err) -{ - if (err == rocfft_status_success) { - return std::string("rocfft_status_success"); - } else if (err == rocfft_status_failure) { - return std::string("rocfft_status_failure"); - } else if (err == rocfft_status_invalid_arg_value) { - return std::string("rocfft_status_invalid_arg_value"); - } else if (err == rocfft_status_invalid_dimensions) { - return std::string("rocfft_status_invalid_dimensions"); - } else if (err == rocfft_status_invalid_array_type) { - return std::string("rocfft_status_invalid_array_type"); - } else if (err == rocfft_status_invalid_strides) { - return std::string("rocfft_status_invalid_strides"); - } else if (err == rocfft_status_invalid_distance) { - return std::string("rocfft_status_invalid_distance"); - } else if (err == rocfft_status_invalid_offset) { - return std::string("rocfft_status_invalid_offset"); - } else { - return std::to_string(err) + " (unknown error code)"; - } -} - -void Assert_rocfft_status (std::string const& name, rocfft_status status) -{ - if (status != rocfft_status_success) { - amrex::AllPrint() << name + " failed! 
Error: " + rocfftError(status) << "\n";; - } -} -#endif -#endif - -#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) // heffte -void TurbSpectrumScalarHeffte(const MultiFab& variables, - const amrex::Geometry& geom, - const int& step, - const amrex::Vector& scaling, - const amrex::Vector< std::string >& var_names) -{ - BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), - "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), - "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.local_size() == 1, - "TurbSpectrumScalar: Must have one Box per MPI process when using heFFTe"); - - int ncomp = variables.nComp(); - - long npts; - Box domain = geom.Domain(); - npts = (domain.length(0)*domain.length(1)*domain.length(2)); - Real sqrtnpts = std::sqrt(npts); - - // get box array and distribution map of variables - DistributionMapping dm = variables.DistributionMap(); - BoxArray ba = variables.boxArray(); - - // since there is 1 MPI rank per box, each MPI rank obtains its local box and the associated boxid - Box local_box; - int local_boxid; - { - for (int i = 0; i < ba.size(); ++i) { - Box b = ba[i]; - // each MPI rank has its own local_box Box and local_boxid ID - if (ParallelDescriptor::MyProc() == dm[i]) { - local_box = b; - local_boxid = i; - } - } - } - - // now each MPI rank works on its own box - // for real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset - - // start by coarsening each box by 2 in the x-direction - Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1))); - - // if the coarsened box's high-x index is even, we shrink the size in 1 in x - // this avoids overlap between coarsened boxes - if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) { - c_local_box.setBig(0,c_local_box.bigEnd(0)-1); - } - // for any boxes that touch the hi-x domain we - // increase the size of boxes by 1 in x - // this makes the overall fft dataset have size (Nx/2+1 x Ny x Nz) - if (local_box.bigEnd(0) == geom.Domain().bigEnd(0)) { - c_local_box.growHi(0,1); - } - - // BOX ARRAY TO STORE COVARIANCE MATRIX IN A MFAB - // create a BoxArray containing the fft boxes - // by construction, these boxes correlate to the associated spectral_data - // this we can copy the spectral data into this multifab since we know they are owned by the same MPI rank - BoxArray fft_ba; - { - BoxList bl; - bl.reserve(ba.size()); - - for (int i = 0; i < ba.size(); ++i) { - Box b = ba[i]; - - Box r_box = b; - Box c_box = amrex::coarsen(r_box, IntVect(AMREX_D_DECL(2,1,1))); - - // this avoids overlap for the cases when one or more r_box's - // have an even cell index in the hi-x cell - if (c_box.bigEnd(0) * 2 == r_box.bigEnd(0)) { - c_box.setBig(0,c_box.bigEnd(0)-1); - } - - // increase the size of boxes touching the hi-x domain by 1 in x - // this is an (Nx x Ny x Nz) -> (Nx/2+1 x Ny x Nz) real-to-complex sizing - if (b.bigEnd(0) == geom.Domain().bigEnd(0)) { - c_box.growHi(0,1); - } - bl.push_back(c_box); - - } - fft_ba.define(std::move(bl)); - } - MultiFab cov(fft_ba, dm, ncomp, 0); - - // each MPI rank gets storage for its piece of the fft - BaseFab > spectral_field(c_local_box, 1, The_Device_Arena()); - MultiFab variables_single(ba, dm, 1, 0); - using heffte_complex = typename 
heffte::fft_output::type; - - int r2c_direction = 0; - for (int comp=0; comp fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field.dataPtr(); - variables_single.ParallelCopy(variables,comp,0,1); - fft.forward(variables_single[local_boxid].dataPtr(),spectral_data); - Gpu::streamSynchronize(); - - // Fill in the covariance multifab - int comp_gpu = comp; - Real sqrtnpts_gpu = sqrtnpts; - Real scaling_i_gpu = scaling[comp]; - std::string name_gpu = var_names[comp]; - for (MFIter mfi(cov); mfi.isValid(); ++mfi) { - Array4 const& data = cov.array(mfi); - Array4 > spectral = spectral_field.const_array(); - const Box& bx = mfi.fabbox(); - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - Real re = spectral(i,j,k).real(); - Real im = spectral(i,j,k).imag(); - data(i,j,k,comp_gpu) = (re*re + im*im)/(sqrtnpts_gpu*sqrtnpts_gpu*scaling_i_gpu); - }); - } - - // Integrate spectra over k-shells - IntegrateKScalarHeffte(cov,name_gpu,step,comp_gpu); - } -} -#endif - -#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) -void TurbSpectrumScalar(const MultiFab& variables, - const amrex::Geometry& geom, - const int& step, - const amrex::Vector& scaling, - const amrex::Vector< std::string >& var_names) -{ - BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); - int ncomp = variables.nComp(); - - long npts; - - // Initialize the boxarray "ba_onegrid" from the single box "domain" - BoxArray ba_onegrid; - { - Box domain = geom.Domain(); - ba_onegrid.define(domain); - npts = (domain.length(0)*domain.length(1)*domain.length(2)); - } - Real sqrtnpts = std::sqrt(npts); - DistributionMapping dmap_onegrid(ba_onegrid); - MultiFab variables_onegrid; - variables_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); - -#ifdef AMREX_USE_CUDA - using FFTplan = cufftHandle; - using FFTcomplex = cuDoubleComplex; -#elif AMREX_USE_HIP - using FFTplan = rocfft_plan; - using FFTcomplex = double2; -#else - using FFTplan = fftw_plan; - using FFTcomplex = fftw_complex; -#endif - - // size of box including ghost cell range - IntVect fft_size; - - // contain to store FFT - note it is shrunk by "half" in x - Vector > > > spectral_field; - Vector forward_plan; - bool built_plan = false; - - // for CUDA builds we only need to build the plan once; track whether we did - for (int comp=0; comp >(spectral_bx,1, - The_Device_Arena())); - spectral_field.back()->setVal(0.0); // touch the memory - FFTplan fplan; - -#ifdef AMREX_USE_CUDA // CUDA - cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan3d forward failed! 
Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP // HIP - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; - rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, - rocfft_transform_type_real_forward, rocfft_precision_double, - 3, lengths, 1, nullptr); - Assert_rocfft_status("rocfft_plan_create", result); -#else // host - fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], - variables_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_field.back()->dataPtr()), - FFTW_ESTIMATE); -#endif - forward_plan.push_back(fplan); - } - - built_plan = true; - } - - ParallelDescriptor::Barrier(); - - // ForwardTransform - for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) { - int i = mfi.LocalIndex(); -#ifdef AMREX_USE_CUDA - cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); - cufftResult result = cufftExecD2Z(forward_plan[i], - variables_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_field[i]->dataPtr())); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " forward transform using cufftExec failed! Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP - rocfft_execution_info execinfo = nullptr; - rocfft_status result = rocfft_execution_info_create(&execinfo); - Assert_rocfft_status("rocfft_execution_info_create", result); - - std::size_t buffersize = 0; - result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); - Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); - - void* buffer = amrex::The_Arena()->alloc(buffersize); - result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); - - result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - Assert_rocfft_status("rocfft_execution_info_set_stream", result); - - amrex::Real* variables_onegrid_ptr = variables_onegrid[mfi].dataPtr(); - FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr()); - result = rocfft_execute(forward_plan[i], - (void**) &variables_onegrid_ptr, // in - (void**) &spectral_field_ptr, // out - execinfo); - Assert_rocfft_status("rocfft_execute", result); - amrex::Gpu::streamSynchronize(); - amrex::The_Arena()->free(buffer); - result = rocfft_execution_info_destroy(execinfo); - Assert_rocfft_status("rocfft_execution_info_destroy", result); -#else - fftw_execute(forward_plan[i]); -#endif - } - - // Integrate spectra over k-shells - IntegrateKScalar(spectral_field,variables_onegrid,var_names[comp],scaling[comp],sqrtnpts,step); - } - - // destroy fft plan - for (int i = 0; i < forward_plan.size(); ++i) { -#ifdef AMREX_USE_CUDA - cufftDestroy(forward_plan[i]); -#elif AMREX_USE_HIP - rocfft_plan_destroy(forward_plan[i]); -#else - fftw_destroy_plan(forward_plan[i]); -#endif - } -} -#endif // end not-heFFTE - -#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) // heffte -void TurbSpectrumVelDecompHeffte(const MultiFab& vel, - MultiFab& vel_decomp, - const amrex::Geometry& geom, - const int& step, - const amrex::Real& scaling, - const amrex::Vector< std::string >& var_names) -{ - BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, - "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, - "TurbSpectrumVelDecomp: must have 3 names for output vel spectra 
(total, solenoidal, dilatational"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.local_size() == 1, - "TurbSpectrumVelDecomp: Must have one Box per MPI process when using heFFTe"); - - const GpuArray dx = geom.CellSizeArray(); - - long npts; - Box domain = geom.Domain(); - npts = (domain.length(0)*domain.length(1)*domain.length(2)); - Real sqrtnpts = std::sqrt(npts); - - // get box array and distribution map of vel - DistributionMapping dm = vel.DistributionMap(); - BoxArray ba = vel.boxArray(); - - // since there is 1 MPI rank per box, each MPI rank obtains its local box and the associated boxid - Box local_box; - int local_boxid; - { - for (int i = 0; i < ba.size(); ++i) { - Box b = ba[i]; - // each MPI rank has its own local_box Box and local_boxid ID - if (ParallelDescriptor::MyProc() == dm[i]) { - local_box = b; - local_boxid = i; - } - } - } - - // now each MPI rank works on its own box - // for real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset - - // start by coarsening each box by 2 in the x-direction - Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1))); - - // if the coarsened box's high-x index is even, we shrink the size in 1 in x - // this avoids overlap between coarsened boxes - if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) { - c_local_box.setBig(0,c_local_box.bigEnd(0)-1); - } - // for any boxes that touch the hi-x domain we - // increase the size of boxes by 1 in x - // this makes the overall fft dataset have size (Nx/2+1 x Ny x Nz) - if (local_box.bigEnd(0) == geom.Domain().bigEnd(0)) { - c_local_box.growHi(0,1); - } - - // each MPI rank gets storage for its piece of the fft - BaseFab > spectral_field_Tx(c_local_box, 1, The_Device_Arena()); // totalx - BaseFab > spectral_field_Ty(c_local_box, 1, The_Device_Arena()); // totaly - BaseFab > spectral_field_Tz(c_local_box, 1, The_Device_Arena()); // totalz - BaseFab > spectral_field_Sx(c_local_box, 1, The_Device_Arena()); // solenoidalx - BaseFab > spectral_field_Sy(c_local_box, 1, The_Device_Arena()); // solenoidaly - BaseFab > spectral_field_Sz(c_local_box, 1, The_Device_Arena()); // solenoidalz - BaseFab > spectral_field_Dx(c_local_box, 1, The_Device_Arena()); // dilatationalx - BaseFab > spectral_field_Dy(c_local_box, 1, The_Device_Arena()); // dilatationaly - BaseFab > spectral_field_Dz(c_local_box, 1, The_Device_Arena()); // dilatationalz - MultiFab vel_single(ba, dm, 1, 0); - - int r2c_direction = 0; - - // ForwardTransform - // X - using heffte_complex = typename heffte::fft_output::type; - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - vel_single.ParallelCopy(vel, 0, 0, 1); - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tx.dataPtr(); - fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); - } - // Y - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) 
,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - vel_single.ParallelCopy(vel, 1, 0, 1); - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Ty.dataPtr(); - fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); - } - // Z - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - vel_single.ParallelCopy(vel, 2, 0, 1); - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tz.dataPtr(); - fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); - } - - Gpu::streamSynchronize(); - - // Decompose velocity field into solenoidal and dilatational - Array4< GpuComplex > spectral_tx = spectral_field_Tx.array(); - Array4< GpuComplex > spectral_ty = spectral_field_Ty.array(); - Array4< GpuComplex > spectral_tz = spectral_field_Tz.array(); - Array4< GpuComplex > spectral_sx = spectral_field_Sx.array(); - Array4< GpuComplex > spectral_sy = spectral_field_Sy.array(); - Array4< GpuComplex > spectral_sz = spectral_field_Sz.array(); - Array4< GpuComplex > spectral_dx = spectral_field_Dx.array(); - Array4< GpuComplex > spectral_dy = spectral_field_Dy.array(); - Array4< GpuComplex > spectral_dz = spectral_field_Dz.array(); - ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) - { - - int nx = n_cells[0]; - int ny = n_cells[1]; - int nz = n_cells[2]; - - Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0; - - if (i <= nx/2) { - - // Get the wavevector - int ki = i; - int kj = j; - if (j >= ny/2) kj = ny - j; - int kk = k; - if (k >= nz/2) kk = nz - k; - - // Gradient Operators - GxR = (cos(2.0*M_PI*ki/nx)-1.0)/dx[0]; - GxC = (sin(2.0*M_PI*ki/nx)-0.0)/dx[0]; - GyR = (cos(2.0*M_PI*kj/ny)-1.0)/dx[1]; - GyC = (sin(2.0*M_PI*kj/ny)-0.0)/dx[1]; - GzR = (cos(2.0*M_PI*kk/nz)-1.0)/dx[2]; - GzC = (sin(2.0*M_PI*kk/nz)-0.0)/dx[2]; - } - else { // conjugate - amrex::Abort("check the code; i should not go beyond bx.length(0)/2"); - } - - // Scale Total velocity FFT components - spectral_tx(i,j,k) *= (1.0/sqrtnpts); - spectral_ty(i,j,k) *= (1.0/sqrtnpts); - spectral_tz(i,j,k) *= (1.0/sqrtnpts); - - // Inverse Laplacian - Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; - - // Divergence of vel - Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + - spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + - spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; - Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + - spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + - spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; - - if (Lap < 1.0e-12) { // zero mode for no bulk motion - spectral_dx(i,j,k) *= 0.0; - spectral_dy(i,j,k) *= 0.0; - spectral_dz(i,j,k) *= 0.0; - } - else { - - // Dilatational velocity - GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, - (divC*GxR - divR*GxC) / Lap); - spectral_dx(i,j,k) = 
copy_dx; - - GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, - (divC*GyR - divR*GyC) / Lap); - spectral_dy(i,j,k) = copy_dy; - - GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, - (divC*GzR - divR*GzC) / Lap); - spectral_dz(i,j,k) = copy_dz; - } - - // Solenoidal velocity - spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); - spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); - spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); - - }); - - Gpu::streamSynchronize(); - - // BOX ARRAY TO STORE COVARIANCE MATRIX IN A MFAB - // create a BoxArray containing the fft boxes - // by construction, these boxes correlate to the associated spectral_data - // this we can copy the spectral data into this multifab since we know they are owned by the same MPI rank - BoxArray fft_ba; - { - BoxList bl; - bl.reserve(ba.size()); - - for (int i = 0; i < ba.size(); ++i) { - Box b = ba[i]; - - Box r_box = b; - Box c_box = amrex::coarsen(r_box, IntVect(AMREX_D_DECL(2,1,1))); - - // this avoids overlap for the cases when one or more r_box's - // have an even cell index in the hi-x cell - if (c_box.bigEnd(0) * 2 == r_box.bigEnd(0)) { - c_box.setBig(0,c_box.bigEnd(0)-1); - } - - // increase the size of boxes touching the hi-x domain by 1 in x - // this is an (Nx x Ny x Nz) -> (Nx/2+1 x Ny x Nz) real-to-complex sizing - if (b.bigEnd(0) == geom.Domain().bigEnd(0)) { - c_box.growHi(0,1); - } - bl.push_back(c_box); - - } - fft_ba.define(std::move(bl)); - } - MultiFab cov(fft_ba, dm, 3, 0); // total, solenoidal, dilatational - - // Fill in the covariance multifab - Real sqrtnpts_gpu = sqrtnpts; - Real scaling_gpu = scaling; - for (MFIter mfi(cov); mfi.isValid(); ++mfi) { - Array4 const& data = cov.array(mfi); - Array4 > spec_tx = spectral_field_Tx.const_array(); - Array4 > spec_ty = spectral_field_Ty.const_array(); - Array4 > spec_tz = spectral_field_Tz.const_array(); - Array4 > spec_sx = spectral_field_Sx.const_array(); - Array4 > spec_sy = spectral_field_Sy.const_array(); - Array4 > spec_sz = spectral_field_Sz.const_array(); - Array4 > spec_dx = spectral_field_Dx.const_array(); - Array4 > spec_dy = spectral_field_Dy.const_array(); - Array4 > spec_dz = spectral_field_Dz.const_array(); - const Box& bx = mfi.fabbox(); - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - Real re_x, re_y, re_z, im_x, im_y, im_z; - - re_x = spec_tx(i,j,k).real(); - im_x = spec_tx(i,j,k).imag(); - re_y = spec_ty(i,j,k).real(); - im_y = spec_ty(i,j,k).imag(); - re_z = spec_tz(i,j,k).real(); - im_z = spec_tz(i,j,k).imag(); - data(i,j,k,0) = (re_x*re_x + im_x*im_x + - re_y*re_y + im_y*im_y + - re_z*re_z + im_z*im_z)/(scaling_gpu); - re_x = spec_sx(i,j,k).real(); - im_x = spec_sx(i,j,k).imag(); - re_y = spec_sy(i,j,k).real(); - im_y = spec_sy(i,j,k).imag(); - re_z = spec_sz(i,j,k).real(); - im_z = spec_sz(i,j,k).imag(); - data(i,j,k,1) = (re_x*re_x + im_x*im_x + - re_y*re_y + im_y*im_y + - re_z*re_z + im_z*im_z)/(scaling_gpu); - re_x = spec_dx(i,j,k).real(); - im_x = spec_dx(i,j,k).imag(); - re_y = spec_dy(i,j,k).real(); - im_y = spec_dy(i,j,k).imag(); - re_z = spec_dz(i,j,k).real(); - im_z = spec_dz(i,j,k).imag(); - data(i,j,k,2) = (re_x*re_x + im_x*im_x + - re_y*re_y + im_y*im_y + - re_z*re_z + im_z*im_z)/(scaling_gpu); - }); - } - - // Integrate K spectrum for velocities - IntegrateKVelocityHeffte(cov,"vel_total" ,step,0); - IntegrateKVelocityHeffte(cov,"vel_solenoidal",step,1); - IntegrateKVelocityHeffte(cov,"vel_dilational",step,2); - - MultiFab vel_decomp_single(ba, dm, 1, 0); - // 
inverse Fourier transform solenoidal and dilatational components - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sx.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - Gpu::streamSynchronize(); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 0, 1); - } - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sy.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - Gpu::streamSynchronize(); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 1, 1); - } - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sz.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - Gpu::streamSynchronize(); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 2, 1); - } - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dx.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - Gpu::streamSynchronize(); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 3, 1); - } - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) 
,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dy.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - Gpu::streamSynchronize(); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 4, 1); - } - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dz.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - Gpu::streamSynchronize(); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 5, 1); - } - - - vel_decomp.mult(1.0/sqrtnpts); - -} -#endif - -#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) -void TurbSpectrumVelDecomp(const MultiFab& vel, - MultiFab& vel_decomp, - const amrex::Geometry& geom, - const int& step, - const amrex::Real& scaling, - const amrex::Vector< std::string >& var_names) -{ - BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, - "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, - "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); - const GpuArray dx = geom.CellSizeArray(); - - long npts; - - // Initialize the boxarray "ba_onegrid" from the single box "domain" - BoxArray ba_onegrid; - { - Box domain = geom.Domain(); - ba_onegrid.define(domain); - npts = (domain.length(0)*domain.length(1)*domain.length(2)); - } - Real sqrtnpts = std::sqrt(npts); - DistributionMapping dmap_onegrid(ba_onegrid); - MultiFab vel_onegrid; - vel_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); - -#ifdef AMREX_USE_CUDA - using FFTplan = cufftHandle; - using FFTcomplex = cuDoubleComplex; -#elif AMREX_USE_HIP - using FFTplan = rocfft_plan; - using FFTcomplex = double2; -#else - using FFTplan = fftw_plan; - using FFTcomplex = fftw_complex; -#endif - - // size of box including ghost cell range - IntVect fft_size; - - // contain to store FFT - note it is shrunk by "half" in x - Vector > > > spectral_fieldx; - Vector > > > spectral_fieldy; - Vector > > > spectral_fieldz; - Vector > > > spectral_field_Sx; - Vector > > > spectral_field_Sy; - Vector > > > spectral_field_Sz; - Vector > > > spectral_field_Dx; - Vector > > > spectral_field_Dy; - Vector > > > spectral_field_Dz; - - // x-velocity - { - Vector forward_plan; - vel_onegrid.ParallelCopy(vel,0,0,1); - for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { - - // grab a single box including ghost cell range - Box realspace_bx = mfi.fabbox(); - - // size of box including ghost cell range - fft_size = realspace_bx.length(); // This will be different for hybrid FFT - - // this is the size of the box, except the 0th component is 'halved plus 1' - IntVect spectral_bx_size = fft_size; - spectral_bx_size[0] = fft_size[0]/2 + 1; - - // spectral box - Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); - - spectral_fieldx.emplace_back(new BaseFab 
>(spectral_bx,1, - The_Device_Arena())); - spectral_fieldx.back()->setVal(0.0); // touch the memory - - spectral_field_Sx.emplace_back(new BaseFab >(spectral_bx,1, - The_Device_Arena())); - spectral_field_Sx.back()->setVal(0.0); // touch the memory - - spectral_field_Dx.emplace_back(new BaseFab >(spectral_bx,1, - The_Device_Arena())); - spectral_field_Dx.back()->setVal(0.0); // touch the memory - - FFTplan fplan; - -#ifdef AMREX_USE_CUDA // CUDA - cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan3d forward failed! Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP // HIP - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; - rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, - rocfft_transform_type_real_forward, rocfft_precision_double, - 3, lengths, 1, nullptr); - Assert_rocfft_status("rocfft_plan_create", result); -#else // host - fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], - vel_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_fieldx.back()->dataPtr()), - FFTW_ESTIMATE); -#endif - forward_plan.push_back(fplan); - } - - ParallelDescriptor::Barrier(); - - // ForwardTransform - for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { - int i = mfi.LocalIndex(); -#ifdef AMREX_USE_CUDA - cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); - cufftResult result = cufftExecD2Z(forward_plan[i], - vel_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_fieldx[i]->dataPtr())); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " forward transform using cufftExec failed! Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP - rocfft_execution_info execinfo = nullptr; - rocfft_status result = rocfft_execution_info_create(&execinfo); - Assert_rocfft_status("rocfft_execution_info_create", result); - - std::size_t buffersize = 0; - result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); - Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); - - void* buffer = amrex::The_Arena()->alloc(buffersize); - result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); - - result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - Assert_rocfft_status("rocfft_execution_info_set_stream", result); - - amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); - FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldx[i]->dataPtr()); - result = rocfft_execute(forward_plan[i], - (void**) &vel_onegrid_ptr, // in - (void**) &spectral_field_ptr, // out - execinfo); - Assert_rocfft_status("rocfft_execute", result); - amrex::Gpu::streamSynchronize(); - amrex::The_Arena()->free(buffer); - result = rocfft_execution_info_destroy(execinfo); - Assert_rocfft_status("rocfft_execution_info_destroy", result); -#else - fftw_execute(forward_plan[i]); -#endif - } - - // destroy fft plan - for (int i = 0; i < forward_plan.size(); ++i) { -#ifdef AMREX_USE_CUDA - cufftDestroy(forward_plan[i]); -#elif AMREX_USE_HIP - rocfft_plan_destroy(forward_plan[i]); -#else - fftw_destroy_plan(forward_plan[i]); -#endif - } - - } // end x-vel - - // y-velocity - { - Vector forward_plan; - vel_onegrid.ParallelCopy(vel,1,0,1); - for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { - - // grab a single box including ghost cell range - Box 
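// Note on the dimension ordering used when the plans above (and the identical
// ones for y and z below) are created: AMReX FArrayBox data is stored
// Fortran-style with x fastest, while FFTW/cuFFT treat the *last* dimension
// passed to the planner as the contiguous one, so the sizes are handed over
// reversed as (fft_size[2], fft_size[1], fft_size[0]) = (nz, ny, nx).  The r2c
// output is then halved (+1) in the contiguous nx direction, matching
// spectral_bx_size[0] = nx/2 + 1 above.  A minimal host-only sketch of the
// same call pattern (hypothetical sizes, FFTW only):
//   int nx = 64, ny = 32, nz = 16;
//   double*       in  = fftw_alloc_real(nx*ny*nz);
//   fftw_complex* out = fftw_alloc_complex((nx/2+1)*ny*nz);
//   fftw_plan p = fftw_plan_dft_r2c_3d(nz, ny, nx, in, out, FFTW_ESTIMATE);
//   fftw_execute(p);
//   fftw_destroy_plan(p); fftw_free(in); fftw_free(out);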
realspace_bx = mfi.fabbox(); - - // size of box including ghost cell range - fft_size = realspace_bx.length(); // This will be different for hybrid FFT - - // this is the size of the box, except the 0th component is 'halved plus 1' - IntVect spectral_bx_size = fft_size; - spectral_bx_size[0] = fft_size[0]/2 + 1; - - // spectral box - Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); - - spectral_fieldy.emplace_back(new BaseFab >(spectral_bx,1, - The_Device_Arena())); - spectral_fieldy.back()->setVal(0.0); // touch the memory - - spectral_field_Sy.emplace_back(new BaseFab >(spectral_bx,1, - The_Device_Arena())); - spectral_field_Sy.back()->setVal(0.0); // touch the memory - - spectral_field_Dy.emplace_back(new BaseFab >(spectral_bx,1, - The_Device_Arena())); - spectral_field_Dy.back()->setVal(0.0); // touch the memory - - FFTplan fplan; - -#ifdef AMREX_USE_CUDA // CUDA - cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan3d forward failed! Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP // HIP - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; - rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, - rocfft_transform_type_real_forward, rocfft_precision_double, - 3, lengths, 1, nullptr); - Assert_rocfft_status("rocfft_plan_create", result); -#else // host - fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], - vel_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_fieldy.back()->dataPtr()), - FFTW_ESTIMATE); -#endif - forward_plan.push_back(fplan); - } - - ParallelDescriptor::Barrier(); - - // ForwardTransform - for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { - int i = mfi.LocalIndex(); -#ifdef AMREX_USE_CUDA - cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); - cufftResult result = cufftExecD2Z(forward_plan[i], - vel_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_fieldy[i]->dataPtr())); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP - rocfft_execution_info execinfo = nullptr; - rocfft_status result = rocfft_execution_info_create(&execinfo); - Assert_rocfft_status("rocfft_execution_info_create", result); - - std::size_t buffersize = 0; - result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); - Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); - - void* buffer = amrex::The_Arena()->alloc(buffersize); - result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); - - result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - Assert_rocfft_status("rocfft_execution_info_set_stream", result); - - amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); - FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldy[i]->dataPtr()); - result = rocfft_execute(forward_plan[i], - (void**) &vel_onegrid_ptr, // in - (void**) &spectral_field_ptr, // out - execinfo); - Assert_rocfft_status("rocfft_execute", result); - amrex::Gpu::streamSynchronize(); - amrex::The_Arena()->free(buffer); - result = rocfft_execution_info_destroy(execinfo); - Assert_rocfft_status("rocfft_execution_info_destroy", result); -#else - fftw_execute(forward_plan[i]); -#endif - } - - // destroy fft plan - for (int i = 0; i < forward_plan.size(); ++i) { -#ifdef AMREX_USE_CUDA - cufftDestroy(forward_plan[i]); -#elif AMREX_USE_HIP - rocfft_plan_destroy(forward_plan[i]); -#else - fftw_destroy_plan(forward_plan[i]); -#endif - } - - } // end y-vel - - // z-velocity - { - Vector forward_plan; - vel_onegrid.ParallelCopy(vel,2,0,1); - for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { - - // grab a single box including ghost cell range - Box realspace_bx = mfi.fabbox(); - - // size of box including ghost cell range - fft_size = realspace_bx.length(); // This will be different for hybrid FFT - - // this is the size of the box, except the 0th component is 'halved plus 1' - IntVect spectral_bx_size = fft_size; - spectral_bx_size[0] = fft_size[0]/2 + 1; - - // spectral box - Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); - - spectral_fieldz.emplace_back(new BaseFab >(spectral_bx,1, - The_Device_Arena())); - spectral_fieldz.back()->setVal(0.0); // touch the memory - - spectral_field_Sz.emplace_back(new BaseFab >(spectral_bx,1, - The_Device_Arena())); - spectral_field_Sz.back()->setVal(0.0); // touch the memory - - spectral_field_Dz.emplace_back(new BaseFab >(spectral_bx,1, - The_Device_Arena())); - spectral_field_Dz.back()->setVal(0.0); // touch the memory - - FFTplan fplan; - -#ifdef AMREX_USE_CUDA // CUDA - cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan3d forward failed! 
Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP // HIP - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; - rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, - rocfft_transform_type_real_forward, rocfft_precision_double, - 3, lengths, 1, nullptr); - Assert_rocfft_status("rocfft_plan_create", result); -#else // host - fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], - vel_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_fieldz.back()->dataPtr()), - FFTW_ESTIMATE); -#endif - forward_plan.push_back(fplan); - } - - ParallelDescriptor::Barrier(); - - // ForwardTransform - for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { - int i = mfi.LocalIndex(); -#ifdef AMREX_USE_CUDA - cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); - cufftResult result = cufftExecD2Z(forward_plan[i], - vel_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_fieldz[i]->dataPtr())); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " forward transform using cufftExec failed! Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP - rocfft_execution_info execinfo = nullptr; - rocfft_status result = rocfft_execution_info_create(&execinfo); - Assert_rocfft_status("rocfft_execution_info_create", result); - - std::size_t buffersize = 0; - result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); - Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); - - void* buffer = amrex::The_Arena()->alloc(buffersize); - result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); - - result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - Assert_rocfft_status("rocfft_execution_info_set_stream", result); - - amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); - FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldz[i]->dataPtr()); - result = rocfft_execute(forward_plan[i], - (void**) &vel_onegrid_ptr, // in - (void**) &spectral_field_ptr, // out - execinfo); - Assert_rocfft_status("rocfft_execute", result); - amrex::Gpu::streamSynchronize(); - amrex::The_Arena()->free(buffer); - result = rocfft_execution_info_destroy(execinfo); - Assert_rocfft_status("rocfft_execution_info_destroy", result); -#else - fftw_execute(forward_plan[i]); -#endif - } - - // destroy fft plan - for (int i = 0; i < forward_plan.size(); ++i) { -#ifdef AMREX_USE_CUDA - cufftDestroy(forward_plan[i]); -#elif AMREX_USE_HIP - rocfft_plan_destroy(forward_plan[i]); -#else - fftw_destroy_plan(forward_plan[i]); -#endif - } - - } // end x-vel - - - // Decompose velocity field into solenoidal and dilatational - for ( MFIter mfi(vel_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { - - const Box& bx = mfi.tilebox(); - Array4< GpuComplex > spectral_tx = (*spectral_fieldx[0]) .array(); - Array4< GpuComplex > spectral_ty = (*spectral_fieldy[0]) .array(); - Array4< GpuComplex > spectral_tz = (*spectral_fieldz[0]) .array(); - Array4< GpuComplex > spectral_sx = (*spectral_field_Sx[0]).array(); - Array4< GpuComplex > spectral_sy = (*spectral_field_Sy[0]).array(); - Array4< GpuComplex > spectral_sz = (*spectral_field_Sz[0]).array(); - Array4< GpuComplex > spectral_dx = (*spectral_field_Dx[0]).array(); - Array4< GpuComplex > spectral_dy = (*spectral_field_Dy[0]).array(); - Array4< GpuComplex > spectral_dz = (*spectral_field_Dz[0]).array(); - - amrex::ParallelFor(bx, - 
[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - int nx = n_cells[0]; - int ny = n_cells[1]; - int nz = n_cells[2]; - - Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0; - - if (i <= nx/2) { - - // Get the wavevector - int ki = i; - int kj = j; - if (j >= ny/2) kj = ny - j; - int kk = k; - if (k >= nz/2) kk = nz - k; - - // Gradient Operators - GxR = (cos(2.0*M_PI*i/nx)-1.0)/dx[0]; - GxC = (sin(2.0*M_PI*i/nx)-0.0)/dx[0]; - GyR = (cos(2.0*M_PI*j/ny)-1.0)/dx[1]; - GyC = (sin(2.0*M_PI*j/ny)-0.0)/dx[1]; - GzR = (cos(2.0*M_PI*k/nz)-1.0)/dx[2]; - GzC = (sin(2.0*M_PI*k/nz)-0.0)/dx[2]; - - // Scale Total velocity FFT components - spectral_tx(i,j,k) *= (1.0/sqrtnpts); - spectral_ty(i,j,k) *= (1.0/sqrtnpts); - spectral_tz(i,j,k) *= (1.0/sqrtnpts); - - // Inverse Laplacian - Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; - - // Divergence of vel - Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + - spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + - spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; - Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + - spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + - spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; - - if (Lap < 1.0e-12) { // zero mode for no bulk motion - spectral_dx(i,j,k) *= 0.0; - spectral_dy(i,j,k) *= 0.0; - spectral_dz(i,j,k) *= 0.0; - } - else { - // Dilatational velocity - GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, - (divC*GxR - divR*GxC) / Lap); - spectral_dx(i,j,k) = copy_dx; - - GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, - (divC*GyR - divR*GyC) / Lap); - spectral_dy(i,j,k) = copy_dy; - - GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, - (divC*GzR - divR*GzC) / Lap); - spectral_dz(i,j,k) = copy_dz; - } - - // Solenoidal velocity - spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); - spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); - spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); - } - }); - } - - ParallelDescriptor::Barrier(); - - // Integrate K spectrum for velocities - IntegrateKVelocity(spectral_fieldx, spectral_fieldy, spectral_fieldz, vel_onegrid, "vel_total" ,scaling,step); - IntegrateKVelocity(spectral_field_Sx, spectral_field_Sy, spectral_field_Sz, vel_onegrid, "vel_solenoidal",scaling,step); - IntegrateKVelocity(spectral_field_Dx, spectral_field_Dy, spectral_field_Dz, vel_onegrid, "vel_dilatational",scaling,step); - - - // Inverse Solenoidal and Dilatational Velocity Components - { // solenoidal x - MultiFab vel_decomp_onegrid; - vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); - vel_decomp_onegrid.setVal(0.0); - InverseFFTVel(spectral_field_Sx, vel_decomp_onegrid,fft_size); - // copy into external multifab - vel_decomp.ParallelCopy(vel_decomp_onegrid,0,0,1); - } - { // solenoidal y - MultiFab vel_decomp_onegrid; - vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); - vel_decomp_onegrid.setVal(0.0); - InverseFFTVel(spectral_field_Sy, vel_decomp_onegrid,fft_size); - // copy into external multifab - vel_decomp.ParallelCopy(vel_decomp_onegrid,0,1,1); - } - { // solenoidal z - MultiFab vel_decomp_onegrid; - vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); - vel_decomp_onegrid.setVal(0.0); - InverseFFTVel(spectral_field_Sz, vel_decomp_onegrid,fft_size); - // copy into external multifab - vel_decomp.ParallelCopy(vel_decomp_onegrid,0,2,1); - } - { // dilatational x - MultiFab vel_decomp_onegrid; - 
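// Normalization sketch for the forward/inverse transform pair in this routine
// (assuming the usual unnormalized FFTW/cuFFT/rocFFT convention, where
// ifft(fft(u)) = N*u with N = nx*ny*nz):
//   forward:         u_hat       = F(u)              (no scaling)
//   spectra loop:    u_hat      *= 1/sqrt(N)
//   backward:        F^-1(u_hat) = sqrt(N) * u
//   final:           vel_decomp.mult(1.0/sqrtnpts)   -> u recovered exactly,
// i.e. the overall 1/N round-trip factor is split evenly between the spectral
// scaling above and the multiplication applied after the backward transforms.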
vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); - vel_decomp_onegrid.setVal(0.0); - InverseFFTVel(spectral_field_Dx, vel_decomp_onegrid,fft_size); - // copy into external multifab - vel_decomp.ParallelCopy(vel_decomp_onegrid,0,3,1); - } - { // dilatational y - MultiFab vel_decomp_onegrid; - vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); - vel_decomp_onegrid.setVal(0.0); - InverseFFTVel(spectral_field_Dy, vel_decomp_onegrid,fft_size); - // copy into external multifab - vel_decomp.ParallelCopy(vel_decomp_onegrid,0,4,1); - } - { // dilatational z - MultiFab vel_decomp_onegrid; - vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); - vel_decomp_onegrid.setVal(0.0); - InverseFFTVel(spectral_field_Dz, vel_decomp_onegrid,fft_size); - // copy into external multifab - vel_decomp.ParallelCopy(vel_decomp_onegrid,0,5,1); - } - vel_decomp.mult(1.0/sqrtnpts); -} -#endif // end heFFTe - -#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) -void IntegrateKScalarHeffte(const MultiFab& cov_mag, - const std::string& name, - const int& step, - const int& comp) - -{ - int npts = n_cells[0]/2; - - Gpu::DeviceVector phisum_device(npts); - Gpu::DeviceVector phicnt_device(npts); -// Gpu::HostVector phisum_host(npts); -// Gpu::HostVector phicnt_host(npts); - - Gpu::HostVector phisum_host(npts); - - Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data - int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data - - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - phisum_ptr[d] = 0.; - phicnt_ptr[d] = 0; - }); -// for (int d=0; d & cov = cov_mag.const_array(mfi); - - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - int ki = i; - int kj = j; - if (j >= ny/2) kj = ny - j; - int kk = k; - if (k >= nz/2) kk = nz - k; - - Real dist = (ki*ki + kj*kj + kk*kk); - dist = std::sqrt(dist); - - if ( dist <= n_cells[0]/2-0.5) { - dist = dist+0.5; - int cell = int(dist); - amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); - amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); - } - }); - } - - Gpu::streamSynchronize(); - - ParallelDescriptor::ReduceRealSum(phisum_device.dataPtr(),npts); - ParallelDescriptor::ReduceIntSum(phicnt_device.dataPtr(),npts); - - Real dk = 1.; - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - if (d != 0) { - phisum_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d]; - } - }); - - Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); - Gpu::streamSynchronize(); - - if (ParallelDescriptor::IOProcessor()) { - std::ofstream turb; - std::string turbBaseName = "turb_"+name; - std::string turbName = Concatenate(turbBaseName,step,7); - turbName += ".txt"; - - turb.open(turbName); - for (int d=1; d > > >& spectral_field, - const MultiFab& variables_onegrid, - const std::string& name, - const Real& scaling, - const Real& sqrtnpts, - const int& step) - -{ - int npts = n_cells[0]/2; - Gpu::DeviceVector phisum_device(npts); - Gpu::DeviceVector phicnt_device(npts); - - Gpu::HostVector phisum_host(npts); - - Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data - int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data - - // Integrate spectra over k-shells - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - phisum_ptr[d] = 0.; - phicnt_ptr[d] = 0; - }); - - int nx = n_cells[0]; - int ny = n_cells[1]; - int nz = n_cells[2]; - for ( MFIter mfi(variables_onegrid,TilingIfNotGPU()); 
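// Shell binning used by the IntegrateK* routines, in brief: each retained mode
// (ki,kj,kk) -- with kj and kk folded back into [0, N/2] -- is assigned to the
// integer shell nearest its radius, cell = int(sqrt(ki*ki + kj*kj + kk*kk) + 0.5),
// and modes with radius larger than n_cells[0]/2 - 0.5 are dropped.  For
// example, mode (3,4,0) has radius 5 and lands in shell 5, while (1,1,1) has
// radius ~1.73 and lands in shell 2.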
mfi.isValid(); ++mfi ) { - - const Box& bx = mfi.fabbox(); - - const Array4 > spectral = (*spectral_field[0]).const_array(); - - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - if (i <= bx.length(0)/2) { // only half of kx-domain - int ki = i; - int kj = j; - if (j >= ny/2) kj = ny - j; - int kk = k; - if (k >= nz/2) kk = nz - k; - - Real dist = (ki*ki + kj*kj + kk*kk); - dist = std::sqrt(dist); - - if ( dist <= n_cells[0]/2-0.5) { - dist = dist+0.5; - int cell = int(dist); - Real real = spectral(i,j,k).real(); - Real imag = spectral(i,j,k).imag(); - Real cov = (1.0/(scaling*sqrtnpts*sqrtnpts))*(real*real + imag*imag); - amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); - amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); - } - } - }); - } - - for (int d=1; d phisum_device(npts); - Gpu::DeviceVector phicnt_device(npts); - - Gpu::HostVector phisum_host(npts); - - Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data - int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data - - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - phisum_ptr[d] = 0.; - phicnt_ptr[d] = 0; - }); - - int comp_gpu = comp; - int nx = n_cells[0]; - int ny = n_cells[1]; - int nz = n_cells[2]; - for ( MFIter mfi(cov_mag,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { - - const Box& bx = mfi.tilebox(); - - const Array4 & cov = cov_mag.const_array(mfi); - - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - int ki = i; - int kj = j; - if (j >= ny/2) kj = ny - j; - int kk = k; - if (k >= nz/2) kk = nz - k; - - Real dist = (ki*ki + kj*kj + kk*kk); - dist = std::sqrt(dist); - - if ( dist <= n_cells[0]/2-0.5) { - dist = dist+0.5; - int cell = int(dist); - amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); - amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); - } - }); - } - - Gpu::streamSynchronize(); - - ParallelDescriptor::ReduceRealSum(phisum_device.dataPtr(),npts); - ParallelDescriptor::ReduceIntSum(phicnt_device.dataPtr(),npts); - - Real dk = 1.; - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - if (d != 0) { - phisum_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d]; - } - }); - - Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); - Gpu::streamSynchronize(); - - if (ParallelDescriptor::IOProcessor()) { - std::ofstream turb; - std::string turbBaseName = "turb_"+name; - std::string turbName = Concatenate(turbBaseName,step,7); - turbName += ".txt"; - - turb.open(turbName); - for (int d=1; d > > >& spectral_fieldx, - const Vector > > >& spectral_fieldy, - const Vector > > >& spectral_fieldz, - const MultiFab& vel_onegrid, - const std::string& name, - const Real& scaling, - const int& step) -{ - int npts = n_cells[0]/2; - - Gpu::DeviceVector phisum_device(npts); - Gpu::DeviceVector phicnt_device(npts); - Gpu::HostVector phisum_host(npts); - Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data - int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data - - // Integrate spectra over k-shells - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - phisum_ptr[d] = 0.; - phicnt_ptr[d] = 0; - }); - - int nx = n_cells[0]; - int ny = n_cells[1]; - int nz = n_cells[2]; - for ( MFIter mfi(vel_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { - - const Box& bx = mfi.fabbox(); - - const Array4 > spectralx = (*spectral_fieldx[0]).const_array(); - const Array4 > spectraly = (*spectral_fieldy[0]).const_array(); - const 
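// Reminder on why only the i <= nx/2 half of the kx-range is visited in these
// spectral loops: the input fields are real, so the r2c output satisfies the
// Hermitian symmetry u_hat(-kx,-ky,-kz) = conj(u_hat(kx,ky,kz)) and only the
// non-negative-kx half (nx/2+1 values) is actually stored; the remaining modes
// carry no independent information.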
Array4 > spectralz = (*spectral_fieldz[0]).const_array(); - - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - if (i <= bx.length(0)/2) { // only half of kx-domain - int ki = i; - int kj = j; - if (j >= ny/2) kj = ny - j; - int kk = k; - if (k >= nz/2) kk = nz - k; - - Real dist = (ki*ki + kj*kj + kk*kk); - dist = std::sqrt(dist); - - if ( dist <= n_cells[0]/2-0.5) { - dist = dist+0.5; - int cell = int(dist); - Real real, imag, cov_x, cov_y, cov_z, cov; - real = spectralx(i,j,k).real(); - imag = spectralx(i,j,k).imag(); - cov_x = (1.0/scaling)*(real*real + imag*imag); - real = spectraly(i,j,k).real(); - imag = spectraly(i,j,k).imag(); - cov_y = (1.0/scaling)*(real*real + imag*imag); - real = spectralz(i,j,k).real(); - imag = spectralz(i,j,k).imag(); - cov_z = (1.0/scaling)*(real*real + imag*imag); - cov = cov_x + cov_y + cov_z; - amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); - amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); - } - } - }); - } - - for (int d=1; d > > >& spectral_field, - MultiFab& vel_decomp_onegrid, const IntVect& fft_size) -{ - -#ifdef AMREX_USE_CUDA - using FFTplan = cufftHandle; - using FFTcomplex = cuDoubleComplex; -#elif AMREX_USE_HIP - using FFTplan = rocfft_plan; - using FFTcomplex = double2; -#else - using FFTplan = fftw_plan; - using FFTcomplex = fftw_complex; -#endif - - Vector backward_plan; - - for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { - FFTplan fplan; -#ifdef AMREX_USE_CUDA // CUDA - cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan3d forward failed! Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP // HIP - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; - rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, - rocfft_transform_type_real_inverse, rocfft_precision_double, - 3, lengths, 1, nullptr); - Assert_rocfft_status("rocfft_plan_create", result); -#else // host - fplan = fftw_plan_dft_c2r_3d(fft_size[2], fft_size[1], fft_size[0], - reinterpret_cast - (spectral_field.back()->dataPtr()), - vel_decomp_onegrid[mfi].dataPtr(), - FFTW_ESTIMATE); -#endif - backward_plan.push_back(fplan); - } - - ParallelDescriptor::Barrier(); - - // Backward Transform - for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { - int i = mfi.LocalIndex(); -#ifdef AMREX_USE_CUDA - cufftSetStream(backward_plan[i], amrex::Gpu::gpuStream()); - cufftResult result = cufftExecZ2D(backward_plan[i], - reinterpret_cast - (spectral_field[i]->dataPtr()), - vel_decomp_onegrid[mfi].dataPtr()); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP - rocfft_execution_info execinfo = nullptr; - rocfft_status result = rocfft_execution_info_create(&execinfo); - Assert_rocfft_status("rocfft_execution_info_create", result); - - std::size_t buffersize = 0; - result = rocfft_plan_get_work_buffer_size(backward_plan[i], &buffersize); - Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); - - void* buffer = amrex::The_Arena()->alloc(buffersize); - result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); - - result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - Assert_rocfft_status("rocfft_execution_info_set_stream", result); - - amrex::Real* vel_onegrid_ptr = vel_decomp_onegrid[mfi].dataPtr(); - FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr()); - result = rocfft_execute(backward_plan[i], - (void**) &vel_onegrid_ptr, // in - (void**) &spectral_field_ptr, // out - execinfo); - Assert_rocfft_status("rocfft_execute", result); - amrex::Gpu::streamSynchronize(); - amrex::The_Arena()->free(buffer); - result = rocfft_execution_info_destroy(execinfo); - Assert_rocfft_status("rocfft_execution_info_destroy", result); -#else - fftw_execute(backward_plan[i]); -#endif - } - - // destroy fft plan - for (int i = 0; i < backward_plan.size(); ++i) { -#ifdef AMREX_USE_CUDA - cufftDestroy(backward_plan[i]); -#elif AMREX_USE_HIP - rocfft_plan_destroy(backward_plan[i]); -#else - fftw_destroy_plan(backward_plan[i]); -#endif - } - -} -#endif diff --git a/src_analysis/TurbSpectra_distributed.H b/src_analysis/TurbSpectra_distributed.H new file mode 100644 index 000000000..419756555 --- /dev/null +++ b/src_analysis/TurbSpectra_distributed.H @@ -0,0 +1,52 @@ +#ifndef _TurbSpectraDistributed_H_ +#define _TurbSpectraDistributed_H_ + +#include +#include +#include +#include + +#include + +#include + +#include "common_functions.H" + +#define ALIGN 16 + +using namespace amrex; + + +void IntegrateKScalar(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp); +//void IntegrateKScalarHeffte(const BaseFab >& spectral_field, +// const std::string& name, const Real& scaling, +// const Box& c_local_box, +// const Real& sqrtnpts, +// const int& step); +void IntegrateKVelocity(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp); +//void IntegrateKVelocityHeffte(const BaseFab >& spectral_fieldx, +// const BaseFab >& spectral_fieldy, +// const BaseFab >& spectral_fieldz, +// const std::string& name, const Real& scaling, +// const Box& c_local_box, +// const int& step); +void TurbSpectrumScalar(const MultiFab& variables, + const amrex::Geometry& geom, + const int& step, + const amrex::Vector& var_scaling, + const amrex::Vector< std::string >& var_names); +void TurbSpectrumVelDecomp(const MultiFab& vel, + MultiFab& vel_decomp, + const amrex::Geometry& geom, + const int& step, + const amrex::Real& var_scaling, + const amrex::Vector< std::string >& var_names); + + +#endif diff --git a/src_analysis/TurbSpectra_distributed.cpp b/src_analysis/TurbSpectra_distributed.cpp new file mode 100644 index 000000000..389daca72 --- /dev/null +++ b/src_analysis/TurbSpectra_distributed.cpp @@ -0,0 +1,486 @@ +#include + +#include "TurbSpectra.H" +#include "common_functions.H" + +#include +#include "AMReX_PlotFileUtil.H" +#include "AMReX_BoxArray.H" + +void TurbSpectrumScalar(const MultiFab& variables, + const 
amrex::Geometry& geom, + const int& step, + const amrex::Vector& scaling, + const amrex::Vector< std::string >& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), + "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), + "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.local_size() == 1, + "TurbSpectrumScalar: Must have one Box per MPI process"); + + int ncomp = variables.nComp(); + + long npts; + Box domain = geom.Domain(); + npts = (domain.length(0)*domain.length(1)*domain.length(2)); + Real sqrtnpts = std::sqrt(npts); + + // get box array and distribution map of variables + DistributionMapping dm = variables.DistributionMap(); + BoxArray ba = variables.boxArray(); + + MultiFab cov(ba, dm, ncomp, 0); + MultiFab mf; + mf.define(ba, dm, 1, 0);; + + for (int comp=0; comp r2c(geom.Domain()); + r2c.forward(mf,cmf); + } + + // Fill in the covariance multifab + int comp_gpu = comp; + Real sqrtnpts_gpu = sqrtnpts; + Real scaling_i_gpu = scaling[comp]; + std::string name_gpu = var_names[comp]; + for (MFIter mfi(cov); mfi.isValid(); ++mfi) { + Array4 const& data = cov.array(mfi); + Array4 > spectral = cmf.const_array(mfi); + const Box& bx = mfi.validbox(); + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + Real re = spectral(i,j,k).real(); + Real im = spectral(i,j,k).imag(); + data(i,j,k,comp_gpu) = (re*re + im*im)/(sqrtnpts_gpu*sqrtnpts_gpu*scaling_i_gpu); + }); + } + + // Integrate spectra over k-shells + IntegrateKScalar(cov,name_gpu,step,comp_gpu); + } +} + + +void TurbSpectrumVelDecomp(const MultiFab& vel, + MultiFab& vel_decomp, + const amrex::Geometry& geom, + const int& step, + const amrex::Real& scaling, + const amrex::Vector< std::string >& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, + "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, + "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.local_size() == 1, + "TurbSpectrumVelDecomp: Must have one Box per MPI process"); + + const GpuArray dx = geom.CellSizeArray(); + + long npts; + Box domain = geom.Domain(); + npts = (domain.length(0)*domain.length(1)*domain.length(2)); + Real sqrtnpts = std::sqrt(npts); + + // get box array and distribution map of vel + DistributionMapping dm = vel.DistributionMap(); + BoxArray ba = vel.boxArray(); + + // each MPI rank gets storage for its piece of the fft + cMultiFab spectral_field_Tx(ba,dm,1,0); // totalx + cMultiFab spectral_field_Ty(ba,dm,1,0); // totaly + cMultiFab spectral_field_Tz(ba,dm,1,0); // totalz + cMultiFab spectral_field_Sx(ba,dm,1,0); // solenoidalx + cMultiFab spectral_field_Sy(ba,dm,1,0); // solenoidaly + cMultiFab spectral_field_Sz(ba,dm,1,0); // solenoidalz + cMultiFab spectral_field_Dx(ba,dm,1,0); // dilatationalx + cMultiFab spectral_field_Dy(ba,dm,1,0); // dilatationaly + cMultiFab spectral_field_Dz(ba,dm,1,0); // dilatationalz + + MultiFab vel_single(ba, dm, 1, 0); + + int r2c_direction = 0; + + // ForwardTransform + // X + { + vel_single.ParallelCopy(vel, 0, 0, 1); + amrex::FFT::R2C 
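// The distributed code path in these new files replaces the hand-rolled
// heFFTe/FFTW plumbing with AMReX's built-in FFT wrapper.  A minimal sketch of
// the call pattern, following how this patch uses it (the template parameter
// and the complex MultiFab layout are assumptions based on the AMReX FFT
// documentation, not confirmed by this patch):
//   cMultiFab cmf;                          // complex-valued spectral data
//   amrex::FFT::R2C<Real> r2c(geom.Domain());
//   r2c.forward(real_mf, cmf);              // real -> complex, distributed
//   r2c.backward(cmf, real_mf);             // complex -> real; no 1/N scaling
//                                           // is applied here, consistent with
//                                           // the explicit factors in this file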
r2c(geom.Domain()); + r2c.forward(mf,spectral_field_Tx); + } + // Y + { + vel_single.ParallelCopy(vel, 1, 0, 1); + amrex::FFT::R2C r2c(geom.Domain()); + r2c.forward(mf,spectral_field_Ty); + } + // Z + { + vel_single.ParallelCopy(vel, 2, 0, 1); + amrex::FFT::R2C r2c(geom.Domain()); + r2c.forward(mf,spectral_field_Tz); + } + + // Decompose velocity field into solenoidal and dilatational + for (MFIter mfi(spectral_field_Tx); mfi.isValid(); ++mfi) { + Array4< GpuComplex > spectral_tx = spectral_field_Tx.array(mfi); + Array4< GpuComplex > spectral_ty = spectral_field_Ty.array(mfi); + Array4< GpuComplex > spectral_tz = spectral_field_Tz.array(mfi); + Array4< GpuComplex > spectral_sx = spectral_field_Sx.array(mfi); + Array4< GpuComplex > spectral_sy = spectral_field_Sy.array(mfi); + Array4< GpuComplex > spectral_sz = spectral_field_Sz.array(mfi); + Array4< GpuComplex > spectral_dx = spectral_field_Dx.array(mfi); + Array4< GpuComplex > spectral_dy = spectral_field_Dy.array(mfi); + Array4< GpuComplex > spectral_dz = spectral_field_Dz.array(mfi); + const Box& bx = mfi.validbox(); + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept + { + + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + + Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0; + + if (i <= nx/2) { + + // Get the wavevector + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + // Gradient Operators + GxR = (cos(2.0*M_PI*ki/nx)-1.0)/dx[0]; + GxC = (sin(2.0*M_PI*ki/nx)-0.0)/dx[0]; + GyR = (cos(2.0*M_PI*kj/ny)-1.0)/dx[1]; + GyC = (sin(2.0*M_PI*kj/ny)-0.0)/dx[1]; + GzR = (cos(2.0*M_PI*kk/nz)-1.0)/dx[2]; + GzC = (sin(2.0*M_PI*kk/nz)-0.0)/dx[2]; + } + else { // conjugate + amrex::Abort("check the code; i should not go beyond bx.length(0)/2"); + } + + // Scale Total velocity FFT components + spectral_tx(i,j,k) *= (1.0/sqrtnpts); + spectral_ty(i,j,k) *= (1.0/sqrtnpts); + spectral_tz(i,j,k) *= (1.0/sqrtnpts); + + // Inverse Laplacian + Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; + + // Divergence of vel + Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + + spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + + spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; + Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + + spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + + spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; + + if (Lap < 1.0e-12) { // zero mode for no bulk motion + spectral_dx(i,j,k) *= 0.0; + spectral_dy(i,j,k) *= 0.0; + spectral_dz(i,j,k) *= 0.0; + } + else { + + // Dilatational velocity + GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, + (divC*GxR - divR*GxC) / Lap); + spectral_dx(i,j,k) = copy_dx; + + GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, + (divC*GyR - divR*GyC) / Lap); + spectral_dy(i,j,k) = copy_dy; + + GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, + (divC*GzR - divR*GzC) / Lap); + spectral_dz(i,j,k) = copy_dz; + } + + // Solenoidal velocity + spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); + spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); + spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); + + }); + } + + MultiFab cov(ba, dm, 3, 0); // total, solenoidal, dilatational + + // Fill in the covariance multifab + Real sqrtnpts_gpu = sqrtnpts; + Real scaling_gpu = scaling; + for (MFIter mfi(cov); mfi.isValid(); ++mfi) { + Array4 const& data = 
cov.array(mfi); + Array4 > spec_tx = spectral_field_Tx.const_array(mfi); + Array4 > spec_ty = spectral_field_Ty.const_array(mfi); + Array4 > spec_tz = spectral_field_Tz.const_array(mfi); + Array4 > spec_sx = spectral_field_Sx.const_array(mfi); + Array4 > spec_sy = spectral_field_Sy.const_array(mfi); + Array4 > spec_sz = spectral_field_Sz.const_array(mfi); + Array4 > spec_dx = spectral_field_Dx.const_array(mfi); + Array4 > spec_dy = spectral_field_Dy.const_array(mfi); + Array4 > spec_dz = spectral_field_Dz.const_array(mfi); + const Box& bx = mfi.validbox(); + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + Real re_x, re_y, re_z, im_x, im_y, im_z; + + re_x = spec_tx(i,j,k).real(); + im_x = spec_tx(i,j,k).imag(); + re_y = spec_ty(i,j,k).real(); + im_y = spec_ty(i,j,k).imag(); + re_z = spec_tz(i,j,k).real(); + im_z = spec_tz(i,j,k).imag(); + data(i,j,k,0) = (re_x*re_x + im_x*im_x + + re_y*re_y + im_y*im_y + + re_z*re_z + im_z*im_z)/(scaling_gpu); + re_x = spec_sx(i,j,k).real(); + im_x = spec_sx(i,j,k).imag(); + re_y = spec_sy(i,j,k).real(); + im_y = spec_sy(i,j,k).imag(); + re_z = spec_sz(i,j,k).real(); + im_z = spec_sz(i,j,k).imag(); + data(i,j,k,1) = (re_x*re_x + im_x*im_x + + re_y*re_y + im_y*im_y + + re_z*re_z + im_z*im_z)/(scaling_gpu); + re_x = spec_dx(i,j,k).real(); + im_x = spec_dx(i,j,k).imag(); + re_y = spec_dy(i,j,k).real(); + im_y = spec_dy(i,j,k).imag(); + re_z = spec_dz(i,j,k).real(); + im_z = spec_dz(i,j,k).imag(); + data(i,j,k,2) = (re_x*re_x + im_x*im_x + + re_y*re_y + im_y*im_y + + re_z*re_z + im_z*im_z)/(scaling_gpu); + }); + } + + // Integrate K spectrum for velocities + IntegrateKVelocity(cov,"vel_total" ,step,0); + IntegrateKVelocity(cov,"vel_solenoidal",step,1); + IntegrateKVelocity(cov,"vel_dilational",step,2); + + // inverse Fourier transform solenoidal and dilatational components + { + amrex::FFT::R2C r2c(geom.Domain()); + MultiFab vel_decomp_single(ba, dm, 1, 0); + r2c.backward(spectral_field_Sx,vel_decomp_single); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 0, 1); + } + { + amrex::FFT::R2C r2c(geom.Domain()); + MultiFab vel_decomp_single(ba, dm, 1, 0); + r2c.backward(spectral_field_Sy,vel_decomp_single); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 1, 1); + } + { + amrex::FFT::R2C r2c(geom.Domain()); + MultiFab vel_decomp_single(ba, dm, 1, 0); + r2c.backward(spectral_field_Sz,vel_decomp_single); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 2, 1); + } + { + amrex::FFT::R2C r2c(geom.Domain()); + MultiFab vel_decomp_single(ba, dm, 1, 0); + r2c.backward(spectral_field_Dx,vel_decomp_single); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 3, 1); + } + { + amrex::FFT::R2C r2c(geom.Domain()); + MultiFab vel_decomp_single(ba, dm, 1, 0); + r2c.backward(spectral_field_Dy,vel_decomp_single); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 4, 1); + } + { + amrex::FFT::R2C r2c(geom.Domain()); + MultiFab vel_decomp_single(ba, dm, 1, 0); + r2c.backward(spectral_field_Dz,vel_decomp_single); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 5, 1); + } + + vel_decomp.mult(1.0/sqrtnpts); + +} + +void IntegrateKScalar(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp) + +{ + int npts = n_cells[0]/2; + + Gpu::DeviceVector phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); +// Gpu::HostVector phisum_host(npts); +// Gpu::HostVector phicnt_host(npts); + + Gpu::HostVector phisum_host(npts); + + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); 
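// Derivation of the 4.*M_PI*(d*d*dk + dk*dk*dk/12.) factor applied further
// below: phisum[d]/phicnt[d] is the mean spectral density of the modes that
// fell into shell d, and with dk = 1 the rescaling factor equals the exact
// volume of the unit-thickness spherical shell centred on radius d,
//     (4/3)*pi*[ (d + 1/2)^3 - (d - 1/2)^3 ] = 4*pi*( d^2 + 1/12 ),
// so the value written to the turb_*.txt file approximates the integral of
// the spectrum over that shell rather than a bare sum over sampled modes.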
// pointer to data + + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + phisum_ptr[d] = 0.; + phicnt_ptr[d] = 0; + }); +// for (int d=0; d & cov = cov_mag.const_array(mfi); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); + amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); + } + }); + } + + Gpu::streamSynchronize(); + + ParallelDescriptor::ReduceRealSum(phisum_device.dataPtr(),npts); + ParallelDescriptor::ReduceIntSum(phicnt_device.dataPtr(),npts); + + Real dk = 1.; + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + if (d != 0) { + phisum_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d]; + } + }); + + Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); + Gpu::streamSynchronize(); + + if (ParallelDescriptor::IOProcessor()) { + std::ofstream turb; + std::string turbBaseName = "turb_"+name; + std::string turbName = Concatenate(turbBaseName,step,7); + turbName += ".txt"; + + turb.open(turbName); + for (int d=1; d phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); + + Gpu::HostVector phisum_host(npts); + + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + phisum_ptr[d] = 0.; + phicnt_ptr[d] = 0; + }); + + int comp_gpu = comp; + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + for ( MFIter mfi(cov_mag,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.tilebox(); + + const Array4 & cov = cov_mag.const_array(mfi); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); + amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); + } + }); + } + + Gpu::streamSynchronize(); + + ParallelDescriptor::ReduceRealSum(phisum_device.dataPtr(),npts); + ParallelDescriptor::ReduceIntSum(phicnt_device.dataPtr(),npts); + + Real dk = 1.; + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + if (d != 0) { + phisum_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d]; + } + }); + + Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); + Gpu::streamSynchronize(); + + if (ParallelDescriptor::IOProcessor()) { + std::ofstream turb; + std::string turbBaseName = "turb_"+name; + std::string turbName = Concatenate(turbBaseName,step,7); + turbName += ".txt"; + + turb.open(turbName); + for (int d=1; d + +#include +#include +#include +#include + +#include + +#include + +#include "common_functions.H" + +#define ALIGN 16 + +using namespace amrex; + +void IntegrateKScalar(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp); +//void IntegrateKScalarHeffte(const BaseFab >& spectral_field, +// const std::string& name, const Real& scaling, +// const Box& c_local_box, +// const 
Real& sqrtnpts, +// const int& step); +void IntegrateKVelocity(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp); +//void IntegrateKVelocityHeffte(const BaseFab >& spectral_fieldx, +// const BaseFab >& spectral_fieldy, +// const BaseFab >& spectral_fieldz, +// const std::string& name, const Real& scaling, +// const Box& c_local_box, +// const int& step); +void TurbSpectrumScalar(const MultiFab& variables, + const amrex::Geometry& geom, + const int& step, + const amrex::Vector& var_scaling, + const amrex::Vector< std::string >& var_names); +void TurbSpectrumVelDecomp(const MultiFab& vel, + MultiFab& vel_decomp, + const amrex::Geometry& geom, + const int& step, + const amrex::Real& var_scaling, + const amrex::Vector< std::string >& var_names); + + +#endif diff --git a/src_analysis/TurbSpectra_heffte.cpp b/src_analysis/TurbSpectra_heffte.cpp new file mode 100644 index 000000000..90cc2615e --- /dev/null +++ b/src_analysis/TurbSpectra_heffte.cpp @@ -0,0 +1,749 @@ +#include "TurbSpectra.H" +#include "common_functions.H" + +#include +#include "AMReX_PlotFileUtil.H" +#include "AMReX_BoxArray.H" + +void TurbSpectrumScalar(const MultiFab& variables, + const amrex::Geometry& geom, + const int& step, + const amrex::Vector& scaling, + const amrex::Vector< std::string >& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), + "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), + "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.local_size() == 1, + "TurbSpectrumScalar: Must have one Box per MPI process when using heFFTe"); + + int ncomp = variables.nComp(); + + long npts; + Box domain = geom.Domain(); + npts = (domain.length(0)*domain.length(1)*domain.length(2)); + Real sqrtnpts = std::sqrt(npts); + + // get box array and distribution map of variables + DistributionMapping dm = variables.DistributionMap(); + BoxArray ba = variables.boxArray(); + + // since there is 1 MPI rank per box, each MPI rank obtains its local box and the associated boxid + Box local_box; + int local_boxid; + { + for (int i = 0; i < ba.size(); ++i) { + Box b = ba[i]; + // each MPI rank has its own local_box Box and local_boxid ID + if (ParallelDescriptor::MyProc() == dm[i]) { + local_box = b; + local_boxid = i; + } + } + } + + // now each MPI rank works on its own box + // for real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset + + // start by coarsening each box by 2 in the x-direction + Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1))); + + // if the coarsened box's high-x index is even, we shrink the size in 1 in x + // this avoids overlap between coarsened boxes + if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) { + c_local_box.setBig(0,c_local_box.bigEnd(0)-1); + } + // for any boxes that touch the hi-x domain we + // increase the size of boxes by 1 in x + // this makes the overall fft dataset have size (Nx/2+1 x Ny x Nz) + if (local_box.bigEnd(0) == geom.Domain().bigEnd(0)) { + c_local_box.growHi(0,1); + } + + // BOX ARRAY TO STORE COVARIANCE MATRIX IN A MFAB + // create a BoxArray containing the fft boxes + // by construction, these boxes correlate to the associated spectral_data + // this we can copy the spectral data into this multifab since 
we know they are owned by the same MPI rank + BoxArray fft_ba; + { + BoxList bl; + bl.reserve(ba.size()); + + for (int i = 0; i < ba.size(); ++i) { + Box b = ba[i]; + + Box r_box = b; + Box c_box = amrex::coarsen(r_box, IntVect(AMREX_D_DECL(2,1,1))); + + // this avoids overlap for the cases when one or more r_box's + // have an even cell index in the hi-x cell + if (c_box.bigEnd(0) * 2 == r_box.bigEnd(0)) { + c_box.setBig(0,c_box.bigEnd(0)-1); + } + + // increase the size of boxes touching the hi-x domain by 1 in x + // this is an (Nx x Ny x Nz) -> (Nx/2+1 x Ny x Nz) real-to-complex sizing + if (b.bigEnd(0) == geom.Domain().bigEnd(0)) { + c_box.growHi(0,1); + } + bl.push_back(c_box); + + } + fft_ba.define(std::move(bl)); + } + MultiFab cov(fft_ba, dm, ncomp, 0); + + // each MPI rank gets storage for its piece of the fft + BaseFab > spectral_field(c_local_box, 1, The_Device_Arena()); + MultiFab variables_single(ba, dm, 1, 0); + using heffte_complex = typename heffte::fft_output::type; + + int r2c_direction = 0; + for (int comp=0; comp fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field.dataPtr(); + variables_single.ParallelCopy(variables,comp,0,1); + fft.forward(variables_single[local_boxid].dataPtr(),spectral_data); + Gpu::streamSynchronize(); + + // Fill in the covariance multifab + int comp_gpu = comp; + Real sqrtnpts_gpu = sqrtnpts; + Real scaling_i_gpu = scaling[comp]; + std::string name_gpu = var_names[comp]; + for (MFIter mfi(cov); mfi.isValid(); ++mfi) { + Array4 const& data = cov.array(mfi); + Array4 > spectral = spectral_field.const_array(); + const Box& bx = mfi.fabbox(); + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + Real re = spectral(i,j,k).real(); + Real im = spectral(i,j,k).imag(); + data(i,j,k,comp_gpu) = (re*re + im*im)/(sqrtnpts_gpu*sqrtnpts_gpu*scaling_i_gpu); + }); + } + + // Integrate spectra over k-shells + IntegrateKScalar(cov,name_gpu,step,comp_gpu); + } +} + +void TurbSpectrumVelDecomp(const MultiFab& vel, + MultiFab& vel_decomp, + const amrex::Geometry& geom, + const int& step, + const amrex::Real& scaling, + const amrex::Vector< std::string >& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, + "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, + "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.local_size() == 1, + "TurbSpectrumVelDecomp: Must have one Box per MPI process when using heFFTe"); + + const GpuArray dx = geom.CellSizeArray(); + + long npts; + Box domain = geom.Domain(); + npts = (domain.length(0)*domain.length(1)*domain.length(2)); + Real sqrtnpts = std::sqrt(npts); + + // get box array and distribution map of vel + DistributionMapping dm = vel.DistributionMap(); + BoxArray ba = vel.boxArray(); + + // since there is 1 MPI rank per box, each MPI rank obtains its local box and the associated 
boxid + Box local_box; + int local_boxid; + { + for (int i = 0; i < ba.size(); ++i) { + Box b = ba[i]; + // each MPI rank has its own local_box Box and local_boxid ID + if (ParallelDescriptor::MyProc() == dm[i]) { + local_box = b; + local_boxid = i; + } + } + } + + // now each MPI rank works on its own box + // for real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset + + // start by coarsening each box by 2 in the x-direction + Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1))); + + // if the coarsened box's high-x index is even, we shrink the size in 1 in x + // this avoids overlap between coarsened boxes + if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) { + c_local_box.setBig(0,c_local_box.bigEnd(0)-1); + } + // for any boxes that touch the hi-x domain we + // increase the size of boxes by 1 in x + // this makes the overall fft dataset have size (Nx/2+1 x Ny x Nz) + if (local_box.bigEnd(0) == geom.Domain().bigEnd(0)) { + c_local_box.growHi(0,1); + } + + // each MPI rank gets storage for its piece of the fft + BaseFab > spectral_field_Tx(c_local_box, 1, The_Device_Arena()); // totalx + BaseFab > spectral_field_Ty(c_local_box, 1, The_Device_Arena()); // totaly + BaseFab > spectral_field_Tz(c_local_box, 1, The_Device_Arena()); // totalz + BaseFab > spectral_field_Sx(c_local_box, 1, The_Device_Arena()); // solenoidalx + BaseFab > spectral_field_Sy(c_local_box, 1, The_Device_Arena()); // solenoidaly + BaseFab > spectral_field_Sz(c_local_box, 1, The_Device_Arena()); // solenoidalz + BaseFab > spectral_field_Dx(c_local_box, 1, The_Device_Arena()); // dilatationalx + BaseFab > spectral_field_Dy(c_local_box, 1, The_Device_Arena()); // dilatationaly + BaseFab > spectral_field_Dz(c_local_box, 1, The_Device_Arena()); // dilatationalz + MultiFab vel_single(ba, dm, 1, 0); + + int r2c_direction = 0; + + // ForwardTransform + // X + using heffte_complex = typename heffte::fft_output::type; + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + vel_single.ParallelCopy(vel, 0, 0, 1); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tx.dataPtr(); + fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); + } + // Y + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + vel_single.ParallelCopy(vel, 1, 0, 1); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Ty.dataPtr(); + fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); + } + // Z + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft 
+#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + vel_single.ParallelCopy(vel, 2, 0, 1); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tz.dataPtr(); + fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); + } + + Gpu::streamSynchronize(); + + // Decompose velocity field into solenoidal and dilatational + Array4< GpuComplex > spectral_tx = spectral_field_Tx.array(); + Array4< GpuComplex > spectral_ty = spectral_field_Ty.array(); + Array4< GpuComplex > spectral_tz = spectral_field_Tz.array(); + Array4< GpuComplex > spectral_sx = spectral_field_Sx.array(); + Array4< GpuComplex > spectral_sy = spectral_field_Sy.array(); + Array4< GpuComplex > spectral_sz = spectral_field_Sz.array(); + Array4< GpuComplex > spectral_dx = spectral_field_Dx.array(); + Array4< GpuComplex > spectral_dy = spectral_field_Dy.array(); + Array4< GpuComplex > spectral_dz = spectral_field_Dz.array(); + ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) + { + + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + + Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0; + + if (i <= nx/2) { + + // Get the wavevector + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + // Gradient Operators + GxR = (cos(2.0*M_PI*ki/nx)-1.0)/dx[0]; + GxC = (sin(2.0*M_PI*ki/nx)-0.0)/dx[0]; + GyR = (cos(2.0*M_PI*kj/ny)-1.0)/dx[1]; + GyC = (sin(2.0*M_PI*kj/ny)-0.0)/dx[1]; + GzR = (cos(2.0*M_PI*kk/nz)-1.0)/dx[2]; + GzC = (sin(2.0*M_PI*kk/nz)-0.0)/dx[2]; + } + else { // conjugate + amrex::Abort("check the code; i should not go beyond bx.length(0)/2"); + } + + // Scale Total velocity FFT components + spectral_tx(i,j,k) *= (1.0/sqrtnpts); + spectral_ty(i,j,k) *= (1.0/sqrtnpts); + spectral_tz(i,j,k) *= (1.0/sqrtnpts); + + // Inverse Laplacian + Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; + + // Divergence of vel + Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + + spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + + spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; + Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + + spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + + spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; + + if (Lap < 1.0e-12) { // zero mode for no bulk motion + spectral_dx(i,j,k) *= 0.0; + spectral_dy(i,j,k) *= 0.0; + spectral_dz(i,j,k) *= 0.0; + } + else { + + // Dilatational velocity + GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, + (divC*GxR - divR*GxC) / Lap); + spectral_dx(i,j,k) = copy_dx; + + GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, + (divC*GyR - divR*GyC) / Lap); + spectral_dy(i,j,k) = copy_dy; + + GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, + (divC*GzR - divR*GzC) / Lap); + spectral_dz(i,j,k) = copy_dz; + } + + // Solenoidal velocity + spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); + spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); + spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); + + }); + + Gpu::streamSynchronize(); + + // BOX ARRAY TO STORE COVARIANCE MATRIX IN A MFAB + // create a BoxArray containing the fft 
boxes + // by construction, these boxes correlate to the associated spectral_data + // this we can copy the spectral data into this multifab since we know they are owned by the same MPI rank + BoxArray fft_ba; + { + BoxList bl; + bl.reserve(ba.size()); + + for (int i = 0; i < ba.size(); ++i) { + Box b = ba[i]; + + Box r_box = b; + Box c_box = amrex::coarsen(r_box, IntVect(AMREX_D_DECL(2,1,1))); + + // this avoids overlap for the cases when one or more r_box's + // have an even cell index in the hi-x cell + if (c_box.bigEnd(0) * 2 == r_box.bigEnd(0)) { + c_box.setBig(0,c_box.bigEnd(0)-1); + } + + // increase the size of boxes touching the hi-x domain by 1 in x + // this is an (Nx x Ny x Nz) -> (Nx/2+1 x Ny x Nz) real-to-complex sizing + if (b.bigEnd(0) == geom.Domain().bigEnd(0)) { + c_box.growHi(0,1); + } + bl.push_back(c_box); + + } + fft_ba.define(std::move(bl)); + } + MultiFab cov(fft_ba, dm, 3, 0); // total, solenoidal, dilatational + + // Fill in the covariance multifab + Real sqrtnpts_gpu = sqrtnpts; + Real scaling_gpu = scaling; + for (MFIter mfi(cov); mfi.isValid(); ++mfi) { + Array4 const& data = cov.array(mfi); + Array4 > spec_tx = spectral_field_Tx.const_array(); + Array4 > spec_ty = spectral_field_Ty.const_array(); + Array4 > spec_tz = spectral_field_Tz.const_array(); + Array4 > spec_sx = spectral_field_Sx.const_array(); + Array4 > spec_sy = spectral_field_Sy.const_array(); + Array4 > spec_sz = spectral_field_Sz.const_array(); + Array4 > spec_dx = spectral_field_Dx.const_array(); + Array4 > spec_dy = spectral_field_Dy.const_array(); + Array4 > spec_dz = spectral_field_Dz.const_array(); + const Box& bx = mfi.fabbox(); + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + Real re_x, re_y, re_z, im_x, im_y, im_z; + + re_x = spec_tx(i,j,k).real(); + im_x = spec_tx(i,j,k).imag(); + re_y = spec_ty(i,j,k).real(); + im_y = spec_ty(i,j,k).imag(); + re_z = spec_tz(i,j,k).real(); + im_z = spec_tz(i,j,k).imag(); + data(i,j,k,0) = (re_x*re_x + im_x*im_x + + re_y*re_y + im_y*im_y + + re_z*re_z + im_z*im_z)/(scaling_gpu); + re_x = spec_sx(i,j,k).real(); + im_x = spec_sx(i,j,k).imag(); + re_y = spec_sy(i,j,k).real(); + im_y = spec_sy(i,j,k).imag(); + re_z = spec_sz(i,j,k).real(); + im_z = spec_sz(i,j,k).imag(); + data(i,j,k,1) = (re_x*re_x + im_x*im_x + + re_y*re_y + im_y*im_y + + re_z*re_z + im_z*im_z)/(scaling_gpu); + re_x = spec_dx(i,j,k).real(); + im_x = spec_dx(i,j,k).imag(); + re_y = spec_dy(i,j,k).real(); + im_y = spec_dy(i,j,k).imag(); + re_z = spec_dz(i,j,k).real(); + im_z = spec_dz(i,j,k).imag(); + data(i,j,k,2) = (re_x*re_x + im_x*im_x + + re_y*re_y + im_y*im_y + + re_z*re_z + im_z*im_z)/(scaling_gpu); + }); + } + + // Integrate K spectrum for velocities + IntegrateKVelocity(cov,"vel_total" ,step,0); + IntegrateKVelocity(cov,"vel_solenoidal",step,1); + IntegrateKVelocity(cov,"vel_dilational",step,2); + + MultiFab vel_decomp_single(ba, dm, 1, 0); + // inverse Fourier transform solenoidal and dilatational components + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data 
= (heffte_complex*) spectral_field_Sx.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 0, 1); + } + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sy.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 1, 1); + } + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sz.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 2, 1); + } + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dx.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 3, 1); + } + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dy.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 4, 1); + } + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) 
,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dz.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 5, 1); + } + + + vel_decomp.mult(1.0/sqrtnpts); + +} + +void IntegrateKScalar(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp) + +{ + int npts = n_cells[0]/2; + + Gpu::DeviceVector phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); +// Gpu::HostVector phisum_host(npts); +// Gpu::HostVector phicnt_host(npts); + + Gpu::HostVector phisum_host(npts); + + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + phisum_ptr[d] = 0.; + phicnt_ptr[d] = 0; + }); +// for (int d=0; d & cov = cov_mag.const_array(mfi); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); + amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); + } + }); + } + + Gpu::streamSynchronize(); + + ParallelDescriptor::ReduceRealSum(phisum_device.dataPtr(),npts); + ParallelDescriptor::ReduceIntSum(phicnt_device.dataPtr(),npts); + + Real dk = 1.; + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + if (d != 0) { + phisum_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d]; + } + }); + + Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); + Gpu::streamSynchronize(); + + if (ParallelDescriptor::IOProcessor()) { + std::ofstream turb; + std::string turbBaseName = "turb_"+name; + std::string turbName = Concatenate(turbBaseName,step,7); + turbName += ".txt"; + + turb.open(turbName); + for (int d=1; d phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); + + Gpu::HostVector phisum_host(npts); + + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + phisum_ptr[d] = 0.; + phicnt_ptr[d] = 0; + }); + + int comp_gpu = comp; + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + for ( MFIter mfi(cov_mag,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.tilebox(); + + const Array4 & cov = cov_mag.const_array(mfi); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); + amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); + } + }); + } + + Gpu::streamSynchronize(); + + ParallelDescriptor::ReduceRealSum(phisum_device.dataPtr(),npts); + 
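+ // Editorial note (not in the original patch): the ReduceRealSum call above and the ReduceIntSum call below combine the per-rank shell sums and mode counts across MPI ranks; the loop that follows then weights each shell d by its spherical-shell volume, 4*pi*(d*d*dk + dk*dk*dk/12.) = (4*pi/3)*((d+dk/2)^3 - (d-dk/2)^3), and divides by the mode count, converting the accumulated per-mode power into a shell-integrated spectrum.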
ParallelDescriptor::ReduceIntSum(phicnt_device.dataPtr(),npts); + + Real dk = 1.; + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + if (d != 0) { + phisum_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d]; + } + }); + + Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); + Gpu::streamSynchronize(); + + if (ParallelDescriptor::IOProcessor()) { + std::ofstream turb; + std::string turbBaseName = "turb_"+name; + std::string turbName = Concatenate(turbBaseName,step,7); + turbName += ".txt"; + + turb.open(turbName); + for (int d=1; d +#elif AMREX_USE_HIP +# if __has_include() // ROCm 5.3+ +# include +# else +# include +# endif +#else +#include +#include +#endif + +#include +#include +#include +#include + +#include + +#include + +#include "common_functions.H" + +#define ALIGN 16 + +using namespace amrex; + +#ifdef AMREX_USE_CUDA +std::string cufftError (const cufftResult& err); +#endif +#ifdef AMREX_USE_HIP +std::string rocfftError (const rocfft_status err); +void Assert_rocfft_status (std::string const& name, rocfft_status status); +#endif + +void IntegrateKScalar(const Vector > > >& spectral_field, + const MultiFab& variables_onegrid, + const std::string& name, + const Real& scaling, + const Real& sqrtnpts, + const int& step); +void IntegrateKVelocity(const Vector > > >& spectral_fieldx, + const Vector > > >& spectral_fieldy, + const Vector > > >& spectral_fieldz, + const MultiFab& vel_onegrid, + const std::string& name, + const Real& scaling, + const int& step); +void TurbSpectrumScalar(const MultiFab& variables, + const amrex::Geometry& geom, + const int& step, + const amrex::Vector& var_scaling, + const amrex::Vector< std::string >& var_names); +void TurbSpectrumVelDecomp(const MultiFab& vel, + MultiFab& vel_decomp, + const amrex::Geometry& geom, + const int& step, + const amrex::Real& var_scaling, + const amrex::Vector< std::string >& var_names); +void InverseFFTVel(Vector > > >& spectral_field, + MultiFab& vel_decomp_onegrid, const IntVect& fft_size); + + +#endif diff --git a/src_analysis/TurbSpectra_single.cpp b/src_analysis/TurbSpectra_single.cpp new file mode 100644 index 000000000..b6bccc029 --- /dev/null +++ b/src_analysis/TurbSpectra_single.cpp @@ -0,0 +1,1043 @@ +#include "TurbSpectra.H" +#include "common_functions.H" + +#include +#include "AMReX_PlotFileUtil.H" +#include "AMReX_BoxArray.H" + +#ifdef AMREX_USE_CUDA +std::string cufftError (const cufftResult& err) +{ + switch (err) { + case CUFFT_SUCCESS: return "CUFFT_SUCCESS"; + case CUFFT_INVALID_PLAN: return "CUFFT_INVALID_PLAN"; + case CUFFT_ALLOC_FAILED: return "CUFFT_ALLOC_FAILED"; + case CUFFT_INVALID_TYPE: return "CUFFT_INVALID_TYPE"; + case CUFFT_INVALID_VALUE: return "CUFFT_INVALID_VALUE"; + case CUFFT_INTERNAL_ERROR: return "CUFFT_INTERNAL_ERROR"; + case CUFFT_EXEC_FAILED: return "CUFFT_EXEC_FAILED"; + case CUFFT_SETUP_FAILED: return "CUFFT_SETUP_FAILED"; + case CUFFT_INVALID_SIZE: return "CUFFT_INVALID_SIZE"; + case CUFFT_UNALIGNED_DATA: return "CUFFT_UNALIGNED_DATA"; + default: return std::to_string(err) + " (unknown error code)"; + } +} +#endif + +#ifdef AMREX_USE_HIP +std::string rocfftError (const rocfft_status err) +{ + if (err == rocfft_status_success) { + return std::string("rocfft_status_success"); + } else if (err == rocfft_status_failure) { + return std::string("rocfft_status_failure"); + } else if (err == rocfft_status_invalid_arg_value) { + return std::string("rocfft_status_invalid_arg_value"); + } else if (err == 
rocfft_status_invalid_dimensions) { + return std::string("rocfft_status_invalid_dimensions"); + } else if (err == rocfft_status_invalid_array_type) { + return std::string("rocfft_status_invalid_array_type"); + } else if (err == rocfft_status_invalid_strides) { + return std::string("rocfft_status_invalid_strides"); + } else if (err == rocfft_status_invalid_distance) { + return std::string("rocfft_status_invalid_distance"); + } else if (err == rocfft_status_invalid_offset) { + return std::string("rocfft_status_invalid_offset"); + } else { + return std::to_string(err) + " (unknown error code)"; + } +} + +void Assert_rocfft_status (std::string const& name, rocfft_status status) +{ + if (status != rocfft_status_success) { + amrex::AllPrint() << name + " failed! Error: " + rocfftError(status) << "\n";; + } +} +#endif + +void TurbSpectrumScalar(const MultiFab& variables, + const amrex::Geometry& geom, + const int& step, + const amrex::Vector& scaling, + const amrex::Vector< std::string >& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); + int ncomp = variables.nComp(); + + long npts; + + // Initialize the boxarray "ba_onegrid" from the single box "domain" + BoxArray ba_onegrid; + { + Box domain = geom.Domain(); + ba_onegrid.define(domain); + npts = (domain.length(0)*domain.length(1)*domain.length(2)); + } + Real sqrtnpts = std::sqrt(npts); + DistributionMapping dmap_onegrid(ba_onegrid); + MultiFab variables_onegrid; + variables_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + +#ifdef AMREX_USE_CUDA + using FFTplan = cufftHandle; + using FFTcomplex = cuDoubleComplex; +#elif AMREX_USE_HIP + using FFTplan = rocfft_plan; + using FFTcomplex = double2; +#else + using FFTplan = fftw_plan; + using FFTcomplex = fftw_complex; +#endif + + // size of box including ghost cell range + IntVect fft_size; + + // contain to store FFT - note it is shrunk by "half" in x + Vector > > > spectral_field; + Vector forward_plan; + bool built_plan = false; + + // for CUDA builds we only need to build the plan once; track whether we did + for (int comp=0; comp >(spectral_bx,1, + The_Device_Arena())); + spectral_field.back()->setVal(0.0); // touch the memory + FFTplan fplan; + +#ifdef AMREX_USE_CUDA // CUDA + cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " cufftplan3d forward failed! 
Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP // HIP + const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; + rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, + rocfft_transform_type_real_forward, rocfft_precision_double, + 3, lengths, 1, nullptr); + Assert_rocfft_status("rocfft_plan_create", result); +#else // host + fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], + variables_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_field.back()->dataPtr()), + FFTW_ESTIMATE); +#endif + forward_plan.push_back(fplan); + } + + built_plan = true; + } + + ParallelDescriptor::Barrier(); + + // ForwardTransform + for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) { + int i = mfi.LocalIndex(); +#ifdef AMREX_USE_CUDA + cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); + cufftResult result = cufftExecD2Z(forward_plan[i], + variables_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_field[i]->dataPtr())); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " forward transform using cufftExec failed! Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP + rocfft_execution_info execinfo = nullptr; + rocfft_status result = rocfft_execution_info_create(&execinfo); + Assert_rocfft_status("rocfft_execution_info_create", result); + + std::size_t buffersize = 0; + result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); + Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + + void* buffer = amrex::The_Arena()->alloc(buffersize); + result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); + Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + + result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); + Assert_rocfft_status("rocfft_execution_info_set_stream", result); + + amrex::Real* variables_onegrid_ptr = variables_onegrid[mfi].dataPtr(); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr()); + result = rocfft_execute(forward_plan[i], + (void**) &variables_onegrid_ptr, // in + (void**) &spectral_field_ptr, // out + execinfo); + Assert_rocfft_status("rocfft_execute", result); + amrex::Gpu::streamSynchronize(); + amrex::The_Arena()->free(buffer); + result = rocfft_execution_info_destroy(execinfo); + Assert_rocfft_status("rocfft_execution_info_destroy", result); +#else + fftw_execute(forward_plan[i]); +#endif + } + + // Integrate spectra over k-shells + IntegrateKScalar(spectral_field,variables_onegrid,var_names[comp],scaling[comp],sqrtnpts,step); + } + + // destroy fft plan + for (int i = 0; i < forward_plan.size(); ++i) { +#ifdef AMREX_USE_CUDA + cufftDestroy(forward_plan[i]); +#elif AMREX_USE_HIP + rocfft_plan_destroy(forward_plan[i]); +#else + fftw_destroy_plan(forward_plan[i]); +#endif + } +} + +void TurbSpectrumVelDecomp(const MultiFab& vel, + MultiFab& vel_decomp, + const amrex::Geometry& geom, + const int& step, + const amrex::Real& scaling, + const amrex::Vector< std::string >& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, + "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, + "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); + const GpuArray dx = geom.CellSizeArray(); + + long npts; + + // Initialize the 
boxarray "ba_onegrid" from the single box "domain" + BoxArray ba_onegrid; + { + Box domain = geom.Domain(); + ba_onegrid.define(domain); + npts = (domain.length(0)*domain.length(1)*domain.length(2)); + } + Real sqrtnpts = std::sqrt(npts); + DistributionMapping dmap_onegrid(ba_onegrid); + MultiFab vel_onegrid; + vel_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + +#ifdef AMREX_USE_CUDA + using FFTplan = cufftHandle; + using FFTcomplex = cuDoubleComplex; +#elif AMREX_USE_HIP + using FFTplan = rocfft_plan; + using FFTcomplex = double2; +#else + using FFTplan = fftw_plan; + using FFTcomplex = fftw_complex; +#endif + + // size of box including ghost cell range + IntVect fft_size; + + // contain to store FFT - note it is shrunk by "half" in x + Vector > > > spectral_fieldx; + Vector > > > spectral_fieldy; + Vector > > > spectral_fieldz; + Vector > > > spectral_field_Sx; + Vector > > > spectral_field_Sy; + Vector > > > spectral_field_Sz; + Vector > > > spectral_field_Dx; + Vector > > > spectral_field_Dy; + Vector > > > spectral_field_Dz; + + // x-velocity + { + Vector forward_plan; + vel_onegrid.ParallelCopy(vel,0,0,1); + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { + + // grab a single box including ghost cell range + Box realspace_bx = mfi.fabbox(); + + // size of box including ghost cell range + fft_size = realspace_bx.length(); // This will be different for hybrid FFT + + // this is the size of the box, except the 0th component is 'halved plus 1' + IntVect spectral_bx_size = fft_size; + spectral_bx_size[0] = fft_size[0]/2 + 1; + + // spectral box + Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); + + spectral_fieldx.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_fieldx.back()->setVal(0.0); // touch the memory + + spectral_field_Sx.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_field_Sx.back()->setVal(0.0); // touch the memory + + spectral_field_Dx.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_field_Dx.back()->setVal(0.0); // touch the memory + + FFTplan fplan; + +#ifdef AMREX_USE_CUDA // CUDA + cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " cufftplan3d forward failed! Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP // HIP + const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; + rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, + rocfft_transform_type_real_forward, rocfft_precision_double, + 3, lengths, 1, nullptr); + Assert_rocfft_status("rocfft_plan_create", result); +#else // host + fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], + vel_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_fieldx.back()->dataPtr()), + FFTW_ESTIMATE); +#endif + forward_plan.push_back(fplan); + } + + ParallelDescriptor::Barrier(); + + // ForwardTransform + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { + int i = mfi.LocalIndex(); +#ifdef AMREX_USE_CUDA + cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); + cufftResult result = cufftExecD2Z(forward_plan[i], + vel_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_fieldx[i]->dataPtr())); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP + rocfft_execution_info execinfo = nullptr; + rocfft_status result = rocfft_execution_info_create(&execinfo); + Assert_rocfft_status("rocfft_execution_info_create", result); + + std::size_t buffersize = 0; + result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); + Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + + void* buffer = amrex::The_Arena()->alloc(buffersize); + result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); + Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + + result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); + Assert_rocfft_status("rocfft_execution_info_set_stream", result); + + amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldx[i]->dataPtr()); + result = rocfft_execute(forward_plan[i], + (void**) &vel_onegrid_ptr, // in + (void**) &spectral_field_ptr, // out + execinfo); + Assert_rocfft_status("rocfft_execute", result); + amrex::Gpu::streamSynchronize(); + amrex::The_Arena()->free(buffer); + result = rocfft_execution_info_destroy(execinfo); + Assert_rocfft_status("rocfft_execution_info_destroy", result); +#else + fftw_execute(forward_plan[i]); +#endif + } + + // destroy fft plan + for (int i = 0; i < forward_plan.size(); ++i) { +#ifdef AMREX_USE_CUDA + cufftDestroy(forward_plan[i]); +#elif AMREX_USE_HIP + rocfft_plan_destroy(forward_plan[i]); +#else + fftw_destroy_plan(forward_plan[i]); +#endif + } + + } // end x-vel + + // y-velocity + { + Vector forward_plan; + vel_onegrid.ParallelCopy(vel,1,0,1); + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { + + // grab a single box including ghost cell range + Box realspace_bx = mfi.fabbox(); + + // size of box including ghost cell range + fft_size = realspace_bx.length(); // This will be different for hybrid FFT + + // this is the size of the box, except the 0th component is 'halved plus 1' + IntVect spectral_bx_size = fft_size; + spectral_bx_size[0] = fft_size[0]/2 + 1; + + // spectral box + Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); + + spectral_fieldy.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_fieldy.back()->setVal(0.0); // touch the memory + + spectral_field_Sy.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_field_Sy.back()->setVal(0.0); // touch the memory + + spectral_field_Dy.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_field_Dy.back()->setVal(0.0); // touch the memory + + FFTplan fplan; + +#ifdef AMREX_USE_CUDA // CUDA + cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " cufftplan3d forward failed! 
Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP // HIP + const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; + rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, + rocfft_transform_type_real_forward, rocfft_precision_double, + 3, lengths, 1, nullptr); + Assert_rocfft_status("rocfft_plan_create", result); +#else // host + fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], + vel_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_fieldy.back()->dataPtr()), + FFTW_ESTIMATE); +#endif + forward_plan.push_back(fplan); + } + + ParallelDescriptor::Barrier(); + + // ForwardTransform + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { + int i = mfi.LocalIndex(); +#ifdef AMREX_USE_CUDA + cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); + cufftResult result = cufftExecD2Z(forward_plan[i], + vel_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_fieldy[i]->dataPtr())); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " forward transform using cufftExec failed! Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP + rocfft_execution_info execinfo = nullptr; + rocfft_status result = rocfft_execution_info_create(&execinfo); + Assert_rocfft_status("rocfft_execution_info_create", result); + + std::size_t buffersize = 0; + result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); + Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + + void* buffer = amrex::The_Arena()->alloc(buffersize); + result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); + Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + + result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); + Assert_rocfft_status("rocfft_execution_info_set_stream", result); + + amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldy[i]->dataPtr()); + result = rocfft_execute(forward_plan[i], + (void**) &vel_onegrid_ptr, // in + (void**) &spectral_field_ptr, // out + execinfo); + Assert_rocfft_status("rocfft_execute", result); + amrex::Gpu::streamSynchronize(); + amrex::The_Arena()->free(buffer); + result = rocfft_execution_info_destroy(execinfo); + Assert_rocfft_status("rocfft_execution_info_destroy", result); +#else + fftw_execute(forward_plan[i]); +#endif + } + + // destroy fft plan + for (int i = 0; i < forward_plan.size(); ++i) { +#ifdef AMREX_USE_CUDA + cufftDestroy(forward_plan[i]); +#elif AMREX_USE_HIP + rocfft_plan_destroy(forward_plan[i]); +#else + fftw_destroy_plan(forward_plan[i]); +#endif + } + + } // end y-vel + + // z-velocity + { + Vector forward_plan; + vel_onegrid.ParallelCopy(vel,2,0,1); + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { + + // grab a single box including ghost cell range + Box realspace_bx = mfi.fabbox(); + + // size of box including ghost cell range + fft_size = realspace_bx.length(); // This will be different for hybrid FFT + + // this is the size of the box, except the 0th component is 'halved plus 1' + IntVect spectral_bx_size = fft_size; + spectral_bx_size[0] = fft_size[0]/2 + 1; + + // spectral box + Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); + + spectral_fieldz.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_fieldz.back()->setVal(0.0); // touch the memory + + spectral_field_Sz.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + 
spectral_field_Sz.back()->setVal(0.0); // touch the memory + + spectral_field_Dz.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_field_Dz.back()->setVal(0.0); // touch the memory + + FFTplan fplan; + +#ifdef AMREX_USE_CUDA // CUDA + cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " cufftplan3d forward failed! Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP // HIP + const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; + rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, + rocfft_transform_type_real_forward, rocfft_precision_double, + 3, lengths, 1, nullptr); + Assert_rocfft_status("rocfft_plan_create", result); +#else // host + fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], + vel_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_fieldz.back()->dataPtr()), + FFTW_ESTIMATE); +#endif + forward_plan.push_back(fplan); + } + + ParallelDescriptor::Barrier(); + + // ForwardTransform + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { + int i = mfi.LocalIndex(); +#ifdef AMREX_USE_CUDA + cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); + cufftResult result = cufftExecD2Z(forward_plan[i], + vel_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_fieldz[i]->dataPtr())); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " forward transform using cufftExec failed! Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP + rocfft_execution_info execinfo = nullptr; + rocfft_status result = rocfft_execution_info_create(&execinfo); + Assert_rocfft_status("rocfft_execution_info_create", result); + + std::size_t buffersize = 0; + result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); + Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + + void* buffer = amrex::The_Arena()->alloc(buffersize); + result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); + Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + + result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); + Assert_rocfft_status("rocfft_execution_info_set_stream", result); + + amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldz[i]->dataPtr()); + result = rocfft_execute(forward_plan[i], + (void**) &vel_onegrid_ptr, // in + (void**) &spectral_field_ptr, // out + execinfo); + Assert_rocfft_status("rocfft_execute", result); + amrex::Gpu::streamSynchronize(); + amrex::The_Arena()->free(buffer); + result = rocfft_execution_info_destroy(execinfo); + Assert_rocfft_status("rocfft_execution_info_destroy", result); +#else + fftw_execute(forward_plan[i]); +#endif + } + + // destroy fft plan + for (int i = 0; i < forward_plan.size(); ++i) { +#ifdef AMREX_USE_CUDA + cufftDestroy(forward_plan[i]); +#elif AMREX_USE_HIP + rocfft_plan_destroy(forward_plan[i]); +#else + fftw_destroy_plan(forward_plan[i]); +#endif + } + + } // end x-vel + + + // Decompose velocity field into solenoidal and dilatational + for ( MFIter mfi(vel_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.tilebox(); + Array4< GpuComplex > spectral_tx = (*spectral_fieldx[0]) .array(); + Array4< GpuComplex > spectral_ty = (*spectral_fieldy[0]) .array(); + Array4< GpuComplex > spectral_tz = (*spectral_fieldz[0]) .array(); + Array4< 
GpuComplex > spectral_sx = (*spectral_field_Sx[0]).array(); + Array4< GpuComplex > spectral_sy = (*spectral_field_Sy[0]).array(); + Array4< GpuComplex > spectral_sz = (*spectral_field_Sz[0]).array(); + Array4< GpuComplex > spectral_dx = (*spectral_field_Dx[0]).array(); + Array4< GpuComplex > spectral_dy = (*spectral_field_Dy[0]).array(); + Array4< GpuComplex > spectral_dz = (*spectral_field_Dz[0]).array(); + + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + + Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0; + + if (i <= nx/2) { + + // Get the wavevector + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + // Gradient Operators + GxR = (cos(2.0*M_PI*i/nx)-1.0)/dx[0]; + GxC = (sin(2.0*M_PI*i/nx)-0.0)/dx[0]; + GyR = (cos(2.0*M_PI*j/ny)-1.0)/dx[1]; + GyC = (sin(2.0*M_PI*j/ny)-0.0)/dx[1]; + GzR = (cos(2.0*M_PI*k/nz)-1.0)/dx[2]; + GzC = (sin(2.0*M_PI*k/nz)-0.0)/dx[2]; + + // Scale Total velocity FFT components + spectral_tx(i,j,k) *= (1.0/sqrtnpts); + spectral_ty(i,j,k) *= (1.0/sqrtnpts); + spectral_tz(i,j,k) *= (1.0/sqrtnpts); + + // Inverse Laplacian + Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; + + // Divergence of vel + Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + + spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + + spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; + Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + + spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + + spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; + + if (Lap < 1.0e-12) { // zero mode for no bulk motion + spectral_dx(i,j,k) *= 0.0; + spectral_dy(i,j,k) *= 0.0; + spectral_dz(i,j,k) *= 0.0; + } + else { + // Dilatational velocity + GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, + (divC*GxR - divR*GxC) / Lap); + spectral_dx(i,j,k) = copy_dx; + + GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, + (divC*GyR - divR*GyC) / Lap); + spectral_dy(i,j,k) = copy_dy; + + GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, + (divC*GzR - divR*GzC) / Lap); + spectral_dz(i,j,k) = copy_dz; + } + + // Solenoidal velocity + spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); + spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); + spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); + } + }); + } + + ParallelDescriptor::Barrier(); + + // Integrate K spectrum for velocities + IntegrateKVelocity(spectral_fieldx, spectral_fieldy, spectral_fieldz, vel_onegrid, "vel_total" ,scaling,step); + IntegrateKVelocity(spectral_field_Sx, spectral_field_Sy, spectral_field_Sz, vel_onegrid, "vel_solenoidal",scaling,step); + IntegrateKVelocity(spectral_field_Dx, spectral_field_Dy, spectral_field_Dz, vel_onegrid, "vel_dilatational",scaling,step); + + + // Inverse Solenoidal and Dilatational Velocity Components + { // solenoidal x + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Sx, vel_decomp_onegrid,fft_size); + // copy into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,0,1); + } + { // solenoidal y + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Sy, vel_decomp_onegrid,fft_size); + // copy 
into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,1,1); + } + { // solenoidal z + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Sz, vel_decomp_onegrid,fft_size); + // copy into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,2,1); + } + { // dilatational x + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Dx, vel_decomp_onegrid,fft_size); + // copy into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,3,1); + } + { // dilatational y + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Dy, vel_decomp_onegrid,fft_size); + // copy into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,4,1); + } + { // dilatational z + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Dz, vel_decomp_onegrid,fft_size); + // copy into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,5,1); + } + vel_decomp.mult(1.0/sqrtnpts); +} + +void IntegrateKScalar(const Vector > > >& spectral_field, + const MultiFab& variables_onegrid, + const std::string& name, + const Real& scaling, + const Real& sqrtnpts, + const int& step) + +{ + int npts = n_cells[0]/2; + Gpu::DeviceVector phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); + + Gpu::HostVector phisum_host(npts); + + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + // Integrate spectra over k-shells + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + phisum_ptr[d] = 0.; + phicnt_ptr[d] = 0; + }); + + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + for ( MFIter mfi(variables_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.fabbox(); + + const Array4 > spectral = (*spectral_field[0]).const_array(); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + if (i <= bx.length(0)/2) { // only half of kx-domain + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + Real real = spectral(i,j,k).real(); + Real imag = spectral(i,j,k).imag(); + Real cov = (1.0/(scaling*sqrtnpts*sqrtnpts))*(real*real + imag*imag); + amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); + amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); + } + } + }); + } + + for (int d=1; d > > >& spectral_fieldx, + const Vector > > >& spectral_fieldy, + const Vector > > >& spectral_fieldz, + const MultiFab& vel_onegrid, + const std::string& name, + const Real& scaling, + const int& step) +{ + int npts = n_cells[0]/2; + + Gpu::DeviceVector phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); + Gpu::HostVector phisum_host(npts); + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + // Integrate spectra over k-shells + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + phisum_ptr[d] = 0.; + phicnt_ptr[d] = 0; + }); + + int nx = 
n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + for ( MFIter mfi(vel_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.fabbox(); + + const Array4 > spectralx = (*spectral_fieldx[0]).const_array(); + const Array4 > spectraly = (*spectral_fieldy[0]).const_array(); + const Array4 > spectralz = (*spectral_fieldz[0]).const_array(); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + if (i <= bx.length(0)/2) { // only half of kx-domain + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + Real real, imag, cov_x, cov_y, cov_z, cov; + real = spectralx(i,j,k).real(); + imag = spectralx(i,j,k).imag(); + cov_x = (1.0/scaling)*(real*real + imag*imag); + real = spectraly(i,j,k).real(); + imag = spectraly(i,j,k).imag(); + cov_y = (1.0/scaling)*(real*real + imag*imag); + real = spectralz(i,j,k).real(); + imag = spectralz(i,j,k).imag(); + cov_z = (1.0/scaling)*(real*real + imag*imag); + cov = cov_x + cov_y + cov_z; + amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); + amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); + } + } + }); + } + + for (int d=1; d > > >& spectral_field, + MultiFab& vel_decomp_onegrid, const IntVect& fft_size) +{ + +#ifdef AMREX_USE_CUDA + using FFTplan = cufftHandle; + using FFTcomplex = cuDoubleComplex; +#elif AMREX_USE_HIP + using FFTplan = rocfft_plan; + using FFTcomplex = double2; +#else + using FFTplan = fftw_plan; + using FFTcomplex = fftw_complex; +#endif + + Vector backward_plan; + + for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { + FFTplan fplan; +#ifdef AMREX_USE_CUDA // CUDA + cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " cufftplan3d forward failed! Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP // HIP + const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; + rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, + rocfft_transform_type_real_inverse, rocfft_precision_double, + 3, lengths, 1, nullptr); + Assert_rocfft_status("rocfft_plan_create", result); +#else // host + fplan = fftw_plan_dft_c2r_3d(fft_size[2], fft_size[1], fft_size[0], + reinterpret_cast + (spectral_field.back()->dataPtr()), + vel_decomp_onegrid[mfi].dataPtr(), + FFTW_ESTIMATE); +#endif + backward_plan.push_back(fplan); + } + + ParallelDescriptor::Barrier(); + + // Backward Transform + for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { + int i = mfi.LocalIndex(); +#ifdef AMREX_USE_CUDA + cufftSetStream(backward_plan[i], amrex::Gpu::gpuStream()); + cufftResult result = cufftExecZ2D(backward_plan[i], + reinterpret_cast + (spectral_field[i]->dataPtr()), + vel_decomp_onegrid[mfi].dataPtr()); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP + rocfft_execution_info execinfo = nullptr; + rocfft_status result = rocfft_execution_info_create(&execinfo); + Assert_rocfft_status("rocfft_execution_info_create", result); + + std::size_t buffersize = 0; + result = rocfft_plan_get_work_buffer_size(backward_plan[i], &buffersize); + Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + + void* buffer = amrex::The_Arena()->alloc(buffersize); + result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); + Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + + result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); + Assert_rocfft_status("rocfft_execution_info_set_stream", result); + + amrex::Real* vel_onegrid_ptr = vel_decomp_onegrid[mfi].dataPtr(); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr()); + result = rocfft_execute(backward_plan[i], + (void**) &vel_onegrid_ptr, // in + (void**) &spectral_field_ptr, // out + execinfo); + Assert_rocfft_status("rocfft_execute", result); + amrex::Gpu::streamSynchronize(); + amrex::The_Arena()->free(buffer); + result = rocfft_execution_info_destroy(execinfo); + Assert_rocfft_status("rocfft_execution_info_destroy", result); +#else + fftw_execute(backward_plan[i]); +#endif + } + + // destroy fft plan + for (int i = 0; i < backward_plan.size(); ++i) { +#ifdef AMREX_USE_CUDA + cufftDestroy(backward_plan[i]); +#elif AMREX_USE_HIP + rocfft_plan_destroy(backward_plan[i]); +#else + fftw_destroy_plan(backward_plan[i]); +#endif + } + +} + diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp index beba0271b..e712e23be 100644 --- a/src_compressible_stag/main_driver.cpp +++ b/src_compressible_stag/main_driver.cpp @@ -1169,22 +1169,12 @@ void main_driver(const char* argv) // decomposed velocities Vector< std::string > var_names_turbVel{"vel_total","vel_solenoidal","vel_dilation"}; Real scaling_turb_veldecomp = dVolinv; -#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) // heffte - TurbSpectrumVelDecompHeffte(MFTurbVel, vel_decomp, geom, step, scaling_turb_veldecomp, var_names_turbVel); -#endif -#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) TurbSpectrumVelDecomp(MFTurbVel, vel_decomp, geom, step, scaling_turb_veldecomp, var_names_turbVel); -#endif // scalars Vector< std::string > var_names_turbScalar{"rho","temp","press"}; Vector scaling_turb_scalar(3, dVolinv); -#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) // heffte - TurbSpectrumScalarHeffte(MFTurbScalar, geom, step, scaling_turb_scalar, var_names_turbScalar); -#endif -#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) TurbSpectrumScalar(MFTurbScalar, geom, step, scaling_turb_scalar, var_names_turbScalar); -#endif } if (turbForcing > 0) { From 8f8e12db29807a1f1284e305a8c9fba9c162f8ab Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Fri, 25 Oct 2024 12:58:11 -0700 Subject: [PATCH 100/151] vorticity components written to plotfiles now --- src_compressible_stag/DeriveVelProp.cpp | 120 ++++++++++++++++++ .../compressible_functions_stag.H | 5 + src_compressible_stag/main_driver.cpp | 5 + 3 files changed, 130 insertions(+) diff --git a/src_compressible_stag/DeriveVelProp.cpp b/src_compressible_stag/DeriveVelProp.cpp index 2b1c39d56..c437cd384 100644 --- a/src_compressible_stag/DeriveVelProp.cpp +++ b/src_compressible_stag/DeriveVelProp.cpp @@ 
-614,3 +614,123 @@ void EvaluateWritePlotFileVelGrad(int step,
 }
 #endif
+#if defined(TURB)
+void EvaluateWritePlotFileVelGradTiny(int step,
+                                      const amrex::Real time,
+                                      const amrex::Geometry& geom,
+                                      const std::array<MultiFab, AMREX_SPACEDIM>& vel,
+                                      const amrex::MultiFab& vel_decomp_in)
+{
+    BL_PROFILE_VAR("EvaluateWritePlotFileVelGradTiny()",EvaluateWritePlotFileVelGradTiny);
+
+    MultiFab output;
+
+    // 0: vorticity wx_shifted
+    // 1: vorticity wy_shifted
+    // 2: vorticity wz_shifted
+    // 3: vorticity wx_avg
+    // 4: vorticity wy_avg
+    // 5: vorticity wz_avg
+    // 6: vorticity_mag_shft_then_sq = sqrt(wx*wx + wy*wy + wz*wz) using the shifted components
+    // 7: vorticity_mag_avg_then_sq = sqrt(wx*wx + wy*wy + wz*wz) using the edge-averaged components
+    // 8: vorticity_mag_sq_then_avg = sqrt of the average of the squared edge components
+    // 9: divergence = u_1,1 + u_2,2 + u_3,3
+    output.define(convert(vel[0].boxArray(),IntVect(AMREX_D_DECL(0,0,0))), vel[0].DistributionMap(), 10, 0);
+    output.setVal(0.0);
+
+    const GpuArray<Real, AMREX_SPACEDIM> dx = geom.CellSizeArray();
+
+    for ( MFIter mfi(output,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+
+        const Box& bx = mfi.tilebox();
+
+        const Array4<Real>& out = output.array(mfi);
+
+        AMREX_D_TERM(Array4<const Real> const& velx = vel[0].array(mfi);,
+                     Array4<const Real> const& vely = vel[1].array(mfi);,
+                     Array4<const Real> const& velz = vel[2].array(mfi););
+
+        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+
+            // divergence
+            out(i,j,k,9) = (velx(i+1,j,k) - velx(i,j,k))/dx[0] +
+                           (vely(i,j+1,k) - vely(i,j,k))/dx[1] +
+                           (velz(i,j,k+1) - velz(i,j,k))/dx[2] ;
+
+            // on edges: u_1,2 and u_2,1 and curl w1 = u_2,1 - u_1,2
+            Real u12_mm = (velx(i,j,k) - velx(i,j-1,k))/dx[1];
+            Real u21_mm = (vely(i,j,k) - vely(i-1,j,k))/dx[0];
+            Real w1_mm = u21_mm - u12_mm;
+            Real u12_mp = (velx(i,j+1,k) - velx(i,j,k))/dx[1];
+            Real u21_mp = (vely(i,j+1,k) - vely(i-1,j+1,k))/dx[0];
+            Real w1_mp = u21_mp - u12_mp;
+            Real u12_pm = (velx(i+1,j,k) - velx(i+1,j-1,k))/dx[1];
+            Real u21_pm = (vely(i+1,j,k) - vely(i,j,k))/dx[0];
+            Real w1_pm = u21_pm - u12_pm;
+            Real u12_pp = (velx(i+1,j+1,k) - velx(i+1,j,k))/dx[1];
+            Real u21_pp = (vely(i+1,j+1,k) - vely(i,j+1,k))/dx[0];
+            Real w1_pp = u21_pp - u12_pp;
+            out(i,j,k,0) = w1_mm;
+            out(i,j,k,3) = 0.5*(w1_mm+w1_mp+w1_pm+w1_pp);
+
+            // on edges: u_1,3 and u_3,1 and curl w2 = u_1,3 - u_3,1
+            Real u13_mm = (velx(i,j,k) - velx(i,j,k-1))/dx[2];
+            Real u31_mm = (velz(i,j,k) - velz(i-1,j,k))/dx[0];
+            Real w2_mm = u13_mm - u31_mm;
+            Real u13_mp = (velx(i,j,k+1) - velx(i,j,k))/dx[2];
+            Real u31_mp = (velz(i,j,k+1) - velz(i-1,j,k+1))/dx[0];
+            Real w2_mp = u13_mp - u31_mp;
+            Real u13_pm = (velx(i+1,j,k) - velx(i+1,j,k-1))/dx[2];
+            Real u31_pm = (velz(i+1,j,k) - velz(i,j,k))/dx[0];
+            Real w2_pm = u13_pm - u31_pm;
+            Real u13_pp = (velx(i+1,j,k+1) - velx(i+1,j,k))/dx[2];
+            Real u31_pp = (velz(i+1,j,k+1) - velz(i,j,k+1))/dx[0];
+            Real w2_pp = u13_pp - u31_pp;
+            out(i,j,k,1) = w2_mm;
+            out(i,j,k,4) = 0.5*(w2_mm+w2_mp+w2_pm+w2_pp);
+
+            // on edges: u_2,3 and u_3,2 and curl w3 = u_3,2 - u_2,3
+            Real u23_mm = (vely(i,j,k) - vely(i,j,k-1))/dx[2];
+            Real u32_mm = (velz(i,j,k) - velz(i,j-1,k))/dx[1];
+            Real w3_mm = u32_mm - u23_mm;
+            Real u23_mp = (vely(i,j,k+1) - vely(i,j,k))/dx[2];
+            Real u32_mp = (velz(i,j,k+1) - velz(i,j-1,k+1))/dx[1];
+            Real w3_mp = u32_mp - u23_mp;
+            Real u23_pm = (vely(i,j+1,k) - vely(i,j+1,k-1))/dx[2];
+            Real u32_pm = (velz(i,j+1,k) - velz(i,j,k))/dx[1];
+            Real w3_pm = u32_pm - u23_pm;
+            Real u23_pp = (vely(i,j+1,k+1) - vely(i,j+1,k))/dx[2];
+            Real u32_pp = (velz(i,j+1,k+1) - velz(i,j,k+1))/dx[1];
+            Real w3_pp = u32_pp - u23_pp;
+            out(i,j,k,2) = w3_mm;
+            out(i,j,k,5) = 0.5*(w3_mm+w3_mp+w3_pm+w3_pp);
+
+            // vorticity magnitude: sqrt(w1*w1 + w2*w2 + w3*w3)
+            out(i,j,k,6) = sqrt(w1_mm*w1_mm + w2_mm*w2_mm + w3_mm*w3_mm);
+            out(i,j,k,7) = sqrt(out(i,j,k,3)*out(i,j,k,3) + out(i,j,k,4)*out(i,j,k,4) +
+                                out(i,j,k,5)*out(i,j,k,5));
+            out(i,j,k,8) = std::sqrt(0.25*(w1_mm*w1_mm + w1_mp*w1_mp + w1_pm*w1_pm + w1_pp*w1_pp +
+                                           w2_mm*w2_mm + w2_mp*w2_mp + w2_pm*w2_pm + w2_pp*w2_pp +
+                                           w3_mm*w3_mm + w3_mp*w3_mp + w3_pm*w3_pm + w3_pp*w3_pp));
+        });
+    }
+
+    // Write on a plotfile
+    std::string plotfilename = amrex::Concatenate("vort_div",step,9);
+    amrex::Vector<std::string> varNames(10);
+    varNames[0] = "w1_shift";
+    varNames[1] = "w2_shift";
+    varNames[2] = "w3_shift";
+    varNames[3] = "w1_avg";
+    varNames[4] = "w2_avg";
+    varNames[5] = "w3_avg";
+    varNames[6] = "vort_mag_shft";
+    varNames[7] = "vort_mag_shft_avg";
+    varNames[8] = "vort_mag_avg";
+    varNames[9] = "div";
+    WriteSingleLevelPlotfile(plotfilename,output,varNames,geom,time,step);
+}
+#endif
+
+
diff --git a/src_compressible_stag/compressible_functions_stag.H b/src_compressible_stag/compressible_functions_stag.H
index 74b701303..f0b628ae3 100644
--- a/src_compressible_stag/compressible_functions_stag.H
+++ b/src_compressible_stag/compressible_functions_stag.H
@@ -54,6 +54,11 @@ void EvaluateWritePlotFileVelGrad(int step,
                                   const amrex::Geometry& geom,
                                   const std::array<MultiFab, AMREX_SPACEDIM>& vel,
                                   const amrex::MultiFab& vel_decomp);
+void EvaluateWritePlotFileVelGradTiny(int step,
+                                      const amrex::Real time,
+                                      const amrex::Geometry& geom,
+                                      const std::array<MultiFab, AMREX_SPACEDIM>& vel,
+                                      const amrex::MultiFab& vel_decomp);
 #endif
 
 void conservedToPrimitiveStag(MultiFab& prim_in, std::array<MultiFab, AMREX_SPACEDIM>& velStag_in,
diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp
index beba0271b..a98136b57 100644
--- a/src_compressible_stag/main_driver.cpp
+++ b/src_compressible_stag/main_driver.cpp
@@ -690,6 +690,7 @@ void main_driver(const char* argv)
 #if defined(TURB)
     if (turbForcing > 0) {
         EvaluateWritePlotFileVelGrad(0, 0.0, geom, vel, vel_decomp);
+        EvaluateWritePlotFileVelGradTiny(0, 0.0, geom, vel, vel_decomp);
     }
 #endif
 
@@ -1128,6 +1129,9 @@ void main_driver(const char* argv)
                 writePlt = ((step+1)%plot_int == 0);
             }
         }
+#if defined(TURB)
+        if ((turbRestartRun == 0) and (turbForcing >= 1)) writePlt = true;
+#endif
 
         if (writePlt) {
             //yzAverage(cuMeans, cuVars, primMeans, primVars, spatialCross,
@@ -1189,6 +1193,7 @@ void main_driver(const char* argv)
 
             if (turbForcing > 0) {
                 EvaluateWritePlotFileVelGrad(step, time, geom, vel, vel_decomp);
+                EvaluateWritePlotFileVelGradTiny(step, time, geom, vel, vel_decomp);
             }
 #endif
         }

From 529902b6e950df7552125a4570c6899a4289b82d Mon Sep 17 00:00:00 2001
From: Ishan Srivastava
Date: Fri, 25 Oct 2024 15:13:29 -0700
Subject: [PATCH 101/151] corrected spectral filter calculations

---
 .../SPECTRAL_FILTER/main_driver.cpp        | 75 +++++++++++++++++--
 .../SPECTRAL_FILTER/spectral_functions.H   | 11 +++
 .../SPECTRAL_FILTER/spectral_functions.cpp | 38 ++++++++++
 3 files changed, 118 insertions(+), 6 deletions(-)

diff --git a/exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp b/exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp
index e9ee078b2..7db4b16dd 100644
--- a/exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp
+++ b/exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp
@@ -66,6 +66,9 @@ void main_driver(const char* argv)
     int nprimvars;
     pp.query("nprimvars",nprimvars);
 
+    int plot_filter = 0;
+    pp.query("plot_filter",plot_filter);
+
     amrex::IntVect ngc;
     for (int i=0; i<3; ++i) {
         ngc[i] = 1; // number of ghost cells
@@ -102,7 +105,7 @@ void main_driver(const char* argv)
     Vector<int> is_periodic(3,1); // force to be periodic -- can change later
     geom.define(domain,&real_box,CoordSys::cartesian,is_periodic.data());
 
-    const Real* dx = geom.CellSize();
+    const GpuArray<Real, AMREX_SPACEDIM> dx = geom.CellSizeArray();
     const RealBox& realDomain = geom.ProbDomain();
 
     SpectralReadCheckPoint(geom, domain, prim, vel, ba, dmap, n_cells, nprimvars, max_grid_size, ngc, restart);
@@ -141,13 +144,14 @@ void main_driver(const char* argv)
     vel_decomp_filter.FillBoundary(geom.periodicity());
     scalar_filter.FillBoundary(geom.periodicity());
 
-    SpectralWritePlotFile(restart, kmin, kmax, geom, vel_decomp_filter, scalar_filter, MFTurbVel, MFTurbScalar);
+    if (plot_filter) SpectralWritePlotFile(restart, kmin, kmax, geom, vel_decomp_filter, scalar_filter, MFTurbVel, MFTurbScalar);
 
     // Turbulence Diagnostics
    Real u_rms, u_rms_s, u_rms_d, delta_u_rms;
    Real taylor_len, taylor_Re_eta;
    Real skew, skew_s, skew_d, kurt, kurt_s, kurt_d;
    Vector<Real> var(9, 0.0);
+   Real skew_vort, kurt_vort, skew_div, kurt_div;
    {
        Vector<Real> dProb(3);
        dProb[0] = 1.0/((n_cells[0]+1)*n_cells[1]*n_cells[2]);
@@ -316,8 +320,61 @@ void main_driver(const char* argv)
           var[i] = mean2 - mean*mean;
        }
 
+       // skewness and kurtosis of velocity vorticity and divergence
+       MultiFab vel_stats;
+       vel_stats.define(prim.boxArray(),prim.DistributionMap(),4,0); // div, w1, w2, w3
+       for ( MFIter mfi(vel_stats,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+           const Box& bx = mfi.tilebox();
+           const Array4<const Real>& v_decomp = vel_decomp_filter.array(mfi);
+           const Array4< Real>& v_stats = vel_stats.array(mfi);
+           amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+           {
+               // divergence
+               v_stats(i,j,k,0) = 0.5*( (v_decomp(i+1,j,k,0) - v_decomp(i-1,j,k,0))/dx[0] +
+                                        (v_decomp(i,j+1,k,1) - v_decomp(i,j-1,k,1))/dx[1] +
+                                        (v_decomp(i,j,k+1,2) - v_decomp(i,j,k-1,2))/dx[2] );
+
+               // curl w1 = u_2,1 - u_1,2
+               v_stats(i,j,k,1) = 0.5*( (v_decomp(i+1,j,k,1) - v_decomp(i-1,j,k,1))/dx[0] -
+                                        (v_decomp(i,j+1,k,0) - v_decomp(i,j-1,k,0))/dx[1] );
+
+               // curl w2 = u_1,3 - u_3,1
+               v_stats(i,j,k,2) = 0.5*( (v_decomp(i,j,k+1,0) - v_decomp(i,j,k-1,0))/dx[2] -
+                                        (v_decomp(i+1,j,k,2) - v_decomp(i-1,j,k,2))/dx[0] );
+
+               // curl w3 = u_3,2 - u_2,3
+               v_stats(i,j,k,3) = 0.5*( (v_decomp(i,j+1,k,2) - v_decomp(i,j-1,k,2))/dx[1] -
+                                        (v_decomp(i,j,k+1,1) - v_decomp(i,j,k-1,1))/dx[2] );
+
+           });
+       }
+       // compute spatial mean
+       Real mean_div = vel_stats.sum(0) / (npts);
+       Real mean_w1 = vel_stats.sum(1) / (npts);
+       Real mean_w2 = vel_stats.sum(2) / (npts);
+       Real mean_w3 = vel_stats.sum(3) / (npts);
+       vel_stats.plus(-1.0*mean_div, 0, 1);
+       vel_stats.plus(-1.0*mean_w1, 1, 1);
+       vel_stats.plus(-1.0*mean_w2, 2, 1);
+       vel_stats.plus(-1.0*mean_w3, 3, 1);
+
+       Vector<Real> U2(4);
+       Vector<Real> U3(4);
+       Vector<Real> U4(4);
+       for (int i=0;i<4;++i) {
+           CCMoments(vel_stats,i,ccTempA,2,U2[i]);
+           CCMoments(vel_stats,i,ccTempA,3,U3[i]);
+           CCMoments(vel_stats,i,ccTempA,4,U4[i]);
+       }
+       skew_div = U3[0]/pow(U2[0],1.5);
+       kurt_div = U4[0]/pow(U2[0],2.0);
+       skew_vort = (U3[1] + U3[2] + U3[3])/
+                   (pow(U2[1],1.5) + pow(U2[2],1.5) + pow(U2[3],1.5));
+       kurt_vort = (U4[1] + U4[2] + U4[3])/
+                   (pow(U2[1],2.0) + pow(U2[2],2.0) + pow(U2[3],2.0));
+
    }
 
-   std::string turbfilename = "turbstats_";
+   std::string turbfilename = amrex::Concatenate("turbstats_filtered_",restart,9);
    std::ostringstream os;
    os << std::setprecision(3) << kmin;
    turbfilename += os.str();
@@ -335,9 +392,11 @@ void main_driver(const char* argv)
                << "TaylorLen " << "TaylorRe*Eta "
                << "skew " << "skew_s " << "skew_d "
                << "kurt " << "kurt_s 
" << "kurt_d " - << "var ux " << "var uy " << "var uz " - << "var uxs " << "var uys " << "var uzs " - << "var uxd " << "var uyd " << "var uzd " + << "var_ux " << "var_uy " << "var_uz " + << "var_uxs " << "var_uys " << "var_uzs " + << "var_uxd " << "var_uyd " << "var_uzd " + << "skew_div " << "kurt_div " + << "skew_vort " << "kurt_vort " << std::endl; turboutfile << u_rms << " "; @@ -355,6 +414,10 @@ void main_driver(const char* argv) for (int i=0;i<9;++i) { turboutfile << var[i] << " "; } + turboutfile << skew_div << " "; + turboutfile << kurt_div << " "; + turboutfile << skew_vort << " "; + turboutfile << kurt_vort << " "; turboutfile << std::endl; } // timer diff --git a/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H index 12a72200f..4f99f51a3 100644 --- a/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H +++ b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H @@ -98,5 +98,16 @@ void FCMoments(const std::array& m1, const int& power, amrex::Vector& prod_val); +void SumCC(const amrex::MultiFab& m1, + const int& comp, + amrex::Real& sum, + const bool& divide_by_ncells); + +void CCMoments(const amrex::MultiFab& m1, + const int& comp1, + amrex::MultiFab& mscr, + const int& power, + amrex::Real& prod_val); + #endif diff --git a/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp index aa144bef6..b36e18b8d 100644 --- a/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp +++ b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp @@ -661,7 +661,9 @@ void SpectralScalarDecomp(const MultiFab& scalar, // Get the wavenumber int ki = i; int kj = j; + if (j >= ny/2) kj = ny - j; int kk = k; + if (k >= nz/2) kk = nz - k; Real knum = (ki*ki + kj*kj + kk*kk); knum = std::sqrt(knum); @@ -1037,3 +1039,39 @@ void SumStag(const std::array& m1, ParallelDescriptor::ReduceRealSum(sum[2]); } +void CCMoments(const amrex::MultiFab& m1, + const int& comp1, + amrex::MultiFab& mscr, + const int& power, + amrex::Real& prod_val) +{ + + BL_PROFILE_VAR("CCMoments()",CCMoments); + + MultiFab::Copy(mscr,m1,comp1,0,1,0); + for(int i=1; i Date: Fri, 25 Oct 2024 15:14:15 -0700 Subject: [PATCH 102/151] modified Makefiles to build on Perlmutter --- exec/compressible_stag/GNUmakefile | 30 +- .../SPECTRAL_FILTER/GNUmakefile | 32 +- .../SPECTRAL_FILTER/build_perlmutter.sh | 14 +- exec/compressible_stag/TURB_PDFS/GNUmakefile | 5 +- .../TURB_PDFS/build_perlmutter.sh | 20 + .../TURB_PDFS/main_multisteps.cpp | 526 ++++++++++++++++++ .../build_perlmutter_101724.sh | 30 + 7 files changed, 621 insertions(+), 36 deletions(-) create mode 100755 exec/compressible_stag/TURB_PDFS/build_perlmutter.sh create mode 100644 exec/compressible_stag/TURB_PDFS/main_multisteps.cpp create mode 100755 exec/compressible_stag/build_perlmutter_101724.sh diff --git a/exec/compressible_stag/GNUmakefile b/exec/compressible_stag/GNUmakefile index 1a5db02f5..3bfa28eb8 100644 --- a/exec/compressible_stag/GNUmakefile +++ b/exec/compressible_stag/GNUmakefile @@ -23,7 +23,7 @@ USE_HEFFTE_ROCFFT = FALSE ifeq ($(USE_HEFFTE_FFTW),TRUE) HEFFTE_HOME ?= ../../../heffte/ else ifeq ($(USE_HEFFTE_CUFFT),TRUE) - HEFFTE_HOME ?= ../../../heffte/ + HEFFTE_HOME ?= ../../../heffte-org/build_aware/ else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) HEFFTE_HOME ?= ../../../heffte/ endif @@ -57,12 +57,25 @@ include ../../src_common/Make.package VPATH_LOCATIONS += ../../src_common/ INCLUDE_LOCATIONS += 
../../src_common/ +#ifeq ($(USE_HEFFTE_FFTW),TRUE) +# include $(HEFFTE_HOME)/src/Make.package +#else ifeq ($(USE_HEFFTE_CUFFT),TRUE) +# include $(HEFFTE_HOME)/src/Make.package +#else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) +# include $(HEFFTE_HOME)/src/Make.package +#endif + ifeq ($(USE_HEFFTE_FFTW),TRUE) - include $(HEFFTE_HOME)/src/Make.package + DEFINES += -DHEFFTE_FFTW + LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f else ifeq ($(USE_HEFFTE_CUFFT),TRUE) - include $(HEFFTE_HOME)/src/Make.package + DEFINES += -DHEFFTE_CUFFT + VPATH_LOCATIONS += $(HEFFTE_HOME)/include + INCLUDE_LOCATIONS += $(HEFFTE_HOME)/include + LIBRARY_LOCATIONS += $(HEFFTE_HOME)/lib + LIBRARIES += -lheffte else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) - include $(HEFFTE_HOME)/src/Make.package + DEFINES += -DHEFFTE_ROCFFT endif include $(AMREX_HOME)/Src/Base/Make.package @@ -88,15 +101,6 @@ ifeq ($(DO_TURB), TRUE) DEFINES += -DTURB endif -ifeq ($(USE_HEFFTE_FFTW),TRUE) - DEFINES += -DHEFFTE_FFTW - LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f -else ifeq ($(USE_HEFFTE_CUFFT),TRUE) - DEFINES += -DHEFFTE_CUFFT -else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) - DEFINES += -DHEFFTE_ROCFFT -endif - MAXSPECIES := $(strip $(MAX_SPEC)) DEFINES += -DMAX_SPECIES=$(MAXSPECIES) diff --git a/exec/compressible_stag/SPECTRAL_FILTER/GNUmakefile b/exec/compressible_stag/SPECTRAL_FILTER/GNUmakefile index 141c3a621..052b38516 100644 --- a/exec/compressible_stag/SPECTRAL_FILTER/GNUmakefile +++ b/exec/compressible_stag/SPECTRAL_FILTER/GNUmakefile @@ -16,7 +16,7 @@ USE_HEFFTE_ROCFFT = FALSE ifeq ($(USE_HEFFTE_FFTW),TRUE) HEFFTE_HOME ?= ../../../../heffte/ else ifeq ($(USE_HEFFTE_CUFFT),TRUE) - HEFFTE_HOME ?= ../../../../heffte/ + HEFFTE_HOME ?= ../../../../heffte-org/build_aware/ else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) HEFFTE_HOME ?= ../../../../heffte/ endif @@ -26,16 +26,28 @@ include $(AMREX_HOME)/Tools/GNUMake/Make.defs VPATH_LOCATIONS += . INCLUDE_LOCATIONS += . 
+#ifeq ($(USE_HEFFTE_FFTW),TRUE) +# include $(HEFFTE_HOME)/src/Make.package +#else ifeq ($(USE_HEFFTE_CUFFT),TRUE) +# include $(HEFFTE_HOME)/src/Make.package +#else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) +# include $(HEFFTE_HOME)/src/Make.package +#endif + +include ./Make.package ifeq ($(USE_HEFFTE_FFTW),TRUE) - include $(HEFFTE_HOME)/src/Make.package + DEFINES += -DHEFFTE_FFTW + LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f else ifeq ($(USE_HEFFTE_CUFFT),TRUE) - include $(HEFFTE_HOME)/src/Make.package + DEFINES += -DHEFFTE_CUFFT + VPATH_LOCATIONS += $(HEFFTE_HOME)/include + INCLUDE_LOCATIONS += $(HEFFTE_HOME)/include + LIBRARY_LOCATIONS += $(HEFFTE_HOME)/lib + LIBRARIES += -lheffte else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) - include $(HEFFTE_HOME)/src/Make.package + DEFINES += -DHEFFTE_ROCFFT endif -include ./Make.package - include $(AMREX_HOME)/Src/Base/Make.package include $(AMREX_HOME)/Tools/GNUMake/Make.rules @@ -55,11 +67,3 @@ ifeq ($(DO_TURB), TRUE) DEFINES += -DTURB endif -ifeq ($(USE_HEFFTE_FFTW),TRUE) - DEFINES += -DHEFFTE_FFTW - LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f -else ifeq ($(USE_HEFFTE_CUFFT),TRUE) - DEFINES += -DHEFFTE_CUFFT -else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) - DEFINES += -DHEFFTE_ROCFFT -endif diff --git a/exec/compressible_stag/SPECTRAL_FILTER/build_perlmutter.sh b/exec/compressible_stag/SPECTRAL_FILTER/build_perlmutter.sh index f6becf08a..e3bd5aac6 100755 --- a/exec/compressible_stag/SPECTRAL_FILTER/build_perlmutter.sh +++ b/exec/compressible_stag/SPECTRAL_FILTER/build_perlmutter.sh @@ -1,17 +1,17 @@ #!/usr/bin/bash # required dependencies -module load gpu -module load PrgEnv-gnu -module load craype -module load craype-x86-milan -module load craype-accel-nvidia80 +module load cray-fftw +module load cmake module load cudatoolkit -module load cmake/3.24.3 + +module list # necessary to use CUDA-Aware MPI and run a job export CRAY_ACCEL_TARGET=nvidia80 +export MPICH_GPU_SUPPORT_ENABLED=1 + # optimize CUDA compilation for A100 export AMREX_CUDA_ARCH=8.0 @@ -27,4 +27,4 @@ export FC=ftn export CUDACXX=$(which nvcc) export CUDAHOSTCXX=CC -make -j10 USE_CUDA=TRUE USE_HEFFTE_CUFFT=TRUE USE_ASSERTION=TRUE +make -j10 USE_CUDA=TRUE USE_HEFFTE_CUFFT=TRUE USE_ASSERTION=TRUE MAX_SPEC=2 diff --git a/exec/compressible_stag/TURB_PDFS/GNUmakefile b/exec/compressible_stag/TURB_PDFS/GNUmakefile index e1fcfec48..2f6bd7e1c 100644 --- a/exec/compressible_stag/TURB_PDFS/GNUmakefile +++ b/exec/compressible_stag/TURB_PDFS/GNUmakefile @@ -11,14 +11,15 @@ PRECISION = DOUBLE USE_MPI = TRUE USE_OMP = FALSE -USE_CUDA = FALSE +USE_CUDA = TRUE TINY_PROFILE = FALSE ################################################### #EBASE = main -EBASE = main_decomp +#EBASE = main_decomp +EBASE = main_multisteps include $(AMREX_HOME)/Tools/GNUMake/Make.defs diff --git a/exec/compressible_stag/TURB_PDFS/build_perlmutter.sh b/exec/compressible_stag/TURB_PDFS/build_perlmutter.sh new file mode 100755 index 000000000..37901f67e --- /dev/null +++ b/exec/compressible_stag/TURB_PDFS/build_perlmutter.sh @@ -0,0 +1,20 @@ +#!/usr/bin/bash + +# required dependencies +module load PrgEnv-gnu +module load craype +module load craype-x86-milan + +module list + +# optimize CPU microarchitecture for AMD EPYC 3rd Gen (Milan/Zen3) +# note: the cc/CC/ftn wrappers below add those +export CXXFLAGS="-march=znver3" +export CFLAGS="-march=znver3" + +# compiler environment hints +export CC=cc +export CXX=CC +export FC=ftn + +make -j10 USE_CUDA=FALSE MAX_SPEC=2 USE_ASSERTION=TRUE DEBUG=FALSE diff --git 
a/exec/compressible_stag/TURB_PDFS/main_multisteps.cpp b/exec/compressible_stag/TURB_PDFS/main_multisteps.cpp new file mode 100644 index 000000000..a5c9f35f2 --- /dev/null +++ b/exec/compressible_stag/TURB_PDFS/main_multisteps.cpp @@ -0,0 +1,526 @@ +#include +#include + +#include +#include +#include + +using namespace amrex; +using namespace std; + +static +void +PrintUsage (const char* progName) +{ + Print() << std::endl + << "This utility computes PDF of vorticity and divergence, and various thermodynamic scalars," << std::endl; + + Print() << "Usage:" << '\n'; + Print() << progName << " " << std::endl + << "OR" << std::endl + << progName << std::endl + << " steps=" << std::endl + << " nbins= " << std::endl + << " range= " << std::endl + << std::endl; + + exit(1); +} + + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); + + { + + if (argc == 1) { + PrintUsage(argv[0]); + } + + ParmParse pp; + + std::vector steps; + pp.queryarr("steps",steps); + int nsteps = steps.size(); + Print() << "number of steps to process: " << nsteps << std::endl; + + Vector scalar_out(5); + scalar_out[0] = amrex::Concatenate("div_pdf_",steps[0],9); + scalar_out[0] = scalar_out[0] + "_"; + scalar_out[0] = amrex::Concatenate(scalar_out[0],steps[nsteps-1],9); + scalar_out[1] = amrex::Concatenate("vortx_pdf_",steps[0],9); + scalar_out[1] = scalar_out[1] + "_"; + scalar_out[1] = amrex::Concatenate(scalar_out[1],steps[nsteps-1],9); + scalar_out[2] = amrex::Concatenate("vorty_pdf_",steps[0],9); + scalar_out[2] = scalar_out[2] + "_"; + scalar_out[2] = amrex::Concatenate(scalar_out[2],steps[nsteps-1],9); + scalar_out[3] = amrex::Concatenate("vortz_pdf_",steps[0],9); + scalar_out[3] = scalar_out[3] + "_"; + scalar_out[3] = amrex::Concatenate(scalar_out[3],steps[nsteps-1],9); + scalar_out[4] = amrex::Concatenate("vort_pdf_",steps[0],9); + scalar_out[4] = scalar_out[4] + "_"; + scalar_out[4] = amrex::Concatenate(scalar_out[4],steps[nsteps-1],9); + + int nbins; + pp.get("nbins", nbins); + + Real range; + pp.get("range",range); + + Vector > bins; + Vector count(5,0); + Vector totbin(5,0); + for (int i=0; i<5; ++i) { + bins.push_back(Vector (nbins+1,0.)); + } + + int halfbin = nbins/2; + Real hbinwidth = range/nbins; + Real binwidth = 2.*range/nbins; + + for (int step=0; step> str; + + // read in number of components from header + int ncomp; + x >> ncomp; + + // read in variable names from header + int flag = 0; + int vort_ind, div_ind, velx_sol_ind, vely_sol_ind, velz_sol_ind, velx_dil_ind, vely_dil_ind, velz_dil_ind; + for (int n=0; n> str; + if (str == "vort") vort_ind = flag; + if (str == "div") div_ind = flag; + if (str == "ux_s") velx_sol_ind = flag; + if (str == "uy_s") vely_sol_ind = flag; + if (str == "uz_s") velz_sol_ind = flag; + flag ++; + } + + // read in dimensionality from header + int dim; + x >> dim; + + // read in time + Real time; + x >> time; + + // read in finest level + int finest_level; + x >> finest_level; + + // read in prob_lo and prob_hi + amrex::GpuArray prob_lo, prob_hi; + for (int i=0; i<3; ++i) { + x >> prob_lo[i]; + } + for (int i=0; i<3; ++i) { + x >> prob_hi[i]; + } + + // now read in the plotfile data + // check to see whether the user pointed to the plotfile base directory + // or the data itself + if (amrex::FileExists(iFile+"/Level_0/Cell_H")) { + iFile += "/Level_0/Cell"; + } + if (amrex::FileExists(iFile+"/Level_00/Cell_H")) { + iFile += "/Level_00/Cell"; + } + + // storage for the input coarse and fine MultiFabs + MultiFab mf; + + // read in plotfile mf to MultiFab + 
VisMF::Read(mf, iFile); + + // get BoxArray and DistributionMapping + BoxArray ba = mf.boxArray(); + DistributionMapping dmap = mf.DistributionMap(); + + // physical dimensions of problem + RealBox real_box({AMREX_D_DECL(prob_lo[0],prob_lo[1],prob_lo[2])}, + {AMREX_D_DECL(prob_hi[0],prob_hi[1],prob_hi[2])}); + + // single box with the enire domain + Box domain = ba.minimalBox().enclosedCells(); + + Real ncells = (double) domain.numPts(); + + // set to 1 (periodic) + Vector is_periodic(3,1); + + Geometry geom(domain,&real_box,CoordSys::cartesian,is_periodic.data()); + + const Real* dx = geom.CellSize(); + + //////////////////////////////////////////////////////////////////////// + ////////////// velocity Laplacian PDFs///////////// //////////////////// + //////////////////////////////////////////////////////////////////////// + MultiFab vel_grown(ba,dmap,6,1); + MultiFab vel_sol (ba,dmap,3,1); + + // copy shifted velocity components from mf into vel_grown + Copy(vel_grown,mf,velx_sol_ind,0,1,0); // sol + Copy(vel_grown,mf,vely_sol_ind,1,1,0); // sol + Copy(vel_grown,mf,velz_sol_ind,2,1,0); // sol + + Copy(vel_grown,mf,velx_dil_ind,3,1,0); // dil + Copy(vel_grown,mf,vely_dil_ind,4,1,0); // dil + Copy(vel_grown,mf,velz_dil_ind,5,1,0); // dil + + Copy(vel_sol,mf,velx_sol_ind,0,1,0); // sol + Copy(vel_sol,mf,vely_sol_ind,1,1,0); // sol + Copy(vel_sol,mf,velz_sol_ind,2,1,0); // sol + + // fill ghost cells of vel_grown + vel_grown.FillBoundary(geom.periodicity()); + vel_sol .FillBoundary(geom.periodicity()); + + //////////////////////////////////////////////////////////////////////// + ///////////////////////// scalar PDFs ///////////////////////////////// + //////////////////////////////////////////////////////////////////////// + MultiFab scalar(ba,dmap,4,0); // vort_mag, div, vort_x, vort_y, vort_z + scalar.setVal(0.0); + Copy(scalar,mf,div_ind,0,1,0); + + // Compute vorticity components and store in scalar + for ( MFIter mfi(vel_sol,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + Array4 const& sol = vel_sol .array(mfi); + Array4 const& sca = scalar .array(mfi); + + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + // dw/dy - dv/dz + sca(i,j,k,1) = + (sol(i,j+1,k,velz_sol_ind) - sol(i,j-1,k,velz_sol_ind)) / (2.*dx[1]) - + (sol(i,j,k+1,vely_sol_ind) - sol(i,j,k-1,vely_sol_ind)) / (2.*dx[2]); + + // dv/dx - du/dy + sca(i,j,k,2) = + (sol(i+1,j,k,vely_sol_ind) - sol(i-1,j,k,vely_sol_ind)) / (2.*dx[0]) - + (sol(i,j+1,k,velx_sol_ind) - sol(i,j-1,k,velx_sol_ind)) / (2.*dx[1]); + + // du/dz - dw/dx + sca(i,j,k,3) = + (sol(i,j,k+1,velx_sol_ind) - sol(i,j,k-1,velx_sol_ind)) / (2.*dx[2]) - + (sol(i+1,j,k,velz_sol_ind) - sol(i-1,j,k,velz_sol_ind)) / (2.*dx[0]); + + } + } + } + } + + // compute spatial mean + Real mean_div = scalar.sum(0) / (ncells); + Real mean_vortx = scalar.sum(1) / (ncells); + Real mean_vorty = scalar.sum(2) / (ncells); + Real mean_vortz = scalar.sum(3) / (ncells); + + // get fluctuations + scalar.plus(-1.0*mean_div, 0, 1); + scalar.plus(-1.0*mean_vortx, 1, 1); + scalar.plus(-1.0*mean_vorty, 2, 1); + scalar.plus(-1.0*mean_vortz, 3, 1); + + // get rms + Real rms_div = scalar.norm2(0) / sqrt(ncells); + Real rms_vortx = scalar.norm2(1) / sqrt(ncells); + Real rms_vorty = scalar.norm2(2) / sqrt(ncells); + Real rms_vortz = scalar.norm2(3) / sqrt(ncells); + + // scale by rms + scalar.mult(1.0/rms_div, 0, 1); + 
scalar.mult(1.0/rms_vortx, 1, 1); + scalar.mult(1.0/rms_vorty, 2, 1); + scalar.mult(1.0/rms_vortz, 3, 1); + + // ompute pdfs + for (int m = 0; m < 4; ++m) { + + for ( MFIter mfi(scalar,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& sca = scalar.array(mfi); + + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + int index = floor((sca(i,j,k,m) + hbinwidth)/binwidth); + index += halfbin; + + if( index >=0 && index <= nbins) { + bins[m][index] += 1; + totbin[m]++; + } + + count[m]++; + + } + } + } + + } // end MFIter + ParallelDescriptor::ReduceRealSum(bins[m].dataPtr(),nbins+1); + ParallelDescriptor::ReduceLongSum(count[m]); + ParallelDescriptor::ReduceLongSum(totbin[m]); + + Print() << "Points outside of range "<< count[m] - totbin[m] << " " << + (double)(count[m]-totbin[m])/count[m] << std::endl; + } + + // ompute pdfs vorticity + for ( MFIter mfi(scalar,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& sca = scalar.array(mfi); + + for (auto n = 1; n < 4; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + int index = floor((sca(i,j,k,n) + hbinwidth)/binwidth); + index += halfbin; + + if( index >=0 && index <= nbins) { + bins[4][index] += 1; + totbin[4]++; + } + + count[4]++; + + } + } + } + } + + } // end MFIter + ParallelDescriptor::ReduceRealSum(bins[4].dataPtr(),nbins+1); + ParallelDescriptor::ReduceLongSum(count[4]); + ParallelDescriptor::ReduceLongSum(totbin[4]); + + Print() << "Points outside of range "<< count[4] - totbin[4] << " " << + (double)(count[4]-totbin[4])/count[4] << std::endl; + + } // end nsteps + + // print out contents of bins to the screen + for (int m=0; m<5; ++m) { + for (int i=0; i bins(nbins+1,0.); +// +// int halfbin = nbins/2; +// Real hbinwidth = range/nbins; +// Real binwidth = 2.*range/nbins; +// amrex::Long count=0; +// amrex::Long totbin=0; +// for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0; +// +// for ( MFIter mfi(vel_decomp,false); mfi.isValid(); ++mfi ) { +// +// const Box& bx = mfi.validbox(); +// const auto lo = amrex::lbound(bx); +// const auto hi = amrex::ubound(bx); +// +// const Array4& vel = vel_decomp.array(mfi); +// +// for (auto n = 0; n < 3; ++n) { +// for (auto k = lo.z; k <= hi.z; ++k) { +// for (auto j = lo.y; j <= hi.y; ++j) { +// for (auto i = lo.x; i <= hi.x; ++i) { +// +// int index = floor((vel(i,j,k,n) + hbinwidth)/binwidth); +// index += halfbin; +// +// if( index >=0 && index <= nbins) { +// bins[index] += 1; +// totbin++; +// } +// +// count++; +// +// } +// } +// } +// } +// +// } // end MFIter +// +// ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1); +// ParallelDescriptor::ReduceLongSum(count); +// ParallelDescriptor::ReduceLongSum(totbin); +// Print() << "Points outside of range "<< count - totbin << " " << +// (double)(count-totbin)/count << std::endl; +// +// // print out contents of bins to the screen +// for (int i=0; i bins(nbins+1,0.); +// +// int halfbin = nbins/2; +// Real hbinwidth = range/nbins; +// Real binwidth = 2.*range/nbins; +// amrex::Long count=0; +// amrex::Long totbin=0; +// for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0; +// +// for ( MFIter mfi(vel_decomp,false); mfi.isValid(); ++mfi ) { +// +// const Box& bx = 
mfi.validbox(); +// const auto lo = amrex::lbound(bx); +// const auto hi = amrex::ubound(bx); +// +// const Array4& vel = vel_decomp.array(mfi); +// +// for (auto n = 3; n < 6; ++n) { +// for (auto k = lo.z; k <= hi.z; ++k) { +// for (auto j = lo.y; j <= hi.y; ++j) { +// for (auto i = lo.x; i <= hi.x; ++i) { +// +// int index = floor((vel(i,j,k,n) + hbinwidth)/binwidth); +// index += halfbin; +// +// if( index >=0 && index <= nbins) { +// bins[index] += 1; +// totbin++; +// } +// +// count++; +// +// } +// } +// } +// } +// +// } // end MFIter +// +// ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1); +// ParallelDescriptor::ReduceLongSum(count); +// ParallelDescriptor::ReduceLongSum(totbin); +// Print() << "Points outside of range "<< count - totbin << " " << +// (double)(count-totbin)/count << std::endl; +// +// // print out contents of bins to the screen +// for (int i=0; i Date: Fri, 25 Oct 2024 19:35:24 -0400 Subject: [PATCH 103/151] build on Frontier --- exec/compressible_stag/GNUmakefile | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/exec/compressible_stag/GNUmakefile b/exec/compressible_stag/GNUmakefile index 1a5db02f5..790c38d61 100644 --- a/exec/compressible_stag/GNUmakefile +++ b/exec/compressible_stag/GNUmakefile @@ -25,7 +25,7 @@ ifeq ($(USE_HEFFTE_FFTW),TRUE) else ifeq ($(USE_HEFFTE_CUFFT),TRUE) HEFFTE_HOME ?= ../../../heffte/ else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) - HEFFTE_HOME ?= ../../../heffte/ + HEFFTE_HOME ?= ../../../heffte-org/build_noaware/ endif include $(AMREX_HOME)/Tools/GNUMake/Make.defs @@ -58,11 +58,16 @@ VPATH_LOCATIONS += ../../src_common/ INCLUDE_LOCATIONS += ../../src_common/ ifeq ($(USE_HEFFTE_FFTW),TRUE) - include $(HEFFTE_HOME)/src/Make.package + DEFINES += -DHEFFTE_FFTW + LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f else ifeq ($(USE_HEFFTE_CUFFT),TRUE) - include $(HEFFTE_HOME)/src/Make.package + DEFINES += -DHEFFTE_CUFFT else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) - include $(HEFFTE_HOME)/src/Make.package + DEFINES += -DHEFFTE_ROCFFT + VPATH_LOCATIONS += $(HEFFTE_HOME)/include + INCLUDE_LOCATIONS += $(HEFFTE_HOME)/include + LIBRARY_LOCATIONS += $(HEFFTE_HOME)/lib + LIBRARIES += -lheffte endif include $(AMREX_HOME)/Src/Base/Make.package @@ -88,15 +93,6 @@ ifeq ($(DO_TURB), TRUE) DEFINES += -DTURB endif -ifeq ($(USE_HEFFTE_FFTW),TRUE) - DEFINES += -DHEFFTE_FFTW - LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f -else ifeq ($(USE_HEFFTE_CUFFT),TRUE) - DEFINES += -DHEFFTE_CUFFT -else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) - DEFINES += -DHEFFTE_ROCFFT -endif - MAXSPECIES := $(strip $(MAX_SPEC)) DEFINES += -DMAX_SPECIES=$(MAXSPECIES) From 61be49aa0f0398c90bf7f226ec065bc6834c986e Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Fri, 25 Oct 2024 19:36:40 -0400 Subject: [PATCH 104/151] more build files for Frontier --- .../SPECTRAL_FILTER/build_frontier.sh | 27 ++++++++++++++++++ exec/compressible_stag/TURB_PDFS/GNUmakefile | 3 +- .../TURB_PDFS/build_frontier.sh | 28 +++++-------------- .../build_frontier_101324.sh | 24 ++++++++++++++++ 4 files changed, 60 insertions(+), 22 deletions(-) create mode 100755 exec/compressible_stag/SPECTRAL_FILTER/build_frontier.sh create mode 100755 exec/compressible_stag/build_frontier_101324.sh diff --git a/exec/compressible_stag/SPECTRAL_FILTER/build_frontier.sh b/exec/compressible_stag/SPECTRAL_FILTER/build_frontier.sh new file mode 100755 index 000000000..dcb05e97b --- /dev/null +++ b/exec/compressible_stag/SPECTRAL_FILTER/build_frontier.sh @@ -0,0 +1,27 @@ 
+#!/usr/bin/bash + +## load necessary modules +module load craype-accel-amd-gfx90a +module load amd-mixed +#module load rocm/5.2.0 # waiting for 5.6 for next bump +module load cray-mpich/8.1.23 +module load cce/15.0.0 # must be loaded after rocm + +# GPU-aware MPI +export MPICH_GPU_SUPPORT_ENABLED=1 + +# optimize CUDA compilation for MI250X +export AMREX_AMD_ARCH=gfx90a + +# compiler environment hints +##export CC=$(which hipcc) +##export CXX=$(which hipcc) +##export FC=$(which ftn) +##export CFLAGS="-I${ROCM_PATH}/include" +##export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed" +##export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64 ${PE_MPICH_GTL_DIR_amd_gfx90a} -lmpi_gtl_hsa" +export LDFLAGS="-L${MPICH_DIR}/lib -lmpi ${CRAY_XPMEM_POST_LINK_OPTS} -lxpmem ${PE_MPICH_GTL_DIR_amd_gfx90a} ${PE_MPICH_GTL_LIBS_amd_gfx90a}" +export CXXFLAGS="-I${MPICH_DIR}/include" +export HIPFLAGS="--amdgpu-target=gfx90a" + +make -j10 USE_HIP=TRUE USE_HEFFTE_ROCFFT=TRUE USE_ASSERTION=TRUE diff --git a/exec/compressible_stag/TURB_PDFS/GNUmakefile b/exec/compressible_stag/TURB_PDFS/GNUmakefile index e1fcfec48..41e137cab 100644 --- a/exec/compressible_stag/TURB_PDFS/GNUmakefile +++ b/exec/compressible_stag/TURB_PDFS/GNUmakefile @@ -18,7 +18,8 @@ TINY_PROFILE = FALSE ################################################### #EBASE = main -EBASE = main_decomp +#EBASE = main_decomp +EBASE = main_multisteps include $(AMREX_HOME)/Tools/GNUMake/Make.defs diff --git a/exec/compressible_stag/TURB_PDFS/build_frontier.sh b/exec/compressible_stag/TURB_PDFS/build_frontier.sh index 9eb971164..36b9b69a0 100755 --- a/exec/compressible_stag/TURB_PDFS/build_frontier.sh +++ b/exec/compressible_stag/TURB_PDFS/build_frontier.sh @@ -1,27 +1,13 @@ #!/usr/bin/bash ## load necessary modules -module load craype-accel-amd-gfx90a -module load amd-mixed -#module load rocm/5.2.0 # waiting for 5.6 for next bump -module load cray-mpich/8.1.23 -module load cce/15.0.0 # must be loaded after rocm - -# GPU-aware MPI -export MPICH_GPU_SUPPORT_ENABLED=1 - -# optimize CUDA compilation for MI250X -export AMREX_AMD_ARCH=gfx90a +module load PrgEnv-cray +module load cray-mpich +module load cce # compiler environment hints -##export CC=$(which hipcc) -##export CXX=$(which hipcc) -##export FC=$(which ftn) -##export CFLAGS="-I${ROCM_PATH}/include" -##export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed" -##export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64 ${PE_MPICH_GTL_DIR_amd_gfx90a} -lmpi_gtl_hsa" -export LDFLAGS="-L${MPICH_DIR}/lib -lmpi ${CRAY_XPMEM_POST_LINK_OPTS} -lxpmem ${PE_MPICH_GTL_DIR_amd_gfx90a} ${PE_MPICH_GTL_LIBS_amd_gfx90a}" -export CXXFLAGS="-I${MPICH_DIR}/include" -export HIPFLAGS="--amdgpu-target=gfx90a" +export CC=$(which craycc) +export CXX=$(which craycc) +export FC=$(which crayftn) -make -j10 USE_CUDA=FALSE USE_HIP=TRUE USE_ASSERTION=TRUE +make -j10 USE_CUDA=FALSE USE_HIP=FALSE USE_ASSERTION=TRUE diff --git a/exec/compressible_stag/build_frontier_101324.sh b/exec/compressible_stag/build_frontier_101324.sh new file mode 100755 index 000000000..ac9401b7a --- /dev/null +++ b/exec/compressible_stag/build_frontier_101324.sh @@ -0,0 +1,24 @@ +#!/usr/bin/bash + +## load necessary modules +module load craype-accel-amd-gfx90a +module load rocm +module load cray-mpich +module load cce # must be loaded after rocm +module load cray-fftw + +# GPU-aware MPI +export MPICH_GPU_SUPPORT_ENABLED=1 + +# optimize CUDA compilation for MI250X +export AMREX_AMD_ARCH=gfx90a + +# compiler environment hints +export CC=$(which hipcc) +export CXX=$(which hipcc) +export 
FC=$(which ftn) +export CFLAGS="-I${ROCM_PATH}/include" +export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed" +export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64 ${PE_MPICH_GTL_DIR_amd_gfx90a} -lmpi_gtl_hsa" + +make -j8 USE_HIP=TRUE DO_TURB=TRUE MAX_SPEC=2 USE_HEFFTE_ROCFFT=FALSE USE_ASSERTION=TRUE From b14f0204faf2d4829216d69c05d1cf2e6d268da0 Mon Sep 17 00:00:00 2001 From: Changho Kim Date: Mon, 28 Oct 2024 01:29:20 -0700 Subject: [PATCH 105/151] exec/compressible_stag/test_NO2_dimerization_neq_wall: entropy_check.py and zavg3.sh --- .../test_NO2_dimerization_neq_wall/entropy_check.py | 8 ++++---- .../test_NO2_dimerization_neq_wall/zavg3.sh | 5 +++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/exec/compressible_stag/test_NO2_dimerization_neq_wall/entropy_check.py b/exec/compressible_stag/test_NO2_dimerization_neq_wall/entropy_check.py index 99da07311..17650d620 100644 --- a/exec/compressible_stag/test_NO2_dimerization_neq_wall/entropy_check.py +++ b/exec/compressible_stag/test_NO2_dimerization_neq_wall/entropy_check.py @@ -80,7 +80,7 @@ tmp1 = mu2face[k]*F2z[k]/Tface[k] term2[k] += (tmp2-tmp1)/dz -# Q.gradT/T^2 +# (Q.gradT)/T^2 gradTz = np.zeros(nz) for k in range(1,nz): gradTz[k] = (prim4[k]-prim4[k-1])/dz @@ -107,7 +107,7 @@ tmp2 = F2z[k+1]*gradmu2Tz[k+1] term4[k] += 0.5*(tmp1+tmp2) -# sum muk*Mk*Omegak +# (sum muk*Mk*Omegak)/T M1 = 46.0055 M2 = 92.0110 term5 = np.zeros(nz) @@ -136,7 +136,7 @@ for k in range(1,nz-1): term14[k] = (Qz[k+1]-Qz[k])/dz/prim4[k] -# sum muk*(div(Fk)-Mk*Omegak)/T +# (sum muk*(div(Fk)-Mk*Omegak))/T term15 = np.zeros(nz) for k in range(1,nz-1): tmp = (F1z[k+1]-F1z[k])/dz @@ -148,5 +148,5 @@ # output for final terms outfile3 = "res.entropy_check" -np.savetxt(outfile3,np.column_stack((z_in,term1,term2,term3,term4,term5,term11,term12,term13,term14,term15))) +np.savetxt(outfile3,np.column_stack((z_in,term1,term2,term3,term4,term5,term11,term12,term13,term14,term15,mu1,mu2,Omega1,Omega2))) print("** %s generated" % outfile3) diff --git a/exec/compressible_stag/test_NO2_dimerization_neq_wall/zavg3.sh b/exec/compressible_stag/test_NO2_dimerization_neq_wall/zavg3.sh index c5736afe4..0f3ed6522 100755 --- a/exec/compressible_stag/test_NO2_dimerization_neq_wall/zavg3.sh +++ b/exec/compressible_stag/test_NO2_dimerization_neq_wall/zavg3.sh @@ -13,3 +13,8 @@ fi $exec -p $pltfile -o res.zavg3 -v 10 rhoMean rhoEMean rhoYkMean_0 rhoYkMean_1 tMean pMean YkMean_0 YkMean_1 XkMean_0 XkMean_1 python entropy_check.py + +echo "mv res.zavg3 res.zavg3_${pltfile}" +mv res.zavg3 res.zavg3_${pltfile} +echo "mv res.entropy_check res.entropy_check_${pltfile}" +mv res.entropy_check res.entropy_check_${pltfile} From 66ae99d615baf33b2b8936347a7d8806ef84a5f0 Mon Sep 17 00:00:00 2001 From: Changho Kim Date: Wed, 30 Oct 2024 04:22:42 -0700 Subject: [PATCH 106/151] exec/reactDiff/test_Schlogl_2d: Schlogl example added --- exec/reactDiff/test_Schlogl_2d/clean.sh | 3 + .../test_Schlogl_2d/inputs_Schlogl_2d | 101 ++++++++++++++++++ exec/reactDiff/test_Schlogl_2d/job_script.sh | 9 ++ exec/reactDiff/test_Schlogl_2d/submit_job.sh | 5 + 4 files changed, 118 insertions(+) create mode 100755 exec/reactDiff/test_Schlogl_2d/clean.sh create mode 100644 exec/reactDiff/test_Schlogl_2d/inputs_Schlogl_2d create mode 100644 exec/reactDiff/test_Schlogl_2d/job_script.sh create mode 100755 exec/reactDiff/test_Schlogl_2d/submit_job.sh diff --git a/exec/reactDiff/test_Schlogl_2d/clean.sh b/exec/reactDiff/test_Schlogl_2d/clean.sh new file mode 100755 index 000000000..d5c9d18e1 --- /dev/null +++ 
b/exec/reactDiff/test_Schlogl_2d/clean.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +rm -rf slurm-*.out plt* averagedDensity.txt diff --git a/exec/reactDiff/test_Schlogl_2d/inputs_Schlogl_2d b/exec/reactDiff/test_Schlogl_2d/inputs_Schlogl_2d new file mode 100644 index 000000000..c54794ec8 --- /dev/null +++ b/exec/reactDiff/test_Schlogl_2d/inputs_Schlogl_2d @@ -0,0 +1,101 @@ +# Problem specification +prob_lo = 0.0 0.0 # physical lo coordinate +prob_hi = 32.0 32.0 # physical hi coordinate + +# number of cells in domain and maximum number of cells in a box +n_cells = 64 64 +max_grid_size = 16 16 + +# to compute cell volume in 2D problems +cell_depth = 1. + +# Time-step control +fixed_dt = 0.001 + +# Controls for number of steps between actions +max_step = 1000000 +plot_int = 100000 +struct_fact_int = 1 +n_steps_skip = 100000 + +seed = 0 + +nspecies = 1 +nreaction = 4 + +prob_type = 0 + +n_init_in_1 = 1000. + +integer_populations = 1 + +# 0=D+R (first-order splitting) +# 1=(1/2)R + D + (1/2)R (Strang option 1) +# 2=(1/2)D + R + (1/2)D (Strang option 2) +# -1=unsplit forward Euler +# -2=unsplit explicit midpoint +# -3=unsplit multinomial diffusion +# -4=unsplit implicit midpoint +temporal_integrator = 1 + +# only used for split schemes (temporal_integrator>=0) +# 0=explicit trapezoidal predictor/corrector +# 1=Crank-Nicolson semi-implicit +# 2=explicit midpoint +# 3=multinomial diffusion +# 4=forward Euler +reactDiff_diffusion_type = 0 + +# Fickian diffusion coeffs +D_Fick = 1. + +variance_coef_mass = 1. +initial_variance_mass = 1. + +# how to compute n on faces for stochastic weighting +# 1=arithmetic (with C0-Heaviside), 2=geometric, 3=harmonic +# 10=arithmetic average with discontinuous Heaviside function +# 11=arithmetic average with C1-smoothed Heaviside function +# 12=arithmetic average with C2-smoothed Heaviside function +avg_type = 1 + +# only used for split schemes (temporal_integrator>=0) +# 0=first-order (deterministic, tau leaping, CLE, or SSA) +# 1=second-order (determinisitc, tau leaping, or CLE only) +reactDiff_reaction_type = 0 + +# 0=deterministic; 1=CLE; 2=SSA; 3=tau leap +reaction_type = 2 + +# Schlog model is: +# (1) 2X --> 3X +# (2) 3X --> 2X +# (3) 0 --> X +# (4) X --> 0 +stoich_1R = 2 +stoich_1P = 3 +stoich_2R = 3 +stoich_2P = 2 +stoich_3R = 0 +stoich_3P = 1 +stoich_4R = 1 +stoich_4P = 0 + +# reaction rate constant for each reaction (assuming Law of Mass Action holds) +# using rate_multiplier, reaction rates can be changed by the same factor +# if include_discrete_LMA_correction, n^2 and n^3 in rate expressions become +# n*(n-1/dv) and n*(n-1/dv)*(n-2/dv). +#rate_const = 1e-4 1e-7 200. 0.2 # thermodynamic equilibrium +rate_const = 1e-4 2e-7 200. 0.1 # case where detailed balance is not satisfied + +rate_multiplier = 1. +include_discrete_LMA_correction = 1 + +# Boundary conditions +# ---------------------- +# BC specifications: +# -1 = periodic +# 1 = wall (Neumann) +# 2 = reservoir (Dirichlet) +bc_mass_lo = -1 -1 +bc_mass_hi = -1 -1 diff --git a/exec/reactDiff/test_Schlogl_2d/job_script.sh b/exec/reactDiff/test_Schlogl_2d/job_script.sh new file mode 100644 index 000000000..86337152e --- /dev/null +++ b/exec/reactDiff/test_Schlogl_2d/job_script.sh @@ -0,0 +1,9 @@ +#! 
/bin/bash -l +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=16 +#SBATCH --partition test +#SBATCH --time=0-00:30:00 + +# COMMANDS HERE + +srun -n 16 ../main2d.gnu.MPI.ex inputs_Schlogl_2d diff --git a/exec/reactDiff/test_Schlogl_2d/submit_job.sh b/exec/reactDiff/test_Schlogl_2d/submit_job.sh new file mode 100755 index 000000000..66c0ac813 --- /dev/null +++ b/exec/reactDiff/test_Schlogl_2d/submit_job.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +# LOAD NECESSARY MODULES + +sbatch job_script.sh From ca1e5045c9d014296a431bc93bd543fd0279572e Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Thu, 31 Oct 2024 09:20:40 -0700 Subject: [PATCH 107/151] modified build script for Perlmutter --- exec/compressible_stag/build_perlmutter_101724.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exec/compressible_stag/build_perlmutter_101724.sh b/exec/compressible_stag/build_perlmutter_101724.sh index 819004290..e2b81f938 100755 --- a/exec/compressible_stag/build_perlmutter_101724.sh +++ b/exec/compressible_stag/build_perlmutter_101724.sh @@ -27,4 +27,4 @@ export FC=ftn export CUDACXX=$(which nvcc) export CUDAHOSTCXX=CC -make -j10 USE_CUDA=TRUE DO_TURB=TRUE MAX_SPEC=2 USE_HEFFTE_CUFFT=TRUE USE_ASSERTION=TRUE +make -j10 USE_CUDA=TRUE DO_TURB=TRUE MAX_SPEC=2 USE_FFT=TRUE USE_ASSERTION=TRUE From de7ad327bfe07ff299f803db73c6d6819fac13b1 Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Thu, 31 Oct 2024 09:21:02 -0700 Subject: [PATCH 108/151] small fixes to get amrex fft working --- src_analysis/TurbSpectra_distributed.cpp | 266 +++++++++++++---------- 1 file changed, 146 insertions(+), 120 deletions(-) diff --git a/src_analysis/TurbSpectra_distributed.cpp b/src_analysis/TurbSpectra_distributed.cpp index 84c0da9fa..bf9c6a637 100644 --- a/src_analysis/TurbSpectra_distributed.cpp +++ b/src_analysis/TurbSpectra_distributed.cpp @@ -32,14 +32,21 @@ void TurbSpectrumScalar(const MultiFab& variables, DistributionMapping dm = variables.DistributionMap(); BoxArray ba = variables.boxArray(); - MultiFab cov(ba, dm, ncomp, 0); + // box array and dmap for FFT + Box cdomain = geom.Domain(); + cdomain.setBig(0,cdomain.length(0)/2); + auto cba = amrex::decompose(cdomain, ParallelContext::NProcsSub(), + {AMREX_D_DECL(true,true,false)}); + DistributionMapping cdm = amrex::FFT::detail::make_iota_distromap(cba.size()); + + MultiFab cov(cba, cdm, ncomp, 0); MultiFab mf; mf.define(ba, dm, 1, 0);; for (int comp=0; comp r2c(geom.Domain()); r2c.forward(mf,cmf); @@ -93,22 +100,27 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, // get box array and distribution map of vel DistributionMapping dm = vel.DistributionMap(); BoxArray ba = vel.boxArray(); + + // box array and dmap for FFT + Box cdomain = geom.Domain(); + cdomain.setBig(0,cdomain.length(0)/2); + auto cba = amrex::decompose(cdomain, ParallelContext::NProcsSub(), + {AMREX_D_DECL(true,true,false)}); + DistributionMapping cdm = amrex::FFT::detail::make_iota_distromap(cba.size()); // each MPI rank gets storage for its piece of the fft - cMultiFab spectral_field_Tx(ba,dm,1,0); // totalx - cMultiFab spectral_field_Ty(ba,dm,1,0); // totaly - cMultiFab spectral_field_Tz(ba,dm,1,0); // totalz - cMultiFab spectral_field_Sx(ba,dm,1,0); // solenoidalx - cMultiFab spectral_field_Sy(ba,dm,1,0); // solenoidaly - cMultiFab spectral_field_Sz(ba,dm,1,0); // solenoidalz - cMultiFab spectral_field_Dx(ba,dm,1,0); // dilatationalx - cMultiFab spectral_field_Dy(ba,dm,1,0); // dilatationaly - cMultiFab spectral_field_Dz(ba,dm,1,0); // dilatationalz + cMultiFab 
spectral_field_Tx(cba,cdm,1,0); // totalx + cMultiFab spectral_field_Ty(cba,cdm,1,0); // totaly + cMultiFab spectral_field_Tz(cba,cdm,1,0); // totalz + cMultiFab spectral_field_Sx(cba,cdm,1,0); // solenoidalx + cMultiFab spectral_field_Sy(cba,cdm,1,0); // solenoidaly + cMultiFab spectral_field_Sz(cba,cdm,1,0); // solenoidalz + cMultiFab spectral_field_Dx(cba,cdm,1,0); // dilatationalx + cMultiFab spectral_field_Dy(cba,cdm,1,0); // dilatationaly + cMultiFab spectral_field_Dz(cba,cdm,1,0); // dilatationalz MultiFab vel_single(ba, dm, 1, 0); - int r2c_direction = 0; - // ForwardTransform // X { @@ -166,60 +178,59 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, GyC = (sin(2.0*M_PI*kj/ny)-0.0)/dx[1]; GzR = (cos(2.0*M_PI*kk/nz)-1.0)/dx[2]; GzC = (sin(2.0*M_PI*kk/nz)-0.0)/dx[2]; + + // Scale Total velocity FFT components + spectral_tx(i,j,k) *= (1.0/sqrtnpts); + spectral_ty(i,j,k) *= (1.0/sqrtnpts); + spectral_tz(i,j,k) *= (1.0/sqrtnpts); + + // Inverse Laplacian + Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; + + // Divergence of vel + Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + + spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + + spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; + Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + + spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + + spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; + + if (Lap < 1.0e-12) { // zero mode for no bulk motion + spectral_dx(i,j,k) *= 0.0; + spectral_dy(i,j,k) *= 0.0; + spectral_dz(i,j,k) *= 0.0; + } + else { + + // Dilatational velocity + GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, + (divC*GxR - divR*GxC) / Lap); + spectral_dx(i,j,k) = copy_dx; + + GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, + (divC*GyR - divR*GyC) / Lap); + spectral_dy(i,j,k) = copy_dy; + + GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, + (divC*GzR - divR*GzC) / Lap); + spectral_dz(i,j,k) = copy_dz; + } + + // Solenoidal velocity + spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); + spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); + spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); } else { // conjugate amrex::Abort("check the code; i should not go beyond bx.length(0)/2"); } - // Scale Total velocity FFT components - spectral_tx(i,j,k) *= (1.0/sqrtnpts); - spectral_ty(i,j,k) *= (1.0/sqrtnpts); - spectral_tz(i,j,k) *= (1.0/sqrtnpts); - - // Inverse Laplacian - Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; - - // Divergence of vel - Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + - spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + - spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; - Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + - spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + - spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; - - if (Lap < 1.0e-12) { // zero mode for no bulk motion - spectral_dx(i,j,k) *= 0.0; - spectral_dy(i,j,k) *= 0.0; - spectral_dz(i,j,k) *= 0.0; - } - else { - - // Dilatational velocity - GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, - (divC*GxR - divR*GxC) / Lap); - spectral_dx(i,j,k) = copy_dx; - - GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, - (divC*GyR - divR*GyC) / Lap); - spectral_dy(i,j,k) = copy_dy; - - GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, - (divC*GzR - divR*GzC) / Lap); - 
spectral_dz(i,j,k) = copy_dz; - } - - // Solenoidal velocity - spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); - spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); - spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); - }); } - MultiFab cov(ba, dm, 3, 0); // total, solenoidal, dilatational + MultiFab cov(cba, cdm, 3, 0); // total, solenoidal, dilatational // Fill in the covariance multifab - Real sqrtnpts_gpu = sqrtnpts; Real scaling_gpu = scaling; for (MFIter mfi(cov); mfi.isValid(); ++mfi) { Array4 const& data = cov.array(mfi); @@ -235,35 +246,40 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, const Box& bx = mfi.validbox(); amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - Real re_x, re_y, re_z, im_x, im_y, im_z; - - re_x = spec_tx(i,j,k).real(); - im_x = spec_tx(i,j,k).imag(); - re_y = spec_ty(i,j,k).real(); - im_y = spec_ty(i,j,k).imag(); - re_z = spec_tz(i,j,k).real(); - im_z = spec_tz(i,j,k).imag(); - data(i,j,k,0) = (re_x*re_x + im_x*im_x + - re_y*re_y + im_y*im_y + - re_z*re_z + im_z*im_z)/(scaling_gpu); - re_x = spec_sx(i,j,k).real(); - im_x = spec_sx(i,j,k).imag(); - re_y = spec_sy(i,j,k).real(); - im_y = spec_sy(i,j,k).imag(); - re_z = spec_sz(i,j,k).real(); - im_z = spec_sz(i,j,k).imag(); - data(i,j,k,1) = (re_x*re_x + im_x*im_x + - re_y*re_y + im_y*im_y + - re_z*re_z + im_z*im_z)/(scaling_gpu); - re_x = spec_dx(i,j,k).real(); - im_x = spec_dx(i,j,k).imag(); - re_y = spec_dy(i,j,k).real(); - im_y = spec_dy(i,j,k).imag(); - re_z = spec_dz(i,j,k).real(); - im_z = spec_dz(i,j,k).imag(); - data(i,j,k,2) = (re_x*re_x + im_x*im_x + - re_y*re_y + im_y*im_y + - re_z*re_z + im_z*im_z)/(scaling_gpu); + if (i <= n_cells[0]/2) { + Real re_x, re_y, re_z, im_x, im_y, im_z; + + re_x = spec_tx(i,j,k).real(); + im_x = spec_tx(i,j,k).imag(); + re_y = spec_ty(i,j,k).real(); + im_y = spec_ty(i,j,k).imag(); + re_z = spec_tz(i,j,k).real(); + im_z = spec_tz(i,j,k).imag(); + data(i,j,k,0) = (re_x*re_x + im_x*im_x + + re_y*re_y + im_y*im_y + + re_z*re_z + im_z*im_z)/(scaling_gpu); + re_x = spec_sx(i,j,k).real(); + im_x = spec_sx(i,j,k).imag(); + re_y = spec_sy(i,j,k).real(); + im_y = spec_sy(i,j,k).imag(); + re_z = spec_sz(i,j,k).real(); + im_z = spec_sz(i,j,k).imag(); + data(i,j,k,1) = (re_x*re_x + im_x*im_x + + re_y*re_y + im_y*im_y + + re_z*re_z + im_z*im_z)/(scaling_gpu); + re_x = spec_dx(i,j,k).real(); + im_x = spec_dx(i,j,k).imag(); + re_y = spec_dy(i,j,k).real(); + im_y = spec_dy(i,j,k).imag(); + re_z = spec_dz(i,j,k).real(); + im_z = spec_dz(i,j,k).imag(); + data(i,j,k,2) = (re_x*re_x + im_x*im_x + + re_y*re_y + im_y*im_y + + re_z*re_z + im_z*im_z)/(scaling_gpu); + } + else { + amrex::Abort("check the code; i should not go beyond n_cells[0]/2"); + } }); } @@ -275,37 +291,37 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, // inverse Fourier transform solenoidal and dilatational components { amrex::FFT::R2C r2c(geom.Domain()); - MultiFab vel_decomp_single(ba, dm, 1, 0); + MultiFab vel_decomp_single(ba, dm, 1, 0); r2c.backward(spectral_field_Sx,vel_decomp_single); vel_decomp.ParallelCopy(vel_decomp_single, 0, 0, 1); } { amrex::FFT::R2C r2c(geom.Domain()); - MultiFab vel_decomp_single(ba, dm, 1, 0); + MultiFab vel_decomp_single(ba, dm, 1, 0); r2c.backward(spectral_field_Sy,vel_decomp_single); vel_decomp.ParallelCopy(vel_decomp_single, 0, 1, 1); } { amrex::FFT::R2C r2c(geom.Domain()); - MultiFab vel_decomp_single(ba, dm, 1, 0); + MultiFab vel_decomp_single(ba, dm, 1, 0); r2c.backward(spectral_field_Sz,vel_decomp_single); 
vel_decomp.ParallelCopy(vel_decomp_single, 0, 2, 1); } { amrex::FFT::R2C r2c(geom.Domain()); - MultiFab vel_decomp_single(ba, dm, 1, 0); + MultiFab vel_decomp_single(ba, dm, 1, 0); r2c.backward(spectral_field_Dx,vel_decomp_single); vel_decomp.ParallelCopy(vel_decomp_single, 0, 3, 1); } { amrex::FFT::R2C r2c(geom.Domain()); - MultiFab vel_decomp_single(ba, dm, 1, 0); + MultiFab vel_decomp_single(ba, dm, 1, 0); r2c.backward(spectral_field_Dy,vel_decomp_single); vel_decomp.ParallelCopy(vel_decomp_single, 0, 4, 1); } { amrex::FFT::R2C r2c(geom.Domain()); - MultiFab vel_decomp_single(ba, dm, 1, 0); + MultiFab vel_decomp_single(ba, dm, 1, 0); r2c.backward(spectral_field_Dz,vel_decomp_single); vel_decomp.ParallelCopy(vel_decomp_single, 0, 5, 1); } @@ -354,20 +370,25 @@ void IntegrateKScalar(const MultiFab& cov_mag, amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - int ki = i; - int kj = j; - if (j >= ny/2) kj = ny - j; - int kk = k; - if (k >= nz/2) kk = nz - k; - - Real dist = (ki*ki + kj*kj + kk*kk); - dist = std::sqrt(dist); - - if ( dist <= n_cells[0]/2-0.5) { - dist = dist+0.5; - int cell = int(dist); - amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); - amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); + if (i <= n_cells[0]/2) { + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); + amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); + } + } + else { + amrex::Abort("check the code; i should not go beyond n_cells[0]/2"); } }); } @@ -436,20 +457,25 @@ void IntegrateKVelocity(const MultiFab& cov_mag, amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - int ki = i; - int kj = j; - if (j >= ny/2) kj = ny - j; - int kk = k; - if (k >= nz/2) kk = nz - k; - - Real dist = (ki*ki + kj*kj + kk*kk); - dist = std::sqrt(dist); - - if ( dist <= n_cells[0]/2-0.5) { - dist = dist+0.5; - int cell = int(dist); - amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); - amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); + if (i <= n_cells[0]/2) { + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); + amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); + } + } + else { + amrex::Abort("check the code; i should not go beyond n_cells[0]/2"); } }); } From ce96feb668f1d597f0d504655b0fd9666258fb00 Mon Sep 17 00:00:00 2001 From: Ishan Srivastava Date: Fri, 1 Nov 2024 13:25:38 -0400 Subject: [PATCH 109/151] modified buld script for Frontier --- exec/compressible_stag/build_frontier_101324.sh | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/exec/compressible_stag/build_frontier_101324.sh b/exec/compressible_stag/build_frontier_101324.sh index ac9401b7a..01c565fd7 100755 --- a/exec/compressible_stag/build_frontier_101324.sh +++ b/exec/compressible_stag/build_frontier_101324.sh @@ -1,11 +1,7 @@ #!/usr/bin/bash ## load necessary modules -module load craype-accel-amd-gfx90a -module load rocm -module load cray-mpich -module load cce # must be loaded after rocm -module load cray-fftw +module load rocm/5.7.1 # 
GPU-aware MPI export MPICH_GPU_SUPPORT_ENABLED=1 @@ -21,4 +17,4 @@ export CFLAGS="-I${ROCM_PATH}/include" export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed" export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64 ${PE_MPICH_GTL_DIR_amd_gfx90a} -lmpi_gtl_hsa" -make -j8 USE_HIP=TRUE DO_TURB=TRUE MAX_SPEC=2 USE_HEFFTE_ROCFFT=FALSE USE_ASSERTION=TRUE +make -j8 USE_HIP=TRUE DO_TURB=TRUE MAX_SPEC=2 USE_FFT=TRUE USE_ASSERTION=TRUE From 08439dd9affc0c3f5375ff4976731fc914e137df Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 1 Nov 2024 17:01:28 -0400 Subject: [PATCH 110/151] simplification of fft setup --- src_analysis/TurbSpectra_distributed.cpp | 92 ++++++++---------------- 1 file changed, 30 insertions(+), 62 deletions(-) diff --git a/src_analysis/TurbSpectra_distributed.cpp b/src_analysis/TurbSpectra_distributed.cpp index bf9c6a637..8aa6a4350 100644 --- a/src_analysis/TurbSpectra_distributed.cpp +++ b/src_analysis/TurbSpectra_distributed.cpp @@ -23,34 +23,22 @@ void TurbSpectrumScalar(const MultiFab& variables, int ncomp = variables.nComp(); - long npts; Box domain = geom.Domain(); - npts = (domain.length(0)*domain.length(1)*domain.length(2)); + auto npts = domain.numPts(); Real sqrtnpts = std::sqrt(npts); - // get box array and distribution map of variables - DistributionMapping dm = variables.DistributionMap(); - BoxArray ba = variables.boxArray(); - - // box array and dmap for FFT - Box cdomain = geom.Domain(); - cdomain.setBig(0,cdomain.length(0)/2); - auto cba = amrex::decompose(cdomain, ParallelContext::NProcsSub(), - {AMREX_D_DECL(true,true,false)}); - DistributionMapping cdm = amrex::FFT::detail::make_iota_distromap(cba.size()); + amrex::FFT::R2C r2c(geom.Domain()); + + auto const& [cba, cdm] = r2c.getSpectralDataLayout(); MultiFab cov(cba, cdm, ncomp, 0); - MultiFab mf; - mf.define(ba, dm, 1, 0);; for (int comp=0; comp r2c(geom.Domain()); - r2c.forward(mf,cmf); - } + MultiFab mf(variables, amrex::make_alias, comp, 1); + cMultiFab cmf(cba, cdm, 1, 0); + + r2c.forward(mf,cmf); // Fill in the covariance multifab int comp_gpu = comp; @@ -92,21 +80,18 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, const GpuArray dx = geom.CellSizeArray(); - long npts; Box domain = geom.Domain(); - npts = (domain.length(0)*domain.length(1)*domain.length(2)); + auto npts = domain.numPts(); Real sqrtnpts = std::sqrt(npts); // get box array and distribution map of vel DistributionMapping dm = vel.DistributionMap(); BoxArray ba = vel.boxArray(); + amrex::FFT::R2C r2c(geom.Domain()); + // box array and dmap for FFT - Box cdomain = geom.Domain(); - cdomain.setBig(0,cdomain.length(0)/2); - auto cba = amrex::decompose(cdomain, ParallelContext::NProcsSub(), - {AMREX_D_DECL(true,true,false)}); - DistributionMapping cdm = amrex::FFT::detail::make_iota_distromap(cba.size()); + auto const& [cba, cdm] = r2c.getSpectralDataLayout(); // each MPI rank gets storage for its piece of the fft cMultiFab spectral_field_Tx(cba,cdm,1,0); // totalx @@ -119,26 +104,21 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, cMultiFab spectral_field_Dy(cba,cdm,1,0); // dilatationaly cMultiFab spectral_field_Dz(cba,cdm,1,0); // dilatationalz - MultiFab vel_single(ba, dm, 1, 0); - // ForwardTransform // X { - vel_single.ParallelCopy(vel, 0, 0, 1); - amrex::FFT::R2C r2c(geom.Domain()); - r2c.forward(vel_single,spectral_field_Tx); + MultiFab vel_single(vel, amrex::make_alias, 0, 1); + r2c.forward(vel_single,spectral_field_Tx); } // Y { - vel_single.ParallelCopy(vel, 1, 0, 1); - amrex::FFT::R2C r2c(geom.Domain()); - 
r2c.forward(vel_single,spectral_field_Ty); + MultiFab vel_single(vel, amrex::make_alias, 1, 1); + r2c.forward(vel_single,spectral_field_Ty); } // Z { - vel_single.ParallelCopy(vel, 2, 0, 1); - amrex::FFT::R2C r2c(geom.Domain()); - r2c.forward(vel_single,spectral_field_Tz); + MultiFab vel_single(vel, amrex::make_alias, 2, 1); + r2c.forward(vel_single,spectral_field_Tz); } // Decompose velocity field into solenoidal and dilatational @@ -290,40 +270,28 @@ void TurbSpectrumVelDecomp(const MultiFab& vel, // inverse Fourier transform solenoidal and dilatational components { - amrex::FFT::R2C r2c(geom.Domain()); - MultiFab vel_decomp_single(ba, dm, 1, 0); - r2c.backward(spectral_field_Sx,vel_decomp_single); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 0, 1); + MultiFab vel_decomp_single(vel_decomp, amrex::make_alias, 0, 1); + r2c.backward(spectral_field_Sx,vel_decomp_single); } { - amrex::FFT::R2C r2c(geom.Domain()); - MultiFab vel_decomp_single(ba, dm, 1, 0); - r2c.backward(spectral_field_Sy,vel_decomp_single); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 1, 1); + MultiFab vel_decomp_single(vel_decomp, amrex::make_alias, 1, 1); + r2c.backward(spectral_field_Sy,vel_decomp_single); } { - amrex::FFT::R2C r2c(geom.Domain()); - MultiFab vel_decomp_single(ba, dm, 1, 0); - r2c.backward(spectral_field_Sz,vel_decomp_single); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 2, 1); + MultiFab vel_decomp_single(vel_decomp, amrex::make_alias, 2, 1); + r2c.backward(spectral_field_Sz,vel_decomp_single); } { - amrex::FFT::R2C r2c(geom.Domain()); - MultiFab vel_decomp_single(ba, dm, 1, 0); - r2c.backward(spectral_field_Dx,vel_decomp_single); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 3, 1); + MultiFab vel_decomp_single(vel_decomp, amrex::make_alias, 3, 1); + r2c.backward(spectral_field_Dx,vel_decomp_single); } { - amrex::FFT::R2C r2c(geom.Domain()); - MultiFab vel_decomp_single(ba, dm, 1, 0); - r2c.backward(spectral_field_Dy,vel_decomp_single); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 4, 1); + MultiFab vel_decomp_single(vel_decomp, amrex::make_alias, 4, 1); + r2c.backward(spectral_field_Dy,vel_decomp_single); } { - amrex::FFT::R2C r2c(geom.Domain()); - MultiFab vel_decomp_single(ba, dm, 1, 0); - r2c.backward(spectral_field_Dz,vel_decomp_single); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 5, 1); + MultiFab vel_decomp_single(vel_decomp, amrex::make_alias, 5, 1); + r2c.backward(spectral_field_Dz,vel_decomp_single); } vel_decomp.mult(1.0/sqrtnpts); From 13f9d43e28564bd9a2efa4caa3c6d88229562775 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Tue, 5 Nov 2024 09:02:30 -0800 Subject: [PATCH 111/151] Implement slice structure factor for compressible_stag To use, set project_dir (normal direction of plane) and slicepoint (coordinate in project_dir to slice) in the inputs file; e.g. 
if you care about the k=0 plane use project_dir=2 and slicepoint=0 If you do not specify slicepoint it will take the vertical average over project_dir over the entire dataset plt_SF_prim_Flattened plt_SF_cons_Flattened contains the result --- src_compressible_stag/main_driver.cpp | 45 ++++++++++++++++++--------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp index 98267a1e4..da87869ed 100644 --- a/src_compressible_stag/main_driver.cpp +++ b/src_compressible_stag/main_driver.cpp @@ -208,8 +208,13 @@ void main_driver(const char* argv) if ((plot_cross) and ((cross_cell < 0) or (cross_cell > n_cells[0]-1))) { Abort("Cross cell needs to be within the domain: 0 <= cross_cell <= n_cells[0] - 1"); } - if ((do_slab_sf) and ((membrane_cell <= 0) or (membrane_cell >= n_cells[0]-1))) { - Abort("Slab structure factor needs a membrane cell within the domain: 0 < cross_cell < n_cells[0] - 1"); + if (project_dir >= 0) { + if (do_slab_sf and ((membrane_cell <= 0) or (membrane_cell >= n_cells[project_dir]-1))) { + Abort("Slab structure factor needs a membrane cell within the domain: 0 < cross_cell < n_cells[project_dir] - 1"); + } + if (do_slab_sf and slicepoint >= 0) { + Abort("Cannot use do_slab_sf and slicepoint"); + } } if ((project_dir >= 0) and ((do_1D) or (do_2D))) { Abort("Projected structure factors (project_dir) works only for 3D case"); @@ -276,9 +281,9 @@ void main_driver(const char* argv) MultiFab structFactPrimMF; MultiFab structFactConsMF; - // Structure factor for 2D averaged data - StructFact structFactPrimVerticalAverage; - StructFact structFactConsVerticalAverage; + // Structure factor for vertically-averaged or sliced data + StructFact structFactPrimFlattened; + StructFact structFactConsFlattened; // Structure factor for 2D averaged data (across a membrane) StructFact structFactPrimVerticalAverage0; @@ -781,7 +786,11 @@ void main_driver(const char* argv) { MultiFab X, XRot; - ComputeVerticalAverage(prim, X, geom, project_dir, 0, nprimvars); + if (slicepoint < 0) { + ComputeVerticalAverage(prim, X, geom, project_dir, 0, nprimvars); + } else { + ExtractSlice(prim, X, geom, project_dir, slicepoint, 0, 1); + } XRot = RotateFlattenedMF(X); ba_flat = XRot.boxArray(); dmap_flat = XRot.DistributionMap(); @@ -845,8 +854,8 @@ void main_driver(const char* argv) } if (do_slab_sf == 0) { - structFactPrimVerticalAverage.define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim,2); - structFactConsVerticalAverage.define(ba_flat,dmap_flat,cons_var_names,var_scaling_cons,2); + structFactPrimFlattened.define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim); + structFactConsFlattened.define(ba_flat,dmap_flat,cons_var_names,var_scaling_cons); } else { structFactPrimVerticalAverage0.define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim); @@ -1468,19 +1477,27 @@ void main_driver(const char* argv) { MultiFab X, XRot; - ComputeVerticalAverage(structFactPrimMF, X, geom, project_dir, 0, structVarsPrim); + if (slicepoint < 0) { + ComputeVerticalAverage(structFactPrimMF, X, geom, project_dir, 0, structVarsPrim); + } else { + ExtractSlice(structFactPrimMF, X, geom, project_dir, slicepoint, 0, structVarsPrim); + } XRot = RotateFlattenedMF(X); master_project_rot_prim.ParallelCopy(XRot, 0, 0, structVarsPrim); - structFactPrimVerticalAverage.FortStructure(master_project_rot_prim,geom_flat); + structFactPrimFlattened.FortStructure(master_project_rot_prim,geom_flat); } { MultiFab X, XRot; - 
ComputeVerticalAverage(structFactConsMF, X, geom, project_dir, 0, structVarsCons); + if (slicepoint < 0) { + ComputeVerticalAverage(structFactConsMF, X, geom, project_dir, 0, structVarsCons); + } else { + ExtractSlice(structFactConsMF, X, geom, project_dir, slicepoint, 0, structVarsCons); + } XRot = RotateFlattenedMF(X); master_project_rot_cons.ParallelCopy(XRot, 0, 0, structVarsCons); - structFactConsVerticalAverage.FortStructure(master_project_rot_cons,geom_flat); + structFactConsFlattened.FortStructure(master_project_rot_cons,geom_flat); } } @@ -1576,8 +1593,8 @@ void main_driver(const char* argv) if (project_dir >= 0) { if (do_slab_sf == 0) { - structFactPrimVerticalAverage.WritePlotFile(step,time,geom_flat,"plt_SF_prim_VerticalAverage"); - structFactConsVerticalAverage.WritePlotFile(step,time,geom_flat,"plt_SF_cons_VerticalAverage"); + structFactPrimFlattened.WritePlotFile(step,time,geom_flat,"plt_SF_prim_Flattened"); + structFactConsFlattened.WritePlotFile(step,time,geom_flat,"plt_SF_cons_Flattened"); } else { structFactPrimVerticalAverage0.WritePlotFile(step,time,geom_flat,"plt_SF_prim_VerticalAverageSlab0"); From f7d10c0ffc1ffd30cb5ca9d721872e09942a05bb Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Thu, 14 Nov 2024 14:21:39 -0500 Subject: [PATCH 112/151] Fix an issue on Frontier Copy data to host first before calling MPI reduce. --- src_analysis/TurbSpectra_distributed.cpp | 78 ++++++++---------------- 1 file changed, 25 insertions(+), 53 deletions(-) diff --git a/src_analysis/TurbSpectra_distributed.cpp b/src_analysis/TurbSpectra_distributed.cpp index 8aa6a4350..e628c5ab1 100644 --- a/src_analysis/TurbSpectra_distributed.cpp +++ b/src_analysis/TurbSpectra_distributed.cpp @@ -306,25 +306,10 @@ void IntegrateKScalar(const MultiFab& cov_mag, { int npts = n_cells[0]/2; - Gpu::DeviceVector phisum_device(npts); - Gpu::DeviceVector phicnt_device(npts); -// Gpu::HostVector phisum_host(npts); -// Gpu::HostVector phicnt_host(npts); - - Gpu::HostVector phisum_host(npts); - + Gpu::DeviceVector phisum_device(npts, 0); + Gpu::DeviceVector phicnt_device(npts, 0); Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data - - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - phisum_ptr[d] = 0.; - phicnt_ptr[d] = 0; - }); -// for (int d=0; d phisum_host(npts); + Gpu::HostVector phicnt_host(npts); + Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); + Gpu::copyAsync(Gpu::deviceToHost, phicnt_device.begin(), phicnt_device.end(), phicnt_host.begin()); Gpu::streamSynchronize(); - - ParallelDescriptor::ReduceRealSum(phisum_device.dataPtr(),npts); - ParallelDescriptor::ReduceIntSum(phicnt_device.dataPtr(),npts); + + ParallelDescriptor::ReduceRealSum(phisum_host.dataPtr(),npts); + ParallelDescriptor::ReduceIntSum(phicnt_host.dataPtr(),npts); Real dk = 1.; - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - if (d != 0) { - phisum_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d]; - } - }); - - Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); - Gpu::streamSynchronize(); + for (int d = 1; d < npts; ++d) { + phisum_host[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_host[d]; + } if (ParallelDescriptor::IOProcessor()) { std::ofstream turb; @@ -399,20 +382,11 @@ void IntegrateKVelocity(const MultiFab& cov_mag, { int npts = n_cells[0]/2; - Gpu::DeviceVector phisum_device(npts); - Gpu::DeviceVector 
phicnt_device(npts); - - Gpu::HostVector phisum_host(npts); - + Gpu::DeviceVector phisum_device(npts, 0); + Gpu::DeviceVector phicnt_device(npts, 0); Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data - - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - phisum_ptr[d] = 0.; - phicnt_ptr[d] = 0; - }); - + int comp_gpu = comp; int nx = n_cells[0]; int ny = n_cells[1]; @@ -448,21 +422,19 @@ void IntegrateKVelocity(const MultiFab& cov_mag, }); } + Gpu::HostVector phisum_host(npts); + Gpu::HostVector phicnt_host(npts); + Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); + Gpu::copyAsync(Gpu::deviceToHost, phicnt_device.begin(), phicnt_device.end(), phicnt_host.begin()); Gpu::streamSynchronize(); - ParallelDescriptor::ReduceRealSum(phisum_device.dataPtr(),npts); - ParallelDescriptor::ReduceIntSum(phicnt_device.dataPtr(),npts); + ParallelDescriptor::ReduceRealSum(phisum_host.dataPtr(),npts); + ParallelDescriptor::ReduceIntSum(phicnt_host.dataPtr(),npts); Real dk = 1.; - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - if (d != 0) { - phisum_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d]; - } - }); - - Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); - Gpu::streamSynchronize(); + for (int d = 1; d < npts; ++d) { + phisum_host[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_host[d]; + } if (ParallelDescriptor::IOProcessor()) { std::ofstream turb; From c01b5831d4d58e09eb721cf453cc52df9acad555 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Mon, 18 Nov 2024 11:31:25 -0800 Subject: [PATCH 113/151] fix bug when hi-x wall moves in the y-direction --- src_common/MultiFabPhysBC.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src_common/MultiFabPhysBC.cpp b/src_common/MultiFabPhysBC.cpp index 529efaf59..069485559 100644 --- a/src_common/MultiFabPhysBC.cpp +++ b/src_common/MultiFabPhysBC.cpp @@ -390,7 +390,7 @@ void MultiFabPhysBCMacVel(MultiFab& vel, const Geometry& geom, int dim, int is_i } } - if ((dim != 0) && (bc_vel_lo[0] == 1 || bc_vel_hi[0] == 2) && (bx.bigEnd(0) > dom.bigEnd(0))) { + if ((dim != 0) && (bc_vel_hi[0] == 1 || bc_vel_hi[0] == 2) && (bx.bigEnd(0) > dom.bigEnd(0))) { if (bc_vel_hi[0] == 1) { // slip amrex::ParallelFor(bx,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { From 6041c07a165cd28ea992c436347af05a286e4ad6 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Mon, 18 Nov 2024 11:47:47 -0800 Subject: [PATCH 114/151] fix moving wall solver in delta form --- exec/multispec/AdvanceTimestepBousq.cpp | 4 ++-- exec/multispec/AdvanceTimestepInertial.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/exec/multispec/AdvanceTimestepBousq.cpp b/exec/multispec/AdvanceTimestepBousq.cpp index 8d9f3931c..bed33cd1e 100644 --- a/exec/multispec/AdvanceTimestepBousq.cpp +++ b/exec/multispec/AdvanceTimestepBousq.cpp @@ -273,7 +273,7 @@ void AdvanceTimestepBousq(std::array< MultiFab, AMREX_SPACEDIM >& umac, // set normal velocity of physical domain boundaries MultiFabPhysBCDomainVel(umac[i],geom,i); // set transverse velocity behind physical boundaries - int is_inhomogeneous = 1; + int is_inhomogeneous = 0; MultiFabPhysBCMacVel(umac[i],geom,i,is_inhomogeneous); // fill periodic and interior ghost cells umac[i].FillBoundary(geom.periodicity()); @@ -707,7 +707,7 @@ void AdvanceTimestepBousq(std::array< MultiFab, AMREX_SPACEDIM >& umac, // set 
normal velocity of physical domain boundaries MultiFabPhysBCDomainVel(umac[i],geom,i); // set transverse velocity behind physical boundaries - int is_inhomogeneous = 1; + int is_inhomogeneous = 0; MultiFabPhysBCMacVel(umac[i],geom,i,is_inhomogeneous); // fill periodic and interior ghost cells umac[i].FillBoundary(geom.periodicity()); diff --git a/exec/multispec/AdvanceTimestepInertial.cpp b/exec/multispec/AdvanceTimestepInertial.cpp index bbfbc263b..b89d13739 100644 --- a/exec/multispec/AdvanceTimestepInertial.cpp +++ b/exec/multispec/AdvanceTimestepInertial.cpp @@ -309,7 +309,7 @@ void AdvanceTimestepInertial(std::array< MultiFab, AMREX_SPACEDIM >& umac, // set normal velocity of physical domain boundaries MultiFabPhysBCDomainVel(umac[i],geom,i); // set transverse velocity behind physical boundaries - int is_inhomogeneous = 1; + int is_inhomogeneous = 0; MultiFabPhysBCMacVel(umac[i],geom,i,is_inhomogeneous); // fill periodic and interior ghost cells umac[i].FillBoundary(geom.periodicity()); @@ -598,7 +598,7 @@ void AdvanceTimestepInertial(std::array< MultiFab, AMREX_SPACEDIM >& umac, // set normal velocity of physical domain boundaries MultiFabPhysBCDomainVel(umac[i],geom,i); // set transverse velocity behind physical boundaries - int is_inhomogeneous = 1; + int is_inhomogeneous = 0; MultiFabPhysBCMacVel(umac[i],geom,i,is_inhomogeneous); // fill periodic and interior ghost cells umac[i].FillBoundary(geom.periodicity()); From 0cac562fdd31693f5110df4720fa2000566f10ea Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Tue, 19 Nov 2024 19:21:28 -0800 Subject: [PATCH 115/151] introduce code that forces the slice multifab to have index 0:0 in the dir direction, regardless of slicepoint the structure factor flattened code requires this --- src_common/ComputeAverages.cpp | 41 ++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/src_common/ComputeAverages.cpp b/src_common/ComputeAverages.cpp index 6880c70de..e63b371da 100644 --- a/src_common/ComputeAverages.cpp +++ b/src_common/ComputeAverages.cpp @@ -384,7 +384,44 @@ void ExtractSlice(const MultiFab& mf, MultiFab& mf_slice, // create a new DistributionMapping and define the MultiFab DistributionMapping dmap_slice(ba_slice); - mf_slice.define(ba_slice,dmap_slice,ncomp,0); + MultiFab mf_slice_tmp(ba_slice,dmap_slice,ncomp,0); - mf_slice.ParallelCopy(mf, incomp, 0, ncomp); + mf_slice_tmp.ParallelCopy(mf, incomp, 0, ncomp); + + // now copy this into a multifab with index zero in the dir direction rather than slicepoint + // (structure factor code requires this) + dom_lo[dir] = 0; + dom_hi[dir] = 0; + + Box domain_slice2(dom_lo,dom_hi); + BoxArray ba_slice2(domain_slice2); + ba_slice2.maxSize(IntVect(max_grid_slice)); + mf_slice.define(ba_slice2,dmap_slice,ncomp,0); + + for ( MFIter mfi(mf_slice_tmp,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.tilebox(); + + const Array4 & slice = mf_slice.array(mfi); + const Array4 & slice_tmp = mf_slice_tmp.array(mfi); + + if (dir == 0) { + amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + slice(0,j,k) = slice_tmp(i,j,k); + }); + } + if (dir == 1) { + amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + slice(i,0,k) = slice_tmp(i,j,k); + }); + } + if (dir == 2) { + amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + slice(i,j,0) = slice_tmp(i,j,k); + }); + } + } } From 81ae8acc4f0aaf00ffac22f3b21d2fdd842e42d3 Mon Sep 17 
00:00:00 2001 From: Andy Nonaka Date: Thu, 21 Nov 2024 14:32:39 -0800 Subject: [PATCH 116/151] fix copy of slice MF components --- src_common/ComputeAverages.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src_common/ComputeAverages.cpp b/src_common/ComputeAverages.cpp index e63b371da..34791dcc2 100644 --- a/src_common/ComputeAverages.cpp +++ b/src_common/ComputeAverages.cpp @@ -408,19 +408,19 @@ void ExtractSlice(const MultiFab& mf, MultiFab& mf_slice, if (dir == 0) { amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { - slice(0,j,k) = slice_tmp(i,j,k); + slice(0,j,k,n) = slice_tmp(i,j,k,n); }); } if (dir == 1) { amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { - slice(i,0,k) = slice_tmp(i,j,k); + slice(i,0,k,n) = slice_tmp(i,j,k,n); }); } if (dir == 2) { amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { - slice(i,j,0) = slice_tmp(i,j,k); + slice(i,j,0,n) = slice_tmp(i,j,k,n); }); } } From dd17133f446417d48966050abca99d0dad658e5a Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Wed, 4 Dec 2024 15:29:27 -0800 Subject: [PATCH 117/151] add ability to write deltaY to plotfile must specify plot_deltaY = 1 project_dir = (0, 1, or 2) --- src_common/ComputeAverages.cpp | 8 +++++-- src_common/common_functions.H | 2 +- src_common/common_functions.cpp | 3 +++ src_common/common_namespace.H | 1 + src_compressible_stag/writePlotFileStag.cpp | 24 ++++++++++++++++++++- 5 files changed, 34 insertions(+), 4 deletions(-) diff --git a/src_common/ComputeAverages.cpp b/src_common/ComputeAverages.cpp index 34791dcc2..2b474cc53 100644 --- a/src_common/ComputeAverages.cpp +++ b/src_common/ComputeAverages.cpp @@ -95,8 +95,12 @@ void WriteHorizontalAverage(const MultiFab& mf_in, const int& dir, const int& in void WriteHorizontalAverageToMF(const MultiFab& mf_in, MultiFab& mf_out, const int& dir, const int& incomp, - const int& ncomp) + const int& ncomp, int outcomp) { + if (outcomp == -1) { + outcomp = incomp; // default outcomp is incomp unless specified + } + // number of points in the averaging direction int npts = n_cells[dir]; @@ -166,7 +170,7 @@ void WriteHorizontalAverageToMF(const MultiFab& mf_in, MultiFab& mf_out, const Array4 mf = mf_out.array(mfi); for (auto n=0; n0) nplot += nspec_surfcov*6; } - + + if (plot_deltaY == 1) { + nplot += nspecies; + } + amrex::BoxArray ba = cuMeans.boxArray(); amrex::DistributionMapping dmap = cuMeans.DistributionMap(); @@ -257,6 +261,16 @@ void WritePlotFileStag(int step, } } + if (plot_deltaY == 1) { + MultiFab Ybar(ba, dmap, nspecies, 0); + // Yk is component 6: in prim + WriteHorizontalAverageToMF(prim,Ybar,project_dir,6,nspecies,0); + Ybar.mult(-1.); + amrex::MultiFab::Add(Ybar,prim,6,0,nspecies,0); + amrex::MultiFab::Copy(plotfile,Ybar,0,cnt,nspecies,0); + cnt+= nspecies; + } + // Set variable names cnt = 0; @@ -445,6 +459,14 @@ void WritePlotFileStag(int step, } + if (plot_deltaY == 1) { + x = "deltaYk_"; + for (i=0; i Date: Wed, 4 Dec 2024 17:21:52 -0800 Subject: [PATCH 118/151] fix inputs_giantflux_3d to work by ensuring bc YK's sum to 1 add error checking for this condition in the compressible_stag code --- exec/compressible/inputs_giantfluct_3d | 4 ++-- src_compressible_stag/boundaryStag.cpp | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/exec/compressible/inputs_giantfluct_3d b/exec/compressible/inputs_giantfluct_3d index 575927ee0..9a18b2409 100644 --- 
a/exec/compressible/inputs_giantfluct_3d +++ b/exec/compressible/inputs_giantfluct_3d @@ -98,7 +98,7 @@ # Xk and Yk at the wall for Dirichlet (concentrations) - set one to zero # Ordering: (species 1, x-dir), (species 2, x-dir), ... (species 1, y-dir), ... - bc_Yk_y_lo = 0.2 0.09316672 0.70683296 # lo BC - bc_Yk_y_hi = 0.3 0.40683328 0.29316704 # hi BC + bc_Yk_y_lo = 0.2 0.09316672 0.70683328 # lo BC + bc_Yk_y_hi = 0.3 0.40683328 0.29316672 # hi BC diff --git a/src_compressible_stag/boundaryStag.cpp b/src_compressible_stag/boundaryStag.cpp index b4c100394..965688570 100644 --- a/src_compressible_stag/boundaryStag.cpp +++ b/src_compressible_stag/boundaryStag.cpp @@ -32,6 +32,9 @@ void SetupCWallStag() { } else if (amrex::Math::abs(sumy-1) < 1.e-10) { GetMolfrac(bc_Yk_x_lo,bc_Xk_x_lo); } + else { + Abort("SetupCWallStag: lo-x; mass or mole fractions do not sum to 1"); + } } if (bc_mass_lo[0] >= 3) { @@ -69,6 +72,8 @@ void SetupCWallStag() { GetMassfrac(bc_Xk_x_hi,bc_Yk_x_hi); } else if (amrex::Math::abs(sumy-1) < 1.e-10) { GetMolfrac(bc_Yk_x_hi,bc_Xk_x_hi); + } else { + Abort("SetupCWallStag: hi-x; mass or mole fractions do not sum to 1"); } } @@ -108,6 +113,8 @@ void SetupCWallStag() { GetMassfrac(bc_Xk_y_lo,bc_Yk_y_lo); } else if (amrex::Math::abs(sumy-1) < 1.e-10) { GetMolfrac(bc_Yk_y_lo,bc_Xk_y_lo); + } else { + Abort("SetupCWallStag: lo-y; mass or mole fractions do not sum to 1"); } } @@ -146,6 +153,8 @@ void SetupCWallStag() { GetMassfrac(bc_Xk_y_hi,bc_Yk_y_hi); } else if (amrex::Math::abs(sumy-1) < 1.e-10) { GetMolfrac(bc_Yk_y_hi,bc_Xk_y_hi); + } else { + Abort("SetupCWallStag: hi-y; mass or mole fractions do not sum to 1"); } } @@ -185,6 +194,8 @@ void SetupCWallStag() { GetMassfrac(bc_Xk_z_lo,bc_Yk_z_lo); } else if (amrex::Math::abs(sumy-1) < 1.e-10) { GetMolfrac(bc_Yk_z_lo,bc_Xk_z_lo); + } else { + Abort("SetupCWallStag: lo-z; mass or mole fractions do not sum to 1"); } } @@ -223,6 +234,8 @@ void SetupCWallStag() { GetMassfrac(bc_Xk_z_hi,bc_Yk_z_hi); } else if (amrex::Math::abs(sumy-1) < 1.e-10) { GetMolfrac(bc_Yk_z_hi,bc_Xk_z_hi); + } else { + Abort("SetupCWallStag: hi-z; mass or mole fractions do not sum to 1"); } } From 9f1ccd2cbfb4ed0f978a245541b33fbdf4d0c9ce Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Wed, 4 Dec 2024 18:05:26 -0800 Subject: [PATCH 119/151] use plot_deltaY_dir instead of project_dir --- src_common/common_functions.cpp | 6 +++--- src_common/common_namespace.H | 2 +- src_compressible_stag/writePlotFileStag.cpp | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src_common/common_functions.cpp b/src_common/common_functions.cpp index 8dfa70290..bbc4d429a 100644 --- a/src_common/common_functions.cpp +++ b/src_common/common_functions.cpp @@ -240,7 +240,7 @@ int common::plot_means; int common::plot_vars; int common::plot_covars; int common::plot_cross; -int common::plot_deltaY; +int common::plot_deltaY_dir; int common::particle_motion; AMREX_GPU_MANAGED amrex::Real common::turb_a; @@ -609,7 +609,7 @@ void InitializeCommonNamespace() { plot_vars = 0; plot_covars = 0; plot_cross = 0; - plot_deltaY = 0; + plot_deltaY_dir = -1; particle_motion = 0; // turblent forcing parameters @@ -1137,7 +1137,7 @@ void InitializeCommonNamespace() { pp.query("plot_vars",plot_vars); pp.query("plot_covars",plot_covars); pp.query("plot_cross",plot_cross); - pp.query("plot_deltaY",plot_deltaY); + pp.query("plot_deltaY_dir",plot_deltaY_dir); pp.query("particle_motion",particle_motion); pp.query("turb_a",turb_a); pp.query("turb_b",turb_b); diff --git 
a/src_common/common_namespace.H b/src_common/common_namespace.H index bddc39614..b5493d3a1 100644 --- a/src_common/common_namespace.H +++ b/src_common/common_namespace.H @@ -314,7 +314,7 @@ namespace common { extern int plot_vars; extern int plot_covars; extern int plot_cross; - extern int plot_deltaY; + extern int plot_deltaY_dir; extern int particle_motion; // parameters for turbulent forcing example diff --git a/src_compressible_stag/writePlotFileStag.cpp b/src_compressible_stag/writePlotFileStag.cpp index 94f56adc3..820c4cd8e 100644 --- a/src_compressible_stag/writePlotFileStag.cpp +++ b/src_compressible_stag/writePlotFileStag.cpp @@ -88,7 +88,7 @@ void WritePlotFileStag(int step, if (nspec_surfcov>0) nplot += nspec_surfcov*6; } - if (plot_deltaY == 1) { + if (plot_deltaY_dir != -1) { nplot += nspecies; } @@ -261,10 +261,10 @@ void WritePlotFileStag(int step, } } - if (plot_deltaY == 1) { + if (plot_deltaY_dir != -1) { MultiFab Ybar(ba, dmap, nspecies, 0); // Yk is component 6: in prim - WriteHorizontalAverageToMF(prim,Ybar,project_dir,6,nspecies,0); + WriteHorizontalAverageToMF(prim,Ybar,plot_deltaY_dir,6,nspecies,0); Ybar.mult(-1.); amrex::MultiFab::Add(Ybar,prim,6,0,nspecies,0); amrex::MultiFab::Copy(plotfile,Ybar,0,cnt,nspecies,0); @@ -459,7 +459,7 @@ void WritePlotFileStag(int step, } - if (plot_deltaY == 1) { + if (plot_deltaY_dir != 1) { x = "deltaYk_"; for (i=0; i Date: Mon, 9 Dec 2024 14:42:51 -0800 Subject: [PATCH 120/151] test PR just to see how CI is working --- exec/hydro/main_driver.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/exec/hydro/main_driver.cpp b/exec/hydro/main_driver.cpp index 56145b88d..73a3d4aa1 100644 --- a/exec/hydro/main_driver.cpp +++ b/exec/hydro/main_driver.cpp @@ -13,7 +13,6 @@ #include "gmres_functions.H" - #include #include #include From 8bbf830ec9bff04cba2e717e5fee37c305785a3d Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Mon, 9 Dec 2024 16:03:25 -0800 Subject: [PATCH 121/151] add libcusparse-dev-11-2 to the dependencies_nvc11.sh --- .github/workflows/dependencies/dependencies_nvcc11.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/dependencies/dependencies_nvcc11.sh b/.github/workflows/dependencies/dependencies_nvcc11.sh index 5be711d81..a79b152b6 100755 --- a/.github/workflows/dependencies/dependencies_nvcc11.sh +++ b/.github/workflows/dependencies/dependencies_nvcc11.sh @@ -34,7 +34,8 @@ sudo apt-get install -y \ cuda-nvml-dev-11-2 \ cuda-nvtx-11-2 \ libcurand-dev-11-2 \ - libcufft-dev-11-2 + libcufft-dev-11-2 \ + libcusparse-dev-11-2 sudo ln -s cuda-11.2 /usr/local/cuda From af2bec369aaba90bc17aaee1f7fe6f6b71d7d099 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Tue, 10 Dec 2024 09:40:21 -0800 Subject: [PATCH 122/151] bugfix; fix broken regression tests --- src_compressible_stag/writePlotFileStag.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src_compressible_stag/writePlotFileStag.cpp b/src_compressible_stag/writePlotFileStag.cpp index 820c4cd8e..4a3742b3e 100644 --- a/src_compressible_stag/writePlotFileStag.cpp +++ b/src_compressible_stag/writePlotFileStag.cpp @@ -459,7 +459,7 @@ void WritePlotFileStag(int step, } - if (plot_deltaY_dir != 1) { + if (plot_deltaY_dir != -1) { x = "deltaYk_"; for (i=0; i Date: Tue, 10 Dec 2024 09:44:43 -0800 Subject: [PATCH 123/151] include Src/FFT --- exec/compressible_stag/GNUmakefile | 1 + 1 file changed, 1 insertion(+) diff --git a/exec/compressible_stag/GNUmakefile b/exec/compressible_stag/GNUmakefile index 123e6ad18..e08b1d380 100644 
--- a/exec/compressible_stag/GNUmakefile +++ b/exec/compressible_stag/GNUmakefile @@ -76,6 +76,7 @@ else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) endif include $(AMREX_HOME)/Src/Base/Make.package +include $(AMREX_HOME)/Src/FFT/Make.package include ../../src_analysis/Make.package VPATH_LOCATIONS += ../../src_analysis/ From b85480884fe02890f4e0f2f16a27ec191c2763f5 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Thu, 12 Dec 2024 09:11:46 -0800 Subject: [PATCH 124/151] fix compilation of hydro with AMReX_FFT --- exec/hydro/GNUmakefile | 1 + 1 file changed, 1 insertion(+) diff --git a/exec/hydro/GNUmakefile b/exec/hydro/GNUmakefile index 1a81d9633..d628bf7e3 100644 --- a/exec/hydro/GNUmakefile +++ b/exec/hydro/GNUmakefile @@ -39,6 +39,7 @@ VPATH_LOCATIONS += ../../src_common/ INCLUDE_LOCATIONS += ../../src_common/ include $(AMREX_HOME)/Src/Base/Make.package +include $(AMREX_HOME)/Src/FFT/Make.package include $(AMREX_HOME)/Src/Boundary/Make.package include $(AMREX_HOME)/Src/LinearSolvers/MLMG/Make.package From b83bb535ea828e1af5b0b650ba779812ae840d27 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Thu, 12 Dec 2024 09:18:37 -0800 Subject: [PATCH 125/151] USE_FFT=TRUE in GNUmakefile --- exec/hydro/GNUmakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exec/hydro/GNUmakefile b/exec/hydro/GNUmakefile index d628bf7e3..15b0b9824 100644 --- a/exec/hydro/GNUmakefile +++ b/exec/hydro/GNUmakefile @@ -8,7 +8,7 @@ USE_OMP = FALSE USE_CUDA = FALSE COMP = gnu DIM = 3 -MAX_SPEC = 8 +USE_FFT = TRUE TINY_PROFILE = FALSE From 9e883753cdc7acce8ea2c1016f12e580b0713082 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Thu, 12 Dec 2024 11:32:20 -0800 Subject: [PATCH 126/151] get multispec compiling --- exec/multispec/GNUmakefile | 1 + 1 file changed, 1 insertion(+) diff --git a/exec/multispec/GNUmakefile b/exec/multispec/GNUmakefile index ba60c98d5..11ca6c596 100644 --- a/exec/multispec/GNUmakefile +++ b/exec/multispec/GNUmakefile @@ -48,6 +48,7 @@ VPATH_LOCATIONS += ../../src_common/ INCLUDE_LOCATIONS += ../../src_common/ include $(AMREX_HOME)/Src/Base/Make.package +include $(AMREX_HOME)/Src/FFT/Make.package include $(AMREX_HOME)/Src/Boundary/Make.package include $(AMREX_HOME)/Src/LinearSolvers/MLMG/Make.package include $(AMREX_HOME)/Src/AmrCore/Make.package From fc4cec878a7ac88b217249c904cc7af4679b49b4 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Thu, 12 Dec 2024 11:55:13 -0800 Subject: [PATCH 127/151] fix compilation USE_FFT=TRUE --- exec/multispec/GNUmakefile | 1 + 1 file changed, 1 insertion(+) diff --git a/exec/multispec/GNUmakefile b/exec/multispec/GNUmakefile index 11ca6c596..5a4c366e6 100644 --- a/exec/multispec/GNUmakefile +++ b/exec/multispec/GNUmakefile @@ -10,6 +10,7 @@ COMP = gnu DIM = 3 DSMC = FALSE MAX_SPEC = 8 +USE_FFT = TRUE # MAX_ELEM needs to be MAX_SPEC*(MAX_SPEC-1)/2 MAX_ELEM = 28 From 7cc1c02441b991ec8b35adb14601c7e455a63823 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Thu, 12 Dec 2024 13:20:19 -0800 Subject: [PATCH 128/151] remove blank file --- src_analysis/TurbSpectra.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src_analysis/TurbSpectra.cpp diff --git a/src_analysis/TurbSpectra.cpp b/src_analysis/TurbSpectra.cpp deleted file mode 100644 index e69de29bb..000000000 From dc29a33f85d9a9335b9b1dcfa0dc4475ebe0b131 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Thu, 12 Dec 2024 15:19:09 -0800 Subject: [PATCH 129/151] get examples compiling --- exec/compressible/GNUmakefile | 1 + exec/compressible_stag/GNUmakefile | 2 +- exec/hydro/GNUmakefile 
| 1 + exec/immersedIons/GNUmakefile | 1 + exec/multispec/GNUmakefile | 5 ++--- exec/structFactTest/GNUmakefile | 4 ++-- src_compressible_stag/main_driver.cpp | 4 +--- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/exec/compressible/GNUmakefile b/exec/compressible/GNUmakefile index cd7ee153f..5cee2876a 100644 --- a/exec/compressible/GNUmakefile +++ b/exec/compressible/GNUmakefile @@ -11,6 +11,7 @@ DIM = 3 TINY_PROFILE = FALSE MAX_SPEC = 8 MAX_REAC = 5 +USE_FFT = TRUE USE_PARTICLES = FALSE diff --git a/exec/compressible_stag/GNUmakefile b/exec/compressible_stag/GNUmakefile index e08b1d380..1c7de9faf 100644 --- a/exec/compressible_stag/GNUmakefile +++ b/exec/compressible_stag/GNUmakefile @@ -12,6 +12,7 @@ DIM = 3 TINY_PROFILE = FALSE MAX_SPEC = 8 MAX_REAC = 5 +USE_FFT = TRUE USE_PARTICLES = FALSE DO_TURB = FALSE @@ -76,7 +77,6 @@ else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) endif include $(AMREX_HOME)/Src/Base/Make.package -include $(AMREX_HOME)/Src/FFT/Make.package include ../../src_analysis/Make.package VPATH_LOCATIONS += ../../src_analysis/ diff --git a/exec/hydro/GNUmakefile b/exec/hydro/GNUmakefile index 15b0b9824..0f6b98117 100644 --- a/exec/hydro/GNUmakefile +++ b/exec/hydro/GNUmakefile @@ -8,6 +8,7 @@ USE_OMP = FALSE USE_CUDA = FALSE COMP = gnu DIM = 3 +MAX_SPEC = 2 USE_FFT = TRUE TINY_PROFILE = FALSE diff --git a/exec/immersedIons/GNUmakefile b/exec/immersedIons/GNUmakefile index c2ea936ae..709882051 100644 --- a/exec/immersedIons/GNUmakefile +++ b/exec/immersedIons/GNUmakefile @@ -12,6 +12,7 @@ COMP = gnu DIM = 3 DSMC = FALSE MAX_SPEC = 8 +USE_FFT = TRUE TINY_PROFILE = FALSE USE_PARTICLES = TRUE diff --git a/exec/multispec/GNUmakefile b/exec/multispec/GNUmakefile index 5a4c366e6..5cc88aa1a 100644 --- a/exec/multispec/GNUmakefile +++ b/exec/multispec/GNUmakefile @@ -9,10 +9,10 @@ USE_CUDA = FALSE COMP = gnu DIM = 3 DSMC = FALSE -MAX_SPEC = 8 +MAX_SPEC = 8 USE_FFT = TRUE # MAX_ELEM needs to be MAX_SPEC*(MAX_SPEC-1)/2 -MAX_ELEM = 28 +MAX_ELEM = 28 TINY_PROFILE = FALSE USE_PARTICLES = FALSE @@ -49,7 +49,6 @@ VPATH_LOCATIONS += ../../src_common/ INCLUDE_LOCATIONS += ../../src_common/ include $(AMREX_HOME)/Src/Base/Make.package -include $(AMREX_HOME)/Src/FFT/Make.package include $(AMREX_HOME)/Src/Boundary/Make.package include $(AMREX_HOME)/Src/LinearSolvers/MLMG/Make.package include $(AMREX_HOME)/Src/AmrCore/Make.package diff --git a/exec/structFactTest/GNUmakefile b/exec/structFactTest/GNUmakefile index b39f97814..af26559ab 100644 --- a/exec/structFactTest/GNUmakefile +++ b/exec/structFactTest/GNUmakefile @@ -8,7 +8,8 @@ USE_OMP = FALSE USE_CUDA = FALSE COMP = gnu DIM = 3 -MAX_SPEC = 8 +MAX_SPEC = 2 +USE_FFT = TRUE TINY_PROFILE = FALSE @@ -21,7 +22,6 @@ include ../../src_analysis/Make.package VPATH_LOCATIONS += ../../src_analysis/ INCLUDE_LOCATIONS += ../../src_analysis/ - include ../../src_common/Make.package VPATH_LOCATIONS += ../../src_common/ INCLUDE_LOCATIONS += ../../src_common/ diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp index 581c2ec32..a21e222fb 100644 --- a/src_compressible_stag/main_driver.cpp +++ b/src_compressible_stag/main_driver.cpp @@ -1304,9 +1304,7 @@ void main_driver(const char* argv) Vector< std::string > var_names_turbScalar{"rho","temp","press"}; Vector scaling_turb_scalar(3, dVolinv); TurbSpectrumScalar(MFTurbScalar, geom, step, scaling_turb_scalar, var_names_turbScalar); - } - - if (turbForcing > 0) { + EvaluateWritePlotFileVelGrad(step, time, geom, vel, vel_decomp); EvaluateWritePlotFileVelGradTiny(step, 
time, geom, vel, vel_decomp); } From c28f5ae86baba3df58af9b85e2424340fd01809b Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 3 Jan 2025 08:06:16 -0800 Subject: [PATCH 130/151] remove the single and heffte versions of turbulence code - use only amrex::FFT now --- src_analysis/Make.package | 9 +- src_analysis/TurbSpectra.H | 62 +- ...pectra_distributed.cpp => TurbSpectra.cpp} | 0 src_analysis/TurbSpectra_distributed.H | 52 - src_analysis/TurbSpectra_heffte.H | 53 - src_analysis/TurbSpectra_heffte.cpp | 749 ------------ src_analysis/TurbSpectra_single.H | 68 -- src_analysis/TurbSpectra_single.cpp | 1043 ----------------- 8 files changed, 52 insertions(+), 1984 deletions(-) rename src_analysis/{TurbSpectra_distributed.cpp => TurbSpectra.cpp} (100%) delete mode 100644 src_analysis/TurbSpectra_distributed.H delete mode 100644 src_analysis/TurbSpectra_heffte.H delete mode 100644 src_analysis/TurbSpectra_heffte.cpp delete mode 100644 src_analysis/TurbSpectra_single.H delete mode 100644 src_analysis/TurbSpectra_single.cpp diff --git a/src_analysis/Make.package b/src_analysis/Make.package index 5607d4de9..a2c26cceb 100644 --- a/src_analysis/Make.package +++ b/src_analysis/Make.package @@ -2,11 +2,4 @@ CEXE_headers += StructFact.H CEXE_sources += StructFact.cpp CEXE_headers += TurbSpectra.H -CEXE_headers += TurbSpectra_distributed.H -CEXE_sources += TurbSpectra_distributed.cpp - -#CEXE_sources += TurbSpectra_single.cpp -#CEXE_sources += TurbSpectra_heffte.cpp - -#CEXE_headers += TurbSpectra_single.H -#CEXE_headers += TurbSpectra_heffte.H +CEXE_sources += TurbSpectra.cpp diff --git a/src_analysis/TurbSpectra.H b/src_analysis/TurbSpectra.H index c12214552..419756555 100644 --- a/src_analysis/TurbSpectra.H +++ b/src_analysis/TurbSpectra.H @@ -1,12 +1,52 @@ -#ifndef _TurbSpectra_H_ -#define _TurbSpectra_H_ - -#include -//#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) // use heFFTe -//#include -//#elif defined(USE_DISTRIBUTED_FFT) // use single grid FFT -//#include -//#else // use single grid FFT -//#include -//#endif +#ifndef _TurbSpectraDistributed_H_ +#define _TurbSpectraDistributed_H_ + +#include +#include +#include +#include + +#include + +#include + +#include "common_functions.H" + +#define ALIGN 16 + +using namespace amrex; + + +void IntegrateKScalar(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp); +//void IntegrateKScalarHeffte(const BaseFab >& spectral_field, +// const std::string& name, const Real& scaling, +// const Box& c_local_box, +// const Real& sqrtnpts, +// const int& step); +void IntegrateKVelocity(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp); +//void IntegrateKVelocityHeffte(const BaseFab >& spectral_fieldx, +// const BaseFab >& spectral_fieldy, +// const BaseFab >& spectral_fieldz, +// const std::string& name, const Real& scaling, +// const Box& c_local_box, +// const int& step); +void TurbSpectrumScalar(const MultiFab& variables, + const amrex::Geometry& geom, + const int& step, + const amrex::Vector& var_scaling, + const amrex::Vector< std::string >& var_names); +void TurbSpectrumVelDecomp(const MultiFab& vel, + MultiFab& vel_decomp, + const amrex::Geometry& geom, + const int& step, + const amrex::Real& var_scaling, + const amrex::Vector< std::string >& var_names); + + #endif diff --git a/src_analysis/TurbSpectra_distributed.cpp b/src_analysis/TurbSpectra.cpp similarity index 100% rename from src_analysis/TurbSpectra_distributed.cpp rename to 
src_analysis/TurbSpectra.cpp diff --git a/src_analysis/TurbSpectra_distributed.H b/src_analysis/TurbSpectra_distributed.H deleted file mode 100644 index 419756555..000000000 --- a/src_analysis/TurbSpectra_distributed.H +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef _TurbSpectraDistributed_H_ -#define _TurbSpectraDistributed_H_ - -#include -#include -#include -#include - -#include - -#include - -#include "common_functions.H" - -#define ALIGN 16 - -using namespace amrex; - - -void IntegrateKScalar(const MultiFab& cov_mag, - const std::string& name, - const int& step, - const int& comp); -//void IntegrateKScalarHeffte(const BaseFab >& spectral_field, -// const std::string& name, const Real& scaling, -// const Box& c_local_box, -// const Real& sqrtnpts, -// const int& step); -void IntegrateKVelocity(const MultiFab& cov_mag, - const std::string& name, - const int& step, - const int& comp); -//void IntegrateKVelocityHeffte(const BaseFab >& spectral_fieldx, -// const BaseFab >& spectral_fieldy, -// const BaseFab >& spectral_fieldz, -// const std::string& name, const Real& scaling, -// const Box& c_local_box, -// const int& step); -void TurbSpectrumScalar(const MultiFab& variables, - const amrex::Geometry& geom, - const int& step, - const amrex::Vector& var_scaling, - const amrex::Vector< std::string >& var_names); -void TurbSpectrumVelDecomp(const MultiFab& vel, - MultiFab& vel_decomp, - const amrex::Geometry& geom, - const int& step, - const amrex::Real& var_scaling, - const amrex::Vector< std::string >& var_names); - - -#endif diff --git a/src_analysis/TurbSpectra_heffte.H b/src_analysis/TurbSpectra_heffte.H deleted file mode 100644 index 81e173d2c..000000000 --- a/src_analysis/TurbSpectra_heffte.H +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef _TurbSpectraHeffte_H_ -#define _TurbSpectraHeffte_H_ - -#include - -#include -#include -#include -#include - -#include - -#include - -#include "common_functions.H" - -#define ALIGN 16 - -using namespace amrex; - -void IntegrateKScalar(const MultiFab& cov_mag, - const std::string& name, - const int& step, - const int& comp); -//void IntegrateKScalarHeffte(const BaseFab >& spectral_field, -// const std::string& name, const Real& scaling, -// const Box& c_local_box, -// const Real& sqrtnpts, -// const int& step); -void IntegrateKVelocity(const MultiFab& cov_mag, - const std::string& name, - const int& step, - const int& comp); -//void IntegrateKVelocityHeffte(const BaseFab >& spectral_fieldx, -// const BaseFab >& spectral_fieldy, -// const BaseFab >& spectral_fieldz, -// const std::string& name, const Real& scaling, -// const Box& c_local_box, -// const int& step); -void TurbSpectrumScalar(const MultiFab& variables, - const amrex::Geometry& geom, - const int& step, - const amrex::Vector& var_scaling, - const amrex::Vector< std::string >& var_names); -void TurbSpectrumVelDecomp(const MultiFab& vel, - MultiFab& vel_decomp, - const amrex::Geometry& geom, - const int& step, - const amrex::Real& var_scaling, - const amrex::Vector< std::string >& var_names); - - -#endif diff --git a/src_analysis/TurbSpectra_heffte.cpp b/src_analysis/TurbSpectra_heffte.cpp deleted file mode 100644 index 90cc2615e..000000000 --- a/src_analysis/TurbSpectra_heffte.cpp +++ /dev/null @@ -1,749 +0,0 @@ -#include "TurbSpectra.H" -#include "common_functions.H" - -#include -#include "AMReX_PlotFileUtil.H" -#include "AMReX_BoxArray.H" - -void TurbSpectrumScalar(const MultiFab& variables, - const amrex::Geometry& geom, - const int& step, - const amrex::Vector& scaling, - const amrex::Vector< std::string 
>& var_names) -{ - BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), - "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), - "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.local_size() == 1, - "TurbSpectrumScalar: Must have one Box per MPI process when using heFFTe"); - - int ncomp = variables.nComp(); - - long npts; - Box domain = geom.Domain(); - npts = (domain.length(0)*domain.length(1)*domain.length(2)); - Real sqrtnpts = std::sqrt(npts); - - // get box array and distribution map of variables - DistributionMapping dm = variables.DistributionMap(); - BoxArray ba = variables.boxArray(); - - // since there is 1 MPI rank per box, each MPI rank obtains its local box and the associated boxid - Box local_box; - int local_boxid; - { - for (int i = 0; i < ba.size(); ++i) { - Box b = ba[i]; - // each MPI rank has its own local_box Box and local_boxid ID - if (ParallelDescriptor::MyProc() == dm[i]) { - local_box = b; - local_boxid = i; - } - } - } - - // now each MPI rank works on its own box - // for real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset - - // start by coarsening each box by 2 in the x-direction - Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1))); - - // if the coarsened box's high-x index is even, we shrink the size in 1 in x - // this avoids overlap between coarsened boxes - if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) { - c_local_box.setBig(0,c_local_box.bigEnd(0)-1); - } - // for any boxes that touch the hi-x domain we - // increase the size of boxes by 1 in x - // this makes the overall fft dataset have size (Nx/2+1 x Ny x Nz) - if (local_box.bigEnd(0) == geom.Domain().bigEnd(0)) { - c_local_box.growHi(0,1); - } - - // BOX ARRAY TO STORE COVARIANCE MATRIX IN A MFAB - // create a BoxArray containing the fft boxes - // by construction, these boxes correlate to the associated spectral_data - // this we can copy the spectral data into this multifab since we know they are owned by the same MPI rank - BoxArray fft_ba; - { - BoxList bl; - bl.reserve(ba.size()); - - for (int i = 0; i < ba.size(); ++i) { - Box b = ba[i]; - - Box r_box = b; - Box c_box = amrex::coarsen(r_box, IntVect(AMREX_D_DECL(2,1,1))); - - // this avoids overlap for the cases when one or more r_box's - // have an even cell index in the hi-x cell - if (c_box.bigEnd(0) * 2 == r_box.bigEnd(0)) { - c_box.setBig(0,c_box.bigEnd(0)-1); - } - - // increase the size of boxes touching the hi-x domain by 1 in x - // this is an (Nx x Ny x Nz) -> (Nx/2+1 x Ny x Nz) real-to-complex sizing - if (b.bigEnd(0) == geom.Domain().bigEnd(0)) { - c_box.growHi(0,1); - } - bl.push_back(c_box); - - } - fft_ba.define(std::move(bl)); - } - MultiFab cov(fft_ba, dm, ncomp, 0); - - // each MPI rank gets storage for its piece of the fft - BaseFab > spectral_field(c_local_box, 1, The_Device_Arena()); - MultiFab variables_single(ba, dm, 1, 0); - using heffte_complex = typename heffte::fft_output::type; - - int r2c_direction = 0; - for (int comp=0; comp fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - 
{{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field.dataPtr(); - variables_single.ParallelCopy(variables,comp,0,1); - fft.forward(variables_single[local_boxid].dataPtr(),spectral_data); - Gpu::streamSynchronize(); - - // Fill in the covariance multifab - int comp_gpu = comp; - Real sqrtnpts_gpu = sqrtnpts; - Real scaling_i_gpu = scaling[comp]; - std::string name_gpu = var_names[comp]; - for (MFIter mfi(cov); mfi.isValid(); ++mfi) { - Array4 const& data = cov.array(mfi); - Array4 > spectral = spectral_field.const_array(); - const Box& bx = mfi.fabbox(); - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - Real re = spectral(i,j,k).real(); - Real im = spectral(i,j,k).imag(); - data(i,j,k,comp_gpu) = (re*re + im*im)/(sqrtnpts_gpu*sqrtnpts_gpu*scaling_i_gpu); - }); - } - - // Integrate spectra over k-shells - IntegrateKScalar(cov,name_gpu,step,comp_gpu); - } -} - -void TurbSpectrumVelDecomp(const MultiFab& vel, - MultiFab& vel_decomp, - const amrex::Geometry& geom, - const int& step, - const amrex::Real& scaling, - const amrex::Vector< std::string >& var_names) -{ - BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, - "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, - "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.local_size() == 1, - "TurbSpectrumVelDecomp: Must have one Box per MPI process when using heFFTe"); - - const GpuArray dx = geom.CellSizeArray(); - - long npts; - Box domain = geom.Domain(); - npts = (domain.length(0)*domain.length(1)*domain.length(2)); - Real sqrtnpts = std::sqrt(npts); - - // get box array and distribution map of vel - DistributionMapping dm = vel.DistributionMap(); - BoxArray ba = vel.boxArray(); - - // since there is 1 MPI rank per box, each MPI rank obtains its local box and the associated boxid - Box local_box; - int local_boxid; - { - for (int i = 0; i < ba.size(); ++i) { - Box b = ba[i]; - // each MPI rank has its own local_box Box and local_boxid ID - if (ParallelDescriptor::MyProc() == dm[i]) { - local_box = b; - local_boxid = i; - } - } - } - - // now each MPI rank works on its own box - // for real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset - - // start by coarsening each box by 2 in the x-direction - Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1))); - - // if the coarsened box's high-x index is even, we shrink the size in 1 in x - // this avoids overlap between coarsened boxes - if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) { - c_local_box.setBig(0,c_local_box.bigEnd(0)-1); - } - // for any boxes that touch the hi-x domain we - // increase the size of boxes by 1 in x - // this makes the overall fft dataset have size (Nx/2+1 x Ny x Nz) - if (local_box.bigEnd(0) == geom.Domain().bigEnd(0)) { - c_local_box.growHi(0,1); - } - - // each MPI rank gets storage for its piece of the fft - BaseFab > spectral_field_Tx(c_local_box, 1, The_Device_Arena()); // totalx - BaseFab > spectral_field_Ty(c_local_box, 1, The_Device_Arena()); // totaly - BaseFab > spectral_field_Tz(c_local_box, 1, The_Device_Arena()); // totalz - BaseFab > 
spectral_field_Sx(c_local_box, 1, The_Device_Arena()); // solenoidalx - BaseFab > spectral_field_Sy(c_local_box, 1, The_Device_Arena()); // solenoidaly - BaseFab > spectral_field_Sz(c_local_box, 1, The_Device_Arena()); // solenoidalz - BaseFab > spectral_field_Dx(c_local_box, 1, The_Device_Arena()); // dilatationalx - BaseFab > spectral_field_Dy(c_local_box, 1, The_Device_Arena()); // dilatationaly - BaseFab > spectral_field_Dz(c_local_box, 1, The_Device_Arena()); // dilatationalz - MultiFab vel_single(ba, dm, 1, 0); - - int r2c_direction = 0; - - // ForwardTransform - // X - using heffte_complex = typename heffte::fft_output::type; - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - vel_single.ParallelCopy(vel, 0, 0, 1); - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tx.dataPtr(); - fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); - } - // Y - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - vel_single.ParallelCopy(vel, 1, 0, 1); - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Ty.dataPtr(); - fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); - } - // Z - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - vel_single.ParallelCopy(vel, 2, 0, 1); - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tz.dataPtr(); - fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); - } - - Gpu::streamSynchronize(); - - // Decompose velocity field into solenoidal and dilatational - Array4< GpuComplex > spectral_tx = spectral_field_Tx.array(); - Array4< GpuComplex > spectral_ty = spectral_field_Ty.array(); - Array4< GpuComplex > spectral_tz = spectral_field_Tz.array(); - Array4< GpuComplex > spectral_sx = spectral_field_Sx.array(); - Array4< GpuComplex > spectral_sy = spectral_field_Sy.array(); - Array4< GpuComplex > spectral_sz = spectral_field_Sz.array(); - Array4< GpuComplex > spectral_dx = spectral_field_Dx.array(); - Array4< GpuComplex > spectral_dy = spectral_field_Dy.array(); - Array4< GpuComplex > spectral_dz = spectral_field_Dz.array(); - ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) - { - - int nx = n_cells[0]; - int 
ny = n_cells[1]; - int nz = n_cells[2]; - - Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0; - - if (i <= nx/2) { - - // Get the wavevector - int ki = i; - int kj = j; - if (j >= ny/2) kj = ny - j; - int kk = k; - if (k >= nz/2) kk = nz - k; - - // Gradient Operators - GxR = (cos(2.0*M_PI*ki/nx)-1.0)/dx[0]; - GxC = (sin(2.0*M_PI*ki/nx)-0.0)/dx[0]; - GyR = (cos(2.0*M_PI*kj/ny)-1.0)/dx[1]; - GyC = (sin(2.0*M_PI*kj/ny)-0.0)/dx[1]; - GzR = (cos(2.0*M_PI*kk/nz)-1.0)/dx[2]; - GzC = (sin(2.0*M_PI*kk/nz)-0.0)/dx[2]; - } - else { // conjugate - amrex::Abort("check the code; i should not go beyond bx.length(0)/2"); - } - - // Scale Total velocity FFT components - spectral_tx(i,j,k) *= (1.0/sqrtnpts); - spectral_ty(i,j,k) *= (1.0/sqrtnpts); - spectral_tz(i,j,k) *= (1.0/sqrtnpts); - - // Inverse Laplacian - Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; - - // Divergence of vel - Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + - spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + - spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; - Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + - spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + - spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; - - if (Lap < 1.0e-12) { // zero mode for no bulk motion - spectral_dx(i,j,k) *= 0.0; - spectral_dy(i,j,k) *= 0.0; - spectral_dz(i,j,k) *= 0.0; - } - else { - - // Dilatational velocity - GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, - (divC*GxR - divR*GxC) / Lap); - spectral_dx(i,j,k) = copy_dx; - - GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, - (divC*GyR - divR*GyC) / Lap); - spectral_dy(i,j,k) = copy_dy; - - GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, - (divC*GzR - divR*GzC) / Lap); - spectral_dz(i,j,k) = copy_dz; - } - - // Solenoidal velocity - spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); - spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); - spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); - - }); - - Gpu::streamSynchronize(); - - // BOX ARRAY TO STORE COVARIANCE MATRIX IN A MFAB - // create a BoxArray containing the fft boxes - // by construction, these boxes correlate to the associated spectral_data - // this we can copy the spectral data into this multifab since we know they are owned by the same MPI rank - BoxArray fft_ba; - { - BoxList bl; - bl.reserve(ba.size()); - - for (int i = 0; i < ba.size(); ++i) { - Box b = ba[i]; - - Box r_box = b; - Box c_box = amrex::coarsen(r_box, IntVect(AMREX_D_DECL(2,1,1))); - - // this avoids overlap for the cases when one or more r_box's - // have an even cell index in the hi-x cell - if (c_box.bigEnd(0) * 2 == r_box.bigEnd(0)) { - c_box.setBig(0,c_box.bigEnd(0)-1); - } - - // increase the size of boxes touching the hi-x domain by 1 in x - // this is an (Nx x Ny x Nz) -> (Nx/2+1 x Ny x Nz) real-to-complex sizing - if (b.bigEnd(0) == geom.Domain().bigEnd(0)) { - c_box.growHi(0,1); - } - bl.push_back(c_box); - - } - fft_ba.define(std::move(bl)); - } - MultiFab cov(fft_ba, dm, 3, 0); // total, solenoidal, dilatational - - // Fill in the covariance multifab - Real sqrtnpts_gpu = sqrtnpts; - Real scaling_gpu = scaling; - for (MFIter mfi(cov); mfi.isValid(); ++mfi) { - Array4 const& data = cov.array(mfi); - Array4 > spec_tx = spectral_field_Tx.const_array(); - Array4 > spec_ty = spectral_field_Ty.const_array(); - Array4 > spec_tz = spectral_field_Tz.const_array(); - Array4 > spec_sx = 
spectral_field_Sx.const_array(); - Array4 > spec_sy = spectral_field_Sy.const_array(); - Array4 > spec_sz = spectral_field_Sz.const_array(); - Array4 > spec_dx = spectral_field_Dx.const_array(); - Array4 > spec_dy = spectral_field_Dy.const_array(); - Array4 > spec_dz = spectral_field_Dz.const_array(); - const Box& bx = mfi.fabbox(); - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - Real re_x, re_y, re_z, im_x, im_y, im_z; - - re_x = spec_tx(i,j,k).real(); - im_x = spec_tx(i,j,k).imag(); - re_y = spec_ty(i,j,k).real(); - im_y = spec_ty(i,j,k).imag(); - re_z = spec_tz(i,j,k).real(); - im_z = spec_tz(i,j,k).imag(); - data(i,j,k,0) = (re_x*re_x + im_x*im_x + - re_y*re_y + im_y*im_y + - re_z*re_z + im_z*im_z)/(scaling_gpu); - re_x = spec_sx(i,j,k).real(); - im_x = spec_sx(i,j,k).imag(); - re_y = spec_sy(i,j,k).real(); - im_y = spec_sy(i,j,k).imag(); - re_z = spec_sz(i,j,k).real(); - im_z = spec_sz(i,j,k).imag(); - data(i,j,k,1) = (re_x*re_x + im_x*im_x + - re_y*re_y + im_y*im_y + - re_z*re_z + im_z*im_z)/(scaling_gpu); - re_x = spec_dx(i,j,k).real(); - im_x = spec_dx(i,j,k).imag(); - re_y = spec_dy(i,j,k).real(); - im_y = spec_dy(i,j,k).imag(); - re_z = spec_dz(i,j,k).real(); - im_z = spec_dz(i,j,k).imag(); - data(i,j,k,2) = (re_x*re_x + im_x*im_x + - re_y*re_y + im_y*im_y + - re_z*re_z + im_z*im_z)/(scaling_gpu); - }); - } - - // Integrate K spectrum for velocities - IntegrateKVelocity(cov,"vel_total" ,step,0); - IntegrateKVelocity(cov,"vel_solenoidal",step,1); - IntegrateKVelocity(cov,"vel_dilational",step,2); - - MultiFab vel_decomp_single(ba, dm, 1, 0); - // inverse Fourier transform solenoidal and dilatational components - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sx.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - Gpu::streamSynchronize(); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 0, 1); - } - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sy.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - Gpu::streamSynchronize(); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 1, 1); - } - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - 
{{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sz.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - Gpu::streamSynchronize(); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 2, 1); - } - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dx.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - Gpu::streamSynchronize(); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 3, 1); - } - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dy.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - Gpu::streamSynchronize(); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 4, 1); - } - { -#if defined(HEFFTE_CUFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_ROCFFT) - heffte::fft3d_r2c fft -#elif defined(HEFFTE_FFTW) - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, - {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, - r2c_direction, ParallelDescriptor::Communicator()); - - heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dz.dataPtr(); - fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); - - Gpu::streamSynchronize(); - vel_decomp.ParallelCopy(vel_decomp_single, 0, 5, 1); - } - - - vel_decomp.mult(1.0/sqrtnpts); - -} - -void IntegrateKScalar(const MultiFab& cov_mag, - const std::string& name, - const int& step, - const int& comp) - -{ - int npts = n_cells[0]/2; - - Gpu::DeviceVector phisum_device(npts); - Gpu::DeviceVector phicnt_device(npts); -// Gpu::HostVector phisum_host(npts); -// Gpu::HostVector phicnt_host(npts); - - Gpu::HostVector phisum_host(npts); - - Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data - int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data - - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - phisum_ptr[d] = 0.; - phicnt_ptr[d] = 0; - }); -// for (int d=0; d & cov = cov_mag.const_array(mfi); - - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - int ki = i; - int kj = j; - if (j >= ny/2) 
kj = ny - j; - int kk = k; - if (k >= nz/2) kk = nz - k; - - Real dist = (ki*ki + kj*kj + kk*kk); - dist = std::sqrt(dist); - - if ( dist <= n_cells[0]/2-0.5) { - dist = dist+0.5; - int cell = int(dist); - amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); - amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); - } - }); - } - - Gpu::streamSynchronize(); - - ParallelDescriptor::ReduceRealSum(phisum_device.dataPtr(),npts); - ParallelDescriptor::ReduceIntSum(phicnt_device.dataPtr(),npts); - - Real dk = 1.; - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - if (d != 0) { - phisum_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d]; - } - }); - - Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); - Gpu::streamSynchronize(); - - if (ParallelDescriptor::IOProcessor()) { - std::ofstream turb; - std::string turbBaseName = "turb_"+name; - std::string turbName = Concatenate(turbBaseName,step,7); - turbName += ".txt"; - - turb.open(turbName); - for (int d=1; d phisum_device(npts); - Gpu::DeviceVector phicnt_device(npts); - - Gpu::HostVector phisum_host(npts); - - Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data - int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data - - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - phisum_ptr[d] = 0.; - phicnt_ptr[d] = 0; - }); - - int comp_gpu = comp; - int nx = n_cells[0]; - int ny = n_cells[1]; - int nz = n_cells[2]; - for ( MFIter mfi(cov_mag,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { - - const Box& bx = mfi.tilebox(); - - const Array4 & cov = cov_mag.const_array(mfi); - - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - int ki = i; - int kj = j; - if (j >= ny/2) kj = ny - j; - int kk = k; - if (k >= nz/2) kk = nz - k; - - Real dist = (ki*ki + kj*kj + kk*kk); - dist = std::sqrt(dist); - - if ( dist <= n_cells[0]/2-0.5) { - dist = dist+0.5; - int cell = int(dist); - amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); - amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); - } - }); - } - - Gpu::streamSynchronize(); - - ParallelDescriptor::ReduceRealSum(phisum_device.dataPtr(),npts); - ParallelDescriptor::ReduceIntSum(phicnt_device.dataPtr(),npts); - - Real dk = 1.; - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - if (d != 0) { - phisum_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d]; - } - }); - - Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); - Gpu::streamSynchronize(); - - if (ParallelDescriptor::IOProcessor()) { - std::ofstream turb; - std::string turbBaseName = "turb_"+name; - std::string turbName = Concatenate(turbBaseName,step,7); - turbName += ".txt"; - - turb.open(turbName); - for (int d=1; d -#elif AMREX_USE_HIP -# if __has_include() // ROCm 5.3+ -# include -# else -# include -# endif -#else -#include -#include -#endif - -#include -#include -#include -#include - -#include - -#include - -#include "common_functions.H" - -#define ALIGN 16 - -using namespace amrex; - -#ifdef AMREX_USE_CUDA -std::string cufftError (const cufftResult& err); -#endif -#ifdef AMREX_USE_HIP -std::string rocfftError (const rocfft_status err); -void Assert_rocfft_status (std::string const& name, rocfft_status status); -#endif - -void IntegrateKScalar(const Vector > > >& spectral_field, - const MultiFab& variables_onegrid, - const std::string& name, - const Real& scaling, - const Real& sqrtnpts, - const int& step); -void 
IntegrateKVelocity(const Vector > > >& spectral_fieldx, - const Vector > > >& spectral_fieldy, - const Vector > > >& spectral_fieldz, - const MultiFab& vel_onegrid, - const std::string& name, - const Real& scaling, - const int& step); -void TurbSpectrumScalar(const MultiFab& variables, - const amrex::Geometry& geom, - const int& step, - const amrex::Vector& var_scaling, - const amrex::Vector< std::string >& var_names); -void TurbSpectrumVelDecomp(const MultiFab& vel, - MultiFab& vel_decomp, - const amrex::Geometry& geom, - const int& step, - const amrex::Real& var_scaling, - const amrex::Vector< std::string >& var_names); -void InverseFFTVel(Vector > > >& spectral_field, - MultiFab& vel_decomp_onegrid, const IntVect& fft_size); - - -#endif diff --git a/src_analysis/TurbSpectra_single.cpp b/src_analysis/TurbSpectra_single.cpp deleted file mode 100644 index b6bccc029..000000000 --- a/src_analysis/TurbSpectra_single.cpp +++ /dev/null @@ -1,1043 +0,0 @@ -#include "TurbSpectra.H" -#include "common_functions.H" - -#include -#include "AMReX_PlotFileUtil.H" -#include "AMReX_BoxArray.H" - -#ifdef AMREX_USE_CUDA -std::string cufftError (const cufftResult& err) -{ - switch (err) { - case CUFFT_SUCCESS: return "CUFFT_SUCCESS"; - case CUFFT_INVALID_PLAN: return "CUFFT_INVALID_PLAN"; - case CUFFT_ALLOC_FAILED: return "CUFFT_ALLOC_FAILED"; - case CUFFT_INVALID_TYPE: return "CUFFT_INVALID_TYPE"; - case CUFFT_INVALID_VALUE: return "CUFFT_INVALID_VALUE"; - case CUFFT_INTERNAL_ERROR: return "CUFFT_INTERNAL_ERROR"; - case CUFFT_EXEC_FAILED: return "CUFFT_EXEC_FAILED"; - case CUFFT_SETUP_FAILED: return "CUFFT_SETUP_FAILED"; - case CUFFT_INVALID_SIZE: return "CUFFT_INVALID_SIZE"; - case CUFFT_UNALIGNED_DATA: return "CUFFT_UNALIGNED_DATA"; - default: return std::to_string(err) + " (unknown error code)"; - } -} -#endif - -#ifdef AMREX_USE_HIP -std::string rocfftError (const rocfft_status err) -{ - if (err == rocfft_status_success) { - return std::string("rocfft_status_success"); - } else if (err == rocfft_status_failure) { - return std::string("rocfft_status_failure"); - } else if (err == rocfft_status_invalid_arg_value) { - return std::string("rocfft_status_invalid_arg_value"); - } else if (err == rocfft_status_invalid_dimensions) { - return std::string("rocfft_status_invalid_dimensions"); - } else if (err == rocfft_status_invalid_array_type) { - return std::string("rocfft_status_invalid_array_type"); - } else if (err == rocfft_status_invalid_strides) { - return std::string("rocfft_status_invalid_strides"); - } else if (err == rocfft_status_invalid_distance) { - return std::string("rocfft_status_invalid_distance"); - } else if (err == rocfft_status_invalid_offset) { - return std::string("rocfft_status_invalid_offset"); - } else { - return std::to_string(err) + " (unknown error code)"; - } -} - -void Assert_rocfft_status (std::string const& name, rocfft_status status) -{ - if (status != rocfft_status_success) { - amrex::AllPrint() << name + " failed! 
Error: " + rocfftError(status) << "\n";; - } -} -#endif - -void TurbSpectrumScalar(const MultiFab& variables, - const amrex::Geometry& geom, - const int& step, - const amrex::Vector& scaling, - const amrex::Vector< std::string >& var_names) -{ - BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); - int ncomp = variables.nComp(); - - long npts; - - // Initialize the boxarray "ba_onegrid" from the single box "domain" - BoxArray ba_onegrid; - { - Box domain = geom.Domain(); - ba_onegrid.define(domain); - npts = (domain.length(0)*domain.length(1)*domain.length(2)); - } - Real sqrtnpts = std::sqrt(npts); - DistributionMapping dmap_onegrid(ba_onegrid); - MultiFab variables_onegrid; - variables_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); - -#ifdef AMREX_USE_CUDA - using FFTplan = cufftHandle; - using FFTcomplex = cuDoubleComplex; -#elif AMREX_USE_HIP - using FFTplan = rocfft_plan; - using FFTcomplex = double2; -#else - using FFTplan = fftw_plan; - using FFTcomplex = fftw_complex; -#endif - - // size of box including ghost cell range - IntVect fft_size; - - // contain to store FFT - note it is shrunk by "half" in x - Vector > > > spectral_field; - Vector forward_plan; - bool built_plan = false; - - // for CUDA builds we only need to build the plan once; track whether we did - for (int comp=0; comp >(spectral_bx,1, - The_Device_Arena())); - spectral_field.back()->setVal(0.0); // touch the memory - FFTplan fplan; - -#ifdef AMREX_USE_CUDA // CUDA - cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan3d forward failed! Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP // HIP - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; - rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, - rocfft_transform_type_real_forward, rocfft_precision_double, - 3, lengths, 1, nullptr); - Assert_rocfft_status("rocfft_plan_create", result); -#else // host - fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], - variables_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_field.back()->dataPtr()), - FFTW_ESTIMATE); -#endif - forward_plan.push_back(fplan); - } - - built_plan = true; - } - - ParallelDescriptor::Barrier(); - - // ForwardTransform - for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) { - int i = mfi.LocalIndex(); -#ifdef AMREX_USE_CUDA - cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); - cufftResult result = cufftExecD2Z(forward_plan[i], - variables_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_field[i]->dataPtr())); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP - rocfft_execution_info execinfo = nullptr; - rocfft_status result = rocfft_execution_info_create(&execinfo); - Assert_rocfft_status("rocfft_execution_info_create", result); - - std::size_t buffersize = 0; - result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); - Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); - - void* buffer = amrex::The_Arena()->alloc(buffersize); - result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); - - result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - Assert_rocfft_status("rocfft_execution_info_set_stream", result); - - amrex::Real* variables_onegrid_ptr = variables_onegrid[mfi].dataPtr(); - FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr()); - result = rocfft_execute(forward_plan[i], - (void**) &variables_onegrid_ptr, // in - (void**) &spectral_field_ptr, // out - execinfo); - Assert_rocfft_status("rocfft_execute", result); - amrex::Gpu::streamSynchronize(); - amrex::The_Arena()->free(buffer); - result = rocfft_execution_info_destroy(execinfo); - Assert_rocfft_status("rocfft_execution_info_destroy", result); -#else - fftw_execute(forward_plan[i]); -#endif - } - - // Integrate spectra over k-shells - IntegrateKScalar(spectral_field,variables_onegrid,var_names[comp],scaling[comp],sqrtnpts,step); - } - - // destroy fft plan - for (int i = 0; i < forward_plan.size(); ++i) { -#ifdef AMREX_USE_CUDA - cufftDestroy(forward_plan[i]); -#elif AMREX_USE_HIP - rocfft_plan_destroy(forward_plan[i]); -#else - fftw_destroy_plan(forward_plan[i]); -#endif - } -} - -void TurbSpectrumVelDecomp(const MultiFab& vel, - MultiFab& vel_decomp, - const amrex::Geometry& geom, - const int& step, - const amrex::Real& scaling, - const amrex::Vector< std::string >& var_names) -{ - BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, - "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, - "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); - const GpuArray dx = geom.CellSizeArray(); - - long npts; - - // Initialize the boxarray "ba_onegrid" from the single box "domain" - BoxArray ba_onegrid; - { - Box domain = geom.Domain(); - ba_onegrid.define(domain); - npts = (domain.length(0)*domain.length(1)*domain.length(2)); - } - Real sqrtnpts = std::sqrt(npts); - DistributionMapping dmap_onegrid(ba_onegrid); - MultiFab vel_onegrid; - vel_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); - -#ifdef AMREX_USE_CUDA - using FFTplan = cufftHandle; - using FFTcomplex = cuDoubleComplex; -#elif AMREX_USE_HIP - using FFTplan = rocfft_plan; - using FFTcomplex = double2; -#else - using FFTplan = fftw_plan; - using FFTcomplex = fftw_complex; -#endif - - // size of box including ghost cell range - IntVect fft_size; - - // contain to store FFT - note it is shrunk by "half" in x - Vector > > > spectral_fieldx; - Vector > > > spectral_fieldy; - Vector > > > spectral_fieldz; - Vector > > > spectral_field_Sx; - Vector > > > spectral_field_Sy; - Vector > > > spectral_field_Sz; - Vector > > > spectral_field_Dx; - Vector > > > spectral_field_Dy; - Vector > > > spectral_field_Dz; - - // x-velocity - { - Vector forward_plan; - vel_onegrid.ParallelCopy(vel,0,0,1); - for (MFIter 
mfi(vel_onegrid); mfi.isValid(); ++mfi) { - - // grab a single box including ghost cell range - Box realspace_bx = mfi.fabbox(); - - // size of box including ghost cell range - fft_size = realspace_bx.length(); // This will be different for hybrid FFT - - // this is the size of the box, except the 0th component is 'halved plus 1' - IntVect spectral_bx_size = fft_size; - spectral_bx_size[0] = fft_size[0]/2 + 1; - - // spectral box - Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); - - spectral_fieldx.emplace_back(new BaseFab >(spectral_bx,1, - The_Device_Arena())); - spectral_fieldx.back()->setVal(0.0); // touch the memory - - spectral_field_Sx.emplace_back(new BaseFab >(spectral_bx,1, - The_Device_Arena())); - spectral_field_Sx.back()->setVal(0.0); // touch the memory - - spectral_field_Dx.emplace_back(new BaseFab >(spectral_bx,1, - The_Device_Arena())); - spectral_field_Dx.back()->setVal(0.0); // touch the memory - - FFTplan fplan; - -#ifdef AMREX_USE_CUDA // CUDA - cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan3d forward failed! Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP // HIP - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; - rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, - rocfft_transform_type_real_forward, rocfft_precision_double, - 3, lengths, 1, nullptr); - Assert_rocfft_status("rocfft_plan_create", result); -#else // host - fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], - vel_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_fieldx.back()->dataPtr()), - FFTW_ESTIMATE); -#endif - forward_plan.push_back(fplan); - } - - ParallelDescriptor::Barrier(); - - // ForwardTransform - for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { - int i = mfi.LocalIndex(); -#ifdef AMREX_USE_CUDA - cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); - cufftResult result = cufftExecD2Z(forward_plan[i], - vel_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_fieldx[i]->dataPtr())); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP - rocfft_execution_info execinfo = nullptr; - rocfft_status result = rocfft_execution_info_create(&execinfo); - Assert_rocfft_status("rocfft_execution_info_create", result); - - std::size_t buffersize = 0; - result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); - Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); - - void* buffer = amrex::The_Arena()->alloc(buffersize); - result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); - - result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - Assert_rocfft_status("rocfft_execution_info_set_stream", result); - - amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); - FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldx[i]->dataPtr()); - result = rocfft_execute(forward_plan[i], - (void**) &vel_onegrid_ptr, // in - (void**) &spectral_field_ptr, // out - execinfo); - Assert_rocfft_status("rocfft_execute", result); - amrex::Gpu::streamSynchronize(); - amrex::The_Arena()->free(buffer); - result = rocfft_execution_info_destroy(execinfo); - Assert_rocfft_status("rocfft_execution_info_destroy", result); -#else - fftw_execute(forward_plan[i]); -#endif - } - - // destroy fft plan - for (int i = 0; i < forward_plan.size(); ++i) { -#ifdef AMREX_USE_CUDA - cufftDestroy(forward_plan[i]); -#elif AMREX_USE_HIP - rocfft_plan_destroy(forward_plan[i]); -#else - fftw_destroy_plan(forward_plan[i]); -#endif - } - - } // end x-vel - - // y-velocity - { - Vector forward_plan; - vel_onegrid.ParallelCopy(vel,1,0,1); - for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { - - // grab a single box including ghost cell range - Box realspace_bx = mfi.fabbox(); - - // size of box including ghost cell range - fft_size = realspace_bx.length(); // This will be different for hybrid FFT - - // this is the size of the box, except the 0th component is 'halved plus 1' - IntVect spectral_bx_size = fft_size; - spectral_bx_size[0] = fft_size[0]/2 + 1; - - // spectral box - Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); - - spectral_fieldy.emplace_back(new BaseFab >(spectral_bx,1, - The_Device_Arena())); - spectral_fieldy.back()->setVal(0.0); // touch the memory - - spectral_field_Sy.emplace_back(new BaseFab >(spectral_bx,1, - The_Device_Arena())); - spectral_field_Sy.back()->setVal(0.0); // touch the memory - - spectral_field_Dy.emplace_back(new BaseFab >(spectral_bx,1, - The_Device_Arena())); - spectral_field_Dy.back()->setVal(0.0); // touch the memory - - FFTplan fplan; - -#ifdef AMREX_USE_CUDA // CUDA - cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan3d forward failed! 
Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP // HIP - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; - rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, - rocfft_transform_type_real_forward, rocfft_precision_double, - 3, lengths, 1, nullptr); - Assert_rocfft_status("rocfft_plan_create", result); -#else // host - fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], - vel_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_fieldy.back()->dataPtr()), - FFTW_ESTIMATE); -#endif - forward_plan.push_back(fplan); - } - - ParallelDescriptor::Barrier(); - - // ForwardTransform - for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { - int i = mfi.LocalIndex(); -#ifdef AMREX_USE_CUDA - cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); - cufftResult result = cufftExecD2Z(forward_plan[i], - vel_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_fieldy[i]->dataPtr())); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " forward transform using cufftExec failed! Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP - rocfft_execution_info execinfo = nullptr; - rocfft_status result = rocfft_execution_info_create(&execinfo); - Assert_rocfft_status("rocfft_execution_info_create", result); - - std::size_t buffersize = 0; - result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); - Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); - - void* buffer = amrex::The_Arena()->alloc(buffersize); - result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); - - result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - Assert_rocfft_status("rocfft_execution_info_set_stream", result); - - amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); - FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldy[i]->dataPtr()); - result = rocfft_execute(forward_plan[i], - (void**) &vel_onegrid_ptr, // in - (void**) &spectral_field_ptr, // out - execinfo); - Assert_rocfft_status("rocfft_execute", result); - amrex::Gpu::streamSynchronize(); - amrex::The_Arena()->free(buffer); - result = rocfft_execution_info_destroy(execinfo); - Assert_rocfft_status("rocfft_execution_info_destroy", result); -#else - fftw_execute(forward_plan[i]); -#endif - } - - // destroy fft plan - for (int i = 0; i < forward_plan.size(); ++i) { -#ifdef AMREX_USE_CUDA - cufftDestroy(forward_plan[i]); -#elif AMREX_USE_HIP - rocfft_plan_destroy(forward_plan[i]); -#else - fftw_destroy_plan(forward_plan[i]); -#endif - } - - } // end y-vel - - // z-velocity - { - Vector forward_plan; - vel_onegrid.ParallelCopy(vel,2,0,1); - for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { - - // grab a single box including ghost cell range - Box realspace_bx = mfi.fabbox(); - - // size of box including ghost cell range - fft_size = realspace_bx.length(); // This will be different for hybrid FFT - - // this is the size of the box, except the 0th component is 'halved plus 1' - IntVect spectral_bx_size = fft_size; - spectral_bx_size[0] = fft_size[0]/2 + 1; - - // spectral box - Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); - - spectral_fieldz.emplace_back(new BaseFab >(spectral_bx,1, - The_Device_Arena())); - spectral_fieldz.back()->setVal(0.0); // touch the memory - - spectral_field_Sz.emplace_back(new BaseFab >(spectral_bx,1, - The_Device_Arena())); - 
spectral_field_Sz.back()->setVal(0.0); // touch the memory - - spectral_field_Dz.emplace_back(new BaseFab >(spectral_bx,1, - The_Device_Arena())); - spectral_field_Dz.back()->setVal(0.0); // touch the memory - - FFTplan fplan; - -#ifdef AMREX_USE_CUDA // CUDA - cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan3d forward failed! Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP // HIP - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; - rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, - rocfft_transform_type_real_forward, rocfft_precision_double, - 3, lengths, 1, nullptr); - Assert_rocfft_status("rocfft_plan_create", result); -#else // host - fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], - vel_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_fieldz.back()->dataPtr()), - FFTW_ESTIMATE); -#endif - forward_plan.push_back(fplan); - } - - ParallelDescriptor::Barrier(); - - // ForwardTransform - for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { - int i = mfi.LocalIndex(); -#ifdef AMREX_USE_CUDA - cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); - cufftResult result = cufftExecD2Z(forward_plan[i], - vel_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_fieldz[i]->dataPtr())); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " forward transform using cufftExec failed! Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP - rocfft_execution_info execinfo = nullptr; - rocfft_status result = rocfft_execution_info_create(&execinfo); - Assert_rocfft_status("rocfft_execution_info_create", result); - - std::size_t buffersize = 0; - result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); - Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); - - void* buffer = amrex::The_Arena()->alloc(buffersize); - result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); - - result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - Assert_rocfft_status("rocfft_execution_info_set_stream", result); - - amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); - FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldz[i]->dataPtr()); - result = rocfft_execute(forward_plan[i], - (void**) &vel_onegrid_ptr, // in - (void**) &spectral_field_ptr, // out - execinfo); - Assert_rocfft_status("rocfft_execute", result); - amrex::Gpu::streamSynchronize(); - amrex::The_Arena()->free(buffer); - result = rocfft_execution_info_destroy(execinfo); - Assert_rocfft_status("rocfft_execution_info_destroy", result); -#else - fftw_execute(forward_plan[i]); -#endif - } - - // destroy fft plan - for (int i = 0; i < forward_plan.size(); ++i) { -#ifdef AMREX_USE_CUDA - cufftDestroy(forward_plan[i]); -#elif AMREX_USE_HIP - rocfft_plan_destroy(forward_plan[i]); -#else - fftw_destroy_plan(forward_plan[i]); -#endif - } - - } // end x-vel - - - // Decompose velocity field into solenoidal and dilatational - for ( MFIter mfi(vel_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { - - const Box& bx = mfi.tilebox(); - Array4< GpuComplex > spectral_tx = (*spectral_fieldx[0]) .array(); - Array4< GpuComplex > spectral_ty = (*spectral_fieldy[0]) .array(); - Array4< GpuComplex > spectral_tz = (*spectral_fieldz[0]) .array(); - Array4< 
GpuComplex > spectral_sx = (*spectral_field_Sx[0]).array(); - Array4< GpuComplex > spectral_sy = (*spectral_field_Sy[0]).array(); - Array4< GpuComplex > spectral_sz = (*spectral_field_Sz[0]).array(); - Array4< GpuComplex > spectral_dx = (*spectral_field_Dx[0]).array(); - Array4< GpuComplex > spectral_dy = (*spectral_field_Dy[0]).array(); - Array4< GpuComplex > spectral_dz = (*spectral_field_Dz[0]).array(); - - amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - int nx = n_cells[0]; - int ny = n_cells[1]; - int nz = n_cells[2]; - - Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0; - - if (i <= nx/2) { - - // Get the wavevector - int ki = i; - int kj = j; - if (j >= ny/2) kj = ny - j; - int kk = k; - if (k >= nz/2) kk = nz - k; - - // Gradient Operators - GxR = (cos(2.0*M_PI*i/nx)-1.0)/dx[0]; - GxC = (sin(2.0*M_PI*i/nx)-0.0)/dx[0]; - GyR = (cos(2.0*M_PI*j/ny)-1.0)/dx[1]; - GyC = (sin(2.0*M_PI*j/ny)-0.0)/dx[1]; - GzR = (cos(2.0*M_PI*k/nz)-1.0)/dx[2]; - GzC = (sin(2.0*M_PI*k/nz)-0.0)/dx[2]; - - // Scale Total velocity FFT components - spectral_tx(i,j,k) *= (1.0/sqrtnpts); - spectral_ty(i,j,k) *= (1.0/sqrtnpts); - spectral_tz(i,j,k) *= (1.0/sqrtnpts); - - // Inverse Laplacian - Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; - - // Divergence of vel - Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + - spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + - spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; - Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + - spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + - spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; - - if (Lap < 1.0e-12) { // zero mode for no bulk motion - spectral_dx(i,j,k) *= 0.0; - spectral_dy(i,j,k) *= 0.0; - spectral_dz(i,j,k) *= 0.0; - } - else { - // Dilatational velocity - GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, - (divC*GxR - divR*GxC) / Lap); - spectral_dx(i,j,k) = copy_dx; - - GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, - (divC*GyR - divR*GyC) / Lap); - spectral_dy(i,j,k) = copy_dy; - - GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, - (divC*GzR - divR*GzC) / Lap); - spectral_dz(i,j,k) = copy_dz; - } - - // Solenoidal velocity - spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); - spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); - spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); - } - }); - } - - ParallelDescriptor::Barrier(); - - // Integrate K spectrum for velocities - IntegrateKVelocity(spectral_fieldx, spectral_fieldy, spectral_fieldz, vel_onegrid, "vel_total" ,scaling,step); - IntegrateKVelocity(spectral_field_Sx, spectral_field_Sy, spectral_field_Sz, vel_onegrid, "vel_solenoidal",scaling,step); - IntegrateKVelocity(spectral_field_Dx, spectral_field_Dy, spectral_field_Dz, vel_onegrid, "vel_dilatational",scaling,step); - - - // Inverse Solenoidal and Dilatational Velocity Components - { // solenoidal x - MultiFab vel_decomp_onegrid; - vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); - vel_decomp_onegrid.setVal(0.0); - InverseFFTVel(spectral_field_Sx, vel_decomp_onegrid,fft_size); - // copy into external multifab - vel_decomp.ParallelCopy(vel_decomp_onegrid,0,0,1); - } - { // solenoidal y - MultiFab vel_decomp_onegrid; - vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); - vel_decomp_onegrid.setVal(0.0); - InverseFFTVel(spectral_field_Sy, vel_decomp_onegrid,fft_size); - // copy 
into external multifab - vel_decomp.ParallelCopy(vel_decomp_onegrid,0,1,1); - } - { // solenoidal z - MultiFab vel_decomp_onegrid; - vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); - vel_decomp_onegrid.setVal(0.0); - InverseFFTVel(spectral_field_Sz, vel_decomp_onegrid,fft_size); - // copy into external multifab - vel_decomp.ParallelCopy(vel_decomp_onegrid,0,2,1); - } - { // dilatational x - MultiFab vel_decomp_onegrid; - vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); - vel_decomp_onegrid.setVal(0.0); - InverseFFTVel(spectral_field_Dx, vel_decomp_onegrid,fft_size); - // copy into external multifab - vel_decomp.ParallelCopy(vel_decomp_onegrid,0,3,1); - } - { // dilatational y - MultiFab vel_decomp_onegrid; - vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); - vel_decomp_onegrid.setVal(0.0); - InverseFFTVel(spectral_field_Dy, vel_decomp_onegrid,fft_size); - // copy into external multifab - vel_decomp.ParallelCopy(vel_decomp_onegrid,0,4,1); - } - { // dilatational z - MultiFab vel_decomp_onegrid; - vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); - vel_decomp_onegrid.setVal(0.0); - InverseFFTVel(spectral_field_Dz, vel_decomp_onegrid,fft_size); - // copy into external multifab - vel_decomp.ParallelCopy(vel_decomp_onegrid,0,5,1); - } - vel_decomp.mult(1.0/sqrtnpts); -} - -void IntegrateKScalar(const Vector > > >& spectral_field, - const MultiFab& variables_onegrid, - const std::string& name, - const Real& scaling, - const Real& sqrtnpts, - const int& step) - -{ - int npts = n_cells[0]/2; - Gpu::DeviceVector phisum_device(npts); - Gpu::DeviceVector phicnt_device(npts); - - Gpu::HostVector phisum_host(npts); - - Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data - int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data - - // Integrate spectra over k-shells - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - phisum_ptr[d] = 0.; - phicnt_ptr[d] = 0; - }); - - int nx = n_cells[0]; - int ny = n_cells[1]; - int nz = n_cells[2]; - for ( MFIter mfi(variables_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { - - const Box& bx = mfi.fabbox(); - - const Array4 > spectral = (*spectral_field[0]).const_array(); - - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - if (i <= bx.length(0)/2) { // only half of kx-domain - int ki = i; - int kj = j; - if (j >= ny/2) kj = ny - j; - int kk = k; - if (k >= nz/2) kk = nz - k; - - Real dist = (ki*ki + kj*kj + kk*kk); - dist = std::sqrt(dist); - - if ( dist <= n_cells[0]/2-0.5) { - dist = dist+0.5; - int cell = int(dist); - Real real = spectral(i,j,k).real(); - Real imag = spectral(i,j,k).imag(); - Real cov = (1.0/(scaling*sqrtnpts*sqrtnpts))*(real*real + imag*imag); - amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); - amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); - } - } - }); - } - - for (int d=1; d > > >& spectral_fieldx, - const Vector > > >& spectral_fieldy, - const Vector > > >& spectral_fieldz, - const MultiFab& vel_onegrid, - const std::string& name, - const Real& scaling, - const int& step) -{ - int npts = n_cells[0]/2; - - Gpu::DeviceVector phisum_device(npts); - Gpu::DeviceVector phicnt_device(npts); - Gpu::HostVector phisum_host(npts); - Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data - int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data - - // Integrate spectra over k-shells - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - phisum_ptr[d] = 0.; - phicnt_ptr[d] = 0; - }); - - int nx = 
n_cells[0]; - int ny = n_cells[1]; - int nz = n_cells[2]; - for ( MFIter mfi(vel_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { - - const Box& bx = mfi.fabbox(); - - const Array4 > spectralx = (*spectral_fieldx[0]).const_array(); - const Array4 > spectraly = (*spectral_fieldy[0]).const_array(); - const Array4 > spectralz = (*spectral_fieldz[0]).const_array(); - - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - if (i <= bx.length(0)/2) { // only half of kx-domain - int ki = i; - int kj = j; - if (j >= ny/2) kj = ny - j; - int kk = k; - if (k >= nz/2) kk = nz - k; - - Real dist = (ki*ki + kj*kj + kk*kk); - dist = std::sqrt(dist); - - if ( dist <= n_cells[0]/2-0.5) { - dist = dist+0.5; - int cell = int(dist); - Real real, imag, cov_x, cov_y, cov_z, cov; - real = spectralx(i,j,k).real(); - imag = spectralx(i,j,k).imag(); - cov_x = (1.0/scaling)*(real*real + imag*imag); - real = spectraly(i,j,k).real(); - imag = spectraly(i,j,k).imag(); - cov_y = (1.0/scaling)*(real*real + imag*imag); - real = spectralz(i,j,k).real(); - imag = spectralz(i,j,k).imag(); - cov_z = (1.0/scaling)*(real*real + imag*imag); - cov = cov_x + cov_y + cov_z; - amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); - amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); - } - } - }); - } - - for (int d=1; d > > >& spectral_field, - MultiFab& vel_decomp_onegrid, const IntVect& fft_size) -{ - -#ifdef AMREX_USE_CUDA - using FFTplan = cufftHandle; - using FFTcomplex = cuDoubleComplex; -#elif AMREX_USE_HIP - using FFTplan = rocfft_plan; - using FFTcomplex = double2; -#else - using FFTplan = fftw_plan; - using FFTcomplex = fftw_complex; -#endif - - Vector backward_plan; - - for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { - FFTplan fplan; -#ifdef AMREX_USE_CUDA // CUDA - cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan3d forward failed! Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP // HIP - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; - rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, - rocfft_transform_type_real_inverse, rocfft_precision_double, - 3, lengths, 1, nullptr); - Assert_rocfft_status("rocfft_plan_create", result); -#else // host - fplan = fftw_plan_dft_c2r_3d(fft_size[2], fft_size[1], fft_size[0], - reinterpret_cast - (spectral_field.back()->dataPtr()), - vel_decomp_onegrid[mfi].dataPtr(), - FFTW_ESTIMATE); -#endif - backward_plan.push_back(fplan); - } - - ParallelDescriptor::Barrier(); - - // Backward Transform - for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { - int i = mfi.LocalIndex(); -#ifdef AMREX_USE_CUDA - cufftSetStream(backward_plan[i], amrex::Gpu::gpuStream()); - cufftResult result = cufftExecZ2D(backward_plan[i], - reinterpret_cast - (spectral_field[i]->dataPtr()), - vel_decomp_onegrid[mfi].dataPtr()); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " - << cufftError(result) << "\n"; - } -#elif AMREX_USE_HIP - rocfft_execution_info execinfo = nullptr; - rocfft_status result = rocfft_execution_info_create(&execinfo); - Assert_rocfft_status("rocfft_execution_info_create", result); - - std::size_t buffersize = 0; - result = rocfft_plan_get_work_buffer_size(backward_plan[i], &buffersize); - Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); - - void* buffer = amrex::The_Arena()->alloc(buffersize); - result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); - - result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - Assert_rocfft_status("rocfft_execution_info_set_stream", result); - - amrex::Real* vel_onegrid_ptr = vel_decomp_onegrid[mfi].dataPtr(); - FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr()); - result = rocfft_execute(backward_plan[i], - (void**) &vel_onegrid_ptr, // in - (void**) &spectral_field_ptr, // out - execinfo); - Assert_rocfft_status("rocfft_execute", result); - amrex::Gpu::streamSynchronize(); - amrex::The_Arena()->free(buffer); - result = rocfft_execution_info_destroy(execinfo); - Assert_rocfft_status("rocfft_execution_info_destroy", result); -#else - fftw_execute(backward_plan[i]); -#endif - } - - // destroy fft plan - for (int i = 0; i < backward_plan.size(); ++i) { -#ifdef AMREX_USE_CUDA - cufftDestroy(backward_plan[i]); -#elif AMREX_USE_HIP - rocfft_plan_destroy(backward_plan[i]); -#else - fftw_destroy_plan(backward_plan[i]); -#endif - } - -} - From 740997f619216f40375b2345cb4cebbaeaee1430 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 3 Jan 2025 08:52:59 -0800 Subject: [PATCH 131/151] beginnings of amrex::FFT - much work to do before it will even compile --- src_analysis/StructFact.H | 20 --- src_analysis/StructFact.cpp | 346 ++++-------------------------------- 2 files changed, 32 insertions(+), 334 deletions(-) diff --git a/src_analysis/StructFact.H b/src_analysis/StructFact.H index fa1c0656e..cefbd6508 100644 --- a/src_analysis/StructFact.H +++ b/src_analysis/StructFact.H @@ -5,32 +5,12 @@ #include #include #include - -// These are for FFTW / cuFFT / rocFFT - -#ifdef AMREX_USE_CUDA -#include -#elif AMREX_USE_HIP -# if __has_include() // ROCm 5.3+ -# include -# else -# include -# endif -#else -#include -#if AMREX_USE_MPI -#include -#endif -#endif - #include #include #include "common_functions.H" -#define ALIGN 16 - using namespace amrex; class StructFact { diff --git a/src_analysis/StructFact.cpp b/src_analysis/StructFact.cpp index 115076ca9..cc71770de 100644 --- a/src_analysis/StructFact.cpp +++ b/src_analysis/StructFact.cpp @@ -5,56 +5,7 @@ #include "AMReX_PlotFileUtil.H" #include "AMReX_BoxArray.H" -#ifdef AMREX_USE_CUDA -std::string cufftErrorToString (const cufftResult& err) -{ - switch (err) { - case CUFFT_SUCCESS: return "CUFFT_SUCCESS"; - case CUFFT_INVALID_PLAN: return "CUFFT_INVALID_PLAN"; - case CUFFT_ALLOC_FAILED: return "CUFFT_ALLOC_FAILED"; - case CUFFT_INVALID_TYPE: return "CUFFT_INVALID_TYPE"; - case CUFFT_INVALID_VALUE: return "CUFFT_INVALID_VALUE"; - case CUFFT_INTERNAL_ERROR: return "CUFFT_INTERNAL_ERROR"; - case CUFFT_EXEC_FAILED: return "CUFFT_EXEC_FAILED"; - case CUFFT_SETUP_FAILED: return "CUFFT_SETUP_FAILED"; - case CUFFT_INVALID_SIZE: return "CUFFT_INVALID_SIZE"; - case CUFFT_UNALIGNED_DATA: return "CUFFT_UNALIGNED_DATA"; - default: return std::to_string(err) + " (unknown error code)"; - } -} -#endif - -#ifdef 
AMREX_USE_HIP -std::string rocfftErrorToString (const rocfft_status err) -{ - if (err == rocfft_status_success) { - return std::string("rocfft_status_success"); - } else if (err == rocfft_status_failure) { - return std::string("rocfft_status_failure"); - } else if (err == rocfft_status_invalid_arg_value) { - return std::string("rocfft_status_invalid_arg_value"); - } else if (err == rocfft_status_invalid_dimensions) { - return std::string("rocfft_status_invalid_dimensions"); - } else if (err == rocfft_status_invalid_array_type) { - return std::string("rocfft_status_invalid_array_type"); - } else if (err == rocfft_status_invalid_strides) { - return std::string("rocfft_status_invalid_strides"); - } else if (err == rocfft_status_invalid_distance) { - return std::string("rocfft_status_invalid_distance"); - } else if (err == rocfft_status_invalid_offset) { - return std::string("rocfft_status_invalid_offset"); - } else { - return std::to_string(err) + " (unknown error code)"; - } -} - -void assert_rocfft_status (std::string const& name, rocfft_status status) -{ - if (status != rocfft_status_success) { - amrex::AllPrint() << name + " failed! Error: " + rocfftErrorToString(status) << "\n";; - } -} -#endif +#include StructFact::StructFact() {} @@ -186,12 +137,6 @@ void StructFact::define(const BoxArray& ba_in, const DistributionMapping& dmap_i } } - /* - for (int n=0; n > > phi_fft(ba_fft, dm_fft, 1, 0); -#ifdef AMREX_USE_CUDA - using FFTplan = cufftHandle; - using FFTcomplex = cuDoubleComplex; -#elif AMREX_USE_HIP - using FFTplan = rocfft_plan; - using FFTcomplex = double2; -#else - using FFTplan = fftw_plan; - using FFTcomplex = fftw_complex; -#endif - - // contain to store FFT - note it is shrunk by "half" in x - Vector > > > spectral_field; - - Vector forward_plan; - - // for CUDA builds we only need to build the plan once; track whether we did - bool built_plan = false; + // create storage for one component of variables + MultiFab phi(ba,dm,1,0); + // we will take one FFT at a time and copy the answer into the + // corresponding component of variables_dft_real/imag for (int comp=0; comp >(spectral_bx,1, - The_Device_Arena())); - spectral_field.back()->setVal(0.0); // touch the memory - - FFTplan fplan; - -#ifdef AMREX_USE_CUDA // CUDA - if (is_flattened) { -#if (AMREX_SPACEDIM == 2) - cufftResult result = cufftPlan1d(&fplan, fft_size[0], CUFFT_D2Z, 1); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan1d forward failed! Error: " - << cufftErrorToString(result) << "\n"; - } -#elif (AMREX_SPACEDIM == 3) - cufftResult result = cufftPlan2d(&fplan, fft_size[1], fft_size[0], CUFFT_D2Z); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan2d forward failed! Error: " - << cufftErrorToString(result) << "\n"; - } -#endif - } else { -#if (AMREX_SPACEDIM == 2) - cufftResult result = cufftPlan2d(&fplan, fft_size[1], fft_size[0], CUFFT_D2Z); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan2d forward failed! Error: " - << cufftErrorToString(result) << "\n"; - } -#elif (AMREX_SPACEDIM == 3) - cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan3d forward failed! 
Error: " - << cufftErrorToString(result) << "\n"; - } -#endif - } -#elif AMREX_USE_HIP // HIP - if (is_flattened) { -#if (AMREX_SPACEDIM == 2) - const std::size_t lengths[] = {std::size_t(fft_size[0])}; - rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, - rocfft_transform_type_real_forward, rocfft_precision_double, - 1, lengths, 1, nullptr); - assert_rocfft_status("rocfft_plan_create", result); -#elif (AMREX_SPACEDIM == 3) - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1])}; - rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, - rocfft_transform_type_real_forward, rocfft_precision_double, - 2, lengths, 1, nullptr); - assert_rocfft_status("rocfft_plan_create", result); -#endif - } else { -#if (AMREX_SPACEDIM == 2) - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1])}; - rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, - rocfft_transform_type_real_forward, rocfft_precision_double, - 2, lengths, 1, nullptr); - assert_rocfft_status("rocfft_plan_create", result); -#elif (AMREX_SPACEDIM == 3) - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; - rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, - rocfft_transform_type_real_forward, rocfft_precision_double, - 3, lengths, 1, nullptr); - assert_rocfft_status("rocfft_plan_create", result); -#endif - } -#else // host - - if (is_flattened) { -#if (AMREX_SPACEDIM == 2) - fplan = fftw_plan_dft_r2c_1d(fft_size[0], - variables_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_field.back()->dataPtr()), - FFTW_ESTIMATE); -#elif (AMREX_SPACEDIM == 3) - fplan = fftw_plan_dft_r2c_2d(fft_size[1], fft_size[0], - variables_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_field.back()->dataPtr()), - FFTW_ESTIMATE); -#endif - } else { -#if (AMREX_SPACEDIM == 2) - fplan = fftw_plan_dft_r2c_2d(fft_size[1], fft_size[0], - variables_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_field.back()->dataPtr()), - FFTW_ESTIMATE); -#elif (AMREX_SPACEDIM == 3) - fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], - variables_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_field.back()->dataPtr()), - FFTW_ESTIMATE); -#endif - } -#endif - - forward_plan.push_back(fplan); - } - - built_plan = true; - - } + if (comp_fft == false) continue; - ParallelDescriptor::Barrier(); + // copy component "comp" into a MultiFab with one component + MultiFab::Copy(phi,variables,comp,0,1,0); // ForwardTransform - for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) { - int i = mfi.LocalIndex(); -#ifdef AMREX_USE_CUDA - cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); - cufftResult result = cufftExecD2Z(forward_plan[i], - variables_onegrid[mfi].dataPtr(), - reinterpret_cast - (spectral_field[i]->dataPtr())); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " - << cufftErrorToString(result) << "\n"; - } -#elif AMREX_USE_HIP - rocfft_execution_info execinfo = nullptr; - rocfft_status result = rocfft_execution_info_create(&execinfo); - assert_rocfft_status("rocfft_execution_info_create", result); + my_fft.forward(phi,phi_fft); - std::size_t buffersize = 0; - result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); - assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); - - void* buffer = amrex::The_Arena()->alloc(buffersize); - result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); - - result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - assert_rocfft_status("rocfft_execution_info_set_stream", result); - - amrex::Real* variables_onegrid_ptr = variables_onegrid[mfi].dataPtr(); - FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr()); - result = rocfft_execute(forward_plan[i], - (void**) &variables_onegrid_ptr, // in - (void**) &spectral_field_ptr, // out - execinfo); - assert_rocfft_status("rocfft_execute", result); - amrex::Gpu::streamSynchronize(); - amrex::The_Arena()->free(buffer); - result = rocfft_execution_info_destroy(execinfo); - assert_rocfft_status("rocfft_execution_info_destroy", result); -#else - fftw_execute(forward_plan[i]); -#endif - } + // copy my_fft into a single-grid MultiFab + // copy data to a full-sized MultiFab // this involves copying the complex conjugate from the half-sized field @@ -862,18 +615,6 @@ void StructFact::ComputeFFT(const MultiFab& variables, variables_dft_imag.ParallelCopy(variables_dft_imag_onegrid,0,comp,1); } - - // destroy fft plan - for (int i = 0; i < forward_plan.size(); ++i) { -#ifdef AMREX_USE_CUDA - cufftDestroy(forward_plan[i]); -#elif AMREX_USE_HIP - rocfft_plan_destroy(forward_plan[i]); -#else - fftw_destroy_plan(forward_plan[i]); -#endif - } -// fftw_mpi_cleanup(); } void StructFact::InverseFFT(MultiFab& variables, @@ -884,14 +625,6 @@ void StructFact::InverseFFT(MultiFab& variables, BL_PROFILE_VAR("StructFact::InverseFFT()", InverseFFT); -#ifdef AMREX_USE_CUDA - // Print() << "Using cuFFT\n"; -#elif AMREX_USE_HIP - // Print() << "Using rocFFT\n"; -#else - // Print() << "Using FFTW\n"; -#endif - bool is_flattened = false; long npts; @@ -928,8 +661,6 @@ void StructFact::InverseFFT(MultiFab& variables, variables_dft_real_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); variables_dft_imag_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); -// fftw_mpi_init(); - #ifdef AMREX_USE_CUDA using FFTplan = cufftHandle; using FFTcomplex = cuDoubleComplex; @@ -1152,19 +883,6 @@ void StructFact::InverseFFT(MultiFab& variables, variables.ParallelCopy(variables_onegrid,0,comp,1); } - // destroy fft plan - for (int i = 0; i < backward_plan.size(); ++i) { -#ifdef AMREX_USE_CUDA - cufftDestroy(backward_plan[i]); -#elif AMREX_USE_HIP - rocfft_plan_destroy(backward_plan[i]); -#else - fftw_destroy_plan(backward_plan[i]); -#endif - } - -// fftw_mpi_cleanup(); - } From 30ff76475af7a7989992794aeadf886b4516be86 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 3 Jan 2025 09:20:21 -0800 Subject: [PATCH 132/151] forward FFT now seems to work --- src_analysis/StructFact.cpp | 47 ++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/src_analysis/StructFact.cpp b/src_analysis/StructFact.cpp index cc71770de..8f961b856 100644 --- a/src_analysis/StructFact.cpp +++ b/src_analysis/StructFact.cpp @@ 
-478,27 +478,26 @@ void StructFact::ComputeFFT(const MultiFab& variables, bool is_flattened = false; - // compute number of points in the domain and the square root -#if (AMREX_SPACEDIM == 2) - long npts = (domain.length(0)*domain.length(1)); -#elif (AMREX_SPACEDIM == 3) - long npts = (domain.length(0)*domain.length(1)*domain.length(2)); -#endif - Real sqrtnpts = std::sqrt(npts); - Box domain = geom.Domain(); if (domain.bigEnd(AMREX_SPACEDIM-1) == 0) { is_flattened = true; // flattened case } + // compute number of points in the domain and the square root + long npts = (AMREX_SPACEDIM == 2) ? (domain.length(0)*domain.length(1)) : (domain.length(0)*domain.length(1)*domain.length(2)); + Real sqrtnpts = std::sqrt(npts); + // extract BoxArray and DistributionMapping from variables BoxArray ba = variables.boxArray(); DistributionMapping dm = variables.DistributionMap(); + // create storage for one component of variables + MultiFab phi(ba,dm,1,0); + // Initialize the boxarray "ba_onegrid" from the single box "domain" // Initilize a DistributionMapping for one grid BoxArray ba_onegrid(domain); - DistributionMapping dmap_onegrid(ba_onegrid); + DistributionMapping dm_onegrid(ba_onegrid); // create amrex::FFT object amrex::FFT::R2C my_fft(domain); @@ -507,8 +506,12 @@ void StructFact::ComputeFFT(const MultiFab& variables, auto const& [ba_fft, dm_fft] = my_fft.getSpectralDataLayout(); FabArray > > phi_fft(ba_fft, dm_fft, 1, 0); - // create storage for one component of variables - MultiFab phi(ba,dm,1,0); + Box domain_fft = ba_fft.minimalBox(); + BoxArray ba_fft_onegrid(domain_fft); + FabArray > > phi_fft_onegrid(ba_fft_onegrid, dm_onegrid, 1, 0); + + MultiFab variables_dft_real_onegrid(ba_onegrid,dm_onegrid,1,0); + MultiFab variables_dft_imag_onegrid(ba_onegrid,dm_onegrid,1,0); // we will take one FFT at a time and copy the answer into the // corresponding component of variables_dft_real/imag @@ -531,20 +534,20 @@ void StructFact::ComputeFFT(const MultiFab& variables, my_fft.forward(phi,phi_fft); // copy my_fft into a single-grid MultiFab - + phi_fft_onegrid.ParallelCopy(phi_fft,0,0,1); // copy data to a full-sized MultiFab // this involves copying the complex conjugate from the half-sized field // into the appropriate place in the full MultiFab for (MFIter mfi(variables_dft_real_onegrid); mfi.isValid(); ++mfi) { - Array4< GpuComplex > spectral = (*spectral_field[0]).array(); + Box bx = mfi.fabbox(); + + Array4> spectral = phi_fft_onegrid.array(mfi); Array4 const& realpart = variables_dft_real_onegrid.array(mfi); Array4 const& imagpart = variables_dft_imag_onegrid.array(mfi); - Box bx = mfi.fabbox(); - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { @@ -599,16 +602,6 @@ void StructFact::ComputeFFT(const MultiFab& variables, realpart(i,j,k) /= sqrtnpts; imagpart(i,j,k) /= sqrtnpts; }); - - /* - amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - std::cout << "HACKFFT " << i << " " << j << " " << k << " " - << realpart(i,j,k) << " + " << imagpart(i,j,k) << "i" - << std::endl; - }); - */ } variables_dft_real.ParallelCopy(variables_dft_real_onegrid,0,comp,1); @@ -622,7 +615,7 @@ void StructFact::InverseFFT(MultiFab& variables, const MultiFab& variables_dft_imag, const Geometry& geom) { - +#if 0 BL_PROFILE_VAR("StructFact::InverseFFT()", InverseFFT); bool is_flattened = false; @@ -882,7 +875,7 @@ void StructFact::InverseFFT(MultiFab& variables, variables_onegrid.mult(1.0/sqrtnpts); variables.ParallelCopy(variables_onegrid,0,comp,1); } - +#endif } 
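The two patches above replace the hand-rolled cuFFT/rocFFT/FFTW plan management in StructFact::ComputeFFT with the amrex::FFT::R2C wrapper. A minimal sketch of that forward-transform pattern follows; it is built only from the calls that appear in the diff (constructing FFT::R2C from the domain box, getSpectralDataLayout(), forward()), while the header name, the Real template argument, and the names do_forward_fft, phi, and phi_fft are illustrative assumptions rather than code from the repository.

#include <AMReX_FFT.H>        // header name assumed; provides amrex::FFT::R2C
#include <AMReX_MultiFab.H>

using namespace amrex;

// Sketch: forward real-to-complex FFT of one component of 'variables',
// following the calls used in StructFact::ComputeFFT in the patch above.
void do_forward_fft (const MultiFab& variables, int comp, const Geometry& geom)
{
    Box domain = geom.Domain();

    // one FFT object for the full domain; the spectral box is shrunk to nx/2+1 in x
    FFT::R2C<Real> r2c(domain);

    // the complex output lives on the BoxArray/DistributionMapping the FFT object provides
    auto const& [ba_fft, dm_fft] = r2c.getSpectralDataLayout();
    FabArray<BaseFab<GpuComplex<Real>>> phi_fft(ba_fft, dm_fft, 1, 0);

    // copy the requested component into a single-component MultiFab and transform it
    MultiFab phi(variables.boxArray(), variables.DistributionMap(), 1, 0);
    MultiFab::Copy(phi, variables, comp, 0, 1, 0);
    r2c.forward(phi, phi_fft);

    // phi_fft now holds the unnormalized half-spectrum (kx <= nx/2); the patch
    // reconstructs the conjugate half and divides by sqrt(npts) when copying the
    // real and imaginary parts into the full-sized MultiFabs.
}

As in the patch, the FFT object is meant to be created once, outside the loop over components, so plans are not rebuilt for every transform.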
From aae027e6ea70a6bac7c2b8474446b10ae512b836 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 3 Jan 2025 09:48:27 -0800 Subject: [PATCH 133/151] cleanup --- src_analysis/StructFact.H | 66 ++-- src_analysis/StructFact.cpp | 729 +----------------------------------- 2 files changed, 46 insertions(+), 749 deletions(-) diff --git a/src_analysis/StructFact.H b/src_analysis/StructFact.H index cefbd6508..1206a794a 100644 --- a/src_analysis/StructFact.H +++ b/src_analysis/StructFact.H @@ -58,44 +58,37 @@ public: StructFact(); - StructFact(const amrex::BoxArray&, const amrex::DistributionMapping&, - const amrex::Vector< std::string >&, - const amrex::Vector< amrex::Real >&, - const int& verbosity=0); - - StructFact(const amrex::BoxArray&, const amrex::DistributionMapping&, - const amrex::Vector< std::string >&, - const amrex::Vector< amrex::Real >&, - const amrex::Vector< int >&, const amrex::Vector< int >&, - const int& verbosity=0); - - void define(const amrex::BoxArray&, const amrex::DistributionMapping&, - const amrex::Vector< std::string >&, - const amrex::Vector< amrex::Real >&, - const int& verbosity=0); - - void define(const amrex::BoxArray&, const amrex::DistributionMapping&, - const amrex::Vector< std::string >&, - const amrex::Vector< amrex::Real >&, - const amrex::Vector< int >&, const amrex::Vector< int >&, - const int& verbosity=0); - - void defineDecomp(const amrex::BoxArray&, const amrex::DistributionMapping&, - const amrex::Vector< std::string >&, - const amrex::Vector< amrex::Real >&, - const amrex::Vector< int >&, - const amrex::Vector< int >&); + StructFact(const BoxArray& ba_in, + const DistributionMapping& dmap_in, + const Vector< std::string >& var_names, + const Vector< Real >& var_scaling_in, + const int& verbosity_in=0); + + StructFact(const BoxArray& ba_in, + const DistributionMapping& dmap_in, + const Vector< std::string >& var_names, + const Vector< Real >& var_scaling_in, + const Vector< int >& s_pairA_in, + const Vector< int >& s_pairB_in, + const int& verbosity_in=0); + + void define(const BoxArray& ba_in, + const DistributionMapping& dmap_in, + const Vector< std::string >& var_names, + const Vector< Real >& var_scaling_in, + const int& verbosity_in=0); + + void define(const BoxArray& ba_in, + const DistributionMapping& dmap_in, + const Vector< std::string >& var_names, + const Vector< Real >& var_scaling_in, + const Vector< int >& s_pairA_in, + const Vector< int >& s_pairB_in, + const int& verbosity_in=0); void FortStructure(const amrex::MultiFab&, const amrex::Geometry&, const int& reset=0); - void FortStructureDecomp(const amrex::MultiFab& vel, const amrex::Geometry& geom, - const int& reset=0); - - void DecomposeVelFourier(const amrex::MultiFab& vel_dft_real, - const amrex::MultiFab& vel_dft_imag, - const amrex::Geometry& geom); - void Reset(); void ComputeFFT(const amrex::MultiFab&, amrex::MultiFab&, @@ -105,8 +98,6 @@ public: void InverseFFT(amrex::MultiFab&, const amrex::MultiFab&, const amrex::MultiFab&, const amrex::Geometry&); - void GetDecompVel(amrex::MultiFab&, const amrex::Geometry&); - void WritePlotFile(const int, const amrex::Real, const amrex::Geometry&, std::string, const int& zero_avg=1); @@ -122,9 +113,6 @@ public: void IntegratekShellsScalar(const int& step, const amrex::Geometry& geom, const amrex::Vector< std::string >& names); - void IntegratekShellsDecomp(const int& step, const amrex::Geometry& geom, - const std::string& name_sol="vel_sol", const std::string& name_dil="vel_dil"); - void AddToExternal(amrex::MultiFab& x_mag, 
amrex::MultiFab& x_realimag, const amrex::Geometry&, const int& zero_avg=1); int get_ncov() const { return NCOV; } diff --git a/src_analysis/StructFact.cpp b/src_analysis/StructFact.cpp index 8f961b856..bf12e4c7f 100644 --- a/src_analysis/StructFact.cpp +++ b/src_analysis/StructFact.cpp @@ -7,13 +7,15 @@ #include +// blank constructor StructFact::StructFact() {} -// var_names contains the names of all variables under consideration -// this constructor computes the covariances of all possible pairs of variables +// this constructor takes in var_names, which contains the names of all variables under consideration +// we will compute the covariances of all possible pairs of variables // var_scaling must be sized to match the total number of pairs of variables -StructFact::StructFact(const BoxArray& ba_in, const DistributionMapping& dmap_in, +StructFact::StructFact(const BoxArray& ba_in, + const DistributionMapping& dmap_in, const Vector< std::string >& var_names, const Vector< Real >& var_scaling_in, const int& verbosity_in) { @@ -22,10 +24,11 @@ StructFact::StructFact(const BoxArray& ba_in, const DistributionMapping& dmap_in } -// var_names contains the names of all variables under consideration -// this constructor compute the covariances of the pairs of variables defined in s_pairA/B_in +// this constructor takes in var_names, which contains the names of all variables under consideration +// we will compute the covariances of the pairs of variables defined in s_pairA/B_in // var_scaling must be sized to match the total number of pairs of variables -StructFact::StructFact(const BoxArray& ba_in, const DistributionMapping& dmap_in, +StructFact::StructFact(const BoxArray& ba_in, + const DistributionMapping& dmap_in, const Vector< std::string >& var_names, const Vector< Real >& var_scaling_in, const Vector< int >& s_pairA_in, @@ -36,9 +39,11 @@ StructFact::StructFact(const BoxArray& ba_in, const DistributionMapping& dmap_in } - -// this builds a list of all possible pairs of variables and calls define() -void StructFact::define(const BoxArray& ba_in, const DistributionMapping& dmap_in, +// this define takes in var_names, which contains the names of all variables under consideration +// we will compute the covariances of all possible pairs of variables +// var_scaling must be sized to match the total number of pairs of variables +void StructFact::define(const BoxArray& ba_in, + const DistributionMapping& dmap_in, const Vector< std::string >& var_names, const Vector< Real >& var_scaling_in, const int& verbosity_in) { @@ -61,7 +66,11 @@ void StructFact::define(const BoxArray& ba_in, const DistributionMapping& dmap_i } -void StructFact::define(const BoxArray& ba_in, const DistributionMapping& dmap_in, +// this define takes in var_names, which contains the names of all variables under consideration +// we will compute the covariances of the pairs of variables defined in s_pairA/B_in +// var_scaling must be sized to match the total number of pairs of variables +void StructFact::define(const BoxArray& ba_in, + const DistributionMapping& dmap_in, const Vector< std::string >& var_names, const Vector< Real >& var_scaling_in, const Vector< int >& s_pairA_in, @@ -196,72 +205,6 @@ void StructFact::define(const BoxArray& ba_in, const DistributionMapping& dmap_i } } -void StructFact::defineDecomp(const amrex::BoxArray& ba_in, - const amrex::DistributionMapping& dmap_in, - const Vector< std::string >& /*var_names*/, - const amrex::Vector< amrex::Real >& var_scaling_in, - const Vector< int >& s_pairA_in, - 
const Vector< int >& s_pairB_in) -{ - - BL_PROFILE_VAR("StructFact::defineDecomp()",StructFactDefineDecomp); - - decompose = true; - - if (s_pairA_in.size() != s_pairB_in.size()) - amrex::Error("StructFact::define() - Must have an equal number of components"); - - NVAR = 3; - NCOV = 6; - scaling.resize(NCOV); - for (int n=0; n > > > spectral_field; - - Vector backward_plan; - - // for CUDA builds we only need to build the plan once; track whether we did - bool built_plan = false; - - for (int comp=0; comp >(spectral_bx,1, - The_Device_Arena())); - spectral_field.back()->setVal(0.0); // touch the memory - - Array4< GpuComplex > spectral = (*spectral_field[0]).array(); - Array4 const& realpart = variables_dft_real_onegrid.array(mfi); - Array4 const& imagpart = variables_dft_imag_onegrid.array(mfi); - - Box bx = mfi.fabbox(); - - amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - if (i <= bx.length(0)/2) { - GpuComplex copy(realpart(i,j,k),imagpart(i,j,k)); - spectral(i,j,k) = copy; - } - }); - } - - // build FFTplan if necessary - for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) { - - if (!built_plan) { - - Box realspace_bx = mfi.fabbox(); - - IntVect fft_size = realspace_bx.length(); - - FFTplan bplan; - -#ifdef AMREX_USE_CUDA // CUDA - if (is_flattened) { -#if (AMREX_SPACEDIM == 2) - cufftResult result = cufftPlan1d(&bplan, fft_size[0], CUFFT_Z2D, 1); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan1d forward failed! Error: " - << cufftErrorToString(result) << "\n"; - } -#elif (AMREX_SPACEDIM == 3) - cufftResult result = cufftPlan2d(&bplan, fft_size[1], fft_size[0], CUFFT_Z2D); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan2d forward failed! Error: " - << cufftErrorToString(result) << "\n"; - } -#endif - } else { -#if (AMREX_SPACEDIM == 2) - cufftResult result = cufftPlan2d(&bplan, fft_size[1], fft_size[0], CUFFT_Z2D); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan2d forward failed! Error: " - << cufftErrorToString(result) << "\n"; - } -#elif (AMREX_SPACEDIM == 3) - cufftResult result = cufftPlan3d(&bplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " cufftplan3d forward failed! 
Error: " - << cufftErrorToString(result) << "\n"; - } -#endif - } -#elif AMREX_USE_HIP // HIP - if (is_flattened) { -#if (AMREX_SPACEDIM == 2) - const std::size_t lengths[] = {std::size_t(fft_size[0])}; - rocfft_status result = rocfft_plan_create(&bplan, rocfft_placement_notinplace, - rocfft_transform_type_real_inverse, rocfft_precision_double, - 1, lengths, 1, nullptr); - assert_rocfft_status("rocfft_plan_create", result); -#elif (AMREX_SPACEDIM == 3) - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1])}; - rocfft_status result = rocfft_plan_create(&bplan, rocfft_placement_notinplace, - rocfft_transform_type_real_inverse, rocfft_precision_double, - 2, lengths, 1, nullptr); - assert_rocfft_status("rocfft_plan_create", result); -#endif - } else { -#if (AMREX_SPACEDIM == 2) - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1])}; - rocfft_status result = rocfft_plan_create(&bplan, rocfft_placement_notinplace, - rocfft_transform_type_real_inverse, rocfft_precision_double, - 2, lengths, 1, nullptr); - assert_rocfft_status("rocfft_plan_create", result); -#elif (AMREX_SPACEDIM == 3) - const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; - rocfft_status result = rocfft_plan_create(&bplan, rocfft_placement_notinplace, - rocfft_transform_type_real_inverse, rocfft_precision_double, - 3, lengths, 1, nullptr); - assert_rocfft_status("rocfft_plan_create", result); -#endif - } -#else // host - - if (is_flattened) { -#if (AMREX_SPACEDIM == 2) - bplan = fftw_plan_dft_c2r_1d(fft_size[0], - reinterpret_cast - (spectral_field.back()->dataPtr()), - variables_onegrid[mfi].dataPtr(), - FFTW_ESTIMATE); -#elif (AMREX_SPACEDIM == 3) - bplan = fftw_plan_dft_c2r_2d(fft_size[1], fft_size[0], - reinterpret_cast - (spectral_field.back()->dataPtr()), - variables_onegrid[mfi].dataPtr(), - FFTW_ESTIMATE); -#endif - } else { -#if (AMREX_SPACEDIM == 2) - bplan = fftw_plan_dft_c2r_2d(fft_size[1], fft_size[0], - reinterpret_cast - (spectral_field.back()->dataPtr()), - variables_onegrid[mfi].dataPtr(), - FFTW_ESTIMATE); -#elif (AMREX_SPACEDIM == 3) - bplan = fftw_plan_dft_c2r_3d(fft_size[2], fft_size[1], fft_size[0], - reinterpret_cast - (spectral_field.back()->dataPtr()), - variables_onegrid[mfi].dataPtr(), - FFTW_ESTIMATE); -#endif - } -#endif - - backward_plan.push_back(bplan); - } - - built_plan = true; - - } // end MFITer - - ParallelDescriptor::Barrier(); - - // InverseTransform - for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) { - int i = mfi.LocalIndex(); -#ifdef AMREX_USE_CUDA - cufftSetStream(backward_plan[i], amrex::Gpu::gpuStream()); - cufftResult result = cufftExecZ2D(backward_plan[i], - reinterpret_cast - (spectral_field[i]->dataPtr()), - variables_onegrid[mfi].dataPtr()); - if (result != CUFFT_SUCCESS) { - amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " - << cufftErrorToString(result) << "\n"; - } -#elif AMREX_USE_HIP - rocfft_execution_info execinfo = nullptr; - rocfft_status result = rocfft_execution_info_create(&execinfo); - assert_rocfft_status("rocfft_execution_info_create", result); - - std::size_t buffersize = 0; - result = rocfft_plan_get_work_buffer_size(backward_plan[i], &buffersize); - assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); - - void* buffer = amrex::The_Arena()->alloc(buffersize); - result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); - assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); - - result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); - assert_rocfft_status("rocfft_execution_info_set_stream", result); - - amrex::Real* variables_onegrid_ptr = variables_onegrid[mfi].dataPtr(); - FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr()); - result = rocfft_execute(backward_plan[i], - (void**) &spectral_field_ptr, // in - (void**) &variables_onegrid_ptr, // out - execinfo); - assert_rocfft_status("rocfft_execute", result); - amrex::Gpu::streamSynchronize(); - amrex::The_Arena()->free(buffer); - result = rocfft_execution_info_destroy(execinfo); - assert_rocfft_status("rocfft_execution_info_destroy", result); -#else - fftw_execute(backward_plan[i]); -#endif - } - - variables_onegrid.mult(1.0/sqrtnpts); - variables.ParallelCopy(variables_onegrid,0,comp,1); - } -#endif -} - - void StructFact::WritePlotFile(const int step, const Real time, const Geometry& geom, std::string plotfile_base, const int& zero_avg) { @@ -1247,129 +810,6 @@ void StructFact::IntegratekShells(const int& step, const Geometry& /*geom*/, con } } } - -void StructFact::IntegratekShellsDecomp(const int& step, - const amrex::Geometry& /*geom*/, - const std::string& name_sol, - const std::string& name_dil) -{ - BL_PROFILE_VAR("StructFact::IntegratekShellsDecomp",IntegratekShellsDecomp); - - GpuArray center; - for (int d=0; d phisum_sol_device(npts); - Gpu::DeviceVector phisum_dil_device(npts); - Gpu::DeviceVector phicnt_device(npts); - - Gpu::HostVector phisum_sol_host(npts); - Gpu::HostVector phisum_dil_host(npts); - - Real* phisum_sol_ptr = phisum_sol_device.dataPtr(); // pointer to data - Real* phisum_dil_ptr = phisum_dil_device.dataPtr(); // pointer to data - int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data - - amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept - { - phisum_sol_ptr[d] = 0.; - phisum_dil_ptr[d] = 0.; - phicnt_ptr[d] = 0; - }); - - // only consider cells that are within 15k of the center point - - for ( MFIter mfi(cov_mag,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { - - const Box& bx = mfi.tilebox(); - - const Array4 & cov = cov_mag.array(mfi); - - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - int ilen = amrex::Math::abs(i-center[0]); - int jlen = amrex::Math::abs(j-center[1]); - int klen = (AMREX_SPACEDIM == 3) ? 
amrex::Math::abs(k-center[2]) : 0; - - Real dist = (ilen*ilen + jlen*jlen + klen*klen); - dist = std::sqrt(dist); - - if ( dist <= center[0]-0.5) { - dist = dist+0.5; - int cell = int(dist); - for (int d=0; d& names) { @@ -1515,137 +955,6 @@ void StructFact::AddToExternal(MultiFab& x_mag, MultiFab& x_realimag, const Geom } - -void StructFact::DecomposeVelFourier(const amrex::MultiFab& vel_dft_real, - const amrex::MultiFab& vel_dft_imag, - const amrex::Geometry& geom) -{ - BL_PROFILE_VAR("StructFact::DecomposeVelFourier",DecomposeVelFourier); - - const BoxArray& ba = vel_sol_real.boxArray(); - const DistributionMapping& dm = vel_sol_real.DistributionMap(); - MultiFab dft_real, dft_imag; - dft_real.define(ba, dm, 3, 0); - dft_imag.define(ba, dm, 3, 0); - dft_real.ParallelCopy(vel_dft_real,0,0,3); - dft_imag.ParallelCopy(vel_dft_imag,0,0,3); - - const GpuArray dx = geom.CellSizeArray(); - - for (MFIter mfi(dft_real); mfi.isValid(); ++mfi) { - - Box bx = mfi.fabbox(); - - Array4 const& real = dft_real.array(mfi); - Array4 const& imag = dft_imag.array(mfi); - - Array4< Real> const& real_sol = vel_sol_real.array(mfi); - Array4< Real> const& imag_sol = vel_sol_imag.array(mfi); - - Array4< Real> const& real_dil = vel_dil_real.array(mfi); - Array4< Real> const& imag_dil = vel_dil_imag.array(mfi); - - amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - int nx = bx.length(0); - int ny = bx.length(1); - int nz = bx.length(2); - - Real GxR, GxC, GyR, GyC, GzR, GzC; - - if (i <= bx.length(0)/2) { - // Gradient Operators - GxR = (cos(2.0*M_PI*i/nx)-1.0)/dx[0]; - GxC = (sin(2.0*M_PI*i/nx)-0.0)/dx[0]; - GyR = (cos(2.0*M_PI*j/ny)-1.0)/dx[1]; - GyC = (sin(2.0*M_PI*j/ny)-0.0)/dx[1]; - GzR = (cos(2.0*M_PI*k/nz)-1.0)/dx[2]; - GzC = (sin(2.0*M_PI*k/nz)-0.0)/dx[2]; - } - else { // conjugate - // Gradient Operators - GxR = (cos(2.0*M_PI*(nx-i)/nx)-1.0)/dx[0]; - GxC = (sin(2.0*M_PI*(nx-i)/nx)-0.0)/dx[0]; - GyR = (cos(2.0*M_PI*(ny-j)/ny)-1.0)/dx[1]; - GyC = (sin(2.0*M_PI*(ny-j)/ny)-0.0)/dx[1]; - GzR = (cos(2.0*M_PI*(nz-k)/nz)-1.0)/dx[2]; - GzC = (sin(2.0*M_PI*(nz-k)/nz)-0.0)/dx[2]; - } - - // Inverse Laplacian - Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; - - // Divergence of vel - Real divR = real(i,j,k,0)*GxR - imag(i,j,k,0)*GxC + - real(i,j,k,1)*GyR - imag(i,j,k,1)*GyC + - real(i,j,k,2)*GzR - imag(i,j,k,2)*GzC ; - Real divC = real(i,j,k,0)*GxC + imag(i,j,k,0)*GxR + - real(i,j,k,1)*GyC + imag(i,j,k,1)*GyR + - real(i,j,k,2)*GzC + imag(i,j,k,2)*GzR ; - - if (Lap < 1.0e-12) { // zero mode for no bulk motion - real_dil(i,j,k,0) = 0.0; - real_dil(i,j,k,1) = 0.0; - real_dil(i,j,k,2) = 0.0; - imag_dil(i,j,k,0) = 0.0; - imag_dil(i,j,k,1) = 0.0; - imag_dil(i,j,k,2) = 0.0; - } - else { - // Dilatational velocity - real_dil(i,j,k,0) = (divR*GxR + divC*GxC) / Lap; - real_dil(i,j,k,1) = (divR*GyR + divC*GyC) / Lap; - real_dil(i,j,k,2) = (divR*GzR + divC*GzC) / Lap; - imag_dil(i,j,k,0) = (divC*GxR - divR*GxC) / Lap; - imag_dil(i,j,k,1) = (divC*GyR - divR*GyC) / Lap; - imag_dil(i,j,k,2) = (divC*GzR - divR*GzC) / Lap; - - // Solenoidal velocity - real_sol(i,j,k,0) = real(i,j,k,0) - real_dil(i,j,k,0); - real_sol(i,j,k,1) = real(i,j,k,1) - real_dil(i,j,k,1); - real_sol(i,j,k,2) = real(i,j,k,2) - real_dil(i,j,k,2); - imag_sol(i,j,k,0) = imag(i,j,k,0) - imag_dil(i,j,k,0); - imag_sol(i,j,k,1) = imag(i,j,k,1) - imag_dil(i,j,k,1); - imag_sol(i,j,k,2) = imag(i,j,k,2) - imag_dil(i,j,k,2); - } - }); - } -} - -void StructFact::GetDecompVel(MultiFab& vel_decomp, const Geometry& geom) 
-{ - BL_PROFILE_VAR("StructFact::GetDecompVel()", GetDecompVel); - - if (!decompose) - amrex::Error("StructFact::GetDecompVel() is specific for vel decomposition in turbulence"); - - const BoxArray& ba_in = vel_decomp.boxArray(); - const DistributionMapping& dmap_in = vel_decomp.DistributionMap(); - - MultiFab vel; - vel.define(ba_in, dmap_in, 3, 0); - - const BoxArray& ba = vel_sol_real.boxArray(); - const DistributionMapping& dm = vel_sol_real.DistributionMap(); - MultiFab dft_real, dft_imag; - dft_real.define(ba, dm, 3, 0); - dft_imag.define(ba, dm, 3, 0); - - dft_real.ParallelCopy(vel_sol_real,0,0,3); - dft_imag.ParallelCopy(vel_sol_imag,0,0,3); - - InverseFFT(vel, dft_real, dft_imag, geom); - vel_decomp.ParallelCopy(vel,0,0,3); - - dft_real.ParallelCopy(vel_dil_real,0,0,3); - dft_imag.ParallelCopy(vel_dil_imag,0,0,3); - - InverseFFT(vel, dft_real, dft_imag, geom); - vel_decomp.ParallelCopy(vel,0,3,3); - -} - void StructFact::WriteCheckPoint(const int& step, std::string checkfile_base) { From 84a4be03c7cee79a95fb7a1175506f8de9da3b57 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 3 Jan 2025 11:11:06 -0800 Subject: [PATCH 134/151] cleanup; remove dependence on geom in many functions --- exec/Ek_calculator/main_driver.cpp | 2 +- exec/hydro/main_driver.cpp | 4 +- src_analysis/StructFact.H | 19 ++++----- src_analysis/StructFact.cpp | 58 ++++++++++----------------- src_compressible/main_driver.cpp | 4 +- src_compressible_stag/main_driver.cpp | 4 +- 6 files changed, 36 insertions(+), 55 deletions(-) diff --git a/exec/Ek_calculator/main_driver.cpp b/exec/Ek_calculator/main_driver.cpp index 1a0a331dd..39e18788c 100644 --- a/exec/Ek_calculator/main_driver.cpp +++ b/exec/Ek_calculator/main_driver.cpp @@ -100,7 +100,7 @@ void main_driver(const char* argv) // reset and compute structure factor turbStructFact.FortStructure(vel,geom,1); - turbStructFact.CallFinalize(geom); + turbStructFact.CallFinalize(); // integrate cov_mag over shells in k and write to file turbStructFact.IntegratekShells(0,geom); diff --git a/exec/hydro/main_driver.cpp b/exec/hydro/main_driver.cpp index 73a3d4aa1..734842fed 100644 --- a/exec/hydro/main_driver.cpp +++ b/exec/hydro/main_driver.cpp @@ -617,10 +617,10 @@ void main_driver(const char* argv) } // reset and compute structure factor turbStructFact.FortStructure(structFactMF,geom,1); - turbStructFact.CallFinalize(geom); + turbStructFact.CallFinalize(); // integrate cov_mag over shells in k and write to file - turbStructFact.IntegratekShells(step,geom); + turbStructFact.IntegratekShells(step); } } diff --git a/src_analysis/StructFact.H b/src_analysis/StructFact.H index 1206a794a..7575a6e91 100644 --- a/src_analysis/StructFact.H +++ b/src_analysis/StructFact.H @@ -86,34 +86,31 @@ public: const Vector< int >& s_pairB_in, const int& verbosity_in=0); - void FortStructure(const amrex::MultiFab&, const amrex::Geometry&, + void FortStructure(const amrex::MultiFab&, const Geometry&, const int& reset=0); void Reset(); void ComputeFFT(const amrex::MultiFab&, amrex::MultiFab&, - amrex::MultiFab&, const amrex::Geometry&, + amrex::MultiFab&, bool unpack=true); - - void InverseFFT(amrex::MultiFab&, const amrex::MultiFab&, - const amrex::MultiFab&, const amrex::Geometry&); void WritePlotFile(const int, const amrex::Real, const amrex::Geometry&, std::string, const int& zero_avg=1); - void Finalize(amrex::MultiFab&, amrex::MultiFab&, const Geometry& geom, + void Finalize(amrex::MultiFab&, amrex::MultiFab&, const int& zero_avg=1); - void CallFinalize(const Geometry& geom, const int& 
zero_avg=1); + void CallFinalize(const int& zero_avg=1); - void ShiftFFT(amrex::MultiFab&, const Geometry& geom, + void ShiftFFT(amrex::MultiFab&, const int& zero_avg=1); - void IntegratekShells(const int& step, const amrex::Geometry& geom, const std::string& name=""); + void IntegratekShells(const int& step, const std::string& name=""); - void IntegratekShellsScalar(const int& step, const amrex::Geometry& geom, const amrex::Vector< std::string >& names); + void IntegratekShellsScalar(const int& step, const amrex::Vector< std::string >& names); - void AddToExternal(amrex::MultiFab& x_mag, amrex::MultiFab& x_realimag, const amrex::Geometry&, const int& zero_avg=1); + void AddToExternal(amrex::MultiFab& x_mag, amrex::MultiFab& x_realimag, const int& zero_avg=1); int get_ncov() const { return NCOV; } diff --git a/src_analysis/StructFact.cpp b/src_analysis/StructFact.cpp index bf12e4c7f..4cad8d3ed 100644 --- a/src_analysis/StructFact.cpp +++ b/src_analysis/StructFact.cpp @@ -205,9 +205,10 @@ void StructFact::define(const BoxArray& ba_in, } } -void StructFact::FortStructure(const MultiFab& variables, const Geometry& geom, - const int& reset) { - +void StructFact::FortStructure(const MultiFab& variables, + const Geometry& geom, + const int& reset) +{ BL_PROFILE_VAR("StructFact::FortStructure()",FortStructure); const BoxArray& ba = variables.boxArray(); @@ -216,7 +217,7 @@ void StructFact::FortStructure(const MultiFab& variables, const Geometry& geom, MultiFab variables_dft_real(ba, dm, NVAR, 0); MultiFab variables_dft_imag(ba, dm, NVAR, 0); - ComputeFFT(variables, variables_dft_real, variables_dft_imag, geom); + ComputeFFT(variables, variables_dft_real, variables_dft_imag); // temporary storage built on BoxArray and DistributionMapping of "variables" // One case where "variables" and "cov_real/imag/mag" may have different DistributionMappings @@ -302,7 +303,6 @@ void StructFact::Reset() { void StructFact::ComputeFFT(const MultiFab& variables, MultiFab& variables_dft_real, MultiFab& variables_dft_imag, - const Geometry& geom, bool unpack) { @@ -310,7 +310,7 @@ void StructFact::ComputeFFT(const MultiFab& variables, bool is_flattened = false; - Box domain = geom.Domain(); + Box domain = variables.boxArray().minimalBox(); if (domain.bigEnd(AMREX_SPACEDIM-1) == 0) { is_flattened = true; // flattened case } @@ -462,7 +462,7 @@ void StructFact::WritePlotFile(const int step, const Real time, const Geometry& MultiFab::Copy(cov_imag_temp, cov_imag, 0, 0, NCOV, 0); // Finalize covariances - scale & compute magnitude - Finalize(cov_real_temp, cov_imag_temp, geom, zero_avg); + Finalize(cov_real_temp, cov_imag_temp, zero_avg); ////////////////////////////////////////////////////////////////////////////////// // Write out structure factor magnitude to plot file @@ -482,24 +482,8 @@ void StructFact::WritePlotFile(const int step, const Real time, const Geometry& MultiFab::Copy(plotfile, cov_mag, 0, 0, NCOV, 0); // copy structure factor into plotfile - Real dx = geom.CellSize(0); - Real pi = 3.1415926535897932; - Box domain = geom.Domain(); - - RealBox real_box({AMREX_D_DECL(-pi/dx,-pi/dx,-pi/dx)}, - {AMREX_D_DECL( pi/dx, pi/dx, pi/dx)}); - - // check bc_vel_lo/hi to determine the periodicity - Vector is_periodic(AMREX_SPACEDIM,0); // set to 0 (not periodic) by default - for (int i=0; i& names) { +void StructFact::IntegratekShellsScalar(const int& step, const amrex::Vector< std::string >& names) { BL_PROFILE_VAR("StructFact::IntegratekShellsMisc",IntegratekShellsMisc); @@ -923,7 +907,7 @@ void 
StructFact::IntegratekShellsScalar(const int& step, const Geometry& /*geom* } } -void StructFact::AddToExternal(MultiFab& x_mag, MultiFab& x_realimag, const Geometry& geom, const int& zero_avg) { +void StructFact::AddToExternal(MultiFab& x_mag, MultiFab& x_realimag, const int& zero_avg) { BL_PROFILE_VAR("StructFact::AddToExternal",AddToExternal); @@ -940,7 +924,7 @@ void StructFact::AddToExternal(MultiFab& x_mag, MultiFab& x_realimag, const Geom MultiFab::Copy(cov_imag_temp, cov_imag, 0, 0, NCOV, 0); // Finalize covariances - scale & compute magnitude - Finalize(cov_real_temp, cov_imag_temp, geom, zero_avg); + Finalize(cov_real_temp, cov_imag_temp, zero_avg); nPlot = NCOV; plotfile.define(cov_mag.boxArray(), cov_mag.DistributionMap(), nPlot, 0); diff --git a/src_compressible/main_driver.cpp b/src_compressible/main_driver.cpp index 504ae956a..4aa942d89 100644 --- a/src_compressible/main_driver.cpp +++ b/src_compressible/main_driver.cpp @@ -732,10 +732,10 @@ void main_driver(const char* argv) // reset and compute structure factor turbStructFact.FortStructure(structFactMF,geom,1); - turbStructFact.CallFinalize(geom); + turbStructFact.CallFinalize(); // integrate cov_mag over shells in k and write to file - turbStructFact.IntegratekShells(step,geom); + turbStructFact.IntegratekShells(step); // timer t2 = ParallelDescriptor::second() - t1; diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp index a21e222fb..2e02c8fda 100644 --- a/src_compressible_stag/main_driver.cpp +++ b/src_compressible_stag/main_driver.cpp @@ -1584,8 +1584,8 @@ void main_driver(const char* argv) cons_realimag.setVal(0.0); for (int i=0; i Date: Fri, 3 Jan 2025 11:24:19 -0800 Subject: [PATCH 135/151] more removal of geometry dependence in SF code --- exec/DSMC/main_driver.cpp | 4 ++-- exec/DSMC_granular/main_driver.cpp | 2 +- exec/Ek_calculator/main_driver.cpp | 2 +- exec/cellbdytest_new/main_driver.cpp | 4 ++-- exec/compressible_mui/sav_src/2020/m00.cpp | 6 ++--- .../sav_src/2020/m01-cutemp_jsq.cpp | 6 ++--- exec/compressible_mui/sav_src/2020/m01.cpp | 6 ++--- exec/compressible_mui/sav_src/2020/m02.cpp | 6 ++--- exec/compressible_mui/sav_src/2020/m10.cpp | 6 ++--- exec/compressible_mui/sav_src/2020/m11.cpp | 6 ++--- exec/compressible_mui/sav_src/2020/m12.cpp | 6 ++--- .../202101_before_mui_span/main_driver.cpp | 6 ++--- .../main_driver.cpp_0126_bc | 6 ++--- .../202106_before_summer/main_driver.cpp | 6 ++--- exec/hydro/main_driver.cpp | 10 ++++----- exec/immersedIons/main_driver.cpp | 4 ++-- .../cell_body/main_driver.cpp | 2 +- .../channel_dumbbell/main_driver.cpp | 2 +- .../channel_multiblob/main_driver.cpp | 2 +- .../channel_rigid/main_driver.cpp | 2 +- .../flagellum/main_driver.cpp | 2 +- .../taylor_line/main_driver.cpp | 2 +- exec/multispec/main_driver.cpp | 4 ++-- exec/structFactTest/main_driver.cpp | 2 +- src_analysis/StructFact.H | 2 +- src_analysis/StructFact.cpp | 1 - src_compressible/main_driver.cpp | 10 ++++----- src_compressible_stag/main_driver.cpp | 22 +++++++++---------- src_reactDiff/main_driver.cpp | 2 +- unmaintained/channel_soft/main_driver.cpp | 2 +- .../exercises/compressible/main_driver.cpp | 4 ++-- .../exercises/incompressible/main_driver.cpp | 2 +- 32 files changed, 74 insertions(+), 75 deletions(-) diff --git a/exec/DSMC/main_driver.cpp b/exec/DSMC/main_driver.cpp index ef6ed217d..919dd34f1 100644 --- a/exec/DSMC/main_driver.cpp +++ b/exec/DSMC/main_driver.cpp @@ -356,7 +356,7 @@ void main_driver(const char* argv) //PrintMF(structFactPrimMF,0,-1); 
//PrintMF(primInst,1,1); - //structFactPrim.FortStructure(structFactPrimMF,geom); + //structFactPrim.FortStructure(structFactPrimMF); } } @@ -389,7 +389,7 @@ void main_driver(const char* argv) //PrintMF(structFactPrimMF,0,-1); //PrintMF(primInst,1,1); - //structFactPrim.FortStructure(structFactPrimMF,geom); + //structFactPrim.FortStructure(structFactPrimMF); } } diff --git a/exec/DSMC_granular/main_driver.cpp b/exec/DSMC_granular/main_driver.cpp index d6ac3a736..7d2b1c4a0 100644 --- a/exec/DSMC_granular/main_driver.cpp +++ b/exec/DSMC_granular/main_driver.cpp @@ -402,7 +402,7 @@ void main_driver(const char* argv) MultiFab::Copy(structFactPrimMF,primInst,8,cnt_sf,numvars_sf,0); cnt_sf += numvars_sf; - structFactPrim.FortStructure(structFactPrimMF,geom); + structFactPrim.FortStructure(structFactPrimMF); } if(istep > amrex::Math::abs(n_steps_skip) && diff --git a/exec/Ek_calculator/main_driver.cpp b/exec/Ek_calculator/main_driver.cpp index 39e18788c..2b81bb9b4 100644 --- a/exec/Ek_calculator/main_driver.cpp +++ b/exec/Ek_calculator/main_driver.cpp @@ -99,7 +99,7 @@ void main_driver(const char* argv) // reset and compute structure factor - turbStructFact.FortStructure(vel,geom,1); + turbStructFact.FortStructure(vel,1); turbStructFact.CallFinalize(); // integrate cov_mag over shells in k and write to file diff --git a/exec/cellbdytest_new/main_driver.cpp b/exec/cellbdytest_new/main_driver.cpp index b52b39535..3a07ded68 100644 --- a/exec/cellbdytest_new/main_driver.cpp +++ b/exec/cellbdytest_new/main_driver.cpp @@ -1809,13 +1809,13 @@ void main_driver(const char* argv) // charge MultiFab::Copy(struct_cc_charge, charge, 0, 0, nvar_sf_charge, 0); - structFact_charge.FortStructure(struct_cc_charge,geomP); + structFact_charge.FortStructure(struct_cc_charge); // velocity for (int d=0; d= 0) { ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); - structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat); + structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/2020/m01-cutemp_jsq.cpp b/exec/compressible_mui/sav_src/2020/m01-cutemp_jsq.cpp index 25cfbc595..bf8a5be05 100644 --- a/exec/compressible_mui/sav_src/2020/m01-cutemp_jsq.cpp +++ b/exec/compressible_mui/sav_src/2020/m01-cutemp_jsq.cpp @@ -684,11 +684,11 @@ void main_driver(const char* argv) MultiFab::Copy(structFactPrimMF, prim, 0, 0, structVarsPrim, 0); MultiFab::Copy(structFactConsMF, cu, 0, 0, structVarsCons-1, 0); MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1, 0); // temperature too - structFactPrim.FortStructure(structFactPrimMF,geom); - structFactCons.FortStructure(structFactConsMF,geom); + structFactPrim.FortStructure(structFactPrimMF); + structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); - structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat); + structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/2020/m01.cpp b/exec/compressible_mui/sav_src/2020/m01.cpp index 3b3c99530..6354863ec 100644 --- a/exec/compressible_mui/sav_src/2020/m01.cpp +++ b/exec/compressible_mui/sav_src/2020/m01.cpp @@ -672,11 +672,11 @@ void main_driver(const char* argv) MultiFab::Copy(structFactPrimMF, prim, 0, 0, structVarsPrim, 0); MultiFab::Copy(structFactConsMF, cu, 0, 0, structVarsCons-1, 0); MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1, 0); // 
temperature too - structFactPrim.FortStructure(structFactPrimMF,geom); - structFactCons.FortStructure(structFactConsMF,geom); + structFactPrim.FortStructure(structFactPrimMF); + structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); - structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat); + structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/2020/m02.cpp b/exec/compressible_mui/sav_src/2020/m02.cpp index 9bdaf63ca..6e18d8a32 100644 --- a/exec/compressible_mui/sav_src/2020/m02.cpp +++ b/exec/compressible_mui/sav_src/2020/m02.cpp @@ -725,11 +725,11 @@ void main_driver(const char* argv) MultiFab::Copy(structFactPrimMF, prim, 0, 0, structVarsPrim, 0); MultiFab::Copy(structFactConsMF, cu, 0, 0, structVarsCons-1, 0); MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1, 0); // temperature too - structFactPrim.FortStructure(structFactPrimMF,geom); - structFactCons.FortStructure(structFactConsMF,geom); + structFactPrim.FortStructure(structFactPrimMF); + structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); - structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat); + structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/2020/m10.cpp b/exec/compressible_mui/sav_src/2020/m10.cpp index f44916f31..32db38e49 100644 --- a/exec/compressible_mui/sav_src/2020/m10.cpp +++ b/exec/compressible_mui/sav_src/2020/m10.cpp @@ -681,11 +681,11 @@ void main_driver(const char* argv) MultiFab::Copy(structFactPrimMF, prim, 0, 0, structVarsPrim, 0); MultiFab::Copy(structFactConsMF, cu, 0, 0, structVarsCons-1, 0); MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1, 0); // temperature too - structFactPrim.FortStructure(structFactPrimMF,geom); - structFactCons.FortStructure(structFactConsMF,geom); + structFactPrim.FortStructure(structFactPrimMF); + structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); - structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat); + structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/2020/m11.cpp b/exec/compressible_mui/sav_src/2020/m11.cpp index 1930c883b..d9a823c2e 100644 --- a/exec/compressible_mui/sav_src/2020/m11.cpp +++ b/exec/compressible_mui/sav_src/2020/m11.cpp @@ -685,11 +685,11 @@ void main_driver(const char* argv) MultiFab::Copy(structFactPrimMF, prim, 0, 0, structVarsPrim, 0); MultiFab::Copy(structFactConsMF, cu, 0, 0, structVarsCons-1, 0); MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1, 0); // temperature too - structFactPrim.FortStructure(structFactPrimMF,geom); - structFactCons.FortStructure(structFactConsMF,geom); + structFactPrim.FortStructure(structFactPrimMF); + structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); - structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat); + structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/2020/m12.cpp b/exec/compressible_mui/sav_src/2020/m12.cpp index fd5196685..5b0309ede 100644 --- a/exec/compressible_mui/sav_src/2020/m12.cpp 
+++ b/exec/compressible_mui/sav_src/2020/m12.cpp @@ -687,11 +687,11 @@ void main_driver(const char* argv) MultiFab::Copy(structFactPrimMF, prim, 0, 0, structVarsPrim, 0); MultiFab::Copy(structFactConsMF, cu, 0, 0, structVarsCons-1, 0); MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1, 0); // temperature too - structFactPrim.FortStructure(structFactPrimMF,geom); - structFactCons.FortStructure(structFactConsMF,geom); + structFactPrim.FortStructure(structFactPrimMF); + structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); - structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat); + structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp b/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp index 719c76a95..917300b60 100644 --- a/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp +++ b/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp @@ -700,11 +700,11 @@ void main_driver(const char* argv) MultiFab::Copy(structFactPrimMF, prim, 0, 0, structVarsPrim, 0); MultiFab::Copy(structFactConsMF, cu, 0, 0, structVarsCons-1, 0); MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1, 0); // temperature too - structFactPrim.FortStructure(structFactPrimMF,geom); - structFactCons.FortStructure(structFactConsMF,geom); + structFactPrim.FortStructure(structFactPrimMF); + structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); - structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat); + structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp_0126_bc b/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp_0126_bc index 01a09fec3..efb7dc1ff 100644 --- a/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp_0126_bc +++ b/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp_0126_bc @@ -713,11 +713,11 @@ void main_driver(const char* argv) MultiFab::Copy(structFactPrimMF, prim, 0, 0, structVarsPrim, 0); MultiFab::Copy(structFactConsMF, cu, 0, 0, structVarsCons-1, 0); MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1, 0); // temperature too - structFactPrim.FortStructure(structFactPrimMF,geom); - structFactCons.FortStructure(structFactConsMF,geom); + structFactPrim.FortStructure(structFactPrimMF); + structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); - structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat); + structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/202106_before_summer/main_driver.cpp b/exec/compressible_mui/sav_src/202106_before_summer/main_driver.cpp index 7281a18ac..0e799ee87 100644 --- a/exec/compressible_mui/sav_src/202106_before_summer/main_driver.cpp +++ b/exec/compressible_mui/sav_src/202106_before_summer/main_driver.cpp @@ -852,8 +852,8 @@ void main_driver(const char* argv) MultiFab::Copy(structFactPrimMF, prim, 0, 0, structVarsPrim, 0); MultiFab::Copy(structFactConsMF, cu, 0, 0, structVarsCons-1, 0); MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1, 
0); // temperature too - structFactPrim.FortStructure(structFactPrimMF,geom); - structFactCons.FortStructure(structFactConsMF,geom); + structFactPrim.FortStructure(structFactPrimMF); + structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { MultiFab primFlattened; // flattened multifab defined below if (slicepoint < 0) { @@ -864,7 +864,7 @@ void main_driver(const char* argv) // we rotate this flattened MultiFab to have normal in the z-direction since // SWFFT only presently supports flattened MultiFabs with z-normal. MultiFab primFlattenedRot = RotateFlattenedMF(primFlattened); - structFactPrimFlattened.FortStructure(primFlattenedRot,geom_flat); + structFactPrimFlattened.FortStructure(primFlattenedRot); } } diff --git a/exec/hydro/main_driver.cpp b/exec/hydro/main_driver.cpp index 734842fed..2fdf97be8 100644 --- a/exec/hydro/main_driver.cpp +++ b/exec/hydro/main_driver.cpp @@ -476,7 +476,7 @@ void main_driver(const char* argv) for(int d=0; d= 0) { MultiFab Flattened; // flattened multifab defined below if (slicepoint < 0) { @@ -488,7 +488,7 @@ void main_driver(const char* argv) // our structure factor class assumes this for flattened MultiFab FlattenedRot = RotateFlattenedMF(Flattened); FlattenedRotMaster.ParallelCopy(FlattenedRot,0,0,structVars); - structFactFlattened.FortStructure(FlattenedRotMaster,geom_flat); + structFactFlattened.FortStructure(FlattenedRotMaster); } } @@ -574,7 +574,7 @@ void main_driver(const char* argv) for(int d=0; d= 0) { MultiFab Flattened; // flattened multifab defined below if (slicepoint < 0) { @@ -585,7 +585,7 @@ void main_driver(const char* argv) // we rotate this flattened MultiFab to have normal in the z-direction since // our structure factor class assumes this for flattened MultiFab FlattenedRot = RotateFlattenedMF(Flattened); - structFactFlattened.FortStructure(FlattenedRot,geom_flat); + structFactFlattened.FortStructure(FlattenedRot); } } @@ -616,7 +616,7 @@ void main_driver(const char* argv) ShiftFaceToCC(umac[d], 0, structFactMF, d, 1); } // reset and compute structure factor - turbStructFact.FortStructure(structFactMF,geom,1); + turbStructFact.FortStructure(structFactMF,1); turbStructFact.CallFinalize(); // integrate cov_mag over shells in k and write to file diff --git a/exec/immersedIons/main_driver.cpp b/exec/immersedIons/main_driver.cpp index 51dd39c8f..14880270f 100644 --- a/exec/immersedIons/main_driver.cpp +++ b/exec/immersedIons/main_driver.cpp @@ -1579,13 +1579,13 @@ void main_driver(const char* argv) // charge MultiFab::Copy(struct_cc_charge, charge, 0, 0, nvar_sf_charge, 0); - structFact_charge.FortStructure(struct_cc_charge,geomP); + structFact_charge.FortStructure(struct_cc_charge); // velocity for (int d=0; d& s_pairB_in, const int& verbosity_in=0); - void FortStructure(const amrex::MultiFab&, const Geometry&, + void FortStructure(const amrex::MultiFab&, const int& reset=0); void Reset(); diff --git a/src_analysis/StructFact.cpp b/src_analysis/StructFact.cpp index 4cad8d3ed..cbd9ab72f 100644 --- a/src_analysis/StructFact.cpp +++ b/src_analysis/StructFact.cpp @@ -206,7 +206,6 @@ void StructFact::define(const BoxArray& ba_in, } void StructFact::FortStructure(const MultiFab& variables, - const Geometry& geom, const int& reset) { BL_PROFILE_VAR("StructFact::FortStructure()",FortStructure); diff --git a/src_compressible/main_driver.cpp b/src_compressible/main_driver.cpp index 4aa942d89..73c8eec97 100644 --- a/src_compressible/main_driver.cpp +++ b/src_compressible/main_driver.cpp @@ -731,7 +731,7 @@ void main_driver(const 
char* argv) MultiFab::Copy(structFactMF, prim, 1, 0, AMREX_SPACEDIM, 0); // reset and compute structure factor - turbStructFact.FortStructure(structFactMF,geom,1); + turbStructFact.FortStructure(structFactMF,1); turbStructFact.CallFinalize(); // integrate cov_mag over shells in k and write to file @@ -767,8 +767,8 @@ void main_driver(const char* argv) MultiFab::Copy(structFactPrimMF, prim, 0, 0, structVarsPrim, 0); MultiFab::Copy(structFactConsMF, cu, 0, 0, structVarsCons-1, 0); MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1, 0); // temperature too - structFactPrim.FortStructure(structFactPrimMF,geom); - structFactCons.FortStructure(structFactConsMF,geom); + structFactPrim.FortStructure(structFactPrimMF); + structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { MultiFab primFlattened; // flattened multifab defined below MultiFab consFlattened; // flattened multifab defined below @@ -783,11 +783,11 @@ void main_driver(const char* argv) // our structure factor class assumes this for flattened MultiFab primFlattenedRot = RotateFlattenedMF(primFlattened); primFlattenedRotMaster.ParallelCopy(primFlattenedRot,0,0,structVarsPrim); - structFactPrimFlattened.FortStructure(primFlattenedRotMaster,geom_flat); + structFactPrimFlattened.FortStructure(primFlattenedRotMaster); MultiFab consFlattenedRot = RotateFlattenedMF(consFlattened); consFlattenedRotMaster.ParallelCopy(consFlattenedRot,0,0,structVarsCons); - structFactConsFlattened.FortStructure(consFlattenedRotMaster,geom_flat); + structFactConsFlattened.FortStructure(consFlattenedRotMaster); } // timer diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp index 2e02c8fda..6646e84bf 100644 --- a/src_compressible_stag/main_driver.cpp +++ b/src_compressible_stag/main_driver.cpp @@ -1431,8 +1431,8 @@ void main_driver(const char* argv) //////////////////////////////////////////////////// if ((do_1D==0) and (do_2D==0)) { - structFactPrim.FortStructure(structFactPrimMF,geom); - structFactCons.FortStructure(structFactConsMF,geom); + structFactPrim.FortStructure(structFactPrimMF); + structFactCons.FortStructure(structFactConsMF); } if (project_dir >= 0) { @@ -1449,7 +1449,7 @@ void main_driver(const char* argv) } XRot = RotateFlattenedMF(X); master_project_rot_prim.ParallelCopy(XRot, 0, 0, structVarsPrim); - structFactPrimFlattened.FortStructure(master_project_rot_prim,geom_flat); + structFactPrimFlattened.FortStructure(master_project_rot_prim); } { @@ -1462,7 +1462,7 @@ void main_driver(const char* argv) } XRot = RotateFlattenedMF(X); master_project_rot_cons.ParallelCopy(XRot, 0, 0, structVarsCons); - structFactConsFlattened.FortStructure(master_project_rot_cons,geom_flat); + structFactConsFlattened.FortStructure(master_project_rot_cons); } } @@ -1474,7 +1474,7 @@ void main_driver(const char* argv) ComputeVerticalAverage(structFactPrimMF, X, geom, project_dir, 0, structVarsPrim, 0, membrane_cell-1); XRot = RotateFlattenedMF(X); master_project_rot_prim.ParallelCopy(XRot, 0, 0, structVarsPrim); - structFactPrimVerticalAverage0.FortStructure(master_project_rot_prim,geom_flat); + structFactPrimVerticalAverage0.FortStructure(master_project_rot_prim); } { @@ -1483,7 +1483,7 @@ void main_driver(const char* argv) ComputeVerticalAverage(structFactPrimMF, X, geom, project_dir, 0, structVarsPrim, membrane_cell, n_cells[project_dir]-1); XRot = RotateFlattenedMF(X); master_project_rot_prim.ParallelCopy(XRot, 0, 0, structVarsPrim); - 
structFactPrimVerticalAverage1.FortStructure(master_project_rot_prim,geom_flat); + structFactPrimVerticalAverage1.FortStructure(master_project_rot_prim); } { @@ -1492,7 +1492,7 @@ void main_driver(const char* argv) ComputeVerticalAverage(structFactConsMF, X, geom, project_dir, 0, structVarsCons, 0, membrane_cell-1); XRot = RotateFlattenedMF(X); master_project_rot_cons.ParallelCopy(XRot, 0, 0, structVarsCons); - structFactConsVerticalAverage0.FortStructure(master_project_rot_cons,geom_flat); + structFactConsVerticalAverage0.FortStructure(master_project_rot_cons); } { @@ -1501,7 +1501,7 @@ void main_driver(const char* argv) ComputeVerticalAverage(structFactConsMF, X, geom, project_dir, 0, structVarsCons, membrane_cell, n_cells[project_dir]-1); XRot = RotateFlattenedMF(X); master_project_rot_cons.ParallelCopy(XRot, 0, 0, structVarsCons); - structFactConsVerticalAverage1.FortStructure(master_project_rot_cons,geom_flat); + structFactConsVerticalAverage1.FortStructure(master_project_rot_cons); } } } @@ -1516,7 +1516,7 @@ void main_driver(const char* argv) ExtractSlice(structFactPrimMF, X, geom, 2, i, 0, structVarsPrim); XRot = RotateFlattenedMF(X); master_2D_rot_prim.ParallelCopy(XRot, 0, 0, structVarsPrim); - structFactPrimArray[i].FortStructure(master_2D_rot_prim,geom_flat_2D); + structFactPrimArray[i].FortStructure(master_2D_rot_prim); } { @@ -1525,7 +1525,7 @@ void main_driver(const char* argv) ExtractSlice(structFactConsMF, X, geom, 2, i, 0, structVarsCons); XRot = RotateFlattenedMF(X); master_2D_rot_cons.ParallelCopy(XRot, 0, 0, structVarsCons); - structFactConsArray[i].FortStructure(master_2D_rot_cons,geom_flat_2D); + structFactConsArray[i].FortStructure(master_2D_rot_cons); } } @@ -1541,7 +1541,7 @@ void main_driver(const char* argv) // our structure factor class assumes this for flattened MultiFab FlattenedRot = RotateFlattenedMF(Flattened); surfcovFlattenedRotMaster.ParallelCopy(FlattenedRot,0,0,surfcov_structVars); - surfcovStructFact.FortStructure(surfcovFlattenedRotMaster,surfcov_geom_flat); + surfcovStructFact.FortStructure(surfcovFlattenedRotMaster); } } diff --git a/src_reactDiff/main_driver.cpp b/src_reactDiff/main_driver.cpp index 3f7909cb3..c92e65ffc 100644 --- a/src_reactDiff/main_driver.cpp +++ b/src_reactDiff/main_driver.cpp @@ -270,7 +270,7 @@ void main_driver(const char* argv) if (step > n_steps_skip && struct_fact_int > 0 && (step-n_steps_skip)%struct_fact_int == 0) { // add this snapshot to the average in the structure factor - structFact.FortStructure(n_new,geom); + structFact.FortStructure(n_new); } diff --git a/unmaintained/channel_soft/main_driver.cpp b/unmaintained/channel_soft/main_driver.cpp index 662d05927..7cb46cf18 100644 --- a/unmaintained/channel_soft/main_driver.cpp +++ b/unmaintained/channel_soft/main_driver.cpp @@ -571,7 +571,7 @@ void main_driver(const char * argv) { for(int d=0; d n_steps_skip && struct_fact_int > 0 && (step-n_steps_skip)%struct_fact_int == 0) { // MultiFab::Copy(struct_in_cc, cu, 0, 0, nvar_sf, 0); -// structFact.FortStructure(struct_in_cc,geom); +// structFact.FortStructure(struct_in_cc); // if(project_dir >= 0) { // ComputeVerticalAverage(cu, cuVertAvg, geom, project_dir, 0, nvars); -// structFactVA.FortStructure(cuVertAvg,geom_flat); +// structFactVA.FortStructure(cuVertAvg); // } // } diff --git a/unmaintained/exercises/incompressible/main_driver.cpp b/unmaintained/exercises/incompressible/main_driver.cpp index b273e1fcc..48f3b9cd3 100644 --- a/unmaintained/exercises/incompressible/main_driver.cpp +++ 
b/unmaintained/exercises/incompressible/main_driver.cpp @@ -387,7 +387,7 @@ void main_driver(const char* argv) for(int d=0; d Date: Fri, 3 Jan 2025 11:35:05 -0800 Subject: [PATCH 136/151] more cleanup --- src_analysis/StructFact.H | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src_analysis/StructFact.H b/src_analysis/StructFact.H index 22f90476a..611ab8c12 100644 --- a/src_analysis/StructFact.H +++ b/src_analysis/StructFact.H @@ -24,9 +24,6 @@ class StructFact { // Total number of states to average over, updated by FortStructure() int nsamples = 0; - // decompose velocity field - bool decompose = false; - // Vector containing covariance scaling Vector< Real > scaling; @@ -50,12 +47,6 @@ public: // Vector of MultiFabs containing final magnitude of covariances MultiFab cov_mag; - // MultiFabs of real/imag for solenoidal/dilatational - MultiFab vel_sol_real; - MultiFab vel_sol_imag; - MultiFab vel_dil_real; - MultiFab vel_dil_imag; - StructFact(); StructFact(const BoxArray& ba_in, From 8c94f931e9d85771878c3c8ccde26d8776c2b12f Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 3 Jan 2025 12:14:57 -0800 Subject: [PATCH 137/151] more cleanup --- exec/compressible_stag/GNUmakefile | 43 +----------------------------- src_analysis/TurbSpectra.H | 13 ++------- 2 files changed, 3 insertions(+), 53 deletions(-) diff --git a/exec/compressible_stag/GNUmakefile b/exec/compressible_stag/GNUmakefile index 1c7de9faf..e0cf9740f 100644 --- a/exec/compressible_stag/GNUmakefile +++ b/exec/compressible_stag/GNUmakefile @@ -17,19 +17,6 @@ USE_FFT = TRUE USE_PARTICLES = FALSE DO_TURB = FALSE -USE_HEFFTE_FFTW = FALSE -USE_HEFFTE_CUFFT = FALSE -USE_HEFFTE_ROCFFT = FALSE -USE_DISTRIBUTED_FFT = TRUE - -ifeq ($(USE_HEFFTE_FFTW),TRUE) - HEFFTE_HOME ?= ../../../heffte/ -else ifeq ($(USE_HEFFTE_CUFFT),TRUE) - HEFFTE_HOME ?= ../../../heffte-org/build_aware/ -else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) - HEFFTE_HOME ?= ../../../heffte-org/build_noaware/ -endif - include $(AMREX_HOME)/Tools/GNUMake/Make.defs VPATH_LOCATIONS += . @@ -59,41 +46,13 @@ include ../../src_common/Make.package VPATH_LOCATIONS += ../../src_common/ INCLUDE_LOCATIONS += ../../src_common/ -#ifeq ($(USE_HEFFTE_FFTW),TRUE) -# include $(HEFFTE_HOME)/src/Make.package -#else ifeq ($(USE_HEFFTE_CUFFT),TRUE) -# include $(HEFFTE_HOME)/src/Make.package -#else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) -# include $(HEFFTE_HOME)/src/Make.package -#endif - -ifeq ($(USE_HEFFTE_FFTW),TRUE) - DEFINES += -DHEFFTE_FFTW - LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f -else ifeq ($(USE_HEFFTE_CUFFT),TRUE) - DEFINES += -DHEFFTE_CUFFT -else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) - DEFINES += -DHEFFTE_ROCFFT -endif - include $(AMREX_HOME)/Src/Base/Make.package include ../../src_analysis/Make.package VPATH_LOCATIONS += ../../src_analysis/ INCLUDE_LOCATIONS += ../../src_analysis/ - -include $(AMREX_HOME)/Tools/GNUMake/Make.rules -ifeq ($(USE_CUDA),TRUE) - LIBRARIES += -lcufft -else ifeq ($(USE_HIP),TRUE) - # Use rocFFT. 
ROC_PATH is defined in amrex - INCLUDE_LOCATIONS += $(ROC_PATH)/rocfft/include - LIBRARY_LOCATIONS += $(ROC_PATH)/rocfft/lib - LIBRARIES += -L$(ROC_PATH)/rocfft/lib -lrocfft -else - LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f -endif +include $(AMREX_HOME)/Tools/GNUMake/Make.rules ifeq ($(DO_TURB), TRUE) DEFINES += -DTURB diff --git a/src_analysis/TurbSpectra.H b/src_analysis/TurbSpectra.H index 419756555..66a8ee524 100644 --- a/src_analysis/TurbSpectra.H +++ b/src_analysis/TurbSpectra.H @@ -21,21 +21,12 @@ void IntegrateKScalar(const MultiFab& cov_mag, const std::string& name, const int& step, const int& comp); -//void IntegrateKScalarHeffte(const BaseFab >& spectral_field, -// const std::string& name, const Real& scaling, -// const Box& c_local_box, -// const Real& sqrtnpts, -// const int& step); + void IntegrateKVelocity(const MultiFab& cov_mag, const std::string& name, const int& step, const int& comp); -//void IntegrateKVelocityHeffte(const BaseFab >& spectral_fieldx, -// const BaseFab >& spectral_fieldy, -// const BaseFab >& spectral_fieldz, -// const std::string& name, const Real& scaling, -// const Box& c_local_box, -// const int& step); + void TurbSpectrumScalar(const MultiFab& variables, const amrex::Geometry& geom, const int& step, From 927067d3c8394cbbade5b61f092e61b8b8a3225f Mon Sep 17 00:00:00 2001 From: isriva Date: Fri, 3 Jan 2025 12:34:52 -0800 Subject: [PATCH 138/151] modified GNUMakefile to remove HEFFTE options --- exec/compressible_stag/GNUmakefile | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/exec/compressible_stag/GNUmakefile b/exec/compressible_stag/GNUmakefile index 1c7de9faf..9d27ef1d4 100644 --- a/exec/compressible_stag/GNUmakefile +++ b/exec/compressible_stag/GNUmakefile @@ -17,19 +17,6 @@ USE_FFT = TRUE USE_PARTICLES = FALSE DO_TURB = FALSE -USE_HEFFTE_FFTW = FALSE -USE_HEFFTE_CUFFT = FALSE -USE_HEFFTE_ROCFFT = FALSE -USE_DISTRIBUTED_FFT = TRUE - -ifeq ($(USE_HEFFTE_FFTW),TRUE) - HEFFTE_HOME ?= ../../../heffte/ -else ifeq ($(USE_HEFFTE_CUFFT),TRUE) - HEFFTE_HOME ?= ../../../heffte-org/build_aware/ -else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) - HEFFTE_HOME ?= ../../../heffte-org/build_noaware/ -endif - include $(AMREX_HOME)/Tools/GNUMake/Make.defs VPATH_LOCATIONS += . 
@@ -59,23 +46,6 @@ include ../../src_common/Make.package VPATH_LOCATIONS += ../../src_common/ INCLUDE_LOCATIONS += ../../src_common/ -#ifeq ($(USE_HEFFTE_FFTW),TRUE) -# include $(HEFFTE_HOME)/src/Make.package -#else ifeq ($(USE_HEFFTE_CUFFT),TRUE) -# include $(HEFFTE_HOME)/src/Make.package -#else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) -# include $(HEFFTE_HOME)/src/Make.package -#endif - -ifeq ($(USE_HEFFTE_FFTW),TRUE) - DEFINES += -DHEFFTE_FFTW - LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f -else ifeq ($(USE_HEFFTE_CUFFT),TRUE) - DEFINES += -DHEFFTE_CUFFT -else ifeq ($(USE_HEFFTE_ROCFFT),TRUE) - DEFINES += -DHEFFTE_ROCFFT -endif - include $(AMREX_HOME)/Src/Base/Make.package include ../../src_analysis/Make.package From 45b79e362b8d3707b35b7422d364380e3536a69e Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 3 Jan 2025 12:59:16 -0800 Subject: [PATCH 139/151] cleanup flattened code, simplify a bit --- exec/hydro/main_driver.cpp | 93 +++------- src_common/Make.package | 1 - src_common/RotateFlattenedMF.cpp | 51 ------ src_common/common_functions.H | 5 - src_compressible/main_driver.cpp | 112 ++++-------- src_compressible_stag/main_driver.cpp | 249 +++++++++----------------- 6 files changed, 133 insertions(+), 378 deletions(-) delete mode 100644 src_common/RotateFlattenedMF.cpp diff --git a/exec/hydro/main_driver.cpp b/exec/hydro/main_driver.cpp index 2fdf97be8..d3edef023 100644 --- a/exec/hydro/main_driver.cpp +++ b/exec/hydro/main_driver.cpp @@ -350,7 +350,7 @@ void main_driver(const char* argv) /////////////////////////////////////////// StructFact structFactFlattened; - MultiFab FlattenedRotMaster; + MultiFab FlattenedMaster; Geometry geom_flat; @@ -365,76 +365,30 @@ void main_driver(const char* argv) } else { ExtractSlice(structFactMF, Flattened, geom, project_dir, slicepoint, 0, 1); } - // we rotate this flattened MultiFab to have normal in the z-direction since - // our structure factor class assumes this for flattened - MultiFab FlattenedRot = RotateFlattenedMF(Flattened); - BoxArray ba_flat = FlattenedRot.boxArray(); - const DistributionMapping& dmap_flat = FlattenedRot.DistributionMap(); - FlattenedRotMaster.define(ba_flat,dmap_flat,structVars,0); + BoxArray ba_flat = Flattened.boxArray(); + const DistributionMapping& dmap_flat = Flattened.DistributionMap(); + FlattenedMaster.define(ba_flat,dmap_flat,structVars,0); { - IntVect dom_lo(AMREX_D_DECL(0,0,0)); - IntVect dom_hi; - - // yes you could simplify this code but for now - // these are written out fully to better understand what is happening - // we wanted dom_hi[AMREX_SPACEDIM-1] to be equal to 0 - // and need to transmute the other indices depending on project_dir -#if (AMREX_SPACEDIM == 2) - if (project_dir == 0) { - dom_hi[0] = n_cells[1]-1; - } - else if (project_dir == 1) { - dom_hi[0] = n_cells[0]-1; - } - dom_hi[1] = 0; -#elif (AMREX_SPACEDIM == 3) - if (project_dir == 0) { - dom_hi[0] = n_cells[1]-1; - dom_hi[1] = n_cells[2]-1; - } else if (project_dir == 1) { - dom_hi[0] = n_cells[0]-1; - dom_hi[1] = n_cells[2]-1; - } else if (project_dir == 2) { - dom_hi[0] = n_cells[0]-1; - dom_hi[1] = n_cells[1]-1; - } - dom_hi[2] = 0; -#endif - Box domain(dom_lo, dom_hi); - + Box domain_flat = FlattenedMaster.boxArray().minimalBox(); + // This defines the physical box + // we retain prob_lo and prob_hi in all directions except project_dir, + // where the physical size is 0 to dx[project_dir] + Vector projected_lo(AMREX_SPACEDIM); Vector projected_hi(AMREX_SPACEDIM); - // yes you could simplify this code but for now - // 
these are written out fully to better understand what is happening - // we wanted projected_hi[AMREX_SPACEDIM-1] to be equal to dx[projected_dir] - // and need to transmute the other indices depending on project_dir -#if (AMREX_SPACEDIM == 2) - if (project_dir == 0) { - projected_hi[0] = prob_hi[1]; - } else if (project_dir == 1) { - projected_hi[0] = prob_hi[0]; + for (int d=0; d1 cells in the other directions) -// returns a flattened multifab that is now flattened in the AMREX_SPACEDIM-1 direction -// (z direction in 3D, y direction in 2D) -MultiFab RotateFlattenedMF(MultiFab const& mf) -{ - BoxArray const& old_ba = mf.boxArray(); - DistributionMapping const& dm = mf.DistributionMap(); - Box const& domain_box = old_ba.minimalBox(); - int short_direction; - int short_size = domain_box.shortside(short_direction); - if (short_size != 1) { - Print() << "RotateFlattenedMF needs a MF with short_size==1; returning the original input MultiFab\n"; - return MultiFab(mf, amrex::make_alias, 0, mf.nComp()); - } else if (short_direction == AMREX_SPACEDIM-1) { - return MultiFab(mf, amrex::make_alias, 0, mf.nComp()); - } else { - IntVect old_ng = mf.nGrowVect(); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(old_ng[short_direction] == 0, - "Not supposed to have ghost cells in the shortest direction"); - IntVect ng; - if (short_direction == 0) { - ng = IntVect(AMREX_D_DECL(old_ng[1],old_ng[2],0)); - } else { - ng = IntVect(AMREX_D_DECL(old_ng[0],old_ng[2],0)); - } - BoxList bl = old_ba.boxList(); - for (auto& b : bl) { - const auto lo = b.smallEnd(); - const auto hi = b.bigEnd(); - if (short_direction == 0) { - b = Box(IntVect(AMREX_D_DECL(lo[1],lo[2],0)), - IntVect(AMREX_D_DECL(hi[1],hi[2],0)), - b.ixType()); - } else { - b = Box(IntVect(AMREX_D_DECL(lo[0],lo[2],0)), - IntVect(AMREX_D_DECL(hi[0],hi[2],0)), - b.ixType()); - } - } - BoxArray new_ba(std::move(bl)); - const int ncomp = mf.nComp(); - MultiFab new_mf(new_ba, dm, ncomp, ng, MFInfo().SetAlloc(false)); - for (MFIter mfi(new_mf); mfi.isValid(); ++mfi) { - new_mf.setFab(mfi, FArrayBox(mfi.fabbox(), ncomp, mf[mfi.index()].dataPtr())); - } - return new_mf; - } -} diff --git a/src_common/common_functions.H b/src_common/common_functions.H index 7cfeba665..2e3a6d0c9 100644 --- a/src_common/common_functions.H +++ b/src_common/common_functions.H @@ -243,11 +243,6 @@ void CCL2Norm(const MultiFab & m1, amrex::MultiFab& mscr, Real & norm_l2); -/////////////////////////// -// in RotateFlattenedMF.cpp - -MultiFab RotateFlattenedMF(MultiFab const& mf); - /////////////////////////// // in InterpCoarsen.cpp void FaceFillCoarse(Vector>& mf, int map); diff --git a/src_compressible/main_driver.cpp b/src_compressible/main_driver.cpp index 73c8eec97..bf97b00e9 100644 --- a/src_compressible/main_driver.cpp +++ b/src_compressible/main_driver.cpp @@ -406,7 +406,7 @@ void main_driver(const char* argv) // structure factor class for flattened dataset StructFact structFactPrimFlattened; - MultiFab primFlattenedRotMaster; + MultiFab primFlattenedMaster; ////////////////////////////////////////////// @@ -465,7 +465,7 @@ void main_driver(const char* argv) // structure factor class for flattened dataset StructFact structFactConsFlattened; - MultiFab consFlattenedRotMaster; + MultiFab consFlattenedMaster; ////////////////////////////////////////////// @@ -482,77 +482,31 @@ void main_driver(const char* argv) } else { ExtractSlice(structFactPrimMF, Flattened, geom, project_dir, slicepoint, 0, 1); } - // we rotate this flattened MultiFab to have normal in the z-direction since - // our 
structure factor class assumes this for flattened - MultiFab FlattenedRot = RotateFlattenedMF(Flattened); - BoxArray ba_flat = FlattenedRot.boxArray(); - const DistributionMapping& dmap_flat = FlattenedRot.DistributionMap(); - primFlattenedRotMaster.define(ba_flat,dmap_flat,structVarsPrim,0); - consFlattenedRotMaster.define(ba_flat,dmap_flat,structVarsCons,0); + BoxArray ba_flat = Flattened.boxArray(); + const DistributionMapping& dmap_flat = Flattened.DistributionMap(); + primFlattenedMaster.define(ba_flat,dmap_flat,structVarsPrim,0); + consFlattenedMaster.define(ba_flat,dmap_flat,structVarsCons,0); { - IntVect dom_lo(AMREX_D_DECL(0,0,0)); - IntVect dom_hi; - - // yes you could simplify this code but for now - // these are written out fully to better understand what is happening - // we wanted dom_hi[AMREX_SPACEDIM-1] to be equal to 0 - // and need to transmute the other indices depending on project_dir -#if (AMREX_SPACEDIM == 2) - if (project_dir == 0) { - dom_hi[0] = n_cells[1]-1; - } - else if (project_dir == 1) { - dom_hi[0] = n_cells[0]-1; - } - dom_hi[1] = 0; -#elif (AMREX_SPACEDIM == 3) - if (project_dir == 0) { - dom_hi[0] = n_cells[1]-1; - dom_hi[1] = n_cells[2]-1; - } else if (project_dir == 1) { - dom_hi[0] = n_cells[0]-1; - dom_hi[1] = n_cells[2]-1; - } else if (project_dir == 2) { - dom_hi[0] = n_cells[0]-1; - dom_hi[1] = n_cells[1]-1; - } - dom_hi[2] = 0; -#endif - Box domain(dom_lo, dom_hi); - - // This defines the physical box - Vector projected_hi(AMREX_SPACEDIM); - - // yes you could simplify this code but for now - // these are written out fully to better understand what is happening - // we wanted projected_hi[AMREX_SPACEDIM-1] to be equal to dx[projected_dir] - // and need to transmute the other indices depending on project_dir -#if (AMREX_SPACEDIM == 2) - if (project_dir == 0) { - projected_hi[0] = prob_hi[1]; - } else if (project_dir == 1) { - projected_hi[0] = prob_hi[0]; - } - projected_hi[1] = prob_hi[project_dir] / n_cells[project_dir]; -#elif (AMREX_SPACEDIM == 3) - if (project_dir == 0) { - projected_hi[0] = prob_hi[1]; - projected_hi[1] = prob_hi[2]; - } else if (project_dir == 1) { - projected_hi[0] = prob_hi[0]; - projected_hi[1] = prob_hi[2]; - } else if (project_dir == 2) { - projected_hi[0] = prob_hi[0]; - projected_hi[1] = prob_hi[1]; - } - projected_hi[2] = prob_hi[project_dir] / n_cells[project_dir]; -#endif - - RealBox real_box({AMREX_D_DECL( prob_lo[0], prob_lo[1], prob_lo[2])}, - {AMREX_D_DECL(projected_hi[0],projected_hi[1],projected_hi[2])}); - - // This defines a Geometry object - geom_flat.define(domain,&real_box,CoordSys::cartesian,is_periodic.data()); + Box domain_flat = primFlattenedMaster.boxArray().minimalBox(); + + // This defines the physical box + // we retain prob_lo and prob_hi in all directions except project_dir, + // where the physical size is 0 to dx[project_dir] + Vector projected_lo(AMREX_SPACEDIM); + Vector projected_hi(AMREX_SPACEDIM); + + for (int d=0; d structFactPrimArray; Vector < StructFact > structFactConsArray; - MultiFab master_2D_rot_prim; - MultiFab master_2D_rot_cons; + MultiFab prim2DFlattenedMaster; + MultiFab cons2DFlattenedMaster; #if defined(TURB) // Structure factor for compressible turbulence @@ -315,7 +315,7 @@ void main_driver(const char* argv) // surface coverage structure factor StructFact surfcovStructFact; - MultiFab surfcovFlattenedRotMaster; + MultiFab surfcovFlattenedMaster; Geometry surfcov_geom_flat; BoxArray surfcov_ba_flat; DistributionMapping surfcov_dmap_flat; @@ -766,75 +766,43 @@ void 
main_driver(const char* argv) // structure factor class for vertically-averaged dataset if (project_dir >= 0) { - + MultiFab Flattened; // flattened multifab define below + + // we are only calling ComputeVerticalAverage or ExtractSlice here to obtain + // a built version of Flattened so can obtain what we need to build the + // structure factor and geometry objects for flattened data + if (slicepoint < 0) { + ComputeVerticalAverage(prim, Flattened, geom, project_dir, 0, nprimvars); + } else { + ExtractSlice(prim, Flattened, geom, project_dir, slicepoint, 0, 1); + } + ba_flat = Flattened.boxArray(); + dmap_flat = Flattened.DistributionMap(); + primFlattenedMaster.define(ba_flat,dmap_flat,structVarsPrim,0); + consFlattenedMaster.define(ba_flat,dmap_flat,structVarsCons,0); { - MultiFab X, XRot; - if (slicepoint < 0) { - ComputeVerticalAverage(prim, X, geom, project_dir, 0, nprimvars); - } else { - ExtractSlice(prim, X, geom, project_dir, slicepoint, 0, 1); - } - XRot = RotateFlattenedMF(X); - ba_flat = XRot.boxArray(); - dmap_flat = XRot.DistributionMap(); - master_project_rot_prim.define(ba_flat,dmap_flat,structVarsPrim,0); - master_project_rot_cons.define(ba_flat,dmap_flat,structVarsCons,0); - - IntVect dom_lo_flat(AMREX_D_DECL(0,0,0)); - IntVect dom_hi_flat; -#if (AMREX_SPACEDIM == 2) - if (project_dir == 0) { - dom_hi_flat[0] = n_cells[1]-1; - dom_hi_flat[1] = 0; - } - else if (project_dir == 1) { - dom_hi_flat[0] = n_cells[0]-1; - dom_hi_flat[1] = 0; - } -#elif (AMREX_SPACEDIM == 3) - if (project_dir == 0) { - dom_hi_flat[0] = n_cells[1]-1; - dom_hi_flat[1] = n_cells[2]-1; - dom_hi_flat[2] = 0; - } else if (project_dir == 1) { - dom_hi_flat[0] = n_cells[0]-1; - dom_hi_flat[1] = n_cells[2]-1; - dom_hi_flat[2] = 0; - } else if (project_dir == 2) { - dom_hi_flat[0] = n_cells[0]-1; - dom_hi_flat[1] = n_cells[1]-1; - dom_hi_flat[2] = 0; - } -#endif - Box domain_flat(dom_lo_flat, dom_hi_flat); + Box domain_flat = primFlattenedMaster.boxArray().minimalBox(); // This defines the physical box + // we retain prob_lo and prob_hi in all directions except project_dir, + // where the physical size is 0 to dx[project_dir] + Vector projected_lo(AMREX_SPACEDIM); Vector projected_hi(AMREX_SPACEDIM); - for (int d=0; d projected_lo(AMREX_SPACEDIM); Vector projected_hi(AMREX_SPACEDIM); - // yes you could simplify this code but for now - // these are written out fully to better understand what is happening - // we wanted projected_hi[AMREX_SPACEDIM-1] to be equal to dx[projected_dir] - // and need to transmute the other indices depending on surfcov_dir -#if (AMREX_SPACEDIM == 2) - if (surfcov_dir == 0) { - projected_hi[0] = prob_hi[1]; - } else if (surfcov_dir == 1) { - projected_hi[0] = prob_hi[0]; - } - projected_hi[1] = prob_hi[surfcov_dir] / n_cells[surfcov_dir]; -#elif (AMREX_SPACEDIM == 3) - if (surfcov_dir == 0) { - projected_hi[0] = prob_hi[1]; - projected_hi[1] = prob_hi[2]; - } else if (surfcov_dir == 1) { - projected_hi[0] = prob_hi[0]; - projected_hi[1] = prob_hi[2]; - } else if (surfcov_dir == 2) { - projected_hi[0] = prob_hi[0]; - projected_hi[1] = prob_hi[1]; + for (int d=0; d Date: Fri, 3 Jan 2025 13:22:30 -0800 Subject: [PATCH 140/151] fix surface coverage SF bugs --- src_compressible_stag/main_driver.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp index 0c61fb4f4..c883805f3 100644 --- a/src_compressible_stag/main_driver.cpp +++ b/src_compressible_stag/main_driver.cpp @@ -887,15 
+887,15 @@ void main_driver(const char* argv) } ExtractSlice(surfcov, Flattened, geom, surfcov_dir, surfcov_plane, 0, surfcov_structVars); - BoxArray surfcov_surfcov_ba_flat = Flattened.boxArray(); + BoxArray surfcov_ba_flat = Flattened.boxArray(); const DistributionMapping& dmap_flat = Flattened.DistributionMap(); - surfcovFlattenedMaster.define(surfcov_surfcov_ba_flat,dmap_flat,surfcov_structVars,0); + surfcovFlattenedMaster.define(surfcov_ba_flat,dmap_flat,surfcov_structVars,0); { Box domain_flat = surfcovFlattenedMaster.boxArray().minimalBox(); // This defines the physical box - // we retain prob_lo and prob_hi in all directions except project_dir, - // where the physical size is 0 to dx[project_dir] + // we retain prob_lo and prob_hi in all directions except surfcov_dir, + // where the physical size is 0 to dx[surfcov_dir] Vector projected_lo(AMREX_SPACEDIM); Vector projected_hi(AMREX_SPACEDIM); @@ -903,8 +903,8 @@ void main_driver(const char* argv) projected_lo[d] = prob_lo[d]; projected_hi[d] = prob_hi[d]; } - projected_lo[project_dir] = 0.; - projected_hi[project_dir] = (prob_hi[project_dir] - prob_lo[project_dir]) / n_cells[project_dir]; + projected_lo[surfcov_dir] = 0.; + projected_hi[surfcov_dir] = (prob_hi[surfcov_dir] - prob_lo[surfcov_dir]) / n_cells[surfcov_dir]; RealBox real_box_flat({AMREX_D_DECL(projected_lo[0],projected_lo[1],projected_lo[2])}, {AMREX_D_DECL(projected_hi[0],projected_hi[1],projected_hi[2])}); @@ -916,7 +916,7 @@ void main_driver(const char* argv) surfcov_geom_flat.define(domain_flat,&real_box_flat,CoordSys::cartesian,is_periodic.data()); } - surfcovStructFact.define(surfcov_surfcov_ba_flat,dmap_flat,surfcov_var_names,surfcov_var_scaling); + surfcovStructFact.define(surfcov_ba_flat,dmap_flat,surfcov_var_names,surfcov_var_scaling); } ///////////////////////////////////////////////// From 19900023c60619f613e927abdc24246bbb1b12ea Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 3 Jan 2025 13:29:04 -0800 Subject: [PATCH 141/151] cleanup makefile --- exec/compressible_stag/GNUmakefile | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/exec/compressible_stag/GNUmakefile b/exec/compressible_stag/GNUmakefile index 9d27ef1d4..e0cf9740f 100644 --- a/exec/compressible_stag/GNUmakefile +++ b/exec/compressible_stag/GNUmakefile @@ -51,19 +51,8 @@ include $(AMREX_HOME)/Src/Base/Make.package include ../../src_analysis/Make.package VPATH_LOCATIONS += ../../src_analysis/ INCLUDE_LOCATIONS += ../../src_analysis/ - -include $(AMREX_HOME)/Tools/GNUMake/Make.rules -ifeq ($(USE_CUDA),TRUE) - LIBRARIES += -lcufft -else ifeq ($(USE_HIP),TRUE) - # Use rocFFT. 
ROC_PATH is defined in amrex - INCLUDE_LOCATIONS += $(ROC_PATH)/rocfft/include - LIBRARY_LOCATIONS += $(ROC_PATH)/rocfft/lib - LIBRARIES += -L$(ROC_PATH)/rocfft/lib -lrocfft -else - LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f -endif +include $(AMREX_HOME)/Tools/GNUMake/Make.rules ifeq ($(DO_TURB), TRUE) DEFINES += -DTURB From f323cadad299b62b76a1fb352bf000b59e8a4692 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 3 Jan 2025 15:42:41 -0800 Subject: [PATCH 142/151] renname variables --- src_common/common_functions.cpp | 2 +- src_compressible_stag/main_driver.cpp | 34 +++++++++++++-------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src_common/common_functions.cpp b/src_common/common_functions.cpp index bbc4d429a..eb708935c 100644 --- a/src_common/common_functions.cpp +++ b/src_common/common_functions.cpp @@ -340,7 +340,7 @@ void InitializeCommonNamespace() { // primvars - number of primative variables (no default) cross_cell = 0; // cell to compute spatial correlation - do_slab_sf = 0; // whether to compute SF in two slabs separated by cross_cell + do_slab_sf = 0; // whether to compute SF in two slabs separated by membrane_cell for (int i=0; i= 0) { if (do_slab_sf and ((membrane_cell <= 0) or (membrane_cell >= n_cells[project_dir]-1))) { - Abort("Slab structure factor needs a membrane cell within the domain: 0 < cross_cell < n_cells[project_dir] - 1"); + Abort("Slab structure factor needs a membrane cell within the domain: 0 < membrane_cell < n_cells[project_dir] - 1"); } if (do_slab_sf and slicepoint >= 0) { Abort("Cannot use do_slab_sf and slicepoint"); @@ -293,10 +293,10 @@ void main_driver(const char* argv) StructFact structFactConsFlattened; // Structure factor for 2D averaged data (across a membrane) - StructFact structFactPrimVerticalAverage0; - StructFact structFactPrimVerticalAverage1; - StructFact structFactConsVerticalAverage0; - StructFact structFactConsVerticalAverage1; + StructFact structFactPrimVerticalAverageMembraneLo; + StructFact structFactPrimVerticalAverageMembraneHi; + StructFact structFactConsVerticalAverageMembraneLo; + StructFact structFactConsVerticalAverageMembraneHi; MultiFab primFlattenedMaster; MultiFab consFlattenedMaster; @@ -809,10 +809,10 @@ void main_driver(const char* argv) structFactConsFlattened.define(ba_flat,dmap_flat,cons_var_names,var_scaling_cons); } else { - structFactPrimVerticalAverage0.define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim); - structFactPrimVerticalAverage1.define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim); - structFactConsVerticalAverage0.define(ba_flat,dmap_flat,cons_var_names,var_scaling_cons); - structFactConsVerticalAverage1.define(ba_flat,dmap_flat,cons_var_names,var_scaling_cons); + structFactPrimVerticalAverageMembraneLo.define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim); + structFactPrimVerticalAverageMembraneHi.define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim); + structFactConsVerticalAverageMembraneLo.define(ba_flat,dmap_flat,cons_var_names,var_scaling_cons); + structFactConsVerticalAverageMembraneHi.define(ba_flat,dmap_flat,cons_var_names,var_scaling_cons); } } @@ -1395,7 +1395,7 @@ void main_driver(const char* argv) ComputeVerticalAverage(structFactPrimMF, X, geom, project_dir, 0, structVarsPrim, 0, membrane_cell-1); primFlattenedMaster.ParallelCopy(X, 0, 0, structVarsPrim); - structFactPrimVerticalAverage0.FortStructure(primFlattenedMaster); + structFactPrimVerticalAverageMembraneLo.FortStructure(primFlattenedMaster); } { @@ -1403,7 +1403,7 @@ 
void main_driver(const char* argv) ComputeVerticalAverage(structFactPrimMF, X, geom, project_dir, 0, structVarsPrim, membrane_cell, n_cells[project_dir]-1); primFlattenedMaster.ParallelCopy(X, 0, 0, structVarsPrim); - structFactPrimVerticalAverage1.FortStructure(primFlattenedMaster); + structFactPrimVerticalAverageMembraneHi.FortStructure(primFlattenedMaster); } { @@ -1411,7 +1411,7 @@ void main_driver(const char* argv) ComputeVerticalAverage(structFactConsMF, X, geom, project_dir, 0, structVarsCons, 0, membrane_cell-1); consFlattenedMaster.ParallelCopy(X, 0, 0, structVarsCons); - structFactConsVerticalAverage0.FortStructure(consFlattenedMaster); + structFactConsVerticalAverageMembraneLo.FortStructure(consFlattenedMaster); } { @@ -1419,7 +1419,7 @@ void main_driver(const char* argv) ComputeVerticalAverage(structFactConsMF, X, geom, project_dir, 0, structVarsCons, membrane_cell, n_cells[project_dir]-1); consFlattenedMaster.ParallelCopy(X, 0, 0, structVarsCons); - structFactConsVerticalAverage1.FortStructure(consFlattenedMaster); + structFactConsVerticalAverageMembraneHi.FortStructure(consFlattenedMaster); } } } @@ -1475,10 +1475,10 @@ void main_driver(const char* argv) structFactConsFlattened.WritePlotFile(step,time,geom_flat,"plt_SF_cons_Flattened"); } else { - structFactPrimVerticalAverage0.WritePlotFile(step,time,geom_flat,"plt_SF_prim_VerticalAverageSlab0"); - structFactPrimVerticalAverage1.WritePlotFile(step,time,geom_flat,"plt_SF_prim_VerticalAverageSlab1"); - structFactConsVerticalAverage0.WritePlotFile(step,time,geom_flat,"plt_SF_cons_VerticalAverageSlab0"); - structFactConsVerticalAverage1.WritePlotFile(step,time,geom_flat,"plt_SF_cons_VerticalAverageSlab1"); + structFactPrimVerticalAverageMembraneLo.WritePlotFile(step,time,geom_flat,"plt_SF_prim_VerticalAverageMembraneLo"); + structFactPrimVerticalAverageMembraneHi.WritePlotFile(step,time,geom_flat,"plt_SF_prim_VerticalAverageMembraneHi"); + structFactConsVerticalAverageMembraneLo.WritePlotFile(step,time,geom_flat,"plt_SF_cons_VerticalAverageMembraneLo"); + structFactConsVerticalAverageMembraneHi.WritePlotFile(step,time,geom_flat,"plt_SF_cons_VerticalAverageMembraneHi"); } } From 1d0fb7e398c8ee5fcb4720a2fc0db2cbc7ea8c6c Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 3 Jan 2025 19:54:34 -0800 Subject: [PATCH 143/151] more SF cleanup --- src_compressible_stag/main_driver.cpp | 428 ++++++++++++-------------- 1 file changed, 200 insertions(+), 228 deletions(-) diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp index 38856d653..59fc217dd 100644 --- a/src_compressible_stag/main_driver.cpp +++ b/src_compressible_stag/main_driver.cpp @@ -209,16 +209,22 @@ void main_driver(const char* argv) if ((plot_cross) and ((cross_cell < 0) or (cross_cell > n_cells[0]-1))) { Abort("Cross cell needs to be within the domain: 0 <= cross_cell <= n_cells[0] - 1"); } - if (project_dir >= 0) { + if (struct_fact_int >0 and project_dir >= 0) { if (do_slab_sf and ((membrane_cell <= 0) or (membrane_cell >= n_cells[project_dir]-1))) { Abort("Slab structure factor needs a membrane cell within the domain: 0 < membrane_cell < n_cells[project_dir] - 1"); } + if (do_1D) { + Abort("Projected structure factors (project_dir) does not work for do_1D case"); + } if (do_slab_sf and slicepoint >= 0) { Abort("Cannot use do_slab_sf and slicepoint"); } - } - if ((project_dir >= 0) and ((do_1D) or (do_2D))) { - Abort("Projected structure factors (project_dir) works only for 3D case"); + if (do_2D and slicepoint >= 0) { + 
Abort("Cannot use do_2D and slicepoint"); + } + if (do_2D and project_dir != 2) { + Abort("Structure factors with do_2D requires project_dir == 2"); + } } if ((all_correl > 1) or (all_correl < 0)) { Abort("all_correl can be 0 or 1"); @@ -285,26 +291,41 @@ void main_driver(const char* argv) // Standard 3D structure factors StructFact structFactPrim; StructFact structFactCons; + // MultiFabs to copy data into for snapshots for full 3D data MultiFab structFactPrimMF; MultiFab structFactConsMF; // Structure factor for vertically-averaged or sliced data + // these are enabled if project_dir >= 0 AND do_slab_sf == 0 StructFact structFactPrimFlattened; StructFact structFactConsFlattened; - // Structure factor for 2D averaged data (across a membrane) + // these are enabled if project_dir >= 0 AND do_slab_sf == 1 StructFact structFactPrimVerticalAverageMembraneLo; StructFact structFactPrimVerticalAverageMembraneHi; StructFact structFactConsVerticalAverageMembraneLo; StructFact structFactConsVerticalAverageMembraneHi; - MultiFab primFlattenedMaster; - MultiFab consFlattenedMaster; - - // Vector of structure factors for 2D simulation + // Vector of structure factors of slices for 2D simulation + // these are enabled if do_2D (this mode assumes z slices; project_dir must equal 2) Vector < StructFact > structFactPrimArray; Vector < StructFact > structFactConsArray; - MultiFab prim2DFlattenedMaster; - MultiFab cons2DFlattenedMaster; + // MultiFabs to copy data into for snapshots for flattened + MultiFab structFactPrimFlattenedMF; + MultiFab structFactConsFlattenedMF; + + Geometry geom_flat; + BoxArray ba_flat; + DistributionMapping dmap_flat; + + // Structure factor for surface coverage slice + // these are enabled if n_ads_spec > 0 and assumes the k=0 plane is the slice of interest + StructFact structFactSurfCov; + // MultiFab to copy data into for snapshop + MultiFab structFactSurfCovMF; + + Geometry geom_surfcov; + BoxArray ba_surfcov; + DistributionMapping dmap_surfcov; #if defined(TURB) // Structure factor for compressible turbulence @@ -313,20 +334,6 @@ void main_driver(const char* argv) StructFact turbStructFactScalar; // scalars #endif - // surface coverage structure factor - StructFact surfcovStructFact; - MultiFab surfcovFlattenedMaster; - Geometry surfcov_geom_flat; - BoxArray surfcov_ba_flat; - DistributionMapping surfcov_dmap_flat; - - Geometry geom_flat; - Geometry geom_flat_2D; - BoxArray ba_flat; - BoxArray ba_flat_2D; - DistributionMapping dmap_flat; - DistributionMapping dmap_flat_2D; - // "primitive" variable structure factor will contain // rho // vel (shifted) @@ -758,165 +765,131 @@ void main_driver(const char* argv) /////////////////////////////////////////// if (struct_fact_int > 0) { - structFactPrimMF.define(ba, dmap, structVarsPrim, 0); - structFactPrim.define(ba,dmap,prim_var_names,var_scaling_prim); - - structFactConsMF.define(ba, dmap, structVarsCons, 0); - structFactCons.define(ba,dmap,cons_var_names,var_scaling_cons); - + + if ((do_1D==0) and (do_2D==0)) { + structFactPrim.define(ba,dmap,prim_var_names,var_scaling_prim); + structFactCons.define(ba,dmap,cons_var_names,var_scaling_cons); + structFactConsMF.define(ba,dmap,structVarsCons,0); + structFactPrimMF.define(ba,dmap,structVarsPrim,0); + } + // structure factor class for vertically-averaged dataset if (project_dir >= 0) { + MultiFab Flattened; // flattened multifab define below - // we are only calling ComputeVerticalAverage or ExtractSlice here to obtain + // we are only calling ExtractSlice here to obtain // a built 
version of Flattened so can obtain what we need to build the // structure factor and geometry objects for flattened data - if (slicepoint < 0) { - ComputeVerticalAverage(prim, Flattened, geom, project_dir, 0, nprimvars); - } else { - ExtractSlice(prim, Flattened, geom, project_dir, slicepoint, 0, 1); - } + ExtractSlice(prim, Flattened, geom, project_dir, 0, 0, 1); + ba_flat = Flattened.boxArray(); dmap_flat = Flattened.DistributionMap(); - primFlattenedMaster.define(ba_flat,dmap_flat,structVarsPrim,0); - consFlattenedMaster.define(ba_flat,dmap_flat,structVarsCons,0); - { - Box domain_flat = primFlattenedMaster.boxArray().minimalBox(); - // This defines the physical box - // we retain prob_lo and prob_hi in all directions except project_dir, - // where the physical size is 0 to dx[project_dir] - Vector projected_lo(AMREX_SPACEDIM); - Vector projected_hi(AMREX_SPACEDIM); + structFactPrimFlattenedMF.define(ba_flat,dmap_flat,structVarsPrim,0); + structFactConsFlattenedMF.define(ba_flat,dmap_flat,structVarsCons,0); - for (int d=0; d projected_lo(AMREX_SPACEDIM); + Vector projected_hi(AMREX_SPACEDIM); - if (do_slab_sf == 0) { - structFactPrimFlattened.define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim); - structFactConsFlattened.define(ba_flat,dmap_flat,cons_var_names,var_scaling_cons); - } - else { - structFactPrimVerticalAverageMembraneLo.define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim); - structFactPrimVerticalAverageMembraneHi.define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim); - structFactConsVerticalAverageMembraneLo.define(ba_flat,dmap_flat,cons_var_names,var_scaling_cons); - structFactConsVerticalAverageMembraneHi.define(ba_flat,dmap_flat,cons_var_names,var_scaling_cons); + for (int d=0; d projected_hi(AMREX_SPACEDIM); - for (int d=0; d0) { + if (n_ads_spec>0) { - MultiFab Flattened; // flattened multifab defined below - - // we are only calling ExtractSlice here to obtain - // a built version of Flattened so can obtain what we need to build the - // structure factor and geometry objects for flattened data - // assume surface covered is stored in the "k" direction in the k=0 coordinate. 
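// --- A minimal, self-contained sketch (not part of the patch) of the flattened-geometry
// --- arithmetic used in this hunk: keep prob_lo/prob_hi in every direction except
// --- project_dir, and collapse project_dir to a single cell of width dx[project_dir].
// --- The numeric values in the trailing comment are illustrative assumptions only.
#include <array>
std::array<std::array<double,3>,2>
FlattenedBounds(const std::array<double,3>& prob_lo,
                const std::array<double,3>& prob_hi,
                const std::array<int,3>&    n_cells,
                int project_dir)
{
    std::array<double,3> lo = prob_lo;
    std::array<double,3> hi = prob_hi;
    lo[project_dir] = 0.;
    hi[project_dir] = (prob_hi[project_dir] - prob_lo[project_dir]) / n_cells[project_dir];
    return {lo, hi};
}
// e.g. prob_lo=(0,0,0), prob_hi=(1,1,1), n_cells=(64,64,64), project_dir=2
// gives a flattened physical box of [0,1] x [0,1] x [0, 1/64].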
- int surfcov_dir = 2; - int surfcov_plane = 0; - int surfcov_structVars = n_ads_spec; - int surfcov_nPairs = surfcov_structVars*(surfcov_structVars+1)/2; - - Vector< std::string > surfcov_var_names; - surfcov_var_names.resize(surfcov_structVars); - for (int d=0; d surfcov_var_scaling(surfcov_nPairs); - for (int d=0; d surfcov_var_names; + surfcov_var_names.resize(surfcov_structVars); + for (int d=0; d surfcov_var_scaling(surfcov_nPairs); + for (int d=0; d projected_lo(AMREX_SPACEDIM); - Vector projected_hi(AMREX_SPACEDIM); + // This defines the physical box + // we retain prob_lo and prob_hi in all directions except surfcov_dir, + // where the physical size is 0 to dx[surfcov_dir] + Vector projected_lo(AMREX_SPACEDIM); + Vector projected_hi(AMREX_SPACEDIM); - for (int d=0; d= 0) { - if (do_slab_sf == 0) { - - { - MultiFab X; + if (do_2D) { - if (slicepoint < 0) { - ComputeVerticalAverage(structFactPrimMF, X, geom, project_dir, 0, structVarsPrim); - } else { - ExtractSlice(structFactPrimMF, X, geom, project_dir, slicepoint, 0, structVarsPrim); - } - primFlattenedMaster.ParallelCopy(X, 0, 0, structVarsPrim); - structFactPrimFlattened.FortStructure(primFlattenedMaster); - } + for (int i=0; i 0) { - surfcovStructFact.WritePlotFile(step,time,surfcov_geom_flat,"plt_SF_surfcov"); + structFactSurfCov.WritePlotFile(step,time,geom_surfcov,"plt_SF_surfcov"); } } From af2ac09af8ef770285346e7b109ad98112d12246 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Fri, 3 Jan 2025 20:07:53 -0800 Subject: [PATCH 144/151] EOL whitespace --- src_analysis/StructFact.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src_analysis/StructFact.cpp b/src_analysis/StructFact.cpp index cbd9ab72f..23011882a 100644 --- a/src_analysis/StructFact.cpp +++ b/src_analysis/StructFact.cpp @@ -565,8 +565,6 @@ void StructFact::CallFinalize(const int& zero_avg) Finalize(cov_real_temp, cov_imag_temp, zero_avg); } - - void StructFact::ShiftFFT(MultiFab& dft_out, const int& zero_avg) { BL_PROFILE_VAR("StructFact::ShiftFFT()",ShiftFFT); @@ -604,11 +602,11 @@ void StructFact::ShiftFFT(MultiFab& dft_out, const int& zero_avg) { const Box& bx = mfi.tilebox(); const Array4& dft_temp = dft_onegrid_temp.array(mfi); amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { + { if (i == 0 && j == 0 && k == 0) { dft_temp(i,j,k) = 0.; } - }); + }); } } From c940ce777e4f1893bd3d316b8374e7ae1169affd Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Sat, 4 Jan 2025 14:06:01 -0800 Subject: [PATCH 145/151] fix the Nx=1 case and the (Nx=1 AND Ny=1) cases for unpacking the truncated FFT into a full multifab. 
In these cases the spectral box is from Ny/2+1 since there is only 1 cell in x, and analogous for the Nx=Ny=1 cases, the spectral box here is Nz/2+1 --- src_analysis/StructFact.cpp | 150 +++++++++++++++++++++++++----------- 1 file changed, 107 insertions(+), 43 deletions(-) diff --git a/src_analysis/StructFact.cpp b/src_analysis/StructFact.cpp index 23011882a..06b837d91 100644 --- a/src_analysis/StructFact.cpp +++ b/src_analysis/StructFact.cpp @@ -307,11 +307,22 @@ void StructFact::ComputeFFT(const MultiFab& variables, BL_PROFILE_VAR("StructFact::ComputeFFT()", ComputeFFT); - bool is_flattened = false; - Box domain = variables.boxArray().minimalBox(); - if (domain.bigEnd(AMREX_SPACEDIM-1) == 0) { - is_flattened = true; // flattened case + bool chopped_in_x = false; + bool chopped_in_y = false; + bool chopped_in_z = false; + + // figure out which direction the spectral box will be chopped + if (domain.length(0) > 1) { + chopped_in_x = true; + } else if (domain.length(1) > 1) { + chopped_in_y = true; +#if (AMREX_SPACEDIM == 3) + } else if (domain.length(2) > 1) { + chopped_in_z = true; +#endif + } else { + Abort("Calling ComputeFFT for a MultiFab with only 1 cell"); } // compute number of points in the domain and the square root @@ -379,61 +390,114 @@ void StructFact::ComputeFFT(const MultiFab& variables, Array4 const& realpart = variables_dft_real_onegrid.array(mfi); Array4 const& imagpart = variables_dft_imag_onegrid.array(mfi); - amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - /* - Unpacking rules: + /* + Unpacking rules: + + For domains from (0,0,0) to (Nx-1,Ny-1,Nz-1) and chopped_in_x (i.e., Nx > 1) - For domains from (0,0,0) to (Nx-1,Ny-1,Nz-1) + For any cells with i index > Nx/2, these values are complex conjugates of the corresponding + entry where (Nx-i,Ny-j,Nz-k) UNLESS that index is zero, in which case you use 0. - For any cells with i index > Nx/2, these values are complex conjugates of the corresponding - entry where (Nx-i,Ny-j,Nz-k) UNLESS that index is zero, in which case you use 0. + e.g. for an 8^3 domain, any cell with i index - e.g. for an 8^3 domain, any cell with i index + Cell (6,2,3) is complex conjugate of (2,6,5) - Cell (6,2,3) is complex conjugate of (2,6,5) + Cell (4,1,0) is complex conjugate of (4,7,0) (note that the FFT is computed for 0 <= i <= Nx/2) - Cell (4,1,0) is complex conjugate of (4,7,0) (note that the FFT is computed for 0 <= i <= Nx/2) - */ - if (i <= bx.length(0)/2) { - // copy value - realpart(i,j,k) = spectral(i,j,k).real(); - imagpart(i,j,k) = spectral(i,j,k).imag(); - } else { - // copy complex conjugate - int iloc = bx.length(0)-i; - int jloc, kloc; - if (is_flattened) { + The analogy extends for the chopped_in_y and z directions + */ + + if (chopped_in_x) { + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + if (i <= bx.length(0)/2) { + // copy value + realpart(i,j,k) = spectral(i,j,k).real(); + imagpart(i,j,k) = spectral(i,j,k).imag(); + } else { + // copy complex conjugate + int iloc = bx.length(0)-i; + int jloc = (j == 0) ? 0 : bx.length(1)-j; #if (AMREX_SPACEDIM == 2) - jloc = 0; + int kloc = 0; #elif (AMREX_SPACEDIM == 3) - jloc = (j == 0) ? 0 : bx.length(1)-j; + int kloc = (k == 0) ? 
0 : bx.length(2)-k; #endif - kloc = 0; + if (unpack) { + realpart(i,j,k) = spectral(iloc,jloc,kloc).real(); + imagpart(i,j,k) = -spectral(iloc,jloc,kloc).imag(); + } + else { + realpart(i,j,k) = 0.0; + imagpart(i,j,k) = 0.0; + } + } + + realpart(i,j,k) /= sqrtnpts; + imagpart(i,j,k) /= sqrtnpts; + }); + } + + if (chopped_in_y) { + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + if (j <= bx.length(1)/2) { + // copy value + realpart(i,j,k) = spectral(i,j,k).real(); + imagpart(i,j,k) = spectral(i,j,k).imag(); } else { - jloc = (j == 0) ? 0 : bx.length(1)-j; + // copy complex conjugate + int iloc = (i == 0) ? 0 : bx.length(0)-i; + int jloc = bx.length(1)-j; #if (AMREX_SPACEDIM == 2) - kloc = 0; + int kloc = 0; #elif (AMREX_SPACEDIM == 3) - kloc = (k == 0) ? 0 : bx.length(2)-k; + int kloc = (k == 0) ? 0 : bx.length(2)-k; #endif + if (unpack) { + realpart(i,j,k) = spectral(iloc,jloc,kloc).real(); + imagpart(i,j,k) = -spectral(iloc,jloc,kloc).imag(); + } + else { + realpart(i,j,k) = 0.0; + imagpart(i,j,k) = 0.0; + } } - if (unpack) { - realpart(i,j,k) = spectral(iloc,jloc,kloc).real(); - imagpart(i,j,k) = -spectral(iloc,jloc,kloc).imag(); - } - else { - realpart(i,j,k) = 0.0; - imagpart(i,j,k) = 0.0; + realpart(i,j,k) /= sqrtnpts; + imagpart(i,j,k) /= sqrtnpts; + }); + } + + if (chopped_in_z) { + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + if (k <= bx.length(2)/2) { + // copy value + realpart(i,j,k) = spectral(i,j,k).real(); + imagpart(i,j,k) = spectral(i,j,k).imag(); + } else { + // copy complex conjugate + int iloc = (i == 0) ? 0 : bx.length(0)-i; + int jloc = (j == 0) ? 0 : bx.length(1)-j; + int kloc = bx.length(2)-k; + + if (unpack) { + realpart(i,j,k) = spectral(iloc,jloc,kloc).real(); + imagpart(i,j,k) = -spectral(iloc,jloc,kloc).imag(); + } + else { + realpart(i,j,k) = 0.0; + imagpart(i,j,k) = 0.0; + } } - } - realpart(i,j,k) /= sqrtnpts; - imagpart(i,j,k) /= sqrtnpts; - }); - } + realpart(i,j,k) /= sqrtnpts; + imagpart(i,j,k) /= sqrtnpts; + }); + } + + } // end MFIter variables_dft_real.ParallelCopy(variables_dft_real_onegrid,0,comp,1); variables_dft_imag.ParallelCopy(variables_dft_imag_onegrid,0,comp,1); From 48d14f398c07febb8b7f8d50fa271143bec8a5ad Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Mon, 6 Jan 2025 08:08:58 -0800 Subject: [PATCH 146/151] more structure factor cleanup --- exec/hydro/main_driver.cpp | 10 ++----- src_compressible/main_driver.cpp | 13 ++------- src_compressible_stag/main_driver.cpp | 42 +++++++-------------------- 3 files changed, 17 insertions(+), 48 deletions(-) diff --git a/exec/hydro/main_driver.cpp b/exec/hydro/main_driver.cpp index d3edef023..9db7bbe61 100644 --- a/exec/hydro/main_driver.cpp +++ b/exec/hydro/main_driver.cpp @@ -350,7 +350,6 @@ void main_driver(const char* argv) /////////////////////////////////////////// StructFact structFactFlattened; - MultiFab FlattenedMaster; Geometry geom_flat; @@ -367,9 +366,8 @@ void main_driver(const char* argv) } BoxArray ba_flat = Flattened.boxArray(); const DistributionMapping& dmap_flat = Flattened.DistributionMap(); - FlattenedMaster.define(ba_flat,dmap_flat,structVars,0); { - Box domain_flat = FlattenedMaster.boxArray().minimalBox(); + Box domain_flat = ba_flat.minimalBox(); // This defines the physical box // we retain prob_lo and prob_hi in all directions except project_dir, @@ -438,8 +436,7 @@ void main_driver(const char* argv) } else { ExtractSlice(structFactMF, Flattened, geom, project_dir, slicepoint, 0, structVars); } - 
FlattenedMaster.ParallelCopy(Flattened,0,0,structVars); - structFactFlattened.FortStructure(FlattenedMaster); + structFactFlattened.FortStructure(Flattened); } } @@ -533,8 +530,7 @@ void main_driver(const char* argv) } else { ExtractSlice(structFactMF, Flattened, geom, project_dir, slicepoint, 0, structVars); } - FlattenedMaster.ParallelCopy(Flattened,0,0,structVars); - structFactFlattened.FortStructure(FlattenedMaster); + structFactFlattened.FortStructure(Flattened); } } diff --git a/src_compressible/main_driver.cpp b/src_compressible/main_driver.cpp index bf97b00e9..fd5c62fcd 100644 --- a/src_compressible/main_driver.cpp +++ b/src_compressible/main_driver.cpp @@ -406,7 +406,6 @@ void main_driver(const char* argv) // structure factor class for flattened dataset StructFact structFactPrimFlattened; - MultiFab primFlattenedMaster; ////////////////////////////////////////////// @@ -465,7 +464,6 @@ void main_driver(const char* argv) // structure factor class for flattened dataset StructFact structFactConsFlattened; - MultiFab consFlattenedMaster; ////////////////////////////////////////////// @@ -484,10 +482,8 @@ void main_driver(const char* argv) } BoxArray ba_flat = Flattened.boxArray(); const DistributionMapping& dmap_flat = Flattened.DistributionMap(); - primFlattenedMaster.define(ba_flat,dmap_flat,structVarsPrim,0); - consFlattenedMaster.define(ba_flat,dmap_flat,structVarsCons,0); { - Box domain_flat = primFlattenedMaster.boxArray().minimalBox(); + Box domain_flat = ba_flat.minimalBox(); // This defines the physical box // we retain prob_lo and prob_hi in all directions except project_dir, @@ -733,11 +729,8 @@ void main_driver(const char* argv) ExtractSlice(structFactPrimMF, primFlattened, geom, project_dir, slicepoint, 0, structVarsPrim); ExtractSlice(structFactConsMF, consFlattened, geom, project_dir, slicepoint, 0, structVarsCons); } - primFlattenedMaster.ParallelCopy(primFlattened,0,0,structVarsPrim); - structFactPrimFlattened.FortStructure(primFlattenedMaster); - - consFlattenedMaster.ParallelCopy(consFlattened,0,0,structVarsCons); - structFactConsFlattened.FortStructure(consFlattenedMaster); + structFactPrimFlattened.FortStructure(primFlattened); + structFactConsFlattened.FortStructure(consFlattened); } // timer diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp index 59fc217dd..061a7296e 100644 --- a/src_compressible_stag/main_driver.cpp +++ b/src_compressible_stag/main_driver.cpp @@ -309,9 +309,6 @@ void main_driver(const char* argv) // these are enabled if do_2D (this mode assumes z slices; project_dir must equal 2) Vector < StructFact > structFactPrimArray; Vector < StructFact > structFactConsArray; - // MultiFabs to copy data into for snapshots for flattened - MultiFab structFactPrimFlattenedMF; - MultiFab structFactConsFlattenedMF; Geometry geom_flat; BoxArray ba_flat; @@ -320,12 +317,8 @@ void main_driver(const char* argv) // Structure factor for surface coverage slice // these are enabled if n_ads_spec > 0 and assumes the k=0 plane is the slice of interest StructFact structFactSurfCov; - // MultiFab to copy data into for snapshop - MultiFab structFactSurfCovMF; Geometry geom_surfcov; - BoxArray ba_surfcov; - DistributionMapping dmap_surfcov; #if defined(TURB) // Structure factor for compressible turbulence @@ -786,10 +779,7 @@ void main_driver(const char* argv) ba_flat = Flattened.boxArray(); dmap_flat = Flattened.DistributionMap(); - structFactPrimFlattenedMF.define(ba_flat,dmap_flat,structVarsPrim,0); - 
structFactConsFlattenedMF.define(ba_flat,dmap_flat,structVarsCons,0); - - Box domain_flat = structFactPrimFlattenedMF.boxArray().minimalBox(); + Box domain_flat = ba_flat.minimalBox(); // This defines the physical box // we retain prob_lo and prob_hi in all directions except project_dir, @@ -864,9 +854,8 @@ void main_driver(const char* argv) ExtractSlice(surfcov, Flattened, geom, surfcov_dir, surfcov_plane, 0, surfcov_structVars); BoxArray ba_surfcov = Flattened.boxArray(); const DistributionMapping& dmap_surfcov = Flattened.DistributionMap(); - structFactSurfCovMF.define(ba_surfcov,dmap_surfcov,surfcov_structVars,0); { - Box domain_surfcov = structFactSurfCovMF.boxArray().minimalBox(); + Box domain_surfcov = ba_surfcov.minimalBox(); // This defines the physical box // we retain prob_lo and prob_hi in all directions except surfcov_dir, @@ -1342,16 +1331,14 @@ void main_driver(const char* argv) MultiFab Flattened; ExtractSlice(structFactPrimMF, Flattened, geom, project_dir, i, 0, structVarsPrim); - structFactPrimFlattenedMF.ParallelCopy(Flattened, 0, 0, structVarsPrim); - structFactPrimArray[i].FortStructure(structFactPrimFlattenedMF); + structFactPrimArray[i].FortStructure(Flattened); } { MultiFab Flattened; ExtractSlice(structFactConsMF, Flattened, geom, project_dir, i, 0, structVarsCons); - structFactConsFlattenedMF.ParallelCopy(Flattened, 0, 0, structVarsCons); - structFactConsArray[i].FortStructure(structFactConsFlattenedMF); + structFactConsArray[i].FortStructure(Flattened); } } @@ -1367,8 +1354,7 @@ void main_driver(const char* argv) } else { ExtractSlice(structFactPrimMF, Flattened, geom, project_dir, slicepoint, 0, structVarsPrim); } - structFactPrimFlattenedMF.ParallelCopy(Flattened, 0, 0, structVarsPrim); - structFactPrimFlattened.FortStructure(structFactPrimFlattenedMF); + structFactPrimFlattened.FortStructure(Flattened); } { @@ -1379,8 +1365,7 @@ void main_driver(const char* argv) } else { ExtractSlice(structFactConsMF, Flattened, geom, project_dir, slicepoint, 0, structVarsCons); } - structFactConsFlattenedMF.ParallelCopy(Flattened, 0, 0, structVarsCons); - structFactConsFlattened.FortStructure(structFactConsFlattenedMF); + structFactConsFlattened.FortStructure(Flattened); } } else { @@ -1388,32 +1373,28 @@ void main_driver(const char* argv) MultiFab Flattened; ComputeVerticalAverage(structFactPrimMF, Flattened, geom, project_dir, 0, structVarsPrim, 0, membrane_cell-1); - structFactPrimFlattenedMF.ParallelCopy(Flattened, 0, 0, structVarsPrim); - structFactPrimVerticalAverageMembraneLo.FortStructure(structFactPrimFlattenedMF); + structFactPrimVerticalAverageMembraneLo.FortStructure(Flattened); } { MultiFab Flattened; ComputeVerticalAverage(structFactPrimMF, Flattened, geom, project_dir, 0, structVarsPrim, membrane_cell, n_cells[project_dir]-1); - structFactPrimFlattenedMF.ParallelCopy(Flattened, 0, 0, structVarsPrim); - structFactPrimVerticalAverageMembraneHi.FortStructure(structFactPrimFlattenedMF); + structFactPrimVerticalAverageMembraneHi.FortStructure(Flattened); } { MultiFab Flattened; ComputeVerticalAverage(structFactConsMF, Flattened, geom, project_dir, 0, structVarsCons, 0, membrane_cell-1); - structFactConsFlattenedMF.ParallelCopy(Flattened, 0, 0, structVarsCons); - structFactConsVerticalAverageMembraneLo.FortStructure(structFactConsFlattenedMF); + structFactConsVerticalAverageMembraneLo.FortStructure(Flattened); } { MultiFab Flattened; ComputeVerticalAverage(structFactConsMF, Flattened, geom, project_dir, 0, structVarsCons, membrane_cell, n_cells[project_dir]-1); - 
structFactConsFlattenedMF.ParallelCopy(Flattened, 0, 0, structVarsCons); - structFactConsVerticalAverageMembraneHi.FortStructure(structFactConsFlattenedMF); + structFactConsVerticalAverageMembraneHi.FortStructure(Flattened); } } } @@ -1425,8 +1406,7 @@ void main_driver(const char* argv) int surfcov_structVars = n_ads_spec; MultiFab Flattened; // flattened multifab defined below ExtractSlice(surfcov, Flattened, geom, surfcov_dir, surfcov_plane, 0, surfcov_structVars); - structFactSurfCovMF.ParallelCopy(Flattened,0,0,surfcov_structVars); - structFactSurfCov.FortStructure(structFactSurfCovMF); + structFactSurfCov.FortStructure(Flattened); } } From 1ee25e1b7a4d4425a183ea9886cf8c115aa30aad Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Mon, 13 Jan 2025 14:18:09 -0800 Subject: [PATCH 147/151] cleanup ExtractSlice and ComputeVerticalAverage routines move toward modernizing ComputeVerticalAverage to leverage AMReX GPU calls --- exec/compressible_mui/sav_src/2020/m00.cpp | 4 +-- .../sav_src/2020/m01-cutemp_jsq.cpp | 4 +-- exec/compressible_mui/sav_src/2020/m01.cpp | 4 +-- exec/compressible_mui/sav_src/2020/m02.cpp | 4 +-- exec/compressible_mui/sav_src/2020/m10.cpp | 4 +-- exec/compressible_mui/sav_src/2020/m11.cpp | 4 +-- exec/compressible_mui/sav_src/2020/m12.cpp | 4 +-- .../202101_before_mui_span/main_driver.cpp | 4 +-- .../main_driver.cpp_0126_bc | 4 +-- .../202106_before_summer/main_driver.cpp | 8 +++--- exec/hydro/main_driver.cpp | 12 ++++----- src_common/ComputeAverages.cpp | 8 +++--- src_common/common_functions.H | 4 +-- src_compressible/main_driver.cpp | 12 ++++----- src_compressible_stag/main_driver.cpp | 26 +++++++++---------- .../exercises/compressible/main_driver.cpp | 4 +-- 16 files changed, 55 insertions(+), 55 deletions(-) diff --git a/exec/compressible_mui/sav_src/2020/m00.cpp b/exec/compressible_mui/sav_src/2020/m00.cpp index 2e642e72d..ecac3a41c 100644 --- a/exec/compressible_mui/sav_src/2020/m00.cpp +++ b/exec/compressible_mui/sav_src/2020/m00.cpp @@ -521,7 +521,7 @@ void main_driver(const char* argv) if(project_dir >= 0){ prim.setVal(0.0); - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); BoxArray ba_flat = primVertAvg.boxArray(); const DistributionMapping& dmap_flat = primVertAvg.DistributionMap(); { @@ -675,7 +675,7 @@ void main_driver(const char* argv) structFactPrim.FortStructure(structFactPrimMF); structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/2020/m01-cutemp_jsq.cpp b/exec/compressible_mui/sav_src/2020/m01-cutemp_jsq.cpp index bf8a5be05..22344b5b6 100644 --- a/exec/compressible_mui/sav_src/2020/m01-cutemp_jsq.cpp +++ b/exec/compressible_mui/sav_src/2020/m01-cutemp_jsq.cpp @@ -533,7 +533,7 @@ void main_driver(const char* argv) if(project_dir >= 0){ prim.setVal(0.0); - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); BoxArray ba_flat = primVertAvg.boxArray(); const DistributionMapping& dmap_flat = primVertAvg.DistributionMap(); { @@ -687,7 +687,7 @@ void main_driver(const char* argv) structFactPrim.FortStructure(structFactPrimMF); 
structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/2020/m01.cpp b/exec/compressible_mui/sav_src/2020/m01.cpp index 6354863ec..e76d0cd51 100644 --- a/exec/compressible_mui/sav_src/2020/m01.cpp +++ b/exec/compressible_mui/sav_src/2020/m01.cpp @@ -521,7 +521,7 @@ void main_driver(const char* argv) if(project_dir >= 0){ prim.setVal(0.0); - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); BoxArray ba_flat = primVertAvg.boxArray(); const DistributionMapping& dmap_flat = primVertAvg.DistributionMap(); { @@ -675,7 +675,7 @@ void main_driver(const char* argv) structFactPrim.FortStructure(structFactPrimMF); structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/2020/m02.cpp b/exec/compressible_mui/sav_src/2020/m02.cpp index 6e18d8a32..78e026cbb 100644 --- a/exec/compressible_mui/sav_src/2020/m02.cpp +++ b/exec/compressible_mui/sav_src/2020/m02.cpp @@ -574,7 +574,7 @@ void main_driver(const char* argv) if(project_dir >= 0){ prim.setVal(0.0); - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); BoxArray ba_flat = primVertAvg.boxArray(); const DistributionMapping& dmap_flat = primVertAvg.DistributionMap(); { @@ -728,7 +728,7 @@ void main_driver(const char* argv) structFactPrim.FortStructure(structFactPrimMF); structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/2020/m10.cpp b/exec/compressible_mui/sav_src/2020/m10.cpp index 32db38e49..578cbdeec 100644 --- a/exec/compressible_mui/sav_src/2020/m10.cpp +++ b/exec/compressible_mui/sav_src/2020/m10.cpp @@ -523,7 +523,7 @@ void main_driver(const char* argv) if(project_dir >= 0){ prim.setVal(0.0); - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); BoxArray ba_flat = primVertAvg.boxArray(); const DistributionMapping& dmap_flat = primVertAvg.DistributionMap(); { @@ -684,7 +684,7 @@ void main_driver(const char* argv) structFactPrim.FortStructure(structFactPrimMF); structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/2020/m11.cpp b/exec/compressible_mui/sav_src/2020/m11.cpp index d9a823c2e..2148a2786 100644 --- a/exec/compressible_mui/sav_src/2020/m11.cpp +++ b/exec/compressible_mui/sav_src/2020/m11.cpp @@ -527,7 +527,7 @@ void 
main_driver(const char* argv) if(project_dir >= 0){ prim.setVal(0.0); - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); BoxArray ba_flat = primVertAvg.boxArray(); const DistributionMapping& dmap_flat = primVertAvg.DistributionMap(); { @@ -688,7 +688,7 @@ void main_driver(const char* argv) structFactPrim.FortStructure(structFactPrimMF); structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/2020/m12.cpp b/exec/compressible_mui/sav_src/2020/m12.cpp index 5b0309ede..35f59540e 100644 --- a/exec/compressible_mui/sav_src/2020/m12.cpp +++ b/exec/compressible_mui/sav_src/2020/m12.cpp @@ -527,7 +527,7 @@ void main_driver(const char* argv) if(project_dir >= 0){ prim.setVal(0.0); - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); BoxArray ba_flat = primVertAvg.boxArray(); const DistributionMapping& dmap_flat = primVertAvg.DistributionMap(); { @@ -690,7 +690,7 @@ void main_driver(const char* argv) structFactPrim.FortStructure(structFactPrimMF); structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp b/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp index 917300b60..613f1b062 100644 --- a/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp +++ b/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp @@ -534,7 +534,7 @@ void main_driver(const char* argv) if(project_dir >= 0){ prim.setVal(0.0); - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); BoxArray ba_flat = primVertAvg.boxArray(); const DistributionMapping& dmap_flat = primVertAvg.DistributionMap(); { @@ -703,7 +703,7 @@ void main_driver(const char* argv) structFactPrim.FortStructure(structFactPrimMF); structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp_0126_bc b/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp_0126_bc index efb7dc1ff..8b01a4370 100644 --- a/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp_0126_bc +++ b/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp_0126_bc @@ -547,7 +547,7 @@ void main_driver(const char* argv) if(project_dir >= 0){ prim.setVal(0.0); - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); BoxArray ba_flat = primVertAvg.boxArray(); const DistributionMapping& dmap_flat = 
primVertAvg.DistributionMap(); { @@ -716,7 +716,7 @@ void main_driver(const char* argv) structFactPrim.FortStructure(structFactPrimMF); structFactCons.FortStructure(structFactConsMF); if(project_dir >= 0) { - ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim); structFactPrimVerticalAverage.FortStructure(primVertAvg); } } diff --git a/exec/compressible_mui/sav_src/202106_before_summer/main_driver.cpp b/exec/compressible_mui/sav_src/202106_before_summer/main_driver.cpp index 0e799ee87..389c5f646 100644 --- a/exec/compressible_mui/sav_src/202106_before_summer/main_driver.cpp +++ b/exec/compressible_mui/sav_src/202106_before_summer/main_driver.cpp @@ -514,9 +514,9 @@ void main_driver(const char* argv) // a built version of primFlattened so can obtain what we need to build the // structure factor and geometry objects for flattened data if (slicepoint < 0) { - ComputeVerticalAverage(prim, primFlattened, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primFlattened, project_dir, 0, structVarsPrim); } else { - ExtractSlice(prim, primFlattened, geom, project_dir, slicepoint, 0, structVarsPrim); + ExtractSlice(prim, primFlattened, project_dir, slicepoint, 0, structVarsPrim); } // we rotate this flattened MultiFab to have normal in the z-direction since // SWFFT only presently supports flattened MultiFabs with z-normal. @@ -857,9 +857,9 @@ void main_driver(const char* argv) if(project_dir >= 0) { MultiFab primFlattened; // flattened multifab defined below if (slicepoint < 0) { - ComputeVerticalAverage(prim, primFlattened, geom, project_dir, 0, structVarsPrim); + ComputeVerticalAverage(prim, primFlattened, project_dir, 0, structVarsPrim); } else { - ExtractSlice(prim, primFlattened, geom, project_dir, slicepoint, 0, structVarsPrim); + ExtractSlice(prim, primFlattened, project_dir, slicepoint, 0, structVarsPrim); } // we rotate this flattened MultiFab to have normal in the z-direction since // SWFFT only presently supports flattened MultiFabs with z-normal. 
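// --- A brief usage sketch, not part of the patch: with this cleanup the Geometry
// --- argument is dropped from ComputeVerticalAverage and ExtractSlice, which now
// --- recover the domain box internally via mf.boxArray().minimalBox(). The
// --- declarations below restate the src_common/common_functions.H hunk later in
// --- this commit; the example call lines are the before/after forms that appear
// --- throughout these hunks (AMReX headers assumed available, as elsewhere in FHDeX).
#include <AMReX_MultiFab.H>
using namespace amrex;

// new signatures (see the common_functions.H hunk below)
void ComputeVerticalAverage(const MultiFab& mf, MultiFab& mf_flat,
                            const int& dir, const int& incomp, const int& ncomp,
                            const int& slablo=-1, const int& slabhi=99999);
void ExtractSlice(const MultiFab& mf, MultiFab& mf_slice,
                  const int dir, const int slice, const int incomp, const int ncomp);

// a call site therefore changes from
//   ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
// to
//   ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
// and analogously for ExtractSlice.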
diff --git a/exec/hydro/main_driver.cpp b/exec/hydro/main_driver.cpp
index 9db7bbe61..4fd0a5b16 100644
--- a/exec/hydro/main_driver.cpp
+++ b/exec/hydro/main_driver.cpp
@@ -360,9 +360,9 @@ void main_driver(const char* argv)
         // a built version of Flattened so can obtain what we need to build the
         // structure factor and geometry objects for flattened data
         if (slicepoint < 0) {
-            ComputeVerticalAverage(structFactMF, Flattened, geom, project_dir, 0, 1);
+            ComputeVerticalAverage(structFactMF, Flattened, project_dir, 0, 1);
         } else {
-            ExtractSlice(structFactMF, Flattened, geom, project_dir, slicepoint, 0, 1);
+            ExtractSlice(structFactMF, Flattened, project_dir, slicepoint, 0, 1);
         }
         BoxArray ba_flat = Flattened.boxArray();
         const DistributionMapping& dmap_flat = Flattened.DistributionMap();
@@ -432,9 +432,9 @@ void main_driver(const char* argv)
         if(project_dir >= 0) {
             MultiFab Flattened; // flattened multifab defined below
             if (slicepoint < 0) {
-                ComputeVerticalAverage(structFactMF, Flattened, geom, project_dir, 0, structVars);
+                ComputeVerticalAverage(structFactMF, Flattened, project_dir, 0, structVars);
             } else {
-                ExtractSlice(structFactMF, Flattened, geom, project_dir, slicepoint, 0, structVars);
+                ExtractSlice(structFactMF, Flattened, project_dir, slicepoint, 0, structVars);
             }
             structFactFlattened.FortStructure(Flattened);
         }
@@ -526,9 +526,9 @@ void main_driver(const char* argv)
         if(project_dir >= 0) {
             MultiFab Flattened; // flattened multifab defined below
             if (slicepoint < 0) {
-                ComputeVerticalAverage(structFactMF, Flattened, geom, project_dir, 0, structVars);
+                ComputeVerticalAverage(structFactMF, Flattened, project_dir, 0, structVars);
             } else {
-                ExtractSlice(structFactMF, Flattened, geom, project_dir, slicepoint, 0, structVars);
+                ExtractSlice(structFactMF, Flattened, project_dir, slicepoint, 0, structVars);
             }
             structFactFlattened.FortStructure(Flattened);
         }
diff --git a/src_common/ComputeAverages.cpp b/src_common/ComputeAverages.cpp
index 2b474cc53..5e896c253 100644
--- a/src_common/ComputeAverages.cpp
+++ b/src_common/ComputeAverages.cpp
@@ -193,7 +193,7 @@ void WriteHorizontalAverageToMF(const MultiFab& mf_in, MultiFab& mf_out,
 
 
 void ComputeVerticalAverage(const MultiFab& mf, MultiFab& mf_flat,
-                            const Geometry& geom, const int& dir,
+                            const int& dir,
                             const int& incomp, const int& ncomp,
                             const int& slablo, const int& slabhi)
 {
@@ -215,7 +215,7 @@ void ComputeVerticalAverage(const MultiFab& mf, MultiFab& mf_flat,
     MultiFab mf_pencil;
 
     // get a single Box that spans the full domain
-    Box domain(geom.Domain());
+    Box domain(mf.boxArray().minimalBox());
 
     // these are the transverse directions (i.e., NOT the dir direction)
     int dir1=0, dir2=0;
@@ -346,7 +346,7 @@ void ComputeVerticalAverage(const MultiFab& mf, MultiFab& mf_flat,
 }
 
 void ExtractSlice(const MultiFab& mf, MultiFab& mf_slice,
-                  const Geometry& geom, const int dir, const int slice,
+                  const int dir, const int slice,
                   const int incomp, const int ncomp)
 {
     BL_PROFILE_VAR("ExtractSlice()",ExtractSlice);
@@ -354,7 +354,7 @@ void ExtractSlice(const MultiFab& mf, MultiFab& mf_slice,
     // create BoxArray
 
     // get lo and hi coordinates of problem domain
-    Box domain(geom.Domain());
+    Box domain(mf.boxArray().minimalBox());
     IntVect dom_lo(domain.loVect());
     IntVect dom_hi(domain.hiVect());
 
diff --git a/src_common/common_functions.H b/src_common/common_functions.H
index 2e3a6d0c9..affabd688 100644
--- a/src_common/common_functions.H
+++ b/src_common/common_functions.H
@@ -150,11 +150,11 @@ void WriteHorizontalAverageToMF(const MultiFab& mf_in, MultiFab& mf_out,
                                 const int& dir, const int& incomp,
                                 const int& ncomp, int outcomp=-1);
 
-void ComputeVerticalAverage(const MultiFab & mf, MultiFab & mf_flat, const Geometry & geom,
+void ComputeVerticalAverage(const MultiFab & mf, MultiFab & mf_flat,
                             const int& dir, const int& incomp, const int& ncomp,
                             const int& slablo=-1, const int& slabhi=99999);
 
-void ExtractSlice(const MultiFab & mf, MultiFab & mf_slice, const Geometry & geom,
+void ExtractSlice(const MultiFab & mf, MultiFab & mf_slice,
                   const int dir, const int slice,
                   const int incomp, const int ncomp);
 
 ///////////////////////////
diff --git a/src_compressible/main_driver.cpp b/src_compressible/main_driver.cpp
index fd5c62fcd..c9654aa60 100644
--- a/src_compressible/main_driver.cpp
+++ b/src_compressible/main_driver.cpp
@@ -476,9 +476,9 @@ void main_driver(const char* argv)
         // a built version of primFlattened so can obtain what we need to build the
         // structure factor and geometry objects for flattened data
         if (slicepoint < 0) {
-            ComputeVerticalAverage(structFactPrimMF, Flattened, geom, project_dir, 0, 1);
+            ComputeVerticalAverage(structFactPrimMF, Flattened, project_dir, 0, 1);
         } else {
-            ExtractSlice(structFactPrimMF, Flattened, geom, project_dir, slicepoint, 0, 1);
+            ExtractSlice(structFactPrimMF, Flattened, project_dir, slicepoint, 0, 1);
         }
         BoxArray ba_flat = Flattened.boxArray();
         const DistributionMapping& dmap_flat = Flattened.DistributionMap();
@@ -723,11 +723,11 @@ void main_driver(const char* argv)
             MultiFab primFlattened; // flattened multifab defined below
             MultiFab consFlattened; // flattened multifab defined below
             if (slicepoint < 0) {
-                ComputeVerticalAverage(structFactPrimMF, primFlattened, geom, project_dir, 0, structVarsPrim);
-                ComputeVerticalAverage(structFactConsMF, consFlattened, geom, project_dir, 0, structVarsCons);
+                ComputeVerticalAverage(structFactPrimMF, primFlattened, project_dir, 0, structVarsPrim);
+                ComputeVerticalAverage(structFactConsMF, consFlattened, project_dir, 0, structVarsCons);
             } else {
-                ExtractSlice(structFactPrimMF, primFlattened, geom, project_dir, slicepoint, 0, structVarsPrim);
-                ExtractSlice(structFactConsMF, consFlattened, geom, project_dir, slicepoint, 0, structVarsCons);
+                ExtractSlice(structFactPrimMF, primFlattened, project_dir, slicepoint, 0, structVarsPrim);
+                ExtractSlice(structFactConsMF, consFlattened, project_dir, slicepoint, 0, structVarsCons);
             }
             structFactPrimFlattened.FortStructure(primFlattened);
             structFactConsFlattened.FortStructure(consFlattened);
diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp
index 061a7296e..c455fee2d 100644
--- a/src_compressible_stag/main_driver.cpp
+++ b/src_compressible_stag/main_driver.cpp
@@ -774,7 +774,7 @@ void main_driver(const char* argv)
         // we are only calling ExtractSlice here to obtain
         // a built version of Flattened so can obtain what we need to build the
         // structure factor and geometry objects for flattened data
-        ExtractSlice(prim, Flattened, geom, project_dir, 0, 0, 1);
+        ExtractSlice(prim, Flattened, project_dir, 0, 0, 1);
 
         ba_flat = Flattened.boxArray();
         dmap_flat = Flattened.DistributionMap();
@@ -851,7 +851,7 @@ void main_driver(const char* argv)
             surfcov_var_scaling[d] = 1.;
         }
-        ExtractSlice(surfcov, Flattened, geom, surfcov_dir, surfcov_plane, 0, surfcov_structVars);
+        ExtractSlice(surfcov, Flattened, surfcov_dir, surfcov_plane, 0, surfcov_structVars);
         BoxArray ba_surfcov = Flattened.boxArray();
         const DistributionMapping& dmap_surfcov = Flattened.DistributionMap();
         {
@@ -1330,14 +1330,14 @@ void main_driver(const char* argv)
             {
                 MultiFab Flattened;
-                ExtractSlice(structFactPrimMF, Flattened, geom, project_dir, i, 0, structVarsPrim);
+                ExtractSlice(structFactPrimMF, Flattened, project_dir, i, 0, structVarsPrim);
                 structFactPrimArray[i].FortStructure(Flattened);
             }
 
             {
                 MultiFab Flattened;
-                ExtractSlice(structFactConsMF, Flattened, geom, project_dir, i, 0, structVarsCons);
+                ExtractSlice(structFactConsMF, Flattened, project_dir, i, 0, structVarsCons);
                 structFactConsArray[i].FortStructure(Flattened);
             }
@@ -1350,9 +1350,9 @@ void main_driver(const char* argv)
             MultiFab Flattened;
             if (slicepoint < 0) {
-                ComputeVerticalAverage(structFactPrimMF, Flattened, geom, project_dir, 0, structVarsPrim);
+                ComputeVerticalAverage(structFactPrimMF, Flattened, project_dir, 0, structVarsPrim);
             } else {
-                ExtractSlice(structFactPrimMF, Flattened, geom, project_dir, slicepoint, 0, structVarsPrim);
+                ExtractSlice(structFactPrimMF, Flattened, project_dir, slicepoint, 0, structVarsPrim);
             }
             structFactPrimFlattened.FortStructure(Flattened);
         }
@@ -1361,9 +1361,9 @@ void main_driver(const char* argv)
             MultiFab Flattened;
             if (slicepoint < 0) {
-                ComputeVerticalAverage(structFactConsMF, Flattened, geom, project_dir, 0, structVarsCons);
+                ComputeVerticalAverage(structFactConsMF, Flattened, project_dir, 0, structVarsCons);
             } else {
-                ExtractSlice(structFactConsMF, Flattened, geom, project_dir, slicepoint, 0, structVarsCons);
+                ExtractSlice(structFactConsMF, Flattened, project_dir, slicepoint, 0, structVarsCons);
             }
             structFactConsFlattened.FortStructure(Flattened);
         }
@@ -1372,28 +1372,28 @@ void main_driver(const char* argv)
             {
                 MultiFab Flattened;
-                ComputeVerticalAverage(structFactPrimMF, Flattened, geom, project_dir, 0, structVarsPrim, 0, membrane_cell-1);
+                ComputeVerticalAverage(structFactPrimMF, Flattened, project_dir, 0, structVarsPrim, 0, membrane_cell-1);
                 structFactPrimVerticalAverageMembraneLo.FortStructure(Flattened);
             }
 
             {
                 MultiFab Flattened;
-                ComputeVerticalAverage(structFactPrimMF, Flattened, geom, project_dir, 0, structVarsPrim, membrane_cell, n_cells[project_dir]-1);
+                ComputeVerticalAverage(structFactPrimMF, Flattened, project_dir, 0, structVarsPrim, membrane_cell, n_cells[project_dir]-1);
                 structFactPrimVerticalAverageMembraneHi.FortStructure(Flattened);
             }
 
             {
                 MultiFab Flattened;
-                ComputeVerticalAverage(structFactConsMF, Flattened, geom, project_dir, 0, structVarsCons, 0, membrane_cell-1);
+                ComputeVerticalAverage(structFactConsMF, Flattened, project_dir, 0, structVarsCons, 0, membrane_cell-1);
                 structFactConsVerticalAverageMembraneLo.FortStructure(Flattened);
             }
 
             {
                 MultiFab Flattened;
-                ComputeVerticalAverage(structFactConsMF, Flattened, geom, project_dir, 0, structVarsCons, membrane_cell, n_cells[project_dir]-1);
+                ComputeVerticalAverage(structFactConsMF, Flattened, project_dir, 0, structVarsCons, membrane_cell, n_cells[project_dir]-1);
                 structFactConsVerticalAverageMembraneHi.FortStructure(Flattened);
             }
         }
@@ -1405,7 +1405,7 @@ void main_driver(const char* argv)
             int surfcov_plane = 0;
             int surfcov_structVars = n_ads_spec;
             MultiFab Flattened; // flattened multifab defined below
-            ExtractSlice(surfcov, Flattened, geom, surfcov_dir, surfcov_plane, 0, surfcov_structVars);
+            ExtractSlice(surfcov, Flattened, surfcov_dir, surfcov_plane, 0, surfcov_structVars);
             structFactSurfCov.FortStructure(Flattened);
         }
diff --git a/unmaintained/exercises/compressible/main_driver.cpp b/unmaintained/exercises/compressible/main_driver.cpp
index a3e55e150..3b7cbea00 100644
--- a/unmaintained/exercises/compressible/main_driver.cpp
+++ b/unmaintained/exercises/compressible/main_driver.cpp
@@ -412,7 +412,7 @@ void main_driver(const char* argv)
     if(project_dir >= 0){
         cu.setVal(0.0);
-        ComputeVerticalAverage(cu, cuVertAvg, geom, project_dir, 0, nvars);
+        ComputeVerticalAverage(cu, cuVertAvg, project_dir, 0, nvars);
         BoxArray ba_flat = cuVertAvg.boxArray();
         const DistributionMapping& dmap_flat = cuVertAvg.DistributionMap();
         {
@@ -538,7 +538,7 @@ void main_driver(const char* argv)
 //            MultiFab::Copy(struct_in_cc, cu, 0, 0, nvar_sf, 0);
 //            structFact.FortStructure(struct_in_cc);
 //            if(project_dir >= 0) {
-//                ComputeVerticalAverage(cu, cuVertAvg, geom, project_dir, 0, nvars);
+//                ComputeVerticalAverage(cu, cuVertAvg, project_dir, 0, nvars);
 //                structFactVA.FortStructure(cuVertAvg);
 //            }
 //        }

From a9bb05a0a8b2488d2619fa04b171092da995fb0e Mon Sep 17 00:00:00 2001
From: Andy Nonaka
Date: Mon, 13 Jan 2025 14:42:41 -0800
Subject: [PATCH 148/151] fix index bug for case where ComputeVerticalAverage
 is called with incomp>0

---
 src_common/ComputeAverages.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src_common/ComputeAverages.cpp b/src_common/ComputeAverages.cpp
index 5e896c253..a9f277ada 100644
--- a/src_common/ComputeAverages.cpp
+++ b/src_common/ComputeAverages.cpp
@@ -294,7 +294,7 @@ void ComputeVerticalAverage(const MultiFab& mf, MultiFab& mf_flat,
 
         if (dir == 0) {
 
-            for (auto n = incomp; n

Date: Tue, 14 Jan 2025 13:14:46 -0800
Subject: [PATCH 149/151] fix compilation issue

---
 exec/structFactTest/main_driver.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/exec/structFactTest/main_driver.cpp b/exec/structFactTest/main_driver.cpp
index 87696d4fd..64e5a639b 100644
--- a/exec/structFactTest/main_driver.cpp
+++ b/exec/structFactTest/main_driver.cpp
@@ -134,7 +134,7 @@ void main_driver(const char* argv)
     // take an FFT and write them out
     MultiFab dft_real(ba, dmap, 2, 0);
     MultiFab dft_imag(ba, dmap, 2, 0);
-    structFact.ComputeFFT(struct_cc,dft_real,dft_imag,geom);
+    structFact.ComputeFFT(struct_cc,dft_real,dft_imag);
 
     WriteSingleLevelPlotfile("plt_real", dft_real, {"var1", "var2"}, geom, 0., 0);
     WriteSingleLevelPlotfile("plt_imag", dft_imag, {"var1", "var2"}, geom, 0., 0);

From 7b72f52a34134b9cb4e4dcb0c361289ebc91534c Mon Sep 17 00:00:00 2001
From: Andy Nonaka
Date: Tue, 14 Jan 2025 13:24:23 -0800
Subject: [PATCH 150/151] gpu friendly ComputeVerticalAverage

---
 src_common/ComputeAverages.cpp | 134 +++++++--------------------------
 1 file changed, 26 insertions(+), 108 deletions(-)

diff --git a/src_common/ComputeAverages.cpp b/src_common/ComputeAverages.cpp
index a9f277ada..5073cbf22 100644
--- a/src_common/ComputeAverages.cpp
+++ b/src_common/ComputeAverages.cpp
@@ -203,20 +203,32 @@ void ComputeVerticalAverage(const MultiFab& mf, MultiFab& mf_flat,
     if (dir >= AMREX_SPACEDIM) {
         Abort("ComputeVerticalAverage: invalid dir");
     }
-
-    // debugging
-    bool write_data = false;
-
-    // this is a full MultiFab with pencil-shaped boxes
-    // we will define mf_flat as a flattened MultiFab that
-    // has the same BoxArray but flattened in the dir direction
-    // and the same DistributionMapping so
-    // we can do the averaging from mf_pencil to mf_flat on a box-by-box basis
-    MultiFab mf_pencil;
 
     // get a single Box that spans the full domain
     Box domain(mf.boxArray().minimalBox());
 
+    auto const& ma = mf.const_arrays();
+    auto fab = ReduceToPlane<ReduceOpSum,Real>(dir, domain, mf,
+               [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) -> Real
+               {
+                   return ma[box_no](i,j,k); // data at (i,j,k) of Box box_no
+               });
+
+    Box dom2d = fab.box();
+    Vector<Box> bv(ParallelDescriptor::NProcs(),dom2d);
+    BoxArray ba(bv.data(), bv.size());
+
+    Vector<int> pmap(ParallelDescriptor::NProcs());
+    std::iota(pmap.begin(), pmap.end(), 0);
+    DistributionMapping dm(std::move(pmap));
+
+    MultiFab mftmp(ba, dm, fab.nComp(), 0, MFInfo().SetAlloc(false));
+    mftmp.setFab(ParallelDescriptor::MyProc(),
+                 FArrayBox(fab.box(), fab.nComp(), fab.dataPtr()));
+
+    BoxArray ba2(dom2d);
+
     // these are the transverse directions (i.e., NOT the dir direction)
     int dir1=0, dir2=0;
 #if (AMREX_SPACEDIM == 2)
@@ -233,7 +245,7 @@ void ComputeVerticalAverage(const MultiFab& mf, MultiFab& mf_flat,
         dir2 = 1;
     }
 #endif
-
+
     // max_grid_size_pencil will be equal to the number of cells in the domain in the dir direction
     // and uses max_grid_projection to set the non-dir directions
     Vector<int> max_grid_size_pencil(AMREX_SPACEDIM);
@@ -243,7 +255,7 @@ void ComputeVerticalAverage(const MultiFab& mf, MultiFab& mf_flat,
     max_grid_size_pencil[dir2] = max_grid_projection[1];
 #endif
 
-    // create the BoxArray for the pencil MultiFab
-    BoxArray ba_pencil(domain);
-    ba_pencil.maxSize(IntVect(max_grid_size_pencil));
-
-    // create DistributionMapping on the pencil BoxArray
-    DistributionMapping dmap_pencil(ba_pencil);
-
-    // build pencil MultiFab
-    mf_pencil.define(ba_pencil,dmap_pencil,ncomp,0);
+    ba2.maxSize(IntVect(max_grid_size_pencil));
 
-    // copy data from full MultiFab to pencil MultiFab
-    mf_pencil.ParallelCopy(mf, incomp, 0, ncomp);
-
-    // create a single flattened box with coordinate index 0 in the dir direction
-    IntVect dom_lo(domain.loVect());
-    IntVect dom_hi(domain.hiVect());
-    if (dom_lo[dir] != 0) {
-        Abort("ComputeVerticalAverage requires dom_lo[dir]=0");
-    }
-    dom_hi[dir] = 0;
-    Box domain_flat(dom_lo, dom_hi);
-
-    // create the BoxArray for the flattened MultiFab
-    BoxArray ba_flat(domain_flat);
-    ba_flat.maxSize(IntVect(max_grid_size_pencil));
-
-    // build flattened MultiFab and initialize to zero
-    mf_flat.define(ba_flat,dmap_pencil,ncomp,0);
+    mf_flat.define(ba2, DistributionMapping{ba2}, fab.nComp(), 0);
     mf_flat.setVal(0.);
-
-    // this is the inverse of the number of cells in the dir direction we are averaging over
-    // by default we average over the entire domain, but one can pass in slab_lo/hi to set bounds
-    Real ninv;
-    if (slablo != -1 && slabhi != 99999) {
-        ninv = 1./(slabhi-slablo+1);
-    } else {
-        ninv = 1./(domain.length(dir));
-    }
-
-    // average pencil data onto the flattened MultiFab
-    for ( MFIter mfi(mf_pencil); mfi.isValid(); ++mfi ) {
-        const Box& bx = mfi.validbox();
-
-        const auto lo = amrex::lbound(bx);
-        const auto hi = amrex::ubound(bx);
-
-        const Array4<Real> meanfab = mf_flat.array(mfi);
-        const Array4<Real> inputfab = mf_pencil.array(mfi);
-
-        if (dir == 0) {
-
-            for (auto n = 0; n<ncomp; ++n) {
-            for (auto k = lo.z; k<=hi.z; ++k) {
-            for (auto j = lo.y; j<=hi.y; ++j) {
-            for (auto i = lo.x; i<=hi.x; ++i) {
-                if ((i >= slablo) and (i <= slabhi)) {
-                    meanfab(0,j,k,n) = meanfab(0,j,k,n) + ninv*inputfab(i,j,k,n);
-                }
-            }
-            }
-            }
-            }
-
-        } else if (dir == 1) {
-
-            for (auto n = 0; n<ncomp; ++n) {
-            for (auto k = lo.z; k<=hi.z; ++k) {
-            for (auto j = lo.y; j<=hi.y; ++j) {
-            for (auto i = lo.x; i<=hi.x; ++i) {
-                if ((j >= slablo) and (j <= slabhi)) {
-                    meanfab(i,0,k,n) = meanfab(i,0,k,n) + ninv*inputfab(i,j,k,n);
-                }
-            }
-            }
-            }
-            }
-
-        } else if (dir == 2) {
-
-            for (auto n = 0; n<ncomp; ++n) {
-            for (auto k = lo.z; k<=hi.z; ++k) {
-            for (auto j = lo.y; j<=hi.y; ++j) {
-            for (auto i = lo.x; i<=hi.x; ++i) {
-                if ((k >= slablo) and (k <= slabhi)) {
-                    meanfab(i,j,0,n) = meanfab(i,j,0,n) + ninv*inputfab(i,j,k,n);
-                }
-            }
-            }
-            }
-            }
-        }
-    }
-
-    // debugging
-    if (write_data) {
-        VisMF::Write(mf,"mf_full");
-        VisMF::Write(mf_pencil,"mf_pencil");
-        VisMF::Write(mf_flat,"mf_flat");
-    }
+    mf_flat.ParallelAdd(mftmp);
 }
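The patch above and the one below replace the pencil-copy/triple-loop averaging with amrex::ReduceToPlane followed by a ParallelAdd redistribution. A condensed, self-contained sketch of that pattern is given here; it is illustrative only (the helper name FlattenByAverage is invented for this sketch), treats a single component, and omits the slab bounds and max_grid_projection chopping that the real routine keeps.

    #include <numeric>                     // std::iota
    #include <AMReX_MultiFab.H>
    #include <AMReX_MultiFabUtil.H>        // amrex::ReduceToPlane
    #include <AMReX_ParallelDescriptor.H>

    using namespace amrex;

    void FlattenByAverage (const MultiFab& mf, MultiFab& mf_flat, int dir)
    {
        Box domain = mf.boxArray().minimalBox();
        Real ninv  = 1.0 / domain.length(dir);

        // sum each column along "dir" into a plane-shaped BaseFab on every rank
        auto const& ma = mf.const_arrays();
        auto plane = ReduceToPlane<ReduceOpSum,Real>(dir, domain, mf,
            [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) -> Real
            { return ma[box_no](i,j,k,0); });

        // view each rank's plane as one box of a temporary MultiFab (no new allocation)
        Box dom2d = plane.box();
        Vector<Box> bv(ParallelDescriptor::NProcs(), dom2d);
        BoxArray ba(bv.data(), bv.size());
        Vector<int> pmap(ParallelDescriptor::NProcs());
        std::iota(pmap.begin(), pmap.end(), 0);
        DistributionMapping dm(std::move(pmap));
        MultiFab planes(ba, dm, 1, 0, MFInfo().SetAlloc(false));
        planes.setFab(ParallelDescriptor::MyProc(),
                      FArrayBox(plane.box(), 1, plane.dataPtr()));
        planes.mult(ninv);   // turn column sums into averages (the step PATCH 151 adds)

        // add the overlapping per-rank partial planes into a distributed flattened MultiFab
        BoxArray ba_flat(dom2d);
        mf_flat.define(ba_flat, DistributionMapping{ba_flat}, 1, 0);
        mf_flat.setVal(0.);
        mf_flat.ParallelAdd(planes);
    }

Because ReduceToPlane runs as a single GPU-capable reduction, this avoids both the host-side triple loops and the pencil-shaped intermediate copy of the old implementation.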
From 8fb83a622c25a785a461cebf77f28ae2f67a0e3f Mon Sep 17 00:00:00 2001
From: Andy Nonaka
Date: Tue, 14 Jan 2025 14:28:11 -0800
Subject: [PATCH 151/151] updated gpu-safe computeverticalaverage that works
 for multi-components input multifabs

---
 src_common/ComputeAverages.cpp | 77 ++++++++++++++++++++++------------
 1 file changed, 51 insertions(+), 26 deletions(-)

diff --git a/src_common/ComputeAverages.cpp b/src_common/ComputeAverages.cpp
index 5073cbf22..22b494cb9 100644
--- a/src_common/ComputeAverages.cpp
+++ b/src_common/ComputeAverages.cpp
@@ -207,28 +207,6 @@ void ComputeVerticalAverage(const MultiFab& mf, MultiFab& mf_flat,
     // get a single Box that spans the full domain
     Box domain(mf.boxArray().minimalBox());
 
-    auto const& ma = mf.const_arrays();
-    auto fab = ReduceToPlane<ReduceOpSum,Real>(dir, domain, mf,
-               [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) -> Real
-               {
-                   return ma[box_no](i,j,k); // data at (i,j,k) of Box box_no
-               });
-
-    Box dom2d = fab.box();
-    Vector<Box> bv(ParallelDescriptor::NProcs(),dom2d);
-    BoxArray ba(bv.data(), bv.size());
-
-    Vector<int> pmap(ParallelDescriptor::NProcs());
-    std::iota(pmap.begin(), pmap.end(), 0);
-    DistributionMapping dm(std::move(pmap));
-
-    MultiFab mftmp(ba, dm, fab.nComp(), 0, MFInfo().SetAlloc(false));
-    mftmp.setFab(ParallelDescriptor::MyProc(),
-                 FArrayBox(fab.box(), fab.nComp(), fab.dataPtr()));
-
-    BoxArray ba2(dom2d);
-
     // these are the transverse directions (i.e., NOT the dir direction)
     int dir1=0, dir2=0;
@@ -255,12 +233,59 @@ void ComputeVerticalAverage(const MultiFab& mf, MultiFab& mf_flat,
     max_grid_size_pencil[dir2] = max_grid_projection[1];
 #endif
 
-    ba2.maxSize(IntVect(max_grid_size_pencil));
+    // this is the inverse of the number of cells in the dir direction we are averaging over
+    // by default we average over the entire domain, but one can pass in slab_lo/hi to set bounds
+    Real ninv;
+    if (slablo != -1 && slabhi != 99999) {
+        ninv = 1./(slabhi-slablo+1);
+    } else {
+        ninv = 1./(domain.length(dir));
+    }
+
+    MultiFab mf_onecomp(mf.boxArray(), mf.DistributionMap(), 1, 0);
+
+    for (int n=0; n<ncomp; ++n) {
+
+        // copy component incomp+n of mf into the single-component working MultiFab
+        MultiFab::Copy(mf_onecomp, mf, incomp+n, 0, 1, 0);
+
+        auto const& ma = mf_onecomp.const_arrays();
+        auto fab = ReduceToPlane<ReduceOpSum,Real>(dir, domain, mf_onecomp,
+                   [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) -> Real
+                   {
+                       return ma[box_no](i,j,k); // data at (i,j,k) of Box box_no
+                   });
+
+        Box dom2d = fab.box();
+        Vector<Box> bv(ParallelDescriptor::NProcs(),dom2d);
+        BoxArray ba(bv.data(), bv.size());
+
+        Vector<int> pmap(ParallelDescriptor::NProcs());
+        std::iota(pmap.begin(), pmap.end(), 0);
+        DistributionMapping dm(std::move(pmap));
+
+        MultiFab mftmp(ba, dm, 1, 0, MFInfo().SetAlloc(false));
+        mftmp.setFab(ParallelDescriptor::MyProc(),
+                     FArrayBox(fab.box(), 1, fab.dataPtr()));
+
+        // divide by number of cells in column to create average
+        mftmp.mult(ninv);
+
+        BoxArray ba2(dom2d);
+
+        ba2.maxSize(IntVect(max_grid_size_pencil));
+
+        if (n==0) {
+            mf_flat.define(ba2, DistributionMapping{ba2}, ncomp, 0);
+        }
+
+        MultiFab mf_flat_onecomp(ba2, DistributionMapping{ba2}, fab.nComp(), 0);
+        mf_flat_onecomp.setVal(0.);
+        mf_flat_onecomp.ParallelAdd(mftmp);
+
+        mf_flat.ParallelCopy(mf_flat_onecomp, 0, n, 1);
+    }
 }
 
 void ExtractSlice(const MultiFab& mf, MultiFab& mf_slice,