diff --git a/.github/workflows/dependencies/dependencies_nvcc11.sh b/.github/workflows/dependencies/dependencies_nvcc11.sh
index 5be711d81..a79b152b6 100755
--- a/.github/workflows/dependencies/dependencies_nvcc11.sh
+++ b/.github/workflows/dependencies/dependencies_nvcc11.sh
@@ -34,7 +34,8 @@ sudo apt-get install -y \
     cuda-nvml-dev-11-2           \
     cuda-nvtx-11-2               \
     libcurand-dev-11-2           \
-    libcufft-dev-11-2
+    libcufft-dev-11-2            \
+    libcusparse-dev-11-2
 sudo ln -s cuda-11.2 /usr/local/cuda
 
 
diff --git a/exec/DSMC/DsmcCollide.cpp b/exec/DSMC/DsmcCollide.cpp
index 98e4e86db..0d27b410c 100644
--- a/exec/DSMC/DsmcCollide.cpp
+++ b/exec/DSMC/DsmcCollide.cpp
@@ -124,7 +124,7 @@ void FhdParticleContainer::CalcSelections(Real dt)
 					crossSection = csx[ij_spec];
 					//crossSection = 0;
 					if(i_spec==j_spec) {np_j = np_i-1;}
-					NSel = particle_neff_tmp*np_i*np_j*crossSection*vrmax*ocollisionCellVolTmp*dt;
+					NSel = particle_neff_tmp*np_i*np_j*crossSection*vrmax*ocollisionCellVolTmp*dt*2;
 					if(i_spec==j_spec) {NSel = NSel*0.5;}
 					arrselect(i,j,k,ij_spec) = std::floor(NSel + amrex::Random(engine));
 					
diff --git a/exec/DSMC/inputs_conc b/exec/DSMC/inputs_conc
index 7a0eb560f..6b61d597d 100644
--- a/exec/DSMC/inputs_conc
+++ b/exec/DSMC/inputs_conc
@@ -71,9 +71,6 @@
 	bc_mass_lo  = -1 -1 -1
 	bc_mass_hi  = -1 -1 -1
 	
-	bc_spec_lo = 1 -1 -1
-	bc_spec_hi = 0 -1 -1
-  
 	# Temperature if thermal BC specified
 	t_hi = 300 300 300
 	t_lo = 300 300 300
@@ -86,8 +83,8 @@
 	# Xk and Yk at the wall for Dirichlet (concentrations) - set one to zero
 	# Ordering: (species 1, x-dir), (species 2, x-dir), ... (species 1, y-dir), ... 
 
-	bc_Yk_x_lo = 0.45  0.55  # lo BC
-	bc_Yk_x_hi = 0.55  0.45  # hi BC
+	bc_Yk_x_lo = 0.49375  0.50625  # lo BC
+	bc_Yk_x_hi = 0.50625  0.49375  # hi BC
 	bc_Yk_y_lo = 1.0  1.0   # lo BC
 	bc_Yk_y_hi = 1.0  1.0   # hi BC
 	bc_Yk_z_lo = 1.0  1.0   # lo BC
diff --git a/exec/DSMC/inputs_conc_gpu b/exec/DSMC/inputs_conc_gpu
index aa310bdfa..b9cdadc5a 100644
--- a/exec/DSMC/inputs_conc_gpu
+++ b/exec/DSMC/inputs_conc_gpu
@@ -73,9 +73,6 @@
 	bc_mass_lo  = -1 -1 -1
 	bc_mass_hi  = -1 -1 -1
 	
-	bc_spec_lo = 1 -1 -1
-	bc_spec_hi = 0 -1 -1
-  
 	# Temperature if thermal BC specified
 	#t_hi = 519 300 300
 	#t_lo = 273 300 300
diff --git a/exec/DSMC/main_driver.cpp b/exec/DSMC/main_driver.cpp
index 473d0e3de..919dd34f1 100644
--- a/exec/DSMC/main_driver.cpp
+++ b/exec/DSMC/main_driver.cpp
@@ -334,7 +334,8 @@ void main_driver(const char* argv)
 		particles.CalcSelections(dt);
 		particles.CollideParticles(dt);
 		
-		if(istep%2!=0)
+//		if(istep%2!=0)
+		if(false)
         {        
 		    particles.EvaluateStats(cuInst,cuMeans,cuVars,primInst,primMeans,primVars,
 					cvlInst,cvlMeans,QMeans,coVars,spatialCross1D,statsCount++,time);
@@ -355,17 +356,19 @@ void main_driver(const char* argv)
 	            //PrintMF(structFactPrimMF,0,-1);
       	        //PrintMF(primInst,1,1);
 	            
-                structFactPrim.FortStructure(structFactPrimMF,geom);
+                //structFactPrim.FortStructure(structFactPrimMF);
 		
 		    }
         }
-		particles.Source(dt, paramPlaneList, paramPlaneCount, cuInst);
+
 		//particles.externalForce(dt);
+		particles.Source(dt, paramPlaneList, paramPlaneCount, cuInst);		
 		particles.MoveParticlesCPP(dt, paramPlaneList, paramPlaneCount);
 		//particles.updateTimeStep(geom,dt);
                 //reduceMassFlux(paramPlaneList, paramPlaneCount);
 
-        if(istep%2==0)
+        if(true)
+//        if(istep%2==0)        
         {        
 		    particles.EvaluateStats(cuInst,cuMeans,cuVars,primInst,primMeans,primVars,
 					cvlInst,cvlMeans,QMeans,coVars,spatialCross1D,statsCount++,time);
@@ -386,7 +389,7 @@ void main_driver(const char* argv)
 	            //PrintMF(structFactPrimMF,0,-1);
       	        //PrintMF(primInst,1,1);
 	            
-                structFactPrim.FortStructure(structFactPrimMF,geom);
+                //structFactPrim.FortStructure(structFactPrimMF);
 		
 		    }
         }
diff --git a/exec/DSMC_granular/main_driver.cpp b/exec/DSMC_granular/main_driver.cpp
index d6ac3a736..7d2b1c4a0 100644
--- a/exec/DSMC_granular/main_driver.cpp
+++ b/exec/DSMC_granular/main_driver.cpp
@@ -402,7 +402,7 @@ void main_driver(const char* argv)
 			MultiFab::Copy(structFactPrimMF,primInst,8,cnt_sf,numvars_sf,0);
 			cnt_sf += numvars_sf;
 
-			structFactPrim.FortStructure(structFactPrimMF,geom);
+			structFactPrim.FortStructure(structFactPrimMF);
 		}
 
 		if(istep > amrex::Math::abs(n_steps_skip) &&
diff --git a/exec/Ek_calculator/main_driver.cpp b/exec/Ek_calculator/main_driver.cpp
index 1a0a331dd..2b81bb9b4 100644
--- a/exec/Ek_calculator/main_driver.cpp
+++ b/exec/Ek_calculator/main_driver.cpp
@@ -99,8 +99,8 @@ void main_driver(const char* argv)
 
 
     // reset and compute structure factor
-    turbStructFact.FortStructure(vel,geom,1);
-    turbStructFact.CallFinalize(geom);
+    turbStructFact.FortStructure(vel,1);
+    turbStructFact.CallFinalize();
 
     // integrate cov_mag over shells in k and write to file
     turbStructFact.IntegratekShells(0,geom);
diff --git a/exec/cellbdytest_new/main_driver.cpp b/exec/cellbdytest_new/main_driver.cpp
index b52b39535..3a07ded68 100644
--- a/exec/cellbdytest_new/main_driver.cpp
+++ b/exec/cellbdytest_new/main_driver.cpp
@@ -1809,13 +1809,13 @@ void main_driver(const char* argv)
 
             // charge
             MultiFab::Copy(struct_cc_charge, charge, 0, 0, nvar_sf_charge, 0);
-            structFact_charge.FortStructure(struct_cc_charge,geomP);
+            structFact_charge.FortStructure(struct_cc_charge);
 
             // velocity
             for (int d=0; d<AMREX_SPACEDIM; ++d) {
                 ShiftFaceToCC(umac[d],0,struct_cc_vel,d,1);
             }
-            structFact_vel.FortStructure(struct_cc_vel,geom);
+            structFact_vel.FortStructure(struct_cc_vel);
             
             // plot structure factor on plot_int
             if (istep%plot_int == 0) {
diff --git a/exec/compressible/GNUmakefile b/exec/compressible/GNUmakefile
index cd7ee153f..5cee2876a 100644
--- a/exec/compressible/GNUmakefile
+++ b/exec/compressible/GNUmakefile
@@ -11,6 +11,7 @@ DIM           = 3
 TINY_PROFILE  = FALSE
 MAX_SPEC      = 8
 MAX_REAC      = 5
+USE_FFT       = TRUE
 
 USE_PARTICLES = FALSE
 
diff --git a/exec/compressible/inputs_giantfluct_3d b/exec/compressible/inputs_giantfluct_3d
index 575927ee0..9a18b2409 100644
--- a/exec/compressible/inputs_giantfluct_3d
+++ b/exec/compressible/inputs_giantfluct_3d
@@ -98,7 +98,7 @@
 
   # Xk and Yk at the wall for Dirichlet (concentrations) - set one to zero
   # Ordering: (species 1, x-dir), (species 2, x-dir), ... (species 1, y-dir), ...
-  bc_Yk_y_lo = 0.2 0.09316672 0.70683296     # lo BC
-  bc_Yk_y_hi = 0.3 0.40683328 0.29316704     # hi BC
+  bc_Yk_y_lo = 0.2 0.09316672 0.70683328     # lo BC
+  bc_Yk_y_hi = 0.3 0.40683328 0.29316672     # hi BC
 
 
diff --git a/exec/compressible_mui/sav_src/2020/m00.cpp b/exec/compressible_mui/sav_src/2020/m00.cpp
index d9b9be874..ecac3a41c 100644
--- a/exec/compressible_mui/sav_src/2020/m00.cpp
+++ b/exec/compressible_mui/sav_src/2020/m00.cpp
@@ -521,7 +521,7 @@ void main_driver(const char* argv)
 
     if(project_dir >= 0){
       prim.setVal(0.0);
-      ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
+      ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
       BoxArray ba_flat = primVertAvg.boxArray();
       const DistributionMapping& dmap_flat = primVertAvg.DistributionMap();
       {
@@ -672,11 +672,11 @@ void main_driver(const char* argv)
            MultiFab::Copy(structFactPrimMF, prim, 0,                0,                structVarsPrim,   0);
            MultiFab::Copy(structFactConsMF, cu,   0,                0,                structVarsCons-1, 0);
            MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1,                0); // temperature too
-           structFactPrim.FortStructure(structFactPrimMF,geom);
-           structFactCons.FortStructure(structFactConsMF,geom);
+           structFactPrim.FortStructure(structFactPrimMF);
+           structFactCons.FortStructure(structFactConsMF);
            if(project_dir >= 0) {
-                ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
-                structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat);
+                ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
+                structFactPrimVerticalAverage.FortStructure(primVertAvg);
            }
         }
 
diff --git a/exec/compressible_mui/sav_src/2020/m01-cutemp_jsq.cpp b/exec/compressible_mui/sav_src/2020/m01-cutemp_jsq.cpp
index 25cfbc595..22344b5b6 100644
--- a/exec/compressible_mui/sav_src/2020/m01-cutemp_jsq.cpp
+++ b/exec/compressible_mui/sav_src/2020/m01-cutemp_jsq.cpp
@@ -533,7 +533,7 @@ void main_driver(const char* argv)
 
     if(project_dir >= 0){
       prim.setVal(0.0);
-      ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
+      ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
       BoxArray ba_flat = primVertAvg.boxArray();
       const DistributionMapping& dmap_flat = primVertAvg.DistributionMap();
       {
@@ -684,11 +684,11 @@ void main_driver(const char* argv)
            MultiFab::Copy(structFactPrimMF, prim, 0,                0,                structVarsPrim,   0);
            MultiFab::Copy(structFactConsMF, cu,   0,                0,                structVarsCons-1, 0);
            MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1,                0); // temperature too
-           structFactPrim.FortStructure(structFactPrimMF,geom);
-           structFactCons.FortStructure(structFactConsMF,geom);
+           structFactPrim.FortStructure(structFactPrimMF);
+           structFactCons.FortStructure(structFactConsMF);
            if(project_dir >= 0) {
-                ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
-                structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat);
+                ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
+                structFactPrimVerticalAverage.FortStructure(primVertAvg);
            }
         }
 
diff --git a/exec/compressible_mui/sav_src/2020/m01.cpp b/exec/compressible_mui/sav_src/2020/m01.cpp
index 3b3c99530..e76d0cd51 100644
--- a/exec/compressible_mui/sav_src/2020/m01.cpp
+++ b/exec/compressible_mui/sav_src/2020/m01.cpp
@@ -521,7 +521,7 @@ void main_driver(const char* argv)
 
     if(project_dir >= 0){
       prim.setVal(0.0);
-      ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
+      ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
       BoxArray ba_flat = primVertAvg.boxArray();
       const DistributionMapping& dmap_flat = primVertAvg.DistributionMap();
       {
@@ -672,11 +672,11 @@ void main_driver(const char* argv)
            MultiFab::Copy(structFactPrimMF, prim, 0,                0,                structVarsPrim,   0);
            MultiFab::Copy(structFactConsMF, cu,   0,                0,                structVarsCons-1, 0);
            MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1,                0); // temperature too
-           structFactPrim.FortStructure(structFactPrimMF,geom);
-           structFactCons.FortStructure(structFactConsMF,geom);
+           structFactPrim.FortStructure(structFactPrimMF);
+           structFactCons.FortStructure(structFactConsMF);
            if(project_dir >= 0) {
-                ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
-                structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat);
+                ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
+                structFactPrimVerticalAverage.FortStructure(primVertAvg);
            }
         }
 
diff --git a/exec/compressible_mui/sav_src/2020/m02.cpp b/exec/compressible_mui/sav_src/2020/m02.cpp
index 9bdaf63ca..78e026cbb 100644
--- a/exec/compressible_mui/sav_src/2020/m02.cpp
+++ b/exec/compressible_mui/sav_src/2020/m02.cpp
@@ -574,7 +574,7 @@ void main_driver(const char* argv)
 
     if(project_dir >= 0){
       prim.setVal(0.0);
-      ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
+      ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
       BoxArray ba_flat = primVertAvg.boxArray();
       const DistributionMapping& dmap_flat = primVertAvg.DistributionMap();
       {
@@ -725,11 +725,11 @@ void main_driver(const char* argv)
            MultiFab::Copy(structFactPrimMF, prim, 0,                0,                structVarsPrim,   0);
            MultiFab::Copy(structFactConsMF, cu,   0,                0,                structVarsCons-1, 0);
            MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1,                0); // temperature too
-           structFactPrim.FortStructure(structFactPrimMF,geom);
-           structFactCons.FortStructure(structFactConsMF,geom);
+           structFactPrim.FortStructure(structFactPrimMF);
+           structFactCons.FortStructure(structFactConsMF);
            if(project_dir >= 0) {
-                ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
-                structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat);
+                ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
+                structFactPrimVerticalAverage.FortStructure(primVertAvg);
            }
         }
 
diff --git a/exec/compressible_mui/sav_src/2020/m10.cpp b/exec/compressible_mui/sav_src/2020/m10.cpp
index f44916f31..578cbdeec 100644
--- a/exec/compressible_mui/sav_src/2020/m10.cpp
+++ b/exec/compressible_mui/sav_src/2020/m10.cpp
@@ -523,7 +523,7 @@ void main_driver(const char* argv)
 
     if(project_dir >= 0){
       prim.setVal(0.0);
-      ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
+      ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
       BoxArray ba_flat = primVertAvg.boxArray();
       const DistributionMapping& dmap_flat = primVertAvg.DistributionMap();
       {
@@ -681,11 +681,11 @@ void main_driver(const char* argv)
            MultiFab::Copy(structFactPrimMF, prim, 0,                0,                structVarsPrim,   0);
            MultiFab::Copy(structFactConsMF, cu,   0,                0,                structVarsCons-1, 0);
            MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1,                0); // temperature too
-           structFactPrim.FortStructure(structFactPrimMF,geom);
-           structFactCons.FortStructure(structFactConsMF,geom);
+           structFactPrim.FortStructure(structFactPrimMF);
+           structFactCons.FortStructure(structFactConsMF);
            if(project_dir >= 0) {
-                ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
-                structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat);
+                ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
+                structFactPrimVerticalAverage.FortStructure(primVertAvg);
            }
         }
 
diff --git a/exec/compressible_mui/sav_src/2020/m11.cpp b/exec/compressible_mui/sav_src/2020/m11.cpp
index 1930c883b..2148a2786 100644
--- a/exec/compressible_mui/sav_src/2020/m11.cpp
+++ b/exec/compressible_mui/sav_src/2020/m11.cpp
@@ -527,7 +527,7 @@ void main_driver(const char* argv)
 
     if(project_dir >= 0){
       prim.setVal(0.0);
-      ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
+      ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
       BoxArray ba_flat = primVertAvg.boxArray();
       const DistributionMapping& dmap_flat = primVertAvg.DistributionMap();
       {
@@ -685,11 +685,11 @@ void main_driver(const char* argv)
            MultiFab::Copy(structFactPrimMF, prim, 0,                0,                structVarsPrim,   0);
            MultiFab::Copy(structFactConsMF, cu,   0,                0,                structVarsCons-1, 0);
            MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1,                0); // temperature too
-           structFactPrim.FortStructure(structFactPrimMF,geom);
-           structFactCons.FortStructure(structFactConsMF,geom);
+           structFactPrim.FortStructure(structFactPrimMF);
+           structFactCons.FortStructure(structFactConsMF);
            if(project_dir >= 0) {
-                ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
-                structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat);
+                ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
+                structFactPrimVerticalAverage.FortStructure(primVertAvg);
            }
         }
 
diff --git a/exec/compressible_mui/sav_src/2020/m12.cpp b/exec/compressible_mui/sav_src/2020/m12.cpp
index fd5196685..35f59540e 100644
--- a/exec/compressible_mui/sav_src/2020/m12.cpp
+++ b/exec/compressible_mui/sav_src/2020/m12.cpp
@@ -527,7 +527,7 @@ void main_driver(const char* argv)
 
     if(project_dir >= 0){
       prim.setVal(0.0);
-      ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
+      ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
       BoxArray ba_flat = primVertAvg.boxArray();
       const DistributionMapping& dmap_flat = primVertAvg.DistributionMap();
       {
@@ -687,11 +687,11 @@ void main_driver(const char* argv)
            MultiFab::Copy(structFactPrimMF, prim, 0,                0,                structVarsPrim,   0);
            MultiFab::Copy(structFactConsMF, cu,   0,                0,                structVarsCons-1, 0);
            MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1,                0); // temperature too
-           structFactPrim.FortStructure(structFactPrimMF,geom);
-           structFactCons.FortStructure(structFactConsMF,geom);
+           structFactPrim.FortStructure(structFactPrimMF);
+           structFactCons.FortStructure(structFactConsMF);
            if(project_dir >= 0) {
-                ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
-                structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat);
+                ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
+                structFactPrimVerticalAverage.FortStructure(primVertAvg);
            }
         }
 
diff --git a/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp b/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp
index 719c76a95..613f1b062 100644
--- a/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp
+++ b/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp
@@ -534,7 +534,7 @@ void main_driver(const char* argv)
 
     if(project_dir >= 0){
       prim.setVal(0.0);
-      ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
+      ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
       BoxArray ba_flat = primVertAvg.boxArray();
       const DistributionMapping& dmap_flat = primVertAvg.DistributionMap();
       {
@@ -700,11 +700,11 @@ void main_driver(const char* argv)
            MultiFab::Copy(structFactPrimMF, prim, 0,                0,                structVarsPrim,   0);
            MultiFab::Copy(structFactConsMF, cu,   0,                0,                structVarsCons-1, 0);
            MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1,                0); // temperature too
-           structFactPrim.FortStructure(structFactPrimMF,geom);
-           structFactCons.FortStructure(structFactConsMF,geom);
+           structFactPrim.FortStructure(structFactPrimMF);
+           structFactCons.FortStructure(structFactConsMF);
            if(project_dir >= 0) {
-                ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
-                structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat);
+                ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
+                structFactPrimVerticalAverage.FortStructure(primVertAvg);
            }
         }
 
diff --git a/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp_0126_bc b/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp_0126_bc
index 01a09fec3..8b01a4370 100644
--- a/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp_0126_bc
+++ b/exec/compressible_mui/sav_src/202101_before_mui_span/main_driver.cpp_0126_bc
@@ -547,7 +547,7 @@ void main_driver(const char* argv)
 
     if(project_dir >= 0){
       prim.setVal(0.0);
-      ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
+      ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
       BoxArray ba_flat = primVertAvg.boxArray();
       const DistributionMapping& dmap_flat = primVertAvg.DistributionMap();
       {
@@ -713,11 +713,11 @@ void main_driver(const char* argv)
            MultiFab::Copy(structFactPrimMF, prim, 0,                0,                structVarsPrim,   0);
            MultiFab::Copy(structFactConsMF, cu,   0,                0,                structVarsCons-1, 0);
            MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1,                0); // temperature too
-           structFactPrim.FortStructure(structFactPrimMF,geom);
-           structFactCons.FortStructure(structFactConsMF,geom);
+           structFactPrim.FortStructure(structFactPrimMF);
+           structFactCons.FortStructure(structFactConsMF);
            if(project_dir >= 0) {
-                ComputeVerticalAverage(prim, primVertAvg, geom, project_dir, 0, structVarsPrim);
-                structFactPrimVerticalAverage.FortStructure(primVertAvg,geom_flat);
+                ComputeVerticalAverage(prim, primVertAvg, project_dir, 0, structVarsPrim);
+                structFactPrimVerticalAverage.FortStructure(primVertAvg);
            }
         }
 
diff --git a/exec/compressible_mui/sav_src/202106_before_summer/main_driver.cpp b/exec/compressible_mui/sav_src/202106_before_summer/main_driver.cpp
index 7281a18ac..389c5f646 100644
--- a/exec/compressible_mui/sav_src/202106_before_summer/main_driver.cpp
+++ b/exec/compressible_mui/sav_src/202106_before_summer/main_driver.cpp
@@ -514,9 +514,9 @@ void main_driver(const char* argv)
       // a built version of primFlattened so can obtain what we need to build the
       // structure factor and geometry objects for flattened data
       if (slicepoint < 0) {
-          ComputeVerticalAverage(prim, primFlattened, geom, project_dir, 0, structVarsPrim);
+          ComputeVerticalAverage(prim, primFlattened, project_dir, 0, structVarsPrim);
       } else {
-          ExtractSlice(prim, primFlattened, geom, project_dir, slicepoint, 0, structVarsPrim);
+          ExtractSlice(prim, primFlattened, project_dir, slicepoint, 0, structVarsPrim);
       }
       // we rotate this flattened MultiFab to have normal in the z-direction since
       // SWFFT only presently supports flattened MultiFabs with z-normal.
@@ -852,19 +852,19 @@ void main_driver(const char* argv)
             MultiFab::Copy(structFactPrimMF, prim, 0,                0,                structVarsPrim,   0);
             MultiFab::Copy(structFactConsMF, cu,   0,                0,                structVarsCons-1, 0);
             MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1,                0); // temperature too
-            structFactPrim.FortStructure(structFactPrimMF,geom);
-            structFactCons.FortStructure(structFactConsMF,geom);
+            structFactPrim.FortStructure(structFactPrimMF);
+            structFactCons.FortStructure(structFactConsMF);
             if(project_dir >= 0) {
                 MultiFab primFlattened;  // flattened multifab defined below
                 if (slicepoint < 0) {
-                    ComputeVerticalAverage(prim, primFlattened, geom, project_dir, 0, structVarsPrim);
+                    ComputeVerticalAverage(prim, primFlattened, project_dir, 0, structVarsPrim);
                 } else {
-                    ExtractSlice(prim, primFlattened, geom, project_dir, slicepoint, 0, structVarsPrim);
+                    ExtractSlice(prim, primFlattened, project_dir, slicepoint, 0, structVarsPrim);
                 }
                 // we rotate this flattened MultiFab to have normal in the z-direction since
                 // SWFFT only presently supports flattened MultiFabs with z-normal.
                 MultiFab primFlattenedRot = RotateFlattenedMF(primFlattened);
-                structFactPrimFlattened.FortStructure(primFlattenedRot,geom_flat);
+                structFactPrimFlattened.FortStructure(primFlattenedRot);
             }
         }
 
diff --git a/exec/compressible_stag/GNUmakefile b/exec/compressible_stag/GNUmakefile
index 24dfa9492..e0cf9740f 100644
--- a/exec/compressible_stag/GNUmakefile
+++ b/exec/compressible_stag/GNUmakefile
@@ -12,9 +12,10 @@ DIM           = 3
 TINY_PROFILE  = FALSE
 MAX_SPEC      = 8
 MAX_REAC      = 5
+USE_FFT       = TRUE
 
 USE_PARTICLES = FALSE
-DO_TURB 	  = FALSE
+DO_TURB       = FALSE
 
 include $(AMREX_HOME)/Tools/GNUMake/Make.defs
 
@@ -41,7 +42,6 @@ include ../../src_rng/Make.package
 VPATH_LOCATIONS   += ../../src_rng/
 INCLUDE_LOCATIONS += ../../src_rng/
 
-
 include ../../src_common/Make.package
 VPATH_LOCATIONS   += ../../src_common/
 INCLUDE_LOCATIONS += ../../src_common/
@@ -54,21 +54,6 @@ INCLUDE_LOCATIONS += ../../src_analysis/
 
 include $(AMREX_HOME)/Tools/GNUMake/Make.rules
 
-ifeq ($(findstring cgpu, $(HOST)), cgpu)
-  CXXFLAGS += $(FFTW)
-endif
-
-ifeq ($(USE_CUDA),TRUE)
-  LIBRARIES += -lcufft
-else ifeq ($(USE_HIP),TRUE)
-  # Use rocFFT.  ROC_PATH is defined in amrex
-  INCLUDE_LOCATIONS += $(ROC_PATH)/rocfft/include
-  LIBRARY_LOCATIONS += $(ROC_PATH)/rocfft/lib
-  LIBRARIES += -L$(ROC_PATH)/rocfft/lib -lrocfft
-else
-  LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3
-endif
-
 ifeq ($(DO_TURB), TRUE)
   DEFINES += -DTURB
 endif
diff --git a/exec/compressible_stag/SPECTRAL_FILTER/GNUmakefile b/exec/compressible_stag/SPECTRAL_FILTER/GNUmakefile
new file mode 100644
index 000000000..052b38516
--- /dev/null
+++ b/exec/compressible_stag/SPECTRAL_FILTER/GNUmakefile
@@ -0,0 +1,69 @@
+AMREX_HOME ?= ../../../../amrex/
+
+DEBUG         = FALSE
+USE_MPI       = TRUE
+USE_OMP       = FALSE
+USE_CUDA      = FALSE
+USE_HIP       = FALSE
+COMP          = gnu
+DIM           = 3
+TINY_PROFILE  = FALSE
+
+USE_HEFFTE_FFTW   = FALSE
+USE_HEFFTE_CUFFT  = FALSE
+USE_HEFFTE_ROCFFT = FALSE
+
+ifeq ($(USE_HEFFTE_FFTW),TRUE)
+  HEFFTE_HOME ?= ../../../../heffte/
+else ifeq ($(USE_HEFFTE_CUFFT),TRUE)
+  HEFFTE_HOME ?= ../../../../heffte-org/build_aware/
+else ifeq ($(USE_HEFFTE_ROCFFT),TRUE)
+  HEFFTE_HOME ?= ../../../../heffte/
+endif
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.defs
+
+VPATH_LOCATIONS   += .
+INCLUDE_LOCATIONS += .
+
+#ifeq ($(USE_HEFFTE_FFTW),TRUE)
+#  include $(HEFFTE_HOME)/src/Make.package
+#else ifeq ($(USE_HEFFTE_CUFFT),TRUE)
+#  include $(HEFFTE_HOME)/src/Make.package
+#else ifeq ($(USE_HEFFTE_ROCFFT),TRUE)
+#  include $(HEFFTE_HOME)/src/Make.package
+#endif
+
+include ./Make.package
+ifeq ($(USE_HEFFTE_FFTW),TRUE)
+  DEFINES += -DHEFFTE_FFTW
+  LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f
+else ifeq ($(USE_HEFFTE_CUFFT),TRUE)
+  DEFINES += -DHEFFTE_CUFFT
+  VPATH_LOCATIONS += $(HEFFTE_HOME)/include
+  INCLUDE_LOCATIONS += $(HEFFTE_HOME)/include
+  LIBRARY_LOCATIONS += $(HEFFTE_HOME)/lib
+  LIBRARIES += -lheffte
+else ifeq ($(USE_HEFFTE_ROCFFT),TRUE)
+  DEFINES += -DHEFFTE_ROCFFT
+endif
+
+include $(AMREX_HOME)/Src/Base/Make.package
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.rules
+
+ifeq ($(USE_CUDA),TRUE)
+  LIBRARIES += -lcufft
+else ifeq ($(USE_HIP),TRUE)
+  # Use rocFFT.  ROC_PATH is defined in amrex
+  INCLUDE_LOCATIONS += $(ROC_PATH)/rocfft/include
+  LIBRARY_LOCATIONS += $(ROC_PATH)/rocfft/lib
+  LIBRARIES += -L$(ROC_PATH)/rocfft/lib -lrocfft
+else
+  LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f
+endif
+
+ifeq ($(DO_TURB), TRUE)
+  DEFINES += -DTURB
+endif
+
diff --git a/exec/compressible_stag/SPECTRAL_FILTER/Make.package b/exec/compressible_stag/SPECTRAL_FILTER/Make.package
new file mode 100644
index 000000000..e0391922a
--- /dev/null
+++ b/exec/compressible_stag/SPECTRAL_FILTER/Make.package
@@ -0,0 +1,5 @@
+CEXE_sources += main.cpp
+CEXE_sources += main_driver.cpp
+CEXE_sources += spectral_functions.cpp
+
+CEXE_headers += spectral_functions.H
diff --git a/exec/compressible_stag/SPECTRAL_FILTER/build_frontier.sh b/exec/compressible_stag/SPECTRAL_FILTER/build_frontier.sh
new file mode 100755
index 000000000..dcb05e97b
--- /dev/null
+++ b/exec/compressible_stag/SPECTRAL_FILTER/build_frontier.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/bash
+
+## load necessary modules 
+module load craype-accel-amd-gfx90a
+module load amd-mixed
+#module load rocm/5.2.0  # waiting for 5.6 for next bump
+module load cray-mpich/8.1.23
+module load cce/15.0.0  # must be loaded after rocm
+
+# GPU-aware MPI
+export MPICH_GPU_SUPPORT_ENABLED=1
+
+# optimize HIP compilation for MI250X (gfx90a)
+export AMREX_AMD_ARCH=gfx90a
+
+# compiler environment hints
+##export CC=$(which hipcc)
+##export CXX=$(which hipcc)
+##export FC=$(which ftn)
+##export CFLAGS="-I${ROCM_PATH}/include"
+##export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed"
+##export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64 ${PE_MPICH_GTL_DIR_amd_gfx90a} -lmpi_gtl_hsa"
+export LDFLAGS="-L${MPICH_DIR}/lib -lmpi ${CRAY_XPMEM_POST_LINK_OPTS} -lxpmem ${PE_MPICH_GTL_DIR_amd_gfx90a} ${PE_MPICH_GTL_LIBS_amd_gfx90a}"
+export CXXFLAGS="-I${MPICH_DIR}/include"
+export HIPFLAGS="--amdgpu-target=gfx90a"
+
+make -j10 USE_HIP=TRUE USE_HEFFTE_ROCFFT=TRUE USE_ASSERTION=TRUE
diff --git a/exec/compressible_stag/SPECTRAL_FILTER/build_perlmutter.sh b/exec/compressible_stag/SPECTRAL_FILTER/build_perlmutter.sh
new file mode 100755
index 000000000..e3bd5aac6
--- /dev/null
+++ b/exec/compressible_stag/SPECTRAL_FILTER/build_perlmutter.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/bash
+
+# required dependencies
+module load cray-fftw
+module load cmake
+module load cudatoolkit
+
+module list
+
+# necessary to use CUDA-Aware MPI and run a job
+export CRAY_ACCEL_TARGET=nvidia80
+
+export MPICH_GPU_SUPPORT_ENABLED=1
+
+# optimize CUDA compilation for A100
+export AMREX_CUDA_ARCH=8.0
+
+# optimize CPU microarchitecture for AMD EPYC 3rd Gen (Milan/Zen3)
+# note: the cc/CC/ftn wrappers below add those
+export CXXFLAGS="-march=znver3"
+export CFLAGS="-march=znver3"
+
+# compiler environment hints
+export CC=cc
+export CXX=CC
+export FC=ftn
+export CUDACXX=$(which nvcc)
+export CUDAHOSTCXX=CC
+
+make -j10 USE_CUDA=TRUE USE_HEFFTE_CUFFT=TRUE USE_ASSERTION=TRUE MAX_SPEC=2
diff --git a/exec/compressible_stag/SPECTRAL_FILTER/main.cpp b/exec/compressible_stag/SPECTRAL_FILTER/main.cpp
new file mode 100644
index 000000000..95c149e5a
--- /dev/null
+++ b/exec/compressible_stag/SPECTRAL_FILTER/main.cpp
@@ -0,0 +1,28 @@
+#include <AMReX.H>
+//#include <AMReX_ParmParse.H>
+
+// function declaration
+void main_driver (const char* argv);
+
+// Program entry point of the spectral-filter post-processing tool: starts
+// AMReX, passes the inputs-file name (argv[1]) to main_driver() and shuts
+// AMReX down.  Aborts with a usage message when no command-line argument
+// was given, because argv[1] is then a null pointer that main_driver()
+// must not receive.
+//
+// Returns 0 on normal completion.
+int main (int argc, char* argv[])
+{
+    amrex::Initialize(argc,argv);
+
+    // checked after Initialize so that AMReX is up when amrex::Abort runs
+    if (argc < 2) {
+        amrex::Abort("Usage: <executable> <inputs_file> [key=value ...]");
+    }
+
+    // argv[1] contains the name of the inputs file entered at the command line
+    main_driver(argv[1]);
+
+    amrex::Finalize();
+    return 0;
+}
diff --git a/exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp b/exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp
new file mode 100644
index 000000000..7db4b16dd
--- /dev/null
+++ b/exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp
@@ -0,0 +1,450 @@
+#include "spectral_functions.H"
+#include <AMReX_Vector.H>
+#include <AMReX_MPMD.H>
+#include "AMReX_ParmParse.H"
+
+#include "chrono"
+
+using namespace std::chrono;
+using namespace amrex;
+
+// Post-processing driver: reads checkpoint chk<restart>, passes the cell-centered
+// velocity and the first prim component through SpectralVelDecomp() and
+// SpectralScalarDecomp() with the wavenumber bounds kmin/kmax, and appends
+// turbulence statistics of the result to turbstats_filtered_<restart><kmin>_<kmax>.
+// Inputs restart, nprimvars, kmin and kmax are required (the run aborts without them).
+// argv contains the name of the inputs file entered at the command line
+void main_driver(const char* argv)
+{
+    BL_PROFILE_VAR("main_driver()",main_driver);
+
+    // nodal flags (unit vectors) of the x-, y- and z-face-centered MultiFabs
+    amrex::IntVect                nodal_flag_x;
+    amrex::IntVect                nodal_flag_y;
+    amrex::IntVect                nodal_flag_z;
+    for (int i=0; i<3; ++i) {
+        nodal_flag_x[i] = int(i==0);
+        nodal_flag_y[i] = int(i==1);
+        nodal_flag_z[i] = int(i==2);
+    }
+
+    // timer
+    Real ts1 = ParallelDescriptor::second();
+
+    std::string inputs_file = argv;
+
+    ParmParse pp;
+    amrex::Vector<amrex::Real> temp_real(3,0.);
+    amrex::Vector<int>         temp_int (3,0 );
+
+    amrex::Vector<int>   max_grid_size(3,1 );
+    amrex::Vector<int>   n_cells(3,0 );
+    amrex::Vector<Real>  prob_lo(3,0 );
+    amrex::Vector<Real>  prob_hi(3,0 );
+
+    if (pp.queryarr("n_cells",temp_int,0,3)) {
+        for (int i=0; i<3; ++i) {
+            n_cells[i] = temp_int[i];
+        }
+    }
+    const amrex::Long npts = amrex::Long(n_cells[0])*n_cells[1]*n_cells[2]; // 64-bit: nx*ny*nz can exceed INT_MAX
+    if (pp.queryarr("prob_lo",temp_real,0,3)) {
+        for (int i=0; i<3; ++i) {
+            prob_lo[i] = temp_real[i];
+        }
+    }
+    if (pp.queryarr("prob_hi",temp_real,0,3)) {
+        for (int i=0; i<3; ++i) {
+            prob_hi[i] = temp_real[i];
+        }
+    }
+    pp.queryarr("max_grid_size",max_grid_size,0,3);
+
+    int restart = 0;
+    pp.get("restart",restart);       // required: step number of the checkpoint to read
+
+    int nprimvars = 0;
+    pp.get("nprimvars",nprimvars);   // required: number of components of prim
+
+    int plot_filter = 0;
+    pp.query("plot_filter",plot_filter);
+
+    amrex::IntVect ngc;
+    for (int i=0; i<3; ++i) {
+        ngc[i] = 1;           // number of ghost cells
+    }
+    if (pp.queryarr("ngc",temp_int,0,3)) {
+        for (int i=0; i<3; ++i) {
+            ngc[i] = temp_int[i];
+        }
+    }
+
+    amrex::Real kmin = 0.;
+    pp.get("kmin",kmin);             // required: lower wavenumber bound passed to the filter
+
+    amrex::Real kmax = 0.;
+    pp.get("kmax",kmax);             // required: upper wavenumber bound passed to the filter
+
+    std::array< MultiFab, 3 > vel;
+    MultiFab prim;
+
+    // make BoxArray and Geometry
+    BoxArray ba;
+    Geometry geom;
+    DistributionMapping dmap;
+
+    IntVect dom_lo(AMREX_D_DECL(           0,            0,            0));
+    IntVect dom_hi(AMREX_D_DECL(n_cells[0]-1, n_cells[1]-1, n_cells[2]-1));
+    Box domain(dom_lo, dom_hi);
+
+    // This defines the physical box, [prob_lo,prob_hi] in each direction.
+    RealBox real_box({AMREX_D_DECL(prob_lo[0],prob_lo[1],prob_lo[2])},
+                     {AMREX_D_DECL(prob_hi[0],prob_hi[1],prob_hi[2])});
+
+    // This defines a Geometry object
+    Vector<int> is_periodic(3,1);  // force to be periodic -- can change later
+    geom.define(domain,&real_box,CoordSys::cartesian,is_periodic.data());
+
+    const GpuArray<Real, 3> dx = geom.CellSizeArray();
+    const RealBox& realDomain = geom.ProbDomain();
+
+    SpectralReadCheckPoint(geom, domain, prim, vel, ba, dmap, n_cells, nprimvars, max_grid_size, ngc, restart);
+
+    MultiFab MFTurbScalar;
+    MultiFab MFTurbVel;
+    MultiFab vel_decomp_filter_heffte;
+    MultiFab scalar_filter_heffte;
+    MFTurbVel.define(ba, dmap, 3, 0);
+    MFTurbScalar.define(ba, dmap, 1, 0);
+    vel_decomp_filter_heffte.define(ba, dmap, 9, 0);
+    scalar_filter_heffte.define(ba, dmap, 1, 0);
+    vel_decomp_filter_heffte.setVal(0.0);
+    scalar_filter_heffte.setVal(0.0);
+
+    // Set BC: 1) fill boundary 2) physical
+    for (int d=0; d<3; d++) {
+        vel[d].FillBoundary(geom.periodicity());
+    }
+    prim.FillBoundary(geom.periodicity());
+
+    for(int d=0; d<3; d++) {
+        ShiftFaceToCC(vel[d], 0, MFTurbVel, d, 1);
+    }
+    MultiFab::Copy(MFTurbScalar, prim, 0, 0, 1, 0);
+
+    SpectralVelDecomp(MFTurbVel, vel_decomp_filter_heffte, kmin, kmax, geom, n_cells);
+    SpectralScalarDecomp(MFTurbScalar, scalar_filter_heffte, kmin, kmax, geom, n_cells);
+
+    MultiFab vel_decomp_filter;
+    MultiFab scalar_filter;
+    vel_decomp_filter.define(ba, dmap, 9, 2);
+    scalar_filter.define(ba, dmap, 1, 2);
+    MultiFab::Copy(vel_decomp_filter,vel_decomp_filter_heffte,0,0,9,0);
+    MultiFab::Copy(scalar_filter,scalar_filter_heffte,0,0,1,0);
+    vel_decomp_filter.FillBoundary(geom.periodicity());
+    scalar_filter.FillBoundary(geom.periodicity());
+
+    if (plot_filter) SpectralWritePlotFile(restart, kmin, kmax, geom, vel_decomp_filter, scalar_filter, MFTurbVel, MFTurbScalar);
+
+    // Turbulence Diagnostics
+    Real u_rms, u_rms_s, u_rms_d, delta_u_rms;
+    Real taylor_len, taylor_Re_eta;
+    Real skew, skew_s, skew_d, kurt, kurt_s, kurt_d;
+    Vector<Real> var(9, 0.0);
+    Real skew_vort, kurt_vort, skew_div, kurt_div;
+    {
+      Vector<Real> dProb(3);   // normalization of the face sums; formed in floating point to avoid int overflow
+      dProb[0] = 1.0/(Real(n_cells[0]+1)*n_cells[1]*n_cells[2]);
+      dProb[1] = 1.0/(Real(n_cells[1]+1)*n_cells[2]*n_cells[0]);
+      dProb[2] = 1.0/(Real(n_cells[2]+1)*n_cells[0]*n_cells[1]);
+      // NOTE(review): (n+1) assumes n+1 unique faces per direction; confirm for the periodic sum_unique() calls below
+      // Setup temp MultiFabs
+      std::array< MultiFab, 3 > gradU;
+      std::array< MultiFab, 3 > faceTemp;
+      MultiFab sound_speed;
+      MultiFab ccTemp;
+      MultiFab ccTempA;
+      AMREX_D_TERM(gradU[0].define(convert(prim.boxArray(),nodal_flag_x), prim.DistributionMap(), 9, 0);,
+                   gradU[1].define(convert(prim.boxArray(),nodal_flag_y), prim.DistributionMap(), 9, 0);,
+                   gradU[2].define(convert(prim.boxArray(),nodal_flag_z), prim.DistributionMap(), 9, 0););
+      AMREX_D_TERM(faceTemp[0].define(convert(prim.boxArray(),nodal_flag_x), prim.DistributionMap(), 1, 0);,
+                   faceTemp[1].define(convert(prim.boxArray(),nodal_flag_y), prim.DistributionMap(), 1, 0);,
+                   faceTemp[2].define(convert(prim.boxArray(),nodal_flag_z), prim.DistributionMap(), 1, 0););
+      sound_speed.define(prim.boxArray(),prim.DistributionMap(),1,0);
+      ccTemp.define(prim.boxArray(),prim.DistributionMap(),1,0);
+      ccTempA.define(prim.boxArray(),prim.DistributionMap(),1,0);
+
+      // Setup temp variables
+      Vector<Real> gradU2_temp(3);
+      Vector<Real> gradU2(3);
+      Vector<Real> gradU3(3);
+      Vector<Real> gradU4(3);
+      Vector<Real> gradU2_s(3);
+      Vector<Real> gradU3_s(3);
+      Vector<Real> gradU4_s(3);
+      Vector<Real> gradU2_d(3);
+      Vector<Real> gradU3_d(3);
+      Vector<Real> gradU4_d(3);
+
+      // component triplets of vel_decomp_filter: total, solenoidal, dilatational
+      Vector<int> comps   {0,1,2};
+      Vector<int> comps_s{3,4,5};
+      Vector<int> comps_d{6,7,8};
+
+      // turbulent kinetic energy (total)
+      ccTemp.setVal(0.0);
+      MultiFab::AddProduct(ccTemp,vel_decomp_filter,0,vel_decomp_filter,0,0,1,0); //uu
+      MultiFab::AddProduct(ccTemp,vel_decomp_filter,1,vel_decomp_filter,1,0,1,0); //vv
+      MultiFab::AddProduct(ccTemp,vel_decomp_filter,2,vel_decomp_filter,2,0,1,0); //ww
+      u_rms = ccTemp.sum(0)/npts;
+      u_rms = sqrt(u_rms/3.0);
+      MultiFab::Multiply(ccTemp,prim,0,0,1,0); // rho*(uu+vv+ww) (currently unused: ccTemp is reset below)
+
+      // turbulent kinetic energy (solenoidal)
+      ccTemp.setVal(0.0);
+      MultiFab::AddProduct(ccTemp,vel_decomp_filter,3,vel_decomp_filter,3,0,1,0); //uu
+      MultiFab::AddProduct(ccTemp,vel_decomp_filter,4,vel_decomp_filter,4,0,1,0); //vv
+      MultiFab::AddProduct(ccTemp,vel_decomp_filter,5,vel_decomp_filter,5,0,1,0); //ww
+      u_rms_s = ccTemp.sum(0)/npts;
+      u_rms_s = sqrt(u_rms_s/3.0);
+      MultiFab::Multiply(ccTemp,prim,0,0,1,0); // rho*(uu+vv+ww) (currently unused: ccTemp is reset below)
+
+      // turbulent kinetic energy (dilatational)
+      ccTemp.setVal(0.0);
+      MultiFab::AddProduct(ccTemp,vel_decomp_filter,6,vel_decomp_filter,6,0,1,0); //uu
+      MultiFab::AddProduct(ccTemp,vel_decomp_filter,7,vel_decomp_filter,7,0,1,0); //vv
+      MultiFab::AddProduct(ccTemp,vel_decomp_filter,8,vel_decomp_filter,8,0,1,0); //ww
+      u_rms_d = ccTemp.sum(0)/npts;
+      u_rms_d = sqrt(u_rms_d/3.0);
+      MultiFab::Multiply(ccTemp,prim,0,0,1,0); // rho*(uu+vv+ww) (currently unused: ccTemp is reset below)
+
+      // ratio of turbulent kinetic energies
+      delta_u_rms  = u_rms_d/u_rms_s;
+
+      // compute gradU = [du/dx dv/dy dw/dz] at cell-centers
+      ComputeGrad(vel_decomp_filter,gradU,0,0,9,-1,geom,0);
+
+      // Compute Velocity gradient moment sum
+      // 2nd moment (total)
+      FCMoments(gradU,comps,faceTemp,2,gradU2_temp);
+      gradU2[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+      gradU2[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+      gradU2[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+      ccTemp.setVal(0.0);
+      ccTempA.setVal(0.0);
+      ShiftFaceToCC(faceTemp[0],0,ccTempA,0,1);
+      MultiFab::Add(ccTemp,ccTempA,0,0,1,0);
+      ShiftFaceToCC(faceTemp[1],0,ccTempA,0,1);
+      MultiFab::Add(ccTemp,ccTempA,0,0,1,0);
+      ShiftFaceToCC(faceTemp[2],0,ccTempA,0,1);
+      MultiFab::Add(ccTemp,ccTempA,0,0,1,0);
+      Real avg_mom2 = ccTemp.sum(0)/npts;
+      // 2nd moment (solenoidal)
+      FCMoments(gradU,comps_s,faceTemp,2,gradU2_temp);
+      gradU2_s[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+      gradU2_s[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+      gradU2_s[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+      // 2nd moment (dilatational)
+      FCMoments(gradU,comps_d,faceTemp,2,gradU2_temp);
+      gradU2_d[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+      gradU2_d[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+      gradU2_d[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+
+      // Taylor Mach
+      //ComputeSoundSpeed(sound_speed,prim,2);
+      //Real c_speed = sound_speed.sum(0)/npts;
+      Real rho_avg = prim.sum(0)/npts;
+      // Taylor Ma
+      //taylor_Ma = sqrt(3.0)*u_rms/c_speed;
+      // Taylor Microscale
+      taylor_len = sqrt(3.0)*u_rms/sqrt(avg_mom2); // from Wang et al., JFM, 2012
+      taylor_Re_eta = rho_avg*taylor_len*u_rms; // from John, Donzis, Sreenivasan, PRL 2019
+
+      // Compute Velocity gradient moment sum
+      // 3rd moment (total)
+      FCMoments(gradU,comps,faceTemp,3,gradU2_temp);
+      gradU3[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+      gradU3[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+      gradU3[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+      // 3rd moment (solenoidal)
+      FCMoments(gradU,comps_s,faceTemp,3,gradU2_temp);
+      gradU3_s[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+      gradU3_s[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+      gradU3_s[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+      // 3rd moment (dilatational)
+      FCMoments(gradU,comps_d,faceTemp,3,gradU2_temp);
+      gradU3_d[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+      gradU3_d[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+      gradU3_d[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+
+      // Compute Velocity gradient moment sum
+      // 4th moment (total)
+      FCMoments(gradU,comps,faceTemp,4,gradU2_temp);
+      gradU4[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+      gradU4[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+      gradU4[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+      // 4th moment (solenoidal)
+      FCMoments(gradU,comps_s,faceTemp,4,gradU2_temp);
+      gradU4_s[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+      gradU4_s[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+      gradU4_s[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+      // 4th moment (dilatational)
+      FCMoments(gradU,comps_d,faceTemp,4,gradU2_temp);
+      gradU4_d[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+      gradU4_d[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+      gradU4_d[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+
+      // Skewness
+      // <\sum_i (du_i/dx_i)^3> / (\sum_i <(du_i/dx_i)^2>^1.5)
+      skew   = (gradU3[0] + gradU3[1] + gradU3[2])/
+               (pow(gradU2[0],1.5) + pow(gradU2[1],1.5) + pow(gradU2[2],1.5));
+      skew_s = (gradU3_s[0] + gradU3_s[1] + gradU3_s[2])/
+               (pow(gradU2_s[0],1.5) + pow(gradU2_s[1],1.5) + pow(gradU2_s[2],1.5));
+      skew_d = (gradU3_d[0] + gradU3_d[1] + gradU3_d[2])/
+               (pow(gradU2_d[0],1.5) + pow(gradU2_d[1],1.5) + pow(gradU2_d[2],1.5));
+
+      // Kurtosis
+      // <\sum_i (du_i/dx_i)^4> / (\sum_i <(du_i/dx_i)^2>^2)
+      kurt   = (gradU4[0] + gradU4[1] + gradU4[2])/
+               (pow(gradU2[0],2.0) + pow(gradU2[1],2.0) + pow(gradU2[2],2.0));
+      kurt_s = (gradU4_s[0] + gradU4_s[1] + gradU4_s[2])/
+               (pow(gradU2_s[0],2.0) + pow(gradU2_s[1],2.0) + pow(gradU2_s[2],2.0));
+      kurt_d = (gradU4_d[0] + gradU4_d[1] + gradU4_d[2])/
+               (pow(gradU2_d[0],2.0) + pow(gradU2_d[1],2.0) + pow(gradU2_d[2],2.0));
+
+      // velocity variances
+      for (int i=0;i<9;++i) {
+        ccTemp.setVal(0.0);
+        MultiFab::AddProduct(ccTemp,vel_decomp_filter,i,vel_decomp_filter,i,0,1,0);
+        Real mean = vel_decomp_filter.sum(i)/npts;
+        Real mean2 = ccTemp.sum(0)/npts;
+        var[i] = mean2 - mean*mean;
+      }
+
+      // skewness and kurtosis of velocity vorticity and divergence
+      MultiFab vel_stats;
+      vel_stats.define(prim.boxArray(),prim.DistributionMap(),4,0); // div, w1, w2, w3
+      for ( MFIter mfi(vel_stats,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+        const Box& bx = mfi.tilebox();
+        const Array4<const Real>&  v_decomp = vel_decomp_filter.array(mfi);
+        const Array4<      Real>&  v_stats  = vel_stats.array(mfi);
+        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+            // divergence
+            v_stats(i,j,k,0) = 0.5*( (v_decomp(i+1,j,k,0) - v_decomp(i-1,j,k,0))/dx[0] +
+                                  (v_decomp(i,j+1,k,1) - v_decomp(i,j-1,k,1))/dx[1] +
+                                  (v_decomp(i,j,k+1,2) - v_decomp(i,j,k-1,2))/dx[2] );
+
+            // curl w1 = u_2,1 - u_1,2
+            v_stats(i,j,k,1) = 0.5*( (v_decomp(i+1,j,k,1) - v_decomp(i-1,j,k,1))/dx[0] -
+                                  (v_decomp(i,j+1,k,0) - v_decomp(i,j-1,k,0))/dx[1] );
+
+            // curl w2 = u_1,3 - u_3,1
+            v_stats(i,j,k,2) = 0.5*( (v_decomp(i,j,k+1,0) - v_decomp(i,j,k-1,0))/dx[2] -
+                                  (v_decomp(i+1,j,k,2) - v_decomp(i-1,j,k,2))/dx[0] );
+
+            // curl w3 = u_3,2 - u_2,3
+            v_stats(i,j,k,3) = 0.5*( (v_decomp(i,j+1,k,2) - v_decomp(i,j-1,k,2))/dx[1] -
+                                  (v_decomp(i,j,k+1,1) - v_decomp(i,j,k-1,1))/dx[2] );
+
+        });
+      }
+      // compute spatial mean
+      Real mean_div  = vel_stats.sum(0) / (npts);
+      Real mean_w1   = vel_stats.sum(1) / (npts);
+      Real mean_w2   = vel_stats.sum(2) / (npts);
+      Real mean_w3   = vel_stats.sum(3) / (npts);
+      vel_stats.plus(-1.0*mean_div, 0, 1);
+      vel_stats.plus(-1.0*mean_w1,  1, 1);
+      vel_stats.plus(-1.0*mean_w2,  2, 1);
+      vel_stats.plus(-1.0*mean_w3,  3, 1);
+
+      Vector<Real> U2(4);
+      Vector<Real> U3(4);
+      Vector<Real> U4(4);
+      for (int i=0;i<4;++i) {
+        CCMoments(vel_stats,i,ccTempA,2,U2[i]);
+        CCMoments(vel_stats,i,ccTempA,3,U3[i]);
+        CCMoments(vel_stats,i,ccTempA,4,U4[i]);
+      }
+      skew_div = U3[0]/pow(U2[0],1.5);
+      kurt_div = U4[0]/pow(U2[0],2.0);   // normalize by the squared 2nd moment, like kurt_vort
+      skew_vort = (U3[1] + U3[2] + U3[3])/
+                  (pow(U2[1],1.5) + pow(U2[2],1.5) + pow(U2[3],1.5));
+      kurt_vort = (U4[1] + U4[2] + U4[3])/
+                  (pow(U2[1],2.0) + pow(U2[2],2.0) + pow(U2[3],2.0));
+
+    }
+    std::string turbfilename = amrex::Concatenate("turbstats_filtered_",restart,9);
+    std::ostringstream os;
+    os << std::setprecision(3) << kmin;
+    turbfilename += os.str();
+    turbfilename += "_";
+    std::ostringstream oss;
+    oss << std::setprecision(3) << kmax;
+    turbfilename += oss.str();
+
+    std::ofstream turboutfile;
+    if (ParallelDescriptor::IOProcessor()) {
+      turboutfile.open(turbfilename, std::ios::app);
+    }
+    if (ParallelDescriptor::IOProcessor()) {
+      turboutfile << "u_rms " << "u_rms_s " << "u_rms_d " << "delta_u_rms " 
+                  << "TaylorLen " << "TaylorRe*Eta "
+                  << "skew " << "skew_s " << "skew_d "
+                  << "kurt " << "kurt_s " << "kurt_d "
+                  << "var_ux " << "var_uy " << "var_uz "
+                  << "var_uxs " << "var_uys " << "var_uzs "
+                  << "var_uxd " << "var_uyd " << "var_uzd "
+                  << "skew_div " << "kurt_div "
+                  << "skew_vort " << "kurt_vort "
+                  << std::endl;
+
+      turboutfile << u_rms << " ";
+      turboutfile << u_rms_s << " ";
+      turboutfile << u_rms_d << " ";
+      turboutfile << delta_u_rms << " ";
+      turboutfile << taylor_len << " ";
+      turboutfile << taylor_Re_eta << " ";
+      turboutfile << skew << " ";
+      turboutfile << skew_s << " ";
+      turboutfile << skew_d << " ";
+      turboutfile << kurt << " ";
+      turboutfile << kurt_s << " ";
+      turboutfile << kurt_d << " ";
+      for (int i=0;i<9;++i) {
+        turboutfile << var[i] << " ";
+      }
+      turboutfile << skew_div << " ";
+      turboutfile << kurt_div << " ";
+      turboutfile << skew_vort << " ";
+      turboutfile << kurt_vort << " ";
+      turboutfile << std::endl;
+    }
+    // timer
+    Real ts2 = ParallelDescriptor::second() - ts1;
+    ParallelDescriptor::ReduceRealMax(ts2,  ParallelDescriptor::IOProcessorNumber());
+    amrex::Print() << "Time (spectral filtering) " << ts2 << " seconds\n";
+
+    // MultiFab memory usage
+    const int IOProc = ParallelDescriptor::IOProcessorNumber();
+
+    amrex::Long min_fab_megabytes  = amrex::TotalBytesAllocatedInFabsHWM()/1048576;
+    amrex::Long max_fab_megabytes  = min_fab_megabytes;
+
+    ParallelDescriptor::ReduceLongMin(min_fab_megabytes, IOProc);
+    ParallelDescriptor::ReduceLongMax(max_fab_megabytes, IOProc);
+
+    amrex::Print() << "High-water FAB megabyte spread across MPI nodes: ["
+                   << min_fab_megabytes << " ... " << max_fab_megabytes << "]\n";
+
+    min_fab_megabytes  = amrex::TotalBytesAllocatedInFabs()/1048576;
+    max_fab_megabytes  = min_fab_megabytes;
+
+    ParallelDescriptor::ReduceLongMin(min_fab_megabytes, IOProc);
+    ParallelDescriptor::ReduceLongMax(max_fab_megabytes, IOProc);
+
+    amrex::Print() << "Curent     FAB megabyte spread across MPI nodes: ["
+                   << min_fab_megabytes << " ... " << max_fab_megabytes << "]\n";
+
+    if (ParallelDescriptor::IOProcessor()) turboutfile.close();
+}
diff --git a/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H
new file mode 100644
index 000000000..4f99f51a3
--- /dev/null
+++ b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H
@@ -0,0 +1,113 @@
+#ifndef _spectral_functions_stag_H_
+#define _spectral_functions_stag_H_
+
+#include <heffte.h>
+#include <AMReX.H>
+#include <AMReX_MultiFab.H>
+#include <AMReX_ArrayLim.H>
+
+#include <AMReX_Vector.H>
+#include <AMReX_VisMF.H>
+
+
+#ifdef AMREX_USE_CUDA
+#include <cufft.h>
+#elif AMREX_USE_HIP
+#  if __has_include(<rocfft/rocfft.h>)  // ROCm 5.3+
+#    include <rocfft/rocfft.h>
+#  else
+#    include <rocfft.h>
+#  endif
+#else
+#include <fftw3.h>
+#include <fftw3-mpi.h>
+#endif
+
+#include <AMReX_GpuComplex.H>
+
+#include <string>
+
+#define ALIGN 16
+
+using namespace amrex;
+
+#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT)
+#ifdef AMREX_USE_CUDA
+std::string cufftError (const cufftResult& err);
+#endif // AMREX_USE_CUDA
+#ifdef AMREX_USE_HIP
+std::string rocfftError (const rocfft_status err);
+void Assert_rocfft_status (std::string const& name, rocfft_status status);
+#endif // AMREX_USE_HIP
+#endif // no heFFTe backend macro defined
+// Reads checkpoint chk<restart>: defines ba/dmap, prim and vel[0..2] on the new layout and fills them.
+void SpectralReadCheckPoint(amrex::Geometry& geom,
+                            const amrex::Box& domain,
+                            amrex::MultiFab& prim,
+                            std::array<MultiFab, 3>& vel,
+                            BoxArray& ba, DistributionMapping& dmap,
+                            const amrex::Vector<int> n_cells,
+                            const int nprimvars,
+                            const amrex::Vector<int> max_grid_size,
+                            const amrex::IntVect ngc,
+                            const int restart);
+// heFFTe-based decomposition of the 3-component vel into vel_decomp_filter (the caller supplies 9 components).
+void SpectralVelDecomp(const MultiFab& vel,
+                       MultiFab& vel_decomp_filter,
+                       const amrex::Real kmin,
+                       const amrex::Real kmax,
+                       const amrex::Geometry& geom,
+                       const amrex::Vector<int> n_cells);
+// Presumably the scalar counterpart of SpectralVelDecomp (definition not in view) -- TODO confirm.
+void SpectralScalarDecomp(const MultiFab& scalar,
+                          MultiFab& scalar_filter,
+                          const amrex::Real kmin,
+                          const amrex::Real kmax,
+                          const amrex::Geometry& geom,
+                          const amrex::Vector<int> n_cells);
+// Presumably writes a plot file of the filtered and total fields for the given step -- TODO confirm.
+void SpectralWritePlotFile(const int step,
+                           const amrex::Real& kmin,
+                           const amrex::Real& kmax,
+                           const amrex::Geometry& geom,
+                           const amrex::MultiFab& vel_decomp_in,
+                           const amrex::MultiFab& scalar_in,
+                           const amrex::MultiFab& vel_total,
+                           const amrex::MultiFab& scalar_total);
+// Used by SpectralReadCheckPoint to load MultiFab mf_name from checkpointname into mf; details not in view.
+void Read_Copy_MF_Checkpoint(amrex::MultiFab& mf, std::string mf_name, 
+                             const std::string& checkpointname, 
+                             BoxArray& ba_old, DistributionMapping& dmap_old,
+                             int NVARS, int NGC, const amrex::IntVect ngc,
+                             int nodal_flag=-1);
+// Presumably moves ncomp components of face-centered data onto cell centers -- TODO confirm against definition.
+void ShiftFaceToCC(const MultiFab& face_in, int face_in_comp,
+                   MultiFab& cc_in, int cc_in_comp,
+                   int ncomp);
+// Presumably computes the gradient of phi_in into the face MultiFabs gphi -- TODO confirm against definition.
+void ComputeGrad(const MultiFab & phi_in, std::array<MultiFab, 3> & gphi,
+                 int start_incomp, int start_outcomp, int ncomp, int bccomp, const Geometry & geom,
+                 int increment);
+// Presumably sums each staggered MultiFab of m1 into sum -- TODO confirm against definition.
+void SumStag(const std::array<MultiFab, 3>& m1,
+	     amrex::Vector<amrex::Real>& sum);
+// Moments of face-centered data: callers pass power 2, 3 or 4 and then sum mscr; exact semantics not in view.
+void FCMoments(const std::array<MultiFab, 3>& m1,
+		       const amrex::Vector<int>& comps,
+               std::array<MultiFab, 3>&  mscr,
+		       const int& power,
+		       amrex::Vector<amrex::Real>& prod_val);
+// Presumably sums component comp of the cell-centered m1 into sum -- TODO confirm against definition.
+void SumCC(const amrex::MultiFab& m1,
+	   const int& comp,
+	   amrex::Real& sum,
+	   const bool& divide_by_ncells);
+// Moment of a cell-centered component: callers pass power 2, 3 or 4 and read prod_val; semantics not in view.
+void CCMoments(const amrex::MultiFab& m1,
+		 const int& comp1,
+                 amrex::MultiFab& mscr,
+		 const int& power,
+		 amrex::Real& prod_val);
+
+#endif
+
diff --git a/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp
new file mode 100644
index 000000000..b36e18b8d
--- /dev/null
+++ b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp
@@ -0,0 +1,1077 @@
+#include "spectral_functions.H"
+#include "AMReX_PlotFileUtil.H"
+#include "AMReX_PlotFileDataImpl.H"
+
+#include <sys/stat.h>
+
+#include "chrono"
+#include <AMReX_MultiFabUtil.H>
+#include "AMReX_PlotFileUtil.H"
+#include "AMReX_BoxArray.H"
+
+using namespace std::chrono;
+
+namespace {
+    // Skip the remainder of the current line (at most 100000 chars), newline included.
+    void GotoNextLine (std::istream& is)
+    {
+        is.ignore(std::streamsize{100000}, '\n');
+    }
+}
+
+// Reads the data needed by the spectral filter from the checkpoint directory
+// amrex::Concatenate("chk",restart,9).
+//
+// On return, ba/dmap hold a new decomposition of `domain` (chopped to
+// max_grid_size, distributed over all ranks); prim (nprimvars components)
+// and the face-centered vel[0..2] (one component each) are defined on it
+// with ngc ghost cells, loaded through Read_Copy_MF_Checkpoint() and
+// ghost-filled with geom.periodicity().  The BoxArray stored in the checkpoint
+// Header (ba_old) only serves to read the on-disk MultiFabs.
+// n_cells is not used by this function.
+void SpectralReadCheckPoint(amrex::Geometry& geom,
+                            const amrex::Box& domain,
+                            amrex::MultiFab& prim,
+                            std::array<MultiFab, 3>& vel,
+                            BoxArray& ba, DistributionMapping& dmap,
+                            const amrex::Vector<int> n_cells,
+                            const int nprimvars,
+                            const amrex::Vector<int> max_grid_size,
+                            const amrex::IntVect ngc,
+                            const int restart)
+{
+    // timer for profiling
+    BL_PROFILE_VAR("SpectralReadCheckPoint()",SpectralReadCheckPoint);
+
+    // checkpoint file name, e.g., chk0000010
+    const std::string& checkpointname = amrex::Concatenate("chk",restart,9);
+
+    amrex::Print() << "Restart from checkpoint " << checkpointname << "\n";
+
+    std::string line;   // receives the (discarded) title line of the Header
+
+    // read in old boxarray, and create old distribution map (this is to read in MFabs)
+    BoxArray ba_old;
+    DistributionMapping dmap_old;
+
+    // initialize new boxarray
+    ba.define(domain);
+    ba.maxSize(IntVect(max_grid_size));
+    dmap.define(ba, ParallelDescriptor::NProcs());
+
+    amrex::Vector<amrex::IntVect> nodal_flag_dir;
+    amrex::IntVect                nodal_flag_x;
+    amrex::IntVect                nodal_flag_y;
+    amrex::IntVect                nodal_flag_z;
+    nodal_flag_dir.resize(3);
+
+    for (int i=0; i<3; ++i) {
+        nodal_flag_x[i] = int(i==0);
+        nodal_flag_y[i] = int(i==1);
+        nodal_flag_z[i] = int(i==2);
+        AMREX_D_TERM(nodal_flag_dir[0][i] = nodal_flag_x[i];,
+                     nodal_flag_dir[1][i] = nodal_flag_y[i];,
+                     nodal_flag_dir[2][i] = nodal_flag_z[i];);
+    }
+
+    // Header
+    {
+        std::string File(checkpointname + "/Header");
+        Vector<char> fileCharPtr;
+        ParallelDescriptor::ReadAndBcastFile(File, fileCharPtr);
+        std::string fileCharPtrString(fileCharPtr.dataPtr());
+        std::istringstream is(fileCharPtrString, std::istringstream::in);
+
+        // read in title line
+        std::getline(is, line);
+
+        // read in time step number
+        int step;
+        is >> step;
+        GotoNextLine(is);
+
+        // read in time
+        Real time;
+        is >> time;
+        GotoNextLine(is);
+
+        // read in statsCount
+        int statsCount;
+        is >> statsCount;
+        GotoNextLine(is);
+
+        // read in BoxArray (fluid) from Header
+        ba_old.readFrom(is);
+        GotoNextLine(is);
+
+        // create old distribution mapping
+        dmap_old.define(ba_old, ParallelDescriptor::NProcs());
+
+        prim.define(ba,dmap,nprimvars,ngc);
+        // face-centered velocity, one component per direction
+        for (int d=0; d<3; d++) {
+            vel[d].define(convert(ba,nodal_flag_dir[d]), dmap, 1, ngc);
+        }
+    }
+
+    // read in the MultiFab data
+    Read_Copy_MF_Checkpoint(prim,"prim",checkpointname,ba_old,dmap_old,nprimvars,1,ngc);
+
+    Read_Copy_MF_Checkpoint(vel[0],"velx",checkpointname,ba_old,dmap_old,1,1,ngc,0);
+    Read_Copy_MF_Checkpoint(vel[1],"vely",checkpointname,ba_old,dmap_old,1,1,ngc,1);
+    Read_Copy_MF_Checkpoint(vel[2],"velz",checkpointname,ba_old,dmap_old,1,1,ngc,2);
+
+    // FillBoundaries
+    prim.FillBoundary(geom.periodicity());
+    vel[0].FillBoundary(geom.periodicity());
+    vel[1].FillBoundary(geom.periodicity());
+    vel[2].FillBoundary(geom.periodicity());
+}
+
+void SpectralVelDecomp(const MultiFab& vel,
+                       MultiFab& vel_decomp_filter,
+                       const amrex::Real kmin,
+                       const amrex::Real kmax,
+                       const amrex::Geometry& geom,
+                       const amrex::Vector<int> n_cells)
+{
+    BL_PROFILE_VAR("SpectralVelDecomp()",SpectralVelDecomp);
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, 
+        "SpectralVelDecomp: must have 3 components of input vel MultiFab");
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.local_size() == 1, 
+        "SpectralVelDecomp: Must have one Box per MPI process when using heFFTe");
+
+    const GpuArray<Real, 3> dx = geom.CellSizeArray();
+    
+    long npts;
+    Box domain = geom.Domain();
+    npts = (domain.length(0)*domain.length(1)*domain.length(2));
+    Real sqrtnpts = std::sqrt(npts);
+    
+    // get box array and distribution map of vel
+    DistributionMapping dm = vel.DistributionMap();
+    BoxArray ba            = vel.boxArray();
+    
+    // since there is 1 MPI rank per box, each MPI rank obtains its local box and the associated boxid
+    Box local_box;
+    int local_boxid;
+    {
+        for (int i = 0; i < ba.size(); ++i) {
+            Box b = ba[i];
+            // each MPI rank has its own local_box Box and local_boxid ID
+            if (ParallelDescriptor::MyProc() == dm[i]) {
+                local_box = b;
+                local_boxid = i;
+            }
+        }
+    }
+
+    // now each MPI rank works on its own box
+    // for real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset
+
+    // start by coarsening each box by 2 in the x-direction
+    Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1)));
+
+    // if the coarsened box's high-x index is even, we shrink the size in 1 in x
+    // this avoids overlap between coarsened boxes
+    if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) {
+        c_local_box.setBig(0,c_local_box.bigEnd(0)-1);
+    }
+    // for any boxes that touch the hi-x domain we
+    // increase the size of boxes by 1 in x
+    // this makes the overall fft dataset have size (Nx/2+1 x Ny x Nz)
+    if (local_box.bigEnd(0) == geom.Domain().bigEnd(0)) {
+        c_local_box.growHi(0,1);
+    }
+
+    // each MPI rank gets storage for its piece of the fft
+    BaseFab<GpuComplex<Real> > spectral_field_Tx(c_local_box, 1, The_Device_Arena()); // totalx
+    BaseFab<GpuComplex<Real> > spectral_field_Ty(c_local_box, 1, The_Device_Arena()); // totaly
+    BaseFab<GpuComplex<Real> > spectral_field_Tz(c_local_box, 1, The_Device_Arena()); // totalz
+    BaseFab<GpuComplex<Real> > spectral_field_Sx(c_local_box, 1, The_Device_Arena()); // solenoidalx
+    BaseFab<GpuComplex<Real> > spectral_field_Sy(c_local_box, 1, The_Device_Arena()); // solenoidaly
+    BaseFab<GpuComplex<Real> > spectral_field_Sz(c_local_box, 1, The_Device_Arena()); // solenoidalz
+    BaseFab<GpuComplex<Real> > spectral_field_Dx(c_local_box, 1, The_Device_Arena()); // dilatationalx
+    BaseFab<GpuComplex<Real> > spectral_field_Dy(c_local_box, 1, The_Device_Arena()); // dilatationaly
+    BaseFab<GpuComplex<Real> > spectral_field_Dz(c_local_box, 1, The_Device_Arena()); // dilatationalz
+	  MultiFab vel_single(ba, dm, 1, 0);
+    	
+    int r2c_direction = 0;
+    
+    // ForwardTransform
+    // X
+    using heffte_complex = typename heffte::fft_output<Real>::type;
+    {
+#if defined(HEFFTE_CUFFT)
+      heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+      heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+      heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+      ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+      {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+      {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+      {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+      r2c_direction, ParallelDescriptor::Communicator());
+	    
+      vel_single.ParallelCopy(vel, 0, 0, 1);
+	    heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tx.dataPtr();
+      fft.forward(vel_single[local_boxid].dataPtr(),spectral_data);
+    }
+    // Y
+    {
+#if defined(HEFFTE_CUFFT)
+      heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+      heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+      heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+      ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+      {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+      {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+      {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+      r2c_direction, ParallelDescriptor::Communicator());
+	    
+      vel_single.ParallelCopy(vel, 1, 0, 1);
+      heffte_complex* spectral_data = (heffte_complex*) spectral_field_Ty.dataPtr();
+      fft.forward(vel_single[local_boxid].dataPtr(),spectral_data);
+    }
+    // Z
+    {
+#if defined(HEFFTE_CUFFT)
+      heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+      heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+      heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+      ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+      {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+      {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+      {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+      r2c_direction, ParallelDescriptor::Communicator());
+	    
+      vel_single.ParallelCopy(vel, 2, 0, 1);
+      heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tz.dataPtr();
+      fft.forward(vel_single[local_boxid].dataPtr(),spectral_data);
+    }
+    
+    Gpu::streamSynchronize();
+    
+    int nx = n_cells[0]; 
+    int ny = n_cells[1]; 
+    int nz = n_cells[2];
+    
+    // Decompose velocity field into solenoidal and dilatational
+    Array4< GpuComplex<Real> > spectral_tx = spectral_field_Tx.array();
+    Array4< GpuComplex<Real> > spectral_ty = spectral_field_Ty.array();
+    Array4< GpuComplex<Real> > spectral_tz = spectral_field_Tz.array();
+    Array4< GpuComplex<Real> > spectral_sx = spectral_field_Sx.array();
+    Array4< GpuComplex<Real> > spectral_sy = spectral_field_Sy.array();
+    Array4< GpuComplex<Real> > spectral_sz = spectral_field_Sz.array();
+    Array4< GpuComplex<Real> > spectral_dx = spectral_field_Dx.array();
+    Array4< GpuComplex<Real> > spectral_dy = spectral_field_Dy.array();
+    Array4< GpuComplex<Real> > spectral_dz = spectral_field_Dz.array();
+    ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k)
+    {
+
+       Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0;
+       
+       if (i <= nx/2) { 
+           
+           // Get the wavevector
+           int ki = i;
+           int kj = j;
+           if (j >= ny/2) kj = ny - j;
+           int kk = k;
+           if (k >= nz/2) kk = nz - k;
+
+           // Gradient Operators
+           GxR = (cos(2.0*M_PI*ki/nx)-1.0)/dx[0];
+           GxC = (sin(2.0*M_PI*ki/nx)-0.0)/dx[0];
+           GyR = (cos(2.0*M_PI*kj/ny)-1.0)/dx[1];
+           GyC = (sin(2.0*M_PI*kj/ny)-0.0)/dx[1];
+           GzR = (cos(2.0*M_PI*kk/nz)-1.0)/dx[2];
+           GzC = (sin(2.0*M_PI*kk/nz)-0.0)/dx[2];
+       }
+       else { // conjugate
+            amrex::Abort("check the code; i should not go beyond bx.length(0)/2");
+       }
+
+       // Get the wavenumber
+       int ki = i;
+       int kj = j;
+       if (j >= ny/2) kj = ny - j;
+       int kk = k;
+       if (k >= nz/2) kk = nz - k;
+       Real knum = (ki*ki + kj*kj + kk*kk);
+       knum = std::sqrt(knum);
+
+       // Scale Total velocity FFT components with Filtering
+       if ((knum >= kmin) and (knum <= kmax)) {
+         
+         spectral_tx(i,j,k) *= (1.0/sqrtnpts);
+         spectral_ty(i,j,k) *= (1.0/sqrtnpts);
+         spectral_tz(i,j,k) *= (1.0/sqrtnpts);
+
+         // Inverse Laplacian
+         Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC;
+
+         // Divergence of vel
+         Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC +
+                     spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC +
+                     spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ;
+         Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR +
+                     spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR +
+                     spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ;
+
+         if (Lap < 1.0e-12) { // zero mode for no bulk motion
+             spectral_dx(i,j,k) *= 0.0;
+             spectral_dy(i,j,k) *= 0.0;
+             spectral_dz(i,j,k) *= 0.0;
+         }
+         else {
+
+             // Dilatational velocity 
+             GpuComplex<Real> copy_dx((divR*GxR + divC*GxC) / Lap, 
+                                      (divC*GxR - divR*GxC) / Lap);
+             spectral_dx(i,j,k) = copy_dx;
+
+             GpuComplex<Real> copy_dy((divR*GyR + divC*GyC) / Lap,
+                                      (divC*GyR - divR*GyC) / Lap);
+             spectral_dy(i,j,k) = copy_dy;
+
+             GpuComplex<Real> copy_dz((divR*GzR + divC*GzC) / Lap,
+                                      (divC*GzR - divR*GzC) / Lap);
+             spectral_dz(i,j,k) = copy_dz;
+         }
+             
+         // Solenoidal velocity
+         spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k);
+         spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); 
+         spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k);
+       }
+       else {
+         spectral_tx(i,j,k) = 0.0;
+         spectral_ty(i,j,k) = 0.0;
+         spectral_tz(i,j,k) = 0.0;
+         spectral_sx(i,j,k) = 0.0;
+         spectral_sy(i,j,k) = 0.0;
+         spectral_sz(i,j,k) = 0.0;
+         spectral_dx(i,j,k) = 0.0;
+         spectral_dy(i,j,k) = 0.0;
+         spectral_dz(i,j,k) = 0.0;
+       }
+
+    });
+
+    Gpu::streamSynchronize();
+    
+	  MultiFab vel_decomp_filter_single(ba, dm, 1, 0);
+    // inverse Fourier transform filtered total velocity 
+    {
+#if defined(HEFFTE_CUFFT)
+      heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+      heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+      heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+      ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+      {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+      {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+      {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+      r2c_direction, ParallelDescriptor::Communicator());
+	    
+      heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tx.dataPtr();
+      fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+    
+      Gpu::streamSynchronize();
+      vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 0, 1);
+    }
+    {
+#if defined(HEFFTE_CUFFT)
+      heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+      heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+      heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+      ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+      {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+      {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+      {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+      r2c_direction, ParallelDescriptor::Communicator());
+	    
+      heffte_complex* spectral_data = (heffte_complex*) spectral_field_Ty.dataPtr();
+      fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+    
+      Gpu::streamSynchronize();
+      vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 1, 1);
+    }
+    {
+#if defined(HEFFTE_CUFFT)
+      heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+      heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+      heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+      ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+      {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+      {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+      {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+      r2c_direction, ParallelDescriptor::Communicator());
+	    
+      heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tz.dataPtr();
+      fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+    
+      Gpu::streamSynchronize();
+      vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 2, 1);
+    }
+    // inverse Fourier transform filtered solenoidal and dilatational components 
+    {
+#if defined(HEFFTE_CUFFT)
+      heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+      heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+      heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+      ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+      {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+      {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+      {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+      r2c_direction, ParallelDescriptor::Communicator());
+	    
+      heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sx.dataPtr();
+      fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+    
+      Gpu::streamSynchronize();
+      vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 3, 1);
+    }
+    {
+#if defined(HEFFTE_CUFFT)
+      heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+      heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+      heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+      ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+      {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+      {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+      {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+      r2c_direction, ParallelDescriptor::Communicator());
+	    
+      heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sy.dataPtr();
+      fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+    
+      Gpu::streamSynchronize();
+      vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 4, 1);
+    }
+    {
+#if defined(HEFFTE_CUFFT)
+      heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+      heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+      heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+      ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+      {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+      {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+      {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+      r2c_direction, ParallelDescriptor::Communicator());
+	    
+      heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sz.dataPtr();
+      fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+    
+      Gpu::streamSynchronize();
+      vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 5, 1);
+    }
+    {
+#if defined(HEFFTE_CUFFT)
+      heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+      heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+      heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+      ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+      {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+      {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+      {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+      r2c_direction, ParallelDescriptor::Communicator());
+	    
+      heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dx.dataPtr();
+      fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+    
+      Gpu::streamSynchronize();
+      vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 6, 1);
+    }
+    {
+#if defined(HEFFTE_CUFFT)
+      heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+      heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+      heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+      ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+      {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+      {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+      {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+      r2c_direction, ParallelDescriptor::Communicator());
+	    
+      heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dy.dataPtr();
+      fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+    
+      Gpu::streamSynchronize();
+      vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 7, 1);
+    }
+    {
+#if defined(HEFFTE_CUFFT)
+      heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+      heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+      heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+      ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+      {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+      {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+      {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+      r2c_direction, ParallelDescriptor::Communicator());
+	    
+      heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dz.dataPtr();
+      fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+    
+      Gpu::streamSynchronize();
+      vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 8, 1);
+    }
+
+    
+    vel_decomp_filter.mult(1.0/sqrtnpts);
+
+}
+
+
+// Band-pass filter a single-component scalar field in Fourier space with heFFTe.
+//
+// The field is transformed with a distributed real-to-complex 3D FFT, every
+// mode whose integer wavenumber magnitude sqrt(ki^2 + kj^2 + kk^2) lies outside
+// [kmin,kmax] is zeroed, and the result is transformed back.
+//
+//   scalar        : input field; must have exactly 1 component and exactly one
+//                   box per MPI rank (both are asserted)
+//   scalar_filter : output; component 0 receives the filtered field.  Note that
+//                   the final normalization multiplies the whole MultiFab.
+//   kmin, kmax    : inclusive wavenumber band that is kept
+//   geom          : supplies the index-space domain
+//   n_cells       : number of cells in x, y, z
+//
+// Normalization: the forward/backward pair is treated as unnormalized, so the
+// round trip needs an overall factor 1/npts.  It is applied as 1/sqrt(npts) in
+// spectral space and 1/sqrt(npts) after the inverse transform.
+void SpectralScalarDecomp(const MultiFab& scalar,
+                          MultiFab& scalar_filter,
+                          const amrex::Real kmin,
+                          const amrex::Real kmax,
+                          const amrex::Geometry& geom,
+                          const amrex::Vector<int> n_cells)
+{
+    BL_PROFILE_VAR("SpectralScalarDecomp()",SpectralScalarDecomp);
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(scalar.nComp() == 1,
+        "SpectralScalarDecomp: must have 1 components of input scalar MultiFab");
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(scalar.local_size() == 1,
+        "SpectralScalarDecomp: Must have one Box per MPI process when using heFFTe");
+
+    // widen each extent before multiplying so the cell count cannot overflow int
+    const Box domain = geom.Domain();
+    const long npts = static_cast<long>(domain.length(0))
+                    * static_cast<long>(domain.length(1))
+                    * static_cast<long>(domain.length(2));
+    const Real sqrtnpts = std::sqrt(npts);
+
+    // get box array and distribution map of the scalar
+    DistributionMapping dm = scalar.DistributionMap();
+    BoxArray ba            = scalar.boxArray();
+
+    // since there is 1 box per MPI rank, each rank looks up its own box and box id
+    Box local_box;
+    int local_boxid = -1;
+    for (int i = 0; i < ba.size(); ++i) {
+        if (ParallelDescriptor::MyProc() == dm[i]) {
+            local_box = ba[i];
+            local_boxid = i;
+        }
+    }
+
+    // for real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset
+
+    // start by coarsening each box by 2 in the x-direction
+    Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1)));
+
+    // if the coarsened box's high-x index is even, shrink the box by 1 in x;
+    // this avoids overlap between coarsened boxes
+    if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) {
+        c_local_box.setBig(0,c_local_box.bigEnd(0)-1);
+    }
+    // boxes that touch the hi-x domain boundary grow by 1 in x so that the
+    // overall fft dataset has size (Nx/2+1 x Ny x Nz)
+    if (local_box.bigEnd(0) == geom.Domain().bigEnd(0)) {
+        c_local_box.growHi(0,1);
+    }
+
+    // each MPI rank gets storage for its piece of the fft
+    BaseFab<GpuComplex<Real> > spectral_field(c_local_box, 1, The_Device_Arena());
+    MultiFab scalar_single(ba, dm, 1, 0);
+
+    int r2c_direction = 0;
+
+    using heffte_complex = typename heffte::fft_output<Real>::type;
+#if defined(HEFFTE_CUFFT)
+    using heffte_backend = heffte::backend::cufft;
+#elif defined(HEFFTE_ROCFFT)
+    using heffte_backend = heffte::backend::rocfft;
+#elif defined(HEFFTE_FFTW)
+    using heffte_backend = heffte::backend::fftw;
+#endif
+
+    // one plan serves both the forward and the backward transform
+    heffte::fft3d_r2c<heffte_backend> fft
+      ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+        {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+       {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+        {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+       r2c_direction, ParallelDescriptor::Communicator());
+
+    heffte_complex* spectral_data = reinterpret_cast<heffte_complex*>(spectral_field.dataPtr());
+
+    // forward transform
+    scalar_single.ParallelCopy(scalar, 0, 0, 1);
+    fft.forward(scalar_single[local_boxid].dataPtr(),spectral_data);
+
+    Gpu::streamSynchronize();
+
+    // filtering
+    Array4< GpuComplex<Real> > spectral = spectral_field.array();
+    const int nx = n_cells[0];
+    const int ny = n_cells[1];
+    const int nz = n_cells[2];
+    ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k)
+    {
+       // only the non-redundant half of the spectrum in x is stored
+       if (i > nx/2) {
+            amrex::Abort("check the code; i should not go beyond bx.length(0)/2");
+       }
+
+       // fold the y and z indices onto non-negative wavenumbers
+       const int ki = i;
+       const int kj = (j >= ny/2) ? ny - j : j;
+       const int kk = (k >= nz/2) ? nz - k : k;
+       Real knum = (ki*ki + kj*kj + kk*kk);
+       knum = std::sqrt(knum);
+
+       if ((knum >= kmin) and (knum <= kmax)) {
+         // first half of the 1/npts round-trip normalization (applied once)
+         spectral(i,j,k) *= (1.0/sqrtnpts);
+       }
+       else {
+         spectral(i,j,k) *= 0.0;
+       }
+    });
+
+    Gpu::streamSynchronize();
+
+    // inverse Fourier transform of the filtered scalar
+    MultiFab scalar_filter_single(ba, dm, 1, 0);
+    fft.backward(spectral_data, scalar_filter_single[local_boxid].dataPtr());
+
+    Gpu::streamSynchronize();
+    scalar_filter.ParallelCopy(scalar_filter_single, 0, 0, 1);
+
+    // second half of the 1/npts round-trip normalization
+    scalar_filter.mult(1.0/sqrtnpts);
+
+}
+
+// Assemble a 20-component diagnostic MultiFab from the filtered fields and
+// write it as a single-level plotfile.  The plotfile name is
+// amrex::Concatenate("filtered_",step,9) followed by kmin, "_" and kmax, with
+// both wavenumbers streamed at precision 3.
+//
+// Output components:
+//    0- 2: ux, uy, uz           (vel_decomp_in components 0-2)
+//    3- 5: ux_s, uy_s, uz_s     (vel_decomp_in components 3-5)
+//    6- 8: ux_d, uy_d, uz_d     (vel_decomp_in components 6-8)
+//    9-11: umag, umag_s, umag_d (magnitudes of the three triplets above)
+//   12   : scalar               (scalar_in component 0)
+//   13   : divergence = u_1,1 + u_2,2 + u_3,3
+//   14   : w1 = u_2,1 - u_1,2
+//   15   : w2 = u_1,3 - u_3,1
+//   16   : w3 = u_3,2 - u_2,3
+//   17   : vorticity magnitude sqrt(w1**2 + w2**2 + w3**2)
+//   18   : ux_org               (vel_total component 0)
+//   19   : scalar_org           (scalar_total component 0)
+//
+// NOTE(review): the centered differences read vel_decomp_in at i+-1, j+-1 and
+// k+-1 for every cell of the tile box, so vel_decomp_in presumably needs at
+// least one filled ghost cell - confirm against the caller.
+void SpectralWritePlotFile(const int step,
+                           const amrex::Real& kmin,
+                           const amrex::Real& kmax,
+                           const amrex::Geometry& geom,
+                           const amrex::MultiFab& vel_decomp_in,
+                           const amrex::MultiFab& scalar_in,
+                           const amrex::MultiFab& vel_total,
+                           const amrex::MultiFab& scalar_total)
+{
+    MultiFab output(vel_decomp_in.boxArray(), vel_decomp_in.DistributionMap(), 20, 0);
+    output.setVal(0.0);
+
+    const GpuArray<Real, 3> dx = geom.CellSizeArray();
+
+    for ( MFIter mfi(output,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+
+        const Box& tile = mfi.tilebox();
+
+        const Array4<      Real>& out      = output.array(mfi);
+        const Array4<const Real>& v_decomp = vel_decomp_in.array(mfi);
+        const Array4<const Real>& sca      = scalar_in.array(mfi);
+        const Array4<const Real>& v_tot    = vel_total.array(mfi);
+        const Array4<const Real>& sca_tot  = scalar_total.array(mfi);
+
+        amrex::ParallelFor(tile, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+            // straight copy of the nine decomposed velocity components
+            for (int n = 0; n < 9; ++n) {
+                out(i,j,k,n) = v_decomp(i,j,k,n);
+            }
+
+            // magnitudes: m = 0 total, m = 1 solenoidal, m = 2 dilatational
+            for (int m = 0; m < 3; ++m) {
+                const int c = 3*m;
+                out(i,j,k,9+m) = std::sqrt(out(i,j,k,c)*out(i,j,k,c) + out(i,j,k,c+1)*out(i,j,k,c+1) + out(i,j,k,c+2)*out(i,j,k,c+2));
+            }
+
+            out(i,j,k,12) = sca(i,j,k,0);
+
+            // two-cell differences of the total velocity; the factor 0.5 of
+            // the centered stencil is applied where they are combined
+            const Real d0_dx = (v_decomp(i+1,j,k,0) - v_decomp(i-1,j,k,0))/dx[0];
+            const Real d1_dx = (v_decomp(i+1,j,k,1) - v_decomp(i-1,j,k,1))/dx[0];
+            const Real d2_dx = (v_decomp(i+1,j,k,2) - v_decomp(i-1,j,k,2))/dx[0];
+            const Real d0_dy = (v_decomp(i,j+1,k,0) - v_decomp(i,j-1,k,0))/dx[1];
+            const Real d1_dy = (v_decomp(i,j+1,k,1) - v_decomp(i,j-1,k,1))/dx[1];
+            const Real d2_dy = (v_decomp(i,j+1,k,2) - v_decomp(i,j-1,k,2))/dx[1];
+            const Real d0_dz = (v_decomp(i,j,k+1,0) - v_decomp(i,j,k-1,0))/dx[2];
+            const Real d1_dz = (v_decomp(i,j,k+1,1) - v_decomp(i,j,k-1,1))/dx[2];
+            const Real d2_dz = (v_decomp(i,j,k+1,2) - v_decomp(i,j,k-1,2))/dx[2];
+
+            // divergence
+            out(i,j,k,13) = 0.5*( d0_dx + d1_dy + d2_dz );
+
+            // curl w1 = u_2,1 - u_1,2
+            out(i,j,k,14) = 0.5*( d1_dx - d0_dy );
+
+            // curl w2 = u_1,3 - u_3,1
+            out(i,j,k,15) = 0.5*( d0_dz - d2_dx );
+
+            // curl w3 = u_3,2 - u_2,3
+            out(i,j,k,16) = 0.5*( d2_dy - d1_dz );
+
+            // vorticity magnitude: sqrt(w1*w1 + w2*w2 + w3*w3)
+            out(i,j,k,17) = std::sqrt( out(i,j,k,14)*out(i,j,k,14) + out(i,j,k,15)*out(i,j,k,15) + out(i,j,k,16)*out(i,j,k,16) );
+
+            // unfiltered x-velocity and scalar, for reference
+            out(i,j,k,18) = v_tot(i,j,k,0);
+            out(i,j,k,19) = sca_tot(i,j,k,0);
+        });
+    }
+
+    // plotfile name: "filtered_" + step, then the wavenumber band
+    std::ostringstream band;
+    band << std::setprecision(3) << kmin << "_" << kmax;
+    const std::string plotfilename = amrex::Concatenate("filtered_",step,9) + band.str();
+
+    const amrex::Vector<std::string> varNames = {
+        "ux",   "uy",     "uz",
+        "ux_s", "uy_s",   "uz_s",
+        "ux_d", "uy_d",   "uz_d",
+        "umag", "umag_s", "umag_d",
+        "rho",  "div",
+        "w1",   "w2",     "w3",
+        "vort", "ux_org", "rho_org"
+    };
+    WriteSingleLevelPlotfile(plotfilename,output,varNames,geom,0.0,step);
+}
+
+// Read one MultiFab from a checkpoint directory and copy it into mf.
+//
+// The on-disk prefix comes from
+// amrex::MultiFabFileFullPrefix(0, checkpointname, "Level_", mf_name).  The data
+// is first read into a temporary MultiFab and then ParallelCopy'd into
+// components [0, NVARS) of mf.
+//
+//   ghost != 0 : ngc ghost cells are used on both the source and destination
+//   ghost == 0 : no ghost cells are copied
+//
+// ba_old, dmap_old and nodal_flag are not used by this function.
+void Read_Copy_MF_Checkpoint(amrex::MultiFab& mf, std::string mf_name, const std::string& checkpointname, 
+                             BoxArray& ba_old, DistributionMapping& dmap_old,
+                             int NVARS, int ghost, const amrex::IntVect ngc, 
+                             int nodal_flag)
+{
+    // stage the file contents in a temporary MultiFab
+    MultiFab mf_from_disk;
+    VisMF::Read(mf_from_disk,amrex::MultiFabFileFullPrefix(0, checkpointname, "Level_", mf_name));
+
+    // valid region only
+    if (!ghost) {
+        mf.ParallelCopy(mf_from_disk, 0, 0, NVARS, 0, 0);
+        return;
+    }
+
+    // valid region plus ngc ghost cells
+    mf.ParallelCopy(mf_from_disk, 0, 0, NVARS, ngc, ngc);
+}
+
+// Copy ncomp components of a face-centered MultiFab into a cell-centered one
+// index-for-index: cc(i,j,k,cc_comp+n) = face(i,j,k,face_comp+n).  No averaging
+// is performed.  Aborts if face_in is not nodal in any direction.
+void ShiftFaceToCC(const MultiFab& face_in, int face_comp,
+                   MultiFab& cc_in, int cc_comp, int ncomp)
+{
+
+    BL_PROFILE_VAR("ShiftFaceToCC()",ShiftFaceToCC);
+
+    const bool nodal_somewhere = face_in.is_nodal(0) || face_in.is_nodal(1) || face_in.is_nodal(2);
+    if (!nodal_somewhere) {
+        Abort("ShiftFaceToCC requires a face-centered MultiFab");
+    }
+
+    // iterate over the tiles of the cell-centered destination
+    for (MFIter mfi(cc_in,TilingIfNotGPU()); mfi.isValid(); ++mfi) {
+
+        const Box& tile = mfi.tilebox();
+
+        Array4<Real const> const& src = face_in.array(mfi);
+        Array4<Real>       const& dst = cc_in.array(mfi);
+
+        amrex::ParallelFor(tile, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+        {
+            dst(i,j,k,cc_comp+n) = src(i,j,k,face_comp+n);
+        });
+    }
+}
+
+// Face-centered gradient of a cell-centered field by two-point differences:
+//
+//   gphi[d](i,j,k,start_outcomp+n) += (phi(i,j,k,start_incomp+n) - phi(one cell lower in d)) / dx[d]
+//
+// for n in [0, ncomp).  When increment == 0 the target components are zeroed
+// first, so the result overwrites them; otherwise it is accumulated.
+// bccomp is not used by this function.
+//
+// NOTE(review): the stencil reads phi one cell below the nodal tile box and, on
+// the high side, at the upper nodal index, so phi_in presumably needs at least
+// one filled ghost cell - confirm against the callers.
+void ComputeGrad(const MultiFab & phi_in, std::array<MultiFab, 3> & gphi,
+                 int start_incomp, int start_outcomp, int ncomp, int bccomp, const Geometry & geom,
+                 int increment)
+{
+    BL_PROFILE_VAR("ComputeGrad()",ComputeGrad);
+
+    const GpuArray<Real, 3> dx = geom.CellSizeArray();
+
+    // overwrite mode: clear the destination so the += below acts as an assignment
+    if (increment == 0) {
+        for (auto& g : gphi) {
+            g.setVal(0.,start_outcomp,ncomp,0);
+        }
+    }
+
+    // iterate over the tiles of the cell-centered input
+    for ( MFIter mfi(phi_in,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+
+        const Array4<Real const> & phi = phi_in.array(mfi);
+
+        const Array4<Real> & gx = gphi[0].array(mfi);
+        const Array4<Real> & gy = gphi[1].array(mfi);
+        const Array4<Real> & gz = gphi[2].array(mfi);
+
+        const Box & xfaces = mfi.nodaltilebox(0);
+        const Box & yfaces = mfi.nodaltilebox(1);
+        const Box & zfaces = mfi.nodaltilebox(2);
+
+        // one fused launch covering the x-, y- and z-face boxes
+        amrex::ParallelFor(
+            xfaces, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+            {
+                gx(i,j,k,start_outcomp+n) += (phi(i,j,k,start_incomp+n)-phi(i-1,j,k,start_incomp+n)) / dx[0];
+            },
+            yfaces, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+            {
+                gy(i,j,k,start_outcomp+n) += (phi(i,j,k,start_incomp+n)-phi(i,j-1,k,start_incomp+n)) / dx[1];
+            },
+            zfaces, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+            {
+                gz(i,j,k,start_outcomp+n) += (phi(i,j,k,start_incomp+n)-phi(i,j,k-1,start_incomp+n)) / dx[2];
+            });
+
+    } // end MFIter
+}
+
+// Raise one component of each face-centered MultiFab to an integer power and
+// sum the result with SumStag.
+//
+//   m1       : face-centered inputs, one per direction
+//   comps    : comps[d] is the component of m1[d] that is used
+//   mscr     : scratch storage; component 0 of mscr[d] is overwritten
+//   power    : exponent; values <= 1 leave a plain copy of the input
+//   prod_val : receives the sums computed by SumStag(mscr, prod_val)
+void FCMoments(const std::array<MultiFab, 3>& m1,
+               const amrex::Vector<int>& comps,
+               std::array<MultiFab, 3>&  mscr,
+               const int& power,
+               amrex::Vector<amrex::Real>& prod_val)
+{
+
+  BL_PROFILE_VAR("FCMoments()",FCMoments);
+
+  for (int dir=0; dir<3; ++dir) {
+    const int src_comp = comps[dir];
+
+    // start from the first power ...
+    MultiFab::Copy(mscr[dir],m1[dir],src_comp,0,1,0);
+
+    // ... then multiply in the remaining (power-1) factors
+    for (int p=power; p>1; --p) {
+      MultiFab::Multiply(mscr[dir],m1[dir],src_comp,0,1,0);
+    }
+  }
+
+  SumStag(mscr,prod_val);
+}
+
+// Weighted sum of component 0 of each face-centered MultiFab in m1, reduced
+// over all MPI ranks.
+//
+//   m1  : face-centered inputs, one per direction
+//   sum : must hold at least 3 entries; every entry is first set to zero, then
+//         sum[d] receives the result for m1[d]
+//
+// In direction d, faces whose d-index equals the low or high end of the grid's
+// valid box are weighted by 1/2 and all other faces by 1.
+// NOTE(review): presumably the half weight compensates for such faces being
+// stored by two adjacent grids - confirm.
+void SumStag(const std::array<MultiFab, 3>& m1,
+             amrex::Vector<amrex::Real>& sum)
+{
+  BL_PROFILE_VAR("SumStag()",SumStag);
+
+  // sum[0..2] are written below
+  AMREX_ALWAYS_ASSERT_WITH_MESSAGE(sum.size() >= 3,
+      "SumStag: sum must hold at least 3 entries");
+
+  // Initialize to zero
+  std::fill(sum.begin(), sum.end(), 0.);
+
+  for (int dir = 0; dir < 3; ++dir)
+  {
+      // each direction gets its own ReduceOps/ReduceData pair so that no state
+      // is shared between the three independent reductions
+      ReduceOps<ReduceOpSum> reduce_op;
+      ReduceData<Real> reduce_data(reduce_op);
+      using ReduceTuple = typename decltype(reduce_data)::Type;
+
+      for (MFIter mfi(m1[dir],TilingIfNotGPU()); mfi.isValid(); ++mfi)
+      {
+          const Box& bx = mfi.tilebox();
+          const Box& bx_grid = mfi.validbox();
+
+          auto const& fab = m1[dir].array(mfi);
+
+          // first and last face index of this grid along the normal direction
+          const int lo = bx_grid.smallEnd(dir);
+          const int hi = bx_grid.bigEnd(dir);
+
+          reduce_op.eval(bx, reduce_data,
+          [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
+          {
+              const int idx = (dir == 0) ? i : ((dir == 1) ? j : k);
+              const Real weight = (idx>lo && idx<hi) ? 1.0 : 0.5;
+              return {fab(i,j,k)*weight};
+          });
+      }
+
+      // finish the on-rank reduction, then combine across MPI ranks
+      sum[dir] = amrex::get<0>(reduce_data.value());
+      ParallelDescriptor::ReduceRealSum(sum[dir]);
+  }
+}
+
+// Raise one component of a cell-centered MultiFab to an integer power and sum
+// the result with SumCC.
+//
+//   m1       : input MultiFab
+//   comp1    : component of m1 that is used
+//   mscr     : scratch storage; its component 0 is overwritten
+//   power    : exponent; values <= 1 leave a plain copy of the input
+//   prod_val : receives the sum (SumCC is called with divide_by_ncells = false)
+void CCMoments(const amrex::MultiFab& m1,
+               const int& comp1,
+               amrex::MultiFab& mscr,
+               const int& power,
+               amrex::Real& prod_val)
+{
+
+  BL_PROFILE_VAR("CCMoments()",CCMoments);
+
+  // first power ...
+  MultiFab::Copy(mscr,m1,comp1,0,1,0);
+
+  // ... then the remaining (power-1) factors
+  for (int p=power; p>1; --p) {
+    MultiFab::Multiply(mscr,m1,comp1,0,1,0);
+  }
+
+  prod_val = 0.;
+  SumCC(mscr,0,prod_val,false);
+}
+
+// Sum one component of a cell-centered MultiFab via MultiFab::sum(comp, false).
+//
+//   m1               : input MultiFab
+//   comp             : component to sum
+//   sum              : receives the result
+//   divide_by_ncells : when true, the sum is divided by the number of points
+//                      in m1's BoxArray, i.e. the mean is returned
+void SumCC(const amrex::MultiFab& m1,
+           const int& comp,
+           amrex::Real& sum,
+           const bool& divide_by_ncells)
+{
+  BL_PROFILE_VAR("SumCC()",SumCC);
+
+  sum = m1.sum(comp, false);
+
+  if (divide_by_ncells) {
+    const long ncells = m1.boxArray().numPts();
+    sum = sum/static_cast<double>(ncells);
+  }
+}
+
+
diff --git a/exec/compressible_stag/TURB_PDFS/GNUmakefile b/exec/compressible_stag/TURB_PDFS/GNUmakefile
new file mode 100644
index 000000000..2f6bd7e1c
--- /dev/null
+++ b/exec/compressible_stag/TURB_PDFS/GNUmakefile
@@ -0,0 +1,37 @@
+AMREX_HOME ?= ../../../../amrex/
+# Build-type toggle: the later assignment wins, so DEBUG is FALSE as written.
+DEBUG	= TRUE
+DEBUG	= FALSE
+
+DIM	= 3
+
+COMP    = gcc
+
+PRECISION = DOUBLE
+# Parallel back ends; the build_*.sh scripts in this directory pass USE_CUDA=FALSE on the make command line.
+USE_MPI   = TRUE
+USE_OMP   = FALSE
+USE_CUDA  = TRUE
+
+TINY_PROFILE = FALSE
+
+###################################################
+# EBASE selects the driver that is built: Make.package compiles ${EBASE}.cpp.
+#EBASE     = main
+#EBASE     = main_decomp
+EBASE     = main_multisteps
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.defs
+# NOTE(review): ./Make.package also includes Src/Base/Make.package, so that file is read twice - confirm this is intended.
+include ./Make.package
+include $(AMREX_HOME)/Src/Base/Make.package
+
+vpath %.c   : . $(vpathdir)
+vpath %.h   : . $(vpathdir)
+vpath %.cpp : . $(vpathdir)
+vpath %.H   : . $(vpathdir)
+vpath %.F   : . $(vpathdir)
+vpath %.f   : . $(vpathdir)
+vpath %.f90 : . $(vpathdir)
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.rules
diff --git a/exec/compressible_stag/TURB_PDFS/Make.package b/exec/compressible_stag/TURB_PDFS/Make.package
new file mode 100644
index 000000000..ddbf6fb70
--- /dev/null
+++ b/exec/compressible_stag/TURB_PDFS/Make.package
@@ -0,0 +1,13 @@
+CEXE_sources += ${EBASE}.cpp
+# AMReX Src/Base: add its include path, its package list and its directory to the vpath search list
+INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/Base
+include $(AMREX_HOME)/Src/Base/Make.package
+vpathdir += $(AMREX_HOME)/Src/Base
+# AMReX Src/Extern/amrdata: same three steps
+INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/Extern/amrdata
+include $(AMREX_HOME)/Src/Extern/amrdata/Make.package
+vpathdir += $(AMREX_HOME)/Src/Extern/amrdata
+# Tools/C_util is left commented out (not part of the build)
+#INCLUDE_LOCATIONS += $(AMREX_HOME)/Tools/C_util
+#include $(AMREX_HOME)/Tools/C_util/Make.package
+#vpathdir += $(AMREX_HOME)/Tools/C_util
diff --git a/exec/compressible_stag/TURB_PDFS/build_frontier.sh b/exec/compressible_stag/TURB_PDFS/build_frontier.sh
new file mode 100755
index 000000000..36b9b69a0
--- /dev/null
+++ b/exec/compressible_stag/TURB_PDFS/build_frontier.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/bash
+
+## load necessary modules 
+module load PrgEnv-cray
+module load cray-mpich
+module load cce 
+
+# compiler environment hints
+export CC=$(which craycc)
+export CXX=$(which craycc)
+export FC=$(which crayftn)
+
+make -j10 USE_CUDA=FALSE USE_HIP=FALSE USE_ASSERTION=TRUE 
diff --git a/exec/compressible_stag/TURB_PDFS/build_perlmutter.sh b/exec/compressible_stag/TURB_PDFS/build_perlmutter.sh
new file mode 100755
index 000000000..37901f67e
--- /dev/null
+++ b/exec/compressible_stag/TURB_PDFS/build_perlmutter.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/bash
+# Build helper: loads the GNU programming environment, points CC/CXX/FC at the cc/CC/ftn wrappers and runs make.
+# required dependencies
+module load PrgEnv-gnu
+module load craype
+module load craype-x86-milan
+
+module list
+
+# optimize CPU microarchitecture for AMD EPYC 3rd Gen (Milan/Zen3)
+# note: the cc/CC/ftn wrappers below add those
+export CXXFLAGS="-march=znver3"
+export CFLAGS="-march=znver3"
+
+# compiler environment hints
+export CC=cc
+export CXX=CC
+export FC=ftn
+# CUDA off, assertions on, DEBUG off; MAX_SPEC=2 is forwarded to the makefiles on the command line
+make -j10 USE_CUDA=FALSE MAX_SPEC=2 USE_ASSERTION=TRUE DEBUG=FALSE
diff --git a/exec/compressible_stag/TURB_PDFS/main.cpp b/exec/compressible_stag/TURB_PDFS/main.cpp
new file mode 100644
index 000000000..2a62e5ce9
--- /dev/null
+++ b/exec/compressible_stag/TURB_PDFS/main.cpp
@@ -0,0 +1,410 @@
+#include <fstream>
+#include <iostream>
+
+#include <AMReX_ParmParse.H>
+#include <AMReX_PlotFileUtil.H>
+
+using namespace amrex;
+using namespace std;
+
+// Emit the command-line synopsis through amrex::Print and terminate the process with exit status 1.
+// progName: text shown as the executable name (the caller passes argv[0]).
+static void PrintUsage (const char* progName)
+{
+    // one-sentence description of the tool
+    amrex::Print() << std::endl
+                   << "This utility computes PDF of scalars, and various powers of Laplacian of velocity field," << std::endl;
+    // two accepted invocation forms: an inputs file, or key=value pairs on the command line
+    amrex::Print() << "Usage:" << '\n';
+    amrex::Print() << progName << " <inputs>" << std::endl
+                   << "OR" << std::endl
+                   << progName << std::endl
+                   << " step=<step number of plotfile to be read>" << std::endl
+                   << " nbins=<number of bins> " << std::endl
+                   << " range=<lo/hi end of range> " << std::endl
+                   << std::endl;
+    exit(1);
+}
+
+
+int main (int argc, char* argv[])
+{
+    amrex::Initialize(argc,argv);
+    // Driver: read plotfile plt<step>, then write PDFs of the velocity (and its repeated Laplacians) and of rho/p/T.
+    {
+        // Runtime inputs: step (plotfile number), nbins (histogram bins), range (half-width of the normalized axis).
+        if (argc == 1) {
+            PrintUsage(argv[0]);
+        }
+
+        ParmParse pp;
+        // query() does not fail when "step" is absent, so give the variable a defined default
+        int step = 0;
+        pp.query("step",step);
+
+        std::string iFile         = amrex::Concatenate("plt",step,9);
+
+        Vector<std::string> scalar_out(3);
+        scalar_out[0] = amrex::Concatenate("rho_pdf",step,9);
+        scalar_out[1] = amrex::Concatenate("press_pdf",step,9);
+        scalar_out[2] = amrex::Concatenate("temp_pdf",step,9);
+        Vector<std::string> Lap_out(5);
+        Lap_out[0] = amrex::Concatenate("L0_pdf",step,9);
+        Lap_out[1] = amrex::Concatenate("L1_pdf",step,9);
+        Lap_out[2] = amrex::Concatenate("L2_pdf",step,9);
+        Lap_out[3] = amrex::Concatenate("L3_pdf",step,9);
+        Lap_out[4] = amrex::Concatenate("L4_pdf",step,9);
+
+        int nbins;
+        pp.get("nbins", nbins);
+
+        Real range;
+        pp.get("range",range);
+
+        amrex::Print() << "Reading from plotfile " << iFile << "\n";
+
+        // for the Header
+        std::string iFile2 = iFile;
+        iFile2 += "/Header";
+
+        // open header
+        ifstream x;
+        x.open(iFile2.c_str(), ios::in);
+        if (!x.is_open()) { amrex::Abort("Could not open plotfile header " + iFile2); }
+        // read in first line of header (typically "HyperCLaw-V1.1" or similar)
+        std::string str;
+        x >> str;
+
+        // read in number of components from header
+        int ncomp;
+        x >> ncomp;
+
+        // read in variable names from header and remember the component index of the ones we need
+        int flag = 0;
+        int rho_ind = -1, press_ind = -1, temp_ind = -1, velx_ind = -1; // -1 == not found
+        for (int n=0; n<ncomp; ++n) {
+            x >> str;
+            if (str == "rhoInstant") rho_ind = flag;
+            if (str == "pInstant") press_ind = flag;
+            if (str == "tInstant") temp_ind = flag;
+            if (str == "uxInstantFACE") velx_ind = flag;
+            flag ++;
+        }
+        if (rho_ind < 0 || press_ind < 0 || temp_ind < 0 || velx_ind < 0) { amrex::Abort("plotfile header is missing rhoInstant, pInstant, tInstant or uxInstantFACE"); }
+        // read in dimensionality from header
+        int dim;
+        x >> dim;
+
+        // read in time
+        Real time;
+        x >> time;
+
+        // read in finest level
+        int finest_level;
+        x >> finest_level;
+
+        // read in prob_lo and prob_hi
+        amrex::GpuArray<amrex::Real, 3> prob_lo, prob_hi;
+        for (int i=0; i<3; ++i) {
+            x >> prob_lo[i];
+        }
+        for (int i=0; i<3; ++i) {
+            x >> prob_hi[i];
+        }
+
+        // now read in the plotfile data
+        // check to see whether the user pointed to the plotfile base directory
+        // or the data itself
+        if (amrex::FileExists(iFile+"/Level_0/Cell_H")) {
+           iFile += "/Level_0/Cell";
+        }
+        if (amrex::FileExists(iFile+"/Level_00/Cell_H")) {
+           iFile += "/Level_00/Cell";
+        }
+
+        // storage for the plotfile data
+        MultiFab mf;
+
+        // read in plotfile mf to MultiFab
+        VisMF::Read(mf, iFile);
+
+        // get BoxArray and DistributionMapping
+        BoxArray ba = mf.boxArray();
+        DistributionMapping dmap = mf.DistributionMap();
+
+        // physical dimensions of problem
+        RealBox real_box({AMREX_D_DECL(prob_lo[0],prob_lo[1],prob_lo[2])},
+                         {AMREX_D_DECL(prob_hi[0],prob_hi[1],prob_hi[2])});
+
+        // single box with the entire domain
+        Box domain = ba.minimalBox().enclosedCells();
+
+        Real ncells = (double) domain.numPts();
+
+        // set to 1 (periodic)
+        Vector<int> is_periodic(3,1);
+
+        Geometry geom(domain,&real_box,CoordSys::cartesian,is_periodic.data());
+
+        const Real* dx = geom.CellSize();
+
+        ////////////////////////////////////////////////////////////////////////
+        ////////////// velocity Laplacian PDFs /////////////////////////////////
+        ////////////////////////////////////////////////////////////////////////
+        MultiFab vel_grown(ba,dmap,3,1);
+        MultiFab laplacian(ba,dmap,3,1);
+
+        // copy the 3 components starting at velx_ind into both work arrays (assumes uy/uz directly follow ux - TODO confirm)
+        Copy(vel_grown,mf,velx_ind,0,3,0);
+        Copy(laplacian,mf,velx_ind,0,3,0);
+
+        // fill ghost cells of vel_grown
+        vel_grown.FillBoundary(geom.periodicity());
+        laplacian.FillBoundary(geom.periodicity());
+
+        for (int m=0; m<5; ++m) {
+            // pass m bins the current contents of laplacian: the velocity itself for m=0, the m-times applied operator after
+            Vector<Real> L2(3,0.);
+            // per-component root-mean-square over the valid cells: sum of squares is accumulated here,
+            // then reduced over MPI ranks and normalized by the number of domain cells below
+
+            for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) {
+
+                const Box& bx = mfi.validbox();
+                const auto lo = amrex::lbound(bx);
+                const auto hi = amrex::ubound(bx);
+
+                const Array4<Real>& lap = laplacian.array(mfi);
+
+                for (auto n=0; n<3; ++n) {
+                for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                for (auto i = lo.x; i <= hi.x; ++i) {
+
+                    L2[n] += lap(i,j,k,n)*lap(i,j,k,n);
+
+                }
+                }
+                }
+                }
+
+            } // end MFIter
+
+            ParallelDescriptor::ReduceRealSum(L2.dataPtr(),3);
+            amrex::Long totpts =  domain.numPts();
+            L2[0] = sqrt(L2[0]/totpts);
+            L2[1] = sqrt(L2[1]/totpts);
+            L2[2] = sqrt(L2[2]/totpts);
+            Print() << "L2 norm of Laplacian to power " << m << " is " << L2[0] 
+                    << " "  << L2[1] << " "  << L2[2] << " " << std::endl;
+
+            for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) {
+
+                const Box& bx = mfi.validbox();
+                const auto lo = amrex::lbound(bx);
+                const auto hi = amrex::ubound(bx);
+
+                const Array4<Real>& lap = laplacian.array(mfi);
+
+                for (auto n=0; n<3; ++n) {
+                for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                for (auto i = lo.x; i <= hi.x; ++i) {
+
+                    lap(i,j,k,n) = lap(i,j,k,n)/L2[n];
+
+                }
+                }
+                }
+                }
+
+            } // end MFIter
+
+            Vector<Real> bins(nbins+1,0.);
+
+            int halfbin = nbins/2;
+            Real hbinwidth = range/nbins;
+            Real binwidth = 2.*range/nbins;
+            amrex::Long count=0;
+            amrex::Long totbin=0;
+            // bin i is centred on (i-halfbin)*binwidth; samples whose index falls outside [0,nbins] only increase count
+
+            for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) {
+
+                const Box& bx = mfi.validbox();
+                const auto lo = amrex::lbound(bx);
+                const auto hi = amrex::ubound(bx);
+
+                const Array4<Real>& lap = laplacian.array(mfi);
+
+                for (auto n=0; n<3; ++n) {
+                for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                for (auto i = lo.x; i <= hi.x; ++i) {
+
+                    int index = floor((lap(i,j,k,n) + hbinwidth)/binwidth);
+                    index += halfbin;
+
+                    if( index >=0 && index <= nbins) {
+                        bins[index] += 1;
+                        totbin++;
+                    }
+
+                    count++;
+
+                }
+                }
+                }
+                }
+
+            } // end MFIter
+
+            ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1);
+            ParallelDescriptor::ReduceLongSum(count);
+            ParallelDescriptor::ReduceLongSum(totbin);
+            Print() << "Points outside of range "<< count - totbin << " " << 
+                       (double)(count-totbin)/count << std::endl;
+
+            // print out contents of bins to the screen
+            for (int i=0; i<nbins+1; ++i) {
+                Print() << "For  m= "<< m << " " <<  (i-halfbin)*binwidth << " " 
+                        << bins[i]/(count*binwidth) << std::endl;
+            }
+            if (ParallelDescriptor::IOProcessor()) {
+                std::ofstream outfile;
+                outfile.open(Lap_out[m]);
+                for (int i=0; i<nbins+1; ++i) {
+                    outfile << (i-halfbin)*binwidth << " " << bins[i]/(count*binwidth) << std::endl;
+                }
+                outfile.close();
+            }
+
+            for ( MFIter mfi(vel_grown,false); mfi.isValid(); ++mfi ) {
+
+                const Box& bx = mfi.validbox();
+                const auto lo = amrex::lbound(bx);
+                const auto hi = amrex::ubound(bx);
+
+                const Array4<Real>& vel = vel_grown.array(mfi);
+                const Array4<Real>& lap = laplacian.array(mfi);
+
+                for (auto n=0; n<3; ++n) {
+                for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                for (auto i = lo.x; i <= hi.x; ++i) {
+                    // negative 7-point Laplacian of vel: centred second differences in x, y and z
+                    lap(i,j,k,n) = -(vel(i+1,j,k,n) - 2.*vel(i,j,k,n) + vel(i-1,j,k,n)) / (dx[0]*dx[0])
+                                   -(vel(i,j+1,k,n) - 2.*vel(i,j,k,n) + vel(i,j-1,k,n)) / (dx[1]*dx[1])
+                                   -(vel(i,j,k+1,n) - 2.*vel(i,j,k,n) + vel(i,j,k-1,n)) / (dx[2]*dx[2]);
+                }
+                }
+                }
+                }
+
+            } // end MFIter
+
+            // copy lap into vel_grown so the next pass applies the operator once more
+            Copy(vel_grown,laplacian,0,0,3,0);
+
+            // fill ghost cells of vel_grown
+            vel_grown.FillBoundary(geom.periodicity());
+
+        } // end loop
+        ////////////////////////////////////////////////////////////////////////
+        ////////////// velocity Laplacian PDFs /////////////////////////////////
+        ////////////////////////////////////////////////////////////////////////
+
+        ////////////////////////////////////////////////////////////////////////
+        ///////////////////////// scalar  PDFs /////////////////////////////////
+        ////////////////////////////////////////////////////////////////////////
+        MultiFab scalar(ba,dmap,3,0);
+        Copy(scalar,mf,rho_ind,0,1,0);
+        Copy(scalar,mf,press_ind,1,1,0);
+        Copy(scalar,mf,temp_ind,2,1,0);
+
+        // compute spatial mean
+        Real mean_rho   = scalar.sum(0) / (ncells);
+        Real mean_press = scalar.sum(1) / (ncells);
+        Real mean_temp  = scalar.sum(2) / (ncells);
+
+        // get fluctuations
+        scalar.plus(-1.0*mean_rho,   0, 1);
+        scalar.plus(-1.0*mean_press, 1, 1);
+        scalar.plus(-1.0*mean_temp,  2, 1);
+
+        // get rms
+        Real rms_rho   = scalar.norm2(0) / sqrt(ncells);
+        Real rms_press = scalar.norm2(1) / sqrt(ncells);
+        Real rms_temp  = scalar.norm2(2) / sqrt(ncells);
+
+        // scale by rms
+        scalar.mult(1.0/rms_rho,   0, 1);
+        scalar.mult(1.0/rms_press, 1, 1);
+        scalar.mult(1.0/rms_temp,  2, 1);
+
+        // now compute pdfs
+        for (int m = 0; m < 3; ++m) {
+
+            Vector<Real> bins(nbins+1,0.);
+
+            int halfbin = nbins/2;
+            Real hbinwidth = range/nbins;
+            Real binwidth = 2.*range/nbins;
+            amrex::Long count=0;
+            amrex::Long totbin=0;
+            // same binning rule as for the velocity PDFs: bin i is centred on (i-halfbin)*binwidth
+
+            for ( MFIter mfi(scalar,false); mfi.isValid(); ++mfi ) {
+
+                const Box& bx = mfi.validbox();
+                const auto lo = amrex::lbound(bx);
+                const auto hi = amrex::ubound(bx);
+
+                const Array4<Real>& sca = scalar.array(mfi);
+
+                for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                for (auto i = lo.x; i <= hi.x; ++i) {
+
+                    int index = floor((sca(i,j,k,m) + hbinwidth)/binwidth);
+                    index += halfbin;
+
+                    if( index >=0 && index <= nbins) {
+                        bins[index] += 1;
+                        totbin++;
+                    }
+
+                    count++;
+
+                }
+                }
+                }
+
+            } // end MFIter
+
+            ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1);
+            ParallelDescriptor::ReduceLongSum(count);
+            ParallelDescriptor::ReduceLongSum(totbin);
+            Print() << "Points outside of range "<< count - totbin << " " << 
+                       (double)(count-totbin)/count << std::endl;
+
+            // print out contents of bins to the screen
+            for (int i=0; i<nbins+1; ++i) {
+                Print() << "For scalar m = "<< m << " " <<  (i-halfbin)*binwidth << " " 
+                        << bins[i]/(count*binwidth) << std::endl;
+            }
+            if (ParallelDescriptor::IOProcessor()) {
+                std::ofstream outfile;
+                outfile.open(scalar_out[m]);
+                for (int i=0; i<nbins+1; ++i) {
+                    outfile << (i-halfbin)*binwidth << " " << bins[i]/(count*binwidth) << std::endl;
+                }
+                outfile.close();
+            }
+        }
+    }
+
+    amrex::Finalize();
+
+}
+
diff --git a/exec/compressible_stag/TURB_PDFS/main_decomp.cpp b/exec/compressible_stag/TURB_PDFS/main_decomp.cpp
new file mode 100644
index 000000000..8a892b33b
--- /dev/null
+++ b/exec/compressible_stag/TURB_PDFS/main_decomp.cpp
@@ -0,0 +1,754 @@
+#include <fstream>
+#include <iostream>
+
+#include <AMReX_ParmParse.H>
+#include <AMReX_PlotFileUtil.H>
+#include <AMReX_VisMF.H>
+
+using namespace amrex;
+using namespace std;
+
+// Emit the command-line synopsis through amrex::Print and terminate the process with exit status 1.
+// progName: text shown as the executable name (the caller passes argv[0]).
+static void PrintUsage (const char* progName)
+{
+    // one-sentence description of the tool
+    amrex::Print() << std::endl
+                   << "This utility computes PDF of vorticity and divergence, and various powers of Laplacian of solenoidal and dilatational velocity field," << std::endl;
+    // two accepted invocation forms: an inputs file, or key=value pairs on the command line
+    amrex::Print() << "Usage:" << '\n';
+    amrex::Print() << progName << " <inputs>" << std::endl
+                   << "OR" << std::endl
+                   << progName << std::endl
+                   << " step=<step number of plotfile to be read>" << std::endl
+                   << " nbins=<number of bins> " << std::endl
+                   << " range=<lo/hi end of range> " << std::endl
+                   << std::endl;
+    exit(1);
+}
+
+
+int main (int argc, char* argv[])
+{
+    amrex::Initialize(argc,argv);
+
+    {
+
+        if (argc == 1) {
+            PrintUsage(argv[0]);
+        }
+        
+        ParmParse pp;
+        
+        int step;
+        pp.query("step",step);
+        
+        std::string iFile         = amrex::Concatenate("vel_grad_decomp",step,9);
+
+        Vector<std::string> scalar_out(5);
+        scalar_out[0] = amrex::Concatenate("div_pdf",step,9);
+        scalar_out[1] = amrex::Concatenate("vortx_pdf",step,9);
+        scalar_out[2] = amrex::Concatenate("vorty_pdf",step,9);
+        scalar_out[3] = amrex::Concatenate("vortz_pdf",step,9);
+        scalar_out[4] = amrex::Concatenate("vort_pdf",step,9);
+        Vector<std::string> Lap_out_sol(5);
+        Lap_out_sol[0] = amrex::Concatenate("L0_pdf_sol",step,9);
+        Lap_out_sol[1] = amrex::Concatenate("L1_pdf_sol",step,9);
+        Lap_out_sol[2] = amrex::Concatenate("L2_pdf_sol",step,9);
+        Lap_out_sol[3] = amrex::Concatenate("L3_pdf_sol",step,9);
+        Lap_out_sol[4] = amrex::Concatenate("L4_pdf_sol",step,9);
+        Vector<std::string> Lap_out_dil(5);
+        Lap_out_dil[0] = amrex::Concatenate("L0_pdf_dil",step,9);
+        Lap_out_dil[1] = amrex::Concatenate("L1_pdf_dil",step,9);
+        Lap_out_dil[2] = amrex::Concatenate("L2_pdf_dil",step,9);
+        Lap_out_dil[3] = amrex::Concatenate("L3_pdf_dil",step,9);
+        Lap_out_dil[4] = amrex::Concatenate("L4_pdf_dil",step,9);
+               
+        int nbins;
+        pp.get("nbins", nbins);
+
+        Real range;
+        pp.get("range",range);
+
+        amrex::Print() << "Reading from vel_grad_decomp plotfile " << iFile << "\n";
+
+        // for the Header
+        std::string iFile2 = iFile;
+        iFile2 += "/Header";
+
+        // open header
+        ifstream x;
+        x.open(iFile2.c_str(), ios::in);
+
+        // read in first line of header (typically "HyperCLaw-V1.1" or similar)
+        std::string str;
+        x >> str;
+
+        // read in number of components from header
+        int ncomp;
+        x >> ncomp;
+
+        // read in variable names from header
+        int flag = 0;
+        int vort_ind, div_ind, velx_sol_ind, vely_sol_ind, velz_sol_ind, velx_dil_ind, vely_dil_ind, velz_dil_ind;
+        for (int n=0; n<ncomp; ++n) {
+            x >> str;
+            if (str == "vort") vort_ind = flag;
+            if (str == "div")  div_ind = flag;
+            if (str == "ux_s") velx_sol_ind = flag;
+            if (str == "uy_s") vely_sol_ind = flag;
+            if (str == "uz_s") velz_sol_ind = flag;
+            if (str == "ux_d") velx_dil_ind = flag;
+            if (str == "uy_d") vely_dil_ind = flag;
+            if (str == "uz_d") velz_dil_ind = flag;
+            flag ++;
+        }
+
+        // read in dimensionality from header
+        int dim;
+        x >> dim;
+
+        // read in time
+        Real time;
+        x >> time;
+
+        // read in finest level
+        int finest_level;
+        x >> finest_level;
+
+        // read in prob_lo and prob_hi
+        amrex::GpuArray<amrex::Real, 3> prob_lo, prob_hi;
+        for (int i=0; i<3; ++i) {
+            x >> prob_lo[i];        
+        }
+        for (int i=0; i<3; ++i) {
+            x >> prob_hi[i];        
+        }
+        
+        // now read in the plotfile data
+        // check to see whether the user pointed to the plotfile base directory
+        // or the data itself
+        if (amrex::FileExists(iFile+"/Level_0/Cell_H")) {
+           iFile += "/Level_0/Cell";
+        }
+        if (amrex::FileExists(iFile+"/Level_00/Cell_H")) {
+           iFile += "/Level_00/Cell";
+        }
+
+        // storage for the input coarse and fine MultiFabs
+        MultiFab mf;
+
+        // read in plotfile mf to MultiFab
+        VisMF::Read(mf, iFile);
+
+        // get BoxArray and DistributionMapping
+        BoxArray ba = mf.boxArray();
+        DistributionMapping dmap = mf.DistributionMap();
+
+        // physical dimensions of problem
+        RealBox real_box({AMREX_D_DECL(prob_lo[0],prob_lo[1],prob_lo[2])},
+                         {AMREX_D_DECL(prob_hi[0],prob_hi[1],prob_hi[2])});
+
+        // single box with the enire domain
+        Box domain = ba.minimalBox().enclosedCells();
+
+        Real ncells = (double) domain.numPts();
+
+        // set to 1 (periodic)
+        Vector<int> is_periodic(3,1);
+        
+        Geometry geom(domain,&real_box,CoordSys::cartesian,is_periodic.data());
+        
+        const Real* dx = geom.CellSize();
+  
+        ////////////////////////////////////////////////////////////////////////
+        ////////////// velocity Laplacian PDFs///////////// ////////////////////
+        ////////////////////////////////////////////////////////////////////////
+        MultiFab vel_grown(ba,dmap,6,1);
+        MultiFab vel_sol  (ba,dmap,3,1);
+        MultiFab laplacian(ba,dmap,6,1);
+
+        // copy shifted velocity components from mf into vel_grown
+        Copy(vel_grown,mf,velx_sol_ind,0,1,0); // sol
+        Copy(vel_grown,mf,vely_sol_ind,1,1,0); // sol
+        Copy(vel_grown,mf,velz_sol_ind,2,1,0); // sol
+        
+        Copy(laplacian,mf,velx_sol_ind,0,1,0); // sol
+        Copy(laplacian,mf,vely_sol_ind,1,1,0); // sol
+        Copy(laplacian,mf,velz_sol_ind,2,1,0); // sol
+        
+        Copy(vel_grown,mf,velx_dil_ind,3,1,0); // dil
+        Copy(vel_grown,mf,vely_dil_ind,4,1,0); // dil
+        Copy(vel_grown,mf,velz_dil_ind,5,1,0); // dil
+        
+        Copy(laplacian,mf,velx_dil_ind,3,1,0); // dil
+        Copy(laplacian,mf,vely_dil_ind,4,1,0); // dil
+        Copy(laplacian,mf,velz_dil_ind,5,1,0); // dil
+
+        Copy(vel_sol,mf,velx_sol_ind,0,1,0); // sol
+        Copy(vel_sol,mf,vely_sol_ind,1,1,0); // sol
+        Copy(vel_sol,mf,velz_sol_ind,2,1,0); // sol
+
+        // fill ghost cells of vel_grown
+        vel_grown.FillBoundary(geom.periodicity());
+        laplacian.FillBoundary(geom.periodicity());
+        vel_sol  .FillBoundary(geom.periodicity());
+
+        for (int m=0; m<5; ++m) {
+            
+            Vector<Real> L2(6,0.);
+            for (int i=0; i<6; i++)
+                L2[i]=0.;
+
+            for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) {
+
+                const Box& bx = mfi.validbox();
+                const auto lo = amrex::lbound(bx);
+                const auto hi = amrex::ubound(bx);
+                
+                const Array4<Real>& lap = laplacian.array(mfi);
+
+                for (auto n=0; n<6; ++n) {
+                for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                for (auto i = lo.x; i <= hi.x; ++i) {
+
+                    L2[n] += lap(i,j,k,n)*lap(i,j,k,n);
+                        
+                }
+                }
+                }
+                }
+
+            } // end MFIter
+
+            ParallelDescriptor::ReduceRealSum(L2.dataPtr(),6);
+            amrex::Long totpts =  domain.numPts();
+            L2[0] = sqrt(L2[0]/totpts);
+            L2[1] = sqrt(L2[1]/totpts);
+            L2[2] = sqrt(L2[2]/totpts);
+            L2[3] = sqrt(L2[3]/totpts);
+            L2[4] = sqrt(L2[4]/totpts);
+            L2[5] = sqrt(L2[5]/totpts);
+            Print() << "L2 norm of Laplacian (solenoidal) to power " << m << " is " << L2[0] 
+                    << " "  << L2[1] << " "  << L2[2] << " " << std::endl;
+            Print() << "L2 norm of Laplacian (dilational) to power " << m << " is " << L2[3] 
+                    << " "  << L2[4] << " "  << L2[5] << " " << std::endl;
+
+            for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) {
+
+                const Box& bx = mfi.validbox();
+                const auto lo = amrex::lbound(bx);
+                const auto hi = amrex::ubound(bx);
+
+                const Array4<Real>& lap = laplacian.array(mfi);
+
+                for (auto n=0; n<6; ++n) {
+                for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                for (auto i = lo.x; i <= hi.x; ++i) {
+
+                    lap(i,j,k,n) = lap(i,j,k,n)/L2[n];
+
+                }
+                }
+                }
+                }
+
+            } // end MFIter
+
+            Vector<Real> bins_sol(nbins+1,0.);
+            Vector<Real> bins_dil(nbins+1,0.);
+
+            int halfbin = nbins/2;
+            Real hbinwidth = range/nbins;
+            Real binwidth = 2.*range/nbins;
+            amrex::Long count_sol=0;
+            amrex::Long totbin_sol=0;
+            amrex::Long count_dil=0;
+            amrex::Long totbin_dil=0;
+            for (int ind=0 ; ind < nbins+1; ind++) bins_sol[ind]=0;
+            for (int ind=0 ; ind < nbins+1; ind++) bins_dil[ind]=0;
+
+            for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) {
+
+                const Box& bx = mfi.validbox();
+                const auto lo = amrex::lbound(bx);
+                const auto hi = amrex::ubound(bx);
+                
+                const Array4<Real>& lap = laplacian.array(mfi);
+
+                for (auto n=0; n<3; ++n) {
+                for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                for (auto i = lo.x; i <= hi.x; ++i) {
+
+                    int index = floor((lap(i,j,k,n) + hbinwidth)/binwidth);
+                    index += halfbin;
+                    
+                    if( index >=0 && index <= nbins) {
+                        bins_sol[index] += 1;
+                        totbin_sol++;
+                    }
+
+                    count_sol++;
+                        
+                }
+                }
+                }
+                }
+
+                for (auto n=3; n<6; ++n) {
+                for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                for (auto i = lo.x; i <= hi.x; ++i) {
+
+                    int index = floor((lap(i,j,k,n) + hbinwidth)/binwidth);
+                    index += halfbin;
+                    
+                    if( index >=0 && index <= nbins) {
+                        bins_dil[index] += 1;
+                        totbin_dil++;
+                    }
+
+                    count_dil++;
+                        
+                }
+                }
+                }
+                }
+
+            } // end MFIter
+
+            ParallelDescriptor::ReduceRealSum(bins_sol.dataPtr(),nbins+1);
+            ParallelDescriptor::ReduceLongSum(count_sol);
+            ParallelDescriptor::ReduceLongSum(totbin_sol);
+            ParallelDescriptor::ReduceRealSum(bins_dil.dataPtr(),nbins+1);
+            ParallelDescriptor::ReduceLongSum(count_dil);
+            ParallelDescriptor::ReduceLongSum(totbin_dil);
+            Print() << "Points outside of range (solenoidal) "<< count_sol - totbin_sol << " " << 
+                       (double)(count_sol-totbin_sol)/count_sol << std::endl;
+            Print() << "Points outside of range (dilational) "<< count_dil - totbin_dil << " " << 
+                       (double)(count_dil-totbin_dil)/count_dil << std::endl;
+
+            // print out contents of bins to the screen
+            for (int i=0; i<nbins+1; ++i) {
+                Print() << "(solenoidal) For  m= "<< m << " " <<  (i-halfbin)*binwidth << " " 
+                        << bins_sol[i]/(count_sol*binwidth) << std::endl;
+            }
+            for (int i=0; i<nbins+1; ++i) {
+                Print() << "(dilational) For  m= "<< m << " " <<  (i-halfbin)*binwidth << " " 
+                        << bins_dil[i]/(count_dil*binwidth) << std::endl;
+            }
+            if (ParallelDescriptor::IOProcessor()) {
+                std::ofstream outfile;
+                outfile.open(Lap_out_sol[m]);
+                for (int i=0; i<nbins+1; ++i) {
+                    outfile << (i-halfbin)*binwidth << " " << bins_sol[i]/(count_sol*binwidth) << std::endl;
+                }
+                outfile.close();
+                outfile.open(Lap_out_dil[m]);
+                for (int i=0; i<nbins+1; ++i) {
+                    outfile << (i-halfbin)*binwidth << " " << bins_dil[i]/(count_dil*binwidth) << std::endl;
+                }
+                outfile.close();
+            }
+        
+            for ( MFIter mfi(vel_grown,false); mfi.isValid(); ++mfi ) {
+
+                const Box& bx = mfi.validbox();
+                const auto lo = amrex::lbound(bx);
+                const auto hi = amrex::ubound(bx);
+
+                const Array4<Real>& vel = vel_grown.array(mfi);
+                const Array4<Real>& lap = laplacian.array(mfi);
+
+                for (auto n=0; n<6; ++n) {
+                for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                for (auto i = lo.x; i <= hi.x; ++i) {
+
+                    lap(i,j,k,n) = -(vel(i+1,j,k,n) - 2.*vel(i,j,k,n) + vel(i-1,j,k,n)) / (dx[0]*dx[0])
+                                   -(vel(i,j+1,k,n) - 2.*vel(i,j,k,n) + vel(i,j-1,k,n)) / (dx[1]*dx[1])
+                                   -(vel(i,j,k+1,n) - 2.*vel(i,j,k,n) + vel(i,j,k+1,n)) / (dx[2]*dx[2]);
+                }
+                }
+                }
+                }
+
+            } // end MFIter
+
+            // copy lap into vel_grown
+            Copy(vel_grown,laplacian,0,0,6,0);
+
+            // fill ghost cells of vel_grown
+            vel_grown.FillBoundary(geom.periodicity());
+
+        } // end loop
+        ////////////////////////////////////////////////////////////////////////
+        ////////////// velocity Laplacian PDFs //////////// ////////////////////
+        ////////////////////////////////////////////////////////////////////////
+  
+        ////////////////////////////////////////////////////////////////////////
+        ///////////////////////// scalar  PDFs /////////////////////////////////
+        ////////////////////////////////////////////////////////////////////////
+        MultiFab scalar(ba,dmap,4,0);    // vort_mag, div, vort_x, vort_y, vort_z
+        scalar.setVal(0.0);
+        Copy(scalar,mf,div_ind,0,1,0);
+
+        // Compute vorticity components and store in scalar
+        for ( MFIter mfi(vel_sol,false); mfi.isValid(); ++mfi ) {
+
+            const Box& bx = mfi.validbox();
+            const auto lo = amrex::lbound(bx);
+            const auto hi = amrex::ubound(bx);
+
+            Array4<Real const> const& sol  = vel_sol   .array(mfi);
+            Array4<Real>       const& sca  = scalar    .array(mfi);
+
+            for (auto k = lo.z; k <= hi.z; ++k) {
+            for (auto j = lo.y; j <= hi.y; ++j) {
+            for (auto i = lo.x; i <= hi.x; ++i) {
+                // dw/dy - dv/dz
+                sca(i,j,k,1) =
+                    (sol(i,j+1,k,velz_sol_ind) - sol(i,j-1,k,velz_sol_ind)) / (2.*dx[1]) -
+                    (sol(i,j,k+1,vely_sol_ind) - sol(i,j,k-1,vely_sol_ind)) / (2.*dx[2]);
+
+                // dv/dx - du/dy
+                sca(i,j,k,2) =
+                    (sol(i+1,j,k,vely_sol_ind) - sol(i-1,j,k,vely_sol_ind)) / (2.*dx[0]) -
+                    (sol(i,j+1,k,velx_sol_ind) - sol(i,j-1,k,velx_sol_ind)) / (2.*dx[1]);
+
+                // du/dz - dw/dx                
+                sca(i,j,k,3) =
+                    (sol(i,j,k+1,velx_sol_ind) - sol(i,j,k-1,velx_sol_ind)) / (2.*dx[2]) -
+                    (sol(i+1,j,k,velz_sol_ind) - sol(i-1,j,k,velz_sol_ind)) / (2.*dx[0]);
+
+            }
+            }
+            }
+        }
+
+        // compute spatial mean
+        Real mean_div     = scalar.sum(0) / (ncells);
+        Real mean_vortx   = scalar.sum(1) / (ncells);
+        Real mean_vorty   = scalar.sum(2) / (ncells);
+        Real mean_vortz   = scalar.sum(3) / (ncells);
+
+        // get fluctuations
+        scalar.plus(-1.0*mean_div,     0, 1);
+        scalar.plus(-1.0*mean_vortx,   1, 1);
+        scalar.plus(-1.0*mean_vorty,   2, 1);
+        scalar.plus(-1.0*mean_vortz,   3, 1);
+
+        // get rms
+        Real rms_div     = scalar.norm2(0) / sqrt(ncells);
+        Real rms_vortx   = scalar.norm2(1) / sqrt(ncells);
+        Real rms_vorty   = scalar.norm2(2) / sqrt(ncells);
+        Real rms_vortz   = scalar.norm2(3) / sqrt(ncells);
+
+        // scale by rms
+        scalar.mult(1.0/rms_div,     0, 1);
+        scalar.mult(1.0/rms_vortx,   1, 1);
+        scalar.mult(1.0/rms_vorty,   2, 1);
+        scalar.mult(1.0/rms_vortz,   3, 1);
+
+        // now compute pdfs
+        for (int m = 0; m < 4; ++m) {
+
+            Vector<Real> bins(nbins+1,0.);
+
+            int halfbin = nbins/2;
+            Real hbinwidth = range/nbins;
+            Real binwidth = 2.*range/nbins;
+            amrex::Long count=0;
+            amrex::Long totbin=0;
+            for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0;
+
+            for ( MFIter mfi(scalar,false); mfi.isValid(); ++mfi ) {
+
+                const Box& bx = mfi.validbox();
+                const auto lo = amrex::lbound(bx);
+                const auto hi = amrex::ubound(bx);
+                
+                const Array4<Real>& sca = scalar.array(mfi);
+
+                for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                for (auto i = lo.x; i <= hi.x; ++i) {
+
+                    int index = floor((sca(i,j,k,m) + hbinwidth)/binwidth);
+                    index += halfbin;
+                    
+                    if( index >=0 && index <= nbins) {
+                        bins[index] += 1;
+                        totbin++;
+                    }
+
+                    count++;
+                        
+                }
+                }
+                }
+
+            } // end MFIter
+
+            ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1);
+            ParallelDescriptor::ReduceLongSum(count);
+            ParallelDescriptor::ReduceLongSum(totbin);
+            Print() << "Points outside of range "<< count - totbin << " " << 
+                       (double)(count-totbin)/count << std::endl;
+
+            // print out contents of bins to the screen
+            for (int i=0; i<nbins+1; ++i) {
+                Print() << "For scalar m = "<< m << " " <<  (i-halfbin)*binwidth << " " 
+                        << bins[i]/(count*binwidth) << std::endl;
+            }
+            if (ParallelDescriptor::IOProcessor()) {
+                std::ofstream outfile;
+                outfile.open(scalar_out[m]);
+                for (int i=0; i<nbins+1; ++i) {
+                    outfile << (i-halfbin)*binwidth << " " << bins[i]/(count*binwidth) << std::endl;
+                }
+                outfile.close();
+            }
+        }
+
+        // total vorticity PDF
+        {
+          Vector<Real> bins(nbins+1,0.);
+
+          int halfbin = nbins/2;
+          Real hbinwidth = range/nbins;
+          Real binwidth = 2.*range/nbins;
+          amrex::Long count=0;
+          amrex::Long totbin=0;
+          for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0;
+
+          for ( MFIter mfi(scalar,false); mfi.isValid(); ++mfi ) {
+
+              const Box& bx = mfi.validbox();
+              const auto lo = amrex::lbound(bx);
+              const auto hi = amrex::ubound(bx);
+              
+              const Array4<Real>& sca = scalar.array(mfi);
+
+              for (auto n = 1;    n < 4;     ++n) { 
+              for (auto k = lo.z; k <= hi.z; ++k) {
+              for (auto j = lo.y; j <= hi.y; ++j) {
+              for (auto i = lo.x; i <= hi.x; ++i) {
+
+                  int index = floor((sca(i,j,k,n) + hbinwidth)/binwidth);
+                  index += halfbin;
+                  
+                  if( index >=0 && index <= nbins) {
+                      bins[index] += 1;
+                      totbin++;
+                  }
+
+                  count++;
+                      
+              }
+              }
+              }
+              }
+
+          } // end MFIter
+
+          ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1);
+          ParallelDescriptor::ReduceLongSum(count);
+          ParallelDescriptor::ReduceLongSum(totbin);
+          Print() << "Points outside of range "<< count - totbin << " " << 
+                     (double)(count-totbin)/count << std::endl;
+
+          // print out contents of bins to the screen
+          for (int i=0; i<nbins+1; ++i) {
+              Print() << "For scalar m = "<< 4 << " " <<  (i-halfbin)*binwidth << " " 
+                      << bins[i]/(count*binwidth) << std::endl;
+          }
+          if (ParallelDescriptor::IOProcessor()) {
+              std::ofstream outfile;
+              outfile.open(scalar_out[4]);
+              for (int i=0; i<nbins+1; ++i) {
+                  outfile << (i-halfbin)*binwidth << " " << bins[i]/(count*binwidth) << std::endl;
+              }
+              outfile.close();
+          }
+        
+        }
+
+        // solenoidal  and dilataional velocity PDF
+        MultiFab vel_decomp(ba,dmap,6,0);
+
+        Copy(vel_decomp,mf,velx_sol_ind,0,1,0); // sol
+        Copy(vel_decomp,mf,vely_sol_ind,1,1,0); // sol
+        Copy(vel_decomp,mf,velz_sol_ind,2,1,0); // sol
+        Copy(vel_decomp,mf,velx_dil_ind,3,1,0); // dil
+        Copy(vel_decomp,mf,vely_dil_ind,4,1,0); // dil
+        Copy(vel_decomp,mf,velz_dil_ind,5,1,0); // dil
+
+        // compute spatial mean
+        Real mean_solx   = vel_decomp.sum(0) / (ncells);
+        Real mean_soly   = vel_decomp.sum(1) / (ncells);
+        Real mean_solz   = vel_decomp.sum(2) / (ncells);
+        Real mean_dilx   = vel_decomp.sum(3) / (ncells);
+        Real mean_dily   = vel_decomp.sum(4) / (ncells);
+        Real mean_dilz   = vel_decomp.sum(5) / (ncells);
+
+        // get fluctuations
+        vel_decomp.plus(-1.0*mean_solx,     0, 1);
+        vel_decomp.plus(-1.0*mean_soly,     1, 1);
+        vel_decomp.plus(-1.0*mean_solz,     2, 1);
+        vel_decomp.plus(-1.0*mean_dilx,     3, 1);
+        vel_decomp.plus(-1.0*mean_dily,     4, 1);
+        vel_decomp.plus(-1.0*mean_dilz,     5, 1);
+
+        // get rms
+        Real rms_solx   = vel_decomp.norm2(0) / sqrt(ncells);
+        Real rms_soly   = vel_decomp.norm2(1) / sqrt(ncells);
+        Real rms_solz   = vel_decomp.norm2(2) / sqrt(ncells);
+        Real rms_dilx   = vel_decomp.norm2(3) / sqrt(ncells);
+        Real rms_dily   = vel_decomp.norm2(4) / sqrt(ncells);
+        Real rms_dilz   = vel_decomp.norm2(5) / sqrt(ncells);
+
+        // scale by rms
+        vel_decomp.mult(1.0/rms_solx,   0, 1);
+        vel_decomp.mult(1.0/rms_soly,   1, 1);
+        vel_decomp.mult(1.0/rms_solz,   2, 1);
+        vel_decomp.mult(1.0/rms_dilx,   3, 1);
+        vel_decomp.mult(1.0/rms_dily,   4, 1);
+        vel_decomp.mult(1.0/rms_dilz,   5, 1);
+
+        // solenoidal 
+        {
+          Vector<Real> bins(nbins+1,0.);
+
+          int halfbin = nbins/2;
+          Real hbinwidth = range/nbins;
+          Real binwidth = 2.*range/nbins;
+          amrex::Long count=0;
+          amrex::Long totbin=0;
+          for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0;
+
+          for ( MFIter mfi(vel_decomp,false); mfi.isValid(); ++mfi ) {
+
+              const Box& bx = mfi.validbox();
+              const auto lo = amrex::lbound(bx);
+              const auto hi = amrex::ubound(bx);
+              
+              const Array4<Real>& vel = vel_decomp.array(mfi);
+
+              for (auto n = 0;    n < 3;     ++n) { 
+              for (auto k = lo.z; k <= hi.z; ++k) {
+              for (auto j = lo.y; j <= hi.y; ++j) {
+              for (auto i = lo.x; i <= hi.x; ++i) {
+
+                  int index = floor((vel(i,j,k,n) + hbinwidth)/binwidth);
+                  index += halfbin;
+                  
+                  if( index >=0 && index <= nbins) {
+                      bins[index] += 1;
+                      totbin++;
+                  }
+
+                  count++;
+                      
+              }
+              }
+              }
+              }
+
+          } // end MFIter
+
+          ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1);
+          ParallelDescriptor::ReduceLongSum(count);
+          ParallelDescriptor::ReduceLongSum(totbin);
+          Print() << "Points outside of range "<< count - totbin << " " << 
+                     (double)(count-totbin)/count << std::endl;
+
+          // print out contents of bins to the screen
+          for (int i=0; i<nbins+1; ++i) {
+              Print() << "For solenoid. vel. " <<  (i-halfbin)*binwidth << " " 
+                      << bins[i]/(count*binwidth) << std::endl;
+          }
+          if (ParallelDescriptor::IOProcessor()) {
+              std::ofstream outfile;
+              outfile.open(amrex::Concatenate("solenoidal_pdf",step,9));
+              for (int i=0; i<nbins+1; ++i) {
+                  outfile << (i-halfbin)*binwidth << " " << bins[i]/(count*binwidth) << std::endl;
+              }
+              outfile.close();
+          }
+        
+        }
+
+        // dilatational
+        {
+          Vector<Real> bins(nbins+1,0.);
+
+          int halfbin = nbins/2;
+          Real hbinwidth = range/nbins;
+          Real binwidth = 2.*range/nbins;
+          amrex::Long count=0;
+          amrex::Long totbin=0;
+          for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0;
+
+          for ( MFIter mfi(vel_decomp,false); mfi.isValid(); ++mfi ) {
+
+              const Box& bx = mfi.validbox();
+              const auto lo = amrex::lbound(bx);
+              const auto hi = amrex::ubound(bx);
+              
+              const Array4<Real>& vel = vel_decomp.array(mfi);
+
+              for (auto n = 3;    n < 6;     ++n) { 
+              for (auto k = lo.z; k <= hi.z; ++k) {
+              for (auto j = lo.y; j <= hi.y; ++j) {
+              for (auto i = lo.x; i <= hi.x; ++i) {
+
+                  int index = floor((vel(i,j,k,n) + hbinwidth)/binwidth);
+                  index += halfbin;
+                  
+                  if( index >=0 && index <= nbins) {
+                      bins[index] += 1;
+                      totbin++;
+                  }
+
+                  count++;
+                      
+              }
+              }
+              }
+              }
+
+          } // end MFIter
+
+          ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1);
+          ParallelDescriptor::ReduceLongSum(count);
+          ParallelDescriptor::ReduceLongSum(totbin);
+          Print() << "Points outside of range "<< count - totbin << " " << 
+                     (double)(count-totbin)/count << std::endl;
+
+          // print out contents of bins to the screen
+          for (int i=0; i<nbins+1; ++i) {
+              Print() << "For dilation. vel. " <<  (i-halfbin)*binwidth << " " 
+                      << bins[i]/(count*binwidth) << std::endl;
+          }
+          if (ParallelDescriptor::IOProcessor()) {
+              std::ofstream outfile;
+              outfile.open(amrex::Concatenate("dilatational_pdf",step,9));
+              for (int i=0; i<nbins+1; ++i) {
+                  outfile << (i-halfbin)*binwidth << " " << bins[i]/(count*binwidth) << std::endl;
+              }
+              outfile.close();
+          }
+        
+        }
+
+    }
+        
+    amrex::Finalize();
+
+}
+
+
diff --git a/exec/compressible_stag/TURB_PDFS/main_multisteps.cpp b/exec/compressible_stag/TURB_PDFS/main_multisteps.cpp
new file mode 100644
index 000000000..a5c9f35f2
--- /dev/null
+++ b/exec/compressible_stag/TURB_PDFS/main_multisteps.cpp
@@ -0,0 +1,345 @@
+#include <cmath>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <AMReX_ParmParse.H>
+#include <AMReX_PlotFileUtil.H>
+#include <AMReX_VisMF.H>
+
+using namespace amrex;
+
+// Print the command-line synopsis and terminate the program with exit status 1.
+static
+void
+PrintUsage (const char* progName)
+{
+    Print() << std::endl
+            << "This utility computes PDF of vorticity and divergence, and various thermodynamic scalars," << std::endl;
+
+    Print() << "Usage:" << '\n';
+    Print() << progName << " <inputs>" << std::endl
+            << "OR" << std::endl
+            << progName << std::endl
+            << " steps=<step numbers of plotfiles to be read>" << std::endl
+            << " nbins=<number of bins> " << std::endl
+            << " range=<lo/hi end of range> " << std::endl
+            << std::endl;
+
+    std::exit(1);
+}
+
+// Accumulate PDFs of the rms-normalized velocity divergence and vorticity
+// components over several "vel_grad_decomp" plotfiles.
+//
+// Inputs (ParmParse): steps (plotfile step numbers), nbins, range.
+// For each step the divergence is read from the plotfile and the curl of the
+// solenoidal velocity is computed with centered differences on a periodic
+// domain; each field has its spatial mean removed and is scaled by its rms
+// before being binned into nbins+1 bins of width 2*range/nbins.  Histograms
+// are summed over all steps and written to <prefix><first step>_<last step>.
+int main (int argc, char* argv[])
+{
+    amrex::Initialize(argc,argv);
+
+    {
+
+        if (argc == 1) {
+            PrintUsage(argv[0]);
+        }
+
+        ParmParse pp;
+
+        std::vector<int> steps;
+        pp.queryarr("steps",steps);
+        const int nsteps = static_cast<int>(steps.size());
+        // steps[0] and steps[nsteps-1] are used below to name the output files
+        if (nsteps == 0) {
+            amrex::Abort("steps= must list at least one plotfile step number");
+        }
+        Print() << "number of steps to process: " << nsteps << std::endl;
+
+        int nbins;
+        pp.get("nbins", nbins);
+
+        Real range;
+        pp.get("range",range);
+
+        // the bin width is 2*range/nbins, so both must be positive
+        if (nbins <= 0 || range <= 0.) {
+            amrex::Abort("nbins and range must both be positive");
+        }
+
+        // PDFs 0-3 are the components of "scalar" below; PDF 4 pools components 1-3
+        const int npdf = 5;
+        const char* const prefix[npdf] = {"div_pdf_", "vortx_pdf_", "vorty_pdf_",
+                                          "vortz_pdf_", "vort_pdf_"};
+
+        // output names are <prefix><first step>_<last step>
+        Vector<std::string> scalar_out(npdf);
+        for (int m=0; m<npdf; ++m) {
+            scalar_out[m] = amrex::Concatenate(prefix[m],steps[0],9) + "_";
+            scalar_out[m] = amrex::Concatenate(scalar_out[m],steps[nsteps-1],9);
+        }
+
+        // histograms and counters accumulated over all steps (global values)
+        Vector<Vector<Real> > bins;
+        for (int m=0; m<npdf; ++m) {
+            bins.push_back(Vector<Real> (nbins+1,0.));
+        }
+        Vector<amrex::Long> count(npdf,0);
+        Vector<amrex::Long> totbin(npdf,0);
+
+        const int halfbin = nbins/2;
+        const Real hbinwidth = range/nbins;
+        const Real binwidth = 2.*range/nbins;
+
+        for (int step=0; step<nsteps; ++step) {
+
+            std::string iFile = amrex::Concatenate("vel_grad_decomp",steps[step],9);
+            amrex::Print() << "Reading from vel_grad_decomp plotfile " << iFile << "\n";
+
+            // open the plotfile Header
+            const std::string headerName = iFile + "/Header";
+            std::ifstream x(headerName.c_str(), std::ios::in);
+            if (!x.is_open()) {
+                amrex::Abort("Could not open " + headerName);
+            }
+
+            // read in first line of header (typically "HyperCLaw-V1.1" or similar)
+            std::string str;
+            x >> str;
+
+            // read in number of components from header
+            int ncomp = 0;
+            x >> ncomp;
+
+            // locate the plotfile components used below by variable name
+            int div_ind = -1;
+            int velx_sol_ind = -1;
+            int vely_sol_ind = -1;
+            int velz_sol_ind = -1;
+            for (int n=0; n<ncomp; ++n) {
+                x >> str;
+                if (str == "div")  div_ind = n;
+                if (str == "ux_s") velx_sol_ind = n;
+                if (str == "uy_s") vely_sol_ind = n;
+                if (str == "uz_s") velz_sol_ind = n;
+            }
+            if (div_ind < 0 || velx_sol_ind < 0 || vely_sol_ind < 0 || velz_sol_ind < 0) {
+                amrex::Abort("div, ux_s, uy_s and uz_s must all be present in " + headerName);
+            }
+
+            // read in dimensionality from header
+            int dim;
+            x >> dim;
+
+            // read in time
+            Real time;
+            x >> time;
+
+            // read in finest level
+            int finest_level;
+            x >> finest_level;
+
+            // read in prob_lo and prob_hi
+            amrex::GpuArray<amrex::Real, 3> prob_lo, prob_hi;
+            for (int i=0; i<3; ++i) {
+                x >> prob_lo[i];
+            }
+            for (int i=0; i<3; ++i) {
+                x >> prob_hi[i];
+            }
+            if (!x) {
+                amrex::Abort("Error while parsing " + headerName);
+            }
+
+            // check to see whether the user pointed to the plotfile base directory
+            // or the data itself
+            if (amrex::FileExists(iFile+"/Level_0/Cell_H")) {
+               iFile += "/Level_0/Cell";
+            }
+            if (amrex::FileExists(iFile+"/Level_00/Cell_H")) {
+               iFile += "/Level_00/Cell";
+            }
+
+            // read in plotfile data to MultiFab
+            MultiFab mf;
+            VisMF::Read(mf, iFile);
+
+            // get BoxArray and DistributionMapping
+            BoxArray ba = mf.boxArray();
+            DistributionMapping dmap = mf.DistributionMap();
+
+            // physical dimensions of problem
+            RealBox real_box({AMREX_D_DECL(prob_lo[0],prob_lo[1],prob_lo[2])},
+                             {AMREX_D_DECL(prob_hi[0],prob_hi[1],prob_hi[2])});
+
+            // single box with the entire domain
+            Box domain = ba.minimalBox().enclosedCells();
+
+            const Real ncells = static_cast<Real>(domain.numPts());
+
+            // set to 1 (periodic)
+            Vector<int> is_periodic(3,1);
+
+            Geometry geom(domain,&real_box,CoordSys::cartesian,is_periodic.data());
+
+            const Real* dx = geom.CellSize();
+
+            // solenoidal velocity with one ghost cell for the centered differences;
+            // components 0,1,2 of vel_sol hold ux_s, uy_s, uz_s
+            MultiFab vel_sol(ba,dmap,3,1);
+            Copy(vel_sol,mf,velx_sol_ind,0,1,0);
+            Copy(vel_sol,mf,vely_sol_ind,1,1,0);
+            Copy(vel_sol,mf,velz_sol_ind,2,1,0);
+            vel_sol.FillBoundary(geom.periodicity());
+
+            ////////////////////////////////////////////////////////////////////////
+            ///////////////////////// scalar  PDFs /////////////////////////////////
+            ////////////////////////////////////////////////////////////////////////
+            MultiFab scalar(ba,dmap,4,0);    // div + three vorticity components
+            scalar.setVal(0.0);
+            Copy(scalar,mf,div_ind,0,1,0);
+
+            // Compute vorticity components and store in scalar.
+            // vel_sol must be indexed with its own component numbers (0,1,2), not
+            // with the plotfile indices, which are only valid for mf.
+            // NOTE(review): for the curl (dw/dy-dv/dz, du/dz-dw/dx, dv/dx-du/dy),
+            // component 2 below is the z-component and component 3 the y-component,
+            // while output files 2 and 3 are named vorty and vortz; kept as is --
+            // confirm the intended labelling.
+            for ( MFIter mfi(vel_sol,false); mfi.isValid(); ++mfi ) {
+
+                const Box& bx = mfi.validbox();
+                const auto lo = amrex::lbound(bx);
+                const auto hi = amrex::ubound(bx);
+
+                Array4<Real const> const& sol  = vel_sol.array(mfi);
+                Array4<Real>       const& sca  = scalar .array(mfi);
+
+                for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                for (auto i = lo.x; i <= hi.x; ++i) {
+                    // dw/dy - dv/dz
+                    sca(i,j,k,1) =
+                        (sol(i,j+1,k,2) - sol(i,j-1,k,2)) / (2.*dx[1]) -
+                        (sol(i,j,k+1,1) - sol(i,j,k-1,1)) / (2.*dx[2]);
+
+                    // dv/dx - du/dy
+                    sca(i,j,k,2) =
+                        (sol(i+1,j,k,1) - sol(i-1,j,k,1)) / (2.*dx[0]) -
+                        (sol(i,j+1,k,0) - sol(i,j-1,k,0)) / (2.*dx[1]);
+
+                    // du/dz - dw/dx
+                    sca(i,j,k,3) =
+                        (sol(i,j,k+1,0) - sol(i,j,k-1,0)) / (2.*dx[2]) -
+                        (sol(i+1,j,k,2) - sol(i-1,j,k,2)) / (2.*dx[0]);
+
+                }
+                }
+                }
+            }
+
+            // remove the spatial mean and scale by the rms, component by component
+            for (int m=0; m<4; ++m) {
+                const Real mean = scalar.sum(m) / ncells;
+                scalar.plus(-1.0*mean, m, 1);
+                const Real rms = scalar.norm2(m) / std::sqrt(ncells);
+                // a uniform field has zero rms; leave it at zero instead of producing NaNs
+                if (rms > 0.) {
+                    scalar.mult(1.0/rms, m, 1);
+                }
+            }
+
+            // Histogram this step into local accumulators.  The MPI reductions
+            // must act on per-step contributions only: reducing the running
+            // totals would add the already-global earlier steps once per rank.
+            Vector<Vector<Real> > step_bins;
+            for (int m=0; m<npdf; ++m) {
+                step_bins.push_back(Vector<Real> (nbins+1,0.));
+            }
+            Vector<amrex::Long> step_count(npdf,0);
+            Vector<amrex::Long> step_totbin(npdf,0);
+
+            // add one sample to histogram "pdf"; samples outside the range
+            // (or NaN) only increase the total count
+            auto deposit = [&] (int pdf, Real val) {
+                const Real index = std::floor((val + hbinwidth)/binwidth) + halfbin;
+                if (index >= 0 && index <= nbins) {
+                    step_bins[pdf][static_cast<int>(index)] += 1;
+                    step_totbin[pdf]++;
+                }
+                step_count[pdf]++;
+            };
+
+            for ( MFIter mfi(scalar,false); mfi.isValid(); ++mfi ) {
+
+                const Box& bx = mfi.validbox();
+                const auto lo = amrex::lbound(bx);
+                const auto hi = amrex::ubound(bx);
+
+                Array4<Real const> const sca = scalar.const_array(mfi);
+
+                for (int m = 0; m < 4; ++m) {
+                for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                for (auto i = lo.x; i <= hi.x; ++i) {
+
+                    deposit(m, sca(i,j,k,m));
+
+                    // the three vorticity components also feed the pooled PDF
+                    if (m >= 1) {
+                        deposit(4, sca(i,j,k,m));
+                    }
+
+                }
+                }
+                }
+                }
+
+            } // end MFIter
+
+            for (int m=0; m<npdf; ++m) {
+                ParallelDescriptor::ReduceRealSum(step_bins[m].dataPtr(),nbins+1);
+                ParallelDescriptor::ReduceLongSum(step_count[m]);
+                ParallelDescriptor::ReduceLongSum(step_totbin[m]);
+
+                for (int i=0; i<nbins+1; ++i) {
+                    bins[m][i] += step_bins[m][i];
+                }
+                count[m]  += step_count[m];
+                totbin[m] += step_totbin[m];
+
+                Print() << "Points outside of range "<< count[m] - totbin[m] << " " <<
+                           static_cast<double>(count[m]-totbin[m])/count[m] << std::endl;
+            }
+
+        } // end nsteps
+
+        // print out contents of bins to the screen and, on the I/O rank, to file
+        for (int m=0; m<npdf; ++m) {
+            for (int i=0; i<nbins+1; ++i) {
+                Print() << "For scalar m = "<< m << " " <<  (i-halfbin)*binwidth << " "
+                        << bins[m][i]/(count[m]*binwidth) << std::endl;
+            }
+            if (ParallelDescriptor::IOProcessor()) {
+                std::ofstream outfile;
+                outfile.open(scalar_out[m]);
+                for (int i=0; i<nbins+1; ++i) {
+                    outfile << (i-halfbin)*binwidth << " " << bins[m][i]/(count[m]*binwidth) << std::endl;
+                }
+                outfile.close();
+            }
+        }
+
+        // TODO: PDFs of the solenoidal and dilatational velocity components are
+        // not accumulated by this multi-step tool yet.
+
+    }
+
+    amrex::Finalize();
+
+}
diff --git a/exec/compressible_stag/build_frontier.sh b/exec/compressible_stag/build_frontier.sh
index d89d820bf..a56f6ab31 100755
--- a/exec/compressible_stag/build_frontier.sh
+++ b/exec/compressible_stag/build_frontier.sh
@@ -1,10 +1,10 @@
 #!/usr/bin/bash
 
 ## load necessary modules 
-module load cmake/3.23.2
 module load craype-accel-amd-gfx90a
-module load rocm/5.2.0  # waiting for 5.6 for next bump
-module load cray-mpich
+module load amd-mixed
+#module load rocm/5.2.0  # waiting for 5.6 for next bump
+module load cray-mpich/8.1.23
 module load cce/15.0.0  # must be loaded after rocm
 
 # GPU-aware MPI
@@ -14,11 +14,14 @@ export MPICH_GPU_SUPPORT_ENABLED=1
 export AMREX_AMD_ARCH=gfx90a
 
 # compiler environment hints
-export CC=$(which hipcc)
-export CXX=$(which hipcc)
-export FC=$(which ftn)
-export CFLAGS="-I${ROCM_PATH}/include"
-export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed"
-export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64 ${PE_MPICH_GTL_DIR_amd_gfx90a} -lmpi_gtl_hsa"
+##export CC=$(which hipcc)
+##export CXX=$(which hipcc)
+##export FC=$(which ftn)
+##export CFLAGS="-I${ROCM_PATH}/include"
+##export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed"
+##export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64 ${PE_MPICH_GTL_DIR_amd_gfx90a} -lmpi_gtl_hsa"
+export LDFLAGS="-L${MPICH_DIR}/lib -lmpi ${CRAY_XPMEM_POST_LINK_OPTS} -lxpmem ${PE_MPICH_GTL_DIR_amd_gfx90a} ${PE_MPICH_GTL_LIBS_amd_gfx90a}"
+export CXXFLAGS="-I${MPICH_DIR}/include"
+export HIPFLAGS="--amdgpu-target=gfx90a"
 
-make -j18 USE_CUDA=FALSE USE_HIP=TRUE DO_TURB=TRUE MAX_SPEC=2
+make -j10 USE_CUDA=FALSE USE_HIP=TRUE DO_TURB=TRUE MAX_SPEC=2 USE_HEFFTE_ROCFFT=TRUE USE_ASSERTION=TRUE 
diff --git a/exec/compressible_stag/build_frontier_101324.sh b/exec/compressible_stag/build_frontier_101324.sh
new file mode 100755
index 000000000..01c565fd7
--- /dev/null
+++ b/exec/compressible_stag/build_frontier_101324.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/bash
+
+## load necessary modules 
+module load rocm/5.7.1
+
+# GPU-aware MPI
+export MPICH_GPU_SUPPORT_ENABLED=1
+
+# optimize HIP compilation for MI250X (gfx90a)
+export AMREX_AMD_ARCH=gfx90a
+
+# compiler environment hints
+export CC=$(which hipcc)
+export CXX=$(which hipcc)
+export FC=$(which ftn)
+export CFLAGS="-I${ROCM_PATH}/include"
+export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed"
+export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64 ${PE_MPICH_GTL_DIR_amd_gfx90a} -lmpi_gtl_hsa"
+
+make -j8 USE_HIP=TRUE DO_TURB=TRUE MAX_SPEC=2 USE_FFT=TRUE USE_ASSERTION=TRUE
diff --git a/exec/compressible_stag/build_perlmutter.sh b/exec/compressible_stag/build_perlmutter.sh
new file mode 100755
index 000000000..2118c7059
--- /dev/null
+++ b/exec/compressible_stag/build_perlmutter.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/bash
+
+# required dependencies
+module load gpu
+module load PrgEnv-gnu
+module load craype
+module load craype-x86-milan
+module load craype-accel-nvidia80
+module load cudatoolkit
+module load cmake/3.24.3
+
+# necessary to use CUDA-Aware MPI and run a job
+export CRAY_ACCEL_TARGET=nvidia80
+
+# optimize CUDA compilation for A100
+export AMREX_CUDA_ARCH=8.0
+
+# optimize CPU microarchitecture for AMD EPYC 3rd Gen (Milan/Zen3)
+# note: the cc/CC/ftn wrappers below add those
+export CXXFLAGS="-march=znver3"
+export CFLAGS="-march=znver3"
+
+# compiler environment hints
+export CC=cc
+export CXX=CC
+export FC=ftn
+export CUDACXX=$(which nvcc)
+export CUDAHOSTCXX=CC
+
+make -j10 USE_CUDA=TRUE DO_TURB=TRUE MAX_SPEC=2 USE_HEFFTE_CUFFT=TRUE USE_ASSERTION=TRUE
diff --git a/exec/compressible_stag/build_perlmutter_101724.sh b/exec/compressible_stag/build_perlmutter_101724.sh
new file mode 100755
index 000000000..e2b81f938
--- /dev/null
+++ b/exec/compressible_stag/build_perlmutter_101724.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/bash
+
+# required dependencies
+module load cray-fftw
+module load cmake
+module load cudatoolkit
+
+module list
+
+# necessary to use CUDA-Aware MPI and run a job
+export CRAY_ACCEL_TARGET=nvidia80
+
+export MPICH_GPU_SUPPORT_ENABLED=1
+
+# optimize CUDA compilation for A100
+export AMREX_CUDA_ARCH=8.0
+
+# optimize CPU microarchitecture for AMD EPYC 3rd Gen (Milan/Zen3)
+# note: the cc/CC/ftn wrappers below add those
+export CXXFLAGS="-march=znver3"
+export CFLAGS="-march=znver3"
+
+# compiler environment hints
+export CC=cc
+export CXX=CC
+export FC=ftn
+export CUDACXX=$(which nvcc)
+export CUDAHOSTCXX=CC
+
+make -j10 USE_CUDA=TRUE DO_TURB=TRUE MAX_SPEC=2  USE_FFT=TRUE USE_ASSERTION=TRUE
diff --git a/exec/compressible_stag/test_MFsurfchem_COAr_eq/inputs_fhd_stag b/exec/compressible_stag/test_MFsurfchem_COAr_eq/inputs_fhd_stag
index 19d706270..3e5d36209 100644
--- a/exec/compressible_stag/test_MFsurfchem_COAr_eq/inputs_fhd_stag
+++ b/exec/compressible_stag/test_MFsurfchem_COAr_eq/inputs_fhd_stag
@@ -122,7 +122,7 @@ surf_site_num_dens = 1.027285e+15
 
 # adsorption rate = ads_rate_const * num_dens
 # desoprtion rate = des_rate
-ads_rate_const = 1.770226e-11
+ads_rate_const = 1.831671e+02
 des_rate = 3.702336e+07
 
 # e_beta = 0    # no additional energy update
diff --git a/exec/compressible_stag/test_MFsurfchem_COAr_eq/params_COAr_eq.py b/exec/compressible_stag/test_MFsurfchem_COAr_eq/params_COAr_eq.py
index c2550d8c3..a16240e1a 100644
--- a/exec/compressible_stag/test_MFsurfchem_COAr_eq/params_COAr_eq.py
+++ b/exec/compressible_stag/test_MFsurfchem_COAr_eq/params_COAr_eq.py
@@ -152,7 +152,7 @@
 
 sprob = 1.
 rads1 = sprob*rcol1
-kads1 = rads1/n1
+kads1 = rads1/p1
 
 kdes1 = rads1*math.exp((-delta_mu1+E_bind)*eV_cgs/(kB*temp))
 
@@ -162,7 +162,7 @@
 
 print("- sticking prob = %f" % sprob)
 print("- rads1 = %e (rate)" % rads1)
-print("- kads1 = rads1/n1 (rate const) = %e" % kads1)
+print("- kads1 = rads1/p1 (rate const) = %e" % kads1)
 
 print("- kdes1 = %e" % kdes1)
 
diff --git a/exec/compressible_stag/test_NO2_dimerization_neq_wall/entropy_check.py b/exec/compressible_stag/test_NO2_dimerization_neq_wall/entropy_check.py
index 99da07311..17650d620 100644
--- a/exec/compressible_stag/test_NO2_dimerization_neq_wall/entropy_check.py
+++ b/exec/compressible_stag/test_NO2_dimerization_neq_wall/entropy_check.py
@@ -80,7 +80,7 @@
     tmp1 = mu2face[k]*F2z[k]/Tface[k]
     term2[k] += (tmp2-tmp1)/dz
 
-# Q.gradT/T^2
+# (Q.gradT)/T^2
 gradTz = np.zeros(nz)
 for k in range(1,nz):
     gradTz[k] = (prim4[k]-prim4[k-1])/dz
@@ -107,7 +107,7 @@
     tmp2 = F2z[k+1]*gradmu2Tz[k+1]
     term4[k] += 0.5*(tmp1+tmp2)
 
-# sum muk*Mk*Omegak
+# (sum muk*Mk*Omegak)/T
 M1 = 46.0055
 M2 = 92.0110
 term5 = np.zeros(nz)
@@ -136,7 +136,7 @@
 for k in range(1,nz-1):
     term14[k] = (Qz[k+1]-Qz[k])/dz/prim4[k]
 
-# sum muk*(div(Fk)-Mk*Omegak)/T
+# (sum muk*(div(Fk)-Mk*Omegak))/T
 term15 = np.zeros(nz)
 for k in range(1,nz-1):
     tmp = (F1z[k+1]-F1z[k])/dz
@@ -148,5 +148,5 @@
 
 # output for final terms
 outfile3 = "res.entropy_check"
-np.savetxt(outfile3,np.column_stack((z_in,term1,term2,term3,term4,term5,term11,term12,term13,term14,term15)))
+np.savetxt(outfile3,np.column_stack((z_in,term1,term2,term3,term4,term5,term11,term12,term13,term14,term15,mu1,mu2,Omega1,Omega2)))
 print("** %s generated" % outfile3)
diff --git a/exec/compressible_stag/test_NO2_dimerization_neq_wall/zavg3.sh b/exec/compressible_stag/test_NO2_dimerization_neq_wall/zavg3.sh
index c5736afe4..0f3ed6522 100755
--- a/exec/compressible_stag/test_NO2_dimerization_neq_wall/zavg3.sh
+++ b/exec/compressible_stag/test_NO2_dimerization_neq_wall/zavg3.sh
@@ -13,3 +13,8 @@ fi
 $exec -p $pltfile -o res.zavg3 -v 10 rhoMean rhoEMean rhoYkMean_0 rhoYkMean_1 tMean pMean YkMean_0 YkMean_1 XkMean_0 XkMean_1
 
 python entropy_check.py
+
+echo "mv res.zavg3 res.zavg3_${pltfile}"
+mv res.zavg3 res.zavg3_${pltfile}
+echo "mv res.entropy_check res.entropy_check_${pltfile}"
+mv res.entropy_check res.entropy_check_${pltfile}
diff --git a/exec/hydro/GNUmakefile b/exec/hydro/GNUmakefile
index 1a81d9633..0f6b98117 100644
--- a/exec/hydro/GNUmakefile
+++ b/exec/hydro/GNUmakefile
@@ -8,7 +8,8 @@ USE_OMP   = FALSE
 USE_CUDA  = FALSE
 COMP      = gnu
 DIM       = 3
-MAX_SPEC      = 8
+MAX_SPEC  = 2
+USE_FFT   = TRUE
 
 TINY_PROFILE = FALSE
 
@@ -39,6 +40,7 @@ VPATH_LOCATIONS   += ../../src_common/
 INCLUDE_LOCATIONS += ../../src_common/
 
 include $(AMREX_HOME)/Src/Base/Make.package
+include $(AMREX_HOME)/Src/FFT/Make.package
 include $(AMREX_HOME)/Src/Boundary/Make.package
 include $(AMREX_HOME)/Src/LinearSolvers/MLMG/Make.package
 
diff --git a/exec/hydro/main_driver.cpp b/exec/hydro/main_driver.cpp
index 56145b88d..4fd0a5b16 100644
--- a/exec/hydro/main_driver.cpp
+++ b/exec/hydro/main_driver.cpp
@@ -13,7 +13,6 @@
 #include "gmres_functions.H"
 
 
-
 #include <AMReX_VisMF.H>
 #include <AMReX_PlotFileUtil.H>
 #include <AMReX_ParallelDescriptor.H>
@@ -351,7 +350,6 @@ void main_driver(const char* argv)
     ///////////////////////////////////////////
 
     StructFact structFactFlattened;
-    MultiFab FlattenedRotMaster;
 
     Geometry geom_flat;
 
@@ -362,80 +360,33 @@ void main_driver(const char* argv)
       // a built version of Flattened so can obtain what we need to build the
       // structure factor and geometry objects for flattened data
       if (slicepoint < 0) {
-          ComputeVerticalAverage(structFactMF, Flattened, geom, project_dir, 0, 1);
+          ComputeVerticalAverage(structFactMF, Flattened, project_dir, 0, 1);
       } else {
-          ExtractSlice(structFactMF, Flattened, geom, project_dir, slicepoint, 0, 1);
+          ExtractSlice(structFactMF, Flattened, project_dir, slicepoint, 0, 1);
       }
-      // we rotate this flattened MultiFab to have normal in the z-direction since
-      // our structure factor class assumes this for flattened
-      MultiFab FlattenedRot = RotateFlattenedMF(Flattened);
-      BoxArray ba_flat = FlattenedRot.boxArray();
-      const DistributionMapping& dmap_flat = FlattenedRot.DistributionMap();
-      FlattenedRotMaster.define(ba_flat,dmap_flat,structVars,0);
+      BoxArray ba_flat = Flattened.boxArray();
+      const DistributionMapping& dmap_flat = Flattened.DistributionMap();
       {
-        IntVect dom_lo(AMREX_D_DECL(0,0,0));
-        IntVect dom_hi;
-
-        // yes you could simplify this code but for now
-        // these are written out fully to better understand what is happening
-        // we wanted dom_hi[AMREX_SPACEDIM-1] to be equal to 0
-        // and need to transmute the other indices depending on project_dir
-#if (AMREX_SPACEDIM == 2)
-        if (project_dir == 0) {
-            dom_hi[0] = n_cells[1]-1;
-        }
-        else if (project_dir == 1) {
-            dom_hi[0] = n_cells[0]-1;
-        }
-        dom_hi[1] = 0;
-#elif (AMREX_SPACEDIM == 3)
-        if (project_dir == 0) {
-            dom_hi[0] = n_cells[1]-1;
-            dom_hi[1] = n_cells[2]-1;
-        } else if (project_dir == 1) {
-            dom_hi[0] = n_cells[0]-1;
-            dom_hi[1] = n_cells[2]-1;
-        } else if (project_dir == 2) {
-            dom_hi[0] = n_cells[0]-1;
-            dom_hi[1] = n_cells[1]-1;
-        }
-        dom_hi[2] = 0;
-#endif
-        Box domain(dom_lo, dom_hi);
-
+        Box domain_flat = ba_flat.minimalBox();
+        
         // This defines the physical box
+        // we retain prob_lo and prob_hi in all directions except project_dir,
+        // where the physical size is 0 to dx[project_dir]
+        Vector<Real> projected_lo(AMREX_SPACEDIM);
         Vector<Real> projected_hi(AMREX_SPACEDIM);
 
-        // yes you could simplify this code but for now
-        // these are written out fully to better understand what is happening
-        // we wanted projected_hi[AMREX_SPACEDIM-1] to be equal to dx[projected_dir]
-        // and need to transmute the other indices depending on project_dir
-#if (AMREX_SPACEDIM == 2)
-        if (project_dir == 0) {
-            projected_hi[0] = prob_hi[1];
-        } else if (project_dir == 1) {
-            projected_hi[0] = prob_hi[0];
+        for (int d=0; d<AMREX_SPACEDIM; ++d) {
+            projected_lo[d] = prob_lo[d];
+            projected_hi[d] = prob_hi[d];
         }
-        projected_hi[1] = prob_hi[project_dir] / n_cells[project_dir];
-#elif (AMREX_SPACEDIM == 3)
-        if (project_dir == 0) {
-            projected_hi[0] = prob_hi[1];
-            projected_hi[1] = prob_hi[2];
-        } else if (project_dir == 1) {
-            projected_hi[0] = prob_hi[0];
-            projected_hi[1] = prob_hi[2];
-        } else if (project_dir == 2) {
-            projected_hi[0] = prob_hi[0];
-            projected_hi[1] = prob_hi[1];
-        }
-        projected_hi[2] = prob_hi[project_dir] / n_cells[project_dir];
-#endif
+        projected_lo[project_dir] = 0.;
+        projected_hi[project_dir] = (prob_hi[project_dir] - prob_lo[project_dir]) / n_cells[project_dir];
 
-        RealBox real_box({AMREX_D_DECL(     prob_lo[0],     prob_lo[1],     prob_lo[2])},
-                         {AMREX_D_DECL(projected_hi[0],projected_hi[1],projected_hi[2])});
+        RealBox real_box_flat({AMREX_D_DECL(projected_lo[0],projected_lo[1],projected_lo[2])},
+                              {AMREX_D_DECL(projected_hi[0],projected_hi[1],projected_hi[2])});
         
         // This defines a Geometry object
-        geom_flat.define(domain,&real_box,CoordSys::cartesian,is_periodic.data());
+        geom_flat.define(domain_flat,&real_box_flat,CoordSys::cartesian,is_periodic.data());
       }
 
       structFactFlattened.define(ba_flat,dmap_flat,var_names,var_scaling);
@@ -477,19 +428,15 @@ void main_driver(const char* argv)
             for(int d=0; d<AMREX_SPACEDIM; d++) {
                 ShiftFaceToCC(umac[d], 0, structFactMF, d, 1);
             }
-            structFact.FortStructure(structFactMF,geom);
+            structFact.FortStructure(structFactMF);
             if(project_dir >= 0) {
                 MultiFab Flattened;  // flattened multifab defined below
                 if (slicepoint < 0) {
-                    ComputeVerticalAverage(structFactMF, Flattened, geom, project_dir, 0, structVars);
+                    ComputeVerticalAverage(structFactMF, Flattened, project_dir, 0, structVars);
                 } else {
-                    ExtractSlice(structFactMF, Flattened, geom, project_dir, slicepoint, 0, structVars);
+                    ExtractSlice(structFactMF, Flattened, project_dir, slicepoint, 0, structVars);
                 }
-                // we rotate this flattened MultiFab to have normal in the z-direction since
-                // our structure factor class assumes this for flattened
-                MultiFab FlattenedRot = RotateFlattenedMF(Flattened);
-                FlattenedRotMaster.ParallelCopy(FlattenedRot,0,0,structVars);
-                structFactFlattened.FortStructure(FlattenedRotMaster,geom_flat);
+                structFactFlattened.FortStructure(Flattened);
             }
         }
 
@@ -575,18 +522,15 @@ void main_driver(const char* argv)
             for(int d=0; d<AMREX_SPACEDIM; d++) {
                 ShiftFaceToCC(umac[d], 0, structFactMF, d, 1);
             }
-            structFact.FortStructure(structFactMF,geom);
+            structFact.FortStructure(structFactMF);
             if(project_dir >= 0) {
                 MultiFab Flattened;  // flattened multifab defined below
                 if (slicepoint < 0) {
-                    ComputeVerticalAverage(structFactMF, Flattened, geom, project_dir, 0, structVars);
+                    ComputeVerticalAverage(structFactMF, Flattened, project_dir, 0, structVars);
                 } else {
-                    ExtractSlice(structFactMF, Flattened, geom, project_dir, slicepoint, 0, structVars);
+                    ExtractSlice(structFactMF, Flattened, project_dir, slicepoint, 0, structVars);
                 }
-                // we rotate this flattened MultiFab to have normal in the z-direction since
-                // our structure factor class assumes this for flattened
-                MultiFab FlattenedRot = RotateFlattenedMF(Flattened);
-                structFactFlattened.FortStructure(FlattenedRot,geom_flat);
+                structFactFlattened.FortStructure(Flattened);
             }
         }
                 
@@ -617,11 +561,11 @@ void main_driver(const char* argv)
                     ShiftFaceToCC(umac[d], 0, structFactMF, d, 1);
                 }
                 // reset and compute structure factor
-                turbStructFact.FortStructure(structFactMF,geom,1);
-                turbStructFact.CallFinalize(geom);
+                turbStructFact.FortStructure(structFactMF,1);
+                turbStructFact.CallFinalize();
 
                 // integrate cov_mag over shells in k and write to file
-                turbStructFact.IntegratekShells(step,geom);
+                turbStructFact.IntegratekShells(step);
             }
         }
 
diff --git a/exec/immersedIons/GNUmakefile b/exec/immersedIons/GNUmakefile
index c2ea936ae..709882051 100644
--- a/exec/immersedIons/GNUmakefile
+++ b/exec/immersedIons/GNUmakefile
@@ -12,6 +12,7 @@ COMP      = gnu
 DIM       = 3
 DSMC      = FALSE
 MAX_SPEC      = 8
+USE_FFT   = TRUE
 
 TINY_PROFILE  = FALSE
 USE_PARTICLES = TRUE
diff --git a/exec/immersedIons/main_driver.cpp b/exec/immersedIons/main_driver.cpp
index 51dd39c8f..14880270f 100644
--- a/exec/immersedIons/main_driver.cpp
+++ b/exec/immersedIons/main_driver.cpp
@@ -1579,13 +1579,13 @@ void main_driver(const char* argv)
 
             // charge
             MultiFab::Copy(struct_cc_charge, charge, 0, 0, nvar_sf_charge, 0);
-            structFact_charge.FortStructure(struct_cc_charge,geomP);
+            structFact_charge.FortStructure(struct_cc_charge);
 
             // velocity
             for (int d=0; d<AMREX_SPACEDIM; ++d) {
                 ShiftFaceToCC(umac[d],0,struct_cc_vel,d,1);
             }
-            structFact_vel.FortStructure(struct_cc_vel,geom);
+            structFact_vel.FortStructure(struct_cc_vel);
             
             // plot structure factor on plot_int
             if (istep%plot_int == 0) {
diff --git a/exec/immersed_boundary/cell_body/main_driver.cpp b/exec/immersed_boundary/cell_body/main_driver.cpp
index 8da994e0e..3b72c0902 100644
--- a/exec/immersed_boundary/cell_body/main_driver.cpp
+++ b/exec/immersed_boundary/cell_body/main_driver.cpp
@@ -784,7 +784,7 @@ void main_driver(const char * argv) {
         //     for(int d=0; d<AMREX_SPACEDIM; d++) {
         //         ShiftFaceToCC(umac[d], 0, struct_in_cc, d, 1);
         //     }
-        //     structFact.FortStructure(struct_in_cc,geom);
+        //     structFact.FortStructure(struct_in_cc);
         //
         // }
 
diff --git a/exec/immersed_boundary/channel_dumbbell/main_driver.cpp b/exec/immersed_boundary/channel_dumbbell/main_driver.cpp
index e1bd8de3a..b9b4d724c 100644
--- a/exec/immersed_boundary/channel_dumbbell/main_driver.cpp
+++ b/exec/immersed_boundary/channel_dumbbell/main_driver.cpp
@@ -436,7 +436,7 @@ void main_driver(const char * argv) {
             for(int d=0; d<AMREX_SPACEDIM; d++) {
                 ShiftFaceToCC(umac[d], 0, struct_in_cc, d, 1);
             }
-            // structFact.FortStructure(struct_in_cc,geom);
+            // structFact.FortStructure(struct_in_cc);
         }
 
         Real step_stop_time = ParallelDescriptor::second() - step_strt_time;
diff --git a/exec/immersed_boundary/channel_multiblob/main_driver.cpp b/exec/immersed_boundary/channel_multiblob/main_driver.cpp
index 377f8be19..8c7ddb4d5 100644
--- a/exec/immersed_boundary/channel_multiblob/main_driver.cpp
+++ b/exec/immersed_boundary/channel_multiblob/main_driver.cpp
@@ -511,7 +511,7 @@ void main_driver(const char * argv) {
             for(int d=0; d<AMREX_SPACEDIM; d++) {
                 ShiftFaceToCC(umac[d], 0, struct_in_cc, d, 1);
             }
-            // structFact.FortStructure(struct_in_cc,geom);
+            // structFact.FortStructure(struct_in_cc);
         }
 
         Real step_stop_time = ParallelDescriptor::second() - step_strt_time;
diff --git a/exec/immersed_boundary/channel_rigid/main_driver.cpp b/exec/immersed_boundary/channel_rigid/main_driver.cpp
index 298676c3d..a05f45ec5 100644
--- a/exec/immersed_boundary/channel_rigid/main_driver.cpp
+++ b/exec/immersed_boundary/channel_rigid/main_driver.cpp
@@ -617,7 +617,7 @@ void main_driver(const char * argv) {
             for(int d=0; d<AMREX_SPACEDIM; d++) {
                 ShiftFaceToCC(umac[d], 0, struct_in_cc, d, 1);
             }
-            structFact.FortStructure(struct_in_cc,geom);
+            structFact.FortStructure(struct_in_cc);
         }
 
         Real step_stop_time = ParallelDescriptor::second() - step_strt_time;
diff --git a/exec/immersed_boundary/flagellum/main_driver.cpp b/exec/immersed_boundary/flagellum/main_driver.cpp
index 970f1c644..ff278d130 100644
--- a/exec/immersed_boundary/flagellum/main_driver.cpp
+++ b/exec/immersed_boundary/flagellum/main_driver.cpp
@@ -653,7 +653,7 @@ void main_driver(const char * argv) {
         //     for(int d=0; d<AMREX_SPACEDIM; d++) {
         //         ShiftFaceToCC(umac[d], 0, struct_in_cc, d, 1);
         //     }
-        //     structFact.FortStructure(struct_in_cc,geom);
+        //     structFact.FortStructure(struct_in_cc);
         //
         // }
 
diff --git a/exec/immersed_boundary/taylor_line/main_driver.cpp b/exec/immersed_boundary/taylor_line/main_driver.cpp
index a72a4db82..81dd17840 100644
--- a/exec/immersed_boundary/taylor_line/main_driver.cpp
+++ b/exec/immersed_boundary/taylor_line/main_driver.cpp
@@ -647,7 +647,7 @@ void main_driver(const char * argv) {
         //     for(int d=0; d<AMREX_SPACEDIM; d++) {
         //         ShiftFaceToCC(umac[d], 0, struct_in_cc, d, 1);
         //     }
-        //     structFact.FortStructure(struct_in_cc,geom);
+        //     structFact.FortStructure(struct_in_cc);
         //
         // }
 
diff --git a/exec/multispec/AdvanceTimestepBousq.cpp b/exec/multispec/AdvanceTimestepBousq.cpp
index 8d9f3931c..bed33cd1e 100644
--- a/exec/multispec/AdvanceTimestepBousq.cpp
+++ b/exec/multispec/AdvanceTimestepBousq.cpp
@@ -273,7 +273,7 @@ void AdvanceTimestepBousq(std::array< MultiFab, AMREX_SPACEDIM >& umac,
         // set normal velocity of physical domain boundaries
         MultiFabPhysBCDomainVel(umac[i],geom,i);
         // set transverse velocity behind physical boundaries
-        int is_inhomogeneous = 1;
+        int is_inhomogeneous = 0;
         MultiFabPhysBCMacVel(umac[i],geom,i,is_inhomogeneous);
         // fill periodic and interior ghost cells
         umac[i].FillBoundary(geom.periodicity());
@@ -707,7 +707,7 @@ void AdvanceTimestepBousq(std::array< MultiFab, AMREX_SPACEDIM >& umac,
         // set normal velocity of physical domain boundaries
         MultiFabPhysBCDomainVel(umac[i],geom,i);
         // set transverse velocity behind physical boundaries
-        int is_inhomogeneous = 1;
+        int is_inhomogeneous = 0;
         MultiFabPhysBCMacVel(umac[i],geom,i,is_inhomogeneous);
         // fill periodic and interior ghost cells
         umac[i].FillBoundary(geom.periodicity());
diff --git a/exec/multispec/AdvanceTimestepInertial.cpp b/exec/multispec/AdvanceTimestepInertial.cpp
index bbfbc263b..b89d13739 100644
--- a/exec/multispec/AdvanceTimestepInertial.cpp
+++ b/exec/multispec/AdvanceTimestepInertial.cpp
@@ -309,7 +309,7 @@ void AdvanceTimestepInertial(std::array< MultiFab, AMREX_SPACEDIM >& umac,
         // set normal velocity of physical domain boundaries
         MultiFabPhysBCDomainVel(umac[i],geom,i);
         // set transverse velocity behind physical boundaries
-        int is_inhomogeneous = 1;
+        int is_inhomogeneous = 0;
         MultiFabPhysBCMacVel(umac[i],geom,i,is_inhomogeneous);
         // fill periodic and interior ghost cells
         umac[i].FillBoundary(geom.periodicity());
@@ -598,7 +598,7 @@ void AdvanceTimestepInertial(std::array< MultiFab, AMREX_SPACEDIM >& umac,
         // set normal velocity of physical domain boundaries
         MultiFabPhysBCDomainVel(umac[i],geom,i);
         // set transverse velocity behind physical boundaries
-        int is_inhomogeneous = 1;
+        int is_inhomogeneous = 0;
         MultiFabPhysBCMacVel(umac[i],geom,i,is_inhomogeneous);
         // fill periodic and interior ghost cells
         umac[i].FillBoundary(geom.periodicity());
diff --git a/exec/multispec/GNUmakefile b/exec/multispec/GNUmakefile
index ba60c98d5..5cc88aa1a 100644
--- a/exec/multispec/GNUmakefile
+++ b/exec/multispec/GNUmakefile
@@ -9,9 +9,10 @@ USE_CUDA  = FALSE
 COMP      = gnu
 DIM       = 3
 DSMC      = FALSE
-MAX_SPEC      = 8
+MAX_SPEC  = 8
+USE_FFT   = TRUE
 # MAX_ELEM needs to be MAX_SPEC*(MAX_SPEC-1)/2
-MAX_ELEM      = 28
+MAX_ELEM  = 28
 
 TINY_PROFILE = FALSE
 USE_PARTICLES = FALSE
diff --git a/exec/multispec/main_driver.cpp b/exec/multispec/main_driver.cpp
index 60ee3f385..f614d142a 100644
--- a/exec/multispec/main_driver.cpp
+++ b/exec/multispec/main_driver.cpp
@@ -501,7 +501,7 @@ void main_driver(const char* argv)
             for(int d=0; d<nspecies; d++) {
                 MultiFab::Divide(structFactMF,rhotot_old,0,AMREX_SPACEDIM+d+1,1,0);
             }
-            structFact.FortStructure(structFactMF,geom);
+            structFact.FortStructure(structFactMF);
         }
         
         // write initial plotfile and structure factor
@@ -578,7 +578,7 @@ void main_driver(const char* argv)
             for(int d=0; d<nspecies; d++) {
                 MultiFab::Divide(structFactMF,rhotot_new,0,AMREX_SPACEDIM+d+1,1,0);
             }
-            structFact.FortStructure(structFactMF,geom);
+            structFact.FortStructure(structFactMF);
         }
 	
 
diff --git a/exec/phononDSMC/main_driver.cpp b/exec/phononDSMC/main_driver.cpp
index ff31f5ba9..89959e6b6 100644
--- a/exec/phononDSMC/main_driver.cpp
+++ b/exec/phononDSMC/main_driver.cpp
@@ -33,6 +33,8 @@ void main_driver(const char* argv)
 	Real time = 0.;
 	int statsCount = 1;
 	
+	iMultiFab bCell;
+	
 	MultiFab cuInst, cuMeans, cuVars;
 	
 	if (seed > 0)
@@ -99,6 +101,8 @@ void main_driver(const char* argv)
 		cuInst.define(ba, dmap, ncon, 0); cuInst.setVal(0.);
 		cuMeans.define(ba, dmap, ncon, 0); cuMeans.setVal(0.);
 		cuVars.define(ba,dmap, ncon, 0); cuVars.setVal(0.);
+		
+    	bCell.define(ba, dmap, 2, 1); bCell.setVal(0);
 
 
 	}
diff --git a/exec/phononDSMC/test_inputs/input_test b/exec/phononDSMC/test_inputs/input_test
index 03ef991e2..8913daab1 100644
--- a/exec/phononDSMC/test_inputs/input_test
+++ b/exec/phononDSMC/test_inputs/input_test
@@ -1,13 +1,13 @@
  # Problem specification
-  prob_lo = 0.0 0.0 0.0     			 	# physical lo coordinate
-  prob_hi = 0.08 0.08 0.01	# physical hi coordinate (cm)
+  prob_lo = -0.000597375 -0.00020999999999999998 0.0     			 	# physical lo coordinate
+  prob_hi = 0.000597375 0.00020999999999999998 145e-7	# physical hi coordinate (cm)
 
-  n_cells = 80 80 10 # keep as powers of two
-  max_grid_size = 80 80 10
-  max_particle_tile_size = 256 256 256
+  n_cells = 80 80 4 # keep as powers of two
+  max_grid_size = 20 20 4
+  max_particle_tile_size = 256 256 4
 
   # Time-step control
-  fixed_dt = 1e-9
+  fixed_dt = 2e-12
 
   # Controls for number of steps between actions
   max_step =     10000
diff --git a/exec/reactDiff/GNUmakefile b/exec/reactDiff/GNUmakefile
new file mode 100644
index 000000000..916832401
--- /dev/null
+++ b/exec/reactDiff/GNUmakefile
@@ -0,0 +1,64 @@
+# AMREX_HOME defines the directory in which we will find all the AMReX code.
+# If you set AMREX_HOME as an environment variable, this line will be ignored
+AMREX_HOME ?= ../../../amrex/
+
+DEBUG     = FALSE
+USE_MPI   = TRUE
+USE_OMP   = FALSE
+USE_CUDA  = FALSE
+COMP      = gnu
+DIM       = 2
+MAX_SPEC  = 8
+MAX_REAC  = 7
+
+TINY_PROFILE = FALSE
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.defs
+
+# add this back on if we add any local files
+#include ./Make.package
+#VPATH_LOCATIONS   += .
+#INCLUDE_LOCATIONS += .
+
+include ../../src_reactDiff/Make.package
+VPATH_LOCATIONS   += ../../src_reactDiff/
+INCLUDE_LOCATIONS += ../../src_reactDiff/
+
+include ../../src_chemistry/Make.package
+VPATH_LOCATIONS   += ../../src_chemistry/
+INCLUDE_LOCATIONS += ../../src_chemistry/
+
+include ../../src_analysis/Make.package
+VPATH_LOCATIONS   += ../../src_analysis/
+INCLUDE_LOCATIONS += ../../src_analysis/
+
+include ../../src_rng/Make.package
+VPATH_LOCATIONS   += ../../src_rng/
+INCLUDE_LOCATIONS += ../../src_rng/
+
+include ../../src_common/Make.package
+VPATH_LOCATIONS   += ../../src_common/
+INCLUDE_LOCATIONS += ../../src_common/
+
+include $(AMREX_HOME)/Src/Base/Make.package
+include $(AMREX_HOME)/Src/Boundary/Make.package
+include $(AMREX_HOME)/Src/LinearSolvers/MLMG/Make.package
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.rules
+
+ifeq ($(findstring cgpu, $(HOST)), cgpu)
+  CXXFLAGS += $(FFTW)
+endif
+
+ifeq ($(USE_CUDA),TRUE)
+  LIBRARIES += -lcufft
+else
+  LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3
+endif
+
+MAXSPECIES := $(strip $(MAX_SPEC))
+DEFINES += -DMAX_SPECIES=$(MAXSPECIES)
+
+MAXREACTION := $(strip $(MAX_REAC))
+DEFINES += -DMAX_REACTION=$(MAXREACTION)
+
diff --git a/exec/reactDiff/inputs_paper_BPM_2d b/exec/reactDiff/inputs_paper_BPM_2d
new file mode 100644
index 000000000..d04c29768
--- /dev/null
+++ b/exec/reactDiff/inputs_paper_BPM_2d
@@ -0,0 +1,121 @@
+# This inputs file is used for generating
+# - Figures 5, 6, 7 (Section V.B)
+# in Paper by C. Kim et al. "Stochastic simulation of reaction-diffusion
+#  systems: A fluctuating-hydrodynamics approach"
+#  J. Chem. Phys. 146, 124110 (2017).
+# You can change some relevant parameters such as
+# - cell_depth
+# - n_cells (64^2 or 256^2) and max_grid_size
+# - fixed_dt, max_step
+# - plot_int (plot files)
+# - temporal_integrator, reaction_type (numerical scheme)
+# and run this inputs file.
+
+# Problem specification
+prob_lo =  0.0  0.0       # physical lo coordinate
+prob_hi = 32.0 32.0       # physical hi coordinate
+
+# number of cells in domain and maximum number of cells in a box
+n_cells = 64 64
+max_grid_size = 16 16
+
+# to compute cell volume in 2D problems
+cell_depth = 1.
+
+# Time-step control
+fixed_dt = 0.01
+
+# Controls for number of steps between actions
+max_step = 2000000
+plot_int = 20000
+struct_fact_int = -1
+n_steps_skip = 2000
+
+seed = 1
+
+nspecies = 3
+nreaction = 7
+
+prob_type = 0
+
+n_init_in_1 = 1685.8 533.5 56.38 # Start on the limit cycle
+
+integer_populations = 1
+
+# 0=D+R (first-order splitting)
+# 1=(1/2)R + D + (1/2)R (Strang option 1)
+# 2=(1/2)D + R + (1/2)D (Strang option 2)
+# -1=unsplit forward Euler
+# -2=unsplit explicit midpoint 
+# -3=unsplit multinomial diffusion
+# -4=unsplit implicit midpoint
+temporal_integrator = 1
+
+# only used for split schemes (temporal_integrator>=0)
+# 0=explicit trapezoidal predictor/corrector
+# 1=Crank-Nicolson semi-implicit
+# 2=explicit midpoint
+# 3=multinomial diffusion
+# 4=forward Euler  
+reactDiff_diffusion_type = 3
+
+# Fickian diffusion coeffs
+D_Fick = 0.1 0.01 0.01
+
+variance_coef_mass = 1.
+initial_variance_mass = 1.
+
+# how to compute n on faces for stochastic weighting
+# 1=arithmetic (with C0-Heaviside), 2=geometric, 3=harmonic
+# 10=arithmetic average with discontinuous Heaviside function
+# 11=arithmetic average with C1-smoothed Heaviside function
+# 12=arithmetic average with C2-smoothed Heaviside function
+avg_type = 1
+
+# only used for split schemes (temporal_integrator>=0)
+# 0=first-order (deterministic, tau leaping, CLE, or SSA)
+# 1=second-order (deterministic, tau leaping, or CLE only)
+reactDiff_reaction_type = 0
+
+# 0=deterministic; 1=CLE; 2=SSA; 3=tau leap
+reaction_type = 2
+
+# BPM model is:
+#     (1) U + W --> V + W
+#     (2) V + V --> W
+#     (3) W     --> V + V
+#     (4) V     --> 0
+#     (5) 0     --> V
+#     (6) U     --> 0
+#     (7) 0     --> U
+stoich_1R = 1 0 1
+stoich_1P = 0 1 1
+stoich_2R = 0 2 0
+stoich_2P = 0 0 1
+stoich_3R = 0 0 1
+stoich_3P = 0 2 0
+stoich_4R = 0 1 0
+stoich_4P = 0 0 0
+stoich_5R = 0 0 0
+stoich_5P = 0 1 0
+stoich_6R = 1 0 0
+stoich_6P = 0 0 0
+stoich_7R = 0 0 0
+stoich_7P = 1 0 0
+
+# reaction rate constant for each reaction (assuming Law of Mass Action holds)
+# using rate_multiplier, reaction rates can be changed by the same factor
+# if include_discrete_LMA_correction, n^2 and n^3 in rate expressions become
+# n*(n-1/dv) and n*(n-1/dv)*(n-2/dv). 
+rate_const = 0.0002 0.0002 1. 0.03666663 4.44444555555 0.00333333 16.66665
+rate_multiplier = 1.
+include_discrete_LMA_correction = 1
+
+# Boundary conditions
+# ----------------------
+# BC specifications:
+# -1 = periodic
+#  1 = wall (Neumann)
+#  2 = reservoir (Dirichlet)
+bc_mass_lo = -1 -1
+bc_mass_hi = -1 -1
diff --git a/exec/reactDiff/inputs_paper_Lemarchand_3d b/exec/reactDiff/inputs_paper_Lemarchand_3d
new file mode 100644
index 000000000..0d8bec99d
--- /dev/null
+++ b/exec/reactDiff/inputs_paper_Lemarchand_3d
@@ -0,0 +1,109 @@
+# This inputs file is used for generating
+# - Figure 8 (Section V.C)
+# in Paper by C. Kim et al. "Stochastic simulation of reaction-diffusion
+#  systems: A fluctuating-hydrodynamics approach"
+#  J. Chem. Phys. 146, 124110 (2017).
+# You can change some relevant parameters such as
+# - initial_variance_mass: 0 (smooth initial condition) 1 (with fluctuations):
+# - variance_coef_mass: 0 (deterministic diffusion) 1 (stochastic)
+# - reaction_type: 0=deterministic; 1=CLE; 2=SSA; 3=tau leap
+# and run this inputs file.
+
+# Problem specification
+prob_lo =   0.0   0.0   0.0      # physical lo coordinate
+prob_hi = 512.0 512.0 512.0      # physical hi coordinate
+
+# number of cells in domain and maximum number of cells in a box
+n_cells = 256 256 256
+max_grid_size = 256 256 256
+
+# volume scale factor in 3D problems
+cell_depth = 1000.
+
+# Time-step control
+fixed_dt = 0.25
+
+# Controls for number of steps between actions
+max_step = 800
+plot_int = 10
+struct_fact_int = -1
+n_steps_skip = 200
+
+seed = 1
+
+nspecies = 2
+nreaction = 4
+
+prob_type = 5
+perturb_width = 16.    # scale factor for perturbed part in initial profile (for prob_type=4,5)
+smoothing_width = 1.   # scale factor for smoothing initial profile (for prob_type=4,5)
+
+n_init_in_1 = 2.16245 1.35018
+n_init_in_2 = 0. 10.
+
+# 0=D+R (first-order splitting)
+# 1=(1/2)R + D + (1/2)R (Strang option 1)
+# 2=(1/2)D + R + (1/2)D (Strang option 2)
+# -1=unsplit forward Euler
+# -2=unsplit explicit midpoint 
+# -3=unsplit multinomial diffusion
+# -4=unsplit implicit midpoint
+temporal_integrator = -4
+
+# only used for split schemes (temporal_integrator>=0)
+# 0=explicit trapezoidal predictor/corrector
+# 1=Crank-Nicolson semi-implicit
+# 2=explicit midpoint
+# 3=multinomial diffusion
+# 4=forward Euler  
+reactDiff_diffusion_type = 4
+
+# Fickian diffusion coeffs
+D_Fick = 1. 10.
+
+variance_coef_mass = 1.
+
+# how to compute n on faces for stochastic weighting
+# 1=arithmetic (with C0-Heaviside), 2=geometric, 3=harmonic
+# 10=arithmetic average with discontinuous Heaviside function
+# 11=arithmetic average with C1-smoothed Heaviside function
+# 12=arithmetic average with C2-smoothed Heaviside function
+avg_type = 1
+
+# only used for split schemes (temporal_integrator>=0)
+# 0=first-order (deterministic, tau leaping, CLE, or SSA)
+# 1=second-order (deterministic, tau leaping, or CLE only)
+reactDiff_reaction_type = 0
+
+# 0=deterministic; 1=CLE; 2=SSA; 3=tau leap
+reaction_type = 3
+
+#     (1) A -> 0
+#     (2) 2A + B --> 3A
+#     (3) B --> 0
+#     (4) 0 --> B
+stoich_1R = 1 0
+stoich_1P = 0 0
+stoich_2R = 2 1
+stoich_2P = 3 0
+stoich_3R = 0 1
+stoich_3P = 0 0
+stoich_4R = 0 0
+stoich_4P = 0 1
+
+# reaction rate constant for each reaction (assuming Law of Mass Action holds)
+# using rate_multiplier, reaction rates can be changed by the same factor
+# if include_discrete_LMA_correction, n^2 and n^3 in rate expressions become
+# n*(n-1/dv) and n*(n-1/dv)*(n-2/dv). 
+rate_const =  4. 1.37 1. 10.
+rate_multiplier = 0.1
+include_discrete_LMA_correction = 1
+
+# Boundary conditions
+# ----------------------
+# BC specifications:
+# -1 = periodic
+#  1 = wall (Neumann)
+#  2 = reservoir (Dirichlet)
+bc_mass_lo = -1 -1 -1
+bc_mass_hi = -1 -1 -1
diff --git a/exec/reactDiff/test_Schlogl_2d/clean.sh b/exec/reactDiff/test_Schlogl_2d/clean.sh
new file mode 100755
index 000000000..d5c9d18e1
--- /dev/null
+++ b/exec/reactDiff/test_Schlogl_2d/clean.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+rm -rf slurm-*.out plt* averagedDensity.txt
diff --git a/exec/reactDiff/test_Schlogl_2d/inputs_Schlogl_2d b/exec/reactDiff/test_Schlogl_2d/inputs_Schlogl_2d
new file mode 100644
index 000000000..c54794ec8
--- /dev/null
+++ b/exec/reactDiff/test_Schlogl_2d/inputs_Schlogl_2d
@@ -0,0 +1,101 @@
+# Problem specification
+prob_lo =  0.0  0.0       # physical lo coordinate
+prob_hi = 32.0 32.0       # physical hi coordinate
+
+# number of cells in domain and maximum number of cells in a box
+n_cells = 64 64
+max_grid_size = 16 16
+
+# to compute cell volume in 2D problems
+cell_depth = 1.
+
+# Time-step control
+fixed_dt = 0.001
+
+# Controls for number of steps between actions
+max_step = 1000000
+plot_int = 100000
+struct_fact_int = 1
+n_steps_skip = 100000
+
+seed = 0 
+
+nspecies = 1 
+nreaction = 4 
+
+prob_type = 0
+
+n_init_in_1 = 1000.
+
+integer_populations = 1
+
+# 0=D+R (first-order splitting)
+# 1=(1/2)R + D + (1/2)R (Strang option 1)
+# 2=(1/2)D + R + (1/2)D (Strang option 2)
+# -1=unsplit forward Euler
+# -2=unsplit explicit midpoint 
+# -3=unsplit multinomial diffusion
+# -4=unsplit implicit midpoint
+temporal_integrator = 1
+
+# only used for split schemes (temporal_integrator>=0)
+# 0=explicit trapezoidal predictor/corrector
+# 1=Crank-Nicolson semi-implicit
+# 2=explicit midpoint
+# 3=multinomial diffusion
+# 4=forward Euler  
+reactDiff_diffusion_type = 0
+
+# Fickian diffusion coeffs
+D_Fick = 1.
+
+variance_coef_mass = 1.
+initial_variance_mass = 1.
+
+# how to compute n on faces for stochastic weighting
+# 1=arithmetic (with C0-Heaviside), 2=geometric, 3=harmonic
+# 10=arithmetic average with discontinuous Heaviside function
+# 11=arithmetic average with C1-smoothed Heaviside function
+# 12=arithmetic average with C2-smoothed Heaviside function
+avg_type = 1
+
+# only used for split schemes (temporal_integrator>=0)
+# 0=first-order (deterministic, tau leaping, CLE, or SSA)
+# 1=second-order (deterministic, tau leaping, or CLE only)
+reactDiff_reaction_type = 0
+
+# 0=deterministic; 1=CLE; 2=SSA; 3=tau leap
+reaction_type = 2
+
+# Schlogl model is:
+#     (1) 2X --> 3X
+#     (2) 3X --> 2X
+#     (3) 0  --> X
+#     (4) X  --> 0
+stoich_1R = 2 
+stoich_1P = 3
+stoich_2R = 3 
+stoich_2P = 2 
+stoich_3R = 0 
+stoich_3P = 1 
+stoich_4R = 1 
+stoich_4P = 0
+
+# reaction rate constant for each reaction (assuming Law of Mass Action holds)
+# using rate_multiplier, reaction rates can be changed by the same factor
+# if include_discrete_LMA_correction, n^2 and n^3 in rate expressions become
+# n*(n-1/dv) and n*(n-1/dv)*(n-2/dv). 
+#rate_const = 1e-4 1e-7 200. 0.2     # thermodynamic equilibrium
+rate_const = 1e-4 2e-7 200. 0.1     # case where detailed balance is not satisfied
+
+rate_multiplier = 1.
+include_discrete_LMA_correction = 1
+
+# Boundary conditions
+# ----------------------
+# BC specifications:
+# -1 = periodic
+#  1 = wall (Neumann)
+#  2 = reservoir (Dirichlet)
+bc_mass_lo = -1 -1
+bc_mass_hi = -1 -1
diff --git a/exec/reactDiff/test_Schlogl_2d/job_script.sh b/exec/reactDiff/test_Schlogl_2d/job_script.sh
new file mode 100644
index 000000000..86337152e
--- /dev/null
+++ b/exec/reactDiff/test_Schlogl_2d/job_script.sh
@@ -0,0 +1,9 @@
+#! /bin/bash -l
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=16
+#SBATCH --partition test
+#SBATCH --time=0-00:30:00
+
+# COMMANDS HERE
+
+srun -n 16 ../main2d.gnu.MPI.ex inputs_Schlogl_2d
diff --git a/exec/reactDiff/test_Schlogl_2d/submit_job.sh b/exec/reactDiff/test_Schlogl_2d/submit_job.sh
new file mode 100755
index 000000000..66c0ac813
--- /dev/null
+++ b/exec/reactDiff/test_Schlogl_2d/submit_job.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+# LOAD NECESSARY MODULES
+
+sbatch job_script.sh
diff --git a/exec/structFactTest/GNUmakefile b/exec/structFactTest/GNUmakefile
index b39f97814..af26559ab 100644
--- a/exec/structFactTest/GNUmakefile
+++ b/exec/structFactTest/GNUmakefile
@@ -8,7 +8,8 @@ USE_OMP   = FALSE
 USE_CUDA  = FALSE
 COMP      = gnu
 DIM       = 3
-MAX_SPEC      = 8
+MAX_SPEC  = 2
+USE_FFT   = TRUE
 
 TINY_PROFILE = FALSE
 
@@ -21,7 +22,6 @@ include ../../src_analysis/Make.package
 VPATH_LOCATIONS   += ../../src_analysis/
 INCLUDE_LOCATIONS += ../../src_analysis/
 
-
 include ../../src_common/Make.package
 VPATH_LOCATIONS   += ../../src_common/
 INCLUDE_LOCATIONS += ../../src_common/
diff --git a/exec/structFactTest/main_driver.cpp b/exec/structFactTest/main_driver.cpp
index e783749d7..64e5a639b 100644
--- a/exec/structFactTest/main_driver.cpp
+++ b/exec/structFactTest/main_driver.cpp
@@ -134,7 +134,7 @@ void main_driver(const char* argv)
     // take an FFT and write them out
     MultiFab dft_real(ba, dmap, 2, 0);
     MultiFab dft_imag(ba, dmap, 2, 0);
-    structFact.ComputeFFT(struct_cc,dft_real,dft_imag,geom);
+    structFact.ComputeFFT(struct_cc,dft_real,dft_imag);
 
     WriteSingleLevelPlotfile("plt_real", dft_real, {"var1", "var2"}, geom, 0., 0);
     WriteSingleLevelPlotfile("plt_imag", dft_imag, {"var1", "var2"}, geom, 0., 0);
@@ -148,7 +148,7 @@ void main_driver(const char* argv)
     /////////////////////////////////
 
     
-    structFact.FortStructure(struct_cc,geom);
+    structFact.FortStructure(struct_cc);
       
     structFact.WritePlotFile(0,0.,geom,"plt_SF");
 
diff --git a/exec/thinFilm/main_driver.cpp b/exec/thinFilm/main_driver.cpp
index df1150845..997c17835 100644
--- a/exec/thinFilm/main_driver.cpp
+++ b/exec/thinFilm/main_driver.cpp
@@ -137,7 +137,9 @@ void main_driver(const char* argv)
 
     MultiFab height(ba, dmap, 1, 1);
     MultiFab Laph  (ba, dmap, 1, 1);
+    MultiFab disjoining(ba, dmap, 1, 1);
     Laph.setVal(0.); // prevent intermediate NaN calculations behind physical boundaries
+    disjoining.setVal(0.);
 
     // for statsitics
 
@@ -169,6 +171,11 @@ void main_driver(const char* argv)
                  gradLaph[1].define(convert(ba,nodal_flag_y), dmap, 1, 0);,
                  gradLaph[2].define(convert(ba,nodal_flag_z), dmap, 1, 0););
     
+    std::array< MultiFab, AMREX_SPACEDIM > gradDisjoining;
+    AMREX_D_TERM(gradDisjoining[0].define(convert(ba,nodal_flag_x), dmap, 1, 0);,
+                 gradDisjoining[1].define(convert(ba,nodal_flag_y), dmap, 1, 0);,
+                 gradDisjoining[2].define(convert(ba,nodal_flag_z), dmap, 1, 0););
+
     std::array< MultiFab, AMREX_SPACEDIM > flux;
     AMREX_D_TERM(flux[0]    .define(convert(ba,nodal_flag_x), dmap, 1, 0);,
                  flux[1]    .define(convert(ba,nodal_flag_y), dmap, 1, 0);,
@@ -205,6 +212,7 @@ void main_driver(const char* argv)
     // constant factor in noise term
     Real ConstNoise = 2.*k_B*T_init[0] / (3.*visc_coef);
     Real Const3dx = thinfilm_gamma / (3.*visc_coef);
+    Real Const3dx_nogamma = 1. / (3.*visc_coef);
 
     Real time = 0.;
 
@@ -365,12 +373,16 @@ void main_driver(const char* argv)
 
         }
 
-        // compute Laph
+        // compute Laph and disjoining
         for ( MFIter mfi(Laph,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
 
             const Box& bx = mfi.tilebox();
 
             const Array4<Real> & L = Laph.array(mfi);
+
+            const Array4<Real> & h = height.array(mfi);
+
+            const Array4<Real> & Disjoining = disjoining.array(mfi);
         
             AMREX_D_TERM(const Array4<Real> & gradhx = gradh[0].array(mfi);,
                          const Array4<Real> & gradhy = gradh[1].array(mfi);,
@@ -380,11 +392,14 @@ void main_driver(const char* argv)
             {
                 L(i,j,k) = x_flux_fac * (gradhx(i+1,j,k) - gradhx(i,j,k)) / dx[0]
                          + y_flux_fac * (gradhy(i,j+1,k) - gradhy(i,j,k)) / dx[1];
+
+                Disjoining(i,j,k) = thinfilm_hamaker / (6.*M_PI*std::pow(h(i,j,k),3.));
             });
         }
         Laph.FillBoundary(geom.periodicity());
+        disjoining.FillBoundary(geom.periodicity());
 
-        // compute gradLaph
+        // compute gradLaph and gradDisjoining
         for ( MFIter mfi(height,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
 
             AMREX_D_TERM(const Box & bx_x = mfi.nodaltilebox(0);,
@@ -394,17 +409,25 @@ void main_driver(const char* argv)
             AMREX_D_TERM(const Array4<Real> & gradLaphx = gradLaph[0].array(mfi);,
                          const Array4<Real> & gradLaphy = gradLaph[1].array(mfi);,
                          const Array4<Real> & gradLaphz = gradLaph[2].array(mfi););
+
+            AMREX_D_TERM(const Array4<Real> & gradDisjoiningx = gradDisjoining[0].array(mfi);,
+                         const Array4<Real> & gradDisjoiningy = gradDisjoining[1].array(mfi);,
+                         const Array4<Real> & gradDisjoiningz = gradDisjoining[2].array(mfi););
             
             const Array4<Real> & L = Laph.array(mfi);
 
+            const Array4<Real> & Disjoining = disjoining.array(mfi);
+
             amrex::ParallelFor(bx_x, bx_y,
                                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
                 gradLaphx(i,j,k) = ( L(i,j,k) - L(i-1,j,k) ) / dx[0];
+                gradDisjoiningx(i,j,k) = ( Disjoining(i,j,k) -Disjoining(i-1,j,k) ) / dx[0];
             },
                                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
                 gradLaphy(i,j,k) = ( L(i,j,k) - L(i,j-1,k) ) / dx[1];
+                gradDisjoiningy(i,j,k) = ( Disjoining(i,j,k) -Disjoining(i,j-1,k) ) / dx[1];
             });
         }
 
@@ -431,18 +454,24 @@ void main_driver(const char* argv)
                          const Array4<Real> & randfacey = randface[1].array(mfi);,
                          const Array4<Real> & randfacez = randface[2].array(mfi););
 
+            AMREX_D_TERM(const Array4<Real> & gradDisjoiningx = gradDisjoining[0].array(mfi);,
+                         const Array4<Real> & gradDisjoiningy = gradDisjoining[1].array(mfi);,
+                         const Array4<Real> & gradDisjoiningz = gradDisjoining[2].array(mfi););
+
             amrex::ParallelFor(bx_x, bx_y,
                                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
                 fluxx(i,j,k) = x_flux_fac * (
                                std::sqrt(ConstNoise*std::pow(hfacex(i,j,k),3.) / (dt*dVol)) * randfacex(i,j,k)
-                               + Const3dx * std::pow(hfacex(i,j,k),3.)*gradLaphx(i,j,k) );
+                               + Const3dx         * std::pow(hfacex(i,j,k),3.)*gradLaphx(i,j,k)
+                               + Const3dx_nogamma * std::pow(hfacex(i,j,k),3.)*gradDisjoiningx(i,j,k));
             },
                                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
                 fluxy(i,j,k) = y_flux_fac * (
                                std::sqrt(ConstNoise*std::pow(hfacey(i,j,k),3.) / (dt*dVol)) * randfacey(i,j,k)
-                               + Const3dx * std::pow(hfacey(i,j,k),3.)*gradLaphy(i,j,k) );
+                               + Const3dx         * std::pow(hfacey(i,j,k),3.)*gradLaphy(i,j,k)
+                               + Const3dx_nogamma * std::pow(hfacey(i,j,k),3.)*gradDisjoiningy(i,j,k) );
             });
 
             // lo x-faces
@@ -489,8 +518,11 @@ void main_driver(const char* argv)
             
             const Array4<Real> & h = height.array(mfi);
 
+            // amrex::Print{} << "HEIGHT " << time << " " << h(0,0,0) << " " << h(31,0,0) << std::endl;
+
             amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
+
                 h(i,j,k) -= dt * ( (fluxx(i+1,j,k) - fluxx(i,j,k)) / dx[0]
                                   +(fluxy(i,j+1,k) - fluxy(i,j,k)) / dx[1]);
             });
@@ -511,6 +543,8 @@ void main_driver(const char* argv)
             // copy distributed data into 1D data
             height_onegrid.ParallelCopy(height, 0, 0, 1);
 
+            amrex::Real sumh = 0.;
+
             for ( MFIter mfi(height_onegrid,false); mfi.isValid(); ++mfi ) {
 
                 std::ofstream hstream;
@@ -530,9 +564,14 @@ void main_driver(const char* argv)
                 for (auto j = lo.y; j <= hi.y; ++j) {
                 for (auto i = lo.x; i <= hi.x; ++i) {
                     hstream << std::setprecision(15) << mfdata(i,j,0) << " ";
+                    sumh += mfdata(i,j,0);
+                    if(j==0 && i==0){
+                       amrex::Print{} << "HEIGHT " << time << " " << mfdata(0,0,0) << " " << mfdata(31,0,0) << std::endl;
+                    }
                 }
                 hstream << "\n";
                 }
+                amrex::Print{} << "SUM " << sumh << std::endl;
 
             } // end MFIter
             
diff --git a/exec/thinFilm/thinfilm_functions.cpp b/exec/thinFilm/thinfilm_functions.cpp
index 3a4917d20..dde70c0b9 100644
--- a/exec/thinFilm/thinfilm_functions.cpp
+++ b/exec/thinFilm/thinfilm_functions.cpp
@@ -5,6 +5,7 @@
 AMREX_GPU_MANAGED amrex::Real thinfilm::thinfilm_h0;
 AMREX_GPU_MANAGED amrex::Real thinfilm::thinfilm_gamma;
 AMREX_GPU_MANAGED amrex::Real thinfilm::thinfilm_pertamp;
+AMREX_GPU_MANAGED amrex::Real thinfilm::thinfilm_hamaker;
 
 AMREX_GPU_MANAGED int thinfilm::thinfilm_icorr;
 AMREX_GPU_MANAGED int thinfilm::thinfilm_jcorr;
@@ -19,6 +20,7 @@ void InitializeThinfilmNamespace() {
     thinfilm_icorr = 0;
     thinfilm_jcorr = 0;
     thinfilm_pertamp = 0.;
+    thinfilm_hamaker = 0.;
 
     do_fft_diag = 1;
     
@@ -28,6 +30,7 @@ void InitializeThinfilmNamespace() {
     pp.get("thinfilm_gamma",thinfilm_gamma);
 
     pp.query("thinfilm_pertamp",thinfilm_pertamp);
+    pp.query("thinfilm_hamaker",thinfilm_hamaker);
 
     pp.query("thinfilm_icorr",thinfilm_icorr);
     pp.query("thinfilm_jcorr",thinfilm_jcorr);
diff --git a/exec/thinFilm/thinfilm_namespace.H b/exec/thinFilm/thinfilm_namespace.H
index 59091542f..16f3afefe 100644
--- a/exec/thinFilm/thinfilm_namespace.H
+++ b/exec/thinFilm/thinfilm_namespace.H
@@ -3,6 +3,7 @@ namespace thinfilm {
     extern AMREX_GPU_MANAGED amrex::Real thinfilm_h0;
     extern AMREX_GPU_MANAGED amrex::Real thinfilm_gamma;
     extern AMREX_GPU_MANAGED amrex::Real thinfilm_pertamp;
+    extern AMREX_GPU_MANAGED amrex::Real thinfilm_hamaker;
 
     extern AMREX_GPU_MANAGED int thinfilm_icorr;
     extern AMREX_GPU_MANAGED int thinfilm_jcorr;
diff --git a/src_MFsurfchem/MFsurfchem_functions.cpp b/src_MFsurfchem/MFsurfchem_functions.cpp
index b7805a1df..4892dae9f 100644
--- a/src_MFsurfchem/MFsurfchem_functions.cpp
+++ b/src_MFsurfchem/MFsurfchem_functions.cpp
@@ -66,7 +66,7 @@ void InitializeMFSurfchemNamespace()
     stoch_MFsurfchem = 1; // default value
     pp.query("stoch_MFsurfchem",stoch_MFsurfchem);
 
-    k_beta = 0.5; // default value
+    k_beta = -0.5; // default value
     pp.query("k_beta",k_beta);
 
     e_beta = 0.5; // default value
@@ -153,12 +153,12 @@ void sample_MFsurfchem(MultiFab& cu, MultiFab& prim, MultiFab& surfcov, MultiFab
                 amrex:: Real tempratio = prim_arr(i,j,k,4)/T_init[0];
 
                 for (int m=0;m<n_ads_spec;m++) {
-                    amrex::Real dens = cu_arr(i,j,k,5+m);   // mass density
-                    dens *= AVONUM/molmass[m];              // number density
+                    amrex::Real pres = prim_arr(i,j,k,5);   // total pressure
+                    pres *= prim_arr(i,j,k,6+nspecies+m);   // partial pressure
 
                     amrex::Real theta = surfcov_arr(i,j,k,m);
 
-  		    amrex::Real meanNads = ads_rate_const[m]*dens*(1-sumtheta)*Ntot*dt*pow(tempratio,k_beta);
+  		    amrex::Real meanNads = ads_rate_const[m]*pres*(1-sumtheta)*Ntot*dt*pow(tempratio,k_beta);
                     amrex::Real meanNdes = des_rate[m]*theta*Ntot*dt;
 
                     amrex::Real Nads;
diff --git a/src_analysis/Make.package b/src_analysis/Make.package
index e9d284030..a2c26cceb 100644
--- a/src_analysis/Make.package
+++ b/src_analysis/Make.package
@@ -1,3 +1,5 @@
+CEXE_headers += StructFact.H
 CEXE_sources += StructFact.cpp
 
-CEXE_headers += StructFact.H
+CEXE_headers += TurbSpectra.H
+CEXE_sources += TurbSpectra.cpp
diff --git a/src_analysis/StructFact.H b/src_analysis/StructFact.H
index fa1c0656e..611ab8c12 100644
--- a/src_analysis/StructFact.H
+++ b/src_analysis/StructFact.H
@@ -5,32 +5,12 @@
 #include <AMReX_MultiFab.H>
 #include <AMReX_Vector.H>
 #include <AMReX_VisMF.H>
-
-// These are for FFTW / cuFFT / rocFFT
-
-#ifdef AMREX_USE_CUDA
-#include <cufft.h>
-#elif AMREX_USE_HIP
-#  if __has_include(<rocfft/rocfft.h>)  // ROCm 5.3+
-#    include <rocfft/rocfft.h>
-#  else
-#    include <rocfft.h>
-#  endif
-#else
-#include <fftw3.h>
-#if AMREX_USE_MPI
-#include <fftw3-mpi.h>
-#endif
-#endif
-
 #include <AMReX_GpuComplex.H>
 
 #include <string>
 
 #include "common_functions.H"
 
-#define ALIGN 16
-
 using namespace amrex;
 
 class StructFact {
@@ -44,9 +24,6 @@ class StructFact {
     // Total number of states to average over, updated by FortStructure()
     int nsamples = 0;
 
-    // decompose velocity field
-    bool decompose = false;
-
     // Vector containing covariance scaling
     Vector< Real > scaling;
 
@@ -70,82 +47,61 @@ public:
     // Vector of MultiFabs containing final magnitude of covariances
     MultiFab cov_mag;
 
-    // MultiFabs of real/imag for solenoidal/dilatational
-    MultiFab vel_sol_real;
-    MultiFab vel_sol_imag;
-    MultiFab vel_dil_real;
-    MultiFab vel_dil_imag;
-
     StructFact();
 
-    StructFact(const amrex::BoxArray&, const amrex::DistributionMapping&, 
-               const amrex::Vector< std::string >&,
-               const amrex::Vector< amrex::Real >&,
-               const int& verbosity=0);
-
-    StructFact(const amrex::BoxArray&, const amrex::DistributionMapping&, 
-               const amrex::Vector< std::string >&,
-               const amrex::Vector< amrex::Real >&,
-               const amrex::Vector< int >&, const amrex::Vector< int >&,
-               const int& verbosity=0);
-
-    void define(const amrex::BoxArray&, const amrex::DistributionMapping&, 
-                const amrex::Vector< std::string >&,
-                const amrex::Vector< amrex::Real >&,
-                const int& verbosity=0);
-
-    void define(const amrex::BoxArray&, const amrex::DistributionMapping&, 
-                const amrex::Vector< std::string >&,
-                const amrex::Vector< amrex::Real >&,
-                const amrex::Vector< int >&, const amrex::Vector< int >&,
-                const int& verbosity=0);
-
-    void defineDecomp(const amrex::BoxArray&, const amrex::DistributionMapping&, 
-                      const amrex::Vector< std::string >&,
-                      const amrex::Vector< amrex::Real >&,
-                      const amrex::Vector< int >&,
-                      const amrex::Vector< int >&);
-
-    void FortStructure(const amrex::MultiFab&, const amrex::Geometry&,
+    StructFact(const BoxArray& ba_in,
+               const DistributionMapping& dmap_in,
+               const Vector< std::string >& var_names,
+               const Vector< Real >& var_scaling_in,
+               const int& verbosity_in=0);
+
+    StructFact(const BoxArray& ba_in,
+               const DistributionMapping& dmap_in,
+               const Vector< std::string >& var_names,
+               const Vector< Real >& var_scaling_in,
+               const Vector< int >& s_pairA_in,
+               const Vector< int >& s_pairB_in,
+               const int& verbosity_in=0);
+
+    void define(const BoxArray& ba_in,
+                const DistributionMapping& dmap_in,
+                const Vector< std::string >& var_names,
+                const Vector< Real >& var_scaling_in,
+                const int& verbosity_in=0);
+
+    void define(const BoxArray& ba_in,
+                const DistributionMapping& dmap_in,
+                const Vector< std::string >& var_names,
+                const Vector< Real >& var_scaling_in,
+                const Vector< int >& s_pairA_in,
+                const Vector< int >& s_pairB_in,
+                const int& verbosity_in=0);
+
+    void FortStructure(const amrex::MultiFab&,
                        const int& reset=0);
 
-    void FortStructureDecomp(const amrex::MultiFab& vel, const amrex::Geometry& geom,
-                             const int& reset=0);
-
-    void DecomposeVelFourier(const amrex::MultiFab& vel_dft_real, 
-                             const amrex::MultiFab& vel_dft_imag, 
-                             const amrex::Geometry& geom);
-
     void Reset();
     
     void ComputeFFT(const amrex::MultiFab&, amrex::MultiFab&,
-                    amrex::MultiFab&, const amrex::Geometry&,
+                    amrex::MultiFab&,
                     bool unpack=true);
-    
-    void InverseFFT(amrex::MultiFab&, const amrex::MultiFab&,
-                    const amrex::MultiFab&, const amrex::Geometry&);
 
-    void GetDecompVel(amrex::MultiFab&, const amrex::Geometry&); 
-    
     void WritePlotFile(const int, const amrex::Real, const amrex::Geometry&, 
                        std::string, const int& zero_avg=1);
     
-    void Finalize(amrex::MultiFab&, amrex::MultiFab&, const Geometry& geom,
+    void Finalize(amrex::MultiFab&, amrex::MultiFab&,
                   const int& zero_avg=1);
 
-    void CallFinalize(const Geometry& geom, const int& zero_avg=1);
+    void CallFinalize(const int& zero_avg=1);
     
-    void ShiftFFT(amrex::MultiFab&,  const Geometry& geom,
+    void ShiftFFT(amrex::MultiFab&,
                   const int& zero_avg=1);
 
-    void IntegratekShells(const int& step, const amrex::Geometry& geom, const std::string& name="");
-
-    void IntegratekShellsScalar(const int& step, const amrex::Geometry& geom, const amrex::Vector< std::string >& names);
+    void IntegratekShells(const int& step, const std::string& name="");
 
-    void IntegratekShellsDecomp(const int& step, const amrex::Geometry& geom, 
-                                const std::string& name_sol="vel_sol", const std::string& name_dil="vel_dil");
+    void IntegratekShellsScalar(const int& step, const amrex::Vector< std::string >& names);
 
-    void AddToExternal(amrex::MultiFab& x_mag, amrex::MultiFab& x_realimag, const amrex::Geometry&, const int& zero_avg=1);
+    void AddToExternal(amrex::MultiFab& x_mag, amrex::MultiFab& x_realimag, const int& zero_avg=1);
 
     int get_ncov() const { return NCOV; }
 
diff --git a/src_analysis/StructFact.cpp b/src_analysis/StructFact.cpp
index 115076ca9..06b837d91 100644
--- a/src_analysis/StructFact.cpp
+++ b/src_analysis/StructFact.cpp
@@ -5,64 +5,17 @@
 #include "AMReX_PlotFileUtil.H"
 #include "AMReX_BoxArray.H"
 
-#ifdef AMREX_USE_CUDA
-std::string cufftErrorToString (const cufftResult& err)
-{
-    switch (err) {
-    case CUFFT_SUCCESS:  return "CUFFT_SUCCESS";
-    case CUFFT_INVALID_PLAN: return "CUFFT_INVALID_PLAN";
-    case CUFFT_ALLOC_FAILED: return "CUFFT_ALLOC_FAILED";
-    case CUFFT_INVALID_TYPE: return "CUFFT_INVALID_TYPE";
-    case CUFFT_INVALID_VALUE: return "CUFFT_INVALID_VALUE";
-    case CUFFT_INTERNAL_ERROR: return "CUFFT_INTERNAL_ERROR";
-    case CUFFT_EXEC_FAILED: return "CUFFT_EXEC_FAILED";
-    case CUFFT_SETUP_FAILED: return "CUFFT_SETUP_FAILED";
-    case CUFFT_INVALID_SIZE: return "CUFFT_INVALID_SIZE";
-    case CUFFT_UNALIGNED_DATA: return "CUFFT_UNALIGNED_DATA";
-    default: return std::to_string(err) + " (unknown error code)";
-    }
-}
-#endif
-
-#ifdef AMREX_USE_HIP
-std::string rocfftErrorToString (const rocfft_status err)
-{
-    if              (err == rocfft_status_success) {
-        return std::string("rocfft_status_success");
-    } else if       (err == rocfft_status_failure) {
-        return std::string("rocfft_status_failure");
-    } else if       (err == rocfft_status_invalid_arg_value) {
-        return std::string("rocfft_status_invalid_arg_value");
-    } else if       (err == rocfft_status_invalid_dimensions) {
-        return std::string("rocfft_status_invalid_dimensions");
-    } else if       (err == rocfft_status_invalid_array_type) {
-        return std::string("rocfft_status_invalid_array_type");
-    } else if       (err == rocfft_status_invalid_strides) {
-        return std::string("rocfft_status_invalid_strides");
-    } else if       (err == rocfft_status_invalid_distance) {
-        return std::string("rocfft_status_invalid_distance");
-    } else if       (err == rocfft_status_invalid_offset) {
-        return std::string("rocfft_status_invalid_offset");
-    } else {
-        return std::to_string(err) + " (unknown error code)";
-    }
-}
-
-void assert_rocfft_status (std::string const& name, rocfft_status status)
-{
-    if (status != rocfft_status_success) {
-        amrex::AllPrint() <<  name + " failed! Error: " + rocfftErrorToString(status) << "\n";;
-    }
-}
-#endif
+#include <AMReX_FFT.H>
 
+// blank constructor
 StructFact::StructFact()
 {}
 
-// var_names contains the names of all variables under consideration
-// this constructor computes the covariances of all possible pairs of variables
+// this constructor takes in var_names, which contains the names of all variables under consideration
+// we will compute the covariances of all possible pairs of variables
 // var_scaling must be sized to match the total number of pairs of variables
-StructFact::StructFact(const BoxArray& ba_in, const DistributionMapping& dmap_in,
+StructFact::StructFact(const BoxArray& ba_in,
+                       const DistributionMapping& dmap_in,
 		       const Vector< std::string >& var_names,
 		       const Vector< Real >& var_scaling_in,
 		       const int& verbosity_in) {
@@ -71,10 +24,11 @@ StructFact::StructFact(const BoxArray& ba_in, const DistributionMapping& dmap_in
 
 }
 
-// var_names contains the names of all variables under consideration
-// this constructor compute the covariances of the pairs of variables defined in s_pairA/B_in
+// this constructor takes in var_names, which contains the names of all variables under consideration
+// we will compute the covariances of the pairs of variables defined in s_pairA/B_in
 // var_scaling must be sized to match the total number of pairs of variables
-StructFact::StructFact(const BoxArray& ba_in, const DistributionMapping& dmap_in,
+StructFact::StructFact(const BoxArray& ba_in,
+                       const DistributionMapping& dmap_in,
 		       const Vector< std::string >& var_names,
 		       const Vector< Real >& var_scaling_in,
 		       const Vector< int >& s_pairA_in,
@@ -85,9 +39,11 @@ StructFact::StructFact(const BoxArray& ba_in, const DistributionMapping& dmap_in
 
 }
 
-
-// this builds a list of all possible pairs of variables and calls define()
-void StructFact::define(const BoxArray& ba_in, const DistributionMapping& dmap_in,
+// this define takes in var_names, which contains the names of all variables under consideration
+// we will compute the covariances of all possible pairs of variables
+// var_scaling must be sized to match the total number of pairs of variables
+void StructFact::define(const BoxArray& ba_in,
+                        const DistributionMapping& dmap_in,
                         const Vector< std::string >& var_names,
                         const Vector< Real >& var_scaling_in,
                         const int& verbosity_in) {
@@ -110,7 +66,11 @@ void StructFact::define(const BoxArray& ba_in, const DistributionMapping& dmap_i
 
 }
 
-void StructFact::define(const BoxArray& ba_in, const DistributionMapping& dmap_in,
+// this define takes in var_names, which contains the names of all variables under consideration
+// we will compute the covariances of the pairs of variables defined in s_pairA/B_in
+// var_scaling must be sized to match the total number of pairs of variables
+void StructFact::define(const BoxArray& ba_in,
+                        const DistributionMapping& dmap_in,
                         const Vector< std::string >& var_names,
                         const Vector< Real >& var_scaling_in,
                         const Vector< int >& s_pairA_in,
@@ -186,12 +146,6 @@ void StructFact::define(const BoxArray& ba_in, const DistributionMapping& dmap_i
     }
   }
 
-  /*
-  for (int n=0; n<NVARU; n++) {
-    Print() << "HACK 1: vector (" << n << ") = " << varu_temp[n] << std::endl;
-  }
-  */
-
   // Identify number of repeats
   int N_dup = 0;
   for (int n=1; n<NVARU; n++) {
@@ -251,85 +205,18 @@ void StructFact::define(const BoxArray& ba_in, const DistributionMapping& dmap_i
   }
 }
 
-void StructFact::defineDecomp(const amrex::BoxArray& ba_in,
-                              const amrex::DistributionMapping& dmap_in,
-                              const Vector< std::string >& /*var_names*/,
-                              const amrex::Vector< amrex::Real >& var_scaling_in,
-                              const Vector< int >& s_pairA_in,
-                              const Vector< int >& s_pairB_in)
+void StructFact::FortStructure(const MultiFab& variables,
+                               const int& reset)
 {
-
-  BL_PROFILE_VAR("StructFact::defineDecomp()",StructFactDefineDecomp);
-
-  decompose = true;
-  
-  if (s_pairA_in.size() != s_pairB_in.size())
-        amrex::Error("StructFact::define() - Must have an equal number of components");
-
-  NVAR = 3;
-  NCOV = 6;
-  scaling.resize(NCOV);
-  for (int n=0; n<NCOV; n++) {
-      scaling[n] = 1.0/var_scaling_in[n];
-  }
-  
-  s_pairA.resize(3);
-  s_pairB.resize(3);
-  
-  // Set vectors identifying covariance pairs
-  for (int n=0; n<3; n++) {
-    s_pairA[n] = s_pairA_in[n];
-    s_pairB[n] = s_pairB_in[n];
-  }
-  
-  // Create vector of unique variable indices to select which to take the FFT
-  NVARU = NVAR;    // temporary before selecting unique variables
-  var_u.resize(NVARU);
-  for (int n=0; n<NVARU; n++) {
-    var_u[n] = s_pairA[n];
-  }
-
-  //BoxArray ba_onegrid;
-  //{
-  //  Box domain = geom.Domain();
-  //  ba_onegrid.define(domain);
-  //}
-  //DistributionMapping dmap_onegrid(ba_onegrid);
-
-  vel_sol_real.define(ba_in, dmap_in, 3, 0);
-  vel_sol_imag.define(ba_in, dmap_in, 3, 0);
-  vel_dil_real.define(ba_in, dmap_in, 3, 0);
-  vel_dil_imag.define(ba_in, dmap_in, 3, 0);
-
-  cov_real.define(ba_in, dmap_in, 6, 0);
-  cov_imag.define(ba_in, dmap_in, 6, 0);
-  cov_mag.define( ba_in, dmap_in, 6, 0);
-  cov_real.setVal(0.0);
-  cov_imag.setVal(0.0);
-  cov_mag.setVal (0.0);
-  
-  NCOV = 6;
-  NVAR = 3;
-  scaling.resize(NCOV);
-  for (int n=0; n<NCOV; n++) {
-      scaling[n] = 1.0/var_scaling_in[n];
-  }
-
-}
-
-void StructFact::FortStructure(const MultiFab& variables, const Geometry& geom,
-                               const int& reset) {
-
   BL_PROFILE_VAR("StructFact::FortStructure()",FortStructure);
 
   const BoxArray& ba = variables.boxArray();
   const DistributionMapping& dm = variables.DistributionMap();
   
-  MultiFab variables_dft_real, variables_dft_imag;
-  variables_dft_real.define(ba, dm, NVAR, 0);
-  variables_dft_imag.define(ba, dm, NVAR, 0);
+  MultiFab variables_dft_real(ba, dm, NVAR, 0);
+  MultiFab variables_dft_imag(ba, dm, NVAR, 0);
 
-  ComputeFFT(variables, variables_dft_real, variables_dft_imag, geom);
+  ComputeFFT(variables, variables_dft_real, variables_dft_imag);
 
   // temporary storage built on BoxArray and DistributionMapping of "variables"
   // One case where "variables" and "cov_real/imag/mag" may have different DistributionMappings
@@ -402,117 +289,6 @@ void StructFact::FortStructure(const MultiFab& variables, const Geometry& geom,
   
 }
 
-void StructFact::FortStructureDecomp(const MultiFab& vel, const Geometry& geom,
-                                     const int& reset) 
-{
-
-
-  BL_PROFILE_VAR("StructFact::FortStructureDecomp()",FortStructureDecomp);
-
-  if (!decompose) amrex::Error("StructFact::FortStructureDecomp() is specific for vel decomposition in turbulence");
-
-  const BoxArray& ba = vel.boxArray();
-  const DistributionMapping& dm = vel.DistributionMap();
-  
-  MultiFab vel_dft_real, vel_dft_imag;
-  //BoxArray ba_onegrid;
-  //{
-  //  Box domain = geom.Domain();
-  //  ba_onegrid.define(domain);
-  //}
-  //DistributionMapping dmap_onegrid(ba_onegrid);
-  vel_dft_real.define(ba, dm, 3, 0);
-  vel_dft_imag.define(ba, dm, 3, 0);
-
-  ComputeFFT(vel, vel_dft_real, vel_dft_imag, geom);
-  
-  DecomposeVelFourier(vel_dft_real, vel_dft_imag, geom);
-
-  // temporary storage built on BoxArray and DistributionMapping of "variables"
-  // One case where "variables" and "cov_real/imag/mag" may have different DistributionMappings
-  // is for flattened MFs with one grid newly built flattened MFs may be on a different
-  // processor than the flattened MF used to build cov_real/imag/mag
-  // or in general, problems that are not perfectly load balanced
-  MultiFab cov_temp;
-  cov_temp.define(ba, dm, 1, 0);
-
-  // temporary storage built on BoxArray and DistributionMapping of "cov_real/imag/mag"
-  MultiFab cov_temp2;
-  cov_temp2.define(cov_real.boxArray(), cov_real.DistributionMap(), 1, 0);
- 
-  // solenoidal
-  int index = 0;
-  for (int n=0; n<3; n++) {
-    cov_temp.setVal(0.0);
-    MultiFab::AddProduct(cov_temp,vel_sol_real,n,vel_sol_real,n,0,1,0);
-    MultiFab::AddProduct(cov_temp,vel_sol_imag,n,vel_sol_imag,n,0,1,0);
-
-    // copy into a MF with same ba and dm as cov_real/imag/mag
-    cov_temp2.ParallelCopy(cov_temp,0,0,1);
-        
-    if (reset == 1) {
-        MultiFab::Copy(cov_real,cov_temp2,0,index,1,0);
-    } else {        
-        MultiFab::Add(cov_real,cov_temp2,0,index,1,0);
-    }
-
-    // Imaginary component of covariance
-    cov_temp.setVal(0.0);
-    MultiFab::AddProduct(cov_temp,vel_sol_imag,n,vel_sol_real,n,0,1,0);
-    cov_temp.mult(-1.0,0);
-    MultiFab::AddProduct(cov_temp,vel_sol_real,n,vel_sol_imag,n,0,1,0);
-
-    // copy into a MF with same ba and dm as cov_real/imag/mag
-    cov_temp2.ParallelCopy(cov_temp,0,0,1);
-    
-    if (reset == 1) {
-        MultiFab::Copy(cov_imag,cov_temp2,0,index,1,0);
-    } else {
-        MultiFab::Add(cov_imag,cov_temp2,0,index,1,0);
-    }
-    index++;
-  }
-  
-  // dilatational
-  for (int n=0; n<3; n++) {
-    cov_temp.setVal(0.0);
-    MultiFab::AddProduct(cov_temp,vel_dil_real,n,vel_dil_real,n,0,1,0);
-    MultiFab::AddProduct(cov_temp,vel_dil_imag,n,vel_dil_imag,n,0,1,0);
-
-    // copy into a MF with same ba and dm as cov_real/imag/mag
-    cov_temp2.ParallelCopy(cov_temp,0,0,1);
-        
-    if (reset == 1) {
-        MultiFab::Copy(cov_real,cov_temp2,0,index,1,0);
-    } else {        
-        MultiFab::Add(cov_real,cov_temp2,0,index,1,0);
-    }
-
-    // Imaginary component of covariance
-    cov_temp.setVal(0.0);
-    MultiFab::AddProduct(cov_temp,vel_dil_imag,n,vel_dil_real,n,0,1,0);
-    cov_temp.mult(-1.0,0);
-    MultiFab::AddProduct(cov_temp,vel_dil_real,n,vel_dil_imag,n,0,1,0);
-
-    // copy into a MF with same ba and dm as cov_real/imag/mag
-    cov_temp2.ParallelCopy(cov_temp,0,0,1);
-    
-    if (reset == 1) {
-        MultiFab::Copy(cov_imag,cov_temp2,0,index,1,0);
-    } else {
-        MultiFab::Add(cov_imag,cov_temp2,0,index,1,0);
-    }
-    index++;
-  }
-
-  if (reset == 1) {
-      nsamples = 1;
-  } else {
-      nsamples++;
-  }
-
-}
-
 void StructFact::Reset() {
 
     BL_PROFILE_VAR("StructFact::Reset()", StructFactReset);
@@ -526,76 +302,61 @@ void StructFact::Reset() {
 void StructFact::ComputeFFT(const MultiFab& variables,
 			    MultiFab& variables_dft_real, 
 			    MultiFab& variables_dft_imag,
-			    const Geometry& geom,
-                bool unpack)
+                            bool unpack)
 {
 
     BL_PROFILE_VAR("StructFact::ComputeFFT()", ComputeFFT);
 
-#ifdef AMREX_USE_CUDA
-    // Print() << "Using cuFFT\n";
-#elif AMREX_USE_HIP
-    // Print() << "Using rocFFT\n";
-#else
-    // Print() << "Using FFTW\n";
-#endif
-
-    bool is_flattened = false;
-
-    long npts;
-
-    // Initialize the boxarray "ba_onegrid" from the single box "domain"
-    BoxArray ba_onegrid;
-    {
-      Box domain = geom.Domain();
-      ba_onegrid.define(domain);
+    Box domain = variables.boxArray().minimalBox();
+    bool chopped_in_x = false;
+    bool chopped_in_y = false;
+    bool chopped_in_z = false;
 
-      if (domain.bigEnd(AMREX_SPACEDIM-1) == 0) {
-          is_flattened = true;
-      }
-
-#if (AMREX_SPACEDIM == 2)
-      npts = (domain.length(0)*domain.length(1));
-#elif (AMREX_SPACEDIM == 3)
-      npts = (domain.length(0)*domain.length(1)*domain.length(2));
+    // figure out which direction the spectral box will be chopped
+    if (domain.length(0) > 1) {
+        chopped_in_x = true;
+    } else if (domain.length(1) > 1) {
+        chopped_in_y = true;
+#if (AMREX_SPACEDIM == 3)
+    } else if (domain.length(2) > 1) {
+        chopped_in_z = true;
 #endif
-
+    } else {
+        Abort("Calling ComputeFFT for a MultiFab with only 1 cell");
     }
 
+    // compute number of points in the domain and the square root
+    long npts = (AMREX_SPACEDIM == 2) ? (domain.length(0)*domain.length(1)) : (domain.length(0)*domain.length(1)*domain.length(2));
     Real sqrtnpts = std::sqrt(npts);
 
-    DistributionMapping dmap_onegrid(ba_onegrid);
+    // extract BoxArray and DistributionMapping from variables
+    BoxArray ba = variables.boxArray();
+    DistributionMapping dm = variables.DistributionMap();
 
-    // we will take one FFT at a time and copy the answer into the
-    // corresponding component
-    MultiFab variables_onegrid;
-    MultiFab variables_dft_real_onegrid;
-    MultiFab variables_dft_imag_onegrid;
-    variables_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0);
-    variables_dft_real_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0);
-    variables_dft_imag_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0);
-
-//    fftw_mpi_init();
-
-#ifdef AMREX_USE_CUDA
-    using FFTplan = cufftHandle;
-    using FFTcomplex = cuDoubleComplex;
-#elif AMREX_USE_HIP
-    using FFTplan = rocfft_plan;
-    using FFTcomplex = double2;
-#else
-    using FFTplan = fftw_plan;
-    using FFTcomplex = fftw_complex;
-#endif
+    // create storage for one component of variables
+    MultiFab phi(ba,dm,1,0);
+
+    // Initialize the boxarray "ba_onegrid" from the single box "domain"
+    // Initialize a DistributionMapping for one grid
+    BoxArray ba_onegrid(domain);
+    DistributionMapping dm_onegrid(ba_onegrid);
+
+    // create amrex::FFT object
+    amrex::FFT::R2C my_fft(domain);
 
-    // contain to store FFT - note it is shrunk by "half" in x
-    Vector<std::unique_ptr<BaseFab<GpuComplex<Real> > > > spectral_field;
+    // create storage for the FFT (distributed and single-grid)
+    auto const& [ba_fft, dm_fft] = my_fft.getSpectralDataLayout();
+    FabArray<BaseFab<GpuComplex<amrex::Real> > > phi_fft(ba_fft, dm_fft, 1, 0);
 
-    Vector<FFTplan> forward_plan;
+    Box domain_fft = ba_fft.minimalBox();
+    BoxArray ba_fft_onegrid(domain_fft);
+    FabArray<BaseFab<GpuComplex<amrex::Real> > > phi_fft_onegrid(ba_fft_onegrid, dm_onegrid, 1, 0);
 
-    // for CUDA builds we only need to build the plan once; track whether we did
-    bool built_plan = false;
+    MultiFab variables_dft_real_onegrid(ba_onegrid,dm_onegrid,1,0);
+    MultiFab variables_dft_imag_onegrid(ba_onegrid,dm_onegrid,1,0);
     
+    // we will take one FFT at a time and copy the answer into the
+    // corresponding component of variables_dft_real/imag
     for (int comp=0; comp<NVAR; comp++) {
 
         bool comp_fft = false;
@@ -606,568 +367,144 @@ void StructFact::ComputeFFT(const MultiFab& variables,
             }
         }
 
-	    if (comp_fft == false) continue;
-
-        variables_onegrid.ParallelCopy(variables,comp,0,1);
-
-        if (!built_plan) {
-
-            for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) {
-
-                // grab a single box including ghost cell range
-                Box realspace_bx = mfi.fabbox();
-
-                // size of box including ghost cell range
-                IntVect fft_size = realspace_bx.length(); // This will be different for hybrid FFT
-
-                // this is the size of the box, except the 0th component is 'halved plus 1'
-                IntVect spectral_bx_size = fft_size;
-                spectral_bx_size[0] = fft_size[0]/2 + 1;
-
-                // spectral box
-                Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1));
-
-                spectral_field.emplace_back(new BaseFab<GpuComplex<Real> >(spectral_bx,1,
-                                                                       The_Device_Arena()));
-                spectral_field.back()->setVal<RunOn::Device>(0.0); // touch the memory
-
-                FFTplan fplan;
-
-#ifdef AMREX_USE_CUDA // CUDA
-                if (is_flattened) {
-#if (AMREX_SPACEDIM == 2)
-                    cufftResult result = cufftPlan1d(&fplan, fft_size[0], CUFFT_D2Z, 1);
-                    if (result != CUFFT_SUCCESS) {
-                        amrex::AllPrint() << " cufftplan1d forward failed! Error: "
-                                          << cufftErrorToString(result) << "\n";
-                    }
-#elif (AMREX_SPACEDIM == 3)
-                    cufftResult result = cufftPlan2d(&fplan, fft_size[1], fft_size[0], CUFFT_D2Z);
-                    if (result != CUFFT_SUCCESS) {
-                        amrex::AllPrint() << " cufftplan2d forward failed! Error: "
-                                          << cufftErrorToString(result) << "\n";
-                    }
-#endif
-                } else {
-#if (AMREX_SPACEDIM == 2)
-                    cufftResult result = cufftPlan2d(&fplan, fft_size[1], fft_size[0], CUFFT_D2Z);
-                    if (result != CUFFT_SUCCESS) {
-                        amrex::AllPrint() << " cufftplan2d forward failed! Error: "
-                                          << cufftErrorToString(result) << "\n";
-                    }
-#elif (AMREX_SPACEDIM == 3)
-                    cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z);
-                    if (result != CUFFT_SUCCESS) {
-                        amrex::AllPrint() << " cufftplan3d forward failed! Error: "
-                                          << cufftErrorToString(result) << "\n";
-                    }
-#endif
-                }
-#elif AMREX_USE_HIP // HIP
-                if (is_flattened) {
-#if (AMREX_SPACEDIM == 2)
-                    const std::size_t lengths[] = {std::size_t(fft_size[0])};
-                    rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, 
-                                                              rocfft_transform_type_real_forward, rocfft_precision_double,
-                                                              1, lengths, 1, nullptr);
-                    assert_rocfft_status("rocfft_plan_create", result);
-#elif (AMREX_SPACEDIM == 3)
-                    const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1])};
-                    rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, 
-                                                              rocfft_transform_type_real_forward, rocfft_precision_double,
-                                                              2, lengths, 1, nullptr);
-                    assert_rocfft_status("rocfft_plan_create", result);
-#endif
-                } else {
-#if (AMREX_SPACEDIM == 2)
-                    const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1])};
-                    rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, 
-                                                              rocfft_transform_type_real_forward, rocfft_precision_double,
-                                                              2, lengths, 1, nullptr);
-                    assert_rocfft_status("rocfft_plan_create", result);
-#elif (AMREX_SPACEDIM == 3)
-                    const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])};
-                    rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, 
-                                                              rocfft_transform_type_real_forward, rocfft_precision_double,
-                                                              3, lengths, 1, nullptr);
-                    assert_rocfft_status("rocfft_plan_create", result);
-#endif
-                }
-#else // host
-
-                if (is_flattened) {
-#if (AMREX_SPACEDIM == 2)
-                    fplan = fftw_plan_dft_r2c_1d(fft_size[0],
-                                                 variables_onegrid[mfi].dataPtr(),
-                                                 reinterpret_cast<FFTcomplex*>
-                                                 (spectral_field.back()->dataPtr()),
-                                                 FFTW_ESTIMATE);
-#elif (AMREX_SPACEDIM == 3)
-                    fplan = fftw_plan_dft_r2c_2d(fft_size[1], fft_size[0],
-                                                 variables_onegrid[mfi].dataPtr(),
-                                                 reinterpret_cast<FFTcomplex*>
-                                                 (spectral_field.back()->dataPtr()),
-                                                 FFTW_ESTIMATE);
-#endif
-                } else {
-#if (AMREX_SPACEDIM == 2)
-                    fplan = fftw_plan_dft_r2c_2d(fft_size[1], fft_size[0],
-                                                 variables_onegrid[mfi].dataPtr(),
-                                                 reinterpret_cast<FFTcomplex*>
-                                                 (spectral_field.back()->dataPtr()),
-                                                 FFTW_ESTIMATE);
-#elif (AMREX_SPACEDIM == 3)
-                    fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0],
-                                                 variables_onegrid[mfi].dataPtr(),
-                                                 reinterpret_cast<FFTcomplex*>
-                                                 (spectral_field.back()->dataPtr()),
-                                                 FFTW_ESTIMATE);
-#endif
-                }
-#endif
-
-                forward_plan.push_back(fplan);
-            }
-
-	    built_plan = true;
-        
-        }
+        if (comp_fft == false) continue;
 
-        ParallelDescriptor::Barrier();
+        // copy component "comp" into a MultiFab with one component
+        MultiFab::Copy(phi,variables,comp,0,1,0);
 
         // ForwardTransform
-        for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) {
-            int i = mfi.LocalIndex();
-#ifdef AMREX_USE_CUDA
-            cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream());
-            cufftResult result = cufftExecD2Z(forward_plan[i],
-                                              variables_onegrid[mfi].dataPtr(),
-                                              reinterpret_cast<FFTcomplex*>
-                                                  (spectral_field[i]->dataPtr()));
-            if (result != CUFFT_SUCCESS) {
-                amrex::AllPrint() << " forward transform using cufftExec failed! Error: "
-                                  << cufftErrorToString(result) << "\n";
-	    }
-#elif AMREX_USE_HIP
-            rocfft_execution_info execinfo = nullptr;
-            rocfft_status result = rocfft_execution_info_create(&execinfo);
-            assert_rocfft_status("rocfft_execution_info_create", result);
-
-            std::size_t buffersize = 0;
-            result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize);
-            assert_rocfft_status("rocfft_plan_get_work_buffer_size", result);
-
-            void* buffer = amrex::The_Arena()->alloc(buffersize);
-            result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize);
-            assert_rocfft_status("rocfft_execution_info_set_work_buffer", result);
-
-            result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream());
-            assert_rocfft_status("rocfft_execution_info_set_stream", result);
-
-	        amrex::Real* variables_onegrid_ptr = variables_onegrid[mfi].dataPtr();
-	        FFTcomplex* spectral_field_ptr = reinterpret_cast<FFTcomplex*>(spectral_field[i]->dataPtr());
-            result = rocfft_execute(forward_plan[i],
-                                    (void**) &variables_onegrid_ptr, // in
-                                    (void**) &spectral_field_ptr, // out
-                                    execinfo);
-            assert_rocfft_status("rocfft_execute", result);
-            amrex::Gpu::streamSynchronize();
-            amrex::The_Arena()->free(buffer);
-            result = rocfft_execution_info_destroy(execinfo);
-            assert_rocfft_status("rocfft_execution_info_destroy", result);
-#else
-            fftw_execute(forward_plan[i]);
-#endif
-        }
+        my_fft.forward(phi,phi_fft);
+
+        // copy the distributed FFT result (phi_fft) into a single-grid FabArray
+        phi_fft_onegrid.ParallelCopy(phi_fft,0,0,1);
 
         // copy data to a full-sized MultiFab
         // this involves copying the complex conjugate from the half-sized field
         // into the appropriate place in the full MultiFab
         for (MFIter mfi(variables_dft_real_onegrid); mfi.isValid(); ++mfi) {
 
-            Array4< GpuComplex<Real> > spectral = (*spectral_field[0]).array();
+            Box bx = mfi.fabbox();
+
+            Array4<GpuComplex<Real>> spectral = phi_fft_onegrid.array(mfi);
 
             Array4<Real> const& realpart = variables_dft_real_onegrid.array(mfi);
             Array4<Real> const& imagpart = variables_dft_imag_onegrid.array(mfi);
 
-            Box bx = mfi.fabbox();
+            /*
+              Unpacking rules:
 
-            amrex::ParallelFor(bx,
-            [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
-            {
-                /*
-                  Unpacking rules:
+              For domains from (0,0,0) to (Nx-1,Ny-1,Nz-1) and chopped_in_x (i.e., Nx > 1)
 
-                  For domains from (0,0,0) to (Nx-1,Ny-1,Nz-1)
+              For any cell with i index > Nx/2, the value is the complex conjugate of the entry at
+              (Nx-i,Ny-j,Nz-k), EXCEPT that when j (or k) is zero the mirrored index is 0, not Ny (or Nz).
 
-                  For any cells with i index > Nx/2, these values are complex conjugates of the corresponding
-                  entry where (Nx-i,Ny-j,Nz-k) UNLESS that index is zero, in which case you use 0.
+              e.g. for an 8^3 domain (Nx/2 = 4):
 
-                  e.g. for an 8^3 domain, any cell with i index
+              Cell (6,2,3) is complex conjugate of (2,6,5)
 
-                  Cell (6,2,3) is complex conjugate of (2,6,5)
+              Cell (4,1,0) is complex conjugate of (4,7,0)  (note that the FFT is computed for 0 <= i <= Nx/2)
 
-                  Cell (4,1,0) is complex conjugate of (4,7,0)  (note that the FFT is computed for 0 <= i <= Nx/2)
-                */
-                if (i <= bx.length(0)/2) {
-                    // copy value
-                    realpart(i,j,k) = spectral(i,j,k).real();
-                    imagpart(i,j,k) = spectral(i,j,k).imag();
-                } else {
-                    // copy complex conjugate
-                    int iloc = bx.length(0)-i;
-                    int jloc, kloc;
-                    if (is_flattened) {
-#if (AMREX_SPACEDIM == 2)
-                        jloc = 0;
-#elif (AMREX_SPACEDIM == 3)
-                        jloc = (j == 0) ? 0 : bx.length(1)-j;
-#endif
-                        kloc = 0;
+              The same rules apply, by analogy, to the chopped_in_y and chopped_in_z cases
+            */
+
+            if (chopped_in_x) {
+                amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+                {
+                    if (i <= bx.length(0)/2) {
+                        // copy value
+                        realpart(i,j,k) = spectral(i,j,k).real();
+                        imagpart(i,j,k) = spectral(i,j,k).imag();
                     } else {
-                        jloc = (j == 0) ? 0 : bx.length(1)-j;
+                        // copy complex conjugate
+                        int iloc = bx.length(0)-i;
+                        int jloc = (j == 0) ? 0 : bx.length(1)-j;
 #if (AMREX_SPACEDIM == 2)
-                        kloc = 0;
+                        int kloc = 0;
 #elif (AMREX_SPACEDIM == 3)
-                        kloc = (k == 0) ? 0 : bx.length(2)-k;
+                        int kloc = (k == 0) ? 0 : bx.length(2)-k;
 #endif
+                        if (unpack) {
+                            realpart(i,j,k) =  spectral(iloc,jloc,kloc).real();
+                            imagpart(i,j,k) = -spectral(iloc,jloc,kloc).imag();
+                        }
+                        else {
+                            realpart(i,j,k) =  0.0;
+                            imagpart(i,j,k) =  0.0;
+                        }
                     }
 
-                    if (unpack) {
-                        realpart(i,j,k) =  spectral(iloc,jloc,kloc).real();
-                        imagpart(i,j,k) = -spectral(iloc,jloc,kloc).imag();
-                    }
-                    else {
-                        realpart(i,j,k) =  0.0;
-                        imagpart(i,j,k) =  0.0;
-                    }
-                }
-
-                realpart(i,j,k) /= sqrtnpts;
-                imagpart(i,j,k) /= sqrtnpts;
-            });
-
-            /*
-            amrex::ParallelFor(bx,
-            [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
-            {
-                std::cout << "HACKFFT " << i << " " << j << " " << k << " "
-                          << realpart(i,j,k) << " + " << imagpart(i,j,k) << "i"
-                          << std::endl;
-            });
-            */
-        }
-
-        variables_dft_real.ParallelCopy(variables_dft_real_onegrid,0,comp,1);
-        variables_dft_imag.ParallelCopy(variables_dft_imag_onegrid,0,comp,1);
-
-    }
-
-    // destroy fft plan
-    for (int i = 0; i < forward_plan.size(); ++i) {
-#ifdef AMREX_USE_CUDA
-        cufftDestroy(forward_plan[i]);
-#elif AMREX_USE_HIP
-        rocfft_plan_destroy(forward_plan[i]);
-#else
-        fftw_destroy_plan(forward_plan[i]);
-#endif
-    }
-//    fftw_mpi_cleanup();
-}
-
-void StructFact::InverseFFT(MultiFab& variables,
-			    const MultiFab& variables_dft_real, 
-			    const MultiFab& variables_dft_imag,
-			    const Geometry& geom)
-{
-
-    BL_PROFILE_VAR("StructFact::InverseFFT()", InverseFFT);
-
-#ifdef AMREX_USE_CUDA
-    // Print() << "Using cuFFT\n";
-#elif AMREX_USE_HIP
-    // Print() << "Using rocFFT\n";
-#else
-    // Print() << "Using FFTW\n";
-#endif
-
-    bool is_flattened = false;
-
-    long npts;
-
-    // Initialize the boxarray "ba_onegrid" from the single box "domain"
-    BoxArray ba_onegrid;
-    {
-      Box domain = geom.Domain();
-      ba_onegrid.define(domain);
-
-      if (domain.bigEnd(AMREX_SPACEDIM-1) == 0) {
-          is_flattened = true;
-      }
+                    realpart(i,j,k) /= sqrtnpts;
+                    imagpart(i,j,k) /= sqrtnpts;
+                });
+            }
 
+            if (chopped_in_y) {
+                amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+                {
+                    if (j <= bx.length(1)/2) {
+                        // copy value
+                        realpart(i,j,k) = spectral(i,j,k).real();
+                        imagpart(i,j,k) = spectral(i,j,k).imag();
+                    } else {
+                        // copy complex conjugate
+                        int iloc = (i == 0) ? 0 : bx.length(0)-i;
+                        int jloc = bx.length(1)-j;
 #if (AMREX_SPACEDIM == 2)
-      npts = (domain.length(0)*domain.length(1));
+                        int kloc = 0;
 #elif (AMREX_SPACEDIM == 3)
-      npts = (domain.length(0)*domain.length(1)*domain.length(2));
-#endif
-
-    }
-
-    Real sqrtnpts = std::sqrt(npts);
-
-    DistributionMapping dmap_onegrid(ba_onegrid);
-
-    // we will take one FFT at a time and copy the answer into the
-    // corresponding component
-    MultiFab variables_onegrid;
-    variables_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0);
-    
-    MultiFab variables_dft_real_onegrid;
-    MultiFab variables_dft_imag_onegrid;
-    variables_dft_real_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0);
-    variables_dft_imag_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0);
-
-//    fftw_mpi_init();
-
-#ifdef AMREX_USE_CUDA
-    using FFTplan = cufftHandle;
-    using FFTcomplex = cuDoubleComplex;
-#elif AMREX_USE_HIP
-    using FFTplan = rocfft_plan;
-    using FFTcomplex = double2;
-#else
-    using FFTplan = fftw_plan;
-    using FFTcomplex = fftw_complex;
+                        int kloc = (k == 0) ? 0 : bx.length(2)-k;
 #endif
+                        if (unpack) {
+                            realpart(i,j,k) =  spectral(iloc,jloc,kloc).real();
+                            imagpart(i,j,k) = -spectral(iloc,jloc,kloc).imag();
+                        }
+                        else {
+                            realpart(i,j,k) =  0.0;
+                            imagpart(i,j,k) =  0.0;
+                        }
+                    }
 
-    // contain to store FFT - note it is shrunk by "half" in x
-    Vector<std::unique_ptr<BaseFab<GpuComplex<Real> > > > spectral_field;
-
-    Vector<FFTplan> backward_plan;
-
-    // for CUDA builds we only need to build the plan once; track whether we did
-    bool built_plan = false;
-    
-    for (int comp=0; comp<variables_dft_real.nComp(); comp++) {
-
-        // build spectral field from multifabs
-        variables_dft_real_onegrid.ParallelCopy(variables_dft_real,comp,0,1);
-        variables_dft_imag_onegrid.ParallelCopy(variables_dft_imag,comp,0,1);
-        
-        for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) {
-
-            // grab a single box including ghost cell range
-            Box realspace_bx = mfi.fabbox();
-
-            // size of box including ghost cell range
-            IntVect fft_size = realspace_bx.length(); // This will be different for hybrid FFT
-
-            // this is the size of the box, except the 0th component is 'halved plus 1'
-            IntVect spectral_bx_size = fft_size;
-            spectral_bx_size[0] = fft_size[0]/2 + 1;
-
-            // spectral box
-            Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1));
-
-            spectral_field.emplace_back(new BaseFab<GpuComplex<Real> >(spectral_bx,1,
-                                                                   The_Device_Arena()));
-            spectral_field.back()->setVal<RunOn::Device>(0.0); // touch the memory
-
-            Array4< GpuComplex<Real> > spectral = (*spectral_field[0]).array();
-            Array4<Real> const& realpart = variables_dft_real_onegrid.array(mfi);
-            Array4<Real> const& imagpart = variables_dft_imag_onegrid.array(mfi);
-
-            Box bx = mfi.fabbox();
-
-            amrex::ParallelFor(bx, 
-            [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
-            {
-                if (i <= bx.length(0)/2) {
-                    GpuComplex<Real> copy(realpart(i,j,k),imagpart(i,j,k));
-                    spectral(i,j,k) = copy;
-                }
-            });
-        }
-
-        // build FFTplan if necessary
-        for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) {
-            
-            if (!built_plan) {
-
-                Box realspace_bx = mfi.fabbox();
-
-                IntVect fft_size = realspace_bx.length();
-
-                FFTplan bplan;
+                    realpart(i,j,k) /= sqrtnpts;
+                    imagpart(i,j,k) /= sqrtnpts;
+                });
+            }
 
-#ifdef AMREX_USE_CUDA // CUDA
-                if (is_flattened) {
-#if (AMREX_SPACEDIM == 2)
-                    cufftResult result = cufftPlan1d(&bplan, fft_size[0], CUFFT_Z2D, 1);
-                    if (result != CUFFT_SUCCESS) {
-                        amrex::AllPrint() << " cufftplan1d forward failed! Error: "
-                                          << cufftErrorToString(result) << "\n";
-                    }
-#elif (AMREX_SPACEDIM == 3)
-                    cufftResult result = cufftPlan2d(&bplan, fft_size[1], fft_size[0], CUFFT_Z2D);
-                    if (result != CUFFT_SUCCESS) {
-                        amrex::AllPrint() << " cufftplan2d forward failed! Error: "
-                                          << cufftErrorToString(result) << "\n";
-                    }
-#endif
-                } else {
-#if (AMREX_SPACEDIM == 2)
-                    cufftResult result = cufftPlan2d(&bplan, fft_size[1], fft_size[0], CUFFT_Z2D);
-                    if (result != CUFFT_SUCCESS) {
-                        amrex::AllPrint() << " cufftplan2d forward failed! Error: "
-                                          << cufftErrorToString(result) << "\n";
-                    }
-#elif (AMREX_SPACEDIM == 3)
-                    cufftResult result = cufftPlan3d(&bplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D);
-                    if (result != CUFFT_SUCCESS) {
-                        amrex::AllPrint() << " cufftplan3d forward failed! Error: "
-                                          << cufftErrorToString(result) << "\n";
+            if (chopped_in_z) {
+                amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+                {
+                    if (k <= bx.length(2)/2) {
+                        // copy value
+                        realpart(i,j,k) = spectral(i,j,k).real();
+                        imagpart(i,j,k) = spectral(i,j,k).imag();
+                    } else {
+                        // copy complex conjugate
+                        int iloc = (i == 0) ? 0 : bx.length(0)-i;
+                        int jloc = (j == 0) ? 0 : bx.length(1)-j;
+                        int kloc = bx.length(2)-k;
+
+                        if (unpack) {
+                            realpart(i,j,k) =  spectral(iloc,jloc,kloc).real();
+                            imagpart(i,j,k) = -spectral(iloc,jloc,kloc).imag();
+                        }
+                        else {
+                            realpart(i,j,k) =  0.0;
+                            imagpart(i,j,k) =  0.0;
+                        }
                     }
-#endif
-                }
-#elif AMREX_USE_HIP // HIP
-                if (is_flattened) {
-#if (AMREX_SPACEDIM == 2)
-                    const std::size_t lengths[] = {std::size_t(fft_size[0])};
-                    rocfft_status result = rocfft_plan_create(&bplan, rocfft_placement_notinplace, 
-                                                              rocfft_transform_type_real_inverse, rocfft_precision_double,
-                                                              1, lengths, 1, nullptr);
-                    assert_rocfft_status("rocfft_plan_create", result);
-#elif (AMREX_SPACEDIM == 3)
-                    const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1])};
-                    rocfft_status result = rocfft_plan_create(&bplan, rocfft_placement_notinplace, 
-                                                              rocfft_transform_type_real_inverse, rocfft_precision_double,
-                                                              2, lengths, 1, nullptr);
-                    assert_rocfft_status("rocfft_plan_create", result);
-#endif
-                } else {
-#if (AMREX_SPACEDIM == 2)
-                    const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1])};
-                    rocfft_status result = rocfft_plan_create(&bplan, rocfft_placement_notinplace, 
-                                                              rocfft_transform_type_real_inverse, rocfft_precision_double,
-                                                              2, lengths, 1, nullptr);
-                    assert_rocfft_status("rocfft_plan_create", result);
-#elif (AMREX_SPACEDIM == 3)
-                    const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])};
-                    rocfft_status result = rocfft_plan_create(&bplan, rocfft_placement_notinplace, 
-                                                              rocfft_transform_type_real_inverse, rocfft_precision_double,
-                                                              3, lengths, 1, nullptr);
-                    assert_rocfft_status("rocfft_plan_create", result);
-#endif
-                }
-#else // host
-
-                if (is_flattened) {
-#if (AMREX_SPACEDIM == 2)
-                    bplan = fftw_plan_dft_c2r_1d(fft_size[0],
-                                                 reinterpret_cast<FFTcomplex*>
-                                                 (spectral_field.back()->dataPtr()),
-                                                 variables_onegrid[mfi].dataPtr(),
-                                                 FFTW_ESTIMATE);
-#elif (AMREX_SPACEDIM == 3)
-                    bplan = fftw_plan_dft_c2r_2d(fft_size[1], fft_size[0],
-                                                 reinterpret_cast<FFTcomplex*>
-                                                 (spectral_field.back()->dataPtr()),
-                                                 variables_onegrid[mfi].dataPtr(),
-                                                 FFTW_ESTIMATE);
-#endif
-                } else {
-#if (AMREX_SPACEDIM == 2)
-                    bplan = fftw_plan_dft_c2r_2d(fft_size[1], fft_size[0],
-                                                 reinterpret_cast<FFTcomplex*>
-                                                 (spectral_field.back()->dataPtr()),
-                                                 variables_onegrid[mfi].dataPtr(),
-                                                 FFTW_ESTIMATE);
-#elif (AMREX_SPACEDIM == 3)
-                    bplan = fftw_plan_dft_c2r_3d(fft_size[2], fft_size[1], fft_size[0],
-                                                 reinterpret_cast<FFTcomplex*>
-                                                 (spectral_field.back()->dataPtr()),
-                                                 variables_onegrid[mfi].dataPtr(),
-                                                 FFTW_ESTIMATE);
-#endif
-                }
-#endif
 
-                backward_plan.push_back(bplan);
+                    realpart(i,j,k) /= sqrtnpts;
+                    imagpart(i,j,k) /= sqrtnpts;
+                });
             }
-	        
-            built_plan = true;
-        
-        } // end MFITer
-
-        ParallelDescriptor::Barrier();
-
-        // InverseTransform
-        for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) {
-            int i = mfi.LocalIndex();
-#ifdef AMREX_USE_CUDA
-            cufftSetStream(backward_plan[i], amrex::Gpu::gpuStream());
-            cufftResult result = cufftExecZ2D(backward_plan[i],
-                                              reinterpret_cast<FFTcomplex*>
-                                                  (spectral_field[i]->dataPtr()),
-                                              variables_onegrid[mfi].dataPtr());
-            if (result != CUFFT_SUCCESS) {
-                amrex::AllPrint() << " forward transform using cufftExec failed! Error: "
-                                  << cufftErrorToString(result) << "\n";
-	        }
-#elif AMREX_USE_HIP
-            rocfft_execution_info execinfo = nullptr;
-            rocfft_status result = rocfft_execution_info_create(&execinfo);
-            assert_rocfft_status("rocfft_execution_info_create", result);
-
-            std::size_t buffersize = 0;
-            result = rocfft_plan_get_work_buffer_size(backward_plan[i], &buffersize);
-            assert_rocfft_status("rocfft_plan_get_work_buffer_size", result);
-
-            void* buffer = amrex::The_Arena()->alloc(buffersize);
-            result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize);
-            assert_rocfft_status("rocfft_execution_info_set_work_buffer", result);
-
-            result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream());
-            assert_rocfft_status("rocfft_execution_info_set_stream", result);
-
-	        amrex::Real* variables_onegrid_ptr = variables_onegrid[mfi].dataPtr();
-	        FFTcomplex* spectral_field_ptr = reinterpret_cast<FFTcomplex*>(spectral_field[i]->dataPtr());
-            result = rocfft_execute(backward_plan[i],
-                                    (void**) &spectral_field_ptr, // in
-                                    (void**) &variables_onegrid_ptr, // out
-                                    execinfo);
-            assert_rocfft_status("rocfft_execute", result);
-            amrex::Gpu::streamSynchronize();
-            amrex::The_Arena()->free(buffer);
-            result = rocfft_execution_info_destroy(execinfo);
-            assert_rocfft_status("rocfft_execution_info_destroy", result);
-#else
-            fftw_execute(backward_plan[i]);
-#endif
-        }
 
-        variables_onegrid.mult(1.0/sqrtnpts);
-        variables.ParallelCopy(variables_onegrid,0,comp,1);
-    }
+        } // end MFIter
 
-    // destroy fft plan
-    for (int i = 0; i < backward_plan.size(); ++i) {
-#ifdef AMREX_USE_CUDA
-        cufftDestroy(backward_plan[i]);
-#elif AMREX_USE_HIP
-        rocfft_plan_destroy(backward_plan[i]);
-#else
-        fftw_destroy_plan(backward_plan[i]);
-#endif
-    }
-
-//    fftw_mpi_cleanup();
+        variables_dft_real.ParallelCopy(variables_dft_real_onegrid,0,comp,1);
+        variables_dft_imag.ParallelCopy(variables_dft_imag_onegrid,0,comp,1);
 
+    }
 }
 
-
 void StructFact::WritePlotFile(const int step, const Real time, const Geometry& geom,
                                std::string plotfile_base,
                                const int& zero_avg) {
@@ -1188,7 +525,7 @@ void StructFact::WritePlotFile(const int step, const Real time, const Geometry&
   MultiFab::Copy(cov_imag_temp, cov_imag, 0, 0, NCOV, 0);
 
   // Finalize covariances - scale & compute magnitude
-  Finalize(cov_real_temp, cov_imag_temp, geom, zero_avg);
+  Finalize(cov_real_temp, cov_imag_temp, zero_avg);
 
   //////////////////////////////////////////////////////////////////////////////////
   // Write out structure factor magnitude to plot file
@@ -1208,24 +545,8 @@ void StructFact::WritePlotFile(const int step, const Real time, const Geometry&
   
   MultiFab::Copy(plotfile, cov_mag, 0, 0, NCOV, 0); // copy structure factor into plotfile
 
-  Real dx = geom.CellSize(0);
-  Real pi = 3.1415926535897932;
-  Box domain = geom.Domain();
-
-  RealBox real_box({AMREX_D_DECL(-pi/dx,-pi/dx,-pi/dx)},
-                   {AMREX_D_DECL( pi/dx, pi/dx, pi/dx)});
-  
-  // check bc_vel_lo/hi to determine the periodicity
-  Vector<int> is_periodic(AMREX_SPACEDIM,0);  // set to 0 (not periodic) by default
-  for (int i=0; i<AMREX_SPACEDIM; ++i) {
-      is_periodic[i] = geom.isPeriodic(i);
-  }
-
-  Geometry geom2;
-  geom2.define(domain,&real_box,CoordSys::cartesian,is_periodic.data());
-    
   // write a plotfile
-  WriteSingleLevelPlotfile(plotfilename1,plotfile,varNames,geom2,time,step);
+  WriteSingleLevelPlotfile(plotfilename1,plotfile,varNames,geom,time,step);
   
   //////////////////////////////////////////////////////////////////////////////////
   // Write out real and imaginary components of structure factor to plot file
@@ -1258,18 +579,18 @@ void StructFact::WritePlotFile(const int step, const Real time, const Geometry&
   MultiFab::Copy(plotfile,cov_imag_temp,0,NCOV,NCOV,0);
 
   // write a plotfile
-  WriteSingleLevelPlotfile(plotfilename2,plotfile,varNames,geom2,time,step);
+  WriteSingleLevelPlotfile(plotfilename2,plotfile,varNames,geom,time,step);
 }
 
 void StructFact::Finalize(MultiFab& cov_real_in, MultiFab& cov_imag_in,
-                          const Geometry& geom, const int& zero_avg) {
+                          const int& zero_avg) {
 
   BL_PROFILE_VAR("StructFact::Finalize()",StructFactFinalize);
   
   Real nsamples_inv = 1.0/(Real)nsamples;
   
-  ShiftFFT(cov_real_in,geom,zero_avg);
-  ShiftFFT(cov_imag_in,geom,zero_avg);
+  ShiftFFT(cov_real_in,zero_avg);
+  ShiftFFT(cov_imag_in,zero_avg);
 
   cov_real_in.mult(nsamples_inv);
   for (int d=0; d<NCOV; d++) {
@@ -1290,8 +611,8 @@ void StructFact::Finalize(MultiFab& cov_real_in, MultiFab& cov_imag_in,
 }
 
 // Finalize covariances - scale & compute magnitude
-void StructFact::CallFinalize( const Geometry& geom,
-                               const int& zero_avg) {
+void StructFact::CallFinalize(const int& zero_avg)
+{
   
   BL_PROFILE_VAR("CallFinalize()",CallFinalize);
 
@@ -1305,12 +626,10 @@ void StructFact::CallFinalize( const Geometry& geom,
   MultiFab::Copy(cov_imag_temp, cov_imag, 0, 0, NCOV, 0);
 
   // Finalize covariances - scale & compute magnitude
-  Finalize(cov_real_temp, cov_imag_temp, geom, zero_avg);
+  Finalize(cov_real_temp, cov_imag_temp, zero_avg);
 }
 
-
-
-void StructFact::ShiftFFT(MultiFab& dft_out, const Geometry& geom, const int& zero_avg) {
+void StructFact::ShiftFFT(MultiFab& dft_out, const int& zero_avg) {
 
   BL_PROFILE_VAR("StructFact::ShiftFFT()",ShiftFFT);
 
@@ -1326,7 +645,7 @@ void StructFact::ShiftFFT(MultiFab& dft_out, const Geometry& geom, const int& ze
   */
   BoxArray ba_onegrid;
   {
-      Box domain = geom.Domain();
+      Box domain = dft_out.boxArray().minimalBox();
       
       // Initialize the boxarray "ba" from the single box "bx"
       ba_onegrid.define(domain);
@@ -1347,11 +666,11 @@ void StructFact::ShiftFFT(MultiFab& dft_out, const Geometry& geom, const int& ze
 	      const Box& bx = mfi.tilebox();
 	      const Array4<Real>& dft_temp = dft_onegrid_temp.array(mfi);
 	      amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept                                                                                                
-              {                                                                                                                                                                         
+              {
   		  if (i == 0 && j == 0 && k == 0) {
 		      dft_temp(i,j,k) = 0.;
 		  }
-	      });                                                                                                                                                                       
+              });
 	  }
       }
     
@@ -1390,7 +709,7 @@ void StructFact::ShiftFFT(MultiFab& dft_out, const Geometry& geom, const int& ze
 }
 
 // integrate cov_mag over k shells
-void StructFact::IntegratekShells(const int& step, const Geometry& /*geom*/, const std::string& name) {
+void StructFact::IntegratekShells(const int& step, const std::string& name) {
 
     BL_PROFILE_VAR("StructFact::IntegratekShells",IntegratekShells);
 
@@ -1536,132 +855,9 @@ void StructFact::IntegratekShells(const int& step, const Geometry& /*geom*/, con
         }
     }
 }
-    
-void StructFact::IntegratekShellsDecomp(const int& step, 
-                                        const amrex::Geometry& /*geom*/, 
-                                        const std::string& name_sol, 
-                                        const std::string& name_dil)
-{
-    BL_PROFILE_VAR("StructFact::IntegratekShellsDecomp",IntegratekShellsDecomp);
-
-    GpuArray<int,AMREX_SPACEDIM> center;
-    for (int d=0; d<AMREX_SPACEDIM; ++d) {
-        center[d] = n_cells[d]/2;
-    }
-
-    //int npts = n_cells[0]/2-1;
-    int npts = n_cells[0]/2;
-    //int npts_sq = npts*npts;
-
-    Gpu::DeviceVector<Real> phisum_sol_device(npts);
-    Gpu::DeviceVector<Real> phisum_dil_device(npts);
-    Gpu::DeviceVector<int>  phicnt_device(npts);
-
-    Gpu::HostVector<Real> phisum_sol_host(npts);
-    Gpu::HostVector<Real> phisum_dil_host(npts);
-    
-    Real* phisum_sol_ptr = phisum_sol_device.dataPtr();  // pointer to data
-    Real* phisum_dil_ptr = phisum_dil_device.dataPtr();  // pointer to data
-    int*  phicnt_ptr = phicnt_device.dataPtr();  // pointer to data
-
-    amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept
-    {
-      phisum_sol_ptr[d] = 0.;
-      phisum_dil_ptr[d] = 0.;
-      phicnt_ptr[d] = 0;
-    });
-    
-    // only consider cells that are within 15k of the center point
-    
-    for ( MFIter mfi(cov_mag,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
-        
-        const Box& bx = mfi.tilebox();
-
-        const Array4<Real> & cov = cov_mag.array(mfi);
-
-        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
-        {
-            int ilen = amrex::Math::abs(i-center[0]);
-            int jlen = amrex::Math::abs(j-center[1]);
-            int klen = (AMREX_SPACEDIM == 3) ? amrex::Math::abs(k-center[2]) : 0;
-
-            Real dist = (ilen*ilen + jlen*jlen + klen*klen);
-            dist = std::sqrt(dist);
-            
-            if ( dist <= center[0]-0.5) {
-	            dist = dist+0.5;
-                int cell = int(dist);
-                for (int d=0; d<AMREX_SPACEDIM; ++d) {
-		            amrex::HostDevice::Atomic::Add(&(phisum_sol_ptr[cell]), cov(i,j,k,d));
-		            amrex::HostDevice::Atomic::Add(&(phisum_dil_ptr[cell]), cov(i,j,k,d+3));
-                }
-		        amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1);
-            }
-        });
-    }
-        
-    for (int d=1; d<npts; ++d) {
-        ParallelDescriptor::ReduceRealSum(phisum_sol_device[d]);
-        ParallelDescriptor::ReduceRealSum(phisum_dil_device[d]);
-        ParallelDescriptor::ReduceIntSum(phicnt_device[d]);
-    }
-
-    Real dk = 1.;
-    
-#if (AMREX_SPACEDIM == 2)
-    amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept
-    {
-        if (d != 0) {
-	        phisum_sol_ptr[d] *= 2.*M_PI*(d*dk+.5*dk*dk)/phicnt_ptr[d];
-	        phisum_dil_ptr[d] *= 2.*M_PI*(d*dk+.5*dk*dk)/phicnt_ptr[d];
-        }
-    });
-#else
-    amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept
-    {
-        if (d != 0) {
-	        phisum_sol_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d];
-	        phisum_dil_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d];
-	    }
-    });
-#endif
-
-    Gpu::copy(Gpu::deviceToHost, phisum_sol_device.begin(), 
-              phisum_sol_device.end(), phisum_sol_host.begin());
-
-    Gpu::copy(Gpu::deviceToHost, phisum_dil_device.begin(), 
-              phisum_dil_device.end(), phisum_dil_host.begin());
-    
-    if (ParallelDescriptor::IOProcessor()) {
-        {
-            std::ofstream turb;
-            std::string turbBaseName = "turb_";
-            turbBaseName += name_sol;
-            std::string turbName = Concatenate(turbBaseName,step,7);
-            turbName += ".txt";
-            
-            turb.open(turbName);
-            for (int d=1; d<npts; ++d) {
-                turb << d << " " << phisum_sol_host[d] << std::endl;
-            }
-        }
-        {
-            std::ofstream turb;
-            std::string turbBaseName = "turb_";
-            turbBaseName += name_dil;
-            std::string turbName = Concatenate(turbBaseName,step,7);
-            turbName += ".txt";
-            
-            turb.open(turbName);
-            for (int d=1; d<npts; ++d) {
-                turb << d << " " << phisum_dil_host[d] << std::endl;
-            }
-        }
-    }
-}
 
 // integrate cov_mag over k shells for scalar qtys
-void StructFact::IntegratekShellsScalar(const int& step, const Geometry& /*geom*/, const amrex::Vector< std::string >& names) {
+void StructFact::IntegratekShellsScalar(const int& step, const amrex::Vector< std::string >& names) {
 
     BL_PROFILE_VAR("StructFact::IntegratekShellsMisc",IntegratekShellsMisc);
 
@@ -1772,7 +968,7 @@ void StructFact::IntegratekShellsScalar(const int& step, const Geometry& /*geom*
     }
 }
 
-void StructFact::AddToExternal(MultiFab& x_mag, MultiFab& x_realimag, const Geometry& geom, const int& zero_avg) {
+void StructFact::AddToExternal(MultiFab& x_mag, MultiFab& x_realimag, const int& zero_avg) {
 
     BL_PROFILE_VAR("StructFact::AddToExternal",AddToExternal);
 
@@ -1789,7 +985,7 @@ void StructFact::AddToExternal(MultiFab& x_mag, MultiFab& x_realimag, const Geom
     MultiFab::Copy(cov_imag_temp, cov_imag, 0, 0, NCOV, 0);
 
     // Finalize covariances - scale & compute magnitude
-    Finalize(cov_real_temp, cov_imag_temp, geom, zero_avg);
+    Finalize(cov_real_temp, cov_imag_temp, zero_avg);
 
     nPlot = NCOV;
     plotfile.define(cov_mag.boxArray(), cov_mag.DistributionMap(), nPlot, 0);
@@ -1804,137 +1000,6 @@ void StructFact::AddToExternal(MultiFab& x_mag, MultiFab& x_realimag, const Geom
 
 }
 
-
-void StructFact::DecomposeVelFourier(const amrex::MultiFab& vel_dft_real, 
-                                     const amrex::MultiFab& vel_dft_imag, 
-                                     const amrex::Geometry& geom)
-{
-    BL_PROFILE_VAR("StructFact::DecomposeVelFourier",DecomposeVelFourier);
-
-    const BoxArray& ba = vel_sol_real.boxArray();
-    const DistributionMapping& dm = vel_sol_real.DistributionMap();
-    MultiFab dft_real, dft_imag;
-    dft_real.define(ba, dm, 3, 0);
-    dft_imag.define(ba, dm, 3, 0);
-    dft_real.ParallelCopy(vel_dft_real,0,0,3);
-    dft_imag.ParallelCopy(vel_dft_imag,0,0,3);
-    
-    const GpuArray<Real, AMREX_SPACEDIM> dx = geom.CellSizeArray();
-
-    for (MFIter mfi(dft_real); mfi.isValid(); ++mfi) {
-        
-        Box bx = mfi.fabbox();
-
-        Array4<const Real> const& real = dft_real.array(mfi);
-        Array4<const Real> const& imag = dft_imag.array(mfi);
-        
-        Array4<      Real> const& real_sol = vel_sol_real.array(mfi);
-        Array4<      Real> const& imag_sol = vel_sol_imag.array(mfi);
-        
-        Array4<      Real> const& real_dil = vel_dil_real.array(mfi);
-        Array4<      Real> const& imag_dil = vel_dil_imag.array(mfi);
-
-        amrex::ParallelFor(bx,
-        [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
-        {
-            int nx = bx.length(0);
-            int ny = bx.length(1);
-            int nz = bx.length(2);
-
-            Real GxR, GxC, GyR, GyC, GzR, GzC;
-            
-            if (i <= bx.length(0)/2) { 
-                // Gradient Operators
-                GxR = (cos(2.0*M_PI*i/nx)-1.0)/dx[0];
-                GxC = (sin(2.0*M_PI*i/nx)-0.0)/dx[0];
-                GyR = (cos(2.0*M_PI*j/ny)-1.0)/dx[1];
-                GyC = (sin(2.0*M_PI*j/ny)-0.0)/dx[1];
-                GzR = (cos(2.0*M_PI*k/nz)-1.0)/dx[2];
-                GzC = (sin(2.0*M_PI*k/nz)-0.0)/dx[2];
-            }
-            else { // conjugate
-                // Gradient Operators
-                GxR = (cos(2.0*M_PI*(nx-i)/nx)-1.0)/dx[0];
-                GxC = (sin(2.0*M_PI*(nx-i)/nx)-0.0)/dx[0];
-                GyR = (cos(2.0*M_PI*(ny-j)/ny)-1.0)/dx[1];
-                GyC = (sin(2.0*M_PI*(ny-j)/ny)-0.0)/dx[1];
-                GzR = (cos(2.0*M_PI*(nz-k)/nz)-1.0)/dx[2];
-                GzC = (sin(2.0*M_PI*(nz-k)/nz)-0.0)/dx[2];
-            }
-
-            // Inverse Laplacian
-            Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC;
-
-            // Divergence of vel
-            Real divR = real(i,j,k,0)*GxR - imag(i,j,k,0)*GxC +
-                        real(i,j,k,1)*GyR - imag(i,j,k,1)*GyC +
-                        real(i,j,k,2)*GzR - imag(i,j,k,2)*GzC ;
-            Real divC = real(i,j,k,0)*GxC + imag(i,j,k,0)*GxR +
-                        real(i,j,k,1)*GyC + imag(i,j,k,1)*GyR +
-                        real(i,j,k,2)*GzC + imag(i,j,k,2)*GzR ;
-
-            if (Lap < 1.0e-12) { // zero mode for no bulk motion
-                real_dil(i,j,k,0) = 0.0;
-                real_dil(i,j,k,1) = 0.0;
-                real_dil(i,j,k,2) = 0.0;
-                imag_dil(i,j,k,0) = 0.0;
-                imag_dil(i,j,k,1) = 0.0;
-                imag_dil(i,j,k,2) = 0.0;
-            }
-            else {
-                // Dilatational velocity 
-                real_dil(i,j,k,0) = (divR*GxR + divC*GxC) / Lap;
-                real_dil(i,j,k,1) = (divR*GyR + divC*GyC) / Lap;
-                real_dil(i,j,k,2) = (divR*GzR + divC*GzC) / Lap;
-                imag_dil(i,j,k,0) = (divC*GxR - divR*GxC) / Lap;
-                imag_dil(i,j,k,1) = (divC*GyR - divR*GyC) / Lap;
-                imag_dil(i,j,k,2) = (divC*GzR - divR*GzC) / Lap;
-                
-                // Solenoidal velocity
-                real_sol(i,j,k,0) = real(i,j,k,0) - real_dil(i,j,k,0);
-                real_sol(i,j,k,1) = real(i,j,k,1) - real_dil(i,j,k,1); 
-                real_sol(i,j,k,2) = real(i,j,k,2) - real_dil(i,j,k,2);
-                imag_sol(i,j,k,0) = imag(i,j,k,0) - imag_dil(i,j,k,0);
-                imag_sol(i,j,k,1) = imag(i,j,k,1) - imag_dil(i,j,k,1);
-                imag_sol(i,j,k,2) = imag(i,j,k,2) - imag_dil(i,j,k,2);
-            }
-        });
-    }
-}
-
-void StructFact::GetDecompVel(MultiFab& vel_decomp, const Geometry& geom)
-{
-    BL_PROFILE_VAR("StructFact::GetDecompVel()", GetDecompVel);
-    
-    if (!decompose) 
-        amrex::Error("StructFact::GetDecompVel() is specific for vel decomposition in turbulence");
-
-    const BoxArray& ba_in = vel_decomp.boxArray();
-    const DistributionMapping& dmap_in = vel_decomp.DistributionMap();
-
-    MultiFab vel;
-    vel.define(ba_in, dmap_in, 3, 0);
-
-    const BoxArray& ba = vel_sol_real.boxArray();
-    const DistributionMapping& dm = vel_sol_real.DistributionMap();
-    MultiFab dft_real, dft_imag;
-    dft_real.define(ba, dm, 3, 0);
-    dft_imag.define(ba, dm, 3, 0);
-    
-    dft_real.ParallelCopy(vel_sol_real,0,0,3);
-    dft_imag.ParallelCopy(vel_sol_imag,0,0,3);
-
-    InverseFFT(vel, dft_real, dft_imag, geom);
-    vel_decomp.ParallelCopy(vel,0,0,3);
-
-    dft_real.ParallelCopy(vel_dil_real,0,0,3);
-    dft_imag.ParallelCopy(vel_dil_imag,0,0,3);
-
-    InverseFFT(vel, dft_real, dft_imag, geom);
-    vel_decomp.ParallelCopy(vel,0,3,3);
-
-}
-
 void StructFact::WriteCheckPoint(const int& step,
                                  std::string checkfile_base)
 {
diff --git a/src_analysis/TurbSpectra.H b/src_analysis/TurbSpectra.H
new file mode 100644
index 000000000..66a8ee524
--- /dev/null
+++ b/src_analysis/TurbSpectra.H
@@ -0,0 +1,43 @@
+#ifndef _TurbSpectraDistributed_H_
+#define _TurbSpectraDistributed_H_
+// Declarations of the spectrum / k-integration entry points; TurbSpectrumScalar is defined in TurbSpectra.cpp.
+#include <AMReX.H>
+#include <AMReX_MultiFab.H>
+#include <AMReX_Vector.H>
+#include <AMReX_VisMF.H>
+
+#include <AMReX_GpuComplex.H>
+
+#include <string>
+
+#include "common_functions.H"
+// NOTE(review): ALIGN is not used by any declaration in this header; confirm a user exists elsewhere.
+#define ALIGN 16
+// Header-scope using-directive: every file that includes this header sees amrex names unqualified.
+using namespace amrex;
+
+// NOTE(review): presumably integrates component comp of cov_mag over wavenumber shells, with name/step tagging the output; definition not shown here, confirm.
+void IntegrateKScalar(const MultiFab& cov_mag,
+                      	    const std::string& name,
+                            const int& step,
+                            const int& comp);
+// Same parameter list as IntegrateKScalar. NOTE(review): presumably the velocity-spectrum variant; confirm against the definition.
+void IntegrateKVelocity(const MultiFab& cov_mag,
+                              const std::string& name,
+                              const int& step,
+                              const int& comp);
+// Scalar spectra driver: TurbSpectra.cpp asserts var_scaling and var_names each have variables.nComp() entries and that each MPI process holds exactly one box.
+void TurbSpectrumScalar(const MultiFab& variables, 
+                              const amrex::Geometry& geom, 
+                              const int& step, 
+                              const amrex::Vector<amrex::Real>& var_scaling,
+                              const amrex::Vector< std::string >& var_names);
+void TurbSpectrumVelDecomp(const MultiFab& vel,
+                                 MultiFab& vel_decomp,
+                                 const amrex::Geometry& geom,
+                                 const int& step,
+                                 const amrex::Real& var_scaling,
+                                 const amrex::Vector< std::string >& var_names);
+// NOTE(review): TurbSpectrumVelDecomp takes one scalar var_scaling and a writable vel_decomp (presumably an output); definition not shown here, confirm.
+// NOTE(review): the guard macro starts with '_' plus an uppercase letter (an identifier form reserved to the implementation) and does not match the file name.
+#endif
diff --git a/src_analysis/TurbSpectra.cpp b/src_analysis/TurbSpectra.cpp
new file mode 100644
index 000000000..e628c5ab1
--- /dev/null
+++ b/src_analysis/TurbSpectra.cpp
@@ -0,0 +1,452 @@
+#include <AMReX_FFT.H>
+
+#include "TurbSpectra.H"
+#include "common_functions.H"
+
+#include <AMReX_MultiFabUtil.H>
+#include "AMReX_PlotFileUtil.H"
+#include "AMReX_BoxArray.H"
+
+void TurbSpectrumScalar(const MultiFab& variables, // input fields; one spectrum is produced per component
+                        const amrex::Geometry& geom, // supplies the domain box the FFT is built on
+                        const int& step, // forwarded to IntegrateKScalar for the output file name
+                        const amrex::Vector<amrex::Real>& scaling, // per-component divisor of the spectrum
+                        const amrex::Vector< std::string >& var_names) // per-component label for the output file
+{
+    BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar);
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), 
+        "TurbSpectrumScalar: must have same number variable names as components of input MultiFab");
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), 
+        "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab");
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.local_size() == 1, 
+        "TurbSpectrumScalar: Must have one Box per MPI process");
+    // For every component: forward FFT, store |FFT|^2/(npts*scaling) in cov, then integrate over k-shells.
+    int ncomp = variables.nComp();
+
+    Box domain = geom.Domain();
+    auto npts = domain.numPts();
+    Real sqrtnpts = std::sqrt(npts);
+    
+    amrex::FFT::R2C<Real,FFT::Direction::forward> r2c(geom.Domain());
+
+    auto const& [cba, cdm] = r2c.getSpectralDataLayout(); // box array and distribution map of the spectral data
+
+    MultiFab cov(cba, cdm, ncomp, 0);
+    
+    for (int comp=0; comp<ncomp; ++comp) {    
+
+	MultiFab mf(variables, amrex::make_alias, comp, 1); // alias of a single input component
+	cMultiFab cmf(cba, cdm, 1, 0); // complex FFT output for this component
+
+	r2c.forward(mf,cmf);
+
+        // Fill in the covariance multifab
+        int comp_gpu = comp; // by-value copies for capture in the device lambda
+        Real sqrtnpts_gpu = sqrtnpts;
+        Real scaling_i_gpu = scaling[comp];
+        std::string name_gpu = var_names[comp];
+        for (MFIter mfi(cov); mfi.isValid(); ++mfi) {
+            Array4<Real> const& data = cov.array(mfi);
+            Array4<const GpuComplex<Real> > spectral = cmf.const_array(mfi);
+            const Box& bx = mfi.validbox();
+            amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+            {
+                Real re = spectral(i,j,k).real();
+                Real im = spectral(i,j,k).imag();
+                data(i,j,k,comp_gpu) = (re*re + im*im)/(sqrtnpts_gpu*sqrtnpts_gpu*scaling_i_gpu); // |FFT|^2 / (npts * scaling)
+            });
+        }
+        
+        // Integrate spectra over k-shells
+        IntegrateKScalar(cov,name_gpu,step,comp_gpu);
+    }
+}
+
+
+void TurbSpectrumVelDecomp(const MultiFab& vel, // input velocity, exactly 3 components (asserted)
+                                 MultiFab& vel_decomp, // output; components 0-5 are overwritten
+                                 const amrex::Geometry& geom, // provides the domain box and the cell sizes
+                                 const int& step, // forwarded to IntegrateKVelocity for the file names
+                                 const amrex::Real& scaling, // divisor applied to all three spectra
+                                 const amrex::Vector< std::string >& var_names) // size-checked only; output names are hard-coded below
+{
+    // Transform the 3-component velocity to Fourier space, split every mode into a
+    // dilatational part and a solenoidal remainder, write the shell-integrated
+    // spectra (total, solenoidal, dilatational), and inverse-transform the two parts
+    // into vel_decomp (components 0-2 solenoidal, 3-5 dilatational).
+    BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp);
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, 
+        "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab");
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, 
+        "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational");
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.local_size() == 1, 
+        "TurbSpectrumVelDecomp: Must have one Box per MPI process");
+
+    const GpuArray<Real, AMREX_SPACEDIM> dx = geom.CellSizeArray();
+    
+    Box domain = geom.Domain();
+    auto npts = domain.numPts();
+    Real sqrtnpts = std::sqrt(npts);
+    
+    amrex::FFT::R2C<Real,FFT::Direction::both> r2c(geom.Domain());
+
+    // box array and dmap for FFT
+    auto const& [cba, cdm] = r2c.getSpectralDataLayout();
+
+    // each MPI rank gets storage for its piece of the fft
+    cMultiFab spectral_field_Tx(cba,cdm,1,0); // totalx
+    cMultiFab spectral_field_Ty(cba,cdm,1,0); // totaly
+    cMultiFab spectral_field_Tz(cba,cdm,1,0); // totalz
+    cMultiFab spectral_field_Sx(cba,cdm,1,0); // solenoidalx
+    cMultiFab spectral_field_Sy(cba,cdm,1,0); // solenoidaly
+    cMultiFab spectral_field_Sz(cba,cdm,1,0); // solenoidalz
+    cMultiFab spectral_field_Dx(cba,cdm,1,0); // dilatationalx
+    cMultiFab spectral_field_Dy(cba,cdm,1,0); // dilatationaly
+    cMultiFab spectral_field_Dz(cba,cdm,1,0); // dilatationalz
+	
+    // ForwardTransform
+    // X
+    {
+	MultiFab vel_single(vel, amrex::make_alias, 0, 1);
+	r2c.forward(vel_single,spectral_field_Tx);
+    }
+    // Y
+    {
+	MultiFab vel_single(vel, amrex::make_alias, 1, 1);
+	r2c.forward(vel_single,spectral_field_Ty);
+    }
+    // Z
+    {
+	MultiFab vel_single(vel, amrex::make_alias, 2, 1);
+	r2c.forward(vel_single,spectral_field_Tz);
+    }
+    
+    // Decompose velocity field into solenoidal and dilatational
+    for (MFIter mfi(spectral_field_Tx); mfi.isValid(); ++mfi) {
+        Array4< GpuComplex<Real> > spectral_tx = spectral_field_Tx.array(mfi);
+        Array4< GpuComplex<Real> > spectral_ty = spectral_field_Ty.array(mfi);
+        Array4< GpuComplex<Real> > spectral_tz = spectral_field_Tz.array(mfi);
+        Array4< GpuComplex<Real> > spectral_sx = spectral_field_Sx.array(mfi);
+        Array4< GpuComplex<Real> > spectral_sy = spectral_field_Sy.array(mfi);
+        Array4< GpuComplex<Real> > spectral_sz = spectral_field_Sz.array(mfi);
+        Array4< GpuComplex<Real> > spectral_dx = spectral_field_Dx.array(mfi);
+        Array4< GpuComplex<Real> > spectral_dy = spectral_field_Dy.array(mfi);
+        Array4< GpuComplex<Real> > spectral_dz = spectral_field_Dz.array(mfi);
+        const Box& bx = mfi.validbox();
+        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
+        {
+
+           int nx = n_cells[0]; 
+           int ny = n_cells[1]; 
+           int nz = n_cells[2];
+
+           Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0;
+           
+           if (i <= nx/2) { 
+               
+               // Get the wavevector
+               int ki = i;
+               int kj = j;
+               if (j >= ny/2) kj = ny - j;
+               int kk = k;
+               if (k >= nz/2) kk = nz - k;
+
+               // Gradient Operators
+               GxR = (cos(2.0*M_PI*ki/nx)-1.0)/dx[0];
+               GxC = (sin(2.0*M_PI*ki/nx)-0.0)/dx[0];
+               GyR = (cos(2.0*M_PI*kj/ny)-1.0)/dx[1];
+               GyC = (sin(2.0*M_PI*kj/ny)-0.0)/dx[1];
+               GzR = (cos(2.0*M_PI*kk/nz)-1.0)/dx[2];
+               GzC = (sin(2.0*M_PI*kk/nz)-0.0)/dx[2];
+
+               // Scale Total velocity FFT components
+               spectral_tx(i,j,k) *= (1.0/sqrtnpts);
+               spectral_ty(i,j,k) *= (1.0/sqrtnpts);
+               spectral_tz(i,j,k) *= (1.0/sqrtnpts);
+
+               // Inverse Laplacian
+               Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC;
+
+               // Divergence of vel
+               Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC +
+                           spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC +
+                           spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ;
+               Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR +
+                           spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR +
+                           spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ;
+
+               if (Lap < 1.0e-12) { // zero mode for no bulk motion
+                   spectral_dx(i,j,k) = GpuComplex<Real>(0.0, 0.0); // assign, not "*= 0": this storage was never written before
+                   spectral_dy(i,j,k) = GpuComplex<Real>(0.0, 0.0); // (scaling NaN/Inf garbage by zero would stay NaN)
+                   spectral_dz(i,j,k) = GpuComplex<Real>(0.0, 0.0);
+               }
+               else {
+
+                   // Dilatational velocity 
+                   GpuComplex<Real> copy_dx((divR*GxR + divC*GxC) / Lap, 
+                                            (divC*GxR - divR*GxC) / Lap);
+                   spectral_dx(i,j,k) = copy_dx;
+
+                   GpuComplex<Real> copy_dy((divR*GyR + divC*GyC) / Lap,
+                                            (divC*GyR - divR*GyC) / Lap);
+                   spectral_dy(i,j,k) = copy_dy;
+
+                   GpuComplex<Real> copy_dz((divR*GzR + divC*GzC) / Lap,
+                                            (divC*GzR - divR*GzC) / Lap);
+                   spectral_dz(i,j,k) = copy_dz;
+               }
+                   
+               // Solenoidal velocity
+               spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k);
+               spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); 
+               spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k);
+           }
+           else { // conjugate
+                amrex::Abort("check the code; i should not go beyond bx.length(0)/2");
+           }
+
+        });
+    }
+    
+    MultiFab cov(cba, cdm, 3, 0); // total, solenoidal, dilatational
+    
+    // Fill in the covariance multifab
+    Real scaling_gpu = scaling;
+    for (MFIter mfi(cov); mfi.isValid(); ++mfi) {
+        Array4<Real> const& data = cov.array(mfi);
+        Array4<const GpuComplex<Real> > spec_tx = spectral_field_Tx.const_array(mfi);
+        Array4<const GpuComplex<Real> > spec_ty = spectral_field_Ty.const_array(mfi);
+        Array4<const GpuComplex<Real> > spec_tz = spectral_field_Tz.const_array(mfi);
+        Array4<const GpuComplex<Real> > spec_sx = spectral_field_Sx.const_array(mfi);
+        Array4<const GpuComplex<Real> > spec_sy = spectral_field_Sy.const_array(mfi);
+        Array4<const GpuComplex<Real> > spec_sz = spectral_field_Sz.const_array(mfi);
+        Array4<const GpuComplex<Real> > spec_dx = spectral_field_Dx.const_array(mfi);
+        Array4<const GpuComplex<Real> > spec_dy = spectral_field_Dy.const_array(mfi);
+        Array4<const GpuComplex<Real> > spec_dz = spectral_field_Dz.const_array(mfi);
+        const Box& bx = mfi.validbox();
+        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+            if (i <= n_cells[0]/2) {
+                Real re_x, re_y, re_z, im_x, im_y, im_z;
+                
+                re_x = spec_tx(i,j,k).real();
+                im_x = spec_tx(i,j,k).imag();
+                re_y = spec_ty(i,j,k).real();
+                im_y = spec_ty(i,j,k).imag();
+                re_z = spec_tz(i,j,k).real();
+                im_z = spec_tz(i,j,k).imag();
+                data(i,j,k,0) = (re_x*re_x + im_x*im_x + 
+                                 re_y*re_y + im_y*im_y +
+                                 re_z*re_z + im_z*im_z)/(scaling_gpu);
+                re_x = spec_sx(i,j,k).real();
+                im_x = spec_sx(i,j,k).imag();
+                re_y = spec_sy(i,j,k).real();
+                im_y = spec_sy(i,j,k).imag();
+                re_z = spec_sz(i,j,k).real();
+                im_z = spec_sz(i,j,k).imag();
+                data(i,j,k,1) = (re_x*re_x + im_x*im_x + 
+                                 re_y*re_y + im_y*im_y +
+                                 re_z*re_z + im_z*im_z)/(scaling_gpu);
+                re_x = spec_dx(i,j,k).real();
+                im_x = spec_dx(i,j,k).imag();
+                re_y = spec_dy(i,j,k).real();
+                im_y = spec_dy(i,j,k).imag();
+                re_z = spec_dz(i,j,k).real();
+                im_z = spec_dz(i,j,k).imag();
+                data(i,j,k,2) = (re_x*re_x + im_x*im_x + 
+                                 re_y*re_y + im_y*im_y +
+                                 re_z*re_z + im_z*im_z)/(scaling_gpu);
+            }
+            else {
+                amrex::Abort("check the code; i should not go beyond n_cells[0]/2");
+            }
+        });
+    }
+
+    // Integrate K spectrum for velocities
+    IntegrateKVelocity(cov,"vel_total"     ,step,0);
+    IntegrateKVelocity(cov,"vel_solenoidal",step,1);
+    IntegrateKVelocity(cov,"vel_dilational",step,2);
+    
+    // inverse Fourier transform solenoidal and dilatational components 
+    {
+	MultiFab vel_decomp_single(vel_decomp, amrex::make_alias, 0, 1);
+	r2c.backward(spectral_field_Sx,vel_decomp_single);
+    }
+    {
+	MultiFab vel_decomp_single(vel_decomp, amrex::make_alias, 1, 1);
+	r2c.backward(spectral_field_Sy,vel_decomp_single);
+    }
+    {
+	MultiFab vel_decomp_single(vel_decomp, amrex::make_alias, 2, 1);
+	r2c.backward(spectral_field_Sz,vel_decomp_single);
+    }
+    {
+	MultiFab vel_decomp_single(vel_decomp, amrex::make_alias, 3, 1);
+	r2c.backward(spectral_field_Dx,vel_decomp_single);
+    }
+    {
+	MultiFab vel_decomp_single(vel_decomp, amrex::make_alias, 4, 1);
+	r2c.backward(spectral_field_Dy,vel_decomp_single);
+    }
+    {
+	MultiFab vel_decomp_single(vel_decomp, amrex::make_alias, 5, 1);
+	r2c.backward(spectral_field_Dz,vel_decomp_single);
+    }
+
+    vel_decomp.mult(1.0/sqrtnpts); // with the 1/sqrt(npts) applied after the forward FFT, the total factor is 1/npts
+
+}
+
+void IntegrateKScalar(const MultiFab& cov_mag,
+                      	    const std::string& name,
+                            const int& step,
+                            const int& comp)
+// Average component `comp` of cov_mag over integer |k| shells and write the result to "turb_<name><step>.txt".
+{
+    int npts = n_cells[0]/2; // number of shell bins; bin 0 is accumulated but never normalized or written
+    
+    Gpu::DeviceVector<Real> phisum_device(npts, 0); // per-bin sum of cov
+    Gpu::DeviceVector<int>  phicnt_device(npts, 0); // per-bin number of contributing modes
+    Real* phisum_ptr = phisum_device.dataPtr();  // pointer to data
+    int*  phicnt_ptr = phicnt_device.dataPtr();  // pointer to data
+
+    int comp_gpu = comp;
+    int nx = n_cells[0]; 
+    int ny = n_cells[1]; 
+    int nz = n_cells[2];
+    for ( MFIter mfi(cov_mag,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+        
+        const Box& bx = mfi.tilebox();
+
+        const Array4<const Real> & cov = cov_mag.const_array(mfi);
+
+        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+            if (i <= n_cells[0]/2) {
+                int ki = i; 
+                int kj = j;
+                if (j >= ny/2) kj = ny - j; // fold the upper half of j onto ny - j
+                int kk = k;
+                if (k >= nz/2) kk = nz - k; // fold the upper half of k onto nz - k
+
+                Real dist = (ki*ki + kj*kj + kk*kk);
+                dist = std::sqrt(dist); // |k| in units of the mode index
+                
+                if ( dist <=  n_cells[0]/2-0.5) { // modes beyond the last bin are discarded
+                    dist = dist+0.5; // shift so that the truncation below rounds to the nearest integer
+                    int cell = int(dist);
+                    amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu));
+                    amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1);
+                }
+            }
+            else {
+                amrex::Abort("check the code; i should not go beyond n_cells[0]/2");
+            }
+        });
+    }
+    // copy the per-rank partial sums to the host, then sum them over the MPI ranks
+    Gpu::HostVector<Real> phisum_host(npts);
+    Gpu::HostVector<int>  phicnt_host(npts);
+    Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin());
+    Gpu::copyAsync(Gpu::deviceToHost, phicnt_device.begin(), phicnt_device.end(), phicnt_host.begin());
+    Gpu::streamSynchronize();
+
+    ParallelDescriptor::ReduceRealSum(phisum_host.dataPtr(),npts);
+    ParallelDescriptor::ReduceIntSum(phicnt_host.dataPtr(),npts);
+    // normalize: scale each bin sum by 4*pi*(d^2*dk + dk^3/12) and divide by its mode count
+    Real dk = 1.;
+    for (int d = 1; d < npts; ++d) {
+        phisum_host[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_host[d];
+    }
+    
+    if (ParallelDescriptor::IOProcessor()) { // only the I/O rank writes the file
+        std::ofstream turb;
+        std::string turbBaseName = "turb_"+name;
+        std::string turbName = Concatenate(turbBaseName,step,7);
+        turbName += ".txt";
+        
+        turb.open(turbName);
+        for (int d=1; d<npts; ++d) {
+            turb << d << " " << phisum_host[d] << std::endl; // one "bin value" pair per line
+        }
+        turb.close();
+    }
+}
+
+void IntegrateKVelocity(const MultiFab& cov_mag,
+                              const std::string& name,
+                              const int& step,
+                              const int& comp)
+// Average component `comp` of cov_mag over integer |k| shells and write "turb_<name><step>.txt" (same procedure as IntegrateKScalar).
+{
+    int npts = n_cells[0]/2; // number of shell bins; bin 0 is accumulated but never normalized or written
+    
+    Gpu::DeviceVector<Real> phisum_device(npts, 0); // per-bin sum of cov
+    Gpu::DeviceVector<int>  phicnt_device(npts, 0); // per-bin number of contributing modes
+    Real* phisum_ptr = phisum_device.dataPtr();  // pointer to data
+    int*  phicnt_ptr = phicnt_device.dataPtr();  // pointer to data
+
+    int comp_gpu = comp;
+    int nx = n_cells[0]; 
+    int ny = n_cells[1]; 
+    int nz = n_cells[2];
+    for ( MFIter mfi(cov_mag,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+        
+        const Box& bx = mfi.tilebox();
+
+        const Array4<const Real> & cov = cov_mag.const_array(mfi);
+
+        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+            if (i <= n_cells[0]/2) {
+                int ki = i; 
+                int kj = j;
+                if (j >= ny/2) kj = ny - j; // fold the upper half of j onto ny - j
+                int kk = k;
+                if (k >= nz/2) kk = nz - k; // fold the upper half of k onto nz - k
+
+                Real dist = (ki*ki + kj*kj + kk*kk);
+                dist = std::sqrt(dist); // |k| in units of the mode index
+                
+                if ( dist <=  n_cells[0]/2-0.5) { // modes beyond the last bin are discarded
+                    dist = dist+0.5; // shift so that the truncation below rounds to the nearest integer
+                    int cell = int(dist);
+                    amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu));
+                    amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1);
+                }
+            }
+            else {
+                amrex::Abort("check the code; i should not go beyond n_cells[0]/2");
+            }
+        });
+    }
+    
+    Gpu::HostVector<Real> phisum_host(npts);
+    Gpu::HostVector<int>  phicnt_host(npts);
+    Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin());
+    Gpu::copyAsync(Gpu::deviceToHost, phicnt_device.begin(), phicnt_device.end(), phicnt_host.begin());
+    Gpu::streamSynchronize();
+    // sum the per-rank partial results over the MPI ranks
+    ParallelDescriptor::ReduceRealSum(phisum_host.dataPtr(),npts);
+    ParallelDescriptor::ReduceIntSum(phicnt_host.dataPtr(),npts);
+    // normalize: scale each bin sum by 4*pi*(d^2*dk + dk^3/12) and divide by its mode count
+    Real dk = 1.;
+    for (int d = 1; d < npts; ++d) {
+        phisum_host[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_host[d];
+    }
+    
+    if (ParallelDescriptor::IOProcessor()) { // only the I/O rank writes the file
+        std::ofstream turb;
+        std::string turbBaseName = "turb_"+name;
+        std::string turbName = Concatenate(turbBaseName,step,7);
+        turbName += ".txt";
+        
+        turb.open(turbName);
+        for (int d=1; d<npts; ++d) {
+            turb << d << " " << phisum_host[d] << std::endl; // one "bin value" pair per line
+        }
+        turb.close();
+    }
+}
+
diff --git a/src_chemistry/chemistry_functions.H b/src_chemistry/chemistry_functions.H
index 41a5703f7..a918400a3 100644
--- a/src_chemistry/chemistry_functions.H
+++ b/src_chemistry/chemistry_functions.H
@@ -19,4 +19,16 @@ void InitializeChemistryNamespace();
 // used in compressible code only
 void compute_compressible_chemistry_source_CLE(amrex::Real dt, amrex::Real dV,
                                                MultiFab& prim, MultiFab& source, MultiFab& ranchem);
+
+void ChemicalRates(const MultiFab& n_cc, MultiFab& chem_rate, const amrex::Geometry& geom, const amrex::Real& dt, 
+                   const MultiFab& n_interm, Vector<Real>& lin_comb_coef_in, Real volume_factor_in=1.); // fills chem_rate with per-species rates over dt
+// fill reaction_rates[r] for every reaction from the number densities n_in; dv is the cell volume passed by ChemicalRates
+AMREX_GPU_HOST_DEVICE void compute_reaction_rates(GpuArray<Real,MAX_SPECIES>& n_in,
+                                                  GpuArray<Real,MAX_REACTION>& reaction_rates,
+                                                  const amrex::Real& dv);
+// sample num_reactions[r] from avg_num_reactions[r] according to chemistry::reaction_type
+AMREX_GPU_HOST_DEVICE void sample_num_reactions(GpuArray<Real,MAX_SPECIES>& n_in,
+                                                GpuArray<Real,MAX_REACTION>& num_reactions,
+                                                GpuArray<Real,MAX_REACTION>& avg_num_reactions,
+                                                const amrex::RandomEngine& engine);
 #endif
diff --git a/src_chemistry/chemistry_functions.cpp b/src_chemistry/chemistry_functions.cpp
index 1bcd46524..46f93c02d 100644
--- a/src_chemistry/chemistry_functions.cpp
+++ b/src_chemistry/chemistry_functions.cpp
@@ -3,8 +3,6 @@
 
 AMREX_GPU_MANAGED int chemistry::nreaction;
 
-AMREX_GPU_MANAGED GpuArray<amrex::Real, MAX_REACTION> chemistry::rate_const;
-
 // from the fortran code, stoich_coeffs_R = stoichiometric_factors(spec,1,reac)
 // from the fortran code, stoich_coeffs_P = stoichiometric_factors(spec,2,reac)
 // stoich_coeffs_PR = stoich_coeffs_P - stoich_coeffs_R
@@ -12,10 +10,29 @@ AMREX_GPU_MANAGED Array2D<int,0, MAX_REACTION,0, MAX_SPECIES> chemistry::stoich_
 AMREX_GPU_MANAGED Array2D<int,0, MAX_REACTION,0, MAX_SPECIES> chemistry::stoich_coeffs_P;
 AMREX_GPU_MANAGED Array2D<int,0, MAX_REACTION,0, MAX_SPECIES> chemistry::stoich_coeffs_PR;
 
+// reaction rate constant for each reaction (assuming Law of Mass Action holds)
+// using rate_multiplier, reaction rates can be changed by the same factor
+// if include_discrete_LMA_correction, n^2 and n^3 in rate expressions become
+// n*(n-1/dv) and n*(n-1/dv)*(n-2/dv). 
+AMREX_GPU_MANAGED GpuArray<amrex::Real, MAX_REACTION> chemistry::rate_const;
+AMREX_GPU_MANAGED amrex::Real chemistry::rate_multiplier;
+AMREX_GPU_MANAGED int chemistry::include_discrete_LMA_correction;
+
+// if n >= 0, exclude species n (0-based index, = solvent) when computing reaction rates
+// in this case, the concentration of the solvent is assumed to be constant,
+// which should be reflected in the rate constants.
+// if -1 (the default), no species is excluded
+// e.g. U + S -> 2U, if exclude_solvent_comput_rates=-1, rate=k*n_U*n_S
+//                   if exclude_solvent_comput_rates=1 (S has species index 1), rate=k_new*n_U where k_new=k*n_S
+AMREX_GPU_MANAGED int chemistry::exclude_solvent_comput_rates;
+
 // from the fortran code this was use_Poisson_rng (0=CLE; 1=tau leaping; -1=deterministic; 2=SSA)
 // here it's being used as reaction_type (0=deterministic; 1=CLE; 2=SSA; 3=tau leap)
 AMREX_GPU_MANAGED int chemistry::reaction_type;
 
+// use mole fraction based LMA
+AMREX_GPU_MANAGED int chemistry::use_mole_frac_LMA;
+
 // specific to compressible codes
 AMREX_GPU_MANAGED GpuArray<amrex::Real, MAX_REACTION> chemistry::alpha_param;
 AMREX_GPU_MANAGED GpuArray<amrex::Real, MAX_REACTION> chemistry::beta_param;
@@ -37,11 +54,6 @@ void InitializeChemistryNamespace()
     // if nreaction is set to zero or not defined in the inputs file, quit the routine
     if (nreaction==0) return;
 
-    // get rate constants
-    std::vector<amrex::Real> k_tmp(MAX_REACTION);
-    pp.getarr("rate_const",k_tmp,0,nreaction);
-    for (int m=0; m<nreaction; m++) rate_const[m] = k_tmp[m];
-
     // get stoich coeffs for reactants
     for (int m=0; m<nreaction; m++)
     {
@@ -75,9 +87,26 @@ void InitializeChemistryNamespace()
         for (int n=0; n<nspecies; n++)
             stoich_coeffs_PR(m,n) = stoich_coeffs_P(m,n)-stoich_coeffs_R(m,n);
 
+    // get rate constants
+    std::vector<amrex::Real> k_tmp(MAX_REACTION);
+    pp.getarr("rate_const",k_tmp,0,nreaction);
+    for (int m=0; m<nreaction; m++) rate_const[m] = k_tmp[m];
+
+    rate_multiplier = 1.;
+    pp.query("rate_multiplier",rate_multiplier);
+
+    include_discrete_LMA_correction = 0;
+    pp.query("include_discrete_LMA_correction",include_discrete_LMA_correction);
+
+    exclude_solvent_comput_rates = -1;
+    pp.query("exclude_solvent_comput_rates",exclude_solvent_comput_rates);
+    
     // get reaction type (0=deterministic; 1=CLE; 2=SSA; 3=tau leap)
     pp.get("reaction_type",reaction_type);
 
+    use_mole_frac_LMA = 0;
+    pp.query("use_mole_frac_LMA",use_mole_frac_LMA);
+
     // get alpha parameter for compressible code
     std::vector<amrex::Real> alpha_tmp(MAX_REACTION);
     pp.queryarr("alpha_param",alpha_tmp,0,nreaction);
@@ -162,3 +191,269 @@ void compute_compressible_chemistry_source_CLE(amrex::Real dt, amrex::Real dV,
         });
     }
 }
+
+
+void ChemicalRates(const MultiFab& n_cc, MultiFab& chem_rate, const amrex::Geometry& geom, const amrex::Real& dt, 
+                   const MultiFab& n_interm, Vector<Real>& lin_comb_coef_in, Real volume_factor_in)
+{
+    // Compute per-species chemical production rates for one step of length dt from the number
+    // densities n_cc and store them in chem_rate; the method is selected by chemistry::reaction_type.
+    if (nreaction < 1) { // no reactions configured: every rate is zero
+        chem_rate.setVal(0.);
+        return;
+    }
+    // lin_comb_coef_in = {1,0} means rates come from n_cc alone; otherwise rates from n_cc and n_interm are blended
+    int lin_comb_avg_react_rate = 1;
+    if (lin_comb_coef_in[0] == 1. && lin_comb_coef_in[1] == 0.) {
+        lin_comb_avg_react_rate = 0;
+    }
+
+    GpuArray<Real,2> lin_comb_coef{lin_comb_coef_in[0], lin_comb_coef_in[1]}; // by-value copy for the device lambda
+
+    const Real* dx = geom.CellSize();
+
+    Real dv = (AMREX_SPACEDIM == 3) ? dx[0]*dx[1]*dx[2]*cell_depth : dx[0]*dx[1]*cell_depth; // cell volume
+    dv *= volume_factor_in; // optional caller-supplied volume scaling
+    
+    for (MFIter mfi(n_cc); mfi.isValid(); ++mfi)
+    {
+        const Box& bx = mfi.validbox();
+
+        const Array4<const Real>& n_arr = n_cc.array(mfi);
+        const Array4<const Real>& n_int = n_interm.array(mfi);
+
+        const Array4<Real>& rate = chem_rate.array(mfi);
+
+        if (reaction_type == 2) { // SSA
+
+            amrex::ParallelForRNG(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k, amrex::RandomEngine const& engine) noexcept
+            {
+                GpuArray<Real,MAX_SPECIES> n_old;
+                GpuArray<Real,MAX_SPECIES> n_new;
+                GpuArray<Real,MAX_REACTION> avg_reaction_rate;
+
+                Real t_local = 0.; // time elapsed inside this step for this cell
+                
+                for (int n=0; n<nspecies; ++n) {
+                    n_old[n] = n_arr(i,j,k,n);
+                    n_new[n] = n_arr(i,j,k,n);
+                }
+
+                while(true)
+                {
+                    compute_reaction_rates(n_new,avg_reaction_rate,dv);
+
+                    Real rTotal = 0.;
+                    for (int m=0; m<nreaction; m++)
+                    {
+                        // convert reaction rates to propensities
+                        avg_reaction_rate[m] = std::max(0.,avg_reaction_rate[m]*dv);
+                        rTotal += avg_reaction_rate[m];
+                    }
+
+                    if (rTotal==0.) break; // no reaction can fire any more
+
+                    Real u1 = amrex::Random(engine);
+                    Real tau = -log(1-u1)/rTotal; // waiting time until the next reaction event
+                    t_local += tau; // update t_local
+
+                    if (t_local > dt) break; // the next event falls outside this time step
+
+                    Real u2 = amrex::Random(engine);
+                    u2 *= rTotal;
+
+                    // find which reaction has occurred
+                    int which_reaction=0;
+                    Real rSum = 0.;
+                    for (int m=0; m<nreaction; m++)
+                    {
+                        rSum = rSum + avg_reaction_rate[m];
+                        which_reaction = m;
+                        if (rSum >= u2) break;
+                    }
+
+                    // update number densities for the reaction that has occurred
+                    for (int n=0; n<nspecies; n++) {
+                        n_new[n] += stoich_coeffs_PR(which_reaction,n)/dv; // one molecule changes the number density by 1/dv
+                    }
+                }
+
+                for (int n=0; n<nspecies; ++n) {
+                    rate(i,j,k,n) = (n_new[n] - n_old[n] ) / dt; // net change over the step
+                }
+            });
+
+        } else { // all other reaction types are dispatched inside sample_num_reactions
+            amrex::ParallelForRNG(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k, amrex::RandomEngine const& engine) noexcept
+            {
+                GpuArray<Real,MAX_SPECIES> n_in;
+                GpuArray<Real,MAX_SPECIES> n_int_in;
+                GpuArray<Real,MAX_REACTION> avg_reaction_rate;
+                GpuArray<Real,MAX_REACTION> avg_reaction_rate_interm;
+                GpuArray<Real,MAX_REACTION> avg_num_reactions;
+                GpuArray<Real,MAX_REACTION> num_reactions;
+
+                for (int n=0; n<nspecies; ++n) {
+                    rate(i,j,k,n) = 0.;
+                    n_in[n]     = n_arr(i,j,k,n);
+                    n_int_in[n] = n_int(i,j,k,n);
+                }
+
+                if (lin_comb_avg_react_rate == 1) {
+                    compute_reaction_rates(n_in    , avg_reaction_rate       , dv);
+                    compute_reaction_rates(n_int_in, avg_reaction_rate_interm, dv);
+                    for (int r=0; r<nreaction; ++r) {
+                        avg_reaction_rate[r] = lin_comb_coef[0]*avg_reaction_rate[r] + lin_comb_coef[1]*avg_reaction_rate_interm[r];
+                    }
+                } else {
+                    compute_reaction_rates(n_in, avg_reaction_rate, dv);
+                }
+
+                for (int r=0; r<nreaction; ++r) {
+                    avg_num_reactions[r] = std::max(0.,avg_reaction_rate[r]*dv*dt); // mean number of firings in dt, clamped at zero
+                }
+                sample_num_reactions(n_in,num_reactions,avg_num_reactions,engine);
+                for (int r=0; r<nreaction; ++r) {
+                    for (int n=0; n<nspecies; ++n) {
+                        rate(i,j,k,n) += num_reactions[r]/dv/dt * stoich_coeffs_PR(r,n); // counts back to a number-density rate
+                    }
+                }
+            });
+        }
+    }
+}
+
+AMREX_GPU_HOST_DEVICE void compute_reaction_rates(GpuArray<Real,MAX_SPECIES>& n_in,
+                                                  GpuArray<Real,MAX_REACTION>& reaction_rates,
+                                                  const amrex::Real& dv)
+{
+    GpuArray<Real,MAX_SPECIES> n_nonneg;
+    // Fills reaction_rates[r] for every reaction from the clamped number densities in n_in.
+    Real n_sum = 0.;
+    for (int n=0; n<nspecies; ++n) {
+        n_nonneg[n] = std::max(0.,n_in[n]); // negative densities are treated as zero
+        n_sum += n_nonneg[n];
+    }
+    if (n_sum < 0.) { // n_sum adds non-negative terms only, so this guard is not expected to fire
+        n_sum = 1./dv;
+        Abort("compute_reaction_rates() - n_sum < 0, is this right?");
+    }
+
+    if (use_mole_frac_LMA && include_discrete_LMA_correction) {
+
+        Abort("compute_reaction_rates() - use_mole_frac_LMA && include_discrete_LMA_correction not supported yet");
+
+/*
+      ! Use mole-fraction based LMA (general ideal mixtures) with integer corrections
+
+      do reaction=1, nreactions
+        reaction_rates(reaction) = rate_multiplier*rate_const(reaction)
+        do species=1, nspecies
+          ! Donev: Replaced case statement by if here
+          ! Donev: Made sure n_sum is never zero for empty cells to avoid division by zero
+
+          if(stoichiometric_factors(species,1,reaction)>=1) then
+            ! rate ~ N/N_sum
+            if(n_nonneg(species)>0.0d0) then ! This species is present in this cell
+               reaction_rates(reaction) = reaction_rates(reaction) * n_nonneg(species)/n_sum
+            else
+               reaction_rates(reaction) = 0.0d0
+            end if
+          end if
+          if(stoichiometric_factors(species,1,reaction)>=2) then
+            ! rate ~ (N/N_sum)*((N-1)/(N_sum-1))
+            ! Donev: Avoid division by zero or negative rates
+            if(n_nonneg(species)>1.0d0/dv) then ! There is at least one molecule of this species in this cell
+               reaction_rates(reaction) = reaction_rates(reaction) * (n_nonneg(species)-1.0d0/dv)/(n_sum-1.0d0/dv)
+            else
+               reaction_rates(reaction) = 0.0d0
+            end if
+          end if
+          if(stoichiometric_factors(species,1,reaction)>=3) then ! Donev added ternary reactions here
+            ! rate ~ (N/N_sum)*((N-1)/(N_sum-1))*((N-2)/(N_sum-2))
+            if(n_nonneg(species)>2.0d0/dv) then ! There is at least two molecules of this species in this cell
+              reaction_rates(reaction) = reaction_rates(reaction) * (n_nonneg(species)-2.0d0/dv)/(n_sum-2.0d0/dv)
+            else
+               reaction_rates(reaction) = 0.0d0
+            end if
+          end if
+          if(stoichiometric_factors(species,1,reaction)>=4) then
+            ! This is essentially impossible in practice and won't happen
+            call bl_error("Stochiometric coefficients larger then 3 not supported")
+          end if
+        end do
+      end do
+*/
+
+    } else if (include_discrete_LMA_correction == 0 && exclude_solvent_comput_rates == -1) {
+        // fast path: plain power-law rates over all species, optionally on mole fractions
+        if (use_mole_frac_LMA) {
+            for (int n=0; n<nspecies; ++n) {
+                n_nonneg[n] /= n_sum; // NOTE(review): n_sum == 0 divides by zero here; confirm empty cells cannot occur
+            }
+        }
+
+        for (int r=0; r<nreaction; ++r) {
+            reaction_rates[r] = rate_multiplier*rate_const[r];
+            for (int n=0; n<nspecies; ++n) {
+                reaction_rates[r] *= std::pow(n_nonneg[n],stoich_coeffs_R(r,n));
+            }
+        }
+        
+    } else { // General case of number-density based LMA is handled by slower code that includes species by species
+    
+        for (int r=0; r<nreaction; ++r) {
+            reaction_rates[r] = rate_multiplier*rate_const[r];
+
+            for (int n=0; n<nspecies; ++n) {
+                if (n == exclude_solvent_comput_rates) { // the excluded (solvent) species is left out of the product
+                    continue;
+                }
+                if (include_discrete_LMA_correction) {
+
+                    int coef = stoich_coeffs_R(r,n);
+                    if (coef == 0) {
+                        // Species does not participate in reaction
+                    } else if (coef == 1) {
+                        reaction_rates[r] *= n_nonneg[n];
+                    } else if (coef == 2) {
+                        reaction_rates[r] *= n_nonneg[n]*std::max(0.,n_nonneg[n]-1./dv); // n*(n-1/dv), second factor clamped at zero
+                    } else if (coef == 3) {
+                        reaction_rates[r] *= n_nonneg[n]*std::max(0.,n_nonneg[n]-1./dv)*std::max(0.,n_nonneg[n]-2./dv);
+                    } else {
+                        // This is essentially impossible in practice and won't happen
+                        Abort("Stochiometric coefficients larger then 3 not supported");
+                    }
+
+                } else {
+                    reaction_rates[r] *= std::pow(n_nonneg[n],stoich_coeffs_R(r,n));
+                }
+            } // end loop over species
+        } // end loop over reaction
+    }
+   
+}
+
+AMREX_GPU_HOST_DEVICE void sample_num_reactions(GpuArray<Real,MAX_SPECIES>& n_in,
+                                                GpuArray<Real,MAX_REACTION>& num_reactions,
+                                                GpuArray<Real,MAX_REACTION>& avg_num_reactions,
+                                                const amrex::RandomEngine& engine)
+{
+    switch (reaction_type) { // draw the number of firings of every reaction from its mean
+    case 0: // deterministic: use the mean itself
+        for (int r=0; r<nreaction; ++r) num_reactions[r] = avg_num_reactions[r];
+        break;
+    case 1: // CLE: mean plus Gaussian noise scaled by sqrt(mean)
+        for (int r=0; r<nreaction; ++r) {
+            const Real gauss = RandomNormal(0.,1.,engine);
+            num_reactions[r] = avg_num_reactions[r] + std::sqrt(avg_num_reactions[r])*gauss;
+        }
+        break;
+    case 3: // tau leaping: Poisson sample with the given mean
+        for (int r=0; r<nreaction; ++r) num_reactions[r] = RandomPoisson(avg_num_reactions[r], engine);
+        break;
+    default: // any other reaction_type is rejected
+        Abort("sample_num_reactions() - reaction_type not supported");
+    }
+}
+
diff --git a/src_chemistry/chemistry_namespace.H b/src_chemistry/chemistry_namespace.H
index df42071c2..66ed7853e 100644
--- a/src_chemistry/chemistry_namespace.H
+++ b/src_chemistry/chemistry_namespace.H
@@ -1,14 +1,39 @@
 namespace chemistry {
+    // extern declarations of the chemistry parameters; nreaction = number of reactions in use
     extern AMREX_GPU_MANAGED int nreaction;
 
-    extern AMREX_GPU_MANAGED GpuArray<amrex::Real, MAX_REACTION> rate_const; 
-    extern AMREX_GPU_MANAGED GpuArray<amrex::Real, MAX_REACTION> alpha_param; 
-    extern AMREX_GPU_MANAGED GpuArray<amrex::Real, MAX_REACTION> beta_param; 
-    extern AMREX_GPU_MANAGED amrex::Real T0_chem;
+    // stoichiometric coefficients; in the fortran code these were
+    // stoich_coeffs_R = stoichiometric_factors(spec,1,reac), stoich_coeffs_P = stoichiometric_factors(spec,2,reac)
+    // stoich_coeffs_PR = stoich_coeffs_P - stoich_coeffs_R
+    extern AMREX_GPU_MANAGED Array2D<int,0, MAX_REACTION,0, MAX_SPECIES> stoich_coeffs_R;
+    extern AMREX_GPU_MANAGED Array2D<int,0, MAX_REACTION,0, MAX_SPECIES> stoich_coeffs_P;
+    extern AMREX_GPU_MANAGED Array2D<int,0, MAX_REACTION,0, MAX_SPECIES> stoich_coeffs_PR;
+
+    // rate constant of each reaction (assuming the Law of Mass Action, LMA, holds)
+    // rate_multiplier allows all reaction rates to be changed by the same factor
+    // if include_discrete_LMA_correction is used, n^2 and n^3 in rate expressions become
+    // n*(n-1/dv) and n*(n-1/dv)*(n-2/dv)
+    extern AMREX_GPU_MANAGED GpuArray<amrex::Real, MAX_REACTION> rate_const;
+    extern AMREX_GPU_MANAGED amrex::Real rate_multiplier;
+    extern AMREX_GPU_MANAGED int include_discrete_LMA_correction;
 
-    extern AMREX_GPU_MANAGED Array2D<int,0, MAX_REACTION,0, MAX_SPECIES> stoich_coeffs_R; 
-    extern AMREX_GPU_MANAGED Array2D<int,0, MAX_REACTION,0, MAX_SPECIES> stoich_coeffs_P; 
-    extern AMREX_GPU_MANAGED Array2D<int,0, MAX_REACTION,0, MAX_SPECIES> stoich_coeffs_PR; 
+    // if positive (=n), species n (the solvent) is excluded when computing reaction rates;
+    // in this case, the concentration of the solvent is assumed to be constant,
+    // which should be reflected in the rate constants.
+    // if 0, no species is excluded
+    // e.g. U + S -> 2U, if exclude_solvent_comput_rates=0, rate=k*n_U*n_S
+    //                   if exclude_solvent_comput_rates=2, rate=k_new*n_U where k_new=k*n_S
+    extern AMREX_GPU_MANAGED int exclude_solvent_comput_rates;
 
+    // the fortran code called this use_Poisson_rng (0=CLE; 1=tau leaping; -1=deterministic; 2=SSA)
+    // here it is reaction_type, with different values (0=deterministic; 1=CLE; 2=SSA; 3=tau leap)
     extern AMREX_GPU_MANAGED int reaction_type;
+
+    // use mole-fraction-based LMA (Law of Mass Action)
+    extern AMREX_GPU_MANAGED int use_mole_frac_LMA;
+
+    // parameters specific to the compressible codes
+    extern AMREX_GPU_MANAGED GpuArray<amrex::Real, MAX_REACTION> alpha_param;
+    extern AMREX_GPU_MANAGED GpuArray<amrex::Real, MAX_REACTION> beta_param;
+    extern AMREX_GPU_MANAGED amrex::Real T0_chem;
 }
diff --git a/src_common/ComputeAverages.cpp b/src_common/ComputeAverages.cpp
index 6880c70de..22b494cb9 100644
--- a/src_common/ComputeAverages.cpp
+++ b/src_common/ComputeAverages.cpp
@@ -95,8 +95,12 @@ void WriteHorizontalAverage(const MultiFab& mf_in, const int& dir, const int& in
 
 void WriteHorizontalAverageToMF(const MultiFab& mf_in, MultiFab& mf_out,
                                 const int& dir, const int& incomp,
-                                const int& ncomp)
+                                const int& ncomp, int outcomp)
 {
+    if (outcomp == -1) {
+        outcomp = incomp; // default outcomp is incomp unless specified
+    }
+
     // number of points in the averaging direction
     int npts = n_cells[dir];
 
@@ -166,7 +170,7 @@ void WriteHorizontalAverageToMF(const MultiFab& mf_in, MultiFab& mf_out,
         const Array4<Real> mf = mf_out.array(mfi);
 
         for (auto n=0; n<ncomp; ++n) {
-            comp = incomp+n;
+            comp = outcomp+n;
             for (auto k = lo.z; k <= hi.z; ++k) {
             for (auto j = lo.y; j <= hi.y; ++j) {
             for (auto i = lo.x; i <= hi.x; ++i) {
@@ -189,7 +193,7 @@ void WriteHorizontalAverageToMF(const MultiFab& mf_in, MultiFab& mf_out,
 
 
 void ComputeVerticalAverage(const MultiFab& mf, MultiFab& mf_flat,
-			    const Geometry& geom, const int& dir,
+			    const int& dir,
 			    const int& incomp, const int& ncomp,
                             const int& slablo, const int& slabhi)
 {
@@ -199,19 +203,9 @@ void ComputeVerticalAverage(const MultiFab& mf, MultiFab& mf_flat,
     if (dir >= AMREX_SPACEDIM) {
         Abort("ComputeVerticalAverage: invalid dir");
     }
-    
-    // debugging
-    bool write_data = false;
-
-    // this is a full MultiFab with pencil-shaped boxes
-    // we will define mf_flat as a flattened MultiFab that
-    // has the same BoxArray but flattened in the dir direction
-    // and the same DistributionMapping so
-    // we can do the averaging from mf_pencil to mf_flat on a box-by-box basis
-    MultiFab mf_pencil;
 
     // get a single Box that spans the full domain
-    Box domain(geom.Domain());
+    Box domain(mf.boxArray().minimalBox());
 
     // these are the transverse directions (i.e., NOT the dir direction)
     int dir1=0, dir2=0;
@@ -229,7 +223,7 @@ void ComputeVerticalAverage(const MultiFab& mf, MultiFab& mf_flat,
         dir2 = 1;
     }
 #endif
-
+    
     // max_grid_size_pencil will be equal to the number of cells in the domain in the dir direction
     // and uses max_grid_projection to set the non-dir directions
     Vector<int> max_grid_size_pencil(AMREX_SPACEDIM);
@@ -239,36 +233,6 @@ void ComputeVerticalAverage(const MultiFab& mf, MultiFab& mf_flat,
     max_grid_size_pencil[dir2] = max_grid_projection[1];
 #endif
 
-    // create the BoxArray for the pencil MultiFab
-    BoxArray ba_pencil(domain);
-    ba_pencil.maxSize(IntVect(max_grid_size_pencil));
-
-    // create DistributionMapping on the pencil BoxArray
-    DistributionMapping dmap_pencil(ba_pencil);
-
-    // build pencil MultiFab
-    mf_pencil.define(ba_pencil,dmap_pencil,ncomp,0);
-
-    // copy data from full MultiFab to pencil MultiFab
-    mf_pencil.ParallelCopy(mf, incomp, 0, ncomp);
-
-    // create a single flattened box with coordinate index 0 in the dir direction
-    IntVect dom_lo(domain.loVect());
-    IntVect dom_hi(domain.hiVect());
-    if (dom_lo[dir] != 0) {
-        Abort("ComputeVerticalAverage requires dom_lo[dir]=0");
-    }
-    dom_hi[dir] = 0;
-    Box domain_flat(dom_lo, dom_hi);
-    
-    // create the BoxArray for the flattened MultiFab
-    BoxArray ba_flat(domain_flat);
-    ba_flat.maxSize(IntVect(max_grid_size_pencil));
-
-    // build flattened MultiFab and initialize to zero
-    mf_flat.define(ba_flat,dmap_pencil,ncomp,0);
-    mf_flat.setVal(0.);
-
     // this is the inverse of the number of cells in the dir direction we are averaging over
     // by default we average over the entire domain, but one can pass in slab_lo/hi to set bounds
     Real ninv;
@@ -278,71 +242,54 @@ void ComputeVerticalAverage(const MultiFab& mf, MultiFab& mf_flat,
         ninv = 1./(domain.length(dir));
     }
 
-    // average pencil data onto the flattened MultiFab
-    for ( MFIter mfi(mf_pencil); mfi.isValid(); ++mfi ) {
-        const Box& bx = mfi.validbox();
+    MultiFab mf_onecomp(mf.boxArray(), mf.DistributionMap(), 1, 0);
 
-        const auto lo = amrex::lbound(bx);
-        const auto hi = amrex::ubound(bx);
+    for (int n=0; n<ncomp; ++n) {
 
-        const Array4<Real> meanfab = mf_flat.array(mfi);
-        const Array4<Real> inputfab = mf_pencil.array(mfi);
+        // copy a component of mf into mf_onecomp
+        MultiFab::Copy(mf_onecomp,mf,incomp+n,0,1,0);
 
-        if (dir == 0) {
-        
-            for (auto n = incomp; n<incomp+ncomp; ++n) {
-            for (auto k = lo.z; k <= hi.z; ++k) {
-            for (auto j = lo.y; j <= hi.y; ++j) {
-            for (auto i = lo.x; i <= hi.x; ++i) {
-                if ((i >= slablo) and (i <= slabhi)) {
-                    meanfab(0,j,k,n) = meanfab(0,j,k,n) + ninv*inputfab(i,j,k,n);
-                }
-            }
-            }
-            }
-            }
-            
-        } else if (dir == 1) {
-        
-            for (auto n = incomp; n<incomp+ncomp; ++n) {
-            for (auto k = lo.z; k <= hi.z; ++k) {
-            for (auto j = lo.y; j <= hi.y; ++j) {
-            for (auto i = lo.x; i <= hi.x; ++i) {
-                if ((j >= slablo) and (j <= slabhi)) {
-                    meanfab(i,0,k,n) = meanfab(i,0,k,n) + ninv*inputfab(i,j,k,n);
-                }
-            }
-            }
-            }
-            }
+        // sum up
+        auto const& ma = mf_onecomp.const_arrays();
+        auto fab = ReduceToPlane<ReduceOpSum,Real>(dir, domain, mf_onecomp,
+          [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) -> Real
+          {
+              return ma[box_no](i,j,k); // data at (i,j,k) of Box box_no
+          });
 
-        } else if (dir == 2) {
-        
-            for (auto n = incomp; n<incomp+ncomp; ++n) {
-            for (auto k = lo.z; k <= hi.z; ++k) {
-            for (auto j = lo.y; j <= hi.y; ++j) {
-            for (auto i = lo.x; i <= hi.x; ++i) {
-                if ((k >= slablo) and (k <= slabhi)) {
-                    meanfab(i,j,0,n) = meanfab(i,j,0,n) + ninv*inputfab(i,j,k,n);
-                }
-            }
-            }
-            }
-            }
+        Box dom2d = fab.box();
+        Vector<Box> bv(ParallelDescriptor::NProcs(),dom2d);
+        BoxArray ba(bv.data(), bv.size());
+
+        Vector<int> pmap(ParallelDescriptor::NProcs());
+        std::iota(pmap.begin(), pmap.end(), 0);
+        DistributionMapping dm(std::move(pmap));
+
+        MultiFab mftmp(ba, dm, 1, 0, MFInfo().SetAlloc(false));
+        mftmp.setFab(ParallelDescriptor::MyProc(),
+                     FArrayBox(fab.box(), 1, fab.dataPtr()));
+
+        // divide by number of cells in column to create average
+        mftmp.mult(ninv);
+
+        BoxArray ba2(dom2d);
+
+        ba2.maxSize(IntVect(max_grid_size_pencil));
+
+        if (n==0) {
+            mf_flat.define(ba2, DistributionMapping{ba2}, ncomp, 0);
         }
-    }
 
-    // debugging
-    if (write_data) {
-        VisMF::Write(mf,"mf_full");
-        VisMF::Write(mf_pencil,"mf_pencil");
-        VisMF::Write(mf_flat,"mf_flat");
-    }
+        MultiFab mf_flat_onecomp(ba2, DistributionMapping{ba2}, fab.nComp(), 0);
+        mf_flat_onecomp.setVal(0.);
+        mf_flat_onecomp.ParallelAdd(mftmp);
 
+        mf_flat.ParallelCopy(mf_flat_onecomp, 0, n, 1);
+    }
 }
 
 void ExtractSlice(const MultiFab& mf, MultiFab& mf_slice,
-                  const Geometry& geom, const int dir, const int slice,
+                  const int dir, const int slice,
                   const int incomp, const int ncomp)
 {
     BL_PROFILE_VAR("ExtractSlice()",ExtractSlice);
@@ -350,7 +297,7 @@ void ExtractSlice(const MultiFab& mf, MultiFab& mf_slice,
     // create BoxArray
 
     // get lo and hi coordinates of problem domain
-    Box domain(geom.Domain());
+    Box domain(mf.boxArray().minimalBox());
     IntVect dom_lo(domain.loVect());
     IntVect dom_hi(domain.hiVect());
 
@@ -384,7 +331,44 @@ void ExtractSlice(const MultiFab& mf, MultiFab& mf_slice,
 
     // create a new DistributionMapping and define the MultiFab
     DistributionMapping dmap_slice(ba_slice);
-    mf_slice.define(ba_slice,dmap_slice,ncomp,0);
+    MultiFab mf_slice_tmp(ba_slice,dmap_slice,ncomp,0);
         
-    mf_slice.ParallelCopy(mf, incomp, 0, ncomp);
+    mf_slice_tmp.ParallelCopy(mf, incomp, 0, ncomp);
+
+    // now copy this into a multifab with index zero in the dir direction rather than slicepoint
+    // (structure factor code requires this)
+    dom_lo[dir] = 0;
+    dom_hi[dir] = 0;
+
+    Box domain_slice2(dom_lo,dom_hi);
+    BoxArray ba_slice2(domain_slice2);
+    ba_slice2.maxSize(IntVect(max_grid_slice));
+    mf_slice.define(ba_slice2,dmap_slice,ncomp,0);
+
+    for ( MFIter mfi(mf_slice_tmp,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+
+        const Box& bx = mfi.tilebox();
+
+        const Array4<Real> & slice = mf_slice.array(mfi);
+        const Array4<Real> & slice_tmp = mf_slice_tmp.array(mfi);
+
+        if (dir == 0) {
+            amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+            {
+                slice(0,j,k,n) = slice_tmp(i,j,k,n);
+            });
+        }
+        if (dir == 1) {
+            amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+            {
+                slice(i,0,k,n) = slice_tmp(i,j,k,n);
+            });
+        }
+        if (dir == 2) {
+            amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+            {
+                slice(i,j,0,n) = slice_tmp(i,j,k,n);
+            });
+        }
+    }
 }
diff --git a/src_common/Make.package b/src_common/Make.package
index daaa57330..e8556109c 100644
--- a/src_common/Make.package
+++ b/src_common/Make.package
@@ -15,7 +15,6 @@ CEXE_sources += ComputeDivAndGrad.cpp
 CEXE_sources += Debug.cpp
 CEXE_sources += MultiFabPhysBC.cpp
 CEXE_sources += NormInnerProduct.cpp
-CEXE_sources += RotateFlattenedMF.cpp
 CEXE_sources += SqrtMF.cpp
 #CEXE_sources += InterpCoarsen.cpp
 
diff --git a/src_common/MultiFabPhysBC.cpp b/src_common/MultiFabPhysBC.cpp
index 529efaf59..069485559 100644
--- a/src_common/MultiFabPhysBC.cpp
+++ b/src_common/MultiFabPhysBC.cpp
@@ -390,7 +390,7 @@ void MultiFabPhysBCMacVel(MultiFab& vel, const Geometry& geom, int dim, int is_i
             }
         }
 
-        if ((dim != 0) && (bc_vel_lo[0] == 1 || bc_vel_hi[0] == 2) && (bx.bigEnd(0) > dom.bigEnd(0))) {
+        if ((dim != 0) && (bc_vel_hi[0] == 1 || bc_vel_hi[0] == 2) && (bx.bigEnd(0) > dom.bigEnd(0))) {
             if (bc_vel_hi[0] == 1) { // slip
                 amrex::ParallelFor(bx,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
                 {
diff --git a/src_common/RotateFlattenedMF.cpp b/src_common/RotateFlattenedMF.cpp
deleted file mode 100644
index b1b607331..000000000
--- a/src_common/RotateFlattenedMF.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-#include "common_functions.H"
-
-// takes in a flattened multifab
-// (i.e., a multifab with only 1 cell in 1 direction but >1 cells in the other directions)
-// returns a flattened multifab that is now flattened in the AMREX_SPACEDIM-1 direction
-// (z direction in 3D, y direction in 2D)
-MultiFab RotateFlattenedMF(MultiFab const& mf)
-{
-    BoxArray const& old_ba = mf.boxArray();
-    DistributionMapping const& dm = mf.DistributionMap();
-    Box const& domain_box = old_ba.minimalBox();
-    int short_direction;
-    int short_size = domain_box.shortside(short_direction);
-    if (short_size != 1) {
-        Print() << "RotateFlattenedMF needs a MF with short_size==1; returning the original input MultiFab\n";
-        return MultiFab(mf, amrex::make_alias, 0, mf.nComp());
-    } else if (short_direction == AMREX_SPACEDIM-1) {
-        return MultiFab(mf, amrex::make_alias, 0, mf.nComp());
-    } else {
-        IntVect old_ng = mf.nGrowVect();
-        AMREX_ALWAYS_ASSERT_WITH_MESSAGE(old_ng[short_direction] == 0,
-                                         "Not supposed to have ghost cells in the shortest direction");
-        IntVect ng;
-        if (short_direction == 0) {
-            ng = IntVect(AMREX_D_DECL(old_ng[1],old_ng[2],0));
-        } else {
-            ng = IntVect(AMREX_D_DECL(old_ng[0],old_ng[2],0));
-        }
-        BoxList bl = old_ba.boxList();
-        for (auto& b : bl) {
-            const auto lo = b.smallEnd();
-            const auto hi = b.bigEnd();
-            if (short_direction == 0) {
-                b = Box(IntVect(AMREX_D_DECL(lo[1],lo[2],0)),
-                        IntVect(AMREX_D_DECL(hi[1],hi[2],0)),
-                        b.ixType());
-            } else {
-                b = Box(IntVect(AMREX_D_DECL(lo[0],lo[2],0)),
-                        IntVect(AMREX_D_DECL(hi[0],hi[2],0)),
-                        b.ixType());
-            }
-        }
-        BoxArray new_ba(std::move(bl));
-        const int ncomp = mf.nComp();
-        MultiFab new_mf(new_ba, dm, ncomp, ng, MFInfo().SetAlloc(false));
-        for (MFIter mfi(new_mf); mfi.isValid(); ++mfi) {
-            new_mf.setFab(mfi, FArrayBox(mfi.fabbox(), ncomp, mf[mfi.index()].dataPtr()));
-        }
-        return new_mf;
-    }
-}
diff --git a/src_common/common_functions.H b/src_common/common_functions.H
index 353d38efd..affabd688 100644
--- a/src_common/common_functions.H
+++ b/src_common/common_functions.H
@@ -148,13 +148,13 @@ void WriteHorizontalAverage(const MultiFab& mf_in, const int& dir, const int& in
 
 void WriteHorizontalAverageToMF(const MultiFab& mf_in, MultiFab& mf_out,
                                 const int& dir, const int& incomp,
-                                const int& ncomp);
+                                const int& ncomp, int outcomp=-1);
     
-void ComputeVerticalAverage(const MultiFab & mf, MultiFab & mf_flat, const Geometry & geom,
+void ComputeVerticalAverage(const MultiFab & mf, MultiFab & mf_flat,
                             const int& dir, const int& incomp, const int& ncomp,
                             const int& slablo=-1, const int& slabhi=99999);
 
-void ExtractSlice(const MultiFab & mf, MultiFab & mf_slice, const Geometry & geom,
+void ExtractSlice(const MultiFab & mf, MultiFab & mf_slice,
                   const int dir, const int slice, const int incomp, const int ncomp);
 
 ///////////////////////////
@@ -243,11 +243,6 @@ void CCL2Norm(const MultiFab & m1,
               amrex::MultiFab& mscr,
 	      Real & norm_l2);
 
-///////////////////////////
-// in RotateFlattenedMF.cpp
-
-MultiFab RotateFlattenedMF(MultiFab const& mf);
-
 ///////////////////////////
 // in InterpCoarsen.cpp
 void FaceFillCoarse(Vector<std::array< MultiFab, AMREX_SPACEDIM >>& mf, int map);
diff --git a/src_common/common_functions.cpp b/src_common/common_functions.cpp
index 2c2db9421..eb708935c 100644
--- a/src_common/common_functions.cpp
+++ b/src_common/common_functions.cpp
@@ -97,10 +97,6 @@ AMREX_GPU_MANAGED amrex::GpuArray<int, AMREX_SPACEDIM>         common::bc_mass_l
 AMREX_GPU_MANAGED amrex::GpuArray<int, AMREX_SPACEDIM>         common::bc_mass_hi;
 AMREX_GPU_MANAGED amrex::GpuArray<int, AMREX_SPACEDIM>         common::bc_therm_lo;
 AMREX_GPU_MANAGED amrex::GpuArray<int, AMREX_SPACEDIM>         common::bc_therm_hi;
-AMREX_GPU_MANAGED amrex::GpuArray<int, AMREX_SPACEDIM>         common::bc_spec_lo;
-AMREX_GPU_MANAGED amrex::GpuArray<int, AMREX_SPACEDIM>         common::bc_spec_hi;
-
-
 
 AMREX_GPU_MANAGED amrex::GpuArray<amrex::Real, AMREX_SPACEDIM> common::p_lo;
 AMREX_GPU_MANAGED amrex::GpuArray<amrex::Real, AMREX_SPACEDIM> common::p_hi;
@@ -244,6 +240,7 @@ int                        common::plot_means;
 int                        common::plot_vars;
 int                        common::plot_covars;
 int                        common::plot_cross;
+int                        common::plot_deltaY_dir;
 int                        common::particle_motion;
 
 AMREX_GPU_MANAGED amrex::Real common::turb_a;
@@ -343,7 +340,7 @@ void InitializeCommonNamespace() {
     // primvars - number of primative variables (no default)
 
     cross_cell = 0;     // cell to compute spatial correlation
-    do_slab_sf = 0;     // whether to compute SF in two slabs separated by cross_cell
+    do_slab_sf = 0;     // whether to compute SF in two slabs separated by membrane_cell
 
     for (int i=0; i<MAX_SPECIES; ++i) {
         qval[i] = 0.;                // charge on an ion
@@ -483,8 +480,6 @@ void InitializeCommonNamespace() {
         bc_mass_hi[i] = 0;
         bc_therm_lo[i] = 0;
         bc_therm_hi[i] = 0;
-        bc_spec_lo[i] = -1;
-        bc_spec_hi[i] = -1;
 
         // Pressure drop are periodic inflow/outflow walls (bc_[hi,lo]=-2).
         p_lo[i] = 0.;
@@ -614,6 +609,7 @@ void InitializeCommonNamespace() {
     plot_vars = 0;
     plot_covars = 0;
     plot_cross = 0;
+    plot_deltaY_dir = -1;
     particle_motion = 0;
 
     // turblent forcing parameters
@@ -827,16 +823,6 @@ void InitializeCommonNamespace() {
             bc_mass_hi[i] = temp_int[i];
         }
     }
-    if (pp.queryarr("bc_spec_lo",temp_int,0,AMREX_SPACEDIM)) {
-        for (int i=0; i<AMREX_SPACEDIM; ++i) {
-            bc_spec_lo[i] = temp_int[i];
-        }
-    }
-    if (pp.queryarr("bc_spec_hi",temp_int,0,AMREX_SPACEDIM)) {
-        for (int i=0; i<AMREX_SPACEDIM; ++i) {
-            bc_spec_hi[i] = temp_int[i];
-        }
-    }
     if (pp.queryarr("bc_therm_lo",temp_int,0,AMREX_SPACEDIM)) {
         for (int i=0; i<AMREX_SPACEDIM; ++i) {
             bc_therm_lo[i] = temp_int[i];
@@ -1151,6 +1137,7 @@ void InitializeCommonNamespace() {
     pp.query("plot_vars",plot_vars);
     pp.query("plot_covars",plot_covars);
     pp.query("plot_cross",plot_cross);
+    pp.query("plot_deltaY_dir",plot_deltaY_dir);
     pp.query("particle_motion",particle_motion);
     pp.query("turb_a",turb_a);
     pp.query("turb_b",turb_b);
diff --git a/src_common/common_namespace.H b/src_common/common_namespace.H
index 5f8c68487..b5493d3a1 100644
--- a/src_common/common_namespace.H
+++ b/src_common/common_namespace.H
@@ -157,14 +157,6 @@ namespace common {
     extern AMREX_GPU_MANAGED amrex::GpuArray<int, AMREX_SPACEDIM>          bc_therm_lo;
     extern AMREX_GPU_MANAGED amrex::GpuArray<int, AMREX_SPACEDIM>          bc_therm_hi;
     
-    // BC specifications:
-    // -1 = don't change species, else change species to number
-    //  1 = wall
-    //  2 = reservoir (Dirichlet values must be suppled by other means)
-    extern AMREX_GPU_MANAGED amrex::GpuArray<int, AMREX_SPACEDIM>          bc_spec_lo;
-    extern AMREX_GPU_MANAGED amrex::GpuArray<int, AMREX_SPACEDIM>          bc_spec_hi;
-    
-
     // Pressure drop are periodic inflow/outflow walls (bc_[hi,lo]=-2).
     extern AMREX_GPU_MANAGED amrex::GpuArray<amrex::Real, AMREX_SPACEDIM> p_lo;
     extern AMREX_GPU_MANAGED amrex::GpuArray<amrex::Real, AMREX_SPACEDIM> p_hi;
@@ -322,6 +314,7 @@ namespace common {
     extern int                        plot_vars;
     extern int                        plot_covars;
     extern int                        plot_cross;
+    extern int                        plot_deltaY_dir;
     extern int                        particle_motion;
 
     // parameters for turbulent forcing example
diff --git a/src_compressible/compressible_functions.cpp b/src_compressible/compressible_functions.cpp
index 96bb6f502..3d818cfe2 100644
--- a/src_compressible/compressible_functions.cpp
+++ b/src_compressible/compressible_functions.cpp
@@ -8,6 +8,7 @@ AMREX_GPU_MANAGED int compressible::do_1D;
 AMREX_GPU_MANAGED int compressible::do_2D;
 AMREX_GPU_MANAGED int compressible::all_correl;
 AMREX_GPU_MANAGED int compressible::nspec_surfcov = 0;
+AMREX_GPU_MANAGED int compressible::turbRestartRun = 1;
 AMREX_GPU_MANAGED bool compressible::do_reservoir = false;
 AMREX_GPU_MANAGED amrex::Real compressible::zeta_ratio = -1.0;
 
@@ -64,7 +65,14 @@ void InitializeCompressibleNamespace()
     all_correl = 0;
     pp.query("all_correl",all_correl);
 
-
+    // restart for turbulence
+    // if 1: advance in time; if 0: only compute statistics, without advancing in time
+    pp.query("turbRestartRun",turbRestartRun);
+    if (turbRestartRun == 0) {
+      if (restart <= 0) amrex::Abort("turbRestartRun requires restarting from a checkpoint, restart > 0");
+      if (max_step != restart+1) amrex::Abort("this is a single step run; max_step should be equal to restart+1");
+    }
+    
     // do reservoir?
     if ((bc_mass_lo[0] == 4) or (bc_mass_lo[1] == 4) or (bc_mass_lo[2] == 4) or
         (bc_mass_hi[0] == 4) or (bc_mass_hi[1] == 4) or (bc_mass_hi[2] == 4)) {
diff --git a/src_compressible/compressible_namespace.H b/src_compressible/compressible_namespace.H
index e75c9935c..d3195911b 100644
--- a/src_compressible/compressible_namespace.H
+++ b/src_compressible/compressible_namespace.H
@@ -7,6 +7,7 @@ namespace compressible {
     extern AMREX_GPU_MANAGED int do_2D;
     extern AMREX_GPU_MANAGED int all_correl;
     extern AMREX_GPU_MANAGED int nspec_surfcov;
+    extern AMREX_GPU_MANAGED int turbRestartRun;
     extern AMREX_GPU_MANAGED bool do_reservoir;
     extern AMREX_GPU_MANAGED amrex::Real zeta_ratio;
 
diff --git a/src_compressible/main_driver.cpp b/src_compressible/main_driver.cpp
index 504ae956a..c9654aa60 100644
--- a/src_compressible/main_driver.cpp
+++ b/src_compressible/main_driver.cpp
@@ -406,7 +406,6 @@ void main_driver(const char* argv)
 
     // structure factor class for flattened dataset
     StructFact structFactPrimFlattened;
-    MultiFab primFlattenedRotMaster;
 
     //////////////////////////////////////////////
 
@@ -465,7 +464,6 @@ void main_driver(const char* argv)
 
     // structure factor class for flattened dataset
     StructFact structFactConsFlattened;
-    MultiFab consFlattenedRotMaster;
 
     //////////////////////////////////////////////
     
@@ -478,81 +476,33 @@ void main_driver(const char* argv)
       // a built version of primFlattened so can obtain what we need to build the
       // structure factor and geometry objects for flattened data
       if (slicepoint < 0) {
-          ComputeVerticalAverage(structFactPrimMF, Flattened, geom, project_dir, 0, 1);
+          ComputeVerticalAverage(structFactPrimMF, Flattened, project_dir, 0, 1);
       } else {
-          ExtractSlice(structFactPrimMF, Flattened, geom, project_dir, slicepoint, 0, 1);
+          ExtractSlice(structFactPrimMF, Flattened, project_dir, slicepoint, 0, 1);
       }
-      // we rotate this flattened MultiFab to have normal in the z-direction since
-      // our structure factor class assumes this for flattened
-      MultiFab FlattenedRot = RotateFlattenedMF(Flattened);
-      BoxArray ba_flat = FlattenedRot.boxArray();
-      const DistributionMapping& dmap_flat = FlattenedRot.DistributionMap();
-      primFlattenedRotMaster.define(ba_flat,dmap_flat,structVarsPrim,0);
-      consFlattenedRotMaster.define(ba_flat,dmap_flat,structVarsCons,0);
+      BoxArray ba_flat = Flattened.boxArray();
+      const DistributionMapping& dmap_flat = Flattened.DistributionMap();
       {
-        IntVect dom_lo(AMREX_D_DECL(0,0,0));
-        IntVect dom_hi;
-
-        // yes you could simplify this code but for now
-        // these are written out fully to better understand what is happening
-        // we wanted dom_hi[AMREX_SPACEDIM-1] to be equal to 0
-        // and need to transmute the other indices depending on project_dir
-#if (AMREX_SPACEDIM == 2)
-        if (project_dir == 0) {
-            dom_hi[0] = n_cells[1]-1;
-        }
-        else if (project_dir == 1) {
-            dom_hi[0] = n_cells[0]-1;
-        }
-        dom_hi[1] = 0;
-#elif (AMREX_SPACEDIM == 3)
-        if (project_dir == 0) {
-            dom_hi[0] = n_cells[1]-1;
-            dom_hi[1] = n_cells[2]-1;
-        } else if (project_dir == 1) {
-            dom_hi[0] = n_cells[0]-1;
-            dom_hi[1] = n_cells[2]-1;
-        } else if (project_dir == 2) {
-            dom_hi[0] = n_cells[0]-1;
-            dom_hi[1] = n_cells[1]-1;
-        }
-        dom_hi[2] = 0;
-#endif
-        Box domain(dom_lo, dom_hi);
-
-        // This defines the physical box
-        Vector<Real> projected_hi(AMREX_SPACEDIM);
-
-        // yes you could simplify this code but for now
-        // these are written out fully to better understand what is happening
-        // we wanted projected_hi[AMREX_SPACEDIM-1] to be equal to dx[projected_dir]
-        // and need to transmute the other indices depending on project_dir
-#if (AMREX_SPACEDIM == 2)
-        if (project_dir == 0) {
-            projected_hi[0] = prob_hi[1];
-        } else if (project_dir == 1) {
-            projected_hi[0] = prob_hi[0];
-        }
-        projected_hi[1] = prob_hi[project_dir] / n_cells[project_dir];
-#elif (AMREX_SPACEDIM == 3)
-        if (project_dir == 0) {
-            projected_hi[0] = prob_hi[1];
-            projected_hi[1] = prob_hi[2];
-        } else if (project_dir == 1) {
-            projected_hi[0] = prob_hi[0];
-            projected_hi[1] = prob_hi[2];
-        } else if (project_dir == 2) {
-            projected_hi[0] = prob_hi[0];
-            projected_hi[1] = prob_hi[1];
-        }
-        projected_hi[2] = prob_hi[project_dir] / n_cells[project_dir];
-#endif
-
-        RealBox real_box({AMREX_D_DECL(     prob_lo[0],     prob_lo[1],     prob_lo[2])},
-                         {AMREX_D_DECL(projected_hi[0],projected_hi[1],projected_hi[2])});
-        
-        // This defines a Geometry object
-        geom_flat.define(domain,&real_box,CoordSys::cartesian,is_periodic.data());
+          Box domain_flat = ba_flat.minimalBox();
+
+          // This defines the physical box
+          // we retain prob_lo and prob_hi in all directions except project_dir,
+          // where the physical size is 0 to dx[project_dir]
+          Vector<Real> projected_lo(AMREX_SPACEDIM);
+          Vector<Real> projected_hi(AMREX_SPACEDIM);
+
+          for (int d=0; d<AMREX_SPACEDIM; ++d) {
+              projected_lo[d] = prob_lo[d];
+              projected_hi[d] = prob_hi[d];
+          }
+          projected_lo[project_dir] = 0.;
+          projected_hi[project_dir] = (prob_hi[project_dir] - prob_lo[project_dir]) / n_cells[project_dir];
+
+          RealBox real_box_flat({AMREX_D_DECL(projected_lo[0],projected_lo[1],projected_lo[2])},
+                                {AMREX_D_DECL(projected_hi[0],projected_hi[1],projected_hi[2])});
+          
+          // This defines a Geometry object
+          geom_flat.define(domain_flat,&real_box_flat,CoordSys::cartesian,is_periodic.data());
       }
 
       structFactPrimFlattened.define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim);
@@ -731,11 +681,11 @@ void main_driver(const char* argv)
                MultiFab::Copy(structFactMF, prim, 1, 0, AMREX_SPACEDIM, 0);
                 
                // reset and compute structure factor
-               turbStructFact.FortStructure(structFactMF,geom,1);
-               turbStructFact.CallFinalize(geom);
+               turbStructFact.FortStructure(structFactMF,1);
+               turbStructFact.CallFinalize();
 
                // integrate cov_mag over shells in k and write to file
-               turbStructFact.IntegratekShells(step,geom);
+               turbStructFact.IntegratekShells(step);
 
                // timer
                t2 = ParallelDescriptor::second() - t1;
@@ -767,27 +717,20 @@ void main_driver(const char* argv)
             MultiFab::Copy(structFactPrimMF, prim, 0,                0,                structVarsPrim,   0);
             MultiFab::Copy(structFactConsMF, cu,   0,                0,                structVarsCons-1, 0);
             MultiFab::Copy(structFactConsMF, prim, AMREX_SPACEDIM+1, structVarsCons-1, 1,                0); // temperature too
-            structFactPrim.FortStructure(structFactPrimMF,geom);
-            structFactCons.FortStructure(structFactConsMF,geom);
+            structFactPrim.FortStructure(structFactPrimMF);
+            structFactCons.FortStructure(structFactConsMF);
             if(project_dir >= 0) {
                 MultiFab primFlattened;  // flattened multifab defined below
                 MultiFab consFlattened;  // flattened multifab defined below
                 if (slicepoint < 0) {
-                    ComputeVerticalAverage(structFactPrimMF, primFlattened, geom, project_dir, 0, structVarsPrim);
-                    ComputeVerticalAverage(structFactConsMF, consFlattened, geom, project_dir, 0, structVarsCons);
+                    ComputeVerticalAverage(structFactPrimMF, primFlattened, project_dir, 0, structVarsPrim);
+                    ComputeVerticalAverage(structFactConsMF, consFlattened, project_dir, 0, structVarsCons);
                 } else {
-                    ExtractSlice(structFactPrimMF, primFlattened, geom, project_dir, slicepoint, 0, structVarsPrim);
-                    ExtractSlice(structFactConsMF, consFlattened, geom, project_dir, slicepoint, 0, structVarsCons);
+                    ExtractSlice(structFactPrimMF, primFlattened, project_dir, slicepoint, 0, structVarsPrim);
+                    ExtractSlice(structFactConsMF, consFlattened, project_dir, slicepoint, 0, structVarsCons);
                 }
-                // we rotate this flattened MultiFab to have normal in the z-direction since
-                // our structure factor class assumes this for flattened
-                MultiFab primFlattenedRot = RotateFlattenedMF(primFlattened);
-                primFlattenedRotMaster.ParallelCopy(primFlattenedRot,0,0,structVarsPrim);
-                structFactPrimFlattened.FortStructure(primFlattenedRotMaster,geom_flat);
-
-                MultiFab consFlattenedRot = RotateFlattenedMF(consFlattened);
-                consFlattenedRotMaster.ParallelCopy(consFlattenedRot,0,0,structVarsCons);
-                structFactConsFlattened.FortStructure(consFlattenedRotMaster,geom_flat);
+                structFactPrimFlattened.FortStructure(primFlattened);
+                structFactConsFlattened.FortStructure(consFlattened);
             }
 
             // timer
diff --git a/src_compressible_stag/Checkpoint.cpp b/src_compressible_stag/Checkpoint.cpp
index caf1a722c..e0b0201cf 100644
--- a/src_compressible_stag/Checkpoint.cpp
+++ b/src_compressible_stag/Checkpoint.cpp
@@ -643,7 +643,7 @@ void ReadCheckPoint3D(int& step,
     dmap.define(ba, ParallelDescriptor::NProcs());
     
 #if defined(TURB)
-    if (turbForcing > 1) {
+    if ((turbForcing > 1) and (turbRestartRun)) {
         turbforce.define(ba,dmap,turb_a,turb_b,turb_c,turb_d,turb_alpha);
     }
 #endif
@@ -694,7 +694,7 @@ void ReadCheckPoint3D(int& step,
 
 #if defined(TURB)
         // Read in turbulent forcing
-        if (turbForcing > 1) {
+        if ((turbForcing > 1) and (turbRestartRun)) {
             Real fs_temp;
             Real fc_temp;
             for (int i=0; i<132; ++i) {
@@ -1391,28 +1391,29 @@ void Read_Copy_MF_Checkpoint(amrex::MultiFab& mf, std::string mf_name, const std
                              BoxArray& ba_old, DistributionMapping& dmap_old,
                              int NVARS, int ghost, int nodal_flag)
 {
-    // define temporary MF
-    MultiFab mf_temp;
-    if (nodal_flag < 0) {
-        if (ghost) {
-            mf_temp.define(ba_old,dmap_old,NVARS,ngc);
-        }
-        else {
-            mf_temp.define(ba_old,dmap_old,NVARS,0);
-        }
-
-    }
-    else {
-        if (ghost) {
-            mf_temp.define(convert(ba_old,nodal_flag_dir[nodal_flag]),dmap_old,NVARS,ngc);
-        }
-        else {
-            mf_temp.define(convert(ba_old,nodal_flag_dir[nodal_flag]),dmap_old,NVARS,0);
-        }
-
-    }
+    //// define temporary MF
+    //MultiFab mf_temp;
+    //if (nodal_flag < 0) {
+    //    if (ghost) {
+    //        mf_temp.define(ba_old,dmap_old,NVARS,ngc);
+    //    }
+    //    else {
+    //        mf_temp.define(ba_old,dmap_old,NVARS,0);
+    //    }
+
+    //}
+    //else {
+    //    if (ghost) {
+    //        mf_temp.define(convert(ba_old,nodal_flag_dir[nodal_flag]),dmap_old,NVARS,ngc);
+    //    }
+    //    else {
+    //        mf_temp.define(convert(ba_old,nodal_flag_dir[nodal_flag]),dmap_old,NVARS,0);
+    //    }
+
+    //}
     
     // Read into temporary MF from file
+    MultiFab mf_temp;
     VisMF::Read(mf_temp,amrex::MultiFabFileFullPrefix(0, checkpointname, "Level_", mf_name));
 
     // Copy temporary MF into the new MF
diff --git a/src_compressible_stag/DeriveVelProp.cpp b/src_compressible_stag/DeriveVelProp.cpp
index 87f3232d9..c437cd384 100644
--- a/src_compressible_stag/DeriveVelProp.cpp
+++ b/src_compressible_stag/DeriveVelProp.cpp
@@ -8,6 +8,7 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
                 std::array< MultiFab, AMREX_SPACEDIM >& cumom,
                 MultiFab& prim,
                 MultiFab& eta,
+                MultiFab& zeta,
                 const amrex::Geometry& geom,
                 Real& turbKE, Real& c_speed,
                 Real& u_rms,
@@ -26,11 +27,11 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
     // Setup temp MultiFabs
     std::array< MultiFab, AMREX_SPACEDIM > macTemp;
     MultiFab gradU;
+    MultiFab eta_bulk_diss;
     MultiFab sound_speed;
     MultiFab ccTemp;
     MultiFab ccTempA;
     MultiFab ccTempDiv;
-    MultiFab eta_kin; // kinematic viscosity
     std::array< MultiFab, NUM_EDGE > curlU;
     std::array< MultiFab, NUM_EDGE > eta_edge;
     std::array< MultiFab, NUM_EDGE > curlUtemp;
@@ -42,7 +43,7 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
     ccTemp.define(prim.boxArray(),prim.DistributionMap(),1,0);
     ccTempA.define(prim.boxArray(),prim.DistributionMap(),1,0);
     ccTempDiv.define(prim.boxArray(),prim.DistributionMap(),1,0);
-    eta_kin.define(prim.boxArray(),prim.DistributionMap(),1,ngc);
+    if (visc_type == 3) eta_bulk_diss.define(prim.boxArray(),prim.DistributionMap(),1,0);
 #if (AMREX_SPACEDIM == 3)
     curlU[0].define(convert(prim.boxArray(),nodal_flag_xy), prim.DistributionMap(), 1, 0);
     curlU[1].define(convert(prim.boxArray(),nodal_flag_xz), prim.DistributionMap(), 1, 0);
@@ -63,19 +64,6 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
     curlUtemp[0].define(convert(prim.boxArray(),nodal_flag_xy), prim.DistributionMap(), 1, 0);
 #endif
 
-    // Get Kinematic Viscosity
-    for ( MFIter mfi(eta,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
-        // grow the box by ngc
-        const Box& bx = amrex::grow(mfi.tilebox(), ngc);
-        const Array4<Real> & eta_kin_fab = eta_kin.array(mfi);
-        const Array4<const Real>& eta_fab = eta.array(mfi);
-        const Array4<const Real>& prim_fab = prim.array(mfi);
-        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
-        {
-            eta_kin_fab(i,j,k) = eta_fab(i,j,k) / prim_fab(i,j,k,0);
-        });
-    }
-
     // Setup temp variables
     Real temp;
     Vector<Real> tempvec(3);
@@ -87,14 +75,38 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
     Vector<Real> eps_s_vec(3); // solenoidal dissipation
 
     // turbulent kinetic energy
-    StagInnerProd(cumom,0,vel,0,macTemp,rhouu);
+//   StagInnerProd(cumom,0,vel,0,macTemp,rhouu);
+    {   // owner mask: a face shared by two boxes (or a periodic image) is summed only once
+        auto mask = cumom[0].OwnerMask(geom.periodicity());
+        rhouu[0] = MultiFab::Dot(*mask,cumom[0],0,vel[0],0,1,0);
+    }
+    {
+        auto mask = cumom[1].OwnerMask(geom.periodicity());
+        rhouu[1] = MultiFab::Dot(*mask,cumom[1],0,vel[1],0,1,0);
+    }
+    {
+        auto mask = cumom[2].OwnerMask(geom.periodicity());
+        rhouu[2] = MultiFab::Dot(*mask,cumom[2],0,vel[2],0,1,0);
+    }
     rhouu[0] /= (n_cells[0]+1)*n_cells[1]*n_cells[2];
     rhouu[1] /= (n_cells[1]+1)*n_cells[2]*n_cells[0];
     rhouu[2] /= (n_cells[2]+1)*n_cells[0]*n_cells[1];
     turbKE = 0.5*( rhouu[0] + rhouu[1] + rhouu[2] );
 
     // RMS velocity
-    StagInnerProd(vel,0,vel,0,macTemp,uu);
+//    StagInnerProd(vel,0,vel,0,macTemp,uu);
+    {   // owner mask: a face shared by two boxes (or a periodic image) is summed only once
+        auto mask = vel[0].OwnerMask(geom.periodicity());
+        uu[0] = MultiFab::Dot(*mask,vel[0],0,vel[0],0,1,0);
+    }
+    {
+        auto mask = vel[1].OwnerMask(geom.periodicity());
+        uu[1] = MultiFab::Dot(*mask,vel[1],0,vel[1],0,1,0);
+    }
+    {
+        auto mask = vel[2].OwnerMask(geom.periodicity());
+        uu[2] = MultiFab::Dot(*mask,vel[2],0,vel[2],0,1,0);
+    }
     uu[0] /= (n_cells[0]+1)*n_cells[1]*n_cells[2];
     uu[1] /= (n_cells[1]+1)*n_cells[2]*n_cells[0];
     uu[2] /= (n_cells[2]+1)*n_cells[0]*n_cells[1];
@@ -167,28 +179,53 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
     // Compute \omega (curl)
     ComputeCurlFaceToEdge(vel,curlU,geom);
     
-    // Solenoidal dissipation: <eta_kin \omega_i \omega_i>/<rho>
-    AverageCCToEdge(eta_kin,eta_edge,0,1,SPEC_BC_COMP,geom);
+    // Solenoidal dissipation: <eta \omega_i \omega_i>
+    AverageCCToEdge(eta,eta_edge,0,1,SPEC_BC_COMP,geom);
     EdgeInnerProd(curlU,0,curlU,0,curlUtemp,tempvec);
-    EdgeInnerProd(curlUtemp,0,eta_edge,0,curlU,eps_s_vec);
+//    EdgeInnerProd(curlUtemp,0,eta_edge,0,curlU,eps_s_vec);
+    {   // owner mask: an edge shared by several boxes (or a periodic image) is summed only once
+        auto mask = curlUtemp[0].OwnerMask(geom.periodicity());
+        eps_s_vec[0] = MultiFab::Dot(*mask,curlUtemp[0],0,eta_edge[0],0,1,0);
+    }
+    {
+        auto mask = curlUtemp[1].OwnerMask(geom.periodicity());
+        eps_s_vec[1] = MultiFab::Dot(*mask,curlUtemp[1],0,eta_edge[1],0,1,0);
+    }
+    {
+        auto mask = curlUtemp[2].OwnerMask(geom.periodicity());
+        eps_s_vec[2] = MultiFab::Dot(*mask,curlUtemp[2],0,eta_edge[2],0,1,0);
+    }
     eps_s_vec[0] /= (n_cells[0]+1)*(n_cells[1]+1)*n_cells[2];
     eps_s_vec[1] /= (n_cells[0]+1)*(n_cells[2]+1)*n_cells[1];
     eps_s_vec[2] /= (n_cells[1]+1)*(n_cells[2]+1)*n_cells[0];
     eps_s = (eps_s_vec[0] + eps_s_vec[1] + eps_s_vec[2]);
 
-    // Dilational dissipation (4/3)*<eta_kin (\sum_i du_i/dx_i)^2>/<rho>
-    CCInnerProd(ccTempDiv,0,eta_kin,0,ccTemp,eps_d);
-    eps_d *= dProb*(4.0/3.0);
+    // Dilational dissipation (4/3)*<eta (\sum_i du_i/dx_i)^2>
+//    CCInnerProd(ccTempDiv,0,eta,0,ccTemp,eps_d);
+    if (visc_type == 3) {
+      // eta_bulk_diss = zeta + (4/3)*eta; exact 4.0/3.0 matches the factor used in the else branch
+      MultiFab::LinComb(eta_bulk_diss, 1.0, zeta, 0,
+                        4.0/3.0, eta, 0,
+                        0, 1, 0);
+      eps_d = MultiFab::Dot(eta_bulk_diss, 0, ccTempDiv, 0, 1, 0);
+      eps_d *= dProb;
+    }
+    else {
+      eps_d = MultiFab::Dot(eta, 0, ccTempDiv, 0, 1, 0);
+      eps_d *= dProb*(4.0/3.0);
+    }
 
     // Ratio of Dilational to Solenoidal dissipation
     eps_ratio = eps_d/eps_s;
     Real eps_t = eps_s + eps_d;
 
     // Kolmogorov scales
-    Real eta_kin_avg = ComputeSpatialMean(eta_kin, 0);
-    kolm_s = pow((eta_kin_avg*eta_kin_avg*eta_kin_avg/eps_s),0.25);
-    kolm_d = pow((eta_kin_avg*eta_kin_avg*eta_kin_avg/eps_d),0.25);
-    kolm_t = pow((eta_kin_avg*eta_kin_avg*eta_kin_avg/eps_t),0.25);
+    kolm_s = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*eps_s)),0.25);
+    kolm_d = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*eps_d)),0.25);
+    kolm_t = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*eps_t)),0.25);
+//    kolm_s = pow((eta_avg*eta_avg*eta_avg/eps_s),0.25);
+//    kolm_d = pow((eta_avg*eta_avg*eta_avg/eps_d),0.25);
+//    kolm_t = pow((eta_avg*eta_avg*eta_avg/eps_t),0.25);
 
 }
 #endif
@@ -485,9 +522,9 @@ void EvaluateWritePlotFileVelGrad(int step,
         
         const Box& bx = mfi.tilebox();
         
-        const Array4<Real>             out = output.array(mfi);
+        const Array4<      Real>&             out   = output.array(mfi);
 
-        const Array4<const Real>  v_decomp = vel_decomp.array(mfi);
+        const Array4<const Real>&  v_decomp         = vel_decomp.array(mfi);
         
         AMREX_D_TERM(Array4<Real const> const& velx = vel[0].array(mfi);,
                      Array4<Real const> const& vely = vel[1].array(mfi);,
@@ -495,11 +532,16 @@ void EvaluateWritePlotFileVelGrad(int step,
 
         amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
         {
-            for (int n=0;n<6;++n) {
-                out(i,j,k,n) = v_decomp(i,j,k,n);
-            }
-            out(i,j,k,6) = sqrt(out(i,j,k,0)*out(i,j,k,0) + out(i,j,k,1)*out(i,j,k,1) + out(i,j,k,2)*out(i,j,k,2)); // mag solenoidal
-            out(i,j,k,7) = sqrt(out(i,j,k,3)*out(i,j,k,3) + out(i,j,k,4)*out(i,j,k,4) + out(i,j,k,5)*out(i,j,k,5)); // mag dilatational
+            
+            out(i,j,k,0) = v_decomp(i,j,k,0);
+            out(i,j,k,1) = v_decomp(i,j,k,1);
+            out(i,j,k,2) = v_decomp(i,j,k,2);
+            out(i,j,k,3) = v_decomp(i,j,k,3);
+            out(i,j,k,4) = v_decomp(i,j,k,4);
+            out(i,j,k,5) = v_decomp(i,j,k,5);
+
+            out(i,j,k,6) = std::sqrt(out(i,j,k,0)*out(i,j,k,0) + out(i,j,k,1)*out(i,j,k,1) + out(i,j,k,2)*out(i,j,k,2)); // mag solenoidal
+            out(i,j,k,7) = std::sqrt(out(i,j,k,3)*out(i,j,k,3) + out(i,j,k,4)*out(i,j,k,4) + out(i,j,k,5)*out(i,j,k,5)); // mag dilatational
 
             // divergence
             out(i,j,k,8) = (velx(i+1,j,k) - velx(i,j,k))/dx[0] +
@@ -549,7 +591,7 @@ void EvaluateWritePlotFileVelGrad(int step,
             Real w3_pp  = u32_pp - u23_pp;
 
             // vorticity magnitude: sqrt(w1*w1 + w2*w2 + w3*w3)
-            out(i,j,k,9) = sqrt(0.25*(w1_mm*w1_mm + w1_mp*w1_mp + w1_pm*w1_pm + w1_pp*w1_pp +
+            out(i,j,k,9) = std::sqrt(0.25*(w1_mm*w1_mm + w1_mp*w1_mp + w1_pm*w1_pm + w1_pp*w1_pp +
                                       w2_mm*w2_mm + w2_mp*w2_mp + w2_pm*w2_pm + w2_pp*w2_pp +
                                       w3_mm*w3_mm + w3_mp*w3_mp + w3_pm*w3_pm + w3_pp*w3_pp));
         });
@@ -562,8 +604,8 @@ void EvaluateWritePlotFileVelGrad(int step,
     varNames[1] = "uy_s";
     varNames[2] = "uz_s";
     varNames[3] = "ux_d";
-    varNames[4] = "ux_d";
-    varNames[5] = "uy_d";
+    varNames[4] = "uy_d";
+    varNames[5] = "uz_d";
     varNames[6] = "umag_s";
     varNames[7] = "umag_d";
     varNames[8] = "div";
@@ -572,3 +614,123 @@ void EvaluateWritePlotFileVelGrad(int step,
 }
 #endif
 
+#if defined(TURB)
+void EvaluateWritePlotFileVelGradTiny(int step,
+                                  const amrex::Real time,
+                                  const amrex::Geometry& geom,
+                                  const std::array<MultiFab, AMREX_SPACEDIM>& vel,
+                                  const amrex::MultiFab& vel_decomp_in)
+{
+    BL_PROFILE_VAR("EvaluateWritePlotFileVelGradTiny()",EvaluateWritePlotFileVelGradTiny);
+
+    MultiFab output;
+    // Writes plotfile "vort_div<step>" with 10 cell-centered components (vel_decomp_in is not read):
+    // 0: vorticity w1_shifted (value on the "mm" edge of the cell, no averaging)
+    // 1: vorticity w2_shifted
+    // 2: vorticity w3_shifted
+    // 3: vorticity w1_avg (mean of the four surrounding edge values)
+    // 4: vorticity w2_avg
+    // 5: vorticity w3_avg
+    // 6: vorticity_mag_shft_then_sq = sqrt(w1^2 + w2^2 + w3^2) of the shifted components
+    // 7: vorticity_mag_avg_then_sq  = sqrt(w1^2 + w2^2 + w3^2) of the averaged components
+    // 8: vorticity_mag_sq_then_avg  = sqrt of the edge-averaged squared components
+    // 9: divergence =  u_1,1 + u_2,2 + u_3,3
+    output.define(convert(vel[0].boxArray(),IntVect(AMREX_D_DECL(0,0,0))), vel[0].DistributionMap(), 10, 0);
+    output.setVal(0.0);
+
+    const GpuArray<Real, AMREX_SPACEDIM> dx = geom.CellSizeArray();
+
+    for ( MFIter mfi(output,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+
+        const Box& bx = mfi.tilebox();
+
+        const Array4<      Real>&             out   = output.array(mfi);
+
+        AMREX_D_TERM(Array4<Real const> const& velx = vel[0].array(mfi);,
+                     Array4<Real const> const& vely = vel[1].array(mfi);,
+                     Array4<Real const> const& velz = vel[2].array(mfi););
+
+        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+
+            // divergence
+            out(i,j,k,9) = (velx(i+1,j,k) - velx(i,j,k))/dx[0] +
+                           (vely(i,j+1,k) - vely(i,j,k))/dx[1] + 
+                           (velz(i,j,k+1) - velz(i,j,k))/dx[2] ;
+
+            // on edges: u_1,2 and u_2,1 and curl w1 = u_2,1 - u_1,2
+            Real u12_mm = (velx(i,j,k) - velx(i,j-1,k))/dx[1];
+            Real u21_mm = (vely(i,j,k) - vely(i-1,j,k))/dx[0];
+            Real w1_mm  = u21_mm - u12_mm;
+            Real u12_mp = (velx(i,j+1,k) - velx(i,j,k))/dx[1];
+            Real u21_mp = (vely(i,j+1,k) - vely(i-1,j+1,k))/dx[0];
+            Real w1_mp  = u21_mp - u12_mp;
+            Real u12_pm = (velx(i+1,j,k) - velx(i+1,j-1,k))/dx[1];
+            Real u21_pm = (vely(i+1,j,k) - vely(i,j,k))/dx[0];
+            Real w1_pm  = u21_pm - u12_pm;
+            Real u12_pp = (velx(i+1,j+1,k) - velx(i+1,j,k))/dx[1];
+            Real u21_pp = (vely(i+1,j+1,k) - vely(i,j+1,k))/dx[0];
+            Real w1_pp  = u21_pp - u12_pp;
+            out(i,j,k,0) = w1_mm;
+            out(i,j,k,3) = 0.25*(w1_mm+w1_mp+w1_pm+w1_pp); // mean of four edge values
+
+            // on edges: u_1,3 and u_3,1 and curl w2 = u_1,3 - u_3,1
+            Real u13_mm = (velx(i,j,k) - velx(i,j,k-1))/dx[2];
+            Real u31_mm = (velz(i,j,k) - velz(i-1,j,k))/dx[0];
+            Real w2_mm  = u13_mm - u31_mm;
+            Real u13_mp = (velx(i,j,k+1) - velx(i,j,k))/dx[2];
+            Real u31_mp = (velz(i,j,k+1) - velz(i-1,j,k+1))/dx[0];
+            Real w2_mp  = u13_mp - u31_mp;
+            Real u13_pm = (velx(i+1,j,k) - velx(i+1,j,k-1))/dx[2];
+            Real u31_pm = (velz(i+1,j,k) - velz(i,j,k))/dx[0];
+            Real w2_pm  = u13_pm - u31_pm;
+            Real u13_pp = (velx(i+1,j,k+1) - velx(i+1,j,k))/dx[2];
+            Real u31_pp = (velz(i+1,j,k+1) - velz(i,j,k+1))/dx[0];
+            Real w2_pp  = u13_pp - u31_pp;
+            out(i,j,k,1) = w2_mm;
+            out(i,j,k,4) = 0.25*(w2_mm+w2_mp+w2_pm+w2_pp); // mean of four edge values
+
+            // on edges: u_2,3 and u_3,2 and curl w3 = u_3,2 - u_2,3
+            Real u23_mm = (vely(i,j,k) - vely(i,j,k-1))/dx[2];
+            Real u32_mm = (velz(i,j,k) - velz(i,j-1,k))/dx[1];
+            Real w3_mm  = u32_mm - u23_mm;
+            Real u23_mp = (vely(i,j,k+1) - vely(i,j,k))/dx[2];
+            Real u32_mp = (velz(i,j,k+1) - velz(i,j-1,k+1))/dx[1];
+            Real w3_mp  = u32_mp - u23_mp;
+            Real u23_pm = (vely(i,j+1,k) - vely(i,j+1,k-1))/dx[2];
+            Real u32_pm = (velz(i,j+1,k) - velz(i,j,k))/dx[1];
+            Real w3_pm  = u32_pm - u23_pm;
+            Real u23_pp = (vely(i,j+1,k+1) - vely(i,j+1,k))/dx[2];
+            Real u32_pp = (velz(i,j+1,k+1) - velz(i,j,k+1))/dx[1];
+            Real w3_pp  = u32_pp - u23_pp;
+            out(i,j,k,2) = w3_mm;
+            out(i,j,k,5) = 0.25*(w3_mm+w3_mp+w3_pm+w3_pp); // mean of four edge values
+
+            // vorticity magnitude: sqrt(w1*w1 + w2*w2 + w3*w3)
+            out(i,j,k,6) = std::sqrt(w1_mm*w1_mm + w2_mm*w2_mm + w3_mm*w3_mm);
+            out(i,j,k,7) = std::sqrt(out(i,j,k,3)*out(i,j,k,3) + out(i,j,k,4)*out(i,j,k,4)
+                                + out(i,j,k,5)*out(i,j,k,5)); // averaged components live in 3,4,5
+            out(i,j,k,8) = std::sqrt(0.25*(w1_mm*w1_mm + w1_mp*w1_mp + w1_pm*w1_pm + w1_pp*w1_pp +
+                                      w2_mm*w2_mm + w2_mp*w2_mp + w2_pm*w2_pm + w2_pp*w2_pp +
+                                      w3_mm*w3_mm + w3_mp*w3_mp + w3_pm*w3_pm + w3_pp*w3_pp));
+        });
+    }
+
+    // Write on a plotfile
+    std::string plotfilename = amrex::Concatenate("vort_div",step,9);
+    amrex::Vector<std::string> varNames(10);
+    varNames[0] = "w1_shift";
+    varNames[1] = "w2_shift";
+    varNames[2] = "w3_shift";
+    varNames[3] = "w1_avg";
+    varNames[4] = "w2_avg";
+    varNames[5] = "w3_avg";
+    varNames[6] = "vort_mag_shft";
+    varNames[7] = "vort_mag_shft_avg";
+    varNames[8] = "vort_mag_avg";
+    varNames[9] = "div";
+    WriteSingleLevelPlotfile(plotfilename,output,varNames,geom,time,step);
+}
+#endif
+
+
diff --git a/src_compressible_stag/boundaryStag.cpp b/src_compressible_stag/boundaryStag.cpp
index b4c100394..965688570 100644
--- a/src_compressible_stag/boundaryStag.cpp
+++ b/src_compressible_stag/boundaryStag.cpp
@@ -32,6 +32,9 @@ void SetupCWallStag() {
        } else if (amrex::Math::abs(sumy-1) < 1.e-10) {
            GetMolfrac(bc_Yk_x_lo,bc_Xk_x_lo);
        }
+       else {
+           Abort("SetupCWallStag: lo-x; mass or mole fractions do not sum to 1");
+       }
     }
 
     if (bc_mass_lo[0] >= 3) {
@@ -69,6 +72,8 @@ void SetupCWallStag() {
           GetMassfrac(bc_Xk_x_hi,bc_Yk_x_hi);
        } else if (amrex::Math::abs(sumy-1) < 1.e-10) {
           GetMolfrac(bc_Yk_x_hi,bc_Xk_x_hi);
+       } else {
+           Abort("SetupCWallStag: hi-x; mass or mole fractions do not sum to 1");
        }
     }
 
@@ -108,6 +113,8 @@ void SetupCWallStag() {
           GetMassfrac(bc_Xk_y_lo,bc_Yk_y_lo);
        } else if (amrex::Math::abs(sumy-1) < 1.e-10) {
           GetMolfrac(bc_Yk_y_lo,bc_Xk_y_lo);
+       } else {
+           Abort("SetupCWallStag: lo-y; mass or mole fractions do not sum to 1");
        }
     }
 
@@ -146,6 +153,8 @@ void SetupCWallStag() {
           GetMassfrac(bc_Xk_y_hi,bc_Yk_y_hi);
        } else if (amrex::Math::abs(sumy-1) < 1.e-10) {
           GetMolfrac(bc_Yk_y_hi,bc_Xk_y_hi);
+       } else {
+           Abort("SetupCWallStag: hi-y; mass or mole fractions do not sum to 1");
        }
     }
 
@@ -185,6 +194,8 @@ void SetupCWallStag() {
           GetMassfrac(bc_Xk_z_lo,bc_Yk_z_lo);
        } else if (amrex::Math::abs(sumy-1) < 1.e-10) {
           GetMolfrac(bc_Yk_z_lo,bc_Xk_z_lo);
+       } else {
+           Abort("SetupCWallStag: lo-z; mass or mole fractions do not sum to 1");
        }
     }
 
@@ -223,6 +234,8 @@ void SetupCWallStag() {
           GetMassfrac(bc_Xk_z_hi,bc_Yk_z_hi);
        } else if (amrex::Math::abs(sumy-1) < 1.e-10) {
           GetMolfrac(bc_Yk_z_hi,bc_Xk_z_hi);
+       } else {
+           Abort("SetupCWallStag: hi-z; mass or mole fractions do not sum to 1");
        }
     }
 
diff --git a/src_compressible_stag/compressible_functions_stag.H b/src_compressible_stag/compressible_functions_stag.H
index 101434773..f0b628ae3 100644
--- a/src_compressible_stag/compressible_functions_stag.H
+++ b/src_compressible_stag/compressible_functions_stag.H
@@ -54,6 +54,11 @@ void EvaluateWritePlotFileVelGrad(int step,
                                   const amrex::Geometry& geom,
                                   const std::array<MultiFab, AMREX_SPACEDIM>& vel,
                                   const amrex::MultiFab& vel_decomp);
+void EvaluateWritePlotFileVelGradTiny(int step,
+                                  const amrex::Real time,
+                                  const amrex::Geometry& geom,
+                                  const std::array<MultiFab, AMREX_SPACEDIM>& vel,
+                                  const amrex::MultiFab& vel_decomp);
 #endif
 
 void conservedToPrimitiveStag(MultiFab& prim_in, std::array<MultiFab, AMREX_SPACEDIM>& velStag_in,
@@ -434,6 +439,7 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
                 std::array< MultiFab, AMREX_SPACEDIM >& cumom,
                 MultiFab& prim,
                 MultiFab& eta,
+                MultiFab& zeta,
                 const amrex::Geometry& geom,
                 Real& turbKE, Real& c_speed,
                 Real& u_rms,
diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp
index 512657bf1..c455fee2d 100644
--- a/src_compressible_stag/main_driver.cpp
+++ b/src_compressible_stag/main_driver.cpp
@@ -1,3 +1,4 @@
+#include "TurbSpectra.H"
 #include "common_functions.H"
 #include "compressible_functions.H"
 #include "compressible_functions_stag.H"
@@ -208,11 +209,22 @@ void main_driver(const char* argv)
     if ((plot_cross) and ((cross_cell < 0) or (cross_cell > n_cells[0]-1))) {
         Abort("Cross cell needs to be within the domain: 0 <= cross_cell <= n_cells[0] - 1");
     }
-    if ((do_slab_sf) and ((membrane_cell <= 0) or (membrane_cell >= n_cells[0]-1))) {
-        Abort("Slab structure factor needs a membrane cell within the domain: 0 < cross_cell < n_cells[0] - 1");
-    }
-    if ((project_dir >= 0) and ((do_1D) or (do_2D))) {
-        Abort("Projected structure factors (project_dir) works only for 3D case");
+    if (struct_fact_int >0 and project_dir >= 0) {
+        if (do_slab_sf and ((membrane_cell <= 0) or (membrane_cell >= n_cells[project_dir]-1))) {
+            Abort("Slab structure factor needs a membrane cell within the domain: 0 < membrane_cell < n_cells[project_dir] - 1");
+        }
+        if (do_1D) {
+            Abort("Projected structure factors (project_dir) does not work for do_1D case");
+        }
+        if (do_slab_sf and slicepoint >= 0) {
+            Abort("Cannot use do_slab_sf and slicepoint");
+        }
+        if (do_2D and slicepoint >= 0) {
+            Abort("Cannot use do_2D and slicepoint");
+        }
+        if (do_2D and project_dir != 2) {
+            Abort("Structure factors with do_2D requires project_dir == 2");
+        }
     }
     if ((all_correl > 1) or (all_correl < 0)) {
         Abort("all_correl can be 0 or 1");
@@ -260,10 +272,16 @@ void main_driver(const char* argv)
 
 #if defined(TURB)
     // data structure for turbulence diagnostics
+    MultiFab MFTurbScalar;
+    MultiFab MFTurbVel;
+    MultiFab vel_decomp;
     std::string turbfilename = "turbstats";
     std::ofstream turboutfile;
     std::string turbfilenamedecomp = "turbstatsdecomp";
     std::ofstream turboutfiledecomp;
+    // need to use dVol for scaling
+    Real dVol = (AMREX_SPACEDIM==2) ? dx[0]*dx[1]*cell_depth : dx[0]*dx[1]*dx[2];
+    Real dVolinv = 1.0/dVol;
 #endif
 
     /////////////////////////////////////////////
@@ -273,26 +291,34 @@ void main_driver(const char* argv)
     // Standard 3D structure factors
     StructFact structFactPrim;
     StructFact structFactCons;
+    // MultiFabs to copy data into for snapshots for full 3D data
     MultiFab structFactPrimMF;
     MultiFab structFactConsMF;
 
-    // Structure factor for 2D averaged data
-    StructFact structFactPrimVerticalAverage;
-    StructFact structFactConsVerticalAverage;
-
+    // Structure factor for vertically-averaged or sliced data
+    // these are enabled if project_dir >= 0 AND do_slab_sf == 0
+    StructFact structFactPrimFlattened;
+    StructFact structFactConsFlattened;
     // Structure factor for 2D averaged data (across a membrane)
-    StructFact structFactPrimVerticalAverage0;
-    StructFact structFactPrimVerticalAverage1;
-    StructFact structFactConsVerticalAverage0;
-    StructFact structFactConsVerticalAverage1;
-    MultiFab master_project_rot_prim;
-    MultiFab master_project_rot_cons;
-
-    // Vector of structure factors for 2D simulation
+    // these are enabled if project_dir >= 0 AND do_slab_sf == 1
+    StructFact structFactPrimVerticalAverageMembraneLo;
+    StructFact structFactPrimVerticalAverageMembraneHi;
+    StructFact structFactConsVerticalAverageMembraneLo;
+    StructFact structFactConsVerticalAverageMembraneHi;
+    // Vector of structure factors of slices for 2D simulation
+    // these are enabled if do_2D (this mode assumes z slices; project_dir must equal 2)
     Vector < StructFact > structFactPrimArray;
     Vector < StructFact > structFactConsArray;
-    MultiFab master_2D_rot_prim;
-    MultiFab master_2D_rot_cons;
+    
+    Geometry geom_flat;
+    BoxArray ba_flat;
+    DistributionMapping dmap_flat;
+
+    // Structure factor for surface coverage slice
+    // these are enabled if n_ads_spec > 0 and assumes the k=0 plane is the slice of interest
+    StructFact structFactSurfCov;
+    
+    Geometry geom_surfcov;
 
 #if defined(TURB)
     // Structure factor for compressible turbulence
@@ -300,13 +326,6 @@ void main_driver(const char* argv)
     StructFact turbStructFactVelDecomp; // decomposed velocity
     StructFact turbStructFactScalar; // scalars 
 #endif
-    
-    Geometry geom_flat;
-    Geometry geom_flat_2D;
-    BoxArray ba_flat;
-    BoxArray ba_flat_2D;
-    DistributionMapping dmap_flat;
-    DistributionMapping dmap_flat_2D;
 
     // "primitive" variable structure factor will contain
     // rho
@@ -427,40 +446,6 @@ void main_driver(const char* argv)
         var_scaling_cons[d] = 1./(dx[0]*dx[1]*dx[2]);
     }
 
-#if defined(TURB)
-    //////////////////////////////////////////////////////////////
-    // structure factor variables names and scaling for turbulence
-    // variables are velocities, density, pressure and temperature
-    //////////////////////////////////////////////////////////////
-    // need to use dVol for scaling
-    Real dVol = (AMREX_SPACEDIM==2) ? dx[0]*dx[1]*cell_depth : dx[0]*dx[1]*dx[2];
-    Real dVolinv = 1.0/dVol;
-    
-    MultiFab structFactMFTurbVel;
-    MultiFab structFactMFTurbScalar;
-    MultiFab vel_decomp;
-
-    Vector< std::string > var_names_turbVelTotal{"ux","uy","uz"};
-    Vector<Real> var_scaling_turbVelTotal(3, dVolinv);
-    amrex::Vector< int > s_pairA_turbVelTotal(3);
-    amrex::Vector< int > s_pairB_turbVelTotal(3);
-    for (int d=0; d<3; ++d) {
-        s_pairA_turbVelTotal[d] = d;
-        s_pairB_turbVelTotal[d] = d;
-    }
-    
-    Vector<Real> var_scaling_turbVelDecomp(6, dVolinv);
-    
-    Vector< std::string > var_names_turbScalar{"rho","tenp","press"};
-    Vector<Real> var_scaling_turbScalar(3, dVolinv);
-    amrex::Vector< int > s_pairA_turbScalar(3);
-    amrex::Vector< int > s_pairB_turbScalar(3);
-    for (int d=0; d<3; ++d) {
-        s_pairA_turbScalar[d] = d;
-        s_pairB_turbScalar[d] = d;
-    }
-#endif
-    
     // object for turbulence forcing
     TurbForcingComp turbforce;
 
@@ -724,6 +709,7 @@ void main_driver(const char* argv)
 #if defined(TURB)
             if (turbForcing > 0) {
                 EvaluateWritePlotFileVelGrad(0, 0.0, geom, vel, vel_decomp);
+                EvaluateWritePlotFileVelGradTiny(0, 0.0, geom, vel, vel_decomp);
             }
 #endif
 
@@ -757,162 +743,143 @@ void main_driver(const char* argv)
 
 
     } // end t=0 setup
+    
+#if defined(TURB)
+    if (turbForcing >= 1) {
+        MFTurbVel.define(ba, dmap, 3, 0);
+        MFTurbScalar.define(ba, dmap, 3, 0);
+        vel_decomp.define(ba, dmap, 6, 0);
+        vel_decomp.setVal(0.0);
+    }
+#endif
 
     ///////////////////////////////////////////
     // Setup Structure factor
     ///////////////////////////////////////////
 
     if (struct_fact_int > 0) {
-        structFactPrimMF.define(ba, dmap, structVarsPrim, 0);
-        structFactPrim.define(ba,dmap,prim_var_names,var_scaling_prim);
-            
-        structFactConsMF.define(ba, dmap, structVarsCons, 0);
-        structFactCons.define(ba,dmap,cons_var_names,var_scaling_cons);
-        
+
+        if ((do_1D==0) and (do_2D==0)) {
+            structFactPrim.define(ba,dmap,prim_var_names,var_scaling_prim);
+            structFactCons.define(ba,dmap,cons_var_names,var_scaling_cons);
+            structFactConsMF.define(ba,dmap,structVarsCons,0);
+            structFactPrimMF.define(ba,dmap,structVarsPrim,0);
+        }
+
         // structure factor class for vertically-averaged dataset
         if (project_dir >= 0) {
 
-            {
-                MultiFab X, XRot;
-                ComputeVerticalAverage(prim, X, geom, project_dir, 0, nprimvars);
-                XRot = RotateFlattenedMF(X);
-                ba_flat = XRot.boxArray();
-                dmap_flat = XRot.DistributionMap();
-                master_project_rot_prim.define(ba_flat,dmap_flat,structVarsPrim,0);
-                master_project_rot_cons.define(ba_flat,dmap_flat,structVarsCons,0);
-
-                IntVect dom_lo_flat(AMREX_D_DECL(0,0,0));
-                IntVect dom_hi_flat;
-#if (AMREX_SPACEDIM == 2)
-                if (project_dir == 0) {
-                    dom_hi_flat[0] = n_cells[1]-1;
-                    dom_hi_flat[1] = 0;
-                }
-                else if (project_dir == 1) {
-                    dom_hi_flat[0] = n_cells[0]-1;
-                    dom_hi_flat[1] = 0;
-                }
-#elif (AMREX_SPACEDIM == 3)
-                if (project_dir == 0) {
-                    dom_hi_flat[0] = n_cells[1]-1;
-                    dom_hi_flat[1] = n_cells[2]-1;
-                    dom_hi_flat[2] = 0;
-                } else if (project_dir == 1) {
-                    dom_hi_flat[0] = n_cells[0]-1;
-                    dom_hi_flat[1] = n_cells[2]-1;
-                    dom_hi_flat[2] = 0;
-                } else if (project_dir == 2) {
-                    dom_hi_flat[0] = n_cells[0]-1;
-                    dom_hi_flat[1] = n_cells[1]-1;
-                    dom_hi_flat[2] = 0;
-                }
-#endif
-                Box domain_flat(dom_lo_flat, dom_hi_flat);
+            MultiFab Flattened; // flattened multifab defined below
 
-                // This defines the physical box
-                Vector<Real> projected_hi(AMREX_SPACEDIM);
-                for (int d=0; d<AMREX_SPACEDIM; d++) {
-                    projected_hi[d] = prob_hi[d];
-                }
-#if (AMREX_SPACEDIM == 2)
-                if (project_dir == 0) {
-                    projected_hi[0] = prob_hi[1];
-                }
-#elif (AMREX_SPACEDIM == 3)
-                if (project_dir == 0) {
-                    projected_hi[0] = prob_hi[1];
-                    projected_hi[1] = prob_hi[2];
-                } else if (project_dir == 1) {
-                    projected_hi[1] = prob_hi[2];
-                }
-#endif
-        
-                projected_hi[AMREX_SPACEDIM-1] = prob_hi[project_dir] / n_cells[project_dir];
+            // we call ExtractSlice here only to obtain a built version of
+            // Flattened, from which we take the BoxArray and DistributionMapping
+            // needed to build the structure factor and geometry objects for flattened data
+            ExtractSlice(prim, Flattened, project_dir, 0, 0, 1);
 
-                RealBox real_box_flat({AMREX_D_DECL(     prob_lo[0],     prob_lo[1],     prob_lo[2])},
-                                    {AMREX_D_DECL(projected_hi[0],projected_hi[1],projected_hi[2])});
-          
-                // This defines a Geometry object
-                geom_flat.define(domain_flat,&real_box_flat,CoordSys::cartesian,is_periodic.data());
+            ba_flat = Flattened.boxArray();
+            dmap_flat = Flattened.DistributionMap();
 
-            }
+            Box domain_flat = ba_flat.minimalBox();
+
+            // This defines the physical box
+            // we retain prob_lo and prob_hi in all directions except project_dir,
+            // where the physical size is 0 to dx[project_dir]
+            Vector<Real> projected_lo(AMREX_SPACEDIM);
+            Vector<Real> projected_hi(AMREX_SPACEDIM);
 
-            if (do_slab_sf == 0) {
-                structFactPrimVerticalAverage.define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim,2);
-                structFactConsVerticalAverage.define(ba_flat,dmap_flat,cons_var_names,var_scaling_cons,2);
+            for (int d=0; d<AMREX_SPACEDIM; ++d) {
+                projected_lo[d] = prob_lo[d];
+                projected_hi[d] = prob_hi[d];
             }
-            else {
-                structFactPrimVerticalAverage0.define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim);
-                structFactPrimVerticalAverage1.define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim);
-                structFactConsVerticalAverage0.define(ba_flat,dmap_flat,cons_var_names,var_scaling_cons);
-                structFactConsVerticalAverage1.define(ba_flat,dmap_flat,cons_var_names,var_scaling_cons);
+            projected_lo[project_dir] = 0.;
+            projected_hi[project_dir] = dx[project_dir];
+
+            RealBox real_box_flat({AMREX_D_DECL(projected_lo[0],projected_lo[1],projected_lo[2])},
+                                  {AMREX_D_DECL(projected_hi[0],projected_hi[1],projected_hi[2])});
+          
+            // This defines a Geometry object
+            geom_flat.define(domain_flat,&real_box_flat,CoordSys::cartesian,is_periodic.data());
+
+            if (do_2D) {
+
+                structFactPrimArray.resize(n_cells[project_dir]);
+                structFactConsArray.resize(n_cells[project_dir]);
+
+                for (int i = 0; i < n_cells[project_dir]; ++i) { 
+                    structFactPrimArray[i].define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim);
+                    structFactConsArray[i].define(ba_flat,dmap_flat,cons_var_names,var_scaling_cons);
+                }
+
+            } else {
+                if (do_slab_sf == 0) {
+                    structFactPrimFlattened.define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim);
+                    structFactConsFlattened.define(ba_flat,dmap_flat,cons_var_names,var_scaling_cons);
+                }
+                else {
+                    structFactPrimVerticalAverageMembraneLo.define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim);
+                    structFactPrimVerticalAverageMembraneHi.define(ba_flat,dmap_flat,prim_var_names,var_scaling_prim);
+                    structFactConsVerticalAverageMembraneLo.define(ba_flat,dmap_flat,cons_var_names,var_scaling_cons);
+                    structFactConsVerticalAverageMembraneHi.define(ba_flat,dmap_flat,cons_var_names,var_scaling_cons);
+                }
             }
     
         }
 
-        if (do_2D) { // 2D is coded only for XY plane
+        if (n_ads_spec>0) {
 
-            {
-                MultiFab X, XRot;
-                ExtractSlice(prim, X, geom, 2, 0, 0, nprimvars);
-                XRot = RotateFlattenedMF(X);
-                ba_flat_2D = XRot.boxArray();
-                dmap_flat_2D = XRot.DistributionMap();
-                master_2D_rot_prim.define(ba_flat_2D,dmap_flat_2D,structVarsPrim,0);
-                master_2D_rot_cons.define(ba_flat_2D,dmap_flat_2D,structVarsCons,0);
-
-                IntVect dom_lo_flat(AMREX_D_DECL(0,0,0));
-                IntVect dom_hi_flat;
-                dom_hi_flat[0] = n_cells[0]-1;
-                dom_hi_flat[1] = n_cells[1]-1;
-                dom_hi_flat[2] = 0;
-                Box domain_flat(dom_lo_flat, dom_hi_flat);
+            MultiFab Flattened;  // flattened multifab defined below
+
+            // we call ExtractSlice here only to obtain a built version of
+            // Flattened, from which we take the BoxArray and DistributionMapping
+            // needed to build the structure factor and geometry objects for flattened data.
+            // We assume the surface coverage is stored in the k=0 plane (normal to the "k" direction).
+            int surfcov_dir = 2;
+            int surfcov_plane = 0;
+            int surfcov_structVars = n_ads_spec;
+            int surfcov_nPairs = surfcov_structVars*(surfcov_structVars+1)/2;
+
+            Vector< std::string > surfcov_var_names;
+            surfcov_var_names.resize(surfcov_structVars);
+            for (int d=0; d<surfcov_structVars; d++) {
+                x = "surfCov";
+                x += (48+d);
+                surfcov_var_names[d] = x;
+            }
 
+            Vector<Real> surfcov_var_scaling(surfcov_nPairs);
+            for (int d=0; d<surfcov_var_scaling.size(); ++d) {
+                surfcov_var_scaling[d] = 1.;
+            }
+      
+            ExtractSlice(surfcov, Flattened, surfcov_dir, surfcov_plane, 0, surfcov_structVars);
+            BoxArray ba_surfcov = Flattened.boxArray();
+            const DistributionMapping& dmap_surfcov = Flattened.DistributionMap();
+            {
+                Box domain_surfcov = ba_surfcov.minimalBox();
+        
                 // This defines the physical box
+                // we retain prob_lo and prob_hi in all directions except surfcov_dir,
+                // where the physical size is 0 to dx[surfcov_dir]
+                Vector<Real> projected_lo(AMREX_SPACEDIM);
                 Vector<Real> projected_hi(AMREX_SPACEDIM);
-                for (int d=0; d<AMREX_SPACEDIM; d++) {
+
+                for (int d=0; d<AMREX_SPACEDIM; ++d) {
+                    projected_lo[d] = prob_lo[d];
                     projected_hi[d] = prob_hi[d];
                 }
-                projected_hi[AMREX_SPACEDIM-1] = prob_hi[2] / n_cells[2];
+                projected_lo[surfcov_dir] = 0.;
+                projected_hi[surfcov_dir] = dx[surfcov_dir];
 
-                RealBox real_box_flat({AMREX_D_DECL(     prob_lo[0],     prob_lo[1],     prob_lo[2])},
-                                      {AMREX_D_DECL(projected_hi[0],projected_hi[1],projected_hi[2])});
-          
+                RealBox real_box_surfcov({AMREX_D_DECL(projected_lo[0],projected_lo[1],projected_lo[2])},
+                                         {AMREX_D_DECL(projected_hi[0],projected_hi[1],projected_hi[2])});
+        
                 // This defines a Geometry object
-                geom_flat_2D.define(domain_flat,&real_box_flat,CoordSys::cartesian,is_periodic.data());
-
-            }
-
-            structFactPrimArray.resize(n_cells[2]);
-            structFactConsArray.resize(n_cells[2]);
-
-            for (int i = 0; i < n_cells[2]; ++i) { 
-                structFactPrimArray[i].define(ba_flat_2D,dmap_flat_2D,prim_var_names,var_scaling_prim,2);
-                structFactConsArray[i].define(ba_flat_2D,dmap_flat_2D,cons_var_names,var_scaling_cons,2);
+                geom_surfcov.define(domain_surfcov,&real_box_surfcov,CoordSys::cartesian,is_periodic.data());
             }
 
+            structFactSurfCov.define(ba_surfcov,dmap_surfcov,surfcov_var_names,surfcov_var_scaling);
         }
     }
-    
-#if defined(TURB)
-    if (turbForcing >= 1) {
-        
-        structFactMFTurbVel.define(ba, dmap, 3, 0);
-        structFactMFTurbScalar.define(ba, dmap, 6, 0);
-        vel_decomp.define(ba, dmap, 6, 0);
-        vel_decomp.setVal(0.0);
-
-        turbStructFactVelTotal.define(ba,dmap,
-                var_names_turbVelTotal,var_scaling_turbVelTotal,
-                s_pairA_turbVelTotal,s_pairB_turbVelTotal);
-        turbStructFactScalar.define(ba,dmap,
-                var_names_turbScalar,var_scaling_turbScalar,
-                s_pairA_turbScalar,s_pairB_turbScalar);
-        turbStructFactVelDecomp.defineDecomp(ba,dmap,
-                var_names_turbVelTotal,var_scaling_turbVelDecomp,
-                s_pairA_turbVelTotal,s_pairB_turbVelTotal);
-    }
-#endif
 
     /////////////////////////////////////////////////
     // Initialize Fluxes and Sources
@@ -971,7 +938,7 @@ void main_driver(const char* argv)
 
 #if defined(TURB)
     // Initialize Turbulence Forcing Object
-    if (turbForcing > 1) {
+    if ((turbForcing > 1) and (turbRestartRun)) {
         turbforce.Initialize(geom);
     }
 #endif
@@ -1018,8 +985,13 @@ void main_driver(const char* argv)
 	}
 
         // FHD
-        RK3stepStag(cu, cumom, prim, vel, source, eta, zeta, kappa, chi, D, 
-            faceflux, edgeflux_x, edgeflux_y, edgeflux_z, cenflux, ranchem, geom, dt, step, turbforce);
+        if (turbRestartRun) {
+          RK3stepStag(cu, cumom, prim, vel, source, eta, zeta, kappa, chi, D, 
+              faceflux, edgeflux_x, edgeflux_y, edgeflux_z, cenflux, ranchem, geom, dt, step, turbforce);
+        }
+	  else {
+	      calculateTransportCoeffs(prim, eta, zeta, kappa, chi, D);
+	  }
 
 	if (n_ads_spec>0 && splitting_MFsurfchem == 1) sample_MFsurfchem(cu, prim, surfcov, dNadsdes, geom, dt/2.0);
 
@@ -1168,6 +1140,9 @@ void main_driver(const char* argv)
                 writePlt = ((step+1)%plot_int == 0);
             }
         }
+#if defined(TURB)
+        if ((turbRestartRun == 0) and (turbForcing >= 1)) writePlt = true;
+#endif
         
         if (writePlt) {
             //yzAverage(cuMeans, cuVars, primMeans, primVars, spatialCross,
@@ -1175,12 +1150,6 @@ void main_driver(const char* argv)
             WritePlotFileStag(step, time, geom, cu, cuMeans, cuVars, cumom, cumomMeans, cumomVars,
                               prim, primMeans, primVars, vel, velMeans, velVars, coVars, surfcov, surfcovMeans, surfcovVars, surfcovcoVars, eta, kappa, zeta);
             
-#if defined(TURB)
-            if (turbForcing > 0) {
-                EvaluateWritePlotFileVelGrad(step, time, geom, vel, vel_decomp);
-            }
-#endif
-
             if (plot_cross) {
                 if (do_1D) {
                     WriteSpatialCross1D(spatialCross1D, step, geom, ncross);
@@ -1206,27 +1175,24 @@ void main_driver(const char* argv)
 
                 // copy velocities into structFactMFTurb
                 for(int d=0; d<AMREX_SPACEDIM; d++) {
-                    ShiftFaceToCC(vel[d], 0, structFactMFTurbVel, d, 1);
+                    ShiftFaceToCC(vel[d], 0, MFTurbVel, d, 1);
                 }
-                MultiFab::Copy(structFactMFTurbScalar, prim, 0, 0, 1, 0);
-                MultiFab::Copy(structFactMFTurbScalar, prim, 4, 1, 1, 0);
-                MultiFab::Copy(structFactMFTurbScalar, prim, 5, 2, 1, 0);
+                MultiFab::Copy(MFTurbScalar, prim, 0, 0, 1, 0);
+                MultiFab::Copy(MFTurbScalar, prim, 4, 1, 1, 0);
+                MultiFab::Copy(MFTurbScalar, prim, 5, 2, 1, 0);
                 
-                 // decomposed velocities
-                    turbStructFactVelDecomp.FortStructureDecomp(structFactMFTurbVel,geom,1);
-                    turbStructFactVelDecomp.GetDecompVel(vel_decomp,geom);
-                    turbStructFactVelDecomp.CallFinalize(geom);
-                    turbStructFactVelDecomp.IntegratekShellsDecomp(step,geom,"vel_solenoid","vel_dilation");
+                // decomposed velocities
+                Vector< std::string > var_names_turbVel{"vel_total","vel_solenoidal","vel_dilation"};
+                Real scaling_turb_veldecomp = dVolinv;
+                TurbSpectrumVelDecomp(MFTurbVel, vel_decomp, geom, step, scaling_turb_veldecomp, var_names_turbVel);
                 
-                 // total velocity
-                    turbStructFactVelTotal.FortStructure(structFactMFTurbVel,geom,1);
-                    turbStructFactVelTotal.CallFinalize(geom);
-                    turbStructFactVelTotal.IntegratekShells(step,geom,"vel_total");
-                
-                 // scalars
-                    turbStructFactScalar.FortStructure(structFactMFTurbScalar,geom,1);
-                    turbStructFactScalar.CallFinalize(geom);
-                    turbStructFactScalar.IntegratekShellsScalar(step,geom,var_names_turbScalar);
+                // scalars
+                Vector< std::string > var_names_turbScalar{"rho","temp","press"};
+                Vector<Real> scaling_turb_scalar(3, dVolinv);
+                TurbSpectrumScalar(MFTurbScalar, geom, step, scaling_turb_scalar, var_names_turbScalar);
+
+                EvaluateWritePlotFileVelGrad(step, time, geom, vel, vel_decomp);
+                EvaluateWritePlotFileVelGradTiny(step, time, geom, vel, vel_decomp);
             }
 #endif
         }
@@ -1234,7 +1200,8 @@ void main_driver(const char* argv)
 
 #if defined(TURB)
         // turbulence outputs
-        if ((turbForcing >= 1) and (step%1000 == 0)) {
+        if (((turbForcing >= 1) and (step%1000 == 0)) or
+            ((turbForcing >= 1) and (turbRestartRun == 0))) {
 
             Real turbKE, c_speed, u_rms, taylor_len, taylor_Re, taylor_Ma,
             skew, kurt, eps_s, eps_d, eps_ratio, kolm_s, kolm_d, kolm_t;
@@ -1242,7 +1209,7 @@ void main_driver(const char* argv)
                 vel[i].FillBoundary(geom.periodicity());
                 cumom[i].FillBoundary(geom.periodicity());
             }
-            GetTurbQty(vel, cumom, prim, eta, geom,
+            GetTurbQty(vel, cumom, prim, eta, zeta, geom,
                        turbKE, c_speed, u_rms,
                        taylor_len, taylor_Re, taylor_Ma,
                        skew, kurt,
@@ -1268,7 +1235,9 @@ void main_driver(const char* argv)
             turboutfile << std::endl;
         }
         
-        if ((turbForcing >= 1) and (writePlt)) {
+        if (((turbForcing >= 1) and (writePlt)) or
+            ((turbForcing >= 1) and (turbRestartRun == 0))) {
+            
             Real turbKE_s, turbKE_d, delta_turbKE;
             Real u_rms_s, u_rms_d, delta_u_rms;
             Real taylor_Ma_d;
@@ -1348,97 +1317,98 @@ void main_driver(const char* argv)
             ////////////////////////////////////////////////////
 
             if ((do_1D==0) and (do_2D==0)) {
-                structFactPrim.FortStructure(structFactPrimMF,geom);
-                structFactCons.FortStructure(structFactConsMF,geom);
+                structFactPrim.FortStructure(structFactPrimMF);
+                structFactCons.FortStructure(structFactConsMF);
             }
 
             if (project_dir >= 0) {
 
-                if (do_slab_sf == 0) {
-                    
-                    {
-                        MultiFab X, XRot;
-
-                        ComputeVerticalAverage(structFactPrimMF, X, geom, project_dir, 0, structVarsPrim);
-                        XRot = RotateFlattenedMF(X);
-                        master_project_rot_prim.ParallelCopy(XRot, 0, 0, structVarsPrim); 
-                        structFactPrimVerticalAverage.FortStructure(master_project_rot_prim,geom_flat);
-                    }
+                if (do_2D) {
 
-                    {
-                        MultiFab X, XRot;
+                    for (int i=0; i<n_cells[project_dir]; ++i) {
 
-                        ComputeVerticalAverage(structFactConsMF, X, geom, project_dir, 0, structVarsCons);
-                        XRot = RotateFlattenedMF(X);
-                        master_project_rot_cons.ParallelCopy(XRot, 0, 0, structVarsCons);
-                        structFactConsVerticalAverage.FortStructure(master_project_rot_cons,geom_flat);
-                    }
+                        {
+                            MultiFab Flattened;
 
-                }
-                else {
-                    
-                    {
-                        MultiFab X, XRot;
+                            ExtractSlice(structFactPrimMF, Flattened, project_dir, i, 0, structVarsPrim);
+                            structFactPrimArray[i].FortStructure(Flattened);
+                        }
 
-                        ComputeVerticalAverage(structFactPrimMF, X, geom, project_dir, 0, structVarsPrim, 0, membrane_cell-1);
-                        XRot = RotateFlattenedMF(X);
-                        master_project_rot_prim.ParallelCopy(XRot, 0, 0, structVarsPrim);
-                        structFactPrimVerticalAverage0.FortStructure(master_project_rot_prim,geom_flat);
-                    }
+                        {
+                            MultiFab Flattened;
 
-                    {
-                        MultiFab X, XRot;
+                            ExtractSlice(structFactConsMF, Flattened, project_dir, i, 0, structVarsCons);
+                            structFactConsArray[i].FortStructure(Flattened);
+                        }
 
-                        ComputeVerticalAverage(structFactPrimMF, X, geom, project_dir, 0, structVarsPrim, membrane_cell, n_cells[project_dir]-1);
-                        XRot = RotateFlattenedMF(X);
-                        master_project_rot_prim.ParallelCopy(XRot, 0, 0, structVarsPrim); 
-                        structFactPrimVerticalAverage1.FortStructure(master_project_rot_prim,geom_flat);
                     }
+                } else {
 
-                    {
-                        MultiFab X, XRot;
+                    if (do_slab_sf == 0) {
+                    
+                        {
+                            MultiFab Flattened;
+
+                            if (slicepoint < 0) {
+                                ComputeVerticalAverage(structFactPrimMF, Flattened, project_dir, 0, structVarsPrim);
+                            } else {
+                                ExtractSlice(structFactPrimMF, Flattened, project_dir, slicepoint, 0, structVarsPrim);
+                            }
+                            structFactPrimFlattened.FortStructure(Flattened);
+                        }
 
-                        ComputeVerticalAverage(structFactConsMF, X, geom, project_dir, 0, structVarsCons, 0, membrane_cell-1);
-                        XRot = RotateFlattenedMF(X);
-                        master_project_rot_cons.ParallelCopy(XRot, 0, 0, structVarsCons); 
-                        structFactConsVerticalAverage0.FortStructure(master_project_rot_cons,geom_flat);
-                    }
+                        {
+                            MultiFab Flattened;
 
-                    {
-                        MultiFab X, XRot;
+                            if (slicepoint < 0) {
+                                ComputeVerticalAverage(structFactConsMF, Flattened, project_dir, 0, structVarsCons);
+                            } else {
+                                ExtractSlice(structFactConsMF, Flattened, project_dir, slicepoint, 0, structVarsCons);
+                            }
+                            structFactConsFlattened.FortStructure(Flattened);
+                        }
+                    } else {
+                    
+                        {
+                            MultiFab Flattened;
 
-                        ComputeVerticalAverage(structFactConsMF, X, geom, project_dir, 0, structVarsCons, membrane_cell, n_cells[project_dir]-1);
-                        XRot = RotateFlattenedMF(X);
-                        master_project_rot_cons.ParallelCopy(XRot, 0, 0, structVarsCons); 
-                        structFactConsVerticalAverage1.FortStructure(master_project_rot_cons,geom_flat);
-                    }
-                }
-            }
+                            ComputeVerticalAverage(structFactPrimMF, Flattened, project_dir, 0, structVarsPrim, 0, membrane_cell-1);
+                            structFactPrimVerticalAverageMembraneLo.FortStructure(Flattened);
+                        }
 
-            if (do_2D) {
+                        {
+                            MultiFab Flattened;
 
-                for (int i=0; i<n_cells[2]; ++i) {
+                            ComputeVerticalAverage(structFactPrimMF, Flattened, project_dir, 0, structVarsPrim, membrane_cell, n_cells[project_dir]-1);
+                            structFactPrimVerticalAverageMembraneHi.FortStructure(Flattened);
+                        }
 
-                    {
-                        MultiFab X, XRot;
+                        {
+                            MultiFab Flattened;
 
-                        ExtractSlice(structFactPrimMF, X, geom, 2, i, 0, structVarsPrim);
-                        XRot = RotateFlattenedMF(X);
-                        master_2D_rot_prim.ParallelCopy(XRot, 0, 0, structVarsPrim); 
-                        structFactPrimArray[i].FortStructure(master_2D_rot_prim,geom_flat_2D);
-                    }
+                            ComputeVerticalAverage(structFactConsMF, Flattened, project_dir, 0, structVarsCons, 0, membrane_cell-1);
+                            structFactConsVerticalAverageMembraneLo.FortStructure(Flattened);
+                        }
 
-                    {
-                        MultiFab X, XRot;
+                        {
+                            MultiFab Flattened;
 
-                        ExtractSlice(structFactConsMF, X, geom, 2, i, 0, structVarsCons);
-                        XRot = RotateFlattenedMF(X);
-                        master_2D_rot_cons.ParallelCopy(XRot, 0, 0, structVarsCons); 
-                        structFactConsArray[i].FortStructure(master_2D_rot_cons,geom_flat_2D);
+                            ComputeVerticalAverage(structFactConsMF, Flattened, project_dir, 0, structVarsCons, membrane_cell, n_cells[project_dir]-1);
+                            structFactConsVerticalAverageMembraneHi.FortStructure(Flattened);
+                        }
                     }
-
                 }
             }
+
+            if (n_ads_spec > 0) {
+                int surfcov_dir = 2;
+                int surfcov_plane = 0;
+                int surfcov_structVars = n_ads_spec;
+                MultiFab Flattened;  // flattened multifab defined below
+                ExtractSlice(surfcov, Flattened, surfcov_dir, surfcov_plane, 0, surfcov_structVars);
+                structFactSurfCov.FortStructure(Flattened);
+            }
+
         }
 
         // write out structure factor
@@ -1453,14 +1423,14 @@ void main_driver(const char* argv)
 
             if (project_dir >= 0) {
                 if (do_slab_sf == 0) {
-                    structFactPrimVerticalAverage.WritePlotFile(step,time,geom_flat,"plt_SF_prim_VerticalAverage");
-                    structFactConsVerticalAverage.WritePlotFile(step,time,geom_flat,"plt_SF_cons_VerticalAverage");
+                    structFactPrimFlattened.WritePlotFile(step,time,geom_flat,"plt_SF_prim_Flattened");
+                    structFactConsFlattened.WritePlotFile(step,time,geom_flat,"plt_SF_cons_Flattened");
                 }
                 else {
-                    structFactPrimVerticalAverage0.WritePlotFile(step,time,geom_flat,"plt_SF_prim_VerticalAverageSlab0");
-                    structFactPrimVerticalAverage1.WritePlotFile(step,time,geom_flat,"plt_SF_prim_VerticalAverageSlab1");
-                    structFactConsVerticalAverage0.WritePlotFile(step,time,geom_flat,"plt_SF_cons_VerticalAverageSlab0");
-                    structFactConsVerticalAverage1.WritePlotFile(step,time,geom_flat,"plt_SF_cons_VerticalAverageSlab1");
+                    structFactPrimVerticalAverageMembraneLo.WritePlotFile(step,time,geom_flat,"plt_SF_prim_VerticalAverageMembraneLo");
+                    structFactPrimVerticalAverageMembraneHi.WritePlotFile(step,time,geom_flat,"plt_SF_prim_VerticalAverageMembraneHi");
+                    structFactConsVerticalAverageMembraneLo.WritePlotFile(step,time,geom_flat,"plt_SF_cons_VerticalAverageMembraneLo");
+                    structFactConsVerticalAverageMembraneHi.WritePlotFile(step,time,geom_flat,"plt_SF_cons_VerticalAverageMembraneHi");
                 }
             }
 
@@ -1468,10 +1438,10 @@ void main_driver(const char* argv)
                     
                 MultiFab prim_mag, prim_realimag, cons_mag, cons_realimag;
 
-                prim_mag.define(ba_flat_2D,dmap_flat_2D,structFactPrimArray[0].get_ncov(),0);
-                prim_realimag.define(ba_flat_2D,dmap_flat_2D,2*structFactPrimArray[0].get_ncov(),0);
-                cons_mag.define(ba_flat_2D,dmap_flat_2D,structFactConsArray[0].get_ncov(),0);
-                cons_realimag.define(ba_flat_2D,dmap_flat_2D,2*structFactConsArray[0].get_ncov(),0);
+                prim_mag.define(ba_flat,dmap_flat,structFactPrimArray[0].get_ncov(),0);
+                prim_realimag.define(ba_flat,dmap_flat,2*structFactPrimArray[0].get_ncov(),0);
+                cons_mag.define(ba_flat,dmap_flat,structFactConsArray[0].get_ncov(),0);
+                cons_realimag.define(ba_flat,dmap_flat,2*structFactConsArray[0].get_ncov(),0);
 
                 prim_mag.setVal(0.0);
                 cons_mag.setVal(0.0);
@@ -1479,8 +1449,8 @@ void main_driver(const char* argv)
                 cons_realimag.setVal(0.0);
 
                 for (int i=0; i<n_cells[2]; ++i) {
-                    structFactPrimArray[i].AddToExternal(prim_mag,prim_realimag,geom_flat_2D);
-                    structFactConsArray[i].AddToExternal(cons_mag,cons_realimag,geom_flat_2D);
+                    structFactPrimArray[i].AddToExternal(prim_mag,prim_realimag);
+                    structFactConsArray[i].AddToExternal(cons_mag,cons_realimag);
                 }
                     
                 Real ncellsinv = 1.0/n_cells[2];
@@ -1489,12 +1459,16 @@ void main_driver(const char* argv)
                 prim_realimag.mult(ncellsinv);
                 cons_realimag.mult(ncellsinv);
 
-                WritePlotFilesSF_2D(prim_mag,prim_realimag,geom_flat_2D,step,time,
+                WritePlotFilesSF_2D(prim_mag,prim_realimag,geom_flat,step,time,
                                     structFactPrimArray[0].get_names(),"plt_SF_prim_2D");
-                WritePlotFilesSF_2D(cons_mag,cons_realimag,geom_flat_2D,step,time,
+                WritePlotFilesSF_2D(cons_mag,cons_realimag,geom_flat,step,time,
                                     structFactConsArray[0].get_names(),"plt_SF_cons_2D");
 
             }
+
+            if (n_ads_spec > 0) {
+                structFactSurfCov.WritePlotFile(step,time,geom_surfcov,"plt_SF_surfcov");
+            }
         }
 
         // write checkpoint file
diff --git a/src_compressible_stag/writePlotFileStag.cpp b/src_compressible_stag/writePlotFileStag.cpp
index c606b1966..4a3742b3e 100644
--- a/src_compressible_stag/writePlotFileStag.cpp
+++ b/src_compressible_stag/writePlotFileStag.cpp
@@ -87,7 +87,11 @@ void WritePlotFileStag(int step,
 
 	if (nspec_surfcov>0) nplot += nspec_surfcov*6;
     }
-   
+
+    if (plot_deltaY_dir != -1) {
+        nplot += nspecies;
+    }
+
     amrex::BoxArray ba = cuMeans.boxArray();
     amrex::DistributionMapping dmap = cuMeans.DistributionMap();
 
@@ -257,6 +261,16 @@ void WritePlotFileStag(int step,
 	}
     }
 
+    if (plot_deltaY_dir != -1) {
+        MultiFab Ybar(ba, dmap, nspecies, 0);
+        // Yk occupies components 6 through 6+nspecies-1 of prim
+        WriteHorizontalAverageToMF(prim,Ybar,plot_deltaY_dir,6,nspecies,0);
+        Ybar.mult(-1.);
+        amrex::MultiFab::Add(Ybar,prim,6,0,nspecies,0);
+        amrex::MultiFab::Copy(plotfile,Ybar,0,cnt,nspecies,0);
+        cnt+= nspecies;
+    }
+
     // Set variable names
     cnt = 0;
 
@@ -445,6 +459,14 @@ void WritePlotFileStag(int step,
 
     }
 
+    if (plot_deltaY_dir != -1) {
+        x = "deltaYk_";
+        for (i=0; i<nspecies; i++) {
+            varNames[cnt] = x;
+            varNames[cnt++] += 48+i;
+        }
+    }
+
     AMREX_ASSERT(cnt==nplot);
 
     // write a plotfile
diff --git a/src_geometry/paramPlane.H b/src_geometry/paramPlane.H
index e3661d8e0..106540a8e 100644
--- a/src_geometry/paramPlane.H
+++ b/src_geometry/paramPlane.H
@@ -147,6 +147,8 @@ typedef struct {
 void BuildParamplanes(paramPlane* paramPlaneList, const int paramplanes, const Real* domainLo, const Real* domainHi);
 void BuildParamplanesPhonon(paramPlane* paramPlaneList, const int paramplanes, const Real* domainLo, const Real* domainHi);
 
+void SetBoundaryCells(paramPlane* paramPlaneList, const int paramplanes, const Real* domainLo, const Real* domainHi, iMultiFab& bCell);
+
 double getTheta(double nx, double ny, double nz);
 double getPhi(double nx, double ny, double nz);
 
diff --git a/src_geometry/paramPlane.cpp b/src_geometry/paramPlane.cpp
index c0f50d08e..e99d1f144 100644
--- a/src_geometry/paramPlane.cpp
+++ b/src_geometry/paramPlane.cpp
@@ -57,6 +57,7 @@ void BuildParamplanes(paramPlane* paramPlaneList, const int paramplanes, const R
 			for(int l=0; l<nspecies; l++)
 			{
 				bc_Xk_x_lo[l] /= n_lo[0];
+				cout << "n: " << bc_Xk_x_lo[l] << endl;
 			}
 		}
 		// Number densities defined
@@ -259,6 +260,7 @@ void BuildParamplanes(paramPlane* paramPlaneList, const int paramplanes, const R
 			for(int l=0; l<nspecies; l++)
 			{
 				bc_Xk_x_hi[l] /= n_hi[0];
+				cout << "n: " << bc_Xk_x_lo[l] << endl;				
 			}
 		}
 		// Number densities defined
@@ -1987,3 +1989,8 @@ void BuildParamplanesPhonon(paramPlane* paramPlaneList, const int paramplanes, c
     }
     planeFile.close();
 }
+
+void SetBoundaryCells(paramPlane* paramPlaneList, const int paramplanes, const Real* domainLo, const Real* domainHi, iMultiFab& bCell)
+{
+    // Placeholder: the body is currently empty, so no argument is read and bCell is left unmodified.
+}
diff --git a/src_geometry/paramplane_functions_K.H b/src_geometry/paramplane_functions_K.H
index fc0ad4372..dc99b2ad1 100644
--- a/src_geometry/paramplane_functions_K.H
+++ b/src_geometry/paramplane_functions_K.H
@@ -52,13 +52,15 @@ void find_inter_gpu(FhdParticleContainer::ParticleType& part, const Real delt, c
 	*intsurf = -1;
 	Real uval, vval, tval;
 	
-	pre_check_gpu(part, delt, paramplanes, ns, &flag, phi, plo, inttime);
+	//pre_check_gpu(part, delt, paramplanes, ns, &flag, phi, plo, inttime);
 
     //Complete
 	if(flag == 0)
 	{
 		for(int s=1;s<=ns;s++)
 		{
+			//if((s != 2) && (s != 3))
+			{
 			const paramPlane* surf = &paramplanes[s-1];
 
 			Real denominv = 1.0/(part.rdata(FHD_realData::velz)*surf->uy*surf->vx - part.rdata(FHD_realData::vely)*surf->uz*surf->vx - part.rdata(FHD_realData::velz)*surf->ux*surf->vy + part.rdata(FHD_realData::velx)*surf->uz*surf->vy + part.rdata(FHD_realData::vely)*surf->ux*surf->vz - part.rdata(FHD_realData::velx)*surf->uy*surf->vz);
@@ -89,6 +91,7 @@ void find_inter_gpu(FhdParticleContainer::ParticleType& part, const Real delt, c
 					*intside = 0; //0 for lhs
 				}
 			}
+			}
 		}
 	}
 	
diff --git a/src_particles/DsmcParticleContainer.cpp b/src_particles/DsmcParticleContainer.cpp
index 04b409dc4..1a50581b4 100644
--- a/src_particles/DsmcParticleContainer.cpp
+++ b/src_particles/DsmcParticleContainer.cpp
@@ -223,6 +223,7 @@ void FhdParticleContainer::MoveParticlesCPP(const Real dt, paramPlane* paramPlan
 					&intsurf, &inttime, &intside, AMREX_ZFILL(plo), AMREX_ZFILL(phi));
 
 				for (int d=0; d<(AMREX_SPACEDIM); ++d)
+				//for (int d=0; d<(AMREX_SPACEDIM-2); ++d)				
 				{
 					part.pos(d) += inttime * part.rdata(FHD_realData::velx + d)*ADJ;
 				}
@@ -237,6 +238,7 @@ void FhdParticleContainer::MoveParticlesCPP(const Real dt, paramPlane* paramPlan
 					Real posAlt[3];
 
 					for (int d=0; d<(AMREX_SPACEDIM); ++d)
+    				//for (int d=0; d<(AMREX_SPACEDIM-2); ++d)					
 					{
 						posAlt[d] = inttime * part.rdata(FHD_realData::velx + d)*ADJALT;
 					}
@@ -250,6 +252,7 @@ void FhdParticleContainer::MoveParticlesCPP(const Real dt, paramPlane* paramPlan
                     }
 					if(push == 1)
 					{
+						//for (int d=0; d<(AMREX_SPACEDIM-2); ++d)
 						for (int d=0; d<(AMREX_SPACEDIM); ++d)
 						{
 							part.pos(d) += part.pos(d) + posAlt[d];
@@ -417,8 +420,11 @@ void FhdParticleContainer::MovePhononsCPP(const Real dt, paramPlane* paramPlaneL
 				//Print() << "Pre " << part.id() << ": " << part.rdata(FHD_realData::velx + 0) << ", " << part.rdata(FHD_realData::velx + 1) << ", " << part.rdata(FHD_realData::velx + 2) << endl;
 //                printf("DT: %e\n", dt);
 //                cout << "DT: " << dt << endl;
-				find_inter_gpu(part, runtime, paramPlaneListPtr, paramPlaneCount,
-					&intsurf, &inttime, &intside, AMREX_ZFILL(plo), AMREX_ZFILL(phi));
+                for(int ii = 0;ii<100;ii++)
+                { 
+				    find_inter_gpu(part, runtime, paramPlaneListPtr, paramPlaneCount,
+					    &intsurf, &inttime, &intside, AMREX_ZFILL(plo), AMREX_ZFILL(phi));
+			    }
 				
 				Real tauImpurityInv = pow(part.rdata(FHD_realData::omega),4)/tau_i_p;
 				Real tauTAInv = part.rdata(FHD_realData::omega)*pow(T_init[0],4)/tau_ta_p;
@@ -452,7 +458,7 @@ void FhdParticleContainer::MovePhononsCPP(const Real dt, paramPlane* paramPlaneL
 					    }
 					    
 					    app_bc_phonon_gpu(&surf, part, intside, pdomsize, &push, &runtime, step, countPtr, specCountPtr, engine);
-    //					app_bc_gpu(&surf, part, intside, pdomsize, &push, &runtime, dummy, engine);
+    					//app_bc_gpu(&surf, part, intside, pdomsize, &push, &runtime, dummy, engine);
    					    //Print() << "Post " << part.id() << ": " << part.rdata(FHD_realData::velx + 0) << ", " << part.rdata(FHD_realData::velx + 1) << ", " << part.rdata(FHD_realData::velx + 2) << endl;
                         if(part.id() == -1)
                         {
@@ -491,6 +497,10 @@ void FhdParticleContainer::MovePhononsCPP(const Real dt, paramPlane* paramPlaneL
 
 			part.rdata(FHD_realData::timeFrac) = 1.0;
 			
+//			if(step%2==0)
+//			{
+//                part.rdata(FHD_realData::velz) = -part.rdata(FHD_realData::velz);    
+//		    }
 
 			if(part.idata(FHD_intData::newSpecies) != -1)
 			{
@@ -500,6 +510,10 @@ void FhdParticleContainer::MovePhononsCPP(const Real dt, paramPlane* paramPlaneL
 
 
 		});
+		
+		
+	
+		
 		//Print() << "Pre buffer size: " << paramPlaneList[1].recCountRight << endl;		
 		for (int i = 0; i < np; i++)
 		{
@@ -1123,6 +1137,7 @@ void FhdParticleContainer::Source(const Real dt, paramPlane* paramPlaneList, con
 					for(int j=nspecies-1; j>=0; j--)
 					{
 						Real density = paramPlaneList[i].densityRight[j]*rho_lo[0]/properties[j].mass;
+										//cout << "n: " << density << endl;
 						//Real density = paramPlaneList[i].densityRight[j];
 						
 						Real xMom = paramPlaneList[i].xMomFluxRight[j];												
diff --git a/src_reactDiff/AdvanceDiffusion.cpp b/src_reactDiff/AdvanceDiffusion.cpp
new file mode 100644
index 000000000..a37601fe2
--- /dev/null
+++ b/src_reactDiff/AdvanceDiffusion.cpp
@@ -0,0 +1,192 @@
+#include "reactDiff_functions.H"
+
+// Advances n_old to n_new over one step dt of n_t = div ( D grad (n)) + div (sqrt(2*variance*D*n)*W) + g,
+// where g = ext_src is a constant in time external source; reactDiff_diffusion_type selects the scheme.
+void AdvanceDiffusion(MultiFab& n_old,
+                      MultiFab& n_new,
+                      const MultiFab& ext_src,
+                      const Real& dt,
+                      const Real& time,
+                      const Geometry& geom) {
+    // n_old: state at the start of the step; n_new: receives the advanced state; time is forwarded to the helpers.
+    BoxArray ba = n_old.boxArray();
+    DistributionMapping dmap = n_old.DistributionMap();
+
+    // store D_Fick on faces (one MultiFab per direction, nspecies components, no ghost cells)
+    std::array< MultiFab, AMREX_SPACEDIM > diff_coef_face;
+    AMREX_D_TERM(diff_coef_face[0].define(convert(ba,nodal_flag_x), dmap, nspecies, 0);,
+                 diff_coef_face[1].define(convert(ba,nodal_flag_y), dmap, nspecies, 0);,
+                 diff_coef_face[2].define(convert(ba,nodal_flag_z), dmap, nspecies, 0););
+
+    for (int i=0; i<nspecies; ++i) {
+        // fill component i of every direction's face MultiFab with the constant D_Fick[i]
+        for (int d=0; d<AMREX_SPACEDIM; ++d) {
+            diff_coef_face[d].setVal(D_Fick[i],i,1,0);
+        }
+    }
+    
+    // Intended guard for the single-cell (well-mixed) system, where diffusion is not needed.
+    // There is no restriction on the number of cells, so special-casing one cell is only a simplification.
+    // NOTE(review): the test below compares n_cells against 0, not 1 - confirm it matches the single-cell intent.
+    if (n_cells[0] == 0 && n_cells[1] == 0) {
+        Abort("AdvanceDiffusion() - fix one cell case");
+    }
+
+    if (reactDiff_diffusion_type == 3) {
+        MultinomialDiffusion(n_old,n_new,diff_coef_face,geom,dt,time);
+        return;
+    }
+
+    MultiFab diff_fluxdiv (ba,dmap,nspecies,0);
+    MultiFab stoch_fluxdiv(ba,dmap,nspecies,0);
+    // compute diffusive flux divergence from n_old
+    DiffusiveNFluxdiv(n_old,diff_fluxdiv,diff_coef_face,geom,time);
+    // the stochastic flux divergence is only generated when variance_coef_mass > 0; otherwise it is zeroed
+    if (variance_coef_mass > 0.) {
+        StochasticNFluxdiv(n_old,stoch_fluxdiv,diff_coef_face,geom,dt,time,0);
+    } else {
+        stoch_fluxdiv.setVal(0.);
+    }
+
+    if (reactDiff_diffusion_type == 0 || reactDiff_diffusion_type == 4) {
+        // explicit trapezoidal predictor-corrector (type 0) OR forward Euler (type 4)
+
+        // forward Euler predictor
+        // n_k^{n+1,*} = n_k^n + dt div (D_k grad n_k)^n
+        //                     + dt div (sqrt(2 D_k n_k / dt) Z)^n
+        //                     + dt ext_src
+        MultiFab::Copy(n_new,n_old,0,0,nspecies,0);
+        MultiFab::Saxpy(n_new,dt,diff_fluxdiv ,0,0,nspecies,0);
+        MultiFab::Saxpy(n_new,dt,stoch_fluxdiv,0,0,nspecies,0);
+        MultiFab::Saxpy(n_new,dt,ext_src      ,0,0,nspecies,0);
+        n_new.FillBoundary(geom.periodicity());
+        MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time);
+
+        if (reactDiff_diffusion_type == 0) {
+            /*
+              ! Trapezoidal corrector:
+              ! n_k^{n+1} = n_k^n + (dt/2) div (D_k grad n_k)^n
+              !                   + (dt/2) div (D_k grad n_k)^{n+1,*}
+              !                   +  dt    div (sqrt(2 D_k n_k / dt) Z)^n
+              !                   +  dt    ext_src
+              ! This is the same as stepping to time t+2*dt and then averaging with the state at time t:
+              !  n_new = 1/2 * (n_old + n_new + dt*div (D grad n_new) + div (sqrt(2 D_k n_k dt) Z)^n + dt*ext_src)
+              !  which is what we use below
+            */
+
+            // compute diffusive flux divergence of the predictor state (overwrites diff_fluxdiv)
+            DiffusiveNFluxdiv(n_new,diff_fluxdiv,diff_coef_face,geom,time);
+
+            MultiFab::Saxpy(n_new,1.,n_old,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,dt,diff_fluxdiv ,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,dt,stoch_fluxdiv,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,dt,ext_src      ,0,0,nspecies,0);
+            n_new.mult(0.5);
+            n_new.FillBoundary(geom.periodicity());
+            MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time);
+        }
+        
+    } else if (reactDiff_diffusion_type == 1) {
+        /*
+       ! Crank-Nicolson
+       ! n_k^{n+1} = n_k^n + (dt/2)(div D_k grad n_k)^n
+       !                   + (dt/2)(div D_k grad n_k)^n+1
+       !                   +  dt    div (sqrt(2 D_k n_k / dt) Z)^n
+       !                   +  dt    ext_src
+       !
+       ! ( I- (dt/2) div D_k grad) n_k^n+1 = n_k^n
+       !                                     + (dt/2)(div D_k grad n_k)^n
+       !                                     +  dt    div (sqrt(2 D_k n_k / dt) Z)^n
+       !                                     +  dt    ext_src
+       ! we combine the entire rhs into stoch_fluxdiv
+        */
+
+        MultiFab::Saxpy(stoch_fluxdiv,1.,ext_src,0,0,nspecies,0);
+        MultiFab::Saxpy(stoch_fluxdiv,0.5,diff_fluxdiv,0,0,nspecies,0);
+        stoch_fluxdiv.mult(dt);
+        MultiFab::Saxpy(stoch_fluxdiv,1.,n_old,0,0,nspecies,0);
+
+        ImplicitDiffusion(n_old, n_new, stoch_fluxdiv, diff_coef_face, geom, 0.5*dt, time);
+
+    } else if (reactDiff_diffusion_type == 2) {
+        
+        /*
+       ! explicit midpoint scheme
+
+       ! n_k^{n+1/2} = n_k^n + (dt/2) div (D_k grad n_k)^n
+       !                     + (dt/2) div (sqrt(2 D_k n_k / (dt/2) ) Z_1)^n
+       !                     + (dt/2) ext_src
+        */
+
+        MultiFab::Copy(n_new,n_old,0,0,nspecies,0);
+        MultiFab::Saxpy(n_new,0.5*dt,diff_fluxdiv,0,0,nspecies,0);
+        MultiFab::Saxpy(n_new,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0);
+        MultiFab::Saxpy(n_new,0.5*dt,ext_src,0,0,nspecies,0);
+        n_new.FillBoundary(geom.periodicity());
+        MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time);
+
+        // compute diffusive flux divergence at t^{n+1/2}
+        DiffusiveNFluxdiv(n_new,diff_fluxdiv,diff_coef_face,geom,time);
+        // the second-stage stochastic contribution is only requested when stochastic fluxes are enabled
+        if (variance_coef_mass > 0.) {
+            GenerateStochasticFluxdivCorrector(n_old,n_new,stoch_fluxdiv,diff_coef_face,dt,time,geom);
+        }
+
+       /*
+       ! n_k^{n+1} = n_k^n + dt div (D_k grad n_k)^{n+1/2}
+       !                   + dt div (sqrt(2 D_k n_k^n dt) Z_1 / sqrt(2) )
+       !                   + dt div (sqrt(2 D_k n_k^? dt) Z_2 / sqrt(2) )
+       !                   + dt ext_src
+       ! where
+       ! n_k^? = n_k^n               (midpoint_stoch_flux_type=1)
+       !       = n_k^pred            (midpoint_stoch_flux_type=2)
+       !       = 2*n_k^pred - n_k^n  (midpoint_stoch_flux_type=3)
+       */
+
+        MultiFab::Copy(n_new,n_old,0,0,nspecies,0);
+        MultiFab::Saxpy(n_new,dt,diff_fluxdiv,0,0,nspecies,0);
+        MultiFab::Saxpy(n_new,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0);
+        MultiFab::Saxpy(n_new,dt,ext_src,0,0,nspecies,0);
+        n_new.FillBoundary(geom.periodicity());
+        MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time);
+
+    } else {
+        Abort("AdvanceDiffusion() - invalid reactDiff_diffusion_type");
+    }
+    
+}
+
+void GenerateStochasticFluxdivCorrector(MultiFab& n_old,
+                                        MultiFab& n_new,
+                                        MultiFab& stoch_fluxdiv,
+                                        const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face,
+                                        const Real& dt,
+                                        const Real& time,
+                                        const Geometry& geom) {
+    // Second-stage (corrector) stochastic flux divergence for the midpoint schemes:
+    // fill random flux multifabs with new random numbers,
+    // compute second-stage stochastic flux divergence and
+    // add to first-stage stochastic flux divergence; midpoint_stoch_flux_type picks the state used
+    if (midpoint_stoch_flux_type == 1) {
+
+        // use n_old
+        StochasticNFluxdiv(n_old,stoch_fluxdiv,diff_coef_face,geom,dt,time,1);
+
+    } else if (midpoint_stoch_flux_type == 2) {
+
+        // use n_pred (held in n_new on entry)
+        StochasticNFluxdiv(n_new,stoch_fluxdiv,diff_coef_face,geom,dt,time,1);
+
+    } else if (midpoint_stoch_flux_type == 3) {
+        // Scale and subtract over the same one-cell ghost layer; mult() defaults to valid cells only (nghost=0).
+        // We use n_new=2*n_pred-n_old here as temporary storage since we will overwrite it shortly
+        n_new.mult(2.,1);
+        MultiFab::Subtract(n_new,n_old,0,0,nspecies,1);
+
+        // use n_new=2*n_pred-n_old
+        StochasticNFluxdiv(n_new,stoch_fluxdiv,diff_coef_face,geom,dt,time,1);
+                
+    } else {
+        Abort("GenerateStochasticFluxdivCorrector() - invalid midpoint_stoch_flux_type");
+    }
+}
diff --git a/src_reactDiff/AdvanceReaction.cpp b/src_reactDiff/AdvanceReaction.cpp
new file mode 100644
index 000000000..d9c31c4e6
--- /dev/null
+++ b/src_reactDiff/AdvanceReaction.cpp
@@ -0,0 +1,93 @@
+#include "reactDiff_functions.H"
+#include "chemistry_functions.H"
+
+// Advances n_old to n_new over one step dt of dn/dt = f(n) - g (note the minus sign for g),
+// where f(n) are the chemical production rates (deterministic or stochastic)
+// and g=ext_src is a constant (in time) *deterministic* source term.
+// To model stochastic particle production (sources), include g in the definition of f instead,
+// or add it as a reaction 0->products.
+void AdvanceReaction(MultiFab& n_old,
+                     MultiFab& n_new,
+                     const MultiFab& ext_src,
+                     const Real& dt,
+                     const Real& time,
+                     const Geometry& geom) {
+    // n_new receives the updated state; its periodic and physical boundaries are refilled before each return.
+    BoxArray ba = n_old.boxArray();
+    DistributionMapping dmap = n_old.DistributionMap();
+
+    // if there are no reactions to process, copy n_old to n_new,
+    // account for ext_src (n_new = n_old - dt*ext_src) and return
+    if (nreaction < 1) {
+        MultiFab::LinComb(n_new,1,n_old,0,-dt,ext_src,0,0,nspecies,0);
+        n_new.FillBoundary(geom.periodicity());
+        MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time);
+        return;
+    }
+
+    MultiFab rate(ba,dmap,nspecies,0);
+
+    // coefficients handed to ChemicalRates; changed from (1,0) only for reactDiff_reaction_type = 1
+    Vector<Real> mattingly_lin_comb_coef(2);
+    mattingly_lin_comb_coef[0] = 1.;
+    mattingly_lin_comb_coef[1] = 0.;
+
+    if (reactDiff_reaction_type == 0) { // first-order det/tau-leaping/CLE, or SSA
+
+        // calculate rates
+        // rates could be deterministic or stochastic depending on reaction_type
+        ChemicalRates(n_old,rate,geom,dt,n_old,mattingly_lin_comb_coef,volume_factor);
+        
+        MultiFab::LinComb(n_new,1,n_old,0,dt,rate,0,0,nspecies,0);
+        MultiFab::Saxpy(n_new,-dt,ext_src,0,0,nspecies,0); //note the negative sign
+
+        n_new.FillBoundary(geom.periodicity());
+        MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time);
+
+    } else if (reactDiff_reaction_type == 1) { // second-order det/tau-leaping/CLE
+
+        // here we use Mattingly's predictor-corrector with theta=0.5 (for reaction_type=1).
+        // with these parameters this is actually equivalent to a traditional midpoint scheme.
+        Real theta = 0.5;
+        Real alpha1 = 2.;
+        Real alpha2 = 1.;
+
+        //!!!!!!!!!!!!!!
+        // predictor   !
+        //!!!!!!!!!!!!!!
+
+        // calculate rates from a(n_old)
+        ChemicalRates(n_old,rate,geom,theta*dt,n_old,mattingly_lin_comb_coef,volume_factor);
+
+        // update: n_new = n_old + theta*dt*rate - theta*dt*ext_src
+        MultiFab::LinComb(n_new,1,n_old,0,theta*dt,rate,0,0,nspecies,0);
+        MultiFab::Saxpy(n_new,-theta*dt,ext_src,0,0,nspecies,0); //note the negative sign 
+        n_new.FillBoundary(geom.periodicity());
+        MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time);
+
+        //!!!!!!!!!!!!!!
+        // corrector   !
+        //!!!!!!!!!!!!!!
+      
+        // Here we write this in the form that Mattingly et al do
+        //  where we just continue the second half of the time step from where we left
+
+        mattingly_lin_comb_coef[0] = -alpha2;
+        mattingly_lin_comb_coef[1] = alpha1;
+
+        // calculate rates from 2*a(n_pred)-a(n_old)
+        ChemicalRates(n_old,rate,geom,(1.-theta)*dt,n_new,mattingly_lin_comb_coef,volume_factor);
+
+        // update: continue from the predictor state already stored in n_new
+        MultiFab::Saxpy(n_new,(1.-theta)*dt,rate,0,0,nspecies,0);
+        // note the negative sign
+        // also note that ext_src does not change in the time interval (t,t+dt)
+        MultiFab::Saxpy(n_new,-(1.-theta)*dt*(alpha1-alpha2),ext_src,0,0,nspecies,0);
+        n_new.FillBoundary(geom.periodicity());
+        MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time);
+        
+    } else {
+        Abort("AdvanceReaction() - invalid reactDiff_reaction_type");
+    }
+
+}
diff --git a/src_reactDiff/AdvanceReactionDiffusion.cpp b/src_reactDiff/AdvanceReactionDiffusion.cpp
new file mode 100644
index 000000000..f3519c4ca
--- /dev/null
+++ b/src_reactDiff/AdvanceReactionDiffusion.cpp
@@ -0,0 +1,355 @@
+#include "reactDiff_functions.H"
+#include "chemistry_functions.H"
+
+void AdvanceReactionDiffusion(MultiFab& n_old,
+                              MultiFab& n_new,
+                              const MultiFab& ext_src,
+                              const Real& dt,
+                              const Real& time,
+                              const Geometry& geom) {
+
+    BoxArray ba = n_old.boxArray();
+    DistributionMapping dmap = n_old.DistributionMap();
+
+    // store D_Fick on faces
+    std::array< MultiFab, AMREX_SPACEDIM > diff_coef_face;
+    AMREX_D_TERM(diff_coef_face[0].define(convert(ba,nodal_flag_x), dmap, nspecies, 0);,
+                 diff_coef_face[1].define(convert(ba,nodal_flag_y), dmap, nspecies, 0);,
+                 diff_coef_face[2].define(convert(ba,nodal_flag_z), dmap, nspecies, 0););
+
+    for (int i=0; i<nspecies; ++i) {
+        // load D_fick for species i
+        for (int d=0; d<AMREX_SPACEDIM; ++d) {
+            diff_coef_face[d].setVal(D_Fick[i],i,1,0);
+        }
+    }
+
+    MultiFab rate1(ba,dmap,nspecies,0);
+
+    Vector<Real> mattingly_lin_comb_coef(2);
+    mattingly_lin_comb_coef[0] = 1.;
+    mattingly_lin_comb_coef[1] = 0.;
+
+    if (temporal_integrator == -3) { // multinomial diffusion
+
+        // calculate rates
+        // rates could be deterministic or stochastic depending on use_Poisson_rng
+        ChemicalRates(n_old,rate1,geom,dt,n_old,mattingly_lin_comb_coef,volume_factor);
+
+        // advance multinomial diffusion
+        MultinomialDiffusion(n_old,n_new,diff_coef_face,geom,dt,time);
+
+        // add reaction contribution and external source
+        MultiFab::Saxpy(n_new,dt,rate1,0,0,nspecies,0);
+        MultiFab::Saxpy(n_new,dt,ext_src,0,0,nspecies,0);
+        n_new.FillBoundary(geom.periodicity());
+        MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time);
+        return;
+
+    }
+
+    MultiFab diff_fluxdiv (ba,dmap,nspecies,0);
+    MultiFab stoch_fluxdiv(ba,dmap,nspecies,0);
+
+    DiffusiveNFluxdiv(n_old,diff_fluxdiv,diff_coef_face,geom,time);
+
+    if (variance_coef_mass > 0.) {
+        StochasticNFluxdiv(n_old,stoch_fluxdiv,diff_coef_face,geom,dt,time,0);
+    } else {
+        stoch_fluxdiv.setVal(0.);
+    }
+
+    //!!!!!!!!!!!!!!!
+    // time advance !
+    //!!!!!!!!!!!!!!!
+
+    if (temporal_integrator == -1) { // forward Euler
+
+        // calculate rates
+        // rates could be deterministic or stochastic depending on use_Poisson_rng
+        ChemicalRates(n_old,rate1,geom,dt,n_old,mattingly_lin_comb_coef,volume_factor);
+
+        // n_k^{n+1} = n_k^n + dt div (D_k grad n_k)^n
+        //                   + dt div (sqrt(2 D_k n_k^n dt) Z) ! Gaussian noise
+        //                   + 1/dV * P( f(n_k)*dt*dV )        ! Poisson noise
+        //                   + dt ext_src
+        MultiFab::LinComb(n_new,1,n_old,0,dt,diff_fluxdiv,0,0,nspecies,0);
+        MultiFab::Saxpy(n_new,dt,stoch_fluxdiv,0,0,nspecies,0);
+        MultiFab::Saxpy(n_new,dt,rate1,0,0,nspecies,0);
+        MultiFab::Saxpy(n_new,dt,ext_src,0,0,nspecies,0);
+        n_new.FillBoundary(geom.periodicity());
+        MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time);
+        
+    } else if (temporal_integrator == -2) { // explicit midpoint
+
+        // temporary storage for second rate
+        MultiFab rate2(ba,dmap,nspecies,0);
+
+        if (reaction_type == 2) { // explicit midpoint with SSA
+
+            //!!!!!!!!!!!!!!
+            // predictor   !
+            //!!!!!!!!!!!!!!
+
+            /*
+         ! n_k^{**} = n_k^n + (dt/2)       div (D_k grad n_k)^n
+         !                  + (dt/sqrt(2)) div sqrt(2 D_k n_k^n / (dt*dV)) Z_1 ! Gaussian noise
+         !                  + (dt/2)       ext_src
+            */
+            MultiFab::Copy(n_new,n_old,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,0.5*dt,diff_fluxdiv,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,0.5*dt,ext_src,0,0,nspecies,0);
+
+            // computing rate1 = R(n^{**},dt/2) / (dt/2)
+            ChemicalRates(n_new,rate1,geom,0.5*dt,n_old,mattingly_lin_comb_coef,volume_factor);
+            
+            // n_k^* = n_k^{**} + R(n^{**},dt/2)
+            MultiFab::Saxpy(n_new,0.5*dt,rate1,0,0,nspecies,0);
+            n_new.FillBoundary(geom.periodicity());
+            MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time);
+
+            //!!!!!!!!!!!!!!
+            // corrector   !
+            //!!!!!!!!!!!!!!
+
+            // compute diffusive flux divergence
+            DiffusiveNFluxdiv(n_new,diff_fluxdiv,diff_coef_face,geom,time);
+
+            // computing rate2 = R(n^*,dt/2) / (dt/2)
+            ChemicalRates(n_new,rate2,geom,0.5*dt,n_old,mattingly_lin_comb_coef,volume_factor);
+
+            // compute stochastic flux divergence and add to the ones from the predictor stage
+            if (variance_coef_mass > 0.) {
+                GenerateStochasticFluxdivCorrector(n_old,n_new,stoch_fluxdiv,diff_coef_face,dt,time,geom);
+            }
+
+            /*
+         ! n_k^{n+1} = n_k^n + dt div (D_k grad n_k)^*
+         !                   + dt div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1 / sqrt(2) ) ! Gaussian noise
+         !                   + dt div (sqrt(2 D_k n_k^? / (dt*dV)) Z_2 / sqrt(2) ) ! Gaussian noise
+         !                   + R(n^{**},dt/2)
+         !                   + R(n^{*},dt/2)
+         !                   + dt ext_src
+         ! where
+         ! n_k^? = n_k^n               (midpoint_stoch_flux_type=1)
+         !       = n_k^pred            (midpoint_stoch_flux_type=2)
+         !       = 2*n_k^pred - n_k^n  (midpoint_stoch_flux_type=3)
+            */
+
+            MultiFab::Copy(n_new,n_old,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,dt,diff_fluxdiv,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,0.5*dt,rate1,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,0.5*dt,rate2,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,dt,ext_src,0,0,nspecies,0);
+            n_new.FillBoundary(geom.periodicity());
+            MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time);
+
+        } else { // explicit midpoint for det/tau/CLE
+
+            //!!!!!!!!!!!!!!
+            // predictor   !
+            //!!!!!!!!!!!!!!
+
+            // calculate rates from a(n_old)
+            ChemicalRates(n_old,rate1,geom,0.5*dt,n_old,mattingly_lin_comb_coef,volume_factor);
+
+            /*
+         ! n_k^{n+1/2} = n_k^n + (dt/2)       div (D_k grad n_k)^n
+         !                     + (dt/sqrt(2)) div sqrt(2 D_k n_k^n / (dt*dV)) Z_1 ! Gaussian noise
+         !                     + 1/dV * P_1( f(n_k)*(dt/2)*dV )                   ! Poisson noise
+         !                     + (dt/2)        ext_src
+            */
+            MultiFab::Copy(n_new,n_old,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,0.5*dt,diff_fluxdiv,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,0.5*dt,rate1,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,0.5*dt,ext_src,0,0,nspecies,0);
+            n_new.FillBoundary(geom.periodicity());
+            MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time);
+
+            //!!!!!!!!!!!!!!
+            // corrector   !
+            //!!!!!!!!!!!!!!
+
+            // Here we do not write this in the form that Mattingly et al do
+            // where we just continue the second half of the time step from where we left
+            // Rather, we compute terms at the midpoint and then add contributions from both 
+            // halves of the time step to n_old
+            // This works simpler with diffusion but we have to store both rates1 and rates2
+         
+            // compute diffusive flux divergence
+            DiffusiveNFluxdiv(n_new,diff_fluxdiv,diff_coef_face,geom,time);
+
+            // calculate rates from 2*a(n_pred)-a(n_old)
+            mattingly_lin_comb_coef[0] = -1.;
+            mattingly_lin_comb_coef[1] = 2.;
+            ChemicalRates(n_old,rate2,geom,0.5*dt,n_new,mattingly_lin_comb_coef,volume_factor);
+
+            //compute stochastic flux divergence and add to the ones from the predictor stage
+            if (variance_coef_mass > 0.) {
+                GenerateStochasticFluxdivCorrector(n_old,n_new,stoch_fluxdiv,diff_coef_face,dt,time,geom);
+            }
+
+            /*
+         ! n_k^{n+1} = n_k^n + dt div (D_k grad n_k)^{n+1/2}
+         !                   + dt div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1 / sqrt(2) ) ! Gaussian noise
+         !                   + dt div (sqrt(2 D_k n_k^? / (dt*dV)) Z_2 / sqrt(2) ) ! Gaussian noise
+         !                   + 1/dV * P_1( f(n_k)*(dt/2)*dV )                        ! Poisson noise
+         !                   + 1/dV * P_2( (2*f(n_k^pred)-f(n_k))*(dt/2)*dV )        ! Poisson noise
+         !                   + dt ext_src
+         ! where
+         ! n_k^? = n_k^n               (midpoint_stoch_flux_type=1)
+         !       = n_k^pred            (midpoint_stoch_flux_type=2)
+         !       = 2*n_k^pred - n_k^n  (midpoint_stoch_flux_type=3)
+            */
+
+            MultiFab::Copy(n_new,n_old,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,dt,diff_fluxdiv,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,0.5*dt,rate1,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,0.5*dt,rate2,0,0,nspecies,0);
+            MultiFab::Saxpy(n_new,dt,ext_src,0,0,nspecies,0);
+            n_new.FillBoundary(geom.periodicity());
+            MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time);
+                                     
+        } // explicit midpoint for det/tau/CLE
+        
+    } else if (temporal_integrator == -4) { // implicit midpoint
+
+        if (reaction_type == 2) { // implicit midpoint with SSA
+
+            /*
+         ! backward Euler predictor to half-time
+         ! n_k^* = n_k^n + (dt/2)       div (D_k grad n_k)^*
+         !               + (dt/sqrt(2)) div sqrt(2 D_k n_k^n / (dt*dV)) Z_1 ! Gaussian noise
+         !               + (dt/2)       ext_src
+         !
+         ! (I - div (dt/2) D_k grad) n_k^* = n_k^n
+         !                                   + (dt/sqrt(2)) div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1
+         !                                   + (dt/2) ext_src
+            */
+
+            MultiFab rhs(ba,dmap,nspecies,0);
+
+            MultiFab::Copy(rhs,n_old,0,0,nspecies,0);
+            MultiFab::Saxpy(rhs,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0);
+            MultiFab::Saxpy(rhs,0.5*dt,ext_src,0,0,nspecies,0);
+
+            ImplicitDiffusion(n_old, n_new, rhs, diff_coef_face, geom, 0.5*dt, time);
+
+            // corrector
+
+            // compute R(n^*,dt) / dt
+            ChemicalRates(n_new,rate1,geom,dt,n_new,mattingly_lin_comb_coef,volume_factor);
+
+            // compute stochastic flux divergence and add to the ones from the predictor stage
+            if (variance_coef_mass > 0.) {
+                GenerateStochasticFluxdivCorrector(n_old,n_new,stoch_fluxdiv,diff_coef_face,dt,time,geom);
+            }
+
+            /*
+         ! Crank-Nicolson
+         ! n_k^{n+1} = n_k^n + (dt/2) div (D_k grad n_k)^n
+         !                   + (dt/2) div (D_k grad n_k)^{n+1}
+         !                   + dt div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1 / sqrt(2) ) ! Gaussian noise
+         !                   + dt div (sqrt(2 D_k n_k^? / (dt*dV)) Z_2 / sqrt(2) ) ! Gaussian noise
+         !                   + R(n^*,dt)
+         !                   + dt ext_src
+         !
+         ! (I - div (dt/2) D_k grad) n_k^{n+1} = n_k^n
+                             + (dt/2) div (D_k grad n_k^n)
+         !                   + dt div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1 / sqrt(2) ) ! Gaussian noise
+         !                   + dt div (sqrt(2 D_k n_k^? / (dt*dV)) Z_2 / sqrt(2) ) ! Gaussian noise
+         !                   + R(n^*,dt)
+         !                   + dt ext_src
+            */
+
+            MultiFab::Copy(rhs,n_old,0,0,nspecies,0);
+            MultiFab::Saxpy(rhs,0.5*dt,diff_fluxdiv,0,0,nspecies,0);
+            MultiFab::Saxpy(rhs,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0);
+            MultiFab::Saxpy(rhs,dt,rate1,0,0,nspecies,0);
+            MultiFab::Saxpy(rhs,dt,ext_src,0,0,nspecies,0);
+
+            ImplicitDiffusion(n_old, n_new, rhs, diff_coef_face, geom, 0.5*dt, time);
+
+        } else { // implicit midpoint for det/tau/CLE
+
+/*
+         ! backward Euler predictor to half-time
+         ! n_k^{n+1/2} = n_k^n + (dt/2)       div (D_k grad n_k)^{n+1/2}
+         !                     + (dt/sqrt(2)) div sqrt(2 D_k n_k^n / (dt*dV)) Z_1 ! Gaussian noise
+         !                     + 1/dV * P_1( f(n_k)*(dt/2)*dV )                   ! Poisson noise
+         !                     + (dt/2)       ext_src
+         !
+         ! (I - div (dt/2) D_k grad) n_k^{n+1/2} = n_k^n
+         !                                       + (dt/sqrt(2)) div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1
+         !                                       + 1/dV * P_1( f(n_k)*(dt/2)*dV )
+         !                                       + (dt/2) ext_src
+*/
+
+            MultiFab rhs  (ba,dmap,nspecies,0);
+            MultiFab rate2(ba,dmap,nspecies,0);
+
+            // calculate rates
+            // rates could be deterministic or stochastic depending on use_Poisson_rng
+            ChemicalRates(n_old,rate1,geom,0.5*dt,n_old,mattingly_lin_comb_coef,volume_factor);
+
+            MultiFab::Copy(rhs,n_old,0,0,nspecies,0);
+            MultiFab::Saxpy(rhs,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0);
+            MultiFab::Saxpy(rhs,0.5*dt,rate1,0,0,nspecies,0);
+            MultiFab::Saxpy(rhs,0.5*dt,ext_src,0,0,nspecies,0);
+
+            ImplicitDiffusion(n_old, n_new, rhs, diff_coef_face, geom, 0.5*dt, time);
+
+            // corrector
+
+            // calculate rates from 2*a(n_pred)-a(n_old)
+            mattingly_lin_comb_coef[0] = -1.;
+            mattingly_lin_comb_coef[1] = 2.;
+            ChemicalRates(n_old,rate2,geom,0.5*dt,n_new,mattingly_lin_comb_coef,volume_factor);
+            
+            // compute stochastic flux divergence and add to the ones from the predictor stage
+            if (variance_coef_mass > 0.) {
+                // compute n on faces to use in the stochastic flux in the corrector
+                // three possibilities
+                GenerateStochasticFluxdivCorrector(n_old,n_new,stoch_fluxdiv,diff_coef_face,dt,time,geom);
+            }
+
+/*            
+         ! Crank-Nicolson
+         ! n_k^{n+1} = n_k^n + (dt/2) div (D_k grad n_k)^n
+         !                   + (dt/2) div (D_k grad n_k)^{n+1}
+         !                   + dt div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1 / sqrt(2) ) ! Gaussian noise
+         !                   + dt div (sqrt(2 D_k n_k^? / (dt*dV)) Z_2 / sqrt(2) ) ! Gaussian noise
+         !                   + 1/dV * P_1( f(n_k)*(dt/2)*dV )                        ! Poisson noise
+         !                   + 1/dV * P_2( (2*f(n_k^pred)-f(n_k))*(dt/2)*dV )        ! Poisson noise
+         !                   + dt ext_src
+         !
+         ! in delta form
+         !
+         ! (I - div (dt/2) D_k grad) n_k^{n+1} = n_k^n
+         !                                     + (dt/2) div (D_k grad n_k^n)
+         !                                     + dt div (sqrt(2 D_k n_k^n / (dt*dV)) Z_1 / sqrt(2) ) ! Gaussian noise
+         !                                     + dt div (sqrt(2 D_k n_k^? / (dt*dV)) Z_2 / sqrt(2) ) ! Gaussian noise
+         !                                     + 1/dV * P_1( f(n_k)*(dt/2)*dV )                      ! Poisson noise
+         !                                     + 1/dV * P_2( (2*f(n_k^pred)-f(n_k))*(dt/2)*dV )      ! Poisson noise
+         !                                     + dt ext_src
+*/
+            MultiFab::Copy(rhs,n_old,0,0,nspecies,0);
+            MultiFab::Saxpy(rhs,0.5*dt,diff_fluxdiv,0,0,nspecies,0);
+            MultiFab::Saxpy(rhs,dt/std::sqrt(2.),stoch_fluxdiv,0,0,nspecies,0);
+            MultiFab::Saxpy(rhs,0.5*dt,rate1,0,0,nspecies,0);
+            MultiFab::Saxpy(rhs,0.5*dt,rate2,0,0,nspecies,0);
+            MultiFab::Saxpy(rhs,dt,ext_src,0,0,nspecies,0);
+
+            ImplicitDiffusion(n_old, n_new, rhs, diff_coef_face, geom, 0.5*dt, time);
+
+        }
+    } else {
+
+        Abort("AdvanceReactionDiffusion() - invalid temporal_integrator");
+
+    }
+}
diff --git a/src_reactDiff/AdvanceTimestep.cpp b/src_reactDiff/AdvanceTimestep.cpp
new file mode 100644
index 000000000..d271c1701
--- /dev/null
+++ b/src_reactDiff/AdvanceTimestep.cpp
@@ -0,0 +1,60 @@
+#include "reactDiff_functions.H"
+#include "chemistry_functions.H"
+
+void AdvanceTimestep(MultiFab& n_old,
+                     MultiFab& n_new,
+                     const Real& dt,
+                     const Real& time,
+                     const Geometry& geom) {
+    // Advance n_old by dt into n_new: temporal_integrator < 0 is unsplit, 0/1/2 are split schemes (these also overwrite n_old)
+    if (temporal_integrator >= 0 && reactDiff_reaction_type != 0) {
+        if (reaction_type == 2) {
+            Abort("SSA (reaction_type==2) requires reactDiff_reaction_type=0 for split schemes");
+        }
+    }
+
+    // external source term for diffusion/reaction solvers for inhomogeneous bc algorithm
+    MultiFab Rn_steady(n_old.boxArray(), n_old.DistributionMap(), nspecies, 0);
+    
+    if (temporal_integrator < 0) {
+
+        Rn_steady.setVal(0.);
+
+        // unsplit schemes
+        AdvanceReactionDiffusion(n_old,n_new,Rn_steady,dt,time,geom);
+
+    } else {
+        // split schemes: the source term is always zero here because the inhomogeneous bc fix aborts
+        if (inhomogeneous_bc_fix) {
+            Abort("inhomogeneous_bc_fix not implemented yet");
+        } else {
+            Rn_steady.setVal(0.);
+        }
+
+        if (temporal_integrator == 0) {
+            // D + R
+            AdvanceDiffusion(n_old,n_new,Rn_steady,dt,time,geom);
+            MultiFab::Copy(n_old,n_new,0,0,nspecies,1); // reaction step starts from the diffused state (copy includes 1 ghost cell)
+            AdvanceReaction(n_old,n_new,Rn_steady,dt,time,geom);
+
+        } else if (temporal_integrator == 1) {
+            // (1/2)R + D + (1/2)R
+            AdvanceReaction(n_old,n_new,Rn_steady,0.5*dt,time,geom);
+            // arguments swapped: n_new is the input and n_old receives the result, which avoids calling copy()
+            AdvanceDiffusion(n_new,n_old,Rn_steady,dt,time,geom);
+            AdvanceReaction(n_old,n_new,Rn_steady,0.5*dt,time,geom);
+
+        } else if (temporal_integrator == 2) {
+            // (1/2)D + R + (1/2)D
+            AdvanceDiffusion(n_old,n_new,Rn_steady,0.5*dt,time,geom);
+            // arguments swapped: n_new is the input and n_old receives the result, which avoids calling copy()
+            AdvanceReaction(n_new,n_old,Rn_steady,dt,time,geom);
+            AdvanceDiffusion(n_old,n_new,Rn_steady,0.5*dt,time,geom);
+
+        } else {
+            Abort("AdvanceTimestep(): invalid temporal_integrator");
+        }
+
+    }
+    
+}
diff --git a/src_reactDiff/DiffusiveNFluxdiv.cpp b/src_reactDiff/DiffusiveNFluxdiv.cpp
new file mode 100644
index 000000000..7d92184b2
--- /dev/null
+++ b/src_reactDiff/DiffusiveNFluxdiv.cpp
@@ -0,0 +1,101 @@
+#include "reactDiff_functions.H"
+
+#include "AMReX_MLMG.H"
+#include <AMReX_MLABecLaplacian.H>
+
+void DiffusiveNFluxdiv(MultiFab& n_in,
+                       MultiFab& diff_fluxdiv,
+                       const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face,
+                       const Geometry& geom,
+                       const Real& time) {
+    // Evaluate diff_fluxdiv = div(D_k grad n_k) for each species k by applying an MLABecLaplacian operator to n_in.
+    // single cell case set diffusive mass fluxdiv to zero and return
+    long cell_count = (AMREX_SPACEDIM==2) ? static_cast<long>(n_cells[0])*n_cells[1] : static_cast<long>(n_cells[0])*n_cells[1]*n_cells[2]; // widen before multiplying
+    if (cell_count == 1) {
+        diff_fluxdiv.setVal(0.);
+        return;
+    }
+    
+    // fill n ghost cells (this modifies n_in, hence the non-const reference)
+    n_in.FillBoundary(geom.periodicity());
+    MultiFabPhysBC(n_in, geom, 0, nspecies, SPEC_BC_COMP, time);
+
+    BoxArray ba = n_in.boxArray();
+    DistributionMapping dmap = n_in.DistributionMap();
+    
+    // don't need to set much here for explicit evaluations
+    LPInfo info;
+
+    // operator of the form (ascalar * acoef - bscalar div bcoef grad) phi
+    MLABecLaplacian mlabec({geom}, {ba}, {dmap}, info);
+    mlabec.setMaxOrder(2);
+
+    // store one component at a time and take L(phi) one component at a time
+    MultiFab phi (ba,dmap,1,1);
+    MultiFab Lphi(ba,dmap,1,0);
+
+    MultiFab acoef(ba,dmap,1,0);
+    std::array< MultiFab, AMREX_SPACEDIM > bcoef;
+    AMREX_D_TERM(bcoef[0].define(convert(ba,nodal_flag_x), dmap, 1, 0);,
+                 bcoef[1].define(convert(ba,nodal_flag_y), dmap, 1, 0);,
+                 bcoef[2].define(convert(ba,nodal_flag_z), dmap, 1, 0););
+    
+    // build array of boundary conditions needed by MLABecLaplacian
+    std::array<LinOpBCType, AMREX_SPACEDIM> lo_mlmg_bc;
+    std::array<LinOpBCType, AMREX_SPACEDIM> hi_mlmg_bc;
+
+    for (int idim = 0; idim < AMREX_SPACEDIM; ++idim)
+    {
+        if (bc_mass_lo[idim] == -1 || bc_mass_hi[idim] == -1) {
+            if ( !(bc_mass_lo[idim] == -1 && bc_mass_hi[idim] == -1) ) {
+                Abort("Both bc_mass_lo and bc_mass_hi must be periodic in a given direction if the other one is");
+            }            
+            lo_mlmg_bc[idim] = LinOpBCType::Periodic;            
+            hi_mlmg_bc[idim] = LinOpBCType::Periodic;
+        }
+
+        if (bc_mass_lo[idim] == 0) {
+            lo_mlmg_bc[idim] = LinOpBCType::inhomogNeumann;
+        } else if (bc_mass_lo[idim] == 1) {
+            lo_mlmg_bc[idim] = LinOpBCType::Dirichlet;
+        } else if (bc_mass_lo[idim] != -1) {
+            Abort("Invalid bc_mass_lo");
+        }
+
+        if (bc_mass_hi[idim] == 0) {
+            hi_mlmg_bc[idim] = LinOpBCType::inhomogNeumann;
+        } else if (bc_mass_hi[idim] == 1) {
+            hi_mlmg_bc[idim] = LinOpBCType::Dirichlet;
+        } else if (bc_mass_hi[idim] != -1) {
+            Abort("Invalid bc_mass_hi");
+        }
+    }
+
+    mlabec.setDomainBC(lo_mlmg_bc,hi_mlmg_bc);
+
+    // ascalar = 0 and bscalar = -1, so the operator reduces to +div(bcoef grad phi)
+    mlabec.setScalars(0., -1.);
+
+    acoef.setVal(0.);
+    mlabec.setACoeffs(0, acoef);
+
+    for (int i=0; i<nspecies; ++i) {
+
+        // copy ith component of n_in into phi, including ghost cells (n_in needs at least 1 ghost cell)
+        MultiFab::Copy(phi,n_in,i,0,1,1);
+
+        // load D_fick for species i into bcoef
+        for (int d=0; d<AMREX_SPACEDIM; ++d) {
+            MultiFab::Copy(bcoef[d],diff_coef_face[d],i,0,1,0);
+        }
+        mlabec.setBCoeffs(0, amrex::GetArrOfConstPtrs(bcoef));
+        // tell the operator the boundary values held in phi's ghost cells (the same call ImplicitDiffusion makes)
+        mlabec.setLevelBC(0, &phi);
+        MLMG mlmg(mlabec);
+        mlmg.apply({&Lphi},{&phi});
+
+        MultiFab::Copy(diff_fluxdiv,Lphi,0,i,1,0);
+        
+    }
+    
+}
diff --git a/src_reactDiff/ImplicitDiffusion.cpp b/src_reactDiff/ImplicitDiffusion.cpp
new file mode 100644
index 000000000..bd4ba70b3
--- /dev/null
+++ b/src_reactDiff/ImplicitDiffusion.cpp
@@ -0,0 +1,117 @@
+#include "reactDiff_functions.H"
+
+#include "AMReX_MLMG.H"
+#include <AMReX_MLABecLaplacian.H>
+
+// (I - (dt_fac) div D_k grad) n = rhs
+
+void ImplicitDiffusion(MultiFab& n_old,
+                       MultiFab& n_new,
+                       const MultiFab& rhs,
+                       const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face,
+                       const Geometry& geom,
+                       const Real& dt_fac,
+                       const Real& time) {
+    // Solve (I - dt_fac div D_k grad) n_new = rhs one species at a time with MLMG; n_old supplies the initial guess and boundary values.
+    BoxArray ba = n_old.boxArray();
+    DistributionMapping dmap = n_old.DistributionMap();
+
+    // fill n ghost cells (this modifies n_old, hence the non-const reference)
+    n_old.FillBoundary(geom.periodicity());
+    MultiFabPhysBC(n_old, geom, 0, nspecies, SPEC_BC_COMP, time);
+
+    LPInfo info;
+
+    // operator of the form (ascalar * acoef - bscalar div bcoef grad) phi
+    MLABecLaplacian mlabec({geom}, {ba}, {dmap}, info);
+    mlabec.setMaxOrder(2);
+
+    // solve one component at a time: phi carries 1 ghost cell for boundary values, rhs_comp has none
+    MultiFab phi     (ba,dmap,1,1);
+    MultiFab rhs_comp(ba,dmap,1,0);
+    
+    // build array of boundary conditions needed by MLABecLaplacian
+    std::array<LinOpBCType, AMREX_SPACEDIM> lo_mlmg_bc;
+    std::array<LinOpBCType, AMREX_SPACEDIM> hi_mlmg_bc;
+
+    for (int idim = 0; idim < AMREX_SPACEDIM; ++idim)
+    {
+        if (bc_mass_lo[idim] == -1 || bc_mass_hi[idim] == -1) {
+            if ( !(bc_mass_lo[idim] == -1 && bc_mass_hi[idim] == -1) ) {
+                Abort("Both bc_mass_lo and bc_mass_hi must be periodic in a given direction if the other one is");
+            }            
+            lo_mlmg_bc[idim] = LinOpBCType::Periodic;            
+            hi_mlmg_bc[idim] = LinOpBCType::Periodic;
+        }
+
+        if (bc_mass_lo[idim] == 0) {
+            lo_mlmg_bc[idim] = LinOpBCType::inhomogNeumann;
+        } else if (bc_mass_lo[idim] == 1) {
+            lo_mlmg_bc[idim] = LinOpBCType::Dirichlet;
+        } else if (bc_mass_lo[idim] != -1) {
+            Abort("Invalid bc_mass_lo");
+        }
+
+        if (bc_mass_hi[idim] == 0) {
+            hi_mlmg_bc[idim] = LinOpBCType::inhomogNeumann;
+        } else if (bc_mass_hi[idim] == 1) {
+            hi_mlmg_bc[idim] = LinOpBCType::Dirichlet;
+        } else if (bc_mass_hi[idim] != -1) {
+            Abort("Invalid bc_mass_hi");
+        }
+    }
+
+    mlabec.setDomainBC(lo_mlmg_bc,hi_mlmg_bc);
+
+    // storage for acoeff and bcoeff in
+    // (ascalar * acoeff I - bscalar div bcoeff grad) phi = rhs
+    MultiFab acoef(ba,dmap,1,0);
+    std::array< MultiFab, AMREX_SPACEDIM > bcoef;
+    AMREX_D_TERM(bcoef[0].define(convert(ba,nodal_flag_x), dmap, 1, 0);,
+                 bcoef[1].define(convert(ba,nodal_flag_y), dmap, 1, 0);,
+                 bcoef[2].define(convert(ba,nodal_flag_z), dmap, 1, 0););
+
+    // set ascalar and bscalar to 1
+    mlabec.setScalars(1., 1.);
+
+    // acoeff = 1
+    acoef.setVal(1.);
+    mlabec.setACoeffs(0, acoef);
+
+    // set bcoeff to dt_fac * D_i
+    for (int i=0; i<nspecies; ++i) {
+
+        // load D_fick for species i into bcoef
+        // then multiply by dt_fac
+        for (int d=0; d<AMREX_SPACEDIM; ++d) {
+            MultiFab::Copy(bcoef[d],diff_coef_face[d],i,0,1,0);
+            bcoef[d].mult(dt_fac);
+        }
+        mlabec.setBCoeffs(0, amrex::GetArrOfConstPtrs(bcoef));
+
+        // copy in n_old, including ghost cells for boundary conditions, into phi as an initial guess
+        // copy component i of rhs into rhs_comp
+        MultiFab::Copy(phi,n_old,i,0,1,1);
+        MultiFab::Copy(rhs_comp,rhs,i,0,1,0);
+
+        // tell the operator what the numerical values for physical boundary conditions are
+        mlabec.setLevelBC(0, &phi);
+        
+        MLMG mlmg(mlabec);
+
+        // solver parameters
+        mlmg.setMaxIter(100);
+        mlmg.setVerbose(0);
+        mlmg.setBottomVerbose(0);
+        
+        // do solve; per the AMReX MLMG::solve argument order, 1.e-10 is the relative and 0.0 the absolute tolerance
+        mlmg.solve({&phi}, {&rhs_comp}, 1.e-10, 0.0);
+
+        MultiFab::Copy(n_new,phi,0,i,1,0);
+        
+    }
+    // only valid cells of n_new were written above; now fill its ghost cells
+    n_new.FillBoundary(geom.periodicity());
+    MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time);
+
+}
diff --git a/src_reactDiff/InitN.cpp b/src_reactDiff/InitN.cpp
new file mode 100644
index 000000000..307b035f0
--- /dev/null
+++ b/src_reactDiff/InitN.cpp
@@ -0,0 +1,94 @@
+#include "reactDiff_functions.H"
+
+void InitN(MultiFab& n_in,
+           const Geometry& geom,
+           const Real& time) {
+    // Fill n_in with the initial state selected by prob_type, optionally resample it as integer molecule counts, then fill ghost cells.
+    const GpuArray<Real, AMREX_SPACEDIM> dx = geom.CellSizeArray();
+
+    for ( MFIter mfi(n_in,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+        
+        const Box& bx = mfi.tilebox();
+
+        const Array4<Real> & n_init = n_in.array(mfi);
+
+        if (prob_type == 0) {
+            //============================================================
+            // Thermodynamic equilibrium: every cell gets n_init_in(0,:)
+            //============================================================
+        
+            amrex::ParallelFor(bx, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+            {
+                n_init(i,j,k,n) = n_init_in(0,n);
+            });
+
+        } else if (prob_type == 5) {
+            //=================================================================
+            // bubble having radius = 0.5*perturb_width*dx[0], centered in the domain
+            // n_init = n_init_in(0,:) inside, n_init = n_init_in(1,:) outside
+            // can be discontinuous or smooth depending on smoothing_width
+            //=================================================================
+
+            Real rad = 0.5*perturb_width*dx[0];
+        
+            amrex::ParallelFor(bx, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+            {
+                Real x = prob_lo[0] + (i+0.5)*dx[0] - 0.5*(prob_lo[0]+prob_hi[0]);
+                Real y = prob_lo[1] + (j+0.5)*dx[1] - 0.5*(prob_lo[1]+prob_hi[1]);
+                Real r = std::sqrt(x*x + y*y);
+#if (AMREX_SPACEDIM == 3)
+                Real z = prob_lo[2] + (k+0.5)*dx[2] - 0.5*(prob_lo[2]+prob_hi[2]);
+                r = std::sqrt(x*x + y*y + z*z);
+#endif
+
+                if (smoothing_width == 0.) {
+                    // discontinuous interface
+                    if (r < rad) {
+                        n_init(i,j,k,n) = n_init_in(0,n);
+                    } else {
+                        n_init(i,j,k,n) = n_init_in(1,n);
+                    }
+                } else {
+                    // smooth interface: tanh profile of width smoothing_width*dx[0] centered at r = rad
+                    n_init(i,j,k,n) = n_init_in(0,n) + (n_init_in(1,n) - n_init_in(0,n))* 0.5*(1. + std::tanh((r-rad)/(smoothing_width*dx[0])));
+                }
+                
+            });
+
+        } else {
+            Abort("prob_type not implemented yet");
+        }
+
+    }
+
+    if (integer_populations == 1) { // Ensure that the initial number of molecules are integers
+
+        Real dv = (AMREX_SPACEDIM == 3) ? dx[0]*dx[1]*dx[2]*cell_depth : dx[0]*dx[1]*cell_depth;
+        // note: initial_variance_mass == 0. matches neither branch below, so n_in is left unchanged in that case
+        if (initial_variance_mass < 0.) { // Distribute the particles on the box using a multinomial sampler
+
+            Abort("integer_populations=1 with initial_variance_mass < 0. not supported yet");
+
+        } else if (initial_variance_mass > 0.) { // Make the number of molecules in each cell Poisson distributed with desired mean
+
+            for ( MFIter mfi(n_in,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+        
+                const Box& bx = mfi.tilebox();
+
+                const Array4<Real> & n_init = n_in.array(mfi);
+
+                amrex::ParallelForRNG(bx, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n, amrex::RandomEngine const& engine) noexcept
+                {
+                    // Generate the initial fluctuations using a Poisson random number generator
+                    // This assumes that the distribution of initial conditions is a product Poisson measure
+                    int nparticles = RandomPoisson(n_init(i,j,k,n)*dv, engine);
+                    n_init(i,j,k,n) = nparticles / dv;
+                });
+            }
+
+        }
+    }
+
+    n_in.FillBoundary(geom.periodicity());
+    MultiFabPhysBC(n_in, geom, 0, nspecies, SPEC_BC_COMP, time);
+}
diff --git a/src_reactDiff/Make.package b/src_reactDiff/Make.package
new file mode 100644
index 000000000..33c156374
--- /dev/null
+++ b/src_reactDiff/Make.package
@@ -0,0 +1,13 @@
+CEXE_sources   += AdvanceReaction.cpp
+CEXE_sources   += AdvanceReactionDiffusion.cpp
+CEXE_sources   += AdvanceDiffusion.cpp
+CEXE_sources   += AdvanceTimestep.cpp
+CEXE_sources   += DiffusiveNFluxdiv.cpp
+CEXE_sources   += ImplicitDiffusion.cpp
+CEXE_sources   += InitN.cpp
+CEXE_sources   += MultinomialDiffusion.cpp
+CEXE_sources   += reactDiff_functions.cpp
+CEXE_sources   += StochasticNFluxdiv.cpp
+CEXE_sources   += WritePlotFile.cpp
+CEXE_headers   += reactDiff_functions.H
+CEXE_headers   += reactDiff_namespace.H
diff --git a/src_reactDiff/MultinomialDiffusion.cpp b/src_reactDiff/MultinomialDiffusion.cpp
new file mode 100644
index 000000000..b2d702e80
--- /dev/null
+++ b/src_reactDiff/MultinomialDiffusion.cpp
@@ -0,0 +1,161 @@
+#include "reactDiff_functions.H"
+
+#include "AMReX_MLMG.H"
+#include <AMReX_MLABecLaplacian.H>
+
+#include <random>
+
+void MultinomialDiffusion(MultiFab& n_old,
+                          MultiFab& n_new,
+                          const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face,
+                          const Geometry& geom,
+                          const Real& dt,
+                          const Real& time)
+{
+#ifdef AMREX_USE_GPU
+    Abort("MultinomialDiffusion not supported on GPUs (need sum reductions)");
+#endif
+    // Diffuse n_old into n_new by multinomial sampling of how many molecules hop across each face of each cell.
+    BoxArray ba = n_old.boxArray();
+    DistributionMapping dmap = n_old.DistributionMap();
+
+    MultiFab cell_update(ba, dmap, nspecies, 1);
+    cell_update.setVal(0.);
+
+    // set new state to zero everywhere, including ghost cells
+    n_new.setVal(0.);
+
+    // copy old state into new in valid region only
+    MultiFab::Copy(n_new,n_old,0,0,nspecies,0);
+
+    const GpuArray<Real,AMREX_SPACEDIM> dx = geom.CellSizeArray();
+
+    Real dv = (AMREX_SPACEDIM==2) ? dx[0]*dx[1]*cell_depth : dx[0]*dx[1]*dx[2]*cell_depth;
+
+    for (MFIter mfi(n_new); mfi.isValid(); ++mfi)
+    {
+        const Box& bx = mfi.validbox();
+
+        const Array4<Real> & n_arr = n_new.array(mfi);
+
+        const Array4<Real> & update = cell_update.array(mfi);
+        
+        AMREX_D_TERM(const Array4<const Real> & diffx = diff_coef_face[0].array(mfi);,
+                     const Array4<const Real> & diffy = diff_coef_face[1].array(mfi);,
+                     const Array4<const Real> & diffz = diff_coef_face[2].array(mfi););
+
+        amrex::ParallelForRNG(bx, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n, amrex::RandomEngine const& engine) noexcept
+        {
+            // p[f] = D*dt/dx^2 on face f is the probability handed to multinomial_rng for leaving through that face
+            GpuArray<Real,2*AMREX_SPACEDIM> p;
+            GpuArray<Real,2*AMREX_SPACEDIM> fluxes;
+
+            p[0] = diffx(i  ,j  ,k,n)*dt/(dx[0]*dx[0]);
+            p[1] = diffx(i+1,j  ,k,n)*dt/(dx[0]*dx[0]);
+            p[2] = diffy(i  ,j  ,k,n)*dt/(dx[1]*dx[1]);
+            p[3] = diffy(i  ,j+1,k,n)*dt/(dx[1]*dx[1]);
+#if (AMREX_SPACEDIM == 3)
+            p[4] = diffz(i  ,j  ,k  ,n)*dt/(dx[2]*dx[2]);
+            p[5] = diffz(i  ,j  ,k+1,n)*dt/(dx[2]*dx[2]);
+#endif
+            // number of molecules in the cell: rounded, clamped at zero, explicitly narrowed to int
+            int N = static_cast<int>(std::max(0., std::round(n_arr(i,j,k,n)*dv)));
+            
+            multinomial_rng(fluxes, N, p, engine);
+            // scatter: remove the sampled molecules from this cell and add them to the neighbor (possibly a ghost cell)
+            // lo-x face
+            update(i  ,j,k,n) -= fluxes[0];
+            update(i-1,j,k,n) += fluxes[0];
+
+            // hi-x face
+            update(i  ,j,k,n) -= fluxes[1];
+            update(i+1,j,k,n) += fluxes[1];
+
+            // lo-y face
+            update(i,j,  k,n) -= fluxes[2];
+            update(i,j-1,k,n) += fluxes[2];
+
+            // hi-y face
+            update(i,j  ,k,n) -= fluxes[3];
+            update(i,j+1,k,n) += fluxes[3];
+
+#if (AMREX_SPACEDIM == 3)
+            // lo-z face
+            update(i,j,k,  n) -= fluxes[4];
+            update(i,j,k-1,n) += fluxes[4];
+
+            // hi-z face
+            update(i,j,k,  n) -= fluxes[5];
+            update(i,j,k+1,n) += fluxes[5];
+#endif
+        });
+    }
+    // add the accumulated molecule transfers, converted to number density, over valid cells plus 1 ghost cell
+    for (MFIter mfi(n_new); mfi.isValid(); ++mfi)
+    {
+        const Box& bx = mfi.growntilebox(1);
+
+        const Array4<Real> & n_arr = n_new.array(mfi);
+
+        const Array4<Real> & update = cell_update.array(mfi);
+
+        amrex::ParallelFor(bx, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+        {
+            n_arr(i,j,k,n) += update(i,j,k,n) / dv;
+        });
+    }
+    // sum ghost-cell arrivals into the valid cells they overlap, then refill ghost cells
+    n_new.SumBoundary(geom.periodicity());
+    n_new.FillBoundary(geom.periodicity());
+    MultiFabPhysBC(n_new, geom, 0, nspecies, SPEC_BC_COMP, time);
+}
+
+AMREX_GPU_HOST_DEVICE void multinomial_rng(GpuArray<Real,2*AMREX_SPACEDIM>& samples,
+                                           const int& N,
+                                           GpuArray<Real,2*AMREX_SPACEDIM>& p,
+                                           const amrex::RandomEngine& engine)
+{
+    // Draw N trials; each lands in bin s with probability p[s], or in no bin
+    // with the leftover probability 1 - sum(p).  samples[s] returns the bin counts.
+    Real sum_p = 0;
+    for (int sample=0; sample<2*AMREX_SPACEDIM; ++sample) {
+        sum_p += p[sample];
+        samples[sample] = 0.;
+    }
+    if (sum_p > 1.) {
+        printf("sum_p = %f",sum_p);
+        Abort("multinomial_rng: probabilities must sum to 1 or less");
+    }
+
+    // brute force multinomial
+    for (int n=0; n<N; ++n) {
+        Real x = amrex::Random(engine); // uniform over [0,1)
+        Real cum_p = 0.; // running cumulative probability (no longer shadows sum_p)
+        // bin s covers [cum_{s-1}, cum_s); strict '<' keeps a p == 0 bin from being hit when x == 0
+        for (int sample=0; sample<2*AMREX_SPACEDIM; ++sample) {
+            cum_p += p[sample];
+            if (x < cum_p) {
+                samples[sample] += 1.;
+                break;
+            }
+        }
+    }
+
+#if 0
+    // not sure why std::binomial_distribution gives grid artifacts
+    std::default_random_engine generator;
+    generator.seed(std::chrono::system_clock::now().time_since_epoch().count());
+
+    sum_p = 0.;
+    int sum_n = 0;
+    
+    for (int sample=0; sample<2*AMREX_SPACEDIM; ++sample) {
+
+        std::binomial_distribution<int> distribution(N-sum_n, p[sample]/(1.-sum_p));
+        samples[sample] = distribution(generator);
+
+        sum_n += samples[sample];
+        sum_p += p[sample];
+    }
+#endif
+}
diff --git a/src_reactDiff/StochasticNFluxdiv.cpp b/src_reactDiff/StochasticNFluxdiv.cpp
new file mode 100644
index 000000000..74dfff514
--- /dev/null
+++ b/src_reactDiff/StochasticNFluxdiv.cpp
@@ -0,0 +1,180 @@
+#include "rng_functions.H"
+#include "reactDiff_functions.H"
+
+void StochasticNFluxdiv(MultiFab& n_in,
+                        MultiFab& stoch_fluxdiv,
+                        const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face,
+                        const Geometry& geom,
+                        const Real& dt,
+                        const Real& time,
+                        int increment_div) {
+    // Build face fluxes sqrt(2*variance_coef_mass*D*n_face/(dv*dt))*Z and store (or add, if increment_div) their divergence in stoch_fluxdiv.
+    // single cell case set stochastic mass fluxdiv to zero
+    // (or leave it untouched when incrementing, i.e. a zero increment) and return
+    long cell_count = (AMREX_SPACEDIM==2) ? static_cast<long>(n_cells[0])*n_cells[1] : static_cast<long>(n_cells[0])*n_cells[1]*n_cells[2];
+    if (cell_count == 1) {
+        if (increment_div==0) { stoch_fluxdiv.setVal(0.); }
+        return;
+    }
+
+    BoxArray ba = n_in.boxArray();
+    DistributionMapping dmap = n_in.DistributionMap();
+
+    std::array< MultiFab, AMREX_SPACEDIM > flux;
+    AMREX_D_TERM(flux[0].define(convert(ba,nodal_flag_x), dmap, nspecies, 0);,
+                 flux[1].define(convert(ba,nodal_flag_y), dmap, nspecies, 0);,
+                 flux[2].define(convert(ba,nodal_flag_z), dmap, nspecies, 0););
+
+    std::array< MultiFab, AMREX_SPACEDIM > rand;
+    AMREX_D_TERM(rand[0].define(convert(ba,nodal_flag_x), dmap, nspecies, 0);,
+                 rand[1].define(convert(ba,nodal_flag_y), dmap, nspecies, 0);,
+                 rand[2].define(convert(ba,nodal_flag_z), dmap, nspecies, 0););
+
+    const Real* dx = geom.CellSize();
+
+    Real dv = (AMREX_SPACEDIM == 3) ? dx[0]*dx[1]*dx[2]*cell_depth : dx[0]*dx[1]*cell_depth;
+
+    // average n_in to faces, store in flux
+    for (MFIter mfi(n_in); mfi.isValid(); ++mfi)
+    {
+        // the face loops read n_in one cell outside the valid box; ghost cells are not filled in this routine
+
+        const Array4<const Real>& n_arr = n_in.array(mfi);
+
+        AMREX_D_TERM(const Array4<Real> & fluxx = flux[0].array(mfi);,
+                     const Array4<Real> & fluxy = flux[1].array(mfi);,
+                     const Array4<Real> & fluxz = flux[2].array(mfi););
+
+        AMREX_D_TERM(const Box & bx_x = mfi.nodaltilebox(0);,
+                     const Box & bx_y = mfi.nodaltilebox(1);,
+                     const Box & bx_z = mfi.nodaltilebox(2););
+        
+        amrex::ParallelFor(bx_x, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+        {
+            fluxx(i,j,k,n) = average_to_faces(n_arr(i-1,j,k,n),n_arr(i,j,k,n),dv);
+        },
+                           bx_y, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+        {
+            fluxy(i,j,k,n) = average_to_faces(n_arr(i,j-1,k,n),n_arr(i,j,k,n),dv);
+        }
+#if (AMREX_SPACEDIM == 3)
+                         , bx_z, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+        {
+            fluxz(i,j,k,n) = average_to_faces(n_arr(i,j,k-1,n),n_arr(i,j,k,n),dv);
+        }
+#endif
+        );
+    }
+
+    // generate random numbers
+    for (int i=0; i<AMREX_SPACEDIM; ++i) {
+        for (int n=0; n<nspecies; ++n) {
+            MultiFabFillRandom(rand[i], n, 1., geom, 0);
+        }
+    }
+    
+    // assemble_stoch_n_fluxes
+    for (MFIter mfi(n_in); mfi.isValid(); ++mfi)
+    {
+        // flux holds the face-averaged n at this point; overwrite it in place
+        // with sqrt(D * n_face) * Z, where Z is the random field in rand
+        // (the cell-centered n_in itself is not needed in this loop)
+
+        AMREX_D_TERM(const Array4<Real> & fluxx = flux[0].array(mfi);,
+                     const Array4<Real> & fluxy = flux[1].array(mfi);,
+                     const Array4<Real> & fluxz = flux[2].array(mfi););
+
+        AMREX_D_TERM(const Array4<Real> & randx = rand[0].array(mfi);,
+                     const Array4<Real> & randy = rand[1].array(mfi);,
+                     const Array4<Real> & randz = rand[2].array(mfi););
+
+        AMREX_D_TERM(const Array4<const Real> & coefx = diff_coef_face[0].array(mfi);,
+                     const Array4<const Real> & coefy = diff_coef_face[1].array(mfi);,
+                     const Array4<const Real> & coefz = diff_coef_face[2].array(mfi););
+
+        AMREX_D_TERM(const Box & bx_x = mfi.nodaltilebox(0);,
+                     const Box & bx_y = mfi.nodaltilebox(1);,
+                     const Box & bx_z = mfi.nodaltilebox(2););
+        
+        amrex::ParallelFor(bx_x, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+        {
+            fluxx(i,j,k,n) = std::sqrt(coefx(i,j,k,n)*fluxx(i,j,k,n)) * randx(i,j,k,n);
+        },
+                           bx_y, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+        {
+            fluxy(i,j,k,n) = std::sqrt(coefy(i,j,k,n)*fluxy(i,j,k,n)) * randy(i,j,k,n);
+        }
+#if (AMREX_SPACEDIM == 3)
+                         , bx_z, nspecies, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+        {
+            fluxz(i,j,k,n) = std::sqrt(coefz(i,j,k,n)*fluxz(i,j,k,n)) * randz(i,j,k,n);            
+        }
+#endif
+        );
+    }
+
+    for (int i=0; i<AMREX_SPACEDIM; ++i) {
+        if (bc_mass_lo[i] != -1 || bc_mass_hi[i] != -1) {
+            Abort("StochasticNFluxdiv() - implement physical bc's for noise");
+        }
+    }
+    
+    for (int i=0; i<AMREX_SPACEDIM; ++i) {
+        flux[i].mult(std::sqrt(2.*variance_coef_mass/(dv*dt)));
+    }
+
+    // compute flux divergence
+    ComputeDiv(stoch_fluxdiv, flux, 0, 0, nspecies, geom, increment_div);
+    
+}
+
+AMREX_GPU_HOST_DEVICE Real average_to_faces(const Real& value1,
+                                            const Real& value2,
+                                            const Real& dv) {
+
+    // Combine the two cell values adjacent to a face into a single face
+    // value, using the averaging rule selected by the global avg_type.
+    //
+    //   value1, value2 : cell values on either side of the face
+    //   dv             : scale applied to each value before it is capped
+    //                    at 1 (StochasticNFluxdiv passes its dv here)
+    //
+    // Only avg_type == 1 is implemented.  These options are recognized
+    // but abort, exactly like any other unknown value:
+    //
+    //    2 : Geometric
+    //    3 : Harmonic
+    //        (intended: harmonic mean of max(value1,0) and max(value2,0),
+    //         defined as zero if either one is zero, while numerically
+    //         avoiding division by zero)
+    //   10 : Arithmetic with (discontinuous) Heaviside
+    //   11 : Arithmetic with C1-smoothed Heaviside
+    //   12 : Arithmetic with C2-smoothed Heaviside
+    //
+    // All aborting paths use the same message and return 0.
+
+    const bool is_c0_arithmetic = (avg_type == 1);
+
+    if (!is_c0_arithmetic) {
+        Abort("average_to_faces: unimplemented avg_type");
+        return 0;
+    }
+
+    // avg_type == 1: Arithmetic with a C0-smoothed Heaviside
+
+    // a non-positive value on either side switches the face value off
+    const bool side_is_empty = (value1 <= 0.) || (value2 <= 0.);
+    if (side_is_empty) {
+        return 0.;
+    }
+
+    // each factor grows linearly with dv*value and is capped at 1
+    const Real ramp1 = std::min(dv*value1,1.);
+    const Real ramp2 = std::min(dv*value2,1.);
+
+    // arithmetic mean of the two sides, damped by both ramps
+    const Real face_value = (value1+value2)/2.*ramp1*ramp2;
+
+    return face_value;
+
+}
diff --git a/src_reactDiff/WritePlotFile.cpp b/src_reactDiff/WritePlotFile.cpp
new file mode 100644
index 000000000..3021df00c
--- /dev/null
+++ b/src_reactDiff/WritePlotFile.cpp
@@ -0,0 +1,32 @@
+#include "AMReX_PlotFileUtil.H"
+#include "reactDiff_functions.H"
+
+// Write the species data held in n_in to a single-level plotfile.
+//   step : step number; used in the plotfile name and passed to the writer
+//   time : simulation time passed to the writer
+//   geom : geometry passed to the writer
+//   n_in : MultiFab whose nspecies components are written as n1, n2, ...
+void WritePlotFile(int step,
+                   const amrex::Real time,
+                   const amrex::Geometry& geom,
+                   const MultiFab& n_in)
+{
+
+    BL_PROFILE_VAR("WritePlotFile()",WritePlotFile);
+
+    // plotfile name is built by Concatenate from plot_base_name and step
+    const std::string plotfilename = Concatenate(plot_base_name,step,7);
+
+    amrex::Print() << "Writing plotfile " << plotfilename << "\n";
+
+    // one variable name per species, numbered from 1; std::to_string keeps
+    // the names well-formed for ten or more species, where a single
+    // character offset from '1' would run past '9'
+    Vector<std::string> varNames(nspecies);
+    for (int i=0; i<nspecies; ++i) {
+        varNames[i] = "n" + std::to_string(i+1);
+    }
+
+    WriteSingleLevelPlotfile(plotfilename,n_in,varNames,geom,time,step);
+
+}
diff --git a/src_reactDiff/main_driver.cpp b/src_reactDiff/main_driver.cpp
new file mode 100644
index 000000000..c92e65ffc
--- /dev/null
+++ b/src_reactDiff/main_driver.cpp
@@ -0,0 +1,322 @@
+
+#include "common_functions.H"
+#include "chemistry_functions.H"
+#include "reactDiff_functions.H"
+#include "StructFact.H"
+
+#include <AMReX_VisMF.H>
+#include <AMReX_PlotFileUtil.H>
+#include <AMReX_ParallelDescriptor.H>
+#include <AMReX_MultiFabUtil.H>
+
+#include "chrono"
+
+using namespace std::chrono;
+using namespace amrex;
+using namespace common;
+using namespace chemistry;
+
+// Driver for the reaction-diffusion code; argv contains the name of the inputs file entered at the command line
+void main_driver(const char* argv)
+{
+
+    BL_PROFILE_VAR("main_driver()",main_driver);
+
+    // store the current time so we can later compute total run time.
+    Real strt_time = ParallelDescriptor::second();
+
+    std::string inputs_file = argv;
+
+    // Initialize variables in namespaces
+    InitializeCommonNamespace();
+    InitializeChemistryNamespace();
+    InitializeReactDiffNamespace();
+
+    /////////////////////////////////////////
+    // Initialize seeds for random number generator
+    /////////////////////////////////////////
+    if (restart < 0) {
+        // root seed; assigned by one of the branches below (the remaining branch aborts)
+        int mySeed = 0;
+
+        if (seed > 0) {
+            // initializes the seed for C++ random number calls with a specified root seed
+            mySeed = seed;
+        } else if (seed == 0) {
+            // initializes the root seed for C++ random number calls based on the clock
+            auto now = time_point_cast<nanoseconds>(system_clock::now());
+            mySeed = now.time_since_epoch().count(); // assigns the mySeed declared above, which InitRandom reads
+            // broadcast the same root seed to all processors
+            ParallelDescriptor::Bcast(&mySeed,1,ParallelDescriptor::IOProcessorNumber());
+        } else {
+            Abort("Must supply non-negative seed");
+        }
+
+        // MPI ranks > 0 get a seed incremented by the rank
+        InitRandom(mySeed+ParallelDescriptor::MyProc(),
+                   ParallelDescriptor::NProcs(),
+                   mySeed+ParallelDescriptor::MyProc());
+
+    }
+
+    if (nreaction > 0 && use_mole_frac_LMA) {
+        if (include_discrete_LMA_correction == 1) {
+            Abort("Error: currently use_mole_frac_LMA can be used only with include_discrete_LMA_correction=0");
+        }
+        if (exclude_solvent_comput_rates != -1) {
+            Abort("Error: currently use_mole_frac_LMA can be used only with exclude_solvent_comput_rates=-1");
+        }
+    }
+
+    // is the problem periodic?
+    Vector<int> is_periodic(AMREX_SPACEDIM,0);  // set to 0 (not periodic) by default
+    for (int i=0; i<AMREX_SPACEDIM; ++i) {
+        if (bc_mass_lo[i] == -1 && bc_mass_hi[i] == -1) {
+            is_periodic[i] = 1;
+        }
+    }
+
+    // This defines the physical box, from prob_lo to prob_hi in each direction.
+    RealBox real_box({AMREX_D_DECL(prob_lo[0],prob_lo[1],prob_lo[2])},
+                     {AMREX_D_DECL(prob_hi[0],prob_hi[1],prob_hi[2])});
+
+    IntVect dom_lo(AMREX_D_DECL(           0,            0,            0));
+    IntVect dom_hi(AMREX_D_DECL(n_cells[0]-1, n_cells[1]-1, n_cells[2]-1));
+    Box domain(dom_lo, dom_hi);
+    // total number of cells; promote to long before multiplying so the product cannot overflow int
+    long cell_count = (AMREX_SPACEDIM==2) ? static_cast<long>(n_cells[0])*n_cells[1] : static_cast<long>(n_cells[0])*n_cells[1]*n_cells[2];
+
+    Geometry geom(domain,&real_box,CoordSys::cartesian,is_periodic.data());
+
+    const Real* dx = geom.CellSize();
+
+    BoxArray ba;
+    DistributionMapping dmap;
+
+    int step_start;
+    amrex::Real time;
+
+    MultiFab n_old;
+    MultiFab n_new;
+
+    ///////////////////////////////////////////
+    // Initialize structure factor object for analysis
+    ///////////////////////////////////////////
+
+    Vector< std::string > var_names;
+    var_names.resize(nspecies);
+
+    int cnt = 0;
+    std::string x;
+
+    // n1, n2, ... (std::to_string keeps the names well-formed beyond nine species)
+    for (int d=0; d<nspecies; d++) {
+      x = "n";
+      x += std::to_string(d+1);
+      var_names[cnt++] = x;
+    }
+
+    // need to use dv for scaling
+    Real dv = (AMREX_SPACEDIM==2) ? dx[0]*dx[1]*cell_depth : dx[0]*dx[1]*dx[2]*cell_depth;
+
+    // 0 = compute only specified pairs listed in s_pairA and s_pairB
+    // 1 = compute all possible pairs of variables
+    int compute_all_pairs = 1;
+
+    int nPairs = (compute_all_pairs) ? nspecies*(nspecies+1)/2 : 2;
+
+    Vector<Real> var_scaling(nPairs);
+    for (int d=0; d<var_scaling.size(); ++d) {
+        var_scaling[d] = 1./dv;
+    }
+
+    StructFact structFact;
+
+    if (restart < 0) {
+
+        step_start = 1;
+        time = 0.;
+
+        // Initialize the boxarray "ba" from the single box "bx"
+        ba.define(domain);
+
+        // Break up boxarray "ba" into chunks no larger than "max_grid_size" along a direction
+        // note we are converting "Vector<int> max_grid_size" to an IntVect
+        ba.maxSize(IntVect(max_grid_size));
+
+        dmap.define(ba);
+
+        n_old.define(ba,dmap,nspecies,1);
+        n_new.define(ba,dmap,nspecies,1);
+
+        if (model_file_init) {
+            Abort("model_file_init not supported yet");
+        } else {
+            // Initialize n
+            InitN(n_old,geom,time);
+        }
+
+        if (std::abs(initial_variance_mass) > 0.) {
+            if (integer_populations == 0) {
+                Abort("add_init_n_fluctuations not supported yet");
+                // add_init_n_fluctuations()
+            }
+        }
+
+        // structure factor
+        if (compute_all_pairs) {
+            // option to compute all pairs
+            structFact.define(ba,dmap,var_names,var_scaling);
+        } else {
+            // option to compute only specified pairs
+            // nPairs is 2 in this branch (set above because compute_all_pairs is 0)
+            amrex::Vector< int > s_pairA(nPairs);
+            amrex::Vector< int > s_pairB(nPairs);
+
+            // Select which variable pairs to include in structure factor:
+            s_pairA[0] = 0;
+            s_pairB[0] = 0;
+            s_pairA[1] = 1;
+            s_pairB[1] = 1;
+
+            structFact.define(ba,dmap,var_names,var_scaling,s_pairA,s_pairB);
+        }
+
+    } else {
+
+        // checkpoint restart
+        Abort("checkpoint read not implemented yet");
+
+    }
+
+    Real dt;
+    if (fixed_dt > 0.) {
+        dt = fixed_dt;
+        Print() << "Setting dt using fixed_dt = " << dt << std::endl;
+    } else {
+        Real D_Fick_max = 0.;
+        for (int i=0; i<nspecies; ++i ) {
+            D_Fick_max = std::max(D_Fick_max,D_Fick[i]);
+        }
+        Real dx_min = dx[0];
+        for (int i=1; i<AMREX_SPACEDIM; ++i) {
+            dx_min = std::min(dx_min,dx[i]);
+        }
+        dt = cfl * dx_min * dx_min / (2. * AMREX_SPACEDIM * D_Fick_max);  // explicit diffusion limit: dt = cfl*dx^2/(2*dim*D)
+        Print() << "Setting dt using explicit diffusion cfl condition = " << dt << std::endl;
+    }
+
+    if (inhomogeneous_bc_fix == 1 && temporal_integrator > 0) {
+        Abort("comput_n_steady not supported for inhomogeneous_bc_fix == 1 && temporal_integrator > 0 yet");
+        // compute_n_steady()
+    }
+
+    if (temporal_integrator < 0) { // unsplit schemes
+        // Donev: The code will work for a single cell also but may not be the most efficient, so issue warning:
+        if (n_cells[0] == 1 && n_cells[1] == 1) {
+            Print() << "WARNING in advance_reaction_diffusion: use splitting based schemes (temporal_integrator>=0) for single cell" << std::endl;
+        }
+        if (nreaction < 1) {
+            Print() << "WARNING in advance_reaction_diffusion: use splitting based schemes (temporal_integrator>=0) for diffusion only" << std::endl;
+        }
+    }
+
+    int istep = (restart < 0) ? 0 : restart;
+    WritePlotFile(istep,time,geom,n_old);
+
+    ///////////////////////////////////////////
+    // Create output file for averaged density; only the I/O processor opens it so that MPI ranks
+    // do not all write the same file (stream writes on the other ranks go to an unopened stream and do nothing)
+    std::ofstream outputFile;
+    if (ParallelDescriptor::IOProcessor()) { outputFile.open("averagedDensity.txt"); }
+    outputFile << "time ";
+    for (int comp = 0; comp < nspecies; ++comp) {
+        outputFile << "comp_" << comp << " ";
+    }
+    outputFile << std::endl;
+
+    // time step loop
+    for(int step=step_start;step<=max_step;++step) {
+
+        // store the current time so we can later compute total run time.
+        Real step_strt_time = ParallelDescriptor::second();
+
+        AdvanceTimestep(n_old,n_new,dt,time,geom);
+
+        time += dt;
+        MultiFab::Copy(n_old,n_new,0,0,nspecies,1);
+
+        outputFile << std::setprecision(12) << time << " ";
+        amrex::Print() << "time = " << time << " n_avg = ";
+
+        // Compute average n for each species, print to file?
+        for (int comp = 0; comp < nspecies; ++comp) {
+
+            amrex::Real n_sum = n_old.sum(comp);
+            amrex::Real n_avg = n_sum / cell_count;
+            amrex::Print() << n_avg << " ";
+            outputFile << std::setprecision(15) << n_avg << " ";
+        }
+        amrex::Print() << std::endl;
+        outputFile << std::endl;
+
+        // Call the timer again and compute the maximum difference between the start time
+        // and stop time over all processors
+        Real step_stop_time = ParallelDescriptor::second() - step_strt_time;
+        ParallelDescriptor::ReduceRealMax(step_stop_time);
+        amrex::Print() << "Time step " << step << " complted in " << step_stop_time << " seconds\n";
+
+        // add a snapshot to the structure factor
+        if (step > n_steps_skip && struct_fact_int > 0 && (step-n_steps_skip)%struct_fact_int == 0) {
+
+            // add this snapshot to the average in the structure factor
+            structFact.FortStructure(n_new);
+
+        }
+
+        if (plot_int > 0 && step%plot_int == 0) {
+
+            WritePlotFile(step,time,geom,n_new);
+
+            // write out structure factor to plotfile
+            if (step > n_steps_skip && struct_fact_int > 0) {
+                structFact.WritePlotFile(step,time,geom,"plt_SF");
+            }
+        }
+
+        if (chk_int > 0 && step%chk_int == 0) {
+            Abort("checkpoint write not implemented yet");
+        }
+
+        // MultiFab memory usage
+        const int IOProc = ParallelDescriptor::IOProcessorNumber();
+
+        amrex::Long min_fab_megabytes  = amrex::TotalBytesAllocatedInFabsHWM()/1048576;
+        amrex::Long max_fab_megabytes  = min_fab_megabytes;
+
+        ParallelDescriptor::ReduceLongMin(min_fab_megabytes, IOProc);
+        ParallelDescriptor::ReduceLongMax(max_fab_megabytes, IOProc);
+
+        amrex::Print() << "High-water FAB megabyte spread across MPI nodes: ["
+                       << min_fab_megabytes << " ... " << max_fab_megabytes << "]\n";
+
+        min_fab_megabytes  = amrex::TotalBytesAllocatedInFabs()/1048576;
+        max_fab_megabytes  = min_fab_megabytes;
+
+        ParallelDescriptor::ReduceLongMin(min_fab_megabytes, IOProc);
+        ParallelDescriptor::ReduceLongMax(max_fab_megabytes, IOProc);
+
+        amrex::Print() << "Curent     FAB megabyte spread across MPI nodes: ["
+                       << min_fab_megabytes << " ... " << max_fab_megabytes << "]\n";
+
+    }
+
+    outputFile.close();
+
+    // Call the timer again and compute the maximum difference between the start time
+    // and stop time over all processors
+    Real stop_time = ParallelDescriptor::second() - strt_time;
+    ParallelDescriptor::ReduceRealMax(stop_time);
+    amrex::Print() << "Run time = " << stop_time << std::endl;
+
+}
diff --git a/src_reactDiff/reactDiff_functions.H b/src_reactDiff/reactDiff_functions.H
new file mode 100644
index 000000000..b3ede1f01
--- /dev/null
+++ b/src_reactDiff/reactDiff_functions.H
@@ -0,0 +1,132 @@
+#ifndef _reactdiff_functions_H_
+#define _reactdiff_functions_H_
+
+#include <AMReX.H>
+
+#include "common_functions.H"
+#include "common_namespace.H"
+#include "reactDiff_namespace.H"
+
+using namespace reactDiff;
+using namespace amrex;
+using namespace common;
+
+////////////////////////
+// In reactDiff_functions.cpp (reads the reactDiff runtime parameters with ParmParse)
+////////////////////////
+void InitializeReactDiffNamespace();
+
+////////////////////////
+// In AdvanceDiffusion.cpp
+////////////////////////
+void AdvanceDiffusion(MultiFab& n_old,
+                      MultiFab& n_new,
+                      const MultiFab& ext_src,
+                      const Real& dt,
+                      const Real& time,
+                      const Geometry& geom);
+
+void GenerateStochasticFluxdivCorrector(MultiFab& n_old,
+                                        MultiFab& n_new,
+                                        MultiFab& stoch_fluxdiv,
+                                        const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face,
+                                        const Real& dt,
+                                        const Real& time,
+                                        const Geometry& geom);
+
+////////////////////////
+// In AdvanceReaction.cpp
+////////////////////////
+void AdvanceReaction(MultiFab& n_old,
+                     MultiFab& n_new,
+                     const MultiFab& ext_src,
+                     const Real& dt,
+                     const Real& time,
+                     const Geometry& geom);
+
+////////////////////////
+// In AdvanceReactionDiffusion.cpp
+////////////////////////
+void AdvanceReactionDiffusion(MultiFab& n_old,
+                              MultiFab& n_new,
+                              const MultiFab& ext_src,
+                              const Real& dt,
+                              const Real& time,
+                              const Geometry& geom);
+
+////////////////////////
+// In AdvanceTimestep.cpp (called once per step from the time loop in main_driver)
+////////////////////////
+void AdvanceTimestep(MultiFab& n_old,
+                     MultiFab& n_new,
+                     const Real& dt,
+                     const Real& time,
+                     const Geometry& geom);
+
+////////////////////////
+// In ImplicitDiffusion.cpp
+////////////////////////
+void ImplicitDiffusion(MultiFab& n_old,
+                       MultiFab& n_new,
+                       const MultiFab& rhs,
+                       const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face,
+                       const Geometry& geom,
+                       const Real& dt_fac,
+                       const Real& time);
+
+////////////////////////
+// In DiffusiveNFluxdiv.cpp
+////////////////////////
+void DiffusiveNFluxdiv(MultiFab& n_in,
+                       MultiFab& diff_fluxdiv,
+                       const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face,
+                       const Geometry& geom,
+                       const Real& time);
+
+////////////////////////
+// In InitN.cpp (called by main_driver on a fresh start to initialize n)
+////////////////////////
+void InitN(MultiFab& n_in,
+           const Geometry& geom,
+           const Real& time);
+
+////////////////////////
+// In MultinomialDiffusion.cpp
+////////////////////////
+void MultinomialDiffusion(MultiFab& n_old,
+                          MultiFab& n_new,
+                          const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face,
+                          const Geometry& geom,
+                          const Real& dt,
+                          const Real& time);
+
+AMREX_GPU_HOST_DEVICE void multinomial_rng(GpuArray<Real,2*AMREX_SPACEDIM>& samples,
+                                           const int& N,
+                                           GpuArray<Real,2*AMREX_SPACEDIM>& p,
+                                           const amrex::RandomEngine& engine);
+
+////////////////////////
+// In StochasticNFluxdiv.cpp
+////////////////////////
+void StochasticNFluxdiv(MultiFab& n_in,
+                        MultiFab& stoch_fluxdiv,
+                        const std::array< MultiFab, AMREX_SPACEDIM >& diff_coef_face,
+                        const Geometry& geom,
+                        const Real& dt,
+                        const Real& time,
+                        int increment_div=0); // increment_div is forwarded to ComputeDiv
+// face average selected by avg_type; only avg_type 1 is implemented, other values call Abort
+AMREX_GPU_HOST_DEVICE Real average_to_faces(const Real& value1,
+                                            const Real& value2,
+                                            const Real& dv);
+
+////////////////////////
+// In WritePlotFile.cpp (writes n_in to a single-level plotfile)
+////////////////////////
+
+void WritePlotFile(int step,
+                   const amrex::Real time,
+                   const amrex::Geometry& geom,
+		   const MultiFab& n_in);
+
+#endif
diff --git a/src_reactDiff/reactDiff_functions.cpp b/src_reactDiff/reactDiff_functions.cpp
new file mode 100644
index 000000000..4acb3f7d9
--- /dev/null
+++ b/src_reactDiff/reactDiff_functions.cpp
@@ -0,0 +1,144 @@
+#include "reactDiff_functions.H"
+#include "AMReX_ParmParse.H"
+
+// 0=D + R (first-order splitting)
+// 1=(1/2)R + D + (1/2)R (Strang option 1)
+// 2=(1/2)D + R + (1/2)D (Strang option 2)
+// -1=unsplit forward Euler
+// -2=unsplit explicit midpoint 
+// -3=unsplit multinomial diffusion
+// -4=unsplit implicit midpoint
+AMREX_GPU_MANAGED int reactDiff::temporal_integrator; // default 0
+
+// only used for split schemes (temporal_integrator>=0)
+// 0=explicit trapezoidal predictor/corrector
+// 1=Crank-Nicolson semi-implicit
+// 2=explicit midpoint
+// 3=multinomial diffusion
+// 4=forward Euler  
+AMREX_GPU_MANAGED int reactDiff::reactDiff_diffusion_type; // default 0
+
+// only used for split schemes (temporal_integrator>=0)
+// 0=first-order (deterministic, tau leaping, CLE, or SSA)
+// 1=second-order (deterministic, tau leaping, or CLE only)
+AMREX_GPU_MANAGED int reactDiff::reactDiff_reaction_type; // default 0
+
+// only used for midpoint diffusion schemes (split as well as unsplit)
+// corrector formulation of noise
+// 1 = K(nold) * W1 + K(nold)         * W2
+// 2 = K(nold) * W1 + K(npred)        * W2
+// 3 = K(nold) * W1 + K(2*npred-nold) * W2
+AMREX_GPU_MANAGED int reactDiff::midpoint_stoch_flux_type; // default 1
+
+// how to compute n on faces for stochastic weighting
+// 1=arithmetic (with C0-Heaviside), 2=geometric, 3=harmonic
+// 10=arithmetic average with discontinuous Heaviside function
+// 11=arithmetic average with C1-smoothed Heaviside function
+// 12=arithmetic average with C2-smoothed Heaviside function
+AMREX_GPU_MANAGED int reactDiff::avg_type; // default 1
+
+// use the Einkemmer boundary condition fix (split schemes only)
+AMREX_GPU_MANAGED int reactDiff::inhomogeneous_bc_fix; // default 0
+
+// volume multiplier (dv = product(dx(1:MAX_SPACEDIM))*volume_factor)
+// only really intended for 3D since in 2D one can control the cell depth
+AMREX_GPU_MANAGED amrex::Real reactDiff::volume_factor; // default 1.
+
+// initial values to be used in init_n.f90 (Fortran file name; presumably InitN.cpp in this code -- TODO confirm)
+AMREX_GPU_MANAGED Array2D<amrex::Real, 0, 2 ,0, MAX_SPECIES> reactDiff::n_init_in; // rows 0 and 1 are read from n_init_in_1 and n_init_in_2; NOTE(review): bounds look inclusive, so one extra row/column -- confirm
+
+// initialize from model file
+AMREX_GPU_MANAGED int reactDiff::model_file_init; // default 0
+
+// initialize with all number of molecules strictly integer
+AMREX_GPU_MANAGED int reactDiff::integer_populations; // default 0
+
+// Fickian diffusion coeffs
+AMREX_GPU_MANAGED amrex::GpuArray<amrex::Real, MAX_SPECIES> reactDiff::D_Fick; // first nspecies entries are read from D_Fick when it is given
+
+// diffusion boundary stencil order
+AMREX_GPU_MANAGED int reactDiff::diffusion_stencil_order; // default 1
+
+// implicit diffusion solve verbosity
+AMREX_GPU_MANAGED int reactDiff::diffusion_verbose; // default 0
+
+// implicit diffusion solve bottom solver verbosity
+AMREX_GPU_MANAGED int reactDiff::diffusion_bottom_verbose; // default 0
+
+// relative eps for implicit diffusion solve
+AMREX_GPU_MANAGED amrex::Real reactDiff::implicit_diffusion_rel_eps; // default 1.e-10
+
+// absolute eps for implicit diffusion solve
+AMREX_GPU_MANAGED amrex::Real reactDiff::implicit_diffusion_abs_eps; // default -1.
+
+// Read the reactDiff runtime parameters from the inputs file with ParmParse.
+// Scalars are set to their default and overwritten only if the inputs file
+// supplies them; n_init_in and D_Fick are assigned only when present.
+void InitializeReactDiffNamespace()
+{
+    ParmParse pp;
+
+    // NOTE(review): temp is the scratch buffer shared by every queryarr call below;
+    // presumably an input with fewer than nspecies values leaves stale entries -- confirm
+    const int temp_max = std::max(MAX_SPECIES,MAX_REACTION);
+    amrex::Vector<amrex::Real> temp(temp_max,0.);
+
+    temporal_integrator = 0;
+    pp.query("temporal_integrator",temporal_integrator);
+
+    reactDiff_diffusion_type = 0;
+    pp.query("reactDiff_diffusion_type",reactDiff_diffusion_type);
+
+    reactDiff_reaction_type = 0;
+    pp.query("reactDiff_reaction_type",reactDiff_reaction_type);
+
+    midpoint_stoch_flux_type = 1;
+    pp.query("midpoint_stoch_flux_type",midpoint_stoch_flux_type);
+
+    avg_type = 1;
+    pp.query("avg_type",avg_type);
+
+    inhomogeneous_bc_fix = 0;
+    pp.query("inhomogeneous_bc_fix",inhomogeneous_bc_fix);
+
+    volume_factor = 1.;
+    pp.query("volume_factor",volume_factor);
+
+    if (pp.queryarr("n_init_in_1",temp)) {
+        for (int i=0; i<nspecies; ++i) {
+            n_init_in(0,i) = temp[i];
+        }
+    }
+    if (pp.queryarr("n_init_in_2",temp)) {
+        for (int i=0; i<nspecies; ++i) {
+            n_init_in(1,i) = temp[i];
+        }
+    }
+
+    model_file_init = 0;
+    pp.query("model_file_init",model_file_init);
+
+    integer_populations = 0;
+    pp.query("integer_populations",integer_populations);
+
+    if (pp.queryarr("D_Fick",temp)) {
+        for (int i=0; i<nspecies; ++i) {
+            D_Fick[i] = temp[i];
+        }
+    }
+
+    diffusion_stencil_order = 1;
+    pp.query("diffusion_stencil_order",diffusion_stencil_order);
+
+    diffusion_verbose = 0;
+    pp.query("diffusion_verbose",diffusion_verbose);
+
+    diffusion_bottom_verbose = 0;
+    pp.query("diffusion_bottom_verbose",diffusion_bottom_verbose);
+
+    implicit_diffusion_rel_eps = 1.e-10;
+    pp.query("implicit_diffusion_rel_eps",implicit_diffusion_rel_eps);
+
+    implicit_diffusion_abs_eps = -1.;
+    pp.query("implicit_diffusion_abs_eps",implicit_diffusion_abs_eps);
+}
diff --git a/src_reactDiff/reactDiff_namespace.H b/src_reactDiff/reactDiff_namespace.H
new file mode 100644
index 000000000..99d4668a1
--- /dev/null
+++ b/src_reactDiff/reactDiff_namespace.H
@@ -0,0 +1,20 @@
+namespace reactDiff {
+
+    // see reactDiff_functions.cpp for descriptions and default values
+    extern AMREX_GPU_MANAGED int temporal_integrator; // split (>=0) or unsplit (<0) time integration scheme
+    extern AMREX_GPU_MANAGED int reactDiff_diffusion_type; // diffusion scheme for split schemes
+    extern AMREX_GPU_MANAGED int reactDiff_reaction_type; // first- or second-order reactions for split schemes
+    extern AMREX_GPU_MANAGED int midpoint_stoch_flux_type; // corrector noise formulation for midpoint diffusion schemes
+    extern AMREX_GPU_MANAGED int avg_type; // how n is computed on faces for stochastic weighting
+    extern AMREX_GPU_MANAGED int inhomogeneous_bc_fix; // Einkemmer boundary condition fix (split schemes only)
+    extern AMREX_GPU_MANAGED amrex::Real volume_factor; // volume multiplier
+    extern AMREX_GPU_MANAGED Array2D<amrex::Real, 0, 2 ,0, MAX_SPECIES> n_init_in; // initial values read from n_init_in_1 / n_init_in_2
+    extern AMREX_GPU_MANAGED int model_file_init; // initialize from model file
+    extern AMREX_GPU_MANAGED int integer_populations; // initialize with strictly integer numbers of molecules
+    extern AMREX_GPU_MANAGED amrex::GpuArray<amrex::Real, MAX_SPECIES> D_Fick; // Fickian diffusion coefficients
+    extern AMREX_GPU_MANAGED int diffusion_stencil_order; // diffusion boundary stencil order
+    extern AMREX_GPU_MANAGED int diffusion_verbose; // implicit diffusion solve verbosity
+    extern AMREX_GPU_MANAGED int diffusion_bottom_verbose; // implicit diffusion bottom solver verbosity
+    extern AMREX_GPU_MANAGED amrex::Real implicit_diffusion_rel_eps; // relative eps for implicit diffusion solve
+    extern AMREX_GPU_MANAGED amrex::Real implicit_diffusion_abs_eps; // absolute eps for implicit diffusion solve
+}
diff --git a/unmaintained/channel_soft/main_driver.cpp b/unmaintained/channel_soft/main_driver.cpp
index 662d05927..7cb46cf18 100644
--- a/unmaintained/channel_soft/main_driver.cpp
+++ b/unmaintained/channel_soft/main_driver.cpp
@@ -571,7 +571,7 @@ void main_driver(const char * argv) {
             for(int d=0; d<AMREX_SPACEDIM; d++)
                 ShiftFaceToCC(umac[d], 0, struct_in_cc, d, 1);
           //  Have to comment this out for now
-          //  structFact.FortStructure(struct_in_cc,geom);
+          //  structFact.FortStructure(struct_in_cc);
         }
 
         Real step_stop_time = ParallelDescriptor::second() - step_strt_time;
diff --git a/unmaintained/exercises/compressible/main_driver.cpp b/unmaintained/exercises/compressible/main_driver.cpp
index 436e0baf6..3b7cbea00 100644
--- a/unmaintained/exercises/compressible/main_driver.cpp
+++ b/unmaintained/exercises/compressible/main_driver.cpp
@@ -412,7 +412,7 @@ void main_driver(const char* argv)
 
     if(project_dir >= 0){
       cu.setVal(0.0);
-      ComputeVerticalAverage(cu, cuVertAvg, geom, project_dir, 0, nvars);
+      ComputeVerticalAverage(cu, cuVertAvg, project_dir, 0, nvars);
       BoxArray ba_flat = cuVertAvg.boxArray();
       const DistributionMapping& dmap_flat = cuVertAvg.DistributionMap();
       {
@@ -536,10 +536,10 @@ void main_driver(const char* argv)
 //	// collect a snapshot for structure factor
 //	if (step > n_steps_skip && struct_fact_int > 0 && (step-n_steps_skip)%struct_fact_int == 0) {
 //            MultiFab::Copy(struct_in_cc, cu, 0, 0, nvar_sf, 0);
-//            structFact.FortStructure(struct_in_cc,geom);
+//            structFact.FortStructure(struct_in_cc);
 //            if(project_dir >= 0) {
-//                ComputeVerticalAverage(cu, cuVertAvg, geom, project_dir, 0, nvars);
-//                structFactVA.FortStructure(cuVertAvg,geom_flat);
+//                ComputeVerticalAverage(cu, cuVertAvg, project_dir, 0, nvars);
+//                structFactVA.FortStructure(cuVertAvg);
 //            }
 //        }
 
diff --git a/unmaintained/exercises/incompressible/main_driver.cpp b/unmaintained/exercises/incompressible/main_driver.cpp
index b273e1fcc..48f3b9cd3 100644
--- a/unmaintained/exercises/incompressible/main_driver.cpp
+++ b/unmaintained/exercises/incompressible/main_driver.cpp
@@ -387,7 +387,7 @@ void main_driver(const char* argv)
 	  for(int d=0; d<AMREX_SPACEDIM; d++) {
 	    ShiftFaceToCC(umac[d], 0, struct_in_cc, d, 1);
 	  }
-	  structFact.FortStructure(struct_in_cc,geom);
+	  structFact.FortStructure(struct_in_cc);
         }
 	*/