From f37579e5c3502985fb190c21cefaff5e49bc9b84 Mon Sep 17 00:00:00 2001
From: Daniel Arndt <arndtd@ornl.gov>
Date: Thu, 17 Jun 2021 16:02:09 +0000
Subject: [PATCH 01/15] Tpetra: Fix compiling and running with SYCL and MPI

---
 .../tpetra/core/src/Tpetra_Distributor.hpp    | 17 ++++++++++
 .../core/test/Distributor/Issue1454.cpp       |  7 +++-
 .../test/PerformanceCGSolve/cg_solve_file.cpp | 32 ++++++++++++++++++-
 3 files changed, 54 insertions(+), 2 deletions(-)
diff --git a/packages/tpetra/core/src/Tpetra_Distributor.hpp b/packages/tpetra/core/src/Tpetra_Distributor.hpp
index b88a55274f7b..2f478336c834 100644
--- a/packages/tpetra/core/src/Tpetra_Distributor.hpp
+++ b/packages/tpetra/core/src/Tpetra_Distributor.hpp
@@ -2122,6 +2122,15 @@ namespace Tpetra {
        "See Trilinos GitHub issue #1088.");
 #endif // KOKKOS_ENABLE_CUDA
 
+#ifdef KOKKOS_ENABLE_SYCL
+    static_assert
+      (! std::is_same<typename ExpView::memory_space, Kokkos::Experimental::SYCLSharedUSMSpace>::value &&
+       ! std::is_same<typename ImpView::memory_space, Kokkos::Experimental::SYCLSharedUSMSpace>::value,
+       "Please do not use Tpetra::Distributor with SharedUSM allocations.  "
+       "See Trilinos GitHub issue #1088 (corresponding to CUDA).");
+#endif // KOKKOS_ENABLE_SYCL
+
+
 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
     Teuchos::TimeMonitor timeMon (*timer_doPosts3KV_);
 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
@@ -2565,6 +2574,14 @@ namespace Tpetra {
                    "allocations.  See GitHub issue #1088.");
 #endif // KOKKOS_ENABLE_CUDA
 
+#ifdef KOKKOS_ENABLE_SYCL
+    static_assert (! std::is_same<typename ExpView::memory_space, Kokkos::Experimental::SYCLSharedUSMSpace>::value &&
+                   ! std::is_same<typename ImpView::memory_space, Kokkos::Experimental::SYCLSharedUSMSpace>::value,
+                   "Please do not use Tpetra::Distributor with SharedUSM "
+                   "allocations.  See GitHub issue #1088 (corresponding to CUDA).");
+#endif // KOKKOS_ENABLE_SYCL
+
+
     std::unique_ptr<std::string> prefix;
     if (verbose_) {
       prefix = createPrefix("doPosts(4-arg, Kokkos)");
diff --git a/packages/tpetra/core/test/Distributor/Issue1454.cpp b/packages/tpetra/core/test/Distributor/Issue1454.cpp
index 95ff17bfac50..86d73228c426 100644
--- a/packages/tpetra/core/test/Distributor/Issue1454.cpp
+++ b/packages/tpetra/core/test/Distributor/Issue1454.cpp
@@ -98,9 +98,14 @@ TEUCHOS_UNIT_TEST( Distributor, Issue1454 )
   std::is_same<typename device_type::execution_space, Kokkos::Cuda>::value,
     Kokkos::CudaSpace,
     typename device_type::memory_space>::type;
+#elif defined(KOKKOS_ENABLE_SYCL)
+  using buffer_memory_space = typename std::conditional<
+  std::is_same<typename device_type::execution_space, Kokkos::Experimental::SYCL>::value,
+    Kokkos::Experimental::SYCLDeviceUSMSpace,
+    typename device_type::memory_space>::type;
 #else
   using buffer_memory_space = typename device_type::memory_space;
-#endif // KOKKOS_ENABLE_CUDA
+#endif
   using buffer_execution_space = typename device_type::execution_space;
   using buffer_device_type = Kokkos::Device<buffer_execution_space, buffer_memory_space>;
 
diff --git a/packages/tpetra/core/test/PerformanceCGSolve/cg_solve_file.cpp b/packages/tpetra/core/test/PerformanceCGSolve/cg_solve_file.cpp
index d211cbbbc954..a797ee389435 100644
--- a/packages/tpetra/core/test/PerformanceCGSolve/cg_solve_file.cpp
+++ b/packages/tpetra/core/test/PerformanceCGSolve/cg_solve_file.cpp
@@ -306,6 +306,8 @@ int main(int argc, char *argv[]) {
     numgpus = std::atoi(rawKokkosNumDevices);
   int skipgpu = 999;
 
+  bool useSYCL = false;
+  bool useHIP = false;
   bool useCuda = false;
   bool useOpenMP = false;
   bool useThreads = false;
@@ -327,6 +329,12 @@ int main(int argc, char *argv[]) {
   cmdp.setOption("tol_small",&tol_small,"Tolerance for total CG-Time and final residual.");
   cmdp.setOption("tol_large",&tol_large,"Tolerance for individual times.");
   //Only provide the option to use Node types that are actually enabled in this build
+#ifdef HAVE_TPETRA_INST_SYCL
+  cmdp.setOption("sycl","no-sycl",&useSYCL,"Use SYCL node");
+#endif
+  #ifdef HAVE_TPETRA_INST_HIP
+  cmdp.setOption("hip","no-hip",&useHIP,"Use HIP node");
+#endif
 #ifdef HAVE_TPETRA_INST_CUDA
   cmdp.setOption("cuda","no-cuda",&useCuda,"Use Cuda node");
 #endif
@@ -344,8 +352,22 @@ int main(int argc, char *argv[]) {
   }
 
   //If no node type was explicitly requested, use Tpetra's default node
-  if(!useCuda && !useOpenMP && !useThreads && !useSerial)
+  if(!useSYCL && !useHIP && !useCuda && !useOpenMP && !useThreads && !useSerial)
   {
+#ifdef HAVE_TPETRA_INST_SYCL
+    if(std::is_same<default_exec, Kokkos::Experimental::SYCL>::value)
+    {
+      std::cout << "No node specified in command-line args, so using default (SYCL)\n";
+      useSYCL = true;
+    }
+#endif
+#ifdef HAVE_TPETRA_INST_HIP
+    if(std::is_same<default_exec, Kokkos::Experimental::HIP>::value)
+    {
+      std::cout << "No node specified in command-line args, so using default (HIP)\n";
+      useHIP = true;
+    }
+#endif
 #ifdef HAVE_TPETRA_INST_CUDA
     if(std::is_same<default_exec, Kokkos::Cuda>::value)
     {
@@ -389,6 +411,14 @@ int main(int argc, char *argv[]) {
 
   Kokkos::initialize (kokkosArgs);
   {
+#ifdef HAVE_TPETRA_INST_SYCL
+    if(useSYCL)
+      return run<KokkosSYCLWrapperNode>();
+#endif
+#ifdef HAVE_TPETRA_INST_HIP
+    if(useHIP)
+      return run<KokkosHIPWrapperNode>();
+#endif
 #ifdef HAVE_TPETRA_INST_CUDA
     if(useCuda)
       return run<KokkosCudaWrapperNode>();

From 53c2ccec2d88cfb3dcc4dc8b2c2644087ea311db Mon Sep 17 00:00:00 2001
From: Sidafa Conde <sconde@sandia.gov>
Date: Sat, 5 Jun 2021 20:05:40 -0400
Subject: [PATCH 02/15] Tempus: FSA - updated to use IntegratorBasic

- convert IntegratorBasicOld -> IntegratorBasic
- shadow decl warnings
- remove ParameterListAcceptor inherentence
- eliminate ctor not called/used
- non default ctor changes are compiling and running
- remove setParameterList
- remove ParameterList from class
- reorder
- remove uncessary include

ROL: update Tempus integrator

Change IntegratorBasicOld to the new IntegratorBasic class and update
the example parameterlist so that it validates properly

Tempus: FSA - update doxygen
---
 .../tempus/ROL_TempusReducedObjective.hpp     |   6 +-
 packages/rol/example/tempus/example_01.cpp    |   4 +-
 .../rol/example/tempus/example_parabolic.xml  |   6 +-
 .../tempus/example_parabolic_modeleval.cpp    |   4 +-
 ...le_parabolic_modeleval_forward-adjoint.cpp |   6 +-
 .../rol/example/tempus/example_sincos.xml     |   6 +-
 .../Tempus_IntegratorForwardSensitivity.cpp   |   6 -
 ...mpus_IntegratorForwardSensitivity_decl.hpp | 106 +++++----
 ...mpus_IntegratorForwardSensitivity_impl.hpp | 202 ++++++------------
 ...tepperStaggeredForwardSensitivity_impl.hpp |   1 +
 packages/tempus/test/BDF2/Tempus_BDF2Test.cpp |   4 +-
 11 files changed, 136 insertions(+), 215 deletions(-)

diff --git a/packages/rol/example/tempus/ROL_TempusReducedObjective.hpp b/packages/rol/example/tempus/ROL_TempusReducedObjective.hpp
index c75113db5fd3..3ae9d971631a 100644
--- a/packages/rol/example/tempus/ROL_TempusReducedObjective.hpp
+++ b/packages/rol/example/tempus/ROL_TempusReducedObjective.hpp
@@ -49,7 +49,7 @@
 #include "Teuchos_RCP.hpp"
 #include "Teuchos_ParameterList.hpp"
 
-#include "Tempus_IntegratorBasicOld.hpp"
+#include "Tempus_IntegratorBasic.hpp"
 #include "Tempus_IntegratorForwardSensitivity.hpp"
 #include "Tempus_IntegratorAdjointSensitivity.hpp"
 #include "Tempus_IntegratorPseudoTransientForwardSensitivity.hpp"
@@ -425,8 +425,8 @@ run_tempus(const Thyra::ModelEvaluatorBase::InArgs<Real>&  inArgs,
       << " and Pseudotransient Adjoint.");
   }
   else {
-    RCP<Tempus::IntegratorBasicOld<Real> > integrator =
-      Tempus::integratorBasic<Real>(tempus_params_, wrapped_model);
+    RCP<Tempus::IntegratorBasic<Real> > integrator =
+      Tempus::createIntegratorBasic<Real>(tempus_params_, wrapped_model);
     const bool integratorStatus = integrator->advanceTime();
     TEUCHOS_TEST_FOR_EXCEPTION(
       !integratorStatus, std::logic_error, "Integrator failed!");
diff --git a/packages/rol/example/tempus/example_01.cpp b/packages/rol/example/tempus/example_01.cpp
index 4fe5182d5f70..a5b4e03118b4 100644
--- a/packages/rol/example/tempus/example_01.cpp
+++ b/packages/rol/example/tempus/example_01.cpp
@@ -52,7 +52,7 @@
 #include "Teuchos_GlobalMPISession.hpp"
 #include "Teuchos_XMLParameterListHelpers.hpp"
 
-#include "Tempus_IntegratorBasicOld.hpp"
+#include "Tempus_IntegratorBasic.hpp"
 
 #include <fstream>
 
@@ -96,7 +96,7 @@ int main(int argc, char *argv[]) {
     RCP<Teuchos::ParameterList> tempusParList = sublist(parList, "Tempus", true);
     RCP<SinCosModelEvaluator<RealT>> model = Teuchos::rcp(new SinCosModelEvaluator<RealT>());
 
-    RCP<Tempus::IntegratorBasicOld<RealT> > integrator = Tempus::integratorBasic<RealT>(tempusParList, model);
+    RCP<Tempus::IntegratorBasic<RealT> > integrator = Tempus::createIntegratorBasic<RealT>(tempusParList, model);
 
     integrator->advanceTime();
 
diff --git a/packages/rol/example/tempus/example_parabolic.xml b/packages/rol/example/tempus/example_parabolic.xml
index 57d8116ec2c0..199c40ccdd85 100644
--- a/packages/rol/example/tempus/example_parabolic.xml
+++ b/packages/rol/example/tempus/example_parabolic.xml
@@ -70,7 +70,7 @@
       <Parameter name="Initial Condition Consistency Check" type="bool" value="false"/>
       <Parameter name="Zero Initial Guess" type="bool" value="0"/>
       <Parameter name="Solver Name"    type="string" value="Default Solver"/>
-      <Parameter name="Predictor Name" type="string" value="Default Predictor"/>
+      <Parameter name="Predictor Stepper Type" type="string" value="Forward Euler"/>
 
       <ParameterList name="Default Solver">
         <ParameterList name="NOX">
@@ -122,10 +122,6 @@
         </ParameterList>
       </ParameterList>
 
-      <ParameterList name="Default Predictor">
-        <Parameter name="Stepper Type" type="string" value="Forward Euler"/>
-      </ParameterList>
-
     </ParameterList>
 
   </ParameterList>
diff --git a/packages/rol/example/tempus/example_parabolic_modeleval.cpp b/packages/rol/example/tempus/example_parabolic_modeleval.cpp
index 6eeae32e4506..622a8624f0ff 100644
--- a/packages/rol/example/tempus/example_parabolic_modeleval.cpp
+++ b/packages/rol/example/tempus/example_parabolic_modeleval.cpp
@@ -66,7 +66,7 @@
 
 #include "Teuchos_GlobalMPISession.hpp"
 
-#include "Tempus_IntegratorBasicOld.hpp"
+#include "Tempus_IntegratorBasic.hpp"
 
 #include "ROL_Stream.hpp"
 #include "ROL_ParameterList.hpp"
@@ -114,7 +114,7 @@ int main(int argc, char *argv[]) {
 
     // Create Tempus Integrator from ModelEvaluator.
     ROL::Ptr<Tempus::Integrator<RealT>> integrator =
-      ROL::makePtr<Tempus::IntegratorBasicOld<RealT>>(ROL::makePtrFromRef(pl_tempus), meval);
+      Tempus::createIntegratorBasic<RealT>(ROL::makePtrFromRef(pl_tempus), meval);
 
     // Initialize objective function.
     ROL::Ptr<ROL::DynamicObjective<RealT>> dyn_obj =
diff --git a/packages/rol/example/tempus/example_parabolic_modeleval_forward-adjoint.cpp b/packages/rol/example/tempus/example_parabolic_modeleval_forward-adjoint.cpp
index eb9e25aeb6af..d7c7e1b972b6 100644
--- a/packages/rol/example/tempus/example_parabolic_modeleval_forward-adjoint.cpp
+++ b/packages/rol/example/tempus/example_parabolic_modeleval_forward-adjoint.cpp
@@ -66,7 +66,7 @@
 
 #include "Teuchos_GlobalMPISession.hpp"
 
-#include "Tempus_IntegratorBasicOld.hpp"
+#include "Tempus_IntegratorBasic.hpp"
 
 #include "ROL_Stream.hpp"
 #include "ROL_ParameterList.hpp"
@@ -118,11 +118,11 @@ int main(int argc, char *argv[]) {
 
     // Create FORWARD Tempus Integrator from ModelEvaluator.
     ROL::Ptr<Tempus::Integrator<RealT>> forward_integrator =
-      ROL::makePtr<Tempus::IntegratorBasicOld<RealT>>(ROL::makePtrFromRef(pl_tempus), forward_meval);
+      Tempus::createIntegratorBasic<RealT>(ROL::makePtrFromRef(pl_tempus), forward_meval);
 
     // Create ADJOINT Tempus Integrator from ModelEvaluator.
     ROL::Ptr<Tempus::Integrator<RealT>> adjoint_integrator =
-      ROL::makePtr<Tempus::IntegratorBasicOld<RealT>>(ROL::makePtrFromRef(pl_tempus), adjoint_meval);
+      Tempus::createIntegratorBasic<RealT>(ROL::makePtrFromRef(pl_tempus), adjoint_meval);
 
     // Initialize objective function.
     ROL::Ptr<ROL::DynamicObjective<RealT>> dyn_obj =
diff --git a/packages/rol/example/tempus/example_sincos.xml b/packages/rol/example/tempus/example_sincos.xml
index 2a6dc0cec8cc..8777e117ca47 100644
--- a/packages/rol/example/tempus/example_sincos.xml
+++ b/packages/rol/example/tempus/example_sincos.xml
@@ -54,7 +54,7 @@
       <Parameter name="Stepper Type"   type="string" value="Backward Euler"/>
       <Parameter name="Zero Initial Guess" type="bool" value="0"/>
       <Parameter name="Solver Name"    type="string" value="Default Solver"/>
-      <Parameter name="Predictor Name" type="string" value="Default Predictor"/>
+      <Parameter name="Predictor Stepper Type" type="string" value="Forward Euler"/>
 
       <ParameterList name="Default Solver">
         <ParameterList name="NOX">
@@ -106,10 +106,6 @@
         </ParameterList>
       </ParameterList>
 
-      <ParameterList name="Default Predictor">
-        <Parameter name="Stepper Type" type="string" value="Forward Euler"/>
-      </ParameterList>
-
     </ParameterList>
 
   </ParameterList>
diff --git a/packages/tempus/src/Tempus_IntegratorForwardSensitivity.cpp b/packages/tempus/src/Tempus_IntegratorForwardSensitivity.cpp
index 8e0f3a71fb16..aac1afe13dc8 100644
--- a/packages/tempus/src/Tempus_IntegratorForwardSensitivity.cpp
+++ b/packages/tempus/src/Tempus_IntegratorForwardSensitivity.cpp
@@ -22,12 +22,6 @@ namespace Tempus {
     Teuchos::RCP<Teuchos::ParameterList>        parameterList,
     const Teuchos::RCP<Thyra::ModelEvaluator<double> >& model);
 
-  // Nonmember ctor
-  template Teuchos::RCP<IntegratorForwardSensitivity<double> >
-  integratorForwardSensitivity(
-    const Teuchos::RCP<Thyra::ModelEvaluator<double> >& model,
-    std::string stepperType);
-
   // Nonmember ctor
   template Teuchos::RCP<IntegratorForwardSensitivity<double> >
   integratorForwardSensitivity();
diff --git a/packages/tempus/src/Tempus_IntegratorForwardSensitivity_decl.hpp b/packages/tempus/src/Tempus_IntegratorForwardSensitivity_decl.hpp
index 492d66d96246..2c8440adde6e 100644
--- a/packages/tempus/src/Tempus_IntegratorForwardSensitivity_decl.hpp
+++ b/packages/tempus/src/Tempus_IntegratorForwardSensitivity_decl.hpp
@@ -11,7 +11,7 @@
 
 // Tempus
 #include "Tempus_config.hpp"
-#include "Tempus_IntegratorBasicOld.hpp"
+#include "Tempus_IntegratorBasic.hpp"
 #include "Tempus_SensitivityModelEvaluatorBase.hpp"
 #include "Tempus_StepperStaggeredForwardSensitivity.hpp"
 
@@ -37,7 +37,7 @@ namespace Tempus {
  * </ul>
  *
  * Note that this integrator implements all of the same functions as the
- * IntegratorBasicOld, but is not derived from IntegratorBasicOld.  It also provides
+ * IntegratorBasic, but is not derived from IntegratorBasic.  It also provides
  * functions for setting the sensitivity initial conditions and extracting the
  * sensitivity at the final time.  Also the vectors stored in the solution
  * history store product vectors of the state and sensitivities using
@@ -45,14 +45,19 @@ namespace Tempus {
  */
 template<class Scalar>
 class IntegratorForwardSensitivity
-  : virtual public Tempus::Integrator<Scalar>,
-    virtual public Teuchos::ParameterListAcceptor
+  : virtual public Tempus::Integrator<Scalar>
 {
 public:
 
-  /** \brief Constructor with ParameterList and model, and will be fully
-   * initialized. */
-  /*!
+  /** \brief Full Constructor with model, and will be fully initialized. 
+   *
+   * \param[in] model               The forward physics ModelEvaluator
+   * \param[in] integrator          Forward state Integrator
+   * \param[in] sens_model          The sensitivity ModelEvaluator
+   * \param[in] sens_stepper        Tempus stepper for the sensitivity integration
+   * \param[in] use_combined_method Indicates whether or not to use the
+   *                                "Combined" sensitivity method
+   *
    * In addition to all of the regular integrator options, the supplied
    * parameter list supports the following options contained within a sublist
    * "Sensitivities" from the top-level parameter list:
@@ -83,16 +88,14 @@ class IntegratorForwardSensitivity
    * </ul>
    */
   IntegratorForwardSensitivity(
-    Teuchos::RCP<Teuchos::ParameterList>                pList,
-    const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >& model);
-
-  /** \brief Constructor with model and "Stepper Type" and is fully initialized with default settings. */
-  IntegratorForwardSensitivity(
-    const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >& model,
-    std::string stepperType);
+      const Teuchos::RCP<Thyra::ModelEvaluator<Scalar>> &model,
+      const Teuchos::RCP<IntegratorBasic<Scalar>> &integrator,
+      const Teuchos::RCP<SensitivityModelEvaluatorBase<Scalar>> &sens_model,
+      const Teuchos::RCP<StepperStaggeredForwardSensitivity<Scalar>> &sens_stepper,
+      const bool use_combined_method);
 
   /// Destructor
-  /** \brief Constructor that requires a subsequent setParameterList, setStepper, and initialize calls. */
+  /** \brief Constructor that requires a subsequent setStepper, and initialize calls. */
   IntegratorForwardSensitivity();
 
   /// Destructor
@@ -148,8 +151,8 @@ class IntegratorForwardSensitivity
   virtual void setStepper(Teuchos::RCP<Thyra::ModelEvaluator<Scalar> > model);
 
   /// Set the Stepper
-  virtual void setStepperWStepper(Teuchos::RCP<Stepper<Scalar> > stepper)
-    { integrator_->setStepperWStepper(stepper); }
+  virtual void setStepper(Teuchos::RCP<Stepper<Scalar> > stepper)
+    { integrator_->setStepper(stepper); }
   /// Set the initial state which has the initial conditions
   virtual void initializeSolutionHistory(
     Teuchos::RCP<SolutionState<Scalar> > state = Teuchos::null)
@@ -217,18 +220,6 @@ class IntegratorForwardSensitivity
   /// Parse when screen output should be executed
   void parseScreenOutput() { integrator_->parseScreenOutput(); }
 
-  /// \name Overridden from Teuchos::ParameterListAcceptor
-  //@{
-    void setParameterList(const Teuchos::RCP<Teuchos::ParameterList> & pl)
-      override;
-    Teuchos::RCP<Teuchos::ParameterList> getNonconstParameterList() override
-      { return tempus_pl_; }
-    Teuchos::RCP<Teuchos::ParameterList> unsetParameterList() override;
-
-    Teuchos::RCP<const Teuchos::ParameterList> getValidParameters()
-      const override;
-  //@}
-
   /// \name Overridden from Teuchos::Describable
   //@{
     std::string description() const override;
@@ -237,39 +228,44 @@ class IntegratorForwardSensitivity
   //@}
 
 protected:
-
-  // Create sensitivity model evaluator from application model
-  void
-  createSensitivityModelAndStepper(
-    const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >& model);
-
-  Teuchos::RCP<Thyra::ModelEvaluator<Scalar> > model_;
-  Teuchos::RCP<SensitivityModelEvaluatorBase<Scalar> > sens_model_;
-  Teuchos::RCP<StepperStaggeredForwardSensitivity<Scalar> > sens_stepper_;
-  Teuchos::RCP<IntegratorBasicOld<Scalar> > integrator_;
-  Teuchos::RCP<Teuchos::ParameterList> tempus_pl_;
-  Teuchos::RCP<Teuchos::ParameterList> sens_pl_;
-  Teuchos::RCP<Teuchos::ParameterList> stepper_pl_;
+  Teuchos::RCP<Thyra::ModelEvaluator<Scalar>> model_;
+  Teuchos::RCP<IntegratorBasic<Scalar>> integrator_;
+  Teuchos::RCP<SensitivityModelEvaluatorBase<Scalar>> sens_model_;
+  Teuchos::RCP<StepperStaggeredForwardSensitivity<Scalar>> sens_stepper_;
   bool use_combined_method_;
 };
 
 /// Nonmember constructor
-template<class Scalar>
-Teuchos::RCP<IntegratorForwardSensitivity<Scalar> >
-integratorForwardSensitivity(
-  Teuchos::RCP<Teuchos::ParameterList>                pList,
-  const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >& model);
-
-/// Nonmember constructor
-template<class Scalar>
-Teuchos::RCP<IntegratorForwardSensitivity<Scalar> >
+/**
+ * @brief Nonmember constructor
+ *
+ * This nonmember constructor calls parses the `pList` provided to constructor
+ * the sub-objects needed to call the full `IntegratorForwardSensitivity`
+ * construtor
+ *
+ * @param pList  ParameterList defining the integrator options and options
+ *               defining the sensitivity analysis
+ * @param model  ModelEvaluator for the problem
+ *
+ * @return Time integrator implementing forward sensitivity
+ */
+template <class Scalar>
+Teuchos::RCP<IntegratorForwardSensitivity<Scalar>>
 integratorForwardSensitivity(
-  const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >& model,
-  std::string stepperType);
+    Teuchos::RCP<Teuchos::ParameterList> pList,
+    const Teuchos::RCP<Thyra::ModelEvaluator<Scalar>> &model);
 
 /// Nonmember constructor
-template<class Scalar>
-Teuchos::RCP<IntegratorForwardSensitivity<Scalar> >
+/**
+ * @brief Default non-member constructor
+ *
+ * This nonmember constructor creates default state and sensitivity time
+ * integrator.
+ *
+ * @return Time integrator implementing forward sensitivity
+ */
+template <class Scalar>
+Teuchos::RCP<IntegratorForwardSensitivity<Scalar>>
 integratorForwardSensitivity();
 
 } // namespace Tempus
diff --git a/packages/tempus/src/Tempus_IntegratorForwardSensitivity_impl.hpp b/packages/tempus/src/Tempus_IntegratorForwardSensitivity_impl.hpp
index 7b8cb372d7ea..f19347bec3bb 100644
--- a/packages/tempus/src/Tempus_IntegratorForwardSensitivity_impl.hpp
+++ b/packages/tempus/src/Tempus_IntegratorForwardSensitivity_impl.hpp
@@ -9,6 +9,7 @@
 #ifndef Tempus_IntegratorForwardSensitivity_impl_hpp
 #define Tempus_IntegratorForwardSensitivity_impl_hpp
 
+#include "Tempus_SolutionHistory_decl.hpp"
 #include "Thyra_DefaultMultiVectorProductVector.hpp"
 #include "Thyra_VectorStdOps.hpp"
 #include "Thyra_MultiVectorStdOps.hpp"
@@ -16,56 +17,30 @@
 #include "Tempus_CombinedForwardSensitivityModelEvaluator.hpp"
 #include "Tempus_WrapCombinedFSAModelEvaluator.hpp"
 
-
 namespace Tempus {
 
-template<class Scalar>
-IntegratorForwardSensitivity<Scalar>::
-IntegratorForwardSensitivity(
-  Teuchos::RCP<Teuchos::ParameterList>                inputPL,
-  const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >& model)
-{
-  model_ = model;
-  integrator_ = integratorBasic<Scalar>();
-  this->setParameterList(inputPL);
-  createSensitivityModelAndStepper(model);
-  if (use_combined_method_)
-    integrator_ = integratorBasic<Scalar>(tempus_pl_, sens_model_);
-  else {
-    integrator_ = integratorBasic<Scalar>();
-    integrator_->setTempusParameterList(tempus_pl_);
-    integrator_->setStepperWStepper(sens_stepper_);
-    integrator_->initialize();
-  }
-}
-
-template<class Scalar>
-IntegratorForwardSensitivity<Scalar>::
-IntegratorForwardSensitivity(
-  const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >& model,
-  std::string stepperType)
+template <class Scalar>
+IntegratorForwardSensitivity<Scalar>::IntegratorForwardSensitivity(
+    const Teuchos::RCP<Thyra::ModelEvaluator<Scalar>> &model,
+    const Teuchos::RCP<IntegratorBasic<Scalar>> &integrator,
+    const Teuchos::RCP<SensitivityModelEvaluatorBase<Scalar>> &sens_model,
+    const Teuchos::RCP<StepperStaggeredForwardSensitivity<Scalar>> &sens_stepper,
+    bool use_combined_method)
+    : model_(model)
+    , integrator_(integrator)
+    , sens_model_(sens_model)
+    , sens_stepper_(sens_stepper)
+    , use_combined_method_(use_combined_method)
 {
-  model_ = model;
-  integrator_ = integratorBasic<Scalar>();
-  this->setParameterList(Teuchos::null);
-  createSensitivityModelAndStepper(model);
-  if (use_combined_method_)
-    integrator_ = integratorBasic<Scalar>(sens_model_, stepperType);
-  else {
-    integrator_ = integratorBasic<Scalar>();
-    integrator_->setParameterList(tempus_pl_);
-    integrator_->setStepperWStepper(sens_stepper_);
-    integrator_->initialize();
-  }
-
+  integrator_->initialize();
 }
 
 template<class Scalar>
 IntegratorForwardSensitivity<Scalar>::
 IntegratorForwardSensitivity()
 {
-  integrator_ = integratorBasic<Scalar>();
-  this->setParameterList(Teuchos::null);
+  integrator_ = createIntegratorBasic<Scalar>();
+  integrator_->initialize();
 }
 
 template<class Scalar>
@@ -73,11 +48,10 @@ void IntegratorForwardSensitivity<Scalar>::
 setStepper(
   Teuchos::RCP<Thyra::ModelEvaluator<Scalar> > model)
 {
-  createSensitivityModelAndStepper(model);
   if (use_combined_method_)
-    integrator_->setStepper(sens_model_);
+    integrator_->setModel(sens_model_);
   else
-    integrator_->setStepperWStepper(sens_stepper_);
+    integrator_->setStepper(sens_stepper_);
 }
 
 template<class Scalar>
@@ -253,104 +227,67 @@ describe(
   integrator_->describe(*l_out, verbLevel);
 }
 
-template<class Scalar>
-void
-IntegratorForwardSensitivity<Scalar>::
-setParameterList(const Teuchos::RCP<Teuchos::ParameterList> & inputPL)
-{
-  tempus_pl_ = Teuchos::parameterList();
-  if (inputPL != Teuchos::null)
-    *tempus_pl_ = *inputPL;
-  tempus_pl_->setParametersNotAlreadySet(*this->getValidParameters());
-  sens_pl_ = Teuchos::sublist(tempus_pl_, "Sensitivities", false);
-  std::string integratorName =
-    tempus_pl_->get<std::string>("Integrator Name", "Default Integrator");
-  std::string stepperName =
-    tempus_pl_->sublist(integratorName).get<std::string>("Stepper Name");
-  stepper_pl_ = Teuchos::sublist(tempus_pl_, stepperName, true);
-  use_combined_method_ =
-    sens_pl_->get<std::string>("Sensitivity Method") == "Combined";
-}
 
+/// Nonmember constructor
 template<class Scalar>
-Teuchos::RCP<Teuchos::ParameterList>
-IntegratorForwardSensitivity<Scalar>::
-unsetParameterList()
+Teuchos::RCP<IntegratorForwardSensitivity<Scalar> >
+integratorForwardSensitivity(
+  Teuchos::RCP<Teuchos::ParameterList>                     pList,
+  const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >&      model)
 {
-  Teuchos::RCP<Teuchos::ParameterList> temp_param_list = tempus_pl_;
-  tempus_pl_ = Teuchos::null;
-  sens_pl_ = Teuchos::null;
-  stepper_pl_ = Teuchos::null;
-  integrator_->unsetParameterList();
-  return temp_param_list;
-}
 
-template<class Scalar>
-Teuchos::RCP<const Teuchos::ParameterList>
-IntegratorForwardSensitivity<Scalar>::
-getValidParameters() const
-{
-  Teuchos::RCP<Teuchos::ParameterList> pl =
-    Teuchos::rcp(new Teuchos::ParameterList);
-  Teuchos::RCP<const Teuchos::ParameterList> integrator_pl =
-    integrator_->getValidParameters();
-  Teuchos::RCP<const Teuchos::ParameterList> sensitivity_pl =
-    CombinedForwardSensitivityModelEvaluator<Scalar>::getValidParameters();
-  pl->setParameters(*integrator_pl);
-  Teuchos::ParameterList& spl = pl->sublist("Sensitivities");
-  spl.setParameters(*sensitivity_pl);
-  spl.set("Sensitivity Method", "Combined");
-  spl.set("Reuse State Linear Solver", false);
-
-  return pl;
-}
 
-template <class Scalar>
-void
-IntegratorForwardSensitivity<Scalar>::
-createSensitivityModelAndStepper(
-  const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >& /* model */)
-{
-  using Teuchos::rcp;
+  Teuchos::RCP<SensitivityModelEvaluatorBase<Scalar> > sens_model;
+  Teuchos::RCP<StepperStaggeredForwardSensitivity<Scalar>> sens_stepper;
 
-  Teuchos::RCP<Teuchos::ParameterList> spl = Teuchos::parameterList();
-  *spl = *sens_pl_;
-  spl->remove("Sensitivity Method");
+  //1. create integrator
+  auto fwd_integrator = createIntegratorBasic<Scalar>(pList, model);
 
-  if (use_combined_method_) {
-    spl->remove("Reuse State Linear Solver");
-    sens_model_ =
-      wrapCombinedFSAModelEvaluator(model_, spl);
+  //2. set parameter list
+  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::rcp(new Teuchos::ParameterList);
+  {
+    Teuchos::RCP<const Teuchos::ParameterList> integrator_pl = fwd_integrator->getValidParameters();
+    Teuchos::RCP<const Teuchos::ParameterList> sensitivity_pl =
+      CombinedForwardSensitivityModelEvaluator<Scalar>::getValidParameters();
+    pl->setParameters(*integrator_pl);
+    Teuchos::ParameterList &spl = pl->sublist("Sensitivities");
+    spl.setParameters(*sensitivity_pl);
+    spl.set("Sensitivity Method", "Combined");
+    spl.set("Reuse State Linear Solver", false);
   }
-  else {
-    sens_stepper_ =
-      rcp(new StepperStaggeredForwardSensitivity<Scalar>(
-            model_, stepper_pl_, spl));
+  pList->setParametersNotAlreadySet(*pl);
+
+  auto sens_pl                    = Teuchos::sublist(pList, "Sensitivities", false);
+  std::string integratorName       = pList->get<std::string>("Integrator Name", "Default Integrator");
+  std::string stepperName          = pList->sublist(integratorName).get<std::string>("Stepper Name");
+  auto stepper_pl                 = Teuchos::sublist(pList, stepperName, true);
+  std::string sensitivity_method   = sens_pl->get<std::string>("Sensitivity Method");
+  bool use_combined_method        = sensitivity_method == "Combined";
+
+  //3. create sensitivity model and stepper
+  // createSensitivityModelAndStepper
+  {
+    sens_pl->remove("Sensitivity Method");
+
+    if (use_combined_method)
+    {
+      sens_pl->remove("Reuse State Linear Solver");
+      sens_model     = wrapCombinedFSAModelEvaluator(model, sens_pl);
+      fwd_integrator = createIntegratorBasic<Scalar>(pList, sens_model);
+    }
+    else
+    {
+      sens_stepper          = Teuchos::rcp(new StepperStaggeredForwardSensitivity<Scalar>(model, stepper_pl, sens_pl));
+      auto fsa_staggered_me = Teuchos::rcp_const_cast<Thyra::ModelEvaluator<Scalar>>(sens_stepper->getModel());
+      fwd_integrator        = createIntegratorBasic<Scalar>(pList, fsa_staggered_me);
+      fwd_integrator->setStepper(sens_stepper);
+    }
   }
-}
 
-/// Nonmember constructor
-template<class Scalar>
-Teuchos::RCP<IntegratorForwardSensitivity<Scalar> >
-integratorForwardSensitivity(
-  Teuchos::RCP<Teuchos::ParameterList>                     pList,
-  const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >&      model)
-{
-  Teuchos::RCP<IntegratorForwardSensitivity<Scalar> > integrator =
-    Teuchos::rcp(new IntegratorForwardSensitivity<Scalar>(pList, model));
-  return(integrator);
-}
-
-/// Nonmember constructor
-template<class Scalar>
-Teuchos::RCP<IntegratorForwardSensitivity<Scalar> >
-integratorForwardSensitivity(
-  const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >&      model,
-  std::string stepperType)
-{
-  Teuchos::RCP<IntegratorForwardSensitivity<Scalar> > integrator =
-    Teuchos::rcp(new IntegratorForwardSensitivity<Scalar>(model, stepperType));
-  return(integrator);
+  //4. initialize propoer integrator
+  Teuchos::RCP<IntegratorForwardSensitivity<Scalar>> integrator =
+    Teuchos::rcp(new IntegratorForwardSensitivity<Scalar>(model, fwd_integrator, sens_model, sens_stepper, use_combined_method));
+  return (integrator);
 }
 
 /// Nonmember constructor
@@ -358,6 +295,7 @@ template<class Scalar>
 Teuchos::RCP<IntegratorForwardSensitivity<Scalar> >
 integratorForwardSensitivity()
 {
+
   Teuchos::RCP<IntegratorForwardSensitivity<Scalar> > integrator =
     Teuchos::rcp(new IntegratorForwardSensitivity<Scalar>());
   return(integrator);
diff --git a/packages/tempus/src/Tempus_StepperStaggeredForwardSensitivity_impl.hpp b/packages/tempus/src/Tempus_StepperStaggeredForwardSensitivity_impl.hpp
index 898b6e417804..b0bd8a46b9fa 100644
--- a/packages/tempus/src/Tempus_StepperStaggeredForwardSensitivity_impl.hpp
+++ b/packages/tempus/src/Tempus_StepperStaggeredForwardSensitivity_impl.hpp
@@ -138,6 +138,7 @@ takeStep(
     RCP<SolutionState<Scalar> > state = solutionHistory->getCurrentState();
     RCP<DMVPV> X, XDot, XDotDot;
     X = rcp_dynamic_cast<DMVPV>(state->getX(),true);
+
     XDot = rcp_dynamic_cast<DMVPV>(state->getXDot(),true);
     if (state->getXDotDot() != Teuchos::null)
       XDotDot = rcp_dynamic_cast<DMVPV>(state->getXDotDot(),true);
diff --git a/packages/tempus/test/BDF2/Tempus_BDF2Test.cpp b/packages/tempus/test/BDF2/Tempus_BDF2Test.cpp
index 72ebfc2f7ed4..9c63d0b9bc50 100644
--- a/packages/tempus/test/BDF2/Tempus_BDF2Test.cpp
+++ b/packages/tempus/test/BDF2/Tempus_BDF2Test.cpp
@@ -340,7 +340,7 @@ TEUCHOS_UNIT_TEST(BDF2, SinCosAdapt)
   RCP<ParameterList> pList =
     getParametersFromXmlFile("Tempus_BDF2_SinCos_AdaptDt.xml");
   //Set initial time step = 2*dt specified in input file (for convergence study)
-  RCP<ParameterList> pl = sublist(pList, "Tempus", true);
+  RCP<ParameterList> tempus_pl = sublist(pList, "Tempus", true);
   double dt = pList->sublist("Tempus")
              .sublist("Default Integrator")
              .sublist("Time Step Control").get<double>("Initial Time Step");
@@ -395,7 +395,7 @@ TEUCHOS_UNIT_TEST(BDF2, SinCosAdapt)
 
     // Test if at 'Final Time'
     time = integrator->getTime();
-    double timeFinal =pl->sublist("Default Integrator")
+    double timeFinal = pl->sublist("Default Integrator")
        .sublist("Time Step Control").get<double>("Final Time");
     TEST_FLOATING_EQUALITY(time, timeFinal, 1.0e-14);
 

From c3f4da1f9f0737f3f145e33a9b0b95b3a4f516c8 Mon Sep 17 00:00:00 2001
From: Matt Bettencourt <mbetten@sandia.gov>
Date: Thu, 22 Jul 2021 08:49:18 -0600
Subject: [PATCH 03/15] Removed deprecated code from trilinos couplins

---
 .../examples/fenl/BelosSolve.hpp              |  2 +-
 .../examples/fenl/fenl_impl.hpp               | 18 ++++++------
 ...s_TpetraIntrepidHybridPoisson2DExample.cpp | 28 +++++++++----------
 ...s_TpetraIntrepidHybridPoisson3DExample.cpp | 28 +++++++++----------
 ...Couplings_TpetraIntrepidPoissonExample.cpp | 28 +++++++++----------
 ...TpetraIntrepidStructuredPoissonExample.cpp | 28 +++++++++----------
 .../scaling/example_Poisson2D_p2_tpetra.cpp   |  9 +++---
 .../scaling/example_Poisson2D_pn_tpetra.cpp   | 16 +++++------
 .../scaling/example_Poisson_NoFE_Tpetra.cpp   | 28 +++++++++----------
 9 files changed, 87 insertions(+), 98 deletions(-)

diff --git a/packages/trilinoscouplings/examples/fenl/BelosSolve.hpp b/packages/trilinoscouplings/examples/fenl/BelosSolve.hpp
index 19dcb8aa1b77..6008a7fd15ac 100644
--- a/packages/trilinoscouplings/examples/fenl/BelosSolve.hpp
+++ b/packages/trilinoscouplings/examples/fenl/BelosSolve.hpp
@@ -160,7 +160,7 @@ belos_solve(
     //Teuchos::RCP<VectorType> coords =
     Teuchos::RCP<Tpetra::MultiVector<double,LO,GO,N> > coords =
       Teuchos::rcp(new Tpetra::MultiVector<double,LO,GO,N>(x.getMap(), node_coords.extent(1)));
-    fill_coords(node_coords, coords->getLocalViewDevice());
+    fill_coords(node_coords, coords->getLocalViewDevice(Tpetra::Access::ReadWrite));
 
     RCP<ParameterList> mueluParams = Teuchos::sublist(fenlParams, "MueLu");
     if (use_mean_based) {
diff --git a/packages/trilinoscouplings/examples/fenl/fenl_impl.hpp b/packages/trilinoscouplings/examples/fenl/fenl_impl.hpp
index af5526a7702d..6a694513f67e 100644
--- a/packages/trilinoscouplings/examples/fenl/fenl_impl.hpp
+++ b/packages/trilinoscouplings/examples/fenl/fenl_impl.hpp
@@ -135,7 +135,7 @@ class Problem {
   typedef Tpetra::Vector<Scalar,LocalOrdinalType,GlobalOrdinalType,NodeType> GlobalVectorType;
   typedef Tpetra::MultiVector<Scalar,LocalOrdinalType,GlobalOrdinalType,NodeType> GlobalMultiVectorType;
   typedef Tpetra::CrsMatrix<Scalar,LocalOrdinalType,GlobalOrdinalType,NodeType> GlobalMatrixType;
-  typedef typename GlobalMatrixType::local_matrix_type LocalMatrixType;
+  typedef typename GlobalMatrixType::local_matrix_device_type LocalMatrixType;
   typedef typename LocalMatrixType::StaticCrsGraphType LocalGraphType;
 
 
@@ -286,7 +286,7 @@ class Problem {
 
         //------------------------------
 
-        LocalMatrixType jacobian = g_jacobian.getLocalMatrix();
+        LocalMatrixType jacobian = g_jacobian.getLocalMatrixDevice();
 
         const unsigned nrow = jacobian.numRows();
         std::cout << "JacobianGraph[ "
@@ -360,11 +360,11 @@ class Problem {
       Kokkos::Impl::Timer newton_clock ;
       newton_clock.reset();
 
-      LocalMatrixType jacobian = g_jacobian.getLocalMatrix();
+      LocalMatrixType jacobian = g_jacobian.getLocalMatrixDevice();
 
-      const auto k_nodal_solution = g_nodal_solution.getLocalViewDevice();
-      const auto k_nodal_residual = g_nodal_residual.getLocalViewDevice();
-      const auto k_nodal_delta    = g_nodal_delta   .getLocalViewDevice();
+      const auto k_nodal_solution = g_nodal_solution.getLocalViewDevice(Tpetra::Access::ReadWrite);
+      const auto k_nodal_residual = g_nodal_residual.getLocalViewDevice(Tpetra::Access::ReadWrite);
+      const auto k_nodal_delta    = g_nodal_delta   .getLocalViewDevice(Tpetra::Access::ReadWrite);
 
       const LocalVectorType nodal_solution =
         Kokkos::subview(k_nodal_solution,Kokkos::ALL(),0);
@@ -606,11 +606,11 @@ class Problem {
           "Response gradient length must match number of sensitivities specified in constuctor");
 
         const auto nodal_solution_dp =
-          g_nodal_solution_dp.getLocalViewDevice();
+          g_nodal_solution_dp.getLocalViewDevice(Tpetra::Access::ReadWrite);
         const auto nodal_residual_dp =
-          g_nodal_residual_dp.getLocalViewDevice();
+          g_nodal_residual_dp.getLocalViewDevice(Tpetra::Access::ReadWrite);
         const auto nodal_delta_dp =
-          g_nodal_delta_dp.getLocalViewDevice();
+          g_nodal_delta_dp.getLocalViewDevice(Tpetra::Access::ReadWrite);
 
         LocalMultiVectorType nodal_solution_no_overlap_dp =
           Kokkos::subview(
diff --git a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp
index 5a04e3936e13..849d8bc58ac2 100644
--- a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp
+++ b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp
@@ -1118,39 +1118,37 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     // Import from the global column map to the local column map.
     myColsToZeroT->doImport (*globColsToZeroT, *bdyExporter, Tpetra::INSERT);
 
-    Array<ST> values;
-    Array<int> indices;
     ArrayRCP<const int> myColsToZeroArrayRCP = myColsToZeroT->getData(0);
     size_t NumEntries = 0;
 
     // Zero the columns corresponding to Dirichlet BCs.
     for (LO i = 0; i < as<int> (gl_StiffMatrix->getNodeNumRows ()); ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (i);
-      values.resize (NumEntries);
-      indices.resize (NumEntries);
-      gl_StiffMatrix->getLocalRowCopy (i, indices (), values (), NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      gl_StiffMatrix->getLocalRowCopy (i, indices, values, NumEntries);
       for (int j = 0; j < as<int> (NumEntries); ++j) {
-        if (myColsToZeroArrayRCP[indices[j]] == 1)
-          values[j] = STS::zero ();
+        if (myColsToZeroArrayRCP[indices(j)] == 1)
+          values(j) = STS::zero ();
       }
-      gl_StiffMatrix->replaceLocalValues (i, indices (), values ());
+      gl_StiffMatrix->replaceLocalValues (i, indices, values);
     } // for each (local) row of the global stiffness matrix
 
     // Zero the rows and add ones to diagonal.
     for (int i = 0; i < numBCNodes; ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (BCNodes[i]);
-      indices.resize (NumEntries);
-      values.resize (NumEntries);
-      gl_StiffMatrix->getLocalRowCopy (BCNodes[i], indices (), values (), NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      gl_StiffMatrix->getLocalRowCopy (BCNodes[i], indices, values, NumEntries);
       const GO globalRow = gl_StiffMatrix->getRowMap ()->getGlobalElement (BCNodes[i]);
       const LO localCol = gl_StiffMatrix->getColMap ()->getLocalElement (globalRow);
       for (int j = 0; j < as<int> (NumEntries); ++j) {
-        values[j] = STS::zero ();
-        if (indices[j] == localCol) {
-          values[j] = STS::one ();
+        values(j) = STS::zero ();
+        if (indices(j) == localCol) {
+          values(j) = STS::one ();
         }
       } // for each entry in the current row
-      gl_StiffMatrix->replaceLocalValues (BCNodes[i], indices (), values ());
+      gl_StiffMatrix->replaceLocalValues (BCNodes[i], indices, values);
     } // for each BC node
   }
 
diff --git a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp
index 3fcd95596071..c3ae654bc689 100644
--- a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp
+++ b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp
@@ -1120,39 +1120,37 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     // Import from the global column map to the local column map.
     myColsToZeroT->doImport (*globColsToZeroT, *bdyExporter, Tpetra::INSERT);
 
-    Array<ST> values;
-    Array<int> indices;
     ArrayRCP<const int> myColsToZeroArrayRCP = myColsToZeroT->getData(0);
     size_t NumEntries = 0;
 
     // Zero the columns corresponding to Dirichlet BCs.
     for (LO i = 0; i < as<int> (gl_StiffMatrix->getNodeNumRows ()); ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (i);
-      values.resize (NumEntries);
-      indices.resize (NumEntries);
-      gl_StiffMatrix->getLocalRowCopy (i, indices (), values (), NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      gl_StiffMatrix->getLocalRowCopy (i, indices, values, NumEntries);
       for (int j = 0; j < as<int> (NumEntries); ++j) {
-        if (myColsToZeroArrayRCP[indices[j]] == 1)
-          values[j] = STS::zero ();
+        if (myColsToZeroArrayRCP[indices(j)] == 1)
+	    values(j) = STS::zero ();
       }
-      gl_StiffMatrix->replaceLocalValues (i, indices (), values ());
+      gl_StiffMatrix->replaceLocalValues (i, indices, values);
     } // for each (local) row of the global stiffness matrix
 
     // Zero the rows and add ones to diagonal.
     for (int i = 0; i < numBCNodes; ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (BCNodes[i]);
-      indices.resize (NumEntries);
-      values.resize (NumEntries);
-      gl_StiffMatrix->getLocalRowCopy (BCNodes[i], indices (), values (), NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      gl_StiffMatrix->getLocalRowCopy (BCNodes[i], indices, values, NumEntries);
       const GO globalRow = gl_StiffMatrix->getRowMap ()->getGlobalElement (BCNodes[i]);
       const LO localCol = gl_StiffMatrix->getColMap ()->getLocalElement (globalRow);
       for (int j = 0; j < as<int> (NumEntries); ++j) {
-        values[j] = STS::zero ();
-        if (indices[j] == localCol) {
-          values[j] = STS::one ();
+        values(j) = STS::zero ();
+        if (indices(j) == localCol) {
+          values(j) = STS::one ();
         }
       } // for each entry in the current row
-      gl_StiffMatrix->replaceLocalValues (BCNodes[i], indices (), values ());
+      gl_StiffMatrix->replaceLocalValues (BCNodes[i], indices, values);
     } // for each BC node
   }
 
diff --git a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidPoissonExample.cpp b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidPoissonExample.cpp
index cfa2c39c02c4..43bcb0b210fc 100644
--- a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidPoissonExample.cpp
+++ b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidPoissonExample.cpp
@@ -1355,39 +1355,37 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     // Import from the global column map to the local column map.
     myColsToZeroT->doImport (*globColsToZeroT, *bdyExporter, Tpetra::INSERT);
 
-    Array<ST> values;
-    Array<int> indices;
     ArrayRCP<const int> myColsToZeroArrayRCP = myColsToZeroT->getData(0);
     size_t NumEntries = 0;
 
     // Zero the columns corresponding to Dirichlet BCs.
     for (LO i = 0; i < as<int> (gl_StiffMatrix->getNodeNumRows ()); ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (i);
-      values.resize (NumEntries);
-      indices.resize (NumEntries);
-      gl_StiffMatrix->getLocalRowCopy (i, indices (), values (), NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      gl_StiffMatrix->getLocalRowCopy (i, indices, values, NumEntries);
       for (int j = 0; j < as<int> (NumEntries); ++j) {
-        if (myColsToZeroArrayRCP[indices[j]] == 1)
-          values[j] = STS::zero ();
+        if (myColsToZeroArrayRCP[indices(j)] == 1)
+          values(j) = STS::zero ();
       }
-      gl_StiffMatrix->replaceLocalValues (i, indices (), values ());
+      gl_StiffMatrix->replaceLocalValues (i, indices, values);
     } // for each (local) row of the global stiffness matrix
 
     // Zero the rows and add ones to diagonal.
     for (int i = 0; i < numBCNodes; ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (BCNodes[i]);
-      indices.resize (NumEntries);
-      values.resize (NumEntries);
-      gl_StiffMatrix->getLocalRowCopy (BCNodes[i], indices (), values (), NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      gl_StiffMatrix->getLocalRowCopy (BCNodes[i], indices, values, NumEntries);
       const GO globalRow = gl_StiffMatrix->getRowMap ()->getGlobalElement (BCNodes[i]);
       const LO localCol = gl_StiffMatrix->getColMap ()->getLocalElement (globalRow);
       for (int j = 0; j < as<int> (NumEntries); ++j) {
-        values[j] = STS::zero ();
-        if (indices[j] == localCol) {
-          values[j] = STS::one ();
+        values(j) = STS::zero ();
+        if (indices(j) == localCol) {
+          values(j) = STS::one ();
         }
       } // for each entry in the current row
-      gl_StiffMatrix->replaceLocalValues (BCNodes[i], indices (), values ());
+      gl_StiffMatrix->replaceLocalValues (BCNodes[i], indices, values);
     } // for each BC node
   }
 
diff --git a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp
index e9083969fbca..32f34449273e 100644
--- a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp
+++ b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp
@@ -1112,39 +1112,37 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     // Import from the global column map to the local column map.
     myColsToZeroT->doImport (*globColsToZeroT, *bdyExporter, Tpetra::INSERT);
 
-    Array<ST> values;
-    Array<int> indices;
     ArrayRCP<const int> myColsToZeroArrayRCP = myColsToZeroT->getData(0);
     size_t NumEntries = 0;
 
     // Zero the columns corresponding to Dirichlet BCs.
     for (LO i = 0; i < as<int> (gl_StiffMatrix->getNodeNumRows ()); ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (i);
-      values.resize (NumEntries);
-      indices.resize (NumEntries);
-      gl_StiffMatrix->getLocalRowCopy (i, indices (), values (), NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      gl_StiffMatrix->getLocalRowCopy (i, indices, values, NumEntries);
       for (int j = 0; j < as<int> (NumEntries); ++j) {
-        if (myColsToZeroArrayRCP[indices[j]] == 1)
-          values[j] = STS::zero ();
+        if (myColsToZeroArrayRCP[indices(j)] == 1)
+          values(j) = STS::zero ();
       }
-      gl_StiffMatrix->replaceLocalValues (i, indices (), values ());
+      gl_StiffMatrix->replaceLocalValues (i, indices, values);
     } // for each (local) row of the global stiffness matrix
 
     // Zero the rows and add ones to diagonal.
     for (int i = 0; i < numBCNodes; ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (BCNodes[i]);
-      indices.resize (NumEntries);
-      values.resize (NumEntries);
-      gl_StiffMatrix->getLocalRowCopy (BCNodes[i], indices (), values (), NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      gl_StiffMatrix->getLocalRowCopy (BCNodes[i], indices, values, NumEntries);
       const GO globalRow = gl_StiffMatrix->getRowMap ()->getGlobalElement (BCNodes[i]);
       const LO localCol = gl_StiffMatrix->getColMap ()->getLocalElement (globalRow);
       for (int j = 0; j < as<int> (NumEntries); ++j) {
-        values[j] = STS::zero ();
-        if (indices[j] == localCol) {
-          values[j] = STS::one ();
+        values(j) = STS::zero ();
+        if (indices(j) == localCol) {
+          values(j) = STS::one ();
         }
       } // for each entry in the current row
-      gl_StiffMatrix->replaceLocalValues (BCNodes[i], indices (), values ());
+      gl_StiffMatrix->replaceLocalValues (BCNodes[i], indices, values);
     } // for each BC node
   }
 
diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson2D_p2_tpetra.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson2D_p2_tpetra.cpp
index bd9951f7537a..7d25c5bdae95 100644
--- a/packages/trilinoscouplings/examples/scaling/example_Poisson2D_p2_tpetra.cpp
+++ b/packages/trilinoscouplings/examples/scaling/example_Poisson2D_p2_tpetra.cpp
@@ -2297,15 +2297,14 @@ void Apply_Dirichlet_BCs(std::vector<int> &BCNodes, crs_matrix_type & A, multive
 
     xdata[lrid]=bdata[lrid] = solndata[lrid];
 
-    size_t numEntriesInRow = A.getNumEntriesInLocalRow(lrid);
-    Array<local_ordinal_type> cols(numEntriesInRow);
-    Array<scalar_type> vals(numEntriesInRow);
-    A.getLocalRowCopy(lrid, cols(), vals(), numEntriesInRow);
+    size_t numEntriesInRow = A.getNumEntriesInLocalRow(lrid);    typename crs_matrix_type::nonconst_local_inds_host_view_type cols("cols", numEntriesInRow);
+    typename crs_matrix_type::nonconst_values_host_view_type vals("vals", numEntriesInRow);
+    A.getLocalRowCopy(lrid, cols, vals, numEntriesInRow);
 
     for(int j=0; j<vals.size(); j++)
       vals[j] = (cols[j] == lrid) ? 1.0 : 0.0;
 
-    A.replaceLocalValues(lrid, cols(), vals());
+    A.replaceLocalValues(lrid, cols, vals);
   }
 
   A.fillComplete();
diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson2D_pn_tpetra.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson2D_pn_tpetra.cpp
index aec4c09d219b..407afd6ad0c2 100644
--- a/packages/trilinoscouplings/examples/scaling/example_Poisson2D_pn_tpetra.cpp
+++ b/packages/trilinoscouplings/examples/scaling/example_Poisson2D_pn_tpetra.cpp
@@ -2837,14 +2837,14 @@ void CreateLinearSystem(int numWorksets,
   }
 
   vector_type laplDiagOwned(rowMap, true);
-  Teuchos::ArrayView<const local_ordinal_type> indices;
-  Teuchos::ArrayView<const scalar_type> values;
+  typename crs_matrix_type::local_inds_host_view_type indices;
+  typename crs_matrix_type::values_host_view_type values;
   size_t numOwnedRows = rowMap->getNodeNumElements();
   for (size_t row=0; row<numOwnedRows; row++) {
     StiffMatrix.getLocalRowView(row, indices, values);
     size_t numIndices = indices.size();
     for (size_t j=0; j<numIndices; j++) {
-      size_t col = indices[j];
+      size_t col = indices(j);
       if (row == col) continue;
       laplDiagOwned.sumIntoLocalValue(row, 1/myDistance2(*coordsOwnedPlusShared, row, col));
     }
@@ -2948,14 +2948,14 @@ void Apply_Dirichlet_BCs(std::vector<int> &BCNodes, crs_matrix_type & A, multive
     xdata[lrid]=bdata[lrid] = solndata[lrid];
 
     size_t numEntriesInRow = A.getNumEntriesInLocalRow(lrid);
-    Array<local_ordinal_type> cols(numEntriesInRow);
-    Array<scalar_type> vals(numEntriesInRow);
-    A.getLocalRowCopy(lrid, cols(), vals(), numEntriesInRow);
+    typename crs_matrix_type::nonconst_local_inds_host_view_type cols("cols", numEntriesInRow);
+    typename crs_matrix_type::nonconst_values_host_view_type vals("vals", numEntriesInRow);
+    A.getLocalRowCopy(lrid, cols, vals, numEntriesInRow);
     
     for(int j=0; j<vals.size(); j++)
-      vals[j] = (cols[j] == lrid) ? 1.0 : 0.0;
+      vals(j) = (cols(j) == lrid) ? 1.0 : 0.0;
 
-    A.replaceLocalValues(lrid, cols(), vals());
+    A.replaceLocalValues(lrid, cols, vals);
   }
 
   A.fillComplete();
diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Tpetra.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Tpetra.cpp
index 5a0e53ed4752..45f6df718426 100644
--- a/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Tpetra.cpp
+++ b/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Tpetra.cpp
@@ -1243,39 +1243,37 @@ int main(int argc, char *argv[]) {
     // now import from the global column map to the local column map
     myColsToZeroT->doImport(*globColsToZeroT,*ExporterT,Tpetra::INSERT);
 
-    Teuchos::Array<double> values;
-    Teuchos::Array<int> indices;
     Teuchos::ArrayRCP<const int> myColsToZeroArrayRCP = myColsToZeroT->getData(0);
     size_t NumEntries = 0;
     /* Zero the columns */
     for (size_t i=0; i < gl_StiffMatrixT->getNodeNumRows(); i++) {
        NumEntries = gl_StiffMatrixT->getNumEntriesInLocalRow(i);
-       values.resize(NumEntries);
-       indices.resize(NumEntries);
-       gl_StiffMatrixT->getLocalRowCopy(i, indices(), values(), NumEntries);
+       typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+       typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+       gl_StiffMatrixT->getLocalRowCopy(i, indices, values, NumEntries);
        //Matrix.ExtractMyRowView(i,numEntries,vals,cols);
        for (size_t j=0; j < NumEntries; j++){
           //Teuchos::ArrayRCP<const int> myColsToZeroj = myColsToZeroT->getData();
-          if (myColsToZeroArrayRCP[indices[j]] == 1)
-              values[j] = 0.0;
+	 if (myColsToZeroArrayRCP[indices(j)] == 1)
+	   values(j) = 0.0;
        }
-       gl_StiffMatrixT->replaceLocalValues(i, indices(), values());
+       gl_StiffMatrixT->replaceLocalValues(i, indices, values);
     }/*end for*/
 
     /* Zero the rows, add ones to diagonal */
     for (size_t i = 0; i<(size_t)numBCNodes; i++) {
       NumEntries = gl_StiffMatrixT->getNumEntriesInLocalRow(BCNodes[i]);
-      indices.resize(NumEntries);
-      values.resize(NumEntries);
-      gl_StiffMatrixT->getLocalRowCopy(BCNodes[i], indices(), values(), NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      gl_StiffMatrixT->getLocalRowCopy(BCNodes[i], indices, values, NumEntries);
       int globalRow = gl_StiffMatrixT->getRowMap()->getGlobalElement(BCNodes[i]);
       int localCol = gl_StiffMatrixT->getColMap()->getLocalElement(globalRow);
       for (size_t j = 0; j<NumEntries; j++){
-         values[j] = 0.0;
-         if (indices[j] == localCol)
-            values[j] = 1.0;
+	values(j) = 0.0;
+	if (indices(j) == localCol)
+	  values(j) = 1.0;
       }
-      gl_StiffMatrixT->replaceLocalValues(BCNodes[i], indices(), values());
+      gl_StiffMatrixT->replaceLocalValues(BCNodes[i], indices, values);
    }
    }
    }

From 6de8a4f07904ac7392820bbbe5e35f53ba4fe2da Mon Sep 17 00:00:00 2001
From: Christian Glusa <caglusa@sandia.gov>
Date: Thu, 22 Jul 2021 11:29:56 -0600
Subject: [PATCH 04/15] Tpetra: Add/fix convert method for MV and CrsMatrix

---
 .../cmake/ExplicitInstantiationSupport.cmake  |  46 ++---
 packages/tpetra/core/src/CMakeLists.txt       | 172 ++++++++++++++++++
 .../src/Tpetra_ETI_SOUT_SIN_LO_GO_NT.tmpl     |  80 ++++++++
 .../core/src/Tpetra_MultiVector_decl.hpp      |   6 +
 .../core/src/Tpetra_MultiVector_def.hpp       |  22 +++
 5 files changed, 303 insertions(+), 23 deletions(-)
 create mode 100644 packages/tpetra/core/src/Tpetra_ETI_SOUT_SIN_LO_GO_NT.tmpl

diff --git a/packages/tpetra/core/cmake/ExplicitInstantiationSupport.cmake b/packages/tpetra/core/cmake/ExplicitInstantiationSupport.cmake
index 6c885edd9e81..bdc3da2a2c55 100644
--- a/packages/tpetra/core/cmake/ExplicitInstantiationSupport.cmake
+++ b/packages/tpetra/core/cmake/ExplicitInstantiationSupport.cmake
@@ -76,7 +76,7 @@ TRIBITS_ETI_TYPE_EXPANSION(${PACKAGE_NAME}_ETI_EXCLUDE_SET_GORD_NOT_INT "S=.*" "
 # Scalar: ${PACKAGE_NAME}_ETI_SCALARS
 # LocalOrdinal: ${PACKAGE_NAME}_ETI_LORDS
 # GlobalOrdinal: ${PACKAGE_NAME}_ETI_GORDS
-# Node: ${PACKAGE_NAME}_ETI_NODES 
+# Node: ${PACKAGE_NAME}_ETI_NODES
 #
 # Note that the Scalar set from Tpetra includes the Scalar =
 # GlobalOrdinal case.  We have to exclude that explicitly in what
@@ -94,10 +94,10 @@ MESSAGE(STATUS "Enabled Node types:          ${${PACKAGE_NAME}_ETI_NODES}")
 # Construct the "type expansion" string that TriBITS' ETI system
 # expects.  Even if ETI is OFF, we will use this to generate macros
 # for instantiating tests.
-TRIBITS_ETI_TYPE_EXPANSION(SingleScalarInsts 
-  "S=${${PACKAGE_NAME}_ETI_SCALARS}" 
+TRIBITS_ETI_TYPE_EXPANSION(SingleScalarInsts
+  "S=${${PACKAGE_NAME}_ETI_SCALARS}"
   "N=${${PACKAGE_NAME}_ETI_NODES}"
-  "LO=${${PACKAGE_NAME}_ETI_LORDS}" 
+  "LO=${${PACKAGE_NAME}_ETI_LORDS}"
   "GO=${${PACKAGE_NAME}_ETI_GORDS}")
 
 # Set up the set of enabled type combinations, in a format that
@@ -114,10 +114,10 @@ IF(TpetraCore_ENABLE_EXPLICIT_INSTANTIATION)
   # enabling certain type combinations when ETI is OFF.
   MESSAGE(STATUS "Excluded type combinations: ${${PACKAGE_NAME}_ETI_EXCLUDE_SET}:${${PACKAGE_NAME}_ETI_EXCLUDE_SET_INT}")
 ELSE()
-  TRIBITS_ETI_TYPE_EXPANSION(${PACKAGE_NAME}_ETI_LIBRARYSET 
-    "S=${${PACKAGE_NAME}_ETI_SCALARS}" 
-    "N=${${PACKAGE_NAME}_ETI_NODES}" 
-    "LO=${${PACKAGE_NAME}_ETI_LORDS}" 
+  TRIBITS_ETI_TYPE_EXPANSION(${PACKAGE_NAME}_ETI_LIBRARYSET
+    "S=${${PACKAGE_NAME}_ETI_SCALARS}"
+    "N=${${PACKAGE_NAME}_ETI_NODES}"
+    "LO=${${PACKAGE_NAME}_ETI_LORDS}"
     "GO=${${PACKAGE_NAME}_ETI_GORDS}")
 ENDIF()
 MESSAGE(STATUS "Set of enabled types, before exclusions: ${${PACKAGE_NAME}_ETI_LIBRARYSET}")
@@ -128,8 +128,8 @@ MESSAGE(STATUS "Set of enabled types, before exclusions: ${${PACKAGE_NAME}_ETI_L
 # TpetraCore_ETIHelperMacros.h.in (in this directory).
 #
 TRIBITS_ETI_GENERATE_MACROS(
-    "${${PACKAGE_NAME}_ETI_FIELDS}" "${${PACKAGE_NAME}_ETI_LIBRARYSET}" 
-    "${${PACKAGE_NAME}_ETI_EXCLUDE_SET};${${PACKAGE_NAME}_ETI_EXCLUDE_SET_ORDINAL_SCALAR}"  
+    "${${PACKAGE_NAME}_ETI_FIELDS}" "${${PACKAGE_NAME}_ETI_LIBRARYSET}"
+    "${${PACKAGE_NAME}_ETI_EXCLUDE_SET};${${PACKAGE_NAME}_ETI_EXCLUDE_SET_ORDINAL_SCALAR}"
     list_of_manglings eti_typedefs
     "TPETRA_INSTANTIATE_SLGN_NO_ORDINAL_SCALAR(S,LO,GO,N)" TPETRA_ETIMACRO_SLGN_NO_ORDINAL_SCALAR
     "TPETRA_INSTANTIATE_SLG_NO_ORDINAL_SCALAR(S,LO,GO)"    TPETRA_ETIMACRO_SLG_NO_ORDINAL_SCALAR
@@ -137,13 +137,13 @@ TRIBITS_ETI_GENERATE_MACROS(
     "TPETRA_INSTANTIATE_SN_NO_ORDINAL_SCALAR(S,N)"        TPETRA_ETIMACRO_SN_NO_ORDINAL_SCALAR
     "TPETRA_INSTANTIATE_S_NO_ORDINAL_SCALAR(S)"            TPETRA_ETIMACRO_S_NO_ORDINAL_SCALAR)
 TRIBITS_ETI_GENERATE_MACROS(
-    "${${PACKAGE_NAME}_ETI_FIELDS}" "${${PACKAGE_NAME}_ETI_LIBRARYSET}" 
-    "${${PACKAGE_NAME}_ETI_EXCLUDE_SET};${${PACKAGE_NAME}_ETI_EXCLUDE_SET_INT}"  
+    "${${PACKAGE_NAME}_ETI_FIELDS}" "${${PACKAGE_NAME}_ETI_LIBRARYSET}"
+    "${${PACKAGE_NAME}_ETI_EXCLUDE_SET};${${PACKAGE_NAME}_ETI_EXCLUDE_SET_INT}"
     list_of_manglings eti_typedefs
-    "TPETRA_INSTANTIATE_GLGN(GO,LO,GO,N)"           TPETRA_ETIMACRO_GLGN 
+    "TPETRA_INSTANTIATE_GLGN(GO,LO,GO,N)"           TPETRA_ETIMACRO_GLGN
     "TPETRA_INSTANTIATE_LGN(LO,GO,N)"               TPETRA_ETIMACRO_LGN
-    "TPETRA_INSTANTIATE_GLG(GO,LO,GO)"              TPETRA_ETIMACRO_GLG  
-    "TPETRA_INSTANTIATE_LG(LO,GO)"                  TPETRA_ETIMACRO_LG 
+    "TPETRA_INSTANTIATE_GLG(GO,LO,GO)"              TPETRA_ETIMACRO_GLG
+    "TPETRA_INSTANTIATE_LG(LO,GO)"                  TPETRA_ETIMACRO_LG
     "TPETRA_INSTANTIATE_SL(S,LO)"                   TPETRA_ETIMACRO_SL
     "TPETRA_INSTANTIATE_SN(S,N)"                    TPETRA_ETIMACRO_SN
     "TPETRA_INSTANTIATE_GN(GO,N)"                   TPETRA_ETIMACRO_GN
@@ -151,27 +151,27 @@ TRIBITS_ETI_GENERATE_MACROS(
     "TPETRA_INSTANTIATE_L(LO)"                      TPETRA_ETIMACRO_L
     "TPETRA_INSTANTIATE_G(GO)"                      TPETRA_ETIMACRO_G
     "TPETRA_INSTANTIATE_N(N)"                       TPETRA_ETIMACRO_N
-    "TPETRA_INSTANTIATE_TSLGN(CS,DS,LO,GO,N)"       TPETRA_ETIMACRO_TSLGN 
+    "TPETRA_INSTANTIATE_TSLGN(CS,DS,LO,GO,N)"       TPETRA_ETIMACRO_TSLGN
     "TPETRA_INSTANTIATE_TSLG(CS,DS,LO,GO)"          TPETRA_ETIMACRO_TSLG
     "TPETRA_INSTANTIATE_CONVERT(SOUT,SIN,LO,GO,N)"  TPETRA_ETIMACRO_CONVERT
     "TPETRA_INSTANTIATE_CONVERT_SSL(SOUT,SIN,LO)"    TPETRA_ETIMACRO_CONVERT_SSL)
 
 #SET(${PACKAGE_NAME}_SCALAR_INT_ETI_FIELDS "LO|GO|N")
-#TRIBITS_ETI_TYPE_EXPANSION(${PACKAGE_NAME}_SCALAR_INT_ETI_LIBRARYSET 
-#  "LO=${${PACKAGE_NAME}_ETI_LORDS}" 
+#TRIBITS_ETI_TYPE_EXPANSION(${PACKAGE_NAME}_SCALAR_INT_ETI_LIBRARYSET
+#  "LO=${${PACKAGE_NAME}_ETI_LORDS}"
 #  "GO=${${PACKAGE_NAME}_ETI_GORDS}"
 #  "N=${${PACKAGE_NAME}_ETI_NODES}")
 
 
 # For Mutlivector and friends
 TRIBITS_ETI_GENERATE_MACROS(
-    "${${PACKAGE_NAME}_ETI_FIELDS}" "${${PACKAGE_NAME}_ETI_LIBRARYSET}" 
+    "${${PACKAGE_NAME}_ETI_FIELDS}" "${${PACKAGE_NAME}_ETI_LIBRARYSET}"
     "${${PACKAGE_NAME}_ETI_EXCLUDE_SET_GORD_NOT_INT}"
     list_of_manglings eti_typedefs
     "TPETRA_INSTANTIATE_GLGN_NO_INT(GO,LO,GO,N)"           TPETRA_ETIMACRO_GLGN_NO_INT)
 
 TRIBITS_ETI_GENERATE_MACROS(
-    "${${PACKAGE_NAME}_ETI_FIELDS}" "${${PACKAGE_NAME}_ETI_LIBRARYSET}" 
+    "${${PACKAGE_NAME}_ETI_FIELDS}" "${${PACKAGE_NAME}_ETI_LIBRARYSET}"
     "${${PACKAGE_NAME}_ETI_EXCLUDE_SET}"
     list_of_manglings eti_typedefs
     "TPETRA_INSTANTIATE_LLGN(LO,LO,GO,N)"           TPETRA_ETIMACRO_LLGN)
@@ -186,14 +186,14 @@ STRING(REPLACE "S="  "P=" ScalarToPacketSet "${${PACKAGE_NAME}_ETI_LIBRARYSET}")
 STRING(REGEX REPLACE "GO=([^{ ]+)" "GO=\\1 P=\\1" GlobalToPacketSet1 "${${PACKAGE_NAME}_ETI_LIBRARYSET}")
 STRING(REGEX REPLACE "GO={([^}]+)}" "GO={\\1} P={\\1}" GlobalToPacketSet2 "${${PACKAGE_NAME}_ETI_LIBRARYSET}")
 TRIBITS_ETI_GENERATE_MACROS(
-    "${${PACKAGE_NAME}_ETI_FIELDS}|P" "${ScalarToPacketSet};${GlobalToPacketSet1};${GlobalToPacketSet2}" 
-    "${${PACKAGE_NAME}_ETI_EXCLUDE_SET}"  
+    "${${PACKAGE_NAME}_ETI_FIELDS}|P" "${ScalarToPacketSet};${GlobalToPacketSet1};${GlobalToPacketSet2}"
+    "${${PACKAGE_NAME}_ETI_EXCLUDE_SET}"
     list_of_manglings eti_typedefs
     "TPETRA_INSTANTIATE_PLGN(P,LO,GO,N)"            TPETRA_ETIMACRO_PLGN)
 
 TRIBITS_ETI_TYPE_EXPANSION(Tpetra_DII   "S=double" "N=${${PACKAGE_NAME}_ETI_NODES}" "LO=int" "GO=int")
 TRIBITS_ETI_GENERATE_MACROS(
-    "${${PACKAGE_NAME}_ETI_FIELDS}" "${Tpetra_DII}" 
+    "${${PACKAGE_NAME}_ETI_FIELDS}" "${Tpetra_DII}"
     "${${PACKAGE_NAME}_ETI_EXCLUDE_SET}"
     list_of_manglings eti_typedefs
     "TPETRA_INSTANTIATE_DOUBLE_INT_INT_N(S,LO,GO,N)"      TPETRA_ETIMACRO_DII_NODE)
diff --git a/packages/tpetra/core/src/CMakeLists.txt b/packages/tpetra/core/src/CMakeLists.txt
index 1195b5634ce3..51054675d103 100644
--- a/packages/tpetra/core/src/CMakeLists.txt
+++ b/packages/tpetra/core/src/CMakeLists.txt
@@ -196,6 +196,63 @@ FUNCTION(TPETRA_PROCESS_ONE_SLGN_TEMPLATE OUTPUT_FILE TEMPLATE_FILE
   SET(${OUTPUT_FILE} ${OUT_FILE} PARENT_SCOPE)
 ENDFUNCTION(TPETRA_PROCESS_ONE_SLGN_TEMPLATE)
 
+# Function to generate one .cpp file for the given (ScalarOut,
+# ScalarIn, LocalOrdinal, GlobalOrdinal, Node) template parameter
+# combination, for run-time registration of a Tpetra class or function
+# over those template parameters. This is meant to be called by
+# TPETRA_PROCESS_ALL_CONVERT_TEMPLATES. This function takes the names
+# already mangled, to avoid unnecessary string processing overhead.
+#
+# OUTPUT_FILE [out] Name of the generated .cpp file.
+#
+# TEMPLATE_FILE [in] Name of the input .tmpl "template" file.  This
+#   function does string substitution in that file, using the input
+#   arguments of this function.  For example, @SC_MACRO_EXPR@ (Scalar
+#   macro expression) gets substituted for the value of this
+#   function's SC_MACRO_EXPR input argument.
+#
+# CLASS_NAME [in] Name of the Tpetra class (without the Tpetra
+#   namespace qualifier; must live in the Tpetra namespace),
+#   suitably mangled for use in a file name.
+#
+# CLASS_MACRO_NAME [in] Name of the Tpetra class, suitably mangled
+#   for use in a macro name.
+#
+# SOUT_MANGLED_NAME [in] Name of the output Scalar (SC) type, mangled for use
+#   as a macro argument (e.g., spaces and colons removed).  In the
+#   arguments that follow, LO stands for LocalOrdinal, GO for
+#   GlobalOrdinal, and NT for Node.
+#
+# SIN_MANGLED_NAME [in] Name of the input Scalar (SC) type, mangled for use
+#   as a macro argument (e.g., spaces and colons removed).  In the
+#   arguments that follow, LO stands for LocalOrdinal, GO for
+#   GlobalOrdinal, and NT for Node.
+#
+# SC_MACRO_EXPR [in] Expression that asks Tpetra
+#   whether the given Scalar (SC) type is supported.
+#
+# LO_MACRO_NAME [in] Name of the LocalOrdinal (LO) type,
+#   mangled for use as a macro argument.
+#
+# GO_MACRO_NAME [in] Name of the GlobalOrdinal (GO) type,
+#   mangled for use as a macro argument.
+#
+# NT_MACRO_NAME [in] Name of the Node (NT) type,
+#   mangled for use as a macro argument.
+#
+FUNCTION(TPETRA_PROCESS_ONE_CONVERT_TEMPLATE OUTPUT_FILE TEMPLATE_FILE
+  CLASS_NAME CLASS_MACRO_NAME SOUT_MANGLED_NAME SIN_MANGLED_NAME LO_MANGLED_NAME
+  GO_MANGLED_NAME NT_MANGLED_NAME SOUT_MACRO_EXPR SIN_MACRO_EXPR LO_MACRO_NAME
+  GO_MACRO_NAME NT_MACRO_NAME)
+
+  STRING(REPLACE "ETI_SOUT_SIN_LO_GO_NT.tmpl"
+    "${CLASS_NAME}_${SOUT_MACRO_NAME}_${SIN_MACRO_NAME}_${LO_MACRO_NAME}_${GO_MACRO_NAME}_${NT_MACRO_NAME}.cpp"
+    OUT_FILE "${TEMPLATE_FILE}")
+  CONFIGURE_FILE("${TEMPLATE_FILE}" "${OUT_FILE}")
+
+  SET(${OUTPUT_FILE} ${OUT_FILE} PARENT_SCOPE)
+ENDFUNCTION(TPETRA_PROCESS_ONE_CONVERT_TEMPLATE)
+
 # Function to generate .cpp files for ETI of a Tpetra class or
 # function, over all enabled Scalar, LocalOrdinal, GlobalOrdinal, and
 # Node template parameters.  We generate one .cpp file for each
@@ -279,6 +336,113 @@ FUNCTION(TPETRA_PROCESS_ALL_SLGN_TEMPLATES OUTPUT_FILES TEMPLATE_FILE
   SET(${OUTPUT_FILES} ${OUT_FILES} PARENT_SCOPE)
 ENDFUNCTION(TPETRA_PROCESS_ALL_SLGN_TEMPLATES)
 
+# Function to generate .cpp files for ETI of a Tpetra class or
+# function, over all enabled output and input Scalar types,
+# LocalOrdinal, GlobalOrdinal, and Node template parameters. We
+# generate one .cpp file for each (ScalarOut, ScalarIn, LocalOrdinal,
+# GlobalOrdinal, Node) type combination over which Tpetra does ETI.
+#
+# OUTPUT_FILES [out] List of the generated .cpp files.
+#
+# TEMPLATE_FILE [in] Name of the input .tmpl "template" file.  This
+#   function does string substitution in that file, using the input
+#   arguments of this function.  For example, @SC_MACRO_EXPR@ (Scalar
+#   macro expression) gets substituted for the value of this
+#   function's SC_MACRO_EXPR input argument.
+#
+# CLASS_NAME [in] Name of the Tpetra class (without namespace
+#   qualifiers; must live in the Tpetra namespace)
+#
+# CLASS_MACRO_NAME [in] Name of the Tpetra class, suitably mangled for
+#   use in a macro name.
+#
+# SOUT_TYPES [in] All output Scalar types over which to do ETI for the given
+#   class.  This may include Scalar = GlobalOrdinal and/or Scalar =
+#   int, if appropriate for that class.
+#
+# SIN_TYPES [in] All input Scalar types over which to do ETI for the given
+#   class.  This may include Scalar = GlobalOrdinal and/or Scalar =
+#   int, if appropriate for that class.
+#
+# LOCALORDINAL_TYPES [in] All LocalOrdinal types over which to do ETI
+#   for the given class.
+#
+# GLOBALORDINAL_TYPES [in] All GlobalOrdinal types over which to do
+#   ETI for the given class.
+#
+# NODE_TYPES [in] All Node types over which to do ETI for the given
+#   class.
+#
+# MUST_HAVE_SCALAR_INT [in] (Boolean) Whether the class must be
+#   instantiated with Scalar = int, even if int is not in the set of
+#   GlobalOrdinal types.
+FUNCTION(TPETRA_PROCESS_ALL_CONVERT_TEMPLATES OUTPUT_FILES TEMPLATE_FILE
+  CLASS_NAME CLASS_MACRO_NAME SOUT_TYPES SIN_TYPES LOCALORDINAL_TYPES
+  GLOBALORDINAL_TYPES NODE_TYPES MUST_HAVE_SCALAR_INT)
+
+  SET(OUT_FILES "")
+  FOREACH(NT ${NODE_TYPES})
+    TPETRA_MANGLE_TEMPLATE_PARAMETER(NT_MANGLED "${NT}")
+    TPETRA_NODE_MACRO_NAME(NT_MACRO_NAME "${NT}")
+    FOREACH(GO ${GLOBALORDINAL_TYPES})
+      TPETRA_MANGLE_TEMPLATE_PARAMETER(GO_MANGLED "${GO}")
+      TPETRA_SLG_MACRO_NAME(GO_MACRO_NAME "${GO}")
+      FOREACH(LO ${LOCALORDINAL_TYPES})
+        TPETRA_MANGLE_TEMPLATE_PARAMETER(LO_MANGLED "${LO}")
+        TPETRA_SLG_MACRO_NAME(LO_MACRO_NAME "${LO}")
+        FOREACH(SIN ${SIN_TYPES})
+          TPETRA_MANGLE_TEMPLATE_PARAMETER(SIN_MANGLED "${SIN}")
+          TPETRA_SLG_MACRO_NAME(SIN_MACRO_NAME "${SIN}")
+          TPETRA_SC_MACRO_EXPR(SIN_MACRO_EXPR "${SIN}" "${GO}" "${SC_MACRO_NAME}")
+
+          #MESSAGE(STATUS ">> SC = ${SC}, SC_MACRO_EXPR = ${SC_MACRO_EXPR}")
+
+          # If SC is NOT a GlobalOrdinal type of some kind (not
+          # necessarily the current GO), or if it is "int", process
+          # it.  Otherwise, then we only have to process it if it
+          # equals the current GO.
+          TPETRA_SC_IS_GO(IS_GO "${SIN}")
+          STRING(COMPARE EQUAL "${SIN}" "${GO}" SIN_IS_CURRENT_GO)
+          STRING(COMPARE EQUAL "${SIN}" "int" SIN_IS_INT)
+
+          IF ((MUST_HAVE_SCALAR_INT AND SIN_IS_INT) OR (NOT SIN_IS_GO OR SIN_IS_CURRENT_GO))
+
+            FOREACH(SOUT ${SOUT_TYPES})
+              TPETRA_MANGLE_TEMPLATE_PARAMETER(SOUT_MANGLED "${SOUT}")
+              TPETRA_SLG_MACRO_NAME(SOUT_MACRO_NAME "${SOUT}")
+              TPETRA_SC_MACRO_EXPR(SOUT_MACRO_EXPR "${SOUT}" "${GO}" "${SC_MACRO_NAME}")
+
+              #MESSAGE(STATUS ">> SC = ${SC}, SC_MACRO_EXPR = ${SC_MACRO_EXPR}")
+
+              # If SC is NOT a GlobalOrdinal type of some kind (not
+              # necessarily the current GO), or if it is "int", process
+              # it.  Otherwise, then we only have to process it if it
+              # equals the current GO.
+              TPETRA_SC_IS_GO(IS_GO "${SOUT}")
+              STRING(COMPARE EQUAL "${SOUT}" "${GO}" SOUT_IS_CURRENT_GO)
+              STRING(COMPARE EQUAL "${SOUT}" "int" SOUT_IS_INT)
+
+              IF ((MUST_HAVE_SCALAR_INT AND SOUT_IS_INT) OR (NOT SOUT_IS_GO OR SOUT_IS_CURRENT_GO))
+
+                TPETRA_PROCESS_ONE_CONVERT_TEMPLATE(OUT_FILE "${TEMPLATE_FILE}"
+                  "${CLASS_NAME}" "${CLASS_MACRO_NAME}" "${SOUT_MANGLED}" "${SIN_MANGLED}"
+                  "${LO_MANGLED}" "${GO_MANGLED}" "${NT_MANGLED}"
+                  "${SOUT_MACRO_EXPR}" "${SIN_MACRO_EXPR}" "${LO_MACRO_NAME}" "${GO_MACRO_NAME}"
+                  "${NT_MACRO_NAME}")
+                LIST(APPEND OUT_FILES ${OUT_FILE})
+              ENDIF()
+            ENDFOREACH() # SOUT
+          ENDIF()
+        ENDFOREACH() # SIN
+      ENDFOREACH() # LO
+    ENDFOREACH() # GO
+  ENDFOREACH() # NT
+
+  # This is the standard CMake idiom for setting an output variable so
+  # that the caller can see the result.
+  SET(${OUTPUT_FILES} ${OUT_FILES} PARENT_SCOPE)
+ENDFUNCTION(TPETRA_PROCESS_ALL_CONVERT_TEMPLATES)
+
 # Function to generate one .cpp file for the given (LocalOrdinal,
 # GlobalOrdinal, Node) template parameter combination, for run-time
 # registration of a Tpetra class or function over those template
@@ -604,6 +768,14 @@ IF (${PACKAGE_NAME}_ENABLE_EXPLICIT_INSTANTIATION)
   TPETRA_PROCESS_ALL_SLGN_TEMPLATES(REPLACEDIAGONALCRSMATRIX_OUTPUT_FILES "Tpetra_ETI_SC_LO_GO_NT.tmpl" "replaceDiagonalCrsMatrix" "REPLACEDIAGONALCRSMATRIX" "${CrsMatrix_ETI_SCALARS}" "${TpetraCore_ETI_LORDS}" "${TpetraCore_ETI_GORDS}" "${TpetraCore_ETI_NODES}" FALSE)
   LIST(APPEND SOURCES ${REPLACEDIAGONALCRSMATRIX_OUTPUT_FILES})
 
+  # Generate ETI .cpp files for Tpetra::CrsMatrix::convert.
+  TPETRA_PROCESS_ALL_CONVERT_TEMPLATES(CRSMATRIX_CONVERT_OUTPUT_FILES "Tpetra_ETI_SOUT_SIN_LO_GO_NT.tmpl" "CrsMatrix" "CRSMATRIX_CONVERT" "${TpetraCore_ETI_SCALARS_NO_ORDS}" "${TpetraCore_ETI_SCALARS_NO_ORDS}" "${TpetraCore_ETI_LORDS}" "${TpetraCore_ETI_GORDS}" "${TpetraCore_ETI_NODES}" FALSE)
+  LIST(APPEND SOURCES ${CRSMATRIX_CONVERT_OUTPUT_FILES})
+
+  # Generate ETI .cpp files for Tpetra::MultiVector::convert.
+  TPETRA_PROCESS_ALL_CONVERT_TEMPLATES(MULTIVECTOR_CONVERT_OUTPUT_FILES "Tpetra_ETI_SOUT_SIN_LO_GO_NT.tmpl" "MultiVector" "MULTIVECTOR_CONVERT" "${TpetraCore_ETI_SCALARS_NO_ORDS}" "${TpetraCore_ETI_SCALARS_NO_ORDS}" "${TpetraCore_ETI_LORDS}" "${TpetraCore_ETI_GORDS}" "${TpetraCore_ETI_NODES}" FALSE)
+  LIST(APPEND SOURCES ${MULTIVECTOR_CONVERT_OUTPUT_FILES})
+
   IF (Tpetra_ENABLE_DEPRECATED_CODE)
 
     # Generate ETI .cpp files for Tpetra::Details::localDeepCopy*RowMatrix.
diff --git a/packages/tpetra/core/src/Tpetra_ETI_SOUT_SIN_LO_GO_NT.tmpl b/packages/tpetra/core/src/Tpetra_ETI_SOUT_SIN_LO_GO_NT.tmpl
new file mode 100644
index 000000000000..6fd128faa157
--- /dev/null
+++ b/packages/tpetra/core/src/Tpetra_ETI_SOUT_SIN_LO_GO_NT.tmpl
@@ -0,0 +1,80 @@
+/*
+// @HEADER
+// ***********************************************************************
+//
+//          Tpetra: Templated Linear Algebra Services Package
+//                 Copyright (2008) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// ************************************************************************
+// @HEADER
+*/
+
+// WARNING: This file is automatically generated by CMake, and written
+// to Trilinos' build (not source) directory.  DO NOT EDIT THIS FILE!
+// If you run CMake again, it will overwrite any changes that you
+// made.  Furthermore, it would be unwise to assume anything about
+// this file: it may disappear at any time, and its name, location, or
+// contents may change at any time.
+//
+// CMake takes each expression in the original .tmpl file enclosed by
+// "at" symbols ("a" with a circle around it), and replaces it in the
+// generated .cpp file with a string defined by Tpetra's CMake logic
+// (see CMakeLists.txt in this directory).  Thus, the original .tmpl
+// file is NOT syntactically correct C++, but the .cpp file generated
+// by running CMake on it IS a syntactically correct C++ file.
+
+#include "TpetraCore_config.h"
+
+#if defined(HAVE_TPETRA_EXPLICIT_INSTANTIATION) && defined (HAVE_TPETRA_INST_@NT_MACRO_NAME@) && defined (HAVE_TPETRA_INST_@LO_MACRO_NAME@_@GO_MACRO_NAME@) @SC_MACRO_EXPR@
+
+// We protect the contents of this file with macros, to assist
+// applications that circumvent Trilinos' build system.  (We do NOT
+// recommend this.)  That way, they can still build this file, but as
+// long as the macros have correct definitions, they won't build
+// anything that's not enabled.
+
+#include "KokkosCompat_ClassicNodeAPI_Wrapper.hpp"
+
+#include "Tpetra_@CLASS_NAME@_decl.hpp"
+#include "Tpetra_@CLASS_NAME@_def.hpp"
+#include "TpetraCore_ETIHelperMacros.h"
+
+namespace Tpetra {
+
+  TPETRA_ETI_MANGLING_TYPEDEFS()
+
+  TPETRA_@CLASS_MACRO_NAME@_INSTANT( @SOUT_MANGLED_NAME@, @SIN_MANGLED_NAME@, @LO_MANGLED_NAME@, @GO_MANGLED_NAME@, @NT_MANGLED_NAME@ )
+
+} // namespace Tpetra
+
+#endif // Whether we should build this specialization
diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp
index 388453b9f85b..4ad40a64880a 100644
--- a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp
+++ b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp
@@ -2372,6 +2372,12 @@ namespace Tpetra {
     void
     assign (const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& src);
 
+    /// \brief Return another MultiVector with the same entries, but
+    ///   converted to a different Scalar type \c T.
+    template <class T>
+    Teuchos::RCP<MultiVector<T, LocalOrdinal, GlobalOrdinal, Node> >
+    convert () const;
+
 
     // \brief Checks to see if the local length, number of vectors and size of Scalar type match
     /// \param src [in] MultiVector
diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp
index 67d75abd349c..bd5a642286cc 100644
--- a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp
+++ b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp
@@ -4784,6 +4784,21 @@ namespace Tpetra {
     }
   }
 
+  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+  template<class T>
+  Teuchos::RCP<MultiVector<T, LocalOrdinal, GlobalOrdinal, Node> >
+  MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
+  convert () const
+  {
+    using Teuchos::RCP;
+
+    auto newMV = Teuchos::rcp(new MultiVector<T,LocalOrdinal,GlobalOrdinal,Node>(this->getMap(),
+                                                                                 this->getNumVectors(),
+                                                                                 /*zeroOut=*/false));
+    Tpetra::deep_copy(*newMV, *this);
+    return newMV;
+  }
+
   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
   bool
   MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
@@ -4934,4 +4949,11 @@ namespace Tpetra {
 
 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
 
+
+#define TPETRA_MULTIVECTOR_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
+  \
+  template Teuchos::RCP< MultiVector< SO , LO , GO , NODE > >   \
+                MultiVector< SI , LO , GO , NODE >::convert< SO > () const;
+
+
 #endif // TPETRA_MULTIVECTOR_DEF_HPP

From 642971c543459bd9cc6719a41f5a821a469acaed Mon Sep 17 00:00:00 2001
From: Christian Glusa <caglusa@sandia.gov>
Date: Thu, 22 Jul 2021 11:31:23 -0600
Subject: [PATCH 05/15] Xpetra TpetraHalfPrecisionOperator: Use Tpetra's
 convert methods

---
 .../Xpetra_TpetraHalfPrecisionOperator.hpp    | 51 +++++++++++--------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/packages/xpetra/src/Operator/Xpetra_TpetraHalfPrecisionOperator.hpp b/packages/xpetra/src/Operator/Xpetra_TpetraHalfPrecisionOperator.hpp
index 19afd8c871a4..3f6b227cfc13 100644
--- a/packages/xpetra/src/Operator/Xpetra_TpetraHalfPrecisionOperator.hpp
+++ b/packages/xpetra/src/Operator/Xpetra_TpetraHalfPrecisionOperator.hpp
@@ -50,8 +50,10 @@
 
 #include <Teuchos_ScalarTraits.hpp>
 
+#include <Tpetra_CrsMatrix.hpp>
 #include <Xpetra_TpetraOperator.hpp>
 #include <Xpetra_MultiVector.hpp>
+#include <Xpetra_CrsMatrixWrap.hpp>
 #include <Xpetra_MultiVectorFactory.hpp>
 
 namespace Xpetra {
@@ -59,35 +61,40 @@ namespace Xpetra {
   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
   RCP<Xpetra::Matrix<typename Teuchos::ScalarTraits<Scalar>::halfPrecision, LocalOrdinal, GlobalOrdinal, Node> >
   convertToHalfPrecision(RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& A) {
-    typedef typename Teuchos::ScalarTraits<Scalar>::halfPrecision                                 HalfScalar;
-    typedef typename Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_type LM;
-    typedef Xpetra::Matrix<HalfScalar,LocalOrdinal,GlobalOrdinal,Node>                            MatrixHalf;
-    typedef Xpetra::CrsMatrix<HalfScalar,LocalOrdinal,GlobalOrdinal,Node>                         CrsHalf;
-    typedef Xpetra::TpetraCrsMatrix<HalfScalar,LocalOrdinal,GlobalOrdinal,Node>                   tCrsHalf;
-    typedef typename CrsHalf::local_matrix_type                                                    LMhalf;
-
-    LM lclA = A->getLocalMatrix();
-    LMhalf newLclA = LMhalf("new A", lclA.graph);
-    Kokkos::deep_copy(newLclA.values, lclA.values);
-    RCP<CrsHalf> newCrsA = Teuchos::rcp(new tCrsHalf(newLclA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap()));
-    RCP<MatrixHalf> newA = Teuchos::rcp(new Xpetra::CrsMatrixWrap<HalfScalar,LocalOrdinal,GlobalOrdinal,Node>(newCrsA));
-    newA->setObjectLabel(A->getObjectLabel());
+#if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT)
+    using HalfScalar = typename Teuchos::ScalarTraits<Scalar>::halfPrecision;
+    typedef typename Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> XpCrs;
+
+    RCP<XpCrs> xpCrs = Teuchos::rcp_dynamic_cast<Xpetra::CrsMatrixWrap<Scalar,LocalOrdinal,GlobalOrdinal,Node> >(A, true)->getCrsMatrix();
+    auto tpCrs = Teuchos::rcp_dynamic_cast<Xpetra::TpetraCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> >(xpCrs, true)->getTpetra_CrsMatrix();
+    auto newTpCrs = tpCrs->template convert<HalfScalar>();
+    auto newXpCrs = Teuchos::rcp(new Xpetra::TpetraCrsMatrix<HalfScalar,LocalOrdinal,GlobalOrdinal,Node>(newTpCrs));
+    auto newA = Teuchos::rcp(new Xpetra::CrsMatrixWrap<HalfScalar,LocalOrdinal,GlobalOrdinal,Node>(Teuchos::rcp_dynamic_cast<Xpetra::CrsMatrix<HalfScalar,LocalOrdinal,GlobalOrdinal,Node>>(newXpCrs)));
+
     return newA;
+#else
+    TEUCHOS_TEST_FOR_EXCEPTION(true, Xpetra::Exceptions::RuntimeError,
+      "Xpetra::convertToHalfPrecision only available for Tpetra with SC=double and SC=float enabled");
+    TEUCHOS_UNREACHABLE_RETURN(false);
+#endif
   }
 
   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
   RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::halfPrecision, LocalOrdinal, GlobalOrdinal, Node> >
   convertToHalfPrecision(RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& X) {
-    typedef Xpetra::TpetraMultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>    tMV;
-    typedef typename Teuchos::ScalarTraits<Scalar>::halfPrecision                HalfScalar;
-    typedef Xpetra::MultiVector<HalfScalar,LocalOrdinal,GlobalOrdinal,Node>       MVHalf;
-    typedef Xpetra::TpetraMultiVector<HalfScalar,LocalOrdinal,GlobalOrdinal,Node> tMVHalf;
-
-    RCP<MVHalf> newX = Xpetra::MultiVectorFactory<HalfScalar,LocalOrdinal,GlobalOrdinal,Node>::Build(X->getMap(),
-                                                                                                     X->getNumVectors());
-    Tpetra::deep_copy(*Teuchos::rcp_dynamic_cast<tMVHalf>(newX)->getTpetra_MultiVector(),
-                      *Teuchos::rcp_dynamic_cast<tMV>(X)->getTpetra_MultiVector());
+#if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT)
+    using HalfScalar = typename Teuchos::ScalarTraits<Scalar>::halfPrecision;
+
+    auto tpX = toTpetra(*X);
+    auto newTpX = tpX.template convert<HalfScalar>();
+    auto newX = rcp(new Xpetra::TpetraMultiVector<HalfScalar,LocalOrdinal,GlobalOrdinal,Node>(newTpX));
+
     return newX;
+#else
+    TEUCHOS_TEST_FOR_EXCEPTION(true, Xpetra::Exceptions::RuntimeError,
+      "Xpetra::convertToHalfPrecision only available for Tpetra with SC=double and SC=float enabled");
+    TEUCHOS_UNREACHABLE_RETURN(false);
+#endif
   }
 
 /*!  @brief Wraps an existing halfer precision Xpetra::Operator as a Xpetra::Operator.

From a8c80c7102e3ebee6d3db7ac4f39576cc89da3b2 Mon Sep 17 00:00:00 2001
From: Matt Bettencourt <mbetten@sandia.gov>
Date: Fri, 23 Jul 2021 10:35:52 -0600
Subject: [PATCH 06/15] Fixed typo and warning

---
 ...TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp | 4 ++--
 ...TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp | 4 ++--
 .../TrilinosCouplings_TpetraIntrepidPoissonExample.cpp        | 4 ++--
 ...ilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp | 4 ++--
 .../examples/scaling/example_Poisson2D_p2_tpetra.cpp          | 4 ++--
 .../examples/scaling/example_Poisson2D_pn_tpetra.cpp          | 2 +-
 .../examples/scaling/example_Poisson_NoFE_Tpetra.cpp          | 4 ++--
 7 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp
index 849d8bc58ac2..bdd7ee20e347 100644
--- a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp
+++ b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp
@@ -1124,7 +1124,7 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     // Zero the columns corresponding to Dirichlet BCs.
     for (LO i = 0; i < as<int> (gl_StiffMatrix->getNodeNumRows ()); ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (i);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
       typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
       gl_StiffMatrix->getLocalRowCopy (i, indices, values, NumEntries);
       for (int j = 0; j < as<int> (NumEntries); ++j) {
@@ -1137,7 +1137,7 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     // Zero the rows and add ones to diagonal.
     for (int i = 0; i < numBCNodes; ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (BCNodes[i]);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
       typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
       gl_StiffMatrix->getLocalRowCopy (BCNodes[i], indices, values, NumEntries);
       const GO globalRow = gl_StiffMatrix->getRowMap ()->getGlobalElement (BCNodes[i]);
diff --git a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp
index c3ae654bc689..b3f48ae20b93 100644
--- a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp
+++ b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp
@@ -1126,7 +1126,7 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     // Zero the columns corresponding to Dirichlet BCs.
     for (LO i = 0; i < as<int> (gl_StiffMatrix->getNodeNumRows ()); ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (i);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
       typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
       gl_StiffMatrix->getLocalRowCopy (i, indices, values, NumEntries);
       for (int j = 0; j < as<int> (NumEntries); ++j) {
@@ -1139,7 +1139,7 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     // Zero the rows and add ones to diagonal.
     for (int i = 0; i < numBCNodes; ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (BCNodes[i]);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
       typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
       gl_StiffMatrix->getLocalRowCopy (BCNodes[i], indices, values, NumEntries);
       const GO globalRow = gl_StiffMatrix->getRowMap ()->getGlobalElement (BCNodes[i]);
diff --git a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidPoissonExample.cpp b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidPoissonExample.cpp
index 43bcb0b210fc..1e3577558f7c 100644
--- a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidPoissonExample.cpp
+++ b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidPoissonExample.cpp
@@ -1361,7 +1361,7 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     // Zero the columns corresponding to Dirichlet BCs.
     for (LO i = 0; i < as<int> (gl_StiffMatrix->getNodeNumRows ()); ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (i);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
       typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
       gl_StiffMatrix->getLocalRowCopy (i, indices, values, NumEntries);
       for (int j = 0; j < as<int> (NumEntries); ++j) {
@@ -1374,7 +1374,7 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     // Zero the rows and add ones to diagonal.
     for (int i = 0; i < numBCNodes; ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (BCNodes[i]);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
       typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
       gl_StiffMatrix->getLocalRowCopy (BCNodes[i], indices, values, NumEntries);
       const GO globalRow = gl_StiffMatrix->getRowMap ()->getGlobalElement (BCNodes[i]);
diff --git a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp
index 32f34449273e..4f01bf936f0e 100644
--- a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp
+++ b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp
@@ -1118,7 +1118,7 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     // Zero the columns corresponding to Dirichlet BCs.
     for (LO i = 0; i < as<int> (gl_StiffMatrix->getNodeNumRows ()); ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (i);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
       typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
       gl_StiffMatrix->getLocalRowCopy (i, indices, values, NumEntries);
       for (int j = 0; j < as<int> (NumEntries); ++j) {
@@ -1131,7 +1131,7 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     // Zero the rows and add ones to diagonal.
     for (int i = 0; i < numBCNodes; ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (BCNodes[i]);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
       typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
       gl_StiffMatrix->getLocalRowCopy (BCNodes[i], indices, values, NumEntries);
       const GO globalRow = gl_StiffMatrix->getRowMap ()->getGlobalElement (BCNodes[i]);
diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson2D_p2_tpetra.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson2D_p2_tpetra.cpp
index 7d25c5bdae95..20c623736557 100644
--- a/packages/trilinoscouplings/examples/scaling/example_Poisson2D_p2_tpetra.cpp
+++ b/packages/trilinoscouplings/examples/scaling/example_Poisson2D_p2_tpetra.cpp
@@ -2301,8 +2301,8 @@ void Apply_Dirichlet_BCs(std::vector<int> &BCNodes, crs_matrix_type & A, multive
     typename crs_matrix_type::nonconst_values_host_view_type vals("vals", numEntriesInRow);
     A.getLocalRowCopy(lrid, cols, vals, numEntriesInRow);
 
-    for(int j=0; j<vals.size(); j++)
-      vals[j] = (cols[j] == lrid) ? 1.0 : 0.0;
+    for(size_t j=0; j<vals.extent(0); j++)
+      vals(j) = (cols(j) == lrid) ? 1.0 : 0.0;
 
     A.replaceLocalValues(lrid, cols, vals);
   }
diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson2D_pn_tpetra.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson2D_pn_tpetra.cpp
index 407afd6ad0c2..47342b258505 100644
--- a/packages/trilinoscouplings/examples/scaling/example_Poisson2D_pn_tpetra.cpp
+++ b/packages/trilinoscouplings/examples/scaling/example_Poisson2D_pn_tpetra.cpp
@@ -2952,7 +2952,7 @@ void Apply_Dirichlet_BCs(std::vector<int> &BCNodes, crs_matrix_type & A, multive
     typename crs_matrix_type::nonconst_values_host_view_type vals("vals", numEntriesInRow);
     A.getLocalRowCopy(lrid, cols, vals, numEntriesInRow);
     
-    for(int j=0; j<vals.size(); j++)
+    for(size_t j=0; j<vals.extent(0); j++)
       vals(j) = (cols(j) == lrid) ? 1.0 : 0.0;
 
     A.replaceLocalValues(lrid, cols, vals);
diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Tpetra.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Tpetra.cpp
index 45f6df718426..123326496ec4 100644
--- a/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Tpetra.cpp
+++ b/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Tpetra.cpp
@@ -1248,7 +1248,7 @@ int main(int argc, char *argv[]) {
     /* Zero the columns */
     for (size_t i=0; i < gl_StiffMatrixT->getNodeNumRows(); i++) {
        NumEntries = gl_StiffMatrixT->getNumEntriesInLocalRow(i);
-       typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+       typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
        typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
        gl_StiffMatrixT->getLocalRowCopy(i, indices, values, NumEntries);
        //Matrix.ExtractMyRowView(i,numEntries,vals,cols);
@@ -1263,7 +1263,7 @@ int main(int argc, char *argv[]) {
     /* Zero the rows, add ones to diagonal */
     for (size_t i = 0; i<(size_t)numBCNodes; i++) {
       NumEntries = gl_StiffMatrixT->getNumEntriesInLocalRow(BCNodes[i]);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indicies", NumEntries);
+      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
       typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
       gl_StiffMatrixT->getLocalRowCopy(BCNodes[i], indices, values, NumEntries);
       int globalRow = gl_StiffMatrixT->getRowMap()->getGlobalElement(BCNodes[i]);

From 0a43b1fd1d70df62e7e18d919ab55c6b14ff09da Mon Sep 17 00:00:00 2001
From: Matt Bettencourt <mbetten@sandia.gov>
Date: Fri, 23 Jul 2021 11:05:06 -0600
Subject: [PATCH 07/15] Fixed review comments

---
 ...s_TpetraIntrepidHybridPoisson2DExample.cpp | 10 ++++---
 ...s_TpetraIntrepidHybridPoisson3DExample.cpp | 10 ++++---
 ...Couplings_TpetraIntrepidPoissonExample.cpp | 10 ++++---
 ...TpetraIntrepidStructuredPoissonExample.cpp | 10 ++++---
 .../scaling/example_Poisson_NoFE_Tpetra.cpp   | 28 ++++++++++---------
 5 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp
index bdd7ee20e347..ef6aa9730e99 100644
--- a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp
+++ b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp
@@ -1122,10 +1122,12 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     size_t NumEntries = 0;
 
     // Zero the columns corresponding to Dirichlet BCs.
+    typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", 1);
+    typename sparse_matrix_type::nonconst_values_host_view_type values("values", 1);
     for (LO i = 0; i < as<int> (gl_StiffMatrix->getNodeNumRows ()); ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (i);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
-      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      Kokkos::resize(indices, NumEntries);
+      Kokkos::resize(values, NumEntries);
       gl_StiffMatrix->getLocalRowCopy (i, indices, values, NumEntries);
       for (int j = 0; j < as<int> (NumEntries); ++j) {
         if (myColsToZeroArrayRCP[indices(j)] == 1)
@@ -1137,8 +1139,8 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     // Zero the rows and add ones to diagonal.
     for (int i = 0; i < numBCNodes; ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (BCNodes[i]);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
-      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      Kokkos::resize(indices, NumEntries);
+      Kokkos::resize(values, NumEntries);
       gl_StiffMatrix->getLocalRowCopy (BCNodes[i], indices, values, NumEntries);
       const GO globalRow = gl_StiffMatrix->getRowMap ()->getGlobalElement (BCNodes[i]);
       const LO localCol = gl_StiffMatrix->getColMap ()->getLocalElement (globalRow);
diff --git a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp
index b3f48ae20b93..611aef9d0761 100644
--- a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp
+++ b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp
@@ -1124,10 +1124,12 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     size_t NumEntries = 0;
 
     // Zero the columns corresponding to Dirichlet BCs.
+    typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", 1);
+    typename sparse_matrix_type::nonconst_values_host_view_type values("values", 1);
     for (LO i = 0; i < as<int> (gl_StiffMatrix->getNodeNumRows ()); ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (i);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
-      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      Kokkos::resize(indices, NumEntries);
+      Kokkos::resize(values, NumEntries);
       gl_StiffMatrix->getLocalRowCopy (i, indices, values, NumEntries);
       for (int j = 0; j < as<int> (NumEntries); ++j) {
         if (myColsToZeroArrayRCP[indices(j)] == 1)
@@ -1139,8 +1141,8 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     // Zero the rows and add ones to diagonal.
     for (int i = 0; i < numBCNodes; ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (BCNodes[i]);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
-      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      Kokkos::resize(indices, NumEntries);
+      Kokkos::resize(values, NumEntries);
       gl_StiffMatrix->getLocalRowCopy (BCNodes[i], indices, values, NumEntries);
       const GO globalRow = gl_StiffMatrix->getRowMap ()->getGlobalElement (BCNodes[i]);
       const LO localCol = gl_StiffMatrix->getColMap ()->getLocalElement (globalRow);
diff --git a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidPoissonExample.cpp b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidPoissonExample.cpp
index 1e3577558f7c..b115b90b0b79 100644
--- a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidPoissonExample.cpp
+++ b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidPoissonExample.cpp
@@ -1359,10 +1359,12 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     size_t NumEntries = 0;
 
     // Zero the columns corresponding to Dirichlet BCs.
+    typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", 1);
+    typename sparse_matrix_type::nonconst_values_host_view_type values("values", 1);
     for (LO i = 0; i < as<int> (gl_StiffMatrix->getNodeNumRows ()); ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (i);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
-      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      Kokkos::resize(indices, NumEntries);
+      Kokkos::resize(values, NumEntries);
       gl_StiffMatrix->getLocalRowCopy (i, indices, values, NumEntries);
       for (int j = 0; j < as<int> (NumEntries); ++j) {
         if (myColsToZeroArrayRCP[indices(j)] == 1)
@@ -1374,8 +1376,8 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     // Zero the rows and add ones to diagonal.
     for (int i = 0; i < numBCNodes; ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (BCNodes[i]);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
-      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      Kokkos::resize(indices, NumEntries);
+      Kokkos::resize(values, NumEntries);
       gl_StiffMatrix->getLocalRowCopy (BCNodes[i], indices, values, NumEntries);
       const GO globalRow = gl_StiffMatrix->getRowMap ()->getGlobalElement (BCNodes[i]);
       const LO localCol = gl_StiffMatrix->getColMap ()->getLocalElement (globalRow);
diff --git a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp
index 4f01bf936f0e..d7b32417b4c2 100644
--- a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp
+++ b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp
@@ -1116,10 +1116,12 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     size_t NumEntries = 0;
 
     // Zero the columns corresponding to Dirichlet BCs.
+    typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", 1);
+    typename sparse_matrix_type::nonconst_values_host_view_type values("values", 1);
     for (LO i = 0; i < as<int> (gl_StiffMatrix->getNodeNumRows ()); ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (i);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
-      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      Kokkos::resize(indices, NumEntries);
+      Kokkos::resize(values, NumEntries);
       gl_StiffMatrix->getLocalRowCopy (i, indices, values, NumEntries);
       for (int j = 0; j < as<int> (NumEntries); ++j) {
         if (myColsToZeroArrayRCP[indices(j)] == 1)
@@ -1131,8 +1133,8 @@ makeMatrixAndRightHandSide (Teuchos::RCP<sparse_matrix_type>& A,
     // Zero the rows and add ones to diagonal.
     for (int i = 0; i < numBCNodes; ++i) {
       NumEntries = gl_StiffMatrix->getNumEntriesInLocalRow (BCNodes[i]);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
-      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      Kokkos::resize(indices, NumEntries);
+      Kokkos::resize(values, NumEntries);
       gl_StiffMatrix->getLocalRowCopy (BCNodes[i], indices, values, NumEntries);
       const GO globalRow = gl_StiffMatrix->getRowMap ()->getGlobalElement (BCNodes[i]);
       const LO localCol = gl_StiffMatrix->getColMap ()->getLocalElement (globalRow);
diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Tpetra.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Tpetra.cpp
index 123326496ec4..9f99bfe84663 100644
--- a/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Tpetra.cpp
+++ b/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Tpetra.cpp
@@ -1246,25 +1246,27 @@ int main(int argc, char *argv[]) {
     Teuchos::ArrayRCP<const int> myColsToZeroArrayRCP = myColsToZeroT->getData(0);
     size_t NumEntries = 0;
     /* Zero the columns */
+    typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", 1);
+    typename sparse_matrix_type::nonconst_values_host_view_type values("values", 1);
     for (size_t i=0; i < gl_StiffMatrixT->getNodeNumRows(); i++) {
-       NumEntries = gl_StiffMatrixT->getNumEntriesInLocalRow(i);
-       typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
-       typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
-       gl_StiffMatrixT->getLocalRowCopy(i, indices, values, NumEntries);
-       //Matrix.ExtractMyRowView(i,numEntries,vals,cols);
-       for (size_t j=0; j < NumEntries; j++){
-          //Teuchos::ArrayRCP<const int> myColsToZeroj = myColsToZeroT->getData();
-	 if (myColsToZeroArrayRCP[indices(j)] == 1)
-	   values(j) = 0.0;
-       }
-       gl_StiffMatrixT->replaceLocalValues(i, indices, values);
+      NumEntries = gl_StiffMatrixT->getNumEntriesInLocalRow(i);
+      Kokkos::resize(indices, NumEntries);
+      Kokkos::resize(values, NumEntries);
+      gl_StiffMatrixT->getLocalRowCopy(i, indices, values, NumEntries);
+      //Matrix.ExtractMyRowView(i,numEntries,vals,cols);
+      for (size_t j=0; j < NumEntries; j++){
+	//Teuchos::ArrayRCP<const int> myColsToZeroj = myColsToZeroT->getData();
+	if (myColsToZeroArrayRCP[indices(j)] == 1)
+	  values(j) = 0.0;
+      }
+      gl_StiffMatrixT->replaceLocalValues(i, indices, values);
     }/*end for*/
 
     /* Zero the rows, add ones to diagonal */
     for (size_t i = 0; i<(size_t)numBCNodes; i++) {
       NumEntries = gl_StiffMatrixT->getNumEntriesInLocalRow(BCNodes[i]);
-      typename sparse_matrix_type::nonconst_local_inds_host_view_type indices("indices", NumEntries);
-      typename sparse_matrix_type::nonconst_values_host_view_type values("values", NumEntries);
+      Kokkos::resize(indices, NumEntries);
+      Kokkos::resize(values, NumEntries);
       gl_StiffMatrixT->getLocalRowCopy(BCNodes[i], indices, values, NumEntries);
       int globalRow = gl_StiffMatrixT->getRowMap()->getGlobalElement(BCNodes[i]);
       int localCol = gl_StiffMatrixT->getColMap()->getLocalElement(globalRow);

From 7524ee92f3509c083ce8f1dd3404463e1fce438a Mon Sep 17 00:00:00 2001
From: Christian Glusa <caglusa@sandia.gov>
Date: Thu, 22 Jul 2021 14:54:27 -0600
Subject: [PATCH 08/15] Tpetra: Remove failing check in CrsMatrix::convert

---
 packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp
index 214c06e43378..6b89763912da 100644
--- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp
+++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp
@@ -5565,11 +5565,6 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
        "of the conversion) is not fill complete.  You must first call "
        "fillComplete() (possibly with the domain and range Map) without an "
        "intervening call to resumeFill(), before you may call this method.");
-    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
-      (! this->isStaticGraph (), std::logic_error, "This matrix (the source "
-       "of the conversion) claims to be fill complete, but does not have a "
-       "static (i.e., constant) graph.  Please report this bug to the Tpetra "
-       "developers.");
 
     RCP<output_matrix_type> newMatrix
       (new output_matrix_type (this->getCrsGraph ()));

From 2e8770b0366c7157b186689554187138ca9651ed Mon Sep 17 00:00:00 2001
From: Christian Glusa <caglusa@sandia.gov>
Date: Thu, 22 Jul 2021 16:06:59 -0600
Subject: [PATCH 09/15] Tpetra: Add MixedScalarMultiplyOp

---
 .../Thyra_MueLuPreconditionerFactory_def.hpp  |   4 +
 .../core/src/Tpetra_MixedScalarMultiplyOp.hpp | 189 ++++++++++++++++++
 packages/tpetra/core/test/CMakeLists.txt      |   3 +-
 .../tpetra/core/test/Operator/CMakeLists.txt  |  17 ++
 .../MixedScalarMultiplyOp_UnitTests.cpp       | 185 +++++++++++++++++
 5 files changed, 397 insertions(+), 1 deletion(-)
 create mode 100644 packages/tpetra/core/src/Tpetra_MixedScalarMultiplyOp.hpp
 create mode 100644 packages/tpetra/core/test/Operator/CMakeLists.txt
 create mode 100644 packages/tpetra/core/test/Operator/MixedScalarMultiplyOp_UnitTests.cpp

diff --git a/packages/muelu/adapters/stratimikos/Thyra_MueLuPreconditionerFactory_def.hpp b/packages/muelu/adapters/stratimikos/Thyra_MueLuPreconditionerFactory_def.hpp
index 0b484635f452..ca8ad2b2a623 100644
--- a/packages/muelu/adapters/stratimikos/Thyra_MueLuPreconditionerFactory_def.hpp
+++ b/packages/muelu/adapters/stratimikos/Thyra_MueLuPreconditionerFactory_def.hpp
@@ -384,6 +384,10 @@ namespace Thyra {
       if (useHalfPrecision) {
 #if defined(HAVE_MUELU_TPETRA) && defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT)
 
+        // CAG: There is nothing special about the combination double-float,
+        //      except that I feel somewhat confident that Trilinos builds
+        //      with both scalar types.
+
         // convert to half precision
         RCP<XphMat> halfA = Xpetra::convertToHalfPrecision(A);
         const std::string userName = "user data";
diff --git a/packages/tpetra/core/src/Tpetra_MixedScalarMultiplyOp.hpp b/packages/tpetra/core/src/Tpetra_MixedScalarMultiplyOp.hpp
new file mode 100644
index 000000000000..0079e8f736f4
--- /dev/null
+++ b/packages/tpetra/core/src/Tpetra_MixedScalarMultiplyOp.hpp
@@ -0,0 +1,189 @@
+// @HEADER
+// ***********************************************************************
+//
+//          Tpetra: Templated Linear Algebra Services Package
+//                 Copyright (2008) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
+//
+// ************************************************************************
+// @HEADER
+
+#ifndef TPETRA_MIXEDSCALARMULTIPLYOP_HPP
+#define TPETRA_MIXEDSCALARMULTIPLYOP_HPP
+
+/// \file Tpetra_MixedScalarMultiplyOp.hpp
+///
+/// Declaration and definition of Tpetra::MixedScalarMultiplyOp and its
+/// nonmember constructor Tpetra::createMixedScalarMatrixMultiplyOp.
+
+#include "Tpetra_Operator.hpp"
+#include "Tpetra_Util.hpp"
+#include "Tpetra_Details_Behavior.hpp"
+#include "Tpetra_Details_Profiling.hpp"
+
+namespace Tpetra {
+
+  /// \brief A class for wrapping an Operator of one Scalar type into
+  ///        an Operator of another Scalar type.
+  ///
+  /// \tparam Scalar The type of the entries of the input and output
+  ///   MultiVector (see apply()).  Same as the first template
+  ///   parameter of Operator.
+  ///
+  /// \tparam OpScalar The type of the entries of the wrapped Operator.
+  ///
+  /// \tparam LocalOrdinal The second template parameter of Operator.
+  ///
+  /// \tparam GlobalOrdinal The third template parameter of Operator.
+  ///
+  /// \tparam Node The fourth template parameter of Operator.
+  template <class Scalar,
+            class OpScalar,
+            class LocalOrdinal,
+            class GlobalOrdinal,
+            class Node>
+  class MixedScalarMultiplyOp :
+    public Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>
+  {
+  public:
+    //! The specialization of CrsMatrix which this class wraps.
+    using op_type =
+      Operator<OpScalar, LocalOrdinal, GlobalOrdinal, Node>;
+    //! The specialization of Map which this class uses.
+    using map_type = Map<LocalOrdinal, GlobalOrdinal, Node>;
+
+  public:
+    //! @name Constructor and destructor
+    //@{
+
+    /// \brief Constructor
+    ///
+    /// \param A [in] The Operator to wrap as an
+    ///   <tt>Operator<Scalar, ...></tt>.
+    MixedScalarMultiplyOp (const Teuchos::RCP<const op_type>& A) :
+      op_ (A)
+    {
+      Allocate(1);
+    }
+
+    //! Destructor (virtual for memory safety of derived classes).
+    ~MixedScalarMultiplyOp () override = default;
+
+    //@}
+    //! @name Methods implementing Operator
+    //@{
+
+    /// \brief Compute <tt>Y = beta*Y + alpha*Op(A)*X</tt>, where
+    ///   <tt>Op(A)</tt> is either A, \f$A^T\f$, or \f$A^H\f$.
+    void
+    apply (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X,
+           MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y,
+           Teuchos::ETransp mode = Teuchos::NO_TRANS,
+           Scalar alpha = Teuchos::ScalarTraits<Scalar>::one (),
+           Scalar beta = Teuchos::ScalarTraits<Scalar>::zero ()) const override
+    {
+      TEUCHOS_TEST_FOR_EXCEPTION
+        (X.getNumVectors () != Y.getNumVectors (), std::runtime_error,
+         Teuchos::typeName (*this) << "::apply(X,Y): X and Y must have the same number of vectors.");
+
+      if (X.getNumVectors() != X_->getNumVectors()) {
+        Allocate(X.getNumVectors());
+      }
+
+      Tpetra::deep_copy(*X_, X);
+      op_->apply (*X_, *Y_, mode, alpha, beta);
+      Tpetra::deep_copy(Y, *Y_);
+    }
+
+    /// \brief Whether this Operator's apply() method can apply the
+    ///   transpose or conjugate transpose.
+    ///
+    /// This is always true, since it is true for the CrsMatrix that
+    /// this object wraps.
+    bool hasTransposeApply() const override {
+      return true;
+    }
+
+    //! The domain Map of this Operator.
+    Teuchos::RCP<const map_type> getDomainMap () const override {
+      return op_->getDomainMap ();
+    }
+
+    //! The range Map of this Operator.
+    Teuchos::RCP<const map_type> getRangeMap () const override {
+      return op_->getRangeMap ();
+    }
+
+    //@}
+
+  protected:
+    typedef MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> MV;
+
+    //! The underlying CrsMatrix object.
+    const Teuchos::RCP<const op_type> op_;
+    mutable Teuchos::RCP<MV> X_, Y_;
+
+    void
+    Allocate(int numVecs) const {
+      X_ = rcp(new MV(op_->getDomainMap(),numVecs));
+      Y_ = rcp(new MV(op_->getRangeMap(),numVecs));
+    }
+
+  };
+
+  /// \brief Non-member function to create a MixedScalarMultiplyOp.
+  /// \relatesalso MixedScalarMultiplyOp
+  ///
+  /// The function has the same template parameters of MixedScalarMultiplyOp.
+  ///
+  /// \param A [in] The Operator instance to wrap in an MixedScalarMultiplyOp.
+  /// \return The MixedScalarMultiplyOp wrapper for the given Operator.
+  template <class Scalar,
+            class OpScalar,
+            class LocalOrdinal,
+            class GlobalOrdinal,
+            class Node>
+  Teuchos::RCP<
+    MixedScalarMultiplyOp<Scalar, OpScalar, LocalOrdinal, GlobalOrdinal, Node> >
+  createMixedScalarMultiplyOp (const Teuchos::RCP<
+    const Operator<OpScalar, LocalOrdinal, GlobalOrdinal, Node> >& A)
+  {
+    typedef MixedScalarMultiplyOp<Scalar, OpScalar, LocalOrdinal,
+      GlobalOrdinal, Node> op_type;
+    return Teuchos::rcp (new op_type (A));
+  }
+
+} // end of namespace Tpetra
+
+#endif // TPETRA_MIXEDSCALARMULTIPLYOP_HPP
diff --git a/packages/tpetra/core/test/CMakeLists.txt b/packages/tpetra/core/test/CMakeLists.txt
index 88df13f2e6c0..b44e99526514 100644
--- a/packages/tpetra/core/test/CMakeLists.txt
+++ b/packages/tpetra/core/test/CMakeLists.txt
@@ -9,7 +9,7 @@ ADD_SUBDIRECTORIES(
   Block
   BugTests
   Comm
-  Core  
+  Core
   CrsGraph
   CrsMatrix
   Directory
@@ -27,6 +27,7 @@ ADD_SUBDIRECTORIES(
   Merge
   MultiVector
   Node
+  Operator
   PerformanceCGSolve
   Sort
   Utils
diff --git a/packages/tpetra/core/test/Operator/CMakeLists.txt b/packages/tpetra/core/test/Operator/CMakeLists.txt
new file mode 100644
index 000000000000..56c8cc276627
--- /dev/null
+++ b/packages/tpetra/core/test/Operator/CMakeLists.txt
@@ -0,0 +1,17 @@
+SET(ARGS "--filedir=${CMAKE_CURRENT_BINARY_DIR}/")
+
+TRIBITS_ADD_EXECUTABLE_AND_TEST(
+  MixedScalarMultiplyOp_UnitTests
+  SOURCES
+    MixedScalarMultiplyOp_UnitTests
+    ${TEUCHOS_STD_UNIT_TEST_MAIN}
+  ARGS ""
+  COMM serial mpi
+  STANDARD_PASS_OUTPUT
+  )
+
+
+SET(TIMING_INSTALLS "")
+
+INSTALL(TARGETS ${TIMING_INSTALLS}
+        RUNTIME DESTINATION "${${PROJECT_NAME}_INSTALL_RUNTIME_DIR}")
diff --git a/packages/tpetra/core/test/Operator/MixedScalarMultiplyOp_UnitTests.cpp b/packages/tpetra/core/test/Operator/MixedScalarMultiplyOp_UnitTests.cpp
new file mode 100644
index 000000000000..087dda0ad9b7
--- /dev/null
+++ b/packages/tpetra/core/test/Operator/MixedScalarMultiplyOp_UnitTests.cpp
@@ -0,0 +1,185 @@
+/*
+// @HEADER
+// ***********************************************************************
+//
+//          Tpetra: Templated Linear Algebra Services Package
+//                 Copyright (2008) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
+//
+// ************************************************************************
+// @HEADER
+*/
+
+#include "Tpetra_TestingUtilities.hpp"
+#include "Tpetra_CrsMatrix.hpp"
+#include "Tpetra_MixedScalarMultiplyOp.hpp"
+#include "Tpetra_MultiVector.hpp"
+#include "Tpetra_Details_getNumDiags.hpp"
+#include "Tpetra_Details_residual.hpp"
+
+namespace { // (anonymous)
+
+  using Tpetra::TestingUtilities::getDefaultComm;
+  using Tpetra::createContigMapWithNode;
+  using Teuchos::RCP;
+  using Teuchos::ArrayRCP;
+  using Teuchos::rcp;
+  using Teuchos::outArg;
+  using Teuchos::Comm;
+  using Teuchos::Array;
+  using Teuchos::ArrayView;
+  using Teuchos::tuple;
+  using Teuchos::NO_TRANS;
+  //using Teuchos::TRANS;
+  using Teuchos::CONJ_TRANS;
+  using std::endl;
+  typedef Tpetra::global_size_t GST;
+
+
+  //
+  // UNIT TESTS
+  //
+
+  ////
+  TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( MixedScalarMultiplyOp, Apply, LO, GO, Scalar, Node )
+  {
+    typedef Tpetra::CrsMatrix<Scalar,LO,GO,Node> MAT;
+    typedef Tpetra::Operator<Scalar,LO,GO,Node> OP;
+    typedef Teuchos::ScalarTraits<Scalar> ST;
+    typedef Tpetra::MultiVector<Scalar,LO,GO,Node> MV;
+    typedef typename ST::magnitudeType Mag;
+    typedef Teuchos::ScalarTraits<Mag> MT;
+    const size_t ONE = Teuchos::OrdinalTraits<size_t>::one();
+    const GST INVALID = Teuchos::OrdinalTraits<GST>::invalid();
+    // get a comm
+    RCP<const Comm<int> > comm = getDefaultComm();
+    const size_t numImages = comm->getSize();
+    const size_t myImageID = comm->getRank();
+    if (numImages < 2) return;
+    // create a Map
+    RCP<const Tpetra::Map<LO,GO,Node> > map =
+      createContigMapWithNode<LO,GO,Node>(INVALID,ONE,comm);
+    // create a multivector ones(n,1)
+    MV ones(map,ONE,false), threes1(map,ONE,false), threes2(map,ONE,false);
+    ones.putScalar(ST::one());
+    /* create the following matrix:
+       [2 1           ]
+       [1 1 1         ]
+       [  1 1 1       ]
+       [   . . .      ]
+       [     . . .    ]
+       [       . . .  ]
+       [         1 1 1]
+       [           1 2]
+     this matrix has an eigenvalue lambda=3, with eigenvector v = [1 ... 1]
+    */
+    MAT A(map,3);
+    if (myImageID == 0) {
+      Array<Scalar> vals(tuple<Scalar>(static_cast<Scalar>(2)*ST::one(), ST::one()));
+      Array<GO> cols(tuple<GO>(myImageID, myImageID+1));
+      A.insertGlobalValues(myImageID,cols(),vals());
+    }
+    else if (myImageID == numImages-1) {
+      Array<Scalar> vals(tuple<Scalar>(ST::one(), static_cast<Scalar>(2)*ST::one()));
+      Array<GO> cols(tuple<GO>(myImageID-1,myImageID));
+      A.insertGlobalValues(myImageID,cols(),vals());
+    }
+    else {
+      Array<Scalar> vals(3,ST::one());
+      Array<GO> cols(tuple<GO>(myImageID-1, myImageID, myImageID+1));
+      A.insertGlobalValues(myImageID,cols(),vals());
+    }
+    A.fillComplete();
+
+    // Now, wrap A into a MixedScalarMultiplyOp. We generally build
+    // only with a single scalar type, in which case we test wrapping
+    // a scalar operator as a scalar operator.
+    // But if we do have double and float, we can convert A to a float
+    // matrix and then wrap it back to double.
+    RCP<OP> mixOpA;
+#if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT)
+    if (typeid(Scalar) == typeid(double)) {
+      typedef Tpetra::MixedScalarMultiplyOp<double,float,LO,GO,Node> MixedOp;
+      mixOpA = Teuchos::rcp_dynamic_cast<OP>(rcp(new MixedOp(A.template convert<float>())),true);
+    } else
+#endif
+    {
+      typedef Tpetra::MixedScalarMultiplyOp<Scalar,Scalar,LO,GO,Node> MixedOp;
+      mixOpA = rcp(new MixedOp(rcpFromRef(A)));
+    }
+
+    // test the properties of mixOpA
+    TEST_EQUALITY_CONST(mixOpA->getDomainMap()->isSameAs(*A.getDomainMap()), true);
+    TEST_EQUALITY_CONST(mixOpA->getRangeMap()->isSameAs(*A.getRangeMap()), true);
+
+    // test the action
+    threes1.randomize();
+    threes2.randomize();
+    A.apply(ones,threes1);
+    mixOpA->apply(ones,threes2);
+
+    // Both vectors should now be all threes.
+    // Check threes2 against threes1.
+    threes2.update(-ST::one(),threes1,ST::one());
+    Array<Mag> norms(1), zeros(1,MT::zero());
+    threes2.norm1(norms());
+    if (ST::isOrdinal) {
+      TEST_COMPARE_ARRAYS(norms,zeros);
+    } else {
+      TEST_COMPARE_FLOATING_ARRAYS(norms,zeros,MT::zero());
+    }
+
+    // now, threes1 should be 3*ones
+    threes1.update(static_cast<Scalar>(-3)*ST::one(),ones,ST::one());
+    threes1.norm1(norms());
+    if (ST::isOrdinal) {
+      TEST_COMPARE_ARRAYS(norms,zeros);
+    } else {
+      TEST_COMPARE_FLOATING_ARRAYS(norms,zeros,MT::zero());
+    }
+  }
+
+//
+// INSTANTIATIONS
+//
+
+// FIXME_SYCL
+#define UNIT_TEST_GROUP( SCALAR, LO, GO, NODE ) \
+      TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MixedScalarMultiplyOp, Apply,  LO, GO, SCALAR, NODE )
+
+  TPETRA_ETI_MANGLING_TYPEDEFS()
+
+  TPETRA_INSTANTIATE_SLGN_NO_ORDINAL_SCALAR( UNIT_TEST_GROUP )
+
+}

From 4ed5ec54c4c5c715a8217f7fad302678c0a5d802 Mon Sep 17 00:00:00 2001
From: Christian Glusa <caglusa@sandia.gov>
Date: Thu, 22 Jul 2021 14:53:44 -0600
Subject: [PATCH 10/15] Stratimikos & Ifpack2: Add half-precision
 preconditioners, test Ifpack2 adapter

---
 ...Thyra_Ifpack2PreconditionerFactory_def.hpp |  69 +++-
 packages/stratimikos/test/CMakeLists.txt      |  36 +-
 packages/stratimikos/test/galeri_driver.cpp   | 318 ++++++++++++++++++
 .../stratimikos/test/stratimikos_jacobi.xml   |  83 +++++
 .../test/stratimikos_jacobi_half.xml          |  84 +++++
 5 files changed, 579 insertions(+), 11 deletions(-)
 create mode 100644 packages/stratimikos/test/galeri_driver.cpp
 create mode 100644 packages/stratimikos/test/stratimikos_jacobi.xml
 create mode 100644 packages/stratimikos/test/stratimikos_jacobi_half.xml

diff --git a/packages/ifpack2/adapters/thyra/Thyra_Ifpack2PreconditionerFactory_def.hpp b/packages/ifpack2/adapters/thyra/Thyra_Ifpack2PreconditionerFactory_def.hpp
index a917da8927c8..1f7999db0e85 100644
--- a/packages/ifpack2/adapters/thyra/Thyra_Ifpack2PreconditionerFactory_def.hpp
+++ b/packages/ifpack2/adapters/thyra/Thyra_Ifpack2PreconditionerFactory_def.hpp
@@ -53,6 +53,7 @@
 #include "Ifpack2_Parameters.hpp"
 
 #include "Tpetra_RowMatrix.hpp"
+#include "Tpetra_MixedScalarMultiplyOp.hpp"
 
 #include "Teuchos_TestForException.hpp"
 #include "Teuchos_Assert.hpp"
@@ -160,6 +161,10 @@ void Ifpack2PreconditionerFactory<MatrixType>::initializePrec(
 
   // Process parameter list
 
+  bool useHalfPrecision = false;
+  if (paramList_->isParameter("half precision"))
+    useHalfPrecision = paramList_->get<bool>("half precision");
+
   Teuchos::RCP<const Teuchos::ParameterList> constParamList = paramList_;
   if (constParamList.is_null ()) {
     constParamList = getValidParameters ();
@@ -195,11 +200,41 @@ void Ifpack2PreconditionerFactory<MatrixType>::initializePrec(
   }
   timer.start(true);
 
+  Teuchos::RCP<LinearOpBase<scalar_type> > thyraPrecOp;
+
   typedef Ifpack2::Preconditioner<scalar_type, local_ordinal_type, global_ordinal_type, node_type> Ifpack2Prec;
-  typedef Tpetra::RowMatrix<scalar_type, local_ordinal_type,
-    global_ordinal_type, node_type> row_matrix_type;
-  const Teuchos::RCP<Ifpack2Prec> concretePrecOp =
-    Ifpack2::Factory::create<row_matrix_type> (preconditionerType, tpetraFwdMatrix);
+  Teuchos::RCP<Ifpack2Prec> concretePrecOp;
+
+#if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT)
+  // CAG: There is nothing special about the combination double-float,
+  //      except that I feel somewhat confident that Trilinos builds
+  //      with both scalar types.
+  typedef typename Teuchos::ScalarTraits<scalar_type>::halfPrecision half_scalar_type;
+  typedef Ifpack2::Preconditioner<half_scalar_type, local_ordinal_type, global_ordinal_type, node_type> HalfIfpack2Prec;
+  Teuchos::RCP<HalfIfpack2Prec> concretePrecOpHalf;
+#endif
+
+  if (useHalfPrecision) {
+#if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT)
+    if (Teuchos::nonnull(out) && Teuchos::includesVerbLevel(verbLevel, Teuchos::VERB_LOW)) {
+      Teuchos::OSTab(out).o() << "> Creating half precision preconditioner\n";
+    }
+
+
+    typedef Tpetra::RowMatrix<half_scalar_type, local_ordinal_type,
+                              global_ordinal_type, node_type> row_matrix_type;
+    auto tpetraFwdMatrixHalf = tpetraFwdMatrix->template convert<half_scalar_type>();
+    concretePrecOpHalf =
+      Ifpack2::Factory::create<row_matrix_type> (preconditionerType, tpetraFwdMatrixHalf);
+#else
+    TEUCHOS_TEST_FOR_EXCEPT(true);
+#endif
+  } else {
+    typedef Tpetra::RowMatrix<scalar_type, local_ordinal_type,
+                              global_ordinal_type, node_type> row_matrix_type;
+    concretePrecOp =
+      Ifpack2::Factory::create<row_matrix_type> (preconditionerType, tpetraFwdMatrix);
+  }
 
   timer.stop();
   if (Teuchos::nonnull(out) && Teuchos::includesVerbLevel(verbLevel, Teuchos::VERB_LOW)) {
@@ -208,13 +243,25 @@ void Ifpack2PreconditionerFactory<MatrixType>::initializePrec(
 
   // Initialize and compute the initial preconditioner
 
-  concretePrecOp->setParameters(*packageParamList);
-  concretePrecOp->initialize();
-  concretePrecOp->compute();
+  if (useHalfPrecision) {
+#if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT)
+    concretePrecOpHalf->setParameters(*packageParamList);
+    concretePrecOpHalf->initialize();
+    concretePrecOpHalf->compute();
 
-  // Wrap concrete preconditioner
+    RCP<TpetraLinOp> wrappedOp = rcp(new Tpetra::MixedScalarMultiplyOp<scalar_type,half_scalar_type,local_ordinal_type,global_ordinal_type,node_type>(concretePrecOpHalf));
+
+    thyraPrecOp = Thyra::createLinearOp(wrappedOp);
+#endif
+  } else {
+    concretePrecOp->setParameters(*packageParamList);
+    concretePrecOp->initialize();
+    concretePrecOp->compute();
+
+    // Wrap concrete preconditioner
+    thyraPrecOp = Thyra::createLinearOp(Teuchos::RCP<TpetraLinOp>(concretePrecOp));
+  }
 
-  const Teuchos::RCP<LinearOpBase<scalar_type> > thyraPrecOp = Thyra::createLinearOp(Teuchos::RCP<TpetraLinOp>(concretePrecOp));
   defaultPrec->initializeUnspecified(thyraPrecOp);
 
   totalTimer.stop();
@@ -321,6 +368,10 @@ Ifpack2PreconditionerFactory<MatrixType>::getValidParameters() const
       "Number of rows/columns overlapped between subdomains in different"
       "\nprocesses in the additive Schwarz-type domain-decomposition preconditioners."
       );
+    validParamList->set(
+      "half precision", false,
+      "Whether a half-precision preconditioner should be built."
+      );
     Teuchos::ParameterList &packageParamList = validParamList->sublist(
       "Ifpack2 Settings", false,
       "Preconditioner settings that are passed onto the Ifpack preconditioners themselves."
diff --git a/packages/stratimikos/test/CMakeLists.txt b/packages/stratimikos/test/CMakeLists.txt
index 45ef0d5a10ec..266b44a52909 100644
--- a/packages/stratimikos/test/CMakeLists.txt
+++ b/packages/stratimikos/test/CMakeLists.txt
@@ -6,7 +6,7 @@ TRIBITS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${PACKAGE_SOURC
 ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_Amesos)
 IF (${PACKAGE_NAME}_ENABLE_Belos)
    TRIBITS_ADD_EXECUTABLE_AND_TEST(
-     Belos_GCRODR_strattest 
+     Belos_GCRODR_strattest
      SOURCES
        test_belos_gcrodr.cpp
        ${TEUCHOS_STD_UNIT_TEST_MAIN}
@@ -56,7 +56,7 @@ IF (${PACKAGE_NAME}_ENABLE_Amesos)
   TRIBITS_ADD_TEST(
     test_single_stratimikos_solver_driver
     NAME test_single_stratimikos_solver_driver_amesos
-    ARGS 
+    ARGS
       "--input-file=FourByFour.xml --show-timer-summary"
       "--input-file=FourByFour.amesos.xml --show-timer-summary"
     COMM serial mpi
@@ -165,9 +165,41 @@ ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_ThyraTpetraAdapters)
 ASSERT_DEFINED(${PROJECT_NAME}_ENABLE_Tpetra)
 IF (${PROJECT_NAME}_ENABLE_Tpetra)
   ASSERT_DEFINED(Tpetra_INST_DOUBLE)
+  ASSERT_DEFINED(Tpetra_INST_FLOAT)
   ASSERT_DEFINED(Tpetra_INST_INT_INT)
 ENDIF()
 
+IF (${PACKAGE_NAME}_ENABLE_Ifpack2 AND ${PACKAGE_NAME}_ENABLE_ThyraTpetraAdapters AND Tpetra_INST_DOUBLE)
+
+  TRIBITS_COPY_FILES_TO_BINARY_DIR(Stratimikos_cp
+    SOURCE_FILES stratimikos_jacobi.xml stratimikos_jacobi_half.xml
+    )
+
+  TRIBITS_ADD_EXECUTABLE(
+   GaleriDriver
+   SOURCES galeri_driver.cpp
+   COMM serial mpi
+   )
+
+ TRIBITS_ADD_TEST(
+   GaleriDriver
+   NAME "Galeri_Jacobi"
+   ARGS "--xml=stratimikos_jacobi.xml"
+   NUM_MPI_PROCS 4
+   )
+
+ IF (Tpetra_INST_FLOAT)
+   TRIBITS_ADD_TEST(
+     GaleriDriver
+     NAME "Galeri_Jacobi_HalfPrecision"
+     ARGS "--xml=stratimikos_jacobi_half.xml"
+     NUM_MPI_PROCS 4
+     )
+ ENDIF()
+
+ENDIF ()
+
+
 IF (${PACKAGE_NAME}_ENABLE_Ifpack2 AND ${PACKAGE_NAME}_ENABLE_ThyraTpetraAdapters AND Tpetra_INST_DOUBLE AND Tpetra_INST_INT_INT)
  TRIBITS_ADD_EXECUTABLE(
    issue_535
diff --git a/packages/stratimikos/test/galeri_driver.cpp b/packages/stratimikos/test/galeri_driver.cpp
new file mode 100644
index 000000000000..e820c0cf44d5
--- /dev/null
+++ b/packages/stratimikos/test/galeri_driver.cpp
@@ -0,0 +1,318 @@
+/*@HEADER
+// ***********************************************************************
+//
+//       Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
+//                 Copyright (2009) Sandia Corporation
+//
+// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+// license for use of this work by or on behalf of the U.S. Government.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
+//
+// ***********************************************************************
+//@HEADER
+*/
+#include <iostream>
+
+/*
+   Call Ifpack2 via the Stratimikos interface.
+
+Usage:
+./Ifpack2_Stratimikos.exe : use xml configuration file stratimikos_ParameterList.xml
+
+Note:
+The source code is not MueLu specific and can be used with any Stratimikos strategy.
+*/
+
+// Teuchos includes
+#include <Teuchos_ConfigDefs.hpp>
+#include <Teuchos_GlobalMPISession.hpp>
+#include <Teuchos_StackedTimer.hpp>
+#include <Teuchos_RCP.hpp>
+#include <Teuchos_XMLParameterListHelpers.hpp>
+#include <Teuchos_YamlParameterListHelpers.hpp>
+#include <Teuchos_StandardCatchMacros.hpp>
+#include "Teuchos_AbstractFactoryStd.hpp"
+
+// Thyra includes
+#include <Thyra_LinearOpWithSolveBase.hpp>
+#include <Thyra_VectorBase.hpp>
+#include <Thyra_SolveSupportTypes.hpp>
+
+// Stratimikos includes
+#include <Stratimikos_DefaultLinearSolverBuilder.hpp>
+
+// Xpetra include
+#include <Xpetra_Parameters.hpp>
+#include <Xpetra_ThyraUtils.hpp>
+
+// Galeri includes
+#include <Galeri_XpetraParameters.hpp>
+#include <Galeri_XpetraProblemFactory.hpp>
+#include <Galeri_XpetraUtils.hpp>
+#include <Galeri_XpetraMaps.hpp>
+
+
+// Ifpack2 includes
+#include <Thyra_Ifpack2PreconditionerFactory.hpp>
+
+
+int
+main(int argc, char *argv[]) {
+  typedef double Scalar;
+  typedef Teuchos::ScalarTraits<Scalar> STS;
+  typedef Tpetra::Map<> map_type;
+  typedef map_type::local_ordinal_type LocalOrdinal;
+  typedef map_type::global_ordinal_type GlobalOrdinal;
+  typedef map_type::node_type Node;
+  using Teuchos::RCP;
+  using Teuchos::rcp;
+  using Teuchos::ParameterList;
+  using Teuchos::TimeMonitor;
+  #include <Xpetra_UseShortNames.hpp>
+
+  bool success = false;
+  bool verbose = true;
+  try {
+
+    //
+    // MPI initialization
+    //
+    Teuchos::GlobalMPISession session (&argc, &argv, NULL);
+    Teuchos::CommandLineProcessor clp(false);
+    const auto comm = Teuchos::DefaultComm<int>::getComm ();
+
+    //
+    // Parameters
+    //
+    // manage parameters of the test case
+    Galeri::Xpetra::Parameters<GlobalOrdinal> galeriParameters(clp, 100, 100, 100, "Laplace2D");
+    // manage parameters of Xpetra
+    Xpetra::Parameters                        xpetraParameters(clp);
+
+    // command line parameters
+    std::string xmlFileName       = "stratimikos_ParameterList.xml"; clp.setOption("xml",      &xmlFileName,       "read parameters from an xml file");
+    std::string yamlFileName      = "";                 clp.setOption("yaml",                  &yamlFileName,      "read parameters from a yaml file");
+    bool        printTimings      = false;              clp.setOption("timings", "notimings",  &printTimings,      "print timings to screen");
+    bool        use_stacked_timer = false;              clp.setOption("stacked-timer", "no-stacked-timer", &use_stacked_timer, "Run with or without stacked timer output");
+    std::string timingsFormat     = "table-fixed";      clp.setOption("time-format",           &timingsFormat,     "timings format (table-fixed | table-scientific | yaml)");
+    int         numVectors        = 1;                  clp.setOption("multivector",           &numVectors,        "number of rhs to solve simultaneously");
+    int         numSolves         = 1;                  clp.setOption("numSolves",             &numSolves,         "number of times the system should be solved");
+
+    switch (clp.parse(argc,argv)) {
+      case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS;
+      case Teuchos::CommandLineProcessor::PARSE_ERROR:
+      case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE;
+      case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:          break;
+    }
+
+    RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
+    Teuchos::FancyOStream& out = *fancy;
+    out.setOutputToRootOnly(0);
+
+    // Set up timers
+    Teuchos::RCP<Teuchos::StackedTimer> stacked_timer;
+    if (use_stacked_timer)
+      stacked_timer = rcp(new Teuchos::StackedTimer("Main"));
+    TimeMonitor::setStackedTimer(stacked_timer);
+
+    // Read in parameter list
+    TEUCHOS_TEST_FOR_EXCEPTION(xmlFileName == "" && yamlFileName == "", std::runtime_error,
+                               "Need to provide xml or yaml input file");
+    RCP<ParameterList> paramList = rcp(new ParameterList("params"));
+    if (yamlFileName != "")
+      Teuchos::updateParametersFromYamlFileAndBroadcast(yamlFileName, paramList.ptr(), *comm);
+    else
+      Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, paramList.ptr(), *comm);
+
+    //
+    // Construct the problem
+    //
+
+    RCP<Matrix>                A;
+    RCP<const Map>             map;
+    RCP<MultiVector>           X, B;
+
+    std::ostringstream galeriStream;
+    Teuchos::ParameterList galeriList = galeriParameters.GetParameterList();
+    galeriStream << "========================================================\n" << xpetraParameters;
+    galeriStream << galeriParameters;
+
+    // Galeri will attempt to create a square-as-possible distribution of subdomains di, e.g.,
+    //                                 d1  d2  d3
+    //                                 d4  d5  d6
+    //                                 d7  d8  d9
+    //                                 d10 d11 d12
+    // A perfect distribution is only possible when the #processors is a perfect square.
+    // This *will* result in "strip" distribution if the #processors is a prime number or if the factors are very different in
+    // size. For example, np=14 will give a 7-by-2 distribution.
+    // If you don't want Galeri to do this, specify mx or my on the galeriList.
+    std::string matrixType = galeriParameters.GetMatrixType();
+
+    // Create map
+    if (matrixType == "Laplace1D") {
+      map = Galeri::Xpetra::CreateMap<LO, GO, Node>(xpetraParameters.GetLib(), "Cartesian1D", comm, galeriList);
+
+    } else if (matrixType == "Laplace2D" || matrixType == "Star2D" ||
+               matrixType == "BigStar2D" || matrixType == "AnisotropicDiffusion" || matrixType == "Elasticity2D") {
+      map = Galeri::Xpetra::CreateMap<LO, GO, Node>(xpetraParameters.GetLib(), "Cartesian2D", comm, galeriList);
+
+    } else if (matrixType == "Laplace3D" || matrixType == "Brick3D" || matrixType == "Elasticity3D") {
+      map = Galeri::Xpetra::CreateMap<LO, GO, Node>(xpetraParameters.GetLib(), "Cartesian3D", comm, galeriList);
+    }
+
+    // Expand map to do multiple DOF per node for block problems
+    if (matrixType == "Elasticity2D")
+      map = Xpetra::MapFactory<LO,GO,Node>::Build(map, 2);
+    if (matrixType == "Elasticity3D")
+      map = Xpetra::MapFactory<LO,GO,Node>::Build(map, 3);
+
+    galeriStream << "Processor subdomains in x direction: " << galeriList.get<GO>("mx") << std::endl
+                 << "Processor subdomains in y direction: " << galeriList.get<GO>("my") << std::endl
+                 << "Processor subdomains in z direction: " << galeriList.get<GO>("mz") << std::endl
+                 << "========================================================" << std::endl;
+
+    if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D") {
+      // Our default test case for elasticity: all boundaries of a square/cube have Neumann b.c. except left which has Dirichlet
+      galeriList.set("right boundary" , "Neumann");
+      galeriList.set("bottom boundary", "Neumann");
+      galeriList.set("top boundary"   , "Neumann");
+      galeriList.set("front boundary" , "Neumann");
+      galeriList.set("back boundary"  , "Neumann");
+    }
+
+    RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr =
+      Galeri::Xpetra::BuildProblem<SC,LO,GO,Map,CrsMatrixWrap,MultiVector>(galeriParameters.GetMatrixType(), map, galeriList);
+    A = Pr->BuildMatrix();
+
+    if (matrixType == "Elasticity2D" ||
+        matrixType == "Elasticity3D") {
+      A->SetFixedBlockSize((galeriParameters.GetMatrixType() == "Elasticity2D") ? 2 : 3);
+    }
+
+    out << galeriStream.str();
+    X = MultiVectorFactory::Build(map, numVectors);
+    B = MultiVectorFactory::Build(map, numVectors);
+    B->putScalar(1);
+    X->putScalar(0);
+
+    //
+    // Build Thyra linear algebra objects
+    //
+
+    RCP<const Xpetra::CrsMatrixWrap<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xpCrsA = Teuchos::rcp_dynamic_cast<const Xpetra::CrsMatrixWrap<Scalar,LocalOrdinal,GlobalOrdinal,Node> >(A);
+
+    RCP<const Thyra::LinearOpBase<Scalar> >    thyraA = Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toThyra(xpCrsA->getCrsMatrix());
+    RCP<      Thyra::MultiVectorBase<Scalar> > thyraX = Teuchos::rcp_const_cast<Thyra::MultiVectorBase<Scalar> >(Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toThyraMultiVector(X));
+    RCP<const Thyra::MultiVectorBase<Scalar> > thyraB = Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toThyraMultiVector(B);
+
+    //
+    // Build Stratimikos solver
+    //
+
+    // This is the Stratimikos main class (= factory of solver factory).
+    Stratimikos::DefaultLinearSolverBuilder linearSolverBuilder;
+    // Register Ifpack2 as a Stratimikos preconditioner strategy.
+    typedef Thyra::PreconditionerFactoryBase<Scalar> Base;
+    typedef Thyra::Ifpack2PreconditionerFactory<Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > Impl;
+    linearSolverBuilder.setPreconditioningStrategyFactory(Teuchos::abstractFactoryStd<Base, Impl>(), "Ifpack2");
+
+    // Setup solver parameters using a Stratimikos parameter list.
+    linearSolverBuilder.setParameterList(paramList);
+
+    // Build a new "solver factory" according to the previously specified parameter list.
+    RCP<Thyra::LinearOpWithSolveFactoryBase<Scalar> > solverFactory = Thyra::createLinearSolveStrategy(linearSolverBuilder);
+    auto precFactory = solverFactory->getPreconditionerFactory();
+    RCP<Thyra::PreconditionerBase<Scalar> > prec;
+    Teuchos::RCP<Thyra::LinearOpWithSolveBase<Scalar> > thyraInverseA;
+    if (!precFactory.is_null()) {
+      prec = precFactory->createPrec();
+
+      // Build a Thyra operator corresponding to A^{-1} computed using the Stratimikos solver.
+      Thyra::initializePrec<double>(*precFactory, thyraA, prec.ptr());
+      thyraInverseA = solverFactory->createOp();
+      Thyra::initializePreconditionedOp<double>(*solverFactory, thyraA, prec, thyraInverseA.ptr());
+    } else {
+      thyraInverseA = Thyra::linearOpWithSolve(*solverFactory, thyraA);
+    }
+
+    // Solve Ax = b.
+    Thyra::SolveStatus<Scalar> status = Thyra::solve<Scalar>(*thyraInverseA, Thyra::NOTRANS, *thyraB, thyraX.ptr());
+
+    success = (status.solveStatus == Thyra::SOLVE_STATUS_CONVERGED);
+
+    for (int solveno = 1; solveno < numSolves; solveno++) {
+      if (!precFactory.is_null())
+        Thyra::initializePrec<double>(*precFactory, thyraA, prec.ptr());
+      thyraX->assign(0.);
+
+      status = Thyra::solve<Scalar>(*thyraInverseA, Thyra::NOTRANS, *thyraB, thyraX.ptr());
+
+      success = success && (status.solveStatus == Thyra::SOLVE_STATUS_CONVERGED);
+    }
+
+    // print timings
+    if (printTimings) {
+      if (use_stacked_timer) {
+        stacked_timer->stop("Main");
+        Teuchos::StackedTimer::OutputOptions options;
+        options.output_fraction = options.output_histogram = options.output_minmax = true;
+        stacked_timer->report(out, comm, options);
+      } else {
+        RCP<ParameterList> reportParams = rcp(new ParameterList);
+        if (timingsFormat == "yaml") {
+          reportParams->set("Report format",             "YAML");            // "Table" or "YAML"
+          reportParams->set("YAML style",                "compact");         // "spacious" or "compact"
+        }
+        reportParams->set("How to merge timer sets",   "Union");
+        reportParams->set("alwaysWriteLocal",          false);
+        reportParams->set("writeGlobalStats",          true);
+        reportParams->set("writeZeroTimers",           false);
+
+        const std::string filter = "";
+
+        std::ios_base::fmtflags ff(out.flags());
+        if (timingsFormat == "table-fixed") out << std::fixed;
+        else                                out << std::scientific;
+        TimeMonitor::report(comm.ptr(), out, filter, reportParams);
+        out << std::setiosflags(ff);
+      }
+    }
+
+    TimeMonitor::clearCounters();
+    out << std::endl;
+
+  }
+  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);
+
+  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
+}
+
+
diff --git a/packages/stratimikos/test/stratimikos_jacobi.xml b/packages/stratimikos/test/stratimikos_jacobi.xml
new file mode 100644
index 000000000000..b4051200158f
--- /dev/null
+++ b/packages/stratimikos/test/stratimikos_jacobi.xml
@@ -0,0 +1,83 @@
+<ParameterList>
+
+  <!-- ================================================== -->
+  <!-- ================================================== -->
+  <!-- KRYLOV SOLVER CONFIGURATION                        -->
+  <!-- ================================================== -->
+  <!-- ================================================== -->
+
+  <Parameter name="Linear Solver Type" type="string" value="Belos"/>
+  <ParameterList name="Linear Solver Types">
+    <ParameterList name="Belos">
+
+      <!-- ========================================= -->
+      <!-- Select which Krylov solver should be used -->
+
+      <Parameter name="Solver Type" type="string" value="Pseudo Block CG"/>
+
+      <!-- ================================== -->
+      <!-- Parameters for the Krylov solvers  -->
+
+      <ParameterList name="Solver Types">
+
+        <ParameterList name="Pseudo Block CG">
+          <Parameter name="Convergence Tolerance" type="double" value="1e-1"/>
+          <Parameter name="Maximum Iterations" type="int" value="100"/>
+          <!-- Verbosity
+              Errors = 0,
+              Warnings = 0x1,
+              IterationDetails = 0x2,
+              OrthoDetails = 0x4,
+              FinalSummary = 0x8,
+              TimingDetails = 0x10 = 16,
+              StatusTestDetails = 0x20 = 32,
+              Debug = 0x40 = 64                -->
+          <Parameter name="Verbosity" type="int" value="41"/>
+          <Parameter name="Output Frequency" type="int" value="1"/>
+          <Parameter name="Output Style" type="int" value="1"/>
+        </ParameterList>
+
+      </ParameterList>
+
+      <ParameterList name="VerboseObject">
+        <Parameter name="Verbosity Level" type="string" value="low"/>
+      </ParameterList>
+
+    </ParameterList>
+  </ParameterList>
+
+
+  <!-- ================================================== -->
+  <!-- ================================================== -->
+  <!-- PRECONDITIONER CONFIGURATION                       -->
+  <!-- ================================================== -->
+  <!-- ================================================== -->
+
+
+  <!-- ========================================== -->
+  <!-- Select which preconditioner should be used -->
+
+  <!-- Ifpack2 contains different 1-level preconditioners such as Jacobi, Gauss-Seidel and Chebyshev -->
+  <Parameter name="Preconditioner Type" type="string" value="Ifpack2"/>
+
+  <!-- ========================================== -->
+  <!-- Parameters for the preconditioners         -->
+
+  <ParameterList name="Preconditioner Types">
+
+    <!-- Settings for 1-level preconditioners -->
+    <ParameterList name="Ifpack2">
+      <Parameter name="Prec Type" type="string" value="relaxation"/>
+      <Parameter name="half precision" type="bool" value="false"/>
+      <ParameterList name="Ifpack2 Settings">
+        <Parameter name="relaxation: type" type="string" value="Jacobi"/>
+        <Parameter name="relaxation: sweeps" type="int" value="1"/>
+      </ParameterList>
+
+      <ParameterList name="VerboseObject">
+        <Parameter name="Verbosity Level" type="string" value="low"/>
+      </ParameterList>
+    </ParameterList>
+
+  </ParameterList>
+</ParameterList>
diff --git a/packages/stratimikos/test/stratimikos_jacobi_half.xml b/packages/stratimikos/test/stratimikos_jacobi_half.xml
new file mode 100644
index 000000000000..d267f8bc301f
--- /dev/null
+++ b/packages/stratimikos/test/stratimikos_jacobi_half.xml
@@ -0,0 +1,84 @@
+<ParameterList>
+
+  <!-- ================================================== -->
+  <!-- ================================================== -->
+  <!-- KRYLOV SOLVER CONFIGURATION                        -->
+  <!-- ================================================== -->
+  <!-- ================================================== -->
+
+  <Parameter name="Linear Solver Type" type="string" value="Belos"/>
+  <ParameterList name="Linear Solver Types">
+    <ParameterList name="Belos">
+
+      <!-- ========================================= -->
+      <!-- Select which Krylov solver should be used -->
+
+      <Parameter name="Solver Type" type="string" value="Pseudo Block CG"/>
+
+      <!-- ================================== -->
+      <!-- Parameters for the Krylov solvers  -->
+
+      <ParameterList name="Solver Types">
+
+        <ParameterList name="Pseudo Block CG">
+          <Parameter name="Convergence Tolerance" type="double" value="1e-1"/>
+          <Parameter name="Maximum Iterations" type="int" value="100"/>
+          <!-- Verbosity
+              Errors = 0,
+              Warnings = 0x1,
+              IterationDetails = 0x2,
+              OrthoDetails = 0x4,
+              FinalSummary = 0x8,
+              TimingDetails = 0x10 = 16,
+              StatusTestDetails = 0x20 = 32,
+              Debug = 0x40 = 64                -->
+          <Parameter name="Verbosity" type="int" value="41"/>
+          <Parameter name="Output Frequency" type="int" value="1"/>
+          <Parameter name="Output Style" type="int" value="1"/>
+        </ParameterList>
+
+      </ParameterList>
+
+      <ParameterList name="VerboseObject">
+        <Parameter name="Verbosity Level" type="string" value="low"/>
+      </ParameterList>
+
+    </ParameterList>
+  </ParameterList>
+
+
+  <!-- ================================================== -->
+  <!-- ================================================== -->
+  <!-- PRECONDITIONER CONFIGURATION                       -->
+  <!-- ================================================== -->
+  <!-- ================================================== -->
+
+
+  <!-- ========================================== -->
+  <!-- Select which preconditioner should be used -->
+
+  <!-- Ifpack2 contains different 1-level preconditioners such as Jacobi, Gauss-Seidel and Chebyshev -->
+  <Parameter name="Preconditioner Type" type="string" value="Ifpack2"/>
+
+  <!-- ========================================== -->
+  <!-- Parameters for the preconditioners         -->
+
+  <ParameterList name="Preconditioner Types">
+
+    <!-- Settings for 1-level preconditioners -->
+    <ParameterList name="Ifpack2">
+      <Parameter name="Prec Type" type="string" value="relaxation"/>
+      <Parameter name="half precision" type="bool" value="true"/>
+      <ParameterList name="Ifpack2 Settings">
+        <Parameter name="relaxation: type" type="string" value="Jacobi"/>
+        <Parameter name="relaxation: sweeps" type="int" value="1"/>
+      </ParameterList>
+
+      <ParameterList name="VerboseObject">
+        <Parameter name="Verbosity Level" type="string" value="low"/>
+      </ParameterList>
+    </ParameterList>
+
+  </ParameterList>
+
+</ParameterList>

From fa12ca96f2ead80b1da0b2a830f400f8e71c7d99 Mon Sep 17 00:00:00 2001
From: Sidafa Conde <sconde@sandia.gov>
Date: Thu, 22 Jul 2021 15:20:57 -0400
Subject: [PATCH 11/15] Tempus: FSA - update nonmember function name to meet
 standard

---
 packages/piro/src/Piro_TempusIntegrator_Def.hpp               | 2 +-
 packages/rol/example/tempus/ROL_TempusReducedObjective.hpp    | 2 +-
 packages/tempus/src/Tempus_IntegratorForwardSensitivity.cpp   | 4 ++--
 .../tempus/src/Tempus_IntegratorForwardSensitivity_decl.hpp   | 4 ++--
 .../tempus/src/Tempus_IntegratorForwardSensitivity_impl.hpp   | 4 ++--
 packages/tempus/test/BDF2/Tempus_BDF2Test.cpp                 | 1 -
 packages/tempus/test/BDF2/Tempus_BDF2_FSA.hpp                 | 2 +-
 .../tempus/test/BackwardEuler/Tempus_BackwardEuler_FSA.hpp    | 2 +-
 packages/tempus/test/DIRK/Tempus_DIRK_FSA.hpp                 | 2 +-
 packages/tempus/test/ExplicitRK/Tempus_ExplicitRK_FSA.hpp     | 2 +-
 packages/tempus/test/IMEX_RK/Tempus_IMEX_RK_FSA.hpp           | 2 +-
 .../IMEX_RK_Partitioned/Tempus_IMEX_RK_Partitioned_FSA.hpp    | 2 +-
 12 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/packages/piro/src/Piro_TempusIntegrator_Def.hpp b/packages/piro/src/Piro_TempusIntegrator_Def.hpp
index 8ddd885e1a7d..303bd349b9f9 100644
--- a/packages/piro/src/Piro_TempusIntegrator_Def.hpp
+++ b/packages/piro/src/Piro_TempusIntegrator_Def.hpp
@@ -67,7 +67,7 @@ Piro::TempusIntegrator<Scalar>::TempusIntegrator(Teuchos::RCP< Teuchos::Paramete
   else if (sens_method == FORWARD) {
     //forward sensitivities
     basicIntegrator_ = Teuchos::null;
-    fwdSensIntegrator_ = Tempus::integratorForwardSensitivity<Scalar>(pList, model);
+    fwdSensIntegrator_ = Tempus::createIntegratorForwardSensitivity<Scalar>(pList, model);
     adjSensIntegrator_ = Teuchos::null; 
   }
   else if (sens_method == ADJOINT) {
diff --git a/packages/rol/example/tempus/ROL_TempusReducedObjective.hpp b/packages/rol/example/tempus/ROL_TempusReducedObjective.hpp
index 3ae9d971631a..4f62a8a86737 100644
--- a/packages/rol/example/tempus/ROL_TempusReducedObjective.hpp
+++ b/packages/rol/example/tempus/ROL_TempusReducedObjective.hpp
@@ -361,7 +361,7 @@ run_tempus(const Thyra::ModelEvaluatorBase::InArgs<Real>&  inArgs,
   // Create and run integrator
   if (dgdp != Teuchos::null && sensitivity_method_ == "Forward") {
     RCP<Tempus::IntegratorForwardSensitivity<Real> > integrator =
-      Tempus::integratorForwardSensitivity<Real>(tempus_params_, wrapped_model);
+      Tempus::createIntegratorForwardSensitivity<Real>(tempus_params_, wrapped_model);
     const bool integratorStatus = integrator->advanceTime();
     TEUCHOS_TEST_FOR_EXCEPTION(
       !integratorStatus, std::logic_error, "Integrator failed!");
diff --git a/packages/tempus/src/Tempus_IntegratorForwardSensitivity.cpp b/packages/tempus/src/Tempus_IntegratorForwardSensitivity.cpp
index aac1afe13dc8..43cc8f1eb16a 100644
--- a/packages/tempus/src/Tempus_IntegratorForwardSensitivity.cpp
+++ b/packages/tempus/src/Tempus_IntegratorForwardSensitivity.cpp
@@ -18,13 +18,13 @@ namespace Tempus {
 
   // Nonmember ctor
   template Teuchos::RCP<IntegratorForwardSensitivity<double> >
-  integratorForwardSensitivity(
+  createIntegratorForwardSensitivity(
     Teuchos::RCP<Teuchos::ParameterList>        parameterList,
     const Teuchos::RCP<Thyra::ModelEvaluator<double> >& model);
 
   // Nonmember ctor
   template Teuchos::RCP<IntegratorForwardSensitivity<double> >
-  integratorForwardSensitivity();
+  createIntegratorForwardSensitivity();
 
 } // namespace Tempus
 
diff --git a/packages/tempus/src/Tempus_IntegratorForwardSensitivity_decl.hpp b/packages/tempus/src/Tempus_IntegratorForwardSensitivity_decl.hpp
index 2c8440adde6e..e71778addb06 100644
--- a/packages/tempus/src/Tempus_IntegratorForwardSensitivity_decl.hpp
+++ b/packages/tempus/src/Tempus_IntegratorForwardSensitivity_decl.hpp
@@ -251,7 +251,7 @@ class IntegratorForwardSensitivity
  */
 template <class Scalar>
 Teuchos::RCP<IntegratorForwardSensitivity<Scalar>>
-integratorForwardSensitivity(
+createIntegratorForwardSensitivity(
     Teuchos::RCP<Teuchos::ParameterList> pList,
     const Teuchos::RCP<Thyra::ModelEvaluator<Scalar>> &model);
 
@@ -266,7 +266,7 @@ integratorForwardSensitivity(
  */
 template <class Scalar>
 Teuchos::RCP<IntegratorForwardSensitivity<Scalar>>
-integratorForwardSensitivity();
+createIntegratorForwardSensitivity();
 
 } // namespace Tempus
 
diff --git a/packages/tempus/src/Tempus_IntegratorForwardSensitivity_impl.hpp b/packages/tempus/src/Tempus_IntegratorForwardSensitivity_impl.hpp
index f19347bec3bb..09924d177a3e 100644
--- a/packages/tempus/src/Tempus_IntegratorForwardSensitivity_impl.hpp
+++ b/packages/tempus/src/Tempus_IntegratorForwardSensitivity_impl.hpp
@@ -231,7 +231,7 @@ describe(
 /// Nonmember constructor
 template<class Scalar>
 Teuchos::RCP<IntegratorForwardSensitivity<Scalar> >
-integratorForwardSensitivity(
+createIntegratorForwardSensitivity(
   Teuchos::RCP<Teuchos::ParameterList>                     pList,
   const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >&      model)
 {
@@ -293,7 +293,7 @@ integratorForwardSensitivity(
 /// Nonmember constructor
 template<class Scalar>
 Teuchos::RCP<IntegratorForwardSensitivity<Scalar> >
-integratorForwardSensitivity()
+createIntegratorForwardSensitivity()
 {
 
   Teuchos::RCP<IntegratorForwardSensitivity<Scalar> > integrator =
diff --git a/packages/tempus/test/BDF2/Tempus_BDF2Test.cpp b/packages/tempus/test/BDF2/Tempus_BDF2Test.cpp
index 9c63d0b9bc50..734742569c05 100644
--- a/packages/tempus/test/BDF2/Tempus_BDF2Test.cpp
+++ b/packages/tempus/test/BDF2/Tempus_BDF2Test.cpp
@@ -340,7 +340,6 @@ TEUCHOS_UNIT_TEST(BDF2, SinCosAdapt)
   RCP<ParameterList> pList =
     getParametersFromXmlFile("Tempus_BDF2_SinCos_AdaptDt.xml");
   //Set initial time step = 2*dt specified in input file (for convergence study)
-  RCP<ParameterList> tempus_pl = sublist(pList, "Tempus", true);
   double dt = pList->sublist("Tempus")
              .sublist("Default Integrator")
              .sublist("Time Step Control").get<double>("Initial Time Step");
diff --git a/packages/tempus/test/BDF2/Tempus_BDF2_FSA.hpp b/packages/tempus/test/BDF2/Tempus_BDF2_FSA.hpp
index 670cc948742e..0c989c43aefa 100644
--- a/packages/tempus/test/BDF2/Tempus_BDF2_FSA.hpp
+++ b/packages/tempus/test/BDF2/Tempus_BDF2_FSA.hpp
@@ -91,7 +91,7 @@ void test_sincos_fsa(const bool use_combined_method,
     pl->sublist("Default Integrator")
        .sublist("Time Step Control").set("Initial Time Step", dt);
     RCP<Tempus::IntegratorForwardSensitivity<double> > integrator =
-      Tempus::integratorForwardSensitivity<double>(pl, model);
+      Tempus::createIntegratorForwardSensitivity<double>(pl, model);
     order = integrator->getStepper()->getOrder();
 
     // Initial Conditions
diff --git a/packages/tempus/test/BackwardEuler/Tempus_BackwardEuler_FSA.hpp b/packages/tempus/test/BackwardEuler/Tempus_BackwardEuler_FSA.hpp
index d313ed820fac..d5d50acb5183 100644
--- a/packages/tempus/test/BackwardEuler/Tempus_BackwardEuler_FSA.hpp
+++ b/packages/tempus/test/BackwardEuler/Tempus_BackwardEuler_FSA.hpp
@@ -89,7 +89,7 @@ void test_sincos_fsa(const bool use_combined_method,
     pl->sublist("Default Integrator")
        .sublist("Time Step Control").set("Initial Time Step", dt);
     RCP<Tempus::IntegratorForwardSensitivity<double> > integrator =
-      Tempus::integratorForwardSensitivity<double>(pl, model);
+      Tempus::createIntegratorForwardSensitivity<double>(pl, model);
     order = integrator->getStepper()->getOrder();
 
     // Initial Conditions
diff --git a/packages/tempus/test/DIRK/Tempus_DIRK_FSA.hpp b/packages/tempus/test/DIRK/Tempus_DIRK_FSA.hpp
index f293f2176cf0..68a0fe1dd356 100644
--- a/packages/tempus/test/DIRK/Tempus_DIRK_FSA.hpp
+++ b/packages/tempus/test/DIRK/Tempus_DIRK_FSA.hpp
@@ -169,7 +169,7 @@ void test_sincos_fsa(const std::string& method_name,
       pl->sublist("Default Integrator")
          .sublist("Time Step Control").set("Initial Time Step", dt);
       RCP<Tempus::IntegratorForwardSensitivity<double> > integrator =
-        Tempus::integratorForwardSensitivity<double>(pl, model);
+        Tempus::createIntegratorForwardSensitivity<double>(pl, model);
       order = integrator->getStepper()->getOrder();
 
       // Initial Conditions
diff --git a/packages/tempus/test/ExplicitRK/Tempus_ExplicitRK_FSA.hpp b/packages/tempus/test/ExplicitRK/Tempus_ExplicitRK_FSA.hpp
index 643f6959c9fe..5ce5652077b7 100644
--- a/packages/tempus/test/ExplicitRK/Tempus_ExplicitRK_FSA.hpp
+++ b/packages/tempus/test/ExplicitRK/Tempus_ExplicitRK_FSA.hpp
@@ -154,7 +154,7 @@ void test_sincos_fsa(const std::string& method_name,
       pl->sublist("Demo Integrator")
         .sublist("Time Step Control").set("Initial Time Step", dt);
       RCP<Tempus::IntegratorForwardSensitivity<double> > integrator =
-        Tempus::integratorForwardSensitivity<double>(pl, model);
+        Tempus::createIntegratorForwardSensitivity<double>(pl, model);
       order = integrator->getStepper()->getOrder();
 
       // Initial Conditions
diff --git a/packages/tempus/test/IMEX_RK/Tempus_IMEX_RK_FSA.hpp b/packages/tempus/test/IMEX_RK/Tempus_IMEX_RK_FSA.hpp
index 7beb10253631..a4a1d0f7c6c3 100644
--- a/packages/tempus/test/IMEX_RK/Tempus_IMEX_RK_FSA.hpp
+++ b/packages/tempus/test/IMEX_RK/Tempus_IMEX_RK_FSA.hpp
@@ -158,7 +158,7 @@ void test_vdp_fsa(const bool use_combined_method,
       pl->sublist("Default Integrator")
          .sublist("Time Step Control").remove("Time Step Control Strategy");
       RCP<Tempus::IntegratorForwardSensitivity<double> > integrator =
-        Tempus::integratorForwardSensitivity<double>(pl, model);
+        Tempus::createIntegratorForwardSensitivity<double>(pl, model);
       order = integrator->getStepper()->getOrder();
 
       // Integrate to timeMax
diff --git a/packages/tempus/test/IMEX_RK_Partitioned/Tempus_IMEX_RK_Partitioned_FSA.hpp b/packages/tempus/test/IMEX_RK_Partitioned/Tempus_IMEX_RK_Partitioned_FSA.hpp
index 7d542697c4fa..3433965e3f55 100644
--- a/packages/tempus/test/IMEX_RK_Partitioned/Tempus_IMEX_RK_Partitioned_FSA.hpp
+++ b/packages/tempus/test/IMEX_RK_Partitioned/Tempus_IMEX_RK_Partitioned_FSA.hpp
@@ -202,7 +202,7 @@ void test_vdp_fsa(const std::string& method_name,
       pl->sublist("Default Integrator")
          .sublist("Time Step Control").remove("Time Step Control Strategy");
       RCP<Tempus::IntegratorForwardSensitivity<double> > integrator =
-        Tempus::integratorForwardSensitivity<double>(pl, model);
+        Tempus::createIntegratorForwardSensitivity<double>(pl, model);
       order = integrator->getStepper()->getOrder();
 
       // Integrate to timeMax

From 5cb3fd87d04ab6cb22629fc6be720ad85cab7a20 Mon Sep 17 00:00:00 2001
From: Sidafa Conde <sconde@sandia.gov>
Date: Thu, 1 Jul 2021 09:14:41 -0400
Subject: [PATCH 12/15] Tempus: PFSA - convert IntegratorBasicOld to
 IntegratorBasic

---
 ...ratorPseudoTransientForwardSensitivity.cpp |   6 -
 ...PseudoTransientForwardSensitivity_decl.hpp |  68 +++----
 ...PseudoTransientForwardSensitivity_impl.hpp | 170 ++++++------------
 3 files changed, 87 insertions(+), 157 deletions(-)

diff --git a/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity.cpp b/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity.cpp
index bf28a9e1fdb2..b0e3ab8e6c17 100644
--- a/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity.cpp
+++ b/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity.cpp
@@ -22,12 +22,6 @@ namespace Tempus {
     Teuchos::RCP<Teuchos::ParameterList>        parameterList,
     const Teuchos::RCP<Thyra::ModelEvaluator<double> >& model);
 
-  // Nonmember ctor
-  template Teuchos::RCP<IntegratorPseudoTransientForwardSensitivity<double> >
-  integratorPseudoTransientForwardSensitivity(
-    const Teuchos::RCP<Thyra::ModelEvaluator<double> >& model,
-    std::string stepperType);
-
   // Nonmember ctor
   template Teuchos::RCP<IntegratorPseudoTransientForwardSensitivity<double> >
   integratorPseudoTransientForwardSensitivity();
diff --git a/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity_decl.hpp b/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity_decl.hpp
index 1b5d18bd2f73..5e2a0c18a98a 100644
--- a/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity_decl.hpp
+++ b/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity_decl.hpp
@@ -11,7 +11,7 @@
 
 // Tempus
 #include "Tempus_config.hpp"
-#include "Tempus_IntegratorBasicOld.hpp"
+#include "Tempus_IntegratorBasic.hpp"
 #include "Tempus_SensitivityModelEvaluatorBase.hpp"
 
 namespace Tempus {
@@ -54,8 +54,7 @@ namespace Tempus {
  */
 template<class Scalar>
 class IntegratorPseudoTransientForwardSensitivity
-  : virtual public Tempus::Integrator<Scalar>,
-    virtual public Teuchos::ParameterListAcceptor
+  : virtual public Tempus::Integrator<Scalar>
 {
 public:
 
@@ -93,17 +92,15 @@ class IntegratorPseudoTransientForwardSensitivity
    *        x_dot_dot).
    * </ul>
    */
-  IntegratorPseudoTransientForwardSensitivity(
-    Teuchos::RCP<Teuchos::ParameterList>                pList,
-    const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >& model);
-
-  /** \brief Constructor with model and "Stepper Type" and is fully initialized with default settings. */
   IntegratorPseudoTransientForwardSensitivity(
     const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >& model,
-    std::string stepperType);
+    const  Teuchos::RCP<SensitivityModelEvaluatorBase<Scalar> >&sens_model,
+    const Teuchos::RCP<IntegratorBasic<Scalar> > &fwd_integrator,
+    const Teuchos::RCP<IntegratorBasic<Scalar> > &sens_integrator,
+    const bool reuse_solver, const bool force_W_update);
 
   /// Destructor
-  /** \brief Constructor that requires a subsequent setParameterList, setStepper, and initialize calls. */
+  /** \brief Constructor that requires a subsequent setStepper, and initialize calls. */
   IntegratorPseudoTransientForwardSensitivity();
 
   /// Destructor
@@ -163,17 +160,6 @@ class IntegratorPseudoTransientForwardSensitivity
   virtual Teuchos::RCP<const Thyra::VectorBase<Scalar> > getXDotDot() const;
   virtual Teuchos::RCP<const Thyra::MultiVectorBase<Scalar> > getDXDotDotDp() const;
 
-  /// \name Overridden from Teuchos::ParameterListAcceptor
-  //@{
-    void setParameterList(const Teuchos::RCP<Teuchos::ParameterList> & pl)
-      override;
-    Teuchos::RCP<Teuchos::ParameterList> getNonconstParameterList() override;
-    Teuchos::RCP<Teuchos::ParameterList> unsetParameterList() override;
-
-    Teuchos::RCP<const Teuchos::ParameterList> getValidParameters()
-      const override;
-  //@}
-
   /// \name Overridden from Teuchos::Describable
   //@{
     std::string description() const override;
@@ -183,24 +169,27 @@ class IntegratorPseudoTransientForwardSensitivity
 
 protected:
 
-  // Create sensitivity model evaluator from application model
-  Teuchos::RCP<SensitivityModelEvaluatorBase<Scalar> >
-  createSensitivityModel(
-    const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >& model,
-    const Teuchos::RCP<Teuchos::ParameterList>& inputPL);
-
   void buildSolutionHistory();
 
-  Teuchos::RCP<Thyra::ModelEvaluator<Scalar> > model_;
-  Teuchos::RCP<SensitivityModelEvaluatorBase<Scalar> > sens_model_;
-  Teuchos::RCP<IntegratorBasicOld<Scalar> > state_integrator_;
-  Teuchos::RCP<IntegratorBasicOld<Scalar> > sens_integrator_;
-  Teuchos::RCP<SolutionHistory<Scalar> > solutionHistory_;
+  Teuchos::RCP<Thyra::ModelEvaluator<Scalar>> model_;
+  Teuchos::RCP<SensitivityModelEvaluatorBase<Scalar>> sens_model_;
+  Teuchos::RCP<IntegratorBasic<Scalar>> state_integrator_;
+  Teuchos::RCP<IntegratorBasic<Scalar>> sens_integrator_;
+  Teuchos::RCP<SolutionHistory<Scalar>> solutionHistory_;
   bool reuse_solver_;
   bool force_W_update_;
 };
 
 /// Nonmember constructor
+/**
+ * @brief Nonmember constructor
+ *
+ * @param pList ParameterList to construct the Tempus state integrator, the
+ *              sensitivity model evaluator, and the sensisitivity integrator
+ * @param model Physics model
+ *
+ * @return 
+ */
 template<class Scalar>
 Teuchos::RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<Scalar> >
 integratorPseudoTransientForwardSensitivity(
@@ -208,13 +197,14 @@ integratorPseudoTransientForwardSensitivity(
   const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >& model);
 
 /// Nonmember constructor
-template<class Scalar>
-Teuchos::RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<Scalar> >
-integratorPseudoTransientForwardSensitivity(
-  const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >& model,
-  std::string stepperType);
-
-/// Nonmember constructor
+/**
+ * @brief Default ctor
+ *
+ * Instantiates a default IntegratorBasic for both the state and the sensitivity
+ * integrator. 
+ *
+ * @return IntegratorPseudoTransientForwardSensitivity
+ */
 template<class Scalar>
 Teuchos::RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<Scalar> >
 integratorPseudoTransientForwardSensitivity();
diff --git a/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity_impl.hpp b/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity_impl.hpp
index 617afc13227f..eab21c639111 100644
--- a/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity_impl.hpp
+++ b/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity_impl.hpp
@@ -18,31 +18,21 @@
 
 namespace Tempus {
 
-template<class Scalar>
-IntegratorPseudoTransientForwardSensitivity<Scalar>::
-IntegratorPseudoTransientForwardSensitivity(
-  Teuchos::RCP<Teuchos::ParameterList>                inputPL,
-  const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >& model) :
-  reuse_solver_(false)
-{
-  model_ = model;
-  sens_model_ = createSensitivityModel(model_, inputPL);
-  state_integrator_ = integratorBasic<Scalar>(inputPL, model_);
-  sens_integrator_ = integratorBasic<Scalar>(inputPL, sens_model_);
-}
-
-template<class Scalar>
-IntegratorPseudoTransientForwardSensitivity<Scalar>::
-IntegratorPseudoTransientForwardSensitivity(
-  const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >& model,
-  std::string stepperType) :
-  reuse_solver_(false),
-  force_W_update_(false)
+template <class Scalar>
+IntegratorPseudoTransientForwardSensitivity<Scalar>::IntegratorPseudoTransientForwardSensitivity(
+    const Teuchos::RCP<Thyra::ModelEvaluator<Scalar>> &model,
+    const Teuchos::RCP<SensitivityModelEvaluatorBase<Scalar>> &sens_model,
+    const Teuchos::RCP<IntegratorBasic<Scalar>> &fwd_integrator,
+    const Teuchos::RCP<IntegratorBasic<Scalar>> &sens_integrator,
+    const bool reuse_solver,
+    const bool force_W_update)
+    : model_(model)
+    , sens_model_(sens_model)
+    , state_integrator_(fwd_integrator)
+    , sens_integrator_(sens_integrator)
+    , reuse_solver_(reuse_solver)
+    , force_W_update_(force_W_update)
 {
-  model_ = model;
-  sens_model_ = createSensitivityModel(model, Teuchos::null);
-  state_integrator_ = integratorBasic<Scalar>(model_, stepperType);
-  sens_integrator_ = integratorBasic<Scalar>(sens_model_, stepperType);
 }
 
 template<class Scalar>
@@ -51,8 +41,8 @@ IntegratorPseudoTransientForwardSensitivity() :
   reuse_solver_(false),
   force_W_update_(false)
 {
-  state_integrator_ = integratorBasic<Scalar>();
-  sens_integrator_ = integratorBasic<Scalar>();
+  state_integrator_ = createIntegratorBasic<Scalar>();
+  sens_integrator_ = createIntegratorBasic<Scalar>();
 }
 
 template<class Scalar>
@@ -171,8 +161,8 @@ void
 IntegratorPseudoTransientForwardSensitivity<Scalar>::
 setTempusParameterList(Teuchos::RCP<Teuchos::ParameterList> pl)
 {
-  state_integrator_->setTempusParameterList(pl);
-  sens_integrator_->setTempusParameterList(pl);
+  TEUCHOS_TEST_FOR_EXCEPTION( true, std::logic_error,
+      "  IntegratorPseudoTransientForwardSensitivity::setTempusParameterList() --  Deprecated!\n");
 }
 
 template<class Scalar>
@@ -370,74 +360,6 @@ describe(
   sens_integrator_->describe(*l_out, verbLevel);
 }
 
-template<class Scalar>
-void
-IntegratorPseudoTransientForwardSensitivity<Scalar>::
-setParameterList(const Teuchos::RCP<Teuchos::ParameterList> & inputPL)
-{
-  state_integrator_->setParameterList(inputPL);
-  sens_integrator_->setParameterList(inputPL);
-  reuse_solver_ =
-    inputPL->sublist("Sensitivities").get("Reuse State Linear Solver", false);
-  force_W_update_ =
-    inputPL->sublist("Sensitivities").get("Force W Update", false);
-}
-
-template<class Scalar>
-Teuchos::RCP<Teuchos::ParameterList>
-IntegratorPseudoTransientForwardSensitivity<Scalar>::
-unsetParameterList()
-{
-  state_integrator_->unsetParameterList();
-  return sens_integrator_->unsetParameterList();
-}
-
-template<class Scalar>
-Teuchos::RCP<const Teuchos::ParameterList>
-IntegratorPseudoTransientForwardSensitivity<Scalar>::
-getValidParameters() const
-{
-  Teuchos::RCP<Teuchos::ParameterList> pl =
-    Teuchos::rcp(new Teuchos::ParameterList);
-  Teuchos::RCP<const Teuchos::ParameterList> integrator_pl =
-    state_integrator_->getValidParameters();
-  Teuchos::RCP<const Teuchos::ParameterList> sensitivity_pl =
-    StaggeredForwardSensitivityModelEvaluator<Scalar>::getValidParameters();
-  pl->setParameters(*integrator_pl);
-  pl->sublist("Sensitivities").setParameters(*sensitivity_pl);
-  pl->sublist("Sensitivities").set("Reuse State Linear Solver", false);
-  pl->sublist("Sensitivities").set("Force W Update", false);
-
-  return pl;
-}
-
-template<class Scalar>
-Teuchos::RCP<Teuchos::ParameterList>
-IntegratorPseudoTransientForwardSensitivity<Scalar>::
-getNonconstParameterList()
-{
-  return state_integrator_->getNonconstParameterList();
-}
-
-template <class Scalar>
-Teuchos::RCP<SensitivityModelEvaluatorBase<Scalar> >
-IntegratorPseudoTransientForwardSensitivity<Scalar>::
-createSensitivityModel(
-  const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >& model,
-  const Teuchos::RCP<Teuchos::ParameterList>& inputPL)
-{
-  using Teuchos::rcp;
-
-  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
-  if (inputPL != Teuchos::null) {
-    *pl = inputPL->sublist("Sensitivities");
-  }
-  reuse_solver_ = pl->get("Reuse State Linear Solver", false);
-  force_W_update_ = pl->get("Force W Update", true);
-  pl->remove("Reuse State Linear Solver");
-  pl->remove("Force W Update");
-  return wrapStaggeredFSAModelEvaluator(model, pl);
-}
 
 template<class Scalar>
 void
@@ -456,11 +378,12 @@ buildSolutionHistory()
   using Thyra::assign;
   typedef Thyra::DefaultMultiVectorProductVector<Scalar> DMVPV;
 
+  //TODO: get the solution history PL or create it
+
   // Create combined solution histories, first for the states with zero
   // sensitivities and then for the sensitivities with frozen states
-  RCP<ParameterList> shPL =
-    Teuchos::sublist(state_integrator_->getIntegratorParameterList(),
-                     "Solution History", true);
+  RCP<ParameterList> shPL;
+    //Teuchos::sublist(state_integrator_->getIntegratorParameterList(), "Solution History", true);
   solutionHistory_ = createSolutionHistoryPL<Scalar>(shPL);
 
   const int num_param =
@@ -571,23 +494,46 @@ integratorPseudoTransientForwardSensitivity(
   Teuchos::RCP<Teuchos::ParameterList>                     pList,
   const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >&      model)
 {
-  Teuchos::RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<Scalar> > integrator =
-    Teuchos::rcp(new Tempus::IntegratorPseudoTransientForwardSensitivity<Scalar>(pList, model));
-  return(integrator);
-}
 
-/// Nonmember constructor
-template<class Scalar>
-Teuchos::RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<Scalar> >
-integratorPseudoTransientForwardSensitivity(
-  const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >&      model,
-  std::string stepperType)
-{
-  Teuchos::RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<Scalar> > integrator =
-    Teuchos::rcp(new Tempus::IntegratorPseudoTransientForwardSensitivity<Scalar>(model, stepperType));
+  auto fwd_integrator = createIntegratorBasic<Scalar>(pList, model);
+  Teuchos::RCP<SensitivityModelEvaluatorBase<Scalar> > sens_model;
+  Teuchos::RCP<IntegratorBasic<Scalar> > sens_integrator;
+
+  {
+    Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::rcp(new Teuchos::ParameterList);
+    Teuchos::RCP<const Teuchos::ParameterList> integrator_pl = fwd_integrator->getValidParameters();
+    Teuchos::RCP<const Teuchos::ParameterList> sensitivity_pl =
+      StaggeredForwardSensitivityModelEvaluator<Scalar>::getValidParameters();
+    pl->setParameters(*integrator_pl);
+    pl->sublist("Sensitivities").setParameters(*sensitivity_pl);
+    pl->sublist("Sensitivities").set("Reuse State Linear Solver", false);
+    pl->sublist("Sensitivities").set("Force W Update", false);
+    pList->setParametersNotAlreadySet(*pl);
+  }
+
+  bool reuse_solver   = pList->sublist("Sensitivities").get("Reuse State Linear Solver", false);
+  bool force_W_update = pList->sublist("Sensitivities").get("Force W Update", false);
+
+  {
+    Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
+    if (pList!= Teuchos::null)
+    {
+      *pl = pList->sublist("Sensitivities");
+    }
+    pl->remove("Reuse State Linear Solver");
+    pl->remove("Force W Update");
+    sens_model = wrapStaggeredFSAModelEvaluator(model, pl);
+    sens_integrator = createIntegratorBasic<Scalar>(pList, sens_model);
+  }
+
+  Teuchos::RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<Scalar>> integrator =
+      Teuchos::rcp(new Tempus::IntegratorPseudoTransientForwardSensitivity<Scalar>(
+          model, sens_model, fwd_integrator, sens_integrator, reuse_solver, force_W_update));
+
   return(integrator);
 }
 
+
 /// Nonmember constructor
 template<class Scalar>
 Teuchos::RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<Scalar> >

From 59a9531b6f8fbf48a533981681778a58f4aa5961 Mon Sep 17 00:00:00 2001
From: Sidafa Conde <sconde@sandia.gov>
Date: Fri, 23 Jul 2021 15:38:49 -0400
Subject: [PATCH 13/15] Tempus: FSA - update the documentation

---
 ...mpus_IntegratorForwardSensitivity_decl.hpp | 31 ++++++++++++++++---
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/packages/tempus/src/Tempus_IntegratorForwardSensitivity_decl.hpp b/packages/tempus/src/Tempus_IntegratorForwardSensitivity_decl.hpp
index 4c0b668f0416..d79af0d2ef7d 100644
--- a/packages/tempus/src/Tempus_IntegratorForwardSensitivity_decl.hpp
+++ b/packages/tempus/src/Tempus_IntegratorForwardSensitivity_decl.hpp
@@ -40,7 +40,7 @@ namespace Tempus {
  * IntegratorBasicOld, but is not derived from IntegratorBasicOld.  It also provides
  * functions for setting the sensitivity initial conditions and extracting the
  * sensitivity at the final time.  One should use the getX() and getDxDp()
- * methods for extracting the final sultion and its parameter sensitivity
+ * methods for extracting the final solution and its parameter sensitivity
  * as a multi-vector.  This data can also be extracted from the solution
  * history, but is stored as a Thyra product vector which requires knowledge
  * of the internal implementation.
@@ -201,13 +201,36 @@ class IntegratorForwardSensitivity
   virtual Teuchos::RCP<Teuchos::Time> getStepperTimer() const override
     { return integrator_->getStepperTimer(); }
 
-  /// Get current the solution, x
+  /**
+   * @brief Get the current solution, x, only. If looking for the solution
+   * vector and the sensitivities, use `SolutionState->getX()` which will return a
+   * Block MultiVector with the first block containing the current solution, x,
+   * and the remaining blocks are the forward sensitivities \f$dx/dp\f$. 
+   *
+   * Use `getDxDp` to get the forward sensitivities \f$dx/dp\f$ only.
+   *
+   * @return The current solution, x, without the sensitivities. 
+   * */
   virtual Teuchos::RCP<const Thyra::VectorBase<Scalar> > getX() const;
+
+  /// Get the forward sensitivities \f$dx/dp\f$
   virtual Teuchos::RCP<const Thyra::MultiVectorBase<Scalar> > getDxDp() const;
-  /// Get current the time derivative of the solution, xdot
+  /**
+   * @brief Get current the time derivative of the solution, xdot, only.  This
+   * is the first block only and not the full Block MultiVector.
+   *
+   * @return Get current the time derivative of the solution, xdot. 
+   * */
   virtual Teuchos::RCP<const Thyra::VectorBase<Scalar> > getXDot() const;
   virtual Teuchos::RCP<const Thyra::MultiVectorBase<Scalar> > getDXDotDp() const;
-  /// Get current the second time derivative of the solution, xdotdot
+  /**
+   * @brief Get current the second time derivative of the solution, xdotdot, only.  This
+   * is the first block only and not the full Block MultiVector.
+   *
+   * Use `getDXDotDp` to get the forward sensitivities.
+   *
+   * @return Get current the second time derivative of the solution, xdotdot. 
+   * */
   virtual Teuchos::RCP<const Thyra::VectorBase<Scalar> > getXDotDot() const;
   virtual Teuchos::RCP<const Thyra::MultiVectorBase<Scalar> > getDXDotDotDp() const;
 

From e9987513584595ca06ce8b730e10daf17f3cd5b7 Mon Sep 17 00:00:00 2001
From: Sidafa Conde <sconde@sandia.gov>
Date: Fri, 23 Jul 2021 15:42:38 -0400
Subject: [PATCH 14/15] Tempus: PFSA - update nonmember function name to meet
 standard

---
 .../tempus/ROL_TempusReducedObjective.hpp     |  2 +-
 ...ratorPseudoTransientForwardSensitivity.cpp |  4 ++--
 ...PseudoTransientForwardSensitivity_decl.hpp | 10 +++++----
 ...PseudoTransientForwardSensitivity_impl.hpp | 21 ++-----------------
 .../BDF2/Tempus_BDF2_PseudoTransient_SA.cpp   |  2 +-
 ...empus_BackwardEuler_PseudoTransient_SA.cpp |  2 +-
 .../DIRK/Tempus_DIRK_PseudoTransient_SA.cpp   |  2 +-
 .../Tempus_ExplicitRK_PseudoTransient_SA.cpp  |  2 +-
 8 files changed, 15 insertions(+), 30 deletions(-)

diff --git a/packages/rol/example/tempus/ROL_TempusReducedObjective.hpp b/packages/rol/example/tempus/ROL_TempusReducedObjective.hpp
index c75113db5fd3..14338bdc6b4a 100644
--- a/packages/rol/example/tempus/ROL_TempusReducedObjective.hpp
+++ b/packages/rol/example/tempus/ROL_TempusReducedObjective.hpp
@@ -389,7 +389,7 @@ run_tempus(const Thyra::ModelEvaluatorBase::InArgs<Real>&  inArgs,
   else if (dgdp != Teuchos::null &&
            sensitivity_method_ == "Pseudotransient Forward") {
     RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<Real> > integrator =
-      Tempus::integratorPseudoTransientForwardSensitivity<Real>(tempus_params_,
+      Tempus::createIntegratorPseudoTransientForwardSensitivity<Real>(tempus_params_,
                                                                 wrapped_model);
     const bool integratorStatus = integrator->advanceTime();
     TEUCHOS_TEST_FOR_EXCEPTION(
diff --git a/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity.cpp b/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity.cpp
index b0e3ab8e6c17..01107a86e0b5 100644
--- a/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity.cpp
+++ b/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity.cpp
@@ -18,13 +18,13 @@ namespace Tempus {
 
   // Nonmember ctor
   template Teuchos::RCP<IntegratorPseudoTransientForwardSensitivity<double> >
-  integratorPseudoTransientForwardSensitivity(
+  createIntegratorPseudoTransientForwardSensitivity(
     Teuchos::RCP<Teuchos::ParameterList>        parameterList,
     const Teuchos::RCP<Thyra::ModelEvaluator<double> >& model);
 
   // Nonmember ctor
   template Teuchos::RCP<IntegratorPseudoTransientForwardSensitivity<double> >
-  integratorPseudoTransientForwardSensitivity();
+  createIntegratorPseudoTransientForwardSensitivity();
 
 } // namespace Tempus
 
diff --git a/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity_decl.hpp b/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity_decl.hpp
index 5e2a0c18a98a..61705e9d8059 100644
--- a/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity_decl.hpp
+++ b/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity_decl.hpp
@@ -124,8 +124,10 @@ class IntegratorPseudoTransientForwardSensitivity
   /// Get the Stepper
   virtual Teuchos::RCP<Stepper<Scalar> > getStepper() const override;
   /// Return a copy of the Tempus ParameterList
-  virtual Teuchos::RCP<Teuchos::ParameterList> getTempusParameterList() override;
-  virtual void setTempusParameterList(Teuchos::RCP<Teuchos::ParameterList> pl) override;
+  virtual Teuchos::RCP<Teuchos::ParameterList> getTempusParameterList() override
+  { return state_integrator_->getTempusParameterList(); }
+  virtual void setTempusParameterList(Teuchos::RCP<Teuchos::ParameterList> pl) override
+  { state_integrator_->setTempusParameterList(pl); }
   /// Get the SolutionHistory
   virtual Teuchos::RCP<const SolutionHistory<Scalar> > getSolutionHistory() const override;
   /// Get the SolutionHistory
@@ -192,7 +194,7 @@ class IntegratorPseudoTransientForwardSensitivity
  */
 template<class Scalar>
 Teuchos::RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<Scalar> >
-integratorPseudoTransientForwardSensitivity(
+createIntegratorPseudoTransientForwardSensitivity(
   Teuchos::RCP<Teuchos::ParameterList>                pList,
   const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >& model);
 
@@ -207,7 +209,7 @@ integratorPseudoTransientForwardSensitivity(
  */
 template<class Scalar>
 Teuchos::RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<Scalar> >
-integratorPseudoTransientForwardSensitivity();
+createIntegratorPseudoTransientForwardSensitivity();
 
 } // namespace Tempus
 
diff --git a/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity_impl.hpp b/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity_impl.hpp
index eab21c639111..7d33bac33282 100644
--- a/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity_impl.hpp
+++ b/packages/tempus/src/Tempus_IntegratorPseudoTransientForwardSensitivity_impl.hpp
@@ -148,23 +148,6 @@ getStepper() const
   return state_integrator_->getStepper();
 }
 
-template<class Scalar>
-Teuchos::RCP<Teuchos::ParameterList>
-IntegratorPseudoTransientForwardSensitivity<Scalar>::
-getTempusParameterList()
-{
-  return state_integrator_->getTempusParameterList();
-}
-
-template<class Scalar>
-void
-IntegratorPseudoTransientForwardSensitivity<Scalar>::
-setTempusParameterList(Teuchos::RCP<Teuchos::ParameterList> pl)
-{
-  TEUCHOS_TEST_FOR_EXCEPTION( true, std::logic_error,
-      "  IntegratorPseudoTransientForwardSensitivity::setTempusParameterList() --  Deprecated!\n");
-}
-
 template<class Scalar>
 Teuchos::RCP<const SolutionHistory<Scalar> >
 IntegratorPseudoTransientForwardSensitivity<Scalar>::
@@ -490,7 +473,7 @@ buildSolutionHistory()
 /// Nonmember constructor
 template<class Scalar>
 Teuchos::RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<Scalar> >
-integratorPseudoTransientForwardSensitivity(
+createIntegratorPseudoTransientForwardSensitivity(
   Teuchos::RCP<Teuchos::ParameterList>                     pList,
   const Teuchos::RCP<Thyra::ModelEvaluator<Scalar> >&      model)
 {
@@ -537,7 +520,7 @@ integratorPseudoTransientForwardSensitivity(
 /// Nonmember constructor
 template<class Scalar>
 Teuchos::RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<Scalar> >
-integratorPseudoTransientForwardSensitivity()
+createIntegratorPseudoTransientForwardSensitivity()
 {
   Teuchos::RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<Scalar> > integrator =
     Teuchos::rcp(new Tempus::IntegratorPseudoTransientForwardSensitivity<Scalar>());
diff --git a/packages/tempus/test/BDF2/Tempus_BDF2_PseudoTransient_SA.cpp b/packages/tempus/test/BDF2/Tempus_BDF2_PseudoTransient_SA.cpp
index 0ea0d4c36313..b7df48661ab3 100644
--- a/packages/tempus/test/BDF2/Tempus_BDF2_PseudoTransient_SA.cpp
+++ b/packages/tempus/test/BDF2/Tempus_BDF2_PseudoTransient_SA.cpp
@@ -57,7 +57,7 @@ void test_pseudotransient_fsa(const bool use_dfdp_as_tangent,
 
   // Setup the Integrator
   RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<double> > integrator =
-    Tempus::integratorPseudoTransientForwardSensitivity<double>(pl, model);
+    Tempus::createIntegratorPseudoTransientForwardSensitivity<double>(pl, model);
 
   // Integrate to timeMax
   bool integratorStatus = integrator->advanceTime();
diff --git a/packages/tempus/test/BackwardEuler/Tempus_BackwardEuler_PseudoTransient_SA.cpp b/packages/tempus/test/BackwardEuler/Tempus_BackwardEuler_PseudoTransient_SA.cpp
index a608bba584d9..5c7721dce646 100644
--- a/packages/tempus/test/BackwardEuler/Tempus_BackwardEuler_PseudoTransient_SA.cpp
+++ b/packages/tempus/test/BackwardEuler/Tempus_BackwardEuler_PseudoTransient_SA.cpp
@@ -57,7 +57,7 @@ void test_pseudotransient_fsa(const bool use_dfdp_as_tangent,
 
   // Setup the Integrator
   RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<double> > integrator =
-    Tempus::integratorPseudoTransientForwardSensitivity<double>(pl, model);
+    Tempus::createIntegratorPseudoTransientForwardSensitivity<double>(pl, model);
 
   // Integrate to timeMax
   bool integratorStatus = integrator->advanceTime();
diff --git a/packages/tempus/test/DIRK/Tempus_DIRK_PseudoTransient_SA.cpp b/packages/tempus/test/DIRK/Tempus_DIRK_PseudoTransient_SA.cpp
index 6ed6f793d011..dff9192363d8 100644
--- a/packages/tempus/test/DIRK/Tempus_DIRK_PseudoTransient_SA.cpp
+++ b/packages/tempus/test/DIRK/Tempus_DIRK_PseudoTransient_SA.cpp
@@ -57,7 +57,7 @@ void test_pseudotransient_fsa(const bool use_dfdp_as_tangent,
 
   // Setup the Integrator
   RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<double> > integrator =
-    Tempus::integratorPseudoTransientForwardSensitivity<double>(pl, model);
+    Tempus::createIntegratorPseudoTransientForwardSensitivity<double>(pl, model);
 
   // Integrate to timeMax
   bool integratorStatus = integrator->advanceTime();
diff --git a/packages/tempus/test/ExplicitRK/Tempus_ExplicitRK_PseudoTransient_SA.cpp b/packages/tempus/test/ExplicitRK/Tempus_ExplicitRK_PseudoTransient_SA.cpp
index 54e8a099ea67..0b30ce45a397 100644
--- a/packages/tempus/test/ExplicitRK/Tempus_ExplicitRK_PseudoTransient_SA.cpp
+++ b/packages/tempus/test/ExplicitRK/Tempus_ExplicitRK_PseudoTransient_SA.cpp
@@ -54,7 +54,7 @@ void test_pseudotransient_fsa(const bool use_dfdp_as_tangent,
 
   // Setup the Integrator
   RCP<Tempus::IntegratorPseudoTransientForwardSensitivity<double> > integrator =
-    Tempus::integratorPseudoTransientForwardSensitivity<double>(pl, model);
+    Tempus::createIntegratorPseudoTransientForwardSensitivity<double>(pl, model);
 
   // Integrate to timeMax
   bool integratorStatus = integrator->advanceTime();

From 42c4b3855f87b0f643c9ece5bf6ef0998cf9a821 Mon Sep 17 00:00:00 2001
From: bathmatt <dr.bettencourt@gmail.com>
Date: Sat, 24 Jul 2021 10:26:25 -0400
Subject: [PATCH 15/15] Zoltan2 no UVM testing (#9441)

Fixed zoltan2 so it it's tests all pass without UVM enabled. Enabled uvm free testing
---
 ...nuxCuda10.1.105uvmOffTestingSettings.cmake |  1 -
 .../Zoltan2_BasicKokkosIdentifierAdapter.hpp  | 57 ++++++++---
 .../zoltan2/example/block/kokkosBlock.cpp     |  6 +-
 .../zoltan2/sphynx/src/Zoltan2_Sphynx.hpp     |  4 +-
 .../TpetraCrsColorer/TpetraCrsColorer.cpp     | 19 +++-
 .../test/core/partition/mj_backwardcompat.cpp | 97 ++++++++++++-------
 .../zoltan2/test/core/temp/ddirectoryTest.cpp | 18 ++--
 7 files changed, 136 insertions(+), 66 deletions(-)

diff --git a/cmake/std/PullRequestLinuxCuda10.1.105uvmOffTestingSettings.cmake b/cmake/std/PullRequestLinuxCuda10.1.105uvmOffTestingSettings.cmake
index 58b9a6e22836..807b8e460f34 100644
--- a/cmake/std/PullRequestLinuxCuda10.1.105uvmOffTestingSettings.cmake
+++ b/cmake/std/PullRequestLinuxCuda10.1.105uvmOffTestingSettings.cmake
@@ -153,7 +153,6 @@ set (Sacado_ENABLE_TESTS OFF CACHE BOOL "Turn off tests for non-UVM build")
 set (SEACAS_ENABLE_TESTS OFF CACHE BOOL "Turn off tests for non-UVM build")
 set (ShyLU_DD_ENABLE_TESTS OFF CACHE BOOL "Turn off tests for non-UVM build")
 set (STK_ENABLE_TESTS OFF CACHE BOOL "Turn off tests for non-UVM build")
-set (Zoltan2_ENABLE_TESTS OFF CACHE BOOL "Turn off tests for non-UVM build")
 
 
 # ShyLU_DD UVM = OFF tests
diff --git a/packages/zoltan2/core/src/input/Zoltan2_BasicKokkosIdentifierAdapter.hpp b/packages/zoltan2/core/src/input/Zoltan2_BasicKokkosIdentifierAdapter.hpp
index 33a75e619f9e..5090714aa6b0 100644
--- a/packages/zoltan2/core/src/input/Zoltan2_BasicKokkosIdentifierAdapter.hpp
+++ b/packages/zoltan2/core/src/input/Zoltan2_BasicKokkosIdentifierAdapter.hpp
@@ -62,7 +62,7 @@ namespace Zoltan2 {
  *           and their associated weights, if any.
  *
  *  The user supplies the identifiers and weights by way of pointers
- *    to arrays.  
+ *    to arrays.
  *
  *  The template parameter (\c User) is a C++ class type which provides the
  *  actual data types with which the Zoltan2 library will be compiled, through
@@ -112,27 +112,41 @@ template <typename User>
   // The Adapter interface.
   ////////////////////////////////////////////////////////////////
 
-  size_t getLocalNumIDs() const {
+  size_t getLocalNumIDs() const override {
     return idsView_.extent(0);
   }
 
+  void getIDsView(const gno_t *&ids) const override {
+    auto kokkosIds = idsView_.view_host();
+    ids = kokkosIds.data();
+  }
+
   void getIDsKokkosView(Kokkos::View<const gno_t *,
-    typename node_t::device_type> &ids) const {
-    ids = idsView_;
+    typename node_t::device_type> &ids) const override {
+    ids = idsView_.template view<typename node_t::device_type>();
   }
 
-  int getNumWeightsPerID() const {
+  int getNumWeightsPerID() const override {
     return weightsView_.extent(1);
   }
 
+  void getWeightsView(const scalar_t *&wgt, int &stride,
+                      int idx = 0) const override
+  {
+    auto h_wgts_2d = weightsView_.view_host();
+
+    wgt = Kokkos::subview(h_wgts_2d, Kokkos::ALL, idx).data();
+    stride = 1;
+  }
+
   void getWeightsKokkosView(Kokkos::View<scalar_t **,
-    typename node_t::device_type> &wgts) const {
-    wgts = weightsView_;
+    typename node_t::device_type> &wgts) const override {
+    wgts = weightsView_.template view<typename node_t::device_type>();
   }
 
 private:
-  Kokkos::View<const gno_t *, typename node_t::device_type> idsView_;
-  Kokkos::View<scalar_t **, typename node_t::device_type> weightsView_;
+  Kokkos::DualView<gno_t *> idsView_;
+  Kokkos::DualView<scalar_t **> weightsView_;
 };
 
 ////////////////////////////////////////////////////////////////
@@ -140,12 +154,25 @@ template <typename User>
 ////////////////////////////////////////////////////////////////
 
 template <typename User>
-  BasicKokkosIdentifierAdapter<User>::BasicKokkosIdentifierAdapter(
-    Kokkos::View<gno_t*, typename node_t::device_type> &ids,
-    Kokkos::View<scalar_t**, typename node_t::device_type> &weights):
-      idsView_(ids), weightsView_(weights) {
+BasicKokkosIdentifierAdapter<User>::BasicKokkosIdentifierAdapter(
+    Kokkos::View<gno_t *, typename node_t::device_type> &ids,
+    Kokkos::View<scalar_t **, typename node_t::device_type> &weights)
+{
+  idsView_ = Kokkos::DualView<gno_t *>("idsView_", ids.extent(0));
+  Kokkos::deep_copy(idsView_.h_view, ids);
+
+  weightsView_ = Kokkos::DualView<scalar_t **>("weightsView_", weights.extent(0), weights.extent(1));
+  Kokkos::deep_copy(weightsView_.h_view, weights);
+
+  weightsView_.modify_host();
+  weightsView_.sync_host();
+  weightsView_.template sync<typename node_t::device_type>();
+
+  idsView_.modify_host();
+  idsView_.sync_host();
+  idsView_.template sync<typename node_t::device_type>();
 }
-  
+
 } //namespace Zoltan2
-  
+
 #endif
diff --git a/packages/zoltan2/example/block/kokkosBlock.cpp b/packages/zoltan2/example/block/kokkosBlock.cpp
index 4e27a9eba5ec..9bf66d60f809 100644
--- a/packages/zoltan2/example/block/kokkosBlock.cpp
+++ b/packages/zoltan2/example/block/kokkosBlock.cpp
@@ -113,10 +113,14 @@ int main(int narg, char *arg[]) {
   const int nWeights = 1;
   Kokkos::View<scalar_t **, typename node_t::device_type>
     weights("weights", localCount, nWeights);
+  auto host_weights = Kokkos::create_mirror_view(weights);
+
   for (int index = 0; index < localCount; index++) {
-    weights(index, 0) = 1; // Error check relies on uniform weights
+    host_weights(index, 0) = 1; // Error check relies on uniform weights
   }
 
+  Kokkos::deep_copy(weights, host_weights);
+
   inputAdapter_t ia(globalIds, weights);
 
   /////////////////////////////////////////////////////////////////////////
diff --git a/packages/zoltan2/sphynx/src/Zoltan2_Sphynx.hpp b/packages/zoltan2/sphynx/src/Zoltan2_Sphynx.hpp
index 3314bf5a6927..fb116672e0f9 100644
--- a/packages/zoltan2/sphynx/src/Zoltan2_Sphynx.hpp
+++ b/packages/zoltan2/sphynx/src/Zoltan2_Sphynx.hpp
@@ -368,9 +368,7 @@ namespace Zoltan2 {
     {
 
 	// Get the row pointers in the host
-	auto rowOffsets = graph_->getLocalGraphDevice().row_map;
-	auto rowOffsets_h = Kokkos::create_mirror_view(rowOffsets);
-	Kokkos::deep_copy(rowOffsets_h, rowOffsets);
+	auto rowOffsets = graph_->getLocalGraphHost().row_map;
 
 	// Create the degree matrix with max row size set to 1
  	Teuchos::RCP<matrix_t> degMat(new matrix_t (graph_->getRowMap(), 
diff --git a/packages/zoltan2/test/core/TpetraCrsColorer/TpetraCrsColorer.cpp b/packages/zoltan2/test/core/TpetraCrsColorer/TpetraCrsColorer.cpp
index 12658db1333b..e2fa314835d6 100644
--- a/packages/zoltan2/test/core/TpetraCrsColorer/TpetraCrsColorer.cpp
+++ b/packages/zoltan2/test/core/TpetraCrsColorer/TpetraCrsColorer.cpp
@@ -9,6 +9,7 @@ class ColorerTest {
 public:
   using map_t = Tpetra::Map<>;
   using gno_t = typename map_t::global_ordinal_type;
+  using graph_t = Tpetra::CrsGraph<>;
   using matrix_t = Tpetra::CrsMatrix<zscalar_t>;
   using multivector_t = Tpetra::MultiVector<zscalar_t>;
   using execution_space_t = typename matrix_t::device_type::execution_space;
@@ -83,10 +84,22 @@ class ColorerTest {
                         static_cast<IST>(1.), static_cast<IST>(9999.));
     JBlock->fillComplete();
 
+
     // Make JCyclic:  same matrix with different Domain and Range maps
-    JCyclic = rcp(new matrix_t(JBlock->getLocalMatrixDevice(),
-                               JBlock->getRowMap(), JBlock->getColMap(),
-                               vMapCyclic, wMapCyclic));
+    RCP<const graph_t> block_graph = JBlock->getCrsGraph();
+    RCP<graph_t> cyclic_graph = rcp(new graph_t(*block_graph));
+    cyclic_graph->resumeFill();
+    cyclic_graph->fillComplete(vMapCyclic, wMapCyclic);
+    JCyclic = rcp(new matrix_t(cyclic_graph));
+    JCyclic->resumeFill();
+    TEUCHOS_ASSERT(block_graph->getNodeNumRows() == cyclic_graph->getNodeNumRows());
+    {
+      auto val_s = JBlock->getLocalMatrixHost().values;
+      auto val_d = JCyclic->getLocalMatrixHost().values;
+      TEUCHOS_ASSERT(val_s.extent(0) == val_d.extent(0));
+      Kokkos::deep_copy(val_d, val_s);
+    }
+    JCyclic->fillComplete();
   }
 
   ////////////////////////////////////////////////////////////////
diff --git a/packages/zoltan2/test/core/partition/mj_backwardcompat.cpp b/packages/zoltan2/test/core/partition/mj_backwardcompat.cpp
index d289efe4a69e..0bd4e3990d92 100644
--- a/packages/zoltan2/test/core/partition/mj_backwardcompat.cpp
+++ b/packages/zoltan2/test/core/partition/mj_backwardcompat.cpp
@@ -79,7 +79,7 @@ class OldSchoolVectorAdapterContig : public Zoltan2::VectorAdapter<User>
 
   int getNumWeightsPerID() const { return (weights != NULL); }
 
-  void getWeightsView(const scalar_t *&wgt, int &stride, int idx = 0) const 
+  void getWeightsView(const scalar_t *&wgt, int &stride, int idx = 0) const
   { wgt = weights; stride = 1; }
 
   int getNumEntriesPerID() const { return dim; }
@@ -122,7 +122,7 @@ class OldSchoolVectorAdapterStrided : public Zoltan2::VectorAdapter<User>
 
   int getNumWeightsPerID() const { return (weights != NULL); }
 
-  void getWeightsView(const scalar_t *&wgt, int &stride, int idx = 0) const 
+  void getWeightsView(const scalar_t *&wgt, int &stride, int idx = 0) const
   { wgt = weights; stride = 1; }
 
   int getNumEntriesPerID() const { return dim; }
@@ -163,77 +163,102 @@ class KokkosVectorAdapter : public Zoltan2::VectorAdapter<User>
   {
     // create kokkos_gids in default memory space
     {
-      typedef Kokkos::View<gno_t *, typename node_t::device_type> view_ids_t;
-      view_ids_t gids = view_ids_t(
+      typedef Kokkos::DualView<gno_t *> view_ids_t;
+      kokkos_gids = view_ids_t(
         Kokkos::ViewAllocateWithoutInitializing("gids"), nids);
-      typename view_ids_t::HostMirror host_gids =
-        Kokkos::create_mirror_view(gids);
+
+      auto host_gids = kokkos_gids.h_view;
       for(size_t n = 0; n < nids; ++n) {
         host_gids(n) = gids_[n];
       }
-      Kokkos::deep_copy(gids, host_gids);
-      kokkos_gids = gids; // to const View
+
+      kokkos_gids.template modify<typename view_ids_t::host_mirror_space>();
+      kokkos_gids.sync_host();
+      kokkos_gids.template sync<typename node_t::device_type>();
     }
 
     // create kokkos_weights in default memory space
     if(weights_ != NULL)
     {
-      typedef Kokkos::View<scalar_t **, typename node_t::device_type> view_weights_t;
+      typedef Kokkos::DualView<scalar_t **> view_weights_t;
       kokkos_weights = view_weights_t(
         Kokkos::ViewAllocateWithoutInitializing("weights"), nids, 0);
-      typename view_weights_t::HostMirror host_kokkos_weights =
-        Kokkos::create_mirror_view(kokkos_weights);
+      auto host_kokkos_weights = kokkos_weights.h_view;
       for(size_t n = 0; n < nids; ++n) {
         host_kokkos_weights(n,0) = weights_[n];
       }
-      Kokkos::deep_copy(kokkos_weights, host_kokkos_weights);
+
+      kokkos_weights.template modify<typename view_weights_t::host_mirror_space>();
+      kokkos_weights.sync_host();
+      kokkos_weights.template sync<typename node_t::device_type>();
     }
 
     // create kokkos_coords in default memory space
     {
-      typedef Kokkos::View<scalar_t **, Kokkos::LayoutLeft,
-        typename node_t::device_type> kokkos_coords_t;
+      typedef Kokkos::DualView<scalar_t **, Kokkos::LayoutLeft> kokkos_coords_t;
       kokkos_coords = kokkos_coords_t(
         Kokkos::ViewAllocateWithoutInitializing("coords"), nids, dim);
-      typename kokkos_coords_t::HostMirror host_kokkos_coords =
-        Kokkos::create_mirror_view(kokkos_coords);
+      auto host_kokkos_coords = kokkos_coords.h_view;
+
       for(size_t n = 0; n < nids; ++n) {
         for(int idx = 0; idx < dim; ++idx) {
           host_kokkos_coords(n,idx) = coords_[n+idx*nids];
         }
       }
-      Kokkos::deep_copy(kokkos_coords, host_kokkos_coords);
+
+      kokkos_coords.template modify<typename kokkos_coords_t::host_mirror_space>();
+      kokkos_coords.sync_host();
+      kokkos_coords.template sync<typename node_t::device_type>();
     }
   }
 
   size_t getLocalNumIDs() const { return nids; }
 
+  void getIDsView(const gno_t *&ids) const override {
+    auto kokkosIds = kokkos_gids.view_host();
+    ids = kokkosIds.data();
+  }
+
   virtual void getIDsKokkosView(Kokkos::View<const gno_t *,
     typename node_t::device_type> &ids) const {
-    ids = kokkos_gids;
+    ids = kokkos_gids.template view<typename node_t::device_type>();;
   }
 
-  int getNumWeightsPerID() const { return (kokkos_weights.size() != 0); }
+  int getNumWeightsPerID() const { return (kokkos_weights.h_view.size() != 0); }
+
+  void getWeightsView(const scalar_t *&wgt, int &stride,
+                      int idx = 0) const override
+  {
+    auto h_wgts_2d = kokkos_weights.view_host();
+
+    wgt = Kokkos::subview(h_wgts_2d, Kokkos::ALL, idx).data();
+    stride = 1;
+  }
 
   virtual void getWeightsKokkosView(Kokkos::View<scalar_t **,
     typename node_t::device_type> & wgt) const {
-    wgt = kokkos_weights;
+    wgt = kokkos_weights.template view<typename node_t::device_type>();;
   }
 
   int getNumEntriesPerID() const { return dim; }
 
+  void getEntriesView(const scalar_t *&elements,
+    int &stride, int idx = 0) const override {
+    elements = kokkos_coords.view_host().data();
+    stride = 1;
+  }
+
   virtual void getEntriesKokkosView(Kokkos::View<scalar_t **,
     Kokkos::LayoutLeft, typename node_t::device_type> & coo) const {
-    coo = kokkos_coords;
+    coo = kokkos_coords.template view<typename node_t::device_type>();
   }
 
 private:
   const size_t nids;
-  Kokkos::View<const gno_t *, typename node_t::device_type> kokkos_gids;
+  Kokkos::DualView<gno_t *> kokkos_gids;
   const int dim;
-  Kokkos::View<scalar_t **, Kokkos::LayoutLeft, typename node_t::device_type>
-    kokkos_coords;
-  Kokkos::View<scalar_t **, typename node_t::device_type> kokkos_weights;
+  Kokkos::DualView<scalar_t **, Kokkos::LayoutLeft> kokkos_coords;
+  Kokkos::DualView<scalar_t **> kokkos_weights;
 };
 
 //////////////////////////////////////////////
@@ -281,7 +306,7 @@ int run_test_strided_versus_contig(const std::string & algorithm) {
   globalId_t *globalIds = new globalId_t [localCount];
   globalId_t offset = rank * localCount;
   for (size_t i=0; i < localCount; i++) globalIds[i] = offset++;
-   
+
   ///////////////////////////////////////////////////////////////////////
   // Create parameters for an MJ problem
 
@@ -296,12 +321,12 @@ int run_test_strided_versus_contig(const std::string & algorithm) {
   // Test one:  No weights
 
   // Partition using strided coords
-  stridedAdapter_t *ia1 = 
+  stridedAdapter_t *ia1 =
                     new stridedAdapter_t(localCount,globalIds,dim,cStrided);
 
   Zoltan2::PartitioningProblem<stridedAdapter_t> *problem1 =
            new Zoltan2::PartitioningProblem<stridedAdapter_t>(ia1, &params);
-   
+
   problem1->solve();
 
   quality_t *metricObject1 = new quality_t(ia1, &params, comm,
@@ -325,7 +350,7 @@ int run_test_strided_versus_contig(const std::string & algorithm) {
 
   Zoltan2::PartitioningProblem<contigAdapter_t> *problem2 =
            new Zoltan2::PartitioningProblem<contigAdapter_t>(ia2, &params);
-   
+
   problem2->solve();
 
   // Partition using contiguous coords to generate kokkos adapter
@@ -344,7 +369,7 @@ int run_test_strided_versus_contig(const std::string & algorithm) {
        (problem2->getSolution().getPartListView()[i] !=
         problem3->getSolution().getPartListView()[i]))
     {
-      std::cout << rank << " Error: differing parts for index " << i 
+      std::cout << rank << " Error: differing parts for index " << i
                 << problem1->getSolution().getPartListView()[i] << " "
                 << problem2->getSolution().getPartListView()[i] << " "
                 << problem3->getSolution().getPartListView()[i] << std::endl;
@@ -362,11 +387,11 @@ int run_test_strided_versus_contig(const std::string & algorithm) {
   delete ia1;
   delete ia2;
   delete ia3;
-   
+
   ///////////////////////////////////////////////////////////////////////
   // Test two:  weighted
   // Create a Zoltan2 input adapter that includes weights.
-  
+
   scalar_t *weights = new scalar_t [localCount];
   for (size_t i=0; i < localCount; i++) weights[i] = 1 + scalar_t(rank);
 
@@ -397,18 +422,18 @@ int run_test_strided_versus_contig(const std::string & algorithm) {
   ia2 = new contigAdapter_t(localCount, globalIds, dim, cContig, weights);
 
   problem2 = new Zoltan2::PartitioningProblem<contigAdapter_t>(ia2, &params);
-   
+
   problem2->solve();
 
   // compare strided vs contiguous
   ndiff = 0;
   for (size_t i = 0; i < localCount; i++) {
-    if (problem1->getSolution().getPartListView()[i] != 
+    if (problem1->getSolution().getPartListView()[i] !=
         problem2->getSolution().getPartListView()[i]) {
-      std::cout << rank << " Error: differing parts for index " << i 
+      std::cout << rank << " Error: differing parts for index " << i
                 << problem1->getSolution().getPartListView()[i] << " "
                 << problem2->getSolution().getPartListView()[i] << std::endl;
-            
+
       ndiff++;
     }
   }
diff --git a/packages/zoltan2/test/core/temp/ddirectoryTest.cpp b/packages/zoltan2/test/core/temp/ddirectoryTest.cpp
index 153885bfb29e..c15d7ff8e126 100644
--- a/packages/zoltan2/test/core/temp/ddirectoryTest.cpp
+++ b/packages/zoltan2/test/core/temp/ddirectoryTest.cpp
@@ -241,10 +241,12 @@ bool IDs<id_t>::TpetraDDTest()
 
   // Use the multivector for counting number of occurrences of each id
 
-  vectordata_t idData = idVec.getDataNonConst();
-  for (auto it = uniqueIds.begin(); it != uniqueIds.end(); it++) {
-    id_t idx = idMap->getLocalElement(it->first);
-    idData[idx] = it->second;
+  {
+    vectordata_t idData = idVec.getDataNonConst();
+    for (auto it = uniqueIds.begin(); it != uniqueIds.end(); it++) {
+      id_t idx = idMap->getLocalElement(it->first);
+      idData[idx] = it->second;
+    }
   }
 
   printMap(*idMap, "idMap ");
@@ -300,9 +302,11 @@ bool IDs<id_t>::TpetraDDTest()
 
   // Check the result
   size_t cntShared = 0;
-  idData = idVec.getDataNonConst();
-  for (size_t i = 0; i < idVec.getLocalLength(); i++)
-    if (idData[i] > 1) cntShared++;
+  {
+    auto idData = idVec.getDataNonConst();
+    for (size_t i = 0; i < idVec.getLocalLength(); i++)
+      if (idData[i] > 1) cntShared++;
+  }
 
   std::cout << comm->getRank() << " cntShared = " << cntShared
                                << "; nShared = " << nShared << std::endl;