From c50bb150dfdd3e5f80443b465355bc03ca6d95dd Mon Sep 17 00:00:00 2001 From: micheldemessieres Date: Thu, 5 Mar 2020 20:24:49 -0500 Subject: [PATCH 1/2] Amesos2: Refactor Basker to use Kokkos views --- packages/amesos2/src/Amesos2_Basker.cpp | 323 +----------------- .../src/Amesos2_Basker_FunctionMap.hpp | 28 +- .../amesos2/src/Amesos2_Basker_TypeMap.hpp | 115 ++----- packages/amesos2/src/Amesos2_Basker_decl.hpp | 57 +++- packages/amesos2/src/Amesos2_Basker_def.hpp | 162 ++++----- packages/amesos2/src/Amesos2_KLU2_def.hpp | 7 - .../src/Amesos2_TpetraMultiVecAdapter_def.hpp | 18 +- .../amesos2/test/solvers/Basker_UnitTests.cpp | 4 + packages/amesos2/test/solvers/Basker_test.xml | 103 ++++-- packages/amesos2/test/solvers/CMakeLists.txt | 15 + 10 files changed, 269 insertions(+), 563 deletions(-) diff --git a/packages/amesos2/src/Amesos2_Basker.cpp b/packages/amesos2/src/Amesos2_Basker.cpp index de4ffe90d61e..2e0c2aefc871 100644 --- a/packages/amesos2/src/Amesos2_Basker.cpp +++ b/packages/amesos2/src/Amesos2_Basker.cpp @@ -47,6 +47,7 @@ #include "Amesos2_Basker_def.hpp" #include "Amesos2_ExplicitInstantiationHelpers.hpp" +#include "TpetraCore_ETIHelperMacros.h" namespace Amesos2 { @@ -54,324 +55,16 @@ namespace Amesos2 { AMESOS2_SOLVER_EPETRA_INST(Basker); #endif -#ifdef HAVE_TPETRA_INST_INT_INT -#ifdef HAVE_TPETRA_INST_FLOAT - AMESOS2_SOLVER_TPETRA_INST(Basker,float,int,int); -#endif -#ifdef HAVE_TPETRA_INST_DOUBLE - AMESOS2_SOLVER_TPETRA_INST(Basker,double,int,int); -#endif -#ifdef HAVE_TPETRA_INST_COMPLEX_FLOAT - AMESOS2_SOLVER_TPETRA_INST(Basker,std::complex,int,int); -#endif -#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE - AMESOS2_SOLVER_TPETRA_INST(Basker,std::complex,int,int); -#endif -#endif// HAVE_TPETRA_INST_INT_INST + #define AMESOS2_BASKER_LOCAL_INSTANT(S,LO,GO,N) \ + template class Amesos2::Basker, \ + Tpetra::MultiVector >; -#ifdef HAVE_TPETRA_INST_INT_UNSIGNED -#ifdef HAVE_TPETRA_INST_FLOAT - AMESOS2_SOLVER_TPETRA_INST(Basker,float,int,unsigned); -#endif 
-#ifdef HAVE_TPETRA_INST_DOUBLE - AMESOS2_SOLVER_TPETRA_INST(Basker,double,int,unsigned); -#endif -#ifdef HAVE_TPETRA_INST_COMPLEX_FLOAT - AMESOS2_SOLVER_TPETRA_INST(Basker,std::complex,int,unsigned); -#endif -#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE - AMESOS2_SOLVER_TPETRA_INST(Basker,std::complex,int,unsigned); -#endif -#endif// HAVE_TPETRA_INST_INST_UNSIGNED + TPETRA_ETI_MANGLING_TYPEDEFS() -#ifdef HAVE_TPETRA_INST_INT_LONG -#ifdef HAVE_TPETRA_INST_FLOAT - AMESOS2_SOLVER_TPETRA_INST(Basker,float,int,long); -#endif -#ifdef HAVE_TPETRA_INST_DOUBLE - AMESOS2_SOLVER_TPETRA_INST(Basker,double,int,long); -#endif -#ifdef HAVE_TPETRA_INST_COMPLEX_FLOAT - AMESOS2_SOLVER_TPETRA_INST(Basker,std::complex,int,long); -#endif -#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE - AMESOS2_SOLVER_TPETRA_INST(Basker,std::complex,int,long); -#endif -#endif// HAVE_TPETRA_INST_INT_UNSIGNED - -#ifdef HAVE_TPETRA_INST_INT_LONG_LONG -#ifdef HAVE_TPETRA_INST_FLOAT - AMESOS2_SOLVER_TPETRA_INST(Basker,float,int,long long); -#endif -#ifdef HAVE_TPETRA_INST_DOUBLE - AMESOS2_SOLVER_TPETRA_INST(Basker,double,int,long long); -#endif -#ifdef HAVE_TPETRA_INST_COMPLEX_FLOAT - AMESOS2_SOLVER_TPETRA_INST(Basker,std::complex,int,long long); -#endif -#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE - AMESOS2_SOLVER_TPETRA_INST(Basker,std::complex,int,long long); -#endif -#endif// HAVE_TPETRA_INST_INT_INT + TPETRA_INSTANTIATE_SLGN_NO_ORDINAL_SCALAR(AMESOS2_BASKER_LOCAL_INSTANT) + #define AMESOS2_KOKKOS_IMPL_SOLVER_NAME Basker + #include "Amesos2_Kokkos_Impl.hpp" } -// -// 26-Nov-2014: JJH code copied from Amesos2_SuperLU.cpp. 
-// -#include "Kokkos_DefaultNode.hpp" -#include "TpetraCore_ETIHelperMacros.h" - -#define AMESOS2_BASKER_LOCAL_INSTANT(S,LO,GO,N) \ - template class Amesos2::Basker, \ - Tpetra::MultiVector >; - -TPETRA_ETI_MANGLING_TYPEDEFS() - -#if defined(HAVE_TPETRA_INST_SERIAL) && !defined(HAVE_TPETRA_DEFAULTNODE_SERIALWRAPPERNODE) && defined(HAVE_TPETRA_INST_DOUBLE) && defined(TPETRA_HAVE_KOKKOS_REFACTOR) -#define NODETYPE Kokkos_Compat_KokkosSerialWrapperNode -#ifdef HAVE_TPETRA_INST_FLOAT - #ifdef HAVE_TPETRA_INST_INT_INT - AMESOS2_BASKER_LOCAL_INSTANT(float, int, int, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG - AMESOS2_BASKER_LOCAL_INSTANT(float, int, long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG_LONG - AMESOS2_BASKER_LOCAL_INSTANT(float, int, long long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_UNSIGNED - AMESOS2_BASKER_LOCAL_INSTANT(float, int, unsigned int, NODETYPE) - #endif -#endif -#ifdef HAVE_TPETRA_INST_DOUBLE - #ifdef HAVE_TPETRA_INST_INT_INT - AMESOS2_BASKER_LOCAL_INSTANT(double, int, int, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG - AMESOS2_BASKER_LOCAL_INSTANT(double, int, long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG_LONG - AMESOS2_BASKER_LOCAL_INSTANT(double, int, long long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_UNSIGNED - AMESOS2_BASKER_LOCAL_INSTANT(double, int, unsigned int, NODETYPE) - #endif -#endif -#ifdef HAVE_TPETRA_INST_COMPLEX_FLOAT - #ifdef HAVE_TPETRA_INST_INT_INT - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, int, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG_LONG - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, long long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_UNSIGNED - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, unsigned int, NODETYPE) - #endif -#endif -#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE - #ifdef HAVE_TPETRA_INST_INT_INT - 
AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, int, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG_LONG - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, long long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_UNSIGNED - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, unsigned int, NODETYPE) - #endif -#endif -#undef NODETYPE -#endif - - - -#if defined(HAVE_TPETRA_INST_PTHREAD) && !defined(HAVE_TPETRA_DEFAULTNODE_THREADSWRAPPERNODE) && defined(HAVE_TPETRA_INST_DOUBLE) && defined(TPETRA_HAVE_KOKKOS_REFACTOR) -#define NODETYPE Kokkos_Compat_KokkosThreadsWrapperNode -#ifdef HAVE_TPETRA_INST_FLOAT - #ifdef HAVE_TPETRA_INST_INT_INT - AMESOS2_BASKER_LOCAL_INSTANT(float, int, int, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG - AMESOS2_BASKER_LOCAL_INSTANT(float, int, long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG_LONG - AMESOS2_BASKER_LOCAL_INSTANT(float, int, long long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_UNSIGNED - AMESOS2_BASKER_LOCAL_INSTANT(float, int, unsigned int, NODETYPE) - #endif -#endif -#ifdef HAVE_TPETRA_INST_DOUBLE - #ifdef HAVE_TPETRA_INST_INT_INT - AMESOS2_BASKER_LOCAL_INSTANT(double, int, int, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG - AMESOS2_BASKER_LOCAL_INSTANT(double, int, long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG_LONG - AMESOS2_BASKER_LOCAL_INSTANT(double, int, long long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_UNSIGNED - AMESOS2_BASKER_LOCAL_INSTANT(double, int, unsigned int, NODETYPE) - #endif -#endif -#ifdef HAVE_TPETRA_INST_COMPLEX_FLOAT - #ifdef HAVE_TPETRA_INST_INT_INT - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, int, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG_LONG - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, long long, 
NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_UNSIGNED - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, unsigned int, NODETYPE) - #endif -#endif -#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE - #ifdef HAVE_TPETRA_INST_INT_INT - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, int, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG_LONG - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, long long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_UNSIGNED - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, unsigned int, NODETYPE) - #endif -#endif -#undef NODETYPE -#endif - -#if defined(HAVE_TPETRA_INST_OPENMP) && !defined(HAVE_TPETRA_DEFAULTNODE_OPENMPWRAPPERNODE) && defined(HAVE_TPETRA_INST_DOUBLE) && defined(TPETRA_HAVE_KOKKOS_REFACTOR) -#define NODETYPE Kokkos_Compat_KokkosOpenMPWrapperNode -#ifdef HAVE_TPETRA_INST_FLOAT - #ifdef HAVE_TPETRA_INST_INT_INT - AMESOS2_BASKER_LOCAL_INSTANT(float, int, int, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG - AMESOS2_BASKER_LOCAL_INSTANT(float, int, long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG_LONG - AMESOS2_BASKER_LOCAL_INSTANT(float, int, long long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_UNSIGNED - AMESOS2_BASKER_LOCAL_INSTANT(float, int, unsigned int, NODETYPE) - #endif -#endif -#ifdef HAVE_TPETRA_INST_DOUBLE - #ifdef HAVE_TPETRA_INST_INT_INT - AMESOS2_BASKER_LOCAL_INSTANT(double, int, int, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG - AMESOS2_BASKER_LOCAL_INSTANT(double, int, long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG_LONG - AMESOS2_BASKER_LOCAL_INSTANT(double, int, long long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_UNSIGNED - AMESOS2_BASKER_LOCAL_INSTANT(double, int, unsigned int, NODETYPE) - #endif -#endif -#ifdef HAVE_TPETRA_INST_COMPLEX_FLOAT - #ifdef HAVE_TPETRA_INST_INT_INT - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, int, NODETYPE) - 
#endif - #ifdef HAVE_TPETRA_INST_INT_LONG - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG_LONG - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, long long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_UNSIGNED - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, unsigned int, NODETYPE) - #endif -#endif -#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE - #ifdef HAVE_TPETRA_INST_INT_INT - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, int, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG_LONG - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, long long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_UNSIGNED - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, unsigned int, NODETYPE) - #endif -#endif -#undef NODETYPE -#endif - -#if defined(HAVE_TPETRA_INST_CUDA) && !defined(HAVE_TPETRA_DEFAULTNODE_CUDAWRAPPERNODE) && defined(HAVE_TPETRA_INST_DOUBLE) && defined(TPETRA_HAVE_KOKKOS_REFACTOR) -#define NODETYPE Kokkos_Compat_KokkosCudaWrapperNode -#ifdef HAVE_TPETRA_INST_FLOAT - #ifdef HAVE_TPETRA_INST_INT_INT - AMESOS2_BASKER_LOCAL_INSTANT(float, int, int, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG - AMESOS2_BASKER_LOCAL_INSTANT(float, int, long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG_LONG - AMESOS2_BASKER_LOCAL_INSTANT(float, int, long long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_UNSIGNED - AMESOS2_BASKER_LOCAL_INSTANT(float, int, unsigned int, NODETYPE) - #endif -#endif -#ifdef HAVE_TPETRA_INST_DOUBLE - #ifdef HAVE_TPETRA_INST_INT_INT - AMESOS2_BASKER_LOCAL_INSTANT(double, int, int, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG - AMESOS2_BASKER_LOCAL_INSTANT(double, int, long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG_LONG - AMESOS2_BASKER_LOCAL_INSTANT(double, int, long long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_UNSIGNED - 
AMESOS2_BASKER_LOCAL_INSTANT(double, int, unsigned int, NODETYPE) - #endif -#endif -#ifdef HAVE_TPETRA_INST_COMPLEX_FLOAT - #ifdef HAVE_TPETRA_INST_INT_INT - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, int, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG_LONG - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, long long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_UNSIGNED - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, unsigned int, NODETYPE) - #endif -#endif -#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE - #ifdef HAVE_TPETRA_INST_INT_INT - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, int, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_LONG_LONG - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, long long, NODETYPE) - #endif - #ifdef HAVE_TPETRA_INST_INT_UNSIGNED - AMESOS2_BASKER_LOCAL_INSTANT(std::complex, int, unsigned int, NODETYPE) - #endif -#endif -#undef NODETYPE -#endif - #endif // HAVE_AMESOS2_EXPLICIT_INSTANTIATION diff --git a/packages/amesos2/src/Amesos2_Basker_FunctionMap.hpp b/packages/amesos2/src/Amesos2_Basker_FunctionMap.hpp index 7a25ee39a407..b85c9cfa5f78 100644 --- a/packages/amesos2/src/Amesos2_Basker_FunctionMap.hpp +++ b/packages/amesos2/src/Amesos2_Basker_FunctionMap.hpp @@ -64,20 +64,24 @@ namespace Amesos2 { - /* ==================== Specializations ==================== - * - * \cond Basker_function_specializations - */ + template <> + struct FunctionMap + { + static double * convert_scalar(double * pData) { + return pData; // no conversion necessary + } + }; - /** - * \brief Pass function calls to Basker based on data type. - - */ - // TODO : Do we need the specializations for Basker ?? 
- - - /* \endcond Basker_function_specializations */ +#ifdef HAVE_TEUCHOS_COMPLEX + template <> + struct FunctionMap> + { + static std::complex * convert_scalar(Kokkos::complex * pData) { + return reinterpret_cast *>(pData); + } + }; +#endif // HAVE_TEUCHOS_COMPLEX } // end namespace Amesos2 diff --git a/packages/amesos2/src/Amesos2_Basker_TypeMap.hpp b/packages/amesos2/src/Amesos2_Basker_TypeMap.hpp index e8b56b98f495..186e1d68aa2c 100644 --- a/packages/amesos2/src/Amesos2_Basker_TypeMap.hpp +++ b/packages/amesos2/src/Amesos2_Basker_TypeMap.hpp @@ -46,8 +46,7 @@ \author Joshua Dennis Booth Siva Rajamanickam - \brief Provides definition of Basker types as well as conversions and type - traits. + \brief Provides definition of Basker types */ @@ -66,102 +65,29 @@ #include "Amesos2_TypeMap.hpp" - -#ifdef HAVE_TEUCHOS_COMPLEX - -/* ==================== Conversion ==================== */ -namespace Teuchos { - -/** - * \defgroup slu_conversion Conversion definitions for SLU types. - * - * Define specializations of Teuchos::as<> for the SLU types. - * - * These specializations are meant to work with any complex data type that - * implements the same interface as the STL complex type. 
- * - * @{ - */ - -#ifndef HAVE_AMESOS2_KLU2 - -template <> -class ValueTypeConversionTraits, std::complex > -{ -public: - static std::complex convert( const std::complex t ) - { - std::complex ret(Teuchos::as(t.real()), - Teuchos::as(t.imag())); - return( ret ); - } - - static std::complex safeConvert( const std::complex t ) - { - std::complex ret(Teuchos::as(t.real()), - Teuchos::as(t.imag())); - return( ret ); - } -}; - - -template <> -class ValueTypeConversionTraits , std::complex > -{ -public: - static std::complex convert( const std::complex t ) - { - float ret_r = Teuchos::as( t.real() ); - float ret_i = Teuchos::as( t.imag() ); - std::complex ret (ret_r, ret_i); - return (ret); - } - - // No special checks for safe Convert - static std::complex safeConvert( const std::complex t ) - { - float ret_r = Teuchos::as( t.real() ); - float ret_i = Teuchos::as( t.imag() ); - std::complex ret (ret_r, ret_i); - return (ret); - } -}; - - -#endif -//@} End Conversion group - - -} // end namespace Teuchos - -#endif // HAVE_TEUCHOS_COMPLEX - - namespace Amesos2 { template class Basker; /* Specialize the Amesos2::TypeMap struct for Basker types - * TODO: Mostly dummy assignments as Basker is templated. Remove if possible. 
- * * \cond Basker_type_specializations */ template <> struct TypeMap { - static float dtype; - typedef float type; - typedef float magnitude_type; + typedef double dtype; + typedef double type; + typedef float put_type; // exists just to handle the case of std::complex or float adapter }; template <> struct TypeMap { - static double dtype; + typedef double dtype; typedef double type; - typedef double magnitude_type; + typedef double put_type; // exists just to handle the case of std::complex or float adapter }; @@ -170,18 +96,35 @@ struct TypeMap template <> struct TypeMap > { - static std::complex dtype; - typedef std::complex type; - typedef double magnitude_type; + typedef std::complex dtype; + typedef Kokkos::complex type; + typedef Kokkos::complex put_type; // exists just to handle the case of std::complex or float adapter }; template <> struct TypeMap > { - static std::complex dtype; - typedef std::complex type; - typedef double magnitude_type; + typedef std::complex dtype; + typedef Kokkos::complex type; + typedef Kokkos::complex put_type; // exists just to handle the case of std::complex or float adapter +}; + +template <> +struct TypeMap > +{ + typedef std::complex dtype; + typedef Kokkos::complex type; + typedef Kokkos::complex put_type; // exists just to handle the case of std::complex or float adapter +}; + + +template <> +struct TypeMap > +{ + typedef std::complex dtype; + typedef Kokkos::complex type; + typedef Kokkos::complex put_type; // exists just to handle the case of std::complex or float adapter }; diff --git a/packages/amesos2/src/Amesos2_Basker_decl.hpp b/packages/amesos2/src/Amesos2_Basker_decl.hpp index 74ef8ae9b9c7..e47668a2ec62 100644 --- a/packages/amesos2/src/Amesos2_Basker_decl.hpp +++ b/packages/amesos2/src/Amesos2_Basker_decl.hpp @@ -94,10 +94,11 @@ class Basker : public SolverCore typedef TypeMap type_map; - typedef typename type_map::type slu_type; - typedef typename type_map::magnitude_type magnitude_type; + typedef typename 
type_map::type basker_type; + typedef typename type_map::dtype basker_dtype; + typedef typename type_map::put_type basker_put_type; // just for special case when adapter is std::complex or float - typedef FunctionMap function_map; + typedef FunctionMap function_map; typedef Matrix matrix_type; typedef MatrixAdapter matrix_adapter_type; @@ -106,18 +107,17 @@ class Basker : public SolverCore Basker( Teuchos::RCP A, Teuchos::RCP X, Teuchos::RCP B); - ~Basker( ); + ~Basker( ); private: - /** - * \brief can we optimize size_type and ordinal_type for straight pass through, - * also check that is_contiguous_ flag set to true - */ + /** + - * \brief can we optimize size_type and ordinal_type for straight pass through, + - * also check that is_contiguous_ flag set to true + - */ bool single_proc_optimization() const; - /** * \brief Performs pre-ordering on the matrix to increase efficiency. * @@ -185,24 +185,37 @@ class Basker : public SolverCore // Members int num_threads; - // The following Arrays are persisting storage arrays for A, X, and B - /// Stores the values of the nonzero entries for Basker - Teuchos::Array nzvals_; + typedef Kokkos::DefaultHostExecutionSpace HostSpaceType; + typedef Kokkos::View host_ordinal_type_array; + + typedef Kokkos::View host_value_type_array; + + // The following Views are persisting storage arrays for A, X, and B + /// Stores the values of the nonzero entries for Basker + host_value_type_array host_nzvals_view_; /// Stores the location in \c Ai_ and Aval_ that starts row j - Teuchos::Array rowind_; + host_ordinal_type_array host_rows_view_; /// Stores the row indices of the nonzero entries - Teuchos::Array colptr_; + host_ordinal_type_array host_col_ptr_view_; bool is_contiguous_; + typedef typename Kokkos::View + host_solve_array_t; + typedef typename Kokkos::View + convert_host_solve_array_t; /// Persisting 1D store for X - mutable Teuchos::Array xvals_; local_ordinal_type ldx_; + mutable host_solve_array_t xValues_; + mutable
convert_host_solve_array_t convert_xValues_; // exists just for the case of adapter using std::complex or float + int ldx_; + /// Persisting 1D store for B - mutable Teuchos::Array bvals_; local_ordinal_type ldb_; + mutable host_solve_array_t bValues_; + int ldb_; /*Handle for Basker object*/ - mutable ::BaskerClassicNS::BaskerClassic basker; + mutable ::BaskerClassicNS::BaskerClassic basker; }; // End class Basker @@ -212,8 +225,10 @@ class Basker : public SolverCore template <> struct solver_traits { #ifdef HAVE_TEUCHOS_COMPLEX - typedef Meta::make_list4, + Kokkos::complex, std::complex, std::complex > supported_scalars; #else @@ -221,6 +236,12 @@ struct solver_traits { #endif }; +template +struct solver_supports_matrix> { + static const bool value = true; +}; + } // end namespace Amesos2 #endif // AMESOS2_BASKER_DECL_HPP diff --git a/packages/amesos2/src/Amesos2_Basker_def.hpp b/packages/amesos2/src/Amesos2_Basker_def.hpp index e3d85accd94d..9375f654e73b 100644 --- a/packages/amesos2/src/Amesos2_Basker_def.hpp +++ b/packages/amesos2/src/Amesos2_Basker_def.hpp @@ -69,9 +69,6 @@ Basker::Basker( Teuchos::RCP X, Teuchos::RCP B ) : SolverCore(A, X, B) - , nzvals_() // initialize to empty arrays - , rowind_() - , colptr_() , is_contiguous_(true) // , basker() { @@ -132,7 +129,8 @@ Basker::numericFactorization_impl() std::cout << "colptr_ : " << colptr_.toString() << std::endl; #endif - info = basker.factor(this->globalNumRows_, this->globalNumCols_, this->globalNumNonZeros_, colptr_.getRawPtr(), rowind_.getRawPtr(), nzvals_.getRawPtr()); + basker_dtype * pBaskerValues = function_map::convert_scalar(host_nzvals_view_.data()); + info = basker.factor(this->globalNumRows_, this->globalNumCols_, this->globalNumNonZeros_, host_col_ptr_view_.data(), host_rows_view_.data(), pBaskerValues); // This is set after numeric factorization complete as pivoting can be used; // In this case, a discrepancy between symbolic and numeric nnz total can occur. 
@@ -176,108 +174,81 @@ Basker::solve_impl( const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0; const size_t nrhs = X->getGlobalNumVectors(); - if ( single_proc_optimization() && nrhs == 1 ) { - + { // Get values from RHS B #ifdef HAVE_AMESOS2_TIMERS - Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_); + Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_); + Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ ); #endif -#ifndef HAVE_TEUCHOS_COMPLEX - auto b_vector = Util::vector_pointer_helper< MultiVecAdapter, Vector >::get_pointer_to_vector( B ); - auto x_vector = Util::vector_pointer_helper< MultiVecAdapter, Vector >::get_pointer_to_vector( X ); -#else - // NDE: 09/25/2017 - // Cannot convert Kokkos::complex* to std::complex*; in this case, use reinterpret_cast - using complex_type = typename Util::getStdCplxType< magnitude_type, typename matrix_adapter_type::spmtx_vals_t >::type; - complex_type * b_vector = reinterpret_cast< complex_type * >( Util::vector_pointer_helper< MultiVecAdapter, Vector >::get_pointer_to_vector( B ) ); - complex_type * x_vector = reinterpret_cast< complex_type * >( Util::vector_pointer_helper< MultiVecAdapter, Vector >::get_pointer_to_vector( X ) ); -#endif - TEUCHOS_TEST_FOR_EXCEPTION(b_vector == nullptr, - std::runtime_error, "Amesos2 Runtime Error: b_vector returned null "); - - TEUCHOS_TEST_FOR_EXCEPTION(x_vector == nullptr, - std::runtime_error, "Amesos2 Runtime Error: x_vector returned null "); - - if ( this->root_ ) { - { // Do solve! 
-#ifdef HAVE_AMESOS2_TIMERS - Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_); -#endif - ierr = basker.solveMultiple(nrhs, b_vector, x_vector); - } - - /* All processes should have the same error code */ - Teuchos::broadcast(*(this->getComm()), 0, &ierr); + if ( single_proc_optimization() && nrhs == 1 ) { + // no msp creation + Util::get_1d_copy_helper_kokkos_view, + host_solve_array_t>::do_get(B, bValues_, as(ld_rhs)); - TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0, - std::runtime_error, - "Encountered zero diag element at: " << ierr); - TEUCHOS_TEST_FOR_EXCEPTION( ierr == -1, - std::runtime_error, - "Could not alloc needed working memory for solve" ); + Util::get_1d_copy_helper_kokkos_view, + host_solve_array_t>::do_get(X, xValues_, as(ld_rhs)); } - } - else - { - const size_t val_store_size = as(ld_rhs * nrhs); - - xvals_.resize(val_store_size); - bvals_.resize(val_store_size); - - { // Get values from RHS B -#ifdef HAVE_AMESOS2_TIMERS - Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_); - Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ ); -#endif + else { + if ( is_contiguous_ == true ) { + Util::get_1d_copy_helper_kokkos_view, + host_solve_array_t>::do_get(B, bValues_, as(ld_rhs), ROOTED, this->rowIndexBase_); + } + else { + Util::get_1d_copy_helper_kokkos_view, + host_solve_array_t>::do_get(B, bValues_, as(ld_rhs), CONTIGUOUS_AND_ROOTED, this->rowIndexBase_); + } + // See Amesos2_Tacho_def.hpp for notes on why we 'get' x here. 
if ( is_contiguous_ == true ) { - Util::get_1d_copy_helper, - slu_type>::do_get(B, bvals_(), as(ld_rhs), ROOTED, this->rowIndexBase_); + Util::get_1d_copy_helper_kokkos_view, + host_solve_array_t>::do_get(X, xValues_, as(ld_rhs), ROOTED, this->rowIndexBase_); } else { - Util::get_1d_copy_helper, - slu_type>::do_get(B, bvals_(), as(ld_rhs), CONTIGUOUS_AND_ROOTED, this->rowIndexBase_); + Util::get_1d_copy_helper_kokkos_view, + host_solve_array_t>::do_get(X, xValues_, as(ld_rhs), CONTIGUOUS_AND_ROOTED, this->rowIndexBase_); } } + } - if ( this->root_ ) { - { // Do solve! + if ( this->root_ ) { // do solve #ifdef HAVE_AMESOS2_TIMERS - Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_); + Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_); #endif - ierr = basker.solveMultiple(nrhs, bvals_.getRawPtr(),xvals_.getRawPtr()); - } - - } + basker_dtype * pxBaskerValues = function_map::convert_scalar(xValues_.data()); + basker_dtype * pbBaskerValues = function_map::convert_scalar(bValues_.data()); + ierr = basker.solveMultiple(nrhs, pbBaskerValues, pxBaskerValues); + } - /* All processes should have the same error code */ - Teuchos::broadcast(*(this->getComm()), 0, &ierr); + /* All processes should have the same error code */ + Teuchos::broadcast(*(this->getComm()), 0, &ierr); - TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0, - std::runtime_error, - "Encountered zero diag element at: " << ierr); - TEUCHOS_TEST_FOR_EXCEPTION( ierr == -1, - std::runtime_error, - "Could not alloc needed working memory for solve" ); + TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0, + std::runtime_error, + "Encountered zero diag element at: " << ierr); + TEUCHOS_TEST_FOR_EXCEPTION( ierr == -1, + std::runtime_error, + "Could not alloc needed working memory for solve" ); - { + { #ifdef HAVE_AMESOS2_TIMERS - Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_); + Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_); #endif + // see Amesos2_Klu2_def.hpp for same situation and a long 
comment on this + // Note for Basker the issue applies to float or complex, while Klu2 just applies to complex + deep_copy_or_assign_view(convert_xValues_, xValues_); - if ( is_contiguous_ == true ) { - Util::put_1d_data_helper< - MultiVecAdapter,slu_type>::do_put(X, xvals_(), - as(ld_rhs), - ROOTED); - } - else { - Util::put_1d_data_helper< - MultiVecAdapter,slu_type>::do_put(X, xvals_(), - as(ld_rhs), - CONTIGUOUS_AND_ROOTED); - } + if ( is_contiguous_ == true ) { + Util::put_1d_data_helper_kokkos_view< + MultiVecAdapter,convert_host_solve_array_t>::do_put(X, convert_xValues_, + as(ld_rhs), + ROOTED); + } + else { + Util::put_1d_data_helper_kokkos_view< + MultiVecAdapter,convert_host_solve_array_t>::do_put(X, convert_xValues_, + as(ld_rhs), + CONTIGUOUS_AND_ROOTED); } } @@ -344,9 +315,12 @@ Basker::loadA_impl(EPhase current_phase) // Only the root image needs storage allocated if( this->root_ ){ - nzvals_.resize(this->globalNumNonZeros_); - rowind_.resize(this->globalNumNonZeros_); - colptr_.resize(this->globalNumCols_ + 1); + host_nzvals_view_ = host_value_type_array( + Kokkos::ViewAllocateWithoutInitializing("host_nzvals_view_"), this->globalNumNonZeros_); + host_rows_view_ = host_ordinal_type_array( + Kokkos::ViewAllocateWithoutInitializing("host_rows_view_"), this->globalNumNonZeros_); + host_col_ptr_view_ = host_ordinal_type_array( + Kokkos::ViewAllocateWithoutInitializing("host_col_ptr_view_"), this->globalNumRows_ + 1); } local_ordinal_type nnz_ret = 0; @@ -356,15 +330,15 @@ Basker::loadA_impl(EPhase current_phase) #endif if ( is_contiguous_ == true ) { - Util::get_ccs_helper< - MatrixAdapter,slu_type,local_ordinal_type,local_ordinal_type> - ::do_get(this->matrixA_.ptr(), nzvals_(), rowind_(), colptr_(), + Util::get_ccs_helper_kokkos_view< + MatrixAdapter,host_value_type_array,host_ordinal_type_array,host_ordinal_type_array> + ::do_get(this->matrixA_.ptr(), host_nzvals_view_, host_rows_view_, host_col_ptr_view_, nnz_ret, ROOTED, ARBITRARY, 
this->rowIndexBase_); } else { - Util::get_ccs_helper< - MatrixAdapter,slu_type,local_ordinal_type,local_ordinal_type> - ::do_get(this->matrixA_.ptr(), nzvals_(), rowind_(), colptr_(), + Util::get_ccs_helper_kokkos_view< + MatrixAdapter,host_value_type_array,host_ordinal_type_array,host_ordinal_type_array> + ::do_get(this->matrixA_.ptr(), host_nzvals_view_, host_rows_view_, host_col_ptr_view_, nnz_ret, CONTIGUOUS_AND_ROOTED, ARBITRARY, this->rowIndexBase_); } } diff --git a/packages/amesos2/src/Amesos2_KLU2_def.hpp b/packages/amesos2/src/Amesos2_KLU2_def.hpp index 62153a360151..1a67a913f921 100644 --- a/packages/amesos2/src/Amesos2_KLU2_def.hpp +++ b/packages/amesos2/src/Amesos2_KLU2_def.hpp @@ -378,13 +378,6 @@ KLU2::solve_impl( } } } // end root_ - - /* Update X's global values */ - { -#ifdef HAVE_AMESOS2_TIMERS - Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_); -#endif - } // end Timer scope } //end else // This conversion exists only for the situation where the Tpetra adapter diff --git a/packages/amesos2/src/Amesos2_TpetraMultiVecAdapter_def.hpp b/packages/amesos2/src/Amesos2_TpetraMultiVecAdapter_def.hpp index 13f68698ec4e..bf4eb3faadea 100644 --- a/packages/amesos2/src/Amesos2_TpetraMultiVecAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_TpetraMultiVecAdapter_def.hpp @@ -267,7 +267,6 @@ namespace Amesos2 { // Special case when number vectors == 1 and single MPI process if ( num_vecs == 1 && this->getComm()->getRank() == 0 && this->getComm()->getSize() == 1 ) { if(mv_->isConstantStride()) { - mv_->sync_device(); // no testing of this right now - since UVM on deep_copy_or_assign_view(kokkos_view, mv_->getLocalViewDevice()); } else { @@ -308,7 +307,6 @@ namespace Amesos2 { } else { if(redist_mv.isConstantStride()) { - redist_mv.sync_device(); // no testing of this right now - since UVM on deep_copy_or_assign_view(kokkos_view, redist_mv.getLocalViewDevice()); } else { @@ -530,7 +528,23 @@ namespace Amesos2 { // If this is the optimized path 
then kokkos_new_data will be the dst auto mv_view_to_modify_2d = mv_->getLocalViewDevice(); + + #ifdef HAVE_TEUCHOS_FLOAT + // To remove this and make it use the optimized deep_copy_or_assign_view, + // need to resolve the put_type setup for Klu2 and Basker which is + // handling float in an awkward way. The current effect of this check + // is to make the put not completely optimized for float builds. + // This is related to #7158. The problem here was that we are assuming + // that the vector we did a 'get' on is the same one we will do 'put' with + // but the added put_type for float breaks that assumption. In that case, + // the code here may assign when in fact we need a copy back to the MV. + // If this code does assign it's not doing anything to the MV because it's + // assuming we were solving directly to it from the original 'get'. + // TODO: Make put_type go away and get rid of these special checks for float. + deep_copy(mv_view_to_modify_2d, kokkos_new_data); + #else deep_copy_or_assign_view(mv_view_to_modify_2d, kokkos_new_data); + #endif } else { diff --git a/packages/amesos2/test/solvers/Basker_UnitTests.cpp b/packages/amesos2/test/solvers/Basker_UnitTests.cpp index dfa458a51728..4f7769260800 100644 --- a/packages/amesos2/test/solvers/Basker_UnitTests.cpp +++ b/packages/amesos2/test/solvers/Basker_UnitTests.cpp @@ -810,6 +810,10 @@ namespace { typedef long int LongInt; UNIT_TEST_GROUP_ORDINAL_ORDINAL(int,LongInt) #endif + #ifdef HAVE_TPETRA_INST_INT_LONG_LONG + typedef long long int LongLongInt; + UNIT_TEST_GROUP_ORDINAL_ORDINAL(int,LongLongInt) + #endif #endif // EXPL-INST diff --git a/packages/amesos2/test/solvers/Basker_test.xml b/packages/amesos2/test/solvers/Basker_test.xml index ef52657bac3e..eb5ab67c9ae3 100644 --- a/packages/amesos2/test/solvers/Basker_test.xml +++ b/packages/amesos2/test/solvers/Basker_test.xml @@ -4,38 +4,83 @@ - + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - diff --git a/packages/amesos2/test/solvers/CMakeLists.txt b/packages/amesos2/test/solvers/CMakeLists.txt index 8b7db223a903..2dedcd5cbb16 100644 --- a/packages/amesos2/test/solvers/CMakeLists.txt +++ b/packages/amesos2/test/solvers/CMakeLists.txt @@ -301,6 +301,21 @@ IF (${PACKAGE_NAME}_ENABLE_Basker) NUM_MPI_PROCS 2 STANDARD_PASS_OUTPUT ) + +TRIBITS_COPY_FILES_TO_BINARY_DIR(SolverTestCopyBaskerFiles + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} + SOURCE_FILES Basker_test.xml + EXEDEPS Solver_Test + ) + +TRIBITS_ADD_TEST( + Solver_Test + NAME Basker_Solver_Test + ARGS "--xml-params=Basker_test.xml --filedir=${CMAKE_CURRENT_BINARY_DIR}/../matrices/ --multiple-solves --refactor" + STANDARD_PASS_OUTPUT + NUM_MPI_PROCS 1 + COMM serial mpi + ) ENDIF() From 0a68532017de6ce7447ad0a94f5d80e224cd263e Mon Sep 17 00:00:00 2001 From: micheldemessieres Date: Thu, 18 Jun 2020 14:24:47 -0400 Subject: [PATCH 2/2] Amesos2: Clean up Basker type handling Resolves problems with Stokhos compatibility for custom TypeMap. Removes the put_type member of TypeMap for handling floats. This is handled in a cleaner way now. Removed some special handling of float in the Tpetra adapter. Also handled in a cleaner way now. Added complex testing to Basker. 
--- .../src/Amesos2_Basker_FunctionMap.hpp | 17 +++--- .../amesos2/src/Amesos2_Basker_TypeMap.hpp | 21 ++----- packages/amesos2/src/Amesos2_Basker_decl.hpp | 10 +-- packages/amesos2/src/Amesos2_Basker_def.hpp | 7 +-- .../amesos2/src/Amesos2_KLU2_FunctionMap.hpp | 25 +++----- packages/amesos2/src/Amesos2_KLU2_TypeMap.hpp | 6 -- packages/amesos2/src/Amesos2_KLU2_decl.hpp | 4 -- packages/amesos2/src/Amesos2_KLU2_def.hpp | 25 +------- .../src/Amesos2_TpetraMultiVecAdapter_def.hpp | 30 ++++----- packages/amesos2/test/solvers/Basker_test.xml | 61 +++++++++++++++++++ 10 files changed, 105 insertions(+), 101 deletions(-) diff --git a/packages/amesos2/src/Amesos2_Basker_FunctionMap.hpp b/packages/amesos2/src/Amesos2_Basker_FunctionMap.hpp index b85c9cfa5f78..c7dec5c0c57f 100644 --- a/packages/amesos2/src/Amesos2_Basker_FunctionMap.hpp +++ b/packages/amesos2/src/Amesos2_Basker_FunctionMap.hpp @@ -64,14 +64,6 @@ namespace Amesos2 { - template <> - struct FunctionMap - { - static double * convert_scalar(double * pData) { - return pData; // no conversion necessary - } - }; - #ifdef HAVE_TEUCHOS_COMPLEX template <> struct FunctionMap> @@ -83,6 +75,15 @@ namespace Amesos2 { #endif // HAVE_TEUCHOS_COMPLEX + // if not specialized, then assume generic conversion is fine + template + struct FunctionMap + { + static scalar_t * convert_scalar(scalar_t * pData) { + return pData; // no conversion necessary + } + }; + } // end namespace Amesos2 #endif // AMESOS2_BASKER_FUNCTIONMAP_HPP diff --git a/packages/amesos2/src/Amesos2_Basker_TypeMap.hpp b/packages/amesos2/src/Amesos2_Basker_TypeMap.hpp index 186e1d68aa2c..f2714d713b0e 100644 --- a/packages/amesos2/src/Amesos2_Basker_TypeMap.hpp +++ b/packages/amesos2/src/Amesos2_Basker_TypeMap.hpp @@ -76,18 +76,15 @@ template class Basker; template <> struct TypeMap { - typedef double dtype; + static double dtype; typedef double type; - typedef float put_type; // exists just to handle the case of std::complex or float adapter }; - template <> 
struct TypeMap { - typedef double dtype; + static double dtype; typedef double type; - typedef double put_type; // exists just to handle the case of std::complex or float adapter }; @@ -96,35 +93,29 @@ struct TypeMap template <> struct TypeMap > { - typedef std::complex dtype; + static std::complex dtype; typedef Kokkos::complex type; - typedef Kokkos::complex put_type; // exists just to handle the case of std::complex or float adapter }; - template <> struct TypeMap > { - typedef std::complex dtype; + static std::complex dtype; typedef Kokkos::complex type; - typedef Kokkos::complex put_type; // exists just to handle the case of std::complex or float adapter }; template <> struct TypeMap > { - typedef std::complex dtype; + static std::complex dtype; typedef Kokkos::complex type; - typedef Kokkos::complex put_type; // exists just to handle the case of std::complex or float adapter }; - template <> struct TypeMap > { - typedef std::complex dtype; + static std::complex dtype; typedef Kokkos::complex type; - typedef Kokkos::complex put_type; // exists just to handle the case of std::complex or float adapter }; diff --git a/packages/amesos2/src/Amesos2_Basker_decl.hpp b/packages/amesos2/src/Amesos2_Basker_decl.hpp index e47668a2ec62..7ac3276c786d 100644 --- a/packages/amesos2/src/Amesos2_Basker_decl.hpp +++ b/packages/amesos2/src/Amesos2_Basker_decl.hpp @@ -95,8 +95,11 @@ class Basker : public SolverCore typedef TypeMap type_map; typedef typename type_map::type basker_type; - typedef typename type_map::dtype basker_dtype; - typedef typename type_map::put_type basker_put_type; // just for special case when adapter is std::complex or float + + // TODO: Would like to change dtype to be a regular type, not static. + // Seems nothing was using dtype before anyways but Stokhos would break so + // will address that as a separate PR. 
+ typedef decltype(type_map::dtype) basker_dtype; typedef FunctionMap function_map; @@ -202,12 +205,9 @@ class Basker : public SolverCore typedef typename Kokkos::View host_solve_array_t; - typedef typename Kokkos::View - convert_host_solve_array_t; /// Persisting 1D store for X mutable host_solve_array_t xValues_; - mutable convert_host_solve_array_t convert_xValues_; // exists just for the case of adapter using std::complex or float int ldx_; /// Persisting 1D store for B diff --git a/packages/amesos2/src/Amesos2_Basker_def.hpp b/packages/amesos2/src/Amesos2_Basker_def.hpp index 9375f654e73b..289875ad88dd 100644 --- a/packages/amesos2/src/Amesos2_Basker_def.hpp +++ b/packages/amesos2/src/Amesos2_Basker_def.hpp @@ -234,19 +234,16 @@ Basker::solve_impl( #ifdef HAVE_AMESOS2_TIMERS Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_); #endif - // see Amesos2_Klu2_def.hpp for same situation and a long comment on this - // Note for Basker the issue applies to float or complex, while Klu2 just applies to complex - deep_copy_or_assign_view(convert_xValues_, xValues_); if ( is_contiguous_ == true ) { Util::put_1d_data_helper_kokkos_view< - MultiVecAdapter,convert_host_solve_array_t>::do_put(X, convert_xValues_, + MultiVecAdapter,host_solve_array_t>::do_put(X, xValues_, as(ld_rhs), ROOTED); } else { Util::put_1d_data_helper_kokkos_view< - MultiVecAdapter,convert_host_solve_array_t>::do_put(X, convert_xValues_, + MultiVecAdapter,host_solve_array_t>::do_put(X, xValues_, as(ld_rhs), CONTIGUOUS_AND_ROOTED); } diff --git a/packages/amesos2/src/Amesos2_KLU2_FunctionMap.hpp b/packages/amesos2/src/Amesos2_KLU2_FunctionMap.hpp index e384402e5562..f49a43445e7c 100644 --- a/packages/amesos2/src/Amesos2_KLU2_FunctionMap.hpp +++ b/packages/amesos2/src/Amesos2_KLU2_FunctionMap.hpp @@ -76,22 +76,6 @@ namespace KLU2 { namespace Amesos2 { - template <> - struct FunctionMap - { - static double * convert_scalar(double * pData) { - return pData; // no conversion necessary - } - }; - 
- template <> - struct FunctionMap - { - static float * convert_scalar(float * pData) { - return pData; // no conversion necessary - } - }; - #ifdef HAVE_TEUCHOS_COMPLEX template <> struct FunctionMap> @@ -104,6 +88,15 @@ namespace Amesos2 { // Note that Klu2 does not support complex float so it does not appear here. #endif // HAVE_TEUCHOS_COMPLEX + // if not specialized, then assume generic conversion is fine + template + struct FunctionMap + { + static scalar_t * convert_scalar(scalar_t * pData) { + return pData; // no conversion necessary + } + }; + } // end namespace Amesos2 #endif // AMESOS2_KLU2_FUNCTIONMAP_HPP diff --git a/packages/amesos2/src/Amesos2_KLU2_TypeMap.hpp b/packages/amesos2/src/Amesos2_KLU2_TypeMap.hpp index 28e21697428f..5f04cc18e85d 100644 --- a/packages/amesos2/src/Amesos2_KLU2_TypeMap.hpp +++ b/packages/amesos2/src/Amesos2_KLU2_TypeMap.hpp @@ -85,7 +85,6 @@ struct TypeMap { typedef float dtype; typedef float type; - typedef float put_type; // exists just to handle the case of std::complex adapter }; template <> @@ -93,7 +92,6 @@ struct TypeMap { typedef double dtype; typedef double type; - typedef double put_type; // exists just to handle the case of std::complex adapter }; #ifdef HAVE_TEUCHOS_COMPLEX @@ -103,7 +101,6 @@ struct TypeMap > { typedef std::complex dtype; typedef Kokkos::complex type; - typedef Kokkos::complex put_type; // exists just to handle the case of std::complex adapter }; template <> @@ -111,7 +108,6 @@ struct TypeMap > { typedef std::complex dtype; typedef Kokkos::complex type; - typedef Kokkos::complex put_type; // exists just to handle the case of std::complex adapter }; template <> @@ -119,7 +115,6 @@ struct TypeMap > { typedef std::complex dtype; typedef Kokkos::complex type; - typedef Kokkos::complex put_type; // exists just to handle the case of std::complex adapter }; template <> @@ -127,7 +122,6 @@ struct TypeMap > { typedef std::complex dtype; typedef Kokkos::complex type; - typedef Kokkos::complex put_type; // 
exists just to handle the case of std::complex adapter }; #endif // HAVE_TEUCHOS_COMPLEX diff --git a/packages/amesos2/src/Amesos2_KLU2_decl.hpp b/packages/amesos2/src/Amesos2_KLU2_decl.hpp index 882db0392894..44144ccaff57 100644 --- a/packages/amesos2/src/Amesos2_KLU2_decl.hpp +++ b/packages/amesos2/src/Amesos2_KLU2_decl.hpp @@ -97,7 +97,6 @@ class KLU2 : public SolverCore */ typedef typename type_map::type klu2_type; typedef typename type_map::dtype klu2_dtype; - typedef typename type_map::put_type klu2_put_type; // just for special case when adapter is std::complex typedef FunctionMap function_map; @@ -251,12 +250,9 @@ class KLU2 : public SolverCore typedef typename Kokkos::View host_solve_array_t; - typedef typename Kokkos::View - convert_host_solve_array_t; /// Persisting 1D store for X mutable host_solve_array_t xValues_; - mutable convert_host_solve_array_t convert_xValues_; // exists just for the case of adapter using std::complex int ldx_; /// Persisting 1D store for B diff --git a/packages/amesos2/src/Amesos2_KLU2_def.hpp b/packages/amesos2/src/Amesos2_KLU2_def.hpp index 1a67a913f921..db27954a12ee 100644 --- a/packages/amesos2/src/Amesos2_KLU2_def.hpp +++ b/packages/amesos2/src/Amesos2_KLU2_def.hpp @@ -380,27 +380,6 @@ KLU2::solve_impl( } // end root_ } //end else - // This conversion exists only for the situation where the Tpetra adapter - // is std::complex. For complex the put_1d_data_helper_kokkos_view call - // below is going to reinterpret_cast to std::complex but Klu2 is a bit - // special because it doesn't support complex so xValues_ will be - // Kokkos::complex. We need to convert from complex double to complex - // float and we can't use a deep_copy directly from Kokkos::complex - // to std::complex since deep_copy cannot do both Kokkos->std and double->float - // in one copy. To resolve this without creating a lot of machinery we have - // Amesos2_Klu2_TypeMap.hpp hint us the correct intermediate type (put_type). 
- // which will be Kokkos::complex. The put_type also hints for all the - // other cases to make those cases do simple assignment here and not copy. - // Then deep_copy_or_assign_view below will be able to deep_copy directly - // from Kokkos::complex to Kokkos::complex and we don't need to - // handle it outside of kokkos. Then the Kokkos::complex gets passed to - // put_1d_data_helper_kokkos_view and the reinterpret_cast to std::complex will be fine. - // - // For any other solver situation (float, double, Kokkos::complex), - // this deep_copy_or_assign_view will do assignment (no copy) so there won't - // be any overhead. - deep_copy_or_assign_view(convert_xValues_, xValues_); - { #ifdef HAVE_AMESOS2_TIMERS Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ ); @@ -408,13 +387,13 @@ KLU2::solve_impl( if ( is_contiguous_ == true ) { Util::put_1d_data_helper_kokkos_view< - MultiVecAdapter,convert_host_solve_array_t>::do_put(X, convert_xValues_, + MultiVecAdapter,host_solve_array_t>::do_put(X, xValues_, as(ld_rhs), ROOTED, this->rowIndexBase_); } else { Util::put_1d_data_helper_kokkos_view< - MultiVecAdapter,convert_host_solve_array_t>::do_put(X, convert_xValues_, + MultiVecAdapter,host_solve_array_t>::do_put(X, xValues_, as(ld_rhs), CONTIGUOUS_AND_ROOTED, this->rowIndexBase_); } diff --git a/packages/amesos2/src/Amesos2_TpetraMultiVecAdapter_def.hpp b/packages/amesos2/src/Amesos2_TpetraMultiVecAdapter_def.hpp index bf4eb3faadea..33a46b56d49e 100644 --- a/packages/amesos2/src/Amesos2_TpetraMultiVecAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_TpetraMultiVecAdapter_def.hpp @@ -528,23 +528,7 @@ namespace Amesos2 { // If this is the optimized path then kokkos_new_data will be the dst auto mv_view_to_modify_2d = mv_->getLocalViewDevice(); - - #ifdef HAVE_TEUCHOS_FLOAT - // To remove this and make it use the optimized deep_copy_or_assign_view, - // need to resolve the put_type setup for Klu2 and Basker which is - // handling float in an awkward way. 
The current effect of this check - // is to make the put not completely optimized for float builds. - // This is related to #7158. The problem here was that we are assuming - // that the vector we did a 'get' on is the same one we will do 'put' with - // but the added put_type for float breaks that assumption. In that case, - // the code here may assign when in fact we need a copy back to the MV. - // If this code does assign it's not doing anything to the MV because it's - // assuming we were solving directly to it from the original 'get'. - // TODO: Make put_type go away and get rid of these special checks for float. - deep_copy(mv_view_to_modify_2d, kokkos_new_data); - #else deep_copy_or_assign_view(mv_view_to_modify_2d, kokkos_new_data); - #endif } else { @@ -568,11 +552,19 @@ namespace Amesos2 { } if ( distribution != CONTIGUOUS_AND_ROOTED ) { + // Use View scalar type, not MV Scalar because we want Kokkos::complex, not + // std::complex to avoid a Kokkos::complex to std::complex + // conversion which would require a double copy and fail here. Then we'll be + // setup to safely reinterpret_cast complex to std if necessary. 
+ typedef typename multivec_t::dual_view_type::t_host::value_type tpetra_mv_view_type; + Kokkos::View convert_kokkos_new_data; + deep_copy_or_assign_view(convert_kokkos_new_data, kokkos_new_data); #ifdef HAVE_TEUCHOS_COMPLEX - // for complex, cast Kokkos::complex back to std::complex - auto pData = reinterpret_cast(kokkos_new_data.data()); + // convert_kokkos_new_data may be Kokkos::complex and Scalar could be std::complex + auto pData = reinterpret_cast(convert_kokkos_new_data.data()); #else - auto pData = kokkos_new_data.data(); + auto pData = convert_kokkos_new_data.data(); #endif const multivec_t source_mv (srcMap, Teuchos::ArrayView( diff --git a/packages/amesos2/test/solvers/Basker_test.xml b/packages/amesos2/test/solvers/Basker_test.xml index eb5ab67c9ae3..172ac45ecbab 100644 --- a/packages/amesos2/test/solvers/Basker_test.xml +++ b/packages/amesos2/test/solvers/Basker_test.xml @@ -83,4 +83,65 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +