From 05d6b96526513b503b6fbb1ff3586af0db469b2a Mon Sep 17 00:00:00 2001 From: Mauro Perego Date: Fri, 22 May 2020 15:12:00 -0600 Subject: [PATCH] cleaning --- .../Projection/Intrepid2_ProjectionTools.hpp | 128 ++++++++++-------- 1 file changed, 75 insertions(+), 53 deletions(-) diff --git a/packages/intrepid2/src/Projection/Intrepid2_ProjectionTools.hpp b/packages/intrepid2/src/Projection/Intrepid2_ProjectionTools.hpp index 158f1c72347f..426fbd53e582 100644 --- a/packages/intrepid2/src/Projection/Intrepid2_ProjectionTools.hpp +++ b/packages/intrepid2/src/Projection/Intrepid2_ProjectionTools.hpp @@ -506,44 +506,58 @@ class ProjectionTools { ProjectionStruct * projStruct); - /** \brief Functor to solve a square system A x = b on each cell using QR method implemented in KokkosKernels + /** \brief Class to solve a square system A x = b on each cell A is expected to be saddle a point (KKT) matrix of the form [C B; B^T 0], where C has size nxn and B nxm, with n>0, m>=0. B^T is copied from B, so one does not have to define the B^T portion of A. b will contain the solution x. The first n-entries of x are copied into the provided basis coefficients using the provided indexing. + The system is solved either with a QR factorization implemented in KokkosKernels or + with Lapack GELS function. */ struct ElemSystem { + + std::string systemName_; bool matrixIndependentOfCell_; /** \brief Functor constructor - - \code - C - num. cells - P - num. evaluation points - \endcode - - - \param basisCoeffs [out] - rank-2 view (C,F) containing the basis coefficients - \param elemMat [in/out] - rank-3 view (C,P,P) containing the element matrix of size - numCells x (n+m)x(n+m) on each cell - it will be overwritten. - \param elemRhs [in/out] - rank-2 view (C,P) containing the element rhs on each cell - of size numCells x (n+m) - it will contain the solution of the system on output - \param tau [out] - rank-2 view (C,P) used to store the QR factorization - size: numCells x (n+m) - \param w [out] - rank-2 view (C,P) used has a workspace - Layout Right, size: numCells x (n+m) - \param elemDof [in] - rank-1 view having dimension n, containing the basis numbering - \param n [in] - ordinal_type, basis cardinality - \param m [in] - ordinal_type, dimension of the constraint of the KKT system + \param systemName [in] - string containing the name of the system (passed to parallel for) + \param matrixIndependentOfCell [in] - bool: whether the local cell matrix of the system changes from cell to cell + if true, the matrix factorization is preformed only on the first cell + and reused on other cells. + */ + + ElemSystem (std::string systemName, bool matrixIndependentOfCell) : + systemName_(systemName), matrixIndependentOfCell_(matrixIndependentOfCell){}; + + + + /** \brief Solve the system and returns the basis coefficients + solve the system either using Kokkos Kernel QR or Lapack GELS + depending on whether Kokkos Kernel is enabled. + + \code + C - num. cells + P - num. evaluation points + \endcode + + + \param basisCoeffs [out] - rank-2 view (C,F) containing the basis coefficients + \param elemMat [in/out] - rank-3 view (C,P,P) containing the element matrix of size + numCells x (n+m)x(n+m) on each cell + it will be overwritten. + \param elemRhs [in/out] - rank-2 view (C,P) containing the element rhs on each cell + of size numCells x (n+m) + it will contain the solution of the system on output + \param tau [out] - rank-2 view (C,P) used to store the QR factorization + size: numCells x (n+m) + \param w [out] - rank-2 view (C,P) used has a workspace + Layout Right, size: numCells x (n+m) + \param elemDof [in] - rank-1 view having dimension n, containing the basis numbering + \param n [in] - ordinal_type, basis cardinality + \param m [in] - ordinal_type, dimension of the constraint of the KKT system */ - - ElemSystem (std::string systemName, bool matrixIndependentOfCell) : - systemName_(systemName), matrixIndependentOfCell_(matrixIndependentOfCell){}; - template void solve(ViewType1 basisCoeffs, ViewType2 elemMat, ViewType2 elemRhs, ViewType2 tau, ViewType3 w,const ViewType4 elemDof, ordinal_type n, ordinal_type m=0) { @@ -556,7 +570,9 @@ class ProjectionTools { #endif } -#define HAVE_INTREPID2_KOKKOSKERNELS + + /** \brief Parallel implementation of solve, using Kokkos Kernels QR factoriation + */ #ifdef HAVE_INTREPID2_KOKKOSKERNELS template void solveParallel(ViewType1 basisCoeffs, ViewType2 elemMat, ViewType2 elemRhs, ViewType2 taul, @@ -577,6 +593,8 @@ class ProjectionTools { A0_host(i,j) = A0_host(j,i); auto w0_host = Kokkos::create_mirror_view(Kokkos::subview(work, 0, Kokkos::ALL())); + + //computing QR of A0. QR is saved in A0 and tau0 KokkosBatched::SerialQR_Internal::invoke(A0_host.extent(0), A0_host.extent(1), A0_host.data(), A0_host.stride_0(), A0_host.stride_1(), tau0_host.data(), tau0_host.stride_0(), w0_host.data()); @@ -585,13 +603,13 @@ class ProjectionTools { Kokkos::deep_copy(tau0, tau0_host); Kokkos::parallel_for (systemName_, - Kokkos::RangePolicy (0, numCells), - KOKKOS_LAMBDA (const size_t ic) { + Kokkos::RangePolicy (0, numCells), + KOKKOS_LAMBDA (const size_t ic) { auto w = Kokkos::subview(work, ic, Kokkos::ALL()); auto b = Kokkos::subview(elemRhs, ic, Kokkos::ALL()); - //b'*Q -> b + //b'*Q0 -> b KokkosBatched::SerialApplyQ_RightNoTransForwardInternal::invoke( 1, A0.extent(0), A0.extent(1), A0.data(), A0.stride_0(), A0.stride_1(), @@ -599,7 +617,7 @@ class ProjectionTools { b.data(), 1, b.stride_0(), w.data()); - // R^{-1} b -> b + // R0^{-1} b -> b KokkosBatched::SerialTrsvInternalUpper::invoke(false, A0.extent(0), 1.0, @@ -655,6 +673,10 @@ class ProjectionTools { } } #endif + + /** \brief Serial implementation of solve, using Lapack GELS function + */ + template void solveSerial(ViewType1 basisCoeffs, ViewType2 elemMat, ViewType2 elemRhs, ViewType2 , ViewType3, const ViewType4 elemDof, ordinal_type n, ordinal_type m) { @@ -668,12 +690,12 @@ class ProjectionTools { if(matrixIndependentOfCell_) { ViewType2 elemRhsTrans("transRhs", elemRhs.extent(1), elemRhs.extent(0)); Kokkos::View - pivVec("pivVec", m+n + std::max(m+n, numCells), 1); + pivVec("pivVec", m+n + std::max(m+n, numCells), 1); Kokkos::View serialElemRhs("serialElemRhs", n+m, numCells); auto A = Kokkos::create_mirror_view_and_copy(typename ExecSpaceType::memory_space(), - Kokkos::subview(elemMat, 0, Kokkos::ALL(), Kokkos::ALL())); + Kokkos::subview(elemMat, 0, Kokkos::ALL(), Kokkos::ALL())); auto b = Kokkos::create_mirror_view_and_copy(typename ExecSpaceType::memory_space(), elemRhs); auto serialBasisCoeffs = Kokkos::create_mirror_view_and_copy( @@ -684,23 +706,23 @@ class ProjectionTools { serialElemRhs(i,ic) = b(ic,i); for(ordinal_type j=0; j pivVec("pivVec", 2*(m+n), 1); @@ -710,9 +732,9 @@ class ProjectionTools { Kokkos::subview(elemMat, ic, Kokkos::ALL(), Kokkos::ALL())); auto b = Kokkos::create_mirror_view_and_copy(typename ExecSpaceType::memory_space(), Kokkos::subview(elemRhs, ic, Kokkos::ALL())); - auto basisCoeffs = Kokkos::subview(basisCoeffs, ic, Kokkos::ALL()); + auto basisCoeffs_ = Kokkos::subview(basisCoeffs, ic, Kokkos::ALL()); auto serialBasisCoeffs = Kokkos::create_mirror_view_and_copy(typename ExecSpaceType::memory_space(), - basisCoeffs); + basisCoeffs_); Kokkos::deep_copy(serialElemMat,valueType(0)); //LAPACK might overwrite the matrix @@ -745,7 +767,7 @@ class ProjectionTools { for(ordinal_type i=0; i