Skip to content

Commit

Permalink
cleaning
Browse files Browse the repository at this point in the history
  • Loading branch information
mperego committed May 22, 2020
1 parent 0234843 commit 05d6b96
Showing 1 changed file with 75 additions and 53 deletions.
128 changes: 75 additions & 53 deletions packages/intrepid2/src/Projection/Intrepid2_ProjectionTools.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -506,44 +506,58 @@ class ProjectionTools {
ProjectionStruct<ExecSpaceType, typename BasisType::scalarType> * projStruct);


/** \brief Functor to solve a square system A x = b on each cell using QR method implemented in KokkosKernels
/** \brief Class to solve a square system A x = b on each cell
A is expected to be saddle a point (KKT) matrix of the form [C B; B^T 0],
where C has size nxn and B nxm, with n>0, m>=0.
B^T is copied from B, so one does not have to define the B^T portion of A.
b will contain the solution x.
The first n-entries of x are copied into the provided basis coefficients using the provided indexing.
The system is solved either with a QR factorization implemented in KokkosKernels or
with Lapack GELS function.
*/
struct ElemSystem {


std::string systemName_;
bool matrixIndependentOfCell_;

/** \brief Functor constructor
\code
C - num. cells
P - num. evaluation points
\endcode
\param basisCoeffs [out] - rank-2 view (C,F) containing the basis coefficients
\param elemMat [in/out] - rank-3 view (C,P,P) containing the element matrix of size
numCells x (n+m)x(n+m) on each cell
it will be overwritten.
\param elemRhs [in/out] - rank-2 view (C,P) containing the element rhs on each cell
of size numCells x (n+m)
it will contain the solution of the system on output
\param tau [out] - rank-2 view (C,P) used to store the QR factorization
size: numCells x (n+m)
\param w [out] - rank-2 view (C,P) used has a workspace
Layout Right, size: numCells x (n+m)
\param elemDof [in] - rank-1 view having dimension n, containing the basis numbering
\param n [in] - ordinal_type, basis cardinality
\param m [in] - ordinal_type, dimension of the constraint of the KKT system
\param systemName [in] - string containing the name of the system (passed to parallel for)
\param matrixIndependentOfCell [in] - bool: whether the local cell matrix of the system changes from cell to cell
if true, the matrix factorization is preformed only on the first cell
and reused on other cells.
*/

ElemSystem (std::string systemName, bool matrixIndependentOfCell) :
systemName_(systemName), matrixIndependentOfCell_(matrixIndependentOfCell){};



/** \brief Solve the system and returns the basis coefficients
solve the system either using Kokkos Kernel QR or Lapack GELS
depending on whether Kokkos Kernel is enabled.
\code
C - num. cells
P - num. evaluation points
\endcode
\param basisCoeffs [out] - rank-2 view (C,F) containing the basis coefficients
\param elemMat [in/out] - rank-3 view (C,P,P) containing the element matrix of size
numCells x (n+m)x(n+m) on each cell
it will be overwritten.
\param elemRhs [in/out] - rank-2 view (C,P) containing the element rhs on each cell
of size numCells x (n+m)
it will contain the solution of the system on output
\param tau [out] - rank-2 view (C,P) used to store the QR factorization
size: numCells x (n+m)
\param w [out] - rank-2 view (C,P) used has a workspace
Layout Right, size: numCells x (n+m)
\param elemDof [in] - rank-1 view having dimension n, containing the basis numbering
\param n [in] - ordinal_type, basis cardinality
\param m [in] - ordinal_type, dimension of the constraint of the KKT system
*/

ElemSystem (std::string systemName, bool matrixIndependentOfCell) :
systemName_(systemName), matrixIndependentOfCell_(matrixIndependentOfCell){};

template<typename ViewType1, typename ViewType2, typename ViewType3, typename ViewType4>
void solve(ViewType1 basisCoeffs, ViewType2 elemMat, ViewType2 elemRhs, ViewType2 tau,
ViewType3 w,const ViewType4 elemDof, ordinal_type n, ordinal_type m=0) {
Expand All @@ -556,7 +570,9 @@ class ProjectionTools {
#endif

}
#define HAVE_INTREPID2_KOKKOSKERNELS

/** \brief Parallel implementation of solve, using Kokkos Kernels QR factoriation
*/
#ifdef HAVE_INTREPID2_KOKKOSKERNELS
template<typename ViewType1, typename ViewType2, typename ViewType3, typename ViewType4>
void solveParallel(ViewType1 basisCoeffs, ViewType2 elemMat, ViewType2 elemRhs, ViewType2 taul,
Expand All @@ -577,6 +593,8 @@ class ProjectionTools {
A0_host(i,j) = A0_host(j,i);

auto w0_host = Kokkos::create_mirror_view(Kokkos::subview(work, 0, Kokkos::ALL()));

//computing QR of A0. QR is saved in A0 and tau0
KokkosBatched::SerialQR_Internal::invoke(A0_host.extent(0), A0_host.extent(1),
A0_host.data(), A0_host.stride_0(), A0_host.stride_1(),
tau0_host.data(), tau0_host.stride_0(), w0_host.data());
Expand All @@ -585,21 +603,21 @@ class ProjectionTools {
Kokkos::deep_copy(tau0, tau0_host);

Kokkos::parallel_for (systemName_,
Kokkos::RangePolicy<ExecSpaceType, int> (0, numCells),
KOKKOS_LAMBDA (const size_t ic) {
Kokkos::RangePolicy<ExecSpaceType, int> (0, numCells),
KOKKOS_LAMBDA (const size_t ic) {
auto w = Kokkos::subview(work, ic, Kokkos::ALL());

auto b = Kokkos::subview(elemRhs, ic, Kokkos::ALL());

//b'*Q -> b
//b'*Q0 -> b
KokkosBatched::SerialApplyQ_RightNoTransForwardInternal::invoke(
1, A0.extent(0), A0.extent(1),
A0.data(), A0.stride_0(), A0.stride_1(),
tau0.data(), tau0.stride_0(),
b.data(), 1, b.stride_0(),
w.data());

// R^{-1} b -> b
// R0^{-1} b -> b
KokkosBatched::SerialTrsvInternalUpper<KokkosBatched::Algo::Trsv::Unblocked>::invoke(false,
A0.extent(0),
1.0,
Expand Down Expand Up @@ -655,6 +673,10 @@ class ProjectionTools {
}
}
#endif

/** \brief Serial implementation of solve, using Lapack GELS function
*/

template<typename ViewType1, typename ViewType2, typename ViewType3, typename ViewType4>
void solveSerial(ViewType1 basisCoeffs, ViewType2 elemMat, ViewType2 elemRhs, ViewType2 ,
ViewType3, const ViewType4 elemDof, ordinal_type n, ordinal_type m) {
Expand All @@ -668,12 +690,12 @@ class ProjectionTools {
if(matrixIndependentOfCell_) {
ViewType2 elemRhsTrans("transRhs", elemRhs.extent(1), elemRhs.extent(0));
Kokkos::View<valueType**,Kokkos::LayoutLeft,host_space_type>
pivVec("pivVec", m+n + std::max(m+n, numCells), 1);
pivVec("pivVec", m+n + std::max(m+n, numCells), 1);

Kokkos::View<valueType**,Kokkos::LayoutLeft,host_space_type> serialElemRhs("serialElemRhs", n+m, numCells);

auto A = Kokkos::create_mirror_view_and_copy(typename ExecSpaceType::memory_space(),
Kokkos::subview(elemMat, 0, Kokkos::ALL(), Kokkos::ALL()));
Kokkos::subview(elemMat, 0, Kokkos::ALL(), Kokkos::ALL()));
auto b = Kokkos::create_mirror_view_and_copy(typename ExecSpaceType::memory_space(), elemRhs);

auto serialBasisCoeffs = Kokkos::create_mirror_view_and_copy(
Expand All @@ -684,23 +706,23 @@ class ProjectionTools {
serialElemRhs(i,ic) = b(ic,i);
for(ordinal_type j=0; j<n; ++j)
serialElemMat(j,i) = A(j,i);
}

for(ordinal_type i=n; i<n+m; ++i)
for(ordinal_type j=0; j<n; ++j)
serialElemMat(i,j) = serialElemMat(j,i);

ordinal_type info = 0;
lapack_.GELS('N', n+m, n+m, numCells,
serialElemMat.data(), serialElemMat.stride_1(),
serialElemRhs.data(), serialElemRhs.stride_1(),
pivVec.data(), pivVec.extent(0),
&info);

for(ordinal_type i=0; i<n; ++i) {
for (ordinal_type ic = 0; ic < numCells; ic++)
serialBasisCoeffs(ic,elemDof(i)) = serialElemRhs(i,ic);
}
}

for(ordinal_type i=n; i<n+m; ++i)
for(ordinal_type j=0; j<n; ++j)
serialElemMat(i,j) = serialElemMat(j,i);

ordinal_type info = 0;
lapack_.GELS('N', n+m, n+m, numCells,
serialElemMat.data(), serialElemMat.stride_1(),
serialElemRhs.data(), serialElemRhs.stride_1(),
pivVec.data(), pivVec.extent(0),
&info);

for(ordinal_type i=0; i<n; ++i) {
for (ordinal_type ic = 0; ic < numCells; ic++)
serialBasisCoeffs(ic,elemDof(i)) = serialElemRhs(i,ic);
}
}
else {
Kokkos::View<valueType**,Kokkos::LayoutLeft,host_space_type> pivVec("pivVec", 2*(m+n), 1);
Expand All @@ -710,9 +732,9 @@ class ProjectionTools {
Kokkos::subview(elemMat, ic, Kokkos::ALL(), Kokkos::ALL()));
auto b = Kokkos::create_mirror_view_and_copy(typename ExecSpaceType::memory_space(),
Kokkos::subview(elemRhs, ic, Kokkos::ALL()));
auto basisCoeffs = Kokkos::subview(basisCoeffs, ic, Kokkos::ALL());
auto basisCoeffs_ = Kokkos::subview(basisCoeffs, ic, Kokkos::ALL());
auto serialBasisCoeffs = Kokkos::create_mirror_view_and_copy(typename ExecSpaceType::memory_space(),
basisCoeffs);
basisCoeffs_);

Kokkos::deep_copy(serialElemMat,valueType(0)); //LAPACK might overwrite the matrix

Expand Down Expand Up @@ -745,7 +767,7 @@ class ProjectionTools {
for(ordinal_type i=0; i<n; ++i) {
serialBasisCoeffs(elemDof(i)) = serialElemRhs(i,0);
}
Kokkos::deep_copy(basisCoeffs,serialBasisCoeffs);
Kokkos::deep_copy(basisCoeffs_,serialBasisCoeffs);
}
}
}
Expand Down

0 comments on commit 05d6b96

Please sign in to comment.