Skip to content

Commit

Permalink
Revert "Tpetra: Abstracting out the matrix merge functionality needed…
Browse files Browse the repository at this point in the history
… for interfacing to KokkosKernels"

This reverts commit 92da795.
  • Loading branch information
Matt Bettencourt committed Jan 16, 2018
1 parent 5f69408 commit d86ccdc
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 109 deletions.
67 changes: 64 additions & 3 deletions packages/tpetra/core/ext/TpetraExt_MatrixMatrix_Cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,70 @@ void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCuda
if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);

// Merge the B and Bimport matrices
Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getNodeNumElements(),Bmerged);

// We need to do this dance if either (a) we have a Bimport, or (b) we don't, but A's colMap is not the same as B's rowMap
if(!Bview.importMatrix.is_null() ||
(Bview.importMatrix.is_null() && (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap())))
{
// We do have a Bimport
// NOTE: We're going to merge Borig and Bimport into a single matrix and reindex the columns *before* we multiply.
// This option was chosen because we know we don't have any duplicate entries, so we can allocate once.

size_t merge_numrows = Amat.numCols();
lno_view_t Mrowptr("Mrowptr", merge_numrows + 1);

const LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();

// Use a Kokkos::parallel_scan to build the rowptr
typedef Node::execution_space execution_space;
typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
Kokkos::parallel_scan("Tpetra_MatrixMatrix_buildRowptrBmerged", range_type (0, merge_numrows), KOKKOS_LAMBDA(const size_t i, size_t& update, const bool final) {
if(final)
Mrowptr(i) = update;
// Get the row count
size_t ct = 0;
if(Acol2Brow(i) != LO_INVALID)
ct = Browptr(Acol2Brow(i) + 1) - Browptr(Acol2Brow(i));
else
ct = Irowptr(Acol2Irow(i) + 1) - Irowptr(Acol2Irow(i));
update += ct;
if(final && (i + 1 == merge_numrows))
Mrowptr(i + 1) = update;
});

execution_space::fence();

// Allocate nnz
size_t merge_nnz = Mrowptr(merge_numrows);
lno_nnz_view_t Mcolind("Mcolind", merge_nnz);
scalar_view_t Mvalues("Mvals", merge_nnz);

// Use a Kokkos::parallel_for to fill the rowptr/colind arrays
typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
Kokkos::parallel_for("Tpetra_MatrixMatrix_buildColindValuesBmerged", range_type (0, merge_numrows), KOKKOS_LAMBDA(const size_t i) {
if(Acol2Brow(i) != LO_INVALID) {
size_t row = Acol2Brow(i);
size_t start = Browptr(row);
for(size_t j = Mrowptr(i); j < Mrowptr(i + 1); j++) {
Mvalues(j) = Bvals(j - Mrowptr(i) + start);
Mcolind(j) = Bcol2Ccol(Bcolind(j - Mrowptr(i) + start));
}
}
else {
size_t row = Acol2Irow(i);
size_t start = Irowptr(row);
for(size_t j = Mrowptr(i); j < Mrowptr(i + 1); j++) {
Mvalues(j) = Ivals(j - Mrowptr(i) + start);
Mcolind(j) = Icol2Ccol(Icolind(j - Mrowptr(i) + start));
}
}
});
execution_space::fence();
Bmerged = Teuchos::rcp(new KCRS("CrsMatrix",merge_numrows,C.getColMap()->getNodeNumElements(),merge_nnz,Mvalues,Mrowptr,Mcolind));
}
else {
// We don't have a Bimport (the easy case)
Bmerged = Teuchos::rcpFromRef(Bmat);
}
#ifdef HAVE_TPETRA_MMM_TIMINGS
MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix CudaCore"))));
#endif
Expand Down
76 changes: 74 additions & 2 deletions packages/tpetra/core/ext/TpetraExt_MatrixMatrix_OpenMP.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,80 @@ void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpen
if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);

// Merge the B and Bimport matrices
Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getNodeNumElements(),Bmerged);
// We need to do this dance if either (a) we have a Bimport, or (b) we don't, but A's colMap is not the same as B's rowMap
if(!Bview.importMatrix.is_null() || (Bview.importMatrix.is_null() && (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap()))) {
// We do have a Bimport
// NOTE: We're going to merge Borig and Bimport into a single matrix and reindex the columns *before* we multiply.
// This option was chosen because we know we don't have any duplicate entries, so we can allocate once.
RCP<const KCRS> Ik;
if(!Bview.importMatrix.is_null()) Ik = Teuchos::rcpFromRef<const KCRS>(Bview.importMatrix->getLocalMatrix());

size_t merge_numrows = Ak.numCols();
lno_view_t Mrowptr("Mrowptr", merge_numrows + 1);

const LocalOrdinal LO_INVALID =Teuchos::OrdinalTraits<LocalOrdinal>::invalid();

// Use a Kokkos::parallel_scan to build the rowptr
//
// NOTE (mfh 15 Sep 2017) This is specifically only for
// execution_space = Kokkos::OpenMP, so we neither need nor want
// KOKKOS_LAMBDA (with its mandatory __device__ marking).
typedef Node::execution_space execution_space;
typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
Kokkos::parallel_scan ("Tpetra_MatrixMatrix_buildRowptrBmerged", range_type (0, merge_numrows),
[=] (const size_t i, size_t& update, const bool final) {
if(final) Mrowptr(i) = update;
// Get the row count
size_t ct=0;
if(Acol2Brow(i)!=LO_INVALID)
ct = Bk.graph.row_map(Acol2Brow(i)+1) - Bk.graph.row_map(Acol2Brow(i));
else
ct = Ik->graph.row_map(Acol2Irow(i)+1) - Ik->graph.row_map(Acol2Irow(i));
update+=ct;

if(final && i+1==merge_numrows)
Mrowptr(i+1)=update;
});

// Allocate nnz
size_t merge_nnz = Mrowptr(merge_numrows);
lno_nnz_view_t Mcolind("Mcolind",merge_nnz);
scalar_view_t Mvalues("Mvals",merge_nnz);

// Use a Kokkos::parallel_for to fill the rowptr/colind arrays
//
// NOTE (mfh 15 Sep 2017) This is specifically only for
// execution_space = Kokkos::OpenMP, so we neither need nor want
// KOKKOS_LAMBDA (with its mandatory __device__ marking).
typedef Node::execution_space execution_space;
typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
Kokkos::parallel_for ("Tpetra_MatrixMatrix_buildColindValuesBmerged", range_type (0, merge_numrows),
[=] (const size_t i) {
if(Acol2Brow(i)!=LO_INVALID) {
size_t row = Acol2Brow(i);
size_t start = Bk.graph.row_map(row);
for(size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
Mvalues(j) = Bk.values(j-Mrowptr(i)+start);
Mcolind(j) = Bcol2Ccol(Bk.graph.entries(j-Mrowptr(i)+start));
}
}
else {
size_t row = Acol2Irow(i);
size_t start = Ik->graph.row_map(row);
for(size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
Mvalues(j) = Ik->values(j-Mrowptr(i)+start);
Mcolind(j) = Icol2Ccol(Ik->graph.entries(j-Mrowptr(i)+start));
}
}
});

Bmerged = Teuchos::rcp(new KCRS("CrsMatrix",merge_numrows,C.getColMap()->getNodeNumElements(),merge_nnz,Mvalues,Mrowptr,Mcolind));

}
else {
// We don't have a Bimport (the easy case)
Bmerged = Teuchos::rcpFromRef(Bk);
}

#ifdef HAVE_TPETRA_MMM_TIMINGS
MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPCore"))));
Expand Down
12 changes: 0 additions & 12 deletions packages/tpetra/core/ext/TpetraExt_MatrixMatrix_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -553,18 +553,6 @@ void setMaxNumEntriesPerRow(
};


/// \brief Build the local matrix "Bmerged" used as the right-hand operand of the local multiply.
///
/// This only merges matrices that look like B & Bimport, aka, they have no overlapping rows.
///
/// If Bview has no import matrix and A's column Map is the very same object as B's row Map,
/// Bmerged is set to a non-owning reference to B's local matrix.  Otherwise a new local matrix
/// is allocated with one row per local column of A: row i is copied from row Acol2Brow(i) of
/// B's local matrix when that entry is not the invalid ordinal, and from row Acol2Irow(i) of
/// the import matrix otherwise.  Column indices are rewritten through Bcol2Ccol / Icol2Ccol.
///
/// \param Aview [in] View of A; supplies the merged row count (A's local column count) and A's column Map.
/// \param Bview [in] View of B; supplies B's local matrix and, optionally, its import matrix.
/// \param Acol2Brow [in] For each local column of A, the matching local row of B, or the invalid ordinal.
/// \param Acol2Irow [in] For each local column of A, the matching local row of the import matrix;
///                       consulted only where Acol2Brow holds the invalid ordinal.
/// \param Bcol2Ccol [in] Maps B's local column indices to the merged matrix's column indices.
/// \param Icol2Ccol [in] Maps the import matrix's local column indices to the merged matrix's column indices.
/// \param mergedNodeNumCols [in] Number of columns given to the merged matrix.
/// \param Bmerged [out] Receives the merged (or aliased) local matrix.
template<class Scalar,class LocalOrdinal,class GlobalOrdinal,class Node, class LocalOrdinalViewType>
inline void merge_matrices(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
const LocalOrdinalViewType & Acol2Brow,
const LocalOrdinalViewType & Acol2Irow,
const LocalOrdinalViewType & Bcol2Ccol,
const LocalOrdinalViewType & Icol2Ccol,
const size_t mergedNodeNumCols,
Teuchos::RCP<const typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_type> &Bmerged);



// NOTE(review): only the declaration is visible here.  Judging by the name, this presumably
// returns an estimate of the number of nonzeros in the product C = A*B -- confirm against the
// definition before relying on it.
template<class CrsMatrixType>
size_t C_estimate_nnz(CrsMatrixType & A, CrsMatrixType &B);
Expand Down
92 changes: 0 additions & 92 deletions packages/tpetra/core/ext/TpetraExt_MatrixMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3623,98 +3623,6 @@ void import_and_extract_views(
}
}





/*********************************************************************************************************/
// This only merges matrices that look like B & Bimport, aka, they have no overlapping rows.
//
// Builds the local matrix Bmerged that is used as the right-hand operand of the local multiply:
//   - If Bview has no import matrix and A's column Map is the very same object as B's row Map,
//     Bmerged is a non-owning reference to B's local matrix (the easy case).
//   - Otherwise a new matrix is allocated with one row per local column of A.  Row i is copied
//     from row Acol2Brow(i) of B when that entry is valid, and from row Acol2Irow(i) of the
//     import matrix otherwise.  Column indices are rewritten via Bcol2Ccol / Icol2Ccol so that
//     the merged matrix is indexed in C's column space (mergedNodeNumCols columns).
//
// Aview, Bview       [in]  views of the two operands
// Acol2Brow/Irow     [in]  per-A-column lookup of the source row in B / Bimport
// Bcol2Ccol/Icol2Ccol[in]  column reindexing tables for B / Bimport
// mergedNodeNumCols  [in]  number of columns of the merged matrix
// Bmerged            [out] resulting local matrix
template<class Scalar,class LocalOrdinal,class GlobalOrdinal,class Node, class LocalOrdinalViewType>
void merge_matrices(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
                    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
                    const LocalOrdinalViewType & Acol2Brow,
                    const LocalOrdinalViewType & Acol2Irow,
                    const LocalOrdinalViewType & Bcol2Ccol,
                    const LocalOrdinalViewType & Icol2Ccol,
                    const size_t mergedNodeNumCols,
                    Teuchos::RCP<const typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_type> &Bmerged) {

  typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_type KCRS;
  typedef typename KCRS::StaticCrsGraphType graph_t;
  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
  typedef typename KCRS::values_type::non_const_type scalar_view_t;
  // Exactly the member view types of KCRS, so the copies below need no conversion.
  typedef typename graph_t::row_map_type src_rowptr_view_t;
  typedef typename graph_t::entries_type src_colind_view_t;
  typedef typename KCRS::values_type src_values_view_t;
  typedef typename Node::execution_space execution_space;
  typedef Kokkos::RangePolicy<execution_space, size_t> range_type;

  // Grab the Kokkos::SparseCrsMatrices
  const KCRS & Ak = Aview.origMatrix->getLocalMatrix();
  const KCRS & Bk = Bview.origMatrix->getLocalMatrix();

  // We need to do the merge if either (a) we have a Bimport, or (b) we don't, but A's colMap
  // is not the same object as B's rowMap.  The Map comparison is only evaluated when there is
  // no Bimport, exactly as before.
  const bool haveImport = !Bview.importMatrix.is_null();
  if(!(haveImport || (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap()))) {
    // We don't have a Bimport (the easy case)
    Bmerged = Teuchos::rcpFromRef(Bk);
    return;
  }

  // NOTE: We're going to merge Borig and Bimport into a single matrix and reindex the columns
  // *before* we multiply.  This option was chosen because we know we don't have any duplicate
  // entries, so we can allocate once.

  // Pull out the individual views so the kernels capture plain Kokkos::Views by value rather
  // than a Teuchos::RCP or a whole KCRS object.  This matches what the Cuda kernel wrapper
  // does, and it also means nothing below depends on how long the object returned by
  // importMatrix->getLocalMatrix() lives.
  const src_rowptr_view_t Browptr = Bk.graph.row_map;
  const src_colind_view_t Bcolind = Bk.graph.entries;
  const src_values_view_t Bvals   = Bk.values;

  // These stay empty when there is no Bimport; in that case every Acol2Brow(i) is expected to
  // be valid, so they are never indexed.  (The original code had the same precondition: it
  // would have dereferenced a null RCP otherwise.)
  src_rowptr_view_t Irowptr;
  src_colind_view_t Icolind;
  src_values_view_t Ivals;
  if(haveImport) {
    const KCRS & Ik = Bview.importMatrix->getLocalMatrix();
    Irowptr = Ik.graph.row_map;
    Icolind = Ik.graph.entries;
    Ivals   = Ik.values;
  }

  const size_t merge_numrows = Ak.numCols();
  lno_view_t Mrowptr("Mrowptr", merge_numrows + 1);

  const LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();

  // Use a Kokkos::parallel_scan to build the rowptr (exclusive prefix sum of the row lengths).
  Kokkos::parallel_scan ("Tpetra_MatrixMatrix_merge_matrices_buildRowptr", range_type (0, merge_numrows),
    KOKKOS_LAMBDA(const size_t i, size_t& update, const bool final) {
      if(final) Mrowptr(i) = update;
      // Get the row count
      size_t ct = 0;
      if(Acol2Brow(i) != LO_INVALID)
        ct = Browptr(Acol2Brow(i)+1) - Browptr(Acol2Brow(i));
      else
        ct = Irowptr(Acol2Irow(i)+1) - Irowptr(Acol2Irow(i));
      update += ct;

      // The last row also writes the closing entry of the rowptr.
      if(final && i+1 == merge_numrows)
        Mrowptr(i+1) = update;
    });

  // The scan may run asynchronously; make sure it has finished before Mrowptr is read on the
  // host just below.
  execution_space::fence();

  // Allocate nnz
  // NOTE(review): this reads Mrowptr from host code, so it assumes the view's memory space is
  // host-accessible (e.g. UVM on Cuda) -- same assumption as the Cuda kernel wrapper.
  const size_t merge_nnz = Mrowptr(merge_numrows);
  lno_nnz_view_t Mcolind("Mcolind", merge_nnz);
  scalar_view_t Mvalues("Mvals", merge_nnz);

  // Use a Kokkos::parallel_for to fill the colind/values arrays, one merged row per iteration.
  Kokkos::parallel_for ("Tpetra_MatrixMatrix_merg_matrices_buildColindValues", range_type (0, merge_numrows),
    KOKKOS_LAMBDA(const size_t i) {
      const size_t dst_begin = Mrowptr(i);
      const size_t dst_end   = Mrowptr(i+1);
      if(Acol2Brow(i) != LO_INVALID) {
        // Row comes from B's local matrix.
        const size_t row = Acol2Brow(i);
        const size_t src_begin = Browptr(row);
        for(size_t j = dst_begin; j < dst_end; j++) {
          const size_t k = j - dst_begin + src_begin;
          Mvalues(j) = Bvals(k);
          Mcolind(j) = Bcol2Ccol(Bcolind(k));
        }
      }
      else {
        // Row comes from the import matrix.
        const size_t row = Acol2Irow(i);
        const size_t src_begin = Irowptr(row);
        for(size_t j = dst_begin; j < dst_end; j++) {
          const size_t k = j - dst_begin + src_begin;
          Mvalues(j) = Ivals(k);
          Mcolind(j) = Icol2Ccol(Icolind(k));
        }
      }
    });

  // Finish the fill before handing the views to the merged matrix.
  execution_space::fence();

  Bmerged = Teuchos::rcp(new KCRS("CrsMatrix",merge_numrows,mergedNodeNumCols,merge_nnz,Mvalues,Mrowptr,Mcolind));
}//end merge_matrices



} //End namespace MMdetails

} //End namespace Tpetra
Expand Down

0 comments on commit d86ccdc

Please sign in to comment.