Skip to content

Commit

Permalink
Tpetra: fix unit tests, eliminate more deep copies
Browse files Browse the repository at this point in the history
Update unpackAndCombineIntoCrsArrays unit test.
Leverage the fact that Tpetra::Details::Transfer has methods for
returning Kokkos::DualViews for remote, permuteTo, and permuteFrom
LIDs.

Part of #11693.
  • Loading branch information
jhux2 committed Jul 29, 2023
1 parent e069135 commit e53f6b0
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 85 deletions.
26 changes: 15 additions & 11 deletions packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7914,12 +7914,12 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
const size_t NumSameIDs = rowTransfer.getNumSameIDs();
ArrayView<const LO> ExportLIDs = reverseMode ?
rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
ArrayView<const LO> RemoteLIDs = reverseMode ?
rowTransfer.getExportLIDs () : rowTransfer.getRemoteLIDs ();
ArrayView<const LO> PermuteToLIDs = reverseMode ?
rowTransfer.getPermuteFromLIDs () : rowTransfer.getPermuteToLIDs ();
ArrayView<const LO> PermuteFromLIDs = reverseMode ?
rowTransfer.getPermuteToLIDs () : rowTransfer.getPermuteFromLIDs ();
auto RemoteLIDs = reverseMode ?
rowTransfer.getExportLIDs_dv() : rowTransfer.getRemoteLIDs_dv();
auto PermuteToLIDs = reverseMode ?
rowTransfer.getPermuteFromLIDs_dv() : rowTransfer.getPermuteToLIDs_dv();
auto PermuteFromLIDs = reverseMode ?
rowTransfer.getPermuteToLIDs_dv() : rowTransfer.getPermuteFromLIDs_dv();
Distributor& Distor = rowTransfer.getDistributor ();

// Owning PIDs
Expand Down Expand Up @@ -8114,14 +8114,14 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
#endif
if (constantNumPackets == 0) {
destMat->reallocArraysForNumPacketsPerLid (ExportLIDs.size (),
RemoteLIDs.size ());
RemoteLIDs.view_host().size ());
}
else {
// There are a constant number of packets per element. We
// already know (from the number of "remote" (incoming)
// elements) how many incoming elements we expect, so we can
// resize the buffer accordingly.
const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
const size_t rbufLen = RemoteLIDs.view_host().size() * constantNumPackets;
destMat->reallocImportsIfNeeded (rbufLen, false, nullptr);
}
}
Expand Down Expand Up @@ -8473,14 +8473,18 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
"input Kokkos::DualView was most recently modified on host, but TAFC "
"needs the device view of the data to be the most recently modified.");

const Kokkos::View<LO const *, typename Node::device_type> RemoteLIDs_d = RemoteLIDs.view_device();
const Kokkos::View<LO const *, typename Node::device_type> PermuteToLIDs_d = PermuteToLIDs.view_device();
const Kokkos::View<LO const *, typename Node::device_type> PermuteFromLIDs_d = PermuteFromLIDs.view_device();
//auto PermuteToLIDs_d = PermuteToLIDs.view_device(); //FAILS
Details::unpackAndCombineIntoCrsArrays(
*this,
RemoteLIDs,
RemoteLIDs_d,
destMat->imports_.view_device(), //hostImports
destMat->numImportPacketsPerLID_.view_device(), //numImportPacketsPerLID
NumSameIDs,
PermuteToLIDs,
PermuteFromLIDs,
PermuteToLIDs_d,
PermuteFromLIDs_d,
N,
MyPID,
CSR_rowptr,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,16 +225,17 @@ unpackAndCombineWithOwningPIDsCount (
/// Note: This method does the work previously done in
/// unpackAndCombineWithOwningPIDsCount, namely calculating the local number
/// of nonzeros, and it allocates CRS arrays of the correct sizes.

template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node>
void
unpackAndCombineIntoCrsArrays (
const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> & sourceMatrix,
const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
const Kokkos::View<const char*, typename Node::device_type>& imports_d,
const Kokkos::View<const size_t*, typename Node::device_type>& num_packets_per_lid_d,
const Kokkos::View<LocalOrdinal const *, typename Node::device_type>,
const Kokkos::View<const char*, typename Node::device_type>,
const Kokkos::View<const size_t*, typename Node::device_type>,
const size_t numSameIDs,
const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
const Kokkos::View<LocalOrdinal const *, typename Node::device_type>,
const Kokkos::View<LocalOrdinal const *, typename Node::device_type>,
size_t TargetNumRows,
const int MyTargetPID,
Teuchos::ArrayRCP<size_t>& CRS_rowptr,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1422,16 +1422,17 @@ unpackAndCombineWithOwningPIDsCount (
/// Note: The TargetPids vector (on output) will contain owning PIDs
/// for each entry in the matrix, with the "-1 for local" for locally
/// owned entries.

template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node>
void
unpackAndCombineIntoCrsArrays (
const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> & sourceMatrix,
const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
const Kokkos::View<const char*, typename Node::device_type>& imports_d,
const Kokkos::View<const size_t*, typename Node::device_type>& num_packets_per_lid_d,
const Kokkos::View<LocalOrdinal const *, typename Node::device_type> import_lids_d,
const Kokkos::View<const char*, typename Node::device_type> imports_d,
const Kokkos::View<const size_t*, typename Node::device_type> num_packets_per_lid_d,
const size_t numSameIDs,
const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
const Kokkos::View<LocalOrdinal const *, typename Node::device_type> permute_to_lids_d,
const Kokkos::View<LocalOrdinal const *, typename Node::device_type> permute_from_lids_d,
size_t TargetNumRows,
const int MyTargetPID,
Teuchos::ArrayRCP<size_t>& CRS_rowptr,
Expand Down Expand Up @@ -1468,26 +1469,21 @@ unpackAndCombineIntoCrsArrays (
using Kokkos::MemoryUnmanaged;

TEUCHOS_TEST_FOR_EXCEPTION
(permuteToLIDs.size () != permuteFromLIDs.size (), std::invalid_argument,
prefix << "permuteToLIDs.size() = " << permuteToLIDs.size () << " != "
"permuteFromLIDs.size() = " << permuteFromLIDs.size() << ".");
(permute_to_lids_d.size () != permute_from_lids_d.size (), std::invalid_argument,
prefix << "permute_to_lids_d.size() = " << permute_to_lids_d.size () << " != "
"permute_from_lids_d.size() = " << permute_from_lids_d.size() << ".");
// FIXME (mfh 26 Jan 2015) If there are no entries on the calling
// process, then the matrix is neither locally nor globally indexed.
const bool locallyIndexed = sourceMatrix.isLocallyIndexed ();
TEUCHOS_TEST_FOR_EXCEPTION
(! locallyIndexed, std::invalid_argument, prefix << "The input "
"CrsMatrix 'sourceMatrix' must be locally indexed.");
TEUCHOS_TEST_FOR_EXCEPTION
(((size_t)importLIDs.size ()) != num_packets_per_lid_d.size (), std::invalid_argument,
prefix << "importLIDs.size() = " << importLIDs.size () << " != "
(((size_t)import_lids_d.size ()) != num_packets_per_lid_d.size (), std::invalid_argument,
prefix << "import_lids_d.size() = " << import_lids_d.size () << " != "
"num_packets_per_lid_d.size() = " << num_packets_per_lid_d.size () << ".");

auto local_matrix = sourceMatrix.getLocalMatrixDevice ();
auto permute_from_lids_d =
create_mirror_view_from_raw_host_array (DT (),
permuteFromLIDs.getRawPtr (),
permuteFromLIDs.size (), true,
"permute_from_lids");

// TargetNumNonzeros is number of nonzeros in local matrix.
# ifdef HAVE_TPETRA_MMM_TIMINGS
Expand All @@ -1513,9 +1509,9 @@ unpackAndCombineIntoCrsArrays (
# endif

TEUCHOS_TEST_FOR_EXCEPTION(
permuteToLIDs.size () != permuteFromLIDs.size (), std::invalid_argument,
prefix << "permuteToLIDs.size() = " << permuteToLIDs.size ()
<< "!= permuteFromLIDs.size() = " << permuteFromLIDs.size () << ".");
permute_to_lids_d.size () != permute_from_lids_d.size (), std::invalid_argument,
prefix << "permuteToLIDs.size() = " << permute_to_lids_d.size ()
<< "!= permute_from_lids_d.size() = " << permute_from_lids_d.size () << ".");

// Preseed TargetPids with -1 for local
if (static_cast<size_t> (TargetPids.size ()) != TargetNumNonzeros) {
Expand All @@ -1531,13 +1527,6 @@ unpackAndCombineIntoCrsArrays (
# endif
// Convert input arrays to Kokkos::Views
DT outputDevice;
auto import_lids_d =
create_mirror_view_from_raw_host_array(outputDevice, importLIDs.getRawPtr(),
importLIDs.size(), true, "import_lids");

auto permute_to_lids_d =
create_mirror_view_from_raw_host_array(outputDevice, permuteToLIDs.getRawPtr(),
permuteToLIDs.size(), true, "permute_to_lids");

auto crs_rowptr_d =
create_mirror_view_from_raw_host_array(outputDevice, CRS_rowptr.getRawPtr(),
Expand Down Expand Up @@ -1691,12 +1680,12 @@ unpackAndCombineIntoCrsArrays (
template void \
Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT> ( \
const CrsMatrix<ST, LO, GO, NT> &, \
const Teuchos::ArrayView<const LO>&, \
const Kokkos::View<const char*, typename NT::device_type>&, \
const Kokkos::View<const size_t*, typename NT::device_type>&, \
const Kokkos::View<LO const *, typename NT::device_type>, \
const Kokkos::View<const char*, typename NT::device_type>, \
const Kokkos::View<const size_t*, typename NT::device_type>, \
const size_t, \
const Teuchos::ArrayView<const LO>&, \
const Teuchos::ArrayView<const LO>&, \
const Kokkos::View<LO const *, typename NT::device_type>, \
const Kokkos::View<LO const *, typename NT::device_type>, \
size_t, \
const int, \
Teuchos::ArrayRCP<size_t>&, \
Expand Down
67 changes: 28 additions & 39 deletions packages/tpetra/core/test/ImportExport2/ImportExport2_UnitTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -598,8 +598,6 @@ namespace {
typedef typename Array<Scalar>::size_type size_type;
for (size_type k = 0; k < static_cast<size_type> (tgtNumEntries); ++k) {
TEST_EQUALITY(tgtRowInds[k], tgt2RowInds[k]);
out << "JHU: tgtRowInds[" << k << "]=" << tgtRowInds[k]
<< ", tgt2RowInds[" << k << "] = " << tgt2RowInds[k] << std::endl;
// The "out" and "success" variables should have been
// automatically defined by the unit test framework, in case
// you're wondering where they came from.
Expand Down Expand Up @@ -2385,6 +2383,8 @@ TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( Import_Util, UnpackAndCombineWithOwningPIDs,
}
Kokkos::View<char*, Kokkos::HostSpace> importsView(imports.data(), imports.size());
distor.doPostsAndWaits(exports.view_host(),numExportPackets(),importsView,numImportPackets());
auto importsView_d = Kokkos::create_mirror_view(Node::device_type::memory_space(), importsView);
deep_copy(importsView_d,importsView);
if (verbose) {
std::ostringstream os;
os << *prefix << "Done with 4-arg doPostsAndWaits" << std::endl;
Expand All @@ -2393,64 +2393,53 @@ TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( Import_Util, UnpackAndCombineWithOwningPIDs,

::Tpetra::Details::Behavior::enable_verbose_behavior ();

// Run the count... which should get the same NNZ as the traditional import
using Tpetra::Details::unpackAndCombineWithOwningPIDsCount;
size_t nnz2 =
unpackAndCombineWithOwningPIDsCount<Scalar, LO, GO, Node> (*A, Importer->getRemoteLIDs (),
imports (), numImportPackets (),
constantNumPackets,
Tpetra::INSERT,
Importer->getNumSameIDs (),
Importer->getPermuteToLIDs (),
Importer->getPermuteFromLIDs ());
if (verbose) {
std::ostringstream os;
os << *prefix << "Done with unpackAndCombineWithOwningPIDsCount; "
"nnz1=" << nnz1 << ", nnz2=" << nnz2 << std::endl;
std::cerr << os.str ();
}

if(nnz1!=nnz2) test_err++;
total_err+=test_err;

/////////////////////////////////////////////////////////
// Test #2: Actual combine test
/////////////////////////////////////////////////////////
Teuchos::Array<size_t> rowptr (MapTarget->getLocalNumElements () + 1);
Teuchos::Array<GO> colind (nnz2);
Teuchos::Array<Scalar> vals (nnz2);
Teuchos::Array<int> TargetPids;
Teuchos::ArrayRCP<size_t> rowptr;
Teuchos::ArrayRCP<GO> colind;
Teuchos::ArrayRCP<Scalar> vals;
Teuchos::Array<int> TargetPids;

if (verbose) {
std::ostringstream os;
os << *prefix << "Calling unpackAndCombineIntoCrsArrays" << std::endl;
std::cerr << os.str ();
}

auto numImportPacketsView_d = Kokkos::create_mirror_view(Node::device_type::memory_space(),numImportPacketsView);
deep_copy(numImportPacketsView_d,numImportPacketsView);


const Kokkos::View<LO const *, typename Node::device_type> RemoteLIDs_d = Importer->getRemoteLIDs_dv().view_device();
const Kokkos::View<LO const *, typename Node::device_type> PermuteToLIDs_d = Importer->getPermuteToLIDs_dv().view_device();
const Kokkos::View<LO const *, typename Node::device_type> PermuteFromLIDs_d = Importer->getPermuteFromLIDs_dv().view_device();

using Tpetra::Details::unpackAndCombineIntoCrsArrays;
//JHU FIXME
unpackAndCombineIntoCrsArrays<Scalar, LO, GO, Node> (
*A,
Importer->getRemoteLIDs (),
imports (),
numImportPackets (),
constantNumPackets,
Tpetra::INSERT,
RemoteLIDs_d,
importsView_d,
numImportPacketsView_d,
Importer->getNumSameIDs (),
Importer->getPermuteToLIDs (),
Importer->getPermuteFromLIDs (),
PermuteToLIDs_d,
PermuteFromLIDs_d,
MapTarget->getLocalNumElements (),
nnz2,
MyPID,
rowptr (),
colind (),
Teuchos::av_reinterpret_cast<IST> (vals ()),
rowptr,
colind,
vals,
SourcePids (),
TargetPids);

size_t nnz2 = vals.size();
if(nnz1!=nnz2) test_err++;
total_err+=test_err;

if (verbose) {
std::ostringstream os;
os << *prefix << "Done with unpackAndCombineIntoCrsArrays" << std::endl;
os << *prefix << "Done with unpackAndCombineIntoCrsArrays; "
"nnz1=" << nnz1 << ", nnz2=" << nnz2 << std::endl;
std::cerr << os.str ();
}

Expand Down

0 comments on commit e53f6b0

Please sign in to comment.