Skip to content

Commit

Permalink
Merge Pull Request #12009 from brian-kelley/Trilinos/Fix11959
Browse files Browse the repository at this point in the history
Automatically Merged using Trilinos Pull Request AutoTester
PR Title: Tpetra: fix #11959
PR Author: brian-kelley
  • Loading branch information
trilinos-autotester authored Jul 11, 2023
2 parents bbe586f + 7a57bc3 commit ca4f21a
Show file tree
Hide file tree
Showing 2 changed files with 198 additions and 112 deletions.
110 changes: 94 additions & 16 deletions packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2129,6 +2129,7 @@ namespace Tpetra {
global_size_t globalMaxNumRowEntries_ =
Teuchos::OrdinalTraits<global_size_t>::invalid();

private:
// Replacement for device view k_rowPtrs_
// Device view rowPtrsUnpacked_dev_ takes place of k_rowPtrs_
// Host view rowPtrsUnpacked_host_ takes place of copies and use of getEntryOnHost
Expand All @@ -2141,38 +2142,115 @@ namespace Tpetra {
// When OptimizedStorage, rowPtrsUnpacked_ = k_rowPtrsPacked_

row_ptrs_device_view_type rowPtrsUnpacked_dev_;
row_ptrs_host_view_type rowPtrsUnpacked_host_;
mutable row_ptrs_host_view_type rowPtrsUnpacked_host_;

// Row offsets into the actual graph local indices
// Device view rowPtrsPacked_dev_ takes place of lclGraph_.row_map

row_ptrs_device_view_type rowPtrsPacked_dev_;
mutable row_ptrs_host_view_type rowPtrsPacked_host_;

//! Whether the unpacked and packed row pointers have identical contents
bool packedUnpackedRowPtrsMatch_ = false;

protected:
//! Set the unpacked row pointers from a device view.
//!
//! Stores \c dview as the device-side unpacked row pointers and records that
//! the packed and unpacked row pointers are no longer known to match.
//! No device-to-host copy is made here: the cached host view is reset to an
//! empty view, and getRowPtrsUnpackedHost() rebuilds it on demand.
//!
//! \param dview [in] Device view of the unpacked row offsets.
void setRowPtrsUnpacked(const row_ptrs_device_view_type &dview) {
  packedUnpackedRowPtrsMatch_ = false;
  rowPtrsUnpacked_dev_ = dview;
  //Make sure stale host rowptrs are not kept
  rowPtrsUnpacked_host_ = row_ptrs_host_view_type();
}

// Row offsets into the actual graph local indices
// Device view rowPtrsUnpacked_dev_ takes place of lclGraph_.row_map
//! Accessor for the device-side unpacked row pointers.
//! Returns a reference to the stored view; nothing is copied.
const row_ptrs_device_view_type& getRowPtrsUnpackedDevice() const { return rowPtrsUnpacked_dev_; }

row_ptrs_device_view_type rowPtrsPacked_dev_;
row_ptrs_host_view_type rowPtrsPacked_host_;
//! Accessor for the host-side unpacked row pointers.
//!
//! The host view is populated on demand: whenever its length differs from
//! the device view's length, it is rebuilt from the device view before
//! being returned. The member is \c mutable so this can happen in a
//! const method.
const row_ptrs_host_view_type& getRowPtrsUnpackedHost() const
{
  // Cached host view already has the device view's length: reuse it.
  if(rowPtrsUnpacked_host_.extent(0) == rowPtrsUnpacked_dev_.extent(0))
    return rowPtrsUnpacked_host_;

  //NOTE: create_mirror_view is deliberately not used here. Host and device
  //should live in different memory even with a SharedSpace, so that reads
  //never trigger a host-device transfer. The one exception is when the
  //'device' views are already in HostSpace: then no second copy is made.
  if constexpr(std::is_same_v<typename Node::memory_space, Kokkos::HostSpace>)
  {
    rowPtrsUnpacked_host_ = rowPtrsUnpacked_dev_;
  }
  else
  {
    //The row pointer views are const-valued, so fill a writable staging
    //view first and then assign it to the member.
    using staging_view_type = typename row_ptrs_host_view_type::non_const_type;
    staging_view_type hostCopy(
      Kokkos::view_alloc(Kokkos::WithoutInitializing, "rowPtrsUnpacked_host_"),
      rowPtrsUnpacked_dev_.extent(0));
    Kokkos::deep_copy(hostCopy, rowPtrsUnpacked_dev_);
    rowPtrsUnpacked_host_ = hostCopy;
  }

  //If packed and unpacked are known to have the same contents, let the
  //packed host view share the freshly built view.
  if(packedUnpackedRowPtrsMatch_)
    rowPtrsPacked_host_ = rowPtrsUnpacked_host_;

  return rowPtrsUnpacked_host_;
}

//! Set the packed row pointers from a device view.
//!
//! Stores \c dview as the device-side packed row pointers and records that
//! the packed and unpacked row pointers are no longer known to match.
//! No device-to-host copy is made here: the cached host view is reset to an
//! empty view, and getRowPtrsPackedHost() rebuilds it on demand.
//!
//! \param dview [in] Device view of the packed row offsets.
void setRowPtrsPacked(const row_ptrs_device_view_type &dview) {
  packedUnpackedRowPtrsMatch_ = false;
  rowPtrsPacked_dev_ = dview;
  //Make sure stale host rowptrs are not kept
  rowPtrsPacked_host_ = row_ptrs_host_view_type();
}

//! Accessor for the device-side packed row pointers.
//! Returns a reference to the stored view; nothing is copied.
const row_ptrs_device_view_type& getRowPtrsPackedDevice() const { return rowPtrsPacked_dev_; }

//! Accessor for the host-side packed row pointers.
//!
//! The host view is populated on demand: whenever its length differs from
//! the device view's length, it is rebuilt from the device view before
//! being returned. The member is \c mutable so this can happen in a
//! const method.
const row_ptrs_host_view_type& getRowPtrsPackedHost() const
{
  // Cached host view already has the device view's length: reuse it.
  if(rowPtrsPacked_host_.extent(0) == rowPtrsPacked_dev_.extent(0))
    return rowPtrsPacked_host_;

  //NOTE: create_mirror_view is deliberately not used here. Host and device
  //should live in different memory even with a SharedSpace, so that reads
  //never trigger a host-device transfer. The one exception is when the
  //'device' views are already in HostSpace: then no second copy is made.
  if constexpr(std::is_same_v<typename Node::memory_space, Kokkos::HostSpace>)
  {
    rowPtrsPacked_host_ = rowPtrsPacked_dev_;
  }
  else
  {
    //The row pointer views are const-valued, so fill a writable staging
    //view first and then assign it to the member.
    using staging_view_type = typename row_ptrs_host_view_type::non_const_type;
    staging_view_type hostCopy(
      Kokkos::view_alloc(Kokkos::WithoutInitializing, "rowPtrsPacked_host_"),
      rowPtrsPacked_dev_.extent(0));
    Kokkos::deep_copy(hostCopy, rowPtrsPacked_dev_);
    rowPtrsPacked_host_ = hostCopy;
  }

  //If packed and unpacked are known to have the same contents, let the
  //unpacked host view share the freshly built view.
  if(packedUnpackedRowPtrsMatch_)
    rowPtrsUnpacked_host_ = rowPtrsPacked_host_;

  return rowPtrsPacked_host_;
}

// There are common cases where both packed and unpacked views are set to the same array.
// Doing this in a single call records that the two have identical contents
// (packedUnpackedRowPtrsMatch_), which lets getRowPtrsUnpackedHost() and
// getRowPtrsPackedHost() share one host view instead of each making its own
// copy from device.
//
// \param dview [in] Device view used for both the packed and the unpacked
//   row offsets.
void setRowPtrs(const row_ptrs_device_view_type &dview) {
  packedUnpackedRowPtrsMatch_ = true;
  rowPtrsUnpacked_dev_ = dview;
  rowPtrsPacked_dev_ = dview;
  //Make sure stale host rowptrs are not kept
  rowPtrsUnpacked_host_ = row_ptrs_host_view_type();
  rowPtrsPacked_host_ = row_ptrs_host_view_type();
}

//TODO: Make private -- matrix shouldn't access directly the guts of graph
Expand Down
Loading

0 comments on commit ca4f21a

Please sign in to comment.