Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tpetra: Skip unpackAndCombine #9133

Merged
merged 1 commit into from
Jun 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions packages/tpetra/core/src/Tpetra_Details_Transfer_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,13 @@ class Transfer : public Teuchos::Describable {
/// Maps in the way that you expect.
bool isLocallyComplete () const;


/// \brief Detect whether the remote LIDs and the export LIDs are
///   contiguous, and cache both answers.
///
/// The answers are stored in this Transfer's shared data and are
/// read back by areRemoteLIDsContiguous() and
/// areExportLIDsContiguous().  Throws std::logic_error if a LID
/// DualView that it needs to inspect requires a sync to device.
void detectRemoteExportLIDsContiguous() const;

/// \brief Whether the remote LIDs were found to be contiguous.
///
/// Returns the value cached by detectRemoteExportLIDsContiguous();
/// this method does not recompute anything.
bool areRemoteLIDsContiguous() const;

/// \brief Whether the export LIDs were found to be contiguous.
///
/// Returns the value cached by detectRemoteExportLIDsContiguous();
/// this method does not recompute anything.
bool areExportLIDsContiguous() const;

/// \brief Describe this object in a human-readable way to the given
/// output stream.
///
Expand Down
81 changes: 81 additions & 0 deletions packages/tpetra/core/src/Tpetra_Details_Transfer_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,35 @@ namespace { // (anonymous)
return Teuchos::ArrayView<const ElementType> (size == 0 ? nullptr : hostView.data (), size);
}

/// \brief Reduction functor that flags any break in a run of
///   consecutive values.
///
/// For index i, the functor compares entry i+1 of the View against
/// entry i plus one.  If they differ, it ORs 1 into the reduction
/// value; otherwise it leaves the reduction value unchanged.  The
/// functor reads entry i+1, so it must only be invoked for indices
/// strictly less than (extent - 1).
template<class DeviceType, class LocalOrdinal>
struct OrderedViewFunctor {
  OrderedViewFunctor (const Kokkos::View<LocalOrdinal*, DeviceType>& viewToCheck)
    : viewToCheck_ (viewToCheck)
  {}

  KOKKOS_INLINE_FUNCTION void
  operator() (const size_t i, unsigned int& isUnordered) const
  {
    const LocalOrdinal current = viewToCheck_(i);
    const LocalOrdinal next = viewToCheck_(i+1);
    // Branch-free update: the comparison yields 0 or 1.
    const bool breaksRun = (current + 1 != next);
    isUnordered |= static_cast<unsigned int> (breaksRun);
  }

  //! Read-only handle to the View being checked.
  Kokkos::View<const LocalOrdinal*, DeviceType> viewToCheck_;
};

/// \brief Whether every entry of the View equals its predecessor
///   plus one.
///
/// Views with fewer than two entries are reported as ordered without
/// launching a kernel.  Otherwise, a parallel_reduce over indices
/// [0, extent-1) applies OrderedViewFunctor to each adjacent pair.
///
/// \param viewToCheck [in] One-dimensional View to examine.
///
/// \return true if no adjacent pair breaks the run, else false.
template<class DeviceType, class LocalOrdinal>
bool
isViewOrdered (const Kokkos::View<LocalOrdinal*, DeviceType>& viewToCheck)
{
  using execution_space = typename DeviceType::execution_space;
  using policy_type = Kokkos::RangePolicy<execution_space, size_t>;
  using functor_type = OrderedViewFunctor<DeviceType, LocalOrdinal>;

  const size_t numEntries = viewToCheck.extent (0);
  if (numEntries <= 1) {
    return true; // nothing to compare
  }

  unsigned int foundBreak = 0;
  // Stop one short of the end, since the functor reads entry i+1.
  Kokkos::parallel_reduce ("isViewOrdered",
                           policy_type (0, numEntries-1),
                           functor_type (viewToCheck),
                           foundBreak);
  return foundBreak == 0;
}

} // namespace (anonymous)

namespace Tpetra {
Expand Down Expand Up @@ -270,6 +299,58 @@ isLocallyComplete () const {
return TransferData_->isLocallyComplete_;
}

/// \brief Detect whether the remote LIDs and the export LIDs are
///   contiguous, and cache both answers in TransferData_.
///
/// Each LID array is reported as contiguous only if all of the
/// following hold:
///   - the number of "same" IDs equals the smaller of the source and
///     target Maps' local element counts,
///   - the relevant Map's local element count (target Map for remote
///     LIDs, source Map for export LIDs) equals the number of same
///     IDs plus the number of remote (resp. export) IDs, and
///   - the device View of the LIDs passes isViewOrdered().
///
/// \throws std::logic_error if the first two conditions hold and the
///   corresponding LID DualView needs a sync to device.
template <class LO, class GO, class NT>
void
Transfer<LO, GO, NT>::
detectRemoteExportLIDsContiguous () const {
  const auto numSameIDs = getNumSameIDs ();
  const auto numSrcElts = getSourceMap ()->getNodeNumElements ();
  const auto numTgtElts = getTargetMap ()->getNodeNumElements ();

  // Check that maps are locally fitted
  // TODO: We really want to check here that remote LIDs are sorted last.
  // The current check is too restrictive in special cases.
  const bool locallyFitted =
    (numSameIDs == std::min (numSrcElts, numTgtElts));

  // Remote LIDs: everything in the target Map past the same IDs
  // must be a remote entry.
  bool ordered = locallyFitted;
  ordered &= (numTgtElts == numSameIDs + getNumRemoteIDs ());
  if (ordered) {
    const auto& dv = TransferData_->remoteLIDs_;
    TEUCHOS_TEST_FOR_EXCEPTION
      (dv.need_sync_device (), std::logic_error,
       "Tpetra::Details::Transfer::detectRemoteExportLIDsContiguous: "
       "remoteLIDs_ DualView needs sync to device" );
    auto v_d = dv.view_device ();
    ordered &= isViewOrdered<device_type, LO> (v_d);
  }
  TransferData_->remoteLIDsContiguous_ = ordered;

  // Export LIDs: everything in the source Map past the same IDs
  // must be an export entry.
  ordered = locallyFitted;
  ordered &= (numSrcElts == numSameIDs + getNumExportIDs ());
  if (ordered) {
    const auto& dv = TransferData_->exportLIDs_;
    TEUCHOS_TEST_FOR_EXCEPTION
      (dv.need_sync_device (), std::logic_error,
       "Tpetra::Details::Transfer::detectRemoteExportLIDsContiguous: "
       "exportLIDs_ DualView needs sync to device" );
    auto v_d = dv.view_device ();
    ordered &= isViewOrdered<device_type, LO> (v_d);
  }
  TransferData_->exportLIDsContiguous_ = ordered;
}

/// \brief Whether the remote LIDs were found to be contiguous.
///
/// Reads the flag that detectRemoteExportLIDsContiguous() stores in
/// TransferData_; nothing is recomputed here.
template <class LO, class GO, class NT>
bool
Transfer<LO, GO, NT>::
areRemoteLIDsContiguous () const
{
  const bool cachedResult = TransferData_->remoteLIDsContiguous_;
  return cachedResult;
}

/// \brief Whether the export LIDs were found to be contiguous.
///
/// Reads the flag that detectRemoteExportLIDsContiguous() stores in
/// TransferData_; nothing is recomputed here.
template <class LO, class GO, class NT>
bool
Transfer<LO, GO, NT>::
areExportLIDsContiguous () const
{
  const bool cachedResult = TransferData_->exportLIDsContiguous_;
  return cachedResult;
}


template <class LO, class GO, class NT>
void
Transfer<LO, GO, NT>::
Expand Down
10 changes: 7 additions & 3 deletions packages/tpetra/core/src/Tpetra_DistObject_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,9 @@ namespace Tpetra {
size_t constantNumPackets,
bool commOnHost,
ReverseOption revOp,
std::shared_ptr<std::string> prefix);
std::shared_ptr<std::string> prefix,
const bool canTryAliasing,
const CombineMode CM);

void doWaits(Distributor& distor,
ReverseOption revOp);
Expand Down Expand Up @@ -997,10 +999,12 @@ namespace Tpetra {
/// <tt>exports_</tt> always gets passed into packAndPrepare()
/// by nonconst reference. Thus, that method can resize the
/// DualView without needing to call other DistObject methods.
bool
virtual bool
reallocImportsIfNeeded (const size_t newSize,
const bool verbose,
const std::string* prefix);
const std::string* prefix,
const bool remoteLIDsContiguous=false,
const CombineMode CM=INSERT);

/// \brief Number of packets to receive for each receive operation.
///
Expand Down
23 changes: 17 additions & 6 deletions packages/tpetra/core/src/Tpetra_DistObject_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,9 @@ namespace Tpetra {
DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
reallocImportsIfNeeded (const size_t newSize,
const bool verbose,
const std::string* prefix)
const std::string* prefix,
const bool /*remoteLIDsContiguous*/,
const CombineMode /*CM*/)
{
if (verbose) {
std::ostringstream os;
Expand Down Expand Up @@ -917,6 +919,10 @@ namespace Tpetra {
const_lo_dv_type exportLIDs = (revOp == DoForward) ?
transfer.getExportLIDs_dv () :
transfer.getRemoteLIDs_dv ();
const bool canTryAliasing = (revOp == DoForward) ?
transfer.areRemoteLIDsContiguous() :
transfer.areExportLIDsContiguous();
// const bool canTryAliasing = false;

ProfilingRegion region_dTN(funcName);
#ifdef HAVE_TPETRA_TRANSFER_TIMERS
Expand Down Expand Up @@ -1068,7 +1074,7 @@ namespace Tpetra {
// elements) how many incoming elements we expect, so we can
// resize the buffer accordingly.
const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
reallocImportsIfNeeded (rbufLen, verbose, prefix.get ());
reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
}

// Do we need to do communication (via doPostsAndWaits)?
Expand Down Expand Up @@ -1116,7 +1122,7 @@ namespace Tpetra {
std::cerr << os.str ();
}

doPosts(distor, constantNumPackets, commOnHost, revOp, prefix);
doPosts(distor, constantNumPackets, commOnHost, revOp, prefix, canTryAliasing, CM);
} // if (needCommunication)
} // if (CM != ZERO)
}
Expand Down Expand Up @@ -1267,6 +1273,9 @@ namespace Tpetra {
const_lo_dv_type exportLIDs = (revOp == DoForward) ?
transfer.getExportLIDs_dv () :
transfer.getRemoteLIDs_dv ();
const bool canTryAliasing = (revOp == DoForward) ?
transfer.areRemoteLIDsContiguous() :
transfer.areExportLIDsContiguous();

size_t constantNumPackets = this->constantNumberOfPackets ();

Expand All @@ -1278,7 +1287,7 @@ namespace Tpetra {
// elements) how many incoming elements we expect, so we can
// resize the buffer accordingly.
const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
reallocImportsIfNeeded (rbufLen, verbose, prefix.get ());
reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
}

// Do we need to do communication (via doPostsAndWaits)?
Expand Down Expand Up @@ -1341,7 +1350,9 @@ namespace Tpetra {
size_t constantNumPackets,
bool commOnHost,
ReverseOption revOp,
std::shared_ptr<std::string> prefix)
std::shared_ptr<std::string> prefix,
const bool canTryAliasing,
const CombineMode CM)
{
using ::Tpetra::Details::dualViewStatusToString;
using ::Tpetra::Details::getArrayViewFromDualView;
Expand Down Expand Up @@ -1432,7 +1443,7 @@ namespace Tpetra {
std::cerr << os.str ();
}
this->reallocImportsIfNeeded (totalImportPackets, verbose,
prefix.get ());
prefix.get (), canTryAliasing, CM);
if (verbose) {
std::ostringstream os;
os << *prefix << "7.3. Second comm" << std::endl;
Expand Down
2 changes: 2 additions & 0 deletions packages/tpetra/core/src/Tpetra_Export_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ namespace Tpetra {
TEUCHOS_ASSERT( ! this->TransferData_->exportLIDs_.need_sync_device () );
TEUCHOS_ASSERT( ! this->TransferData_->exportLIDs_.need_sync_host () );

this->detectRemoteExportLIDsContiguous();

if (this->verbose ()) {
std::ostringstream os;
const int myRank = source->getComm ()->getRank ();
Expand Down
6 changes: 6 additions & 0 deletions packages/tpetra/core/src/Tpetra_ImportExportData_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,12 @@ namespace Tpetra {
/// other processes.
Kokkos::DualView<LocalOrdinal*, device_type> remoteLIDs_;

/// \brief Whether the remote LIDs are contiguous.
///
/// Defaults to false.  Assigned by
/// Transfer::detectRemoteExportLIDsContiguous() and read by
/// Transfer::areRemoteLIDsContiguous().
bool remoteLIDsContiguous_ = false;

/// \brief Whether the export LIDs are contiguous.
///
/// Defaults to false.  Assigned by
/// Transfer::detectRemoteExportLIDsContiguous() and read by
/// Transfer::areExportLIDsContiguous().
bool exportLIDsContiguous_ = false;

/// \brief "Outgoing" local indices.
///
/// This array holds the LIDs of the GIDs that are owned by the
Expand Down
6 changes: 6 additions & 0 deletions packages/tpetra/core/src/Tpetra_Import_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ namespace Tpetra {
TEUCHOS_ASSERT( ! this->TransferData_->exportLIDs_.need_sync_device () );
TEUCHOS_ASSERT( ! this->TransferData_->exportLIDs_.need_sync_host () );

this->detectRemoteExportLIDsContiguous();

if (this->verbose ()) {
std::ostringstream os;
os << *verbPrefix << "Done!" << endl;
Expand Down Expand Up @@ -409,6 +411,8 @@ namespace Tpetra {
distributor.createFromSendsAndRecvs (this->TransferData_->exportPIDs_, tRemotePIDs);
}

this->detectRemoteExportLIDsContiguous();

TEUCHOS_ASSERT( ! this->TransferData_->permuteFromLIDs_.need_sync_device () );
TEUCHOS_ASSERT( ! this->TransferData_->permuteFromLIDs_.need_sync_host () );
TEUCHOS_ASSERT( ! this->TransferData_->permuteToLIDs_.need_sync_device () );
Expand Down Expand Up @@ -487,6 +491,8 @@ namespace Tpetra {
this->TransferData_->exportPIDs_.swap (exportPIDs);
this->TransferData_->distributor_.swap (distributor);

this->detectRemoteExportLIDsContiguous();

TEUCHOS_ASSERT( ! this->TransferData_->permuteFromLIDs_.need_sync_device () );
TEUCHOS_ASSERT( ! this->TransferData_->permuteFromLIDs_.need_sync_host () );
TEUCHOS_ASSERT( ! this->TransferData_->permuteToLIDs_.need_sync_device () );
Expand Down
40 changes: 40 additions & 0 deletions packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2574,6 +2574,46 @@ namespace Tpetra {
const size_t constantNumPackets,
Distributor& /* distor */,
const CombineMode CM);

private:

// If comm buffers can be aliased to the data view, use this
// implementation.
//
// This overload is selected via std::enable_if only when the comm
// buffer memory space for NO's execution space
// (CommBufferMemorySpace<...>::type) is the same type as NO's device
// memory space.  The parameter list matches reallocImportsIfNeeded,
// minus the default arguments.
template<class NO=Node>
typename std::enable_if<std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
typename NO::device_type::memory_space>::value, bool>::type
reallocImportsIfNeededImpl (const size_t newSize,
const bool verbose,
const std::string* prefix,
const bool areRemoteLIDsContiguous,
const CombineMode CM);

// If comm buffers cannot be aliased to the data view, use this
// implementation. (Just calls DistObject::reallocImportsIfNeeded.)
//
// This overload is selected via std::enable_if only when the comm
// buffer memory space for NO's execution space
// (CommBufferMemorySpace<...>::type) differs from NO's device memory
// space.  It is the exact complement of the aliasing overload's
// condition, so exactly one of the two overloads is viable.
template<class NO=Node>
typename std::enable_if<!std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
typename NO::device_type::memory_space>::value, bool>::type
reallocImportsIfNeededImpl (const size_t newSize,
const bool verbose,
const std::string* prefix,
const bool areRemoteLIDsContiguous,
const CombineMode CM);
protected:

// Reallocate the imports buffer if needed.
//
// The last two parameters are defaulted (areRemoteLIDsContiguous to
// false, CM to INSERT), so callers may pass only the first three
// arguments.
//
// NOTE(review): presumably overrides DistObject::reallocImportsIfNeeded
// and dispatches to reallocImportsIfNeededImpl -- confirm against the
// definition.  Default arguments on a virtual function are bound by
// the static type of the caller's expression, so keep these defaults
// identical to the ones on the base class declaration.
virtual bool
reallocImportsIfNeeded (const size_t newSize,
const bool verbose,
const std::string* prefix,
const bool areRemoteLIDsContiguous=false,
const CombineMode CM=INSERT);


public:
bool importsAreAliased();

protected:
Kokkos::DualView<impl_scalar_type*, buffer_device_type> unaliased_imports_;

//@}
}; // class MultiVector

Expand Down
Loading