Skip to content

Commit

Permalink
Merge pull request #9159 from cgcgcg/tpetraResidualSkipCopyAndPermute
Browse files Browse the repository at this point in the history
Tpetra: residual changes
  • Loading branch information
cgcgcg authored Jun 28, 2021
2 parents 3ce6990 + 7fe526c commit 6738b5f
Show file tree
Hide file tree
Showing 14 changed files with 1,007 additions and 162 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
#endif

#include "Tpetra_Details_residual.hpp"
#include "KokkosSparse_spmv_impl.hpp"

#include <Ifpack2_UnitTestHelpers.hpp>
#include <Ifpack2_OverlappingRowMatrix.hpp>
Expand Down Expand Up @@ -133,8 +134,7 @@ void localReducedMatvec(const MatrixClass & A_lcl,
int64_t numLocalRows = userNumRows;
int64_t myNnz = A_lcl.nnz();

int64_t rows_per_team =
Tpetra::Details::residual_launch_parameters<execution_space>(numLocalRows, myNnz, rows_per_thread, team_size, vector_length);
int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(numLocalRows, myNnz, rows_per_thread, team_size, vector_length);
int64_t worksets = (X_lcl.extent (0) + rows_per_team - 1) / rows_per_team;

using policy_type = typename Kokkos::TeamPolicy<execution_space>;
Expand Down
8 changes: 4 additions & 4 deletions packages/tpetra/core/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ FUNCTION(TPETRA_PROCESS_ALL_LGN_TEMPLATES OUTPUT_FILES TEMPLATE_FILE
FOREACH(LO ${LOCALORDINAL_TYPES})
TPETRA_MANGLE_TEMPLATE_PARAMETER(LO_MANGLED "${LO}")
TPETRA_SLG_MACRO_NAME(LO_MACRO_NAME "${LO}")

TPETRA_PROCESS_ONE_LGN_TEMPLATE(OUT_FILE "${TEMPLATE_FILE}"
"${CLASS_NAME}" "${CLASS_MACRO_NAME}"
"${LO_MANGLED}" "${GO_MANGLED}" "${NT_MANGLED}"
Expand Down Expand Up @@ -619,7 +619,7 @@ IF (${PACKAGE_NAME}_ENABLE_EXPLICIT_INSTANTIATION)
"${TpetraCore_ETI_NODES}"
TRUE)
LIST(APPEND SOURCES ${LOCALDEEPCOPYROWMATRIX_OUTPUT_FILES})

# Generate ETI .cpp files for the RowMatrix -> CrsMatrix overload of
# Tpetra::createDeepCopy. Do this only for non-integer Scalar
# types, since we really only need this function for linear solvers.
Expand All @@ -634,7 +634,7 @@ IF (${PACKAGE_NAME}_ENABLE_EXPLICIT_INSTANTIATION)
FALSE)
LIST(APPEND SOURCES ${CREATEDEEPCOPY_CRSMATRIX_OUTPUT_FILES})
ENDIF ()

# Generate ETI .cpp files for Tpetra::LocalCrsMatrixOperator.
TPETRA_PROCESS_ALL_SN_TEMPLATES(LOCALCRSMATRIXOPERATOR_OUTPUT_FILES
"Tpetra_ETI_SC_NT.tmpl" "LocalCrsMatrixOperator"
Expand Down Expand Up @@ -777,5 +777,5 @@ SET_PROPERTY(
# / from this directory, or to / from the 'impl' subdirectory. That ensures
# that running "make" will also rerun CMake in order to regenerate Makefiles.
#
# Here's another change, another, and another.
# Here's another change, another, and another and yet another.
#
18 changes: 18 additions & 0 deletions packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,9 @@ namespace Tpetra {
using nonconst_global_inds_host_view_type =
typename row_graph_type::nonconst_global_inds_host_view_type;

using offset_device_view_type =
typename row_ptrs_device_view_type::non_const_type;


//KDDKDD INROW using local_inds_host_view_type =
//KDDKDD INROW typename local_inds_dualv_type::t_host::const_type;
Expand Down Expand Up @@ -1387,6 +1390,10 @@ namespace Tpetra {
void
getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const;

/// \brief Get offsets of the off-rank entries in the graph.
void
getLocalOffRankOffsets (offset_device_view_type& offsets) const;

/// \brief Backwards compatibility overload of the above method.
///
/// This method takes a Teuchos::ArrayRCP instead of a
Expand Down Expand Up @@ -2064,6 +2071,8 @@ namespace Tpetra {
/// </ul>
void computeGlobalConstants ();

/// \brief Whether the off-rank offsets have been computed and cached.
///
/// Returns the flag that getLocalOffRankOffsets() sets to true once
/// it has filled the cached offsets view.
bool haveLocalOffRankOffsets() const { return haveLocalOffRankOffsets_;}

protected:
/// \brief Compute local constants, if they have not yet been computed.
///
Expand Down Expand Up @@ -2410,6 +2419,13 @@ namespace Tpetra {
/// This may also exist with 1-D storage, if storage is unpacked.
num_row_entries_type k_numRowEntries_;

/// \brief The offsets for off-rank entries.
///
/// When off-rank entries are sorted last, this rowPtr-like view
/// contains the offsets. It is computed on the first call to
/// getLocalOffRankOffsets().
mutable offset_device_view_type k_offRankOffsets_;

//@}

/// \brief Status of the graph's storage, when not in a
Expand Down Expand Up @@ -2438,6 +2454,8 @@ namespace Tpetra {
bool haveLocalConstants_ = false;
//! Whether all processes have computed global constants.
bool haveGlobalConstants_ = false;
//! Whether the off-rank offsets (k_offRankOffsets_) have been computed.
mutable bool haveLocalOffRankOffsets_ = false;

typedef typename std::map<global_ordinal_type, std::vector<global_ordinal_type> > nonlocals_type;

Expand Down
58 changes: 58 additions & 0 deletions packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
#include "Tpetra_Details_copyOffsets.hpp"
#include "Tpetra_Details_gathervPrint.hpp"
#include "Tpetra_Details_getGraphDiagOffsets.hpp"
#include "Tpetra_Details_getGraphOffRankOffsets.hpp"
#include "Tpetra_Details_makeColMap.hpp"
#include "Tpetra_Details_Profiling.hpp"
#include "Tpetra_Details_getEntryOnHost.hpp"
Expand Down Expand Up @@ -6698,6 +6699,60 @@ namespace Tpetra {
} // debug_
}

// Get (and cache) the offsets of the off-rank entries of each local row.
//
// On success, `offsets` is set to the cached view k_offRankOffsets_, which
// has getNodeNumRows()+1 entries and is filled by
// Tpetra::Details::getGraphOffRankOffsets from the local graph and the
// local column and domain Maps.  Later calls reuse the cached view as long
// as its extent still matches the current number of local rows.
//
// offsets [out]: set to the cached offsets view; left untouched if this
//   method throws.
//
// Throws std::runtime_error if the graph has no column Map or is not fill
// complete.  Also asserts (TEUCHOS_ASSERT) that the graph is sorted.
template <class LocalOrdinal, class GlobalOrdinal, class Node>
void
CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
getLocalOffRankOffsets (offset_device_view_type& offsets) const
{
  using std::endl;
  const char tfecfFuncName[] = "getLocalOffRankOffsets: ";
  const bool verbose = verbose_;

  std::unique_ptr<std::string> prefix;
  if (verbose) {
    prefix = this->createPrefix("CrsGraph", "getLocalOffRankOffsets");
    std::ostringstream os;
    os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
       << endl;
    std::cerr << os.str();
  }

  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
    (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
  // Instead of throwing, we could also copy the rowPtr to k_offRankOffsets_.

  const size_t lclNumRows = this->getNodeNumRows ();

  // Fast path: reuse the cached result if it is still the right size.
  if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) == lclNumRows+1) {
    offsets = k_offRankOffsets_;
    return;
  }
  haveLocalOffRankOffsets_ = false;

  // The offsets can only be computed from a fill-complete graph.  Check this
  // before touching the domain Map or allocating anything, so that the
  // caller never receives an allocated-but-uninitialized view.
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
    (! isFillComplete (), std::runtime_error,
     "Can't get off-rank offsets for non-fill-complete graph.");

  TEUCHOS_ASSERT(this->isSorted ());

  const map_type& colMap = * (this->getColMap ());
  const map_type& domMap = * (this->getDomainMap ());

  // mfh 12 Mar 2016: LocalMap works on (CUDA) device.  It has just
  // the subset of Map functionality that we need below.
  auto lclColMap = colMap.getLocalMap ();
  auto lclDomMap = domMap.getLocalMap ();

  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
  // setup, at least on the host.  For CUDA, we have to use LocalMap
  // (that comes from each of the two Maps).

  // Contents are left uninitialized here; getGraphOffRankOffsets is
  // expected to write the entries.
  k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing("offRankOffset"), lclNumRows+1);
  auto lclGraph = this->getLocalGraph ();
  ::Tpetra::Details::getGraphOffRankOffsets (k_offRankOffsets_,
                                             lclColMap, lclDomMap,
                                             lclGraph);

  // Publish the result only after it has actually been computed.
  offsets = k_offRankOffsets_;
  haveLocalOffRankOffsets_ = true;
}

namespace { // (anonymous)

// mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
Expand Down Expand Up @@ -7548,6 +7603,7 @@ namespace Tpetra {

std::swap(graph.rowPtrsUnpacked_dev_, this->rowPtrsUnpacked_dev_);
std::swap(graph.rowPtrsUnpacked_host_, this->rowPtrsUnpacked_host_);
std::swap(graph.k_offRankOffsets_, this->k_offRankOffsets_);

std::swap(graph.lclIndsUnpacked_wdv, this->lclIndsUnpacked_wdv);
std::swap(graph.gblInds_wdv, this->gblInds_wdv);
Expand All @@ -7563,6 +7619,7 @@ namespace Tpetra {
std::swap(graph.noRedundancies_, this->noRedundancies_);
std::swap(graph.haveLocalConstants_, this->haveLocalConstants_);
std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_);
std::swap(graph.haveLocalOffRankOffsets_, this->haveLocalOffRankOffsets_);

std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_);

Expand Down Expand Up @@ -7625,6 +7682,7 @@ namespace Tpetra {
output = this->noRedundancies_ == graph.noRedundancies_ ? output : false;
output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
output = this->haveLocalOffRankOffsets_ == graph.haveLocalOffRankOffsets_ ? output : false;
output = this->sortGhostsAssociatedWithEachProcessor_ == this->sortGhostsAssociatedWithEachProcessor_ ? output : false;

// Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
Expand Down
23 changes: 23 additions & 0 deletions packages/tpetra/core/src/Tpetra_Details_Behavior.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,29 @@ bool Behavior::hierarchicalUnpack ()
defaultValue);
}

// Whether copyAndPermute should be skipped when possible.
//
// Controlled by the TPETRA_SKIP_COPY_AND_PERMUTE environment variable;
// the default is false.
bool Behavior::skipCopyAndPermuteIfPossible ()
{
  constexpr bool fallback = false;
  constexpr char variableName[] = "TPETRA_SKIP_COPY_AND_PERMUTE";

  // Function-local state handed to the helper on every call
  // (presumably so the environment is only queried once -- confirm
  // against idempotentlyGetEnvironmentVariableAsBool).
  static bool cachedValue = fallback;
  static bool cacheFilled = false;

  return idempotentlyGetEnvironmentVariableAsBool (cachedValue,
                                                   cacheFilled,
                                                   variableName,
                                                   fallback);
}

// Whether communication and computation should be overlapped.
//
// Controlled by the TPETRA_OVERLAP environment variable; the default
// is false.
bool Behavior::overlapCommunicationAndComputation ()
{
  constexpr bool fallback = false;
  constexpr char variableName[] = "TPETRA_OVERLAP";

  // Function-local state handed to the helper on every call
  // (presumably so the environment is only queried once -- confirm
  // against idempotentlyGetEnvironmentVariableAsBool).
  static bool cachedValue = fallback;
  static bool cacheFilled = false;

  return idempotentlyGetEnvironmentVariableAsBool (cachedValue,
                                                   cacheFilled,
                                                   variableName,
                                                   fallback);
}


} // namespace Details
} // namespace Tpetra

12 changes: 12 additions & 0 deletions packages/tpetra/core/src/Tpetra_Details_Behavior.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,18 @@ class Behavior {
/// environment variable.
static bool profilingRegionUseKokkosProfiling();

/// \brief Skip copyAndPermute if possible
///
/// This is disabled by default. You may control this at run time via the
/// <tt>TPETRA_SKIP_COPY_AND_PERMUTE</tt> environment variable.
static bool skipCopyAndPermuteIfPossible();

/// \brief Overlap communication and computation.
///
/// This is disabled by default. You may control this at run time via the
/// <tt>TPETRA_OVERLAP</tt> environment variable.
static bool overlapCommunicationAndComputation();


};

Expand Down
8 changes: 8 additions & 0 deletions packages/tpetra/core/src/Tpetra_Details_Transfer_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,14 @@ class Transfer : public Teuchos::Describable {
void expertSetExportLIDsContiguous<LO,GO,NT>(Transfer<LO, GO, NT> transfer, bool contig);
#endif // DOXYGEN_SHOULD_SKIP_THIS

/// \brief Are source and target map locally fitted?
///
/// Returns whether the source and target Maps are locally fitted on
/// the calling rank. This can be more efficient than calling
/// isLocallyFitted() on the Maps directly, since no indices need to
/// be compared.
bool isLocallyFitted () const;

/// \brief Describe this object in a human-readable way to the given
/// output stream.
///
Expand Down
8 changes: 8 additions & 0 deletions packages/tpetra/core/src/Tpetra_Details_Transfer_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,14 @@ isLocallyComplete () const {
return TransferData_->isLocallyComplete_;
}

// Report whether the source and target Maps are locally fitted on the
// calling rank: true exactly when the number of "same" IDs equals the
// smaller of the two Maps' local element counts.
template <class LO, class GO, class NT>
bool
Transfer<LO, GO, NT>::
isLocallyFitted () const {
  const auto numSourceElts = getSourceMap()->getNodeNumElements();
  const auto numTargetElts = getTargetMap()->getNodeNumElements();
  const auto smallerCount = std::min(numSourceElts, numTargetElts);
  return getNumSameIDs() == smallerCount;
}

template <class LO, class GO, class NT>
void
Transfer<LO, GO, NT>::
Expand Down
67 changes: 67 additions & 0 deletions packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
// @HEADER
// ***********************************************************************
//
// Tpetra: Templated Linear Algebra Services Package
// Copyright (2008) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ************************************************************************
// @HEADER
*/

#include "TpetraCore_config.h"

#if defined(HAVE_TPETRA_EXPLICIT_INSTANTIATION)

// We protect the contents of this file with macros, to assist
// applications that circumvent Trilinos' build system. (We do NOT
// recommend this.) That way, they can still build this file, but as
// long as the macros have correct definitions, they won't build
// anything that's not enabled.

#include "KokkosCompat_ClassicNodeAPI_Wrapper.hpp"
#include "Tpetra_Details_getGraphOffRankOffsets_decl.hpp"
#include "Tpetra_Details_getGraphOffRankOffsets_def.hpp"
#include "TpetraCore_ETIHelperMacros.h"

// Explicit template instantiation (ETI) of getGraphOffRankOffsets.
// This whole file is only compiled when HAVE_TPETRA_EXPLICIT_INSTANTIATION
// is defined (see the enclosing #if).
namespace Tpetra {

// NOTE(review): presumably declares the mangled type aliases that the
// instantiation macros below refer to -- confirm in
// TpetraCore_ETIHelperMacros.h.
TPETRA_ETI_MANGLING_TYPEDEFS()

// Applies the getGraphOffRankOffsets instantiation macro through
// TPETRA_INSTANTIATE_LGN (presumably once per enabled LocalOrdinal,
// GlobalOrdinal, Node combination -- TODO confirm).
TPETRA_INSTANTIATE_LGN( TPETRA_DETAILS_IMPL_GETGRAPHOFFRANKOFFSETS_INSTANT )

} // namespace Tpetra

#endif // Whether we should build this specialization
Loading

0 comments on commit 6738b5f

Please sign in to comment.