From d27ee289b0210c9c598d8d76119bb2b517a785e7 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 1 Jun 2021 15:17:36 -0600 Subject: [PATCH] Tpetra CrsGraph: Add method "getLocalOffRankOffsets" --- packages/tpetra/core/src/CMakeLists.txt | 8 +- .../tpetra/core/src/Tpetra_CrsGraph_decl.hpp | 18 +++ .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 58 +++++++ .../Tpetra_Details_getGraphOffRankOffsets.cpp | 67 ++++++++ ...ra_Details_getGraphOffRankOffsets_decl.hpp | 146 ++++++++++++++++++ ...tra_Details_getGraphOffRankOffsets_def.hpp | 134 ++++++++++++++++ .../core/src/Tpetra_Details_residual.hpp | 67 ++++---- .../test/CrsGraph/CrsGraph_UnitTests1.cpp | 59 ++++++- 8 files changed, 523 insertions(+), 34 deletions(-) create mode 100644 packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets.cpp create mode 100644 packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets_decl.hpp create mode 100644 packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets_def.hpp diff --git a/packages/tpetra/core/src/CMakeLists.txt b/packages/tpetra/core/src/CMakeLists.txt index f72f5dd50099..1195b5634ce3 100644 --- a/packages/tpetra/core/src/CMakeLists.txt +++ b/packages/tpetra/core/src/CMakeLists.txt @@ -317,7 +317,7 @@ FUNCTION(TPETRA_PROCESS_ALL_LGN_TEMPLATES OUTPUT_FILES TEMPLATE_FILE FOREACH(LO ${LOCALORDINAL_TYPES}) TPETRA_MANGLE_TEMPLATE_PARAMETER(LO_MANGLED "${LO}") TPETRA_SLG_MACRO_NAME(LO_MACRO_NAME "${LO}") - + TPETRA_PROCESS_ONE_LGN_TEMPLATE(OUT_FILE "${TEMPLATE_FILE}" "${CLASS_NAME}" "${CLASS_MACRO_NAME}" "${LO_MANGLED}" "${GO_MANGLED}" "${NT_MANGLED}" @@ -619,7 +619,7 @@ IF (${PACKAGE_NAME}_ENABLE_EXPLICIT_INSTANTIATION) "${TpetraCore_ETI_NODES}" TRUE) LIST(APPEND SOURCES ${LOCALDEEPCOPYROWMATRIX_OUTPUT_FILES}) - + # Generate ETI .cpp files for the RowMatrix -> CrsMatrix overload of # Tpetra::createDeepCopy. Do this only for non-integer Scalar # types, since we really only need this function for linear solvers. 
@@ -634,7 +634,7 @@ IF (${PACKAGE_NAME}_ENABLE_EXPLICIT_INSTANTIATION) FALSE) LIST(APPEND SOURCES ${CREATEDEEPCOPY_CRSMATRIX_OUTPUT_FILES}) ENDIF () - + # Generate ETI .cpp files for Tpetra::LocalCrsMatrixOperator. TPETRA_PROCESS_ALL_SN_TEMPLATES(LOCALCRSMATRIXOPERATOR_OUTPUT_FILES "Tpetra_ETI_SC_NT.tmpl" "LocalCrsMatrixOperator" @@ -777,5 +777,5 @@ SET_PROPERTY( # / from this directory, or to / from the 'impl' subdirectory. That ensures # that running "make" will also rerun CMake in order to regenerate Makefiles. # -# Here's another change, another, and another. +# Here's another change, another, and another and yet another. # diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp index 01f2b3a79916..a7bc7f167021 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp @@ -303,6 +303,9 @@ namespace Tpetra { using nonconst_global_inds_host_view_type = typename row_graph_type::nonconst_global_inds_host_view_type; + using offset_device_view_type = + typename row_ptrs_device_view_type::non_const_type; + //KDDKDD INROW using local_inds_host_view_type = //KDDKDD INROW typename local_inds_dualv_type::t_host::const_type; @@ -1387,6 +1390,10 @@ namespace Tpetra { void getLocalDiagOffsets (const Kokkos::View& offsets) const; + /// \brief Get offsets of the off-rank entries in the graph. + void + getLocalOffRankOffsets (offset_device_view_type& offsets) const; + /// \brief Backwards compatibility overload of the above method. /// /// This method takes a Teuchos::ArrayRCP instead of a @@ -2064,6 +2071,8 @@ namespace Tpetra { /// void computeGlobalConstants (); + bool haveLocalOffRankOffsets() const { return haveLocalOffRankOffsets_;} + protected: /// \brief Compute local constants, if they have not yet been computed. /// @@ -2410,6 +2419,13 @@ namespace Tpetra { /// This may also exist with 1-D storage, if storage is unpacked. 
num_row_entries_type k_numRowEntries_; + /// \brief The offsets for off-rank entries. + /// + /// When off-rank entries are sorted last, this rowPtr-like view + /// contains the offsets. It is computed on the first call to + /// getLocalOffRankOffsets(). + mutable offset_device_view_type k_offRankOffsets_; + //@} /// \brief Status of the graph's storage, when not in a @@ -2438,6 +2454,8 @@ namespace Tpetra { bool haveLocalConstants_ = false; //! Whether all processes have computed global constants. bool haveGlobalConstants_ = false; + //! Whether k_offRankOffsets_ has been computed. + mutable bool haveLocalOffRankOffsets_ = false; typedef typename std::map > nonlocals_type; diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index 6a3df5701921..96f29198b6bc 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -48,6 +48,7 @@ #include "Tpetra_Details_copyOffsets.hpp" #include "Tpetra_Details_gathervPrint.hpp" #include "Tpetra_Details_getGraphDiagOffsets.hpp" +#include "Tpetra_Details_getGraphOffRankOffsets.hpp" #include "Tpetra_Details_makeColMap.hpp" #include "Tpetra_Details_Profiling.hpp" #include "Tpetra_Details_getEntryOnHost.hpp" @@ -6698,6 +6699,60 @@ namespace Tpetra { } // debug_ } + template + void + CrsGraph:: + getLocalOffRankOffsets (offset_device_view_type& offsets) const + { + using std::endl; + const char tfecfFuncName[] = "getLocalOffRankOffsets: "; + const bool verbose = verbose_; + + std::unique_ptr prefix; + if (verbose) { + prefix = this->createPrefix("CrsGraph", "getLocalOffRankOffsets"); + std::ostringstream os; + os << *prefix << "offsets.extent(0)=" << offsets.extent(0) + << endl; + std::cerr << os.str(); + } + + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (! hasColMap (), std::runtime_error, "The graph must have a column Map."); + // Instead of throwing, we could also copy the rowPtr to k_offRankOffsets_. + // Offsets can only be computed once fill complete; fail instead of returning an uninitialized view. + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete (), std::runtime_error, "The graph must be fill complete."); 
+ + const size_t lclNumRows = this->getNodeNumRows (); + + if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) == lclNumRows+1) { + offsets = k_offRankOffsets_; + return; + } + haveLocalOffRankOffsets_ = false; + k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing("offRankOffset"), lclNumRows+1); + offsets = k_offRankOffsets_; + + const map_type& colMap = * (this->getColMap ()); + const map_type& domMap = * (this->getDomainMap ()); + + // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just + // the subset of Map functionality that we need below. + auto lclColMap = colMap.getLocalMap (); + auto lclDomMap = domMap.getLocalMap (); + + // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this + // setup, at least on the host. For CUDA, we have to use LocalMap + // (that comes from each of the two Maps). + + TEUCHOS_ASSERT(this->isSorted ()); + auto lclGraph = this->getLocalGraph (); + ::Tpetra::Details::getGraphOffRankOffsets (k_offRankOffsets_, + lclColMap, lclDomMap, + lclGraph); + haveLocalOffRankOffsets_ = true; + } + namespace { // (anonymous) // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see @@ -7548,6 +7603,7 @@ namespace Tpetra { std::swap(graph.rowPtrsUnpacked_dev_, this->rowPtrsUnpacked_dev_); std::swap(graph.rowPtrsUnpacked_host_, this->rowPtrsUnpacked_host_); + std::swap(graph.k_offRankOffsets_, this->k_offRankOffsets_); std::swap(graph.lclIndsUnpacked_wdv, this->lclIndsUnpacked_wdv); std::swap(graph.gblInds_wdv, this->gblInds_wdv); @@ -7563,6 +7619,7 @@ namespace Tpetra { std::swap(graph.noRedundancies_, this->noRedundancies_); std::swap(graph.haveLocalConstants_, this->haveLocalConstants_); std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_); + std::swap(graph.haveLocalOffRankOffsets_, this->haveLocalOffRankOffsets_); std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_); @@ -7625,6 +7682,7 @@ 
namespace Tpetra { output = this->noRedundancies_ == graph.noRedundancies_ ? output : false; output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false; output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false; + output = this->haveLocalOffRankOffsets_ == graph.haveLocalOffRankOffsets_ ? output : false; output = this->sortGhostsAssociatedWithEachProcessor_ == this->sortGhostsAssociatedWithEachProcessor_ ? output : false; // Compare nonlocals_ -- std::map > diff --git a/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets.cpp b/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets.cpp new file mode 100644 index 000000000000..bdd62e8538ab --- /dev/null +++ b/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets.cpp @@ -0,0 +1,67 @@ +/* +// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +// @HEADER +*/ + +#include "TpetraCore_config.h" + +#if defined(HAVE_TPETRA_EXPLICIT_INSTANTIATION) + +// We protect the contents of this file with macros, to assist +// applications that circumvent Trilinos' build system. (We do NOT +// recommend this.) That way, they can still build this file, but as +// long as the macros have correct definitions, they won't build +// anything that's not enabled. 
+ +#include "KokkosCompat_ClassicNodeAPI_Wrapper.hpp" +#include "Tpetra_Details_getGraphOffRankOffsets_decl.hpp" +#include "Tpetra_Details_getGraphOffRankOffsets_def.hpp" +#include "TpetraCore_ETIHelperMacros.h" + +namespace Tpetra { + + TPETRA_ETI_MANGLING_TYPEDEFS() + + TPETRA_INSTANTIATE_LGN( TPETRA_DETAILS_IMPL_GETGRAPHOFFRANKOFFSETS_INSTANT ) + +} // namespace Tpetra + +#endif // Whether we should build this specialization diff --git a/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets_decl.hpp b/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets_decl.hpp new file mode 100644 index 000000000000..8f7d85271278 --- /dev/null +++ b/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets_decl.hpp @@ -0,0 +1,146 @@ +/* +// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +// @HEADER +*/ + +#ifndef TPETRA_DETAILS_GETGRAPHOFFRANKOFFSETS_DECL_HPP +#define TPETRA_DETAILS_GETGRAPHOFFRANKOFFSETS_DECL_HPP + +/// \file Tpetra_Details_getGraphOffRankOffsets_decl.hpp +/// \brief Declare and define the function +/// Tpetra::Details::getGraphOffRankOffsets, an implementation detail +/// of Tpetra::CrsGraph. + +#include "TpetraCore_config.h" +#include "Kokkos_Core.hpp" +#include "Kokkos_StaticCrsGraph.hpp" +#include "Tpetra_Details_LocalMap.hpp" +#include + +namespace Tpetra { +namespace Details { +namespace Impl { + +/// \brief Implementation detail of +/// Tpetra::Details::getGraphOffRankOffsets, which in turn is an +/// implementation detail of Tpetra::CrsGraph. +/// +/// FIXME (mfh 12 Mar 2016) There's currently no way to make a +/// MemoryUnmanaged Kokkos::StaticCrsGraph. Thus, we have to do this +/// separately for its column indices. We want the column indices to +/// be unmanaged because we need to take subviews in this kernel. 
+/// Taking a subview of a managed View updates the reference count, +/// which is a thread scalability bottleneck. +/// +/// mfh 12 Mar 2016: Tpetra::CrsGraph::getLocalOffRankOffsets returns +/// offsets as size_t. However, see Github Issue #213. +template +class GetGraphOffRankOffsets { +public: + typedef typename DeviceType::device_type device_type; + typedef OffsetType offset_type; + typedef ::Kokkos::View offsets_type; + typedef ::Kokkos::StaticCrsGraph local_graph_type; + typedef ::Tpetra::Details::LocalMap local_map_type; + typedef ::Kokkos::View row_offsets_type; + // This is unmanaged for performance, because we need to take + // subviews inside the functor. + typedef ::Kokkos::View lcl_col_inds_type; + + //! Constructor; also runs the functor. + GetGraphOffRankOffsets (const offsets_type& OffRankOffsets, + const local_map_type& lclColMap, + const local_map_type& lclDomMap, + const row_offsets_type& ptr, + const lcl_col_inds_type& ind); + + //! Kokkos::parallel_for loop body. + KOKKOS_FUNCTION void operator() (const LO& lclRowInd) const; + +private: + offsets_type OffRankOffsets_; + local_map_type lclColMap_; + local_map_type lclDomMap_; + row_offsets_type ptr_; + lcl_col_inds_type ind_; + LO lclNumRows_; +}; + +} // namespace Impl + +template +void +getGraphOffRankOffsets (const OffsetsType& OffRankOffsets, + const LclMapType& lclColMap, + const LclMapType& lclDomMap, + const LclGraphType& lclGraph) +{ + typedef typename OffsetsType::non_const_value_type offset_type; + typedef typename LclMapType::local_ordinal_type LO; + typedef typename LclMapType::global_ordinal_type GO; + typedef typename LclMapType::device_type DT; + + typedef Impl::GetGraphOffRankOffsets impl_type; + + // The functor's constructor runs the functor. 
+ impl_type impl (OffRankOffsets, lclColMap, lclDomMap, lclGraph.row_map, lclGraph.entries); +} + +} // namespace Details +} // namespace Tpetra + +#endif // TPETRA_DETAILS_GETGRAPHOFFRANKOFFSETS_DECL_HPP diff --git a/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets_def.hpp b/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets_def.hpp new file mode 100644 index 000000000000..9431c1af42f7 --- /dev/null +++ b/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets_def.hpp @@ -0,0 +1,134 @@ +/* +// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +// @HEADER +*/ + +#ifndef TPETRA_DETAILS_GETGRAPHOFFRANKOFFSETS_DEF_HPP +#define TPETRA_DETAILS_GETGRAPHOFFRANKOFFSETS_DEF_HPP + +/// \file Tpetra_Details_getGraphOffRankOffsets_def.hpp +/// \brief Define the implementation of the function +/// Tpetra::Details::getGraphOffRankOffsets, an implementation detail +/// of Tpetra::CrsGraph. + +#include "Tpetra_Details_OrdinalTraits.hpp" +#include "Tpetra_Map.hpp" +#include "KokkosSparse_findRelOffset.hpp" + +namespace Tpetra { +namespace Details { +namespace Impl { + +/// \brief Implementation detail of +/// Tpetra::Details::getGraphOffRankOffsets, which in turn is an +/// implementation detail of Tpetra::CrsGraph. +/// +/// FIXME (mfh 12 Mar 2016) There's currently no way to make a +/// MemoryUnmanaged Kokkos::StaticCrsGraph. Thus, we have to do this +/// separately for its column indices. We want the column indices to +/// be unmanaged because we need to take subviews in this kernel. +/// Taking a subview of a managed View updates the reference count, +/// which is a thread scalability bottleneck. +/// +/// mfh 12 Mar 2016: Tpetra::CrsGraph::getLocalOffRankOffsets returns +/// offsets as size_t. However, see Github Issue #213. 
+template +GetGraphOffRankOffsets:: +GetGraphOffRankOffsets (const offsets_type& OffRankOffsets, + const local_map_type& lclColMap, + const local_map_type& lclDomMap, + const row_offsets_type& ptr, + const lcl_col_inds_type& ind) : + OffRankOffsets_ (OffRankOffsets), + lclColMap_ (lclColMap), + lclDomMap_ (lclDomMap), + ptr_ (ptr), + ind_ (ind) +{ + typedef typename device_type::execution_space execution_space; + typedef Kokkos::RangePolicy policy_type; + + lclNumRows_ = ptr.extent(0)-1; + policy_type range (0, ptr.extent(0)); + Kokkos::parallel_for (range, *this); +} + +template +KOKKOS_FUNCTION void +GetGraphOffRankOffsets:: +operator() (const LO& lclRowInd) const +{ + const LO INVALID = + Tpetra::Details::OrdinalTraits::invalid (); + + if (lclRowInd == lclNumRows_) + OffRankOffsets_[lclRowInd] = ptr_[lclRowInd]; + else { + // TODO: use parallel reduce + size_t offset = ptr_[lclRowInd]; + for (; offset < ptr_[lclRowInd+1]; ++offset) { + const LO lclColInd = ind_[offset]; + const GO gblColInd = lclColMap_.getGlobalElement (lclColInd); + const LO lclDomInd = lclDomMap_.getLocalElement (gblColInd); + // The first column with no local index in the domain Map is the answer. + if (lclDomInd == INVALID) break; + } + OffRankOffsets_[lclRowInd] = offset; + } +} + +} // namespace Impl +} // namespace Details +} // namespace Tpetra + +// Explicit template instantiation macro for +// Tpetra::Details::Impl::GetGraphOffRankOffsets. NOT FOR USERS!!! Must +// be used inside the Tpetra namespace. 
+#define TPETRA_DETAILS_IMPL_GETGRAPHOFFRANKOFFSETS_INSTANT( LO, GO, NODE ) \ + template class Details::Impl::GetGraphOffRankOffsets< LO, GO, NODE::device_type >; + +#endif // TPETRA_DETAILS_GETGRAPHOFFRANKOFFSETS_DEF_HPP diff --git a/packages/tpetra/core/src/Tpetra_Details_residual.hpp b/packages/tpetra/core/src/Tpetra_Details_residual.hpp index b6f41847aa4b..beff28c317c3 100644 --- a/packages/tpetra/core/src/Tpetra_Details_residual.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_residual.hpp @@ -66,6 +66,7 @@ void localResidual(const CrsMatrix & A, const MultiVector & X_colmap, const MultiVector & B, MultiVector & R, + const Kokkos::View& offsets, const MultiVector * X_domainmap=nullptr) { using Tpetra::Details::ProfilingRegion; using Teuchos::NO_TRANS; @@ -214,19 +215,18 @@ void localResidual(const CrsMatrix & A, Kokkos::parallel_for(Kokkos::TeamThreadRange (dev, 0, rows_per_team),[&] (const LO& loop) { const LO lclRow = static_cast (dev.league_rank ()) * rows_per_team + loop; const LO numRows = A_lcl.numRows (); - if (lclRow >= numRows) { return; } - - const auto A_row = A_lcl.rowConst(lclRow); - const LO row_length = static_cast (A_row.length); - residual_value_type A_x = KAT::zero (); - - Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, row_length), [&] (const LO iEntry, residual_value_type& lsum) { - const auto A_val = A_row.value(iEntry); - const auto lclCol = A_row.colidx(iEntry); - if (lclCol < numRows) + const LO offRankOffset = offsets(lclRow); + const size_t start = A_lcl.graph.row_map(lclRow); + const size_t end = A_lcl.graph.row_map(lclRow+1); + residual_value_type A_x = KAT::zero (); + + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, start, end), [&] (const LO iEntry, residual_value_type& lsum) { + const auto A_val = A_lcl.values(iEntry); + const auto lclCol = A_lcl.graph.entries(iEntry); + if (iEntry < offRankOffset) lsum += A_val * X_domainmap_lcl(lclCol,0); else lsum += A_val * X_colmap_lcl(lclCol,0); @@ -237,7 +237,7 @@ void 
localResidual(const CrsMatrix & A, }); });//end parallel_for TeamThreadRange });//end parallel_for "residual-vector" - + } } else { // MultiVector case @@ -280,27 +280,29 @@ void localResidual(const CrsMatrix & A, Kokkos::parallel_for(Kokkos::TeamThreadRange (dev, 0, rows_per_team),[&] (const LO& loop) { const LO lclRow = static_cast (dev.league_rank ()) * rows_per_team + loop; const LO numRows = A_lcl.numRows (); - if (lclRow >= numRows) { return; } - const auto A_row = A_lcl.rowConst(lclRow); - const LO row_length = static_cast (A_row.length); + + const LO offRankOffset = offsets(lclRow); + const size_t start = A_lcl.graph.row_map(lclRow); + const size_t end = A_lcl.graph.row_map(lclRow+1); + for(LO v=0; v & Aop, using import_type = typename CrsMatrix::import_type; using export_type = typename CrsMatrix::export_type; using MV = MultiVector; + using graph_type = Tpetra::CrsGraph; + using local_graph_type = typename graph_type::local_graph_type; + using offset_type = typename graph_type::offset_device_view_type; // We treat the case of a replicated MV output specially. const bool R_is_replicated = @@ -403,6 +408,10 @@ void residual(const Operator & Aop, X_colMap = rcp_const_cast (X_colMapNonConst); } + offset_type offsets; + if (restrictedMode) + A.getCrsGraph()->getLocalOffRankOffsets(offsets); + // Get a vector for the R_rowMap output residual, handling the // non-constant stride and exporter cases. Since R gets clobbered // we don't need to worry about the data in it @@ -451,9 +460,9 @@ void residual(const Operator & Aop, if (! 
exporter.is_null ()) { if (restrictedMode && !importer.is_null ()) - localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, &X_in); + localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets, &X_in); else - localResidual (A, *X_colMap, *B_rowMap, *R_rowMap); + localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets); { ProfilingRegion regionExport ("Tpetra::CrsMatrix::residual: R Export"); @@ -471,14 +480,14 @@ void residual(const Operator & Aop, // if (! R_in.isConstantStride () ) { // We need to be sure to do a copy out in this case. - localResidual (A, *X_colMap, *B_rowMap, *R_rowMap); + localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets); Tpetra::deep_copy (R_in, *R_rowMap); } else { if (restrictedMode && !importer.is_null ()) - localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, &X_in); + localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets, &X_in); else - localResidual (A, *X_colMap, *B_rowMap, *R_rowMap); + localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets); } } diff --git a/packages/tpetra/core/test/CrsGraph/CrsGraph_UnitTests1.cpp b/packages/tpetra/core/test/CrsGraph/CrsGraph_UnitTests1.cpp index b668f308b039..34ff87cfdcac 100644 --- a/packages/tpetra/core/test/CrsGraph/CrsGraph_UnitTests1.cpp +++ b/packages/tpetra/core/test/CrsGraph/CrsGraph_UnitTests1.cpp @@ -583,6 +583,62 @@ namespace { // (anonymous) TEST_EQUALITY_CONST(globalSuccess_int, 0); } + TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( CrsGraph, Offsets, LO, GO , Node ) + { + typedef Tpetra::CrsGraph GRAPH; + typedef Tpetra::Map map_type; + typedef typename GRAPH::device_type device_type; + + const GST INVALID = Teuchos::OrdinalTraits::invalid (); + // get a comm + RCP > comm = getDefaultComm(); + const int numProcs = comm->getSize(); + // test filtering + if (numProcs > 1) { + const size_t numLocal = 2; + RCP rmap = + rcp (new map_type (INVALID, numLocal, 0, comm)); + ArrayRCP cmap_ind(numLocal); + cmap_ind[0] = comm->getRank()*numLocal; + cmap_ind[1] = ((comm->getRank()+1)*numLocal) 
% (numProcs*numLocal); + RCP cmap = + rcp (new map_type (INVALID, cmap_ind(), 0, comm)); + ArrayRCP rowptr(numLocal+1); + ArrayRCP colind(numLocal); // one unknown per row + rowptr[0] = 0; rowptr[1] = 1; rowptr[2] = 2; + colind[0] = Teuchos::as(0); + colind[1] = Teuchos::as(1); + + RCP G = rcp(new GRAPH(rmap,cmap,0,StaticProfile) ); + TEST_NOTHROW( G->setAllIndices(rowptr,colind) ); + TEST_EQUALITY_CONST( G->hasColMap(), true ); + + TEST_NOTHROW( G->expertStaticFillComplete(rmap,rmap) ); + TEST_EQUALITY( G->getRowMap(), rmap ); + TEST_EQUALITY( G->getColMap(), cmap ); + + auto diagOffsets = Kokkos::View("diagOffsets", numLocal); + G->getLocalDiagOffsets(diagOffsets); + auto diagOffsets_h = Kokkos::create_mirror_view(diagOffsets); + Kokkos::deep_copy(diagOffsets_h, diagOffsets); + TEST_EQUALITY( diagOffsets_h(0), 0 ); + TEST_EQUALITY( diagOffsets_h(1), INVALID ); + + typename GRAPH::offset_device_view_type offRankOffsets; + G->getLocalOffRankOffsets(offRankOffsets); + auto offRankOffsets_h = Kokkos::create_mirror_view(offRankOffsets); + Kokkos::deep_copy(offRankOffsets_h, offRankOffsets); + TEST_EQUALITY( offRankOffsets_h(0), 1 ); + TEST_EQUALITY( offRankOffsets_h(1), 1 ); + + } + + // All procs fail if any node fails + int globalSuccess_int = -1; + Teuchos::reduceAll( *comm, Teuchos::REDUCE_SUM, success ? 
0 : 1, outArg(globalSuccess_int) ); + TEST_EQUALITY_CONST( globalSuccess_int, 0 ); + } + // // INSTANTIATIONS // @@ -599,7 +655,8 @@ namespace { // (anonymous) TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, SortingTests, LO, GO, NODE ) \ TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, TwoArraysESFC, LO, GO, NODE ) \ TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, SetAllIndices, LO, GO, NODE ) \ - TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, StaticProfileMultiInsert, LO, GO, NODE ) + TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, StaticProfileMultiInsert, LO, GO, NODE ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, Offsets, LO, GO, NODE ) TPETRA_ETI_MANGLING_TYPEDEFS()