From 0c49c3df8c882e668c37277ac4a105eaba289404 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 1 Jun 2021 15:17:36 -0600 Subject: [PATCH] Tpetra CrsGraph: Add method "getOffRankOffsets" --- .../tpetra/core/src/Tpetra_CrsGraph_decl.hpp | 12 + .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 236 ++++++++++++++++++ .../Tpetra_Details_getGraphOffRankOffsets.cpp | 67 +++++ ...ra_Details_getGraphOffRankOffsets_decl.hpp | 166 ++++++++++++ ...tra_Details_getGraphOffRankOffsets_def.hpp | 135 ++++++++++ .../core/src/Tpetra_Details_residual.hpp | 72 +++--- 6 files changed, 659 insertions(+), 29 deletions(-) create mode 100644 packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets.cpp create mode 100644 packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets_decl.hpp create mode 100644 packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets_def.hpp diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp index 01f2b3a79916..1a81d9b0ac71 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp @@ -1387,6 +1387,9 @@ namespace Tpetra { void getLocalDiagOffsets (const Kokkos::View& offsets) const; + void + getLocalOffRankOffsets (const Kokkos::View& offsets) const; + /// \brief Backwards compatibility overload of the above method. /// /// This method takes a Teuchos::ArrayRCP instead of a @@ -2064,6 +2067,10 @@ namespace Tpetra { /// void computeGlobalConstants (); + bool haveLocalOffRankOffsets() const { return haveLocalOffRankOffsets_;} + + void computeLocalOffRankOffsets (); + protected: /// \brief Compute local constants, if they have not yet been computed. /// @@ -2383,6 +2390,9 @@ namespace Tpetra { /// The k_numRowEntries_ array has has length getNodeNumRows(), /// again if it is allocated. + public: + typename local_graph_type::row_map_type::const_type k_rowPtrsOffRank_; + protected: /// \brief The type of k_numRowEntries_ (see below). /// /// This View gets used only on host. However, making this @@ -2438,6 +2448,8 @@ namespace Tpetra { bool haveLocalConstants_ = false; //! Whether all processes have computed global constants. bool haveGlobalConstants_ = false; + //! + bool haveLocalOffRankOffsets_ = false; typedef typename std::map > nonlocals_type; diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index 7557b09a28a6..bc80ad714ad3 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -48,6 +48,7 @@ #include "Tpetra_Details_copyOffsets.hpp" #include "Tpetra_Details_gathervPrint.hpp" #include "Tpetra_Details_getGraphDiagOffsets.hpp" +#include "Tpetra_Details_getGraphOffRankOffsets.hpp" #include "Tpetra_Details_makeColMap.hpp" #include "Tpetra_Details_Profiling.hpp" #include "Tpetra_Details_getEntryOnHost.hpp" @@ -4444,6 +4445,24 @@ namespace Tpetra { } + template + void + CrsGraph:: + computeLocalOffRankOffsets () + { + using ::Tpetra::Details::ProfilingRegion; + + ProfilingRegion regionCGC ("Tpetra::CrsGraph::computeLocalOffRankOffsets"); + + // release previous view + k_rowPtrsOffRank_ = typename local_graph_type::row_map_type::non_const_type("offRankOffset",0); + auto k_offRankOffset = typename local_graph_type::row_map_type::non_const_type("offRankOffset",this->getNodeNumRows()); + this->getLocalOffRankOffsets(k_offRankOffset); + k_rowPtrsOffRank_ = k_offRankOffset; + this->haveLocalOffRankOffsets_ = true; + } + + template void CrsGraph:: @@ -6701,6 +6720,204 @@ namespace Tpetra { } // debug_ } + template + void + CrsGraph:: + getLocalOffRankOffsets (const Kokkos::View& offsets) const + { + using std::endl; + using LO = LocalOrdinal; + // using GO = GlobalOrdinal; + const char tfecfFuncName[] = "getLocalOffRankOffsets: "; + const bool verbose = verbose_; + + std::unique_ptr prefix; + if (verbose) { + prefix = this->createPrefix("CrsGraph", "getLocalOffRankOffsets"); + std::ostringstream os; + os << *prefix << "offsets.extent(0)=" << offsets.extent(0) + << endl; + std::cerr << os.str(); + } + + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (! hasColMap (), std::runtime_error, "The graph must have a column Map."); + const LO lclNumRows = static_cast (this->getNodeNumRows ()); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (static_cast (offsets.extent (0)) < lclNumRows, + std::invalid_argument, "offsets.extent(0) = " << + offsets.extent (0) << " < getNodeNumRows() = " << lclNumRows << "."); + + const map_type& rowMap = * (this->getRowMap ()); + const map_type& colMap = * (this->getColMap ()); + const map_type& domMap = * (this->getDomainMap ()); + + // We only use these in debug mode, but since debug mode is a + // run-time option, they need to exist here. That's why we create + // the vector with explicit size zero, to avoid overhead if debug + // mode is off. + bool allRowMapDiagEntriesInColMap = true; + bool allDiagEntriesFound = true; + bool allOffsetsCorrect = true; + bool noOtherWeirdness = true; + using wrong_offsets_type = std::vector >; + wrong_offsets_type wrongOffsets(0); + + // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just + // the subset of Map functionality that we need below. + auto lclRowMap = rowMap.getLocalMap (); + auto lclColMap = colMap.getLocalMap (); + auto lclDomMap = domMap.getLocalMap (); + + // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this + // setup, at least on the host. For CUDA, we have to use LocalMap + // (that comes from each of the two Maps). + + TEUCHOS_ASSERT(this->isSorted ()); + if (isFillComplete ()) { + auto lclGraph = this->getLocalGraph (); + ::Tpetra::Details::getGraphOffRankOffsets (offsets, + lclRowMap, lclColMap, lclDomMap, + lclGraph.row_map, + lclGraph.entries); + } + // else { + // // NOTE (mfh 22 Feb 2017): We have to run this code on host, + // // since the graph is not fill complete. The previous version + // // of this code assumed UVM; this version does not. + // auto offsets_h = Kokkos::create_mirror_view (offsets); + + // for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) { + // // Find the diagonal entry. Since the row Map and column Map + // // may differ, we have to compare global row and column + // // indices, not local. + // const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd); + // const GO gblColInd = gblRowInd; + // const LO lclColInd = lclColMap.getLocalElement (gblColInd); + + // if (lclColInd == Tpetra::Details::OrdinalTraits::invalid ()) { + // allRowMapDiagEntriesInColMap = false; + // offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits::invalid (); + // } + // else { + // const RowInfo rowInfo = this->getRowInfo (lclRowInd); + // if (static_cast (rowInfo.localRow) == lclRowInd && + // rowInfo.numEntries > 0) { + + // auto colInds = this->getLocalKokkosRowView (rowInfo); + // const size_t hint = 0; // not needed for this algorithm + // const size_t offset = + // KokkosSparse::findRelOffset (colInds, rowInfo.numEntries, + // lclColInd, hint, sorted); + // offsets_h(lclRowInd) = offset; + + // if (debug_) { + // // Now that we have what we think is an offset, make sure + // // that it really does point to the diagonal entry. Offsets + // // are _relative_ to each row, not absolute (for the whole + // // (local) graph). + // Teuchos::ArrayView lclColInds; + // try { + // this->getLocalRowView (lclRowInd, lclColInds); + // } + // catch (...) { + // noOtherWeirdness = false; + // } + // // Don't continue with error checking if the above failed. + // if (noOtherWeirdness) { + // const size_t numEnt = lclColInds.size (); + // if (offset >= numEnt) { + // // Offsets are relative to each row, so this means that + // // the offset is out of bounds. + // allOffsetsCorrect = false; + // wrongOffsets.push_back (std::make_pair (lclRowInd, offset)); + // } else { + // const LO actualLclColInd = lclColInds[offset]; + // const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd); + // if (actualGblColInd != gblColInd) { + // allOffsetsCorrect = false; + // wrongOffsets.push_back (std::make_pair (lclRowInd, offset)); + // } + // } + // } + // } // debug_ + // } + // else { // either row is empty, or something went wrong w/ getRowInfo() + // offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits::invalid (); + // allDiagEntriesFound = false; + // } + // } // whether lclColInd is a valid local column index + // } // for each local row + + // Kokkos::deep_copy (offsets, offsets_h); + // } + // whether the graph is fill complete + + if (verbose && wrongOffsets.size () != 0) { + std::ostringstream os; + os << *prefix << "Wrong offsets: ["; + for (size_t k = 0; k < wrongOffsets.size (); ++k) { + os << "(" << wrongOffsets[k].first << "," + << wrongOffsets[k].second << ")"; + if (k + 1 < wrongOffsets.size ()) { + os << ", "; + } + } + os << "]" << endl; + std::cerr << os.str(); + } + + if (debug_) { + using Teuchos::reduceAll; + using std::endl; + Teuchos::RCP > comm = this->getComm (); + const bool localSuccess = + allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect; + const int numResults = 5; + int lclResults[5]; + lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0; + lclResults[1] = allDiagEntriesFound ? 1 : 0; + lclResults[2] = allOffsetsCorrect ? 1 : 0; + lclResults[3] = noOtherWeirdness ? 1 : 0; + // min-all-reduce will compute least rank of all the processes + // that didn't succeed. + lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize (); + + int gblResults[5]; + gblResults[0] = 0; + gblResults[1] = 0; + gblResults[2] = 0; + gblResults[3] = 0; + gblResults[4] = 0; + reduceAll (*comm, Teuchos::REDUCE_MIN, + numResults, lclResults, gblResults); + + if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1 + || gblResults[3] != 1) { + std::ostringstream os; // build error message + os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", " + "possibly among others): " << endl; + if (gblResults[0] == 0) { + os << " - The column Map does not contain at least one diagonal entry " + "of the graph." << endl; + } + if (gblResults[1] == 0) { + os << " - On one or more processes, some row does not contain a " + "diagonal entry." << endl; + } + if (gblResults[2] == 0) { + os << " - On one or more processes, some offsets are incorrect." + << endl; + } + if (gblResults[3] == 0) { + os << " - One or more processes had some other error." + << endl; + } + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str()); + } + } // debug_ + } + namespace { // (anonymous) // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see @@ -7551,6 +7768,7 @@ namespace Tpetra { std::swap(graph.rowPtrsUnpacked_dev_, this->rowPtrsUnpacked_dev_); std::swap(graph.rowPtrsUnpacked_host_, this->rowPtrsUnpacked_host_); + std::swap(graph.k_rowPtrsOffRank_, this->k_rowPtrsOffRank_); std::swap(graph.lclIndsUnpacked_wdv, this->lclIndsUnpacked_wdv); std::swap(graph.gblInds_wdv, this->gblInds_wdv); @@ -7566,6 +7784,7 @@ namespace Tpetra { std::swap(graph.noRedundancies_, this->noRedundancies_); std::swap(graph.haveLocalConstants_, this->haveLocalConstants_); std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_); + std::swap(graph.haveLocalOffRankOffsets_, this->haveLocalOffRankOffsets_); std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_); @@ -7628,6 +7847,7 @@ namespace Tpetra { output = this->noRedundancies_ == graph.noRedundancies_ ? output : false; output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false; output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false; + output = this->haveLocalOffRankOffsets_ == graph.haveLocalOffRankOffsets_ ? output : false; output = this->sortGhostsAssociatedWithEachProcessor_ == this->sortGhostsAssociatedWithEachProcessor_ ? output : false; // Compare nonlocals_ -- std::map > @@ -7662,6 +7882,22 @@ namespace Tpetra { output = rowPtrsThis(i) == rowPtrsGraph(i) ? output : false; } + if (this->haveLocalOffRankOffsets() && graph.haveLocalOffRankOffsets()) { + // Compare this->k_rowPtrsOffRank_ isa Kokkos::View + output = this->k_rowPtrsOffRank_.extent(0) == graph.k_rowPtrsOffRank_.extent(0) ? output : false; + if(output && this->k_rowPtrsOffRank_.extent(0) > 0) + { + typename local_graph_type::row_map_type::const_type::HostMirror k_rowPtrsOffRank_host_this = Kokkos::create_mirror_view(this->k_rowPtrsOffRank_); + typename local_graph_type::row_map_type::const_type::HostMirror k_rowPtrsOffRank_host_graph= Kokkos::create_mirror_view(graph.k_rowPtrsOffRank_); + Kokkos::deep_copy(k_rowPtrsOffRank_host_this, this->k_rowPtrsOffRank_); + Kokkos::deep_copy(k_rowPtrsOffRank_host_graph, graph.k_rowPtrsOffRank_); + for(size_t i=0; output && i output = this->lclIndsUnpacked_wdv.extent(0) == graph.lclIndsUnpacked_wdv.extent(0) ? output : false; if(output && this->lclIndsUnpacked_wdv.extent(0) > 0) diff --git a/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets.cpp b/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets.cpp new file mode 100644 index 000000000000..bdd62e8538ab --- /dev/null +++ b/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets.cpp @@ -0,0 +1,67 @@ +/* +// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +// @HEADER +*/ + +#include "TpetraCore_config.h" + +#if defined(HAVE_TPETRA_EXPLICIT_INSTANTIATION) + +// We protect the contents of this file with macros, to assist +// applications that circumvent Trilinos' build system. (We do NOT +// recommend this.) That way, they can still build this file, but as +// long as the macros have correct definitions, they won't build +// anything that's not enabled. + +#include "KokkosCompat_ClassicNodeAPI_Wrapper.hpp" +#include "Tpetra_Details_getGraphOffRankOffsets_decl.hpp" +#include "Tpetra_Details_getGraphOffRankOffsets_def.hpp" +#include "TpetraCore_ETIHelperMacros.h" + +namespace Tpetra { + + TPETRA_ETI_MANGLING_TYPEDEFS() + + TPETRA_INSTANTIATE_LGN( TPETRA_DETAILS_IMPL_GETGRAPHOFFRANKOFFSETS_INSTANT ) + +} // namespace Tpetra + +#endif // Whether we should build this specialization diff --git a/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets_decl.hpp b/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets_decl.hpp new file mode 100644 index 000000000000..b6f4af06db2e --- /dev/null +++ b/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets_decl.hpp @@ -0,0 +1,166 @@ +/* +// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +// @HEADER +*/ + +#ifndef TPETRA_DETAILS_GETGRAPHOFFRANKOFFSETS_DECL_HPP +#define TPETRA_DETAILS_GETGRAPHOFFRANKOFFSETS_DECL_HPP + +/// \file Tpetra_Details_getGraphOffRankOffsets_decl.hpp +/// \brief Declare and define the function +/// Tpetra::Details::getGraphOffRankOffsets, an implementation detail +/// of Tpetra::CrsGraph. + +#include "TpetraCore_config.h" +#include "Kokkos_Core.hpp" +#include "Kokkos_StaticCrsGraph.hpp" +#include "Tpetra_Details_LocalMap.hpp" +#include + +namespace Tpetra { +namespace Details { +namespace Impl { + +/// \brief Implementation detail of +/// Tpetra::Details::getGraphOffRankOffsets, which in turn is an +/// implementation detail of Tpetra::CrsGraph. +/// +/// FIXME (mfh 12 Mar 2016) There's currently no way to make a +/// MemoryUnmanaged Kokkos::StaticCrsGraph. Thus, we have to do this +/// separately for its column indices. We want the column indices to +/// be unmanaged because we need to take subviews in this kernel. +/// Taking a subview of a managed View updates the reference count, +/// which is a thread scalability bottleneck. +/// +/// mfh 12 Mar 2016: Tpetra::CrsGraph::getLocalOffRankOffsets returns +/// offsets as size_t. However, see Github Issue #213. +template +class GetGraphOffRankOffsets { +public: + typedef typename DeviceType::device_type device_type; + typedef OffsetType offset_type; + typedef ::Kokkos::View offsets_type; + typedef ::Kokkos::StaticCrsGraph local_graph_type; + typedef ::Tpetra::Details::LocalMap local_map_type; + typedef ::Kokkos::View row_offsets_type; + // This is unmanaged for performance, because we need to take + // subviews inside the functor. + typedef ::Kokkos::View lcl_col_inds_type; + + //! Constructor; also runs the functor. + GetGraphOffRankOffsets (const offsets_type& OffRankOffsets, + const local_map_type& lclRowMap, + const local_map_type& lclColMap, + const local_map_type& lclDomMap, + const row_offsets_type& ptr, + const lcl_col_inds_type& ind); + + //! Kokkos::parallel_for loop body. + KOKKOS_FUNCTION void operator() (const LO& lclRowInd) const; + +private: + offsets_type OffRankOffsets_; + local_map_type lclRowMap_; + local_map_type lclColMap_; + local_map_type lclDomMap_; + row_offsets_type ptr_; + lcl_col_inds_type ind_; + LO lclNumRows_; +}; + +} // namespace Impl + +template +void +getGraphOffRankOffsets (const OffsetsType& OffRankOffsets, + const LclMapType& lclRowMap, + const LclMapType& lclColMap, + const LclMapType& lclDomMap, + const RowOffsetsType& ptr, + const LclColIndsType& ind) +{ + static_assert (Kokkos::Impl::is_view::value, + "RowOffsetsType (the type of ptr) must be a Kokkos::View."); + static_assert (Kokkos::Impl::is_view::value, + "LclColIndsType (the type of ind) must be a Kokkos::View."); + static_assert (static_cast (RowOffsetsType::rank) == 1, + "RowOffsetsType (the type of ptr) must be a rank-1 Kokkos::View."); + static_assert (static_cast (LclColIndsType::rank) == 1, + "LclColIndsType (the type of ind) must be a rank-1 Kokkos::View."); + typedef typename LclColIndsType::non_const_value_type local_ordinal_type; + static_assert (std::is_integral::value, + "The type of each entry of ind (the array of column indices) " + "must be an integer."); + typedef typename OffsetsType::non_const_value_type offset_type; + typedef typename RowOffsetsType::non_const_value_type row_offset_type; + static_assert (std::is_integral::value, + "The type of each entry of ptr must be an integer."); + + typedef typename LclMapType::local_ordinal_type LO; + typedef typename LclMapType::global_ordinal_type GO; + typedef typename LclMapType::device_type DT; + + typedef Impl::GetGraphOffRankOffsets impl_type; + // The functor's constructor runs the functor. + impl_type impl (OffRankOffsets, lclRowMap, lclColMap, lclDomMap, ptr, ind); +} + +} // namespace Details +} // namespace Tpetra + +#endif // TPETRA_DETAILS_GETGRAPHOFFRANKOFFSETS_DECL_HPP diff --git a/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets_def.hpp b/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets_def.hpp new file mode 100644 index 000000000000..27981d11ec2b --- /dev/null +++ b/packages/tpetra/core/src/Tpetra_Details_getGraphOffRankOffsets_def.hpp @@ -0,0 +1,135 @@ +/* +// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +// @HEADER +*/ + +#ifndef TPETRA_DETAILS_GETGRAPHOFFRANKOFFSETS_DEF_HPP +#define TPETRA_DETAILS_GETGRAPHOFFRANKOFFSETS_DEF_HPP + +/// \file Tpetra_Details_getGraphOffRankOffsets_def.hpp +/// \brief Define the implementation of the function +/// Tpetra::Details::getGraphOffRankOffsets, an implementation detail +/// of Tpetra::CrsGraph. + +#include "Tpetra_Details_OrdinalTraits.hpp" +#include "Tpetra_Map.hpp" +#include "KokkosSparse_findRelOffset.hpp" + +namespace Tpetra { +namespace Details { +namespace Impl { + +/// \brief Implementation detail of +/// Tpetra::Details::getGraphOffRankOffsets, which in turn is an +/// implementation detail of Tpetra::CrsGraph. +/// +/// FIXME (mfh 12 Mar 2016) There's currently no way to make a +/// MemoryUnmanaged Kokkos::StaticCrsGraph. Thus, we have to do this +/// separately for its column indices. We want the column indices to +/// be unmanaged because we need to take subviews in this kernel. +/// Taking a subview of a managed View updates the reference count, +/// which is a thread scalability bottleneck. +/// +/// mfh 12 Mar 2016: Tpetra::CrsGraph::getLocalOffRankOffsets returns +/// offsets as size_t. However, see Github Issue #213. +template +GetGraphOffRankOffsets:: +GetGraphOffRankOffsets (const offsets_type& OffRankOffsets, + const local_map_type& lclRowMap, + const local_map_type& lclColMap, + const local_map_type& lclDomMap, + const row_offsets_type& ptr, + const lcl_col_inds_type& ind) : + OffRankOffsets_ (OffRankOffsets), + lclColMap_ (lclColMap), + lclDomMap_ (lclDomMap), + ptr_ (ptr), + ind_ (ind) +{ + typedef typename device_type::execution_space execution_space; + typedef Kokkos::RangePolicy policy_type; + + lclNumRows_ = lclRowMap.getNodeNumElements (); + policy_type range (0, lclNumRows_+1); + Kokkos::parallel_for (range, *this); +} + +template +KOKKOS_FUNCTION void +GetGraphOffRankOffsets:: +operator() (const LO& lclRowInd) const +{ + const LO INVALID = + Tpetra::Details::OrdinalTraits::invalid (); + + if (lclRowInd == lclNumRows_) + OffRankOffsets_[lclRowInd] = ptr_[lclRowInd]; + else { + // TODO: use parallel reduce + size_t offset = ptr_[lclRowInd+1]; + for (size_t j = ptr_[lclRowInd]; j < ptr_[lclRowInd+1]; j++) { + const LO lclColInd = ind_[j]; + const GO gblColInd = lclColMap_.getGlobalElement (lclColInd); + const LO lclDomInd = lclDomMap_.getLocalElement (gblColInd); + if ((lclDomInd == INVALID) && (j < offset)) + offset = j; + } + OffRankOffsets_[lclRowInd] = offset; + } +} + +} // namespace Impl +} // namespace Details +} // namespace Tpetra + +// Explicit template instantiation macro for +// Tpetra::Details::Impl::GetGraphOffRankOffsets. NOT FOR USERS!!! Must +// be used inside the Tpetra namespace. +#define TPETRA_DETAILS_IMPL_GETGRAPHOFFRANKOFFSETS_INSTANT( LO, GO, NODE ) \ + template class Details::Impl::GetGraphOffRankOffsets< LO, GO, NODE::device_type >; + +#endif // TPETRA_DETAILS_GETGRAPHOFFRANKOFFSETS_DEF_HPP diff --git a/packages/tpetra/core/src/Tpetra_Details_residual.hpp b/packages/tpetra/core/src/Tpetra_Details_residual.hpp index b6f41847aa4b..39cbeb0f70b4 100644 --- a/packages/tpetra/core/src/Tpetra_Details_residual.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_residual.hpp @@ -66,6 +66,7 @@ void localResidual(const CrsMatrix & A, const MultiVector & X_colmap, const MultiVector & B, MultiVector & R, + const Kokkos::View& offsets, const MultiVector * X_domainmap=nullptr) { using Tpetra::Details::ProfilingRegion; using Teuchos::NO_TRANS; @@ -214,19 +215,18 @@ void localResidual(const CrsMatrix & A, Kokkos::parallel_for(Kokkos::TeamThreadRange (dev, 0, rows_per_team),[&] (const LO& loop) { const LO lclRow = static_cast (dev.league_rank ()) * rows_per_team + loop; const LO numRows = A_lcl.numRows (); - if (lclRow >= numRows) { return; } - - const auto A_row = A_lcl.rowConst(lclRow); - const LO row_length = static_cast (A_row.length); - residual_value_type A_x = KAT::zero (); - - Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, row_length), [&] (const LO iEntry, residual_value_type& lsum) { - const auto A_val = A_row.value(iEntry); - const auto lclCol = A_row.colidx(iEntry); - if (lclCol < numRows) + const LO offRankOffset = offsets(lclRow); + const size_t start = A_lcl.graph.row_map(lclRow); + const size_t end = A_lcl.graph.row_map(lclRow+1); + residual_value_type A_x = KAT::zero (); + + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, start, end), [&] (const LO iEntry, residual_value_type& lsum) { + const auto A_val = A_lcl.values(iEntry); + const auto lclCol = A_lcl.graph.entries(iEntry); + if (iEntry < offRankOffset) lsum += A_val * X_domainmap_lcl(lclCol,0); else lsum += A_val * X_colmap_lcl(lclCol,0); @@ -237,7 +237,7 @@ void localResidual(const CrsMatrix & A, }); });//end parallel_for TeamThreadRange });//end parallel_for "residual-vector" - + } } else { // MultiVector case @@ -280,27 +280,29 @@ void localResidual(const CrsMatrix & A, Kokkos::parallel_for(Kokkos::TeamThreadRange (dev, 0, rows_per_team),[&] (const LO& loop) { const LO lclRow = static_cast (dev.league_rank ()) * rows_per_team + loop; const LO numRows = A_lcl.numRows (); - if (lclRow >= numRows) { return; } - const auto A_row = A_lcl.rowConst(lclRow); - const LO row_length = static_cast (A_row.length); + + const LO offRankOffset = offsets(lclRow); + const size_t start = A_lcl.graph.row_map(lclRow); + const size_t end = A_lcl.graph.row_map(lclRow+1); + for(LO v=0; v & Aop, using import_type = typename CrsMatrix::import_type; using export_type = typename CrsMatrix::export_type; using MV = MultiVector; + using graph_type = Tpetra::CrsGraph; + using local_graph_type = typename graph_type::local_graph_type; + using offset_type = typename local_graph_type::row_map_type::const_type; // We treat the case of a replicated MV output specially. const bool R_is_replicated = @@ -403,6 +408,15 @@ void residual(const Operator & Aop, X_colMap = rcp_const_cast (X_colMapNonConst); } + offset_type offsets; + if (restrictedMode) { + if (!A.getCrsGraph()->haveLocalOffRankOffsets()) { + RCP graph = Teuchos::rcp_const_cast(A.getCrsGraph()); + graph->computeLocalOffRankOffsets(); + } + offsets = A.getCrsGraph()->k_rowPtrsOffRank_; + } + // Get a vector for the R_rowMap output residual, handling the // non-constant stride and exporter cases. Since R gets clobbered // we don't need to worry about the data in it @@ -451,9 +465,9 @@ void residual(const Operator & Aop, if (! exporter.is_null ()) { if (restrictedMode && !importer.is_null ()) - localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, &X_in); + localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets, &X_in); else - localResidual (A, *X_colMap, *B_rowMap, *R_rowMap); + localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets); { ProfilingRegion regionExport ("Tpetra::CrsMatrix::residual: R Export"); @@ -471,14 +485,14 @@ void residual(const Operator & Aop, // if (! R_in.isConstantStride () ) { // We need to be sure to do a copy out in this case. - localResidual (A, *X_colMap, *B_rowMap, *R_rowMap); + localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets); Tpetra::deep_copy (R_in, *R_rowMap); } else { if (restrictedMode && !importer.is_null ()) - localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, &X_in); + localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets, &X_in); else - localResidual (A, *X_colMap, *B_rowMap, *R_rowMap); + localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets); } }