From 5ae3557612afccce9a4ecac8154d21741e2baa95 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 15 Nov 2019 12:53:39 -0700 Subject: [PATCH 01/13] Tpetra: Fix 'inconsistent override' warnings --- packages/tpetra/core/src/Tpetra_DirectoryImpl_decl.hpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_DirectoryImpl_decl.hpp b/packages/tpetra/core/src/Tpetra_DirectoryImpl_decl.hpp index 8e1136739ac4..a11db9fcf6fa 100644 --- a/packages/tpetra/core/src/Tpetra_DirectoryImpl_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_DirectoryImpl_decl.hpp @@ -34,8 +34,6 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER @@ -192,7 +190,7 @@ namespace Tpetra { //@{ //! A one-line human-readable description of this object. - std::string description () const; + std::string description () const override; //@} protected: //! Find process IDs and (optionally) local IDs for the given global IDs. @@ -250,7 +248,7 @@ namespace Tpetra { //@{ //! A one-line human-readable description of this object. - std::string description () const; + std::string description () const override; //@} protected: @@ -305,7 +303,7 @@ namespace Tpetra { //@{ //! A one-line human-readable description of this object. - std::string description () const; + std::string description () const override; //@} protected: @@ -402,7 +400,7 @@ namespace Tpetra { //@{ //! A one-line human-readable description of this object. - std::string description () const; + std::string description () const override; //@} protected: //! Find process IDs and (optionally) local IDs for the given global IDs. 
From 9685ec20bb00c1bd7cab90ec67ea24c3d343fbe2 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 15 Nov 2019 13:12:30 -0700 Subject: [PATCH 02/13] Tpetra: Simplify unpackRow(Count) @trilinos/tpetra If we only ever pass a View to call .data() on it, then it's cheaper just to pass a raw array. --- ..._Details_unpackCrsMatrixAndCombine_def.hpp | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_Details_unpackCrsMatrixAndCombine_def.hpp b/packages/tpetra/core/src/Tpetra_Details_unpackCrsMatrixAndCombine_def.hpp index 30bb05137c1e..fffcd139c602 100644 --- a/packages/tpetra/core/src/Tpetra_Details_unpackCrsMatrixAndCombine_def.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_unpackCrsMatrixAndCombine_def.hpp @@ -34,8 +34,6 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER @@ -108,7 +106,7 @@ KOKKOS_FUNCTION int unpackRow(typename PackTraits::output_array_type& gids_out, typename PackTraits::output_array_type& pids_out, typename PackTraits::output_array_type& vals_out, - const Kokkos::View& imports, + const char imports[], const size_t offset, const size_t /* num_bytes */, const size_t num_ent, @@ -135,10 +133,10 @@ unpackRow(typename PackTraits::output_array_type& gids_out, const size_t vals_beg = gids_beg + gids_len + pids_len; const size_t vals_len = num_ent * num_bytes_per_value; - const char* const num_ent_in = imports.data () + num_ent_beg; - const char* const gids_in = imports.data () + gids_beg; - const char* const pids_in = unpack_pids ? 
imports.data () + pids_beg : NULL; - const char* const vals_in = imports.data () + vals_beg; + const char* const num_ent_in = imports + num_ent_beg; + const char* const gids_in = imports + gids_beg; + const char* const pids_in = unpack_pids ? imports + pids_beg : nullptr; + const char* const vals_in = imports + vals_beg; size_t num_bytes_out = 0; LO num_ent_out; @@ -356,7 +354,7 @@ struct UnpackCrsMatrixAndCombineFunctor { // Unpack this row! int unpack_err = unpackRow(gids_out, pids_out, vals_out, - imports, offset, num_bytes, + imports.data(), offset, num_bytes, num_ent, num_bytes_per_value); if (unpack_err != 0) { dst = Kokkos::make_pair (unpack_err, i); // unpack error @@ -676,21 +674,23 @@ unpackAndCombineWithOwningPIDsCount( return count; } -template +template KOKKOS_INLINE_FUNCTION size_t -unpackRowCount(const Kokkos::View& imports, +unpackRowCount(const char imports[], const size_t offset, const size_t num_bytes) { + using PT = PackTraits; + LO num_ent_LO = 0; if (num_bytes > 0) { - const size_t p_num_bytes = PackTraits::packValueCount(num_ent_LO); + const size_t p_num_bytes = PT::packValueCount(num_ent_LO); if (p_num_bytes > num_bytes) { return OrdinalTraits::invalid(); } - const char* const in_buf = imports.data () + offset; - (void) PackTraits::unpackValue(num_ent_LO, in_buf); + const char* const in_buf = imports + offset; + (void) PT::unpackValue(num_ent_LO, in_buf); } return static_cast(num_ent_LO); } @@ -720,7 +720,7 @@ setupRowPointersForRemotes( typedef typename std::remove_reference< decltype( tgt_rowptr(0) ) >::type atomic_incr_type; const size_t num_bytes = num_packets_per_lid(i); const size_t offset = offsets(i); - const size_t num_ent = unpackRowCount (imports, offset, num_bytes); + const size_t num_ent = unpackRowCount (imports.data(), offset, num_bytes); if (num_ent == InvalidNum) { k_error += 1; } @@ -899,7 +899,7 @@ unpackAndCombineIntoCrsArrays2( // Empty buffer means that the row is empty. 
return; } - size_t num_ent = unpackRowCount(imports, offset, num_bytes); + size_t num_ent = unpackRowCount(imports.data(), offset, num_bytes); if (num_ent == InvalidNum) { k_error += 1; return; @@ -913,7 +913,7 @@ unpackAndCombineIntoCrsArrays2( pids_out_type pids_out = subview(tgt_pids, slice(start_row, end_row)); k_error += unpackRow(gids_out, pids_out, vals_out, - imports, offset, num_bytes, + imports.data(), offset, num_bytes, num_ent, num_bytes_per_value); // Correct target PIDs. From 071df304422a9e9bffd1f26ec8efc326fccdec3e Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 15 Nov 2019 13:48:01 -0700 Subject: [PATCH 03/13] Tpetra,Stokhos: Refactor PackTraits @trilinos/tpetra @trilinos/stokhos Tpetra::Details::PackTraits only has the D(evice) template parameter because of allocateArray. This template parameter unnecessarily complicates the implementation of CrsMatrix unpack. Thus, I have factored allocateArray out of PackTraits, into a new traits class, ScalarViewTraits. --- .../pce/tpetra/Stokhos_Tpetra_UQ_PCE.hpp | 63 ++++---- packages/tpetra/core/src/CMakeLists.txt | 2 +- .../core/src/Tpetra_BlockCrsMatrix_def.hpp | 71 +++++---- .../tpetra/core/src/Tpetra_CrsMatrix_decl.hpp | 4 +- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 49 +++--- .../core/src/Tpetra_Details_PackTraits.hpp | 67 +------- .../src/Tpetra_Details_ScalarViewTraits.hpp | 121 +++++++++++++++ .../src/Tpetra_Details_packCrsGraph_def.hpp | 43 ++---- .../src/Tpetra_Details_packCrsMatrix_def.hpp | 96 ++++-------- ..._Details_unpackCrsMatrixAndCombine_def.hpp | 144 ++++++++---------- .../core/src/Tpetra_MultiVector_def.hpp | 6 +- 11 files changed, 338 insertions(+), 328 deletions(-) create mode 100644 packages/tpetra/core/src/Tpetra_Details_ScalarViewTraits.hpp diff --git a/packages/stokhos/src/sacado/kokkos/pce/tpetra/Stokhos_Tpetra_UQ_PCE.hpp b/packages/stokhos/src/sacado/kokkos/pce/tpetra/Stokhos_Tpetra_UQ_PCE.hpp index af3e8f0f63eb..828c4b0c2b4f 100644 --- 
a/packages/stokhos/src/sacado/kokkos/pce/tpetra/Stokhos_Tpetra_UQ_PCE.hpp +++ b/packages/stokhos/src/sacado/kokkos/pce/tpetra/Stokhos_Tpetra_UQ_PCE.hpp @@ -135,6 +135,7 @@ struct DeviceForNode2< Kokkos::Compat::KokkosDeviceWrapperNode > { } #include "Tpetra_Details_PackTraits.hpp" +#include "Tpetra_Details_ScalarViewTraits.hpp" namespace Tpetra { namespace Details { @@ -142,49 +143,36 @@ namespace Details { /// \brief Partial specialization of PackTraits for Sacado's PCE UQ type. /// /// \tparam S The underlying scalar type in the PCE UQ type. -/// \tparam D The Kokkos "device" type. -template -struct PackTraits< Sacado::UQ::PCE, D > { - typedef Sacado::UQ::PCE value_type; - typedef typename D::execution_space execution_space; - typedef D device_type; - typedef typename execution_space::size_type size_type; +template +struct PackTraits> { + using value_type = Sacado::UQ::PCE; /// \brief Whether the number of bytes required to pack one instance /// of \c value_type is fixed at compile time. 
static const bool compileTimeSize = false; - typedef Kokkos::View input_buffer_type; - typedef Kokkos::View output_buffer_type; - typedef Kokkos::View input_array_type; - typedef Kokkos::View output_array_type; + using input_buffer_type = Kokkos::View; + using output_buffer_type = Kokkos::View; + using input_array_type = Kokkos::View; + using output_array_type = Kokkos::View; - typedef typename value_type::value_type scalar_value_type; - typedef PackTraits< scalar_value_type, device_type > SPT; - typedef typename SPT::input_array_type scalar_input_array_type; - typedef typename SPT::output_array_type scalar_output_array_type; + using scalar_value_type = typename value_type::value_type; + using SPT = PackTraits; + using scalar_input_array_type = typename SPT::input_array_type; + using scalar_output_array_type = typename SPT::output_array_type; KOKKOS_INLINE_FUNCTION static size_t numValuesPerScalar (const value_type& x) { return x.size (); } - static Kokkos::View - allocateArray (const value_type& x, const size_t numEnt, const std::string& label = "") - { - typedef Kokkos::View view_type; - - const size_type numVals = numValuesPerScalar (x); - return view_type (label, static_cast (numEnt), numVals); - } - KOKKOS_INLINE_FUNCTION static Kokkos::pair packArray (char outBuf[], const value_type inBuf[], const size_t numEnt) { - typedef Kokkos::pair return_type; + using return_type = Kokkos::pair; size_t numBytes = 0; int errorCode = 0; @@ -232,7 +220,7 @@ struct PackTraits< Sacado::UQ::PCE, D > { const char inBuf[], const size_t numEnt) { - typedef Kokkos::pair return_type; + using return_type = Kokkos::pair; size_t numBytes = 0; int errorCode = 0; @@ -242,7 +230,7 @@ struct PackTraits< Sacado::UQ::PCE, D > { else { // Check whether output array is contiguously allocated based on the size // of the first entry. 
We have a simpler method to unpack in this case - const size_type scalar_size = numValuesPerScalar (outBuf[0]); + const size_t scalar_size = numValuesPerScalar (outBuf[0]); const scalar_value_type* last_coeff = outBuf[numEnt - 1].coeff (); const scalar_value_type* last_coeff_expected = outBuf[0].coeff () + (numEnt - 1) * scalar_size; @@ -308,6 +296,27 @@ struct PackTraits< Sacado::UQ::PCE, D > { } }; // struct PackTraits +/// \brief Partial specialization of ScalarViewTraits +/// for Sacado's PCE UQ type. +/// +/// \tparam S The underlying scalar type in the PCE UQ type. +/// \tparam D The Kokkos "device" type. +template +struct ScalarViewTraits, D> { + using value_type = Sacado::UQ::PCE; + using device_type = D; + + static Kokkos::View + allocateArray (const value_type& x, + const size_t numEnt, + const std::string& label = "") + { + const size_t numVals = PackTraits::numValuesPerScalar (x); + using view_type = Kokkos::View; + return view_type (label, numEnt, numVals); + } +}; + } // namespace Details } // namespace Tpetra diff --git a/packages/tpetra/core/src/CMakeLists.txt b/packages/tpetra/core/src/CMakeLists.txt index 0f28e1f2d098..60acfe9ba764 100644 --- a/packages/tpetra/core/src/CMakeLists.txt +++ b/packages/tpetra/core/src/CMakeLists.txt @@ -768,5 +768,5 @@ SET_PROPERTY( # / from this directory, or to / from the 'impl' subdirectory. That ensures # that running "make" will also rerun CMake in order to regenerate Makefiles. # -# Here's another change. +# Here's another change, and another. # diff --git a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_def.hpp index 60cbbaad2c73..2b143c3e55f9 100644 --- a/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_BlockCrsMatrix_def.hpp @@ -34,8 +34,6 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER @@ -2582,7 +2580,7 @@ class GetLocalDiagCopy { /// its constructor), then the second argument is the result of /// PackTraits::packValueCount, called on a /// Scalar value with the correct run-time size. - template + template size_t packRowCount (const size_t numEnt, const size_t numBytesPerValue, @@ -2597,9 +2595,9 @@ class GetLocalDiagCopy { else { // We store the number of entries as a local index (LO). LO numEntLO = 0; // packValueCount wants this. - GO gid; - const size_t numEntLen = PackTraits::packValueCount (numEntLO); - const size_t gidsLen = numEnt * PackTraits::packValueCount (gid); + GO gid {}; + const size_t numEntLen = PackTraits::packValueCount (numEntLO); + const size_t gidsLen = numEnt * PackTraits::packValueCount (gid); const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize; return numEntLen + gidsLen + valsLen; } @@ -2615,9 +2613,9 @@ class GetLocalDiagCopy { /// scalar (not block!) entry (value) of the row. /// /// \return Number of (block) entries in the packed row. 
- template + template size_t - unpackRowCount (const typename ::Tpetra::Details::PackTraits::input_buffer_type& imports, + unpackRowCount (const typename ::Tpetra::Details::PackTraits::input_buffer_type& imports, const size_t offset, const size_t numBytes, const size_t /* numBytesPerValue */) @@ -2631,13 +2629,13 @@ class GetLocalDiagCopy { } else { LO numEntLO = 0; - const size_t theNumBytes = PackTraits::packValueCount (numEntLO); + const size_t theNumBytes = PackTraits::packValueCount (numEntLO); TEUCHOS_TEST_FOR_EXCEPTION (theNumBytes > numBytes, std::logic_error, "unpackRowCount: " "theNumBytes = " << theNumBytes << " < numBytes = " << numBytes << "."); const char* const inBuf = imports.data () + offset; - const size_t actualNumBytes = PackTraits::unpackValue (numEntLO, inBuf); + const size_t actualNumBytes = PackTraits::unpackValue (numEntLO, inBuf); TEUCHOS_TEST_FOR_EXCEPTION (actualNumBytes > numBytes, std::logic_error, "unpackRowCount: " "actualNumBytes = " << actualNumBytes << " < numBytes = " << numBytes @@ -2649,13 +2647,13 @@ class GetLocalDiagCopy { /// \brief Pack the block row (stored in the input arrays). /// /// \return The number of bytes packed. 
- template + template size_t - packRowForBlockCrs (const typename ::Tpetra::Details::PackTraits::output_buffer_type exports, + packRowForBlockCrs (const typename ::Tpetra::Details::PackTraits::output_buffer_type exports, const size_t offset, const size_t numEnt, - const typename ::Tpetra::Details::PackTraits::input_array_type& gidsIn, - const typename ::Tpetra::Details::PackTraits::input_array_type& valsIn, + const typename ::Tpetra::Details::PackTraits::input_array_type& gidsIn, + const typename ::Tpetra::Details::PackTraits::input_array_type& valsIn, const size_t numBytesPerValue, const size_t blockSize) { @@ -2672,9 +2670,9 @@ class GetLocalDiagCopy { const LO numEntLO = static_cast (numEnt); const size_t numEntBeg = offset; - const size_t numEntLen = PackTraits::packValueCount (numEntLO); + const size_t numEntLen = PackTraits::packValueCount (numEntLO); const size_t gidsBeg = numEntBeg + numEntLen; - const size_t gidsLen = numEnt * PackTraits::packValueCount (gid); + const size_t gidsLen = numEnt * PackTraits::packValueCount (gid); const size_t valsBeg = gidsBeg + gidsLen; const size_t valsLen = numScalarEnt * numBytesPerValue; @@ -2684,15 +2682,15 @@ class GetLocalDiagCopy { size_t numBytesOut = 0; int errorCode = 0; - numBytesOut += PackTraits::packValue (numEntOut, numEntLO); + numBytesOut += PackTraits::packValue (numEntOut, numEntLO); { Kokkos::pair p; - p = PackTraits::packArray (gidsOut, gidsIn.data (), numEnt); + p = PackTraits::packArray (gidsOut, gidsIn.data (), numEnt); errorCode += p.first; numBytesOut += p.second; - p = PackTraits::packArray (valsOut, valsIn.data (), numScalarEnt); + p = PackTraits::packArray (valsOut, valsIn.data (), numScalarEnt); errorCode += p.first; numBytesOut += p.second; } @@ -2711,11 +2709,11 @@ class GetLocalDiagCopy { } // Return the number of bytes actually read / used. 
- template + template size_t - unpackRowForBlockCrs (const typename ::Tpetra::Details::PackTraits::output_array_type& gidsOut, - const typename ::Tpetra::Details::PackTraits::output_array_type& valsOut, - const typename ::Tpetra::Details::PackTraits::input_buffer_type& imports, + unpackRowForBlockCrs (const typename ::Tpetra::Details::PackTraits::output_array_type& gidsOut, + const typename ::Tpetra::Details::PackTraits::output_array_type& valsOut, + const typename ::Tpetra::Details::PackTraits::input_buffer_type& imports, const size_t offset, const size_t numBytes, const size_t numEnt, @@ -2743,9 +2741,9 @@ class GetLocalDiagCopy { const LO lid = 0; // packValueCount wants this const size_t numEntBeg = offset; - const size_t numEntLen = PackTraits::packValueCount (lid); + const size_t numEntLen = PackTraits::packValueCount (lid); const size_t gidsBeg = numEntBeg + numEntLen; - const size_t gidsLen = numEnt * PackTraits::packValueCount (gid); + const size_t gidsLen = numEnt * PackTraits::packValueCount (gid); const size_t valsBeg = gidsBeg + gidsLen; const size_t valsLen = numScalarEnt * numBytesPerValue; @@ -2756,7 +2754,7 @@ class GetLocalDiagCopy { size_t numBytesOut = 0; int errorCode = 0; LO numEntOut; - numBytesOut += PackTraits::unpackValue (numEntOut, numEntIn); + numBytesOut += PackTraits::unpackValue (numEntOut, numEntIn); TEUCHOS_TEST_FOR_EXCEPTION (static_cast (numEntOut) != numEnt, std::logic_error, "unpackRowForBlockCrs: Expected number of entries " << numEnt @@ -2764,11 +2762,11 @@ class GetLocalDiagCopy { { Kokkos::pair p; - p = PackTraits::unpackArray (gidsOut.data (), gidsIn, numEnt); + p = PackTraits::unpackArray (gidsOut.data (), gidsIn, numEnt); errorCode += p.first; numBytesOut += p.second; - p = PackTraits::unpackArray (valsOut.data (), valsIn, numScalarEnt); + p = PackTraits::unpackArray (valsOut.data (), valsIn, numScalarEnt); errorCode += p.first; numBytesOut += p.second; } @@ -2897,7 +2895,7 @@ class GetLocalDiagCopy { const size_t 
blockSize = static_cast (src->getBlockSize ()); const size_t numExportLIDs = exportLIDs.extent (0); const size_t numBytesPerValue = - PackTraits + PackTraits ::packValueCount(this->val_.extent(0) ? this->val_.view_host()(0) : impl_scalar_type()); // Compute the number of bytes ("packets") per row to pack. While @@ -2921,7 +2919,7 @@ class GetLocalDiagCopy { numEnt = (numEnt == Teuchos::OrdinalTraits::invalid () ? 0 : numEnt); const size_t numBytes = - packRowCount (numEnt, numBytesPerValue, blockSize); + packRowCount (numEnt, numBytesPerValue, blockSize); numPacketsPerLIDHost(i) = numBytes; update += typename reducer_type::value_type(numEnt, numBytes, numEnt); }, rowReducerStruct); @@ -3016,12 +3014,13 @@ class GetLocalDiagCopy { // the following function interface need the same execution space // host scratch space somehow is not considered same as the host_exec // Copy the row's data into the current spot in the exports array. - const size_t numBytes = packRowForBlockCrs + const size_t numBytes = + packRowForBlockCrs (exports.view_host(), offset(i), numEnt, - Kokkos::View(gblColInds.data(), maxRowLength), - Kokkos::View(reinterpret_cast(valsRaw), numEnt*blockSize*blockSize), + Kokkos::View(gblColInds.data(), maxRowLength), + Kokkos::View(reinterpret_cast(valsRaw), numEnt*blockSize*blockSize), numBytesPerValue, blockSize); @@ -3164,7 +3163,7 @@ class GetLocalDiagCopy { // could be bad if the calling process has no entries, but other // processes have entries that they want to send to us. const size_t numBytesPerValue = - PackTraits::packValueCount + PackTraits::packValueCount (this->val_.extent (0) ? 
this->val_.view_host () (0) : impl_scalar_type ()); const size_t maxRowNumEnt = graph_.getNodeMaxNumRowEntries (); const size_t maxRowNumScalarEnt = maxRowNumEnt * blockSize * blockSize; @@ -3273,7 +3272,7 @@ class GetLocalDiagCopy { const LO lclRow = importLIDsHost(i); const size_t numBytes = numPacketsPerLIDHost(i); const size_t numEnt = - unpackRowCount + unpackRowCount (imports.view_host (), offval, numBytes, numBytesPerValue); if (numBytes > 0) { @@ -3301,7 +3300,7 @@ class GetLocalDiagCopy { size_t numBytesOut = 0; try { numBytesOut = - unpackRowForBlockCrs + unpackRowForBlockCrs (Kokkos::View(gidsOut.data(), numEnt), Kokkos::View(valsOut.data(), numScalarEnt), imports.view_host(), diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp index 3992ff01f9c3..f32c7e552efa 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp @@ -55,7 +55,7 @@ #include "Tpetra_DistObject.hpp" #include "Tpetra_CrsGraph.hpp" #include "Tpetra_Vector.hpp" -#include "Tpetra_Details_PackTraits.hpp" +#include "Tpetra_Details_PackTraits.hpp" // unused here, could delete #include "KokkosSparse_CrsMatrix.hpp" // localGaussSeidel and reorderedLocalGaussSeidel are templated on @@ -304,7 +304,7 @@ namespace Tpetra { const Teuchos::RCP& params); /// \brief Nonmember function that computes a residual - /// Computes R = B - A * X + /// Computes R = B - A * X namespace Details { template void residual(const Operator & A, diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 38b28833e621..3c06c709bdf9 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -65,6 +65,7 @@ #include "Tpetra_Details_leftScaleLocalCrsMatrix.hpp" #include "Tpetra_Details_Profiling.hpp" #include "Tpetra_Details_rightScaleLocalCrsMatrix.hpp" +#include 
"Tpetra_Details_ScalarViewTraits.hpp" #include "KokkosSparse_getDiagCopy.hpp" #include "Tpetra_Details_copyConvert.hpp" #include "Tpetra_Details_iallreduce.hpp" @@ -7290,7 +7291,6 @@ namespace Tpetra { typedef LocalOrdinal LO; typedef GlobalOrdinal GO; typedef impl_scalar_type ST; - typedef typename View::HostMirror::execution_space HES; if (numEnt == 0) { // Empty rows always take zero bytes, to ensure sparsity. @@ -7301,9 +7301,9 @@ namespace Tpetra { const LO numEntLO = static_cast (numEnt); const size_t numEntBeg = offset; - const size_t numEntLen = PackTraits::packValueCount (numEntLO); + const size_t numEntLen = PackTraits::packValueCount (numEntLO); const size_t gidsBeg = numEntBeg + numEntLen; - const size_t gidsLen = numEnt * PackTraits::packValueCount (gid); + const size_t gidsLen = numEnt * PackTraits::packValueCount (gid); const size_t valsBeg = gidsBeg + gidsLen; const size_t valsLen = numEnt * numBytesPerValue; @@ -7313,15 +7313,15 @@ namespace Tpetra { size_t numBytesOut = 0; int errorCode = 0; - numBytesOut += PackTraits::packValue (numEntOut, numEntLO); + numBytesOut += PackTraits::packValue (numEntOut, numEntLO); { Kokkos::pair p; - p = PackTraits::packArray (gidsOut, gidsIn, numEnt); + p = PackTraits::packArray (gidsOut, gidsIn, numEnt); errorCode += p.first; numBytesOut += p.second; - p = PackTraits::packArray (valsOut, valsIn, numEnt); + p = PackTraits::packArray (valsOut, valsIn, numEnt); errorCode += p.first; numBytesOut += p.second; } @@ -7355,7 +7355,6 @@ namespace Tpetra { typedef LocalOrdinal LO; typedef GlobalOrdinal GO; typedef impl_scalar_type ST; - typedef typename View::HostMirror::execution_space HES; if (numBytes == 0) { // Rows with zero bytes should always have zero entries. 
@@ -7383,9 +7382,9 @@ namespace Tpetra { const LO lid = 0; // packValueCount wants this const size_t numEntBeg = offset; - const size_t numEntLen = PackTraits::packValueCount (lid); + const size_t numEntLen = PackTraits::packValueCount (lid); const size_t gidsBeg = numEntBeg + numEntLen; - const size_t gidsLen = numEnt * PackTraits::packValueCount (gid); + const size_t gidsLen = numEnt * PackTraits::packValueCount (gid); const size_t valsBeg = gidsBeg + gidsLen; const size_t valsLen = numEnt * numBytesPerValue; @@ -7396,7 +7395,7 @@ namespace Tpetra { size_t numBytesOut = 0; int errorCode = 0; LO numEntOut; - numBytesOut += PackTraits::unpackValue (numEntOut, numEntIn); + numBytesOut += PackTraits::unpackValue (numEntOut, numEntIn); if (static_cast (numEntOut) != numEnt || numEntOut == static_cast (0)) { const int myRank = this->getMap ()->getComm ()->getRank (); @@ -7425,11 +7424,11 @@ namespace Tpetra { { Kokkos::pair p; - p = PackTraits::unpackArray (gidsOut, gidsIn, numEnt); + p = PackTraits::unpackArray (gidsOut, gidsIn, numEnt); errorCode += p.first; numBytesOut += p.second; - p = PackTraits::unpackArray (valsOut, valsIn, numEnt); + p = PackTraits::unpackArray (valsOut, valsIn, numEnt); errorCode += p.first; numBytesOut += p.second; } @@ -7676,11 +7675,9 @@ namespace Tpetra { } // Temporary buffer for global column indices. 
- View gidsIn_k; - { - GO gid = 0; - gidsIn_k = PackTraits::allocateArray(gid, numEnt, "gids"); - } + using Details::ScalarViewTraits; + View gidsIn_k = + ScalarViewTraits::allocateArray (GO (0), numEnt, "gids"); Teuchos::ArrayView valsIn; if (this->isLocallyIndexed ()) { @@ -7719,7 +7716,7 @@ namespace Tpetra { valsIn.size (), true, "valsIn"); const size_t numBytesPerValue = - PackTraits::packValueCount (valsIn[0]); + PackTraits::packValueCount (valsIn[0]); const size_t numBytes = this->packRow (exports_h.data (), offset, numEnt, gidsIn_k.data (), valsIn_k.data (), numBytesPerValue); @@ -8053,6 +8050,7 @@ namespace Tpetra { using Tpetra::Details::castAwayConstDualView; using Tpetra::Details::create_mirror_view_from_raw_host_array; using Tpetra::Details::PackTraits; + using Tpetra::Details::ScalarViewTraits; using std::endl; typedef LocalOrdinal LO; typedef GlobalOrdinal GO; @@ -8135,7 +8133,7 @@ namespace Tpetra { // for each row's data to contain the run-time size. This is only // necessary if the size is not a compile-time constant. Scalar val; - numBytesPerValue = PackTraits::packValueCount (val); + numBytesPerValue = PackTraits::packValueCount (val); } // Determine the maximum number of entries in any one row @@ -8159,7 +8157,7 @@ namespace Tpetra { LO numEntLO = 0; #ifdef HAVE_TPETRA_DEBUG - const size_t theNumBytes = PackTraits::packValueCount (numEntLO); + const size_t theNumBytes = PackTraits::packValueCount (numEntLO); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (theNumBytes > numBytes, std::logic_error, "theNumBytes = " << theNumBytes << " > numBytes = " << numBytes << "."); @@ -8167,7 +8165,7 @@ namespace Tpetra { const char* const inBuf = imports_h.data () + offset; const size_t actualNumBytes = - PackTraits::unpackValue (numEntLO, inBuf); + PackTraits::unpackValue (numEntLO, inBuf); #ifdef HAVE_TPETRA_DEBUG TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC @@ -8206,9 +8204,9 @@ namespace Tpetra { // for each row's data to contain the run-time size. 
This is only // necessary if the size is not a compile-time constant. Scalar val; - gblColInds = PackTraits::allocateArray (gid, maxRowNumEnt, "gids"); - lclColInds = PackTraits::allocateArray (lid, maxRowNumEnt, "lids"); - vals = PackTraits::allocateArray (val, maxRowNumEnt, "vals"); + gblColInds = ScalarViewTraits::allocateArray (gid, maxRowNumEnt, "gids"); + lclColInds = ScalarViewTraits::allocateArray (lid, maxRowNumEnt, "lids"); + vals = ScalarViewTraits::allocateArray (val, maxRowNumEnt, "vals"); } offset = 0; @@ -8219,7 +8217,8 @@ namespace Tpetra { } LO numEntLO = 0; const char* const inBuf = imports_h.data () + offset; - const size_t actualNumBytes = PackTraits::unpackValue (numEntLO, inBuf); + const size_t actualNumBytes = + PackTraits::unpackValue (numEntLO, inBuf); (void) actualNumBytes; const size_t numEnt = static_cast(numEntLO);; diff --git a/packages/tpetra/core/src/Tpetra_Details_PackTraits.hpp b/packages/tpetra/core/src/Tpetra_Details_PackTraits.hpp index d8adf29159a8..c6a5084147f0 100644 --- a/packages/tpetra/core/src/Tpetra_Details_PackTraits.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_PackTraits.hpp @@ -34,8 +34,6 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER @@ -54,15 +52,13 @@ namespace Tpetra { namespace Details { -/// \brief Traits class for packing / unpacking data of type \c T, -/// using Kokkos data structures that live in the given space \c D. +/// \brief Traits class for packing / unpacking data of type \c T. /// /// \tparam T The type of the data to pack / unpack. -/// \tparam D The Kokkos "device" type; where the data live. -template +template struct PackTraits { //! The type of data to pack or unpack. 
- typedef T value_type; + using value_type = T; /// \brief Whether the number of bytes required to pack one instance /// of \c value_type is fixed at compile time. @@ -80,16 +76,16 @@ struct PackTraits { static const bool compileTimeSize = true; //! The type of an input buffer of bytes. - typedef Kokkos::View input_buffer_type; + using input_buffer_type = Kokkos::View; //! The type of an output buffer of bytes. - typedef Kokkos::View output_buffer_type; + using output_buffer_type = Kokkos::View; //! The type of an input array of \c value_type. - typedef Kokkos::View input_array_type; + using input_array_type = Kokkos::View; //! The type of an output array of \c value_type. - typedef Kokkos::View output_array_type; + using output_array_type = Kokkos::View; /// \brief Given an instance of \c value_type allocated with the /// right size, return the "number of values" that make up that @@ -113,55 +109,6 @@ struct PackTraits { return static_cast (1); } - /// \brief Given an instance of \c value_type allocated with the - /// right size, allocate and return a one-dimensional array of - /// \c value_type. - /// - /// This function lets the pack and unpack code that uses PackTraits - /// correctly handle types that have a size specified at run time. - /// In particular, it's helpful if that code needs to allocate - /// temporary buffers of \c value_type. PackTraits still assumes - /// that all instances of \c value_type in an input or output array - /// have the same run-time size. - /// - /// \param x [in] Instance of \c value_type with the correct (run-time) size. - /// \param numEnt [in] Number of entries in the returned array. - /// \param label [in] Optional string label of the returned - /// Kokkos::View. (Kokkos::View's constructor takes a string - /// label, which Kokkos uses for debugging output.) - /// - /// \return One-dimensional array of \c value_type, all instances of - /// which have the same (run-time) size as \c x. 
- /// - /// \note To implementers of specializations: If the number of bytes - /// to pack or unpack your type may be determined at run time, you - /// might be able just to use this implementation as-is, and just - /// reimplement numValuesPerScalar(). - static Kokkos::View - allocateArray (const value_type& x, - const size_t numEnt, - const std::string& label = "") - { - typedef Kokkos::View view_type; - typedef typename view_type::size_type size_type; - - // When the traits::specialize type is non-void this exploits - // the fact that Kokkos::View's constructor ignores - // size arguments beyond what the View's type specifies. For - // value_type = Stokhos::UQ::PCE, numValuesPerScalar returns - // something other than 1, and the constructor will actually use - // that value. - // Otherwise, the number of arguments must match the dynamic rank - // (i.e. number *'s with the value_type of the View) - const size_type numVals = numValuesPerScalar (x); - if ( std::is_same< typename view_type::traits::specialize, void >::value ) { - return view_type (label, static_cast (numEnt)); - } - else { - return view_type (label, static_cast (numEnt), numVals); - } - } - /// \brief Pack the first numEnt entries of the given input buffer /// of \c value_type, into the output buffer of bytes. /// diff --git a/packages/tpetra/core/src/Tpetra_Details_ScalarViewTraits.hpp b/packages/tpetra/core/src/Tpetra_Details_ScalarViewTraits.hpp new file mode 100644 index 000000000000..02e25dc3cdaf --- /dev/null +++ b/packages/tpetra/core/src/Tpetra_Details_ScalarViewTraits.hpp @@ -0,0 +1,121 @@ +// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// ************************************************************************ +// @HEADER + +#ifndef TPETRA_DETAILS_SCALARVIEWTRAITS_HPP +#define TPETRA_DETAILS_SCALARVIEWTRAITS_HPP + +/// +/// \file Tpetra_Details_ScalarViewTraits.hpp +/// \brief Declaration and generic definition of traits class that +/// tells Tpetra::CrsMatrix how to pack and unpack data. 
+/// + +#include "Tpetra_Details_PackTraits.hpp" + +namespace Tpetra { +namespace Details { + +/// \brief Traits class for allocating a Kokkos::View. +/// +/// \tparam T The type of the data to pack / unpack. +/// \tparam D The Kokkos "device" type; where the data live. +template +struct ScalarViewTraits { + using value_type = T; + using device_type = D; + + /// \brief Given an instance of \c value_type allocated with the + /// right size, allocate and return a one-dimensional array of + /// \c value_type. + /// + /// This function lets the pack and unpack code that uses + /// ScalarViewTraits correctly handle types that have a size + /// specified at run time. In particular, it's helpful if that code + /// needs to allocate temporary buffers of \c value_type. + /// ScalarViewTraits still assumes that all instances of \c + /// value_type in an input or output array have the same run-time + /// size. + /// + /// \param x [in] Instance of \c value_type with the correct + /// (run-time) size. + /// + /// \param numEnt [in] Number of entries in the returned + /// Kokkos::View. + /// + /// \param label [in] Optional string label of the returned + /// Kokkos::View. (Kokkos::View's constructor takes a string + /// label, which Kokkos uses for debugging output.) + /// + /// \return One-dimensional array of \c value_type, all instances of + /// which have the same (run-time) size as \c x. + /// + /// \note To implementers of specializations: If the number of bytes + /// to pack or unpack your type may be determined at run time, you + /// might be able just to use this implementation as-is, and just + /// reimplement numValuesPerScalar(). 
+ static Kokkos::View + allocateArray (const value_type& x, + const size_t numEnt, + const std::string& label = "") + { + using view_type = Kokkos::View; + using size_type = typename view_type::size_type; + + // When the traits::specialize type is non-void this exploits + // the fact that Kokkos::View's constructor ignores + // size arguments beyond what the View's type specifies. For + // value_type = Stokhos::UQ::PCE, numValuesPerScalar returns + // something other than 1, and the constructor will actually use + // that value. + // Otherwise, the number of arguments must match the dynamic rank + // (i.e. number *'s with the value_type of the View) + const size_t numVals = PackTraits::numValuesPerScalar (x); + if (std::is_same::value ) { + return view_type (label, numEnt); + } + else { + return view_type (label, numEnt, numVals); + } + } +}; // struct ScalarViewTraits + +} // namespace Details +} // namespace Tpetra + +#endif // TPETRA_DETAILS_SCALARVIEWTRAITS_HPP diff --git a/packages/tpetra/core/src/Tpetra_Details_packCrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_Details_packCrsGraph_def.hpp index 357f5f043212..205aafacd0c9 100644 --- a/packages/tpetra/core/src/Tpetra_Details_packCrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_packCrsGraph_def.hpp @@ -34,8 +34,6 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER @@ -434,9 +432,9 @@ struct PackCrsGraphFunctor { using offsets_view_type = Kokkos::View; using exports_view_type = Kokkos::View; using export_lids_view_type = - typename PackTraits::input_array_type; + typename PackTraits::input_array_type; using source_pids_view_type = - typename PackTraits::input_array_type; + typename PackTraits::input_array_type; using count_type = typename num_packets_per_lid_view_type::non_const_value_type; @@ -566,16 +564,13 @@ do_pack(const LocalGraph& local_graph, const LocalMap& local_map, const Kokkos::View& exports, const typename PackTraits< - size_t, - BufferDeviceType + size_t >::input_array_type& num_packets_per_lid, const typename PackTraits< - typename LocalMap::local_ordinal_type, - BufferDeviceType + typename LocalMap::local_ordinal_type >::input_array_type& export_lids, const typename PackTraits< - int, - BufferDeviceType + int >::input_array_type& source_pids, const Kokkos::View& offsets, const bool pack_pids) @@ -620,33 +615,19 @@ do_pack(const LocalGraph& local_graph, Kokkos::parallel_reduce (range, f, result); if (result.first != 0) { + // We can't deep_copy from AnonymousSpace Views, so we can't + // print out any information from them in case of error. 
std::ostringstream os; - if (result.first == 1) { // invalid local row index - auto export_lids_h = Kokkos::create_mirror_view (export_lids); - Kokkos::deep_copy (export_lids_h, export_lids); - const auto firstBadLid = export_lids_h(result.second); - os << "First bad export LID: export_lids(i=" << result.second << ") = " - << firstBadLid; + os << "invalid local row index"; } else if (result.first == 2) { // invalid offset - auto offsets_h = Kokkos::create_mirror_view (offsets); - Kokkos::deep_copy (offsets_h, offsets); - const auto firstBadOffset = offsets_h(result.second); - - auto num_packets_per_lid_h = - Kokkos::create_mirror_view (num_packets_per_lid); - Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid); - os << "First bad offset: offsets(i=" << result.second << ") = " - << firstBadOffset << ", num_packets_per_lid(i) = " - << num_packets_per_lid_h(result.second) << ", buf_size = " - << exports.size (); + os << "invalid offset"; } - TEUCHOS_TEST_FOR_EXCEPTION - (true, std::runtime_error, prefix << "PackCrsGraphFunctor reported " - "error code " << result.first << " for the first bad row " - << result.second << ". " << os.str ()); + (true, std::runtime_error, prefix << "PackCrsGraphFunctor " + "reported error code " << result.first << " (" << os.str () + << ") for the first bad row " << result.second << "."); } } diff --git a/packages/tpetra/core/src/Tpetra_Details_packCrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_Details_packCrsMatrix_def.hpp index e4bbfcec1c22..5587a3c75479 100644 --- a/packages/tpetra/core/src/Tpetra_Details_packCrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_packCrsMatrix_def.hpp @@ -34,8 +34,6 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER @@ -407,9 +405,9 @@ KOKKOS_FUNCTION Kokkos::pair packCrsMatrixRow (const ColumnMap& col_map, const Kokkos::View& exports, - const typename PackTraits::input_array_type& lids_in, - const typename PackTraits::input_array_type& pids_in, - const typename PackTraits::input_array_type& vals_in, + const typename PackTraits::input_array_type& lids_in, + const typename PackTraits::input_array_type& pids_in, + const typename PackTraits::input_array_type& vals_in, const size_t offset, const size_t num_ent, const size_t num_bytes_per_value, @@ -428,14 +426,14 @@ packCrsMatrixRow (const ColumnMap& col_map, const LO num_ent_LO = static_cast (num_ent); // packValueCount wants this const size_t num_ent_beg = offset; - const size_t num_ent_len = PackTraits::packValueCount (num_ent_LO); + const size_t num_ent_len = PackTraits::packValueCount (num_ent_LO); const size_t gids_beg = num_ent_beg + num_ent_len; - const size_t gids_len = num_ent * PackTraits::packValueCount (GO (0)); + const size_t gids_len = num_ent * PackTraits::packValueCount (GO (0)); const size_t pids_beg = gids_beg + gids_len; const size_t pids_len = pack_pids ? 
- num_ent * PackTraits::packValueCount (int (0)) : + num_ent * PackTraits::packValueCount (int (0)) : static_cast (0); const size_t vals_beg = gids_beg + gids_len + pids_len; @@ -448,7 +446,7 @@ packCrsMatrixRow (const ColumnMap& col_map, size_t num_bytes_out = 0; int error_code = 0; - num_bytes_out += PackTraits::packValue (num_ent_out, num_ent_LO); + num_bytes_out += PackTraits::packValue (num_ent_out, num_ent_LO); { // Copy column indices one at a time, so that we don't need @@ -456,18 +454,18 @@ packCrsMatrixRow (const ColumnMap& col_map, for (size_t k = 0; k < num_ent; ++k) { const LO lid = lids_in[k]; const GO gid = col_map.getGlobalElement (lid); - num_bytes_out += PackTraits::packValue (gids_out, k, gid); + num_bytes_out += PackTraits::packValue (gids_out, k, gid); } // Copy PIDs one at a time, so that we don't need temporary storage. if (pack_pids) { for (size_t k = 0; k < num_ent; ++k) { const LO lid = lids_in[k]; const int pid = pids_in[lid]; - num_bytes_out += PackTraits::packValue (pids_out, k, pid); + num_bytes_out += PackTraits::packValue (pids_out, k, pid); } } const auto p = - PackTraits::packArray (vals_out, vals_in.data (), num_ent); + PackTraits::packArray (vals_out, vals_in.data (), num_ent); error_code += p.first; num_bytes_out += p.second; } @@ -497,10 +495,8 @@ struct PackCrsMatrixFunctor { num_packets_per_lid_view_type; typedef Kokkos::View offsets_view_type; typedef Kokkos::View exports_view_type; - typedef typename PackTraits::input_array_type - export_lids_view_type; - typedef typename PackTraits::input_array_type - source_pids_view_type; + using export_lids_view_type = typename PackTraits::input_array_type; + using source_pids_view_type = typename PackTraits::input_array_type; typedef typename num_packets_per_lid_view_type::non_const_value_type count_type; @@ -642,25 +638,16 @@ void do_pack (const LocalMatrix& local_matrix, const LocalMap& local_map, const Kokkos::View& exports, - const typename PackTraits< - size_t, - BufferDeviceType 
- >::input_array_type& num_packets_per_lid, - const typename PackTraits< - typename LocalMap::local_ordinal_type, - BufferDeviceType - >::input_array_type& export_lids, - const typename PackTraits< - int, - typename LocalMatrix::device_type - >::input_array_type& source_pids, + const typename PackTraits::input_array_type& num_packets_per_lid, + const typename PackTraits::input_array_type& export_lids, + const typename PackTraits::input_array_type& source_pids, const Kokkos::View& offsets, const size_t num_bytes_per_value, const bool pack_pids) { - typedef typename LocalMap::local_ordinal_type LO; - typedef typename LocalMatrix::device_type DT; - typedef Kokkos::RangePolicy range_type; + using LO = typename LocalMap::local_ordinal_type; + using DT = typename LocalMatrix::device_type; + using range_type = Kokkos::RangePolicy; const char prefix[] = "Tpetra::Details::do_pack: "; if (export_lids.extent (0) != 0) { @@ -686,8 +673,8 @@ do_pack (const LocalMatrix& local_matrix, "least one matrix entry, but source_pids.extent(0) = 0."); } - typedef PackCrsMatrixFunctor pack_functor_type; + using pack_functor_type = + PackCrsMatrixFunctor; pack_functor_type f (local_matrix, local_map, exports, num_packets_per_lid, export_lids, source_pids, offsets, num_bytes_per_value, @@ -698,33 +685,12 @@ do_pack (const LocalMatrix& local_matrix, Kokkos::parallel_reduce (range, f, result); if (result.first != 0) { - std::ostringstream os; - - if (result.first == 1) { // invalid local row index - auto export_lids_h = Kokkos::create_mirror_view (export_lids); - Kokkos::deep_copy (export_lids_h, export_lids); - const auto firstBadLid = export_lids_h(result.second); - os << "First bad export LID: export_lids(i=" << result.second << ") = " - << firstBadLid; - } - else if (result.first == 2) { // invalid offset - auto offsets_h = Kokkos::create_mirror_view (offsets); - Kokkos::deep_copy (offsets_h, offsets); - const auto firstBadOffset = offsets_h(result.second); - - auto num_packets_per_lid_h = 
- Kokkos::create_mirror_view (num_packets_per_lid); - Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid); - os << "First bad offset: offsets(i=" << result.second << ") = " - << firstBadOffset << ", num_packets_per_lid(i) = " - << num_packets_per_lid_h(result.second) << ", buf_size = " - << exports.size (); - } - + // We can't deep_copy from AnonymousSpace Views, so we can't print + // out any information from them in case of error. TEUCHOS_TEST_FOR_EXCEPTION - (true, std::runtime_error, prefix << "PackCrsMatrixFunctor reported " - "error code " << result.first << " for the first bad row " - << result.second << ". " << os.str ()); + (true, std::runtime_error, prefix << "PackCrsMatrixFunctor " + "reported error code " << result.first << " for the first " + "bad row " << result.second << "."); } } @@ -799,14 +765,14 @@ packCrsMatrix (const CrsMatrix& sourceMatrix, << num_packets_per_lid.data () << " == NULL."); } - const size_t num_bytes_per_lid = PackTraits::packValueCount (LO (0)); - const size_t num_bytes_per_gid = PackTraits::packValueCount (GO (0)); - const size_t num_bytes_per_pid = PackTraits::packValueCount (int (0)); + const size_t num_bytes_per_lid = PackTraits::packValueCount (LO (0)); + const size_t num_bytes_per_gid = PackTraits::packValueCount (GO (0)); + const size_t num_bytes_per_pid = PackTraits::packValueCount (int (0)); size_t num_bytes_per_value = 0; - if (PackTraits::compileTimeSize) { + if (PackTraits::compileTimeSize) { // Assume ST is default constructible; packValueCount wants an instance. 
- num_bytes_per_value = PackTraits::packValueCount (ST ()); + num_bytes_per_value = PackTraits::packValueCount (ST ()); } else { // Since the packed data come from the source matrix, we can use @@ -821,7 +787,7 @@ packCrsMatrix (const CrsMatrix& sourceMatrix, size_t num_bytes_per_value_l = 0; if (local_matrix.values.extent(0) > 0) { const ST& val = local_matrix.values(0); - num_bytes_per_value_l = PackTraits::packValueCount (val); + num_bytes_per_value_l = PackTraits::packValueCount (val); } using Teuchos::reduceAll; reduceAll (* (sourceMatrix.getComm ()), diff --git a/packages/tpetra/core/src/Tpetra_Details_unpackCrsMatrixAndCombine_def.hpp b/packages/tpetra/core/src/Tpetra_Details_unpackCrsMatrixAndCombine_def.hpp index fffcd139c602..52034d7dfa76 100644 --- a/packages/tpetra/core/src/Tpetra_Details_unpackCrsMatrixAndCombine_def.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_unpackCrsMatrixAndCombine_def.hpp @@ -98,14 +98,11 @@ namespace UnpackAndCombineCrsMatrixImpl { /// documentation of Map for requirements. /// \tparam GO The type of global indices. See the /// documentation of Map for requirements. -/// \tparam DT The Kokkos device type. See the documentation of Map -/// for requirements. -/// \tparam BDT The "buffer device type." 
-template +template KOKKOS_FUNCTION int -unpackRow(typename PackTraits::output_array_type& gids_out, - typename PackTraits::output_array_type& pids_out, - typename PackTraits::output_array_type& vals_out, +unpackRow(const typename PackTraits::output_array_type& gids_out, + const typename PackTraits::output_array_type& pids_out, + const typename PackTraits::output_array_type& vals_out, const char imports[], const size_t offset, const size_t /* num_bytes */, @@ -119,15 +116,15 @@ unpackRow(typename PackTraits::output_array_type& gids_out, bool unpack_pids = pids_out.size() > 0; const size_t num_ent_beg = offset; - const size_t num_ent_len = PackTraits::packValueCount (LO (0)); + const size_t num_ent_len = PackTraits::packValueCount (LO (0)); const size_t gids_beg = num_ent_beg + num_ent_len; const size_t gids_len = - num_ent * PackTraits::packValueCount (GO (0)); + num_ent * PackTraits::packValueCount (GO (0)); const size_t pids_beg = gids_beg + gids_len; const size_t pids_len = unpack_pids ? 
- size_t (num_ent * PackTraits::packValueCount (int (0))) : + size_t (num_ent * PackTraits::packValueCount (int (0))) : size_t (0); const size_t vals_beg = gids_beg + gids_len + pids_len; @@ -140,28 +137,28 @@ unpackRow(typename PackTraits::output_array_type& gids_out, size_t num_bytes_out = 0; LO num_ent_out; - num_bytes_out += PackTraits::unpackValue (num_ent_out, num_ent_in); + num_bytes_out += PackTraits::unpackValue (num_ent_out, num_ent_in); if (static_cast (num_ent_out) != num_ent) { return 20; // error code } { Kokkos::pair p; - p = PackTraits::unpackArray (gids_out.data (), gids_in, num_ent); + p = PackTraits::unpackArray (gids_out.data (), gids_in, num_ent); if (p.first != 0) { return 21; // error code } num_bytes_out += p.second; if (unpack_pids) { - p = PackTraits::unpackArray (pids_out.data (), pids_in, num_ent); + p = PackTraits::unpackArray (pids_out.data (), pids_in, num_ent); if (p.first != 0) { return 22; // error code } num_bytes_out += p.second; } - p = PackTraits::unpackArray (vals_out.data (), vals_in, num_ent); + p = PackTraits::unpackArray (vals_out.data (), vals_in, num_ent); if (p.first != 0) { return 23; // error code } @@ -316,18 +313,18 @@ struct UnpackCrsMatrixAndCombineFunctor { // Get the number of entries to expect in the received data for this row. 
LO num_ent_LO = 0; const char* const in_buf = imports.data () + offset; - (void) PackTraits::unpackValue (num_ent_LO, in_buf); + (void) PackTraits::unpackValue (num_ent_LO, in_buf); const size_t num_ent = static_cast (num_ent_LO); // Count the number of bytes expected to unpack size_t expected_num_bytes = 0; { - expected_num_bytes += PackTraits::packValueCount (LO (0)); - expected_num_bytes += num_ent * PackTraits::packValueCount (GO (0)); + expected_num_bytes += PackTraits::packValueCount (LO (0)); + expected_num_bytes += num_ent * PackTraits::packValueCount (GO (0)); if (unpack_pids) { - expected_num_bytes += num_ent * PackTraits::packValueCount (int (0)); + expected_num_bytes += num_ent * PackTraits::packValueCount (int (0)); } - expected_num_bytes += num_ent * PackTraits::packValueCount (ST ()); + expected_num_bytes += num_ent * PackTraits::packValueCount (ST ()); } if (expected_num_bytes > num_bytes) { @@ -353,9 +350,9 @@ struct UnpackCrsMatrixAndCombineFunctor { // Unpack this row! int unpack_err = - unpackRow(gids_out, pids_out, vals_out, - imports.data(), offset, num_bytes, - num_ent, num_bytes_per_value); + unpackRow(gids_out, pids_out, vals_out, + imports.data(), offset, num_bytes, + num_ent, num_bytes_per_value); if (unpack_err != 0) { dst = Kokkos::make_pair (unpack_err, i); // unpack error tokens.release (token); @@ -439,7 +436,7 @@ class NumEntriesFunctor { if (num_bytes > 0) { LO num_ent_LO = 0; // output argument of unpackValue const char* const in_buf = imports.data () + offsets(i); - (void) PackTraits::unpackValue (num_ent_LO, in_buf); + (void) PackTraits::unpackValue (num_ent_LO, in_buf); const size_t num_ent = static_cast (num_ent_LO); update = (update < num_ent) ? 
num_ent : update; @@ -461,7 +458,7 @@ class NumEntriesFunctor { if (num_bytes > 0) { LO num_ent_LO = 0; // output argument of unpackValue const char* const in_buf = imports.data () + offsets(i); - (void) PackTraits::unpackValue (num_ent_LO, in_buf); + (void) PackTraits::unpackValue (num_ent_LO, in_buf); tot_num_ent += static_cast (num_ent_LO); } } @@ -537,7 +534,7 @@ unpackAndCombineIntoCrsMatrix( const LocalMap& local_map, const Kokkos::View& imports, const Kokkos::View& num_packets_per_lid, - const typename PackTraits::input_array_type import_lids, + const typename PackTraits::input_array_type import_lids, const Tpetra::CombineMode combine_mode, const bool unpack_pids, const bool atomic) @@ -599,7 +596,7 @@ unpackAndCombineIntoCrsMatrix( // FIXME (TJF SEP 2017) // The scalar type is not necessarily default constructible - size_t num_bytes_per_value = PackTraits::packValueCount(ST()); + size_t num_bytes_per_value = PackTraits::packValueCount(ST()); // Now do the actual unpack! unpack_functor_type f(local_matrix, local_map, @@ -620,7 +617,7 @@ template size_t unpackAndCombineWithOwningPIDsCount( const LocalMatrix& local_matrix, - const typename PackTraits::input_array_type permute_from_lids, + const typename PackTraits::input_array_type permute_from_lids, const Kokkos::View& imports, const Kokkos::View& num_packets_per_lid, const size_t num_same_ids) @@ -674,14 +671,14 @@ unpackAndCombineWithOwningPIDsCount( return count; } -template +template KOKKOS_INLINE_FUNCTION size_t unpackRowCount(const char imports[], const size_t offset, const size_t num_bytes) { - using PT = PackTraits; + using PT = PackTraits; LO num_ent_LO = 0; if (num_bytes > 0) { @@ -699,15 +696,15 @@ unpackRowCount(const char imports[], template int setupRowPointersForRemotes( - const typename PackTraits::output_array_type& tgt_rowptr, - const typename PackTraits::input_array_type& import_lids, + const typename PackTraits::output_array_type& tgt_rowptr, + const typename PackTraits::input_array_type& 
import_lids, const Kokkos::View& imports, const Kokkos::View& num_packets_per_lid, - const typename PackTraits::input_array_type& offsets) + const typename PackTraits::input_array_type& offsets) { using Kokkos::parallel_reduce; typedef typename DT::execution_space XS; - typedef typename PackTraits::input_array_type::size_type size_type; + typedef typename PackTraits::input_array_type::size_type size_type; typedef Kokkos::RangePolicy > range_policy; const size_t InvalidNum = OrdinalTraits::invalid(); @@ -720,7 +717,7 @@ setupRowPointersForRemotes( typedef typename std::remove_reference< decltype( tgt_rowptr(0) ) >::type atomic_incr_type; const size_t num_bytes = num_packets_per_lid(i); const size_t offset = offsets(i); - const size_t num_ent = unpackRowCount (imports.data(), offset, num_bytes); + const size_t num_ent = unpackRowCount (imports.data(), offset, num_bytes); if (num_ent == InvalidNum) { k_error += 1; } @@ -733,7 +730,7 @@ setupRowPointersForRemotes( template void makeCrsRowPtrFromLengths( - const typename PackTraits::output_array_type& tgt_rowptr, + const typename PackTraits::output_array_type& tgt_rowptr, const Kokkos::View& new_start_row) { using Kokkos::parallel_scan; @@ -756,12 +753,12 @@ makeCrsRowPtrFromLengths( template void copyDataFromSameIDs( - const typename PackTraits::output_array_type& tgt_colind, - const typename PackTraits::output_array_type& tgt_pids, - const typename PackTraits::output_array_type& tgt_vals, - const Kokkos::View& new_start_row, - const typename PackTraits::output_array_type& tgt_rowptr, - const typename PackTraits::input_array_type& src_pids, + const typename PackTraits::output_array_type& tgt_colind, + const typename PackTraits::output_array_type& tgt_pids, + const typename PackTraits::output_array_type& tgt_vals, + const Kokkos::View& new_start_row, + const typename PackTraits::output_array_type& tgt_rowptr, + const typename PackTraits::input_array_type& src_pids, const LocalMatrix& local_matrix, const LocalMap& 
local_col_map, const size_t num_same_ids, @@ -801,14 +798,14 @@ copyDataFromSameIDs( template void copyDataFromPermuteIDs( - const typename PackTraits::output_array_type& tgt_colind, - const typename PackTraits::output_array_type& tgt_pids, - const typename PackTraits::output_array_type& tgt_vals, + const typename PackTraits::output_array_type& tgt_colind, + const typename PackTraits::output_array_type& tgt_pids, + const typename PackTraits::output_array_type& tgt_vals, const Kokkos::View& new_start_row, - const typename PackTraits::output_array_type& tgt_rowptr, - const typename PackTraits::input_array_type& src_pids, - const typename PackTraits::input_array_type& permute_to_lids, - const typename PackTraits::input_array_type& permute_from_lids, + const typename PackTraits::output_array_type& tgt_rowptr, + const typename PackTraits::input_array_type& src_pids, + const typename PackTraits::input_array_type& permute_to_lids, + const typename PackTraits::input_array_type& permute_from_lids, const LocalMatrix& local_matrix, const LocalMap& local_col_map, const int my_pid) @@ -817,7 +814,7 @@ copyDataFromPermuteIDs( typedef typename LocalMap::device_type DT; typedef typename LocalMap::local_ordinal_type LO; typedef typename DT::execution_space XS; - typedef typename PackTraits::input_array_type::size_type size_type; + typedef typename PackTraits::input_array_type::size_type size_type; typedef Kokkos::RangePolicy > range_policy; const size_type num_permute_to_lids = permute_to_lids.extent(0); @@ -850,12 +847,12 @@ copyDataFromPermuteIDs( template int unpackAndCombineIntoCrsArrays2( - const typename PackTraits::output_array_type& tgt_colind, - const typename PackTraits::output_array_type& tgt_pids, - const typename PackTraits::output_array_type& tgt_vals, + const typename PackTraits::output_array_type& tgt_colind, + const typename PackTraits::output_array_type& tgt_pids, + const typename PackTraits::output_array_type& tgt_vals, const Kokkos::View& new_start_row, - const 
typename PackTraits::input_array_type& offsets, - const typename PackTraits::input_array_type& import_lids, + const typename PackTraits::input_array_type& offsets, + const typename PackTraits::input_array_type& import_lids, const Kokkos::View& imports, const Kokkos::View& num_packets_per_lid, const LocalMatrix& /* local_matrix */, @@ -899,7 +896,7 @@ unpackAndCombineIntoCrsArrays2( // Empty buffer means that the row is empty. return; } - size_t num_ent = unpackRowCount(imports.data(), offset, num_bytes); + size_t num_ent = unpackRowCount(imports.data(), offset, num_bytes); if (num_ent == InvalidNum) { k_error += 1; return; @@ -912,9 +909,9 @@ unpackAndCombineIntoCrsArrays2( vals_out_type vals_out = subview(tgt_vals, slice(start_row, end_row)); pids_out_type pids_out = subview(tgt_pids, slice(start_row, end_row)); - k_error += unpackRow(gids_out, pids_out, vals_out, - imports.data(), offset, num_bytes, - num_ent, num_bytes_per_value); + k_error += unpackRow(gids_out, pids_out, vals_out, + imports.data(), offset, num_bytes, + num_ent, num_bytes_per_value); // Correct target PIDs. 
for (size_t j = 0; j < static_cast(num_ent); ++j) { @@ -931,16 +928,16 @@ void unpackAndCombineIntoCrsArrays( const LocalMatrix & local_matrix, const LocalMap & local_col_map, - const typename PackTraits::input_array_type& import_lids, + const typename PackTraits::input_array_type& import_lids, const Kokkos::View& imports, const Kokkos::View& num_packets_per_lid, - const typename PackTraits::input_array_type& permute_to_lids, - const typename PackTraits::input_array_type& permute_from_lids, - const typename PackTraits::output_array_type& tgt_rowptr, - const typename PackTraits::output_array_type& tgt_colind, - const typename PackTraits::output_array_type& tgt_vals, - const typename PackTraits::input_array_type& src_pids, - const typename PackTraits::output_array_type& tgt_pids, + const typename PackTraits::input_array_type& permute_to_lids, + const typename PackTraits::input_array_type& permute_from_lids, + const typename PackTraits::output_array_type& tgt_rowptr, + const typename PackTraits::output_array_type& tgt_colind, + const typename PackTraits::output_array_type& tgt_vals, + const typename PackTraits::input_array_type& src_pids, + const typename PackTraits::output_array_type& tgt_pids, const size_t num_same_ids, const size_t tgt_num_rows, const size_t tgt_num_nonzeros, @@ -1028,13 +1025,6 @@ unpackAndCombineIntoCrsArrays( // Turn row length into a real CRS row pointer makeCrsRowPtrFromLengths(tgt_rowptr, new_start_row); - { - auto nth_tgt_rowptr_h = getEntryOnHost(tgt_rowptr, N); - bool condition = nth_tgt_rowptr_h != mynnz; - TEUCHOS_TEST_FOR_EXCEPTION(condition, std::invalid_argument, - prefix << "CRS_rowptr[last] = " << - nth_tgt_rowptr_h << "!= mynnz = " << mynnz << "."); - } // SameIDs: Copy the data over copyDataFromSameIDs(tgt_colind, tgt_pids, tgt_vals, new_start_row, @@ -1462,9 +1452,9 @@ unpackAndCombineIntoCrsArrays ( TargetPids.size(), true, "tgt_pids"); size_t num_bytes_per_value = 0; - if (PackTraits::compileTimeSize) { + if 
(PackTraits::compileTimeSize) { // assume that ST is default constructible - num_bytes_per_value = PackTraits::packValueCount(ST()); + num_bytes_per_value = PackTraits::packValueCount(ST()); } else { // Since the packed data come from the source matrix, we can use the source @@ -1478,10 +1468,10 @@ unpackAndCombineIntoCrsArrays ( size_t num_bytes_per_value_l = 0; if (local_matrix.values.extent(0) > 0) { const ST& val = local_matrix.values(0); - num_bytes_per_value_l = PackTraits::packValueCount(val); + num_bytes_per_value_l = PackTraits::packValueCount(val); } else { const ST& val = crs_vals_d(0); - num_bytes_per_value_l = PackTraits::packValueCount(val); + num_bytes_per_value_l = PackTraits::packValueCount(val); } Teuchos::reduceAll(*(sourceMatrix.getComm()), Teuchos::REDUCE_MAX, diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp index faa1683434bd..5fe669295e11 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp @@ -34,8 +34,6 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER @@ -4621,8 +4619,8 @@ namespace Tpetra { auto v1 = this->getLocalViewHost (); auto v2 = vec.getLocalViewHost (); - if (PackTraits::packValueCount (v1(0,0)) != - PackTraits::packValueCount (v2(0,0))) { + if (PackTraits::packValueCount (v1(0,0)) != + PackTraits::packValueCount (v2(0,0))) { return false; } From 2fa0707e005016af485f911eeba07d7593a287a5 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Sun, 17 Nov 2019 15:37:24 -0700 Subject: [PATCH 04/13] Tpetra: Start work on benchmark for CrsMatrix dense row unpack @trilinos/tpetra This is related to #6282. 
--- .../advanced/Benchmarks/CMakeLists.txt | 6 + .../Benchmarks/CrsMatrixDenseRowUnpack.cpp | 494 ++++++++++++++++++ packages/tpetra/core/src/Tpetra_Map_decl.hpp | 2 +- 3 files changed, 501 insertions(+), 1 deletion(-) create mode 100644 packages/tpetra/core/example/advanced/Benchmarks/CrsMatrixDenseRowUnpack.cpp diff --git a/packages/tpetra/core/example/advanced/Benchmarks/CMakeLists.txt b/packages/tpetra/core/example/advanced/Benchmarks/CMakeLists.txt index cb0fb4b8d688..94c10235d48f 100644 --- a/packages/tpetra/core/example/advanced/Benchmarks/CMakeLists.txt +++ b/packages/tpetra/core/example/advanced/Benchmarks/CMakeLists.txt @@ -32,3 +32,9 @@ TRIBITS_ADD_EXECUTABLE( SOURCES CrsMatrix_sumIntoLocalValues.cpp COMM serial mpi ) + +TRIBITS_ADD_EXECUTABLE( + CrsMatrixDenseRowUnpack + SOURCES CrsMatrixDenseRowUnpack.cpp + COMM serial mpi +) diff --git a/packages/tpetra/core/example/advanced/Benchmarks/CrsMatrixDenseRowUnpack.cpp b/packages/tpetra/core/example/advanced/Benchmarks/CrsMatrixDenseRowUnpack.cpp new file mode 100644 index 000000000000..badd9e855bcd --- /dev/null +++ b/packages/tpetra/core/example/advanced/Benchmarks/CrsMatrixDenseRowUnpack.cpp @@ -0,0 +1,494 @@ +// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+//
+// ************************************************************************
+// @HEADER
+
+#include "Tpetra_Core.hpp"
+#include "Tpetra_CrsGraph.hpp"
+#include "Tpetra_CrsMatrix.hpp"
+#include "Tpetra_Export.hpp"
+#include "Tpetra_Map.hpp"
+#include "Tpetra_Details_Behavior.hpp"
+#include "Teuchos_CommandLineProcessor.hpp"
+#include "Teuchos_FancyOStream.hpp"
+#include "Teuchos_TimeMonitor.hpp"
+#include
+#include
+
+using Tpetra::global_size_t;
+using Teuchos::Array;
+using Teuchos::ArrayView;
+using Teuchos::as;
+using Teuchos::CommandLineProcessor;
+using Teuchos::RCP;
+using Teuchos::rcp;
+using Teuchos::Time;
+using Teuchos::TimeMonitor;
+
+using GST = Tpetra::global_size_t;
+using map_type = Tpetra::Map<>;
+using LO = map_type::local_ordinal_type;
+using GO = map_type::global_ordinal_type;
+using crs_graph_type = Tpetra::CrsGraph<>;
+using crs_matrix_type = Tpetra::CrsMatrix;
+using export_type = Tpetra::Export<>;
+
+// Benchmark parameters, with in-class defaults.  How they get
+// overridden (presumably from the command line, given the name) is
+// outside this section of the file.
+struct CmdLineArgs {
+  // Not read in this section; presumably the fraction of each row's
+  // columns that receive an entry -- TODO confirm against matrix setup.
+  double densityFraction = 1.0;
+  // Number of rows per process in the target row Map.
+  int lclNumRows = 10;
+  // Number of domain Map columns per process (see getDomainMap).  The
+  // default of -1 is presumably a "not set" sentinel that the driver
+  // replaces before use -- TODO confirm.
+  int lclNumCols = -1;
+  // Not read in this section; presumably the number of timed trials.
+  int numTrials = 100;
+  // Number of rows per process in the source row Map
+  // (see getSourceRowMap).
+  int numOverlapRows = 3;
+  // If true, getTargetRowMap uses Map's (global count, index base,
+  // comm) constructor; otherwise it passes an explicit, permuted list
+  // of global indices.
+  bool contiguousMaps = true;
+};
+
+// Return the calling process' target row Map global indices (GIDs),
+// in a deliberately noncontiguous order.
+//
+// The result is a permutation of the block
+// [gidBase, gidBase + lclNumRows), where gidBase = lclNumRows * myRank:
+// odd local indices (LIDs) keep their position, and even LIDs are
+// reversed among themselves.
+//
+// lclNumRows: number of GIDs to generate.
+// myRank:     rank of the calling process; selects the block.
+// numProcs:   number of processes; only used to bounds-check each GID.
+//
+// Returns an owning array of lclNumRows GIDs.
+std::unique_ptr
+getTargetRowMapIndices(const LO lclNumRows,
+                       const int myRank,
+                       const int numProcs)
+{
+  const GO gblNumRows = GO(lclNumRows) * GO(numProcs);
+  const GO indexBase = 0;
+  const GO gidBase = indexBase + GO(lclNumRows) * GO(myRank);
+
+  // Largest even LID.  Even LIDs are reflected about this value rather
+  // than about N-1, so that their offsets stay even and can never
+  // collide with the odd LIDs' offsets.  (Reflecting about N-1 gives
+  // duplicate GIDs whenever N is even.)  For odd N this equals N-1.
+  const LO lastEvenLid =
+    (lclNumRows - LO(1)) - (lclNumRows - LO(1)) % LO(2);
+
+  // LIDs [0, N-1] -> GIDs gidBase + [E, 1, E-2, 3, E-4, 5, ...],
+  // where E is the largest even LID.
+  std::unique_ptr tgtGids(new GO[lclNumRows]);
+  for (LO lid = 0; lid < lclNumRows; ++lid) {
+    const LO offset = (lid % LO(2) == 0) ?
+      lastEvenLid - lid : lid;
+    const GO gblRow = gidBase + GO(offset);
+    TEUCHOS_ASSERT(gblRow >= indexBase);
+    TEUCHOS_ASSERT(gblRow < indexBase + gblNumRows);
+    tgtGids[lid] = gblRow;
+  }
+  // Return the local by name: it is moved (or elided) automatically,
+  // whereas an explicit std::move would inhibit copy elision.
+  return tgtGids;
+}
+
+// Create the target row Map, with args.lclNumRows rows per process and
+// index base 0.
+//
+// If args.contiguousMaps is true, the Map is built from the global row
+// count alone; otherwise it is built from the permuted GID list that
+// getTargetRowMapIndices returns.
+//
+// comm: communicator over which the Map is distributed.
+// args: benchmark parameters; lclNumRows and contiguousMaps are read.
+RCP
+getTargetRowMap(const RCP>& comm,
+                const CmdLineArgs& args)
+{
+  const int myRank = comm->getRank();
+  const int numProcs = comm->getSize();
+  const LO lclNumRows = args.lclNumRows;
+  const GO gblNumRows = GO(lclNumRows) * GO(numProcs);
+  const GO indexBase = 0;
+
+  if(args.contiguousMaps) {
+    return rcp(new map_type(gblNumRows, indexBase, comm));
+  }
+  else {
+    // tgtGids only needs to outlive the constructor call; presumably
+    // Map copies the indices it is given -- TODO confirm.
+    std::unique_ptr tgtGids =
+      getTargetRowMapIndices(lclNumRows, myRank, numProcs);
+    return rcp(new map_type(gblNumRows, tgtGids.get(),
+                            lclNumRows, indexBase, comm));
+  }
+}
+
+// Create the source row Map: args.numOverlapRows consecutive GIDs per
+// process, starting at the first GID of the block that follows this
+// process' own block of args.lclNumRows GIDs (wrapping around to the
+// start after the last block).
+//
+// tgtMap: target row Map; supplies the communicator and index base.
+// args:   benchmark parameters; lclNumRows and numOverlapRows are read.
+RCP
+getSourceRowMap(const RCP& tgtMap,
+                const CmdLineArgs& args)
+{
+  const auto comm = tgtMap->getComm();
+  const int myRank = comm->getRank();
+  const int numProcs = comm->getSize();
+  const LO tgtLclNumRows = LO(args.lclNumRows);
+  const GO tgtGblNumRows = GO(tgtLclNumRows) * GO(numProcs);
+  // Guard the modulus below against a zero divisor (lclNumRows == 0).
+  TEUCHOS_ASSERT(tgtGblNumRows != GO(0));
+  const GO indexBase = tgtMap->getIndexBase();
+  const GO tgtGidBase = indexBase + GO(tgtLclNumRows) * GO(myRank);
+  const GO srcGidBase = indexBase +
+    ((tgtGidBase + GO(tgtLclNumRows)) % tgtGblNumRows);
+  const LO srcLclNumRows = LO(args.numOverlapRows);
+
+  TEUCHOS_ASSERT(srcGidBase >= indexBase);
+  TEUCHOS_ASSERT(srcGidBase < indexBase + tgtGblNumRows);
+
+  // Construct Source Map so that copyAndPermute has nothing to do.
+  // This should help focus the benchmark on pack and unpack.
+  std::unique_ptr srcGids(new GO[srcLclNumRows]);
+  for(LO lid = 0; lid < srcLclNumRows; ++lid) {
+    const GO gblRow = srcGidBase + GO(lid);
+    TEUCHOS_ASSERT(gblRow >= indexBase);
+    TEUCHOS_ASSERT(gblRow < indexBase + tgtGblNumRows);
+    srcGids[lid] = gblRow;
+  }
+  using Teuchos::OrdinalTraits;
+  // The global count is passed as invalid(); presumably this asks Map
+  // to compute it from the per-process counts -- TODO confirm.
+  return rcp(new map_type(OrdinalTraits::invalid(), srcGids.get(),
+                          srcLclNumRows, tgtMap->getIndexBase(), comm));
+}
+
+// Create the domain Map: args.lclNumCols * (number of processes)
+// global indices, with index base 0.
+//
+// NOTE(review): CmdLineArgs defaults lclNumCols to -1, which would
+// make the global count negative here; this assumes the caller has
+// already replaced it with a nonnegative value -- TODO confirm.
+//
+// comm: communicator over which the Map is distributed.
+// args: benchmark parameters; only lclNumCols is read.
+RCP
+getDomainMap(const RCP>& comm,
+             const CmdLineArgs& args)
+{
+  const int numProcs = comm->getSize();
+  const GO gblNumCols = GO(args.lclNumCols) * GO(numProcs);
+  const GO indexBase = 0;
+
+  return rcp(new map_type(gblNumCols, indexBase, comm));
+}
+
+// Create a new timer with the given name if it hasn't already been
+// created, else get the previously created timer with that name.
+RCP