From b55ec8cdcd8e42b7b3f1d6978d740140f5fb7c06 Mon Sep 17 00:00:00 2001 From: "K. Devine" Date: Wed, 15 Apr 2020 12:57:00 -0600 Subject: [PATCH 1/3] tpetra: Adding a test that doesn't use row-based matrix distribution --- .../tpetra/core/test/CrsMatrix/CMakeLists.txt | 9 + .../test/CrsMatrix/CrsMatrix_2DRandomDist.cpp | 367 ++++++++++++++++++ 2 files changed, 376 insertions(+) create mode 100644 packages/tpetra/core/test/CrsMatrix/CrsMatrix_2DRandomDist.cpp diff --git a/packages/tpetra/core/test/CrsMatrix/CMakeLists.txt b/packages/tpetra/core/test/CrsMatrix/CMakeLists.txt index 8f3ff7d3fa4a..1b9bad78415b 100644 --- a/packages/tpetra/core/test/CrsMatrix/CMakeLists.txt +++ b/packages/tpetra/core/test/CrsMatrix/CMakeLists.txt @@ -88,6 +88,15 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( STANDARD_PASS_OUTPUT ) +TRIBITS_ADD_EXECUTABLE_AND_TEST( + CrsMatrix_2DRandomDist + SOURCES + CrsMatrix_2DRandomDist.cpp + COMM serial mpi + PASS_REGULAR_EXPRESSION "PASS" + FAIL_REGULAR_EXPRESSION "FAIL" + ) + # We split the CrsMatrix_WithGraph test by execution space. # This speeds up the build. diff --git a/packages/tpetra/core/test/CrsMatrix/CrsMatrix_2DRandomDist.cpp b/packages/tpetra/core/test/CrsMatrix/CrsMatrix_2DRandomDist.cpp new file mode 100644 index 000000000000..f966fd938d16 --- /dev/null +++ b/packages/tpetra/core/test/CrsMatrix/CrsMatrix_2DRandomDist.cpp @@ -0,0 +1,367 @@ +/* +// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// ************************************************************************ +// @HEADER +*/ + +// This program tests matrix creation and matrix apply using matrices with +// arbitrarily distributed nonzeros (not necessarily row-based distribution). +// +// Create global matrix nonzeros randomly; store all global nonzeros on +// each proc in a std::map. +// Create distributed vectors with randomized entries using Trilinos' default +// maps +// For each test (linear row-wise distribution, linear column-wise distribution, +// random distribution of nonzeros (2D) to processors) +// distribute matrix nonzeros (each proc selects subset of global nonzeros) +// create distributed CrsMatrix +// perform SpMV (nMatvec SpMVs) +// return result of SpMV +// Compare norms of results of SpMV from all distributions; they should be the +// same. +// +// NOTE: timings are also reported but should be interpreted carefully. +// This test does not attempt to optimize the distribution of the vectors to +// minimize communication costs. Moreover, 2D random distribution of nonzeros +// can lead to high communication volume; a better 2D approach would be a +// block-based approach that better aligns vector entries with matrix entries. + +#include "Tpetra_Core.hpp" +#include "Tpetra_Map.hpp" +#include "Tpetra_CrsMatrix.hpp" +#include "Tpetra_Vector.hpp" +#include "Teuchos_TimeMonitor.hpp" + +// Class to generate, distribute and apply nonzeros +template +class generatedNonzeros +{ +public: + using map_t = Tpetra::Map<>; + using vector_t = Tpetra::Vector; + + // Randomly generate all nonzeros for the entire matrix on every processor + // Values are in the range 0.1 to 10.1; + generatedNonzeros( + size_t nRows_, size_t nCols_, size_t nnz, + const Teuchos::RCP > &comm_ + ) : + nRows(nRows_), nCols(nCols_), comm(comm_) + { + // Synchronize RNG; want all processors to generate the same nonzeros + srand(1); + + // use a map to remove duplicates and sort entries by row + for (size_t n = 0; n < nnz; n++) { + gno_t i = std::rand() % nRows; + gno_t j = std::rand() % nCols; + scalar_t val = 0.1 + scalar_t(std::rand() % 10); + nzmap[std::make_pair(i,j)] = val; + } + + nNz = nzmap.size(); + } + + // Select nonzeros from nzmap that are to be assigned to this processor + // Create CRS data structures to ease matrix construction + void getMyNonzeros( + const int distribution, // flag indicating how to distribute the nonzeros + // == 1 --> row-wise (Trilinos default) + // == 2 --> column-wise + // == 3 --> randomly assign nonzeros to procs + Teuchos::Array &rowIdx, // output: unique row indices of + // nonzeros on this proc (sorted + // in ascending order) + Teuchos::Array &nPerRow, // output: nPerRow[i] == + // number of nonzeros + // in rowIdx[i] on this proc + Teuchos::Array &offsets, // output: CRS offset array; + // length = length(rowIdx) + 1 + Teuchos::Array &colIdx, // output: CRS column indices + Teuchos::Array &val // output: nonzerovalues + ) const + { + int np = comm->getSize(); + int me = comm->getRank(); + + // Precompute values needed for distribution 1 (linear row-wise) + gno_t nMyRows = nRows / np + (nRows % np > me); + gno_t myFirstRow = (me * (nRows / np) + std::min(nRows % np, me)); + gno_t myLastRow = myFirstRow + nMyRows - 1; + + // Precompute values needed for distribution 2 (linear column-wise) + gno_t nMyCols = nCols / np + (nCols % np > me); + gno_t myFirstCol = (me * (nCols / np) + std::min(nCols % np, me)); + gno_t myLastCol = myFirstCol + nMyCols - 1; + + // Produce CRS formatted arrays of nonzeros assigned to this processor + // given a requested Distribution + gno_t prev_i = std::numeric_limits::max(); + + // Loop over global nonzeros; insert those assigned to this processor in + // CRS arrays. + // Exploit fact that nzmap entries are sorted by row i to build CRS arrays + for (auto nz = nzmap.begin(); nz != nzmap.end(); nz++) { + + gno_t i = nz->first.first; + gno_t j = nz->first.second; + scalar_t v = nz->second; + + // Check whether nonzero (i,j) should be stored on this processor + bool mine = false; + switch (distribution) { + case 1: // linear row-wise + if (i >= myFirstRow && i <= myLastRow) mine = true; + break; + case 2: // linear col-wise + if (j >= myFirstCol && j <= myLastCol) mine = true; + break; + case 3: // random + int randomproc = std::rand() % np; + if (me == randomproc) mine = true; + break; + } + + if (mine) { + // nzmap entries are sorted by i; add a new i when different from prev i + if (i != prev_i) { + rowIdx.push_back(i); + nPerRow.push_back(0); + prev_i = i; + } + colIdx.push_back(j); + val.push_back(v); + nPerRow.back()++; + } + } + + // Compute prefix sum in offsets array + offsets.resize(rowIdx.size() + 1); + offsets[0] = 0; + for (size_t row = 0; row < rowIdx.size(); row++) + offsets[row+1] = offsets[row] + nPerRow[row]; + } + + // Distribute nonzeros to processors, create CrsMatrix, then apply it to + // input vector x, giving y + // Time the SpMV application + void distributeAndApply( + const int distribution, // flag indicating how to distribute the nonzeros + // == 1 --> row-wise (Trilinos default) + // == 2 --> column-wise + // == 3 --> randomly assign nonzeros to procs + const int nMatvecs, // Number of SpMV to do (for timing test) + const vector_t &xvec, // input: domain vector + vector_t &yvec // output: range vector + ) const + { + // Select this processor's nonzeros based on distribution + Teuchos::Array offsets; + Teuchos::Array nPerRow; + Teuchos::Array rowIdx; + Teuchos::Array colIdx; + Teuchos::Array val; + + getMyNonzeros(distribution, rowIdx, nPerRow, offsets, colIdx, val); + + // Build the CrsMatrix with the assigned nonzeros + using matrix_t = Tpetra::CrsMatrix; + + size_t dummy = Teuchos::OrdinalTraits::invalid(); + Teuchos::RCP rowMap = + Teuchos::rcp(new map_t(dummy, rowIdx(), 0, comm)); + + Teuchos::RCP Amat = Teuchos::rcp(new matrix_t(rowMap, nPerRow())); + + for (size_t r = 0; r < rowIdx.size(); r++) { + size_t tmp = offsets[r+1] - offsets[r]; + Amat->insertGlobalValues(rowIdx[r], + colIdx(offsets[r],tmp), val(offsets[r],tmp)); + } + + std::string tname; + { + switch (distribution) { + case 1: tname = "fillComplete: 1 row-wise"; break; + case 2: tname = "fillComplete: 2 column-wise"; break; + case 3: tname = "fillComplete: 3 random 2D"; break; + } + auto timer = Teuchos::TimeMonitor::getNewTimer(tname); + + Teuchos::TimeMonitor tt(*timer); + Amat->fillComplete(xvec.getMap(), yvec.getMap()); + } + + std::cout << comm->getRank() + << ": nRows " << Amat->getNodeNumRows() + << "; nCols " << Amat->getNodeNumCols() + << "; nnz " << Amat->getNodeNumEntries() + << "; import " + << (Amat->getGraph()->getImporter() == Teuchos::null ? 0 : + Amat->getGraph()->getImporter()->getNumExportIDs()) + << "; export " + << (Amat->getGraph()->getExporter() == Teuchos::null ? 0 : + Amat->getGraph()->getExporter()->getNumExportIDs()) + << std::endl; + + // Perform SpMV; do several iterations to get some timing info + { + switch (distribution) { + case 1: tname = "SpMV: 1 row-wise"; break; + case 2: tname = "SpMV: 2 column-wise"; break; + case 3: tname = "SpMV: 3 random 2D"; break; + } + + auto timer = Teuchos::TimeMonitor::getNewTimer(tname); + for (int n = 0; n < nMatvecs; n++) { + Teuchos::TimeMonitor tt(*timer); + Amat->apply(xvec, yvec); + } + } + } + +private: + + using coord = std::pair; + struct compareCoord { // sort nonzeros by row, then column + bool operator() (const coord &lhs, const coord &rhs) const + { if (lhs.first < rhs.first) return true; + if ((lhs.first == rhs.first) && (lhs.second < rhs.second)) return true; + return false; + } + }; + std::map nzmap; // sorted global nonzeros + + size_t nRows, nCols, nNz; + const Teuchos::RCP > comm; + +}; + +//////////////////////////////////////////////////////////////////////////// + +int main(int narg, char *arg[]) +{ + Tpetra::ScopeGuard scope(&narg, &arg); + Teuchos::RCP > comm = Tpetra::getDefaultComm(); + + using scalar_t = Tpetra::Details::DefaultTypes::scalar_type; + using gno_t = Tpetra::Map<>::global_ordinal_type; + + int me = comm->getRank(); + int np = comm->getSize(); + + const int nMatvecs = 1000; + size_t nRows = np * 100; + size_t nCols = np * 200; + size_t nNz = np * 1000; + + // Create random nonzeros -- all global nonzeros generated on every processor + generatedNonzeros gNz(nRows, nCols, nNz, comm); + + // Create vectors; use Trilinos default range and domain maps + // These vectors do not optimize communication for the random 2D distribution + using map_t = Tpetra::Map<>; + using vector_t = Tpetra::Vector; + + Teuchos::RCP rangeMap = Teuchos::rcp(new map_t(nRows, 0, comm)); + vector_t yvec(rangeMap); + + Teuchos::RCP domainMap = Teuchos::rcp(new map_t(nCols, 0, comm)); + vector_t xvec(domainMap); + xvec.randomize(); + + // Row-wise 1D distribution + gNz.distributeAndApply(1, nMatvecs, xvec, yvec); + scalar_t row1DNorm1 = yvec.norm2(); + scalar_t row1DNorm2 = yvec.norm2(); + scalar_t row1DNormInf = yvec.normInf(); + if (me == 0) + std::cout << "Row-wise 1D distribution: norm1 " << row1DNorm1 + << "; norm2 " << row1DNorm2 + << "; norminf " << row1DNormInf + << std::endl; + + // Column-wise 1D distribution + gNz.distributeAndApply(2, nMatvecs, xvec, yvec); + scalar_t col1DNorm1 = yvec.norm1(); + scalar_t col1DNorm2 = yvec.norm2(); + scalar_t col1DNormInf = yvec.normInf(); + if (me == 0) + std::cout << "Col-wise 1D distribution: norm1 " << col1DNorm1 + << "; norm2 " << col1DNorm2 + << "; norminf " << col1DNormInf + << std::endl; + + // Random 2D distribution + gNz.distributeAndApply(3, nMatvecs, xvec, yvec); + scalar_t random2DNorm1 = yvec.norm1(); + scalar_t random2DNorm2 = yvec.norm2(); + scalar_t random2DNormInf = yvec.normInf(); + if (me == 0) + std::cout << "Random 2D distribution: norm1 " << random2DNorm1 + << "; norm2 " << random2DNorm2 + << "; norminf " << random2DNormInf + << std::endl; + + // Check results + int ierr = 0; + + const scalar_t epsilon = 0.0000001; + if (std::abs(col1DNorm2 - row1DNorm2) > epsilon) { + ierr++; + if (me == 0) + std::cout << "FAIL: column-wise 1D norm " << col1DNorm2 + << " - " << row1DNorm2 << " row-wise 1D norm" + << " = " << std::abs(col1DNorm2 - row1DNorm2) << std::endl; + } + + if (std::abs(random2DNorm2 - row1DNorm2) > epsilon) { + ierr++; + if (me == 0) + std::cout << "FAIL: random 2D norm " << random2DNorm2 + << " = " << row1DNorm2 << " row-wise 1D norm" + << " = " << std::abs(random2DNorm2 - row1DNorm2) << std::endl; + } + + if (ierr == 0 && me == 0) + std::cout << "PASS" << std::endl; + + Teuchos::TimeMonitor::summarize(); + return ierr; +} + From ded23463582941c8e78d9d4ebd98304aa9c878f1 Mon Sep 17 00:00:00 2001 From: "K. Devine" Date: Wed, 15 Apr 2020 13:41:23 -0600 Subject: [PATCH 2/3] Fixed typo --- packages/tpetra/core/test/CrsMatrix/CrsMatrix_2DRandomDist.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tpetra/core/test/CrsMatrix/CrsMatrix_2DRandomDist.cpp b/packages/tpetra/core/test/CrsMatrix/CrsMatrix_2DRandomDist.cpp index f966fd938d16..bddeabc16c1f 100644 --- a/packages/tpetra/core/test/CrsMatrix/CrsMatrix_2DRandomDist.cpp +++ b/packages/tpetra/core/test/CrsMatrix/CrsMatrix_2DRandomDist.cpp @@ -307,7 +307,7 @@ int main(int narg, char *arg[]) // Row-wise 1D distribution gNz.distributeAndApply(1, nMatvecs, xvec, yvec); - scalar_t row1DNorm1 = yvec.norm2(); + scalar_t row1DNorm1 = yvec.norm1(); scalar_t row1DNorm2 = yvec.norm2(); scalar_t row1DNormInf = yvec.normInf(); if (me == 0) From 92a88916a9d64448d9d2f97703223548fa660f7d Mon Sep 17 00:00:00 2001 From: "K. Devine" Date: Fri, 15 May 2020 15:04:15 -0600 Subject: [PATCH 3/3] tpetra: fixed two picky compiler errors (signed vs unsigned) --- .../core/test/CrsMatrix/CrsMatrix_2DRandomDist.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/packages/tpetra/core/test/CrsMatrix/CrsMatrix_2DRandomDist.cpp b/packages/tpetra/core/test/CrsMatrix/CrsMatrix_2DRandomDist.cpp index bddeabc16c1f..b8320f2ab972 100644 --- a/packages/tpetra/core/test/CrsMatrix/CrsMatrix_2DRandomDist.cpp +++ b/packages/tpetra/core/test/CrsMatrix/CrsMatrix_2DRandomDist.cpp @@ -120,12 +120,12 @@ class generatedNonzeros int me = comm->getRank(); // Precompute values needed for distribution 1 (linear row-wise) - gno_t nMyRows = nRows / np + (nRows % np > me); + gno_t nMyRows = nRows / np + (int(nRows % np) > me); gno_t myFirstRow = (me * (nRows / np) + std::min(nRows % np, me)); gno_t myLastRow = myFirstRow + nMyRows - 1; // Precompute values needed for distribution 2 (linear column-wise) - gno_t nMyCols = nCols / np + (nCols % np > me); + gno_t nMyCols = nCols / np + (int(nCols % np) > me); gno_t myFirstCol = (me * (nCols / np) + std::min(nCols % np, me)); gno_t myLastCol = myFirstCol + nMyCols - 1; @@ -173,7 +173,8 @@ class generatedNonzeros // Compute prefix sum in offsets array offsets.resize(rowIdx.size() + 1); offsets[0] = 0; - for (size_t row = 0; row < rowIdx.size(); row++) + size_t nRowIdx = size_t(rowIdx.size()); + for (size_t row = 0; row < nRowIdx; row++) offsets[row+1] = offsets[row] + nPerRow[row]; } @@ -208,7 +209,8 @@ class generatedNonzeros Teuchos::RCP Amat = Teuchos::rcp(new matrix_t(rowMap, nPerRow())); - for (size_t r = 0; r < rowIdx.size(); r++) { + size_t nRowIdx = size_t(rowIdx.size()); + for (size_t r = 0; r < nRowIdx; r++) { size_t tmp = offsets[r+1] - offsets[r]; Amat->insertGlobalValues(rowIdx[r], colIdx(offsets[r],tmp), val(offsets[r],tmp));