From 00e8457d329b61aeadbb20637084d503fe7a9acd Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 9 Sep 2016 17:38:46 -0600 Subject: [PATCH] Tpetra::CrsGraph: Fix #601 (overlapping row Map globalAssemble) @trilinos/tpetra My earlier commits today reimplemented Tpetra::CrsGraph::globalAssemble to fix both performance and correctness issues. This commit fixes that work for the case where the CrsGraph has an overlapping row Map. I added a test for this case, which passes. This commit thus fixes #601. --- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 16 +- .../tpetra/core/test/CrsGraph/CMakeLists.txt | 12 + .../core/test/CrsGraph/CrsGraph_Issue601.cpp | 223 ++++++++++++++++++ 3 files changed, 245 insertions(+), 6 deletions(-) create mode 100644 packages/tpetra/core/test/CrsGraph/CrsGraph_Issue601.cpp diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index 397000e06b37..0a3c27e660ae 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -3327,7 +3327,10 @@ namespace Tpetra { // We just use it as a temporary container for the Export. // 4. If the original row Map is one to one, then we can Export - // directly from nonlocalGraph into this. + // directly from nonlocalGraph into this. Otherwise, we have + // to create a temporary graph with a one-to-one row Map, + // Export into that, then Import from the temporary graph into + // *this. auto origRowMap = this->getRowMap (); const bool origRowMapIsOneToOne = origRowMap->isOneToOne (); @@ -3345,17 +3348,18 @@ namespace Tpetra { auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap); export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap); - // We don't need the graph of nonlocals anymore, so get rid of - // it, to keep the memory high-water mark down. - nonlocalGraph = Teuchos::null; - - // Create a new graph with the one-to-one row Map. + // Create a temporary graph with the one-to-one row Map. // // TODO (mfh 09 Sep 2016) Estimate the number of entries in each // row, to avoid reallocation during the Export operation. crs_graph_type oneToOneGraph (oneToOneRowMap, 0); + // Export from graph of nonlocals into the temp one-to-one graph. oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT); + // We don't need the graph of nonlocals anymore, so get rid of + // it, to keep the memory high-water mark down. + nonlocalGraph = Teuchos::null; + // Import from the one-to-one graph to the original graph. import_type importToOrig (oneToOneRowMap, origRowMap); this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT); diff --git a/packages/tpetra/core/test/CrsGraph/CMakeLists.txt b/packages/tpetra/core/test/CrsGraph/CMakeLists.txt index 4c376ec2abbb..dd33e058b144 100644 --- a/packages/tpetra/core/test/CrsGraph/CMakeLists.txt +++ b/packages/tpetra/core/test/CrsGraph/CMakeLists.txt @@ -37,3 +37,15 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( # SOURCE_FILES west0067.rua mhd1280b.cua # EXEDEPS CrsGraph_UnitTests # ) + +# This test needs at least 3 MPI processes to be meaningful. +TRIBITS_ADD_EXECUTABLE_AND_TEST( + Issue601 + SOURCES + CrsGraph_Issue601.cpp + ${TEUCHOS_STD_UNIT_TEST_MAIN} + ARGS "" + COMM mpi + NUM_MPI_PROCS 3-8 + STANDARD_PASS_OUTPUT + ) diff --git a/packages/tpetra/core/test/CrsGraph/CrsGraph_Issue601.cpp b/packages/tpetra/core/test/CrsGraph/CrsGraph_Issue601.cpp new file mode 100644 index 000000000000..d059a70147b4 --- /dev/null +++ b/packages/tpetra/core/test/CrsGraph/CrsGraph_Issue601.cpp @@ -0,0 +1,223 @@ +/* +// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +// @HEADER +*/ + +#include "Tpetra_CrsGraph.hpp" +#include "Tpetra_Map.hpp" +#include "Teuchos_CommHelpers.hpp" // REDUCE_MIN, reduceAll +#include "Tpetra_TestingUtilities.hpp" + +namespace { // (anonymous) + + using Tpetra::TestingUtilities::getDefaultComm; + using Teuchos::outArg; + using Teuchos::RCP; + using Teuchos::rcp; + using Teuchos::REDUCE_MIN; + using Teuchos::reduceAll; + using Teuchos::tuple; + using std::endl; + typedef Tpetra::global_size_t GST; + + // + // UNIT TESTS + // + + // See Github Issue #601. This test is really only useful if run with > 2 MPI processes. + TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( CrsGraph, GlobalAssembleOverlappingRowMap, LO, GO, NT ) + { + typedef Tpetra::CrsGraph crs_graph_type; + typedef Tpetra::Map map_type; + int lclSuccess = 1; + int gblSuccess = 0; + + out << "Test Tpetra::CrsGraph::globalAssemble with overlapping row Map " + "(see Github Issue #601)" << endl; + Teuchos::OSTab tab1 (out); + + auto comm = getDefaultComm (); + const int myRank = comm->getRank (); + const int numProcs = comm->getSize (); + + TEST_ASSERT( numProcs > 2 ); + if (! success) { + out << "This test is not useful unless you run it with 3 or more MPI " + "processes!" << endl; + return; + } + out << "Create overlapping row Map" << endl; + + // Each process r gets 2 rows: r, and (r+1) % numProcs. + // The resulting row Map is overlapping, because numProcs > 2. + + const GST INV = Teuchos::OrdinalTraits::invalid (); + Teuchos::Array myGblRowInds (2); + myGblRowInds[0] = static_cast (myRank); + myGblRowInds[1] = static_cast ((myRank + 1) % numProcs); + const GO indexBase = 0; + RCP rowMap = + rcp (new map_type (INV, myGblRowInds (), indexBase, comm)); + + Tpetra::ProfileType profileTypes[] = { Tpetra::DynamicProfile, Tpetra::StaticProfile }; + bool insertLocalEntryValues[] = { true, false }; + + // Test both dynamic and static profile. + for (Tpetra::ProfileType profileType : profileTypes) { + + out << "ProfileType: " + << ((profileType == Tpetra::DynamicProfile) ? "Dynamic" : "Static") + << "Profile" << endl; + Teuchos::OSTab tab2 (out); + + // Test both with no local entries before globalAssemble(), and + // with some (one) local entries before globalAssemble(). This + // exercises lazy allocation of entries, if that's what CrsGraph + // does (it does this as of 09 Sep 2016, though I've been aiming + // to get rid of lazy allocation for years). + for (bool insertLocalEntry : insertLocalEntryValues) { + out << "insertLocalEntry: " << (insertLocalEntry ? "true" : "false") + << endl; + Teuchos::OSTab tab3 (out); + + const size_t maxNumEntPerRow = static_cast (insertLocalEntry ? 2 : 1); + crs_graph_type G (rowMap, maxNumEntPerRow, profileType); + + const GO gblRow0 = static_cast (myRank); + const GO gblRow1 = static_cast ((myRank + 1) % numProcs); + + if (insertLocalEntry) { + // Insert the diagonal entry in each of the rows on this + // process. This results in duplication across processes. + // globalAssemble() should do the right thing in that case. + G.insertGlobalIndices (gblRow0, tuple (gblRow0)); + G.insertGlobalIndices (gblRow1, tuple (gblRow1)); + } + // Insert an entry into row (myRank - 1) % numProcs. Add + // numProcs to (myRank - 1) before taking the mod, so it + // doesn't turn out negative. (C(++) doesn't promise that the + // result of mod is nonnegative if the input is negative.) + { + const GO nonlocalGblRow = + static_cast ((numProcs + myRank - 1) % numProcs); + // Make sure the new column index is not a duplicate (on the + // process that will receive it at globalAssemble). This is + // very much NOT a square graph (/ matrix). + const GO gblCol = static_cast (numProcs) + nonlocalGblRow; + TEST_NOTHROW( G.insertGlobalIndices (nonlocalGblRow, tuple (gblCol)) ); + } + + out << "Call G.globalAssemble()" << endl; + // This is the moment of truth. + TEST_NOTHROW( G.globalAssemble () ); + + lclSuccess = success ? 1 : 0; + gblSuccess = 0; + reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess)); + TEST_EQUALITY_CONST( gblSuccess, 1 ); + if (! success) { + out << "Test FAILED on some process; returning early." << endl; + return; + } + + out << "Now make sure each of our process' rows are correct" << endl; + + // Test gblRow0 + { + Teuchos::ArrayView gblInds; + G.getGlobalRowView (gblRow0, gblInds); + + const LO expectedNumEnt = static_cast (maxNumEntPerRow); + TEST_EQUALITY( static_cast (gblInds.size ()), expectedNumEnt ); + if (static_cast (gblInds.size ()) == expectedNumEnt) { + if (insertLocalEntry) { + auto lclEntIter = std::find (gblInds.begin (), gblInds.end (), gblRow0); + TEST_ASSERT( lclEntIter != gblInds.end () ); + } + const GO gblCol0 = gblRow0 + static_cast (numProcs); + auto nonlclEntIter = std::find (gblInds.begin (), gblInds.end (), gblCol0); + TEST_ASSERT( nonlclEntIter != gblInds.end () ); + } + } + + // Test gblRow1 + { + Teuchos::ArrayView gblInds; + G.getGlobalRowView (gblRow1, gblInds); + + const LO expectedNumEnt = static_cast (maxNumEntPerRow); + TEST_EQUALITY( static_cast (gblInds.size ()), expectedNumEnt ); + if (static_cast (gblInds.size ()) == expectedNumEnt) { + if (insertLocalEntry) { + auto lclEntIter = std::find (gblInds.begin (), gblInds.end (), gblRow1); + TEST_ASSERT( lclEntIter != gblInds.end () ); + } + const GO gblCol1 = gblRow1 + static_cast (numProcs); + auto nonlclEntIter = std::find (gblInds.begin (), gblInds.end (), gblCol1); + TEST_ASSERT( nonlclEntIter != gblInds.end () ); + } + } + + // Test across processes. + lclSuccess = success ? 1 : 0; + gblSuccess = 0; + reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess)); + TEST_EQUALITY_CONST( gblSuccess, 1 ); + if (! success) { + out << "Test FAILED on some process; returning early." << endl; + return; + } + } // insertLocalEntry + } // profileType + } + + // + // INSTANTIATIONS + // + +#define UNIT_TEST_GROUP( LO, GO, NT ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, GlobalAssembleOverlappingRowMap, LO, GO, NT ) + + TPETRA_ETI_MANGLING_TYPEDEFS() + + TPETRA_INSTANTIATE_LGN( UNIT_TEST_GROUP ) + +} // namespace (anonymous)