From 8098d56e9edea90f3bf2f7bc068d6dbaf75fa3b4 Mon Sep 17 00:00:00 2001 From: Seher Acer Date: Thu, 14 Jan 2021 01:48:49 -0500 Subject: [PATCH] Add quotient algorithm and communication graph model --- .../MueLu_Zoltan2Interface_def.hpp | 25 +- .../partition/Zoltan2_AlgParMETIS.hpp | 18 +- .../partition/Zoltan2_AlgQuotient.hpp | 186 +++++ .../Zoltan2_PartitioningAlgorithms.hpp | 1 + .../src/models/Zoltan2_CommGraphModel.hpp | 633 ++++++++++++++++++ .../zoltan2/core/src/models/Zoltan2_Model.hpp | 1 + .../problems/Zoltan2_PartitioningProblem.hpp | 49 +- .../core/src/problems/Zoltan2_Problem.hpp | 4 +- .../test/core/partition/CMakeLists.txt | 11 + .../test/core/partition/partitioning1.cpp | 30 +- 10 files changed, 924 insertions(+), 34 deletions(-) create mode 100644 packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgQuotient.hpp create mode 100644 packages/zoltan2/core/src/models/Zoltan2_CommGraphModel.hpp diff --git a/packages/muelu/src/Rebalancing/MueLu_Zoltan2Interface_def.hpp b/packages/muelu/src/Rebalancing/MueLu_Zoltan2Interface_def.hpp index 0498e497c1b1..552b2c0ad71f 100644 --- a/packages/muelu/src/Rebalancing/MueLu_Zoltan2Interface_def.hpp +++ b/packages/muelu/src/Rebalancing/MueLu_Zoltan2Interface_def.hpp @@ -256,14 +256,25 @@ namespace MueLu { const typename InputAdapterType::part_t * parts = problem->getSolution().getPartListView(); - // For blkSize > 1, ignore solution for every row but the first ones in a block. - for (GO i = 0; i < numElements/blkSize; i++) { - int partNum = parts[i*blkSize]; - - for (LO j = 0; j < blkSize; j++) - decompEntries[i*blkSize + j] = partNum; + if(algo == "quotient") { + // Quotient algorithm outputs a partition vector of size #MPIranks, + // where each rank gets a single entry locally, i.e., parts[0]. This entry + // denotes which MPI rank in the coarse system that the current MPI rank + // should migrate to as a whole. + int partNum = parts[0]; + for (GO i = 0; i < numElements; i++) { + decompEntries[i] = partNum; + } + } + else { + // For blkSize > 1, ignore solution for every row but the first ones in a block. + for (GO i = 0; i < numElements/blkSize; i++) { + int partNum = parts[i*blkSize]; + + for (LO j = 0; j < blkSize; j++) + decompEntries[i*blkSize + j] = partNum; + } } - Set(level, "Partition", decomposition); } } diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgParMETIS.hpp b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgParMETIS.hpp index 2a57becc96eb..c7915e533a1c 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgParMETIS.hpp +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgParMETIS.hpp @@ -61,13 +61,13 @@ // but Zoltan2 not built with ParMETIS. namespace Zoltan2 { -template +template > class AlgParMETIS : public Algorithm { public: AlgParMETIS(const RCP &/* env */, const RCP > &/* problemComm */, - const RCP > &/* model */ + const RCP &/* model */ ) { throw std::runtime_error( @@ -108,7 +108,7 @@ extern "C" { namespace Zoltan2 { -template +template > class AlgParMETIS : public Algorithm { public: @@ -135,7 +135,7 @@ class AlgParMETIS : public Algorithm */ AlgParMETIS(const RCP &env__, const RCP > &problemComm__, - const RCP &model__) : + const RCP &model__) : env(env__), problemComm(problemComm__), model(model__) { } @@ -146,7 +146,7 @@ class AlgParMETIS : public Algorithm const RCP env; const RCP > problemComm; - const RCP > model; + const RCP model; void scale_weights(size_t n, ArrayView > &fwgts, pm_idx_t *iwgts); @@ -154,8 +154,8 @@ class AlgParMETIS : public Algorithm ///////////////////////////////////////////////////////////////////////////// -template -void AlgParMETIS::partition( + template + void AlgParMETIS::partition( const RCP > &solution ) { @@ -392,8 +392,8 @@ void AlgParMETIS::partition( // rounding to zero weights. // Based on Zoltan's scale_round_weights, mode 1 -template -void AlgParMETIS::scale_weights( + template + void AlgParMETIS::scale_weights( size_t n, ArrayView > &fwgts, diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgQuotient.hpp b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgQuotient.hpp new file mode 100644 index 000000000000..0ef2d12a71b9 --- /dev/null +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_AlgQuotient.hpp @@ -0,0 +1,186 @@ +// @HEADER +// +// *********************************************************************** +// +// Zoltan2: A package of combinatorial algorithms for scientific computing +// Copyright 2012 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Karen Devine (kddevin@sandia.gov) +// Erik Boman (egboman@sandia.gov) +// Siva Rajamanickam (srajama@sandia.gov) +// +// *********************************************************************** +// +// @HEADER +#ifndef _ZOLTAN2_ALGQUOTIENT_HPP_ +#define _ZOLTAN2_ALGQUOTIENT_HPP_ + +#include +#include +#include +#include + +///////////////////////////////////////////////////////////////////////////// +//! \file Zoltan2_AlgQuotient.hpp +// +// This algorithm partitions the CommGraphModel (communication graph model) +// and transfers the solution (which is only stored on active ranks of +// CommGraphModel) to all MPI ranks. Please see model/Zoltan2_CommGraphModel.hpp +// for more details. +///////////////////////////////////////////////////////////////////////////// + +namespace Zoltan2 { + +template +class AlgQuotient : public Algorithm +{ +public: + + typedef CommGraphModel graphModel_t; + typedef typename Adapter::part_t part_t; + + /*! AlgQuotient constructor + * \param env parameters for the problem and library configuration + * \param problemComm the communicator for the problem + * \param model a graph + * \param algName the (inner) algorithm to partition the quotient graph + */ + AlgQuotient(const RCP &env__, + const RCP > &problemComm__, + const RCP &model__, + const std::string algName__) : + env(env__), problemComm(problemComm__), + model(model__) + { + + if(algName__ == "parmetis") + this->innerAlgorithm = rcp(new AlgParMETIS(env, + problemComm, + model)); + else + throw std::logic_error(algName__ + " is not implemented in AlgQuotient yet\n"); + + } + + void partition(const RCP > &solution); + void migrateBack(const RCP > &solution); + +private: + + const RCP env; + const RCP > problemComm; + const RCP model; + + RCP> innerAlgorithm; // algorithm to partition the quotient graph + RCP> quotientSolution; // the solution stored on active ranks +}; + + +///////////////////////////////////////////////////////////////////////////// +template +void AlgQuotient::partition( + const RCP > &solution +) +{ + if(model->getMigrated()){ + + // Create a different object for pre-migration solution + PartitioningSolution *soln = NULL; + try{ + soln = new PartitioningSolution(env, problemComm, 1); + } + Z2_FORWARD_EXCEPTIONS; + quotientSolution = rcp(soln); + + try { + this->innerAlgorithm->partition(quotientSolution); + } + Z2_FORWARD_EXCEPTIONS; + + // Migrate the solution + migrateBack(solution); + } + else{ + + try { + this->innerAlgorithm->partition(solution); + } + Z2_FORWARD_EXCEPTIONS; + } + +} + +// Pre-condition: +// The partitioning solution in quotientSolution is only stored on active MPI ranks, +// which is a single rank if commGraphModel has less than 1024 vertices. +// Post-condition: +// The partitioning solution in output parameter solution is distributed to all MPI ranks, +// so that each rank has a single entry of the solution. +template +void AlgQuotient::migrateBack( + const RCP > &solution +) +{ + int me = problemComm->getRank(); + int nActiveRanks = model->getNumActiveRanks(); + int dRank = model->getDestinationRank(); + + Teuchos::ArrayRCP parts(1); // Each rank has a single vertex for now. + RCP> *requests = new RCP>[1]; + + + requests[0] = Teuchos::ireceive(*problemComm, parts, dRank); + + if(me < nActiveRanks){ + + const part_t *qtntSlnView = quotientSolution->getPartListView(); + + int sRank = model->getStartRank(); + int eRank = model->getEndRank(); + + ArrayView vtxdist; + model->getVertexDist(vtxdist); + for(int i = sRank; i < eRank; i++) + Teuchos::send(*problemComm, 1, &qtntSlnView[i-sRank], i); + + } + + Teuchos::waitAll(*problemComm, Teuchos::arrayView(requests, 1)); + + solution->setParts(parts); + +} + +} // namespace Zoltan2 + +#endif diff --git a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_PartitioningAlgorithms.hpp b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_PartitioningAlgorithms.hpp index 7d445c2ce338..986ce1c6664a 100644 --- a/packages/zoltan2/core/src/algorithms/partition/Zoltan2_PartitioningAlgorithms.hpp +++ b/packages/zoltan2/core/src/algorithms/partition/Zoltan2_PartitioningAlgorithms.hpp @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include diff --git a/packages/zoltan2/core/src/models/Zoltan2_CommGraphModel.hpp b/packages/zoltan2/core/src/models/Zoltan2_CommGraphModel.hpp new file mode 100644 index 000000000000..eda610d48608 --- /dev/null +++ b/packages/zoltan2/core/src/models/Zoltan2_CommGraphModel.hpp @@ -0,0 +1,633 @@ +// @HEADER +// +// *********************************************************************** +// +// Zoltan2: A package of combinatorial algorithms for scientific computing +// Copyright 2012 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Karen Devine (kddevin@sandia.gov) +// Erik Boman (egboman@sandia.gov) +// Siva Rajamanickam (srajama@sandia.gov) +// +// *********************************************************************** +// +// @HEADER + +/*! CommGraphModel creates a graph representing the communication topology of +the MPI ranks for a given XpetraCrsGraphAdapter object. If there are n MPI ranks +in the given communicator, then this model contains n vertices so that each vertex +represents an MPI rank. If rank i sends a message to rank j (during the mat-vec on +the matrix corresponding to the given adapter), then there is a directed edge +from vertex i to vertex j in the graph. The size of the edge is the number of +nonzeros that cause that message. The Weight of vertex i is the number of nonzeros +currently residing at rank i. + +Since the above mentioned graph is too small, we migrate it into a subset of ranks, +which we call activeRanks. nActiveRanks_ denotes the number of active ranks and +is computed as n/threshold_. For now, this migration is mandatory but we can make it +optional (by setting a parameter and migrated_ flag). The threshold_ value can also +be parameterized. +*/ + +#ifndef _ZOLTAN2_COMMGRAPHMODEL_HPP_ +#define _ZOLTAN2_COMMGRAPHMODEL_HPP_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Zoltan2 { + +////////////////////////////////////////////////////////////////////////// +/*! \brief CommGraphModel defines the interface required for communication graph. + + The constructor of the GraphModel can be a global call, requiring + all processes in the application to call it. The rest of the + methods should be local methods. + + The template parameter is an InputAdapter, which is an object that + provides a uniform interface for models to the user's input data. + + For now, this model only works with GraphAdapter (XpetraCrsGraphAdapter). + +*/ +template +class CommGraphModel : public Model +{ +public: + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + typedef typename Adapter::scalar_t scalar_t; + typedef typename Adapter::gno_t gno_t; + typedef typename Adapter::lno_t lno_t; + typedef typename Adapter::node_t node_t; + typedef typename Adapter::user_t user_t; + typedef typename Adapter::userCoord_t userCoord_t; + typedef StridedData input_t; + typedef typename Adapter::offset_t offset_t; +#endif + + //! Destructor + ~CommGraphModel() { } + + /*! \brief Constructor + * + * \param inputAdapter a pointer to the user's data + * \param env object containing the parameters + * \param comm communicator for the problem + * + * All processes in the communicator must call the constructor. + */ + + CommGraphModel(const RCP > &ia, + const RCP &env, const RCP > &comm) + { + throw std::runtime_error("CommGraphModel is not implemented for MatrixAdapter yet."); + } + + CommGraphModel(const RCP > &ia, + const RCP &env, const RCP > &comm); + + CommGraphModel(const RCP > &ia, + const RCP &env, const RCP > &comm) + { + throw std::runtime_error("CommGraphModel is not implemented for MeshAdapter yet."); + } + + CommGraphModel(const RCP > &/* ia */, + const RCP &/* env */, const RCP > &/* comm */) + { + throw std::runtime_error("cannot build CommGraphModel from VectorAdapter"); + } + + CommGraphModel(const RCP > &ia, + const RCP &env, const RCP > &comm) + { + throw std::runtime_error("cannot build GraphModel from IdentifierAdapter"); + } + + /*! \brief Return the communicator used by the model + */ + const RCP > getComm() { return comm_; } + + /*! \brief Returns the number vertices on this process. + */ + size_t getLocalNumVertices() const { return nLocalVertices_; } + + /*! \brief Returns the global number vertices. + */ + size_t getGlobalNumVertices() const { return nGlobalVertices_; } + + /*! \brief Returns the number of edges on this process. + * In global or subset graphs, includes off-process edges. + */ + size_t getLocalNumEdges() const { return nLocalEdges_; } + + /*! \brief Returns the global number edges. + * For local graphs, the number of global edges is the number of local edges. + */ + size_t getGlobalNumEdges() const { return nGlobalEdges_; } + + /*! \brief Returns the number (0 or greater) of weights per vertex + */ + int getNumWeightsPerVertex() const { return nWeightsPerVertex_; } + + /*! \brief Returns the number (0 or greater) of weights per edge. + */ + int getNumWeightsPerEdge() const { return nWeightsPerEdge_; } + + + /*! \brief Sets pointers to this process' vertex Ids and their weights. + + \param Ids will on return point to the list of the global Ids for + each vertex on this process. + \param wgts If vertex weights is available, \c wgts + will on return point to a StridedData object of weights. + */ + size_t getVertexList( + ArrayView &Ids, + ArrayView &wgts) const + { + Ids = vGids_.view(0, nLocalVertices_); + wgts = vWeights_.view(0, nWeightsPerVertex_); + return nLocalVertices_; + } + + // Implied Vertex LNOs from getVertexList are used as indices to offsets + // array. + // Vertex GNOs are returned as neighbors in edgeIds. + + size_t getEdgeList( ArrayView &edgeIds, + ArrayView &offsets, + ArrayView &wgts) const + { + edgeIds = eGids_.view(0, nLocalEdges_); + offsets = eOffsets_.view(0, nLocalVertices_+1); + wgts = eWeights_.view(0, nWeightsPerEdge_); + return nLocalEdges_; + } + + /*! \brief Return the vtxDist array + * Array of size comm->getSize() + 1 + * Array[n+1] - Array[n] is number of vertices on rank n + */ + inline void getVertexDist(ArrayView &vtxdist) const + { + vtxdist = vtxDist_(); + if (vtxDist_.size() == 0) { + throw std::runtime_error("getVertexDist is available only " + "when consecutiveIdsRequired"); + } + } + + //////////////////////////////////////////////////// + // The Model interface. + //////////////////////////////////////////////////// + + size_t getLocalNumObjects() const { return nLocalVertices_; } + + size_t getGlobalNumObjects() const { return nGlobalVertices_; } + + //////////////////////////////////////////////////// + // Migration-related functions. + //////////////////////////////////////////////////// + + bool getMigrated() const { return migrated_; } + + int getNumActiveRanks() const { return nActiveRanks_; } + + int getDestinationRank() const { return destRank_; } + + int getStartRank() const { return startRank_; } + + int getEndRank() const { return endRank_; } + +private: + + void print(); // For debugging + void migrateGraph(); // For debugging + + const RCP env_; + const RCP > comm_; + + int threshold_; // threshold on #vertices each rank stores post-migration + bool migrated_; // Flag indicating whether this graph is + // has been migrated into a smaller set of + // MPI ranks: 0, 1, ... , nActiveRanks + int nActiveRanks_ ; // # ranks for the small graph to be partitioned on + int destRank_, startRank_, endRank_; + + + size_t nLocalVertices_; // # local vertices in built graph + size_t nGlobalVertices_; // # global vertices in built graph + ArrayRCP vGids_; // vertices of graph built in model; + // may be same as adapter's input + // or may be renumbered 0 to (N-1). + + int nWeightsPerVertex_; + ArrayRCP vWeights_; + + // Note: in some cases, size of these arrays + // may be larger than nLocalEdges_. So do not use .size(). + // Use nLocalEdges_, nGlobalEdges_ + + size_t nLocalEdges_; // # local edges in built graph + size_t nGlobalEdges_; // # global edges in built graph + ArrayRCP eGids_; // edges of graph built in model + ArrayRCP eOffsets_; // edge offsets build in model + // May be same as adapter's input + // or may differ + // due to renumbering, self-edge + // removal, or local graph. + + int nWeightsPerEdge_; + ArrayRCP eWeights_; // edge weights in built graph + // May be same as adapter's input + // or may differ due to self-edge + // removal, or local graph. + + ArrayRCP vtxDist_; // If consecutiveIdsRequired, + // vtxDist (as needed by ParMETIS + // and Scotch) is also created. + // Otherwise, it is Teuchos::null. +}; + + +//////////////////////////////////////////////////////////////// +// GraphModel from GraphAdapter +template +CommGraphModel::CommGraphModel( + const RCP > &bia, + const RCP &env, + const RCP > &comm): + env_(env), + comm_(comm), + migrated_(false), + nLocalVertices_(0), + nGlobalVertices_(0), + vGids_(), + nWeightsPerVertex_(0), + vWeights_(), + nLocalEdges_(0), + nGlobalEdges_(0), + eGids_(), + eOffsets_(), + nWeightsPerEdge_(0), + eWeights_(), + vtxDist_() +{ + int me = comm_->getRank(); + int commSize = comm_->getSize(); + + // Get XpetraCrsGraphAdapter from GraphAdapter + RCP> ia; + try{ + RCP> tmp = + rcp_const_cast>(bia); + ia = rcp_dynamic_cast>(tmp); + } + Z2_FORWARD_EXCEPTIONS; + + // Get the graph from the input adapter + auto inGraph = ia->getUserGraph(); + + // Get the importer of the graph + auto imp = inGraph->getImporter(); + + // Identify nbor PIDs and number of entries sent per PID + std::map exportpidmap; + auto exportpids = imp->getExportPIDs(); + size_t nexportpids = imp->getNumExportIDs(); + for (size_t i = 0; i < nexportpids; i++) { + int k = exportpids[i]; + if (exportpidmap.find(k) != exportpidmap.end()) + exportpidmap[k] = exportpidmap[k] + 1.; + else + exportpidmap[k] = 1.; + } + + // Set sizes + // There is only one vertex in each rank + nLocalVertices_ = 1; + nLocalEdges_ = exportpidmap.size(); + + // Allocate space + vGids_ = arcp(new gno_t[nLocalVertices_], + 0, nLocalVertices_, true); + eGids_ = arcp(new gno_t[nLocalEdges_], + 0, nLocalEdges_, true); + eOffsets_ = arcp(new offset_t[nLocalVertices_+1], + 0, nLocalVertices_+1, true); + scalar_t *wgts2 = new scalar_t [nLocalEdges_]; + + // Form the vertices + vGids_[0] = comm->getRank(); + + // Form the edges + size_t ptr = 0; + eOffsets_[0] = ptr; + for (std::map::iterator it = exportpidmap.begin(); + it != exportpidmap.end(); it++) { + eGids_[ptr] = it->first; + wgts2[ptr++] = it->second; + } + eOffsets_[nLocalVertices_] = ptr; + + // Edge weights + nWeightsPerEdge_ = 1; + input_t *wgts = new input_t [nWeightsPerEdge_]; + eWeights_ = arcp(wgts, 0, nWeightsPerEdge_, true); + + for (int w=0; w < nWeightsPerEdge_; w++){ + int stride=0; + ArrayRCP wgtArray = arcp(wgts2, 0, nLocalEdges_, true); + eWeights_[w] = input_t(wgtArray, stride); + } + + // Vertex weights + nWeightsPerVertex_ = 1; + input_t *weightInfo = new input_t [nWeightsPerVertex_]; + + for (int idx=0; idx < nWeightsPerVertex_; idx++){ + scalar_t *wgt = new scalar_t [nLocalVertices_]; + wgt[0] = inGraph->getNodeNumEntries(); + ArrayRCP wgtArray = arcp(wgt, 0, nLocalVertices_, true); + weightInfo[idx] = input_t(wgtArray, 1); + } + + vWeights_ = arcp(weightInfo, 0, nWeightsPerVertex_, true); + + reduceAll(*comm_, Teuchos::REDUCE_SUM, 1, + &nLocalVertices_, &nGlobalVertices_); + reduceAll(*comm_, Teuchos::REDUCE_SUM, 1, + &nLocalEdges_, &nGlobalEdges_); + + // Build vtxDist_ array starting with vGid on each rank + vtxDist_ = arcp(new size_t[commSize+1], 0, commSize+1, true); + vtxDist_[0] = 0; + Teuchos::gatherAll(*comm_, 1, &nLocalVertices_, commSize, &vtxDist_[1]); + for (int i = 0; i < commSize; i++) + vtxDist_[i+1] += vtxDist_[i]; + + // Migrate the quotient graph into smaller number of MPI ranks + migrateGraph(); + migrated_ = true; + +} + +template +void CommGraphModel::migrateGraph() +{ + // Set default threshold for migration + // We may want to get this from the parameter list + threshold_ = 1024; + + // Compute the sizes of/in the new distribution + nActiveRanks_ = std::ceil((double) nGlobalVertices_ / threshold_); + size_t avgVertexShare = nGlobalVertices_ / nActiveRanks_; + size_t myVertexShare = 0; + + int me = comm_->getRank(); + int commSize = comm_->getSize(); + + // Save the original pointers + ArrayRCP old_eOffsets_ = eOffsets_; + ArrayRCP old_eGids_ = eGids_; + size_t old_nLocalEdges_ = nLocalEdges_; + ArrayRCP old_vWeights_ = vWeights_; + ArrayRCP old_eWeights_ = eWeights_; + + // Compute whom to send to + destRank_ = me / (int) avgVertexShare; + if(destRank_ >= nActiveRanks_) + destRank_ = nActiveRanks_ - 1; + + // Start with sending the size of the edge list + RCP> *requests; + if(me < nActiveRanks_) { + + // Determine the range of ranks to receive edges from + // Needs to be updated when chunks are introduced + startRank_ = me * static_cast(avgVertexShare); + endRank_ = (me+1) * static_cast(avgVertexShare); + if(me == nActiveRanks_ - 1 ) // Last rank gets the surplus + endRank_ = static_cast(nGlobalVertices_); + myVertexShare = endRank_ - startRank_; + + eOffsets_ = arcp(new offset_t[myVertexShare+1], 0, myVertexShare+1, true); + eOffsets_[0] = 0; + + // Receive the sizes of their edge list + requests = new RCP>[myVertexShare]; + for(int i = startRank_; i < endRank_; i++) { + requests[i-startRank_] = Teuchos::ireceive(*comm_, + arcp(&eOffsets_[i-startRank_+1], 0, 1, false), + i); + } + + // Send adjacency size even though this rank will remain active + Teuchos::send(*comm_, 1, &old_eOffsets_[nLocalVertices_], destRank_); + + // Wait + Teuchos::waitAll(*comm_, Teuchos::arrayView(requests, myVertexShare)); + + // Prefix sum over the offsets + for(int i = 1; i <= myVertexShare; i++) + eOffsets_[i] += eOffsets_[i-1]; + + // Recompute the number of local edges + nLocalEdges_ = eOffsets_[myVertexShare]; + + // Reallocate the adjacency array + eGids_ = arcp(new gno_t[nLocalEdges_], 0, nLocalEdges_, true); + + + // Receive the adjacency lists + for(int i = startRank_; i < endRank_; i++) { + offset_t adjStartRank_ = eOffsets_[i-startRank_]; + offset_t adjSize = eOffsets_[i-startRank_+1] - adjStartRank_; + requests[i-startRank_] = Teuchos::ireceive(*comm_, + arcp(&eGids_[adjStartRank_], 0, adjSize, false), + i); + } + + // Send adjacency even though this rank will remain active + Teuchos::send(*comm_, old_nLocalEdges_, &old_eGids_[0], destRank_); + Teuchos::waitAll(*comm_, Teuchos::arrayView(requests, myVertexShare)); + + + // Migrate vertex weights arrays + scalar_t *wgts = new scalar_t [myVertexShare]; + for(int i = startRank_; i < endRank_; i++) { + requests[i-startRank_] = Teuchos::ireceive(*comm_, + arcp(&wgts[i-startRank_], 0, 1, false), // assumes one vertex per rank + i); + } + + const scalar_t *wPtr; + size_t wLen = 0; + int stride = 0; + old_vWeights_[0].getStridedList(wLen, wPtr, stride); + Teuchos::send(*comm_, nLocalVertices_, wPtr, destRank_); + + Teuchos::waitAll(*comm_, Teuchos::arrayView(requests, myVertexShare)); + + input_t *weightInfo = new input_t [nWeightsPerVertex_]; + for (int idx=0; idx < nWeightsPerVertex_; idx++){ + ArrayRCP wgtArray = arcp(wgts, 0, myVertexShare, true); + weightInfo[idx] = input_t(wgtArray, 1); + } + vWeights_ = arcp(weightInfo, 0, nWeightsPerVertex_, true); + + // Migrate edge weights arrays + scalar_t *ewgts = new scalar_t [nLocalEdges_]; + for(int i = startRank_; i < endRank_; i++) { + offset_t adjStartRank_ = eOffsets_[i-startRank_]; + offset_t adjSize = eOffsets_[i-startRank_+1] - adjStartRank_; + requests[i-startRank_] = Teuchos::ireceive(*comm_, + arcp(&ewgts[adjStartRank_], 0, adjSize, false), // assumes one vertex per rank + i); + } + + old_eWeights_[0].getStridedList(wLen, wPtr, stride); + Teuchos::send(*comm_, old_nLocalEdges_, wPtr, destRank_); + + Teuchos::waitAll(*comm_, Teuchos::arrayView(requests, myVertexShare)); + + input_t *eweightInfo = new input_t [nWeightsPerEdge_]; + for (int idx=0; idx < nWeightsPerEdge_; idx++){ + ArrayRCP ewgtArray = arcp(ewgts, 0, nLocalEdges_, true); + eweightInfo[idx] = input_t(ewgtArray, 1); + } + eWeights_ = arcp(eweightInfo, 0, nWeightsPerEdge_, true); + + + // Finalize the migration + vGids_ = arcp(new gno_t[myVertexShare], 0, myVertexShare, true); + for(int i = startRank_; i < endRank_; i++) + vGids_[i-startRank_] = i; + + nLocalVertices_ = myVertexShare; + + + } + else { + + // Send adjacency size + Teuchos::send(*comm_, 1, &eOffsets_[nLocalVertices_], destRank_); + + // Send adjacency list + Teuchos::send(*comm_, nLocalEdges_, &eGids_[0], destRank_); + + // Send vertex weights list + const scalar_t *wPtr; + size_t wLen = 0; + int stride = 0; + vWeights_[0].getStridedList(wLen, wPtr, stride); + Teuchos::send(*comm_, nLocalVertices_, wPtr, destRank_); + + // Send edge weights list + eWeights_[0].getStridedList(wLen, wPtr, stride); + Teuchos::send(*comm_, nLocalEdges_, wPtr, destRank_); + + nLocalVertices_ = 0; + } + + for (int i = 0; i <= commSize; i++) + vtxDist_[i] = 0; + + Teuchos::gatherAll(*comm_, 1, &nLocalVertices_, commSize, &vtxDist_[1]); + for (int i = 0; i < commSize; i++) + vtxDist_[i+1] += vtxDist_[i]; + +} + +////////////////////////////////////////////////////////////////////////// +template +void CommGraphModel::print() +{ + std::ostream *os = env_->getDebugOStream(); + + int me = comm_->getRank(); + + *os << me + << " Nvtx " << nLocalVertices_ + << " Nedge " << nLocalEdges_ + << " NVWgt " << nWeightsPerVertex_ + << " NEWgt " << nWeightsPerEdge_ + << std::endl; + + for (size_t i = 0; i < nLocalVertices_; i++) { + *os << me << " " << i << " GID " << vGids_[i] << ": "; + for (offset_t j = eOffsets_[i]; j < eOffsets_[i+1]; j++) + *os << eGids_[j] << " " ; + *os << std::endl; + } + + if (nWeightsPerVertex_) { + for (size_t i = 0; i < nLocalVertices_; i++) { + *os << me << " " << i << " VWGTS " << vGids_[i] << ": "; + for (int j = 0; j < nWeightsPerVertex_; j++) + *os << vWeights_[j][i] << " "; + *os << std::endl; + } + } + + if (nWeightsPerEdge_) { + for (size_t i = 0; i < nLocalVertices_; i++) { + *os << me << " " << i << " EWGTS " << vGids_[i] << ": "; + for (offset_t j = eOffsets_[i]; j < eOffsets_[i+1]; j++) { + *os << eGids_[j] << " ("; + for (int w = 0; w < nWeightsPerEdge_; w++) + *os << eWeights_[w][j] << " "; + *os << ") "; + } + *os << std::endl; + } + } + +} + +} // namespace Zoltan2 + + +#endif + diff --git a/packages/zoltan2/core/src/models/Zoltan2_Model.hpp b/packages/zoltan2/core/src/models/Zoltan2_Model.hpp index 2edab6b15344..06f0adead2d2 100644 --- a/packages/zoltan2/core/src/models/Zoltan2_Model.hpp +++ b/packages/zoltan2/core/src/models/Zoltan2_Model.hpp @@ -61,6 +61,7 @@ enum ModelType { HypergraphModelType, GraphModelType, + CommGraphModelType, CoordinateModelType, IdentifierModelType, MAX_NUM_MODEL_TYPES diff --git a/packages/zoltan2/core/src/problems/Zoltan2_PartitioningProblem.hpp b/packages/zoltan2/core/src/problems/Zoltan2_PartitioningProblem.hpp index 45646bfba895..e25761d98f4f 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_PartitioningProblem.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_PartitioningProblem.hpp @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -271,7 +272,7 @@ class PartitioningProblem : public Problem // didn't want to have low level changes with this particular refactor // TO DO: Add more Tuple constructors and then redo this code to be // Teuchos::tuple algorithm_names( "rcb", "multijagged" ... ); - Array algorithm_names(18); + Array algorithm_names(19); algorithm_names[0] = "rcb"; algorithm_names[1] = "multijagged"; algorithm_names[2] = "rib"; @@ -280,16 +281,17 @@ class PartitioningProblem : public Problem algorithm_names[5] = "phg"; algorithm_names[6] = "metis"; algorithm_names[7] = "parmetis"; - algorithm_names[8] = "pulp"; - algorithm_names[9] = "parma"; - algorithm_names[10] = "scotch"; - algorithm_names[11] = "ptscotch"; - algorithm_names[12] = "block"; - algorithm_names[13] = "cyclic"; - algorithm_names[14] = "sarma"; - algorithm_names[15] = "random"; - algorithm_names[16] = "zoltan"; - algorithm_names[17] = "forTestingOnly"; + algorithm_names[8] = "quotient"; + algorithm_names[9] = "pulp"; + algorithm_names[10] = "parma"; + algorithm_names[11] = "scotch"; + algorithm_names[12] = "ptscotch"; + algorithm_names[13] = "block"; + algorithm_names[14] = "cyclic"; + algorithm_names[15] = "sarma"; + algorithm_names[16] = "random"; + algorithm_names[17] = "zoltan"; + algorithm_names[18] = "forTestingOnly"; RCP algorithm_Validator = Teuchos::rcp( new Teuchos::StringValidator( algorithm_names )); pl.set("algorithm", "random", "partitioning algorithm", @@ -565,10 +567,17 @@ void PartitioningProblem::solve(bool updateInputData) this->baseInputAdapter_)); } else if (algName_ == std::string("parmetis")) { - this->algorithm_ = rcp(new AlgParMETIS(this->envConst_, + using model_t = GraphModel; + this->algorithm_ = rcp(new AlgParMETIS(this->envConst_, this->comm_, this->graphModel_)); } + else if (algName_ == std::string("quotient")) { + this->algorithm_ = rcp(new AlgQuotient(this->envConst_, + this->comm_, + this->commGraphModel_, + "parmetis")); // The default alg. to use inside Quotient + } // is ParMETIS for now. else if (algName_ == std::string("pulp")) { this->algorithm_ = rcp(new AlgPuLP(this->envConst_, this->comm_, @@ -826,6 +835,13 @@ void PartitioningProblem::createPartitioningProblem(bool newData) removeSelfEdges = true; needConsecutiveGlobalIds = true; } + else if (algorithm == std::string("quotient")) + { + modelAvail_[CommGraphModelType] = true; + algName_ = algorithm; + removeSelfEdges = true; + needConsecutiveGlobalIds = true; + } else if (algorithm == std::string("scotch") || algorithm == std::string("ptscotch")) // BDD: Don't construct graph for scotch here { @@ -1003,7 +1019,7 @@ void PartitioningProblem::createPartitioningProblem(bool newData) this->env_->debug(DETAILED_STATUS, " models"); // if (modelType_ == GraphModelType) - if (modelAvail_[GraphModelType]==true) + if (modelAvail_[GraphModelType]==true || modelAvail_[CommGraphModelType]==true) { // Any parameters in the graph sublist? @@ -1129,6 +1145,13 @@ void PartitioningProblem::createPartitioningProblem(bool newData) this->identifierModel_); } + if(modelAvail_[CommGraphModelType]==true) + { + this->env_->debug(DETAILED_STATUS, " building communication graph model"); + this->commGraphModel_ = rcp(new CommGraphModel( + this->baseInputAdapter_, this->envConst_, this->comm_)); + } + this->env_->memory("After creating Model"); this->env_->debug(DETAILED_STATUS, "createPartitioningProblem done"); } diff --git a/packages/zoltan2/core/src/problems/Zoltan2_Problem.hpp b/packages/zoltan2/core/src/problems/Zoltan2_Problem.hpp index 80aa5afb3b71..c7d1a1bad92f 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_Problem.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_Problem.hpp @@ -52,6 +52,7 @@ #include #include +#include #include #include #include @@ -199,7 +200,8 @@ class Problem : public ProblemRoot { RCP inputAdapter_; RCP baseInputAdapter_; - RCP > graphModel_; + RCP > graphModel_; + RCP > commGraphModel_; RCP > identifierModel_; RCP > coordinateModel_; diff --git a/packages/zoltan2/test/core/partition/CMakeLists.txt b/packages/zoltan2/test/core/partition/CMakeLists.txt index e096fbd94d7a..7a784aceb77c 100644 --- a/packages/zoltan2/test/core/partition/CMakeLists.txt +++ b/packages/zoltan2/test/core/partition/CMakeLists.txt @@ -561,6 +561,17 @@ TRIBITS_ADD_TEST( FAIL_REGULAR_EXPRESSION "FAIL" ) +TRIBITS_ADD_TEST( + Partitioning1 + NAME Partitioning1_Quotient + NUM_MPI_PROCS 4 + COMM serial mpi + ARGS + "--inputFile=simple --method=quotient --nparts=2" + PASS_REGULAR_EXPRESSION "PASS" + FAIL_REGULAR_EXPRESSION "FAIL" + ) + IF (Tpetra_INST_DOUBLE) # Double needed for file I/O. TRIBITS_ADD_EXECUTABLE( diff --git a/packages/zoltan2/test/core/partition/partitioning1.cpp b/packages/zoltan2/test/core/partition/partitioning1.cpp index 4cc98899a2a9..4214a605672c 100644 --- a/packages/zoltan2/test/core/partition/partitioning1.cpp +++ b/packages/zoltan2/test/core/partition/partitioning1.cpp @@ -104,6 +104,7 @@ int main(int narg, char** arg) bool verbose = false; // Verbosity of output bool distributeInput = true; bool haveFailure = false; + int nParts = -1; int nVwgts = 0; int nEwgts = 0; int testReturn = 0; @@ -126,6 +127,8 @@ int main(int narg, char** arg) "echoing the input/generated matrix."); cmdp.setOption("method", &method, "Partitioning method to use: scotch or parmetis."); + cmdp.setOption("nparts", &nParts, + "Number of parts being requested"); cmdp.setOption("vertexWeights", &nVwgts, "Number of weights to generate for each vertex"); cmdp.setOption("edgeWeights", &nEwgts, @@ -206,6 +209,13 @@ int main(int narg, char** arg) params.set("partitioning_approach", "partition"); params.set("algorithm", method); + ////// Set number of parts if specified + if(nParts > 0) { + params.set("num_global_parts", nParts); + if(nParts != comm->getSize()) + distributeInput= false; + } + ////// Create an input adapter for the graph of the Tpetra matrix. SparseGraphAdapter adapter(origMatrix->getCrsGraph(), nVwgts, nEwgts); @@ -306,12 +316,17 @@ int main(int narg, char** arg) << " FAIL" << std::endl; return -1; } - + ///// Basic metric checking of the partitioning solution ///// Not ordinarily done in application code; just doing it for testing here. size_t checkNparts = comm->getSize(); + size_t checkLength = origMatrix->getNodeNumRows(); - size_t checkLength = origMatrix->getNodeNumRows(); + if(method == "quotient") { + checkNparts = nParts; + checkLength = 1; //assuming one vertex per rank in the quotient model + } + const SparseGraphAdapter::part_t *checkParts = problem.getSolution().getPartListView(); // Check for load balance @@ -333,6 +348,13 @@ int main(int narg, char** arg) wtPerPart[checkParts[i]*nVwgts+j] += origMatrix->getNumEntriesInLocalRow(i); } } + + // The rest of the checks do not apply to the quotient algorithm + if(method == "quotient") { + std::cout << "PASS" << std::endl; + return testReturn; + } + Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, checkNparts, countPerPart, globalCountPerPart); Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, @@ -348,7 +370,7 @@ int main(int narg, char** arg) if (globalCountPerPart[i] > max) {max = globalCountPerPart[i]; maxrank = i;} sum += globalCountPerPart[i]; } - + if (me == 0) { float avg = (float) sum / (float) checkNparts; std::cout << "Minimum count: " << min << " on rank " << minrank << std::endl; @@ -468,6 +490,6 @@ int main(int narg, char** arg) if (!haveFailure) std::cout << "PASS" << std::endl; } - + return testReturn; }