diff --git a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybrid2GL.hpp b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybrid2GL.hpp index a4e1ded6aed9..580219b6d3b9 100644 --- a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybrid2GL.hpp +++ b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybrid2GL.hpp @@ -270,7 +270,7 @@ class AlgTwoGhostLayer : public Algorithm { RCP env; RCP > comm; bool verbose; - + bool timing; private: //This function constructs a CSR with complete adjacency information for @@ -688,8 +688,8 @@ class AlgTwoGhostLayer : public Algorithm { std::vector recvcnts(comm->getSize(), 0); Teuchos::ArrayView recvcnts_view = Teuchos::arrayViewFromVector(recvcnts); - //if we're computing statistics, remove the computation imbalance from the comm timer - if(verbose) comm->barrier(); + //if we're reporting times, remove the computation imbalance from the comm timer + if(timing) comm->barrier(); double comm_total = 0.0; double comm_temp = timer(); @@ -720,6 +720,7 @@ class AlgTwoGhostLayer : public Algorithm { const RCP > &comm_) : adapter(adapter_), pl(pl_), env(env_), comm(comm_){ verbose = pl->get("verbose",false); + timing = pl->get("timing", false); modelFlag_t flags; flags.reset(); buildModel(flags); @@ -1071,7 +1072,7 @@ class AlgTwoGhostLayer : public Algorithm { double conflict_detection = 0.0; //Number of rounds we are saving statistics for - //100 is a decent default. + //100 is a decent default. Reporting requires --verbose argument. const int numStatisticRecordingRounds = 100; //includes all ghosts, including the second layer. @@ -1322,7 +1323,7 @@ class AlgTwoGhostLayer : public Algorithm { //Done initializing, start coloring! //use a barrier if we are reporting timing info - if(verbose) comm->barrier(); + if(timing) comm->barrier(); interior_time = timer(); total_time = timer(); //give the entire local graph to KokkosKernels to color @@ -1427,7 +1428,7 @@ class AlgTwoGhostLayer : public Algorithm { vertsPerRound[distributedRounds] = verts_to_recolor_size_host(0); } - if(verbose) comm->barrier(); + if(timing) comm->barrier(); double recolor_temp = timer(); //recolor using KokkosKernels' coloring function if(verts_to_recolor_size_host(0) > 0){ @@ -1437,10 +1438,13 @@ class AlgTwoGhostLayer : public Algorithm { if(distributedRounds < numStatisticRecordingRounds){ recoloringPerRound[distributedRounds] = timer() - recolor_temp; recoloring_time += recoloringPerRound[distributedRounds]; - total_time += recoloringPerRound[distributedRounds]; comp_time += recoloringPerRound[distributedRounds]; compPerRound[distributedRounds] = recoloringPerRound[distributedRounds]; totalPerRound[distributedRounds] = recoloringPerRound[distributedRounds]; + } else if(timing){ + double recoloring_round_time = timer() - recolor_temp; + recoloring_time += recoloring_round_time; + comp_time += recoloring_round_time; } //reset the ghost colors to what they were before recoloring @@ -1461,10 +1465,6 @@ class AlgTwoGhostLayer : public Algorithm { commPerRound[distributedRounds] = curr_comm_time; recvPerRound[distributedRounds] = recv; sentPerRound[distributedRounds] = sent; - if(verbose) { - std::cout<getRank()<<": total sent in round "<getRank()<<": total recv in round "< { compPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds]; totalPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds]; comp_time += conflictDetectionPerRound[distributedRounds]; - } + } else if(timing){ + double conflict_detection_round_time = timer() - detection_temp; + conflict_detection += conflict_detection_round_time; + comp_time += conflict_detection_round_time; + } distributedRounds++; size_t localDone = recoloringSize_host(0); @@ -1535,10 +1539,9 @@ class AlgTwoGhostLayer : public Algorithm { if(distributedRounds < numStatisticRecordingRounds){ vertsPerRound[distributedRounds] = recoloringSize_host(0); } - if(verbose){ - std::cout<getRank()<<": starting to recolor, serial\n"; - comm->barrier(); - } + if(verbose) std::cout<getRank()<<": starting to recolor, serial\n"; + if(timing) comm->barrier(); + double recolor_temp = timer(); if(verts_to_recolor_size_host(0) > 0){ this->colorInterior_serial(femv_colors.size(), dist_adjs_host, dist_offsets_host, femv, @@ -1547,10 +1550,13 @@ class AlgTwoGhostLayer : public Algorithm { if(distributedRounds < numStatisticRecordingRounds){ recoloringPerRound[distributedRounds] = timer() - recolor_temp; recoloring_time += recoloringPerRound[distributedRounds]; - total_time += recoloringPerRound[distributedRounds]; comp_time += recoloringPerRound[distributedRounds]; compPerRound[distributedRounds] = recoloringPerRound[distributedRounds]; totalPerRound[distributedRounds] = recoloringPerRound[distributedRounds]; + } else if(timing){ + double recoloring_serial_round_time = timer() - recolor_temp; + recoloring_time += recoloring_serial_round_time; + comp_time += recoloring_serial_round_time; } //reset the ghost colors to their previous values to avoid @@ -1566,10 +1572,6 @@ class AlgTwoGhostLayer : public Algorithm { commPerRound[distributedRounds] = curr_comm_time; recvPerRound[distributedRounds] = recv; sentPerRound[distributedRounds] = sent; - if(verbose) { - std::cout<getRank()<<": total sent in round "<getRank()<<": total recv in round "< { ghost_colors_host(i) = colors_host(i+n_local); } - if(verbose) comm->barrier(); + if(timing) comm->barrier(); double detection_temp = timer(); //zero these out, they'll be updated by detectConflicts_serial @@ -1599,7 +1601,12 @@ class AlgTwoGhostLayer : public Algorithm { compPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds]; totalPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds]; comp_time += conflictDetectionPerRound[distributedRounds]; - } + } else if(timing){ + double conflict_detection_serial_round_time = timer() - detection_temp; + conflict_detection += conflict_detection_serial_round_time; + comp_time += conflict_detection_serial_round_time; + } + size_t globalDone = 0; size_t localDone = recoloringSize_host(0); Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 1, &localDone, &globalDone); @@ -1656,6 +1663,7 @@ class AlgTwoGhostLayer : public Algorithm { Teuchos::reduceAll (*comm, Teuchos::REDUCE_SUM,numStatisticRecordingRounds,recvPerRound,finalRecvPerRound); Teuchos::reduceAll (*comm, Teuchos::REDUCE_SUM,numStatisticRecordingRounds,sentPerRound,finalSentPerRound); printf("Rank %d: boundary size: %ld\n",comm->getRank(),localBoundaryVertices); + if(comm->getRank() == 0) printf("Total boundary size: %ld\n",totalBoundarySize); for(int i = 0; i < std::min((int)distributedRounds,numStatisticRecordingRounds); i++){ printf("Rank %d: recolor %ld vertices in round %d\n",comm->getRank(), vertsPerRound[i],i); printf("Rank %d: sentbuf had %lld entries in round %d\n", comm->getRank(), sentPerRound[i],i); @@ -1672,6 +1680,7 @@ class AlgTwoGhostLayer : public Algorithm { printf("comp time in round %d: %f\n",i,finalCompPerRound[i]); } } + } else if (timing){ double global_total_time = 0.0; double global_recoloring_time = 0.0; double global_min_recoloring_time = 0.0; @@ -1689,7 +1698,6 @@ class AlgTwoGhostLayer : public Algorithm { comm->barrier(); fflush(stdout); if(comm->getRank()==0){ - printf("Boundary size: %ld\n",totalBoundarySize); printf("Total Time: %f\n",global_total_time); printf("Interior Time: %f\n",global_interior_time); printf("Recoloring Time: %f\n",global_recoloring_time); diff --git a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD1-2GL.hpp b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD1-2GL.hpp index d0d846252c06..1885f42714ee 100644 --- a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD1-2GL.hpp +++ b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD1-2GL.hpp @@ -210,7 +210,9 @@ class AlgDistance1TwoGhostLayer : public AlgTwoGhostLayer { } },recoloringSize(0)); Kokkos::fence(); - Kokkos::parallel_for(femv_colors.size(), KOKKOS_LAMBDA (const size_t& i){ + Kokkos::parallel_for("rebuild verts_to_send and verts_to_recolor", + Kokkos::RangePolicy(0,femv_colors.size()), + KOKKOS_LAMBDA (const size_t& i){ if(femv_colors(i) == 0){ if(i < n_local){ verts_to_send_view(verts_to_send_size_atomic(0)++) = i; diff --git a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD1.hpp b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD1.hpp index 9c0689e97abd..36f1d46f8054 100644 --- a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD1.hpp +++ b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD1.hpp @@ -232,11 +232,15 @@ class AlgDistance1 : public Algorithm } } },recoloringSize(0)); - Kokkos::parallel_for(n_local, KOKKOS_LAMBDA(const int& i){ + Kokkos::fence(); + Kokkos::parallel_for("Rebuild verts_to_send_view", + Kokkos::RangePolicy(0,n_local), + KOKKOS_LAMBDA(const int& i){ if(femv_colors(i) == 0){ verts_to_send_view(verts_to_send_size_atomic(0)++) = i; } }); + Kokkos::fence(); } private: @@ -317,8 +321,8 @@ class AlgDistance1 : public Algorithm std::vector recvcnts(comm->getSize(), 0); Teuchos::ArrayView recvcnts_view = Teuchos::arrayViewFromVector(recvcnts); - //if we're computing statistics, remove the computation imbalance from the comm timer. - if(verbose) comm->barrier(); + //if we're reporting timings, remove the computation imbalance from the comm timer. + if(timing) comm->barrier(); double comm_total = 0.0; double comm_temp = timer(); @@ -346,7 +350,7 @@ class AlgDistance1 : public Algorithm RCP env; RCP > comm; bool verbose; - + bool timing; public: //constructor for the hybrid distributed distance-1 algorithm AlgDistance1( @@ -356,6 +360,7 @@ class AlgDistance1 : public Algorithm const RCP > &comm_) : adapter(adapter_), pl(pl_), env(env_), comm(comm_) { verbose = pl->get("verbose",false); + timing = pl->get("timing", false); if(verbose) std::cout<getRank()<<": inside coloring constructor\n"; modelFlag_t flags; flags.reset(); @@ -724,18 +729,18 @@ class AlgDistance1 : public Algorithm if(verbose) std::cout<getRank()<<": Coloring interior\n"; //initialize interior and total timers, barrier to prevent any imbalance from setup. //Only use a barrier if timing is happening. - if(verbose) comm->barrier(); + if(timing) comm->barrier(); interior_time = timer(); total_time = timer(); //call the KokkosKernels coloring function with the Tpetra default spaces. bool use_vbbit = (global_max_degree < 6000); this->colorInterior (nVtx, dist_adjs, dist_offsets, femv,dist_adjs,0,use_vbbit); - if(verbose){ + if(timing){ interior_time = timer() - interior_time; comp_time = interior_time; - std::cout<getRank()<<": Going to recolor\n"; } + if(verbose) std::cout<getRank()<<": Going to recolor\n"; bool recolor_degrees = this->pl->template get("recolor_degrees", true); //if there is more than a single process, check distributed conflicts and recolor @@ -819,7 +824,6 @@ class AlgDistance1 : public Algorithm if(distributedRounds < numStatisticRecordingRounds) { vertsPerRound[distributedRounds] = recoloringSize_host(0); } - if(verbose) std::cout<getRank()<<": starting to recolor\n"; //copying the send view to the recolor view is necessary because //KokkosKernels can change the view passed in, and we need the send view @@ -844,9 +848,12 @@ class AlgDistance1 : public Algorithm comp_time += recoloringPerRound[distributedRounds]; compPerRound[distributedRounds] = recoloringPerRound[distributedRounds]; totalPerRound[distributedRounds] = recoloringPerRound[distributedRounds]; + } else if(timing) { + double recolor_round_time = timer() - recolor_temp; + recoloring_time += recolor_round_time; + comp_time += recolor_round_time; } - if(verbose) std::cout<getRank()<<": done recoloring\n"; //reset the recoloringSize device host and device views //to zero recoloringSize_host(0) = 0; @@ -874,10 +881,6 @@ class AlgDistance1 : public Algorithm comm_time += curr_comm_time; if(distributedRounds < numStatisticRecordingRounds){ commPerRound[distributedRounds] = curr_comm_time; - if(verbose){ - std::cout<getRank()<<": total sent in round "<getRank()<<": total recv in round "< compPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds]; totalPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds]; comp_time += conflictDetectionPerRound[distributedRounds]; + } else if(timing){ + double conflict_detection_round_time = timer()- detection_temp; + conflict_detection += conflict_detection_round_time; + comp_time += conflict_detection_round_time; } //do a reduction to determine if we're done int globalDone = 0; @@ -942,7 +949,6 @@ class AlgDistance1 : public Algorithm if(distributedRounds < 100){ vertsPerRound[distributedRounds] = recoloringSize_host(0); } - if(verbose) std::cout<getRank()<<": starting to recolor, serial\n"; double recolor_temp = timer(); //use KokkosKernels to recolor the conflicting vertices @@ -958,9 +964,12 @@ class AlgDistance1 : public Algorithm comp_time += recoloringPerRound[distributedRounds]; compPerRound[distributedRounds] = recoloringPerRound[distributedRounds]; totalPerRound[distributedRounds] = recoloringPerRound[distributedRounds]; - } + } else if(timing){ + double recolor_serial_round_time = timer() - recolor_temp; + recoloring_time += recolor_serial_round_time; + comp_time += recolor_serial_round_time; + } - if(verbose) std::cout<getRank()<<": done recoloring\n"; recoloringSize_host(0) = 0; for(size_t i = 0; i < rand.size() -nVtx; i++){ @@ -980,10 +989,6 @@ class AlgDistance1 : public Algorithm if(distributedRounds < numStatisticRecordingRounds){ commPerRound[distributedRounds] = curr_comm_time; - if(verbose){ - std::cout<getRank()<<": total sent in round "<getRank()<<": total recv in round "< compPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds]; totalPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds]; comp_time += conflictDetectionPerRound[distributedRounds]; - } + } else if(timing){ + double conflict_detection_serial_round_time = timer() - detection_temp; + conflict_detection += conflict_detection_serial_round_time; + comp_time += conflict_detection_serial_round_time; + } //do a reduction to determine if we're done int globalDone = 0; int localDone = recoloringSize_host(0); @@ -1036,8 +1045,8 @@ class AlgDistance1 : public Algorithm } //print how many rounds of speculating/correcting happened (this should be the same for all ranks): if(comm->getRank()==0) printf("did %d rounds of distributed coloring\n", distributedRounds); + int totalBoundarySize = 0; int totalVertsPerRound[numStatisticRecordingRounds]; - int totalBoundarySize = 0; double finalTotalPerRound[numStatisticRecordingRounds]; double maxRecoloringPerRound[numStatisticRecordingRounds]; double finalSerialRecoloringPerRound[numStatisticRecordingRounds]; @@ -1072,6 +1081,7 @@ class AlgDistance1 : public Algorithm Teuchos::reduceAll (*comm, Teuchos::REDUCE_SUM,numStatisticRecordingRounds,sentPerRound, finalSentPerRound); printf("Rank %d: boundary size: %d\n",comm->getRank(),localBoundaryVertices); + if(comm->getRank()==0) printf("Total boundary size: %d\n",totalBoundarySize); for(int i = 0; i < std::min(distributedRounds,numStatisticRecordingRounds); i++){ printf("Rank %d: recolor %d vertices in round %d\n",comm->getRank(),vertsPerRound[i],i); if(comm->getRank()==0) printf("recolored %d vertices in round %d\n",totalVertsPerRound[i],i); @@ -1085,7 +1095,7 @@ class AlgDistance1 : public Algorithm if(comm->getRank()==0) printf("total recv in round %d: %lld\n",i,finalRecvPerRound[i]); if(comm->getRank()==0) printf("comp time in round %d: %f\n",i,finalCompPerRound[i]); } - + } else if(timing){ double global_total_time = 0.0; double global_recoloring_time=0.0; double global_min_recoloring_time=0.0; @@ -1103,7 +1113,6 @@ class AlgDistance1 : public Algorithm comm->barrier(); fflush(stdout); if(comm->getRank()==0){ - printf("Boundary size: %d\n",totalBoundarySize); printf("Total Time: %f\n",global_total_time); printf("Interior Time: %f\n",global_interior_time); printf("Recoloring Time: %f\n",global_recoloring_time); @@ -1112,8 +1121,8 @@ class AlgDistance1 : public Algorithm printf("Comm Time: %f\n",global_comm_time); printf("Comp Time: %f\n",global_comp_time); } - std::cout<getRank()<<": exiting coloring\n"; } + if(verbose) std::cout<getRank()<<": exiting coloring\n"; } }; diff --git a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD2.hpp b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD2.hpp index 6b3a4ecbd6ad..953a66d809b2 100644 --- a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD2.hpp +++ b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridD2.hpp @@ -255,7 +255,9 @@ class AlgDistance2 : public AlgTwoGhostLayer { Kokkos::fence(); //update the verts_to_send and verts_to_recolor views. - Kokkos::parallel_for(femv_colors.size(), KOKKOS_LAMBDA(const uint64_t& i){ + Kokkos::parallel_for("rebuild verts_to_send and verts_to_recolor", + Kokkos::RangePolicy(0,femv_colors.size()), + KOKKOS_LAMBDA(const uint64_t& i){ if(femv_colors(i) == 0){ //we only send vertices owned by the current process if(i < n_local){ diff --git a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridPD2.hpp b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridPD2.hpp index 4357cab70934..75f1707e265e 100644 --- a/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridPD2.hpp +++ b/packages/zoltan2/core/src/algorithms/color/Zoltan2_AlgHybridPD2.hpp @@ -218,7 +218,9 @@ class AlgPartialDistance2 : public AlgTwoGhostLayer { },recoloringSize(0)); Kokkos::fence(); //update the verts_to_send and verts_to_recolor views - Kokkos::parallel_for(femv_colors.size(), KOKKOS_LAMBDA(const uint64_t& i){ + Kokkos::parallel_for("rebuild verts_to_send and verts_to_recolor", + Kokkos::RangePolicy(0,femv_colors.size()), + KOKKOS_LAMBDA(const uint64_t& i){ if(femv_colors(i) == 0){ if(i < n_local){ //we only send vertices owned by the current process diff --git a/packages/zoltan2/core/src/problems/Zoltan2_ColoringProblem.hpp b/packages/zoltan2/core/src/problems/Zoltan2_ColoringProblem.hpp index efd16e3b32a0..12b47b95214e 100644 --- a/packages/zoltan2/core/src/problems/Zoltan2_ColoringProblem.hpp +++ b/packages/zoltan2/core/src/problems/Zoltan2_ColoringProblem.hpp @@ -139,6 +139,7 @@ class ColoringProblem : public Problem pl.set("color_method", "SerialGreedy", "coloring algorithm", color_method_Validator); pl.set("verbose", false, "print all output", Environment::getBoolValidator()); + pl.set("timing", false, "print timing data", Environment::getBoolValidator()); pl.set("serial_threshold",0,"vertices to recolor in serial",Environment::getAnyIntValidator()); pl.set("recolor_degrees",true,"recolor based on vertex degrees",Environment::getBoolValidator()); } diff --git a/packages/zoltan2/test/core/color/CMakeLists.txt b/packages/zoltan2/test/core/color/CMakeLists.txt index 3ad847cf7cb4..d749d79d4909 100644 --- a/packages/zoltan2/test/core/color/CMakeLists.txt +++ b/packages/zoltan2/test/core/color/CMakeLists.txt @@ -37,7 +37,7 @@ TRIBITS_ADD_TEST( NUM_MPI_PROCS 4 COMM serial mpi ARGS - "--inputFile=simple --colorMethod=D1" + "--inputFile=simple --colorMethod=D1 --timing" PASS_REGULAR_EXPRESSION "PASS" FAIL_REGULAR_EXPRESSION "FAIL" ) @@ -155,7 +155,7 @@ TRIBITS_ADD_TEST( NUM_MPI_PROCS 4 COMM serial mpi ARGS - "--inputFile=simple --colorMethod=D1-2GL" + "--inputFile=simple --colorMethod=D1-2GL --timing" PASS_REGULAR_EXPRESSION "PASS" FAIL_REGULAR_EXPRESSION "FAIL" ) diff --git a/packages/zoltan2/test/core/color/coloring1.cpp b/packages/zoltan2/test/core/color/coloring1.cpp index 7a3ad1a88bfb..e783f87389f5 100644 --- a/packages/zoltan2/test/core/color/coloring1.cpp +++ b/packages/zoltan2/test/core/color/coloring1.cpp @@ -197,6 +197,7 @@ int main(int narg, char** arg) std::string outputFile = ""; // Output file to write std::string colorAlg = "SerialGreedy"; // Default algorithm is the serial greedy bool verbose = false; // Verbosity of output + bool timing = false; // If true, report coloring times. int testReturn = 0; bool recolorDegrees = false; std::string prepartition = ""; // Call Zoltan2 partitioning to better distribute @@ -229,6 +230,8 @@ int main(int narg, char** arg) "number of vertices to recolor in serial"); cmdp.setOption("recolorDegrees","recolorRandom",&recolorDegrees, "recolor based on vertex degrees or random numbers"); + cmdp.setOption("timing", "notimes", &timing, + "report how long coloring takes"); std::cout << "Starting everything" << std::endl; ////////////////////////////////// @@ -341,6 +344,7 @@ int main(int narg, char** arg) params.set("color_choice", colorMethod); params.set("color_method", colorAlg); params.set("verbose", verbose); + params.set("timing", timing); params.set("serial_threshold",serialThreshold); params.set("recolor_degrees",recolorDegrees); //params.set("balance_colors", balanceColors); // TODO