diff --git a/cmake/std/sems/PullRequestClang10.0.0TestingEnv.sh b/cmake/std/sems/PullRequestClang10.0.0TestingEnv.sh index 6042879d3fd9..e8428587f53e 100644 --- a/cmake/std/sems/PullRequestClang10.0.0TestingEnv.sh +++ b/cmake/std/sems/PullRequestClang10.0.0TestingEnv.sh @@ -9,13 +9,13 @@ # $ module purge # or Trilinos/cmake/unload_sems_dev_env.sh +module purge + source /projects/sems/modulefiles/utils/sems-modules-init.sh -module load sems-git/2.10.1 module load sems-gcc/5.3.0 module load sems-clang/10.0.0 module load sems-openmpi/1.10.1 -module load sems-python/2.7.9 module load sems-boost/1.69.0/base module load sems-zlib/1.2.8/base module load sems-hdf5/1.10.6/parallel @@ -23,8 +23,14 @@ module load sems-netcdf/4.7.3/parallel module load sems-parmetis/4.0.3/parallel module load sems-scotch/6.0.3/nopthread_64bit_parallel module load sems-superlu/4.3/base + module load sems-cmake/3.17.1 module load sems-ninja_fortran/1.10.0 +module load sems-git/2.10.1 + +module unload sems-python +module load sems-python/3.5.2 + # add the OpenMP environment variable we need export OMP_NUM_THREADS=2 diff --git a/cmake/std/sems/PullRequestClang7.0.1TestingEnv.sh b/cmake/std/sems/PullRequestClang7.0.1TestingEnv.sh index b6066ad86bfe..93975363d4db 100644 --- a/cmake/std/sems/PullRequestClang7.0.1TestingEnv.sh +++ b/cmake/std/sems/PullRequestClang7.0.1TestingEnv.sh @@ -9,13 +9,14 @@ # $ module purge # or Trilinos/cmake/unload_sems_dev_env.sh +module purge + source /projects/sems/modulefiles/utils/sems-modules-init.sh module load sems-git/2.10.1 module load sems-gcc/5.3.0 module load sems-clang/7.0.1 module load sems-openmpi/1.10.1 -module load sems-python/2.7.9 module load sems-boost/1.63.0/base module load sems-zlib/1.2.8/base module load sems-hdf5/1.10.6/parallel @@ -23,7 +24,6 @@ module load sems-netcdf/4.7.3/parallel module load sems-parmetis/4.0.3/parallel module load sems-scotch/6.0.3/nopthread_64bit_parallel module load sems-superlu/4.3/base -module load sems-cmake/3.12.2 # Load the SEMS CMake Module # - One of the SEMS modules will load CMake 3.4.x also, @@ -41,5 +41,8 @@ module load atdm-env #module load atdm-cmake/3.10.1 module load atdm-ninja_fortran/1.7.2 +module unload sems-python +module load sems-python/3.5.2 + # add the OpenMP environment variable we need export OMP_NUM_THREADS=2 diff --git a/cmake/std/sems/PullRequestClang9.0.0TestingEnv.sh b/cmake/std/sems/PullRequestClang9.0.0TestingEnv.sh index b70114233685..329895a918d2 100644 --- a/cmake/std/sems/PullRequestClang9.0.0TestingEnv.sh +++ b/cmake/std/sems/PullRequestClang9.0.0TestingEnv.sh @@ -9,13 +9,14 @@ # $ module purge # or Trilinos/cmake/unload_sems_dev_env.sh +module purge + source /projects/sems/modulefiles/utils/sems-modules-init.sh module load sems-git/2.10.1 module load sems-gcc/5.3.0 module load sems-clang/9.0.0 module load sems-openmpi/1.10.1 -module load sems-python/2.7.9 module load sems-boost/1.63.0/base module load sems-zlib/1.2.8/base module load sems-hdf5/1.10.6/parallel @@ -23,13 +24,12 @@ module load sems-netcdf/4.7.3/parallel module load sems-parmetis/4.0.3/parallel module load sems-scotch/6.0.3/nopthread_64bit_parallel module load sems-superlu/4.3/base -module load sems-cmake/3.12.2 # Load the SEMS CMake Module # - One of the SEMS modules will load CMake 3.4.x also, # so this will pull in the SEMS cmake 3.10.3 version # for Trilinos compatibility. -module load sems-cmake/3.12.2 +module load sems-cmake/3.10.3 # Using CMake and Ninja modules from the ATDM project space. # SEMS does not yet supply a recent enough version of CMake @@ -41,5 +41,9 @@ module load atdm-env #module load atdm-cmake/3.10.1 module load atdm-ninja_fortran/1.7.2 +module unload sems-python +module load sems-python/3.5.2 + + # add the OpenMP environment variable we need export OMP_NUM_THREADS=2 diff --git a/cmake/std/sems/PullRequestGCC4.8.4TestingEnv.sh b/cmake/std/sems/PullRequestGCC4.8.4TestingEnv.sh index 81be3b7e7a88..42e81ed73ca4 100644 --- a/cmake/std/sems/PullRequestGCC4.8.4TestingEnv.sh +++ b/cmake/std/sems/PullRequestGCC4.8.4TestingEnv.sh @@ -8,12 +8,12 @@ # $ module purge # or Trilinos/cmake/unload_sems_dev_env.sh +module purge + source /projects/sems/modulefiles/utils/sems-modules-init.sh module load sems-gcc/4.8.4 module load sems-openmpi/1.10.1 -module load sems-python/2.7.9 -module load sems-git/2.10.1 module load sems-boost/1.63.0/base module load sems-zlib/1.2.8/base module load sems-hdf5/1.10.6/parallel @@ -29,5 +29,10 @@ module load sems-superlu/4.3/base module load sems-cmake/3.10.3 module load sems-ninja_fortran/1.8.2 +module load sems-git/2.10.1 + +module unload sems-python +module load sems-python/3.5.2 + # add the OpenMP environment variable we need export OMP_NUM_THREADS=2 diff --git a/cmake/std/sems/PullRequestGCC4.9.3TestingEnv.sh b/cmake/std/sems/PullRequestGCC4.9.3TestingEnv.sh index e82c8c4cbb2b..dcef5dfbbed0 100644 --- a/cmake/std/sems/PullRequestGCC4.9.3TestingEnv.sh +++ b/cmake/std/sems/PullRequestGCC4.9.3TestingEnv.sh @@ -4,12 +4,12 @@ # usage: $ source PullRequestGCC4.9.3TestingEnv.sh +module purge + source /projects/sems/modulefiles/utils/sems-modules-init.sh module load sems-gcc/4.9.3 module load sems-openmpi/1.10.1 -module load sems-python/2.7.9 -module load sems-git/2.10.1 module load sems-boost/1.63.0/base module load sems-zlib/1.2.8/base module load sems-hdf5/1.10.6/parallel @@ -17,9 +17,15 @@ module load sems-netcdf/4.7.3/parallel module load sems-parmetis/4.0.3/parallel module load sems-scotch/6.0.3/nopthread_64bit_parallel module load sems-superlu/4.3/base + module load sems-cmake/3.12.2 module load sems-ninja_fortran/1.8.2 +module load sems-git/2.10.1 + +module unload sems-python +module load sems-python/3.5.2 + # add the OpenMP environment variable we need export OMP_NUM_THREADS=2 diff --git a/cmake/std/sems/PullRequestGCC4.9.3TestingEnvSERIAL.sh b/cmake/std/sems/PullRequestGCC4.9.3TestingEnvSERIAL.sh index dce5e353de08..6cc883051734 100644 --- a/cmake/std/sems/PullRequestGCC4.9.3TestingEnvSERIAL.sh +++ b/cmake/std/sems/PullRequestGCC4.9.3TestingEnvSERIAL.sh @@ -4,11 +4,11 @@ # usage: $ source PullRequestGCC4.9.3TestingEnv.sh +module purge + source /projects/sems/modulefiles/utils/sems-modules-init.sh module load sems-gcc/4.9.3 -module load sems-python/2.7.9 -module load sems-git/2.10.1 module load sems-boost/1.63.0/base module load sems-zlib/1.2.8/base module load sems-hdf5/1.10.6/base @@ -16,8 +16,15 @@ module load sems-netcdf/4.7.3/base module load sems-metis/5.1.0/base # module load sems-scotch/6.0.3/nopthread_64bit_parallel module load sems-superlu/4.3/base + module load sems-cmake/3.12.2 module load sems-ninja_fortran/1.8.2 +module load sems-git/2.10.1 + +module unload sems-python +module load sems-python/3.5.2 + + # add the OpenMP environment variable we need export OMP_NUM_THREADS=2 diff --git a/cmake/std/sems/PullRequestGCC7.2.0SerialTestingEnv.sh b/cmake/std/sems/PullRequestGCC7.2.0SerialTestingEnv.sh index d0f7431346d5..7a995c6d22b1 100644 --- a/cmake/std/sems/PullRequestGCC7.2.0SerialTestingEnv.sh +++ b/cmake/std/sems/PullRequestGCC7.2.0SerialTestingEnv.sh @@ -4,19 +4,26 @@ # usage: $ source PullRequestGCC7.2.0TestingEnv.sh +module purge + source /projects/sems/modulefiles/utils/sems-modules-init.sh module load sems-gcc/7.2.0 -module load sems-python/2.7.9 -module load sems-git/2.10.1 module load sems-boost/1.63.0/base module load sems-zlib/1.2.8/base module load sems-hdf5/1.10.6/base module load sems-netcdf/4.7.3/base module load sems-metis/5.1.0/base module load sems-superlu/4.3/base + module load sems-cmake/3.12.2 module load sems-ninja_fortran/1.8.2 +# Boost loads sems-python/2.7.x as a side effect. +module unload sems-python +module load sems-python/3.5.2 + +module load sems-git/2.10.1 + # add the OpenMP environment variable we need export OMP_NUM_THREADS=2 diff --git a/cmake/std/sems/PullRequestGCC7.2.0TestingEnv.sh b/cmake/std/sems/PullRequestGCC7.2.0TestingEnv.sh index d9972e3b6869..f858d316ed17 100644 --- a/cmake/std/sems/PullRequestGCC7.2.0TestingEnv.sh +++ b/cmake/std/sems/PullRequestGCC7.2.0TestingEnv.sh @@ -4,12 +4,12 @@ # usage: $ source PullRequestGCC7.2.0TestingEnv.sh +module purge + source /projects/sems/modulefiles/utils/sems-modules-init.sh module load sems-gcc/7.2.0 module load sems-openmpi/1.10.1 -module load sems-python/2.7.9 -module load sems-git/2.10.1 module load sems-boost/1.63.0/base module load sems-zlib/1.2.8/base module load sems-hdf5/1.10.6/parallel @@ -17,9 +17,17 @@ module load sems-netcdf/4.7.3/parallel module load sems-parmetis/4.0.3/parallel module load sems-scotch/6.0.3/nopthread_64bit_parallel module load sems-superlu/4.3/base -module load sems-cmake/3.12.2 + +module load sems-cmake/3.10.3 module load sems-ninja_fortran/1.8.2 +module load sems-git/2.10.1 + +# Note: sems-boost also loads sems-python/2.7.x, we need to +# unload it here. +module unload sems-python +module load sems-python/3.5.2 + # add the OpenMP environment variable we need export OMP_NUM_THREADS=2 diff --git a/cmake/std/sems/PullRequestGCC8.3.0TestingEnv.sh b/cmake/std/sems/PullRequestGCC8.3.0TestingEnv.sh index 628ac355869e..69c913cbf925 100644 --- a/cmake/std/sems/PullRequestGCC8.3.0TestingEnv.sh +++ b/cmake/std/sems/PullRequestGCC8.3.0TestingEnv.sh @@ -4,12 +4,12 @@ # usage: $ source PullRequestGCC8.3.0TestingEnv.sh +module purge + source /projects/sems/modulefiles/utils/sems-modules-init.sh module load sems-gcc/8.3.0 module load sems-openmpi/1.10.1 -module load sems-python/2.7.9 -module load sems-git/2.10.1 module load sems-boost/1.66.0/base module load sems-zlib/1.2.8/base module load sems-hdf5/1.10.6/parallel @@ -17,8 +17,17 @@ module load sems-netcdf/4.7.3/parallel module load sems-parmetis/4.0.3/parallel module load sems-scotch/6.0.3/nopthread_64bit_parallel module load sems-superlu/4.3/base + module load sems-cmake/3.17.1 module load sems-ninja_fortran/1.10.0 +module load sems-git/2.10.1 + +module unload sems-python +module load sems-python/3.5.2 + + # add the OpenMP environment variable we need export OMP_NUM_THREADS=2 + + diff --git a/cmake/std/sems/PullRequestIntel17.0.1TestingEnv.sh b/cmake/std/sems/PullRequestIntel17.0.1TestingEnv.sh index 18fbc3611d3c..26bcf3fc970a 100644 --- a/cmake/std/sems/PullRequestIntel17.0.1TestingEnv.sh +++ b/cmake/std/sems/PullRequestIntel17.0.1TestingEnv.sh @@ -7,6 +7,8 @@ # After the environment is no longer needed, it can be purged using # $ module purge # or Trilinos/cmake/unload_sems_dev_env.sh + +module purge source /projects/sems/modulefiles/utils/sems-modules-init.sh @@ -14,8 +16,6 @@ export SEMS_FORCE_LOCAL_COMPILER_VERSION=4.9.3 module load sems-gcc/5.3.0 module load sems-intel/17.0.1 module load sems-mpich/3.2 -module load sems-python/2.7.9 -module load sems-git/2.10.1 module load sems-boost/1.63.0/base module load sems-zlib/1.2.8/base module load sems-hdf5/1.10.6/parallel @@ -23,9 +23,15 @@ module load sems-netcdf/4.7.3/parallel module load sems-parmetis/4.0.3/parallel module load sems-scotch/6.0.3/nopthread_64bit_parallel module load sems-superlu/4.3/base + module load sems-cmake/3.12.2 module load sems-ninja_fortran/1.8.2 +module load sems-git/2.10.1 + +module unload sems-python +module load sems-python/3.5.2 + # add the OpenMP environment variable we need export OMP_NUM_THREADS=2 diff --git a/cmake/std/sems/PullRequestIntel19.0.5TestingEnv.sh b/cmake/std/sems/PullRequestIntel19.0.5TestingEnv.sh index 08af6094b5b7..ef908231c123 100644 --- a/cmake/std/sems/PullRequestIntel19.0.5TestingEnv.sh +++ b/cmake/std/sems/PullRequestIntel19.0.5TestingEnv.sh @@ -8,14 +8,14 @@ # $ module purge # or Trilinos/cmake/unload_sems_dev_env.sh +module purge + source /projects/sems/modulefiles/utils/sems-modules-init.sh export SEMS_FORCE_LOCAL_COMPILER_VERSION=4.9.3 module load sems-gcc/6.1.0 module load sems-intel/19.0.5 module load sems-mpich/3.2 -module load sems-python/2.7.9 -module load sems-git/2.10.1 module load sems-boost/1.63.0/base module load sems-zlib/1.2.8/base module load sems-hdf5/1.10.6/parallel @@ -23,8 +23,14 @@ module load sems-netcdf/4.7.3/parallel module load sems-parmetis/4.0.3/parallel module load sems-scotch/6.0.3/nopthread_64bit_parallel module load sems-superlu/4.3/base + module load sems-cmake/3.12.2 module load sems-ninja_fortran/1.8.2 +module load sems-git/2.10.1 + +module unload sems-python +module load sems-python/3.5.2 + # add the OpenMP environment variable we need export OMP_NUM_THREADS=2 diff --git a/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp b/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp index 883cbb08cbbb..57096e4961d0 100644 --- a/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp +++ b/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp @@ -154,12 +154,13 @@ void MakeCoarseLevelMaps(const int maxRegPerGID, for(int currentLevel = 1; currentLevel < numLevels; ++currentLevel) { RCP level = regHierarchy->GetLevel(currentLevel); - RCP regProlong = level->Get >("P"); - RCP regRowMap = level->Get >("regRowMap"); + RCP regProlong = level->Get >("P"); + RCP regRowMap = Teuchos::rcp_const_cast(regProlong->getColMap()); RCP levelFine = regHierarchy->GetLevel(currentLevel-1); RCP regRowImportFine = levelFine->Get >("rowImport"); - RCP regRowMapFine = levelFine->Get >("regRowMap"); + RCP regMatFine = levelFine->Get >("A"); + RCP regRowMapFine = Teuchos::rcp_const_cast(regMatFine->getRowMap()); // Extracting some basic information about local mesh in composite/region format const size_t numFineRegionNodes = regProlong->getNodeNumRows(); @@ -325,15 +326,10 @@ void MakeCoarseLevelMaps(const int maxRegPerGID, regionMatVecLIDs, regionInterfaceImporter); // Fill level with the outputs - level->Set >("compRowMap",compRowMap); - level->Set >("regRowMap",regRowMapCurrent); - level->Set >("regColMap",Teuchos::rcp_const_cast(regProlong->getColMap())); level->Set >("regionMatVecLIDs",regionMatVecLIDs); level->Set > >("regionInterfaceImporter", regionInterfaceImporter); level->Set > >("interfaceGIDs", interfaceGIDs); level->Set > >("regionsPerGIDWithGhosts", regionsPerGIDWithGhosts); - level->Set >("rowMap",quasiRegRowMap); - level->Set >("colMap",quasiRegRowMap); level->Set > >("rowImport",regRowImportCurrent); // Finally reset compositeToRegionLIDs @@ -707,7 +703,6 @@ void MakeInterfaceScalingFactors(const int numLevels, { #include "Xpetra_UseShortNames.hpp" #include "MueLu_UseShortNames.hpp" - // std::cout << compRowMaps[0]->getComm()->getRank() << " | Computing interface scaling factors ..." << std::endl; const SC SC_ONE = Teuchos::ScalarTraits::one(); @@ -715,15 +710,15 @@ void MakeInterfaceScalingFactors(const int numLevels, for (int l = 0; l < numLevels; l++) { RCP level = regHierarchy->GetLevel(l); - RCP > regRowMap = level->Get > >("regRowMap"); + RCP regMat = level->Get >("A"); + RCP regRowMap = Teuchos::rcp_const_cast(regMat->getRowMap()); RCP > regRowImporters = level->Get > >("rowImport"); // initialize region vector with all ones. RCP > regInterfaceScalings = VectorFactory::Build(regRowMap); regInterfaceScalings->putScalar(SC_ONE); // transform to composite layout while adding interface values via the Export() combine mode - //RCP compInterfaceScalingSum = VectorFactory::Build(compRowMaps[l], true); - RCP compInterfaceScalingSum = VectorFactory::Build( level->Get > >("compRowMap"), true); + RCP compInterfaceScalingSum = VectorFactory::Build( regRowImporters->getSourceMap() , true); regionalToComposite(regInterfaceScalings, compInterfaceScalingSum, regRowImporters); /* transform composite layout back to regional layout. Now, GIDs associated @@ -779,7 +774,9 @@ void createRegionHierarchy(const int numDimensions, } RCP level0 = regHierarchy->GetLevel(0); - RCP > revisedRowMap1 = level0->Get > >("regRowMap"); + RCP > regRowImporters = level0->Get > >("rowImport"); + RCP regMat = level0->Get >("A"); + RCP revisedRowMap = Teuchos::rcp_const_cast(regMat->getRowMap()); /* Get coarse level matrices and prolongators from MueLu hierarchy * Note: fine level has been dealt with previously, so we start at level 1 here. */ @@ -795,15 +792,12 @@ void createRegionHierarchy(const int numDimensions, RCP regMatrices = level->Get >("A", MueLu::NoFactory::get()); - level->Set > >("regRowMap",Teuchos::rcp_const_cast>(regMatrices->getRowMap())); - level->Set > >("regColMap",Teuchos::rcp_const_cast>(regMatrices->getColMap())); - // Create residual and solution vectors and cache them for vCycle apply std::string levelName("level"); levelName += std::to_string(l); ParameterList& levelList = hierarchyData->sublist(levelName, false, "list of data on current level"); - RCP regRes = VectorFactory::Build(revisedRowMap1, true); - RCP regSol = VectorFactory::Build(revisedRowMap1, true); + RCP regRes = VectorFactory::Build(revisedRowMap, true); + RCP regSol = VectorFactory::Build(revisedRowMap, true); levelList.set >("residual", regRes, "Cached residual vector"); levelList.set >("solution", regSol, "Cached solution vector"); @@ -815,7 +809,7 @@ void createRegionHierarchy(const int numDimensions, tmLocal = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("createRegionHierarchy: MakeCoarseLevel"))); MakeCoarseLevelMaps(maxRegPerGID, - regHierarchy); //TODO: CLEAN UP THIS FUNCTION + regHierarchy); tmLocal = Teuchos::null; tmLocal = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("createRegionHierarchy: MakeInterfaceScaling"))); @@ -833,13 +827,13 @@ void createRegionHierarchy(const int numDimensions, for(int levelIdx = 0; levelIdx < numLevels - 1; ++levelIdx) { RCP level = regHierarchy->GetLevel(levelIdx); RCP regMatrix = level->Get >("A", MueLu::NoFactory::get()); - RCP > regRowMap = level->Get > >("regRowMap"); + RCP regRowMap = Teuchos::rcp_const_cast(regMatrix->getRowMap()); RCP > regRowImporter = level->Get > >("rowImport"); - RCP > regInterfaceScalings1 = level->Get > >("regInterfaceScalings"); + RCP > regInterfaceScalings = level->Get > >("regInterfaceScalings"); smootherParams[levelIdx]->set("smoother: level", levelIdx); smootherSetup(smootherParams[levelIdx], regRowMap, - regMatrix, regInterfaceScalings1, + regMatrix, regInterfaceScalings, regRowImporter); } @@ -850,10 +844,10 @@ void createRegionHierarchy(const int numDimensions, const std::string coarseSolverType = coarseSolverData->get("coarse solver type"); if (coarseSolverType == "smoother") { RCP level = regHierarchy->GetLevel(numLevels - 1); - RCP regMatrix = level->Get >("A", MueLu::NoFactory::get()); - RCP > regRowMap = level->Get > >("regRowMap"); - RCP > regRowImporter = level->Get > >("rowImport"); - RCP > regInterfaceScalings1 = level->Get > >("regInterfaceScalings"); + RCP regMatrix = level->Get >("A", MueLu::NoFactory::get()); + RCP regRowMap = Teuchos::rcp_const_cast(regMatrix->getRowMap()); + RCP > regRowImporter = level->Get > >("rowImport"); + RCP > regInterfaceScalings = level->Get > >("regInterfaceScalings"); // Set the smoother on the coarsest level. const std::string smootherXMLFileName = coarseSolverData->get("smoother xml file"); RCP coarseSmootherParams = smootherParams[numLevels - 1]; @@ -863,7 +857,7 @@ void createRegionHierarchy(const int numDimensions, smootherSetup(smootherParams[numLevels - 1], regRowMap, - regMatrix, regInterfaceScalings1, + regMatrix, regInterfaceScalings, regRowImporter); } else if( (coarseSolverType == "direct") || (coarseSolverType == "amg") ) { // A composite coarse matrix is needed @@ -871,12 +865,12 @@ void createRegionHierarchy(const int numDimensions, // std::cout << mapComp->getComm()->getRank() << " | MakeCoarseCompositeOperator ..." << std::endl; RCP level = regHierarchy->GetLevel(numLevels - 1); - RCP regMatrix = level->Get >("A", MueLu::NoFactory::get()); - RCP > compRowMap = level->Get > >("compRowMap"); - RCP > regRowMap = level->Get > >("regRowMap"); + RCP regMatrix = level->Get >("A", MueLu::NoFactory::get()); + RCP regRowMap = Teuchos::rcp_const_cast(regMatrix->getRowMap()); RCP > regRowImporter = level->Get > >("rowImport"); - RCP > quasiRegRowMap = level->Get > >("rowMap"); - RCP > quasiRegColMap = level->Get > >("colMap"); + RCP compRowMap = Teuchos::rcp_const_cast(regRowImporter->getSourceMap()); + RCP quasiRegRowMap = Teuchos::rcp_const_cast(regRowImporter->getTargetMap()); + RCP quasiRegColMap = Teuchos::rcp_const_cast(regRowImporter->getTargetMap());// col map same as row map. RCP > coarseCompOp; RCP compCoarseCoordinates; @@ -957,12 +951,12 @@ void vCycle(const int l, ///< ID of current level RCP level = regHierarchy->GetLevel(l); RCP regMatrix = level->Get >("A", MueLu::NoFactory::get()); - RCP > compRowMap = level->Get > >("compRowMap"); - RCP > regRowMap = level->Get > >("regRowMap"); RCP > regRowImporter = level->Get > >("rowImport"); - RCP > quasiRegRowMap = level->Get > >("rowMap"); - RCP > quasiRegColMap = level->Get > >("colMap"); - RCP > regInterfaceScalings1 = level->Get > >("regInterfaceScalings"); + RCP regRowMap = Teuchos::rcp_const_cast(regMatrix->getRowMap()); + RCP compRowMap = Teuchos::rcp_const_cast(regRowImporter->getSourceMap()); + RCP quasiRegRowMap = Teuchos::rcp_const_cast(regRowImporter->getTargetMap()); + RCP quasiRegColMap = Teuchos::rcp_const_cast(regRowImporter->getTargetMap()); // col map samle as row map + RCP > regInterfaceScalings = level->Get > >("regInterfaceScalings"); int cycleCount = 1; if(cycleType == "W" && l > 0) // W cycle and not on finest level @@ -1003,7 +997,7 @@ void vCycle(const int l, ///< ID of current level tm = Teuchos::null; tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 3 - scale interface"))); - scaleInterfaceDOFs(regRes, regInterfaceScalings1, true); + scaleInterfaceDOFs(regRes, regInterfaceScalings, true); tm = Teuchos::null; tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 4 - create coarse vectors"))); @@ -1015,7 +1009,7 @@ void vCycle(const int l, ///< ID of current level { RCP levelCoarse = regHierarchy->GetLevel(l+1); RCP regProlongCoarse = levelCoarse->Get >("P", MueLu::NoFactory::get()); - RCP > regRowMapCoarse = levelCoarse->Get > >("regRowMap"); + RCP > regRowMapCoarse = regProlongCoarse->getColMap(); // Get pre-communicated communication patterns for the fast MatVec const ArrayRCP regionInterfaceLIDs = smootherParams[l+1]->get>("Fast MatVec: interface LIDs"); const RCP regionInterfaceImporter = smootherParams[l+1]->get>("Fast MatVec: interface importer"); @@ -1096,8 +1090,8 @@ void vCycle(const int l, ///< ID of current level RCP compX = VectorFactory::Build(coarseRowMap, true); RCP compRhs = VectorFactory::Build(coarseRowMap, true); { - RCP inverseInterfaceScaling = VectorFactory::Build(regInterfaceScalings1->getMap()); - inverseInterfaceScaling->reciprocal(*regInterfaceScalings1); + RCP inverseInterfaceScaling = VectorFactory::Build(regInterfaceScalings->getMap()); + inverseInterfaceScaling->reciprocal(*regInterfaceScalings); fineRegB->elementWiseMultiply(SC_ONE, *fineRegB, *inverseInterfaceScaling, SC_ZERO); regionalToComposite(fineRegB, compRhs, regRowImporter); diff --git a/packages/muelu/research/regionMG/test/structured/Driver_Structured_Regions.cpp b/packages/muelu/research/regionMG/test/structured/Driver_Structured_Regions.cpp index c6049d03084d..f95b413b5d5a 100644 --- a/packages/muelu/research/regionMG/test/structured/Driver_Structured_Regions.cpp +++ b/packages/muelu/research/regionMG/test/structured/Driver_Structured_Regions.cpp @@ -714,11 +714,6 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar { RCP level0 = regHierarchy->GetLevel(0); - level0->Set > >("compRowMap", dofMap); - level0->Set > >("rowMap", rowMap); - level0->Set > >("colMap", colMap); - level0->Set > >("regRowMap", revisedRowMap); - level0->Set > >("regColMap", revisedColMap); level0->Set > >("rowImport",rowImport); level0->Set > ("compositeToRegionLIDs", compositeToRegionLIDs() ); level0->Set > >("interfaceGIDs", interfaceGIDsPerLevel[0]); diff --git a/packages/muelu/src/Misc/MueLu_SchurComplementFactory_def.hpp b/packages/muelu/src/Misc/MueLu_SchurComplementFactory_def.hpp index 309d5bdc9fa9..1a4dce874ff4 100644 --- a/packages/muelu/src/Misc/MueLu_SchurComplementFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_SchurComplementFactory_def.hpp @@ -126,7 +126,7 @@ namespace MueLu { } else { RCP bA00 = Teuchos::rcp_dynamic_cast(A00); TEUCHOS_TEST_FOR_EXCEPTION(bA00.is_null()==false, MueLu::Exceptions::RuntimeError,"MueLu::SchurComplementFactory::Build: Mass lumping not implemented. Implement a mass lumping kernel!"); - diag = Utilities::GetLumpedMatrixDiagonal(A00); + diag = Utilities::GetLumpedMatrixDiagonal(*A00); } // invert diagonal vector. Replace all entries smaller than 1e-4 by one! RCP D = (!fixing ? Utilities::GetInverse(diag) : Utilities::GetInverse(diag, 1e-4, one)); diff --git a/packages/muelu/src/Smoothers/BlockedSmoothers/MueLu_BraessSarazinSmoother_def.hpp b/packages/muelu/src/Smoothers/BlockedSmoothers/MueLu_BraessSarazinSmoother_def.hpp index 87ebaa414c31..f49bfef1ddc7 100644 --- a/packages/muelu/src/Smoothers/BlockedSmoothers/MueLu_BraessSarazinSmoother_def.hpp +++ b/packages/muelu/src/Smoothers/BlockedSmoothers/MueLu_BraessSarazinSmoother_def.hpp @@ -212,7 +212,7 @@ namespace MueLu { if (pL.get("lumping") == false) { A00_->getLocalDiagCopy(*diagFVector); // extract diagonal of F } else { - diagFVector = Utilities::GetLumpedMatrixDiagonal(A00_); + diagFVector = Utilities::GetLumpedMatrixDiagonal(*A00_); } diagFVector->scale(omega); D_ = Utilities::GetInverse(diagFVector); diff --git a/packages/muelu/src/Smoothers/BlockedSmoothers/MueLu_SimpleSmoother_def.hpp b/packages/muelu/src/Smoothers/BlockedSmoothers/MueLu_SimpleSmoother_def.hpp index 6e6dd58ec693..2c4920f14e17 100644 --- a/packages/muelu/src/Smoothers/BlockedSmoothers/MueLu_SimpleSmoother_def.hpp +++ b/packages/muelu/src/Smoothers/BlockedSmoothers/MueLu_SimpleSmoother_def.hpp @@ -250,7 +250,7 @@ namespace MueLu { diag[i] = absRowSum; }*/ // TODO this does not work if F_ is nested! - diagFVector = Utilities::GetLumpedMatrixDiagonal(F_); + diagFVector = Utilities::GetLumpedMatrixDiagonal(*F_); } diagFinv_ = Utilities::GetInverse(diagFVector); diff --git a/packages/muelu/src/Smoothers/MueLu_Ifpack2Smoother_def.hpp b/packages/muelu/src/Smoothers/MueLu_Ifpack2Smoother_def.hpp index 366d845770c0..f5e4f909fb81 100644 --- a/packages/muelu/src/Smoothers/MueLu_Ifpack2Smoother_def.hpp +++ b/packages/muelu/src/Smoothers/MueLu_Ifpack2Smoother_def.hpp @@ -644,7 +644,7 @@ namespace MueLu { doScale = paramList.get("chebyshev: use rowsumabs diagonal scaling"); paramList.remove("chebyshev: use rowsumabs diagonal scaling"); if (doScale) { - RCP lumpedDiagonal = Utilities::GetLumpedMatrixDiagonal(currentLevel.Get >("A"),true); + RCP lumpedDiagonal = Utilities::GetLumpedMatrixDiagonal(*(currentLevel.Get >("A")),true); const Xpetra::TpetraVector& tmpVec = dynamic_cast&>(*lumpedDiagonal); paramList.set("chebyshev: operator inv diagonal",tmpVec.getTpetra_Vector()); } diff --git a/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_SaPFactory_def.hpp b/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_SaPFactory_def.hpp index 0654fdeaa871..4eafe08bb1bf 100644 --- a/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_SaPFactory_def.hpp +++ b/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_SaPFactory_def.hpp @@ -178,7 +178,7 @@ namespace MueLu { Coordinate stopTol = 1e-4; if (useAbsValueRowSum) { const bool returnReciprocal=true; - invDiag = Utilities::GetLumpedMatrixDiagonal(A,returnReciprocal); + invDiag = Utilities::GetLumpedMatrixDiagonal(*A,returnReciprocal); TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError, "SaPFactory: eigenvalue estimate: diagonal reciprocal is null."); lambdaMax = Utilities::PowerMethod(*A, invDiag, maxEigenIterations, stopTol); @@ -202,7 +202,7 @@ namespace MueLu { else if (invDiag == Teuchos::null) { GetOStream(Runtime0) << "Using rowsumabs diagonal" << std::endl; const bool returnReciprocal=true; - invDiag = Utilities::GetLumpedMatrixDiagonal(A,returnReciprocal); + invDiag = Utilities::GetLumpedMatrixDiagonal(*A,returnReciprocal); TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError, "SaPFactory: diagonal reciprocal is null."); } diff --git a/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp b/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp index e3e9e47f8700..ae68787366ed 100644 --- a/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp +++ b/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp @@ -166,58 +166,23 @@ namespace MueLu { static RCP GetMatrixDiagonalInverse(const Matrix& A, Magnitude tol = Teuchos::ScalarTraits::eps()*100, Scalar tolReplacement = Teuchos::ScalarTraits::zero()) { Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase::GetMatrixDiagonalInverse"); - RCP rcpA = Teuchos::rcpFromRef(A); - - RCP rowMap = rcpA->getRowMap(); + RCP rowMap = A.getRowMap(); RCP diag = Xpetra::VectorFactory::Build(rowMap,true); - rcpA->getLocalDiagCopy(*diag); + A.getLocalDiagCopy(*diag); RCP inv = MueLu::UtilitiesBase::GetInverse(diag, tol, tolReplacement); return inv; } - - - /*! @brief Extract Matrix Diagonal of lumped matrix - - Returns Matrix diagonal of lumped matrix in ArrayRCP. - - NOTE -- it's assumed that A has been fillComplete'd. - */ - static Teuchos::ArrayRCP GetLumpedMatrixDiagonal(const Matrix& A, const bool doReciprocal = false, Magnitude tol = Teuchos::ScalarTraits::eps()*100, Scalar tolReplacement = Teuchos::ScalarTraits::zero()) { - - size_t numRows = A.getRowMap()->getNodeNumElements(); - const Scalar zero = Teuchos::ScalarTraits::zero(); - const Scalar one = Teuchos::ScalarTraits::one(); - Teuchos::ArrayRCP diag(numRows); - Teuchos::ArrayView cols; - Teuchos::ArrayView vals; - for (size_t i = 0; i < numRows; ++i) { - A.getLocalRowView(i, cols, vals); - diag[i] = zero; - for (LocalOrdinal j = 0; j < cols.size(); ++j) { - diag[i] += Teuchos::ScalarTraits::magnitude(vals[j]); - } - } - if (doReciprocal) { - for (size_t i = 0; i < numRows; ++i) - if(Teuchos::ScalarTraits::magnitude(diag[i]) > tol) - diag[i] = one / diag[i]; - else - diag[i] = tolReplacement; - } - return diag; - } - /*! @brief Extract Matrix Diagonal of lumped matrix - Returns Matrix diagonal of lumped matrix in ArrayRCP. + Returns Matrix diagonal of lumped matrix in RCP. NOTE -- it's assumed that A has been fillComplete'd. */ - static Teuchos::RCP GetLumpedMatrixDiagonal(Teuchos::RCP rcpA, const bool doReciprocal = false, + static Teuchos::RCP GetLumpedMatrixDiagonal(Matrix const & A, const bool doReciprocal = false, Magnitude tol = Teuchos::ScalarTraits::eps()*100, Scalar tolReplacement = Teuchos::ScalarTraits::zero()) { @@ -225,6 +190,8 @@ namespace MueLu { const Scalar zero = Teuchos::ScalarTraits::zero(); const Scalar one = Teuchos::ScalarTraits::one(); + Teuchos::RCP rcpA = Teuchos::rcpFromRef(A); + RCP > bA = Teuchos::rcp_dynamic_cast >(rcpA); if(bA == Teuchos::null) { @@ -251,9 +218,6 @@ namespace MueLu { } else { TEUCHOS_TEST_FOR_EXCEPTION(doReciprocal, Xpetra::Exceptions::RuntimeError, "UtilitiesBase::GetLumpedMatrixDiagonal(): extracting reciprocal of diagonal of a blocked matrix is not supported"); - //TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != bA->Cols(), Xpetra::Exceptions::RuntimeError, - // "UtilitiesBase::GetLumpedMatrixDiagonal(): you cannot extract the diagonal of a "<< bA->Rows() << "x"<< bA->Cols() << " blocked matrix." ); - diag = Xpetra::VectorFactory::Build(bA->getRangeMapExtractor()->getFullMap(),true); for (size_t row = 0; row < bA->Rows(); ++row) { @@ -262,7 +226,7 @@ namespace MueLu { // if we are in Thyra mode, but the block (row,row) is again a blocked operator, we have to use (pseudo) Xpetra-style GIDs with offset! bool bThyraMode = bA->getRangeMapExtractor()->getThyraMode() && (Teuchos::rcp_dynamic_cast >(bA->getMatrix(row,col)) == Teuchos::null); RCP ddtemp = bA->getRangeMapExtractor()->ExtractVector(diag,row,bThyraMode); - RCP dd = GetLumpedMatrixDiagonal(bA->getMatrix(row,col)); + RCP dd = GetLumpedMatrixDiagonal(*(bA->getMatrix(row,col))); ddtemp->update(Teuchos::as(1.0),*dd,Teuchos::as(1.0)); bA->getRangeMapExtractor()->InsertVector(ddtemp,row,diag,bThyraMode); } diff --git a/packages/muelu/src/Utils/MueLu_Utilities_decl.hpp b/packages/muelu/src/Utils/MueLu_Utilities_decl.hpp index 271037ee9679..ae7ca5759d6d 100644 --- a/packages/muelu/src/Utils/MueLu_Utilities_decl.hpp +++ b/packages/muelu/src/Utils/MueLu_Utilities_decl.hpp @@ -208,9 +208,7 @@ namespace MueLu { static RCP > Crs2Op(RCP > Op) { return UtilitiesBase::Crs2Op(Op); } static Teuchos::ArrayRCP GetMatrixDiagonal(const Xpetra::Matrix& A) { return MueLu::UtilitiesBase::GetMatrixDiagonal(A); } static RCP > GetMatrixDiagonalInverse(const Xpetra::Matrix& A, Magnitude tol = Teuchos::ScalarTraits::eps()*100) { return MueLu::UtilitiesBase::GetMatrixDiagonalInverse(A,tol); } - static Teuchos::ArrayRCP GetLumpedMatrixDiagonal(const Xpetra::Matrix& A, const bool doReciprocal=false, Magnitude tol = Teuchos::ScalarTraits::eps()*100, Scalar tolReplacement = Teuchos::ScalarTraits::zero()) - { return MueLu::UtilitiesBase::GetLumpedMatrixDiagonal(A, doReciprocal, tol, tolReplacement); } - static Teuchos::RCP > GetLumpedMatrixDiagonal(Teuchos::RCP > A, const bool doReciprocal=false, Magnitude tol = Teuchos::ScalarTraits::eps()*100, Scalar tolReplacement = Teuchos::ScalarTraits::zero()) + static Teuchos::RCP > GetLumpedMatrixDiagonal(Xpetra::Matrix const &A, const bool doReciprocal=false, Magnitude tol = Teuchos::ScalarTraits::eps()*100, Scalar tolReplacement = Teuchos::ScalarTraits::zero()) { return MueLu::UtilitiesBase::GetLumpedMatrixDiagonal(A, doReciprocal, tol, tolReplacement); } static RCP > GetMatrixOverlappedDiagonal(const Xpetra::Matrix& A) { return MueLu::UtilitiesBase::GetMatrixOverlappedDiagonal(A); } static Teuchos::RCP > GetInverse(Teuchos::RCP > v, Magnitude tol = Teuchos::ScalarTraits::eps()*100, Scalar tolReplacement = Teuchos::ScalarTraits::zero()) { return MueLu::UtilitiesBase::GetInverse(v,tol,tolReplacement); } @@ -585,8 +583,7 @@ namespace MueLu { static RCP Crs2Op(RCP Op) { return MueLu::UtilitiesBase::Crs2Op(Op); } static Teuchos::ArrayRCP GetMatrixDiagonal(const Matrix& A) { return MueLu::UtilitiesBase::GetMatrixDiagonal(A); } static RCP GetMatrixDiagonalInverse(const Matrix& A, Magnitude tol = Teuchos::ScalarTraits::eps()*100) { return MueLu::UtilitiesBase::GetMatrixDiagonalInverse(A,tol); } - static Teuchos::ArrayRCP GetLumpedMatrixDiagonal(const Matrix& A, const bool doReciprocal=false, Magnitude tol = Teuchos::ScalarTraits::eps()*100, Scalar tolReplacement = Teuchos::ScalarTraits::zero()) { return MueLu::UtilitiesBase::GetLumpedMatrixDiagonal(A, doReciprocal, tol, tolReplacement); } - static Teuchos::RCP > GetLumpedMatrixDiagonal(Teuchos::RCP > A, const bool doReciprocal=false, Magnitude tol = Teuchos::ScalarTraits::eps()*100, Scalar tolReplacement = Teuchos::ScalarTraits::zero()) + static Teuchos::RCP > GetLumpedMatrixDiagonal(Xpetra::Matrix const &A, const bool doReciprocal=false, Magnitude tol = Teuchos::ScalarTraits::eps()*100, Scalar tolReplacement = Teuchos::ScalarTraits::zero()) { return MueLu::UtilitiesBase::GetLumpedMatrixDiagonal(A, doReciprocal, tol, tolReplacement); } static RCP GetMatrixOverlappedDiagonal(const Matrix& A) { return MueLu::UtilitiesBase::GetMatrixOverlappedDiagonal(A); } static RCP GetInverse(Teuchos::RCP v, Magnitude tol = Teuchos::ScalarTraits::eps()*100, Scalar tolReplacement = Teuchos::ScalarTraits::zero()) { return MueLu::UtilitiesBase::GetInverse(v,tol,tolReplacement); } diff --git a/packages/muelu/src/Utils/MueLu_Utilities_kokkos_decl.hpp b/packages/muelu/src/Utils/MueLu_Utilities_kokkos_decl.hpp index 78af7bf51aed..5eceebd91590 100644 --- a/packages/muelu/src/Utils/MueLu_Utilities_kokkos_decl.hpp +++ b/packages/muelu/src/Utils/MueLu_Utilities_kokkos_decl.hpp @@ -386,10 +386,7 @@ namespace MueLu { static RCP GetMatrixDiagonalInverse(const Matrix& A, Magnitude tol = Teuchos::ScalarTraits::eps()*100, const bool doLumped=false) { return UtilitiesBase::GetMatrixDiagonalInverse(A, tol, doLumped); } - static ArrayRCP GetLumpedMatrixDiagonal(const Matrix& A, const bool doReciprocal=false, Magnitude tol = Teuchos::ScalarTraits::eps()*100, Scalar tolReplacement = Teuchos::ScalarTraits::zero()) { - return UtilitiesBase::GetLumpedMatrixDiagonal(A, doReciprocal, tol, tolReplacement); - } - static RCP GetLumpedMatrixDiagonal(RCP A, const bool doReciprocal=false, Magnitude tol = Teuchos::ScalarTraits::eps()*100, Scalar tolReplacement = Teuchos::ScalarTraits::zero()) { + static RCP GetLumpedMatrixDiagonal(Matrix const &A, const bool doReciprocal=false, Magnitude tol = Teuchos::ScalarTraits::eps()*100, Scalar tolReplacement = Teuchos::ScalarTraits::zero()) { return UtilitiesBase::GetLumpedMatrixDiagonal(A, doReciprocal, tol, tolReplacement); } static RCP GetMatrixOverlappedDiagonal(const Matrix& A) { diff --git a/packages/muelu/test/maxwell/Maxwell3D.cpp b/packages/muelu/test/maxwell/Maxwell3D.cpp index 3f6cfd3d10cc..d13c1bdbd050 100644 --- a/packages/muelu/test/maxwell/Maxwell3D.cpp +++ b/packages/muelu/test/maxwell/Maxwell3D.cpp @@ -713,7 +713,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // nodal mass matrix RCP M0_Matrix = Xpetra::IO::Read(M0_file, node_map); // build lumped mass matrix inverse (M0inv_Matrix) - RCP diag = Utilities::GetLumpedMatrixDiagonal(M0_Matrix); + RCP diag = Utilities::GetLumpedMatrixDiagonal(*M0_Matrix); RCP M0inv_MatrixWrap = rcp(new CrsMatrixWrap(node_map, node_map, 0)); RCP M0inv_CrsMatrix = M0inv_MatrixWrap->getCrsMatrix(); Teuchos::ArrayRCP rowPtr; diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp index 377fb52a39e8..b5f9d5514cf0 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp @@ -823,8 +823,9 @@ namespace Tpetra { /// must access the device View directly by calling /// getLocalView(). Please see modify(), sync(), and the /// discussion of DualView semantics elsewhere in the - /// documentation. You are responsible for calling modify() and - /// sync(), if needed; this method doesn't do that. + /// documentation. + /// This method calls sync_host() before modifying + /// host data, and modify_host() afterwards. /// /// This method does not have an "atomic" option like /// sumIntoGlobalValue. This is deliberate. Replacement is not @@ -842,7 +843,7 @@ namespace Tpetra { void replaceGlobalValue (const GlobalOrdinal gblRow, const size_t col, - const impl_scalar_type& value) const; + const impl_scalar_type& value); /// \brief Like the above replaceGlobalValue, but only enabled if /// T differs from impl_scalar_type. @@ -859,9 +860,9 @@ namespace Tpetra { /// and you want to modify the non-host version of the data, you /// must access the device View directly by calling getLocalView(). /// Please see modify(), sync(), and the discussion of DualView - /// semantics elsewhere in the documentation. You are responsible - /// for calling modify() and sync(), if needed; this method - /// doesn't do that. + /// semantics elsewhere in the documentation. + /// This method calls sync_host() before modifying + /// host data, and modify_host() afterwards. /// /// This method does not have an "atomic" option like /// sumIntoGlobalValue. This is deliberate. Replacement is not @@ -880,7 +881,7 @@ namespace Tpetra { typename std::enable_if::value && std::is_convertible::value, void>::type replaceGlobalValue (GlobalOrdinal globalRow, size_t col, - const T& value) const + const T& value) { replaceGlobalValue (globalRow, col, static_cast (value)); } @@ -897,8 +898,9 @@ namespace Tpetra { /// must access the device View directly by calling /// getLocalView(). Please see modify(), sync(), and the /// discussion of DualView semantics elsewhere in the - /// documentation. You are responsible for calling modify() and - /// sync(), if needed; this method doesn't do that. + /// documentation. + /// This method calls sync_host() before modifying + /// host data, and modify_host() afterwards. /// /// \param gblRow [in] Global row index of the entry to modify. /// This must be a valid global row index on the calling @@ -912,7 +914,7 @@ namespace Tpetra { sumIntoGlobalValue (const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type& value, - const bool atomic = useAtomicUpdatesByDefault) const; + const bool atomic = useAtomicUpdatesByDefault); /// \brief Like the above sumIntoGlobalValue, but only enabled if /// T differs from impl_scalar_type. @@ -930,8 +932,9 @@ namespace Tpetra { /// must access the device View directly by calling /// getLocalView(). Please see modify(), sync(), and the /// discussion of DualView semantics elsewhere in the - /// documentation. You are responsible for calling modify() and - /// sync(), if needed; this method doesn't do that. + /// documentation. + /// This method calls sync_host() before modifying + /// host data, and modify_host() afterwards. /// /// \param gblRow [in] Global row index of the entry to modify. /// This must be a valid global row index on the calling @@ -946,7 +949,7 @@ namespace Tpetra { sumIntoGlobalValue (const GlobalOrdinal gblRow, const size_t col, const T& val, - const bool atomic = useAtomicUpdatesByDefault) const + const bool atomic = useAtomicUpdatesByDefault) { sumIntoGlobalValue (gblRow, col, static_cast (val), atomic); } @@ -963,8 +966,9 @@ namespace Tpetra { /// must access the device View directly by calling /// getLocalView(). Please see modify(), sync(), and the /// discussion of DualView semantics elsewhere in the - /// documentation. You are responsible for calling modify() and - /// sync(), if needed; this method doesn't do that. + /// documentation. + /// This method calls sync_host() before modifying + /// host data, and modify_host() afterwards. /// /// This method does not have an "atomic" option like /// sumIntoLocalValue. This is deliberate. Replacement is not @@ -982,7 +986,7 @@ namespace Tpetra { void replaceLocalValue (const LocalOrdinal lclRow, const size_t col, - const impl_scalar_type& value) const; + const impl_scalar_type& value); /// \brief Like the above replaceLocalValue, but only enabled if /// T differs from impl_scalar_type. @@ -1000,8 +1004,9 @@ namespace Tpetra { /// must access the device View directly by calling /// getLocalView(). Please see modify(), sync(), and the /// discussion of DualView semantics elsewhere in the - /// documentation. You are responsible for calling modify() and - /// sync(), if needed; this method doesn't do that. + /// documentation. + /// This method calls sync_host() before modifying + /// host data, and modify_host() afterwards. /// /// This method does not have an "atomic" option like /// sumIntoLocalValue. This is deliberate. Replacement is not @@ -1020,7 +1025,7 @@ namespace Tpetra { typename std::enable_if::value && std::is_convertible::value, void>::type replaceLocalValue (const LocalOrdinal lclRow, const size_t col, - const T& val) const + const T& val) { replaceLocalValue (lclRow, col, static_cast (val)); } @@ -1037,8 +1042,9 @@ namespace Tpetra { /// must access the device View directly by calling /// getLocalView(). Please see modify(), sync(), and the /// discussion of DualView semantics elsewhere in the - /// documentation. You are responsible for calling modify() and - /// sync(), if needed; this method doesn't do that. + /// documentation. + /// This method calls sync_host() before modifying + /// host data, and modify_host() afterwards. /// /// \param lclRow [in] Local row index of the entry to modify. /// Must be a valid local index in this MultiVector's Map on the @@ -1052,7 +1058,7 @@ namespace Tpetra { sumIntoLocalValue (const LocalOrdinal lclRow, const size_t col, const impl_scalar_type& val, - const bool atomic = useAtomicUpdatesByDefault) const; + const bool atomic = useAtomicUpdatesByDefault); /// \brief Like the above sumIntoLocalValue, but only enabled if /// T differs from impl_scalar_type. @@ -1070,8 +1076,9 @@ namespace Tpetra { /// must access the device View directly by calling /// getLocalView(). Please see modify(), sync(), and the /// discussion of DualView semantics elsewhere in the - /// documentation. You are responsible for calling modify() and - /// sync(), if needed; this method doesn't do that. + /// documentation. + /// This method calls sync_host() before modifying + /// host data, and modify_host() afterwards. /// /// \param lclRow [in] Local row index of the entry to modify. /// \param col [in] Column index of the entry to modify. @@ -1084,7 +1091,7 @@ namespace Tpetra { sumIntoLocalValue (const LocalOrdinal lclRow, const size_t col, const T& val, - const bool atomic = useAtomicUpdatesByDefault) const + const bool atomic = useAtomicUpdatesByDefault) { sumIntoLocalValue (lclRow, col, static_cast (val), atomic); } diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp index 3269dc9e22f9..3d44c695cb73 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp @@ -2044,7 +2044,7 @@ namespace Tpetra { namespace { // (anonymous) template typename MultiVector::dot_type - multiVectorSingleColumnDot (MultiVector& x, + multiVectorSingleColumnDot (const MultiVector& x, const MultiVector& y) { using ::Tpetra::Details::ProfilingRegion; @@ -2070,14 +2070,9 @@ namespace Tpetra { dot_type gblDot = Kokkos::ArithTraits::zero (); // All non-unary kernels are executed on the device as per Tpetra policy. Sync to device if needed. - if (x.need_sync_device ()) { - x.sync_device (); - } - if (y.need_sync_device ()) { - const_cast(y).sync_device (); - } + const_cast(x).sync_device (); + const_cast(y).sync_device (); - x.modify_device (); auto x_2d = x.getLocalViewDevice (); auto x_1d = Kokkos::subview (x_2d, rowRng, 0); auto y_2d = y.getLocalViewDevice (); @@ -2139,7 +2134,7 @@ namespace Tpetra { numDots << " != this->getNumVectors() = " << numVecs << "."); if (numVecs == 1 && this->isConstantStride () && A.isConstantStride ()) { - const dot_type gblDot = multiVectorSingleColumnDot (const_cast (*this), A); + const dot_type gblDot = multiVectorSingleColumnDot (*this, A); dots[0] = gblDot; } else { @@ -4145,26 +4140,30 @@ namespace Tpetra { MultiVector:: replaceLocalValue (const LocalOrdinal lclRow, const size_t col, - const impl_scalar_type& ScalarValue) const + const impl_scalar_type& ScalarValue) { -#ifdef HAVE_TPETRA_DEBUG - const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex(); - const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex(); - TEUCHOS_TEST_FOR_EXCEPTION( - lclRow < minLocalIndex || lclRow > maxLocalIndex, - std::runtime_error, - "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow - << " is invalid. The range of valid row indices on this process " - << this->getMap()->getComm()->getRank() << " is [" << minLocalIndex - << ", " << maxLocalIndex << "]."); - TEUCHOS_TEST_FOR_EXCEPTION( - vectorIndexOutOfRange(col), - std::runtime_error, - "Tpetra::MultiVector::replaceLocalValue: vector index " << col - << " of the multivector is invalid."); -#endif + if (::Tpetra::Details::Behavior::debug()) { + const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex(); + const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex(); + TEUCHOS_TEST_FOR_EXCEPTION( + lclRow < minLocalIndex || lclRow > maxLocalIndex, + std::runtime_error, + "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow + << " is invalid. The range of valid row indices on this process " + << this->getMap()->getComm()->getRank() << " is [" << minLocalIndex + << ", " << maxLocalIndex << "]."); + TEUCHOS_TEST_FOR_EXCEPTION( + vectorIndexOutOfRange(col), + std::runtime_error, + "Tpetra::MultiVector::replaceLocalValue: vector index " << col + << " of the multivector is invalid."); + } + + this->sync_host(); + const size_t colInd = isConstantStride () ? col : whichVectors_[col]; view_.h_view (lclRow, colInd) = ScalarValue; + this->modify_host(); } @@ -4174,24 +4173,27 @@ namespace Tpetra { sumIntoLocalValue (const LocalOrdinal lclRow, const size_t col, const impl_scalar_type& value, - const bool atomic) const + const bool atomic) { -#ifdef HAVE_TPETRA_DEBUG - const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex(); - const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex(); - TEUCHOS_TEST_FOR_EXCEPTION( - lclRow < minLocalIndex || lclRow > maxLocalIndex, - std::runtime_error, - "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow - << " is invalid. The range of valid row indices on this process " - << this->getMap()->getComm()->getRank() << " is [" << minLocalIndex - << ", " << maxLocalIndex << "]."); - TEUCHOS_TEST_FOR_EXCEPTION( - vectorIndexOutOfRange(col), - std::runtime_error, - "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col - << " of the multivector is invalid."); -#endif + if (::Tpetra::Details::Behavior::debug()) { + const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex(); + const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex(); + TEUCHOS_TEST_FOR_EXCEPTION( + lclRow < minLocalIndex || lclRow > maxLocalIndex, + std::runtime_error, + "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow + << " is invalid. The range of valid row indices on this process " + << this->getMap()->getComm()->getRank() << " is [" << minLocalIndex + << ", " << maxLocalIndex << "]."); + TEUCHOS_TEST_FOR_EXCEPTION( + vectorIndexOutOfRange(col), + std::runtime_error, + "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col + << " of the multivector is invalid."); + } + + this->sync_host(); + const size_t colInd = isConstantStride () ? col : whichVectors_[col]; if (atomic) { Kokkos::atomic_add (& (view_.h_view(lclRow, colInd)), value); @@ -4199,6 +4201,7 @@ namespace Tpetra { else { view_.h_view (lclRow, colInd) += value; } + this->modify_host(); } @@ -4207,22 +4210,24 @@ namespace Tpetra { MultiVector:: replaceGlobalValue (const GlobalOrdinal gblRow, const size_t col, - const impl_scalar_type& ScalarValue) const + const impl_scalar_type& ScalarValue) { // mfh 23 Nov 2015: Use map_ and not getMap(), because the latter // touches the RCP's reference count, which isn't thread safe. const LocalOrdinal lclRow = this->map_->getLocalElement (gblRow); -#ifdef HAVE_TPETRA_DEBUG - const char tfecfFuncName[] = "replaceGlobalValue: "; - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (lclRow == Teuchos::OrdinalTraits::invalid (), - std::runtime_error, - "Global row index " << gblRow << " is not present on this process " - << this->getMap ()->getComm ()->getRank () << "."); - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (this->vectorIndexOutOfRange (col), std::runtime_error, - "Vector index " << col << " of the MultiVector is invalid."); -#endif // HAVE_TPETRA_DEBUG + + if (::Tpetra::Details::Behavior::debug()) { + const char tfecfFuncName[] = "replaceGlobalValue: "; + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (lclRow == Teuchos::OrdinalTraits::invalid (), + std::runtime_error, + "Global row index " << gblRow << " is not present on this process " + << this->getMap ()->getComm ()->getRank () << "."); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (this->vectorIndexOutOfRange (col), std::runtime_error, + "Vector index " << col << " of the MultiVector is invalid."); + } + this->replaceLocalValue (lclRow, col, ScalarValue); } @@ -4232,24 +4237,26 @@ namespace Tpetra { sumIntoGlobalValue (const GlobalOrdinal globalRow, const size_t col, const impl_scalar_type& value, - const bool atomic) const + const bool atomic) { // mfh 23 Nov 2015: Use map_ and not getMap(), because the latter // touches the RCP's reference count, which isn't thread safe. const LocalOrdinal lclRow = this->map_->getLocalElement (globalRow); -#ifdef HAVE_TEUCHOS_DEBUG - TEUCHOS_TEST_FOR_EXCEPTION( - lclRow == Teuchos::OrdinalTraits::invalid (), - std::runtime_error, - "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow - << " is not present on this process " - << this->getMap ()->getComm ()->getRank () << "."); - TEUCHOS_TEST_FOR_EXCEPTION( - vectorIndexOutOfRange(col), - std::runtime_error, - "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col - << " of the multivector is invalid."); -#endif + + if (::Tpetra::Details::Behavior::debug()) { + TEUCHOS_TEST_FOR_EXCEPTION( + lclRow == Teuchos::OrdinalTraits::invalid (), + std::runtime_error, + "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow + << " is not present on this process " + << this->getMap ()->getComm ()->getRank () << "."); + TEUCHOS_TEST_FOR_EXCEPTION( + vectorIndexOutOfRange(col), + std::runtime_error, + "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col + << " of the multivector is invalid."); + } + this->sumIntoLocalValue (lclRow, col, value, atomic); } diff --git a/packages/tpetra/core/src/Tpetra_Vector_decl.hpp b/packages/tpetra/core/src/Tpetra_Vector_decl.hpp index 387670736cbc..7116c0455c90 100644 --- a/packages/tpetra/core/src/Tpetra_Vector_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_Vector_decl.hpp @@ -262,7 +262,7 @@ class Vector : public MultiVector //! Replace current value at the specified location with specified value. /** \pre \c globalRow must be a valid global element on this node, according to the row map. */ - void replaceGlobalValue (const GlobalOrdinal globalRow, const Scalar& value) const; + void replaceGlobalValue (const GlobalOrdinal globalRow, const Scalar& value); /// \brief Add value to existing value, using global (row) index. /// @@ -286,12 +286,12 @@ class Vector : public MultiVector void sumIntoGlobalValue (const GlobalOrdinal globalRow, const Scalar& value, - const bool atomic = base_type::useAtomicUpdatesByDefault) const; + const bool atomic = base_type::useAtomicUpdatesByDefault); //! Replace current value at the specified location with specified values. /** \pre \c localRow must be a valid local element on this node, according to the row map. */ - void replaceLocalValue (const LocalOrdinal myRow, const Scalar& value) const; + void replaceLocalValue (const LocalOrdinal myRow, const Scalar& value); /// \brief Add \c value to existing value, using local (row) index. /// @@ -313,7 +313,7 @@ class Vector : public MultiVector void sumIntoLocalValue (const LocalOrdinal myRow, const Scalar& value, - const bool atomic = base_type::useAtomicUpdatesByDefault) const; + const bool atomic = base_type::useAtomicUpdatesByDefault); //@} diff --git a/packages/tpetra/core/src/Tpetra_Vector_def.hpp b/packages/tpetra/core/src/Tpetra_Vector_def.hpp index 18a0c3bd16a0..90a235f1a3aa 100644 --- a/packages/tpetra/core/src/Tpetra_Vector_def.hpp +++ b/packages/tpetra/core/src/Tpetra_Vector_def.hpp @@ -118,7 +118,7 @@ namespace Tpetra { template void Vector:: - replaceGlobalValue (const GlobalOrdinal globalRow, const Scalar& value) const { + replaceGlobalValue (const GlobalOrdinal globalRow, const Scalar& value) { this->base_type::replaceGlobalValue (globalRow, 0, value); } @@ -127,7 +127,7 @@ namespace Tpetra { Vector:: sumIntoGlobalValue (const GlobalOrdinal globalRow, const Scalar& value, - const bool atomic) const + const bool atomic) { this->base_type::sumIntoGlobalValue (globalRow, 0, value, atomic); } @@ -135,7 +135,7 @@ namespace Tpetra { template void Vector:: - replaceLocalValue (const LocalOrdinal myRow, const Scalar& value) const { + replaceLocalValue (const LocalOrdinal myRow, const Scalar& value) { this->base_type::replaceLocalValue (myRow, 0, value); } @@ -144,7 +144,7 @@ namespace Tpetra { Vector:: sumIntoLocalValue (const LocalOrdinal globalRow, const Scalar& value, - const bool atomic) const + const bool atomic) { this->base_type::sumIntoLocalValue (globalRow, 0, value, atomic); } diff --git a/packages/tpetra/core/test/MultiVector/MultiVector_UnitTests.cpp b/packages/tpetra/core/test/MultiVector/MultiVector_UnitTests.cpp index 5f0eeb0a1a1a..4a0617779b13 100644 --- a/packages/tpetra/core/test/MultiVector/MultiVector_UnitTests.cpp +++ b/packages/tpetra/core/test/MultiVector/MultiVector_UnitTests.cpp @@ -1749,7 +1749,7 @@ namespace { mvec2.norm1(norms2()); std::fill(ans.begin(), ans.end(), M0); TEST_COMPARE_FLOATING_ARRAYS(norms1,ans,M0); - TEST_COMPARE_FLOATING_ARRAYS(norms1,ans,M0); + TEST_COMPARE_FLOATING_ARRAYS(norms2,ans,M0); // replace local entries s.t. // mvec1 = [1 1] and mvec2 = [0 0] // [0 0] [1 1] @@ -2705,6 +2705,8 @@ namespace { //// TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( MultiVector, ScaleAndAssign, LO , GO , Scalar , Node ) { + std::cerr << std::endl; + typedef typename ScalarTraits::magnitudeType Mag; typedef Tpetra::MultiVector MV; typedef Tpetra::Vector V; @@ -2735,12 +2737,13 @@ namespace { // Also, ensure that other vectors aren't changed out << "Create A, and fill with random numbers" << endl; - MV A (map, numVectors, false); - A.randomize (); + MV A(map, numVectors, false); + A.randomize(); + A.sync_host(); out << "Stash away norms of columns of A" << endl; Array Anrms(numVectors); - A.norm1 (Anrms ()); + A.norm1(Anrms()); out << "Test B := A*2, using different methods" << endl; // set B = A * 2, using different techniques @@ -2769,8 +2772,8 @@ namespace { std::ostringstream os; os << ">>> Proc " << comm->getSize (); os << ": A.modified_host: " << (A.need_sync_device ()?1:0); - os << ", A.modified_device: " << (A.need_sync_host ()?1:0); - os << ": B.modified_host: " << (B.need_sync_device ()?1:0); + os << ", A.modified_device: " << (A.need_sync_host ()?1:0); + os << ", B.modified_host: " << (B.need_sync_device ()?1:0); os << ", B.modified_device: " << (B.need_sync_host ()?1:0); os << std::endl; std::cerr << os.str ();