From 847a14e05cd9775f556f444f8a87021810c7d2d3 Mon Sep 17 00:00:00 2001 From: "Roscoe A. Bartlett" Date: Wed, 22 Apr 2020 07:58:42 -0600 Subject: [PATCH 01/86] Set TEST_0 PASS_REGULAR_EXPRESSION and untangle test-block and overall args I noticed this while looking over this failing test on CDash. I should update the TriBITS documentation to advise people not to set FINAL_PASS_REGULAR_EXPRESSION but instead determine pass/fail at the TEST_ block level. --- packages/shylu/shylu_node/hts/test/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/shylu/shylu_node/hts/test/CMakeLists.txt b/packages/shylu/shylu_node/hts/test/CMakeLists.txt index 956ed73a32cc..87e391cda041 100644 --- a/packages/shylu/shylu_node/hts/test/CMakeLists.txt +++ b/packages/shylu/shylu_node/hts/test/CMakeLists.txt @@ -5,8 +5,8 @@ TRIBITS_ADD_EXECUTABLE( TRIBITS_ADD_ADVANCED_TEST( hts_test_1 TEST_0 EXEC hts_test NOEXEPREFIX - ENVIRONMENT OMP_NUM_THREADS=2 - FINAL_PASS_REGULAR_EXPRESSION Passed + NUM_MPI_PROCS 1 + NUM_TOTAL_CORES_USED 2 + PASS_REGULAR_EXPRESSION Passed COMM serial mpi - OVERALL_NUM_MPI_PROCS 1 - OVERALL_NUM_TOTAL_CORES_USED 2) + ENVIRONMENT OMP_NUM_THREADS=2) From 9434e14de21151d05a1d03b29aeb6503775d9d63 Mon Sep 17 00:00:00 2001 From: Mauro Perego Date: Fri, 17 Apr 2020 18:00:55 -0600 Subject: [PATCH 02/86] Intrepid2: fix view rank in TensorBasis to avoid error in debug builds --- .../Basis/Intrepid2_DerivedBasis_HDIV_HEX.hpp | 2 +- .../Basis/Intrepid2_TensorBasis.hpp | 60 +++++++++---------- 2 files changed, 28 insertions(+), 34 deletions(-) diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HDIV_HEX.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HDIV_HEX.hpp index f2c9d6dad54f..e106be57a034 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HDIV_HEX.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HDIV_HEX.hpp @@ -290,7 +290,7 
@@ namespace Intrepid2 op3 = Intrepid2::OPERATOR_VALUE; // family 3 goes in the z component; 0 in the x and y components - auto outputValuesComponent_xy = Kokkos::subview(outputValues,Kokkos::ALL(),Kokkos::ALL(),std::make_pair(0,1)); + auto outputValuesComponent_xy = Kokkos::subview(outputValues,Kokkos::ALL(),Kokkos::ALL(),std::make_pair(0,2)); auto outputValuesComponent_z = Kokkos::subview(outputValues,Kokkos::ALL(),Kokkos::ALL(),2); // 0 in x and y components diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_TensorBasis.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_TensorBasis.hpp index f19c03ceacc8..beeed4b6b5a6 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_TensorBasis.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_TensorBasis.hpp @@ -929,7 +929,7 @@ namespace Intrepid2 if (!tensorPoints_) { - if ((input1_.rank() == 2) && (input2_.rank() == 2)) + if ((input1_.rank() == 2) && (input2_.rank() == 2) && (input3_.rank() == 2)) { Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) { for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++) @@ -970,7 +970,7 @@ namespace Intrepid2 { for (int d=0; d Date: Fri, 17 Apr 2020 18:07:26 -0600 Subject: [PATCH 03/86] Intrepid2: added methods getName() and requiresOrientation() --- .../Intrepid2_DerivedBasis_HCURL_HEX.hpp | 2 +- .../Intrepid2_DerivedBasis_HCURL_QUAD.hpp | 2 +- .../Basis/Intrepid2_DerivedBasis_HDIV_HEX.hpp | 2 +- .../Intrepid2_DerivedBasis_HDIV_QUAD.hpp | 2 +- .../Intrepid2_DerivedBasis_HGRAD_HEX.hpp | 2 +- .../Intrepid2_DerivedBasis_HGRAD_QUAD.hpp | 2 +- .../Basis/Intrepid2_DerivedBasis_HVOL_HEX.hpp | 22 ++++++++++++++- .../Intrepid2_DerivedBasis_HVOL_QUAD.hpp | 28 +++++++++++++++++-- ...id2_IntegratedLegendreBasis_HGRAD_LINE.hpp | 14 ++++++++++ ...pid2_IntegratedLegendreBasis_HGRAD_TET.hpp | 6 ++++ ...pid2_IntegratedLegendreBasis_HGRAD_TRI.hpp | 6 ++++ 11 files changed, 78 
insertions(+), 10 deletions(-) diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HCURL_HEX.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HCURL_HEX.hpp index d014ee3d49a9..a43cd9ba706d 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HCURL_HEX.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HCURL_HEX.hpp @@ -424,7 +424,7 @@ namespace Intrepid2 /** \brief True if orientation is required */ virtual bool requireOrientation() const { - return true; + return (this->getDofCount(1,0) > 0); //if it has edge DOFs, than it needs orientations } }; } // end namespace Intrepid2 diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HCURL_QUAD.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HCURL_QUAD.hpp index 1c987643c513..c4baef109b8e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HCURL_QUAD.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HCURL_QUAD.hpp @@ -257,7 +257,7 @@ namespace Intrepid2 /** \brief True if orientation is required */ virtual bool requireOrientation() const { - return true; + return (this->getDofCount(1,0) > 0); //if it has edge DOFs, than it needs orientations } }; diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HDIV_HEX.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HDIV_HEX.hpp index e106be57a034..91720bf11dab 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HDIV_HEX.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HDIV_HEX.hpp @@ -388,7 +388,7 @@ namespace Intrepid2 /** \brief True if orientation is required */ virtual bool requireOrientation() const { - return true; + return (this->getDofCount(2,0) > 0); //if it has side DOFs, than it needs orientations } }; } // end namespace Intrepid2 diff --git 
a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HDIV_QUAD.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HDIV_QUAD.hpp index f1c26926f363..571c36b940ed 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HDIV_QUAD.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HDIV_QUAD.hpp @@ -251,7 +251,7 @@ namespace Intrepid2 /** \brief True if orientation is required */ virtual bool requireOrientation() const { - return true; + return (this->getDofCount(1,0) > 0); //if it has side DOFs, than it needs orientations } }; } // end namespace Intrepid2 diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HGRAD_HEX.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HGRAD_HEX.hpp index b8027656fa83..601ac5f17b4e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HGRAD_HEX.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HGRAD_HEX.hpp @@ -113,7 +113,7 @@ namespace Intrepid2 /** \brief True if orientation is required */ virtual bool requireOrientation() const override { - return (this->getDegree() > 2); + return (this->getDofCount(1,0) > 1); //if it has more than 1 DOF per edge, than it needs orientations } using Basis::getValues; diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HGRAD_QUAD.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HGRAD_QUAD.hpp index c79f66bcb88e..0ed945b8a2aa 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HGRAD_QUAD.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HGRAD_QUAD.hpp @@ -101,7 +101,7 @@ namespace Intrepid2 /** \brief True if orientation is required */ virtual bool requireOrientation() const override { - return (this->getDegree() > 2); + return (this->getDofCount(1,0) > 1); //if it has more than 1 DOF per edge, than it needs 
orientations } using Basis::getValues; diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HVOL_HEX.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HVOL_HEX.hpp index ec9224f91782..4cab3668f837 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HVOL_HEX.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HVOL_HEX.hpp @@ -70,6 +70,10 @@ namespace Intrepid2 // TODO: make this a subclass of TensorBasis3 instead, following what we've done for H(curl) and H(div) { public: + using ExecutionSpace = typename HVOL_LINE::ExecutionSpace; + using OutputValueType = typename HVOL_LINE::OutputValueType; + using PointValueType = typename HVOL_LINE::PointValueType; + using OutputViewType = typename HVOL_LINE::OutputViewType; using PointViewType = typename HVOL_LINE::PointViewType ; using ScalarViewType = typename HVOL_LINE::ScalarViewType; @@ -77,7 +81,7 @@ namespace Intrepid2 using LineBasis = HVOL_LINE; using QuadBasis = Intrepid2::Basis_Derived_HVOL_QUAD; using TensorBasis = Basis_TensorBasis; - public: + /** \brief Constructor. \param [in] polyOrder_x - the polynomial order in the x dimension. \param [in] polyOrder_y - the polynomial order in the y dimension. 
@@ -96,6 +100,22 @@ namespace Intrepid2 */ Basis_Derived_HVOL_HEX(int polyOrder) : Basis_Derived_HVOL_HEX(polyOrder, polyOrder, polyOrder) {} + /** \brief Returns basis name + + \return the name of the basis + */ + virtual + const char* + getName() const { + return "Intrepid2_DerivedBasis_HVOL_HEX"; + } + + /** \brief True if orientation is required + */ + virtual bool requireOrientation() const { + return false; + } + using TensorBasis::getValues; /** \brief multi-component getValues() method (required/called by TensorBasis) diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HVOL_QUAD.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HVOL_QUAD.hpp index d5af42d33ee5..4d3752b7e9d7 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HVOL_QUAD.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_DerivedBasis_HVOL_QUAD.hpp @@ -66,13 +66,19 @@ namespace Intrepid2 : public Basis_TensorBasis { + using LineBasis = HVOL_LINE; + using TensorBasis = Basis_TensorBasis; + + public: + + using ExecutionSpace = typename HVOL_LINE::ExecutionSpace; + using OutputValueType = typename HVOL_LINE::OutputValueType; + using PointValueType = typename HVOL_LINE::PointValueType; + using OutputViewType = typename HVOL_LINE::OutputViewType; using PointViewType = typename HVOL_LINE::PointViewType ; using ScalarViewType = typename HVOL_LINE::ScalarViewType; - using LineBasis = HVOL_LINE; - using TensorBasis = Basis_TensorBasis; - public: /** \brief Constructor. \param [in] polyOrder_x - the polynomial order in the x dimension. \param [in] polyOrder_y - the polynomial order in the y dimension. 
@@ -90,6 +96,22 @@ namespace Intrepid2 */ Basis_Derived_HVOL_QUAD(int polyOrder) : Basis_Derived_HVOL_QUAD(polyOrder,polyOrder) {} + /** \brief Returns basis name + + \return the name of the basis + */ + virtual + const char* + getName() const { + return "Intrepid2_DerivedBasis_HVOL_QUAD"; + } + + /** \brief True if orientation is required + */ + virtual bool requireOrientation() const { + return false; + } + using TensorBasis::getValues; /** \brief multi-component getValues() method (required/called by TensorBasis) diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_LINE.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_LINE.hpp index 2e9f68f4304f..0f89c0359a05 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_LINE.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_LINE.hpp @@ -338,6 +338,20 @@ namespace Intrepid2 } } + /** \brief Returns basis name + + \return the name of the basis + */ + const char* getName() const override { + return "Intrepid2_IntegratedLegendreBasis_HGRAD_LINE"; + } + + /** \brief True if orientation is required + */ + virtual bool requireOrientation() const override { + return false; + } + // since the getValues() below only overrides the FEM variant, we specify that // we use the base class's getValues(), which implements the FVD variant by throwing an exception. // (It's an error to use the FVD variant on this basis.) 
diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TET.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TET.hpp index 255c587ada40..3070cb76786f 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TET.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TET.hpp @@ -810,6 +810,12 @@ namespace Intrepid2 return "Intrepid2_IntegratedLegendreBasis_HGRAD_TET"; } + /** \brief True if orientation is required + */ + virtual bool requireOrientation() const override { + return (this->getDegree() > 2); + } + // since the getValues() below only overrides the FEM variant, we specify that // we use the base class's getValues(), which implements the FVD variant by throwing an exception. // (It's an error to use the FVD variant on this basis.) diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TRI.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TRI.hpp index a6923ce624d7..f142e81e3670 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TRI.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TRI.hpp @@ -522,6 +522,12 @@ namespace Intrepid2 return "Intrepid2_IntegratedLegendreBasis_HGRAD_TRI"; } + /** \brief True if orientation is required + */ + virtual bool requireOrientation() const override { + return (this->getDegree() > 2); + } + // since the getValues() below only overrides the FEM variant, we specify that // we use the base class's getValues(), which implements the FVD variant by throwing an exception. // (It's an error to use the FVD variant on this basis.) 
From 8569bf4cf0f4706c03ebf9005b659e3a4ffd4340 Mon Sep 17 00:00:00 2001 From: Mauro Perego Date: Fri, 17 Apr 2020 18:09:02 -0600 Subject: [PATCH 04/86] Intrepid2: Use the correct execution spaces rather than the default ones --- .../Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TET.hpp | 6 +++--- .../Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TRI.hpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TET.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TET.hpp index 3070cb76786f..5a0cb823815b 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TET.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TET.hpp @@ -68,13 +68,13 @@ namespace Intrepid2 class OutputFieldType, class InputPointsType> struct Hierarchical_HGRAD_TET_Functor { - using ScratchSpace = Kokkos::DefaultExecutionSpace::scratch_memory_space; + using ScratchSpace = typename ExecutionSpace::scratch_memory_space; using OutputScratchView = Kokkos::View>; using OutputScratchView2D = Kokkos::View>; using PointScratchView = Kokkos::View>; - using TeamPolicy = Kokkos::TeamPolicy<>; - using TeamMember = TeamPolicy::member_type; + using TeamPolicy = Kokkos::TeamPolicy; + using TeamMember = typename TeamPolicy::member_type; EOperator opType_; diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TRI.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TRI.hpp index f142e81e3670..1f6aee61f4d2 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TRI.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_IntegratedLegendreBasis_HGRAD_TRI.hpp @@ -68,12 +68,12 @@ namespace Intrepid2 class OutputFieldType, class InputPointsType> struct 
Hierarchical_HGRAD_TRI_Functor { - using ScratchSpace = Kokkos::DefaultExecutionSpace::scratch_memory_space; + using ScratchSpace = typename ExecutionSpace::scratch_memory_space; using OutputScratchView = Kokkos::View>; using PointScratchView = Kokkos::View>; - using TeamPolicy = Kokkos::TeamPolicy<>; - using TeamMember = TeamPolicy::member_type; + using TeamPolicy = Kokkos::TeamPolicy; + using TeamMember = typename TeamPolicy::member_type; EOperator opType_; From e19e43b9673a590bedece42edf4db8f62f4ee31c Mon Sep 17 00:00:00 2001 From: Mauro Perego Date: Fri, 17 Apr 2020 18:11:13 -0600 Subject: [PATCH 05/86] Intrepid2: Remove DerivedNodalBasisFamilyModified Use DerivedNodalBasisFamily instead --- .../Basis/Intrepid2_NodalBasisFamily.hpp | 45 +------------------ ...ntrepid2_OrientationToolsDefMatrixData.hpp | 24 +++++----- 2 files changed, 13 insertions(+), 56 deletions(-) diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_NodalBasisFamily.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_NodalBasisFamily.hpp index d12ce335641a..780e29ca8afa 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_NodalBasisFamily.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_NodalBasisFamily.hpp @@ -42,7 +42,7 @@ // @HEADER /** \file Intrepid2_NodalBasisFamily.hpp - \brief Stateless class that acts as a factory for two families of nodal bases (hypercube topologies only at this point). DerivedNodalBasisFamilyModified should match existing high-order bases in Intrepid2, while NodalBasisFamily is templated on H(vol) and H(grad) bases in a way that is more consistent with the literature and the hierarchical basis family in Intrepid2. Once we support all standard topologies, we expect to replace the existing high-order nodal basis implementations in Intrepid2 with those from DerivedNodalBasisFamily. + \brief Stateless class that acts as a factory for a family of nodal bases (hypercube topologies only at this point). 
NodalBasisFamily is templated on H(vol) and H(grad) bases in a way that is more consistent with the literature and the hierarchical basis family in Intrepid2. Once we support all standard topologies, we expect to replace the existing high-order nodal basis implementations in Intrepid2 with those from DerivedNodalBasisFamily. \author Created by N.V. Roberts. */ @@ -141,49 +141,6 @@ namespace Intrepid2 { using HVOL_TET = Basis_HVOL_TET_Cn_FEM; }; - - /** \class Intrepid2::DerivedBasisFamilyModified - \brief A family of nodal basis functions that should match, modulo basis numbering, the Lagrangian basis family that Intrepid2 has historically supported. - - For compatibility with the bases in NodalBasisFamily, in this basis H(div) and H(curl) are defined in terms of HGRAD(n) x HGRAD(n-1), etc., instead of HGRAD x HVOL. - - These bases should match those in NodalBasisFamily, modulo basis numbering (which should be resolvable via getDofCoords()). - - At present, only hypercube topologies (line, quadrilateral, hexahedron) are supported, but other topologies will be supported in the future. 
- */ - template - class DerivedBasisFamilyModified - { - public: - using ExecutionSpace = typename LineBasisHGRAD::ExecutionSpace; - using OutputValueType = typename LineBasisHGRAD::OutputValueType; - using PointValueType = typename LineBasisHGRAD::PointValueType; - - using BasisType = Basis; - using BasisPtr = Teuchos::RCP; - - // line bases - using HGRAD_LINE = LineBasisHGRAD; - using HVOL_LINE = LineBasisHVOL; - - // quadrilateral bases - using HGRAD_QUAD = Basis_Derived_HGRAD_QUAD; - using HCURL_QUAD = Basis_Derived_HCURL_QUAD; - using HDIV_QUAD = Basis_Derived_HDIV_QUAD ; - using HVOL_QUAD = Basis_Derived_HVOL_QUAD ; - - // hexahedron bases - using HGRAD_HEX = Basis_Derived_HGRAD_HEX; - using HCURL_HEX = Basis_Derived_HCURL_HEX; - using HDIV_HEX = Basis_Derived_HDIV_HEX ; - using HVOL_HEX = Basis_Derived_HVOL_HEX ; - }; - - template - using DerivedNodalBasisFamilyModified = DerivedBasisFamilyModified< Basis_HGRAD_LINE_Cn_FEM, - Basis_HVOL_LINE_Cn_FEM >; } #endif /* Intrepid2_NodalBasisFamily_h */ diff --git a/packages/intrepid2/src/Orientation/Intrepid2_OrientationToolsDefMatrixData.hpp b/packages/intrepid2/src/Orientation/Intrepid2_OrientationToolsDefMatrixData.hpp index 244bc5f13e7c..c8e17fd43f79 100644 --- a/packages/intrepid2/src/Orientation/Intrepid2_OrientationToolsDefMatrixData.hpp +++ b/packages/intrepid2/src/Orientation/Intrepid2_OrientationToolsDefMatrixData.hpp @@ -91,8 +91,8 @@ namespace Intrepid2 { if(dynamic_cast::HGRAD_QUAD*>(basis)) { typename NodalBasisFamily::HGRAD_QUAD hostBasis(order); init_HGRAD_QUAD(matData, &hostBasis); - } else if(dynamic_cast::HGRAD_QUAD*>(basis)) { - typename DerivedNodalBasisFamilyModified::HGRAD_QUAD hostBasis(order); + } else if(dynamic_cast::HGRAD_QUAD*>(basis)) { + typename DerivedNodalBasisFamily::HGRAD_QUAD hostBasis(order); init_HGRAD_QUAD(matData, &hostBasis); } else if(dynamic_cast::HGRAD_QUAD*>(basis)) { typename HierarchicalBasisFamily::HGRAD_QUAD hostBasis(order); @@ -116,8 +116,8 @@ namespace Intrepid2 { 
if(dynamic_cast::HGRAD_HEX*>(basis)) { typename NodalBasisFamily::HGRAD_HEX hostBasis(order); init_HGRAD_HEX(matData, &hostBasis); - } else if(dynamic_cast::HGRAD_HEX*>(basis)) { - typename DerivedNodalBasisFamilyModified::HGRAD_HEX hostBasis(order); + } else if(dynamic_cast::HGRAD_HEX*>(basis)) { + typename DerivedNodalBasisFamily::HGRAD_HEX hostBasis(order); init_HGRAD_HEX(matData, &hostBasis); } else if(dynamic_cast::HGRAD_HEX*>(basis)) { typename HierarchicalBasisFamily::HGRAD_HEX hostBasis(order); @@ -180,8 +180,8 @@ namespace Intrepid2 { } else if(dynamic_cast::HCURL_QUAD*>(basis)) { typename NodalBasisFamily::HCURL_QUAD hostBasis(order); init_HCURL_QUAD(matData, &hostBasis); - } else if(dynamic_cast::HCURL_QUAD*>(basis)) { - typename DerivedNodalBasisFamilyModified::HCURL_QUAD hostBasis(order); + } else if(dynamic_cast::HCURL_QUAD*>(basis)) { + typename DerivedNodalBasisFamily::HCURL_QUAD hostBasis(order); init_HCURL_QUAD(matData, &hostBasis); } else if(dynamic_cast::HCURL_QUAD*>(basis)) { typename HierarchicalBasisFamily::HCURL_QUAD hostBasis(order); @@ -204,8 +204,8 @@ namespace Intrepid2 { } else if(dynamic_cast::HCURL_HEX*>(basis)) { typename NodalBasisFamily::HCURL_HEX hostBasis(order); init_HCURL_HEX(matData, &hostBasis); - } else if(dynamic_cast::HCURL_HEX*>(basis)) { - typename DerivedNodalBasisFamilyModified::HCURL_HEX hostBasis(order); + } else if(dynamic_cast::HCURL_HEX*>(basis)) { + typename DerivedNodalBasisFamily::HCURL_HEX hostBasis(order); init_HCURL_HEX(matData, &hostBasis); } else if(dynamic_cast::HCURL_HEX*>(basis)) { typename HierarchicalBasisFamily::HCURL_HEX hostBasis(order); @@ -267,8 +267,8 @@ namespace Intrepid2 { } else if(dynamic_cast::HDIV_QUAD*>(basis)) { typename NodalBasisFamily::HDIV_QUAD hostBasis(order); init_HDIV_QUAD(matData, &hostBasis); - } else if(dynamic_cast::HDIV_QUAD*>(basis)) { - typename DerivedNodalBasisFamilyModified::HDIV_QUAD hostBasis(order); + } else if(dynamic_cast::HDIV_QUAD*>(basis)) { + typename 
DerivedNodalBasisFamily::HDIV_QUAD hostBasis(order); init_HDIV_QUAD(matData, &hostBasis); } else if(dynamic_cast::HDIV_QUAD*>(basis)) { typename HierarchicalBasisFamily::HDIV_QUAD hostBasis(order); @@ -290,8 +290,8 @@ namespace Intrepid2 { } else if(dynamic_cast::HDIV_HEX*>(basis)) { typename NodalBasisFamily::HDIV_HEX hostBasis(order); init_HDIV_HEX(matData, &hostBasis); - } else if(dynamic_cast::HDIV_HEX*>(basis)) { - typename DerivedNodalBasisFamilyModified::HDIV_HEX hostBasis(order); + } else if(dynamic_cast::HDIV_HEX*>(basis)) { + typename DerivedNodalBasisFamily::HDIV_HEX hostBasis(order); init_HDIV_HEX(matData, &hostBasis); } else if(dynamic_cast::HDIV_HEX*>(basis)) { typename HierarchicalBasisFamily::HDIV_HEX hostBasis(order); From 85d10311575ddb0c0b28740f4959df2570eb2227 Mon Sep 17 00:00:00 2001 From: Mauro Perego Date: Fri, 17 Apr 2020 18:14:55 -0600 Subject: [PATCH 06/86] Intrepid2: enabling execution of mapToReferenceSubcell on device minor fix. --- .../src/Cell/Intrepid2_CellTools.hpp | 60 +++++++++++++------ .../Intrepid2_CellToolsDefParametrization.hpp | 5 +- .../Cell/Intrepid2_CellToolsDefRefToPhys.hpp | 41 +++++++++---- 3 files changed, 74 insertions(+), 32 deletions(-) diff --git a/packages/intrepid2/src/Cell/Intrepid2_CellTools.hpp b/packages/intrepid2/src/Cell/Intrepid2_CellTools.hpp index ac44f987fa9c..df879bbe4a9b 100644 --- a/packages/intrepid2/src/Cell/Intrepid2_CellTools.hpp +++ b/packages/intrepid2/src/Cell/Intrepid2_CellTools.hpp @@ -154,6 +154,8 @@ namespace Intrepid2 { return r_val; } + typedef Kokkos::DynRankView subcellParamViewType; + private: /** \brief Generates default HGrad basis based on cell topology @@ -250,7 +252,6 @@ namespace Intrepid2 { /** \struct Intrepid2::CellTools::SubcellParamData \brief Parametrization coefficients of edges and faces of reference cells */ - typedef Kokkos::DynRankView subcellParamViewType; struct SubcellParamData { subcellParamViewType dummy; subcellParamViewType lineEdges; // edge maps for 2d 
non-standard cells; shell line and beam @@ -265,6 +266,24 @@ namespace Intrepid2 { static bool isSubcellParametrizationSet_; + + /** \brief Sets orientation-preserving parametrizations of reference edges and faces of cell + topologies with reference cells. Used to populate Intrepid2::CellTools::SubcellParamData. + + See Intrepid2::CellTools::setSubcellParametrization and Section \ref sec_cell_topology_subcell_map + more information about parametrization maps. + + \param subcellParam [out] - array with the coefficients of the parametrization map + \param subcellDim [in] - dimension of the subcells being parametrized (1 or 2) + \param parentCell [in] - topology of the parent cell owning the subcells. + */ + static void + setSubcellParametrization( subcellParamViewType &subcellParam, + const ordinal_type subcellDim, + const shards::CellTopology parentCell ); + + public: + /** \brief Defines orientation-preserving parametrizations of reference edges and faces of cell topologies with reference cells. @@ -301,23 +320,6 @@ namespace Intrepid2 { */ static void setSubcellParametrization(); - /** \brief Sets orientation-preserving parametrizations of reference edges and faces of cell - topologies with reference cells. Used to populate Intrepid2::CellTools::SubcellParamData. - - See Intrepid2::CellTools::setSubcellParametrization and Section \ref sec_cell_topology_subcell_map - more information about parametrization maps. - - \param subcellParametrization [out] - array with the coefficients of the parametrization map - \param subcellDim [in] - dimension of the subcells being parametrized (1 or 2) - \param parentCell [in] - topology of the parent cell owning the subcells. - */ - static void - setSubcellParametrization( subcellParamViewType &subcellParam, - const ordinal_type subcellDim, - const shards::CellTopology parentCell ); - - public: - /** \brief Default constructor. 
*/ CellTools() = default; @@ -1141,6 +1143,28 @@ namespace Intrepid2 { const shards::CellTopology parentCell ); + /** + \brief Overload of mapToReferenceSubcell that runs on device. + + \param refSubcellPoints [out] - rank-2 (P,D1) array with images of parameter space points + \param paramPoints [in] - rank-2 (P,D2) array with points in 1D or 2D parameter domain + \param subcellMap [in] - array with the coefficients of the subcell parametrization map + \param subcellDim [in] - dimension of the subcell where points are mapped to + \param subcellOrd [in] - subcell ordinal + \param parentCellDim [in] - dimension of the parent cell. + */ + template + static void + KOKKOS_INLINE_FUNCTION + mapToReferenceSubcell( Kokkos::DynRankView refSubcellPoints, + const Kokkos::DynRankView paramPoints, + const subcellParamViewType subcellMap, + const ordinal_type subcellDim, + const ordinal_type subcellOrd, + const ordinal_type parentCellDim); + + //============================================================================================// // // // Physical-to-reference frame mapping and its inverse // diff --git a/packages/intrepid2/src/Cell/Intrepid2_CellToolsDefParametrization.hpp b/packages/intrepid2/src/Cell/Intrepid2_CellToolsDefParametrization.hpp index 8329186f2ab8..97a01eaa40de 100644 --- a/packages/intrepid2/src/Cell/Intrepid2_CellToolsDefParametrization.hpp +++ b/packages/intrepid2/src/Cell/Intrepid2_CellToolsDefParametrization.hpp @@ -67,6 +67,9 @@ namespace Intrepid2 { void CellTools:: setSubcellParametrization() { + if(isSubcellParametrizationSet_) + return; + { const auto tet = shards::CellTopology(shards::getCellTopologyData >()); setSubcellParametrization( subcellParamData_.tetFaces, 2, tet ); @@ -119,7 +122,7 @@ namespace Intrepid2 { subcellParamData_.wedgeFaces = subcellParamViewType(); }); - isReferenceNodeDataSet_ = true; + isSubcellParametrizationSet_= true; } // template diff --git a/packages/intrepid2/src/Cell/Intrepid2_CellToolsDefRefToPhys.hpp 
b/packages/intrepid2/src/Cell/Intrepid2_CellToolsDefRefToPhys.hpp index 40754d9ff059..9765446620f5 100644 --- a/packages/intrepid2/src/Cell/Intrepid2_CellToolsDefRefToPhys.hpp +++ b/packages/intrepid2/src/Cell/Intrepid2_CellToolsDefRefToPhys.hpp @@ -223,20 +223,39 @@ namespace Intrepid2 { ">>> ERROR (Intrepid2::CellTools::mapToReferenceSubcell): refSubcellPoints dimension (0) does not match to paramPoints dimension(0)."); #endif + if(!isSubcellParametrizationSet_) + setSubcellParametrization(); - const ordinal_type cellDim = parentCell.getDimension(); - const ordinal_type numPts = paramPoints.extent(0); + INTREPID2_TEST_FOR_EXCEPTION( subcellDim != 1 && + subcellDim != 2, std::invalid_argument, + ">>> ERROR (Intrepid2::CellTools::mapToReferenceSubcell): method defined only for 1 and 2-subcells"); - // Get the subcell map, i.e., the coefficients of the parametrization function for the subcell - // can i get this map from devices ? + // Get the subcell map, i.e., the coefficients of the parametrization function for the subcell subcellParamViewType subcellMap; getSubcellParametrization( subcellMap, subcellDim, parentCell ); - // subcell parameterization should be small computation (numPts is small) and it should be decorated with - // kokkos inline... 
let's not do this yet + // Apply the parametrization map to every point in parameter domain + mapToReferenceSubcell( refSubcellPoints, paramPoints, subcellMap, subcellDim, subcellOrd, parentCell.getDimension()); + } + + + template + template + void + KOKKOS_INLINE_FUNCTION + CellTools:: + mapToReferenceSubcell( Kokkos::DynRankView refSubcellPoints, + const Kokkos::DynRankView paramPoints, + const subcellParamViewType subcellMap, + const ordinal_type subcellDim, + const ordinal_type subcellOrd, + const ordinal_type parentCellDim ) { + + const ordinal_type numPts = paramPoints.extent(0); // Apply the parametrization map to every point in parameter domain switch (subcellDim) { @@ -246,7 +265,7 @@ namespace Intrepid2 { const auto v = paramPoints(pt, 1); // map_dim(u,v) = c_0(dim) + c_1(dim)*u + c_2(dim)*v because both Quad and Tri ref faces are affine! - for (ordinal_type i=0;i>> ERROR (Intrepid2::CellTools::mapToReferenceSubcell): method defined only for 1 and 2-subcells"); - } + default: {} } } } From 46b2a5af5171537c56eda676444a8febebbf57de Mon Sep 17 00:00:00 2001 From: Mauro Perego Date: Fri, 17 Apr 2020 18:16:58 -0600 Subject: [PATCH 07/86] Intrepid2: minor fixes to avoid warnings --- .../unit-test/Discretization/Basis/BasisEquivalenceTests.cpp | 4 ++-- .../Basis/HierarchicalBases/AnalyticPolynomialsMatchTests.cpp | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/BasisEquivalenceTests.cpp b/packages/intrepid2/unit-test/Discretization/Basis/BasisEquivalenceTests.cpp index 5d1959fe702a..342c676df5a0 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/BasisEquivalenceTests.cpp +++ b/packages/intrepid2/unit-test/Discretization/Basis/BasisEquivalenceTests.cpp @@ -108,7 +108,7 @@ namespace // since A is SPD, col/row major has no effect on the data // but B's data may be transposed relative to what LAPACK expects (column-major order) // so we allocate our own storage for B to make sure of the 
ordering - double B[N*M]; + std::vector B(N*M); for (int j=0; j Date: Fri, 17 Apr 2020 18:20:27 -0600 Subject: [PATCH 08/86] Intrepid2: Improve parallelization of Projection functions - use QR factorization provided in KokkosKernels instead of Lapack - use parallel_for over cells - cleaning/renaming code for improving readability - note: this adds required dependency of Intrepid2 on KokkosKernels --- packages/intrepid2/cmake/Dependencies.cmake | 2 +- .../Intrepid2_LagrangianInterpolationDef.hpp | 3 +- .../Projection/Intrepid2_ProjectionStruct.hpp | 215 ++- .../Intrepid2_ProjectionStructDef.hpp | 260 ++-- .../Projection/Intrepid2_ProjectionTools.hpp | 112 +- .../Intrepid2_ProjectionToolsDefHCURL.hpp | 1164 ++++++++++------- .../Intrepid2_ProjectionToolsDefHDIV.hpp | 674 +++++----- .../Intrepid2_ProjectionToolsDefHGRAD.hpp | 834 +++++++----- .../Intrepid2_ProjectionToolsDefHVOL.hpp | 133 +- .../Intrepid2_ProjectionToolsDefL2.hpp | 875 +++++++------ 10 files changed, 2491 insertions(+), 1781 deletions(-) diff --git a/packages/intrepid2/cmake/Dependencies.cmake b/packages/intrepid2/cmake/Dependencies.cmake index 732f0ec074e0..91ac866a6fe9 100644 --- a/packages/intrepid2/cmake/Dependencies.cmake +++ b/packages/intrepid2/cmake/Dependencies.cmake @@ -1,4 +1,4 @@ -SET(LIB_REQUIRED_DEP_PACKAGES TeuchosCore TeuchosNumerics Shards KokkosCore KokkosContainers KokkosAlgorithms) +SET(LIB_REQUIRED_DEP_PACKAGES TeuchosCore TeuchosNumerics Shards KokkosCore KokkosContainers KokkosAlgorithms KokkosKernels) SET(LIB_OPTIONAL_DEP_PACKAGES Sacado) SET(TEST_REQUIRED_DEP_PACKAGES) SET(TEST_OPTIONAL_DEP_PACKAGES Sacado) diff --git a/packages/intrepid2/src/Projection/Intrepid2_LagrangianInterpolationDef.hpp b/packages/intrepid2/src/Projection/Intrepid2_LagrangianInterpolationDef.hpp index df0387ac8467..7c9c7f903cef 100644 --- a/packages/intrepid2/src/Projection/Intrepid2_LagrangianInterpolationDef.hpp +++ b/packages/intrepid2/src/Projection/Intrepid2_LagrangianInterpolationDef.hpp @@ -345,8 
+345,7 @@ LagrangianInterpolation::getDofCoordsAndCoeffs( "method not implemented for this basis function"); } - auto tagToOrdinal = Kokkos::create_mirror_view(typename SpT::memory_space(), basis->getAllDofOrdinal()); - Kokkos::deep_copy(tagToOrdinal, basis->getAllDofOrdinal()); + auto tagToOrdinal = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(), basis->getAllDofOrdinal()); const ordinal_type dim = topo.getDimension(); diff --git a/packages/intrepid2/src/Projection/Intrepid2_ProjectionStruct.hpp b/packages/intrepid2/src/Projection/Intrepid2_ProjectionStruct.hpp index c49dbbaa9772..9f2991007d2e 100644 --- a/packages/intrepid2/src/Projection/Intrepid2_ProjectionStruct.hpp +++ b/packages/intrepid2/src/Projection/Intrepid2_ProjectionStruct.hpp @@ -76,16 +76,30 @@ namespace Experimental { The created class is then used with the Projection Tools. See ProjectionTools class for more info. */ + +ordinal_type +KOKKOS_INLINE_FUNCTION +range_size(const Kokkos::pair& range) { + return range.second - range.first; +} + template class ProjectionStruct { public: + enum EvalPointsType {BASIS, TARGET}; + typedef Kokkos::pair range_type; typedef typename Kokkos::Impl::is_space::host_mirror_space::execution_space host_space_type; typedef Kokkos::DynRankView view_type; - typedef std::array,4> range_tag; - typedef std::array,4> view_tag; - typedef std::array,4> key_tag; + typedef Kokkos::View range_tag; + static constexpr int numberSubCellDims = 4; //{0 for vertex, 1 for edges, 2 for faces, 3 for volumes} + //max of numVertices, numEdges, numFaces for a reference cell. + //12 is the number of edges in a Hexahderon. 
+ //We'll need to change this if we consider generic polyhedra + static constexpr int maxSubCellsCount = 12; + typedef std::array, numberSubCellDims> view_tag; + typedef Kokkos::View key_tag; /** \brief Returns number of basis evaluation points */ @@ -111,44 +125,30 @@ class ProjectionStruct { return numTargetDerivEvalPoints; } - /** \brief Returns number of basis evaluation points on a subcell - \param subCellDim [in] - dimension of the subcell - \param subCellId [in] - ordinal of the subcell defined by cell topology - - \return the number of basis evaluation points on the selected subcell - */ - ordinal_type getNumBasisEvalPoints(const ordinal_type subCellDim, const ordinal_type subCellId) { - return basisCubPoints[subCellDim][subCellId].extent(0); - } - - /** \brief Returns number of evaluation points for basis derivatives on a subcell - \param subCellDim [in] - dimension of the subcell - \param subCellId [in] - ordinal of the subcell defined by cell topology - - \return the number of basis derivatives evaluation points on the selected subcell - */ - ordinal_type getNumBasisDerivEvalPoints(const ordinal_type subCellDim, const ordinal_type subCellId) { - return basisDerivCubPoints[subCellDim][subCellId].extent(0); - } + /** \brief Returns the maximum number of derivative evaluation points across all the subcells + \param evalPointType [in] - enum selecting whether the points should be computed for the basis + functions or for the target function - /** \brief Returns number of points where to evaluate the target function on a subcell - \param subCellDim [in] - dimension of the subcell - \param subCellId [in] - ordinal of the subcell defined by cell topology - - \return the number of target evaluation points on the selected subcell + \return the maximum number of the derivative evaluation points across all the subcells */ - ordinal_type getNumTargetEvalPoints(const ordinal_type subCellDim, const ordinal_type subCellId) { - return 
targetCubPoints[subCellDim][subCellId].extent(0); + ordinal_type getMaxNumDerivPoints(const EvalPointsType type) const { + if(type == BASIS) + return maxNumBasisDerivEvalPoints; + else + return maxNumTargetDerivEvalPoints; } - /** \brief Returns number of points where to evaluate the derivatives of the target function on a subcell - \param subCellDim [in] - dimension of the subcell - \param subCellId [in] - ordinal of the subcell defined by cell topology + /** \brief Returns the maximum number of evaluation points across all the subcells + \param evalPointType [in] - enum selecting whether the points should be computed for the basis + functions or for the target function - \return the number of target derivatives evaluation points on the selected subcell + \return the maximum number of the evaluation points across all the subcells */ - ordinal_type getNumTargetDerivEvalPoints(const ordinal_type subCellDim, const ordinal_type subCellId) { - return targetDerivCubPoints[subCellDim][subCellId].extent(0); + ordinal_type getMaxNumEvalPoints(const EvalPointsType type) const { + if(type == BASIS) + return maxNumBasisEvalPoints; + else + return maxNumTargetEvalPoints; } /** \brief Returns the basis evaluation points on a subcell @@ -167,6 +167,7 @@ class ProjectionStruct { return basisCubPoints[subCellDim][subCellId]; } + /** \brief Returns the evaluation points for basis derivatives on a subcell \code @@ -174,8 +175,8 @@ class ProjectionStruct { D - spatial dimension \endcode - \param subCellDim [in] - dimension of the subcell - \param subCellId [in] - ordinal of the subcell defined by cell topology + \param subCellDim [in] - dimension of the subcell + \param subCellId [in] - ordinal of the subcell defined by cell topology \return a rank-2 view (P,D) containing the basis derivatives evaluation points on the selected subcell */ @@ -183,6 +184,7 @@ class ProjectionStruct { return basisDerivCubPoints[subCellDim][subCellId]; } + /** \brief Returns the points where to evaluate 
the target function on a subcell \code @@ -199,6 +201,7 @@ class ProjectionStruct { return targetCubPoints[subCellDim][subCellId]; } + /** \brief Returns the points where to evaluate the derivatives of the target function on a subcell \code @@ -215,6 +218,51 @@ class ProjectionStruct { return targetDerivCubPoints[subCellDim][subCellId]; } + + /** \brief Returns the basis/target evaluation points on a subcell + + \code + P - num. evaluation points + D - spatial dimension + \endcode + + \param subCellDim [in] - dimension of the subcell + \param subCellId [in] - ordinal of the subcell defined by cell topology + \param evalPointType [in] - enum selecting whether the points should be computed for the basis + functions or for the target function + + \return a rank-2 view (P,D) containing the basis/target function evaluation points on the selected subcell + */ + view_type getEvalPoints(const ordinal_type subCellDim, const ordinal_type subCellId, EvalPointsType type) const{ + if(type == BASIS) + return basisCubPoints[subCellDim][subCellId]; + else + return targetCubPoints[subCellDim][subCellId]; + } + + /** \brief Returns the evaluation points for basis/target derivatives on a subcell + + \code + P - num. 
evaluation points + D - spatial dimension + \endcode + + \param subCellDim [in] - dimension of the subcell + \param subCellId [in] - ordinal of the subcell defined by cell topology + \param evalPointType [in] - enum selecting whether the points should be computed for the basis + functions or for the target function + + \return a rank-2 view (P,D) containing the basis/target derivatives evaluation points on the selected subcell + */ + view_type getDerivEvalPoints(const ordinal_type subCellDim, const ordinal_type subCellId, EvalPointsType type) const{ + if(type == BASIS) + return basisDerivCubPoints[subCellDim][subCellId]; + else + return targetDerivCubPoints[subCellDim][subCellId]; + } + + + /** \brief Returns the basis evaluation weights on a subcell \code @@ -230,6 +278,7 @@ class ProjectionStruct { return basisCubWeights[subCellDim][subCellId]; } + /** \brief Returns the basis derivatives evaluation weights on a subcell \code @@ -245,6 +294,7 @@ class ProjectionStruct { return basisDerivCubWeights[subCellDim][subCellId]; } + /** \brief Returns the function evaluation weights on a subcell \code @@ -260,6 +310,7 @@ class ProjectionStruct { return targetCubWeights[subCellDim][subCellId]; } + /** \brief Returns the function derivatives evaluation weights on a subcell \code @@ -275,56 +326,79 @@ class ProjectionStruct { return targetDerivCubWeights[subCellDim][subCellId]; } - /** \brief Returns the range of the basis evaluation points corresponding to a subcell - \param subCellDim [in] - dimension of the subcell - \param subCellId [in] - ordinal of the subcell defined by cell topology - \return the range of the basis evaluation points corresponding to the selected subcell + /** \brief Returns the range tag of the basis evaluation points subcells + + \return the range tag of the basis evaluation points on subcells */ - range_type getBasisPointsRange(const ordinal_type subCellDim, const ordinal_type subCellId) { - return basisPointsRange[subCellDim][subCellId]; + const 
range_tag getBasisPointsRange() const { + return basisPointsRange; } - /** \brief Returns the range of the basis derivative evaluation points corresponding to a subcell - \param subCellDim [in] - dimension of the subcell - \param subCellId [in] - ordinal of the subcell defined by cell topology - \return the range of the basis derivative evaluation points corresponding to the selected subcell + /** \brief Returns the range tag of the basis/target evaluation points in subcells + \param evalPointType [in] - enum selecting whether the points should be computed for the basis + functions or for the target function + \return the range tag of the basis/target evaluation points on subcells */ - range_type getBasisDerivPointsRange(const ordinal_type subCellDim, const ordinal_type subCellId) { - return basisDerivPointsRange[subCellDim][subCellId]; + const range_tag getPointsRange(const EvalPointsType type) const { + if(type == BASIS) + return basisPointsRange; + else + return targetPointsRange; } - /** \brief Returns the range of the target function evaluation points corresponding to a subcell - \param subCellDim [in] - dimension of the subcell - \param subCellId [in] - ordinal of the subcell defined by cell topology - \return the range of the target function evaluation points corresponding to the selected subcell + /** \brief Returns the range tag of the derivative evaluation points on subcell + + \return the range tag of the basis derivative evaluation points corresponding on subcell */ - range_type getTargetPointsRange(const ordinal_type subCellDim, const ordinal_type subCellId) { - return targetPointsRange[subCellDim][subCellId]; + const range_tag getBasisDerivPointsRange() const { + return basisDerivPointsRange; } - /** \brief Returns the range of the target function derivative evaluation points corresponding to a subcell - \param subCellDim [in] - dimension of the subcell - \param subCellId [in] - ordinal of the subcell defined by cell topology + /** \brief Returns the 
range tag of the basis/target derivative evaluation points on subcells + \param evalPointType [in] - enum selecting whether the points should be computed for the basis + functions or for the target function - \return the range of the target function derivative evaluation points corresponding to the selected subcell + \return the range tag of the basis/target derivative evaluation points on subcells */ - range_type getTargetDerivPointsRange(const ordinal_type subCellDim, const ordinal_type subCellId) { - return targetDerivPointsRange[subCellDim][subCellId]; + const range_tag getDerivPointsRange(const EvalPointsType type) const { + if(type == BASIS) + return basisDerivPointsRange; + else + return targetDerivPointsRange; + } + + + /** \brief Returns the range of the target function evaluation points on subcells + + \return the range of the target function evaluation points corresponding on subcells + */ + const range_tag getTargetPointsRange() const { + return targetPointsRange; } - /** \brief Returns the key of a subcell topology - \param subCellDim [in] - dimension of the subcell - \param subCellId [in] - ordinal of the subcell defined by cell topology - \return the key of the selected subcell + /** \brief Returns the range tag of the target function derivative evaluation points on subcells + + \return the range of the target function derivative evaluation points corresponding on subcells */ - unsigned getTopologyKey(const ordinal_type subCellDim, const ordinal_type subCellId) { - return subCellTopologyKey[subCellDim][subCellId]; + const range_tag getTargetDerivPointsRange() const { + return targetDerivPointsRange; } + /** \brief Returns the key tag for subcells + + \return the key tag of the selected subcells + */ + const key_tag getTopologyKey() const { + return subCellTopologyKey; + } + + + + /** \brief Initialize the ProjectionStruct for L2 projections \param cellBasis [in] - basis functions for the projection \param targetCubDegree [in] - degree of the cubature 
needed to integrate the target function @@ -333,6 +407,7 @@ class ProjectionStruct { void createL2ProjectionStruct(const BasisPtrType cellBasis, const ordinal_type targetCubDegree); + /** \brief Initialize the ProjectionStruct for HGRAD projections \param cellBasis [in] - HGRAD basis functions for the projection \param targetCubDegree [in] - degree of the cubature needed to integrate the target function @@ -343,6 +418,7 @@ class ProjectionStruct { const ordinal_type targetCubDegree, const ordinal_type targetGradCubDegre); + /** \brief Initialize the ProjectionStruct for HCURL projections \param cellBasis [in] - HCURL basis functions for the projection \param targetCubDegree [in] - degree of the cubature needed to integrate the target function @@ -353,6 +429,7 @@ class ProjectionStruct { const ordinal_type targetCubDegree, const ordinal_type targetCurlCubDegre); + /** \brief Initialize the ProjectionStruct for HDIV projections \param cellBasis [in] - HDIV basis functions for the projection \param targetCubDegree [in] - degree of the cubature needed to integrate the target function @@ -388,6 +465,10 @@ class ProjectionStruct { ordinal_type numBasisDerivEvalPoints; ordinal_type numTargetEvalPoints; ordinal_type numTargetDerivEvalPoints; + ordinal_type maxNumBasisEvalPoints; + ordinal_type maxNumTargetEvalPoints; + ordinal_type maxNumBasisDerivEvalPoints; + ordinal_type maxNumTargetDerivEvalPoints; }; } diff --git a/packages/intrepid2/src/Projection/Intrepid2_ProjectionStructDef.hpp b/packages/intrepid2/src/Projection/Intrepid2_ProjectionStructDef.hpp index a2e751e00bb8..25dba9696671 100644 --- a/packages/intrepid2/src/Projection/Intrepid2_ProjectionStructDef.hpp +++ b/packages/intrepid2/src/Projection/Intrepid2_ProjectionStructDef.hpp @@ -83,18 +83,32 @@ void ProjectionStruct::createL2ProjectionStruct(const BasisPtrTyp ordinal_type numFaces = (cellBasis->getDofCount(2, 0) > 0) ? cellTopo.getFaceCount() : 0; ordinal_type numEdges = (cellBasis->getDofCount(1, 0) > 0) ? 
cellTopo.getEdgeCount() : 0; + ordinal_type hasCellDofs = (cellBasis->getDofCount(dim, 0) > 0); + + INTREPID2_TEST_FOR_ABORT( (numVertices > maxSubCellsCount) || (numFaces > maxSubCellsCount) || (numEdges > maxSubCellsCount), + ">>> ERROR (Intrepid2::ProjectionStruct:createHDivProjectionStruct, Projections do not support a cell topology with this cub cells count"); + + numBasisEvalPoints += numVertices; numTargetEvalPoints += numVertices; - view_type dofCoords("dofCoords", cellBasis->getCardinality(), dim); + view_type coord("vertex_coord", dim); + + basisPointsRange = range_tag("basisPointsRange", 4,maxSubCellsCount); + targetPointsRange = range_tag("targetPointsRange", 4,maxSubCellsCount); + basisDerivPointsRange = range_tag("basisDerivPointsRange", 4,maxSubCellsCount); + targetDerivPointsRange = range_tag("targetDerivPointsRange", numberSubCellDims,maxSubCellsCount); + subCellTopologyKey = key_tag("subCellTopologyKey",numberSubCellDims,maxSubCellsCount); + + maxNumBasisEvalPoints = numVertices; maxNumTargetEvalPoints = numVertices; for(ordinal_type iv=0; ivgetDofOrdinal(0, iv, 0); + CellTools::getReferenceVertex(coord, cellTopo, iv); for(ordinal_type d=0; d::createL2ProjectionStruct(const BasisPtrTyp DefaultCubatureFactory cub_factory; for(ordinal_type ie=0; iegetBaseCellTopology().getKey(edgeDim, ie); + subCellTopologyKey(edgeDim,ie) = cellBasis->getBaseCellTopology().getKey(edgeDim, ie); auto edgeBasisCub = cub_factory.create(cellBasis->getBaseCellTopology().getKey(edgeDim, ie), cub_degree); - basisPointsRange[edgeDim][ie] = range_type(numBasisEvalPoints, numBasisEvalPoints+edgeBasisCub->getNumPoints()); + basisPointsRange(edgeDim,ie) = range_type(numBasisEvalPoints, numBasisEvalPoints+edgeBasisCub->getNumPoints()); numBasisEvalPoints += edgeBasisCub->getNumPoints(); + maxNumBasisEvalPoints = std::max(maxNumBasisEvalPoints, edgeBasisCub->getNumPoints()); basisCubPoints[edgeDim][ie] = view_type("basisCubPoints",edgeBasisCub->getNumPoints(),edgeDim); 
basisCubWeights[edgeDim][ie] = view_type("basisCubWeights",edgeBasisCub->getNumPoints()); edgeBasisCub->getCubature(basisCubPoints[edgeDim][ie], basisCubWeights[edgeDim][ie]); cub_degree = edgeBasisCubDegree + targetCubDegree; auto edgeTargetCub = cub_factory.create(cellBasis->getBaseCellTopology().getKey(edgeDim, ie), cub_degree); - targetPointsRange[edgeDim][ie] = range_type(numTargetEvalPoints, numTargetEvalPoints+edgeTargetCub->getNumPoints()); + targetPointsRange(edgeDim,ie) = range_type(numTargetEvalPoints, numTargetEvalPoints+edgeTargetCub->getNumPoints()); numTargetEvalPoints += edgeTargetCub->getNumPoints(); + maxNumTargetEvalPoints = std::max(maxNumTargetEvalPoints, edgeTargetCub->getNumPoints()); targetCubPoints[edgeDim][ie] = view_type("targetCubPoints",edgeTargetCub->getNumPoints(),edgeDim); targetCubWeights[edgeDim][ie] = view_type("targetCubWeights",edgeTargetCub->getNumPoints()); edgeTargetCub->getCubature(targetCubPoints[edgeDim][ie], targetCubWeights[edgeDim][ie]); @@ -135,36 +151,40 @@ void ProjectionStruct::createL2ProjectionStruct(const BasisPtrTyp for(ordinal_type iface=0; ifacegetBaseCellTopology().getKey(faceDim, iface); - auto faceBasisCub = cub_factory.create(subCellTopologyKey[faceDim][iface], cub_degree); - basisPointsRange[faceDim][iface] = range_type(numBasisEvalPoints, numBasisEvalPoints+faceBasisCub->getNumPoints()); + subCellTopologyKey(faceDim,iface) = cellBasis->getBaseCellTopology().getKey(faceDim, iface); + auto faceBasisCub = cub_factory.create(subCellTopologyKey(faceDim,iface), cub_degree); + basisPointsRange(faceDim,iface) = range_type(numBasisEvalPoints, numBasisEvalPoints+faceBasisCub->getNumPoints()); numBasisEvalPoints += faceBasisCub->getNumPoints(); + maxNumBasisEvalPoints = std::max(maxNumBasisEvalPoints, faceBasisCub->getNumPoints()); basisCubPoints[faceDim][iface] = view_type("basisCubPoints",faceBasisCub->getNumPoints(),faceDim); basisCubWeights[faceDim][iface] = 
view_type("basisCubWeights",faceBasisCub->getNumPoints()); faceBasisCub->getCubature(basisCubPoints[faceDim][iface], basisCubWeights[faceDim][iface]); cub_degree = faceBasisCubDegree + targetCubDegree; - auto faceTargetCub = cub_factory.create(subCellTopologyKey[faceDim][iface], cub_degree); - targetPointsRange[faceDim][iface] = range_type(numTargetEvalPoints, numTargetEvalPoints+faceTargetCub->getNumPoints()); + auto faceTargetCub = cub_factory.create(subCellTopologyKey(faceDim,iface), cub_degree); + targetPointsRange(faceDim,iface) = range_type(numTargetEvalPoints, numTargetEvalPoints+faceTargetCub->getNumPoints()); numTargetEvalPoints += faceTargetCub->getNumPoints(); + maxNumTargetEvalPoints = std::max(maxNumTargetEvalPoints, faceTargetCub->getNumPoints()); targetCubPoints[faceDim][iface] = view_type("targetCubPoints",faceTargetCub->getNumPoints(),faceDim); targetCubWeights[faceDim][iface] = view_type("targetCubWeights",faceTargetCub->getNumPoints()); faceTargetCub->getCubature(targetCubPoints[faceDim][iface], targetCubWeights[faceDim][iface]); } - subCellTopologyKey[dim][0] = cellBasis->getBaseCellTopology().getBaseKey(); - if(cellBasis->getDofCount(dim,0)>0) { + subCellTopologyKey(dim,0) = cellBasis->getBaseCellTopology().getBaseKey(); + if(hasCellDofs) { ordinal_type cub_degree = 2*basisCubDegree; - auto elemBasisCub = cub_factory.create(subCellTopologyKey[dim][0], cub_degree); - basisPointsRange[dim][0] = range_type(numBasisEvalPoints, numBasisEvalPoints+elemBasisCub->getNumPoints()); + auto elemBasisCub = cub_factory.create(subCellTopologyKey(dim,0), cub_degree); + basisPointsRange(dim,0) = range_type(numBasisEvalPoints, numBasisEvalPoints+elemBasisCub->getNumPoints()); numBasisEvalPoints += elemBasisCub->getNumPoints(); + maxNumBasisEvalPoints = std::max(maxNumBasisEvalPoints, elemBasisCub->getNumPoints()); basisCubPoints[dim][0] = view_type("basisCubPoints",elemBasisCub->getNumPoints(),dim); basisCubWeights[dim][0] = 
view_type("basisCubWeights",elemBasisCub->getNumPoints()); elemBasisCub->getCubature(basisCubPoints[dim][0], basisCubWeights[dim][0]); cub_degree = basisCubDegree + targetCubDegree; - auto elemTargetCub = cub_factory.create(subCellTopologyKey[dim][0], cub_degree); - targetPointsRange[dim][0] = range_type(numTargetEvalPoints, numTargetEvalPoints+elemTargetCub->getNumPoints()); + auto elemTargetCub = cub_factory.create(subCellTopologyKey(dim,0), cub_degree); + targetPointsRange(dim,0) = range_type(numTargetEvalPoints, numTargetEvalPoints+elemTargetCub->getNumPoints()); numTargetEvalPoints += elemTargetCub->getNumPoints(); + maxNumTargetEvalPoints = std::max(maxNumTargetEvalPoints, elemTargetCub->getNumPoints()); targetCubPoints[dim][0] = view_type("targetCubPoints",elemTargetCub->getNumPoints(),dim); targetCubWeights[dim][0] = view_type("targetCubWeights",elemTargetCub->getNumPoints()); elemTargetCub->getCubature(targetCubPoints[dim][0], targetCubWeights[dim][0]); @@ -194,36 +214,53 @@ void ProjectionStruct::createHGradProjectionStruct(const BasisPtr ordinal_type numFaces = (cellBasis->getDofCount(2, 0) > 0) ? cellTopo.getFaceCount(): 0; ordinal_type numEdges = (cellBasis->getDofCount(1, 0) > 0) ? 
cellTopo.getEdgeCount() : 0; + INTREPID2_TEST_FOR_ABORT( (numFaces > maxSubCellsCount) || (numEdges > maxSubCellsCount), + ">>> ERROR (Intrepid2::ProjectionStruct:createHDivProjectionStruct, Projections do not support a cell topology with this cub cells count"); + + + ordinal_type hasCellDofs = (cellBasis->getDofCount(dim, 0) > 0); + + maxNumBasisEvalPoints = numVertices; maxNumTargetEvalPoints = numVertices; + maxNumBasisDerivEvalPoints = 0; maxNumTargetDerivEvalPoints = 0; + + basisPointsRange = range_tag("basisPointsRange", numberSubCellDims,maxSubCellsCount); + targetPointsRange = range_tag("targetPointsRange", numberSubCellDims,maxSubCellsCount); + basisDerivPointsRange = range_tag("basisDerivPointsRange", numberSubCellDims,maxSubCellsCount); + targetDerivPointsRange = range_tag("targetDerivPointsRange", numberSubCellDims,maxSubCellsCount); + subCellTopologyKey = key_tag("subCellTopologyKey",numberSubCellDims,maxSubCellsCount); + numBasisEvalPoints += numVertices; numTargetEvalPoints += numVertices; - view_type dofCoords("dofCoords", cellBasis->getCardinality(), dim); + view_type coord("vertex_coord", dim); for(ordinal_type iv=0; ivgetDofOrdinal(0, iv, 0); + CellTools::getReferenceVertex(coord, cellTopo, iv); for(ordinal_type d=0; dgetBaseCellTopology().getKey(edgeDim, ie); + subCellTopologyKey(edgeDim,ie) = cellBasis->getBaseCellTopology().getKey(edgeDim, ie); auto edgeBasisCub = cub_factory.create(cellBasis->getBaseCellTopology().getKey(edgeDim, ie), cub_degree); - basisDerivPointsRange[edgeDim][ie] = range_type(numBasisDerivEvalPoints, numBasisDerivEvalPoints+edgeBasisCub->getNumPoints()); + basisDerivPointsRange(edgeDim,ie) = range_type(numBasisDerivEvalPoints, numBasisDerivEvalPoints+edgeBasisCub->getNumPoints()); numBasisDerivEvalPoints += edgeBasisCub->getNumPoints(); + maxNumBasisDerivEvalPoints = std::max(maxNumBasisDerivEvalPoints, edgeBasisCub->getNumPoints()); basisDerivCubPoints[edgeDim][ie] = 
view_type("basisDerivCubPoints",edgeBasisCub->getNumPoints(),edgeDim); basisDerivCubWeights[edgeDim][ie] = view_type("basisDerivCubWeights",edgeBasisCub->getNumPoints()); edgeBasisCub->getCubature(basisDerivCubPoints[edgeDim][ie], basisDerivCubWeights[edgeDim][ie]); cub_degree = edgeBasisCubDegree + targetGradCubDegree; auto edgeTargetCub = cub_factory.create(cellBasis->getBaseCellTopology().getKey(edgeDim, ie), cub_degree); - targetDerivPointsRange[edgeDim][ie] = range_type(numTargetDerivEvalPoints, numTargetDerivEvalPoints+edgeTargetCub->getNumPoints()); + targetDerivPointsRange(edgeDim,ie) = range_type(numTargetDerivEvalPoints, numTargetDerivEvalPoints+edgeTargetCub->getNumPoints()); numTargetDerivEvalPoints += edgeTargetCub->getNumPoints(); + maxNumTargetDerivEvalPoints = std::max(maxNumTargetDerivEvalPoints, edgeTargetCub->getNumPoints()); targetDerivCubPoints[edgeDim][ie] = view_type("targetDerivCubPoints",edgeTargetCub->getNumPoints(),edgeDim); targetDerivCubWeights[edgeDim][ie] = view_type("targetDerivCubWeights",edgeTargetCub->getNumPoints()); edgeTargetCub->getCubature(targetDerivCubPoints[edgeDim][ie], targetDerivCubWeights[edgeDim][ie]); @@ -231,36 +268,40 @@ void ProjectionStruct::createHGradProjectionStruct(const BasisPtr for(ordinal_type iface=0; ifacegetBaseCellTopology().getKey(faceDim, iface); - auto faceBasisGradCub = cub_factory.create(subCellTopologyKey[faceDim][iface], cub_degree); - basisDerivPointsRange[faceDim][iface] = range_type(numBasisDerivEvalPoints, numBasisDerivEvalPoints+faceBasisGradCub->getNumPoints()); + subCellTopologyKey(faceDim,iface) = cellBasis->getBaseCellTopology().getKey(faceDim, iface); + auto faceBasisGradCub = cub_factory.create(subCellTopologyKey(faceDim,iface), cub_degree); + basisDerivPointsRange(faceDim,iface) = range_type(numBasisDerivEvalPoints, numBasisDerivEvalPoints+faceBasisGradCub->getNumPoints()); numBasisDerivEvalPoints += faceBasisGradCub->getNumPoints(); + maxNumBasisDerivEvalPoints = 
std::max(maxNumBasisDerivEvalPoints, faceBasisGradCub->getNumPoints()); basisDerivCubPoints[faceDim][iface] = view_type("basisDerivCubPoints",faceBasisGradCub->getNumPoints(),faceDim); basisDerivCubWeights[faceDim][iface] = view_type("basisDerivCubWeights",faceBasisGradCub->getNumPoints()); faceBasisGradCub->getCubature(basisDerivCubPoints[faceDim][iface], basisDerivCubWeights[faceDim][iface]); cub_degree = faceBasisCubDegree + targetGradCubDegree; - auto faceTargetDerivCub = cub_factory.create(subCellTopologyKey[faceDim][iface], cub_degree); - targetDerivPointsRange[faceDim][iface] = range_type(numTargetDerivEvalPoints, numTargetDerivEvalPoints+faceTargetDerivCub->getNumPoints()); + auto faceTargetDerivCub = cub_factory.create(subCellTopologyKey(faceDim,iface), cub_degree); + targetDerivPointsRange(faceDim,iface) = range_type(numTargetDerivEvalPoints, numTargetDerivEvalPoints+faceTargetDerivCub->getNumPoints()); numTargetDerivEvalPoints += faceTargetDerivCub->getNumPoints(); + maxNumTargetDerivEvalPoints = std::max(maxNumTargetDerivEvalPoints, faceTargetDerivCub->getNumPoints()); targetDerivCubPoints[faceDim][iface] = view_type("targetDerivCubPoints",faceTargetDerivCub->getNumPoints(),faceDim); targetDerivCubWeights[faceDim][iface] = view_type("targetDerivCubWeights",faceTargetDerivCub->getNumPoints()); faceTargetDerivCub->getCubature(targetDerivCubPoints[faceDim][iface], targetDerivCubWeights[faceDim][iface]); } - subCellTopologyKey[dim][0] = cellBasis->getBaseCellTopology().getBaseKey(); - if(cellBasis->getDofCount(dim,0)>0) { + subCellTopologyKey(dim,0) = cellBasis->getBaseCellTopology().getBaseKey(); + if(hasCellDofs) { ordinal_type cub_degree = 2*basisCubDegree; - auto elemBasisGradCub = cub_factory.create(subCellTopologyKey[dim][0], cub_degree); - basisDerivPointsRange[dim][0] = range_type(numBasisDerivEvalPoints, numBasisDerivEvalPoints+elemBasisGradCub->getNumPoints()); + auto elemBasisGradCub = cub_factory.create(subCellTopologyKey(dim,0), cub_degree); + 
basisDerivPointsRange(dim,0) = range_type(numBasisDerivEvalPoints, numBasisDerivEvalPoints+elemBasisGradCub->getNumPoints()); numBasisDerivEvalPoints += elemBasisGradCub->getNumPoints(); + maxNumBasisDerivEvalPoints = std::max(maxNumBasisDerivEvalPoints, elemBasisGradCub->getNumPoints()); basisDerivCubPoints[dim][0] = view_type("basisDerivCubPoints",elemBasisGradCub->getNumPoints(),dim); basisDerivCubWeights[dim][0] = view_type("basisDerivCubWeights",elemBasisGradCub->getNumPoints()); elemBasisGradCub->getCubature(basisDerivCubPoints[dim][0], basisDerivCubWeights[dim][0]); cub_degree = basisCubDegree + targetGradCubDegree; - auto elemTargetGradCub = cub_factory.create(subCellTopologyKey[dim][0], cub_degree); - targetDerivPointsRange[dim][0] = range_type(numTargetDerivEvalPoints, numTargetDerivEvalPoints+elemTargetGradCub->getNumPoints()); + auto elemTargetGradCub = cub_factory.create(subCellTopologyKey(dim,0), cub_degree); + targetDerivPointsRange(dim,0) = range_type(numTargetDerivEvalPoints, numTargetDerivEvalPoints+elemTargetGradCub->getNumPoints()); numTargetDerivEvalPoints += elemTargetGradCub->getNumPoints(); + maxNumTargetDerivEvalPoints = std::max(maxNumTargetDerivEvalPoints, elemTargetGradCub->getNumPoints()); targetDerivCubPoints[dim][0] = view_type("targetDerivCubPoints",elemTargetGradCub->getNumPoints(),dim); targetDerivCubWeights[dim][0] = view_type("targetDerivCubWeights",elemTargetGradCub->getNumPoints()); elemTargetGradCub->getCubature(targetDerivCubPoints[dim][0], targetDerivCubWeights[dim][0]); @@ -287,22 +328,34 @@ void ProjectionStruct::createHCurlProjectionStruct(const BasisPtr ordinal_type faceBasisCubDegree = basisCubDegree; ordinal_type numFaces = (cellBasis->getDofCount(2, 0) > 0) ? cellTopo.getFaceCount() : 0; ordinal_type numEdges = (cellBasis->getDofCount(1, 0) > 0) ? 
cellTopo.getEdgeCount() : 0; + ordinal_type hasCellDofs = (cellBasis->getDofCount(dim, 0) > 0); + + maxNumBasisEvalPoints = 0; maxNumTargetEvalPoints = 0; + maxNumBasisDerivEvalPoints = 0; maxNumTargetDerivEvalPoints = 0; + + basisPointsRange = range_tag("basisPointsRange", numberSubCellDims,maxSubCellsCount); + targetPointsRange = range_tag("targetPointsRange", numberSubCellDims,maxSubCellsCount); + basisDerivPointsRange = range_tag("basisDerivPointsRange", numberSubCellDims,maxSubCellsCount); + targetDerivPointsRange = range_tag("targetDerivPointsRange", numberSubCellDims,maxSubCellsCount); + subCellTopologyKey = key_tag("subCellTopologyKey",numberSubCellDims,maxSubCellsCount); DefaultCubatureFactory cub_factory; for(ordinal_type ie=0; iegetBaseCellTopology().getKey(edgeDim, ie); - auto edgeBasisCub = cub_factory.create(subCellTopologyKey[edgeDim][ie], cub_degree); - basisPointsRange[edgeDim][ie] = range_type(numBasisEvalPoints, numBasisEvalPoints+edgeBasisCub->getNumPoints()); + subCellTopologyKey(edgeDim,ie) = cellBasis->getBaseCellTopology().getKey(edgeDim, ie); + auto edgeBasisCub = cub_factory.create(subCellTopologyKey(edgeDim,ie), cub_degree); + basisPointsRange(edgeDim,ie) = range_type(numBasisEvalPoints, numBasisEvalPoints+edgeBasisCub->getNumPoints()); numBasisEvalPoints += edgeBasisCub->getNumPoints(); + maxNumBasisEvalPoints = std::max(maxNumBasisEvalPoints, edgeBasisCub->getNumPoints()); basisCubPoints[edgeDim][ie] = view_type("basisCubPoints",edgeBasisCub->getNumPoints(),edgeDim); basisCubWeights[edgeDim][ie] = view_type("basisCubWeights",edgeBasisCub->getNumPoints()); edgeBasisCub->getCubature(basisCubPoints[edgeDim][ie], basisCubWeights[edgeDim][ie]); cub_degree = edgeBasisCubDegree + targetCubDegree; - auto edgeTargetCub = cub_factory.create(subCellTopologyKey[edgeDim][ie], cub_degree); - targetPointsRange[edgeDim][ie] = range_type(numTargetEvalPoints, numTargetEvalPoints+edgeTargetCub->getNumPoints()); + auto edgeTargetCub = 
cub_factory.create(subCellTopologyKey(edgeDim,ie), cub_degree); + targetPointsRange(edgeDim,ie) = range_type(numTargetEvalPoints, numTargetEvalPoints+edgeTargetCub->getNumPoints()); numTargetEvalPoints += edgeTargetCub->getNumPoints(); + maxNumTargetEvalPoints = std::max(maxNumTargetEvalPoints, edgeTargetCub->getNumPoints()); targetCubPoints[edgeDim][ie] = view_type("targetCubPoints",edgeTargetCub->getNumPoints(),edgeDim); targetCubWeights[edgeDim][ie] = view_type("targetCubWeights",edgeTargetCub->getNumPoints()); edgeTargetCub->getCubature(targetCubPoints[edgeDim][ie], targetCubWeights[edgeDim][ie]); @@ -310,70 +363,78 @@ void ProjectionStruct::createHCurlProjectionStruct(const BasisPtr for(ordinal_type iface=0; ifacegetBaseCellTopology().getKey(faceDim, iface); - auto faceBasisCub = cub_factory.create(subCellTopologyKey[faceDim][iface], cub_degree); - basisPointsRange[faceDim][iface] = range_type(numBasisEvalPoints, numBasisEvalPoints+faceBasisCub->getNumPoints()); + subCellTopologyKey(faceDim,iface) = cellBasis->getBaseCellTopology().getKey(faceDim, iface); + auto faceBasisCub = cub_factory.create(subCellTopologyKey(faceDim,iface), cub_degree); + basisPointsRange(faceDim,iface) = range_type(numBasisEvalPoints, numBasisEvalPoints+faceBasisCub->getNumPoints()); numBasisEvalPoints += faceBasisCub->getNumPoints(); + maxNumBasisEvalPoints = std::max(maxNumBasisEvalPoints, faceBasisCub->getNumPoints()); basisCubPoints[faceDim][iface] = view_type("basisCubPoints",faceBasisCub->getNumPoints(),faceDim); basisCubWeights[faceDim][iface] = view_type("basisCubWeights",faceBasisCub->getNumPoints()); faceBasisCub->getCubature(basisCubPoints[faceDim][iface], basisCubWeights[faceDim][iface]); - basisDerivPointsRange[faceDim][iface] = range_type(numBasisDerivEvalPoints, numBasisDerivEvalPoints+faceBasisCub->getNumPoints()); + auto faceBasisDerivCub = cub_factory.create(subCellTopologyKey(faceDim,iface), cub_degree); + basisDerivPointsRange(faceDim,iface) = 
range_type(numBasisDerivEvalPoints, numBasisDerivEvalPoints+faceBasisCub->getNumPoints()); numBasisDerivEvalPoints += faceBasisCub->getNumPoints(); + maxNumBasisDerivEvalPoints = std::max(maxNumBasisDerivEvalPoints, faceBasisCub->getNumPoints()); basisDerivCubPoints[faceDim][iface] = view_type("basisDerivCubPoints",faceBasisCub->getNumPoints(),faceDim); basisDerivCubWeights[faceDim][iface] = view_type("basisDerivCubWeights",faceBasisCub->getNumPoints()); faceBasisCub->getCubature(basisDerivCubPoints[faceDim][iface], basisDerivCubWeights[faceDim][iface]); cub_degree = faceBasisCubDegree + targetCubDegree; - auto faceTargetCub = cub_factory.create(subCellTopologyKey[faceDim][iface], cub_degree); - targetPointsRange[faceDim][iface] = range_type(numTargetEvalPoints, numTargetEvalPoints+faceTargetCub->getNumPoints()); + auto faceTargetCub = cub_factory.create(subCellTopologyKey(faceDim,iface), cub_degree); + targetPointsRange(faceDim,iface) = range_type(numTargetEvalPoints, numTargetEvalPoints+faceTargetCub->getNumPoints()); numTargetEvalPoints += faceTargetCub->getNumPoints(); + maxNumTargetEvalPoints = std::max(maxNumTargetEvalPoints, faceTargetCub->getNumPoints()); targetCubPoints[faceDim][iface] = view_type("targetCubPoints",faceTargetCub->getNumPoints(),faceDim); targetCubWeights[faceDim][iface] = view_type("targetCubWeights",faceTargetCub->getNumPoints()); faceTargetCub->getCubature(targetCubPoints[faceDim][iface], targetCubWeights[faceDim][iface]); cub_degree = faceBasisCubDegree + targetCurlCubDegre; - auto faceTargetDerivCub = cub_factory.create(subCellTopologyKey[faceDim][iface], cub_degree); - targetDerivPointsRange[faceDim][iface] = range_type(numTargetDerivEvalPoints, numTargetDerivEvalPoints+faceTargetDerivCub->getNumPoints()); + auto faceTargetDerivCub = cub_factory.create(subCellTopologyKey(faceDim,iface), cub_degree); + targetDerivPointsRange(faceDim,iface) = range_type(numTargetDerivEvalPoints, 
numTargetDerivEvalPoints+faceTargetDerivCub->getNumPoints()); numTargetDerivEvalPoints += faceTargetDerivCub->getNumPoints(); + maxNumTargetDerivEvalPoints = std::max(maxNumTargetDerivEvalPoints, faceTargetDerivCub->getNumPoints()); targetDerivCubPoints[faceDim][iface] = view_type("targetDerivCubPoints",faceTargetDerivCub->getNumPoints(),faceDim); targetDerivCubWeights[faceDim][iface] = view_type("targetDerivCubWeights",faceTargetDerivCub->getNumPoints()); faceTargetDerivCub->getCubature(targetDerivCubPoints[faceDim][iface], targetDerivCubWeights[faceDim][iface]); } - subCellTopologyKey[dim][0] = cellBasis->getBaseCellTopology().getBaseKey(); - if(cellBasis->getDofCount(dim,0)>0) { + subCellTopologyKey(dim,0) = cellBasis->getBaseCellTopology().getBaseKey(); + if(hasCellDofs) { ordinal_type cub_degree = 2*basisCubDegree; - auto elemBasisCub = cub_factory.create(subCellTopologyKey[dim][0], cub_degree); - basisPointsRange[dim][0] = range_type(numBasisEvalPoints, numBasisEvalPoints+elemBasisCub->getNumPoints()); + auto elemBasisCub = cub_factory.create(subCellTopologyKey(dim,0), cub_degree); + basisPointsRange(dim,0) = range_type(numBasisEvalPoints, numBasisEvalPoints+elemBasisCub->getNumPoints()); numBasisEvalPoints += elemBasisCub->getNumPoints(); + maxNumBasisEvalPoints = std::max(maxNumBasisEvalPoints, elemBasisCub->getNumPoints()); basisCubPoints[dim][0] = view_type("basisCubPoints",elemBasisCub->getNumPoints(),dim); basisCubWeights[dim][0] = view_type("basisCubWeights",elemBasisCub->getNumPoints()); elemBasisCub->getCubature(basisCubPoints[dim][0], basisCubWeights[dim][0]); - basisDerivPointsRange[dim][0] = range_type(numBasisDerivEvalPoints, numBasisDerivEvalPoints+elemBasisCub->getNumPoints()); + basisDerivPointsRange(dim,0) = range_type(numBasisDerivEvalPoints, numBasisDerivEvalPoints+elemBasisCub->getNumPoints()); numBasisDerivEvalPoints += elemBasisCub->getNumPoints(); + maxNumBasisDerivEvalPoints = std::max(maxNumBasisDerivEvalPoints, 
elemBasisCub->getNumPoints()); basisDerivCubPoints[dim][0] = view_type("basisDerivCubPoints",elemBasisCub->getNumPoints(),dim); basisDerivCubWeights[dim][0] = view_type("basisDerivCubWeights",elemBasisCub->getNumPoints()); elemBasisCub->getCubature(basisDerivCubPoints[dim][0], basisDerivCubWeights[dim][0]); cub_degree = basisCubDegree + targetCubDegree; - auto elemTargetCub = cub_factory.create(subCellTopologyKey[dim][0], cub_degree); - targetPointsRange[dim][0] = range_type(numTargetEvalPoints, numTargetEvalPoints+elemTargetCub->getNumPoints()); + auto elemTargetCub = cub_factory.create(subCellTopologyKey(dim,0), cub_degree); + targetPointsRange(dim,0) = range_type(numTargetEvalPoints, numTargetEvalPoints+elemTargetCub->getNumPoints()); numTargetEvalPoints += elemTargetCub->getNumPoints(); + maxNumTargetEvalPoints = std::max(maxNumTargetEvalPoints, elemTargetCub->getNumPoints()); targetCubPoints[dim][0] = view_type("targetCubPoints",elemTargetCub->getNumPoints(),dim); targetCubWeights[dim][0] = view_type("targetCubWeights",elemTargetCub->getNumPoints()); elemTargetCub->getCubature(targetCubPoints[dim][0], targetCubWeights[dim][0]); cub_degree = basisCubDegree + targetCurlCubDegre; - auto elemTargetCurlCub = cub_factory.create(subCellTopologyKey[dim][0], cub_degree); - targetDerivPointsRange[dim][0] = range_type(numTargetDerivEvalPoints, numTargetDerivEvalPoints+elemTargetCurlCub->getNumPoints()); + auto elemTargetCurlCub = cub_factory.create(subCellTopologyKey(dim,0), cub_degree); + targetDerivPointsRange(dim,0) = range_type(numTargetDerivEvalPoints, numTargetDerivEvalPoints+elemTargetCurlCub->getNumPoints()); numTargetDerivEvalPoints += elemTargetCurlCub->getNumPoints(); + maxNumTargetDerivEvalPoints = std::max(maxNumTargetDerivEvalPoints, elemTargetCurlCub->getNumPoints()); targetDerivCubPoints[dim][0] = view_type("targetDerivCubPoints",elemTargetCurlCub->getNumPoints(),dim); targetDerivCubWeights[dim][0] = 
view_type("targetDerivCubWeights",elemTargetCurlCub->getNumPoints()); elemTargetCurlCub->getCubature(targetDerivCubPoints[dim][0], targetDerivCubWeights[dim][0]); } - } template @@ -393,15 +454,28 @@ void ProjectionStruct::createHDivProjectionStruct(const BasisPtrT ordinal_type basisCubDegree = cellBasis->getDegree(); ordinal_type sideBasisCubDegree = basisCubDegree - 1; ordinal_type numSides = cellTopo.getSideCount()*ordinal_type(cellBasis->getDofCount(sideDim, 0) > 0); + ordinal_type hasCellDofs = (cellBasis->getDofCount(dim, 0) > 0); + + INTREPID2_TEST_FOR_ABORT( numSides > maxSubCellsCount, + ">>> ERROR (Intrepid2::ProjectionStruct:createHDivProjectionStruct, Projections do not support a cell topology with so many sides"); + + maxNumBasisEvalPoints = 0; maxNumTargetEvalPoints = 0; + maxNumBasisDerivEvalPoints = 0; maxNumTargetDerivEvalPoints = 0; + + basisPointsRange = range_tag("basisPointsRange", numberSubCellDims,maxSubCellsCount); + targetPointsRange = range_tag("targetPointsRange", numberSubCellDims,maxSubCellsCount); + basisDerivPointsRange = range_tag("basisDerivPointsRange", numberSubCellDims,maxSubCellsCount); + targetDerivPointsRange = range_tag("targetDerivPointsRange", numberSubCellDims,maxSubCellsCount); + subCellTopologyKey = key_tag("subCellTopologyKey",numberSubCellDims,maxSubCellsCount); Basis *hcurlBasis = NULL; - if(name.find("HEX")!=std::string::npos) + if(cellTopo.getKey() == shards::getCellTopologyData >()->key) hcurlBasis = new Basis_HCURL_HEX_In_FEM(cellBasis->getDegree()); - else if(name.find("TET")!=std::string::npos) + else if(cellTopo.getKey() == shards::getCellTopologyData >()->key) hcurlBasis = new Basis_HCURL_TET_In_FEM(cellBasis->getDegree()); - else if(name.find("QUAD")!=std::string::npos) + else if(cellTopo.getKey() == shards::getCellTopologyData >()->key) hcurlBasis = new Basis_HGRAD_QUAD_Cn_FEM(cellBasis->getDegree()); - else if(name.find("TRI")!=std::string::npos) + else if(cellTopo.getKey() == shards::getCellTopologyData 
>()->key) hcurlBasis = new Basis_HGRAD_TRI_Cn_FEM(cellBasis->getDegree()); else { std::stringstream ss; @@ -417,58 +491,64 @@ void ProjectionStruct::createHDivProjectionStruct(const BasisPtrT for(ordinal_type is=0; isgetBaseCellTopology().getKey(sideDim, is); - auto sideBasisCub = cub_factory.create(subCellTopologyKey[sideDim][is], cub_degree); - basisPointsRange[sideDim][is] = range_type(numBasisEvalPoints, numBasisEvalPoints+sideBasisCub->getNumPoints()); + subCellTopologyKey(sideDim,is) = cellBasis->getBaseCellTopology().getKey(sideDim, is); + auto sideBasisCub = cub_factory.create(subCellTopologyKey(sideDim,is), cub_degree); + basisPointsRange(sideDim,is) = range_type(numBasisEvalPoints, numBasisEvalPoints+sideBasisCub->getNumPoints()); numBasisEvalPoints += sideBasisCub->getNumPoints(); basisCubPoints[sideDim][is] = view_type("basisCubPoints",sideBasisCub->getNumPoints(),sideDim); basisCubWeights[sideDim][is] = view_type("basisCubWeights",sideBasisCub->getNumPoints()); sideBasisCub->getCubature(basisCubPoints[sideDim][is], basisCubWeights[sideDim][is]); + maxNumBasisEvalPoints = std::max(maxNumBasisEvalPoints, sideBasisCub->getNumPoints()); cub_degree = sideBasisCubDegree + targetCubDegree; - auto sideTargetCub = cub_factory.create(subCellTopologyKey[sideDim][is], cub_degree); - targetPointsRange[sideDim][is] = range_type(numTargetEvalPoints, numTargetEvalPoints+sideTargetCub->getNumPoints()); + auto sideTargetCub = cub_factory.create(subCellTopologyKey(sideDim,is), cub_degree); + targetPointsRange(sideDim,is) = range_type(numTargetEvalPoints, numTargetEvalPoints+sideTargetCub->getNumPoints()); numTargetEvalPoints += sideTargetCub->getNumPoints(); targetCubPoints[sideDim][is] = view_type("targetCubPoints",sideTargetCub->getNumPoints(),sideDim); targetCubWeights[sideDim][is] = view_type("targetCubWeights",sideTargetCub->getNumPoints()); sideTargetCub->getCubature(targetCubPoints[sideDim][is], targetCubWeights[sideDim][is]); + maxNumTargetEvalPoints = 
std::max(maxNumTargetEvalPoints, sideTargetCub->getNumPoints()); } - subCellTopologyKey[dim][0] = cellBasis->getBaseCellTopology().getBaseKey(); - if(cellBasis->getDofCount(dim,0)>0) { + subCellTopologyKey(dim,0) = cellBasis->getBaseCellTopology().getBaseKey(); + if(hasCellDofs) { ordinal_type cub_degree = 2*basisCubDegree - 1; - auto elemBasisDivCub = cub_factory.create(subCellTopologyKey[dim][0], cub_degree); - basisDerivPointsRange[dim][0] = range_type(numBasisDerivEvalPoints, numBasisDerivEvalPoints+elemBasisDivCub->getNumPoints()); + auto elemBasisDivCub = cub_factory.create(subCellTopologyKey(dim,0), cub_degree); + basisDerivPointsRange(dim,0) = range_type(numBasisDerivEvalPoints, numBasisDerivEvalPoints+elemBasisDivCub->getNumPoints()); numBasisDerivEvalPoints += elemBasisDivCub->getNumPoints(); basisDerivCubPoints[dim][0] = view_type("basisDerivCubPoints",elemBasisDivCub->getNumPoints(),dim); basisDerivCubWeights[dim][0] = view_type("basisDerivCubWeights",elemBasisDivCub->getNumPoints()); elemBasisDivCub->getCubature(basisDerivCubPoints[dim][0], basisDerivCubWeights[dim][0]); + maxNumBasisDerivEvalPoints = std::max(maxNumBasisDerivEvalPoints, elemBasisDivCub->getNumPoints()); cub_degree = basisCubDegree - 1 + targetDivCubDegre; - auto elemTargetDivCub = cub_factory.create(subCellTopologyKey[dim][0], cub_degree); - targetDerivPointsRange[dim][0] = range_type(numTargetDerivEvalPoints, numTargetDerivEvalPoints+elemTargetDivCub->getNumPoints()); + auto elemTargetDivCub = cub_factory.create(subCellTopologyKey(dim,0), cub_degree); + targetDerivPointsRange(dim,0) = range_type(numTargetDerivEvalPoints, numTargetDerivEvalPoints+elemTargetDivCub->getNumPoints()); numTargetDerivEvalPoints += elemTargetDivCub->getNumPoints(); targetDerivCubPoints[dim][0] = view_type("targetDerivCubPoints",elemTargetDivCub->getNumPoints(),dim); targetDerivCubWeights[dim][0] = view_type("targetDerivCubWeights",elemTargetDivCub->getNumPoints()); 
elemTargetDivCub->getCubature(targetDerivCubPoints[dim][0], targetDerivCubWeights[dim][0]); + maxNumTargetDerivEvalPoints = std::max(maxNumTargetDerivEvalPoints, elemTargetDivCub->getNumPoints()); if(haveHCurlConstraint) { cub_degree = 2*basisCubDegree; - auto elemBasisCub = cub_factory.create(subCellTopologyKey[dim][0], cub_degree); - basisPointsRange[dim][0] = range_type(numBasisEvalPoints, numBasisEvalPoints + elemBasisCub->getNumPoints()); + auto elemBasisCub = cub_factory.create(subCellTopologyKey(dim,0), cub_degree); + basisPointsRange(dim,0) = range_type(numBasisEvalPoints, numBasisEvalPoints + elemBasisCub->getNumPoints()); numBasisEvalPoints += elemBasisCub->getNumPoints(); basisCubPoints[dim][0] = view_type("basisCubPoints",elemBasisCub->getNumPoints(),dim); basisCubWeights[dim][0] = view_type("basisCubWeights",elemBasisCub->getNumPoints()); elemBasisCub->getCubature(basisCubPoints[dim][0], basisCubWeights[dim][0]); + maxNumBasisEvalPoints = std::max(maxNumBasisEvalPoints, elemBasisCub->getNumPoints()); cub_degree = basisCubDegree + targetCubDegree; - auto elemTargetCub = cub_factory.create(subCellTopologyKey[dim][0], cub_degree); - targetPointsRange[dim][0] = range_type(numTargetEvalPoints, numTargetEvalPoints + elemTargetCub->getNumPoints()); + auto elemTargetCub = cub_factory.create(subCellTopologyKey(dim,0), cub_degree); + targetPointsRange(dim,0) = range_type(numTargetEvalPoints, numTargetEvalPoints + elemTargetCub->getNumPoints()); numTargetEvalPoints += elemTargetCub->getNumPoints(); targetCubPoints[dim][0] = view_type("targetCubPoints",elemTargetCub->getNumPoints(),dim); targetCubWeights[dim][0] = view_type("targetCubWeights",elemTargetCub->getNumPoints()); elemTargetCub->getCubature(targetCubPoints[dim][0], targetCubWeights[dim][0]); + maxNumTargetEvalPoints = std::max(maxNumTargetEvalPoints, elemTargetCub->getNumPoints()); } } } @@ -484,23 +564,33 @@ void ProjectionStruct::createHVolProjectionStruct(const BasisPtrT numTargetEvalPoints = 0; 
numTargetDerivEvalPoints = 0; + basisPointsRange = range_tag("basisPointsRange", 4,maxSubCellsCount); + targetPointsRange = range_tag("targetPointsRange", 4,maxSubCellsCount); + basisDerivPointsRange = range_tag("basisDerivPointsRange", 4,maxSubCellsCount); + targetDerivPointsRange = range_tag("targetDerivPointsRange", 4,maxSubCellsCount); + subCellTopologyKey = key_tag("subCellTopologyKey",4,maxSubCellsCount); + ordinal_type basisCubDegree = cellBasis->getDegree(); DefaultCubatureFactory cub_factory; - subCellTopologyKey[dim][0] = cellBasis->getBaseCellTopology().getBaseKey(); + subCellTopologyKey(dim,0) = cellBasis->getBaseCellTopology().getBaseKey(); + + maxNumBasisEvalPoints = 0; maxNumTargetEvalPoints =0; if(cellBasis->getDofCount(dim,0)>0) { ordinal_type cub_degree = 2*basisCubDegree; auto elemBasisCub = cub_factory.create(cellTopo.getBaseKey(), cub_degree); - basisPointsRange[dim][0] = range_type(0, elemBasisCub->getNumPoints()); + basisPointsRange(dim,0) = range_type(0, elemBasisCub->getNumPoints()); numBasisEvalPoints += elemBasisCub->getNumPoints(); + maxNumBasisEvalPoints = elemBasisCub->getNumPoints(); basisCubPoints[dim][0] = view_type("basisCubPoints",elemBasisCub->getNumPoints(),dim); basisCubWeights[dim][0] = view_type("basisCubWeights",elemBasisCub->getNumPoints()); elemBasisCub->getCubature(basisCubPoints[dim][0], basisCubWeights[dim][0]); cub_degree = basisCubDegree + targetCubDegree; auto elemTargetCub = cub_factory.create(cellTopo.getBaseKey(), cub_degree); - targetPointsRange[dim][0] = range_type(0, elemTargetCub->getNumPoints()); + targetPointsRange(dim,0) = range_type(0, elemTargetCub->getNumPoints()); numTargetEvalPoints += elemTargetCub->getNumPoints(); + maxNumTargetEvalPoints = elemTargetCub->getNumPoints(); targetCubPoints[dim][0] = view_type("targetCubPoints",elemTargetCub->getNumPoints(),dim); targetCubWeights[dim][0] = view_type("targetCubWeights",elemTargetCub->getNumPoints()); elemTargetCub->getCubature(targetCubPoints[dim][0], 
targetCubWeights[dim][0]); diff --git a/packages/intrepid2/src/Projection/Intrepid2_ProjectionTools.hpp b/packages/intrepid2/src/Projection/Intrepid2_ProjectionTools.hpp index f4aa3831db40..389d28b91e55 100644 --- a/packages/intrepid2/src/Projection/Intrepid2_ProjectionTools.hpp +++ b/packages/intrepid2/src/Projection/Intrepid2_ProjectionTools.hpp @@ -102,7 +102,10 @@ #include "Intrepid2_ProjectionStruct.hpp" - +#include "KokkosBatched_QR_Serial_Internal.hpp" +#include "KokkosBatched_ApplyQ_Serial_Internal.hpp" +#include "KokkosBatched_Trsv_Serial_Internal.hpp" +#include "KokkosBatched_Util.hpp" namespace Intrepid2 { @@ -162,14 +165,23 @@ namespace Experimental { performed on the \f$H^1\f$ seminorm and the \f$L^2\f$ norm respectively, instead of on the \f$L^2\f$ and \f$H^{-1}\f$ and norms. This requires more regularity of the target function. - \todo The implementation is mostly serial and needs to be improved for performance portability + \todo There is room for significant improvement. + One could separate the computation of the basis function values and derivatives from the functions getXXXBasisCoeffs, + so that they can be stored and reused for projecting other target functions. + Similarly one could store all the QR factorizations and reuse them for other target functions. + For internal evaluation points (that are not affected by orientation) one could compute the QR factorization on the reference cell + and then use on all the cells. + + Note: Other algorithmic improvements could be enabled by accessing the implementation of the orientation tools, + however, we preferred the projections to work with any orientation, and assuming only that internal basis functions are not affected by + the orientation. 
*/ template class ProjectionTools { public: - enum EvalPointsType {BASIS, TARGET}; + using EvalPointsType = typename ProjectionStruct::EvalPointsType; /** \brief Computes evaluation points for L2 projection @@ -195,7 +207,7 @@ class ProjectionTools { const Kokkos::DynRankView cellOrientations, const BasisType* cellBasis, ProjectionStruct * projStruct, - const EvalPointsType evalPointType = TARGET + const EvalPointsType evalPointType = EvalPointsType::TARGET ); /** \brief Computes the basis coefficients of the L2 projection of the target function @@ -257,7 +269,7 @@ class ProjectionTools { const Kokkos::DynRankView cellOrientations, const BasisType* cellBasis, ProjectionStruct * projStruct, - const EvalPointsType evalPointType = TARGET + const EvalPointsType evalPointType = EvalPointsType::TARGET ); /** \brief Computes the basis coefficients of the HGrad projection of the target function @@ -325,7 +337,7 @@ class ProjectionTools { const Kokkos::DynRankView cellOrientations, const BasisType* cellBasis, ProjectionStruct * projStruct, - const EvalPointsType evalPointType = TARGET + const EvalPointsType evalPointType = EvalPointsType::TARGET ); /** \brief Computes the basis coefficients of the HCurl projection of the target function @@ -395,7 +407,7 @@ class ProjectionTools { const Kokkos::DynRankView cellOrientations, const BasisType* cellBasis, ProjectionStruct * projStruct, - const EvalPointsType evalPointType = TARGET + const EvalPointsType evalPointType = EvalPointsType::TARGET ); /** \brief Computes the basis coefficients of the HDiv projection of the target function @@ -458,7 +470,7 @@ class ProjectionTools { const Kokkos::DynRankView cellOrientations, const BasisType* cellBasis, ProjectionStruct * projStruct, - const EvalPointsType evalPointType = TARGET + const EvalPointsType evalPointType = EvalPointsType::TARGET ); /** \brief Computes the basis coefficients of the HVol projection of the target function @@ -493,6 +505,90 @@ class ProjectionTools { + /** \brief 
Functor to solve a square system A x = b on each cell using QR method implemented in KokkosKernels + A is expected to be a saddle point (KKT) matrix of the form [C B; B^T 0], + where C has size nxn and B nxm, with n>0, m>=0. + B^T is copied from B, so one does not have to define the B^T portion of A. + b will contain the solution x. + The first n-entries of x are copied into the provided basis coefficients using the provided indexing. + */ + template + struct SolveSystem { + ViewType1 basisCoeffs_; // rank-2 view (C,F) containing the basis coefficients on each cell + ViewType2 elemMat_; // rank-3 view (C,P,P) containing the element matrix on each cell + ViewType2 elemRhs_; // rank-2 view (C,P) containing the element rhs on each cell + ViewType2 tau_; // rank-2 view (C,P) used to store the QR factorization + ViewType3 w_; // rank-2 view (C,P) used as a workspace (needs to be of Layout Right) + + const ViewType4 elemDof_; // rank-1 view having dimension n, containing the basis numbering + ordinal_type n_, m_; // basis cardinality and dimension of the constraint of the KKT system + + /** \brief Functor constructor + + \code + C - num. cells + P - num. evaluation points + \endcode + + + \param basisCoeffs [out] - rank-2 view (C,F) containing the basis coefficients + \param elemMat [in/out] - rank-3 view (C,P,P) containing the element matrix of size + numCells x (n+m)x(n+m) on each cell + it will be overwritten.
+ \param elemRhs [in/out] - rank-2 view (C,P) containing the element rhs on each cell + of size numCells x (n+m) + it will contain the solution of the system on output + \param tau [out] - rank-2 view (C,P) used to store the QR factorization + size: numCells x (n+m) + \param w [out] - rank-2 view (C,P) used has a workspace + Layout Right, size: numCells x (n+m) + \param elemDof [in] - rank-1 view having dimension n, containing the basis numbering + \param n [in] - ordinal_type, basis cardinality + \param m [in] - ordinal_type, dimension of the constraint of the KKT system + */ + SolveSystem (ViewType1 basisCoeffs, ViewType2 elemMat, ViewType2 elemRhs, ViewType2 tau, + ViewType3 w,const ViewType4 elemDof, ordinal_type n, ordinal_type m=0) : + basisCoeffs_(basisCoeffs), elemMat_(elemMat), elemRhs_(elemRhs), + tau_(tau), w_(w), elemDof_(elemDof), n_(n), m_(m){}; + + + void + KOKKOS_INLINE_FUNCTION + operator()(const ordinal_type ic) const { + auto A = Kokkos::subview(elemMat_, ic, Kokkos::ALL(), Kokkos::ALL()); + auto b = Kokkos::subview(elemRhs_, ic, Kokkos::ALL()); + auto tau = Kokkos::subview(tau_, ic, Kokkos::ALL()); + auto w = Kokkos::subview(w_, ic, Kokkos::ALL()); + + for(ordinal_type i=n_; i b + KokkosBatched::SerialApplyQ_RightNoTransForwardInternal::invoke( + 1, A.extent(0), A.extent(1), + A.data(), A.stride_0(), A.stride_1(), + tau.data(), tau.stride_0(), + b.data(), 1, b.stride_0(), + w.data()); + + // R^{-1} b -> b + KokkosBatched::SerialTrsvInternalUpper::invoke(false, + A.extent(0), + 1.0, + A.data(), A.stride_0(), A.stride_1(), + b.data(), b.stride_0()); + + //scattering b into the basis coefficients + for(ordinal_type i=0; i +struct ComputeBasisCoeffsOnEdges_HCurl { + const ViewType1 basisTanAtBasisEPoints_; + const ViewType1 basisAtBasisEPoints_; + const ViewType2 basisEWeights_; + const ViewType1 wTanBasisAtBasisEPoints_; + const ViewType2 targetEWeights_; + const ViewType1 basisAtTargetEPoints_; + const ViewType1 wTanBasisAtTargetEPoints_; + const 
ViewType3 tagToOrdinal_; + const ViewType4 targetAtTargetEPoints_; + const ViewType1 targetTanAtTargetEPoints_; + const ViewType1 refEdgesTangent_; + ordinal_type edgeCardinality_; + ordinal_type offsetBasis_; + ordinal_type offsetTarget_; + ordinal_type edgeDim_; + ordinal_type dim_; + ordinal_type iedge_; + + ComputeBasisCoeffsOnEdges_HCurl(const ViewType1 basisTanAtBasisEPoints, + const ViewType1 basisAtBasisEPoints, const ViewType2 basisEWeights, const ViewType1 wTanBasisAtBasisEPoints, const ViewType2 targetEWeights, + const ViewType1 basisAtTargetEPoints, const ViewType1 wTanBasisAtTargetEPoints, const ViewType3 tagToOrdinal, + const ViewType4 targetAtTargetEPoints, const ViewType1 targetTanAtTargetEPoints, + const ViewType1 refEdgesTangent, ordinal_type edgeCardinality, ordinal_type offsetBasis, + ordinal_type offsetTarget, ordinal_type edgeDim, + ordinal_type dim, ordinal_type iedge) : + basisTanAtBasisEPoints_(basisTanAtBasisEPoints), + basisAtBasisEPoints_(basisAtBasisEPoints), basisEWeights_(basisEWeights), wTanBasisAtBasisEPoints_(wTanBasisAtBasisEPoints), targetEWeights_(targetEWeights), + basisAtTargetEPoints_(basisAtTargetEPoints), wTanBasisAtTargetEPoints_(wTanBasisAtTargetEPoints), + tagToOrdinal_(tagToOrdinal), targetAtTargetEPoints_(targetAtTargetEPoints), + targetTanAtTargetEPoints_(targetTanAtTargetEPoints), + refEdgesTangent_(refEdgesTangent), edgeCardinality_(edgeCardinality), offsetBasis_(offsetBasis), + offsetTarget_(offsetTarget), edgeDim_(edgeDim), dim_(dim), iedge_(iedge) + {} + + void + KOKKOS_INLINE_FUNCTION + operator()(const ordinal_type ic) const { + + ordinal_type numBasisEPoints = basisEWeights_.extent(0); + ordinal_type numTargetEPoints = targetEWeights_.extent(0); + for(ordinal_type j=0; j +struct ComputeBasisCoeffsOnFaces_HCurl { + const ViewType1 basisCoeffs_; + const ViewType2 orts_; + const ViewType3 negPartialProjTan_; + const ViewType3 negPartialProjCurlNormal_; + const ViewType3 hgradBasisGradAtBasisEPoints_; + const 
ViewType3 wHgradBasisGradAtBasisEPoints_; + const ViewType3 basisCurlAtBasisCurlEPoints_; + const ViewType3 basisCurlNormalAtBasisCurlEPoints_; + const ViewType3 basisAtBasisEPoints_; + const ViewType3 normalTargetCurlAtTargetEPoints_; + const ViewType3 basisTanAtBasisEPoints_; + const ViewType3 hgradBasisGradAtTargetEPoints_; + const ViewType3 wHgradBasisGradAtTargetEPoints_; + const ViewType3 wNormalBasisCurlAtBasisCurlEPoints_; + const ViewType3 basisCurlAtTargetCurlEPoints_; + const ViewType3 wNormalBasisCurlBasisAtTargetCurlEPoints_; + const ViewType4 targetAtTargetEPoints_; + const ViewType3 targetTanAtTargetEPoints_; + const ViewType4 targetCurlAtTargetCurlEPoints_; + const ViewType5 basisEWeights_; + const ViewType5 targetEWeights_; + const ViewType5 basisCurlEWeights_; + const ViewType5 targetCurlEWeights_; + const ViewType6 tagToOrdinal_; + const ViewType6 hGradTagToOrdinal_; + const ViewType7 refTopologyKey_; + const ViewType3 refFacesNormal_; + const ViewType3 refFacesTangents_; + const ViewType8 computedDofs_; + ordinal_type offsetBasis_; + ordinal_type offsetBasisCurl_; + ordinal_type offsetTarget_; + ordinal_type offsetTargetCurl_; + ordinal_type iface_; + ordinal_type hgradCardinality_; + ordinal_type numFaces_; + ordinal_type numFaceDofs_; + ordinal_type numEdgeDofs_; + ordinal_type faceDim_; + ordinal_type dim_; + + + + + ComputeBasisCoeffsOnFaces_HCurl(const ViewType1 basisCoeffs, + const ViewType2 orts, const ViewType3 negPartialProjTan, const ViewType3 negPartialProjCurlNormal, + const ViewType3 hgradBasisGradAtBasisEPoints, const ViewType3 wHgradBasisGradAtBasisEPoints, + const ViewType3 basisCurlAtBasisCurlEPoints, const ViewType3 basisCurlNormalAtBasisCurlEPoints, + const ViewType3 basisAtBasisEPoints, + const ViewType3 normalTargetCurlAtTargetEPoints, + const ViewType3 basisTanAtBasisEPoints, + const ViewType3 hgradBasisGradAtTargetEPoints, const ViewType3 wHgradBasisGradAtTargetEPoints, + const ViewType3 wNormalBasisCurlAtBasisCurlEPoints, 
const ViewType3 basisCurlAtTargetCurlEPoints, + const ViewType3 wNormalBasisCurlBasisAtTargetCurlEPoints, const ViewType4 targetAtTargetEPoints, + const ViewType3 targetTanAtTargetEPoints, const ViewType4 targetCurlAtTargetCurlEPoints, + const ViewType5 basisEWeights, const ViewType5 targetEWeights, + const ViewType5 basisCurlEWeights, const ViewType5 targetCurlEWeights, const ViewType6 tagToOrdinal, + const ViewType6 hGradTagToOrdinal, const ViewType7 refTopologyKey, + const ViewType3 refFacesNormal, const ViewType3 refFacesTangents, + const ViewType8 computedDofs, ordinal_type offsetBasis, + ordinal_type offsetBasisCurl, ordinal_type offsetTarget, + ordinal_type offsetTargetCurl, ordinal_type iface, + ordinal_type hgradCardinality, ordinal_type numFaces, + ordinal_type numFaceDofs, ordinal_type numEdgeDofs, + ordinal_type faceDim, ordinal_type dim): + basisCoeffs_(basisCoeffs), + orts_(orts), negPartialProjTan_(negPartialProjTan), negPartialProjCurlNormal_(negPartialProjCurlNormal), + hgradBasisGradAtBasisEPoints_(hgradBasisGradAtBasisEPoints), wHgradBasisGradAtBasisEPoints_(wHgradBasisGradAtBasisEPoints), + basisCurlAtBasisCurlEPoints_(basisCurlAtBasisCurlEPoints), basisCurlNormalAtBasisCurlEPoints_(basisCurlNormalAtBasisCurlEPoints), + basisAtBasisEPoints_(basisAtBasisEPoints), + normalTargetCurlAtTargetEPoints_(normalTargetCurlAtTargetEPoints), basisTanAtBasisEPoints_(basisTanAtBasisEPoints), + hgradBasisGradAtTargetEPoints_(hgradBasisGradAtTargetEPoints), wHgradBasisGradAtTargetEPoints_(wHgradBasisGradAtTargetEPoints), + wNormalBasisCurlAtBasisCurlEPoints_(wNormalBasisCurlAtBasisCurlEPoints), basisCurlAtTargetCurlEPoints_(basisCurlAtTargetCurlEPoints), + wNormalBasisCurlBasisAtTargetCurlEPoints_(wNormalBasisCurlBasisAtTargetCurlEPoints), targetAtTargetEPoints_(targetAtTargetEPoints), + targetTanAtTargetEPoints_(targetTanAtTargetEPoints), targetCurlAtTargetCurlEPoints_(targetCurlAtTargetCurlEPoints), + basisEWeights_(basisEWeights), 
targetEWeights_(targetEWeights), + basisCurlEWeights_(basisCurlEWeights), targetCurlEWeights_(targetCurlEWeights), tagToOrdinal_(tagToOrdinal), + hGradTagToOrdinal_(hGradTagToOrdinal), refTopologyKey_(refTopologyKey), + refFacesNormal_(refFacesNormal), refFacesTangents_(refFacesTangents), + computedDofs_(computedDofs), offsetBasis_(offsetBasis), + offsetBasisCurl_(offsetBasisCurl), offsetTarget_(offsetTarget), + offsetTargetCurl_(offsetTargetCurl), iface_(iface), + hgradCardinality_(hgradCardinality), numFaces_(numFaces), + numFaceDofs_(numFaceDofs), numEdgeDofs_(numEdgeDofs), + faceDim_(faceDim), dim_(dim){} + + void + KOKKOS_INLINE_FUNCTION + operator()(const ordinal_type ic) const { + + ordinal_type fOrt[6]; + orts_(ic).getFaceOrientation(fOrt, numFaces_); + + ordinal_type ort = fOrt[iface_]; + typename ViewType3::value_type ortJacData[4]; //faceDim x faceDim + auto ortJac = ViewType3(ortJacData, faceDim_, faceDim_); + Impl::OrientationTools::getJacobianOfOrientationMap(ortJac, refTopologyKey_(faceDim_,iface_), ort); + + ordinal_type numBasisEPoints = basisEWeights_.extent(0); + ordinal_type numTargetEPoints = targetEWeights_.extent(0); + for(ordinal_type j=0; j +struct ComputeBasisCoeffsOnCell_HCurl { + const ViewType1 basisCoeffs_; + const ViewType2 negPartialProj_; + const ViewType2 negPartialProjCurl_; + const ViewType2 cellBasisAtBasisEPoints_; + const ViewType2 cellBasisCurlAtBasisCurlEPoints_; + const ViewType2 basisAtBasisEPoints_; + const ViewType2 hgradBasisGradAtBasisEPoints_; + const ViewType2 basisCurlAtBasisCurlEPoints_; + const ViewType2 hgradBasisGradAtTargetEPoints_; + const ViewType2 basisCurlAtTargetCurlEPoints_; + const ViewType3 basisEWeights_; + const ViewType3 basisCurlEWeights_; + const ViewType2 wHgradBasisGradAtBasisEPoints_; + const ViewType2 wBasisCurlAtBasisCurlEPoints_; + const ViewType3 targetEWeights_; + const ViewType3 targetCurlEWeights_; + const ViewType2 wHgradBasisGradAtTargetEPoints_; + const ViewType2 
wBasisCurlAtTargetCurlEPoints_; + const ViewType4 computedDofs_; + const ViewType5 tagToOrdinal_; + const ViewType5 hGradTagToOrdinal_; + ordinal_type numCellDofs_; + ordinal_type hgradCardinality_; + ordinal_type offsetBasis_; + ordinal_type offsetBasisCurl_; + ordinal_type offsetTargetCurl_; + ordinal_type numEdgeFaceDofs_; + ordinal_type dim_; + ordinal_type derDim_; + + ComputeBasisCoeffsOnCell_HCurl(const ViewType1 basisCoeffs, ViewType2 negPartialProj, ViewType2 negPartialProjCurl, + const ViewType2 cellBasisAtBasisEPoints, const ViewType2 cellBasisCurlAtBasisCurlEPoints, + const ViewType2 basisAtBasisEPoints, const ViewType2 hgradBasisGradAtBasisEPoints, const ViewType2 basisCurlAtBasisCurlEPoints, + const ViewType2 hgradBasisGradAtTargetEPoints, const ViewType2 basisCurlAtTargetCurlEPoints, + const ViewType3 basisEWeights, const ViewType3 basisCurlEWeights, + const ViewType2 wHgradBasisGradAtBasisEPoints, const ViewType2 wBasisCurlAtBasisCurlEPoints, + const ViewType3 targetEWeights, const ViewType3 targetCurlEWeights, + const ViewType2 wHgradBasisGradAtTargetEPoints, + const ViewType2 wBasisCurlAtTargetCurlEPoints, const ViewType4 computedDofs, + const ViewType5 tagToOrdinal, const ViewType5 hGradTagToOrdinal, + ordinal_type numCellDofs, ordinal_type hgradCardinality, + ordinal_type offsetBasis, ordinal_type offsetBasisCurl, ordinal_type offsetTargetCurl, + ordinal_type numEdgeFaceDofs, ordinal_type dim, ordinal_type derDim) : + basisCoeffs_(basisCoeffs), negPartialProj_(negPartialProj), negPartialProjCurl_(negPartialProjCurl), + cellBasisAtBasisEPoints_(cellBasisAtBasisEPoints), cellBasisCurlAtBasisCurlEPoints_(cellBasisCurlAtBasisCurlEPoints), + basisAtBasisEPoints_(basisAtBasisEPoints), hgradBasisGradAtBasisEPoints_(hgradBasisGradAtBasisEPoints), + basisCurlAtBasisCurlEPoints_(basisCurlAtBasisCurlEPoints), + hgradBasisGradAtTargetEPoints_(hgradBasisGradAtTargetEPoints), + basisCurlAtTargetCurlEPoints_(basisCurlAtTargetCurlEPoints), + 
basisEWeights_(basisEWeights), basisCurlEWeights_(basisCurlEWeights), + wHgradBasisGradAtBasisEPoints_(wHgradBasisGradAtBasisEPoints), + wBasisCurlAtBasisCurlEPoints_(wBasisCurlAtBasisCurlEPoints), + targetEWeights_(targetEWeights), targetCurlEWeights_(targetCurlEWeights), + wHgradBasisGradAtTargetEPoints_(wHgradBasisGradAtTargetEPoints), + wBasisCurlAtTargetCurlEPoints_(wBasisCurlAtTargetCurlEPoints), + computedDofs_(computedDofs), tagToOrdinal_(tagToOrdinal), hGradTagToOrdinal_(hGradTagToOrdinal), + numCellDofs_(numCellDofs), hgradCardinality_(hgradCardinality), + offsetBasis_(offsetBasis), offsetBasisCurl_(offsetBasisCurl), offsetTargetCurl_(offsetTargetCurl), + numEdgeFaceDofs_(numEdgeFaceDofs), dim_(dim), derDim_(derDim) {} + + void + KOKKOS_INLINE_FUNCTION + operator()(const ordinal_type ic) const { + + ordinal_type numBasisPoints = basisEWeights_.extent(0); + ordinal_type numBasisCurlPoints = basisCurlEWeights_.extent(0); + ordinal_type numTargetPoints = targetEWeights_.extent(0); + ordinal_type numTargetCurlPoints = targetCurlEWeights_.extent(0); + for(ordinal_type j=0; j template void -ProjectionTools::getHCurlEvaluationPoints(typename BasisType::ScalarViewType evaluationPoints, - typename BasisType::ScalarViewType extDerivEvaluationPoints, +ProjectionTools::getHCurlEvaluationPoints(typename BasisType::ScalarViewType targetEPoints, + typename BasisType::ScalarViewType targetCurlEPoints, const Kokkos::DynRankView orts, const BasisType* cellBasis, ProjectionStruct * projStruct, const EvalPointsType evalPointType) { typedef typename BasisType::scalarType scalarType; - typedef Kokkos::DynRankView ScalarViewType; + typedef Kokkos::DynRankView ScalarViewType; typedef Kokkos::pair range_type; const auto cellTopo = cellBasis->getBaseCellTopology(); ordinal_type dim = cellTopo.getDimension(); - ordinal_type numCells = evaluationPoints.extent(0); + ordinal_type numCells = targetEPoints.extent(0); const ordinal_type edgeDim = 1; const ordinal_type faceDim = 2; 
ordinal_type numEdges = (cellBasis->getDofCount(1, 0) > 0) ? cellTopo.getEdgeCount() : 0; ordinal_type numFaces = (cellBasis->getDofCount(2, 0) > 0) ? cellTopo.getFaceCount() : 0; - Kokkos::View eOrt("eOrt", numEdges), fOrt("fOrt", numFaces); + CellTools::setSubcellParametrization(); + typename CellTools::subcellParamViewType subcellParamEdge, subcellParamFace; + if(numEdges>0) + CellTools::getSubcellParametrization(subcellParamEdge, edgeDim, cellTopo); + if(numFaces>0) + CellTools::getSubcellParametrization(subcellParamFace, faceDim, cellTopo); + + auto refTopologyKey = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTopologyKey()); + + auto evalPointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getPointsRange(evalPointType)); + auto curlEPointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getDerivPointsRange(evalPointType)); + + ScalarViewType workView("workView", numCells, std::max(projStruct->getMaxNumEvalPoints(evalPointType),projStruct->getMaxNumDerivPoints(evalPointType)), dim-1); for(ordinal_type ie=0; iegetTargetPointsRange(edgeDim, ie); - cubPoints = projStruct->getTargetEvalPoints(edgeDim, ie); - } - else { - edgePointsRange = projStruct->getBasisPointsRange(edgeDim, ie); - cubPoints = projStruct->getBasisEvalPoints(edgeDim, ie); - } - ScalarViewType orientedTargetCubPoints("orientedTargetCubPoints", cubPoints.extent(0),edgeDim); + auto edgePointsRange = evalPointsRange(edgeDim, ie); + auto edgeRefPointsRange = range_type(0, range_size(edgePointsRange)); + auto edgeEPoints = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getEvalPoints(edgeDim,ie,evalPointType)); - const auto topoKey = projStruct->getTopologyKey(edgeDim,ie); + Kokkos::parallel_for + ("Evaluate Points Edges ", + Kokkos::RangePolicy (0, numCells), + KOKKOS_LAMBDA (const size_t ic) { - for(ordinal_type ic=0; 
ic::mapToReferenceSubcell(Kokkos::subview(evaluationPoints,ic,edgePointsRange,Kokkos::ALL()), orientedTargetCubPoints, edgeDim, ie, cellBasis->getBaseCellTopology()); - } + ordinal_type eOrt[12]; + orts(ic).getEdgeOrientation(eOrt, numEdges); + ordinal_type ort = eOrt[ie]; + + auto orientedEdgeEPoints = Kokkos::subview(workView, ic, edgeRefPointsRange, range_type(0,edgeDim)); + Impl::OrientationTools::mapToModifiedReference(orientedEdgeEPoints,edgeEPoints,refTopologyKey(edgeDim, ie),ort); + CellTools::mapToReferenceSubcell(Kokkos::subview(targetEPoints,ic,edgePointsRange,Kokkos::ALL()), orientedEdgeEPoints, subcellParamEdge, edgeDim, ie, dim); + }); } for(ordinal_type iface=0; ifacegetDerivEvalPoints(faceDim,iface,evalPointType)); - ScalarViewType cubPoints;//("cubPoints", numTargetCubPoints, faceDim); - range_type facePointsRange; - if(evalPointType == TARGET) { - cubPoints = projStruct->getTargetEvalPoints(faceDim, iface); - facePointsRange = projStruct->getTargetPointsRange(faceDim, iface); - } else { - cubPoints = projStruct->getBasisEvalPoints(faceDim, iface); - facePointsRange = projStruct->getBasisPointsRange(faceDim, iface); - } - - ScalarViewType curlCubPoints;//("curlCubPoints", numTargetCurlCubPoints, faceDim); - range_type faceCurlPointsRange; - if(evalPointType == TARGET) { - curlCubPoints = projStruct->getTargetDerivEvalPoints(faceDim, iface); - faceCurlPointsRange = projStruct->getTargetDerivPointsRange(faceDim, iface); - } else { - curlCubPoints = projStruct->getBasisDerivEvalPoints(faceDim, iface); - faceCurlPointsRange = projStruct->getBasisDerivPointsRange(faceDim, iface); - } + auto facePointsRange = evalPointsRange(faceDim, iface); + auto faceRefPointsRange = range_type(0, range_size(facePointsRange)); + auto faceEPoints = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getEvalPoints(faceDim,iface,evalPointType)); - ScalarViewType faceCubPoints("faceCubPoints", cubPoints.extent(0), faceDim); - ScalarViewType 
faceCurlCubPoints("faceCurlCubPoints", curlCubPoints.extent(0), faceDim); + Kokkos::parallel_for + ("Evaluate Points Faces ", + Kokkos::RangePolicy (0, numCells), + KOKKOS_LAMBDA (const size_t ic) { - const auto topoKey = projStruct->getTopologyKey(faceDim,iface); - for(ordinal_type ic=0; ic::mapToReferenceSubcell(Kokkos::subview(evaluationPoints, ic, facePointsRange, Kokkos::ALL()), faceCubPoints, faceDim, iface, cellBasis->getBaseCellTopology()); + auto orientedFaceEPoints = Kokkos::subview(workView, ic, faceRefPointsRange, Kokkos::ALL()); + Impl::OrientationTools::mapToModifiedReference(orientedFaceEPoints,faceEPoints,refTopologyKey(faceDim, iface),ort); + CellTools::mapToReferenceSubcell(Kokkos::subview(targetEPoints, ic, facePointsRange, Kokkos::ALL()), orientedFaceEPoints , subcellParamFace, faceDim, iface, dim); - Impl::OrientationTools::mapToModifiedReference(faceCurlCubPoints,curlCubPoints,topoKey,ort); - CellTools::mapToReferenceSubcell(Kokkos::subview(extDerivEvaluationPoints, ic, faceCurlPointsRange, Kokkos::ALL()), faceCurlCubPoints, faceDim, iface, cellBasis->getBaseCellTopology()); - } + auto orientedFaceCurlEPoints = Kokkos::subview(workView, ic, faceRefCurlPointsRange, Kokkos::ALL()); + Impl::OrientationTools::mapToModifiedReference(orientedFaceCurlEPoints,faceCurlEPoints,refTopologyKey(faceDim, iface),ort); + CellTools::mapToReferenceSubcell(Kokkos::subview(targetCurlEPoints, ic, faceCurlPointsRange, Kokkos::ALL()), orientedFaceCurlEPoints, subcellParamFace, faceDim, iface, dim); + }); } if(cellBasis->getDofCount(dim,0)>0) { - range_type cellPointsRange; - ScalarViewType cubPoints; - if(evalPointType == TARGET) { - cubPoints = projStruct->getTargetEvalPoints(dim, 0); - cellPointsRange = projStruct->getTargetPointsRange(dim, 0); - } else { - cubPoints = projStruct->getBasisEvalPoints(dim, 0); - cellPointsRange = projStruct->getBasisPointsRange(dim, 0); - } - RealSpaceTools::clone(Kokkos::subview(evaluationPoints, Kokkos::ALL(), cellPointsRange, 
Kokkos::ALL()), cubPoints); + auto cellEPoints = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getEvalPoints(dim,0,evalPointType)); + RealSpaceTools::clone(Kokkos::subview(targetEPoints, Kokkos::ALL(), evalPointsRange(dim, 0), Kokkos::ALL()), cellEPoints); - range_type cellCurlPointsRange; - ScalarViewType curlCubPoints; - if(evalPointType == TARGET) { - curlCubPoints = projStruct->getTargetDerivEvalPoints(dim, 0); - cellCurlPointsRange = projStruct->getTargetDerivPointsRange(dim, 0); - } else { - curlCubPoints = projStruct->getBasisDerivEvalPoints(dim, 0); - cellCurlPointsRange = projStruct->getBasisDerivPointsRange(dim, 0); - } - RealSpaceTools::clone(Kokkos::subview(extDerivEvaluationPoints, Kokkos::ALL(), cellCurlPointsRange, Kokkos::ALL()), curlCubPoints); + auto cellCurlEPoints = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getDerivEvalPoints(dim,0,evalPointType)); + RealSpaceTools::clone(Kokkos::subview(targetCurlEPoints, Kokkos::ALL(), curlEPointsRange(dim, 0), Kokkos::ALL()), cellCurlEPoints); } } @@ -179,40 +498,37 @@ typename BasisType, typename ortValueType,class ...ortProperties> void ProjectionTools::getHCurlBasisCoeffs(Kokkos::DynRankView basisCoeffs, - const Kokkos::DynRankView targetAtEvalPoints, - const Kokkos::DynRankView targetCurlAtCurlEvalPoints, - const typename BasisType::ScalarViewType evaluationPoints, - const typename BasisType::ScalarViewType extDerivEvaluationPoints, + const Kokkos::DynRankView targetAtTargetEPoints, + const Kokkos::DynRankView targetCurlAtTargetCurlEPoints, + const typename BasisType::ScalarViewType targetEPoints, + const typename BasisType::ScalarViewType targetCurlEPoints, const Kokkos::DynRankView orts, const BasisType* cellBasis, ProjectionStruct * projStruct){ - typedef typename Kokkos::Impl::is_space::host_mirror_space::execution_space host_space_type; typedef typename BasisType::scalarType scalarType; typedef Kokkos::DynRankView ScalarViewType; 
typedef Kokkos::pair range_type; const auto cellTopo = cellBasis->getBaseCellTopology(); ordinal_type dim = cellTopo.getDimension(); - ordinal_type numTotalEvaluationPoints(targetAtEvalPoints.extent(1)), - numTotalCurlEvaluationPoints(targetCurlAtCurlEvalPoints.extent(1)); + ordinal_type numTotalTargetEPoints(targetAtTargetEPoints.extent(1)), + numTotalTargetCurlEPoints(targetCurlAtTargetCurlEPoints.extent(1)); ordinal_type basisCardinality = cellBasis->getCardinality(); - ordinal_type numCells = targetAtEvalPoints.extent(0); + ordinal_type numCells = targetAtTargetEPoints.extent(0); const ordinal_type edgeDim = 1; const ordinal_type faceDim = 2; const ordinal_type derDim = dim == 3 ? dim : 1; + const Kokkos::RangePolicy policy(0, numCells); + const std::string& name = cellBasis->getName(); ordinal_type numEdges = (cellBasis->getDofCount(1, 0) > 0) ? cellTopo.getEdgeCount() : 0; ordinal_type numFaces = (cellBasis->getDofCount(2, 0) > 0) ? cellTopo.getFaceCount() : 0; - Kokkos::View eOrt("eOrt", numEdges); - Kokkos::View fOrt("fOrt", numFaces); - ScalarViewType refEdgeTan("refEdgeTan", dim); - ScalarViewType refFaceTangents("refFaceTangents", dim, 2); - ScalarViewType refFaceNormal("refFaceNormal", dim); - auto refFaceTanU = Kokkos::subview(refFaceTangents, Kokkos::ALL, 0); - auto refFaceTanV = Kokkos::subview(refFaceTangents, Kokkos::ALL, 1); + ScalarViewType refEdgesTangent("refEdgesTangent", numEdges, dim); + ScalarViewType refFacesTangents("refFaceTangents", numFaces, dim, 2); + ScalarViewType refFacesNormal("refFaceNormal", numFaces, dim); ordinal_type numEdgeDofs(0); for(ordinal_type ie=0; ie::getHCurlBasisCoeffs(Kokkos::DynRankViewgetDofCount(faceDim,iface); - Kokkos::View computedDofs("computedDofs",numEdgeDofs+numFaceDofs); + auto tagToOrdinal = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(), cellBasis->getAllDofOrdinal()); - ordinal_type computedDofsCount = 0; + Kokkos::View computedDofs("computedDofs",numEdgeDofs+numFaceDofs); + + auto 
targetEPointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetPointsRange()); + auto targetCurlEPointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetDerivPointsRange()); + + auto basisEPointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisPointsRange()); + auto basisCurlEPointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisDerivPointsRange()); - ordinal_type numTotalCubPoints = projStruct->getNumBasisEvalPoints(), numTotalCurlCubPoints = projStruct->getNumBasisDerivEvalPoints(); - ScalarViewType cubPoints("cubPoints",numCells,numTotalCubPoints, dim); - ScalarViewType curlCubPoints("curlCubPoints",numCells,numTotalCurlCubPoints, dim); - getHCurlEvaluationPoints(cubPoints, curlCubPoints, orts, cellBasis, projStruct, BASIS); + auto refTopologyKey = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTopologyKey()); - ScalarViewType basisAtCubPoints("basisAtCubPoints",numCells,basisCardinality, numTotalCubPoints, dim); - ScalarViewType basisAtTargetCubPoints("basisAtTargetCubPoints",numCells,basisCardinality, numTotalEvaluationPoints, dim); + ordinal_type numTotalBasisEPoints = projStruct->getNumBasisEvalPoints(), numTotalBasisCurlEPoints = projStruct->getNumBasisDerivEvalPoints(); + + ScalarViewType basisEPoints("basisEPoints",numCells,numTotalBasisEPoints, dim); + ScalarViewType basisCurlEPoints("basisCurlEPoints",numCells,numTotalBasisCurlEPoints, dim); + getHCurlEvaluationPoints(basisEPoints, basisCurlEPoints, orts, cellBasis, projStruct, EvalPointsType::BASIS); + + ScalarViewType basisAtBasisEPoints("basisAtBasisEPoints",numCells,basisCardinality, numTotalBasisEPoints, dim); + ScalarViewType basisAtTargetEPoints("basisAtTargetEPoints",numCells,basisCardinality, numTotalTargetEPoints, dim); { - ScalarViewType 
nonOrientedBasisAtCubPoints("nonOrientedBasisAtCubPoints",numCells,basisCardinality, numTotalCubPoints, dim); - ScalarViewType nonOrientedBasisAtTargetCubPoints("nonOrientedBasisAtTargetCubPoints",numCells,basisCardinality, numTotalEvaluationPoints, dim); + ScalarViewType nonOrientedBasisAtBasisEPoints("nonOrientedBasisAtEPoints",numCells,basisCardinality, numTotalBasisEPoints, dim); + ScalarViewType nonOrientedBasisAtTargetEPoints("nonOrientedBasisAtTargetEPoints",numCells,basisCardinality, numTotalTargetEPoints, dim); for(ordinal_type ic=0; icgetValues(Kokkos::subview(nonOrientedBasisAtTargetCubPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(evaluationPoints, ic, Kokkos::ALL(), Kokkos::ALL())); - cellBasis->getValues(Kokkos::subview(nonOrientedBasisAtCubPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(cubPoints, ic, Kokkos::ALL(), Kokkos::ALL())); + cellBasis->getValues(Kokkos::subview(nonOrientedBasisAtTargetEPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(targetEPoints, ic, Kokkos::ALL(), Kokkos::ALL())); + cellBasis->getValues(Kokkos::subview(nonOrientedBasisAtBasisEPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(basisEPoints, ic, Kokkos::ALL(), Kokkos::ALL())); } - OrientationTools::modifyBasisByOrientation(basisAtCubPoints, nonOrientedBasisAtCubPoints, orts, cellBasis); - OrientationTools::modifyBasisByOrientation(basisAtTargetCubPoints, nonOrientedBasisAtTargetCubPoints, orts, cellBasis); + OrientationTools::modifyBasisByOrientation(basisAtBasisEPoints, nonOrientedBasisAtBasisEPoints, orts, cellBasis); + OrientationTools::modifyBasisByOrientation(basisAtTargetEPoints, nonOrientedBasisAtTargetEPoints, orts, cellBasis); } - ScalarViewType basisCurlAtCurlCubPoints; - ScalarViewType basisCurlAtTargetCurlCubPoints; - if(numTotalCurlEvaluationPoints>0) { - ScalarViewType nonOrientedBasisCurlAtTargetCurlCubPoints, nonOrientedBasisCurlAtCurlCubPoints; + ScalarViewType 
basisCurlAtBasisCurlEPoints; + ScalarViewType basisCurlAtTargetCurlEPoints; + if(numTotalBasisCurlEPoints>0) { + ScalarViewType nonOrientedBasisCurlAtTargetCurlEPoints, nonOrientedBasisCurlAtBasisCurlEPoints; if (dim == 3) { - basisCurlAtCurlCubPoints = ScalarViewType ("basisCurlAtCurlCubPoints",numCells,basisCardinality, numTotalCurlCubPoints, dim); - nonOrientedBasisCurlAtCurlCubPoints = ScalarViewType ("nonOrientedBasisCurlAtCurlCubPoints",numCells,basisCardinality, numTotalCurlCubPoints, dim); - basisCurlAtTargetCurlCubPoints = ScalarViewType("basisCurlAtTargetCurlCubPoints",numCells,basisCardinality, numTotalCurlEvaluationPoints, dim); - nonOrientedBasisCurlAtTargetCurlCubPoints = ScalarViewType("nonOrientedBasisCurlAtTargetCurlCubPoints",numCells,basisCardinality, numTotalCurlEvaluationPoints, dim); + basisCurlAtBasisCurlEPoints = ScalarViewType ("basisCurlAtBasisCurlEPoints",numCells,basisCardinality, numTotalBasisCurlEPoints, dim); + nonOrientedBasisCurlAtBasisCurlEPoints = ScalarViewType ("nonOrientedBasisCurlAtBasisCurlEPoints",numCells,basisCardinality, numTotalBasisCurlEPoints, dim); + basisCurlAtTargetCurlEPoints = ScalarViewType("basisCurlAtTargetCurlEPoints",numCells,basisCardinality, numTotalTargetCurlEPoints, dim); + nonOrientedBasisCurlAtTargetCurlEPoints = ScalarViewType("nonOrientedBasisCurlAtTargetCurlEPoints",numCells,basisCardinality, numTotalTargetCurlEPoints, dim); } else { - basisCurlAtCurlCubPoints = ScalarViewType ("basisCurlAtCurlCubPoints",numCells,basisCardinality, numTotalCurlCubPoints); - nonOrientedBasisCurlAtCurlCubPoints = ScalarViewType ("nonOrientedBasisCurlAtCurlCubPoints",numCells,basisCardinality, numTotalCurlCubPoints); - basisCurlAtTargetCurlCubPoints = ScalarViewType("basisCurlAtTargetCurlCubPoints",numCells,basisCardinality, numTotalCurlEvaluationPoints); - nonOrientedBasisCurlAtTargetCurlCubPoints = ScalarViewType("nonOrientedBasisCurlAtTargetCurlCubPoints",numCells,basisCardinality, numTotalCurlEvaluationPoints); + 
basisCurlAtBasisCurlEPoints = ScalarViewType ("basisCurlAtBasisCurlEPoints",numCells,basisCardinality, numTotalBasisCurlEPoints); + nonOrientedBasisCurlAtBasisCurlEPoints = ScalarViewType ("nonOrientedBasisCurlAtBasisCurlEPoints",numCells,basisCardinality, numTotalBasisCurlEPoints); + basisCurlAtTargetCurlEPoints = ScalarViewType("basisCurlAtTargetCurlEPoints",numCells,basisCardinality, numTotalTargetCurlEPoints); + nonOrientedBasisCurlAtTargetCurlEPoints = ScalarViewType("nonOrientedBasisCurlAtTargetCurlEPoints",numCells,basisCardinality, numTotalTargetCurlEPoints); } for(ordinal_type ic=0; icgetValues(Kokkos::subview(nonOrientedBasisCurlAtCurlCubPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(curlCubPoints, ic, Kokkos::ALL(), Kokkos::ALL()),OPERATOR_CURL); - cellBasis->getValues(Kokkos::subview(nonOrientedBasisCurlAtTargetCurlCubPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(extDerivEvaluationPoints, ic, Kokkos::ALL(), Kokkos::ALL()),OPERATOR_CURL); + cellBasis->getValues(Kokkos::subview(nonOrientedBasisCurlAtBasisCurlEPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(basisCurlEPoints, ic, Kokkos::ALL(), Kokkos::ALL()),OPERATOR_CURL); + cellBasis->getValues(Kokkos::subview(nonOrientedBasisCurlAtTargetCurlEPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(targetCurlEPoints, ic, Kokkos::ALL(), Kokkos::ALL()),OPERATOR_CURL); } - OrientationTools::modifyBasisByOrientation(basisCurlAtCurlCubPoints, nonOrientedBasisCurlAtCurlCubPoints, orts, cellBasis); - OrientationTools::modifyBasisByOrientation(basisCurlAtTargetCurlCubPoints, nonOrientedBasisCurlAtTargetCurlCubPoints, orts, cellBasis); + OrientationTools::modifyBasisByOrientation(basisCurlAtBasisCurlEPoints, nonOrientedBasisCurlAtBasisCurlEPoints, orts, cellBasis); + OrientationTools::modifyBasisByOrientation(basisCurlAtTargetCurlEPoints, nonOrientedBasisCurlAtTargetCurlEPoints, orts, cellBasis); } + ordinal_type computedDofsCount = 
0; for(ordinal_type ie=0; iegetDofCount(edgeDim,ie); - ordinal_type numCubPoints = projStruct->getNumBasisEvalPoints(edgeDim, ie); - ordinal_type numTargetCubPoints = projStruct->getNumTargetEvalPoints(edgeDim, ie); - - CellTools::getReferenceEdgeTangent(refEdgeTan, ie, cellBasis->getBaseCellTopology()); + ordinal_type numBasisEPoints = range_size(basisEPointsRange(edgeDim, ie)); + ordinal_type numTargetEPoints = range_size(targetEPointsRange(edgeDim, ie)); + + { + auto refEdgeTan = Kokkos::subview(refEdgesTangent, ie, Kokkos::ALL()); + auto refEdgeTanHost = Kokkos::create_mirror_view(refEdgeTan); + CellTools::getReferenceEdgeTangent(refEdgeTanHost, ie, cellTopo); + Kokkos::deep_copy(refEdgeTan,refEdgeTanHost); + } - ScalarViewType tanBasisAtElemCubPoints("tanBasisAtElemCubPoints",numCells,edgeCardinality, numCubPoints); - ScalarViewType tanBasisAtTargetCubPoints("tanBasisAtTargetCubPoints",numCells,edgeCardinality, numTargetCubPoints); - ScalarViewType weightedTanBasisAtElemCubPoints("weightedTanBasisAtElemCubPoints",numCells,edgeCardinality, numCubPoints); - ScalarViewType weightedTanBasisAtTargetCubPoints("weightedTanBasisAtTargetCubPoints",numCells,edgeCardinality, numTargetCubPoints); - ScalarViewType tanTargetAtTargetCubPoints("normalTargetAtTargetCubPoints",numCells, numTargetCubPoints); + ScalarViewType basisTanAtBasisEPoints("basisTanAtBasisEPoints",numCells,edgeCardinality, numBasisEPoints); + ScalarViewType basisTanAtTargetEPoints("basisTanAtTargetEPoints",numCells,edgeCardinality, numTargetEPoints); + ScalarViewType weightedTanBasisAtBasisEPoints("weightedTanBasisAtBasisEPoints",numCells,edgeCardinality, numBasisEPoints); + ScalarViewType weightedTanBasisAtTargetEPoints("weightedTanBasisAtTargetEPoints",numCells,edgeCardinality, numTargetEPoints); + ScalarViewType targetTanAtTargetEPoints("normalTargetAtTargetEPoints",numCells, numTargetEPoints); - ScalarViewType targetEvalWeights = projStruct->getTargetEvalWeights(edgeDim, ie); - ScalarViewType 
basisEvalWeights = projStruct->getBasisEvalWeights(edgeDim, ie); + auto targetEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetEvalWeights(edgeDim,ie)); + auto basisEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisEvalWeights(edgeDim,ie)); //Note: we are not considering the jacobian of the orientation map since it is simply a scalar term for the integrals and it does not affect the projection - ordinal_type offsetBasis = projStruct->getBasisPointsRange(edgeDim, ie).first; - ordinal_type offsetTarget = projStruct->getTargetPointsRange(edgeDim, ie).first; - for(ordinal_type ic=0; icgetDofOrdinal(edgeDim, ie, j); - for(ordinal_type iq=0; iq functorTypeEdge; + Kokkos::parallel_for(policy, functorTypeEdge(basisTanAtBasisEPoints,basisAtBasisEPoints,basisEWeights, + weightedTanBasisAtBasisEPoints, targetEWeights, + basisAtTargetEPoints, weightedTanBasisAtTargetEPoints, tagToOrdinal, + targetAtTargetEPoints, targetTanAtTargetEPoints, + refEdgesTangent, edgeCardinality, offsetBasis, + offsetTarget, edgeDim, + dim, ie)); + ScalarViewType edgeMassMat_("edgeMassMat_", numCells, edgeCardinality+1, edgeCardinality+1), edgeRhsMat_("rhsMat_", numCells, edgeCardinality+1); - ScalarViewType cubWeights_("cubWeights_", numCells, 1, basisEvalWeights.extent(0)), targetEvalWeights_("targetEvalWeights", numCells, 1, targetEvalWeights.extent(0)); - RealSpaceTools::clone(cubWeights_, basisEvalWeights); - RealSpaceTools::clone(targetEvalWeights_, targetEvalWeights); + ScalarViewType eWeights_("eWeights_", numCells, 1, basisEWeights.extent(0)), targetEWeights_("targetEWeights", numCells, 1, targetEWeights.extent(0)); + RealSpaceTools::clone(eWeights_, basisEWeights); + RealSpaceTools::clone(targetEWeights_, targetEWeights); range_type range_H(0, edgeCardinality); range_type range_B(edgeCardinality, edgeCardinality+1); - 
FunctionSpaceTools::integrate(Kokkos::subview(edgeMassMat_,Kokkos::ALL(),range_H,range_H), tanBasisAtElemCubPoints, weightedTanBasisAtElemCubPoints); - FunctionSpaceTools::integrate(Kokkos::subview(edgeMassMat_,Kokkos::ALL(),range_H,range_B), tanBasisAtElemCubPoints, cubWeights_); - FunctionSpaceTools::integrate(Kokkos::subview(edgeRhsMat_,Kokkos::ALL(),range_H), tanTargetAtTargetCubPoints, weightedTanBasisAtTargetCubPoints); - FunctionSpaceTools::integrate(Kokkos::subview(edgeRhsMat_,Kokkos::ALL(),range_B), tanTargetAtTargetCubPoints, targetEvalWeights_); - Kokkos::View edgeMassMat("edgeMassMat", edgeCardinality+1,edgeCardinality+1); - Kokkos::View edgeRhsMat("edgeRhsMat",edgeCardinality+1, 1); - - Teuchos::LAPACK lapack; - ordinal_type info = 0; - Kokkos::View pivVec("pivVec", edgeCardinality+1, 1); - for(ordinal_type ic=0; ic>> ERROR (Intrepid::ProjectionTools::getBasisCoeffs): " - << "LAPACK return with error code: " - << info; - INTREPID2_TEST_FOR_EXCEPTION( true, std::runtime_error, ss.str().c_str() ); - } + FunctionSpaceTools::integrate(Kokkos::subview(edgeMassMat_,Kokkos::ALL(),range_H,range_H), basisTanAtBasisEPoints, weightedTanBasisAtBasisEPoints); + FunctionSpaceTools::integrate(Kokkos::subview(edgeMassMat_,Kokkos::ALL(),range_H,range_B), basisTanAtBasisEPoints, eWeights_); + FunctionSpaceTools::integrate(Kokkos::subview(edgeRhsMat_,Kokkos::ALL(),range_H), targetTanAtTargetEPoints, weightedTanBasisAtTargetEPoints); + FunctionSpaceTools::integrate(Kokkos::subview(edgeRhsMat_,Kokkos::ALL(),range_B), targetTanAtTargetEPoints, targetEWeights_); + + typedef Kokkos::DynRankView WorkArrayViewType; + ScalarViewType t_("t",numCells, edgeCardinality+1); + WorkArrayViewType w_("w",numCells, edgeCardinality+1); + + auto edgeDofs = Kokkos::subview(tagToOrdinal, edgeDim, ie, Kokkos::ALL()); + typedef SolveSystem functorTypeCellSys; + Kokkos::parallel_for(policy, functorTypeCellSys( basisCoeffs, edgeMassMat_, edgeRhsMat_, t_, w_, edgeDofs, edgeCardinality, 1)); - 
for(ordinal_type i=0; igetDofOrdinal(edgeDim, ie, i); - basisCoeffs(ic,edge_dof) = edgeRhsMat(i,0); - } - } for(ordinal_type i=0; igetDofOrdinal(edgeDim, ie, i); } - ScalarViewType ortJacobian("ortJacobian", faceDim, faceDim); - - Basis *hgradBasis = NULL; + Basis *hgradBasis = NULL; for(ordinal_type iface=0; iface(cellBasis->getDegree(),POINTTYPE_WARPBLEND); - else if(name.find("TET")!=std::string::npos) - hgradBasis = new Basis_HGRAD_TRI_Cn_FEM(cellBasis->getDegree(),POINTTYPE_WARPBLEND); + if(cellTopo.getKey() == shards::getCellTopologyData >()->key) + hgradBasis = new Basis_HGRAD_QUAD_Cn_FEM(cellBasis->getDegree(),POINTTYPE_WARPBLEND); + else if(cellTopo.getKey() == shards::getCellTopologyData >()->key) + hgradBasis = new Basis_HGRAD_TRI_Cn_FEM(cellBasis->getDegree(),POINTTYPE_WARPBLEND); else { std::stringstream ss; ss << ">>> ERROR (Intrepid2::ProjectionTools::getHCurlBasisCoeffs): " @@ -379,171 +671,109 @@ ProjectionTools::getHCurlBasisCoeffs(Kokkos::DynRankViewgetDofCount(faceDim,iface); - ordinal_type numTargetCubPoints = projStruct->getNumTargetEvalPoints(faceDim, iface); - - ordinal_type numTargetCurlCubPoints = projStruct->getNumTargetDerivEvalPoints(faceDim, iface); - ordinal_type numCubPoints = projStruct->getNumBasisEvalPoints(faceDim, iface); - - ScalarViewType hgradBasisGradAtCubPoints("hgradBasisGradAtCubPoints",hgradBasis->getCardinality(), numCubPoints, faceDim); - ScalarViewType hgradBasisGradAtTargetCubPoints("hgradBasisGradAtTargetCubPoints",hgradBasis->getCardinality(), numTargetCubPoints, faceDim); - - ordinal_type internalHgradCardinality = hgradBasis->getDofCount(faceDim,0); - ScalarViewType internalHgradBasisGradAtCubPoints("internalHgradBasisGradAtCubPoints",1, internalHgradCardinality, numCubPoints, faceDim); - ScalarViewType internalHgradBasisGradAtTargetCubPoints("internalHgradBasisGradAtTargetCubPoints",1, internalHgradCardinality, numTargetCubPoints, faceDim); - - - CellTools::getReferenceFaceNormal(refFaceNormal, iface, cellTopo); 
- CellTools::getReferenceFaceTangents(refFaceTanU, refFaceTanV,iface, cellTopo); - - hgradBasis->getValues(hgradBasisGradAtCubPoints,projStruct->getBasisEvalPoints(faceDim, iface), OPERATOR_GRAD); - hgradBasis->getValues(hgradBasisGradAtTargetCubPoints,projStruct->getTargetEvalPoints(faceDim, iface),OPERATOR_GRAD); - - for(ordinal_type j=0; j getDofOrdinal(faceDim, 0, j); - for(ordinal_type d=0; d ::getReferenceFaceTangents(refFaceTanUHost, refFaceTanVHost, iface, cellTopo); + Kokkos::deep_copy(refFaceTanU, refFaceTanUHost); + Kokkos::deep_copy(refFaceTanV, refFaceTanVHost); + auto refFaceNormal = Kokkos::subview(refFacesNormal,iface,Kokkos::ALL()); + auto refFaceNormalHost = Kokkos::create_mirror_view(refFaceNormal); + CellTools::getReferenceFaceNormal(refFaceNormalHost, iface, cellTopo); + Kokkos::deep_copy(refFaceNormal, refFaceNormalHost); } - ScalarViewType tanBasisAtElemCubPoints("tanBasisAtElemCubPoints",numCells,numFaceDofs, numCubPoints,dim-1); - ScalarViewType tanBasisAtTargetCubPoints("tanBasisAtTargetCubPoints",numCells,numFaceDofs, numTargetCubPoints,dim-1); - ScalarViewType normalBasisCurlAtElemCubPoints("normaBasisCurlAtElemCubPoints",numCells,numFaceDofs, numCubPoints); - ScalarViewType wNormalBasisCurlAtElemCubPoints("weightedNormalBasisCurlAtElemCubPoints",numCells,numFaceDofs, numCubPoints); + ordinal_type numTargetEPoints = range_size(targetEPointsRange(faceDim, iface)); + ordinal_type numTargetCurlEPoints = range_size(targetCurlEPointsRange(faceDim, iface)); + ordinal_type numBasisEPoints = range_size(basisEPointsRange(faceDim, iface)); + ordinal_type numBasisCurlEPoints = range_size(basisCurlEPointsRange(faceDim, iface)); - ScalarViewType tanTargetAtTargetCubPoints("tanTargetAtTargetCubPoints",numCells, numTargetCubPoints, dim-1); - ScalarViewType normalTargetCurlAtTargetCubPoints("normalTargetCurlAtTargetCubPoints",numCells, numTargetCurlCubPoints); - ScalarViewType 
normalBasisCurlAtTargetCurlCubPoints("normalBasisCurlAtTargetCurlCubPoints",numCells,numFaceDofs, numTargetCurlCubPoints); - ScalarViewType wNormalBasisCurlBasisAtTargetCurlCubPoints("weightedNormalBasisCurlAtTargetCurlCubPoints",numCells,numFaceDofs, numTargetCurlCubPoints); - - ScalarViewType wHgradBasisGradAtCubPoints("wHgradBasisGradAtCubPoints",1, internalHgradCardinality, numCubPoints, faceDim); - ScalarViewType wHgradBasisGradAtCubPoints_("wHgradBasisGradAtCubPoints_",numCells, internalHgradCardinality, numCubPoints, faceDim); - ScalarViewType wHgradBasisGradAtTargetCubPoints("wHgradBasisGradAtTargetCubPoints",1, internalHgradCardinality, numTargetCubPoints, faceDim); - ScalarViewType wHgradBasisGradAtTargetCubPoints_("wHgradBasisGradAtTargetCubPoints_",numCells, internalHgradCardinality, numTargetCubPoints, faceDim); - - ScalarViewType mNormalComputedProjectionCurl("mNormalComputedProjection", numCells,numCubPoints); - ScalarViewType mTanComputedProjection("mTanComputedProjection", numCells,numCubPoints,dim-1); - - ScalarViewType targetDerivEvalWeights = projStruct->getTargetDerivEvalWeights(faceDim, iface); - ordinal_type offsetBasis = projStruct->getBasisPointsRange(faceDim, iface).first; - ordinal_type offsetBasisCurl = projStruct->getBasisDerivPointsRange(faceDim, iface).first; - ordinal_type offsetTarget = projStruct->getTargetPointsRange(faceDim, iface).first; - ordinal_type offsetTargetCurl = projStruct->getTargetDerivPointsRange(faceDim, iface).first; - - - //Note: we are not considering the jacobian of the orientation map since it is simply a scalar term for the integrals and it does not affect the projection - const auto topoKey = projStruct->getTopologyKey(faceDim,iface); - for(ordinal_type ic=0; icgetDofOrdinal(faceDim, iface, j); - for(ordinal_type iq=0; iq getDofCount(faceDim,iface); - ScalarViewType faceMassMat_("faceMassMat_", numCells, numFaceDofs+internalHgradCardinality, numFaceDofs+internalHgradCardinality), - faceRhsMat_("rhsMat_", 
numCells, numFaceDofs+internalHgradCardinality); + ScalarViewType hgradBasisGradAtBasisEPoints("hgradBasisGradAtBasisEPoints",hgradBasis->getCardinality(), numBasisEPoints, faceDim); + ScalarViewType hgradBasisGradAtTargetEPoints("hgradBasisGradAtTargetEPoints",hgradBasis->getCardinality(), numTargetEPoints, faceDim); + + ordinal_type hgradCardinality = hgradBasis->getDofCount(faceDim,0); + + auto refBasisEPoints = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisEvalPoints(faceDim, iface)); + auto refTargetEPoints = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetEvalPoints(faceDim, iface)); + hgradBasis->getValues(hgradBasisGradAtBasisEPoints, refBasisEPoints, OPERATOR_GRAD); + hgradBasis->getValues(hgradBasisGradAtTargetEPoints, refTargetEPoints, OPERATOR_GRAD); + + ScalarViewType basisTanAtBasisEPoints("basisTanAtBasisEPoints",numCells,numFaceDofs, numBasisEPoints,dim-1); + ScalarViewType basisTanAtTargetEPoints("basisTanAtTargetEPoints",numCells,numFaceDofs, numTargetEPoints,dim-1); + ScalarViewType basisCurlNormalAtBasisCurlEPoints("normaBasisCurlAtBasisEPoints",numCells,numFaceDofs, numBasisCurlEPoints); + ScalarViewType wNormalBasisCurlAtBasisCurlEPoints("weightedNormalBasisCurlAtBasisEPoints",numCells,numFaceDofs, numBasisCurlEPoints); + + ScalarViewType targetTanAtTargetEPoints("targetTanAtTargetEPoints",numCells, numTargetEPoints, dim-1); + ScalarViewType normalTargetCurlAtTargetEPoints("normalTargetCurlAtTargetEPoints",numCells, numTargetCurlEPoints); + ScalarViewType wNormalBasisCurlBasisAtTargetCurlEPoints("weightedNormalBasisCurlAtTargetCurlEPoints",numCells,numFaceDofs, numTargetCurlEPoints); + + ScalarViewType wHgradBasisGradAtBasisEPoints("wHgradBasisGradAtBasisEPoints",numCells, hgradCardinality, numBasisEPoints, faceDim); + ScalarViewType wHgradBasisGradAtTargetEPoints("wHgradBasisGradAtTargetEPoints",numCells, hgradCardinality, numTargetEPoints, faceDim); + + 
ScalarViewType negPartialProjCurlNormal("mNormalComputedProjection", numCells,numBasisEPoints); + ScalarViewType negPartialProjTan("negPartialProjTan", numCells,numBasisEPoints,dim-1); + + + ordinal_type offsetBasis = basisEPointsRange(faceDim, iface).first; + ordinal_type offsetBasisCurl = basisCurlEPointsRange(faceDim, iface).first; + ordinal_type offsetTarget = targetEPointsRange(faceDim, iface).first; + ordinal_type offsetTargetCurl = targetCurlEPointsRange(faceDim, iface).first; + + auto hGradTagToOrdinal = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(), hgradBasis->getAllDofOrdinal()); + + auto basisEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisEvalWeights(faceDim,iface)); + auto targetEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetEvalWeights(faceDim,iface)); + auto targetCurlEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetDerivEvalWeights(faceDim,iface)); + auto basisCurlEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisDerivEvalWeights(faceDim,iface)); + typedef ComputeBasisCoeffsOnFaces_HCurl functorTypeFaces; + Kokkos::parallel_for(policy, functorTypeFaces(basisCoeffs, + orts, negPartialProjTan, negPartialProjCurlNormal, + hgradBasisGradAtBasisEPoints, wHgradBasisGradAtBasisEPoints, + basisCurlAtBasisCurlEPoints, basisCurlNormalAtBasisCurlEPoints, + basisAtBasisEPoints, + normalTargetCurlAtTargetEPoints, basisTanAtBasisEPoints, + hgradBasisGradAtTargetEPoints, wHgradBasisGradAtTargetEPoints, + wNormalBasisCurlAtBasisCurlEPoints, basisCurlAtTargetCurlEPoints, + wNormalBasisCurlBasisAtTargetCurlEPoints, targetAtTargetEPoints, + targetTanAtTargetEPoints, targetCurlAtTargetCurlEPoints, + basisEWeights, targetEWeights, + basisCurlEWeights, targetCurlEWeights, tagToOrdinal, + hGradTagToOrdinal, refTopologyKey, + refFacesNormal, refFacesTangents, + 
computedDofs, offsetBasis, + offsetBasisCurl, offsetTarget, + offsetTargetCurl, iface, + hgradCardinality, numFaces, + numFaceDofs, numEdgeDofs, + faceDim, dim)); + + + ScalarViewType faceMassMat_("faceMassMat_", numCells, numFaceDofs+hgradCardinality, numFaceDofs+hgradCardinality), + faceRhsMat_("rhsMat_", numCells, numFaceDofs+hgradCardinality); + range_type range_H(0, numFaceDofs); + range_type range_B(numFaceDofs, numFaceDofs+hgradCardinality); + FunctionSpaceTools::integrate(Kokkos::subview(faceMassMat_,Kokkos::ALL(),range_H,range_H), basisCurlNormalAtBasisCurlEPoints, wNormalBasisCurlAtBasisCurlEPoints); + FunctionSpaceTools::integrate(Kokkos::subview(faceMassMat_,Kokkos::ALL(),range_H,range_B), basisTanAtBasisEPoints, wHgradBasisGradAtBasisEPoints); - ScalarViewType targetCubWeights_("targetCubWeights_", 1, projStruct->getNumTargetEvalPoints(faceDim, iface)); - RealSpaceTools::clone(targetCubWeights_, projStruct->getTargetEvalWeights(faceDim, iface)); - ScalarViewType cubWeights_("cubWeights_", numCells, 1, numCubPoints); - RealSpaceTools::clone(cubWeights_, projStruct->getBasisEvalWeights(faceDim, iface)); - ArrayTools::scalarMultiplyDataField( wNormalBasisCurlAtElemCubPoints, Kokkos::subview(cubWeights_, Kokkos::ALL(),0, Kokkos::ALL()),normalBasisCurlAtElemCubPoints, false); - ArrayTools::scalarMultiplyDataField( wHgradBasisGradAtCubPoints, Kokkos::subview(cubWeights_, 0, Kokkos::ALL(), Kokkos::ALL()),internalHgradBasisGradAtCubPoints, false); - ArrayTools::scalarMultiplyDataField( wHgradBasisGradAtTargetCubPoints, targetCubWeights_, internalHgradBasisGradAtTargetCubPoints , false); + FunctionSpaceTools::integrate(Kokkos::subview(faceRhsMat_,Kokkos::ALL(),range_H), normalTargetCurlAtTargetEPoints, wNormalBasisCurlBasisAtTargetCurlEPoints); + FunctionSpaceTools::integrate(Kokkos::subview(faceRhsMat_,Kokkos::ALL(),range_H), negPartialProjCurlNormal, wNormalBasisCurlAtBasisCurlEPoints,true); - 
RealSpaceTools::clone(wHgradBasisGradAtCubPoints_,Kokkos::subview(wHgradBasisGradAtCubPoints,0,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL())); - RealSpaceTools::clone(wHgradBasisGradAtTargetCubPoints_,Kokkos::subview(wHgradBasisGradAtTargetCubPoints,0,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL())); + FunctionSpaceTools::integrate(Kokkos::subview(faceRhsMat_,Kokkos::ALL(),range_B), targetTanAtTargetEPoints, wHgradBasisGradAtTargetEPoints); + FunctionSpaceTools::integrate(Kokkos::subview(faceRhsMat_,Kokkos::ALL(),range_B), negPartialProjTan, wHgradBasisGradAtBasisEPoints,true); - range_type range_H(0, numFaceDofs); - range_type range_B(numFaceDofs, numFaceDofs+internalHgradCardinality); - FunctionSpaceTools::integrate(Kokkos::subview(faceMassMat_,Kokkos::ALL(),range_H,range_H), normalBasisCurlAtElemCubPoints, wNormalBasisCurlAtElemCubPoints); - FunctionSpaceTools::integrate(Kokkos::subview(faceMassMat_,Kokkos::ALL(),range_H,range_B), tanBasisAtElemCubPoints, wHgradBasisGradAtCubPoints_); - - FunctionSpaceTools::integrate(Kokkos::subview(faceRhsMat_,Kokkos::ALL(),range_H), normalTargetCurlAtTargetCubPoints, wNormalBasisCurlBasisAtTargetCurlCubPoints); - FunctionSpaceTools::integrate(Kokkos::subview(faceRhsMat_,Kokkos::ALL(),range_H), mNormalComputedProjectionCurl, wNormalBasisCurlAtElemCubPoints,true); - - FunctionSpaceTools::integrate(Kokkos::subview(faceRhsMat_,Kokkos::ALL(),range_B), tanTargetAtTargetCubPoints, wHgradBasisGradAtTargetCubPoints_); - FunctionSpaceTools::integrate(Kokkos::subview(faceRhsMat_,Kokkos::ALL(),range_B), mTanComputedProjection, wHgradBasisGradAtCubPoints_,true); - - Kokkos::View faceMassMat("faceMassMat", numFaceDofs+internalHgradCardinality,numFaceDofs+internalHgradCardinality); - Kokkos::View faceRhsMat("faceRhsMat",numFaceDofs+internalHgradCardinality, 1); - - Teuchos::LAPACK lapack; - ordinal_type info = 0; - Kokkos::View pivVec("pivVec", numFaceDofs+internalHgradCardinality, 1); - for(ordinal_type ic=0; ic>> ERROR 
(Intrepid::ProjectionTools::getBasisCoeffs): " - << "LAPACK return with error code: " - << info; - INTREPID2_TEST_FOR_EXCEPTION( true, std::runtime_error, ss.str().c_str() ); - } + typedef Kokkos::DynRankView WorkArrayViewType; + ScalarViewType t_("t",numCells, numFaceDofs+hgradCardinality); + WorkArrayViewType w_("w",numCells, numFaceDofs+hgradCardinality); - for(ordinal_type i=0; igetDofOrdinal(faceDim, iface, i); - basisCoeffs(ic,face_dof) = faceRhsMat(i,0); - } - } + auto faceDofs = Kokkos::subview(tagToOrdinal, faceDim, iface, Kokkos::ALL()); + typedef SolveSystem functorTypeCellSys; + Kokkos::parallel_for(policy, functorTypeCellSys( basisCoeffs, faceMassMat_, faceRhsMat_, t_, w_, faceDofs, numFaceDofs, hgradCardinality)); for(ordinal_type i=0; igetDofOrdinal(faceDim, iface, i); @@ -551,16 +781,16 @@ ProjectionTools::getHCurlBasisCoeffs(Kokkos::DynRankViewgetDofCount(dim,0); - if(numElemDofs>0) { - if(name.find("HEX")!=std::string::npos) - hgradBasis = new Basis_HGRAD_HEX_Cn_FEM(cellBasis->getDegree()); - else if(name.find("TET")!=std::string::npos) - hgradBasis = new Basis_HGRAD_TET_Cn_FEM(cellBasis->getDegree(),POINTTYPE_WARPBLEND); - else if(name.find("TRI")!=std::string::npos) - hgradBasis = new Basis_HGRAD_TRI_Cn_FEM(cellBasis->getDegree(),POINTTYPE_WARPBLEND); - else if(name.find("QUAD")!=std::string::npos) - hgradBasis = new Basis_HGRAD_QUAD_Cn_FEM(cellBasis->getDegree(),POINTTYPE_WARPBLEND); + ordinal_type numCellDofs = cellBasis->getDofCount(dim,0); + if(numCellDofs>0) { + if(cellTopo.getKey() == shards::getCellTopologyData >()->key) + hgradBasis = new Basis_HGRAD_HEX_Cn_FEM(cellBasis->getDegree()); + else if(cellTopo.getKey() == shards::getCellTopologyData >()->key) + hgradBasis = new Basis_HGRAD_TET_Cn_FEM(cellBasis->getDegree(),POINTTYPE_WARPBLEND); + else if(cellTopo.getKey() == shards::getCellTopologyData >()->key) + hgradBasis = new Basis_HGRAD_TRI_Cn_FEM(cellBasis->getDegree(),POINTTYPE_WARPBLEND); + else if(cellTopo.getKey() == 
shards::getCellTopologyData >()->key) + hgradBasis = new Basis_HGRAD_QUAD_Cn_FEM(cellBasis->getDegree(),POINTTYPE_WARPBLEND); else { std::stringstream ss; ss << ">>> ERROR (Intrepid2::ProjectionTools::getHCurlBasisCoeffs): " @@ -569,145 +799,81 @@ ProjectionTools::getHCurlBasisCoeffs(Kokkos::DynRankViewgetTargetPointsRange(dim, 0); - range_type cellCurlPointsRange = projStruct->getTargetDerivPointsRange(dim, 0); - - ordinal_type numTargetCurlCubPoints = projStruct->getNumTargetDerivEvalPoints(dim,0); - ordinal_type numCubPoints = projStruct->getNumBasisEvalPoints(dim,0); - ordinal_type numTargetCubPoints = projStruct->getNumTargetEvalPoints(dim,0); - - ScalarViewType hgradBasisGradAtCubPoints("hgradBasisGradAtCubPoints",hgradBasis->getCardinality(), numCubPoints, dim); - ScalarViewType hgradBasisGradAtTargetCubPoints("hgradBasisGradAtTargetCubPoints",hgradBasis->getCardinality(), numTargetCubPoints, dim); - - ordinal_type internalHgradCardinality = hgradBasis->getDofCount(dim,0); - ScalarViewType internalHgradBasisGradAtCubPoints("internalHgradBasisGradAtCubPoints",1, internalHgradCardinality, numCubPoints, dim); - ScalarViewType internalHgradBasisGradAtTargetCubPoints("internalHgradBasisGradAtTargetCubPoints",numCells, internalHgradCardinality, numTargetCubPoints, dim); - ScalarViewType wHgradBasisGradAtTargetCubPoints("wHgradBasisGradAtTargetCubPoints",numCells, internalHgradCardinality, numTargetCubPoints, dim); - ScalarViewType wHgradBasisGradAtCubPoints("wHgradBasisGradAtCubPoints",numCells, internalHgradCardinality, numCubPoints, dim); - - ScalarViewType targetEvalWeights = projStruct->getTargetEvalWeights(dim, 0); - ScalarViewType basisEvalWeights = projStruct->getBasisEvalWeights(dim, 0); - - hgradBasis->getValues(hgradBasisGradAtCubPoints,projStruct->getBasisEvalPoints(dim, 0), OPERATOR_GRAD); - hgradBasis->getValues(hgradBasisGradAtTargetCubPoints,projStruct->getTargetEvalPoints(dim, 0),OPERATOR_GRAD); - - for(ordinal_type j=0; j getDofOrdinal(dim, 0, j); 
- for(ordinal_type d=0; d getTargetDerivEvalWeights(dim, 0); - ordinal_type offsetBasis = projStruct->getBasisPointsRange(dim, 0).first; - ordinal_type offsetBasisCurl = projStruct->getBasisDerivPointsRange(dim, 0).first; - ordinal_type offsetTargetCurl = projStruct->getTargetDerivPointsRange(dim, 0).first; - - for(ordinal_type j=0; j getDofOrdinal(dim, 0, j); - for(ordinal_type ic=0; ic::integrate(Kokkos::subview(cellMassMat_,Kokkos::ALL(),range_H,range_H), internalBasisCurlAtElemcubPoints, wBasisCurlAtElemCubPoints); - FunctionSpaceTools::integrate(Kokkos::subview(cellMassMat_,Kokkos::ALL(),range_H,range_B), internalBasisAtElemcubPoints, wHgradBasisGradAtCubPoints); + range_type cellPointsRange = targetEPointsRange(dim, 0); + range_type cellCurlPointsRange = targetCurlEPointsRange(dim, 0); + + ordinal_type numTargetCurlEPoints = range_size(targetCurlEPointsRange(dim,0)); + ordinal_type numBasisCurlEPoints = range_size(basisCurlEPointsRange(dim,0)); + ordinal_type numBasisEPoints = range_size(basisEPointsRange(dim,0)); + ordinal_type numTargetEPoints = range_size(targetEPointsRange(dim,0)); + + ScalarViewType hgradBasisGradAtBasisEPoints("hgradBasisGradAtBasisEPoints",hgradBasis->getCardinality(), numBasisEPoints, dim); + ScalarViewType hgradBasisGradAtTargetEPoints("hgradBasisGradAtTargetEPoints",hgradBasis->getCardinality(), numTargetEPoints, dim); + + ordinal_type hgradCardinality = hgradBasis->getDofCount(dim,0); + ScalarViewType wHgradBasisGradAtTargetEPoints("wHgradBasisGradAtTargetEPoints",numCells, hgradCardinality, numTargetEPoints, dim); + ScalarViewType wHgradBasisGradAtBasisEPoints("wHgradBasisGradAtBasisEPoints",numCells, hgradCardinality, numBasisEPoints, dim); + + hgradBasis->getValues(hgradBasisGradAtBasisEPoints,Kokkos::subview(basisEPoints, 0, basisEPointsRange(dim, 0), Kokkos::ALL()), OPERATOR_GRAD); + hgradBasis->getValues(hgradBasisGradAtTargetEPoints,Kokkos::subview(targetEPoints, 0, targetEPointsRange(dim, 0), Kokkos::ALL()),OPERATOR_GRAD); 
+ + ScalarViewType cellBasisAtBasisEPoints("basisCellAtEPoints",numCells,numCellDofs, numBasisEPoints, dim); + ScalarViewType cellBasisCurlAtCurlEPoints("cellBasisCurlAtCurlEPoints",numCells,numCellDofs, numBasisCurlEPoints, derDim); + ScalarViewType negPartialProjCurl("negPartialProjCurl", numCells, numBasisEPoints, derDim); + ScalarViewType negPartialProj("negPartialProj", numCells, numBasisEPoints, dim); + ScalarViewType wBasisCurlAtCurlEPoints("weightedBasisCurlAtBasisEPoints",numCells,numCellDofs, numBasisCurlEPoints,derDim); + ScalarViewType wBasisCurlBasisAtTargetCurlEPoints("weightedBasisCurlAtTargetCurlEPoints",numCells,numCellDofs, numTargetCurlEPoints,derDim); + + auto targetEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetEvalWeights(dim,0)); + auto basisEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisEvalWeights(dim,0)); + auto targetCurlEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetDerivEvalWeights(dim,0)); + auto basisCurlEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisDerivEvalWeights(dim,0)); + ordinal_type offsetBasis = basisEPointsRange(dim, 0).first; + ordinal_type offsetBasisCurl = basisCurlEPointsRange(dim, 0).first; + ordinal_type offsetTargetCurl = targetCurlEPointsRange(dim, 0).first; + + + auto hGradTagToOrdinal = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(), hgradBasis->getAllDofOrdinal()); + + typedef ComputeBasisCoeffsOnCell_HCurl functorTypeCell; + Kokkos::parallel_for(policy, functorTypeCell(basisCoeffs, negPartialProj, negPartialProjCurl, + cellBasisAtBasisEPoints, cellBasisCurlAtCurlEPoints, + basisAtBasisEPoints, hgradBasisGradAtBasisEPoints, basisCurlAtBasisCurlEPoints, + hgradBasisGradAtTargetEPoints, basisCurlAtTargetCurlEPoints, + basisEWeights, basisCurlEWeights, wHgradBasisGradAtBasisEPoints, + 
wBasisCurlAtCurlEPoints, targetEWeights, targetCurlEWeights, + wHgradBasisGradAtTargetEPoints, + wBasisCurlBasisAtTargetCurlEPoints, computedDofs, + tagToOrdinal, hGradTagToOrdinal, + numCellDofs, hgradCardinality, + offsetBasis, offsetBasisCurl, offsetTargetCurl, + numEdgeDofs+numFaceDofs, dim, derDim)); + + ScalarViewType cellMassMat_("cellMassMat_", numCells, numCellDofs+hgradCardinality, numCellDofs+hgradCardinality), + cellRhsMat_("rhsMat_", numCells, numCellDofs+hgradCardinality); + + range_type range_H(0, numCellDofs); + range_type range_B(numCellDofs, numCellDofs+hgradCardinality); + FunctionSpaceTools::integrate(Kokkos::subview(cellMassMat_,Kokkos::ALL(),range_H,range_H), cellBasisCurlAtCurlEPoints, wBasisCurlAtCurlEPoints); + FunctionSpaceTools::integrate(Kokkos::subview(cellMassMat_,Kokkos::ALL(),range_H,range_B), cellBasisAtBasisEPoints, wHgradBasisGradAtBasisEPoints); if(dim==3) - FunctionSpaceTools::integrate(Kokkos::subview(cellRhsMat_,Kokkos::ALL(),range_H), Kokkos::subview(targetCurlAtCurlEvalPoints,Kokkos::ALL(),cellCurlPointsRange,Kokkos::ALL()), wBasisCurlBasisAtTargetCurlCubPoints); + FunctionSpaceTools::integrate(Kokkos::subview(cellRhsMat_,Kokkos::ALL(),range_H), Kokkos::subview(targetCurlAtTargetCurlEPoints,Kokkos::ALL(),cellCurlPointsRange,Kokkos::ALL()), wBasisCurlBasisAtTargetCurlEPoints); else - FunctionSpaceTools::integrate(Kokkos::subview(cellRhsMat_,Kokkos::ALL(),range_H), Kokkos::subview(targetCurlAtCurlEvalPoints,Kokkos::ALL(),cellCurlPointsRange), Kokkos::subview(wBasisCurlBasisAtTargetCurlCubPoints,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL(),0)); - FunctionSpaceTools::integrate(Kokkos::subview(cellRhsMat_,Kokkos::ALL(),range_H), mComputedProjectionCurl, wBasisCurlAtElemCubPoints, true); + FunctionSpaceTools::integrate(Kokkos::subview(cellRhsMat_,Kokkos::ALL(),range_H), Kokkos::subview(targetCurlAtTargetCurlEPoints,Kokkos::ALL(),cellCurlPointsRange), 
Kokkos::subview(wBasisCurlBasisAtTargetCurlEPoints,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL(),0)); + FunctionSpaceTools::integrate(Kokkos::subview(cellRhsMat_,Kokkos::ALL(),range_H), negPartialProjCurl, wBasisCurlAtCurlEPoints, true); - FunctionSpaceTools::integrate(Kokkos::subview(cellRhsMat_,Kokkos::ALL(),range_B), Kokkos::subview(targetAtEvalPoints,Kokkos::ALL(),cellPointsRange,Kokkos::ALL()), wHgradBasisGradAtTargetCubPoints); - FunctionSpaceTools::integrate(Kokkos::subview(cellRhsMat_,Kokkos::ALL(),range_B), mComputedProjection, wHgradBasisGradAtCubPoints, true); + FunctionSpaceTools::integrate(Kokkos::subview(cellRhsMat_,Kokkos::ALL(),range_B), Kokkos::subview(targetAtTargetEPoints,Kokkos::ALL(),cellPointsRange,Kokkos::ALL()), wHgradBasisGradAtTargetEPoints); + FunctionSpaceTools::integrate(Kokkos::subview(cellRhsMat_,Kokkos::ALL(),range_B), negPartialProj, wHgradBasisGradAtBasisEPoints, true); - Kokkos::View cellMassMat("cellMassMat", numElemDofs+internalHgradCardinality,numElemDofs+internalHgradCardinality); - Kokkos::View cellRhsMat("cellRhsMat",numElemDofs+internalHgradCardinality, 1); + typedef Kokkos::DynRankView WorkArrayViewType; + ScalarViewType t_("t",numCells, numCellDofs+hgradCardinality); + WorkArrayViewType w_("w",numCells, numCellDofs+hgradCardinality); - Teuchos::LAPACK lapack; - ordinal_type info = 0; - Kokkos::View pivVec("pivVec", numElemDofs+internalHgradCardinality, 1); - - for(ordinal_type ic=0; ic functorTypeCellSys; + Kokkos::parallel_for(policy, functorTypeCellSys( basisCoeffs, cellMassMat_, cellRhsMat_, t_, w_, cellDofs, numCellDofs, hgradCardinality)); - for(ordinal_type i=0; igetDofOrdinal(dim, 0, i); - basisCoeffs(ic,idof) = cellRhsMat(i,0); - } - - if (info) { - std::stringstream ss; - ss << ">>> ERROR (Intrepid::ProjectionTools::getBasisCoeffs): " - << "LAPACK return with error code: " - << info; - INTREPID2_TEST_FOR_EXCEPTION( true, std::runtime_error, ss.str().c_str() ); - } - } delete hgradBasis; } } diff --git 
a/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHDIV.hpp b/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHDIV.hpp index 77cb323e7a9b..d155ef3866d5 100644 --- a/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHDIV.hpp +++ b/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHDIV.hpp @@ -58,12 +58,196 @@ namespace Intrepid2 { namespace Experimental { +template +struct ComputeBasisCoeffsOnSides_HDiv { + const ViewType1 sideBasisNormalAtBasisEPoints_; + const ViewType1 basisAtBasisEPoints_; + const ViewType2 basisEWeights_; + const ViewType1 wBasisDofAtBasisEPoints_; + const ViewType2 targetEWeights_; + const ViewType1 basisAtTargetEPoints_; + const ViewType1 wBasisDofAtTargetEPoints_; + const ViewType3 tagToOrdinal_; + const ViewType1 targetAtEPoints_; + const ViewType1 targetAtTargetEPoints_; + const ViewType1 refSidesNormal_; + ordinal_type sideCardinality_; + ordinal_type offsetBasis_; + ordinal_type offsetTarget_; + ordinal_type sideDim_; + ordinal_type dim_; + ordinal_type iside_; + + ComputeBasisCoeffsOnSides_HDiv(const ViewType1 sideBasisNormalAtBasisEPoints, + const ViewType1 basisAtBasisEPoints, const ViewType2 basisEWeights, const ViewType1 wBasisDofAtBasisEPoints, const ViewType2 targetEWeights, + const ViewType1 basisAtTargetEPoints, const ViewType1 wBasisDofAtTargetEvalPoint, const ViewType3 tagToOrdinal, + const ViewType1 targetAtEPoints, const ViewType1 targetAtTargetEPoints, + const ViewType1 refSidesNormal, ordinal_type sideCardinality, ordinal_type offsetBasis, + ordinal_type offsetTarget, ordinal_type sideDim, + ordinal_type dim, ordinal_type iside) : + sideBasisNormalAtBasisEPoints_(sideBasisNormalAtBasisEPoints), + basisAtBasisEPoints_(basisAtBasisEPoints), basisEWeights_(basisEWeights), wBasisDofAtBasisEPoints_(wBasisDofAtBasisEPoints), targetEWeights_(targetEWeights), + basisAtTargetEPoints_(basisAtTargetEPoints), wBasisDofAtTargetEPoints_(wBasisDofAtTargetEvalPoint), + 
tagToOrdinal_(tagToOrdinal), targetAtEPoints_(targetAtEPoints), + targetAtTargetEPoints_(targetAtTargetEPoints), + refSidesNormal_(refSidesNormal), sideCardinality_(sideCardinality), offsetBasis_(offsetBasis), + offsetTarget_(offsetTarget), sideDim_(sideDim), dim_(dim), iside_(iside) + {} + + void + KOKKOS_INLINE_FUNCTION + operator()(const ordinal_type ic) const { + + //Note: we are not considering the jacobian of the orientation map since it is simply a scalar term for the integrals and it does not affect the projection + for(ordinal_type j=0; j +struct ComputeBasisCoeffsOnCells_HDiv { + const ViewType1 basisCoeffs_; + const ViewType2 negPartialProjAtBasisEPoints_; + const ViewType2 nonWeightedBasisAtBasisEPoints_; + const ViewType2 basisAtBasisEPoints_; + const ViewType3 basisEWeights_; + const ViewType2 wBasisAtBasisEPoints_; + const ViewType3 targetEWeights_; + const ViewType2 basisAtTargetEPoints_; + const ViewType2 wBasisAtTargetEPoints_; + const ViewType4 computedDofs_; + const ViewType5 cellDof_; + ordinal_type numCellDofs_; + ordinal_type offsetBasis_; + ordinal_type offsetTarget_; + ordinal_type numSideDofs_; + + ComputeBasisCoeffsOnCells_HDiv(const ViewType1 basisCoeffs, ViewType2 negPartialProjAtBasisEPoints, const ViewType2 nonWeightedBasisAtBasisEPoints, + const ViewType2 basisAtBasisEPoints, const ViewType3 basisEWeights, const ViewType2 wBasisAtBasisEPoints, const ViewType3 targetEWeights, + const ViewType2 basisAtTargetEPoints, const ViewType2 wBasisAtTargetEPoints, const ViewType4 computedDofs, const ViewType5 cellDof, + ordinal_type numCellDofs, ordinal_type offsetBasis, ordinal_type offsetTarget, ordinal_type numSideDofs) : + basisCoeffs_(basisCoeffs), negPartialProjAtBasisEPoints_(negPartialProjAtBasisEPoints), nonWeightedBasisAtBasisEPoints_(nonWeightedBasisAtBasisEPoints), + basisAtBasisEPoints_(basisAtBasisEPoints), basisEWeights_(basisEWeights), wBasisAtBasisEPoints_(wBasisAtBasisEPoints), targetEWeights_(targetEWeights), + 
basisAtTargetEPoints_(basisAtTargetEPoints), wBasisAtTargetEPoints_(wBasisAtTargetEPoints), + computedDofs_(computedDofs), cellDof_(cellDof),numCellDofs_(numCellDofs), offsetBasis_(offsetBasis), + offsetTarget_(offsetTarget), numSideDofs_(numSideDofs) {} + + void + KOKKOS_INLINE_FUNCTION + operator()(const ordinal_type ic) const { + + for(ordinal_type j=0; j +struct ComputeHCurlBasisCoeffsOnCells_HDiv { + const ViewType1 basisCoeffs_; + const ViewType2 negPartialProjAtBasisEPoints_; + const ViewType2 nonWeightedBasisAtBasisEPoints_; + const ViewType2 basisAtBasisEPoints_; + const ViewType2 hcurlBasisCurlAtBasisEPoints_; + const ViewType3 basisEWeights_; + const ViewType2 wHCurlBasisAtBasisEPoints_; + const ViewType3 targetEWeights_; + const ViewType2 hcurlBasisCurlAtTargetEPoints_; + const ViewType2 wHCurlBasisAtTargetEPoints_; + const ViewType4 tagToOrdinal_; + const ViewType5 computedDofs_; + const ViewType6 hCurlDof_; + ordinal_type numCellDofs_; + ordinal_type offsetBasis_; + ordinal_type numSideDofs_; + ordinal_type dim_; + + ComputeHCurlBasisCoeffsOnCells_HDiv(const ViewType1 basisCoeffs, ViewType2 negPartialProjAtBasisEPoints, const ViewType2 nonWeightedBasisAtBasisEPoints, + const ViewType2 basisAtBasisEPoints, const ViewType2 hcurlBasisCurlAtBasisEPoints, const ViewType3 basisEWeights, const ViewType2 wHCurlBasisAtBasisEPoints, const ViewType3 targetEWeights, + const ViewType2 hcurlBasisCurlAtTargetEPoints, const ViewType2 wHCurlBasisAtTargetEPoints, const ViewType4 tagToOrdinal, const ViewType5 computedDofs, const ViewType6 hCurlDof, + ordinal_type numCellDofs, ordinal_type offsetBasis, ordinal_type numSideDofs, ordinal_type dim) : + basisCoeffs_(basisCoeffs), negPartialProjAtBasisEPoints_(negPartialProjAtBasisEPoints), nonWeightedBasisAtBasisEPoints_(nonWeightedBasisAtBasisEPoints), + basisAtBasisEPoints_(basisAtBasisEPoints), hcurlBasisCurlAtBasisEPoints_(hcurlBasisCurlAtBasisEPoints), basisEWeights_(basisEWeights), 
wHCurlBasisAtBasisEPoints_(wHCurlBasisAtBasisEPoints), targetEWeights_(targetEWeights), + hcurlBasisCurlAtTargetEPoints_(hcurlBasisCurlAtTargetEPoints), wHCurlBasisAtTargetEPoints_(wHCurlBasisAtTargetEPoints), + tagToOrdinal_(tagToOrdinal), computedDofs_(computedDofs), hCurlDof_(hCurlDof),numCellDofs_(numCellDofs), offsetBasis_(offsetBasis), + numSideDofs_(numSideDofs), dim_(dim) {} + + void + KOKKOS_INLINE_FUNCTION + operator()(const ordinal_type ic) const { + + ordinal_type numBasisEPoints = basisEWeights_.extent(0); + + for(ordinal_type i=0; i(hCurlDof_.extent(0)); ++i) { + ordinal_type idof = hCurlDof_(i); + for(ordinal_type d=0; d(targetEWeights_.extent(0)); ++iq) { + wHCurlBasisAtTargetEPoints_(ic,i,iq,d) = hcurlBasisCurlAtTargetEPoints_(idof,iq,d)*targetEWeights_(iq); + } + } + } + } +}; + + template template void -ProjectionTools::getHDivEvaluationPoints(typename BasisType::ScalarViewType evaluationPoints, - typename BasisType::ScalarViewType extDerivEvaluationPoints, +ProjectionTools::getHDivEvaluationPoints(typename BasisType::ScalarViewType targetEPoints, + typename BasisType::ScalarViewType targetDivEPoints, const Kokkos::DynRankView orts, const BasisType* cellBasis, ProjectionStruct * projStruct, @@ -71,67 +255,63 @@ ProjectionTools::getHDivEvaluationPoints(typename BasisType::ScalarViewType typedef typename BasisType::scalarType scalarType; typedef Kokkos::DynRankView ScalarViewType; typedef Kokkos::pair range_type; + auto refTopologyKey = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTopologyKey()); + //const auto cellTopoKey = cellBasis->getBaseCellTopology().getKey(); ordinal_type dim = cellBasis->getBaseCellTopology().getDimension(); ordinal_type sideDim = dim-1; ordinal_type numSides = cellBasis->getBaseCellTopology().getSideCount(); ordinal_type numCells = orts.extent(0); - Kokkos::DynRankView sOrt("sOrt", numSides); + + CellTools::setSubcellParametrization(); + typename CellTools::subcellParamViewType 
subcellParamSide; + if(numSides>0) + CellTools::getSubcellParametrization(subcellParamSide, sideDim, cellBasis->getBaseCellTopology()); + + auto evalPointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getPointsRange(evalPointType)); for(ordinal_type is=0; isgetTargetPointsRange(sideDim, is); - sideCubPoints = projStruct->getTargetEvalPoints(sideDim, is); - } - else { - sidePointsRange = projStruct->getBasisPointsRange(sideDim, is); - sideCubPoints = projStruct->getBasisEvalPoints(sideDim, is); - } + ScalarViewType sideWorkview("sideWorkview", numCells, projStruct->getMaxNumEvalPoints(evalPointType), sideDim); - ScalarViewType orientedTargetCubPoints("orientedTargetCubPoints", sideCubPoints.extent(0),sideDim); + const auto topoKey = refTopologyKey(sideDim,is); + auto sideBasisEPoints = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getEvalPoints(sideDim,is,evalPointType)); - const auto topoKey = projStruct->getTopologyKey(sideDim,is); + Kokkos::parallel_for + ("Evaluate Points Sides ", + Kokkos::RangePolicy (0, numCells), + KOKKOS_LAMBDA (const size_t ic) { + auto sidePointsRange = evalPointsRange(sideDim, is); + auto sideRefPointsRange = range_type(0, range_size(sidePointsRange)); + auto orientedBasisEPoints = Kokkos::subview(sideWorkview, ic, sideRefPointsRange, range_type(0,sideDim)); - for(ordinal_type ic=0; ic::mapToReferenceSubcell(Kokkos::subview(evaluationPoints,ic,sidePointsRange,Kokkos::ALL()), orientedTargetCubPoints, sideDim, is, cellBasis->getBaseCellTopology()); - } + orts(ic).getEdgeOrientation(sOrt, numSides); + ordinal_type ort = sOrt[is]; + + Impl::OrientationTools::mapToModifiedReference(orientedBasisEPoints,sideBasisEPoints,topoKey,ort); + CellTools::mapToReferenceSubcell(Kokkos::subview(targetEPoints,ic,sidePointsRange,Kokkos::ALL()), orientedBasisEPoints, subcellParamSide, sideDim, is, dim); + }); } if(cellBasis->getDofCount(dim,0) <= 0) return; - range_type 
cellDivPointsRange; - ScalarViewType divCubPoints; - if(evalPointType == TARGET) { - divCubPoints = projStruct->getTargetDerivEvalPoints(dim, 0); - cellDivPointsRange = projStruct->getTargetDerivPointsRange(dim, 0); - } else { - divCubPoints = projStruct->getBasisDerivEvalPoints(dim, 0); - cellDivPointsRange = projStruct->getBasisDerivPointsRange(dim, 0); - } - RealSpaceTools::clone(Kokkos::subview(extDerivEvaluationPoints, Kokkos::ALL(), cellDivPointsRange, Kokkos::ALL()), divCubPoints); + auto evalDivPointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getDerivPointsRange(evalPointType)); + auto cellDivPointsRange = evalDivPointsRange(dim, 0); + auto cellBasisDivEPoints = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getDerivEvalPoints(dim,0,evalPointType)); + + RealSpaceTools::clone(Kokkos::subview(targetDivEPoints, Kokkos::ALL(), cellDivPointsRange, Kokkos::ALL()), cellBasisDivEPoints); + if(projStruct->getTargetEvalPoints(dim, 0).data() != NULL) { - range_type cellPointsRange; - ScalarViewType cubPoints; - if(evalPointType == TARGET) { - cubPoints = projStruct->getTargetEvalPoints(dim, 0); - cellPointsRange = projStruct->getTargetPointsRange(dim, 0); - } else { - cubPoints = projStruct->getBasisEvalPoints(dim, 0); - cellPointsRange = projStruct->getBasisPointsRange(dim, 0); - } - RealSpaceTools::clone(Kokkos::subview(evaluationPoints, Kokkos::ALL(), cellPointsRange, Kokkos::ALL()), cubPoints); + auto cellPointsRange = evalPointsRange(dim, 0); + auto cellBasisEPoints = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getEvalPoints(dim,0,evalPointType)); + RealSpaceTools::clone(Kokkos::subview(targetEPoints, Kokkos::ALL(), cellPointsRange, Kokkos::ALL()), cellBasisEPoints); } } @@ -143,23 +323,22 @@ typename BasisType, typename ortValueType,class ...ortProperties> void ProjectionTools::getHDivBasisCoeffs(Kokkos::DynRankView basisCoeffs, - const Kokkos::DynRankView 
targetAtEvalPoints, - const Kokkos::DynRankView targetDivAtDivEvalPoints, - const typename BasisType::ScalarViewType evaluationPoints, - const typename BasisType::ScalarViewType extDerivEvaluationPoints, + const Kokkos::DynRankView targetAtEPoints, + const Kokkos::DynRankView targetDivAtDivEPoints, + const typename BasisType::ScalarViewType targetEPoints, + const typename BasisType::ScalarViewType targetDivEPoints, const Kokkos::DynRankView orts, const BasisType* cellBasis, ProjectionStruct * projStruct){ - typedef typename Kokkos::Impl::is_space::host_mirror_space::execution_space host_space_type; typedef typename BasisType::scalarType scalarType; typedef Kokkos::DynRankView ScalarViewType; typedef Kokkos::pair range_type; const auto cellTopo = cellBasis->getBaseCellTopology(); ordinal_type dim = cellTopo.getDimension(); - ordinal_type numTotalEvaluationPoints(targetAtEvalPoints.extent(1)), - numTotalDivEvaluationPoints(targetDivAtDivEvalPoints.extent(1)); + ordinal_type numTotalEvaluationPoints(targetAtEPoints.extent(1)), + numTotalDivEvaluationPoints(targetDivAtDivEPoints.extent(1)); ordinal_type basisCardinality = cellBasis->getCardinality(); - ordinal_type numCells = targetAtEvalPoints.extent(0); + ordinal_type numCells = targetAtEPoints.extent(0); const ordinal_type sideDim = dim-1; const std::string& name = cellBasis->getName(); @@ -171,165 +350,130 @@ ProjectionTools::getHDivBasisCoeffs(Kokkos::DynRankViewgetDofCount(sideDim,is); - Kokkos::View computedDofs("computedDofs",numSideDofs); + Kokkos::View computedDofs("computedDofs",numSideDofs); - ordinal_type computedDofsCount = 0; + const Kokkos::RangePolicy policy(0, numCells); + + auto targetEPointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetPointsRange()); + auto basisEPointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisPointsRange()); - ordinal_type numTotalCubPoints = projStruct->getNumBasisEvalPoints(), 
numTotalDivCubPoints = projStruct->getNumBasisDerivEvalPoints(); - ScalarViewType cubPoints_("cubPoints",numCells,numTotalCubPoints, dim); - ScalarViewType divCubPoints("divCubPoints",numCells,numTotalDivCubPoints, dim); - getHDivEvaluationPoints(cubPoints_, divCubPoints, orts, cellBasis, projStruct, BASIS); + auto tagToOrdinal = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(), cellBasis->getAllDofOrdinal()); - ScalarViewType basisAtCubPoints("basisAtCubPoints",numCells,basisCardinality, numTotalCubPoints, dim); - ScalarViewType basisAtTargetCubPoints("basisAtTargetCubPoints",numCells,basisCardinality, numTotalEvaluationPoints, dim); + ordinal_type numTotalBasisEPoints = projStruct->getNumBasisEvalPoints(), numTotalBasisDivEPoints = projStruct->getNumBasisDerivEvalPoints(); + ScalarViewType basisEPoints_("basisEPoints",numCells,numTotalBasisEPoints, dim); + ScalarViewType basisDivEPoints("basisDivEPoints",numCells,numTotalBasisDivEPoints, dim); + getHDivEvaluationPoints(basisEPoints_, basisDivEPoints, orts, cellBasis, projStruct, EvalPointsType::BASIS); + + ScalarViewType basisAtBasisEPoints("basisAtBasisEPoints",numCells,basisCardinality, numTotalBasisEPoints, dim); + ScalarViewType basisAtTargetEPoints("basisAtTargetEPoints",numCells,basisCardinality, numTotalEvaluationPoints, dim); { - ScalarViewType nonOrientedBasisAtCubPoints("nonOrientedBasisAtCubPoints",numCells,basisCardinality, numTotalCubPoints, dim); - ScalarViewType nonOrientedBasisAtTargetCubPoints("nonOrientedBasisAtTargetCubPoints",numCells,basisCardinality, numTotalEvaluationPoints, dim); + ScalarViewType nonOrientedBasisAtBasisEPoints("nonOrientedBasisAtBasisEPoints",numCells,basisCardinality, numTotalBasisEPoints, dim); + ScalarViewType nonOrientedBasisAtTargetEPoints("nonOrientedBasisAtTargetEPoints",numCells,basisCardinality, numTotalEvaluationPoints, dim); for(ordinal_type ic=0; 
icgetValues(Kokkos::subview(nonOrientedBasisAtTargetCubPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(evaluationPoints, ic, Kokkos::ALL(), Kokkos::ALL())); - cellBasis->getValues(Kokkos::subview(nonOrientedBasisAtCubPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(cubPoints_, ic, Kokkos::ALL(), Kokkos::ALL())); + cellBasis->getValues(Kokkos::subview(nonOrientedBasisAtTargetEPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(targetEPoints, ic, Kokkos::ALL(), Kokkos::ALL())); + cellBasis->getValues(Kokkos::subview(nonOrientedBasisAtBasisEPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(basisEPoints_, ic, Kokkos::ALL(), Kokkos::ALL())); } - OrientationTools::modifyBasisByOrientation(basisAtCubPoints, nonOrientedBasisAtCubPoints, orts, cellBasis); - OrientationTools::modifyBasisByOrientation(basisAtTargetCubPoints, nonOrientedBasisAtTargetCubPoints, orts, cellBasis); + OrientationTools::modifyBasisByOrientation(basisAtBasisEPoints, nonOrientedBasisAtBasisEPoints, orts, cellBasis); + OrientationTools::modifyBasisByOrientation(basisAtTargetEPoints, nonOrientedBasisAtTargetEPoints, orts, cellBasis); } - ScalarViewType basisDivAtDivCubPoints; - ScalarViewType basisDivAtTargetDivCubPoints; + ScalarViewType basisDivAtBasisDivEPoints; + ScalarViewType basisDivAtTargetDivEPoints; if(numTotalDivEvaluationPoints>0) { - ScalarViewType nonOrientedBasisDivAtTargetDivCubPoints, nonOrientedBasisDivAtDivCubPoints; - basisDivAtDivCubPoints = ScalarViewType ("basisDivAtDivCubPoints",numCells,basisCardinality, numTotalDivCubPoints); - nonOrientedBasisDivAtDivCubPoints = ScalarViewType ("nonOrientedBasisDivAtDivCubPoints",numCells,basisCardinality, numTotalDivCubPoints); - basisDivAtTargetDivCubPoints = ScalarViewType("basisDivAtTargetDivCubPoints",numCells,basisCardinality, numTotalDivEvaluationPoints); - nonOrientedBasisDivAtTargetDivCubPoints = 
ScalarViewType("nonOrientedBasisDivAtTargetDivCubPoints",numCells,basisCardinality, numTotalDivEvaluationPoints); + ScalarViewType nonOrientedBasisDivAtTargetDivEPoints, nonOrientedBasisDivAtBasisDivEPoints; + basisDivAtBasisDivEPoints = ScalarViewType ("basisDivAtBasisDivEPoints",numCells,basisCardinality, numTotalBasisDivEPoints); + nonOrientedBasisDivAtBasisDivEPoints = ScalarViewType ("nonOrientedBasisDivAtBasisDivEPoints",numCells,basisCardinality, numTotalBasisDivEPoints); + basisDivAtTargetDivEPoints = ScalarViewType("basisDivAtTargetDivEPoints",numCells,basisCardinality, numTotalDivEvaluationPoints); + nonOrientedBasisDivAtTargetDivEPoints = ScalarViewType("nonOrientedBasisDivAtTargetDivEPoints",numCells,basisCardinality, numTotalDivEvaluationPoints); for(ordinal_type ic=0; icgetValues(Kokkos::subview(nonOrientedBasisDivAtDivCubPoints,ic,Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(divCubPoints, ic, Kokkos::ALL(), Kokkos::ALL()),OPERATOR_DIV); - cellBasis->getValues(Kokkos::subview(nonOrientedBasisDivAtTargetDivCubPoints,ic,Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(extDerivEvaluationPoints, ic, Kokkos::ALL(), Kokkos::ALL()),OPERATOR_DIV); + cellBasis->getValues(Kokkos::subview(nonOrientedBasisDivAtBasisDivEPoints,ic,Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(basisDivEPoints, ic, Kokkos::ALL(), Kokkos::ALL()),OPERATOR_DIV); + cellBasis->getValues(Kokkos::subview(nonOrientedBasisDivAtTargetDivEPoints,ic,Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(targetDivEPoints, ic, Kokkos::ALL(), Kokkos::ALL()),OPERATOR_DIV); } - OrientationTools::modifyBasisByOrientation(basisDivAtDivCubPoints, nonOrientedBasisDivAtDivCubPoints, orts, cellBasis); - OrientationTools::modifyBasisByOrientation(basisDivAtTargetDivCubPoints, nonOrientedBasisDivAtTargetDivCubPoints, orts, cellBasis); + OrientationTools::modifyBasisByOrientation(basisDivAtBasisDivEPoints, nonOrientedBasisDivAtBasisDivEPoints, orts, cellBasis); + 
OrientationTools::modifyBasisByOrientation(basisDivAtTargetDivEPoints, nonOrientedBasisDivAtTargetDivEPoints, orts, cellBasis); } - + ScalarViewType refSidesNormal("refSidesNormal", numSides, dim); + ordinal_type computedDofsCount = 0; for(ordinal_type is=0; isgetDofCount(sideDim,is); - ordinal_type numCubPoints = projStruct->getNumBasisEvalPoints(sideDim,is); - ordinal_type numTargetCubPoints = projStruct->getNumTargetEvalPoints(sideDim,is); + + ordinal_type numTargetEPoints = range_size(targetEPointsRange(sideDim,is)); + ordinal_type numBasisEPoints = range_size(basisEPointsRange(sideDim,is)); for(ordinal_type i=0; igetDofOrdinal(sideDim, is, i); - CellTools::getReferenceSideNormal(refSideNormal, is, cellBasis->getBaseCellTopology()); + auto sideNormal = Kokkos::subview(refSidesNormal,is,Kokkos::ALL()); + auto sideNormalHost = Kokkos::create_mirror_view(sideNormal); + CellTools::getReferenceSideNormal(sideNormalHost, is, cellTopo); + Kokkos::deep_copy(sideNormal, sideNormalHost); - ScalarViewType normalBasisAtElemcubPoints("normalBasisAtElemcubPoints",numCells,sideCardinality, numCubPoints); - ScalarViewType normalBasisAtTargetcubPoints("normalBasisAtTargetcubPoints",numCells,sideCardinality, numTargetCubPoints); - ScalarViewType weightedNormalBasisAtElemcubPoints("weightedNormalBasisAtElemcubPoints",numCells,sideCardinality, numCubPoints); - ScalarViewType weightedNormalBasisAtTargetcubPoints("weightedNormalBasisAtTargetcubPoints",numCells,sideCardinality, numTargetCubPoints); - ScalarViewType normalTargetAtTargetcubPoints("normalTargetAtTargetcubPoints",numCells, numTargetCubPoints); + ScalarViewType basisNormalAtBasisEPoints("normalBasisAtBasisEPoints",numCells,sideCardinality, numBasisEPoints); + ScalarViewType wBasisNormalAtBasisEPoints("wBasisNormalAtBasisEPoints",numCells,sideCardinality, numBasisEPoints); + ScalarViewType wBasisNormalAtTargetEPoints("wBasisNormalAtTargetEPoints",numCells,sideCardinality, numTargetEPoints); + ScalarViewType 
targetNormalAtTargetEPoints("targetNormalAtTargetEPoints",numCells, numTargetEPoints); - ScalarViewType targetEvalWeights = projStruct->getTargetEvalWeights(sideDim, is); - ScalarViewType basisEvalWeights = projStruct->getBasisEvalWeights(sideDim, is); + ordinal_type offsetBasis = basisEPointsRange(sideDim,is).first; + ordinal_type offsetTarget = targetEPointsRange(sideDim,is).first; + auto targetEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetEvalWeights(sideDim,is)); + auto basisEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisEvalWeights(sideDim,is)); - //Note: we are not considering the jacobian of the orientation map since it is simply a scalar term for the integrals and it does not affect the projection - ordinal_type offsetBasis = projStruct->getBasisPointsRange(sideDim, is).first; - ordinal_type offsetTarget = projStruct->getTargetPointsRange(sideDim, is).first; - for(ordinal_type ic=0; icgetDofOrdinal(sideDim, is, j); - for(ordinal_type iq=0; iq functorTypeSide; + Kokkos::parallel_for(policy, functorTypeSide(basisNormalAtBasisEPoints, basisAtBasisEPoints, + basisEWeights, wBasisNormalAtBasisEPoints, targetEWeights, + basisAtTargetEPoints, wBasisNormalAtTargetEPoints, tagToOrdinal, + targetAtEPoints, targetNormalAtTargetEPoints, + refSidesNormal, sideCardinality, offsetBasis, + offsetTarget, sideDim, + dim, is)); ScalarViewType sideMassMat_("sideMassMat_", numCells, sideCardinality+1, sideCardinality+1), sideRhsMat_("rhsMat_", numCells, sideCardinality+1); - ScalarViewType targetEvalWeights_("targetEvalWeights", numCells, 1, targetEvalWeights.extent(0)); - RealSpaceTools::clone(targetEvalWeights_, targetEvalWeights); + ScalarViewType targetEWeights_("targetEWeights", numCells, 1, targetEWeights.extent(0)); + RealSpaceTools::clone(targetEWeights_, targetEWeights); range_type range_H(0, sideCardinality); range_type range_B(sideCardinality, sideCardinality+1); - 
ScalarViewType ones("ones",numCells,1,numCubPoints); + ScalarViewType ones("ones",numCells,1,numBasisEPoints); Kokkos::deep_copy(ones,1); - FunctionSpaceTools::integrate(Kokkos::subview(sideMassMat_, Kokkos::ALL(), range_H, range_H), normalBasisAtElemcubPoints, weightedNormalBasisAtElemcubPoints); - FunctionSpaceTools::integrate(Kokkos::subview(sideMassMat_, Kokkos::ALL(), range_H, range_B), weightedNormalBasisAtElemcubPoints, ones); - - FunctionSpaceTools::integrate(Kokkos::subview(sideRhsMat_, Kokkos::ALL(), range_H), normalTargetAtTargetcubPoints, weightedNormalBasisAtTargetcubPoints); - FunctionSpaceTools::integrate(Kokkos::subview(sideRhsMat_, Kokkos::ALL(), range_B), normalTargetAtTargetcubPoints, targetEvalWeights_); + FunctionSpaceTools::integrate(Kokkos::subview(sideMassMat_, Kokkos::ALL(), range_H, range_H), basisNormalAtBasisEPoints, wBasisNormalAtBasisEPoints); + FunctionSpaceTools::integrate(Kokkos::subview(sideMassMat_, Kokkos::ALL(), range_H, range_B), wBasisNormalAtBasisEPoints, ones); - Kokkos::View sideMassMat("sideMassMat", sideCardinality+1,sideCardinality+1); - Kokkos::View sideRhsMat("sideRhsMat",sideCardinality+1, 1); + FunctionSpaceTools::integrate(Kokkos::subview(sideRhsMat_, Kokkos::ALL(), range_H), targetNormalAtTargetEPoints, wBasisNormalAtTargetEPoints); + FunctionSpaceTools::integrate(Kokkos::subview(sideRhsMat_, Kokkos::ALL(), range_B), targetNormalAtTargetEPoints, targetEWeights_); - Teuchos::LAPACK lapack; - ordinal_type info = 0; - Kokkos::View pivVec("pivVec", sideCardinality+1, 1); + typedef Kokkos::DynRankView WorkArrayViewType; + ScalarViewType t_("t",numCells, sideCardinality+1); + WorkArrayViewType w_("w",numCells, sideCardinality+1); - for(ordinal_type ic=0; icgetDofOrdinal(dim-1, is, i); - basisCoeffs(ic,facet_dof) = sideRhsMat(i,0); - } + auto sideDof = Kokkos::subview(tagToOrdinal, sideDim, is, Kokkos::ALL()); - - if (info) { - std::stringstream ss; - ss << ">>> ERROR (Intrepid::ProjectionTools::getBasisCoeffs): " - << 
"LAPACK return with error code: " - << info; - INTREPID2_TEST_FOR_EXCEPTION( true, std::runtime_error, ss.str().c_str() ); - } - } + typedef SolveSystem functorType; + Kokkos::parallel_for(policy, functorType( basisCoeffs, sideMassMat_, sideRhsMat_, t_, w_, sideDof, sideCardinality, 1)); } - //elem - ordinal_type numElemDofs = cellBasis->getDofCount(dim,0); - if(numElemDofs==0) + //Cell + ordinal_type numCellDofs = cellBasis->getDofCount(dim,0); + if(numCellDofs==0) return; - Basis *hcurlBasis = NULL; - if(name.find("HEX")!=std::string::npos) - hcurlBasis = new Basis_HCURL_HEX_In_FEM(cellBasis->getDegree()); - else if(name.find("TET")!=std::string::npos) - hcurlBasis = new Basis_HCURL_TET_In_FEM(cellBasis->getDegree()); - else if(name.find("QUAD")!=std::string::npos) - hcurlBasis = new Basis_HGRAD_QUAD_Cn_FEM(cellBasis->getDegree()); - else if(name.find("TRI")!=std::string::npos) - hcurlBasis = new Basis_HGRAD_TRI_Cn_FEM(cellBasis->getDegree()); + Basis *hcurlBasis = NULL; + if(cellTopo.getKey() == shards::getCellTopologyData >()->key) + hcurlBasis = new Basis_HCURL_HEX_In_FEM(cellBasis->getDegree()); + else if(cellTopo.getKey() == shards::getCellTopologyData >()->key) + hcurlBasis = new Basis_HCURL_TET_In_FEM(cellBasis->getDegree()); + else if(cellTopo.getKey() == shards::getCellTopologyData >()->key) + hcurlBasis = new Basis_HGRAD_QUAD_Cn_FEM(cellBasis->getDegree()); + else if(cellTopo.getKey() == shards::getCellTopologyData >()->key) + hcurlBasis = new Basis_HGRAD_TRI_Cn_FEM(cellBasis->getDegree()); else { std::stringstream ss; ss << ">>> ERROR (Intrepid2::ProjectionTools::getHDivEvaluationPoints): " @@ -339,176 +483,86 @@ ProjectionTools::getHDivBasisCoeffs(Kokkos::DynRankViewgetNumTargetDerivEvalPoints(dim,0); - ordinal_type numDivCubPoints = projStruct->getNumBasisDerivEvalPoints(dim,0); + auto targetDivEPointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetDerivPointsRange()); + auto basisDivEPointsRange = 
Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisDerivPointsRange()); - ScalarViewType weightedBasisDivAtcubPoints("weightedBasisDivAtcubPoints",numCells,numElemDofs, numDivCubPoints); - ScalarViewType weightedBasisDivAtcubTargetPoints("weightedBasisDivAtcubTargetPoints",numCells, numElemDofs, numTargetDivCubPoints); + ordinal_type numTargetDivEPoints = range_size(targetDivEPointsRange(dim,0)); + ordinal_type numBasisDivEPoints = range_size(basisDivEPointsRange(dim,0)); - ScalarViewType internalBasisDivAtcubPoints("basisDivAtcubPoints",numCells,numElemDofs, numDivCubPoints); + auto targetDivEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetDerivEvalWeights(dim,0)); + auto divEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisDerivEvalWeights(dim,0)); - ScalarViewType targetDivEvalWeights = projStruct->getTargetDerivEvalWeights(dim, 0); - ScalarViewType divEvalWeights = projStruct->getBasisDerivEvalWeights(dim, 0); - ordinal_type offsetBasisDiv = projStruct->getBasisDerivPointsRange(dim, 0).first; - ordinal_type offsetTargetDiv = projStruct->getTargetDerivPointsRange(dim, 0).first; + ordinal_type offsetBasisDiv = basisDivEPointsRange(dim,0).first; + ordinal_type offsetTargetDiv = targetDivEPointsRange(dim,0).first; - for(ordinal_type ic=0; icgetDofOrdinal(dim, 0, i); - for(ordinal_type iq=0; iqgetDofOrdinal(dim, 0, i); - for(ordinal_type iq=0; iq functorType; + Kokkos::parallel_for(policy, functorType( basisCoeffs, targetSideDivAtBasisEPoints, basisDivAtBasisEPoints, + basisDivAtBasisDivEPoints, divEWeights, weightedBasisDivAtBasisEPoints, targetDivEWeights, basisDivAtTargetDivEPoints, weightedBasisDivAtTargetEPoints, + computedDofs, cellDofs, numCellDofs, offsetBasisDiv, offsetTargetDiv, numSideDofs)); ordinal_type hcurlBasisCardinality = hcurlBasis->getCardinality(); ordinal_type numCurlInteriorDOFs = hcurlBasis->getDofCount(dim,0); - 
range_type range_H(0, numElemDofs); - range_type range_B(numElemDofs, numElemDofs+numCurlInteriorDOFs); + range_type range_H(0, numCellDofs); + range_type range_B(numCellDofs, numCellDofs+numCurlInteriorDOFs); - Kokkos::DynRankView massMat_("massMat_",numCells,numElemDofs+numCurlInteriorDOFs,numElemDofs+numCurlInteriorDOFs); - Kokkos::DynRankView rhsMatTrans("rhsMatTrans",numCells,numElemDofs+numCurlInteriorDOFs); - ScalarViewType targetSideDivAtcubPoints("targetSideDivAtcubPoints",numCells, numDivCubPoints); - for(ordinal_type i=0; i::integrate(Kokkos::subview(massMat_, Kokkos::ALL(), range_H,range_H), internalBasisDivAtcubPoints, weightedBasisDivAtcubPoints); - FunctionSpaceTools::integrate(Kokkos::subview(rhsMatTrans, Kokkos::ALL(), range_H), targetDivAtDivEvalPoints, weightedBasisDivAtcubTargetPoints); - FunctionSpaceTools::integrate(Kokkos::subview(rhsMatTrans, Kokkos::ALL(), range_H), targetSideDivAtcubPoints, weightedBasisDivAtcubPoints,true); + FunctionSpaceTools::integrate(Kokkos::subview(massMat_, Kokkos::ALL(), range_H,range_H), basisDivAtBasisEPoints, weightedBasisDivAtBasisEPoints); + FunctionSpaceTools::integrate(Kokkos::subview(rhsMatTrans, Kokkos::ALL(), range_H), targetDivAtDivEPoints, weightedBasisDivAtTargetEPoints); + FunctionSpaceTools::integrate(Kokkos::subview(rhsMatTrans, Kokkos::ALL(), range_H), targetSideDivAtBasisEPoints, weightedBasisDivAtBasisEPoints,true); if(numCurlInteriorDOFs>0){ - ScalarViewType cubPoints = projStruct->getBasisEvalPoints(dim,0); - ordinal_type numCubPoints = projStruct->getNumBasisEvalPoints(dim,0); - ordinal_type numTargetCubPoints = projStruct->getNumTargetEvalPoints(dim,0); + ordinal_type numTargetEPoints = range_size(targetEPointsRange(dim,0)); + ordinal_type numBasisEPoints = range_size(basisEPointsRange(dim,0)); - ScalarViewType targetSideApproxAtcubPoints("targetSideAtcubPoints",numCells, numCubPoints, dim); - ScalarViewType internalBasisAtcubPoints("basisAtcubPoints",numCells,numElemDofs, numCubPoints, 
dim); - ScalarViewType hcurlBasisCurlAtcubPoints("hcurlBasisCurlAtcubPoints",hcurlBasisCardinality, numCubPoints,dim); - ScalarViewType internalHcurlBasisCurlAtcubPoints("internalHcurlBasisCurlAtcubPoints",numCells,numCurlInteriorDOFs, numCubPoints,dim); - ScalarViewType hcurlBasisCurlAtcubTargetPoints("hcurlBasisCurlAtcubTargetPoints", hcurlBasisCardinality,numTargetCubPoints, dim); - ScalarViewType internalHcurlBasisCurlAtcubTargetPoints("internalHcurlBasisCurlAtcubTargetPoints",numCells, numCurlInteriorDOFs, numTargetCubPoints, dim); - ScalarViewType weightedHcurlBasisCurlAtcubPoints("weightedHcurlBasisHcurlAtcubPoints", numCells, numCurlInteriorDOFs, numCubPoints,dim); - ScalarViewType weightedHcurlBasisCurlAtcubTargetPoints("weightedHcurlBasisHcurlAtcubTargetPoints",numCells, numCurlInteriorDOFs, numTargetCubPoints,dim); + auto targetEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetEvalWeights(dim,0)); + auto basisEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisEvalWeights(dim,0)); - hcurlBasis->getValues(hcurlBasisCurlAtcubPoints, cubPoints, OPERATOR_CURL); + ordinal_type offsetBasis = basisEPointsRange(dim,0).first; - ordinal_type offsetBasis = projStruct->getBasisPointsRange(dim, 0).first; - range_type targetPointsRange = projStruct->getTargetPointsRange(dim, 0); + auto basisEPoints = Kokkos::subview(basisEPoints_, 0, basisEPointsRange(dim,0), Kokkos::ALL()); - ScalarViewType targetEvalWeights = projStruct->getTargetEvalWeights(dim, 0); - ScalarViewType basisEvalWeights = projStruct->getBasisEvalWeights(dim, 0); + ScalarViewType negPartialProjAtBasisEPoints("targetSideAtBasisEPoints",numCells, numBasisEPoints, dim); + ScalarViewType nonWeightedBasisAtBasisEPoints("basisAtBasisEPoints",numCells,numCellDofs, numBasisEPoints, dim); + ScalarViewType hcurlBasisCurlAtBasisEPoints("hcurlBasisCurlAtBasisEPoints",hcurlBasisCardinality, numBasisEPoints,dim); + 
ScalarViewType hcurlBasisCurlAtTargetEPoints("hcurlBasisCurlAtTargetEPoints", hcurlBasisCardinality,numTargetEPoints, dim); + ScalarViewType wHcurlBasisCurlAtBasisEPoints("wHcurlBasisHcurlAtBasisEPoints", numCells, numCurlInteriorDOFs, numBasisEPoints,dim); + ScalarViewType wHcurlBasisCurlAtTargetEPoints("wHcurlBasisHcurlAtTargetEPoints",numCells, numCurlInteriorDOFs, numTargetEPoints,dim); + hcurlBasis->getValues(hcurlBasisCurlAtBasisEPoints, basisEPoints, OPERATOR_CURL); + hcurlBasis->getValues(hcurlBasisCurlAtTargetEPoints, Kokkos::subview(targetEPoints,0,targetEPointsRange(dim,0),Kokkos::ALL()), OPERATOR_CURL); - for(ordinal_type ic=0; icgetDofOrdinal(dim, 0, i); - for(ordinal_type iq=0; iqgetDofOrdinal(dim, 0, i); - for(ordinal_type d=0; dgetValues(hcurlBasisCurlAtcubTargetPoints, Kokkos::subview(evaluationPoints,ic,targetPointsRange,Kokkos::ALL()), OPERATOR_CURL); - for(ordinal_type i=0; igetDofOrdinal(dim, 0, i); - for(ordinal_type d=0; d::integrate(Kokkos::subview(massMat_, Kokkos::ALL(), range_H,range_B), internalBasisAtcubPoints, weightedHcurlBasisCurlAtcubPoints); - FunctionSpaceTools::integrate(Kokkos::subview(rhsMatTrans, Kokkos::ALL(), range_B), Kokkos::subview(targetAtEvalPoints, Kokkos::ALL(), targetPointsRange, Kokkos::ALL()), weightedHcurlBasisCurlAtcubTargetPoints); - FunctionSpaceTools::integrate(Kokkos::subview(rhsMatTrans, Kokkos::ALL(), range_B), targetSideApproxAtcubPoints, weightedHcurlBasisCurlAtcubPoints,true); + auto hCurlTagToOrdinal = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(), hcurlBasis->getAllDofOrdinal()); + auto cellHCurlDof = Kokkos::subview(hCurlTagToOrdinal, dim, 0, range_type(0, numCurlInteriorDOFs)); + typedef ComputeHCurlBasisCoeffsOnCells_HDiv functorTypeHCurlCells; + Kokkos::parallel_for(policy, functorTypeHCurlCells(basisCoeffs, negPartialProjAtBasisEPoints, nonWeightedBasisAtBasisEPoints, + basisAtBasisEPoints, hcurlBasisCurlAtBasisEPoints, basisEWeights, wHcurlBasisCurlAtBasisEPoints, 
targetEWeights, + hcurlBasisCurlAtTargetEPoints, wHcurlBasisCurlAtTargetEPoints, tagToOrdinal, computedDofs, cellHCurlDof, + numCellDofs, offsetBasis, numSideDofs, dim)); + FunctionSpaceTools::integrate(Kokkos::subview(massMat_, Kokkos::ALL(), range_H,range_B), nonWeightedBasisAtBasisEPoints, wHcurlBasisCurlAtBasisEPoints); + FunctionSpaceTools::integrate(Kokkos::subview(rhsMatTrans, Kokkos::ALL(), range_B), Kokkos::subview(targetAtEPoints, Kokkos::ALL(), targetEPointsRange(dim,0), Kokkos::ALL()), wHcurlBasisCurlAtTargetEPoints); + FunctionSpaceTools::integrate(Kokkos::subview(rhsMatTrans, Kokkos::ALL(), range_B), negPartialProjAtBasisEPoints, wHcurlBasisCurlAtBasisEPoints,true); } delete hcurlBasis; - Kokkos::View - massMat("massMat", numElemDofs+numCurlInteriorDOFs, numElemDofs+numCurlInteriorDOFs), - rhsMat("rhsMat", numElemDofs+numCurlInteriorDOFs, 1 ); - - Teuchos::LAPACK lapack; - ordinal_type info = 0; - Kokkos::View pivVec("pivVec", 2*(numElemDofs+numCurlInteriorDOFs), 1); - - for(ordinal_type ic=0; ic>> ERROR (Intrepid::ProjectionTools::getBasisCoeffs): " - << "LAPACK return with error code: " - << info; - INTREPID2_TEST_FOR_EXCEPTION( true, std::runtime_error, ss.str().c_str() ); - } + typedef Kokkos::DynRankView WorkArrayViewType; + ScalarViewType t_("t",numCells, numCellDofs+numCurlInteriorDOFs); + WorkArrayViewType w_("w",numCells, numCellDofs+numCurlInteriorDOFs); - for(ordinal_type i=0; igetDofOrdinal(dim, 0, i); - basisCoeffs(ic,idof) = rhsMat(i,0); - } - } + typedef SolveSystem functorTypeCellSys; + Kokkos::parallel_for(policy, functorTypeCellSys( basisCoeffs, massMat_, rhsMatTrans, t_, w_, cellDofs, numCellDofs, numCurlInteriorDOFs)); } diff --git a/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHGRAD.hpp b/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHGRAD.hpp index 61d8a7e4e2e9..a171f8d2e50b 100644 --- a/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHGRAD.hpp +++ 
b/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHGRAD.hpp @@ -57,12 +57,274 @@ namespace Intrepid2 { namespace Experimental { +template +struct ComputeBasisCoeffsOnVertices_HGRAD { + ViewType1 basisCoeffs_; + const ViewType2 tagToOrdinal_; + const ViewType3 targetEPointsRange_; + const ViewType4 targetAtTargetEPoints_; + const ViewType5 basisAtTargetEPoints_; + ordinal_type numVertices_; + + + ComputeBasisCoeffsOnVertices_HGRAD(ViewType1 basisCoeffs, ViewType2 tagToOrdinal, ViewType3 targetEPointsRange, + ViewType4 targetAtTargetEPoints, ViewType5 basisAtTargetEPoints, ordinal_type numVertices) : + basisCoeffs_(basisCoeffs), tagToOrdinal_(tagToOrdinal), targetEPointsRange_(targetEPointsRange), + targetAtTargetEPoints_(targetAtTargetEPoints), basisAtTargetEPoints_(basisAtTargetEPoints), numVertices_(numVertices) {} + + void + KOKKOS_INLINE_FUNCTION + operator()(const ordinal_type ic) const { + + + for(ordinal_type iv=0; iv +struct ComputeBasisCoeffsOnEdges_HGRAD { + const ViewType1 basisCoeffs_; + const ViewType2 negPartialProjGrad_; + const ViewType2 basisTanAtEPoints_; + const ViewType2 basisGradAtBasisGradEPoints_; + const ViewType3 basisGradEWeights_; + const ViewType2 wBasisAtBasisGradEPoints_; + const ViewType3 targetGradEWeights_; + const ViewType2 basisGradAtTargetGradEPoints_; + const ViewType2 wBasisAtTargetGradEPoints_; + const ViewType4 computedDofs_; + const ViewType5 tagToOrdinal_; + const ViewType2 targetGradTanAtTargetGradEPoints_; + const ViewType6 targetGradAtTargetGradEPoints_; + const ViewType2 refEdgesTan_; + ordinal_type edgeCardinality_; + ordinal_type offsetBasis_; + ordinal_type offsetTarget_; + ordinal_type numVertexDofs_; + ordinal_type edgeDim_; + ordinal_type dim_; + ordinal_type iedge_; + + ComputeBasisCoeffsOnEdges_HGRAD(const ViewType1 basisCoeffs, ViewType2 negPartialProjGrad, const ViewType2 basisTanAtEPoints, + const ViewType2 basisGradAtBasisGradEPoints, const ViewType3 basisGradEWeights, const ViewType2 
wBasisAtBasisGradEPoints, const ViewType3 targetGradEWeights, + const ViewType2 basisGradAtTargetGradEPoints, const ViewType2 wBasisAtTargetGradEPoints, const ViewType4 computedDofs, const ViewType5 tagToOrdinal, + const ViewType2 targetGradTanAtTargetGradEPoints, const ViewType6 targetGradAtTargetGradEPoints, const ViewType2 refEdgesTan, + ordinal_type edgeCardinality, ordinal_type offsetBasis, + ordinal_type offsetTarget, ordinal_type numVertexDofs, ordinal_type edgeDim, ordinal_type dim, ordinal_type iedge) : + basisCoeffs_(basisCoeffs), negPartialProjGrad_(negPartialProjGrad), basisTanAtEPoints_(basisTanAtEPoints), + basisGradAtBasisGradEPoints_(basisGradAtBasisGradEPoints), basisGradEWeights_(basisGradEWeights), wBasisAtBasisGradEPoints_(wBasisAtBasisGradEPoints), targetGradEWeights_(targetGradEWeights), + basisGradAtTargetGradEPoints_(basisGradAtTargetGradEPoints), wBasisAtTargetGradEPoints_(wBasisAtTargetGradEPoints), + computedDofs_(computedDofs), tagToOrdinal_(tagToOrdinal), targetGradTanAtTargetGradEPoints_(targetGradTanAtTargetGradEPoints), + targetGradAtTargetGradEPoints_(targetGradAtTargetGradEPoints), refEdgesTan_(refEdgesTan), + edgeCardinality_(edgeCardinality), offsetBasis_(offsetBasis), + offsetTarget_(offsetTarget), numVertexDofs_(numVertexDofs), edgeDim_(edgeDim), dim_(dim), iedge_(iedge) + {} + + void + KOKKOS_INLINE_FUNCTION + operator()(const ordinal_type ic) const { + + ordinal_type numBasisGradEPoints = basisGradEWeights_.extent(0); + ordinal_type numTargetGradEPoints = targetGradEWeights_.extent(0); + for(ordinal_type j=0; j +struct ComputeBasisCoeffsOnFaces_HGRAD { + const ViewType1 basisCoeffs_; + const ViewType2 negPartialProjGrad_; + const ViewType2 faceBasisGradAtGradEPoints_; + const ViewType2 basisGradAtBasisGradEPoints_; + const ViewType3 basisGradEWeights_; + const ViewType2 wBasisGradAtGradEPoints_; + const ViewType3 targetGradEWeights_; + const ViewType2 basisGradAtTargetGradEPoints_; + const ViewType2 
wBasisGradBasisAtTargetGradEPoints_; + const ViewType4 computedDofs_; + const ViewType5 tagToOrdinal_; + const ViewType6 orts_; + const ViewType2 targetGradTanAtTargetGradEPoints_; + const ViewType7 targetGradAtTargetGradEPoints_; + const ViewType2 ortJacobian_; + const ViewType2 faceCoeff_; + const ViewType2 refFacesTangents_; + ordinal_type faceCardinality_; + ordinal_type offsetBasisGrad_; + ordinal_type offsetTargetGrad_; + ordinal_type numVertexEdgeDofs_; + ordinal_type numFaces_; + ordinal_type faceDim_; + ordinal_type dim_; + ordinal_type iface_; + unsigned topoKey_; + + ComputeBasisCoeffsOnFaces_HGRAD(const ViewType1 basisCoeffs, ViewType2 negPartialProjGrad, const ViewType2 faceBasisGradAtGradEPoints, + const ViewType2 basisGradAtBasisGradEPoints, const ViewType3 basisGradEWeights, const ViewType2 wBasisGradAtGradEPoints, const ViewType3 targetGradEWeights, + const ViewType2 basisGradAtTargetGradEPoints, const ViewType2 wBasisGradBasisAtTargetGradEPoints, const ViewType4 computedDofs, const ViewType5 tagToOrdinal, + const ViewType6 orts, const ViewType2 targetGradTanAtTargetGradEPoints, const ViewType7 targetGradAtTargetGradEPoints, + const ViewType2 refFacesTangents, ordinal_type faceCardinality, ordinal_type offsetBasisGrad, + ordinal_type offsetTargetGrad, ordinal_type numVertexEdgeDofs, ordinal_type numFaces, ordinal_type faceDim, + ordinal_type dim, ordinal_type iface, unsigned topoKey) : + basisCoeffs_(basisCoeffs), negPartialProjGrad_(negPartialProjGrad), faceBasisGradAtGradEPoints_(faceBasisGradAtGradEPoints), + basisGradAtBasisGradEPoints_(basisGradAtBasisGradEPoints), basisGradEWeights_(basisGradEWeights), wBasisGradAtGradEPoints_(wBasisGradAtGradEPoints), + targetGradEWeights_(targetGradEWeights), + basisGradAtTargetGradEPoints_(basisGradAtTargetGradEPoints), wBasisGradBasisAtTargetGradEPoints_(wBasisGradBasisAtTargetGradEPoints), + computedDofs_(computedDofs), tagToOrdinal_(tagToOrdinal), orts_(orts), 
targetGradTanAtTargetGradEPoints_(targetGradTanAtTargetGradEPoints), + targetGradAtTargetGradEPoints_(targetGradAtTargetGradEPoints), refFacesTangents_(refFacesTangents), + faceCardinality_(faceCardinality), offsetBasisGrad_(offsetBasisGrad), + offsetTargetGrad_(offsetTargetGrad), numVertexEdgeDofs_(numVertexEdgeDofs), numFaces_(numFaces), + faceDim_(faceDim), dim_(dim), iface_(iface), topoKey_(topoKey) + {} + + void + KOKKOS_INLINE_FUNCTION + operator()(const ordinal_type ic) const { + + typename ViewType2::value_type ortJacData[2*2]; + auto ortJac = ViewType2(ortJacData, 2, 2); + + ordinal_type fOrt[6]; + orts_(ic).getFaceOrientation(fOrt, numFaces_); + ordinal_type ort = fOrt[iface_]; + + ordinal_type numBasisGradEPoints = basisGradEWeights_.extent(0); + ordinal_type numTargetGradEPoints = targetGradEWeights_.extent(0); + Impl::OrientationTools::getJacobianOfOrientationMap(ortJac, topoKey_, ort); + for(ordinal_type j=0; j +struct ComputeBasisCoeffsOnCells_HGRAD { + const ViewType1 basisCoeffs_; + const ViewType2 negPartialProjGrad_; + const ViewType2 cellBasisGradAtGradEPoints_; + const ViewType2 basisGradAtBasisGradEPoints_; + const ViewType3 basisGradEWeights_; + const ViewType2 wBasisGradAtGradEPoints_; + const ViewType3 targetGradEWeights_; + const ViewType2 basisGradAtTargetGradEPoints_; + const ViewType2 wBasisGradBasisAtTargetGradEPoints_; + const ViewType4 computedDofs_; + const ViewType5 elemDof_; + ordinal_type dim_; + ordinal_type numElemDofs_; + ordinal_type offsetBasisGrad_; + ordinal_type offsetTargetGrad_; + ordinal_type numVertexEdgeFaceDofs_; + + ComputeBasisCoeffsOnCells_HGRAD(const ViewType1 basisCoeffs, ViewType2 negPartialProjGrad, const ViewType2 cellBasisGradAtGradEPoints, + const ViewType2 basisGradAtBasisGradEPoints, const ViewType3 basisGradEWeights, const ViewType2 wBasisGradAtGradEPoints, const ViewType3 targetGradEWeights, + const ViewType2 basisGradAtTargetGradEPoints, const ViewType2 wBasisGradBasisAtTargetGradEPoints, const 
ViewType4 computedDofs, const ViewType5 elemDof, + ordinal_type dim, ordinal_type numElemDofs, ordinal_type offsetBasisGrad, ordinal_type offsetTargetGrad, ordinal_type numVertexEdgeFaceDofs) : + basisCoeffs_(basisCoeffs), negPartialProjGrad_(negPartialProjGrad), cellBasisGradAtGradEPoints_(cellBasisGradAtGradEPoints), + basisGradAtBasisGradEPoints_(basisGradAtBasisGradEPoints), basisGradEWeights_(basisGradEWeights), wBasisGradAtGradEPoints_(wBasisGradAtGradEPoints), targetGradEWeights_(targetGradEWeights), + basisGradAtTargetGradEPoints_(basisGradAtTargetGradEPoints), wBasisGradBasisAtTargetGradEPoints_(wBasisGradBasisAtTargetGradEPoints), + computedDofs_(computedDofs), elemDof_(elemDof), dim_(dim), numElemDofs_(numElemDofs), offsetBasisGrad_(offsetBasisGrad), + offsetTargetGrad_(offsetTargetGrad), numVertexEdgeFaceDofs_(numVertexEdgeFaceDofs) {} + + void + KOKKOS_INLINE_FUNCTION + operator()(const ordinal_type ic) const { + ordinal_type numBasisGradEPoints = basisGradEWeights_.extent(0); + ordinal_type numTargetGradEPoints = targetGradEWeights_.extent(0); + for(ordinal_type j=0; j template void -ProjectionTools::getHGradEvaluationPoints(typename BasisType::ScalarViewType evaluationPoints, - typename BasisType::ScalarViewType extDerivEvaluationPoints, +ProjectionTools::getHGradEvaluationPoints(typename BasisType::ScalarViewType ePoints, + typename BasisType::ScalarViewType gradEPoints, const Kokkos::DynRankView orts, const BasisType* cellBasis, ProjectionStruct * projStruct, @@ -71,8 +333,9 @@ ProjectionTools::getHGradEvaluationPoints(typename BasisType::ScalarViewTyp typedef Kokkos::DynRankView ScalarViewType; typedef Kokkos::pair range_type; const auto cellTopo = cellBasis->getBaseCellTopology(); + //const auto cellTopoKey = cellBasis->getBaseCellTopology().getKey(); ordinal_type dim = cellTopo.getDimension(); - ordinal_type numCells = evaluationPoints.extent(0); + ordinal_type numCells = ePoints.extent(0); const ordinal_type edgeDim = 1; const ordinal_type 
faceDim = 2; @@ -80,81 +343,80 @@ ProjectionTools::getHGradEvaluationPoints(typename BasisType::ScalarViewTyp ordinal_type numEdges = (cellBasis->getDofCount(1, 0) > 0) ? cellTopo.getEdgeCount() : 0; ordinal_type numFaces = (cellBasis->getDofCount(2, 0) > 0) ? cellTopo.getFaceCount() : 0; - Kokkos::View eOrt("eOrt", numEdges), fOrt("fOrt", numFaces); + CellTools::setSubcellParametrization(); + typename CellTools::subcellParamViewType subcellParamEdge, subcellParamFace; + if(numEdges>0) + CellTools::getSubcellParametrization(subcellParamEdge, edgeDim, cellBasis->getBaseCellTopology()); + if(numFaces>0) + CellTools::getSubcellParametrization(subcellParamFace, faceDim, cellBasis->getBaseCellTopology()); + + auto refTopologyKey = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTopologyKey()); + + auto ePointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getPointsRange(evalPointType)); + auto gradEPointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getDerivPointsRange(evalPointType)); if(numVertices>0) { - //TODO: use lattice to retrieve vertex coordinates. 
- ScalarViewType dofCoords("dofCoords", cellBasis->getCardinality(), dim); - cellBasis->getDofCoords(dofCoords); for(ordinal_type iv=0; ivgetDofOrdinal(0, iv, 0); - for(ordinal_type d=0; dgetEvalPoints(0,iv,evalPointType)); + RealSpaceTools::clone(Kokkos::subview(ePoints, Kokkos::ALL(), + ePointsRange(0, iv), Kokkos::ALL()), vertexEPoints); } } + ScalarViewType workView("workView", numCells, projStruct->getMaxNumDerivPoints(evalPointType), dim-1); for(ordinal_type ie=0; iegetTargetDerivPointsRange(edgeDim, ie); - cubPoints = projStruct->getTargetDerivEvalPoints(edgeDim, ie); - } - else { - edgeGradPointsRange = projStruct->getBasisDerivPointsRange(edgeDim, ie); - cubPoints = projStruct->getBasisDerivEvalPoints(edgeDim, ie); - } - ScalarViewType orientedTargetCubPoints("orientedTargetCubPoints", cubPoints.extent(0),edgeDim); + auto edgeGradEPointsRange = gradEPointsRange(edgeDim, ie); + auto edgeRefGradEPointsRange = range_type(0, range_size(edgeGradEPointsRange)); + auto edgeGradEPoints = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getDerivEvalPoints(edgeDim,ie,evalPointType)); - const auto topoKey = projStruct->getTopologyKey(edgeDim,ie); + const auto topoKey = refTopologyKey(edgeDim,ie); - for(ordinal_type ic=0; ic::mapToReferenceSubcell(Kokkos::subview(extDerivEvaluationPoints,ic,edgeGradPointsRange,Kokkos::ALL()), orientedTargetCubPoints, edgeDim, ie, cellBasis->getBaseCellTopology()); - } + Kokkos::parallel_for + ("Evaluate Points Edges ", + Kokkos::RangePolicy (0, numCells), + KOKKOS_LAMBDA (const size_t ic) { + + + ordinal_type eOrt[12]; + orts(ic).getEdgeOrientation(eOrt, numEdges); + ordinal_type ort = eOrt[ie]; + + auto orientedEdgeEPoints = Kokkos::subview(workView, ic, edgeRefGradEPointsRange, range_type(0, edgeDim)); + Impl::OrientationTools::mapToModifiedReference(orientedEdgeEPoints,edgeGradEPoints,topoKey,ort); + CellTools::mapToReferenceSubcell(Kokkos::subview(gradEPoints,ic,edgeGradEPointsRange,Kokkos::ALL()), 
orientedEdgeEPoints, subcellParamEdge, edgeDim, ie, dim); + }); } for(ordinal_type iface=0; ifacegetTargetDerivEvalPoints(faceDim, iface); - faceGradPointsRange = projStruct->getTargetDerivPointsRange(faceDim, iface); - } else { - gradCubPoints = projStruct->getBasisDerivEvalPoints(faceDim, iface); - faceGradPointsRange = projStruct->getBasisDerivPointsRange(faceDim, iface); - } + auto faceGradEPointsRange = gradEPointsRange(faceDim, iface); + auto faceRefGradEPointsRange = range_type(0, range_size(faceGradEPointsRange)); + auto refGradEPoints = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getDerivEvalPoints(faceDim,iface,evalPointType)); - ScalarViewType faceGradCubPoints("faceGradCubPoints", gradCubPoints.extent(0), faceDim); - const auto topoKey = projStruct->getTopologyKey(faceDim,iface); - for(ordinal_type ic=0; ic (0, numCells), + KOKKOS_LAMBDA (const size_t ic) { + auto faceGradEPoints = Kokkos::subview(workView, ic, faceRefGradEPointsRange, Kokkos::ALL()); - orts(ic).getFaceOrientation(fOrt.data(), numFaces); + ordinal_type fOrt[6]; + orts(ic).getFaceOrientation(fOrt, numFaces); + ordinal_type ort = fOrt[iface]; - ordinal_type ort = fOrt(iface); - Impl::OrientationTools::mapToModifiedReference(faceGradCubPoints,gradCubPoints,topoKey,ort); - CellTools::mapToReferenceSubcell(Kokkos::subview(extDerivEvaluationPoints, ic, faceGradPointsRange, Kokkos::ALL()), faceGradCubPoints, faceDim, iface, cellBasis->getBaseCellTopology()); - } + Impl::OrientationTools::mapToModifiedReference(faceGradEPoints,refGradEPoints,topoKey,ort); + CellTools::mapToReferenceSubcell(Kokkos::subview(gradEPoints, ic, faceGradEPointsRange, Kokkos::ALL()), faceGradEPoints, subcellParamFace, faceDim, iface, dim); + }); } if(cellBasis->getDofCount(dim,0)>0) { - range_type cellGradPointsRange; - ScalarViewType gradCubPoints; - if(evalPointType == TARGET) { - gradCubPoints = projStruct->getTargetDerivEvalPoints(dim, 0); - cellGradPointsRange = 
projStruct->getTargetDerivPointsRange(dim, 0); - } else { - gradCubPoints = projStruct->getBasisDerivEvalPoints(dim, 0); - cellGradPointsRange = projStruct->getBasisDerivPointsRange(dim, 0); - } - RealSpaceTools::clone(Kokkos::subview(extDerivEvaluationPoints, Kokkos::ALL(), cellGradPointsRange, Kokkos::ALL()), gradCubPoints); + auto gradPointsRange = gradEPointsRange(dim, 0); + //auto refGradEPointsRange = range_type(0, range_size(gradPointsRange)); + auto refCellGradEPoints = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getDerivEvalPoints(dim,0,evalPointType)); + RealSpaceTools::clone(Kokkos::subview(gradEPoints, Kokkos::ALL(), gradPointsRange, Kokkos::ALL()), refCellGradEPoints); } } @@ -166,10 +428,10 @@ typename BasisType, typename ortValueType,class ...ortProperties> void ProjectionTools::getHGradBasisCoeffs(Kokkos::DynRankView basisCoeffs, - const Kokkos::DynRankView targetAtEvalPoints, - const Kokkos::DynRankView targetGradAtGradEvalPoints, - const typename BasisType::ScalarViewType evaluationPoints, - const typename BasisType::ScalarViewType extDerivEvaluationPoints, + const Kokkos::DynRankView targetAtTargetEPoints, + const Kokkos::DynRankView targetGradAtTargetGradEPoints, + const typename BasisType::ScalarViewType targetEPoints, + const typename BasisType::ScalarViewType targetGradEPoints, const Kokkos::DynRankView orts, const BasisType* cellBasis, ProjectionStruct * projStruct){ @@ -180,10 +442,10 @@ ProjectionTools::getHGradBasisCoeffs(Kokkos::DynRankView range_type; const auto cellTopo = cellBasis->getBaseCellTopology(); ordinal_type dim = cellTopo.getDimension(); - ordinal_type numTotalEvaluationPoints(targetAtEvalPoints.extent(1)), - numTotalGradEvaluationPoints(targetGradAtGradEvalPoints.extent(1)); + ordinal_type numTotalTargetEPoints(targetAtTargetEPoints.extent(1)), + numTotalTargetGradEPoints(targetGradAtTargetGradEPoints.extent(1)); ordinal_type basisCardinality = cellBasis->getCardinality(); - ordinal_type 
numCells = targetAtEvalPoints.extent(0); + ordinal_type numCells = targetAtTargetEPoints.extent(0); const ordinal_type edgeDim = 1; const ordinal_type faceDim = 2; @@ -191,12 +453,8 @@ ProjectionTools::getHGradBasisCoeffs(Kokkos::DynRankViewgetDofCount(1, 0) > 0) ? cellTopo.getEdgeCount() : 0; ordinal_type numFaces = (cellBasis->getDofCount(2, 0) > 0) ? cellTopo.getFaceCount() : 0; - Kokkos::View eOrt("eOrt", numEdges); - Kokkos::View fOrt("fOrt", numFaces); - ScalarViewType refEdgeTan("refEdgeTan", dim); - ScalarViewType refFaceTangents("refFaceTangents", dim, 2); - auto refFaceTanU = Kokkos::subview(refFaceTangents, Kokkos::ALL, 0); - auto refFaceTanV = Kokkos::subview(refFaceTangents, Kokkos::ALL, 1); + ScalarViewType refEdgesTan("refEdgesTan", numEdges, dim); + ScalarViewType refFacesTangents("refFacesTangents", numFaces, dim, faceDim); ordinal_type numVertexDofs = numVertices; @@ -210,354 +468,206 @@ ProjectionTools::getHGradBasisCoeffs(Kokkos::DynRankView computedDofs("computedDofs",numVertexDofs+numEdgeDofs+numFaceDofs); - ordinal_type computedDofsCount = 0; + ordinal_type numTotalBasisEPoints = projStruct->getNumBasisEvalPoints(), + numTotalBasisGradEPoints = projStruct->getNumBasisDerivEvalPoints(); + ScalarViewType basisEPoints("basisEPoints",numCells,numTotalBasisEPoints, dim); + ScalarViewType basisGradEPoints("basisGradEPoints",numCells,numTotalBasisGradEPoints, dim); + getHGradEvaluationPoints(basisEPoints, basisGradEPoints, orts, cellBasis, projStruct, EvalPointsType::BASIS); - ordinal_type numTotalCubPoints = projStruct->getNumBasisEvalPoints(), - numTotalGradCubPoints = projStruct->getNumBasisDerivEvalPoints(); - ScalarViewType cubPoints("cubPoints",numCells,numTotalCubPoints, dim); - ScalarViewType gradCubPoints("gradCubPoints",numCells,numTotalGradCubPoints, dim); - getHGradEvaluationPoints(cubPoints, gradCubPoints, orts, cellBasis, projStruct, BASIS); - - ScalarViewType basisAtCubPoints("basisAtCubPoints",numCells,basisCardinality, 
numTotalCubPoints); - ScalarViewType basisAtTargetCubPoints("basisAtTargetCubPoints",numCells,basisCardinality, numTotalEvaluationPoints); + ScalarViewType basisAtTargetEPoints("basisAtTargetEPoints",numCells,basisCardinality, numTotalTargetEPoints); { - ScalarViewType nonOrientedBasisAtCubPoints("nonOrientedBasisAtCubPoints",numCells,basisCardinality, numTotalCubPoints); - ScalarViewType nonOrientedBasisAtTargetCubPoints("nonOrientedBasisAtTargetCubPoints",numCells,basisCardinality, numTotalEvaluationPoints); - + ScalarViewType nonOrientedBasisAtTargetEPoints("nonOrientedBasisAtTargetEPoints",numCells,basisCardinality, numTotalTargetEPoints); for(ordinal_type ic=0; icgetValues(Kokkos::subview(nonOrientedBasisAtTargetCubPoints,ic,Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(evaluationPoints, ic, Kokkos::ALL(), Kokkos::ALL())); - cellBasis->getValues(Kokkos::subview(nonOrientedBasisAtCubPoints,ic,Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(cubPoints, ic, Kokkos::ALL(), Kokkos::ALL())); + cellBasis->getValues(Kokkos::subview(nonOrientedBasisAtTargetEPoints,ic,Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(targetEPoints, ic, Kokkos::ALL(), Kokkos::ALL())); } - - OrientationTools::modifyBasisByOrientation(basisAtCubPoints, nonOrientedBasisAtCubPoints, orts, cellBasis); - OrientationTools::modifyBasisByOrientation(basisAtTargetCubPoints, nonOrientedBasisAtTargetCubPoints, orts, cellBasis); + OrientationTools::modifyBasisByOrientation(basisAtTargetEPoints, nonOrientedBasisAtTargetEPoints, orts, cellBasis); } - ScalarViewType basisGradAtGradCubPoints; - ScalarViewType basisGradAtTargetGradCubPoints; - if(numTotalGradEvaluationPoints>0) { - ScalarViewType nonOrientedBasisGradAtTargetGradCubPoints, nonOrientedBasisGradAtGradCubPoints; + ScalarViewType basisGradAtBasisGradEPoints; + ScalarViewType basisGradAtTargetGradEPoints; + if(numTotalBasisGradEPoints>0) { + ScalarViewType nonOrientedBasisGradAtTargetGradEPoints, nonOrientedBasisGradAtBasisGradEPoints; - 
basisGradAtGradCubPoints = ScalarViewType ("basisGradAtGradCubPoints",numCells,basisCardinality, numTotalGradCubPoints, dim); - nonOrientedBasisGradAtGradCubPoints = ScalarViewType ("nonOrientedBasisGradAtGradCubPoints",numCells,basisCardinality, numTotalGradCubPoints, dim); - basisGradAtTargetGradCubPoints = ScalarViewType("basisGradAtTargetGradCubPoints",numCells,basisCardinality, numTotalGradEvaluationPoints, dim); - nonOrientedBasisGradAtTargetGradCubPoints = ScalarViewType("nonOrientedBasisGradAtTargetGradCubPoints",numCells,basisCardinality, numTotalGradEvaluationPoints, dim); + basisGradAtBasisGradEPoints = ScalarViewType ("basisGradAtBasisGradEPoints",numCells,basisCardinality, numTotalBasisGradEPoints, dim); + nonOrientedBasisGradAtBasisGradEPoints = ScalarViewType ("nonOrientedBasisGradAtBasisGradEPoints",numCells,basisCardinality, numTotalBasisGradEPoints, dim); + basisGradAtTargetGradEPoints = ScalarViewType("basisGradAtTargetGradEPoints",numCells,basisCardinality, numTotalTargetGradEPoints, dim); + nonOrientedBasisGradAtTargetGradEPoints = ScalarViewType("nonOrientedBasisGradAtTargetGradEPoints",numCells,basisCardinality, numTotalTargetGradEPoints, dim); for(ordinal_type ic=0; icgetValues(Kokkos::subview(nonOrientedBasisGradAtGradCubPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(gradCubPoints, ic, Kokkos::ALL(), Kokkos::ALL()),OPERATOR_GRAD); - cellBasis->getValues(Kokkos::subview(nonOrientedBasisGradAtTargetGradCubPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(extDerivEvaluationPoints, ic, Kokkos::ALL(), Kokkos::ALL()),OPERATOR_GRAD); + cellBasis->getValues(Kokkos::subview(nonOrientedBasisGradAtBasisGradEPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(basisGradEPoints, ic, Kokkos::ALL(), Kokkos::ALL()),OPERATOR_GRAD); + cellBasis->getValues(Kokkos::subview(nonOrientedBasisGradAtTargetGradEPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(targetGradEPoints, ic, 
Kokkos::ALL(), Kokkos::ALL()),OPERATOR_GRAD); } - OrientationTools::modifyBasisByOrientation(basisGradAtGradCubPoints, nonOrientedBasisGradAtGradCubPoints, orts, cellBasis); - OrientationTools::modifyBasisByOrientation(basisGradAtTargetGradCubPoints, nonOrientedBasisGradAtTargetGradCubPoints, orts, cellBasis); + OrientationTools::modifyBasisByOrientation(basisGradAtBasisGradEPoints, nonOrientedBasisGradAtBasisGradEPoints, orts, cellBasis); + OrientationTools::modifyBasisByOrientation(basisGradAtTargetGradEPoints, nonOrientedBasisGradAtTargetGradEPoints, orts, cellBasis); } - for(ordinal_type iv=0; ivgetDofOrdinal(0, iv, 0); - computedDofs(computedDofsCount++) = idof; - for(ordinal_type ic=0; icgetTargetPointsRange()); + auto targetGradEPointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetDerivPointsRange()); - for(ordinal_type ie=0; iegetBasisDerivPointsRange()); - ordinal_type edgeCardinality = cellBasis->getDofCount(edgeDim,ie); - ordinal_type numCubPoints = projStruct->getNumBasisDerivEvalPoints(edgeDim, ie); - ordinal_type numTargetCubPoints = projStruct->getNumTargetDerivEvalPoints(edgeDim, ie); - - CellTools::getReferenceEdgeTangent(refEdgeTan, ie, cellBasis->getBaseCellTopology()); - - ScalarViewType edgeBasisAtCubPoints("tanBasisAtElemCubPoints",numCells,edgeCardinality, numCubPoints); - ScalarViewType edgeTargetAtTargetCubPoints("tanBasisAtTargetCubPoints",numCells, numTargetCubPoints); - ScalarViewType weightedBasisAtElemCubPoints("weightedTanBasisAtElemCubPoints",numCells,edgeCardinality, numCubPoints); - ScalarViewType weightedBasisAtTargetCubPoints("weightedTanBasisAtTargetCubPoints",numCells,edgeCardinality, numTargetCubPoints); - ScalarViewType mComputedProjection("mComputedProjection", numCells, numCubPoints); - - ScalarViewType targetEvalWeights = projStruct->getTargetDerivEvalWeights(edgeDim, ie); - ScalarViewType basisEvalWeights = projStruct->getBasisDerivEvalWeights(edgeDim, ie); - - //Note: we are 
not considering the jacobian of the orientation map since it is simply a scalar term for the integrals and it does not affect the projection - ordinal_type offsetBasis = projStruct->getBasisDerivPointsRange(edgeDim, ie).first; - ordinal_type offsetTarget = projStruct->getTargetDerivPointsRange(edgeDim, ie).first; - for(ordinal_type j=0; j getDofOrdinal(edgeDim, ie, j); - for(ordinal_type ic=0; icgetTopologyKey()); - for(ordinal_type ic=0; icgetAllDofOrdinal()); + ordinal_type computedDofsCount = 0; + for(ordinal_type iv=0; iv policy(0, numCells); + typedef ComputeBasisCoeffsOnVertices_HGRAD functorType; + Kokkos::parallel_for(policy, functorType(basisCoeffs, tagToOrdinal, targetEPointsRange, + targetAtTargetEPoints, basisAtTargetEPoints, numVertices)); - ScalarViewType cubWeights_("cubWeights_", numCells, 1, basisEvalWeights.extent(0)), targetEvalWeights_("targetEvalWeights", numCells, 1, targetEvalWeights.extent(0)); - RealSpaceTools::clone(cubWeights_, basisEvalWeights); - RealSpaceTools::clone(targetEvalWeights_, targetEvalWeights); + for(ordinal_type ie=0; ie::integrate(edgeMassMat_, edgeBasisAtCubPoints, weightedBasisAtElemCubPoints); - FunctionSpaceTools::integrate(edgeRhsMat_, edgeTargetAtTargetCubPoints, weightedBasisAtTargetCubPoints); - FunctionSpaceTools::integrate(edgeRhsMat_, mComputedProjection, weightedBasisAtElemCubPoints,true); + ordinal_type edgeCardinality = cellBasis->getDofCount(edgeDim,ie); + ordinal_type offsetBasis = basisGradEPointsRange(edgeDim, ie).first; + ordinal_type offsetTarget = targetGradEPointsRange(edgeDim, ie).first; + ordinal_type numBasisGradEPoints = range_size(basisGradEPointsRange(edgeDim, ie)); + ordinal_type numTargetGradEPoints = range_size(targetGradEPointsRange(edgeDim, ie)); + auto basisGradEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisDerivEvalWeights(edgeDim,ie)); + auto targetGradEWeights = Kokkos::create_mirror_view_and_copy(typename 
SpT::memory_space(),projStruct->getTargetDerivEvalWeights(edgeDim,ie)); + + auto edgeTan = Kokkos::subview(refEdgesTan, ie, Kokkos::ALL()); + auto edgeTanHost = Kokkos::create_mirror_view(edgeTan); + CellTools::getReferenceEdgeTangent(edgeTanHost,ie, cellTopo); + Kokkos::deep_copy(edgeTan,edgeTanHost); + + ScalarViewType basisTanAtEPoints("basisTanAtEPoints",numCells,edgeCardinality, numBasisGradEPoints); + ScalarViewType targetGradTanAtTargetGradEPoints("tanBasisAtTargetGradEPoints",numCells, numTargetGradEPoints); + ScalarViewType wBasisAtBasisGradEPoints("wTanBasisAtBasisGradEPoints",numCells,edgeCardinality, numBasisGradEPoints); + ScalarViewType wBasisAtTargetGradEPoints("wTanBasisAtTargetGradEPoints",numCells,edgeCardinality, numTargetGradEPoints); + ScalarViewType negPartialProjGrad("negPartialProjGrad", numCells, numBasisGradEPoints); + + typedef ComputeBasisCoeffsOnEdges_HGRAD functorTypeEdge; + + Kokkos::parallel_for(policy, functorTypeEdge(basisCoeffs, negPartialProjGrad, basisTanAtEPoints, + basisGradAtBasisGradEPoints, basisGradEWeights, wBasisAtBasisGradEPoints, targetGradEWeights, + basisGradAtTargetGradEPoints, wBasisAtTargetGradEPoints, computedDofs, tagToOrdinal, + targetGradTanAtTargetGradEPoints, targetGradAtTargetGradEPoints, refEdgesTan, + edgeCardinality, offsetBasis, + offsetTarget, numVertexDofs, edgeDim, dim, ie)); + ScalarViewType edgeMassMat_("edgeMassMat_", numCells, edgeCardinality, edgeCardinality), + edgeRhsMat_("rhsMat_", numCells, edgeCardinality); - Kokkos::View edgeMassMat("edgeMassMat", edgeCardinality,edgeCardinality); - Kokkos::View edgeRhsMat("edgeRhsMat",edgeCardinality, 1); + FunctionSpaceTools::integrate(edgeMassMat_, basisTanAtEPoints, wBasisAtBasisGradEPoints); + FunctionSpaceTools::integrate(edgeRhsMat_, targetGradTanAtTargetGradEPoints, wBasisAtTargetGradEPoints); + FunctionSpaceTools::integrate(edgeRhsMat_, negPartialProjGrad, wBasisAtBasisGradEPoints,true); - Teuchos::LAPACK lapack; - ordinal_type info = 0; - 
for(ordinal_type ic=0; ic WorkArrayViewType; + ScalarViewType t_("t",numCells, edgeCardinality); + WorkArrayViewType w_("w",numCells, edgeCardinality); - lapack.POSV('U', edgeCardinality, 1, - edgeMassMat.data(), - edgeMassMat.stride_1(), - edgeRhsMat.data(), - edgeRhsMat.stride_1(), - &info); - - if (info) { - std::stringstream ss; - ss << ">>> ERROR (Intrepid::ProjectionTools::getBasisCoeffs): " - << "LAPACK return with error code: " - << info; - INTREPID2_TEST_FOR_EXCEPTION( true, std::runtime_error, ss.str().c_str() ); - } + auto edgeDofs = Kokkos::subview(tagToOrdinal, edgeDim, ie, Kokkos::ALL()); + typedef SolveSystem functorTypeCellSys; + Kokkos::parallel_for(policy, functorTypeCellSys( basisCoeffs, edgeMassMat_, edgeRhsMat_, t_, w_, edgeDofs, edgeCardinality)); - for(ordinal_type i=0; igetDofOrdinal(edgeDim, ie, i); - basisCoeffs(ic,edge_dof) = edgeRhsMat(i,0); - } - } for(ordinal_type i=0; igetDofOrdinal(edgeDim, ie, i); + computedDofs(computedDofsCount++) = tagToOrdinal(edgeDim, ie, i); } - ScalarViewType ortJacobian("ortJacobian", faceDim, faceDim); - for(ordinal_type iface=0; ifacegetTopologyKey(faceDim,iface); - + const auto topoKey = refTopologyKey(faceDim,iface); ordinal_type faceCardinality = cellBasis->getDofCount(faceDim,iface); - ordinal_type numTargetGradCubPoints = projStruct->getNumTargetDerivEvalPoints(faceDim, iface); - ordinal_type numGradCubPoints = projStruct->getNumBasisDerivEvalPoints(faceDim, iface); - - CellTools::getReferenceFaceTangents(refFaceTanU, refFaceTanV,iface, cellTopo); - - ScalarViewType faceBasisGradAtGradCubPoints("normaBasisGradAtGradCubPoints",numCells,faceCardinality, numGradCubPoints,faceDim); - ScalarViewType wBasisGradAtGradCubPoints("weightedNormalBasisGradAtGradCubPoints",numCells,faceCardinality, numGradCubPoints,faceDim); - - ScalarViewType faceBasisGradAtTargetGradCubPoints("normalBasisGradAtTargetGradCubPoints",numCells,faceCardinality, numTargetGradCubPoints,faceDim); - ScalarViewType 
wBasisGradBasisAtTargetGradCubPoints("weightedNormalBasisGradAtTargetGradCubPoints",numCells,faceCardinality, numTargetGradCubPoints,faceDim); - - ScalarViewType targetGradAtTargetGradCubPoints("targetGradAtTargetGradCubPoints",numCells, numTargetGradCubPoints,faceDim); - ScalarViewType mComputedProjectionGrad("mNormalComputedProjection", numCells,numGradCubPoints,faceDim); - - ordinal_type offsetBasisGrad = projStruct->getBasisDerivPointsRange(faceDim, iface).first; - ordinal_type offsetTargetGrad = projStruct->getTargetDerivPointsRange(faceDim, iface).first; - ScalarViewType targetGradCubWeights = projStruct->getTargetDerivEvalWeights(faceDim, iface); - ScalarViewType gradCubWeights = projStruct->getBasisDerivEvalWeights(faceDim, iface); - - //Note: we are not considering the jacobian of the orientation map since it is simply a scalar term for the integrals and it does not affect the projection - for(ordinal_type ic=0; icgetDofOrdinal(faceDim, iface, j); - for(ordinal_type itan=0; itan getBasisDerivEvalWeights(faceDim,iface)); + auto targetGradEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetDerivEvalWeights(faceDim,iface)); + + auto refFaceTanU = Kokkos::subview(refFacesTangents, iface, Kokkos::ALL, 0); + auto refFaceTanV = Kokkos::subview(refFacesTangents, iface, Kokkos::ALL,1); + auto refFaceTanUHost = Kokkos::create_mirror_view(refFaceTanU); + auto refFaceTanVHost = Kokkos::create_mirror_view(refFaceTanV); + CellTools::getReferenceFaceTangents(refFaceTanUHost, refFaceTanVHost, iface, cellTopo); + Kokkos::deep_copy(refFaceTanU, refFaceTanUHost); + Kokkos::deep_copy(refFaceTanV, refFaceTanVHost); + + ScalarViewType faceBasisGradAtGradEPoints("normaBasisGradAtGradEPoints",numCells,faceCardinality, numGradEPoints,faceDim); + ScalarViewType wBasisGradAtGradEPoints("wNormalBasisGradAtGradEPoints",numCells,faceCardinality, numGradEPoints,faceDim); + ScalarViewType 
wBasisGradBasisAtTargetGradEPoints("wNormalBasisGradAtTargetGradEPoints",numCells,faceCardinality, numTargetGradEPoints,faceDim); + ScalarViewType targetGradTanAtTargetGradEPoints("targetGradTanAtTargetGradEPoints",numCells, numTargetGradEPoints,faceDim); + ScalarViewType negPartialProjGrad("mNormalComputedProjection", numCells,numGradEPoints,faceDim); + + typedef ComputeBasisCoeffsOnFaces_HGRAD functorTypeFace_HGRAD; + + Kokkos::parallel_for(policy, functorTypeFace_HGRAD(basisCoeffs, negPartialProjGrad, faceBasisGradAtGradEPoints, + basisGradAtBasisGradEPoints, basisGradEWeights, wBasisGradAtGradEPoints, targetGradEWeights, + basisGradAtTargetGradEPoints,wBasisGradBasisAtTargetGradEPoints, computedDofs, tagToOrdinal, + orts,targetGradTanAtTargetGradEPoints,targetGradAtTargetGradEPoints, + refFacesTangents, faceCardinality, offsetBasisGrad, + offsetTargetGrad, numVertexDofs+numEdgeDofs, numFaces, faceDim, + dim, iface, topoKey)); ScalarViewType faceMassMat_("faceMassMat_", numCells, faceCardinality, faceCardinality), faceRhsMat_("rhsMat_", numCells, faceCardinality); - FunctionSpaceTools::integrate(faceMassMat_, faceBasisGradAtGradCubPoints, wBasisGradAtGradCubPoints); + FunctionSpaceTools::integrate(faceMassMat_, faceBasisGradAtGradEPoints, wBasisGradAtGradEPoints); - FunctionSpaceTools::integrate(faceRhsMat_, targetGradAtTargetGradCubPoints, wBasisGradBasisAtTargetGradCubPoints); - FunctionSpaceTools::integrate(faceRhsMat_, mComputedProjectionGrad, wBasisGradAtGradCubPoints,true); + FunctionSpaceTools::integrate(faceRhsMat_, targetGradTanAtTargetGradEPoints, wBasisGradBasisAtTargetGradEPoints); + FunctionSpaceTools::integrate(faceRhsMat_, negPartialProjGrad, wBasisGradAtGradEPoints,true); Kokkos::View faceMassMat("faceMassMat", faceCardinality,faceCardinality); Kokkos::View faceRhsMat("faceRhsMat",faceCardinality, 1); - Teuchos::LAPACK lapack; - ordinal_type info = 0; - for(ordinal_type ic=0; ic>> ERROR (Intrepid::ProjectionTools::getBasisCoeffs): " - << "LAPACK 
return with error code: " - << info; - INTREPID2_TEST_FOR_EXCEPTION( true, std::runtime_error, ss.str().c_str() ); - } + typedef Kokkos::DynRankView WorkArrayViewType; + ScalarViewType t_("t",numCells, faceCardinality); + WorkArrayViewType w_("w",numCells, faceCardinality); - for(ordinal_type i=0; igetDofOrdinal(faceDim, iface, i); - basisCoeffs(ic,face_dof) = faceRhsMat(i,0); - } - } + auto faceDofs = Kokkos::subview(tagToOrdinal, faceDim, iface, Kokkos::ALL()); + typedef SolveSystem functorTypeCellSys; + Kokkos::parallel_for(policy, functorTypeCellSys( basisCoeffs, faceMassMat_, faceRhsMat_, t_, w_, faceDofs, faceCardinality)); for(ordinal_type i=0; igetDofOrdinal(faceDim, iface, i); + computedDofs(computedDofsCount++) = tagToOrdinal(faceDim, iface, i); } ordinal_type numElemDofs = cellBasis->getDofCount(dim,0); if(numElemDofs>0) { - range_type cellGradPointsRange = projStruct->getTargetDerivPointsRange(dim, 0); - - ordinal_type numTargetGradCubPoints = projStruct->getNumTargetDerivEvalPoints(dim,0); - ordinal_type numGradCubPoints = projStruct->getNumBasisDerivEvalPoints(dim,0); - - ScalarViewType internalBasisGradAtGradCubPoints("internalBasisGradAtCubPoints",numCells,numElemDofs, numGradCubPoints, dim); - ScalarViewType internalBasisGradAtTargetGradCubPoints("weightedBasisGradAtGradCubPoints",numCells,numElemDofs, numTargetGradCubPoints,dim); - ScalarViewType mComputedProjectionGrad("mComputedProjectionGrad", numCells, numGradCubPoints, dim); - - ScalarViewType targetGradCubWeights = projStruct->getTargetDerivEvalWeights(dim, 0); - ScalarViewType cubGradWeights = projStruct->getBasisDerivEvalWeights(dim, 0); - ordinal_type offsetBasisGrad = projStruct->getBasisDerivPointsRange(dim, 0).first; - ordinal_type offsetTargetGrad = projStruct->getTargetDerivPointsRange(dim, 0).first; - - - ScalarViewType wBasisGradAtGradCubPoints("weightedBasisGradAtGradCubPoints",numCells,numElemDofs, numGradCubPoints,dim); - ScalarViewType 
wBasisGradBasisAtTargetGradCubPoints("weightedBasisGradAtTargetGradCubPoints",numCells,numElemDofs, numTargetGradCubPoints,dim); - for(ordinal_type j=0; j getDofOrdinal(dim, 0, j); - for(ordinal_type ic=0; icgetTargetDerivEvalWeights(dim,0)); + auto basisGradEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisDerivEvalWeights(dim,0)); + + ScalarViewType cellBasisGradAtGradEPoints("internalBasisGradAtEPoints",numCells,numElemDofs, numGradEPoints, dim); + ScalarViewType negPartialProjGrad("negPartialProjGrad", numCells, numGradEPoints, dim); + ScalarViewType wBasisGradAtGradEPoints("wBasisGradAtGradEPoints",numCells,numElemDofs, numGradEPoints,dim); + ScalarViewType wBasisGradBasisAtTargetGradEPoints("wBasisGradAtTargetGradEPoints",numCells,numElemDofs, numTargetGradEPoints,dim); + + auto elemDof = Kokkos::subview(tagToOrdinal, dim, 0, Kokkos::ALL()); + typedef ComputeBasisCoeffsOnCells_HGRAD functorTypeCell_HGRAD; + Kokkos::parallel_for(policy, functorTypeCell_HGRAD(basisCoeffs, negPartialProjGrad, cellBasisGradAtGradEPoints, + basisGradAtBasisGradEPoints, basisGradEWeights, wBasisGradAtGradEPoints, targetGradEWeights, + basisGradAtTargetGradEPoints, wBasisGradBasisAtTargetGradEPoints, computedDofs, elemDof, + dim, numElemDofs, offsetBasisGrad, offsetTargetGrad, numVertexDofs+numEdgeDofs+numFaceDofs)); ScalarViewType cellMassMat_("cellMassMat_", numCells, numElemDofs, numElemDofs), cellRhsMat_("rhsMat_", numCells, numElemDofs); - FunctionSpaceTools::integrate(cellMassMat_, internalBasisGradAtGradCubPoints, wBasisGradAtGradCubPoints); - FunctionSpaceTools::integrate(cellRhsMat_, Kokkos::subview(targetGradAtGradEvalPoints,Kokkos::ALL(),cellGradPointsRange,Kokkos::ALL()), wBasisGradBasisAtTargetGradCubPoints); - FunctionSpaceTools::integrate(cellRhsMat_, mComputedProjectionGrad, wBasisGradAtGradCubPoints, true); + FunctionSpaceTools::integrate(cellMassMat_, cellBasisGradAtGradEPoints, wBasisGradAtGradEPoints); + 
FunctionSpaceTools::integrate(cellRhsMat_, Kokkos::subview(targetGradAtTargetGradEPoints,Kokkos::ALL(),cellTargetGradEPointsRange,Kokkos::ALL()), wBasisGradBasisAtTargetGradEPoints); + FunctionSpaceTools::integrate(cellRhsMat_, negPartialProjGrad, wBasisGradAtGradEPoints, true); - Kokkos::View cellMassMat("cellMassMat", numElemDofs,numElemDofs); - Kokkos::View cellRhsMat("cellRhsMat",numElemDofs, 1); + typedef Kokkos::DynRankView WorkArrayViewType; + ScalarViewType t_("t",numCells, numElemDofs); + WorkArrayViewType w_("w",numCells, numElemDofs); - Teuchos::LAPACK lapack; - ordinal_type info = 0; - for(ordinal_type ic=0; icgetDofOrdinal(dim, 0, i); - basisCoeffs(ic,idof) = cellRhsMat(i,0); - } - - if (info) { - std::stringstream ss; - ss << ">>> ERROR (Intrepid::ProjectionTools::getBasisCoeffs): " - << "LAPACK return with error code: " - << info; - INTREPID2_TEST_FOR_EXCEPTION( true, std::runtime_error, ss.str().c_str() ); - } - } + auto cellDofs = Kokkos::subview(tagToOrdinal, dim, 0, Kokkos::ALL()); + typedef SolveSystem functorTypeCellSys; + Kokkos::parallel_for(policy, functorTypeCellSys( basisCoeffs, cellMassMat_, cellRhsMat_, t_, w_, cellDofs, numElemDofs)); } } } diff --git a/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHVOL.hpp b/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHVOL.hpp index 9ad65440990a..3245dc4d02fd 100644 --- a/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHVOL.hpp +++ b/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHVOL.hpp @@ -61,22 +61,15 @@ template template void -ProjectionTools::getHVolEvaluationPoints(typename BasisType::ScalarViewType evaluationPoints, +ProjectionTools::getHVolEvaluationPoints(typename BasisType::ScalarViewType ePoints, const Kokkos::DynRankView /*orts*/, const BasisType* cellBasis, ProjectionStruct * projStruct, - const EvalPointsType evalPointType) { - typedef typename BasisType::scalarType scalarType; - typedef Kokkos::DynRankView ScalarViewType; + 
const EvalPointsType ePointType) { ordinal_type dim = cellBasis->getBaseCellTopology().getDimension(); - - ScalarViewType cubPoints; - if(evalPointType == TARGET) { - cubPoints = projStruct->getTargetEvalPoints(dim, 0); - } else { - cubPoints = projStruct->getBasisEvalPoints(dim, 0); - } - RealSpaceTools::clone(evaluationPoints,cubPoints); + auto refEPoints = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getEvalPoints(dim,0,ePointType)); + auto ePointsRange = projStruct->getPointsRange(ePointType); + RealSpaceTools::clone(Kokkos::subview(ePoints, Kokkos::ALL(), ePointsRange(dim, 0), Kokkos::ALL()), refEPoints); } @@ -87,87 +80,75 @@ typename BasisType, typename ortValueType,class ...ortProperties> void ProjectionTools::getHVolBasisCoeffs(Kokkos::DynRankView basisCoeffs, - const Kokkos::DynRankView targetAtEvalPoints, - const typename BasisType::ScalarViewType evaluationPoints, + const Kokkos::DynRankView targetAtTargetEPoints, + const typename BasisType::ScalarViewType targetEPoints, const Kokkos::DynRankView orts, const BasisType* cellBasis, ProjectionStruct * projStruct){ - typedef typename Kokkos::Impl::is_space::host_mirror_space::execution_space host_space_type; typedef typename BasisType::scalarType scalarType; typedef Kokkos::DynRankView ScalarViewType; ordinal_type dim = cellBasis->getBaseCellTopology().getDimension(); ordinal_type basisCardinality = cellBasis->getCardinality(); - ordinal_type numCubPoints = projStruct->getNumBasisEvalPoints(dim, 0); - ordinal_type numTargetCubPoints = projStruct->getNumTargetEvalPoints(dim, 0); - ScalarViewType cubPoints = projStruct->getBasisEvalPoints(dim, 0); - ScalarViewType cubWeights = projStruct->getBasisEvalWeights(dim, 0); - ScalarViewType cubTargetWeights = projStruct->getTargetEvalWeights(dim, 0); + ordinal_type numCells = targetAtTargetEPoints.extent(0); + + auto refTargetEWeights = Kokkos::create_mirror_view_and_copy(typename 
SpT::memory_space(),projStruct->getTargetEvalWeights(dim,0)); + auto targetEPointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetPointsRange()); - ordinal_type numCells = targetAtEvalPoints.extent(0); + auto refBasisEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisEvalWeights(dim,0)); + auto basisEPointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisPointsRange()); - ScalarViewType basisAtCubPoints("basisAtcubPoints", basisCardinality, numCubPoints); - ScalarViewType basisAtcubTargetPoints("basisAtcubTargetPoints", basisCardinality, numTargetCubPoints); + ordinal_type numTargetEPoints = range_size(targetEPointsRange(dim,0)); + ordinal_type numBasisEPoints = range_size(basisEPointsRange(dim,0)); - cellBasis->getValues(basisAtCubPoints, cubPoints); - if(evaluationPoints.rank()==3) - cellBasis->getValues(basisAtcubTargetPoints, Kokkos::subview(evaluationPoints,0,Kokkos::ALL(),Kokkos::ALL())); + ScalarViewType basisAtBasisEPoints("basisAtBasisEPoints", 1, basisCardinality, numBasisEPoints); + ScalarViewType basisAtTargetEPoints("basisAtTargetEPoints", basisCardinality, numTargetEPoints); + + ScalarViewType basisEPoints("basisEPoints",numCells,projStruct->getNumBasisEvalPoints(), dim); + getHVolEvaluationPoints(basisEPoints, orts, cellBasis, projStruct, EvalPointsType::BASIS); + + cellBasis->getValues(Kokkos::subview(basisAtBasisEPoints, 0, Kokkos::ALL(), Kokkos::ALL()), Kokkos::subview(basisEPoints,0, Kokkos::ALL(), Kokkos::ALL())); + if(targetEPoints.rank()==3) + cellBasis->getValues(basisAtTargetEPoints, Kokkos::subview(targetEPoints, 0, Kokkos::ALL(), Kokkos::ALL())); else - cellBasis->getValues(basisAtcubTargetPoints, evaluationPoints); - - - ScalarViewType weightedBasisAtcubTargetPoints_("weightedBasisAtcubTargetPoints_",numCells, basisCardinality, numTargetCubPoints); - ScalarViewType 
cubWeights_(cubWeights.data(),1,numCubPoints); - ScalarViewType evaluationWeights_(cubTargetWeights.data(),1,numTargetCubPoints); - ScalarViewType basisAtcubTargetPoints_(basisAtcubTargetPoints.data(),1, basisCardinality, numTargetCubPoints); - ScalarViewType basisAtCubPoints_(basisAtCubPoints.data(),1, basisCardinality, numCubPoints); - ScalarViewType weightedBasisAtCubPoints("weightedBasisAtCubPoints",1,basisCardinality, numCubPoints); - ScalarViewType weightedBasisAtcubTargetPoints("weightedBasisAtcubTargetPoints",1, basisCardinality, numTargetCubPoints); - ArrayTools::scalarMultiplyDataField( weightedBasisAtCubPoints, cubWeights_, basisAtCubPoints_, false); - ArrayTools::scalarMultiplyDataField( weightedBasisAtcubTargetPoints, evaluationWeights_, basisAtcubTargetPoints, false); - RealSpaceTools::clone(weightedBasisAtcubTargetPoints_,Kokkos::subview(weightedBasisAtcubTargetPoints,0,Kokkos::ALL(), Kokkos::ALL())); - - Kokkos::View - massMat("massMat", basisCardinality, basisCardinality), - rhsMat("rhsMat", basisCardinality, numCells ); - - Kokkos::DynRankView massMat_(massMat.data(),1,basisCardinality,basisCardinality); - Kokkos::DynRankView rhsMatTrans("rhsMatTrans",numCells,basisCardinality); - - FunctionSpaceTools::integrate(massMat_, basisAtCubPoints_, weightedBasisAtCubPoints); - FunctionSpaceTools::integrate(rhsMatTrans, targetAtEvalPoints, weightedBasisAtcubTargetPoints_); - - for(ordinal_type i=0; i lapack; - ordinal_type info = 0; - - lapack.POSV('U', basisCardinality, numCells, - massMat.data(), - massMat.stride_1(), - rhsMat.data(), - rhsMat.stride_1(), - &info); - - for(ordinal_type i=0; i>> ERROR (Intrepid::ProjectionTools::getBasisCoeffs): " - << "LAPACK return with error code: " - << info; - INTREPID2_TEST_FOR_EXCEPTION( true, std::runtime_error, ss.str().c_str() ); + cellBasis->getValues(basisAtTargetEPoints, targetEPoints); + + ScalarViewType weightedBasisAtTargetEPoints("weightedBasisAtTargetEPoints_",numCells, basisCardinality, 
numTargetEPoints); + ScalarViewType weightedBasisAtBasisEPoints("weightedBasisAtBasisEPoints", 1, basisCardinality, numBasisEPoints); + + auto tagToOrdinal = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(), cellBasis->getAllDofOrdinal()); + auto cellDofs = Kokkos::subview(tagToOrdinal, dim, 0, Kokkos::ALL()); + + ScalarViewType + massMat0("massMat0", 1, basisCardinality, basisCardinality), + massMat("massMat", numCells, basisCardinality, basisCardinality), + rhsMat("rhsMat", numCells, basisCardinality ); + + ordinal_type offsetBasis = basisEPointsRange(dim,0).first; + ordinal_type offsetTarget = targetEPointsRange(dim,0).first; + for(ordinal_type j=0; j ::integrate(massMat0, basisAtBasisEPoints, weightedBasisAtBasisEPoints); + RealSpaceTools::clone(massMat, Kokkos::subview(massMat0,0,Kokkos::ALL(), Kokkos::ALL())); + RealSpaceTools::clone(weightedBasisAtTargetEPoints, Kokkos::subview(weightedBasisAtTargetEPoints,0,Kokkos::ALL(), Kokkos::ALL())); + FunctionSpaceTools::integrate(rhsMat, targetAtTargetEPoints, weightedBasisAtTargetEPoints); + typedef Kokkos::DynRankView WorkArrayViewType; + ScalarViewType t_("t",numCells, basisCardinality); + WorkArrayViewType w_("w",numCells,basisCardinality); + + const Kokkos::RangePolicy policy(0, numCells); + typedef SolveSystem functorType; + Kokkos::parallel_for(policy, functorType( basisCoeffs, massMat, rhsMat, t_, w_, cellDofs, basisCardinality)); +} } } diff --git a/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefL2.hpp b/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefL2.hpp index 049c057ec99b..876fb4a4dc8f 100644 --- a/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefL2.hpp +++ b/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefL2.hpp @@ -57,107 +57,367 @@ namespace Intrepid2 { namespace Experimental { + +template +struct ComputeBasisCoeffsOnVertices_L2 { + ViewType1 basisCoeffs_; + const ViewType2 tagToOrdinal_; + const ViewType3 targetEPointsRange_; + 
const ViewType4 targetAtTargetEPoints_; + const ViewType5 basisAtTargetEPoints_; + ordinal_type numVertices_; + + + ComputeBasisCoeffsOnVertices_L2(ViewType1 basisCoeffs, ViewType2 tagToOrdinal, ViewType3 targetEPointsRange, + ViewType4 targetAtTargetEPoints, ViewType5 basisAtTargetEPoints, ordinal_type numVertices) : + basisCoeffs_(basisCoeffs), tagToOrdinal_(tagToOrdinal), targetEPointsRange_(targetEPointsRange), + targetAtTargetEPoints_(targetAtTargetEPoints), basisAtTargetEPoints_(basisAtTargetEPoints), numVertices_(numVertices) {} + + void + KOKKOS_INLINE_FUNCTION + operator()(const ordinal_type ic) const { + for(ordinal_type iv=0; iv +struct ComputeBasisCoeffsOnEdges_L2 { + const ViewType1 basisCoeffs_; + const ViewType2 negPartialProj_; + const ViewType2 basisDofDofAtBasisEPoints_; + const ViewType2 basisAtBasisEPoints_; + const ViewType3 basisEWeights_; + const ViewType2 wBasisDofAtBasisEPoints_; + const ViewType3 targetEWeights_; + const ViewType2 basisAtTargetEPoints_; + const ViewType2 wBasisDofAtTargetEPoints_; + const ViewType4 computedDofs_; + const ViewType5 tagToOrdinal_; + const ViewType6 targetAtTargetEPoints_; + const ViewType2 targetTanAtTargetEPoints_; + const ViewType2 refEdgesVec_; + ordinal_type fieldDim_; + ordinal_type edgeCardinality_; + ordinal_type offsetBasis_; + ordinal_type offsetTarget_; + ordinal_type numVertexDofs_; + ordinal_type edgeDim_; + ordinal_type iedge_; + + ComputeBasisCoeffsOnEdges_L2(const ViewType1 basisCoeffs, ViewType2 negPartialProj, const ViewType2 basisDofDofAtBasisEPoints, + const ViewType2 basisAtBasisEPoints, const ViewType3 basisEWeights, const ViewType2 wBasisDofAtBasisEPoints, const ViewType3 targetEWeights, + const ViewType2 basisAtTargetEPoints, const ViewType2 wBasisDofAtTargetEPoints, const ViewType4 computedDofs, const ViewType5 tagToOrdinal, + const ViewType6 targetAtTargetEPoints, const ViewType2 targetTanAtTargetEPoints, const ViewType2 refEdgesVec, + ordinal_type fieldDim, ordinal_type 
edgeCardinality, ordinal_type offsetBasis, + ordinal_type offsetTarget, ordinal_type numVertexDofs, ordinal_type edgeDim, ordinal_type iedge) : + basisCoeffs_(basisCoeffs), negPartialProj_(negPartialProj), basisDofDofAtBasisEPoints_(basisDofDofAtBasisEPoints), + basisAtBasisEPoints_(basisAtBasisEPoints), basisEWeights_(basisEWeights), wBasisDofAtBasisEPoints_(wBasisDofAtBasisEPoints), targetEWeights_(targetEWeights), + basisAtTargetEPoints_(basisAtTargetEPoints), wBasisDofAtTargetEPoints_(wBasisDofAtTargetEPoints), + computedDofs_(computedDofs), tagToOrdinal_(tagToOrdinal), targetAtTargetEPoints_(targetAtTargetEPoints), + targetTanAtTargetEPoints_(targetTanAtTargetEPoints), refEdgesVec_(refEdgesVec), + fieldDim_(fieldDim), edgeCardinality_(edgeCardinality), offsetBasis_(offsetBasis), + offsetTarget_(offsetTarget), numVertexDofs_(numVertexDofs), edgeDim_(edgeDim), iedge_(iedge) + {} + + void + KOKKOS_INLINE_FUNCTION + operator()(const ordinal_type ic) const { + for(ordinal_type j=0; j +struct ComputeBasisCoeffsOnFaces_L2 { + const ViewType1 basisCoeffs_; + const ViewType2 negPartialProj_; + const ViewType2 faceBasisDofAtBasisEPoints_; + const ViewType2 basisAtBasisEPoints_; + const ViewType3 basisEWeights_; + const ViewType2 wBasisDofAtBasisEPoints_; + const ViewType3 targetEWeights_; + const ViewType2 basisAtTargetEPoints_; + const ViewType2 wBasisDofAtTargetEPoints_; + const ViewType4 computedDofs_; + const ViewType5 tagToOrdinal_; + const ViewType6 orts_; + const ViewType7 targetAtTargetEPoints_; + const ViewType2 targetDofAtTargetEPoints_; + const ViewType2 ortJacobian_; + const ViewType2 faceCoeff_; + const ViewType2 refFacesTangents_; + const ViewType2 refFacesNormal_; + ordinal_type fieldDim_; + ordinal_type faceCardinality_; + ordinal_type offsetBasis_; + ordinal_type offsetTarget_; + ordinal_type numVertexEdgeDofs_; + ordinal_type numFaces_; + ordinal_type faceDim_; + ordinal_type faceDofDim_; + ordinal_type dim_; + ordinal_type iface_; + unsigned topoKey_; 
+ bool isHCurlBasis_, isHDivBasis_; + + ComputeBasisCoeffsOnFaces_L2(const ViewType1 basisCoeffs, ViewType2 negPartialProj, const ViewType2 faceBasisDofAtBasisEPoints, + const ViewType2 basisAtBasisEPoints, const ViewType3 basisEWeights, const ViewType2 wBasisDofAtBasisEPoints, const ViewType3 targetEWeights, + const ViewType2 basisAtTargetEPoints, const ViewType2 wBasisDofAtTargetEPoints, const ViewType4 computedDofs, const ViewType5 tagToOrdinal, + const ViewType6 orts, const ViewType7 targetAtTargetEPoints, const ViewType2 targetDofAtTargetEPoints, const ViewType2 ortJacobian, const ViewType2 faceCoeff, + const ViewType2 refFacesTangents, const ViewType2 refFacesNormal, ordinal_type fieldDim, ordinal_type faceCardinality, ordinal_type offsetBasis, + ordinal_type offsetTarget, ordinal_type numVertexEdgeDofs, ordinal_type numFaces, ordinal_type faceDim, ordinal_type faceDofDim, + ordinal_type dim, ordinal_type iface, unsigned topoKey, bool isHCurlBasis, bool isHDivBasis) : + basisCoeffs_(basisCoeffs), negPartialProj_(negPartialProj), faceBasisDofAtBasisEPoints_(faceBasisDofAtBasisEPoints), + basisAtBasisEPoints_(basisAtBasisEPoints), basisEWeights_(basisEWeights), wBasisDofAtBasisEPoints_(wBasisDofAtBasisEPoints), targetEWeights_(targetEWeights), + basisAtTargetEPoints_(basisAtTargetEPoints), wBasisDofAtTargetEPoints_(wBasisDofAtTargetEPoints), + computedDofs_(computedDofs), tagToOrdinal_(tagToOrdinal), orts_(orts), targetAtTargetEPoints_(targetAtTargetEPoints), + targetDofAtTargetEPoints_(targetDofAtTargetEPoints), ortJacobian_(ortJacobian), faceCoeff_(faceCoeff), + refFacesTangents_(refFacesTangents), refFacesNormal_(refFacesNormal), + fieldDim_(fieldDim), faceCardinality_(faceCardinality), offsetBasis_(offsetBasis), + offsetTarget_(offsetTarget), numVertexEdgeDofs_(numVertexEdgeDofs), numFaces_(numFaces), + faceDim_(faceDim), faceDofDim_(faceDofDim), dim_(dim), iface_(iface), topoKey_(topoKey), + isHCurlBasis_(isHCurlBasis), isHDivBasis_(isHDivBasis) + {} + + 
void + KOKKOS_INLINE_FUNCTION + operator()(const ordinal_type ic) const { + + ordinal_type fOrt[6]; + orts_(ic).getFaceOrientation(fOrt, numFaces_); + ordinal_type ort = fOrt[iface_]; + //Note: we are not considering the jacobian of the orientation map since it is simply a scalar term for the integrals and it does not affect the projection + + auto ortJacobian = Kokkos::subview(ortJacobian_, ic, Kokkos::ALL(), Kokkos::ALL()); + if(isHCurlBasis_) { + Impl::OrientationTools::getJacobianOfOrientationMap(ortJacobian, topoKey_, ort); + for(ordinal_type d=0; d +struct ComputeBasisCoeffsOnCells_L2 { + const ViewType1 basisCoeffs_; + const ViewType2 negPartialProj_; + const ViewType2 internalBasisAtBasisEPoints_; + const ViewType2 basisAtBasisEPoints_; + const ViewType3 basisEWeights_; + const ViewType2 wBasisAtBasisEPoints_; + const ViewType3 targetEWeights_; + const ViewType2 basisAtTargetEPoints_; + const ViewType2 wBasisDofAtTargetEPoints_; + const ViewType4 computedDofs_; + const ViewType5 elemDof_; + ordinal_type fieldDim_; + ordinal_type numElemDofs_; + ordinal_type offsetBasis_; + ordinal_type offsetTarget_; + ordinal_type numVertexEdgeFaceDofs_; + + ComputeBasisCoeffsOnCells_L2(const ViewType1 basisCoeffs, ViewType2 negPartialProj, const ViewType2 internalBasisAtBasisEPoints, + const ViewType2 basisAtBasisEPoints, const ViewType3 basisEWeights, const ViewType2 wBasisAtBasisEPoints, const ViewType3 targetEWeights, + const ViewType2 basisAtTargetEPoints, const ViewType2 wBasisDofAtTargetEPoints, const ViewType4 computedDofs, const ViewType5 elemDof, + ordinal_type fieldDim, ordinal_type numElemDofs, ordinal_type offsetBasis, ordinal_type offsetTarget, ordinal_type numVertexEdgeFaceDofs) : + basisCoeffs_(basisCoeffs), negPartialProj_(negPartialProj), internalBasisAtBasisEPoints_(internalBasisAtBasisEPoints), + basisAtBasisEPoints_(basisAtBasisEPoints), basisEWeights_(basisEWeights), wBasisAtBasisEPoints_(wBasisAtBasisEPoints), targetEWeights_(targetEWeights), + 
basisAtTargetEPoints_(basisAtTargetEPoints), wBasisDofAtTargetEPoints_(wBasisDofAtTargetEPoints), + computedDofs_(computedDofs), elemDof_(elemDof), fieldDim_(fieldDim), numElemDofs_(numElemDofs), offsetBasis_(offsetBasis), + offsetTarget_(offsetTarget), numVertexEdgeFaceDofs_(numVertexEdgeFaceDofs) {} + + void + KOKKOS_INLINE_FUNCTION + operator()(const ordinal_type ic) const { + + for(ordinal_type j=0; j template void -ProjectionTools::getL2EvaluationPoints(typename BasisType::ScalarViewType evaluationPoints, +ProjectionTools::getL2EvaluationPoints(typename BasisType::ScalarViewType ePoints, const Kokkos::DynRankView orts, const BasisType* cellBasis, ProjectionStruct * projStruct, - const EvalPointsType evalPointType) { + const EvalPointsType ePointType) { typedef typename BasisType::scalarType scalarType; - typedef Kokkos::DynRankView ScalarViewType; + typedef Kokkos::DynRankView ScalarViewType; typedef Kokkos::pair range_type; const auto cellTopo = cellBasis->getBaseCellTopology(); + //const auto cellTopoKey = cellBasis->getBaseCellTopology().getKey(); ordinal_type dim = cellTopo.getDimension(); - ordinal_type numCells = evaluationPoints.extent(0); + ordinal_type numCells = ePoints.extent(0); const ordinal_type edgeDim = 1; const ordinal_type faceDim = 2; ordinal_type numVertices = (cellBasis->getDofCount(0, 0) > 0) ? cellTopo.getVertexCount() : 0; - ordinal_type numEdges = (cellBasis->getDofCount(1, 0) > 0) ? cellTopo.getEdgeCount() : 0; - ordinal_type numFaces = (cellBasis->getDofCount(2, 0) > 0) ? cellTopo.getFaceCount() : 0; + ordinal_type numEdges = (cellBasis->getDofCount(edgeDim, 0) > 0) ? cellTopo.getEdgeCount() : 0; + ordinal_type numFaces = (cellBasis->getDofCount(faceDim, 0) > 0) ? cellTopo.getFaceCount() : 0; + ordinal_type numVols = (cellBasis->getDofCount(dim, 0) > 0); - Kokkos::View eOrt("eOrt", numEdges), fOrt("fOrt", numFaces); + CellTools::setSubcellParametrization(); - if(numVertices>0) { - //TODO: use lattice to retrieve vertex coordinates. 
- ScalarViewType dofCoords("dofCoords", cellBasis->getCardinality(), dim); - cellBasis->getDofCoords(dofCoords); - for(ordinal_type iv=0; ivgetDofOrdinal(0, iv, 0); - for(ordinal_type d=0; dgetPointsRange(ePointType)); - for(ordinal_type ie=0; iegetTargetPointsRange(edgeDim, ie); - cubPoints = projStruct->getTargetEvalPoints(edgeDim, ie); - } - else { - edgePointsRange = projStruct->getBasisPointsRange(edgeDim, ie); - cubPoints = projStruct->getBasisEvalPoints(edgeDim, ie); - } + typename CellTools::subcellParamViewType subcellParamEdge, subcellParamFace; + if(numEdges>0) + CellTools::getSubcellParametrization(subcellParamEdge, edgeDim, cellTopo); + if(numFaces>0) + CellTools::getSubcellParametrization(subcellParamFace, faceDim, cellTopo); - ScalarViewType orientedTargetCubPoints("orientedTargetCubPoints", cubPoints.extent(0),edgeDim); + auto refTopologyKey = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTopologyKey()); - const auto topoKey = projStruct->getTopologyKey(edgeDim,ie); + ScalarViewType workView("workView", numCells, projStruct->getMaxNumEvalPoints(ePointType), dim-1); - for(ordinal_type ic=0; ic::mapToReferenceSubcell(Kokkos::subview(evaluationPoints,ic,edgePointsRange,Kokkos::ALL()), orientedTargetCubPoints, edgeDim, ie, cellBasis->getBaseCellTopology()); + if(numVertices>0) { + for(ordinal_type iv=0; ivgetEvalPoints(0,iv,ePointType)); + RealSpaceTools::clone(Kokkos::subview(ePoints, Kokkos::ALL(), + ePointsRange(0, iv), Kokkos::ALL()), vertexEPoints); } } + for(ordinal_type ie=0; iegetEvalPoints(edgeDim,ie,ePointType)); - for(ordinal_type iface=0; iface (0, numCells), + KOKKOS_LAMBDA (const size_t ic) { - ScalarViewType cubPoints; - range_type facePointsRange; - if(evalPointType == TARGET) { - cubPoints = projStruct->getTargetEvalPoints(faceDim, iface); - facePointsRange = projStruct->getTargetPointsRange(faceDim, iface); - } else { - cubPoints = projStruct->getBasisEvalPoints(faceDim, iface); - facePointsRange = 
projStruct->getBasisPointsRange(faceDim, iface); - } - - ScalarViewType faceCubPoints("faceCubPoints", cubPoints.extent(0), faceDim); + ordinal_type eOrt[12]; + orts(ic).getEdgeOrientation(eOrt, numEdges); + ordinal_type ort = eOrt[ie]; - const auto topoKey = projStruct->getTopologyKey(faceDim,iface); - for(ordinal_type ic=0; ic::mapToReferenceSubcell(Kokkos::subview(ePoints,ic,edgePointsRange,Kokkos::ALL()), orientedEdgeEPoints, subcellParamEdge, edgeDim, ie, dim); + }); + } - ordinal_type ort = fOrt(iface); - Impl::OrientationTools::mapToModifiedReference(faceCubPoints,cubPoints,topoKey,ort); - CellTools::mapToReferenceSubcell(Kokkos::subview(evaluationPoints, ic, facePointsRange, Kokkos::ALL()), faceCubPoints, faceDim, iface, cellBasis->getBaseCellTopology()); - } + for(ordinal_type iface=0; ifacegetEvalPoints(faceDim,iface,ePointType)); + + Kokkos::parallel_for + ("Evaluate Points", + Kokkos::RangePolicy (0, numCells), + KOKKOS_LAMBDA (const size_t ic) { + ordinal_type fOrt[6]; + orts(ic).getFaceOrientation(fOrt, numFaces); + ordinal_type ort = fOrt[iface]; + + auto orientedFaceEPoints = Kokkos::subview(workView, ic, faceRefPointsRange, Kokkos::ALL()); + + Impl::OrientationTools::mapToModifiedReference(orientedFaceEPoints,faceEPoints,refTopologyKey(faceDim,iface),ort); + CellTools::mapToReferenceSubcell(Kokkos::subview(ePoints, ic, facePointsRange, Kokkos::ALL()), orientedFaceEPoints, subcellParamFace, faceDim, iface, dim); + }); } - if(cellBasis->getDofCount(dim,0)>0) { - range_type cellPointsRange; - ScalarViewType cubPoints; - if(evalPointType == TARGET) { - cubPoints = projStruct->getTargetEvalPoints(dim, 0); - cellPointsRange = projStruct->getTargetPointsRange(dim, 0); - } else { - cubPoints = projStruct->getBasisEvalPoints(dim, 0); - cellPointsRange = projStruct->getBasisPointsRange(dim, 0); - } - RealSpaceTools::clone(Kokkos::subview(evaluationPoints, Kokkos::ALL(), cellPointsRange, Kokkos::ALL()), cubPoints); + + if(numVols > 0) { + auto pointsRange = 
ePointsRange(dim, 0); + auto cellEPoints = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getEvalPoints(dim,0,ePointType)); + RealSpaceTools::clone(Kokkos::subview(ePoints, Kokkos::ALL(), pointsRange, Kokkos::ALL()), cellEPoints); } } - template template void ProjectionTools::getL2BasisCoeffs(Kokkos::DynRankView basisCoeffs, - const Kokkos::DynRankView targetAtEvalPoints, - const typename BasisType::ScalarViewType evaluationPoints, + const Kokkos::DynRankView targetAtTargetEPoints, + const typename BasisType::ScalarViewType targetEPoints, const Kokkos::DynRankView orts, const BasisType* cellBasis, ProjectionStruct * projStruct){ - typedef typename Kokkos::Impl::is_space::host_mirror_space::execution_space host_space_type; typedef typename BasisType::scalarType scalarType; typedef Kokkos::DynRankView ScalarViewType; typedef Kokkos::pair range_type; const auto cellTopo = cellBasis->getBaseCellTopology(); ordinal_type dim = cellTopo.getDimension(); - ordinal_type numTotalEvaluationPoints(targetAtEvalPoints.extent(1)); + ordinal_type numTotalTargetEPoints(targetAtTargetEPoints.extent(1)); ordinal_type basisCardinality = cellBasis->getCardinality(); - ordinal_type numCells = targetAtEvalPoints.extent(0); + ordinal_type numCells = targetAtTargetEPoints.extent(0); const ordinal_type edgeDim = 1; const ordinal_type faceDim = 2; - const ordinal_type fieldDim = (targetAtEvalPoints.rank()==2) ? 1 : targetAtEvalPoints.extent(2); - - const std::string& name = cellBasis->getName(); + const ordinal_type fieldDim = (targetAtTargetEPoints.rank()==2) ? 1 : targetAtTargetEPoints.extent(2); ordinal_type numVertices = (cellBasis->getDofCount(0, 0) > 0) ? cellTopo.getVertexCount() : 0; ordinal_type numEdges = (cellBasis->getDofCount(1, 0) > 0) ? cellTopo.getEdgeCount() : 0; ordinal_type numFaces = (cellBasis->getDofCount(2, 0) > 0) ? 
cellTopo.getFaceCount() : 0; - Kokkos::View eOrt("eOrt", numEdges); - Kokkos::View fOrt("fOrt", numFaces); - ScalarViewType refEdgeTan("refEdgeTan", dim); - ScalarViewType refEdgeNormal("refEdgeNormal", dim); - ScalarViewType refFaceTangents("refFaceTangents", dim, 2); - ScalarViewType refFaceNormal("refFaceNormal", dim); - auto refFaceTanU = Kokkos::subview(refFaceTangents, Kokkos::ALL, 0); - auto refFaceTanV = Kokkos::subview(refFaceTangents, Kokkos::ALL, 1); + ScalarViewType refEdgesVec("refEdgesVec", numEdges, dim); + ScalarViewType refFacesTangents("refFaceTangents", numFaces, dim, 2); + ScalarViewType refFacesNormal("refFaceNormal", numFaces, dim); ordinal_type numVertexDofs = numVertices; @@ -209,376 +461,257 @@ ProjectionTools::getL2BasisCoeffs(Kokkos::DynRankViewgetDofCount(faceDim,iface); - Kokkos::View computedDofs("computedDofs",numVertexDofs+numEdgeDofs+numFaceDofs); + Kokkos::View computedDofs("computedDofs", numVertexDofs+numEdgeDofs+numFaceDofs); - ordinal_type computedDofsCount = 0; + auto targetEPointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetPointsRange()); + auto basisEPointsRange = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisPointsRange()); - ordinal_type numTotalCubPoints = projStruct->getNumBasisEvalPoints(); - ScalarViewType cubPoints("cubPoints",numCells,numTotalCubPoints, dim); - getL2EvaluationPoints(cubPoints, orts, cellBasis, projStruct, BASIS); + auto refTopologyKey = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTopologyKey()); - ScalarViewType basisAtCubPoints("basisAtCubPoints",numCells,basisCardinality, numTotalCubPoints, fieldDim); - ScalarViewType basisAtTargetCubPoints("basisAtTargetCubPoints",numCells,basisCardinality, numTotalEvaluationPoints, fieldDim); + ordinal_type numTotalBasisEPoints = projStruct->getNumBasisEvalPoints(); + ScalarViewType basisEPoints("basisEPoints",numCells,numTotalBasisEPoints, 
dim); + getL2EvaluationPoints(basisEPoints, orts, cellBasis, projStruct, EvalPointsType::BASIS); + + auto tagToOrdinal = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(), cellBasis->getAllDofOrdinal()); + + ScalarViewType basisAtBasisEPoints("basisAtBasisEPoints",numCells,basisCardinality, numTotalBasisEPoints, fieldDim); + ScalarViewType basisAtTargetEPoints("basisAtTargetEPoints",numCells,basisCardinality, numTotalTargetEPoints, fieldDim); { if(fieldDim == 1) { - ScalarViewType nonOrientedBasisAtCubPoints("nonOrientedBasisAtCubPoints",numCells,basisCardinality, numTotalCubPoints); - ScalarViewType nonOrientedBasisAtTargetCubPoints("nonOrientedBasisAtTargetCubPoints",numCells,basisCardinality, numTotalEvaluationPoints); + ScalarViewType nonOrientedBasisAtBasisEPoints("nonOrientedBasisAtBasisEPoints",numCells,basisCardinality, numTotalBasisEPoints); + ScalarViewType nonOrientedBasisAtTargetEPoints("nonOrientedBasisAtTargetEPoints",numCells,basisCardinality, numTotalTargetEPoints); for(ordinal_type ic=0; icgetValues(Kokkos::subview(nonOrientedBasisAtTargetCubPoints,ic,Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(evaluationPoints, ic, Kokkos::ALL(), Kokkos::ALL())); - cellBasis->getValues(Kokkos::subview(nonOrientedBasisAtCubPoints,ic,Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(cubPoints, ic, Kokkos::ALL(), Kokkos::ALL())); + cellBasis->getValues(Kokkos::subview(nonOrientedBasisAtTargetEPoints,ic,Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(targetEPoints, ic, Kokkos::ALL(), Kokkos::ALL())); + cellBasis->getValues(Kokkos::subview(nonOrientedBasisAtBasisEPoints,ic,Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(basisEPoints, ic, Kokkos::ALL(), Kokkos::ALL())); } - OrientationTools::modifyBasisByOrientation(Kokkos::subview(basisAtCubPoints, Kokkos::ALL(), Kokkos::ALL(), - Kokkos::ALL(),0), nonOrientedBasisAtCubPoints, orts, cellBasis); - OrientationTools::modifyBasisByOrientation(Kokkos::subview(basisAtTargetCubPoints, Kokkos::ALL(), - Kokkos::ALL(), 
Kokkos::ALL(),0), nonOrientedBasisAtTargetCubPoints, orts, cellBasis); - + OrientationTools::modifyBasisByOrientation(Kokkos::subview(basisAtBasisEPoints, Kokkos::ALL(), Kokkos::ALL(), + Kokkos::ALL(),0), nonOrientedBasisAtBasisEPoints, orts, cellBasis); + OrientationTools::modifyBasisByOrientation(Kokkos::subview(basisAtTargetEPoints, Kokkos::ALL(), + Kokkos::ALL(), Kokkos::ALL(),0), nonOrientedBasisAtTargetEPoints, orts, cellBasis); } else { - ScalarViewType nonOrientedBasisAtCubPoints("nonOrientedBasisAtCubPoints",numCells,basisCardinality, numTotalCubPoints,fieldDim); - ScalarViewType nonOrientedBasisAtTargetCubPoints("nonOrientedBasisAtTargetCubPoints",numCells,basisCardinality, numTotalEvaluationPoints,fieldDim); + ScalarViewType nonOrientedBasisAtBasisEPoints("nonOrientedBasisAtBasisEPoints",numCells,basisCardinality, numTotalBasisEPoints,fieldDim); + ScalarViewType nonOrientedBasisAtTargetEPoints("nonOrientedBasisAtTargetEPoints",numCells,basisCardinality, numTotalTargetEPoints,fieldDim); for(ordinal_type ic=0; icgetValues(Kokkos::subview(nonOrientedBasisAtTargetCubPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(evaluationPoints, ic, Kokkos::ALL(), Kokkos::ALL())); - cellBasis->getValues(Kokkos::subview(nonOrientedBasisAtCubPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(cubPoints, ic, Kokkos::ALL(), Kokkos::ALL())); + cellBasis->getValues(Kokkos::subview(nonOrientedBasisAtTargetEPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(targetEPoints, ic, Kokkos::ALL(), Kokkos::ALL())); + cellBasis->getValues(Kokkos::subview(nonOrientedBasisAtBasisEPoints,ic,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()), Kokkos::subview(basisEPoints, ic, Kokkos::ALL(), Kokkos::ALL())); } - OrientationTools::modifyBasisByOrientation(basisAtCubPoints, nonOrientedBasisAtCubPoints, orts, cellBasis); - OrientationTools::modifyBasisByOrientation(basisAtTargetCubPoints, nonOrientedBasisAtTargetCubPoints, orts, cellBasis); + 
OrientationTools::modifyBasisByOrientation(basisAtBasisEPoints, nonOrientedBasisAtBasisEPoints, orts, cellBasis); + OrientationTools::modifyBasisByOrientation(basisAtTargetEPoints, nonOrientedBasisAtTargetEPoints, orts, cellBasis); } } - for(ordinal_type iv=0; ivgetDofOrdinal(0, iv, 0); - computedDofs(computedDofsCount++) = idof; - for(ordinal_type ic=0; icgetDofCount(edgeDim,ie); + for(ordinal_type i=0; igetDofCount(faceDim,iface); + for(ordinal_type i=0; igetFunctionSpace() == FUNCTION_SPACE_HGRAD); + bool isHCurlBasis = (cellBasis->getFunctionSpace() == FUNCTION_SPACE_HCURL); + bool isHDivBasis = (cellBasis->getFunctionSpace() == FUNCTION_SPACE_HDIV); + ordinal_type faceDofDim = isHCurlBasis ? 2 : 1; + ScalarViewType edgeCoeff("edgeCoeff", fieldDim); - ordinal_type faceDofDim = isHCurlBAsis ? 2 : 1; - ScalarViewType edgeCoeff("edgeCoeff", fieldDim); + const Kokkos::RangePolicy policy(0, numCells); + + if(isHGradBasis) { + + typedef ComputeBasisCoeffsOnVertices_L2 functorType; + Kokkos::parallel_for(policy, functorType(basisCoeffs, tagToOrdinal, targetEPointsRange, + targetAtTargetEPoints, basisAtTargetEPoints, numVertices)); + } + for(ordinal_type ie=0; ie::getReferenceEdgeTangent(refEdgeTan,ie, cellTopo); - Kokkos::deep_copy(edgeCoeff,refEdgeTan); + if(isHCurlBasis) { + CellTools::getReferenceEdgeTangent(edgeVecHost,ie, cellTopo); + } else if(isHDivBasis) { + CellTools::getReferenceSideNormal(edgeVecHost, ie, cellTopo); } else { - CellTools::getReferenceSideNormal(refEdgeNormal, ie, cellTopo); - Kokkos::deep_copy(edgeCoeff,refEdgeNormal); + edgeVecHost(0) = 1; } + Kokkos::deep_copy(edgeVec,edgeVecHost); ordinal_type edgeCardinality = cellBasis->getDofCount(edgeDim,ie); - ordinal_type numCubPoints = projStruct->getNumBasisEvalPoints(edgeDim, ie); - ordinal_type numTargetCubPoints = projStruct->getNumTargetEvalPoints(edgeDim, ie); + ordinal_type numBasisEPoints = range_size(basisEPointsRange(edgeDim, ie)); + ordinal_type numTargetEPoints = 
range_size(targetEPointsRange(edgeDim, ie)); - ScalarViewType edgeBasisAtCubPoints("tanBasisAtElemCubPoints",numCells,edgeCardinality, numCubPoints); - ScalarViewType edgeTargetAtTargetCubPoints("tanBasisAtTargetCubPoints",numCells, numTargetCubPoints); - ScalarViewType weightedBasisAtElemCubPoints("weightedTanBasisAtElemCubPoints",numCells,edgeCardinality, numCubPoints); - ScalarViewType weightedBasisAtTargetCubPoints("weightedTanBasisAtTargetCubPoints",numCells,edgeCardinality, numTargetCubPoints); - ScalarViewType mComputedProjection("mComputedProjection", numCells, numCubPoints); + ScalarViewType basisDofAtBasisEPoints("BasisDofAtBasisEPoints",numCells,edgeCardinality, numBasisEPoints); + ScalarViewType tragetDofAtTargetEPoints("TargetDofAtTargetEPoints",numCells, numTargetEPoints); + ScalarViewType weightedBasisAtBasisEPoints("weightedTanBasisAtBasisEPoints",numCells,edgeCardinality, numBasisEPoints); + ScalarViewType weightedBasisAtTargetEPoints("weightedTanBasisAtTargetEPoints",numCells,edgeCardinality, numTargetEPoints); + ScalarViewType negPartialProj("negPartialProj", numCells, numBasisEPoints); - ScalarViewType targetEvalWeights = projStruct->getTargetEvalWeights(edgeDim, ie); - ScalarViewType basisEvalWeights = projStruct->getBasisEvalWeights(edgeDim, ie); + auto targetEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetEvalWeights(edgeDim,ie)); + auto basisEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisEvalWeights(edgeDim,ie)); //Note: we are not considering the jacobian of the orientation map since it is simply a scalar term for the integrals and it does not affect the projection - ordinal_type offsetBasis = projStruct->getBasisPointsRange(edgeDim, ie).first; - ordinal_type offsetTarget = projStruct->getTargetPointsRange(edgeDim, ie).first; - for(ordinal_type j=0; j getDofOrdinal(edgeDim, ie, j); - for(ordinal_type ic=0; ic functorTypeEdge; - ScalarViewType 
edgeMassMat_("edgeMassMat_", numCells, edgeCardinality, edgeCardinality), - edgeRhsMat_("rhsMat_", numCells, edgeCardinality); + Kokkos::parallel_for(policy, functorTypeEdge(basisCoeffs, negPartialProj, basisDofAtBasisEPoints, + basisAtBasisEPoints, basisEWeights, weightedBasisAtBasisEPoints, targetEWeights, + basisAtTargetEPoints, weightedBasisAtTargetEPoints, computedDofs, tagToOrdinal, + targetAtTargetEPoints,tragetDofAtTargetEPoints, refEdgesVec, fieldDim, + edgeCardinality, offsetBasis, offsetTarget, numVertexDofs, edgeDim, ie)); - ScalarViewType cubWeights_("cubWeights_", numCells, 1, basisEvalWeights.extent(0)), targetEvalWeights_("targetEvalWeights", numCells, 1, targetEvalWeights.extent(0)); - RealSpaceTools::clone(cubWeights_, basisEvalWeights); - RealSpaceTools::clone(targetEvalWeights_, targetEvalWeights); - FunctionSpaceTools::integrate(edgeMassMat_, edgeBasisAtCubPoints, weightedBasisAtElemCubPoints); - FunctionSpaceTools::integrate(edgeRhsMat_, edgeTargetAtTargetCubPoints, weightedBasisAtTargetCubPoints); - FunctionSpaceTools::integrate(edgeRhsMat_, mComputedProjection, weightedBasisAtElemCubPoints,true); + ScalarViewType edgeMassMat_("edgeMassMat_", numCells, edgeCardinality, edgeCardinality), + edgeRhsMat_("rhsMat_", numCells, edgeCardinality); + FunctionSpaceTools::integrate(edgeMassMat_, basisDofAtBasisEPoints, weightedBasisAtBasisEPoints); + FunctionSpaceTools::integrate(edgeRhsMat_, tragetDofAtTargetEPoints, weightedBasisAtTargetEPoints); + FunctionSpaceTools::integrate(edgeRhsMat_, negPartialProj, weightedBasisAtBasisEPoints,true); - Kokkos::View edgeMassMat("edgeMassMat", edgeCardinality,edgeCardinality); - Kokkos::View edgeRhsMat("edgeRhsMat",edgeCardinality, 1); - Teuchos::LAPACK lapack; - ordinal_type info = 0; - for(ordinal_type ic=0; ic WorkArrayViewType; + ScalarViewType t_("t",numCells, edgeCardinality); + WorkArrayViewType w_("w",numCells, edgeCardinality); - lapack.POSV('U', edgeCardinality, 1, - edgeMassMat.data(), - 
edgeMassMat.stride_1(), - edgeRhsMat.data(), - edgeRhsMat.stride_1(), - &info); - - if (info) { - std::stringstream ss; - ss << ">>> ERROR (Intrepid::ProjectionTools::getBasisCoeffs): " - << "LAPACK return with error code: " - << info; - INTREPID2_TEST_FOR_EXCEPTION( true, std::runtime_error, ss.str().c_str() ); - } + auto edgeDof = Kokkos::subview(tagToOrdinal, edgeDim, ie, Kokkos::ALL()); - for(ordinal_type i=0; igetDofOrdinal(edgeDim, ie, i); - basisCoeffs(ic,edge_dof) = edgeRhsMat(i,0); - } - } - for(ordinal_type i=0; igetDofOrdinal(edgeDim, ie, i); + typedef SolveSystem functorType; + Kokkos::parallel_for(policy, functorType( basisCoeffs, edgeMassMat_, edgeRhsMat_, t_, w_, edgeDof, edgeCardinality)); } - ScalarViewType ortJacobian("ortJacobian", faceDim, faceDim); + ScalarViewType ortJacobian_("ortJacobian", numCells, faceDim, faceDim); - ScalarViewType faceCoeff("faceCoeff", fieldDim, faceDofDim); + ScalarViewType faceCoeff("faceCoeff", numCells, fieldDim, faceDofDim); for(ordinal_type iface=0; ifacegetTopologyKey(faceDim,iface); + const auto topoKey = refTopologyKey(faceDim,iface); ordinal_type faceCardinality = cellBasis->getDofCount(faceDim,iface); - ordinal_type numTargetCubPoints = projStruct->getNumTargetEvalPoints(faceDim, iface); - ordinal_type numCubPoints = projStruct->getNumBasisEvalPoints(faceDim, iface); - - if(fieldDim == 1) - faceCoeff(0,0) = 1; - else if(isHCurlBAsis) { - CellTools::getReferenceFaceTangents(refFaceTanU, refFaceTanV,iface, cellTopo); - } else { - CellTools::getReferenceFaceNormal(refFaceNormal, iface, cellTopo); - for(ordinal_type d=0; d ::getReferenceFaceTangents(refFaceTanUHost, refFaceTanVHost, iface, cellTopo); + Kokkos::deep_copy(refFaceTanU, refFaceTanUHost); + Kokkos::deep_copy(refFaceTanV, refFaceTanVHost); + } else if(isHDivBasis) { + auto faceNormal = Kokkos::subview(refFacesNormal,iface,Kokkos::ALL()); + auto faceNormalHost = Kokkos::create_mirror_view(faceNormal); + CellTools::getReferenceFaceNormal(faceNormalHost, 
iface, cellTopo); + Kokkos::deep_copy(faceNormal, faceNormalHost); } - ScalarViewType faceBasisDofAtCubPoints("normaBasisAtCubPoints",numCells,faceCardinality, numCubPoints,faceDofDim); - ScalarViewType wBasisDofAtCubPoints("weightedNormalBasisAtCubPoints",numCells,faceCardinality, numCubPoints,faceDofDim); + ScalarViewType faceBasisDofAtBasisEPoints("normaBasisAtBasisEPoints",numCells,faceCardinality, numBasisEPoints,faceDofDim); + ScalarViewType wBasisDofAtBasisEPoints("weightedNormalBasisAtBasisEPoints",numCells,faceCardinality, numBasisEPoints,faceDofDim); - ScalarViewType faceBasisAtTargetCubPoints("normalBasisAtTargetCubPoints",numCells,faceCardinality, numTargetCubPoints,faceDofDim); - ScalarViewType wBasisBasisAtTargetCubPoints("weightedNormalBasisAtTargetCubPoints",numCells,faceCardinality, numTargetCubPoints,faceDofDim); + ScalarViewType faceBasisAtTargetEPoints("normalBasisAtTargetEPoints",numCells,faceCardinality, numTargetEPoints,faceDofDim); + ScalarViewType wBasisDofAtTargetEPoints("weightedNormalBasisAtTargetEPoints",numCells,faceCardinality, numTargetEPoints,faceDofDim); - ScalarViewType targetAtTargetCubPoints("targetAtTargetCubPoints",numCells, numTargetCubPoints,faceDofDim); - ScalarViewType mComputedProjection("mNormalComputedProjection", numCells,numCubPoints,faceDofDim); + ScalarViewType targetDofAtTargetEPoints("targetDofAtTargetEPoints",numCells, numTargetEPoints,faceDofDim); + ScalarViewType negPartialProj("mNormalComputedProjection", numCells,numBasisEPoints,faceDofDim); - ordinal_type offsetBasis = projStruct->getBasisPointsRange(faceDim, iface).first; - ordinal_type offsetTarget = projStruct->getTargetPointsRange(faceDim, iface).first; - ScalarViewType targetCubWeights = projStruct->getTargetEvalWeights(faceDim, iface); - ScalarViewType CubWeights = projStruct->getBasisEvalWeights(faceDim, iface); + ordinal_type offsetBasis = basisEPointsRange(faceDim, iface).first; + ordinal_type offsetTarget = targetEPointsRange(faceDim, iface).first; 
+ auto targetEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetEvalWeights(faceDim,iface)); + auto basisEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisEvalWeights(faceDim,iface)); - //Note: we are not considering the jacobian of the orientation map since it is simply a scalar term for the integrals and it does not affect the projection - for(ordinal_type ic=0; icgetDofOrdinal(faceDim, iface, j); - for(ordinal_type itan=0; itan functorTypeFace; + + Kokkos::parallel_for(policy, functorTypeFace(basisCoeffs, negPartialProj,faceBasisDofAtBasisEPoints, + basisAtBasisEPoints, basisEWeights, wBasisDofAtBasisEPoints, targetEWeights, + basisAtTargetEPoints, wBasisDofAtTargetEPoints, computedDofs, tagToOrdinal, + orts, targetAtTargetEPoints,targetDofAtTargetEPoints, ortJacobian_, faceCoeff, + refFacesTangents, refFacesNormal, fieldDim, faceCardinality, offsetBasis, + offsetTarget, numVertexDofs+numEdgeDofs, numFaces, faceDim,faceDofDim, + dim, iface, topoKey, isHCurlBasis, isHDivBasis)); + typedef Kokkos::DynRankView WorkArrayViewType; ScalarViewType faceMassMat_("faceMassMat_", numCells, faceCardinality, faceCardinality), faceRhsMat_("rhsMat_", numCells, faceCardinality); - FunctionSpaceTools::integrate(faceMassMat_, faceBasisDofAtCubPoints, wBasisDofAtCubPoints); - FunctionSpaceTools::integrate(faceRhsMat_, targetAtTargetCubPoints, wBasisBasisAtTargetCubPoints); - FunctionSpaceTools::integrate(faceRhsMat_, mComputedProjection, wBasisDofAtCubPoints,true); + FunctionSpaceTools::integrate(faceMassMat_, faceBasisDofAtBasisEPoints, wBasisDofAtBasisEPoints); + FunctionSpaceTools::integrate(faceRhsMat_, targetDofAtTargetEPoints, wBasisDofAtTargetEPoints); + FunctionSpaceTools::integrate(faceRhsMat_, negPartialProj, wBasisDofAtBasisEPoints,true); - Kokkos::View faceMassMat("faceMassMat", faceCardinality,faceCardinality); - Kokkos::View faceRhsMat("faceRhsMat",faceCardinality, 1); + 
ScalarViewType t_("t",numCells, faceCardinality); + WorkArrayViewType w_("w",numCells,faceCardinality); - Teuchos::LAPACK lapack; - ordinal_type info = 0; - for(ordinal_type ic=0; ic>> ERROR (Intrepid::ProjectionTools::getBasisCoeffs): " - << "LAPACK return with error code: " - << info; - INTREPID2_TEST_FOR_EXCEPTION( true, std::runtime_error, ss.str().c_str() ); - } - - for(ordinal_type i=0; igetDofOrdinal(faceDim, iface, i); - basisCoeffs(ic,face_dof) = faceRhsMat(i,0); - } - } + auto faceDof = Kokkos::subview(tagToOrdinal, faceDim, iface, Kokkos::ALL()); - for(ordinal_type i=0; igetDofOrdinal(faceDim, iface, i); + typedef SolveSystem functorType; + Kokkos::parallel_for(policy, functorType( basisCoeffs, faceMassMat_, faceRhsMat_, t_, w_, faceDof, faceCardinality)); } ordinal_type numElemDofs = cellBasis->getDofCount(dim,0); + if(numElemDofs>0) { - range_type cellPointsRange = projStruct->getTargetPointsRange(dim, 0); + auto cellDofs = Kokkos::subview(tagToOrdinal, dim, 0, Kokkos::ALL()); - ordinal_type numTargetCubPoints = projStruct->getNumTargetEvalPoints(dim,0); - ordinal_type numCubPoints = projStruct->getNumBasisEvalPoints(dim,0); + range_type cellPointsRange = targetEPointsRange(dim, 0); - ScalarViewType internalBasisAtCubPoints("internalBasisAtCubPoints",numCells,numElemDofs, numCubPoints, fieldDim); - ScalarViewType mComputedProjection("mComputedProjection", numCells, numCubPoints, fieldDim); + ordinal_type numTargetEPoints = range_size(targetEPointsRange(dim,0)); + ordinal_type numBasisEPoints = range_size(basisEPointsRange(dim,0)); - ScalarViewType targetCubWeights = projStruct->getTargetEvalWeights(dim, 0); - ScalarViewType cubWeights = projStruct->getBasisEvalWeights(dim, 0); - ordinal_type offsetBasis = projStruct->getBasisPointsRange(dim, 0).first; - ordinal_type offsetTarget = projStruct->getTargetPointsRange(dim, 0).first; + ScalarViewType internalBasisAtBasisEPoints("internalBasisAtBasisEPoints",numCells,numElemDofs, numBasisEPoints, fieldDim); + 
ScalarViewType negPartialProj("negPartialProj", numCells, numBasisEPoints, fieldDim); + auto targetEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getTargetEvalWeights(dim,0)); + auto basisEWeights = Kokkos::create_mirror_view_and_copy(typename SpT::memory_space(),projStruct->getBasisEvalWeights(dim,0)); + ordinal_type offsetBasis = basisEPointsRange(dim, 0).first; + ordinal_type offsetTarget = targetEPointsRange(dim, 0).first; - ScalarViewType wBasisAtCubPoints("weightedBasisAtCubPoints",numCells,numElemDofs, numCubPoints,fieldDim); - ScalarViewType wBasisBasisAtTargetCubPoints("weightedBasisAtTargetCubPoints",numCells,numElemDofs, numTargetCubPoints,fieldDim); - for(ordinal_type j=0; j getDofOrdinal(dim, 0, j); - for(ordinal_type ic=0; ic functorType; + Kokkos::parallel_for(policy, functorType( basisCoeffs, negPartialProj, internalBasisAtBasisEPoints, + basisAtBasisEPoints, basisEWeights, wBasisAtBasisEPoints, targetEWeights, basisAtTargetEPoints, wBasisDofAtTargetEPoints, + computedDofs, cellDofs, fieldDim, numElemDofs, offsetBasis, offsetTarget, numVertexDofs+numEdgeDofs+numFaceDofs)); + typedef Kokkos::DynRankView WorkArrayViewType; ScalarViewType cellMassMat_("cellMassMat_", numCells, numElemDofs, numElemDofs), cellRhsMat_("rhsMat_", numCells, numElemDofs); - FunctionSpaceTools::integrate(cellMassMat_, internalBasisAtCubPoints, wBasisAtCubPoints); + FunctionSpaceTools::integrate(cellMassMat_, internalBasisAtBasisEPoints, wBasisAtBasisEPoints); if(fieldDim==1) - FunctionSpaceTools::integrate(cellRhsMat_, Kokkos::subview(targetAtEvalPoints,Kokkos::ALL(),cellPointsRange,Kokkos::ALL()), - Kokkos::subview(wBasisBasisAtTargetCubPoints,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL(),0)); + FunctionSpaceTools::integrate(cellRhsMat_, Kokkos::subview(targetAtTargetEPoints,Kokkos::ALL(),cellPointsRange,Kokkos::ALL()), + Kokkos::subview(wBasisDofAtTargetEPoints,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL(),0)); else - 
FunctionSpaceTools::integrate(cellRhsMat_, Kokkos::subview(targetAtEvalPoints,Kokkos::ALL(),cellPointsRange,Kokkos::ALL()), wBasisBasisAtTargetCubPoints); - FunctionSpaceTools::integrate(cellRhsMat_, mComputedProjection, wBasisAtCubPoints, true); - - Kokkos::View cellMassMat("cellMassMat", numElemDofs,numElemDofs); - Kokkos::View cellRhsMat("cellRhsMat",numElemDofs, 1); - - Teuchos::LAPACK lapack; - ordinal_type info = 0; - for(ordinal_type ic=0; ic::integrate(cellRhsMat_, Kokkos::subview(targetAtTargetEPoints,Kokkos::ALL(),cellPointsRange,Kokkos::ALL()), wBasisDofAtTargetEPoints); + FunctionSpaceTools::integrate(cellRhsMat_, negPartialProj, wBasisAtBasisEPoints, true); - lapack.POSV('U', numElemDofs, 1, - cellMassMat.data(), - cellMassMat.stride_1(), - cellRhsMat.data(), - cellRhsMat.stride_1(), - &info); - - for(ordinal_type i=0; igetDofOrdinal(dim, 0, i); - basisCoeffs(ic,idof) = cellRhsMat(i,0); - } - - if (info) { - std::stringstream ss; - ss << ">>> ERROR (Intrepid::ProjectionTools::getBasisCoeffs): " - << "LAPACK return with error code: " - << info; - INTREPID2_TEST_FOR_EXCEPTION( true, std::runtime_error, ss.str().c_str() ); - } - } + ScalarViewType t_("t",numCells, numElemDofs); + WorkArrayViewType w_("w",numCells,numElemDofs); + typedef SolveSystem functorType2; + Kokkos::parallel_for(policy, functorType2( basisCoeffs, cellMassMat_, cellRhsMat_, t_, w_, cellDofs, numElemDofs)); } } } From 0caf38972354aef5a17f5c6c9bcfd84e65846507 Mon Sep 17 00:00:00 2001 From: Mauro Perego Date: Sat, 18 Apr 2020 12:02:08 -0600 Subject: [PATCH 09/86] Intrepid2: Modified tests to exercise projection of Hierarchical basis --- .../unit-test/Projection/Cuda/CMakeLists.txt | 36 + .../Projection/OpenMP/CMakeLists.txt | 36 + .../Projection/test_convergence_HEX.hpp | 1199 ++++++++-------- .../Projection/test_convergence_QUAD.hpp | 1173 ++++++++-------- .../Projection/test_convergence_TET.hpp | 1202 +++++++++-------- .../Projection/test_convergence_TRI.hpp | 1169 ++++++++-------- 
.../test_interpolation_projection_HEX.hpp | 19 +- .../test_interpolation_projection_QUAD.hpp | 12 +- .../test_interpolation_projection_TET.hpp | 38 +- .../test_interpolation_projection_TRI.hpp | 12 +- 10 files changed, 2534 insertions(+), 2362 deletions(-) diff --git a/packages/intrepid2/unit-test/Projection/Cuda/CMakeLists.txt b/packages/intrepid2/unit-test/Projection/Cuda/CMakeLists.txt index c2e08f7f117b..47e2d962cade 100644 --- a/packages/intrepid2/unit-test/Projection/Cuda/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Projection/Cuda/CMakeLists.txt @@ -32,4 +32,40 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( NUM_MPI_PROCS 1 PASS_REGULAR_EXPRESSION "TEST PASSED" ADD_DIR_TO_NAME + ) + +TRIBITS_ADD_EXECUTABLE_AND_TEST( + Test_Convergence_HEX + SOURCES test_convergence_HEX.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + +TRIBITS_ADD_EXECUTABLE_AND_TEST( + Test_Convergence_TET + SOURCES test_convergence_TET.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + +TRIBITS_ADD_EXECUTABLE_AND_TEST( + Test_Convergence_QUAD + SOURCES test_convergence_QUAD.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + +TRIBITS_ADD_EXECUTABLE_AND_TEST( + Test_Convergence_TRI + SOURCES test_convergence_TRI.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME ) \ No newline at end of file diff --git a/packages/intrepid2/unit-test/Projection/OpenMP/CMakeLists.txt b/packages/intrepid2/unit-test/Projection/OpenMP/CMakeLists.txt index c2e08f7f117b..47e2d962cade 100644 --- a/packages/intrepid2/unit-test/Projection/OpenMP/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Projection/OpenMP/CMakeLists.txt @@ -32,4 +32,40 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( NUM_MPI_PROCS 1 PASS_REGULAR_EXPRESSION "TEST PASSED" ADD_DIR_TO_NAME + ) + +TRIBITS_ADD_EXECUTABLE_AND_TEST( + Test_Convergence_HEX + SOURCES 
test_convergence_HEX.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + +TRIBITS_ADD_EXECUTABLE_AND_TEST( + Test_Convergence_TET + SOURCES test_convergence_TET.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + +TRIBITS_ADD_EXECUTABLE_AND_TEST( + Test_Convergence_QUAD + SOURCES test_convergence_QUAD.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + +TRIBITS_ADD_EXECUTABLE_AND_TEST( + Test_Convergence_TRI + SOURCES test_convergence_TRI.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME ) \ No newline at end of file diff --git a/packages/intrepid2/unit-test/Projection/test_convergence_HEX.hpp b/packages/intrepid2/unit-test/Projection/test_convergence_HEX.hpp index e3f310787398..97a6a08ec557 100644 --- a/packages/intrepid2/unit-test/Projection/test_convergence_HEX.hpp +++ b/packages/intrepid2/unit-test/Projection/test_convergence_HEX.hpp @@ -46,7 +46,7 @@ The test considers a uniform and structured hexahedral mesh of the cube [-1,1]^3, formed by N^3 hexas, and checks the accuracy of the HGRAD, HCURL, HDIV, HVOL projections of analytic - target functions for increasing N. + target functions for for Hierarchical and Nodal basis functions as N increases. The accuracy is computed in the H^1, H^{curl}, H^{div} and L^2 norms respectively. The optimal order of convergence equates the basis degree. @@ -244,8 +244,9 @@ int ConvergenceHex(const bool verbose) { int NX = 2; constexpr int numRefinements = 2; - // Expected values of the projection errors in H1, Hcurl, Hdiv and L2 norms for HGRAD, HDIV, HCURL and HVOL elements respectively. + // Expected values of the projection errors in H1, Hcurl, HDiv and L2 norms for HGRAD, HDIV, HCURL and HVOL elements respectively. 
// These values have been computed running the code with numRefinements=4 and the convergence rates are close to the optimal ones. + // Note that these values are independent of the basis choice (Hierarchical or Nodal) as long as they generate the same functional space. // We currently only test two mesh refinements to make the test run faster, so this is used as a regression test rather than // a convergence test, but the test can be use for verifying optimal accuracy as well. ValueType hgradNorm[numRefinements], hcurlNorm[numRefinements], hdivNorm[numRefinements], hvolNorm[numRefinements]; @@ -367,7 +368,10 @@ int ConvergenceHex(const bool verbose) { DynRankView ConstructWithLabel(weights, numRefCoords); cell_cub->getCubature(refPoints, weights); - + using basisType = Basis; + using CG_NBasis = NodalBasisFamily; + using CG_HBasis = HierarchicalBasisFamily; + //using CG_DNBasis = DerivedNodalBasisFamily; *outStream @@ -386,180 +390,188 @@ int ConvergenceHex(const bool verbose) { Kokkos::DynRankView elemOrts("elemOrts", numElems); ots::getOrientation(elemOrts, elemNodes, hexa); - Basis_HGRAD_HEX_Cn_FEM basis(order); - ordinal_type basisCardinality = basis.getCardinality(); - - //Compute physical Dof Coordinates and Reference coordinates - DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); - { - Basis_HGRAD_HEX_C1_FEM hexaLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(hexaLinearBasisValuesAtRefCoords, hexa.getNodeCount(), numRefCoords); - hexaLinearBasis.getValues(hexaLinearBasisValuesAtRefCoords, refPoints); - for(ordinal_type i=0; i basis_set; + basis_set.push_back(new typename CG_NBasis::HGRAD_HEX(order)); + basis_set.push_back(new typename CG_HBasis::HGRAD_HEX(order)); + + for (auto basisPtr:basis_set) { + auto& basis = *basisPtr; + *outStream << " " << basis.getName() << std::endl; - Fun fun; - GradFun gradFun; - DynRankView ConstructWithLabel(funAtRefCoords, numElems, numRefCoords); - 
DynRankView ConstructWithLabel(funGradAtPhysRefCoords, numElems, numRefCoords, dim); - for(ordinal_type i=0; i hexaLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(hexaLinearBasisValuesAtRefCoords, hexa.getNodeCount(), numRefCoords); + hexaLinearBasis.getValues(hexaLinearBasisValuesAtRefCoords, refPoints); + for(ordinal_type i=0; i projStruct; - projStruct.createHGradProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); + // compute projection-based interpolation of fun into HGRAD + DynRankView ConstructWithLabel(basisCoeffsHGrad, numElems, basisCardinality); + { + ordinal_type targetCubDegree(basis.getDegree()),targetDerivCubDegree(basis.getDegree()); - ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numGradPoints = projStruct.getNumTargetDerivEvalPoints(); + Experimental::ProjectionStruct projStruct; + projStruct.createHGradProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); - DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(evaluationGradPoints, numElems, numGradPoints, dim); + ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numGradPoints = projStruct.getNumTargetDerivEvalPoints(); + DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(evaluationGradPoints, numElems, numGradPoints, dim); - pts::getHGradEvaluationPoints(evaluationPoints, - evaluationGradPoints, - elemOrts, - &basis, - &projStruct); + pts::getHGradEvaluationPoints(evaluationPoints, + evaluationGradPoints, + elemOrts, + &basis, + &projStruct); - DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints); - DynRankView ConstructWithLabel(targetGradAtEvalPoints, numElems, numGradPoints, dim); - DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(physEvalGradPoints, numElems, numGradPoints, dim); - { - Basis_HGRAD_HEX_C1_FEM 
hexLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalPoints, hexa.getNodeCount(), numPoints); - DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalGradPoints, hexa.getNodeCount(), numGradPoints); - - for(ordinal_type i=0; i0) - hexLinearBasis.getValues(hexLinearBasisValuesAtEvalGradPoints, Kokkos::subview(evaluationGradPoints,i,Kokkos::ALL(),Kokkos::ALL())); - for(ordinal_type d=0; d hexLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalPoints, hexa.getNodeCount(), numPoints); + DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalGradPoints, hexa.getNodeCount(), numGradPoints); + + for(ordinal_type i=0; i0) + hexLinearBasis.getValues(hexLinearBasisValuesAtEvalGradPoints, Kokkos::subview(evaluationGradPoints,i,Kokkos::ALL(),Kokkos::ALL())); + for(ordinal_type d=0; d0) - ct::setJacobian(jacobian, evaluationGradPoints, physVertexes, hexa); + //transform the target function and its derivative to the reference element (inverse of pullback operator) + DynRankView ConstructWithLabel(jacobian, numElems, numGradPoints, dim, dim); + if(numGradPoints>0) + ct::setJacobian(jacobian, evaluationGradPoints, physVertexes, hexa); + + GradFun gradFun; + Kokkos::deep_copy(targetGradAtEvalPoints,0.); + for(int ic=0; ic relTol){ - errorFlag++; - *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; - *outStream << "For N = " << NX << ", computed error (" << hgradNorm[iter] << ") is different than expected one (" << expected_error << ")"; - *outStream << std::endl; + hgradNorm[iter] = std::sqrt(norm2); + auto expected_error = hgrad_errors[iter]; + if(std::abs(hgradNorm[iter]-expected_error)/expected_error > relTol){ + errorFlag++; + *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; + *outStream << "For N = " << NX << ", computed error (" << hgradNorm[iter] << ") is different than expected one (" << expected_error << ")"; + *outStream << std::endl; + } + delete basisPtr; } *outStream << "HGRAD Error: " << hgradNorm[iter] < elemOrts("elemOrts", numElems); ots::getOrientation(elemOrts, elemNodes, hexa); - Basis_HCURL_HEX_In_FEM basis(order); - ordinal_type basisCardinality = basis.getCardinality(); - - //Compute physical Dof Coordinates and Reference coordinates - DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); - { - Basis_HGRAD_HEX_C1_FEM hexaLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(hexaLinearBasisValuesAtRefCoords, hexa.getNodeCount(), numRefCoords); - hexaLinearBasis.getValues(hexaLinearBasisValuesAtRefCoords, refPoints); - for(ordinal_type i=0; i projStruct; - projStruct.createHCurlProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); - - ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numCurlPoints = projStruct.getNumTargetDerivEvalPoints(); - DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(evaluationCurlPoints, numElems, numCurlPoints, dim); - pts::getHCurlEvaluationPoints(evaluationPoints, - evaluationCurlPoints, - elemOrts, - &basis, - &projStruct); + std::vector basis_set; + basis_set.push_back(new typename CG_NBasis::HCURL_HEX(order)); + basis_set.push_back(new typename CG_HBasis::HCURL_HEX(order)); + for (auto basisPtr:basis_set) { + auto& basis = *basisPtr; + *outStream << " " << basis.getName() << std::endl; - DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(targetCurlAtEvalPoints, numElems, numCurlPoints, dim); + ordinal_type basisCardinality = basis.getCardinality(); - - DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(physEvalCurlPoints, 
numElems, numCurlPoints, dim); + //Compute physical Dof Coordinates and Reference coordinates + DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); { - Basis_HGRAD_HEX_C1_FEM hexLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalPoints, hexa.getNodeCount(), numPoints); - DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalCurlPoints, hexa.getNodeCount(), numCurlPoints); + Basis_HGRAD_HEX_C1_FEM hexaLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(hexaLinearBasisValuesAtRefCoords, hexa.getNodeCount(), numRefCoords); + hexaLinearBasis.getValues(hexaLinearBasisValuesAtRefCoords, refPoints); + for(ordinal_type i=0; i projStruct; + projStruct.createHCurlProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); + + ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numCurlPoints = projStruct.getNumTargetDerivEvalPoints(); + DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(evaluationCurlPoints, numElems, numCurlPoints, dim); + pts::getHCurlEvaluationPoints(evaluationPoints, + evaluationCurlPoints, + elemOrts, + &basis, + &projStruct); + + + DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(targetCurlAtEvalPoints, numElems, numCurlPoints, dim); + + + DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(physEvalCurlPoints, numElems, numCurlPoints, dim); + { + Basis_HGRAD_HEX_C1_FEM hexLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalPoints, hexa.getNodeCount(), numPoints); + DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalCurlPoints, hexa.getNodeCount(), numCurlPoints); + + for(ordinal_type i=0; i relTol){ - errorFlag++; - *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; - *outStream << "For N = " << NX << ", computed error (" << hcurlNorm[iter] << ") is different than expected one (" << expected_error << ")"; - *outStream << std::endl; + } + hcurlNorm[iter] = std::sqrt(norm2); + auto expected_error = hcurl_errors[iter]; + if(std::abs(hcurlNorm[iter]-expected_error)/expected_error > relTol){ + errorFlag++; + *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; + *outStream << "For N = " << NX << ", computed error (" << hcurlNorm[iter] << ") is different than expected one (" << expected_error << ")"; + *outStream << std::endl; + } + delete basisPtr; } *outStream << "HCURL Error: " << hcurlNorm[iter] < elemOrts("elemOrts", numElems); ots::getOrientation(elemOrts, elemNodes, hexa); - Basis_HDIV_HEX_In_FEM basis(order); - ordinal_type basisCardinality = basis.getCardinality(); - - //Compute physical Dof Coordinates and Reference coordinates - DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); - DynRankView ConstructWithLabel(physDofCoords, numElems, basisCardinality, dim); - { - Basis_HGRAD_HEX_C1_FEM hexaLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(hexaLinearBasisValuesAtRefCoords, hexa.getNodeCount(), numRefCoords); - hexaLinearBasis.getValues(hexaLinearBasisValuesAtRefCoords, refPoints); - for(ordinal_type i=0; i basis_set; + basis_set.push_back(new typename CG_NBasis::HDIV_HEX(order)); + basis_set.push_back(new typename CG_HBasis::HDIV_HEX(order)); - FunDiv fun; - DivFunDiv funDiv; - DynRankView ConstructWithLabel(funAtRefCoords, numElems, numRefCoords, dim); - DynRankView ConstructWithLabel(funDivAtPhysRefCoords, numElems, numRefCoords); - for(ordinal_type i=0; i projStruct; - projStruct.createHDivProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); - - ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numDivPoints = projStruct.getNumTargetDerivEvalPoints(); + for (auto basisPtr:basis_set) { + auto& basis = *basisPtr; + 
*outStream << " " << basis.getName() << std::endl; - DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(evaluationDivPoints, numElems, numDivPoints, dim); + ordinal_type basisCardinality = basis.getCardinality(); - pts::getHDivEvaluationPoints(evaluationPoints, - evaluationDivPoints, - elemOrts, - &basis, - &projStruct); - - DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(targetDivAtEvalPoints, numElems, numDivPoints); + //Compute physical Dof Coordinates and Reference coordinates + DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); + DynRankView ConstructWithLabel(physDofCoords, numElems, basisCardinality, dim); + { + Basis_HGRAD_HEX_C1_FEM hexaLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(hexaLinearBasisValuesAtRefCoords, hexa.getNodeCount(), numRefCoords); + hexaLinearBasis.getValues(hexaLinearBasisValuesAtRefCoords, refPoints); + for(ordinal_type i=0; i hexLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalPoints, hexa.getNodeCount(), numPoints); - DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalDivPoints, hexa.getNodeCount(), numDivPoints); - - for(ordinal_type i=0; i projStruct; + projStruct.createHDivProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); + + ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numDivPoints = projStruct.getNumTargetDerivEvalPoints(); + + DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(evaluationDivPoints, numElems, numDivPoints, dim); + + pts::getHDivEvaluationPoints(evaluationPoints, + evaluationDivPoints, + elemOrts, + &basis, + &projStruct); + + DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(targetDivAtEvalPoints, numElems, 
numDivPoints); + + + DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(physEvalDivPoints, numElems, numDivPoints, dim); + { + Basis_HGRAD_HEX_C1_FEM hexLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalPoints, hexa.getNodeCount(), numPoints); + DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalDivPoints, hexa.getNodeCount(), numDivPoints); + + for(ordinal_type i=0; i relTol){ - errorFlag++; - *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; - *outStream << "For N = " << NX << ", computed error (" << hdivNorm[iter] << ") is different than expected one (" << expected_error << ")"; - *outStream << std::endl; + hdivNorm[iter] = std::sqrt(norm2); + auto expected_error = hdiv_errors[iter]; + if(std::abs(hdivNorm[iter]-expected_error)/expected_error > relTol){ + errorFlag++; + *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; + *outStream << "For N = " << NX << ", computed error (" << hdivNorm[iter] << ") is different than expected one (" << expected_error << ")"; + *outStream << std::endl; + } + delete basisPtr; } *outStream << "HDIV Error: " << hdivNorm[iter] < elemOrts("elemOrts", numElems); ots::getOrientation(elemOrts, elemNodes, hexa); - Basis_HVOL_HEX_Cn_FEM basis(order-1); - ordinal_type basisCardinality = basis.getCardinality(); - - //Compute physical Dof Coordinates and Reference coordinates - DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); - { - Basis_HGRAD_HEX_C1_FEM hexaLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(hexaLinearBasisValuesAtRefCoords, hexa.getNodeCount(), numRefCoords); - hexaLinearBasis.getValues(hexaLinearBasisValuesAtRefCoords, refPoints); - for(ordinal_type i=0; i basis_set; + basis_set.push_back(new typename CG_NBasis::HVOL_HEX(order-1)); + basis_set.push_back(new typename CG_HBasis::HVOL_HEX(order-1)); - // compute 
projection-based interpolation of fun into HVOL - DynRankView ConstructWithLabel(basisCoeffsHVol, numElems, basisCardinality); - { - ordinal_type targetCubDegree(basis.getDegree()); + for (auto basisPtr:basis_set) { + auto& basis = *basisPtr; + *outStream << " " << basis.getName() << std::endl; - Experimental::ProjectionStruct projStruct; - projStruct.createHVolProjectionStruct(&basis, targetCubDegree); + ordinal_type basisCardinality = basis.getCardinality(); - ordinal_type numPoints = projStruct.getNumTargetEvalPoints(); + //Compute physical Dof Coordinates and Reference coordinates + DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); + { + Basis_HGRAD_HEX_C1_FEM hexaLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(hexaLinearBasisValuesAtRefCoords, hexa.getNodeCount(), numRefCoords); + hexaLinearBasis.getValues(hexaLinearBasisValuesAtRefCoords, refPoints); + for(ordinal_type i=0; i projStruct; + projStruct.createHVolProjectionStruct(&basis, targetCubDegree); + ordinal_type numPoints = projStruct.getNumTargetEvalPoints(); - DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); - { - Basis_HGRAD_HEX_C1_FEM hexLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalPoints, hexa.getNodeCount(), numPoints); + DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); - for(ordinal_type i=0; i hexLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalPoints, hexa.getNodeCount(), numPoints); + + for(ordinal_type i=0; i relTol){ - errorFlag++; - *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; - *outStream << "For N = " << NX << ", computed error (" << hvolNorm[iter] << ") is different than expected one (" << expected_error << ")"; - *outStream << std::endl; + hvolNorm[iter] = std::sqrt(norm2); + auto expected_error = hvol_errors[iter]; + if(std::abs(hvolNorm[iter]-expected_error)/expected_error > relTol){ + errorFlag++; + *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; + *outStream << "For N = " << NX << ", computed error (" << hvolNorm[iter] << ") is different than expected one (" << expected_error << ")"; + *outStream << std::endl; + } + delete basisPtr; } *outStream << "HVOL Error: " << hvolNorm[iter] <getCubature(refPoints, weights); - + using basisType = Basis; + using CG_NBasis = NodalBasisFamily; + using CG_HBasis = HierarchicalBasisFamily; + //using CG_DNBasis = DerivedNodalBasisFamily; *outStream @@ -345,180 +349,188 @@ int ConvergenceQuad(const bool verbose) { Kokkos::DynRankView elemOrts("elemOrts", numElems); ots::getOrientation(elemOrts, elemNodes, quad); - Basis_HGRAD_QUAD_Cn_FEM basis(order); - ordinal_type basisCardinality = basis.getCardinality(); - - //Compute physical Dof Coordinates and Reference coordinates - DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); - { - Basis_HGRAD_QUAD_C1_FEM quadLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(quadLinearBasisValuesAtRefCoords, quad.getNodeCount(), numRefCoords); - quadLinearBasis.getValues(quadLinearBasisValuesAtRefCoords, refPoints); - for(ordinal_type i=0; i basis_set; + basis_set.push_back(new typename CG_NBasis::HGRAD_QUAD(order)); + basis_set.push_back(new typename CG_HBasis::HGRAD_QUAD(order)); + + for (auto basisPtr:basis_set) { + auto& basis = *basisPtr; + *outStream << " " << basis.getName() << std::endl; + ordinal_type basisCardinality = basis.getCardinality(); + + //Compute physical Dof Coordinates and Reference coordinates + DynRankView ConstructWithLabel(physRefCoords, numElems, 
numRefCoords, dim); + { + Basis_HGRAD_QUAD_C1_FEM quadLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(quadLinearBasisValuesAtRefCoords, quad.getNodeCount(), numRefCoords); + quadLinearBasis.getValues(quadLinearBasisValuesAtRefCoords, refPoints); + for(ordinal_type i=0; i projStruct; - projStruct.createHGradProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); + Experimental::ProjectionStruct projStruct; + projStruct.createHGradProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); - ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numGradPoints = projStruct.getNumTargetDerivEvalPoints(); + ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numGradPoints = projStruct.getNumTargetDerivEvalPoints(); - DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(evaluationGradPoints, numElems, numGradPoints, dim); + DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(evaluationGradPoints, numElems, numGradPoints, dim); - pts::getHGradEvaluationPoints(evaluationPoints, - evaluationGradPoints, - elemOrts, - &basis, - &projStruct); + pts::getHGradEvaluationPoints(evaluationPoints, + evaluationGradPoints, + elemOrts, + &basis, + &projStruct); - DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints); - DynRankView ConstructWithLabel(targetGradAtEvalPoints, numElems, numGradPoints, dim); + DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints); + DynRankView ConstructWithLabel(targetGradAtEvalPoints, numElems, numGradPoints, dim); - DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(physEvalGradPoints, numElems, numGradPoints, dim); - { - Basis_HGRAD_QUAD_C1_FEM quadLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(quadLinearBasisValuesAtEvalPoints, 
quad.getNodeCount(), numPoints); - DynRankView ConstructWithLabel(quadLinearBasisValuesAtEvalGradPoints, quad.getNodeCount(), numGradPoints); + DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(physEvalGradPoints, numElems, numGradPoints, dim); + { + Basis_HGRAD_QUAD_C1_FEM quadLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(quadLinearBasisValuesAtEvalPoints, quad.getNodeCount(), numPoints); + DynRankView ConstructWithLabel(quadLinearBasisValuesAtEvalGradPoints, quad.getNodeCount(), numGradPoints); - for(ordinal_type i=0; i0) - quadLinearBasis.getValues(quadLinearBasisValuesAtEvalGradPoints, Kokkos::subview(evaluationGradPoints,i,Kokkos::ALL(),Kokkos::ALL())); - for(ordinal_type d=0; d0) + quadLinearBasis.getValues(quadLinearBasisValuesAtEvalGradPoints, Kokkos::subview(evaluationGradPoints,i,Kokkos::ALL(),Kokkos::ALL())); + for(ordinal_type d=0; d0) - ct::setJacobian(jacobian, evaluationGradPoints, physVertexes, quad); + //transform the target function and its derivative to the reference element (inverse of pullback operator) + DynRankView ConstructWithLabel(jacobian, numElems, numGradPoints, dim, dim); + if(numGradPoints>0) + ct::setJacobian(jacobian, evaluationGradPoints, physVertexes, quad); + + GradFun gradFun; + Kokkos::deep_copy(targetGradAtEvalPoints,0.); + for(int ic=0; ic relTol){ - errorFlag++; - *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; - *outStream << "For N = " << NX << ", computed error (" << hgradNorm[iter] << ") is different than expected one (" << expected_error << ")"; - *outStream << std::endl; + hgradNorm[iter] = std::sqrt(norm2); + auto expected_error = hgrad_errors[iter]; + if(std::abs(hgradNorm[iter]-expected_error)/expected_error > relTol){ + errorFlag++; + *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; + *outStream << "For N = " << NX << ", computed error (" << hgradNorm[iter] << ") is different than expected one (" << expected_error << ")"; + *outStream << std::endl; + } + delete basisPtr; } *outStream << "HGRAD Error: " << hgradNorm[iter] < elemOrts("elemOrts", numElems); ots::getOrientation(elemOrts, elemNodes, quad); - Basis_HCURL_QUAD_In_FEM basis(order); - ordinal_type basisCardinality = basis.getCardinality(); - - //Compute physical Dof Coordinates and Reference coordinates - DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); - { - Basis_HGRAD_QUAD_C1_FEM quadLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(quadLinearBasisValuesAtRefCoords, quad.getNodeCount(), numRefCoords); - quadLinearBasis.getValues(quadLinearBasisValuesAtRefCoords, refPoints); - for(ordinal_type i=0; i projStruct; - projStruct.createHCurlProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); - - ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numCurlPoints = projStruct.getNumTargetDerivEvalPoints(); - DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(evaluationCurlPoints, numElems, numCurlPoints, dim); - pts::getHCurlEvaluationPoints(evaluationPoints, - evaluationCurlPoints, - elemOrts, - &basis, - &projStruct); - + std::vector basis_set; + basis_set.push_back(new typename CG_NBasis::HCURL_QUAD(order)); + basis_set.push_back(new typename CG_HBasis::HCURL_QUAD(order)); - DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(targetCurlAtEvalPoints, numElems, numCurlPoints); + for (auto basisPtr:basis_set) { + auto& basis = *basisPtr; + *outStream << " " << basis.getName() << std::endl; + ordinal_type basisCardinality = basis.getCardinality(); - DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(physEvalCurlPoints, 
numElems, numCurlPoints, dim); + //Compute physical Dof Coordinates and Reference coordinates + DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); { Basis_HGRAD_QUAD_C1_FEM quadLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(quadLinearBasisValuesAtEvalPoints, quad.getNodeCount(), numPoints); - DynRankView ConstructWithLabel(quadLinearBasisValuesAtEvalCurlPoints, quad.getNodeCount(), numCurlPoints); - - for(ordinal_type i=0; i projStruct; + projStruct.createHCurlProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); + + ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numCurlPoints = projStruct.getNumTargetDerivEvalPoints(); + DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(evaluationCurlPoints, numElems, numCurlPoints, dim); + pts::getHCurlEvaluationPoints(evaluationPoints, + evaluationCurlPoints, + elemOrts, + &basis, + &projStruct); + + + DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(targetCurlAtEvalPoints, numElems, numCurlPoints); + + + DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(physEvalCurlPoints, numElems, numCurlPoints, dim); + { + Basis_HGRAD_QUAD_C1_FEM quadLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(quadLinearBasisValuesAtEvalPoints, quad.getNodeCount(), numPoints); + DynRankView ConstructWithLabel(quadLinearBasisValuesAtEvalCurlPoints, quad.getNodeCount(), numCurlPoints); + + for(ordinal_type i=0; i relTol){ - errorFlag++; - *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; - *outStream << "For N = " << NX << ", computed error (" << hcurlNorm[iter] << ") is different than expected one (" << expected_error << ")"; - *outStream << std::endl; + hcurlNorm[iter] = std::sqrt(norm2); + auto expected_error = hcurl_errors[iter]; + if(std::abs(hcurlNorm[iter]-expected_error)/expected_error > relTol){ + errorFlag++; + *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; + *outStream << "For N = " << NX << ", computed error (" << hcurlNorm[iter] << ") is different than expected one (" << expected_error << ")"; + *outStream << std::endl; + } + delete basisPtr; } *outStream << "HCURL Error: " << hcurlNorm[iter] < elemOrts("elemOrts", numElems); ots::getOrientation(elemOrts, elemNodes, quad); - Basis_HDIV_QUAD_In_FEM basis(order); - ordinal_type basisCardinality = basis.getCardinality(); - - //Compute physical Dof Coordinates and Reference coordinates - DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); - DynRankView ConstructWithLabel(physDofCoords, numElems, basisCardinality, dim); - { - Basis_HGRAD_QUAD_C1_FEM quadLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(quadLinearBasisValuesAtRefCoords, quad.getNodeCount(), numRefCoords); - quadLinearBasis.getValues(quadLinearBasisValuesAtRefCoords, refPoints); - for(ordinal_type i=0; i projStruct; - projStruct.createHDivProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); + std::vector basis_set; + basis_set.push_back(new typename CG_NBasis::HDIV_QUAD(order)); + basis_set.push_back(new typename CG_HBasis::HDIV_QUAD(order)); - ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numDivPoints = projStruct.getNumTargetDerivEvalPoints(); + for (auto basisPtr:basis_set) { + auto& basis = *basisPtr; + *outStream << " " << basis.getName() << std::endl; + ordinal_type basisCardinality = basis.getCardinality(); - DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); - DynRankView 
ConstructWithLabel(evaluationDivPoints, numElems, numDivPoints, dim); - - pts::getHDivEvaluationPoints(evaluationPoints, - evaluationDivPoints, - elemOrts, - &basis, - &projStruct); - - DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(targetDivAtEvalPoints, numElems, numDivPoints); - - - DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(physEvalDivPoints, numElems, numDivPoints, dim); + //Compute physical Dof Coordinates and Reference coordinates + DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); + DynRankView ConstructWithLabel(physDofCoords, numElems, basisCardinality, dim); { Basis_HGRAD_QUAD_C1_FEM quadLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(quadLinearBasisValuesAtEvalPoints, quad.getNodeCount(), numPoints); - DynRankView ConstructWithLabel(quadLinearBasisValuesAtEvalDivPoints, quad.getNodeCount(), numDivPoints); + DynRankView ConstructWithLabel(quadLinearBasisValuesAtRefCoords, quad.getNodeCount(), numRefCoords); + quadLinearBasis.getValues(quadLinearBasisValuesAtRefCoords, refPoints); + for(ordinal_type i=0; i projStruct; + projStruct.createHDivProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); + + ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numDivPoints = projStruct.getNumTargetDerivEvalPoints(); + + DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(evaluationDivPoints, numElems, numDivPoints, dim); + + pts::getHDivEvaluationPoints(evaluationPoints, + evaluationDivPoints, + elemOrts, + &basis, + &projStruct); + + DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(targetDivAtEvalPoints, numElems, numDivPoints); + + + DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); + DynRankView 
ConstructWithLabel(physEvalDivPoints, numElems, numDivPoints, dim); + { + Basis_HGRAD_QUAD_C1_FEM quadLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(quadLinearBasisValuesAtEvalPoints, quad.getNodeCount(), numPoints); + DynRankView ConstructWithLabel(quadLinearBasisValuesAtEvalDivPoints, quad.getNodeCount(), numDivPoints); + + for(ordinal_type i=0; i relTol){ - errorFlag++; - *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; - *outStream << "For N = " << NX << ", computed error (" << hdivNorm[iter] << ") is different than expected one (" << expected_error << ")"; - *outStream << std::endl; + hdivNorm[iter] = std::sqrt(norm2); + auto expected_error = hdiv_errors[iter]; + if(std::abs(hdivNorm[iter]-expected_error)/expected_error > relTol){ + errorFlag++; + *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; + *outStream << "For N = " << NX << ", computed error (" << hdivNorm[iter] << ") is different than expected one (" << expected_error << ")"; + *outStream << std::endl; + } + delete basisPtr; } *outStream << "HDIV Error: " << hdivNorm[iter] < elemOrts("elemOrts", numElems); ots::getOrientation(elemOrts, elemNodes, quad); - Basis_HVOL_QUAD_Cn_FEM basis(order-1); - ordinal_type basisCardinality = basis.getCardinality(); - - //Compute physical Dof Coordinates and Reference coordinates - DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); - { - Basis_HGRAD_QUAD_C1_FEM quadLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(quadLinearBasisValuesAtRefCoords, quad.getNodeCount(), numRefCoords); - quadLinearBasis.getValues(quadLinearBasisValuesAtRefCoords, refPoints); - for(ordinal_type i=0; i basis_set; + basis_set.push_back(new typename CG_NBasis::HVOL_QUAD(order-1)); + basis_set.push_back(new typename CG_HBasis::HVOL_QUAD(order-1)); - //check function reproducibility - Fun fun; - DynRankView ConstructWithLabel(funAtRefCoords, numElems, numRefCoords); - 
for(ordinal_type i=0; i quadLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(quadLinearBasisValuesAtRefCoords, quad.getNodeCount(), numRefCoords); + quadLinearBasis.getValues(quadLinearBasisValuesAtRefCoords, refPoints); + for(ordinal_type i=0; i projStruct; - projStruct.createHVolProjectionStruct(&basis, targetCubDegree); + Experimental::ProjectionStruct projStruct; + projStruct.createHVolProjectionStruct(&basis, targetCubDegree); - ordinal_type numPoints = projStruct.getNumTargetEvalPoints(); + ordinal_type numPoints = projStruct.getNumTargetEvalPoints(); - DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); - pts::getHVolEvaluationPoints(evaluationPoints, - elemOrts, - &basis, - &projStruct); + pts::getHVolEvaluationPoints(evaluationPoints, + elemOrts, + &basis, + &projStruct); - DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints); + DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints); - DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); - { - Basis_HGRAD_QUAD_C1_FEM quadLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(quadLinearBasisValuesAtEvalPoints, quad.getNodeCount(), numPoints); + DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); + { + Basis_HGRAD_QUAD_C1_FEM quadLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(quadLinearBasisValuesAtEvalPoints, quad.getNodeCount(), numPoints); - for(ordinal_type i=0; i relTol){ - errorFlag++; - *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; - *outStream << "For N = " << NX << ", computed error (" << hvolNorm[iter] << ") is different than expected one (" << expected_error << ")"; - *outStream << std::endl; + hvolNorm[iter] = std::sqrt(norm2); + auto expected_error = hvol_errors[iter]; + if(std::abs(hvolNorm[iter]-expected_error)/expected_error > relTol){ + errorFlag++; + *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; + *outStream << "For N = " << NX << ", computed error (" << hvolNorm[iter] << ") is different than expected one (" << expected_error << ")"; + *outStream << std::endl; + } + delete basisPtr; } *outStream << "HVOL Error: " << hvolNorm[iter] <getCubature(refPoints, weights); - + using basisType = Basis; + using CG_NBasis = NodalBasisFamily; + using CG_HBasis = HierarchicalBasisFamily; + //using CG_DNBasis = DerivedNodalBasisFamily; *outStream @@ -418,180 +420,187 @@ int ConvergenceTet(const bool verbose) { Kokkos::DynRankView elemOrts("elemOrts", numElems); ots::getOrientation(elemOrts, elemNodes, tet); - Basis_HGRAD_TET_Cn_FEM basis(order); - ordinal_type basisCardinality = basis.getCardinality(); - - //Compute Reference coordinates - DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); - { - Basis_HGRAD_TET_C1_FEM tetLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(tetLinearBasisValuesAtRefCoords, tet.getNodeCount(), numRefCoords); - tetLinearBasis.getValues(tetLinearBasisValuesAtRefCoords, refPoints); - for(ordinal_type i=0; i basis_set; + basis_set.push_back(new typename CG_NBasis::HGRAD_TET(order)); + basis_set.push_back(new typename CG_HBasis::HGRAD_TET(order)); + + for (auto basisPtr:basis_set) { + auto& basis = *basisPtr; + *outStream << " " << basis.getName() << " " << basis.requireOrientation() << std::endl; + ordinal_type basisCardinality = basis.getCardinality(); - Fun fun; - GradFun gradFun; - DynRankView ConstructWithLabel(funAtRefCoords, numElems, numRefCoords); - DynRankView 
ConstructWithLabel(funGradAtPhysRefCoords, numElems, numRefCoords, dim); - for(ordinal_type i=0; i tetLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(tetLinearBasisValuesAtRefCoords, tet.getNodeCount(), numRefCoords); + tetLinearBasis.getValues(tetLinearBasisValuesAtRefCoords, refPoints); + for(ordinal_type i=0; i projStruct; - projStruct.createHGradProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); + // compute projection-based interpolation of fun into HGRAD + DynRankView ConstructWithLabel(basisCoeffsHGrad, numElems, basisCardinality); + { + ordinal_type targetCubDegree(basis.getDegree()),targetDerivCubDegree(basis.getDegree()); - ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numGradPoints = projStruct.getNumTargetDerivEvalPoints(); + Experimental::ProjectionStruct projStruct; + projStruct.createHGradProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); - DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(evaluationGradPoints, numElems, numGradPoints, dim); + ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numGradPoints = projStruct.getNumTargetDerivEvalPoints(); + DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(evaluationGradPoints, numElems, numGradPoints, dim); - pts::getHGradEvaluationPoints(evaluationPoints, - evaluationGradPoints, - elemOrts, - &basis, - &projStruct); + pts::getHGradEvaluationPoints(evaluationPoints, + evaluationGradPoints, + elemOrts, + &basis, + &projStruct); - DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints); - DynRankView ConstructWithLabel(targetGradAtEvalPoints, numElems, numGradPoints, dim); - DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(physEvalGradPoints, numElems, numGradPoints, dim); - { - Basis_HGRAD_TET_C1_FEM hexLinearBasis; //used 
for computing physical coordinates - DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalPoints, tet.getNodeCount(), numPoints); - DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalGradPoints, tet.getNodeCount(), numGradPoints); - - for(ordinal_type i=0; i0) - hexLinearBasis.getValues(hexLinearBasisValuesAtEvalGradPoints, Kokkos::subview(evaluationGradPoints,i,Kokkos::ALL(),Kokkos::ALL())); - for(ordinal_type d=0; d hexLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalPoints, tet.getNodeCount(), numPoints); + DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalGradPoints, tet.getNodeCount(), numGradPoints); + + for(ordinal_type i=0; i0) + hexLinearBasis.getValues(hexLinearBasisValuesAtEvalGradPoints, Kokkos::subview(evaluationGradPoints,i,Kokkos::ALL(),Kokkos::ALL())); + for(ordinal_type d=0; d0) - ct::setJacobian(jacobian, evaluationGradPoints, physVertexes, tet); + //transform the target function and its derivative to the reference element (inverse of pullback operator) + DynRankView ConstructWithLabel(jacobian, numElems, numGradPoints, dim, dim); + if(numGradPoints>0) + ct::setJacobian(jacobian, evaluationGradPoints, physVertexes, tet); + + GradFun gradFun; + Kokkos::deep_copy(targetGradAtEvalPoints,0.); + for(int ic=0; ic relTol){ - errorFlag++; - *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; - *outStream << "For N = " << NX << ", computed error (" << hgradNorm[iter] << ") is different than expected one (" << expected_error << ")"; - *outStream << std::endl; + hgradNorm[iter] = std::sqrt(norm2); + auto expected_error = hgrad_errors[iter]; + if(std::abs(hgradNorm[iter]-expected_error)/expected_error > relTol){ + errorFlag++; + *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; + *outStream << "For N = " << NX << ", computed error (" << hgradNorm[iter] << ") is different than expected one (" << expected_error << ")"; + *outStream << std::endl; + } + delete basisPtr; } *outStream << "HGRAD Error: " << hgradNorm[iter] < elemOrts("elemOrts", numElems); ots::getOrientation(elemOrts, elemNodes, tet); - Basis_HCURL_TET_In_FEM basis(order); - ordinal_type basisCardinality = basis.getCardinality(); - - //Compute physical Dof Coordinates and Reference coordinates - DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); - { - Basis_HGRAD_TET_C1_FEM tetLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(tetLinearBasisValuesAtRefCoords, tet.getNodeCount(), numRefCoords); - tetLinearBasis.getValues(tetLinearBasisValuesAtRefCoords, refPoints); - for(ordinal_type i=0; i projStruct; - projStruct.createHCurlProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); - - ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numCurlPoints = projStruct.getNumTargetDerivEvalPoints(); - DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(evaluationCurlPoints, numElems, numCurlPoints, dim); - pts::getHCurlEvaluationPoints(evaluationPoints, - evaluationCurlPoints, - elemOrts, - &basis, - &projStruct); + std::vector basis_set; + basis_set.push_back(new typename CG_NBasis::HCURL_TET(order)); + //basis_set.push_back(new typename CG_HBasis::HCURL_TET(order)); + for (auto basisPtr:basis_set) { + auto& basis = *basisPtr; + *outStream << " " << basis.getName() << std::endl; + ordinal_type basisCardinality = basis.getCardinality(); - DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(targetCurlAtEvalPoints, numElems, numCurlPoints, dim); - - - DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(physEvalCurlPoints, numElems, 
numCurlPoints, dim); + //Compute physical Dof Coordinates and Reference coordinates + DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); { - Basis_HGRAD_TET_C1_FEM hexLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalPoints, tet.getNodeCount(), numPoints); - DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalCurlPoints, tet.getNodeCount(), numCurlPoints); + Basis_HGRAD_TET_C1_FEM tetLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(tetLinearBasisValuesAtRefCoords, tet.getNodeCount(), numRefCoords); + tetLinearBasis.getValues(tetLinearBasisValuesAtRefCoords, refPoints); + for(ordinal_type i=0; i projStruct; + projStruct.createHCurlProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); + + ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numCurlPoints = projStruct.getNumTargetDerivEvalPoints(); + DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(evaluationCurlPoints, numElems, numCurlPoints, dim); + pts::getHCurlEvaluationPoints(evaluationPoints, + evaluationCurlPoints, + elemOrts, + &basis, + &projStruct); + + + DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(targetCurlAtEvalPoints, numElems, numCurlPoints, dim); + + + DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(physEvalCurlPoints, numElems, numCurlPoints, dim); + { + Basis_HGRAD_TET_C1_FEM hexLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalPoints, tet.getNodeCount(), numPoints); + DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalCurlPoints, tet.getNodeCount(), numCurlPoints); + + for(ordinal_type i=0; i relTol){ - errorFlag++; - *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; - *outStream << "For N = " << NX << ", computed error (" << hcurlNorm[iter] << ") is different than expected one (" << expected_error << ")"; - *outStream << std::endl; + } + hcurlNorm[iter] = std::sqrt(norm2); + auto expected_error = hcurl_errors[iter]; + if(std::abs(hcurlNorm[iter]-expected_error)/expected_error > relTol){ + errorFlag++; + *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; + *outStream << "For N = " << NX << ", computed error (" << hcurlNorm[iter] << ") is different than expected one (" << expected_error << ")"; + *outStream << std::endl; + } + delete basisPtr; } *outStream << "HCURL Error: " << hcurlNorm[iter] < elemOrts("elemOrts", numElems); ots::getOrientation(elemOrts, elemNodes, tet); - Basis_HDIV_TET_In_FEM basis(order); - ordinal_type basisCardinality = basis.getCardinality(); - - //Compute physical Dof Coordinates and Reference coordinates - DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); - DynRankView ConstructWithLabel(physDofCoords, numElems, basisCardinality, dim); - { - Basis_HGRAD_TET_C1_FEM tetLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(tetLinearBasisValuesAtRefCoords, tet.getNodeCount(), numRefCoords); - tetLinearBasis.getValues(tetLinearBasisValuesAtRefCoords, refPoints); - for(ordinal_type i=0; i basis_set; + basis_set.push_back(new typename CG_NBasis::HDIV_TET(order)); + //basis_set.push_back(new typename CG_HBasis::HDIV_TET(order)); - FunDiv fun; - DivFunDiv funDiv; - DynRankView ConstructWithLabel(funAtRefCoords, numElems, numRefCoords, dim); - DynRankView ConstructWithLabel(funDivAtPhysRefCoords, numElems, numRefCoords); - for(ordinal_type i=0; i projStruct; - projStruct.createHDivProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); - - ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numDivPoints = projStruct.getNumTargetDerivEvalPoints(); + for (auto basisPtr:basis_set) { + auto& basis = *basisPtr; + 
*outStream << " " << basis.getName() << std::endl; ordinal_type basisCardinality = basis.getCardinality(); - DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(evaluationDivPoints, numElems, numDivPoints, dim); - - pts::getHDivEvaluationPoints(evaluationPoints, - evaluationDivPoints, - elemOrts, - &basis, - &projStruct); - - DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(targetDivAtEvalPoints, numElems, numDivPoints); + //Compute physical Dof Coordinates and Reference coordinates + DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); + DynRankView ConstructWithLabel(physDofCoords, numElems, basisCardinality, dim); + { + Basis_HGRAD_TET_C1_FEM tetLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(tetLinearBasisValuesAtRefCoords, tet.getNodeCount(), numRefCoords); + tetLinearBasis.getValues(tetLinearBasisValuesAtRefCoords, refPoints); + for(ordinal_type i=0; i hexLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalPoints, tet.getNodeCount(), numPoints); - DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalDivPoints, tet.getNodeCount(), numDivPoints); - - for(ordinal_type i=0; i projStruct; + projStruct.createHDivProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); + + ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numDivPoints = projStruct.getNumTargetDerivEvalPoints(); + + DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(evaluationDivPoints, numElems, numDivPoints, dim); + + pts::getHDivEvaluationPoints(evaluationPoints, + evaluationDivPoints, + elemOrts, + &basis, + &projStruct); + + DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(targetDivAtEvalPoints, numElems, 
numDivPoints); + + + DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(physEvalDivPoints, numElems, numDivPoints, dim); + { + Basis_HGRAD_TET_C1_FEM hexLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalPoints, tet.getNodeCount(), numPoints); + DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalDivPoints, tet.getNodeCount(), numDivPoints); + + for(ordinal_type i=0; i relTol){ - errorFlag++; - *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; - *outStream << "For N = " << NX << ", computed error (" << hdivNorm[iter] << ") is different than expected one (" << expected_error << ")"; - *outStream << std::endl; + hdivNorm[iter] = std::sqrt(norm2); + auto expected_error = hdiv_errors[iter]; + if(std::abs(hdivNorm[iter]-expected_error)/expected_error > relTol){ + errorFlag++; + *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; + *outStream << "For N = " << NX << ", computed error (" << hdivNorm[iter] << ") is different than expected one (" << expected_error << ")"; + *outStream << std::endl; + } + delete basisPtr; } *outStream << "HDIV Error: " << hdivNorm[iter] < elemOrts("elemOrts", numElems); ots::getOrientation(elemOrts, elemNodes, tet); - Basis_HVOL_TET_Cn_FEM basis(order-1); - ordinal_type basisCardinality = basis.getCardinality(); - - //Compute physical Dof Coordinates and Reference coordinates - DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); - { - Basis_HGRAD_TET_C1_FEM tetLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(tetLinearBasisValuesAtRefCoords, tet.getNodeCount(), numRefCoords); - tetLinearBasis.getValues(tetLinearBasisValuesAtRefCoords, refPoints); - for(ordinal_type i=0; i basis_set; + basis_set.push_back(new typename CG_NBasis::HVOL_TET(order-1)); + //basis_set.push_back(new typename CG_HBasis::HVOL_TET(order-1)); - //check function 
reproducibility - Fun fun; - DynRankView ConstructWithLabel(funAtRefCoords, numElems, numRefCoords); - for(ordinal_type i=0; i tetLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(tetLinearBasisValuesAtRefCoords, tet.getNodeCount(), numRefCoords); + tetLinearBasis.getValues(tetLinearBasisValuesAtRefCoords, refPoints); + for(ordinal_type i=0; i projStruct; - projStruct.createHVolProjectionStruct(&basis, targetCubDegree); + // compute projection-based interpolation of fun into HVOL + DynRankView ConstructWithLabel(basisCoeffsHVol, numElems, basisCardinality); + { + ordinal_type targetCubDegree(basis.getDegree()); - ordinal_type numPoints = projStruct.getNumTargetEvalPoints(); + Experimental::ProjectionStruct projStruct; + projStruct.createHVolProjectionStruct(&basis, targetCubDegree); - DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); + ordinal_type numPoints = projStruct.getNumTargetEvalPoints(); - pts::getHVolEvaluationPoints(evaluationPoints, - elemOrts, - &basis, - &projStruct); + DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints); + pts::getHVolEvaluationPoints(evaluationPoints, + elemOrts, + &basis, + &projStruct); + DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints); - DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); - { - Basis_HGRAD_TET_C1_FEM hexLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalPoints, tet.getNodeCount(), numPoints); - for(ordinal_type i=0; i hexLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(hexLinearBasisValuesAtEvalPoints, tet.getNodeCount(), numPoints); + + for(ordinal_type i=0; i relTol){ - errorFlag++; - *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; - *outStream << "For N = " << NX << ", computed error (" << hvolNorm[iter] << ") is different than expected one (" << expected_error << ")"; - *outStream << std::endl; + hvolNorm[iter] = std::sqrt(norm2); + auto expected_error = hvol_errors[iter]; + if(std::abs(hvolNorm[iter]-expected_error)/expected_error > relTol){ + errorFlag++; + *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; + *outStream << "For N = " << NX << ", computed error (" << hvolNorm[iter] << ") is different than expected one (" << expected_error << ")"; + *outStream << std::endl; + } + delete basisPtr; } *outStream << "HVOL Error: " << hvolNorm[iter] <getCubature(refPoints, weights); - + using basisType = Basis; + using CG_NBasis = NodalBasisFamily; + using CG_HBasis = HierarchicalBasisFamily; + //using CG_DNBasis = DerivedNodalBasisFamily; *outStream @@ -355,180 +359,187 @@ int ConvergenceTri(const bool verbose) { Kokkos::DynRankView elemOrts("elemOrts", numElems); ots::getOrientation(elemOrts, elemNodes, tri); - Basis_HGRAD_TRI_Cn_FEM basis(order); - ordinal_type basisCardinality = basis.getCardinality(); - - //Compute physical Dof Coordinates and Reference coordinates - DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); - { - Basis_HGRAD_TRI_C1_FEM triLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(triLinearBasisValuesAtRefCoords, tri.getNodeCount(), numRefCoords); - triLinearBasis.getValues(triLinearBasisValuesAtRefCoords, refPoints); - for(ordinal_type i=0; i basis_set; + basis_set.push_back(new typename CG_NBasis::HGRAD_TRI(order)); + basis_set.push_back(new typename CG_HBasis::HGRAD_TRI(order)); + + for (auto basisPtr:basis_set) { + auto& basis = *basisPtr; + *outStream << " " << basis.getName() << std::endl; + ordinal_type basisCardinality = basis.getCardinality(); - Fun fun; - GradFun gradFun; - DynRankView ConstructWithLabel(funAtRefCoords, numElems, numRefCoords); - DynRankView 
ConstructWithLabel(funGradAtPhysRefCoords, numElems, numRefCoords, dim); - for(ordinal_type i=0; i triLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(triLinearBasisValuesAtRefCoords, tri.getNodeCount(), numRefCoords); + triLinearBasis.getValues(triLinearBasisValuesAtRefCoords, refPoints); + for(ordinal_type i=0; i projStruct; - projStruct.createHGradProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); + Experimental::ProjectionStruct projStruct; + projStruct.createHGradProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); - ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numGradPoints = projStruct.getNumTargetDerivEvalPoints(); + ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numGradPoints = projStruct.getNumTargetDerivEvalPoints(); - DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(evaluationGradPoints, numElems, numGradPoints, dim); + DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(evaluationGradPoints, numElems, numGradPoints, dim); - pts::getHGradEvaluationPoints(evaluationPoints, - evaluationGradPoints, - elemOrts, - &basis, - &projStruct); + pts::getHGradEvaluationPoints(evaluationPoints, + evaluationGradPoints, + elemOrts, + &basis, + &projStruct); - DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints); - DynRankView ConstructWithLabel(targetGradAtEvalPoints, numElems, numGradPoints, dim); + DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints); + DynRankView ConstructWithLabel(targetGradAtEvalPoints, numElems, numGradPoints, dim); - DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(physEvalGradPoints, numElems, numGradPoints, dim); - { - Basis_HGRAD_TRI_C1_FEM triLinearBasis; //used for computing physical coordinates - DynRankView 
ConstructWithLabel(triLinearBasisValuesAtEvalPoints, tri.getNodeCount(), numPoints); - DynRankView ConstructWithLabel(triLinearBasisValuesAtEvalGradPoints, tri.getNodeCount(), numGradPoints); + DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(physEvalGradPoints, numElems, numGradPoints, dim); + { + Basis_HGRAD_TRI_C1_FEM triLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(triLinearBasisValuesAtEvalPoints, tri.getNodeCount(), numPoints); + DynRankView ConstructWithLabel(triLinearBasisValuesAtEvalGradPoints, tri.getNodeCount(), numGradPoints); - for(ordinal_type i=0; i0) - triLinearBasis.getValues(triLinearBasisValuesAtEvalGradPoints, Kokkos::subview(evaluationGradPoints,i,Kokkos::ALL(),Kokkos::ALL())); - for(ordinal_type d=0; d0) + triLinearBasis.getValues(triLinearBasisValuesAtEvalGradPoints, Kokkos::subview(evaluationGradPoints,i,Kokkos::ALL(),Kokkos::ALL())); + for(ordinal_type d=0; d0) - ct::setJacobian(jacobian, evaluationGradPoints, physVertexes, tri); + //transform the target function and its derivative to the reference element (inverse of pullback operator) + DynRankView ConstructWithLabel(jacobian, numElems, numGradPoints, dim, dim); + if(numGradPoints>0) + ct::setJacobian(jacobian, evaluationGradPoints, physVertexes, tri); + + GradFun gradFun; + Kokkos::deep_copy(targetGradAtEvalPoints,0.); + for(int ic=0; ic relTol){ - errorFlag++; - *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; - *outStream << "For N = " << NX << ", computed error (" << hgradNorm[iter] << ") is different than expected one (" << expected_error << ")"; - *outStream << std::endl; + hgradNorm[iter] = std::sqrt(norm2); + auto expected_error = hgrad_errors[iter]; + if(std::abs(hgradNorm[iter]-expected_error)/expected_error > relTol){ + errorFlag++; + *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; + *outStream << "For N = " << NX << ", computed error (" << hgradNorm[iter] << ") is different than expected one (" << expected_error << ")"; + *outStream << std::endl; + } + delete basisPtr; } *outStream << "HGRAD Error: " << hgradNorm[iter] < elemOrts("elemOrts", numElems); ots::getOrientation(elemOrts, elemNodes, tri); - Basis_HCURL_TRI_In_FEM basis(order); - ordinal_type basisCardinality = basis.getCardinality(); - - //Compute physical Dof Coordinates and Reference coordinates - DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); - { - Basis_HGRAD_TRI_C1_FEM triLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(triLinearBasisValuesAtRefCoords, tri.getNodeCount(), numRefCoords); - triLinearBasis.getValues(triLinearBasisValuesAtRefCoords, refPoints); - for(ordinal_type i=0; i projStruct; - projStruct.createHCurlProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); - - ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numCurlPoints = projStruct.getNumTargetDerivEvalPoints(); - DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(evaluationCurlPoints, numElems, numCurlPoints, dim); - pts::getHCurlEvaluationPoints(evaluationPoints, - evaluationCurlPoints, - elemOrts, - &basis, - &projStruct); + std::vector basis_set; + basis_set.push_back(new typename CG_NBasis::HCURL_TRI(order)); + //basis_set.push_back(new typename CG_HBasis::HCURL_TRI(order)); + for (auto basisPtr:basis_set) { + auto& basis = *basisPtr; + *outStream << " " << basis.getName() << std::endl; + ordinal_type basisCardinality = basis.getCardinality(); - DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(targetCurlAtEvalPoints, numElems, numCurlPoints); - - - DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(physEvalCurlPoints, numElems, 
numCurlPoints, dim); + //Compute physical Dof Coordinates and Reference coordinates + DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); { Basis_HGRAD_TRI_C1_FEM triLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(triLinearBasisValuesAtEvalPoints, tri.getNodeCount(), numPoints); - DynRankView ConstructWithLabel(triLinearBasisValuesAtEvalCurlPoints, tri.getNodeCount(), numCurlPoints); - - for(ordinal_type i=0; i projStruct; + projStruct.createHCurlProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); + + ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numCurlPoints = projStruct.getNumTargetDerivEvalPoints(); + DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(evaluationCurlPoints, numElems, numCurlPoints, dim); + pts::getHCurlEvaluationPoints(evaluationPoints, + evaluationCurlPoints, + elemOrts, + &basis, + &projStruct); + + + DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(targetCurlAtEvalPoints, numElems, numCurlPoints); + + + DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(physEvalCurlPoints, numElems, numCurlPoints, dim); + { + Basis_HGRAD_TRI_C1_FEM triLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(triLinearBasisValuesAtEvalPoints, tri.getNodeCount(), numPoints); + DynRankView ConstructWithLabel(triLinearBasisValuesAtEvalCurlPoints, tri.getNodeCount(), numCurlPoints); + + for(ordinal_type i=0; i relTol){ - errorFlag++; - *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; - *outStream << "For N = " << NX << ", computed error (" << hcurlNorm[iter] << ") is different than expected one (" << expected_error << ")"; - *outStream << std::endl; + hcurlNorm[iter] = std::sqrt(norm2); + auto expected_error = hcurl_errors[iter]; + if(std::abs(hcurlNorm[iter]-expected_error)/expected_error > relTol){ + errorFlag++; + *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; + *outStream << "For N = " << NX << ", computed error (" << hcurlNorm[iter] << ") is different than expected one (" << expected_error << ")"; + *outStream << std::endl; + } + delete basisPtr; } *outStream << "HCURL Error: " << hcurlNorm[iter] < elemOrts("elemOrts", numElems); ots::getOrientation(elemOrts, elemNodes, tri); - Basis_HDIV_TRI_In_FEM basis(order); - ordinal_type basisCardinality = basis.getCardinality(); - - //Compute physical Dof Coordinates and Reference coordinates - DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); - DynRankView ConstructWithLabel(physDofCoords, numElems, basisCardinality, dim); - { - Basis_HGRAD_TRI_C1_FEM triLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(triLinearBasisValuesAtRefCoords, tri.getNodeCount(), numRefCoords); - triLinearBasis.getValues(triLinearBasisValuesAtRefCoords, refPoints); - for(ordinal_type i=0; i basis_set; + basis_set.push_back(new typename CG_NBasis::HDIV_TRI(order)); + //basis_set.push_back(new typename CG_HBasis::HDIV_TRI(order)); - Experimental::ProjectionStruct projStruct; - projStruct.createHDivProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); + for (auto basisPtr:basis_set) { + auto& basis = *basisPtr; + *outStream << " " << basis.getName() << std::endl; + ordinal_type basisCardinality = basis.getCardinality(); - ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numDivPoints = projStruct.getNumTargetDerivEvalPoints(); - - DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); - DynRankView 
ConstructWithLabel(evaluationDivPoints, numElems, numDivPoints, dim); - - pts::getHDivEvaluationPoints(evaluationPoints, - evaluationDivPoints, - elemOrts, - &basis, - &projStruct); - - DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(targetDivAtEvalPoints, numElems, numDivPoints); - - - DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(physEvalDivPoints, numElems, numDivPoints, dim); + //Compute physical Dof Coordinates and Reference coordinates + DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); + DynRankView ConstructWithLabel(physDofCoords, numElems, basisCardinality, dim); { Basis_HGRAD_TRI_C1_FEM triLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(triLinearBasisValuesAtEvalPoints, tri.getNodeCount(), numPoints); - DynRankView ConstructWithLabel(triLinearBasisValuesAtEvalDivPoints, tri.getNodeCount(), numDivPoints); + DynRankView ConstructWithLabel(triLinearBasisValuesAtRefCoords, tri.getNodeCount(), numRefCoords); + triLinearBasis.getValues(triLinearBasisValuesAtRefCoords, refPoints); + for(ordinal_type i=0; i projStruct; + projStruct.createHDivProjectionStruct(&basis, targetCubDegree, targetDerivCubDegree); + + ordinal_type numPoints = projStruct.getNumTargetEvalPoints(), numDivPoints = projStruct.getNumTargetDerivEvalPoints(); + + DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(evaluationDivPoints, numElems, numDivPoints, dim); + + pts::getHDivEvaluationPoints(evaluationPoints, + evaluationDivPoints, + elemOrts, + &basis, + &projStruct); + + DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints, dim); + DynRankView ConstructWithLabel(targetDivAtEvalPoints, numElems, numDivPoints); + + + DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); + DynRankView 
ConstructWithLabel(physEvalDivPoints, numElems, numDivPoints, dim); + { + Basis_HGRAD_TRI_C1_FEM triLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(triLinearBasisValuesAtEvalPoints, tri.getNodeCount(), numPoints); + DynRankView ConstructWithLabel(triLinearBasisValuesAtEvalDivPoints, tri.getNodeCount(), numDivPoints); + + for(ordinal_type i=0; i relTol){ - errorFlag++; - *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; - *outStream << "For N = " << NX << ", computed error (" << hdivNorm[iter] << ") is different than expected one (" << expected_error << ")"; - *outStream << std::endl; + hdivNorm[iter] = std::sqrt(norm2); + auto expected_error = hdiv_errors[iter]; + if(std::abs(hdivNorm[iter]-expected_error)/expected_error > relTol){ + errorFlag++; + *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; + *outStream << "For N = " << NX << ", computed error (" << hdivNorm[iter] << ") is different than expected one (" << expected_error << ")"; + *outStream << std::endl; + } + delete basisPtr; } *outStream << "HDIV Error: " << hdivNorm[iter] < elemOrts("elemOrts", numElems); ots::getOrientation(elemOrts, elemNodes, tri); - Basis_HVOL_TRI_Cn_FEM basis(order-1); - ordinal_type basisCardinality = basis.getCardinality(); - - //Compute physical Dof Coordinates and Reference coordinates - DynRankView ConstructWithLabel(physRefCoords, numElems, numRefCoords, dim); - { - Basis_HGRAD_TRI_C1_FEM triLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(triLinearBasisValuesAtRefCoords, tri.getNodeCount(), numRefCoords); - triLinearBasis.getValues(triLinearBasisValuesAtRefCoords, refPoints); - for(ordinal_type i=0; i basis_set; + basis_set.push_back(new typename CG_NBasis::HVOL_TRI(order-1)); + //basis_set.push_back(new typename CG_HBasis::HVOL_TRI(order-1)); - //check function reproducibility - Fun fun; - DynRankView ConstructWithLabel(funAtRefCoords, numElems, numRefCoords); - for(ordinal_type 
i=0; i triLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(triLinearBasisValuesAtRefCoords, tri.getNodeCount(), numRefCoords); + triLinearBasis.getValues(triLinearBasisValuesAtRefCoords, refPoints); + for(ordinal_type i=0; i projStruct; - projStruct.createHVolProjectionStruct(&basis, targetCubDegree); + // compute projection-based interpolation of fun into HVOL + DynRankView ConstructWithLabel(basisCoeffsHVol, numElems, basisCardinality); + { + ordinal_type targetCubDegree(basis.getDegree()); - ordinal_type numPoints = projStruct.getNumTargetEvalPoints(); + Experimental::ProjectionStruct projStruct; + projStruct.createHVolProjectionStruct(&basis, targetCubDegree); - DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); + ordinal_type numPoints = projStruct.getNumTargetEvalPoints(); - pts::getHVolEvaluationPoints(evaluationPoints, - elemOrts, - &basis, - &projStruct); + DynRankView ConstructWithLabel(evaluationPoints, numElems, numPoints, dim); - DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints); + pts::getHVolEvaluationPoints(evaluationPoints, + elemOrts, + &basis, + &projStruct); + DynRankView ConstructWithLabel(targetAtEvalPoints, numElems, numPoints); - DynRankView ConstructWithLabel(physEvalPoints, numElems, numPoints, dim); - { - Basis_HGRAD_TRI_C1_FEM triLinearBasis; //used for computing physical coordinates - DynRankView ConstructWithLabel(triLinearBasisValuesAtEvalPoints, tri.getNodeCount(), numPoints); - for(ordinal_type i=0; i triLinearBasis; //used for computing physical coordinates + DynRankView ConstructWithLabel(triLinearBasisValuesAtEvalPoints, tri.getNodeCount(), numPoints); + + for(ordinal_type i=0; i relTol){ - errorFlag++; - *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; - *outStream << "For N = " << NX << ", computed error (" << hvolNorm[iter] << ") is different than expected one (" << expected_error << ")"; - *outStream << std::endl; + hvolNorm[iter] = std::sqrt(norm2); + auto expected_error = hvol_errors[iter]; + if(std::abs(hvolNorm[iter]-expected_error)/expected_error > relTol){ + errorFlag++; + *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; + *outStream << "For N = " << NX << ", computed error (" << hvolNorm[iter] << ") is different than expected one (" << expected_error << ")"; + *outStream << std::endl; + } + delete basisPtr; } *outStream << "HVOL Error: " << hvolNorm[iter] < pow(16, degree)*tol) { //heuristic relation on how round-off error depends on degree + if(diffErr > pow(20, degree)*tol) { //heuristic relation on how round-off error depends on degree errorFlag++; *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; - *outStream << "HGRAD_C" << degree << ": The weights recovered with the optimization are different than the one used for generating the functon."<< + *outStream << "HVOL_C" << degree << ": The weights recovered with the optimization are different than the one used for generating the functon."<< "\nThe max The infinite norm of the difference between the weights is: " << diffErr << std::endl; } } @@ -1854,14 +1846,13 @@ int InterpolationProjectionHex(const bool verbose) { diffErr = std::max(diffErr, std::abs(basisCoeffsLI(ic,k) - basisCoeffsL2(ic,k))); } - if(diffErr > pow(16, degree)*tol) { //heuristic relation on how round-off error depends on degree + if(diffErr > pow(20, degree)*tol) { //heuristic relation on how round-off error depends on degree errorFlag++; *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; - *outStream << "HGRAD_C" << degree << ": The weights recovered with the L2 optimization are different than the one used for generating the functon."<< + *outStream << "HVOL_C" << degree << ": The weights recovered with the L2 optimization are different than the one used for generating the functon."<< "\nThe max The infinite norm of the difference between the weights is: " << diffErr << std::endl; } } -#endif } } catch (std::exception &err) { std::cout << " Exeption\n"; diff --git a/packages/intrepid2/unit-test/Projection/test_interpolation_projection_QUAD.hpp b/packages/intrepid2/unit-test/Projection/test_interpolation_projection_QUAD.hpp index 03feda807ac6..db39efba259c 100644 --- a/packages/intrepid2/unit-test/Projection/test_interpolation_projection_QUAD.hpp +++ b/packages/intrepid2/unit-test/Projection/test_interpolation_projection_QUAD.hpp @@ -428,7 +428,6 @@ int InterpolationProjectionQuad(const bool verbose) { } -#ifndef KOKKOS_ENABLE_CUDA //compute projection-based interpolation of the Lagrangian interpolation DynRankView ConstructWithLabel(basisCoeffsHGrad, numCells, basisCardinality); { @@ -570,7 +569,6 @@ int InterpolationProjectionQuad(const bool verbose) { "\nThe max The infinite norm of the difference between the weights is: " << diffErr << std::endl; } } -#endif } } } while(std::next_permutation(&reorder[0]+1, &reorder[0]+4)); //reorder vertices of common face @@ -818,7 +816,6 @@ int InterpolationProjectionQuad(const bool verbose) { } } -#ifndef KOKKOS_ENABLE_CUDA //compute projection-based interpolation of the Lagrangian interpolation DynRankView ConstructWithLabel(basisCoeffsHCurl, numCells, basisCardinality); { @@ -959,7 +956,6 @@ int InterpolationProjectionQuad(const bool verbose) { "\nThe max The infinite norm of the difference between the weights is: " << diffErr << std::endl; } } -#endif } } } while(std::next_permutation(&reorder[0]+1, &reorder[0]+4)); //reorder vertices of common face @@ -1213,7 +1209,6 @@ int 
InterpolationProjectionQuad(const bool verbose) { } } -#ifndef KOKKOS_ENABLE_CUDA //compute projection-based interpolation of the Lagrangian interpolation DynRankView ConstructWithLabel(basisCoeffsHDiv, numCells, basisCardinality); { @@ -1357,7 +1352,6 @@ int InterpolationProjectionQuad(const bool verbose) { "\nThe max The infinite norm of the difference between the weights is: " << diffErr << std::endl; } } -#endif } } } while(std::next_permutation(&reorder[0]+1, &reorder[0]+4)); //reorder vertices of common face @@ -1546,7 +1540,6 @@ int InterpolationProjectionQuad(const bool verbose) { } } -#ifndef KOKKOS_ENABLE_CUDA //compute projection-based interpolation of the Lagrangian interpolation DynRankView ConstructWithLabel(basisCoeffsHVol, numCells, basisCardinality); { @@ -1602,7 +1595,7 @@ int InterpolationProjectionQuad(const bool verbose) { if(diffErr > pow(16, degree)*tol) { //heuristic relation on how round-off error depends on degree errorFlag++; *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; - *outStream << "HGRAD_C" << degree << ": The weights recovered with the optimization are different than the one used for generating the functon."<< + *outStream << "HVOL_C" << degree << ": The weights recovered with the optimization are different than the one used for generating the functon."<< "\nThe max The infinite norm of the difference between the weights is: " << diffErr << std::endl; } } @@ -1662,11 +1655,10 @@ int InterpolationProjectionQuad(const bool verbose) { if(diffErr > pow(16, degree)*tol) { //heuristic relation on how round-off error depends on degree errorFlag++; *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; - *outStream << "HGRAD_C" << degree << ": The weights recovered with the L2 optimization are different than the one used for generating the functon."<< + *outStream << "HVOL_C" << degree << ": The weights recovered with the L2 optimization are different than the one used for generating the functon."<< "\nThe max The infinite norm of the difference between the weights is: " << diffErr << std::endl; } } -#endif } } catch (std::exception &err) { std::cout << " Exeption\n"; diff --git a/packages/intrepid2/unit-test/Projection/test_interpolation_projection_TET.hpp b/packages/intrepid2/unit-test/Projection/test_interpolation_projection_TET.hpp index 9ebcb34e3584..3e6dcb801508 100644 --- a/packages/intrepid2/unit-test/Projection/test_interpolation_projection_TET.hpp +++ b/packages/intrepid2/unit-test/Projection/test_interpolation_projection_TET.hpp @@ -322,13 +322,7 @@ int InterpolationProjectionTet(const bool verbose) { //compute Lagrangian Interpolation of fun { li::getDofCoordsAndCoeffs(dofCoordsOriented, dofCoeffsPhys, basisPtr.get(), POINTTYPE_EQUISPACED, elemOrts); - Kokkos::fence(); - *outStream << "\n\nFunction DOFs for Tet 0 are:"; - for(ordinal_type j=0;j tetLinearBasis; //used for computing physical coordinates @@ -431,14 +425,14 @@ int InterpolationProjectionTet(const bool verbose) { { errorFlag++; *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; - //*outStream << "Function DOFs on common edge " << iEdge << " computed using Tet 0 basis functions are not consistent with those computed using Tet 1\n"; - //*outStream << "Function DOFs for Tet 0 are:"; - //for(ordinal_type j=0;jgetDofOrdinal(1,edgeIndexes[0][iEdge],j)); - //*outStream << "\nFunction DOFs for Tet 1 are:"; - //for(ordinal_type j=0;jgetDofOrdinal(1,edgeIndexes[1][iEdge],j)); - //*outStream << std::endl; + *outStream << "Function DOFs on common edge " << iEdge << " computed using Tet 0 basis functions are not consistent with those computed using Tet 1\n"; + *outStream << "Function DOFs for Tet 0 are:"; + for(ordinal_type j=0;jgetDofOrdinal(1,edgeIndexes[0][iEdge],j)); + *outStream << "\nFunction DOFs for Tet 1 are:"; + for(ordinal_type j=0;jgetDofOrdinal(1,edgeIndexes[1][iEdge],j)); + *outStream << std::endl; } } } @@ -486,8 +480,6 @@ int InterpolationProjectionTet(const bool verbose) { } } - -#ifndef KOKKOS_ENABLE_CUDA //compute projection-based interpolation of the Lagrangian interpolation DynRankView ConstructWithLabel(basisCoeffsHGrad, numCells, basisCardinality); { @@ -629,7 +621,6 @@ int InterpolationProjectionTet(const bool verbose) { "\nThe max The infinite norm of the difference between the weights is: " << diffErr << std::endl; } } -#endif } } } while(std::next_permutation(&reorder[0]+1, &reorder[0]+4)); //reorder vertices of common face @@ -924,7 +915,7 @@ int InterpolationProjectionTet(const bool verbose) { } } -#ifndef KOKKOS_ENABLE_CUDA + //compute projection-based interpolation of the Lagrangian interpolation DynRankView ConstructWithLabel(basisCoeffsHCurl, numCells, basisCardinality); { @@ -1067,7 +1058,6 @@ int InterpolationProjectionTet(const bool verbose) { "\nThe max The infinite norm of the difference between the weights is: " << diffErr << std::endl; } } -#endif } } } while(std::next_permutation(&reorder[0]+1, &reorder[0]+4)); //reorder vertices of common face @@ -1348,7 +1338,6 @@ int InterpolationProjectionTet(const 
bool verbose) { } } -#ifndef KOKKOS_ENABLE_CUDA //compute projection-based interpolation of the Lagrangian interpolation DynRankView ConstructWithLabel(basisCoeffsHDiv, numCells, basisCardinality); { @@ -1493,7 +1482,6 @@ int InterpolationProjectionTet(const bool verbose) { "\nThe max The infinite norm of the difference between the weights is: " << diffErr << std::endl; } } -#endif } } } while(std::next_permutation(&reorder[0]+1, &reorder[0]+4)); //reorder vertices of common face @@ -1686,7 +1674,6 @@ int InterpolationProjectionTet(const bool verbose) { } } -#ifndef KOKKOS_ENABLE_CUDA //compute projection-based interpolation of the Lagrangian interpolation DynRankView ConstructWithLabel(basisCoeffsHVol, numCells, basisCardinality); { @@ -1743,7 +1730,7 @@ int InterpolationProjectionTet(const bool verbose) { if(diffErr > pow(7, degree-1)*tol) { //heuristic relation on how round-off error depends on degree errorFlag++; *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; - *outStream << "HGRAD_C" << degree << ": The weights recovered with the optimization are different than the one used for generating the functon."<< + *outStream << "HVOL_C" << degree << ": The weights recovered with the optimization are different than the one used for generating the functon."<< "\nThe max The infinite norm of the difference between the weights is: " << diffErr << std::endl; } } @@ -1803,11 +1790,10 @@ int InterpolationProjectionTet(const bool verbose) { if(diffErr > pow(7, degree-1)*tol) { //heuristic relation on how round-off error depends on degree errorFlag++; *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; - *outStream << "HGRAD_C" << degree << ": The weights recovered with the L2 optimization are different than the one used for generating the functon."<< + *outStream << "HVOL_C" << degree << ": The weights recovered with the L2 optimization are different than the one used for generating the functon."<< "\nThe max The infinite norm of the difference between the weights is: " << diffErr << std::endl; } } -#endif } } catch (std::exception &err) { std::cout << " Exeption\n"; diff --git a/packages/intrepid2/unit-test/Projection/test_interpolation_projection_TRI.hpp b/packages/intrepid2/unit-test/Projection/test_interpolation_projection_TRI.hpp index d3254e5db1f3..321b42dd2f58 100644 --- a/packages/intrepid2/unit-test/Projection/test_interpolation_projection_TRI.hpp +++ b/packages/intrepid2/unit-test/Projection/test_interpolation_projection_TRI.hpp @@ -428,7 +428,6 @@ int InterpolationProjectionTri(const bool verbose) { } } -#ifndef KOKKOS_ENABLE_CUDA //compute projection-based interpolation of the Lagrangian interpolation DynRankView ConstructWithLabel(basisCoeffsHGrad, numCells, basisCardinality); { @@ -570,7 +569,6 @@ int InterpolationProjectionTri(const bool verbose) { "\nThe max The infinite norm of the difference between the weights is: " << diffErr << std::endl; } } -#endif } } } while(std::next_permutation(&reorder[0]+1, &reorder[0]+4)); //reorder vertices of common face @@ -816,7 +814,6 @@ int InterpolationProjectionTri(const bool verbose) { } } -#ifndef KOKKOS_ENABLE_CUDA //compute projection-based interpolation of the Lagrangian interpolation DynRankView ConstructWithLabel(basisCoeffsHCurl, numCells, basisCardinality); { @@ -957,7 +954,6 @@ int InterpolationProjectionTri(const bool verbose) { "\nThe max The infinite norm of the difference between the weights is: " << diffErr << std::endl; } } -#endif } } } while(std::next_permutation(&reorder[0]+1, &reorder[0]+4)); //reorder vertices of common face @@ -1210,7 +1206,6 @@ int 
InterpolationProjectionTri(const bool verbose) { } } -#ifndef KOKKOS_ENABLE_CUDA //compute projection-based interpolation of the Lagrangian interpolation DynRankView ConstructWithLabel(basisCoeffsHDiv, numCells, basisCardinality); { @@ -1354,7 +1349,6 @@ int InterpolationProjectionTri(const bool verbose) { "\nThe max The infinite norm of the difference between the weights is: " << diffErr << std::endl; } } -#endif } } } while(std::next_permutation(&reorder[0]+1, &reorder[0]+4)); //reorder vertices of common face @@ -1545,7 +1539,6 @@ int InterpolationProjectionTri(const bool verbose) { } } -#ifndef KOKKOS_ENABLE_CUDA //compute projection-based interpolation of the Lagrangian interpolation DynRankView ConstructWithLabel(basisCoeffsHVol, numCells, basisCardinality); { @@ -1601,7 +1594,7 @@ int InterpolationProjectionTri(const bool verbose) { if(diffErr > pow(16, degree)*tol) { //heuristic relation on how round-off error depends on degree errorFlag++; *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; - *outStream << "HGRAD_C" << degree << ": The weights recovered with the optimization are different than the one used for generating the functon."<< + *outStream << "HVOL_C" << degree << ": The weights recovered with the optimization are different than the one used for generating the functon."<< "\nThe max The infinite norm of the difference between the weights is: " << diffErr << std::endl; } } @@ -1661,11 +1654,10 @@ int InterpolationProjectionTri(const bool verbose) { if(diffErr > pow(16, degree)*tol) { //heuristic relation on how round-off error depends on degree errorFlag++; *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; - *outStream << "HGRAD_C" << degree << ": The weights recovered with the L2 optimization are different than the one used for generating the functon."<< + *outStream << "HVOL_C" << degree << ": The weights recovered with the L2 optimization are different than the one used for generating the functon."<< "\nThe max The infinite norm of the difference between the weights is: " << diffErr << std::endl; } } -#endif } } catch (std::exception &err) { std::cout << " Exeption\n"; From fe2cbb8c25fd9896ea5dc2044ee355837758cc6e Mon Sep 17 00:00:00 2001 From: Victor Brunini Date: Tue, 5 May 2020 11:44:16 -0600 Subject: [PATCH 10/86] tpetra: Include multivector length in deciding where kernels should run. Instead of just the current sync state. For long multivectors it is faster to run on the device even if the data is currently on the host. This reduces GPU solve times by ~30% on the Aria milestone test. --- .../core/src/Tpetra_MultiVector_def.hpp | 51 ++++++++++++------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp index b33620380461..8558e0154ce5 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp @@ -329,9 +329,23 @@ namespace { // (anonymous) } } + template + bool + runKernelOnHost ( Kokkos::DualView imports ) + { + if (! imports.need_sync_device ()) { + return false; // most up-to-date on device + } + else { // most up-to-date on host + constexpr size_t localLengthThreshold = 10000; + return imports.extent(0) <= localLengthThreshold; + } + } + + template bool - multiVectorRunNormOnHost (const ::Tpetra::MultiVector& X) + runKernelOnHost (const ::Tpetra::MultiVector& X) { if (! 
X.need_sync_device ()) { return false; // most up-to-date on device @@ -360,7 +374,7 @@ namespace { // (anonymous) const bool isConstantStride = X.isConstantStride (); const bool isDistributed = X.isDistributed (); - const bool runOnHost = multiVectorRunNormOnHost (X); + const bool runOnHost = runKernelOnHost (X); if (runOnHost) { using view_type = typename dual_view_type::t_host; using array_layout = typename view_type::array_layout; @@ -981,7 +995,7 @@ namespace Tpetra { << permuteFromLIDs.extent (0) << "."); // We've already called checkSizes(), so this cast must succeed. - const MV& sourceMV = dynamic_cast (sourceObj); + MV& sourceMV = const_cast(dynamic_cast (sourceObj)); const size_t numCols = this->getNumVectors (); // sourceMV doesn't belong to us, so we can't sync it. Do the @@ -990,7 +1004,7 @@ namespace Tpetra { (sourceMV.need_sync_device () && sourceMV.need_sync_host (), std::logic_error, "Input MultiVector needs sync to both host " "and device."); - const bool copyOnHost = sourceMV.need_sync_device (); + const bool copyOnHost = runKernelOnHost(sourceMV); if (verbose) { std::ostringstream os; os << *prefix << "copyOnHost=" << (copyOnHost ? "true" : "false") << endl; @@ -998,12 +1012,12 @@ namespace Tpetra { } if (copyOnHost) { - if (this->need_sync_host ()) { - this->sync_host (); - } + sourceMV.sync_host(); + this->sync_host (); this->modify_host (); } else { + sourceMV.sync_device(); if (this->need_sync_device ()) { this->sync_device (); } @@ -1296,7 +1310,7 @@ namespace Tpetra { } // We've already called checkSizes(), so this cast must succeed. 
- const MV& sourceMV = dynamic_cast (sourceObj); + MV& sourceMV = const_cast(dynamic_cast (sourceObj)); const size_t numCols = sourceMV.getNumVectors (); @@ -1349,7 +1363,7 @@ namespace Tpetra { (sourceMV.need_sync_device () && sourceMV.need_sync_host (), std::logic_error, "Input MultiVector needs sync to both host " "and device."); - const bool packOnHost = sourceMV.need_sync_device (); + const bool packOnHost = runKernelOnHost(sourceMV); auto src_dev = sourceMV.getLocalViewHost (); auto src_host = sourceMV.getLocalViewDevice (); if (printDebugOutput) { @@ -1369,6 +1383,7 @@ namespace Tpetra { // Clearing the sync flags prevents this possible case. exports.clear_sync_state (); exports.modify_host (); + sourceMV.sync_host(); } else { // nde 06 Feb 2020: If 'exports' does not require resize @@ -1378,6 +1393,7 @@ namespace Tpetra { // Clearing the sync flags prevents this possible case. exports.clear_sync_state (); exports.modify_device (); + sourceMV.sync_device(); } if (numCols == 1) { // special case for one column only @@ -1582,7 +1598,7 @@ namespace Tpetra { // mfh 12 Apr 2016, 04 Feb 2019: Decide where to unpack based on // the memory space in which the imports buffer was last modified. // DistObject::doTransferNew gets to decide this. - const bool unpackOnHost = imports.need_sync_device (); + const bool unpackOnHost = runKernelOnHost(imports); if (printDebugOutput) { std::ostringstream os; @@ -1594,15 +1610,13 @@ namespace Tpetra { // We have to sync before modifying, because this method may read // as well as write (depending on the CombineMode). 
if (unpackOnHost) { - if (this->need_sync_host ()) { - this->sync_host (); - } + imports.sync_host(); + this->sync_host (); this->modify_host (); } else { - if (this->need_sync_device ()) { - this->sync_device (); - } + imports.sync_device(); + this->sync_device (); this->modify_device (); } auto X_d = this->getLocalViewDevice (); @@ -2319,9 +2333,10 @@ namespace Tpetra { // avoids sync'ing, which could violate users' expectations. // // If we need sync to device, then host has the most recent version. - const bool runOnHost = this->need_sync_device (); + const bool runOnHost = runKernelOnHost(*this); - if (! runOnHost) { // last modified in device memory + this->clear_sync_state(); + if (! runOnHost) { this->modify_device (); auto X = this->getLocalViewDevice (); if (this->isConstantStride ()) { From 7ed21cec304779544cb3e306e7917c7409923b30 Mon Sep 17 00:00:00 2001 From: Geoffrey C Danielson Date: Fri, 15 May 2020 12:00:30 -0600 Subject: [PATCH 11/86] adds details support for long multivector thresholds --- packages/tpetra/core/src/Tpetra_Details_Behavior.cpp | 11 +++++++++++ packages/tpetra/core/src/Tpetra_Details_Behavior.hpp | 9 +++++++++ packages/tpetra/core/src/Tpetra_MultiVector_def.hpp | 2 +- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/packages/tpetra/core/src/Tpetra_Details_Behavior.cpp b/packages/tpetra/core/src/Tpetra_Details_Behavior.cpp index c17dcb98b479..a769c7aef494 100644 --- a/packages/tpetra/core/src/Tpetra_Details_Behavior.cpp +++ b/packages/tpetra/core/src/Tpetra_Details_Behavior.cpp @@ -358,6 +358,17 @@ size_t Behavior::longRowMinNumEntries () (value_, initialized_, envVarName, defaultValue); } +size_t Behavior::multivectorKernelLocationThreshold () +{ + constexpr char envVarName[] = "TPETRA_VECTOR_DEVICE_THRESHOLD"; + constexpr size_t defaultValue (10000); + + static size_t value_ = defaultValue; + static bool initialized_ = false; + return idempotentlyGetEnvironmentVariableAsSize + (value_, initialized_, envVarName, 
 defaultValue); +} + bool Behavior::profilingRegionUseTeuchosTimers () { constexpr char envVarName[] = "TPETRA_USE_TEUCHOS_TIMERS"; diff --git a/packages/tpetra/core/src/Tpetra_Details_Behavior.hpp b/packages/tpetra/core/src/Tpetra_Details_Behavior.hpp index 3f248bca2fb5..20dd43c9e40f 100644 --- a/packages/tpetra/core/src/Tpetra_Details_Behavior.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_Behavior.hpp @@ -200,6 +200,15 @@ class Behavior { /// separate question. static size_t longRowMinNumEntries (); + /// \brief Length threshold for choosing host vs. device kernels + /// + /// If the data is most up to date on host and the length of the + /// multivector does not exceed this threshold, then run the + /// calculation on host. Otherwise, run on device. + /// By default this is 10000, but may be altered by the environment + /// variable TPETRA_VECTOR_DEVICE_THRESHOLD + static size_t multivectorKernelLocationThreshold (); + /// \brief Use Teuchos::Timer in Tpetra::ProfilingRegion /// /// This is disabled by default.
You may control this at run time via the diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp index 8558e0154ce5..fce3cd5703fa 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp @@ -337,7 +337,7 @@ namespace { // (anonymous) return false; // most up-to-date on device } else { // most up-to-date on host - constexpr size_t localLengthThreshold = 10000; + size_t localLengthThreshold = Tpetra::Details::Behavior::multivectorKernelLocationThreshold(); return imports.extent(0) <= localLengthThreshold; } } From 3b42a49ea2765e46903b03e94b1ecfb718c5cc39 Mon Sep 17 00:00:00 2001 From: Geoffrey C Danielson Date: Fri, 15 May 2020 15:05:59 -0600 Subject: [PATCH 12/86] added a couple of large mv tests --- .../MultiVector/MultiVector_UnitTests.cpp | 404 ++++++++++++++++++ 1 file changed, 404 insertions(+) diff --git a/packages/tpetra/core/test/MultiVector/MultiVector_UnitTests.cpp b/packages/tpetra/core/test/MultiVector/MultiVector_UnitTests.cpp index f4ba0759ea2c..5f0eeb0a1a1a 100644 --- a/packages/tpetra/core/test/MultiVector/MultiVector_UnitTests.cpp +++ b/packages/tpetra/core/test/MultiVector/MultiVector_UnitTests.cpp @@ -282,6 +282,83 @@ namespace { } + //// + TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( MultiVector, large, LO, GO, Scalar , Node ) + { + using map_type = Tpetra::Map; + using MV = Tpetra::MultiVector; + using vec_type = Tpetra::Vector; + typedef typename ScalarTraits::magnitudeType Magnitude; + constexpr bool debug = true; + + RCP outPtr = debug ? 
+ Teuchos::getFancyOStream (Teuchos::rcpFromRef (std::cerr)) : + Teuchos::rcpFromRef (out); + Teuchos::FancyOStream& myOut = *outPtr; + + myOut << "Test: MultiVector, basic" << endl; + Teuchos::OSTab tab0 (myOut); + + const global_size_t INVALID = OrdinalTraits::invalid (); + RCP > comm = getDefaultComm (); + const int numImages = comm->getSize (); + + myOut << "Create Map" << endl; + const size_t numLocal = 15000; + const size_t numVecs = 10; + const GO indexBase = 0; + RCP map = + rcp (new map_type (INVALID, numLocal, indexBase, comm)); + + myOut << "Test MultiVector's & Vector's default constructors" << endl; + { + MV defaultConstructedMultiVector; + auto dcmv_map = defaultConstructedMultiVector.getMap (); + TEST_ASSERT( dcmv_map.get () != nullptr ); + if (dcmv_map.get () != nullptr) { + TEST_EQUALITY( dcmv_map->getGlobalNumElements (), + Tpetra::global_size_t (0) ); + } + vec_type defaultConstructedVector; + auto dcv_map = defaultConstructedVector.getMap (); + TEST_ASSERT( dcv_map.get () != nullptr ); + if (dcv_map.get () != nullptr) { + TEST_EQUALITY( dcv_map->getGlobalNumElements (), + Tpetra::global_size_t (0) ); + } + } + + myOut << "Test MultiVector's usual constructor" << endl; + RCP mvec; + TEST_NOTHROW( mvec = rcp (new MV (map, numVecs, true)) ); + if (mvec.is_null ()) { + myOut << "MV constructor threw an exception: returning" << endl; + return; + } + TEST_EQUALITY( mvec->getNumVectors(), numVecs ); + TEST_EQUALITY( mvec->getLocalLength(), numLocal ); + TEST_EQUALITY( mvec->getGlobalLength(), numImages*numLocal ); + + myOut << "Test that all norms are zero" << endl; + Array norms(numVecs), zeros(numVecs); + std::fill(zeros.begin(),zeros.end(),ScalarTraits::zero()); + TEST_NOTHROW( mvec->norm2(norms) ); + TEST_COMPARE_FLOATING_ARRAYS(norms,zeros,ScalarTraits::zero()); + TEST_NOTHROW( mvec->norm1(norms) ); + TEST_COMPARE_FLOATING_ARRAYS(norms,zeros,ScalarTraits::zero()); + TEST_NOTHROW( mvec->normInf(norms) ); + 
TEST_COMPARE_FLOATING_ARRAYS(norms,zeros,ScalarTraits::zero()); + // print it + myOut << *mvec << endl; + + // Make sure that the test passed on all processes, not just Proc 0. + int lclSuccess = success ? 1 : 0; + int gblSuccess = 1; + reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess)); + TEST_ASSERT( gblSuccess == 1 ); + } + + //// TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( MultiVector, BadConstLDA, LO, GO, Scalar , Node ) { @@ -1221,6 +1298,331 @@ namespace { TEST_ASSERT( gblSuccess == 1 ); } + // Test Tpetra::MultiVector::elementWiseMultiply on a large multivector + // + // Be sure to exercise all combinations of the cases alpha = + // {-1,0,1,other} and beta = {-1,0,1,other}, as these commonly have + // special cases. + // + // Also be sure to exercise the common case (also often with a + // special-case implementation) where all the MultiVectors have one + // column. + TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( MultiVector, ElementWiseMultiplyLg, LO , GO , ST , Node ) + { + using Teuchos::View; + typedef Tpetra::global_size_t GST; + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType MT; + typedef Teuchos::ScalarTraits STM; + typedef Tpetra::Map map_type; + typedef Tpetra::MultiVector MV; + typedef Tpetra::Vector V; + typedef typename Kokkos::Details::ArithTraits::val_type IST; + + out << "Tpetra::MultiVector::elementWiseMultiplyLg test" << endl; + Teuchos::OSTab tab0 (out); + + // Create a Map. + RCP > comm = getDefaultComm (); + const size_t lclNumRows = 15000; + const GST gblNumRows = comm->getSize () * lclNumRows; + const GO indexBase = 0; + RCP map3n = + rcp (new map_type (gblNumRows, lclNumRows, indexBase, comm)); + + const MT M0 = STM::zero (); + const ST S0 = STS::zero (); + const ST S1 = STS::one (); + + // In what follows, '@' (without single quotes) denotes + // element-wise multiplication -- that is, what + // MultiVector::elementWiseMultiply implements. + + const size_t maxNumVecs = 3; + + // Test for various numbers of columns. 
+ for (size_t numVecs = 1; numVecs <= maxNumVecs; ++numVecs) { + out << "Test numVecs = " << numVecs << endl; + Teuchos::OSTab tab1 (out); + + // A (always) has 1 vector, and B and C have numVecs vectors. + V A (map3n); + MV B (map3n, numVecs); + MV C (map3n, numVecs); + MV C_exp (map3n, numVecs); + Array C_norms (C.getNumVectors ()); + Array C_zeros (C.getNumVectors ()); + std::fill (C_zeros.begin (), C_zeros.end (), M0); + + int caseNum = 0; + + caseNum++; + out << "Case " << caseNum << ": C = 0*C + 0*(A @ B)" << endl; + // Fill A and B initially with nonzero values, just for + // generality. C should get filled with zeros afterwards. + // Prefill C with NaN, to ensure that the method follows BLAS + // update rules. + { + A.putScalar (S1); + B.putScalar (S1); + + // Prefill C with NaN, if NaN exists for ST. + const ST nan = static_cast (Kokkos::Details::ArithTraits::nan ()); + C.putScalar (nan); + + C.elementWiseMultiply (S0, A, B, S0); + + C_exp.putScalar (S0); + C_exp.update (S1, C, -S1); + C_exp.normInf (C_norms ()); + + TEST_COMPARE_FLOATING_ARRAYS( C_norms, C_zeros, M0 ); + } + + caseNum++; + out << "Case " << caseNum << ": C = 1*C + 0*(A @ B)" << endl; + // Fill A and B with NaN to check that the method follows BLAS + // update rules. + { + const ST S3 = S1 + S1 + S1; + + // Prefill A and B with NaN, if NaN exists for ST. + const ST nan = static_cast (Kokkos::Details::ArithTraits::nan ()); + A.putScalar (nan); + B.putScalar (nan); + C.putScalar (S3); + + C.elementWiseMultiply (S0, A, B, S1); + + C_exp.putScalar (S3); + C_exp.update (S1, C, -S1); + C_exp.normInf (C_norms ()); + + TEST_COMPARE_FLOATING_ARRAYS( C_norms, C_zeros, M0 ); + } + + caseNum++; + out << "Case " << caseNum << ": C = (-1)*C + 0*(A @ B)" << endl; + // Fill A and B with NaN to check that the method follows BLAS + // update rules. + { + // Prefill A and B with NaN, if NaN exists for ST. 
+ const ST nan = static_cast (Kokkos::Details::ArithTraits::nan ()); + A.putScalar (nan); + B.putScalar (nan); + C.putScalar (S1); + + C.elementWiseMultiply (S0, A, B, -S1); + + C_exp.putScalar (-S1); + C_exp.update (S1, C, -S1); + C_exp.normInf (C_norms ()); + + TEST_COMPARE_FLOATING_ARRAYS( C_norms, C_zeros, M0 ); + } + + caseNum++; + out << "Case " << caseNum << ": C = 2*C + 0*(A @ B)" << endl; + // Fill A and B with NaN to check that the method follows BLAS + // update rules. + { + const ST S2 = S1 + S1; + + // Prefill A and B with NaN, if NaN exists for ST. + const ST nan = static_cast (Kokkos::Details::ArithTraits::nan ()); + A.putScalar (nan); + B.putScalar (nan); + C.putScalar (S1); + + C.elementWiseMultiply (S0, A, B, S2); + + C_exp.putScalar (S2); + C_exp.update (S1, C, -S1); + C_exp.normInf (C_norms ()); + + TEST_COMPARE_FLOATING_ARRAYS( C_norms, C_zeros, M0 ); + } + + caseNum++; + out << "Case " << caseNum << ": C = 0*C + 1*(A @ B)" << endl; + // A and B will be filled with 1s, so C should get filled with 1s. + // Prefill C with NaN, to ensure that the method follows BLAS + // update rules. + { + A.putScalar (S1); + B.putScalar (S1); + + // Prefill C with NaN, if NaN exists for ST. + const ST nan = static_cast (Kokkos::Details::ArithTraits::nan ()); + C.putScalar (nan); + + C.elementWiseMultiply (S1, A, B, S0); + + C_exp.putScalar (S1); + C_exp.update (S1, C, -S1); + C_exp.normInf (C_norms ()); + + TEST_COMPARE_FLOATING_ARRAYS( C_norms, C_zeros, M0 ); + } + + caseNum++; + out << "Case " << caseNum << ": C = 0*C + (-1)*(A @ B)" << endl; + // A and B will be filled with 1, so C should get filled with -1. + // Prefill C with NaN, to ensure that the method follows BLAS + // update rules. + { + A.putScalar (S1); + B.putScalar (S1); + + // Prefill C with NaN, if NaN exists for ST. 
+ const ST nan = static_cast (Kokkos::Details::ArithTraits::nan ()); + C.putScalar (nan); + + C.elementWiseMultiply (-S1, A, B, S0); + + C_exp.putScalar (-S1); + C_exp.update (S1, C, -S1); + C_exp.normInf (C_norms ()); + + TEST_COMPARE_FLOATING_ARRAYS( C_norms, C_zeros, M0 ); + } + + caseNum++; + out << "Case " << caseNum << ": C = 1*C + 1*(A @ B)" << endl; + // Fill A with 1, B with 2, and C with 3. C should be 5 after. + { + const ST S2 = S1 + S1; + const ST S3 = S1 + S1 + S1; + const ST S5 = S2 + S3; + A.putScalar (S1); + B.putScalar (S2); + C.putScalar (S3); + + C.elementWiseMultiply (S1, A, B, S1); + + C_exp.putScalar (S5); + C_exp.update (S1, C, -S1); + C_exp.normInf (C_norms ()); + + TEST_COMPARE_FLOATING_ARRAYS( C_norms, C_zeros, M0 ); + } + + caseNum++; + out << "Case " << caseNum << ": C = (-1)*C + 1*(A @ B)" << endl; + // Fill A with 1, B with 2, and C with 3. C should be -1 after. + { + const ST S2 = S1 + S1; + const ST S3 = S1 + S1 + S1; + A.putScalar (S1); + B.putScalar (S2); + C.putScalar (S3); + + C.elementWiseMultiply (S1, A, B, -S1); + + C_exp.putScalar (-S1); + C_exp.update (S1, C, -S1); + C_exp.normInf (C_norms ()); + + TEST_COMPARE_FLOATING_ARRAYS( C_norms, C_zeros, M0 ); + } + + caseNum++; + out << "Case " << caseNum << ": C = 1*C + (-1)*(A @ B)" << endl; + // Fill A with 2, B with 3, and C with 1. C should be -5 after. + { + const ST S2 = S1 + S1; + const ST S3 = S2 + S1; + const ST S5 = S2 + S3; + + A.putScalar (S2); + B.putScalar (S3); + C.putScalar (S1); + + C.elementWiseMultiply (-S1, A, B, S1); + + C_exp.putScalar (-S5); + C_exp.update (S1, C, -S1); + C_exp.normInf (C_norms ()); + + TEST_COMPARE_FLOATING_ARRAYS( C_norms, C_zeros, M0 ); + } + + caseNum++; + out << "Case " << caseNum << ": C = (-1)*C + (-1)*(A @ B)" << endl; + // Fill A with 1, B with 2, and C with 3. C should be -5 after. 
+ { + const ST S2 = S1 + S1; + const ST S3 = S1 + S1 + S1; + const ST S5 = S2 + S3; + A.putScalar (S1); + B.putScalar (S2); + C.putScalar (S3); + + C.elementWiseMultiply (-S1, A, B, -S1); + + C_exp.putScalar (-S5); + C_exp.update (S1, C, -S1); + C_exp.normInf (C_norms ()); + + TEST_COMPARE_FLOATING_ARRAYS( C_norms, C_zeros, M0 ); + } + + caseNum++; + out << "Case " << caseNum << ": C = 0*C + 2*(A @ B)" << endl; + // Fill A with 3 and B with 4. C should be 24 after. + { + const ST S2 = S1 + S1; + const ST S3 = S2 + S1; + const ST S4 = S3 + S1; + const ST S24 = S2 * S3 * S4; + + A.putScalar (S3); + B.putScalar (S4); + + // Prefill C with NaN, if NaN exists for ST. + const ST nan = static_cast (Kokkos::Details::ArithTraits::nan ()); + C.putScalar (nan); + + C.elementWiseMultiply (S2, A, B, S0); + + C_exp.putScalar (S24); + C_exp.update (S1, C, -S1); + C_exp.normInf (C_norms ()); + + TEST_COMPARE_FLOATING_ARRAYS( C_norms, C_zeros, M0 ); + } + + caseNum++; + out << "Case " << caseNum << ": C = (-2)*C + 2*(A @ B)" << endl; + // Fill A with 3, B with 4, and C with 5. C should be 14 after. + { + const ST S2 = S1 + S1; + const ST S3 = S2 + S1; + const ST S4 = S3 + S1; + const ST S5 = S4 + S1; + const ST S14 = S5 * S2 + S4; + + A.putScalar (S3); + B.putScalar (S4); + C.putScalar (S5); + + C.elementWiseMultiply (S2, A, B, -S2); + + C_exp.putScalar (S14); + C_exp.update (S1, C, -S1); + C_exp.normInf (C_norms ()); + + TEST_COMPARE_FLOATING_ARRAYS( C_norms, C_zeros, M0 ); + } + } + + // Make sure that the test passed on all processes, not just Proc 0. + int lclSuccess = success ? 
 1 : 0; + int gblSuccess = 1; + reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess)); + TEST_ASSERT( gblSuccess == 1 ); + } + //// TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( MultiVector, BadConstAA, LO , GO , Scalar , Node ) @@ -4800,6 +5202,7 @@ namespace { #define UNIT_TEST_GROUP_BASE( SCALAR, LO, GO, NODE ) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, basic , LO, GO, SCALAR, NODE ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, large , LO, GO, SCALAR, NODE ) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, NonMemberConstructors, LO, GO, SCALAR, NODE ) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, BadConstLDA , LO, GO, SCALAR, NODE ) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, BadConstAA , LO, GO, SCALAR, NODE ) \ @@ -4822,6 +5225,7 @@ namespace { TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, SingleVecNormalize, LO, GO, SCALAR, NODE ) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, Multiply , LO, GO, SCALAR, NODE ) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, ElementWiseMultiply,LO, GO, SCALAR, NODE ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, ElementWiseMultiplyLg,LO, GO, SCALAR, NODE ) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, NonContigView , LO, GO, SCALAR, NODE ) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, Describable , LO, GO, SCALAR, NODE ) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, Typedefs , LO, GO, SCALAR, NODE ) \ From 52f2dcc6f370af49326b2927203adb717d8a95df Mon Sep 17 00:00:00 2001 From: Luca Bertagna Date: Mon, 18 May 2020 10:55:04 -0600 Subject: [PATCH 13/86] Epetra: zero-out nonlocal matrix in Epetra_FECrsMatrix at the end of GlobalAssemble This makes the behavior of the matrix where a nonlocal matrix is used consistent with the case where simple std::vector's are used to temporarily store nonlocal data.
Closes #7349 --- packages/epetra/src/Epetra_FECrsMatrix.cpp | 39 ++++++++++++---------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/packages/epetra/src/Epetra_FECrsMatrix.cpp b/packages/epetra/src/Epetra_FECrsMatrix.cpp index b40a77b42fa7..8100e6bcf551 100644 --- a/packages/epetra/src/Epetra_FECrsMatrix.cpp +++ b/packages/epetra/src/Epetra_FECrsMatrix.cpp @@ -1009,29 +1009,32 @@ int Epetra_FECrsMatrix::GlobalAssemble(const Epetra_Map& domain_map, const Epetra_CrsGraph& graph = tempMat_->Graph(); Epetra_CrsGraph& nonconst_graph = const_cast(graph); nonconst_graph.SetIndicesAreGlobal(true); - } - } - //Now we need to call FillComplete on our temp matrix. We need to - //pass a DomainMap and RangeMap, which are not the same as the RowMap - //and ColMap that we constructed the matrix with. - EPETRA_CHK_ERR(tempMat_->FillComplete(domain_map, range_map)); + } + } - if (exporter_ == NULL) - exporter_ = new Epetra_Export(tempMat_->RowMap(), RowMap()); + //Now we need to call FillComplete on our temp matrix. We need to + //pass a DomainMap and RangeMap, which are not the same as the RowMap + //and ColMap that we constructed the matrix with. + EPETRA_CHK_ERR(tempMat_->FillComplete(domain_map, range_map)); - EPETRA_CHK_ERR(Export(*tempMat_, *exporter_, combineMode)); + if (exporter_ == NULL) + exporter_ = new Epetra_Export(tempMat_->RowMap(), RowMap()); - if(callFillComplete) { - EPETRA_CHK_ERR(FillComplete(domain_map, range_map)); - } + EPETRA_CHK_ERR(Export(*tempMat_, *exporter_, combineMode)); - //now reset the values in our nonlocal data - if (!useNonlocalMatrix_) { - for(size_t i=0; iPutScalar(0.0); + } else { + for(size_t i=0; i Date: Mon, 18 May 2020 11:12:14 -0600 Subject: [PATCH 14/86] Epetra: overridden Print method in Epetra_FECrsMatrix This allows to print also nonlocal data (if any), while assembly is active. 
--- packages/epetra/src/Epetra_FECrsMatrix.cpp | 74 ++++++++++++++++++++++ packages/epetra/src/Epetra_FECrsMatrix.h | 1 + 2 files changed, 75 insertions(+) diff --git a/packages/epetra/src/Epetra_FECrsMatrix.cpp b/packages/epetra/src/Epetra_FECrsMatrix.cpp index 8100e6bcf551..a8a38f0d7854 100644 --- a/packages/epetra/src/Epetra_FECrsMatrix.cpp +++ b/packages/epetra/src/Epetra_FECrsMatrix.cpp @@ -315,6 +315,80 @@ Epetra_FECrsMatrix::~Epetra_FECrsMatrix() DeleteMemory(); } +void Epetra_FECrsMatrix::Print(std::ostream& os) const +{ + Epetra_CrsMatrix::Print(os); + + if (ignoreNonLocalEntries_ || RowMap().Comm().NumProc()==1) return; + + int MyPID = RowMap().Comm().MyPID(); + int NumProc = RowMap().Comm().NumProc(); + + if (useNonlocalMatrix_) { + if (MyPID==0) { + os << "[FECrsMatrix] Nonlocal matrix:\n"; + } + nonlocalMatrix_->Print(os); + return; + } + + if (MyPID==0) { + os << "[FECrsMatrix] Nonlocal rows:\n"; + os.width(8); + os << " Processor "; + os.width(10); + os << " Row Index "; + os.width(10); + os << " Col Index "; + os.width(20); + os << " Value "; + os << std::endl; + } + for (int iproc=0; iproc < NumProc; iproc++) { + if (MyPID==iproc) { + if(RowMap().GlobalIndicesInt()) { + const int nnr = nonlocalRows_int_.size(); + for (int i=0; i Date: Wed, 20 May 2020 14:57:18 -0600 Subject: [PATCH 15/86] ATDM/ats2: 2019.06.24->rolling (ATDV-351) --- ...243-gnu-7.3.1-spmpi-rolling_static_dbg.sh} | 0 ...243-gnu-7.3.1-spmpi-rolling_static_opt.sh} | 0 ...-7.3.1-spmpi-rolling_serial_static_dbg.sh} | 0 ...-7.3.1-spmpi-rolling_serial_static_opt.sh} | 0 cmake/std/atdm/ats2/all_supported_builds.sh | 12 +++---- cmake/std/atdm/ats2/custom_builds.sh | 32 +++++++++---------- cmake/std/atdm/ats2/environment.sh | 14 ++++---- 7 files changed, 29 insertions(+), 29 deletions(-) rename cmake/ctest/drivers/atdm/ats2/drivers/{Trilinos-atdm-ats2-cuda-10.1.243-gnu-7.3.1-spmpi-2019.06.24_static_dbg.sh => Trilinos-atdm-ats2-cuda-10.1.243-gnu-7.3.1-spmpi-rolling_static_dbg.sh} (100%) 
rename cmake/ctest/drivers/atdm/ats2/drivers/{Trilinos-atdm-ats2-cuda-10.1.243-gnu-7.3.1-spmpi-2019.06.24_static_opt.sh => Trilinos-atdm-ats2-cuda-10.1.243-gnu-7.3.1-spmpi-rolling_static_opt.sh} (100%) rename cmake/ctest/drivers/atdm/ats2/drivers/{Trilinos-atdm-ats2-gnu-7.3.1-spmpi-2019.06.24_serial_static_dbg.sh => Trilinos-atdm-ats2-gnu-7.3.1-spmpi-rolling_serial_static_dbg.sh} (100%) rename cmake/ctest/drivers/atdm/ats2/drivers/{Trilinos-atdm-ats2-gnu-7.3.1-spmpi-2019.06.24_serial_static_opt.sh => Trilinos-atdm-ats2-gnu-7.3.1-spmpi-rolling_serial_static_opt.sh} (100%) diff --git a/cmake/ctest/drivers/atdm/ats2/drivers/Trilinos-atdm-ats2-cuda-10.1.243-gnu-7.3.1-spmpi-2019.06.24_static_dbg.sh b/cmake/ctest/drivers/atdm/ats2/drivers/Trilinos-atdm-ats2-cuda-10.1.243-gnu-7.3.1-spmpi-rolling_static_dbg.sh similarity index 100% rename from cmake/ctest/drivers/atdm/ats2/drivers/Trilinos-atdm-ats2-cuda-10.1.243-gnu-7.3.1-spmpi-2019.06.24_static_dbg.sh rename to cmake/ctest/drivers/atdm/ats2/drivers/Trilinos-atdm-ats2-cuda-10.1.243-gnu-7.3.1-spmpi-rolling_static_dbg.sh diff --git a/cmake/ctest/drivers/atdm/ats2/drivers/Trilinos-atdm-ats2-cuda-10.1.243-gnu-7.3.1-spmpi-2019.06.24_static_opt.sh b/cmake/ctest/drivers/atdm/ats2/drivers/Trilinos-atdm-ats2-cuda-10.1.243-gnu-7.3.1-spmpi-rolling_static_opt.sh similarity index 100% rename from cmake/ctest/drivers/atdm/ats2/drivers/Trilinos-atdm-ats2-cuda-10.1.243-gnu-7.3.1-spmpi-2019.06.24_static_opt.sh rename to cmake/ctest/drivers/atdm/ats2/drivers/Trilinos-atdm-ats2-cuda-10.1.243-gnu-7.3.1-spmpi-rolling_static_opt.sh diff --git a/cmake/ctest/drivers/atdm/ats2/drivers/Trilinos-atdm-ats2-gnu-7.3.1-spmpi-2019.06.24_serial_static_dbg.sh b/cmake/ctest/drivers/atdm/ats2/drivers/Trilinos-atdm-ats2-gnu-7.3.1-spmpi-rolling_serial_static_dbg.sh similarity index 100% rename from cmake/ctest/drivers/atdm/ats2/drivers/Trilinos-atdm-ats2-gnu-7.3.1-spmpi-2019.06.24_serial_static_dbg.sh rename to 
cmake/ctest/drivers/atdm/ats2/drivers/Trilinos-atdm-ats2-gnu-7.3.1-spmpi-rolling_serial_static_dbg.sh diff --git a/cmake/ctest/drivers/atdm/ats2/drivers/Trilinos-atdm-ats2-gnu-7.3.1-spmpi-2019.06.24_serial_static_opt.sh b/cmake/ctest/drivers/atdm/ats2/drivers/Trilinos-atdm-ats2-gnu-7.3.1-spmpi-rolling_serial_static_opt.sh similarity index 100% rename from cmake/ctest/drivers/atdm/ats2/drivers/Trilinos-atdm-ats2-gnu-7.3.1-spmpi-2019.06.24_serial_static_opt.sh rename to cmake/ctest/drivers/atdm/ats2/drivers/Trilinos-atdm-ats2-gnu-7.3.1-spmpi-rolling_serial_static_opt.sh diff --git a/cmake/std/atdm/ats2/all_supported_builds.sh b/cmake/std/atdm/ats2/all_supported_builds.sh index 2b6a9e39e9a5..f7ab090e4753 100644 --- a/cmake/std/atdm/ats2/all_supported_builds.sh +++ b/cmake/std/atdm/ats2/all_supported_builds.sh @@ -1,11 +1,11 @@ export ATDM_CONFIG_CTEST_S_BUILD_NAME_PREFIX=Trilinos-atdm- -# ats2-xl-2019.08.20-spmpi-2019.06.24_openmp_static_opt -# ats2-xl-2019.08.20-spmpi-2019.06.24_openmp_static_dbg +# ats2-xl-2019.08.20-spmpi-rolling_openmp_static_opt +# ats2-xl-2019.08.20-spmpi-rolling_openmp_static_dbg export ATDM_CONFIG_ALL_SUPPORTED_BUILDS=( - ats2-gnu-7.3.1-spmpi-2019.06.24_serial_static_opt - ats2-gnu-7.3.1-spmpi-2019.06.24_serial_static_dbg - ats2-cuda-10.1.243-gnu-7.3.1-spmpi-2019.06.24_static_opt - ats2-cuda-10.1.243-gnu-7.3.1-spmpi-2019.06.24_static_dbg + ats2-gnu-7.3.1-spmpi-rolling_serial_static_opt + ats2-gnu-7.3.1-spmpi-rolling_serial_static_dbg + ats2-cuda-10.1.243-gnu-7.3.1-spmpi-rolling_static_opt + ats2-cuda-10.1.243-gnu-7.3.1-spmpi-rolling_static_dbg ) diff --git a/cmake/std/atdm/ats2/custom_builds.sh b/cmake/std/atdm/ats2/custom_builds.sh index 9d1d132dbc78..f2c831f1877c 100644 --- a/cmake/std/atdm/ats2/custom_builds.sh +++ b/cmake/std/atdm/ats2/custom_builds.sh @@ -7,26 +7,26 @@ # Custom compiler selection logic -if [[ $ATDM_CONFIG_BUILD_NAME == *"cuda-10.1.243-xl-2019.08.20-spmpi-2019.06.24"* ]] \ - || [[ $ATDM_CONFIG_BUILD_NAME == 
*"cuda-10.1.243_xl-2019.08.20_spmpi-2019.06.24"* ]] \ +if [[ $ATDM_CONFIG_BUILD_NAME == *"cuda-10.1.243-xl-2019.08.20-spmpi-rolling"* ]] \ + || [[ $ATDM_CONFIG_BUILD_NAME == *"cuda-10.1.243_xl-2019.08.20_spmpi-rolling"* ]] \ || [[ $ATDM_CONFIG_BUILD_NAME == *"cuda-10.1.243-xl-2019.08.20"* ]] \ || [[ $ATDM_CONFIG_BUILD_NAME == *"cuda-10.1.243_xl-2019.08.20"* ]] \ || [[ $ATDM_CONFIG_BUILD_NAME == *"cuda-10.1.243-xl-2019"* ]] \ || [[ $ATDM_CONFIG_BUILD_NAME == *"cuda-10.1.243_xl-2019"* ]] \ || [[ $ATDM_CONFIG_BUILD_NAME == *"cuda-xl"* ]] \ ; then - export ATDM_CONFIG_COMPILER=CUDA-10.1.243_XL-2019.08.20_SPMPI-2019.06.24 + export ATDM_CONFIG_COMPILER=CUDA-10.1.243_XL-2019.08.20_SPMPI-rolling -elif [[ $ATDM_CONFIG_BUILD_NAME == *"xl-2019.08.20-spmpi-2019.06.24"* ]] \ - || [[ $ATDM_CONFIG_BUILD_NAME == *"xl-2019.08.20_spmpi-2019.06.24"* ]] \ +elif [[ $ATDM_CONFIG_BUILD_NAME == *"xl-2019.08.20-spmpi-rolling"* ]] \ + || [[ $ATDM_CONFIG_BUILD_NAME == *"xl-2019.08.20_spmpi-rolling"* ]] \ || [[ $ATDM_CONFIG_BUILD_NAME == *"xl-2019.08.20"* ]] \ || [[ $ATDM_CONFIG_BUILD_NAME == *"xl-2019"* ]] \ || [[ $ATDM_CONFIG_BUILD_NAME == *"xl"* ]] \ ; then - export ATDM_CONFIG_COMPILER=XL-2019.08.20_SPMPI-2019.06.24 + export ATDM_CONFIG_COMPILER=XL-2019.08.20_SPMPI-rolling -elif [[ $ATDM_CONFIG_BUILD_NAME == *"cuda-10.1.243-gnu-7.3.1-spmpi-2019.06.24"* ]] \ - || [[ $ATDM_CONFIG_BUILD_NAME == *"cuda-10.1.243_gnu-7.3.1_spmpi-2019.06.24"* ]] \ +elif [[ $ATDM_CONFIG_BUILD_NAME == *"cuda-10.1.243-gnu-7.3.1-spmpi-rolling"* ]] \ + || [[ $ATDM_CONFIG_BUILD_NAME == *"cuda-10.1.243_gnu-7.3.1_spmpi-rolling"* ]] \ || [[ $ATDM_CONFIG_BUILD_NAME == *"cuda-10.1.243-gnu-7.3.1"* ]] \ || [[ $ATDM_CONFIG_BUILD_NAME == *"cuda-10.1.243_gnu-7.3.1"* ]] \ || [[ $ATDM_CONFIG_BUILD_NAME == *"cuda-10.1.243-gnu-7"* ]] \ @@ -36,17 +36,17 @@ elif [[ $ATDM_CONFIG_BUILD_NAME == *"cuda-10.1.243-gnu-7.3.1-spmpi-2019.06.24"* || [[ $ATDM_CONFIG_BUILD_NAME == *"cuda-gnu"* ]] \ || [[ $ATDM_CONFIG_BUILD_NAME == *"cuda"* ]] \ ; 
then - export ATDM_CONFIG_COMPILER=CUDA-10.1.243_GNU-7.3.1_SPMPI-2019.06.24 + export ATDM_CONFIG_COMPILER=CUDA-10.1.243_GNU-7.3.1_SPMPI-rolling # NOTE: Default 'cuda' must be last cuda listed! -elif [[ $ATDM_CONFIG_BUILD_NAME == *"gnu-7.3.1-spmpi-2019.06.24"* ]] \ - || [[ $ATDM_CONFIG_BUILD_NAME == *"gnu-7.3.1_spmpi-2019.06.24"* ]] \ +elif [[ $ATDM_CONFIG_BUILD_NAME == *"gnu-7.3.1-spmpi-rolling"* ]] \ + || [[ $ATDM_CONFIG_BUILD_NAME == *"gnu-7.3.1_spmpi-rolling"* ]] \ || [[ $ATDM_CONFIG_BUILD_NAME == *"gnu-7.3.1"* ]] \ || [[ $ATDM_CONFIG_BUILD_NAME == *"gnu-7"* ]] \ || [[ $ATDM_CONFIG_BUILD_NAME == *"gnu"* ]] \ || [[ $ATDM_CONFIG_BUILD_NAME == *"default" ]] \ ; then - export ATDM_CONFIG_COMPILER=GNU-7.3.1_SPMPI-2019.06.24 + export ATDM_CONFIG_COMPILER=GNU-7.3.1_SPMPI-rolling # NOTE: Defaut 'gnu' must be last 'gnu' listed! else @@ -56,10 +56,10 @@ else echo "***" echo "*** Supported compilers include:" echo "***" - echo "**** gnu-7.3.1_spmpi-2019.06.24 (default, default gnu)" - echo "**** cuda-10.1.243_gnu-7.3.1_spmpi-2019.06.24 (default cuda)" - echo "**** xl-2019.08.20_spmpi-2019.06.24 (disabled)" - echo "**** cuda-10.1.243-gnu-7.3.1-spmpi-2019.06.24 (disabled)" + echo "**** gnu-7.3.1_spmpi-rolling (default, default gnu)" + echo "**** cuda-10.1.243_gnu-7.3.1_spmpi-rolling (default cuda)" + echo "**** xl-2019.08.20_spmpi-rolling (disabled)" + echo "**** cuda-10.1.243-gnu-7.3.1-spmpi-rolling (disabled)" echo "***" return diff --git a/cmake/std/atdm/ats2/environment.sh b/cmake/std/atdm/ats2/environment.sh index dc0cbad72013..b74b4b4a4746 100644 --- a/cmake/std/atdm/ats2/environment.sh +++ b/cmake/std/atdm/ats2/environment.sh @@ -81,7 +81,7 @@ module load cmake/3.14.5 # Load compiler modules, TPL modules, and point to SPARC TPL install base dirs # -if [[ "$ATDM_CONFIG_COMPILER" == *"GNU-7.3.1_SPMPI-2019.06.24" ]]; then +if [[ "$ATDM_CONFIG_COMPILER" == *"GNU-7.3.1_SPMPI-rolling" ]]; then module load gcc/7.3.1 module load lapack/3.8.0-gcc-4.9.3 @@ -101,13 +101,13 @@ if 
[[ "$ATDM_CONFIG_COMPILER" == *"GNU-7.3.1_SPMPI-2019.06.24" ]]; then if [[ "$ATDM_CONFIG_COMPILER" == "CUDA-10.1.243_"* ]]; then sparc_tpl_ext=ats2-${sparc_tpl_arch}_cuda-10.1.243_gcc-7.3.1 - sparc_tpl_mpi_ext=ats2-${sparc_tpl_arch}_cuda-10.1.243_gcc-7.3.1_spmpi-2019.06.24 + sparc_tpl_mpi_ext=ats2-${sparc_tpl_arch}_cuda-10.1.243_gcc-7.3.1_spmpi-rolling else sparc_tpl_ext=ats2-${sparc_tpl_arch}_gcc-7.3.1 - sparc_tpl_mpi_ext=ats2-${sparc_tpl_arch}_gcc-7.3.1_spmpi-2019.06.24 + sparc_tpl_mpi_ext=ats2-${sparc_tpl_arch}_gcc-7.3.1_spmpi-rolling fi -elif [[ "$ATDM_CONFIG_COMPILER" == *"XL-2019.08.20_SPMPI-2019.06.24_DISABLED" ]]; then +elif [[ "$ATDM_CONFIG_COMPILER" == *"XL-2019.08.20_SPMPI-rolling_DISABLED" ]]; then module load xl/2019.08.20 module load lapack/3.8.0-xl-2019.08.20 module load gmake/4.2.1 @@ -132,10 +132,10 @@ elif [[ "$ATDM_CONFIG_COMPILER" == *"XL-2019.08.20_SPMPI-2019.06.24_DISABLED" ]] if [[ "$ATDM_CONFIG_COMPILER" == "CUDA-10.1.243_"* ]]; then export LD_LIBRARY_PATH=${BINUTILS_ROOT}/rh/lib/gcc/ppc64le-redhat-linux/7:${LD_LIBRARY_PATH} sparc_tpl_ext=ats2-${sparc_tpl_arch}_cuda-10.1.243_xl-2019.08.20 - sparc_tpl_mpi_ext=ats2-${sparc_tpl_arch}_cuda-10.1.243_xl-2019.08.20_spmpi-2019.06.24 + sparc_tpl_mpi_ext=ats2-${sparc_tpl_arch}_cuda-10.1.243_xl-2019.08.20_spmpi-rolling else sparc_tpl_ext=ats2-${sparc_tpl_arch}_xl-2019.08.20 - sparc_tpl_mpi_ext=ats2-${sparc_tpl_arch}_xl-2019.08.20_spmpi-2019.06.24 + sparc_tpl_mpi_ext=ats2-${sparc_tpl_arch}_xl-2019.08.20_spmpi-rolling fi else @@ -203,7 +203,7 @@ fi # # Common module - requires compiler to be loaded first -module load spectrum-mpi/2019.06.24 +module load spectrum-mpi/rolling # Prepend path to ninja after all of the modules are loaded export PATH=/projects/atdm_devops/vortex/ninja-fortran-1.8.2:$PATH From 849057984975aa5fe40d4c2db18eb210eeb74357 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Wed, 20 May 2020 16:18:02 -0600 Subject: [PATCH 16/86] ATDM/ats2: Switch back to dev modules (ATDV-351) --- 
cmake/std/atdm/ats2/environment.sh | 60 ++---------------------------- 1 file changed, 4 insertions(+), 56 deletions(-) diff --git a/cmake/std/atdm/ats2/environment.sh b/cmake/std/atdm/ats2/environment.sh index b74b4b4a4746..b08393a753b0 100644 --- a/cmake/std/atdm/ats2/environment.sh +++ b/cmake/std/atdm/ats2/environment.sh @@ -75,37 +75,13 @@ fi # Load common modules for all builds module load git/2.20.0 -module load cmake/3.14.5 # # Load compiler modules, TPL modules, and point to SPARC TPL install base dirs # if [[ "$ATDM_CONFIG_COMPILER" == *"GNU-7.3.1_SPMPI-rolling" ]]; then - module load gcc/7.3.1 - module load lapack/3.8.0-gcc-4.9.3 - - export CBLAS_ROOT=/usr/tcetmp/packages/lapack/lapack-3.8.0-gcc-4.9.3 - export LAPACK_ROOT=/usr/tcetmp/packages/lapack/lapack-3.8.0-gcc-4.9.3 - export COMPILER_ROOT=/usr/tce/packages/gcc/gcc-7.3.1 - export SPARC_HDF5=hdf5-1.10.5 - - # eharvey: TODO: remove COMPILER_ROOT and other unused exports below. - export PATH=${COMPILER_ROOT}/bin:${PATH} - export LD_LIBRARY_PATH=${COMPILER_ROOT}/lib:${LD_LIBRARY_PATH} - export BINUTILS_ROOT=${COMPILER_ROOT} - export LIBRARY_PATH=${BINUTILS_ROOT}/lib - export LIBRARY_PATH=${CBLAS_ROOT}/lib:${LIBRARY_PATH} - export INCLUDE=${BINUTILS_ROOT}/include:${INCLUDE} - export CPATH=${BINUTILS_ROOT}/include:${CPATH} - - if [[ "$ATDM_CONFIG_COMPILER" == "CUDA-10.1.243_"* ]]; then - sparc_tpl_ext=ats2-${sparc_tpl_arch}_cuda-10.1.243_gcc-7.3.1 - sparc_tpl_mpi_ext=ats2-${sparc_tpl_arch}_cuda-10.1.243_gcc-7.3.1_spmpi-rolling - else - sparc_tpl_ext=ats2-${sparc_tpl_arch}_gcc-7.3.1 - sparc_tpl_mpi_ext=ats2-${sparc_tpl_arch}_gcc-7.3.1_spmpi-rolling - fi + module load sparc-dev/gcc-7.3.1_spmpi-rolling elif [[ "$ATDM_CONFIG_COMPILER" == *"XL-2019.08.20_SPMPI-rolling_DISABLED" ]]; then module load xl/2019.08.20 @@ -153,21 +129,17 @@ fi if [[ "$ATDM_CONFIG_COMPILER" == "CUDA-10.1.243_"* ]]; then - module load cuda/10.1.243 + sparc-dev/cuda-10.1.243_gcc-7.3.1_spmpi-rolling # OpenMPI Settings + # NOTE: the below 
export overrides the value set by the module load above export OMPI_CXX=${ATDM_CONFIG_NVCC_WRAPPER} if [ ! -x "$OMPI_CXX" ]; then echo "No nvcc_wrapper found" return fi - export OMPI_CC=`which gcc` - export OMPI_FC=`which gfortran` - export LLNL_USE_OMPI_VARS="y" # CUDA Settings - export CUDA_LAUNCH_BLOCKING=1 - export CUDA_MANAGED_FORCE_DEVICE_ALLOC=1 if [[ ! -d /tmp/${USER} ]] ; then echo "Creating /tmp/${USER} for nvcc wrapper!" mkdir /tmp/${USER} @@ -202,14 +174,11 @@ fi # Final setup for all build configurations # -# Common module - requires compiler to be loaded first -module load spectrum-mpi/rolling - # Prepend path to ninja after all of the modules are loaded export PATH=/projects/atdm_devops/vortex/ninja-fortran-1.8.2:$PATH # Prepend path to updated CMake 3.16.5 -module unload cmake/3.14.5 +module unload cmake export PATH=/projects/atdm_devops/vortex/cmake-3.16.5/bin:$PATH # ATDM specific config variables @@ -219,20 +188,6 @@ export ATDM_CONFIG_BLAS_LIBS="-L${BLAS_ROOT}/lib;-lblas;-lgfortran;-lgomp;-lm" # NOTE: Invalid libbfd.so requires below for Trilinos to compile export ATDM_CONFIG_BINUTILS_LIBS="${BINUTILS_ROOT}/lib/libbfd.a;-lz;${BINUTILS_ROOT}/lib/libiberty.a" -export ATDM_CONFIG_SPARC_TPL_BASE=/projects/sparc/tpls/ats2-${sparc_tpl_arch} - -sparc_tpl_base=${ATDM_CONFIG_SPARC_TPL_BASE} - -# Commont ROOT config variables -export BOOST_ROOT=${sparc_tpl_base}/boost-1.65.1/00000000/${sparc_tpl_ext} -export HDF5_ROOT=${sparc_tpl_base}/hdf5-1.10.5/00000000/${sparc_tpl_mpi_ext} -export CGNS_ROOT=${sparc_tpl_base}/cgns-c09a5cd/27e5681f1b74c679b5dcb337ac71036d16c47977/${sparc_tpl_mpi_ext} -export PNETCDF_ROOT=${sparc_tpl_base}/pnetcdf-1.10.0/6144dc67b2041e4093063a04e89fc1e33398bd09/${sparc_tpl_mpi_ext} -export NETCDF_ROOT=${sparc_tpl_base}/netcdf-4.7.0/58bc48d95be2cc9272a18488fea52e1be1f0b42a/${sparc_tpl_mpi_ext} -export PARMETIS_ROOT=${sparc_tpl_base}/parmetis-4.0.3/00000000/${sparc_tpl_mpi_ext} -export 
METIS_ROOT=${sparc_tpl_base}/parmetis-4.0.3/00000000/${sparc_tpl_mpi_ext} -export SUPERLUDIST_ROOT=${sparc_tpl_base}/superlu_dist-5.4.0/a3121eaff44f7bf7d44e625c3b3d2a9911e58876/${sparc_tpl_mpi_ext} - export ATDM_CONFIG_USE_HWLOC=OFF export ATDM_CONFIG_HDF5_LIBS="-L${HDF5_ROOT}/lib;${HDF5_ROOT}/lib/libhdf5_hl.a;${HDF5_ROOT}/lib/libhdf5.a;-lz;-ldl" export ATDM_CONFIG_NETCDF_LIBS="-L${NETCDF_ROOT}/lib;${NETCDF_ROOT}/lib/libnetcdf.a;${PNETCDF_ROOT}/lib/libpnetcdf.a;${ATDM_CONFIG_HDF5_LIBS};-lcurl" @@ -252,13 +207,6 @@ export ATDM_CONFIG_MPI_EXEC=${ATDM_SCRIPT_DIR}/ats2/trilinos_jsrun export ATDM_CONFIG_MPI_POST_FLAGS="--rs_per_socket;4" export ATDM_CONFIG_MPI_EXEC_NUMPROCS_FLAG="-p" -# Set common default compilers -export CC=mpicc -export CXX=mpicxx -export F77=mpifort -export FC=mpifort -export F90=mpifort - # System-info for what ATS-2 system we are using if [[ "${ATDM_CONFIG_KNOWN_HOSTNAME}" == "vortex" ]] ; then export ATDM_CONFIG_ATS2_LOGIN_NODE=vortex60 From 95de28dad0d1b9db98765b8cb4098fd5c528b571 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Wed, 20 May 2020 16:23:59 -0600 Subject: [PATCH 17/86] ATDM/ats2: Point to binutils (ATDV-351) --- cmake/std/atdm/ats2/environment.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cmake/std/atdm/ats2/environment.sh b/cmake/std/atdm/ats2/environment.sh index b08393a753b0..c736be1c666b 100644 --- a/cmake/std/atdm/ats2/environment.sh +++ b/cmake/std/atdm/ats2/environment.sh @@ -83,6 +83,14 @@ module load git/2.20.0 if [[ "$ATDM_CONFIG_COMPILER" == *"GNU-7.3.1_SPMPI-rolling" ]]; then module load sparc-dev/gcc-7.3.1_spmpi-rolling + export COMPILER_ROOT=/usr/tce/packages/gcc/gcc-7.3.1 + export LD_LIBRARY_PATH=${COMPILER_ROOT}/lib:${LD_LIBRARY_PATH} + export BINUTILS_ROOT=${COMPILER_ROOT} + export LIBRARY_PATH=${BINUTILS_ROOT}/lib + export LIBRARY_PATH=${CBLAS_ROOT}/lib:${LIBRARY_PATH} + export INCLUDE=${BINUTILS_ROOT}/include:${INCLUDE} + export CPATH=${BINUTILS_ROOT}/include:${CPATH} + elif [[ 
"$ATDM_CONFIG_COMPILER" == *"XL-2019.08.20_SPMPI-rolling_DISABLED" ]]; then module load xl/2019.08.20 module load lapack/3.8.0-xl-2019.08.20 From 644744e455e9811f4f2a289bb37945f16752e084 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Wed, 20 May 2020 17:01:08 -0600 Subject: [PATCH 18/86] ATDM/ats2: Point to cuda (ATDV-351) --- cmake/std/atdm/ats2/environment.sh | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/cmake/std/atdm/ats2/environment.sh b/cmake/std/atdm/ats2/environment.sh index c736be1c666b..51879b5f8a38 100644 --- a/cmake/std/atdm/ats2/environment.sh +++ b/cmake/std/atdm/ats2/environment.sh @@ -19,7 +19,6 @@ if [[ "$ATDM_CONFIG_COMPILER" == "GNU"* || \ if [[ "$ATDM_CONFIG_KOKKOS_ARCH" == "DEFAULT" || \ "$ATDM_CONFIG_KOKKOS_ARCH" == "Power9" ]] ; then export ATDM_CONFIG_KOKKOS_ARCH=Power9 - sparc_tpl_arch=pwr9 else echo echo "***" @@ -34,7 +33,6 @@ elif [[ "$ATDM_CONFIG_COMPILER" == "CUDA"* ]] ; then "$ATDM_CONFIG_KOKKOS_ARCH" == "Power9" || \ "$ATDM_CONFIG_KOKKOS_ARCH" == "Volta70" ]] ; then export ATDM_CONFIG_KOKKOS_ARCH=Power9,Volta70 - sparc_tpl_arch=v100 else echo echo "***" @@ -115,11 +113,6 @@ elif [[ "$ATDM_CONFIG_COMPILER" == *"XL-2019.08.20_SPMPI-rolling_DISABLED" ]]; t if [[ "$ATDM_CONFIG_COMPILER" == "CUDA-10.1.243_"* ]]; then export LD_LIBRARY_PATH=${BINUTILS_ROOT}/rh/lib/gcc/ppc64le-redhat-linux/7:${LD_LIBRARY_PATH} - sparc_tpl_ext=ats2-${sparc_tpl_arch}_cuda-10.1.243_xl-2019.08.20 - sparc_tpl_mpi_ext=ats2-${sparc_tpl_arch}_cuda-10.1.243_xl-2019.08.20_spmpi-rolling - else - sparc_tpl_ext=ats2-${sparc_tpl_arch}_xl-2019.08.20 - sparc_tpl_mpi_ext=ats2-${sparc_tpl_arch}_xl-2019.08.20_spmpi-rolling fi else @@ -138,6 +131,7 @@ fi if [[ "$ATDM_CONFIG_COMPILER" == "CUDA-10.1.243_"* ]]; then sparc-dev/cuda-10.1.243_gcc-7.3.1_spmpi-rolling + export CUDA_BIN_PATH=$CUDA_HOME # OpenMPI Settings # NOTE: the below export overrides the value set by the module load above From 80932f942027dc65f5ef3c19a3e48d6bc298e060 Mon Sep 17 
00:00:00 2001 From: Evan Harvey Date: Wed, 20 May 2020 17:08:11 -0600 Subject: [PATCH 19/86] ATDM/ats2: Fix typo (ATDV-351) --- cmake/std/atdm/ats2/environment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/std/atdm/ats2/environment.sh b/cmake/std/atdm/ats2/environment.sh index 51879b5f8a38..12528c5916e6 100644 --- a/cmake/std/atdm/ats2/environment.sh +++ b/cmake/std/atdm/ats2/environment.sh @@ -130,7 +130,7 @@ fi if [[ "$ATDM_CONFIG_COMPILER" == "CUDA-10.1.243_"* ]]; then - sparc-dev/cuda-10.1.243_gcc-7.3.1_spmpi-rolling + module load sparc-dev/cuda-10.1.243_gcc-7.3.1_spmpi-rolling export CUDA_BIN_PATH=$CUDA_HOME # OpenMPI Settings From 485013efd01df10e69778c558c2c065d9dafc093 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Thu, 21 May 2020 09:12:39 -0600 Subject: [PATCH 20/86] ATDM/ats2: Disable hack around error message --- cmake/std/atdm/ats2/trilinos_jsrun | 69 ++++++++++++++++-------------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/cmake/std/atdm/ats2/trilinos_jsrun b/cmake/std/atdm/ats2/trilinos_jsrun index 454e9f6855a7..020ed13075d2 100755 --- a/cmake/std/atdm/ats2/trilinos_jsrun +++ b/cmake/std/atdm/ats2/trilinos_jsrun @@ -40,21 +40,22 @@ function debug_print { } ################################################################################ -# @brief: Hack around JSM server error "Error: Remote JSM server is not -# responding" +# @brief: Evaluate the jsrun command and return the jsrun return value. # This function assumes that "args" has been initialized -# @return void +# @return the jsrun return value ################################################################################ function evaluate_jsrun_command { local jsrun_ret - local out_file - local retry - local grep_status + #local out_file + #local retry + #local grep_status - # Create process ID specific output file - # Instead of attempting to find the test name in $@, let's use - # the hash of the test command to avoid race conditions. 
- out_file=$(printf "'%s' " "${args[@]}" | md5sum | awk '{print $1".out"}') + # Use the hash of the test name and arguments as the output file. This still + # causes race conditions among tests as some tests run the same exe with the + # same arguments but post to a different ctest test names. We need the ctest + # test name in the environment. + #out_file=$(printf "'%s' " "${args[@]}" | md5sum | awk '{print $1".out"}') + #out_file="$CTEST_TEST_NAME"."$out_file" if [ "$ECHO_CMD" == "1" ]; then echo "BEFORE: jsrun " $(printf "'%s' " "${orig_args[@]}") @@ -62,35 +63,37 @@ function evaluate_jsrun_command { fi # Set retry, assume JSRUN_WRAPPER_NUM_RETRIES is valid if set - if [[ ! "$JSRUN_WRAPPER_NUM_RETRIES" == "" ]]; then - retry=${JSRUN_WRAPPER_NUM_RETRIES} - else - retry=3 - fi - grep_status=0 + #if [[ ! "$JSRUN_WRAPPER_NUM_RETRIES" == "" ]]; then + # retry=${JSRUN_WRAPPER_NUM_RETRIES} + #else + # retry=3 + #fi + #grep_status=0 # Retry jsrun command until retry is exhausted or JSM server error is not found - while [[ $grep_status -eq 0 && $retry -ne 0 ]]; do + #while [[ $grep_status -eq 0 && $retry -ne 0 ]]; do # Evaluate the command passed in and redirect all output - eval jsrun $(printf "'%s' " "${args[@]}") &> $out_file - jsrun_ret=$? + #eval jsrun $(printf "'%s' " "${args[@]}") &> $out_file + #jsrun_ret=$? - grep 'Error: Remote JSM server is not responding' $out_file &> /dev/null - grep_status=$? + #grep 'Error: Remote JSM server is not responding' $out_file &> /dev/null + #grep_status=$? # Sleep for a few seconds before retrying the test. 
- if [[ $grep_status -eq 0 && $retry -ne 1 ]]; then - sleep 3 - fi - - retry=$((retry-1)) - done - - if [ "$ECHO_CMD" == "1" ]; then - echo "out_file=$out_file" - fi - cat $out_file - rm $out_file + #if [[ $grep_status -eq 0 && $retry -ne 1 ]]; then + # sleep 3 + #fi + + #retry=$((retry-1)) + #done + + #if [ "$ECHO_CMD" == "1" ]; then + # echo "out_file=$out_file" + #fi + #cat $out_file + #rm $out_file + eval jsrun $(printf "'%s' " "${args[@]}") + jsrun_ret=$? return $jsrun_ret } From ba317f22f8aa8e2b961d84d5dae8ace09c79b2e6 Mon Sep 17 00:00:00 2001 From: Eric Phipps Date: Thu, 21 May 2020 12:24:49 -0600 Subject: [PATCH 21/86] Sacado: Make the new Fad design the default. Let the mayhem begin! --- packages/sacado/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/sacado/CMakeLists.txt b/packages/sacado/CMakeLists.txt index ac34eca63a24..add024520a9a 100644 --- a/packages/sacado/CMakeLists.txt +++ b/packages/sacado/CMakeLists.txt @@ -28,7 +28,7 @@ TRIBITS_ADD_OPTION_AND_DEFINE( ${PACKAGE_NAME}_NEW_FAD_DESIGN_IS_DEFAULT SACADO_NEW_FAD_DESIGN_IS_DEFAULT "Make the new Fad design the default, replacing the old one." - FALSE + ON ) TRIBITS_ADD_OPTION_AND_DEFINE( From 98d6064d77413a326dffd999b41c7cee2d77edec Mon Sep 17 00:00:00 2001 From: "Roscoe A. Bartlett" Date: Fri, 22 May 2020 08:48:56 -0600 Subject: [PATCH 22/86] Return all PT and ST changed packages (#6697) This will fix the Trilinos PR tester to test PT and ST packages.
--- commonTools/framework/CMakeLists.txt | 21 ++++++++++++++----- .../get-changed-trilinos-packages.sh | 12 +++++------ 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/commonTools/framework/CMakeLists.txt b/commonTools/framework/CMakeLists.txt index ea75d1609f2f..eac21e8591d0 100644 --- a/commonTools/framework/CMakeLists.txt +++ b/commonTools/framework/CMakeLists.txt @@ -63,7 +63,7 @@ TRIBITS_ADD_ADVANCED_TEST( PullRequestLinuxDriverTest_UnitTests FUNCTION(create_get_changed_trilinos_packages_test TEST_POSTFIX FILES_CHANGED CHANGED_PACKAGES_FULL_LIST - CHANGED_PACKAGES_PT_LIST + CHANGED_PACKAGES_ST_LIST EXPECTED_ENABLES_REGEX ) @@ -102,7 +102,7 @@ MOCK_PROGRAM_OUTPUT: ${FILES_CHANGED} ARGS sha1-from sha1-to packagesToEnable.cmake PASS_REGULAR_EXPRESSION_ALL "CHANGED_PACKAGES_FULL_LIST='${CHANGED_PACKAGES_FULL_LIST}'" - "CHANGED_PACKAGES_PT_LIST='${CHANGED_PACKAGES_PT_LIST}'" + "CHANGED_PACKAGES_ST_LIST='${CHANGED_PACKAGES_ST_LIST}'" TEST_2 CMND ${CMAKE_COMMAND} ARGS -P packagesToEnable.cmake @@ -126,11 +126,22 @@ create_get_changed_trilinos_packages_test(ProjectsList_TeuchosCore create_get_changed_trilinos_packages_test(cmake_std_atdm_TeuchosCore_PyTrilinos "cmake/std/atdm/anything\npackages/teuchos/core/CMakeLists.txt\npackages/PyTrilinos/anything" "TrilinosATDMConfigTests,TeuchosCore,PyTrilinos" - "TrilinosATDMConfigTests,TeuchosCore" - "Setting Trilinos_ENABLE_TeuchosCore = ON" + "TrilinosATDMConfigTests,TeuchosCore,PyTrilinos" + "Setting Trilinos_ENABLE_TeuchosCore = ON;Setting Trilinos_ENABLE_PyTrilinos = ON" ) # The above test ensures that cmake/std/atdm/ changes don't trigger global - # builds and it makes sure that ST packages (PyTrilinos) don't get enabled. + # builds and it makes sure that PT (TeuchosCore) and ST packages + # (PyTrilinos) do get enabled. 
+ + +create_get_changed_trilinos_packages_test(cmake_std_TeuchosCore_PyTrilinos_NewPackage + "packages/teuchos/core/CMakeLists.txt\npackages/PyTrilinos/anything\npackages/new_package/anything" + "TeuchosCore,PyTrilinos,NewPackage" + "TeuchosCore,PyTrilinos" + "Setting Trilinos_ENABLE_TeuchosCore = ON;Setting Trilinos_ENABLE_PyTrilinos = ON" + ) + # The above test ensures PT (TeuchosCore) andST packages (PyTrilinos) do get + # enabled but EX packages (NewPackage) don't. create_get_changed_trilinos_packages_test(cmake_std_atdm diff --git a/commonTools/framework/get-changed-trilinos-packages.sh b/commonTools/framework/get-changed-trilinos-packages.sh index 86b870e116c7..e7f93781a62e 100755 --- a/commonTools/framework/get-changed-trilinos-packages.sh +++ b/commonTools/framework/get-changed-trilinos-packages.sh @@ -149,14 +149,14 @@ echo "CHANGED_PACKAGES_FULL_LIST='$CHANGED_PACKAGES_FULL_LIST'" echo echo "D) Filter list of changed packages to get only the PT packages" echo -CHANGED_PACKAGES_PT_LIST=`$TRIBITS_DIR/ci_support/filter-packages-list.py \ +CHANGED_PACKAGES_ST_LIST=`$TRIBITS_DIR/ci_support/filter-packages-list.py \ --deps-xml-file=TrilinosPackageDependencies.xml \ --input-packages-list=$CHANGED_PACKAGES_FULL_LIST \ - --keep-test-test-categories=PT` -echo "CHANGED_PACKAGES_PT_LIST='$CHANGED_PACKAGES_PT_LIST'" + --keep-test-test-categories=PT,ST` +echo "CHANGED_PACKAGES_ST_LIST='$CHANGED_PACKAGES_ST_LIST'" echo -echo "E) Generate the *.cmake enables file" +echo "E) Generate the ${CMAKE_PACKAGE_ENABLES_OUT} enables file" echo echo " @@ -166,8 +166,8 @@ MACRO(PR_ENABLE_BOOL VAR_NAME VAR_VAL) ENDMACRO() " > $CMAKE_PACKAGE_ENABLES_OUT -if [ "$CHANGED_PACKAGES_PT_LIST" != "" ] ; then - echo "$CHANGED_PACKAGES_PT_LIST" | sed -n 1'p' | tr ',' '\n' | while read PKG_NAME ; do +if [ "$CHANGED_PACKAGES_ST_LIST" != "" ] ; then + echo "$CHANGED_PACKAGES_ST_LIST" | sed -n 1'p' | tr ',' '\n' | while read PKG_NAME ; do #echo $PKG_NAME echo 
"PR_ENABLE_BOOL(Trilinos_ENABLE_${PKG_NAME} ON)" >> $CMAKE_PACKAGE_ENABLES_OUT done From af035f13e78fa6271bbdb820bc7fd6b00d2cf6fe Mon Sep 17 00:00:00 2001 From: "Roscoe A. Bartlett" Date: Fri, 22 May 2020 13:40:26 -0600 Subject: [PATCH 23/86] Automatic snapshot commit from tribits at 52acd38 Origin repo remote tracking branch: 'github/master' Origin repo remote repo URL: 'github = git@github.com:TriBITSPub/TriBITS.git' At commit: commit 52acd38d2c480c86f5924b475073836d476041bc Author: Roscoe A. Bartlett Date: Fri May 22 13:24:22 2020 -0600 Summary: Add get-tribits-packages.py (trilinos/Trilinos#6697) --- .../tribits/ci_support/TribitsDependencies.py | 19 +++- .../ci_support/get-tribits-packages.py | 86 +++++++++++++++++++ 2 files changed, 104 insertions(+), 1 deletion(-) create mode 100755 cmake/tribits/ci_support/get-tribits-packages.py diff --git a/cmake/tribits/ci_support/TribitsDependencies.py b/cmake/tribits/ci_support/TribitsDependencies.py index 964fd24642ca..f6ff47b6c6b0 100644 --- a/cmake/tribits/ci_support/TribitsDependencies.py +++ b/cmake/tribits/ci_support/TribitsDependencies.py @@ -47,7 +47,6 @@ from FindGeneralScriptSupport import * from GeneralScriptSupport import * - # # Default file locations # @@ -242,6 +241,22 @@ def numPackages(self): return len(self.__packagesList) + def getPackagesNamesList(self, onlyTopLevelPackages=True): + packagesNamesList = [] + for packageDep in self.__packagesList: + #print ("packageDep.packageName = "+packageDep.packageName) + #print ("packageDep.parentPackage = "+packageDep.parentPackage) + if packageDep.parentPackage == "": + addPackage = True + elif not onlyTopLevelPackages: + addPackage = True + else: + addPackage = False + if addPackage: + packagesNamesList.append(packageDep.packageName) + return packagesNamesList + + def packageNameToID(self, packageName): return self.__packagesNameToID.get(packageName, -1) @@ -262,6 +277,7 @@ def getPackageByDir(self, packageDir): return None + # Note: Path must contain ending 
"/" def getPackageNameFromPath(self, fullPath): for packageDep in self.__packagesList: regexFilePath = packageDep.packageDir+"/" @@ -275,6 +291,7 @@ def getPackageNameFromPath(self, fullPath): # packages because subpackages are listed before packages! + # Returns the paraent package name given a test name def getPackageNameFromTestName(self, testName): for packageDep in self.__packagesList: startTestName = packageDep.packageName+"_" diff --git a/cmake/tribits/ci_support/get-tribits-packages.py b/cmake/tribits/ci_support/get-tribits-packages.py new file mode 100755 index 000000000000..0c3c77fd57aa --- /dev/null +++ b/cmake/tribits/ci_support/get-tribits-packages.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python + +# @HEADER +# ************************************************************************ +# +# TriBITS: Tribal Build, Integrate, and Test System +# Copyright 2013 Sandia Corporation +# +# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +# the U.S. Government retains certain rights in this software. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# ************************************************************************ +# @HEADER + +from FindGeneralScriptSupport import * +from TribitsPackageFilePathUtils import * +from gitdist import addOptionParserChoiceOption + + +# +# Read in the commandline arguments +# + +usageHelp = \ +r"""get-tribits-packages.py --deps-xml-file= \ + --only-top-level-packages=[on|off] + +This script returns a comma-separated list of all of the project's top-level +or packages or the full set of SE packages (i.e. parent and subpackages). +""" + +from optparse import OptionParser + +clp = OptionParser(usage=usageHelp) + +clp.add_option( + "--deps-xml-file", dest="depsXmlFile", type="string", + help="File containing TriBITS-generated XML data-structure the listing"+\ + " of packages, dir names, dependencies, etc.") + +addOptionParserChoiceOption( + "--only-top-level-packages", "onlyTopLevelPackagesStr", + ("on", "off"), 0, + "If 'on', then only top-level packages will be included. 
If 'off', then"+\ + " top-level and subpackages will be included in the list (in order).", + clp ) + +(options, args) = clp.parse_args() + +if options.onlyTopLevelPackagesStr == "on": + onlyTopLevelPackages = True +else: + onlyTopLevelPackages = False + +trilinosDependencies = getProjectDependenciesFromXmlFile(options.depsXmlFile) + +packagesNamesList = trilinosDependencies.getPackagesNamesList(onlyTopLevelPackages) + +print(','.join(packagesNamesList)) From a28215b12a83539626de92de44aa3665b14deb48 Mon Sep 17 00:00:00 2001 From: Eric Phipps Date: Fri, 22 May 2020 14:14:53 -0600 Subject: [PATCH 24/86] TrilinosCouplings: Fix uninitialized variable use in many examples. All of these examples have the same pattern of using an array of scalars without initializing them: Scalar flux[3] = {0.0, 0.0, 0.0}; ... for (int i = 0; i < 3; ++i) for (int j = 0; j < 3; ++j) flux[i] += material[i][j]*grad_u[j]; In this case Scalar = SFad. This worked with the old design, because SFad always zero-initialized its derivative array, but currently the new design does not. It is unclear if the new design should be changed to maintain consistency, but I decided to go ahead and change these examples anyway, because I don't think any reasonable person would expect the code to work correctly as it was written. For most scalar types, including builtin ones, the code wouldn't have worked correctly. 
--- .../scaling/TrilinosCouplings_EpetraIntrepidPoissonExample.cpp | 2 +- .../TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp | 2 +- .../TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp | 2 +- .../scaling/TrilinosCouplings_TpetraIntrepidPoissonExample.cpp | 2 +- ...TrilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp | 2 +- packages/trilinoscouplings/examples/scaling/example_Poisson.cpp | 2 +- .../trilinoscouplings/examples/scaling/example_Poisson2D.cpp | 2 +- .../trilinoscouplings/examples/scaling/example_Poisson2D_p2.cpp | 2 +- .../examples/scaling/example_Poisson2D_p2_tpetra.cpp | 2 +- .../trilinoscouplings/examples/scaling/example_Poisson2D_pn.cpp | 2 +- .../examples/scaling/example_Poisson2D_pn_tpetra.cpp | 2 +- .../examples/scaling/example_Poisson_BlockMaterials.cpp | 2 +- .../trilinoscouplings/examples/scaling/example_Poisson_NoFE.cpp | 2 +- .../examples/scaling/example_Poisson_NoFE_Epetra.cpp | 2 +- .../examples/scaling/example_Poisson_NoFE_Tpetra.cpp | 2 +- .../trilinoscouplings/examples/scaling/example_Poisson_stk.cpp | 2 +- .../examples/scaling/example_StabilizedADR.cpp | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) diff --git a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_EpetraIntrepidPoissonExample.cpp b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_EpetraIntrepidPoissonExample.cpp index 7f5ffbb4b8ab..9709b7f7038a 100644 --- a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_EpetraIntrepidPoissonExample.cpp +++ b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_EpetraIntrepidPoissonExample.cpp @@ -1433,7 +1433,7 @@ sourceTerm (Scalar& x, Scalar& y, Scalar& z) { Scalar u; Scalar grad_u[3]; - Scalar flux[3]; + Scalar flux[3] = {0.0, 0.0, 0.0}; Scalar material[3][3]; Scalar f = 0.; diff --git a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp 
b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp index 9566e9c847a5..5a04e3936e13 100644 --- a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp +++ b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson2DExample.cpp @@ -1336,7 +1336,7 @@ sourceTerm (Scalar& x, Scalar& y, Scalar& z) { Scalar u; Scalar grad_u[3]; - Scalar flux[3]; + Scalar flux[3] = {0.0, 0.0, 0.0}; Scalar material[3][3]; Scalar f = 0.; diff --git a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp index ce21b6a98efd..3fcd95596071 100644 --- a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp +++ b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidHybridPoisson3DExample.cpp @@ -1338,7 +1338,7 @@ sourceTerm (Scalar& x, Scalar& y, Scalar& z) { Scalar u; Scalar grad_u[3]; - Scalar flux[3]; + Scalar flux[3] = {0.0, 0.0, 0.0}; Scalar material[3][3]; Scalar f = 0.; diff --git a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidPoissonExample.cpp b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidPoissonExample.cpp index 5c47e5c39e90..9211af969ddb 100644 --- a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidPoissonExample.cpp +++ b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidPoissonExample.cpp @@ -1870,7 +1870,7 @@ sourceTerm (Scalar& x, Scalar& y, Scalar& z) { Scalar u; Scalar grad_u[3]; - Scalar flux[3]; + Scalar flux[3] = {0.0, 0.0, 0.0}; Scalar material[3][3]; Scalar f = 0.; diff --git a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp 
b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp index 06662777134c..e9083969fbca 100644 --- a/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp +++ b/packages/trilinoscouplings/examples/scaling/TrilinosCouplings_TpetraIntrepidStructuredPoissonExample.cpp @@ -1330,7 +1330,7 @@ sourceTerm (Scalar& x, Scalar& y, Scalar& z) { Scalar u; Scalar grad_u[3]; - Scalar flux[3]; + Scalar flux[3] = {0.0, 0.0, 0.0}; Scalar material[3][3]; Scalar f = 0.; diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson.cpp index 052d12613f26..8556cd536438 100644 --- a/packages/trilinoscouplings/examples/scaling/example_Poisson.cpp +++ b/packages/trilinoscouplings/examples/scaling/example_Poisson.cpp @@ -1407,7 +1407,7 @@ const Scalar sourceTerm(Scalar& x, Scalar& y, Scalar& z){ Scalar u; Scalar grad_u[3]; - Scalar flux[3]; + Scalar flux[3] = {0.0, 0.0, 0.0}; Scalar material[3][3]; Scalar f = 0.; diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson2D.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson2D.cpp index 7c5ccb803bee..069b0b43f3b7 100644 --- a/packages/trilinoscouplings/examples/scaling/example_Poisson2D.cpp +++ b/packages/trilinoscouplings/examples/scaling/example_Poisson2D.cpp @@ -1296,7 +1296,7 @@ const Scalar sourceTerm(Scalar& x, Scalar& y){ Scalar u; Scalar grad_u[2]; - Scalar flux[2]; + Scalar flux[2] = {0.0, 0.0}; Scalar material[2][2]; Scalar f = 0.; diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson2D_p2.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson2D_p2.cpp index c2a27edfc52b..338612bf32a0 100644 --- a/packages/trilinoscouplings/examples/scaling/example_Poisson2D_p2.cpp +++ b/packages/trilinoscouplings/examples/scaling/example_Poisson2D_p2.cpp @@ -1367,7 +1367,7 @@ const Scalar sourceTerm(Scalar& x, 
Scalar& y){ Scalar u; Scalar grad_u[2]; - Scalar flux[2]; + Scalar flux[2] = {0.0, 0.0}; Scalar material[2][2]; Scalar f = 0.; diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson2D_p2_tpetra.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson2D_p2_tpetra.cpp index c0a68a0cd2b6..5888f9b8e4d9 100644 --- a/packages/trilinoscouplings/examples/scaling/example_Poisson2D_p2_tpetra.cpp +++ b/packages/trilinoscouplings/examples/scaling/example_Poisson2D_p2_tpetra.cpp @@ -1446,7 +1446,7 @@ const Scalar sourceTerm(Scalar& x, Scalar& y){ Scalar u; Scalar grad_u[2]; - Scalar flux[2]; + Scalar flux[2] = {0.0, 0.0}; Scalar material[2][2]; Scalar f = 0.; diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson2D_pn.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson2D_pn.cpp index b19ed44d46ef..20974e98b7c4 100644 --- a/packages/trilinoscouplings/examples/scaling/example_Poisson2D_pn.cpp +++ b/packages/trilinoscouplings/examples/scaling/example_Poisson2D_pn.cpp @@ -1339,7 +1339,7 @@ const Scalar sourceTerm(Scalar& x, Scalar& y){ Scalar u; Scalar grad_u[2]; - Scalar flux[2]; + Scalar flux[2] = {0.0, 0.0}; Scalar material[2][2]; Scalar f = 0.; diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson2D_pn_tpetra.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson2D_pn_tpetra.cpp index e98bf01b322e..8a46fdc6d77d 100644 --- a/packages/trilinoscouplings/examples/scaling/example_Poisson2D_pn_tpetra.cpp +++ b/packages/trilinoscouplings/examples/scaling/example_Poisson2D_pn_tpetra.cpp @@ -1856,7 +1856,7 @@ const Scalar sourceTerm(Scalar& x, Scalar& y){ Scalar u; Scalar grad_u[2]; - Scalar flux[2]; + Scalar flux[2] = {0.0, 0.0}; Scalar material[2][2]; Scalar f = 0.; diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson_BlockMaterials.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson_BlockMaterials.cpp index 700b74223ae6..7a9b7838f11a 100644 --- 
a/packages/trilinoscouplings/examples/scaling/example_Poisson_BlockMaterials.cpp +++ b/packages/trilinoscouplings/examples/scaling/example_Poisson_BlockMaterials.cpp @@ -1402,7 +1402,7 @@ const Scalar sourceTerm(Scalar& x, Scalar& y, Scalar& z){ Scalar u; Scalar grad_u[3]; - Scalar flux[3]; + Scalar flux[3] = {0.0, 0.0, 0.0}; Scalar material[3][3]; Scalar f = 0.; diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE.cpp index a88d39d2d629..5167e9393036 100644 --- a/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE.cpp +++ b/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE.cpp @@ -1401,7 +1401,7 @@ const Scalar sourceTerm(Scalar& x, Scalar& y, Scalar& z){ Scalar u; Scalar grad_u[3]; - Scalar flux[3]; + Scalar flux[3] = {0.0, 0.0, 0.0}; Scalar material[3][3]; Scalar f = 0.; diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Epetra.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Epetra.cpp index dd548bf7ec2f..a2ea04f0c9dc 100644 --- a/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Epetra.cpp +++ b/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Epetra.cpp @@ -1348,7 +1348,7 @@ const Scalar sourceTerm(Scalar& x, Scalar& y, Scalar& z){ Scalar u; Scalar grad_u[3]; - Scalar flux[3]; + Scalar flux[3] = {0.0, 0.0, 0.0}; Scalar material[3][3]; Scalar f = 0.; diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Tpetra.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Tpetra.cpp index 54aa5c6d5bbf..5a0e53ed4752 100644 --- a/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Tpetra.cpp +++ b/packages/trilinoscouplings/examples/scaling/example_Poisson_NoFE_Tpetra.cpp @@ -1459,7 +1459,7 @@ const Scalar sourceTerm(Scalar& x, Scalar& y, Scalar& z){ Scalar u; Scalar grad_u[3]; - Scalar flux[3]; + Scalar flux[3] = 
{0.0, 0.0, 0.0}; Scalar material[3][3]; Scalar f = 0.; diff --git a/packages/trilinoscouplings/examples/scaling/example_Poisson_stk.cpp b/packages/trilinoscouplings/examples/scaling/example_Poisson_stk.cpp index f81c6a6613be..99395e73ac50 100644 --- a/packages/trilinoscouplings/examples/scaling/example_Poisson_stk.cpp +++ b/packages/trilinoscouplings/examples/scaling/example_Poisson_stk.cpp @@ -1064,7 +1064,7 @@ const Scalar sourceTerm(Scalar& x, Scalar& y, Scalar& z){ Scalar u; Scalar grad_u[3]; - Scalar flux[3]; + Scalar flux[3] = {0.0, 0.0, 0.0}; Scalar material[3][3]; Scalar f = 0.; diff --git a/packages/trilinoscouplings/examples/scaling/example_StabilizedADR.cpp b/packages/trilinoscouplings/examples/scaling/example_StabilizedADR.cpp index cf0814517933..d15203cbcca7 100644 --- a/packages/trilinoscouplings/examples/scaling/example_StabilizedADR.cpp +++ b/packages/trilinoscouplings/examples/scaling/example_StabilizedADR.cpp @@ -1281,7 +1281,7 @@ const Scalar sourceTerm(Scalar& x, Scalar& y, Scalar& z){ Scalar u; Scalar grad_u[3]; - Scalar flux[3]; + Scalar flux[3] = {0.0, 0.0, 0.0}; Scalar diffusion[3][3]; Scalar advection[3]; Scalar f = 0.; From ec31123b096add44e52f9b05850aceac8b717bec Mon Sep 17 00:00:00 2001 From: "Roscoe A. Bartlett" Date: Fri, 22 May 2020 10:31:37 -0600 Subject: [PATCH 25/86] Add support for generating CTEST_LABELS_FOR_SUBPROJECTS (#6697) This will provide the right list of Trilinos top-level TriBITS packages to display on CDash for PR testing. 
--- commonTools/framework/CMakeLists.txt | 20 ++++- .../get-changed-trilinos-packages.sh | 73 +++++++++++++++++-- 2 files changed, 84 insertions(+), 9 deletions(-) diff --git a/commonTools/framework/CMakeLists.txt b/commonTools/framework/CMakeLists.txt index eac21e8591d0..e559ed12d731 100644 --- a/commonTools/framework/CMakeLists.txt +++ b/commonTools/framework/CMakeLists.txt @@ -65,6 +65,7 @@ FUNCTION(create_get_changed_trilinos_packages_test TEST_POSTFIX CHANGED_PACKAGES_FULL_LIST CHANGED_PACKAGES_ST_LIST EXPECTED_ENABLES_REGEX + EXPECTED_CTEST_LABELS_FOR_SUBPROJECTS_REGEX ) SET(TEST_NAME "get_changed_trilinos_packages_test_${TEST_POSTFIX}") @@ -99,7 +100,7 @@ MOCK_PROGRAM_OUTPUT: ${FILES_CHANGED} TEST_1 MESSAGE "Run get-changed-trilinos-packages.sh with mock git" CMND "${CMAKE_CURRENT_SOURCE_DIR}/get-changed-trilinos-packages.sh" - ARGS sha1-from sha1-to packagesToEnable.cmake + ARGS sha1-from sha1-to packagesToEnable.cmake package_subproject_list.cmake PASS_REGULAR_EXPRESSION_ALL "CHANGED_PACKAGES_FULL_LIST='${CHANGED_PACKAGES_FULL_LIST}'" "CHANGED_PACKAGES_ST_LIST='${CHANGED_PACKAGES_ST_LIST}'" @@ -108,6 +109,11 @@ MOCK_PROGRAM_OUTPUT: ${FILES_CHANGED} ARGS -P packagesToEnable.cmake PASS_REGULAR_EXPRESSION_ALL "${EXPECTED_ENABLES_REGEX}" + TEST_3 CMND cat + ARGS package_subproject_list.cmake + PASS_REGULAR_EXPRESSION_ALL + "set[(]CTEST_LABELS_FOR_SUBPROJECTS ${EXPECTED_CTEST_LABELS_FOR_SUBPROJECTS_REGEX}[)]" + ) ENDFUNCTION() @@ -118,27 +124,32 @@ create_get_changed_trilinos_packages_test(ProjectsList_TeuchosCore "ALL_PACKAGES,TeuchosCore" "ALL_PACKAGES,TeuchosCore" "Setting Trilinos_ENABLE_ALL_PACKAGES = ON;Setting Trilinos_ENABLE_TeuchosCore = ON" + "TrilinosFrameworkTests TrilinosATDMConfigTests .*Kokkos Teuchos KokkosKernels .*EpetraExt Tpetra .*Domi Thyra .*SEACAS .*MueLu .*ShyLU_DD ShyLU .*Tempus Stokhos ROL Piro Panzer PyTrilinos Adelus TrilinosCouplings Pike" ) # The above test ensures that it can detect global build files as well as - # package files. 
+ # package files. Above, we check several of the parent packages in Trilinos + # and we want to ensure that no subpackages are listed in the array + # CTEST_LABELS_FOR_SUBPROJECTS. create_get_changed_trilinos_packages_test(cmake_std_atdm_TeuchosCore_PyTrilinos "cmake/std/atdm/anything\npackages/teuchos/core/CMakeLists.txt\npackages/PyTrilinos/anything" "TrilinosATDMConfigTests,TeuchosCore,PyTrilinos" "TrilinosATDMConfigTests,TeuchosCore,PyTrilinos" - "Setting Trilinos_ENABLE_TeuchosCore = ON;Setting Trilinos_ENABLE_PyTrilinos = ON" + "Setting Trilinos_ENABLE_TrilinosATDMConfigTests = ON;Setting Trilinos_ENABLE_TeuchosCore = ON;Setting Trilinos_ENABLE_PyTrilinos = ON" + "TrilinosATDMConfigTests TeuchosCore PyTrilinos" ) # The above test ensures that cmake/std/atdm/ changes don't trigger global # builds and it makes sure that PT (TeuchosCore) and ST packages # (PyTrilinos) do get enabled. -create_get_changed_trilinos_packages_test(cmake_std_TeuchosCore_PyTrilinos_NewPackage +create_get_changed_trilinos_packages_test(TeuchosCore_PyTrilinos_NewPackage "packages/teuchos/core/CMakeLists.txt\npackages/PyTrilinos/anything\npackages/new_package/anything" "TeuchosCore,PyTrilinos,NewPackage" "TeuchosCore,PyTrilinos" "Setting Trilinos_ENABLE_TeuchosCore = ON;Setting Trilinos_ENABLE_PyTrilinos = ON" + "TeuchosCore PyTrilinos" ) # The above test ensures PT (TeuchosCore) andST packages (PyTrilinos) do get # enabled but EX packages (NewPackage) don't.
@@ -149,6 +160,7 @@ create_get_changed_trilinos_packages_test(cmake_std_atdm "TrilinosATDMConfigTests" "TrilinosATDMConfigTests" "Setting Trilinos_ENABLE_TrilinosATDMConfigTests = ON" + "TrilinosATDMConfigTests" ) # The above test ensures changes to just cmake/std/atdm/ triggers the enable # of only the TrilinosATDMConfigTests package and does not trigger the diff --git a/commonTools/framework/get-changed-trilinos-packages.sh b/commonTools/framework/get-changed-trilinos-packages.sh index e7f93781a62e..0a20034f66cf 100755 --- a/commonTools/framework/get-changed-trilinos-packages.sh +++ b/commonTools/framework/get-changed-trilinos-packages.sh @@ -42,6 +42,7 @@ GIT_COMMIT_FROM=$1 GIT_COMMIT_TO=$2 CMAKE_PACKAGE_ENABLES_OUT=$3 +CTEST_LABELS_FOR_SUBPROJETS_OUT=$4 if [ "$GIT_COMMIT_FROM" == "" ] ; then echo "ERROR: Must specify first argument !" @@ -58,6 +59,45 @@ if [ "$CMAKE_PACKAGE_ENABLES_OUT" == "" ] ; then exit 1 fi +# +# Functions +# + +function trilinos_filter_packages_to_test() { + $TRIBITS_DIR/ci_support/filter-packages-list.py \ + --deps-xml-file=TrilinosPackageDependencies.xml \ + --input-packages-list="$1" \ + --keep-test-test-categories=PT,ST +} + + +function trilinos_get_all_toplevel_packages() { + $TRIBITS_DIR/ci_support/get-tribits-packages.py \ + --deps-xml-file=TrilinosPackageDependencies.xml +} + +# Zero is success! +function trilinos_contains_all_packages() { + echo "$1" | sed -n 1'p' | tr ',' '\n' | while read PKG_NAME ; do + #echo "PKG_NAME='${PKG_NAME}'" + if [[ "${PKG_NAME}" == "ALL_PACKAGES" ]] ; then + #echo "Contains ALL_PACKAGES!" 
+ return 0 + break + fi + return 1 +done + + +} + + +############################################ +# +# Executable script +# +############################################ + echo echo "***" echo "*** Generating set of Trilinos enables given modified packages from" @@ -149,11 +189,8 @@ echo "CHANGED_PACKAGES_FULL_LIST='$CHANGED_PACKAGES_FULL_LIST'" echo echo "D) Filter list of changed packages to get only the PT packages" echo -CHANGED_PACKAGES_ST_LIST=`$TRIBITS_DIR/ci_support/filter-packages-list.py \ - --deps-xml-file=TrilinosPackageDependencies.xml \ - --input-packages-list=$CHANGED_PACKAGES_FULL_LIST \ - --keep-test-test-categories=PT,ST` -echo "CHANGED_PACKAGES_ST_LIST='$CHANGED_PACKAGES_ST_LIST'" +CHANGED_PACKAGES_ST_LIST=$(trilinos_filter_packages_to_test "${CHANGED_PACKAGES_FULL_LIST}") +echo "CHANGED_PACKAGES_ST_LIST='${CHANGED_PACKAGES_ST_LIST}'" echo echo "E) Generate the ${CMAKE_PACKAGE_ENABLES_OUT} enables file" @@ -176,3 +213,29 @@ else fi echo "Wrote file '$CMAKE_PACKAGE_ENABLES_OUT'" + +echo +echo "F) Generate the ${CTEST_LABELS_FOR_SUBPROJETS_OUT} enables file" +echo + +printf "set(CTEST_LABELS_FOR_SUBPROJECTS" > $CTEST_LABELS_FOR_SUBPROJETS_OUT + +if [[ "$CHANGED_PACKAGES_ST_LIST" != "" ]] ; then + + if trilinos_contains_all_packages "$CHANGED_PACKAGES_ST_LIST"; then + ALL_PACKAGES=$(trilinos_get_all_toplevel_packages) + PR_PACKAGES=$(trilinos_filter_packages_to_test "${ALL_PACKAGES}") + else + PR_PACKAGES="$CHANGED_PACKAGES_ST_LIST" + fi + + echo "$PR_PACKAGES" | sed -n 1'p' | tr ',' '\n' | while read PKG_NAME ; do + #echo $PKG_NAME + printf " ${PKG_NAME}" >> $CTEST_LABELS_FOR_SUBPROJETS_OUT + done + +fi + +echo ")" >> $CTEST_LABELS_FOR_SUBPROJETS_OUT + +echo "Wrote file '$CTEST_LABELS_FOR_SUBPROJETS_OUT'" From 2f46801aa723c63f4d6824792dfc7fb4f521e8c5 Mon Sep 17 00:00:00 2001 From: "Roscoe A. 
Bartlett" Date: Fri, 22 May 2020 14:41:56 -0600 Subject: [PATCH 26/86] Use generated package_subproject_list.cmake file (#3272) The generated file will now always be-up-to-date and correct for the PR branch being tested. --- cmake/std/PullRequestLinuxDriverTest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/std/PullRequestLinuxDriverTest.py b/cmake/std/PullRequestLinuxDriverTest.py index 97a13b451dfc..0cf33393a58e 100755 --- a/cmake/std/PullRequestLinuxDriverTest.py +++ b/cmake/std/PullRequestLinuxDriverTest.py @@ -548,7 +548,8 @@ def createPackageEnables(arguments): 'get-changed-trilinos-packages.sh'), os.path.join('origin', arguments.targetBranch), 'HEAD', - 'packageEnables.cmake']) + 'packageEnables.cmake', + 'package_subproject_list.cmake']) else: with open('packageEnables.cmake', 'w') as f_out: f_out.write(dedent('''\ @@ -686,8 +687,7 @@ def run(): 'std', config_script), '-Dpackage_enables=../packageEnables.cmake', - '-Dsubprojects_file=../TFW_single_configure_support_scripts/'+ - 'package_subproject_list.cmake']) + '-Dsubprojects_file=../package_subproject_list.cmake']) return return_value From 242e85649cfde072f5b7f4626f2180030bd4bd06 Mon Sep 17 00:00:00 2001 From: GeoffDanielson Date: Fri, 22 May 2020 15:39:58 -0600 Subject: [PATCH 27/86] Update Tpetra_Details_Behavior.hpp exceed->does not exceed --- packages/tpetra/core/src/Tpetra_Details_Behavior.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tpetra/core/src/Tpetra_Details_Behavior.hpp b/packages/tpetra/core/src/Tpetra_Details_Behavior.hpp index 20dd43c9e40f..1bc24215d64e 100644 --- a/packages/tpetra/core/src/Tpetra_Details_Behavior.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_Behavior.hpp @@ -202,7 +202,7 @@ class Behavior { /// \brief the threshold for transitioning from device to host /// - /// If the number of elements in the multivector exceeds this + /// If the number of elements in the multivector does not exceed this /// 
threshold and the data is on host, then run the calculation on /// host. Otherwise, run on device. /// By default this is 10000, but may be altered by the environment From 346f9ab299eef694058d8e687fea4805917319be Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Fri, 22 May 2020 16:21:07 -0600 Subject: [PATCH 28/86] MueLu Utilities: Add Kokkos version of ApplyOAZToMatrixRows --- .../src/Utils/MueLu_UtilitiesBase_decl.hpp | 23 +++++--- .../src/Utils/MueLu_Utilities_kokkos_decl.hpp | 4 ++ .../src/Utils/MueLu_Utilities_kokkos_def.hpp | 58 +++++++++++++++++++ 3 files changed, 78 insertions(+), 7 deletions(-) diff --git a/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp b/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp index 5bb15ec566b5..79683bb04759 100644 --- a/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp +++ b/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp @@ -852,7 +852,7 @@ namespace MueLu { // Takes a vector of row indices static void ApplyOAZToMatrixRows(Teuchos::RCP >& A, const std::vector& dirichletRows) { - RCP Rmap = A->getColMap(); + RCP Rmap = A->getRowMap(); RCP Cmap = A->getColMap(); Scalar one =Teuchos::ScalarTraits::one(); Scalar zero =Teuchos::ScalarTraits::zero(); @@ -878,11 +878,15 @@ namespace MueLu { // Takes a Boolean array. 
static void ApplyOAZToMatrixRows(Teuchos::RCP >& A, const Teuchos::ArrayRCP& dirichletRows) { - RCP Rmap = A->getColMap(); + TEUCHOS_ASSERT(A->isFillComplete()); + RCP domMap = A->getDomainMap(); + RCP ranMap = A->getRangeMap(); + RCP Rmap = A->getRowMap(); RCP Cmap = A->getColMap(); - Scalar one =Teuchos::ScalarTraits::one(); - Scalar zero =Teuchos::ScalarTraits::zero(); - + TEUCHOS_ASSERT(static_cast(dirichletRows.size()) == Rmap->getNodeNumElements()); + const Scalar one = Teuchos::ScalarTraits::one(); + const Scalar zero = Teuchos::ScalarTraits::zero(); + A->resumeFill(); for(size_t i=0; i<(size_t) dirichletRows.size(); i++) { if (dirichletRows[i]){ GlobalOrdinal row_gid = Rmap->getGlobalElement(i); @@ -890,16 +894,18 @@ namespace MueLu { Teuchos::ArrayView indices; Teuchos::ArrayView values; A->getLocalRowView(i,indices,values); - // NOTE: This won't work with fancy node types. - Scalar* valuesNC = const_cast(values.getRawPtr()); + + Teuchos::ArrayRCP valuesNC(values.size()); for(size_t j=0; j<(size_t)indices.size(); j++) { if(Cmap->getGlobalElement(indices[j])==row_gid) valuesNC[j]=one; else valuesNC[j]=zero; } + A->replaceLocalValues(i,indices,valuesNC()); } } + A->fillComplete(domMap, ranMap); } // Zeros out rows @@ -923,6 +929,7 @@ namespace MueLu { static void ZeroDirichletRows(Teuchos::RCP >& A, const Teuchos::ArrayRCP& dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits::zero()) { + TEUCHOS_ASSERT(static_cast(dirichletRows.size()) == A->getRowMap()->getNodeNumElements()); for(size_t i=0; i<(size_t) dirichletRows.size(); i++) { if (dirichletRows[i]) { Teuchos::ArrayView indices; @@ -941,6 +948,7 @@ namespace MueLu { static void ZeroDirichletRows(Teuchos::RCP >& X, const Teuchos::ArrayRCP& dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits::zero()) { + TEUCHOS_ASSERT(static_cast(dirichletRows.size()) == X->getMap()->getNodeNumElements()); for(size_t i=0; i<(size_t) dirichletRows.size(); i++) { if (dirichletRows[i]) { for(size_t j=0; 
jgetNumVectors(); j++) @@ -954,6 +962,7 @@ namespace MueLu { static void ZeroDirichletCols(Teuchos::RCP& A, const Teuchos::ArrayRCP& dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits::zero()) { + TEUCHOS_ASSERT(static_cast(dirichletCols.size()) == A->getColMap()->getNodeNumElements()); for(size_t i=0; igetNodeNumRows(); i++) { Teuchos::ArrayView indices; Teuchos::ArrayView values; diff --git a/packages/muelu/src/Utils/MueLu_Utilities_kokkos_decl.hpp b/packages/muelu/src/Utils/MueLu_Utilities_kokkos_decl.hpp index bf7ee2dec152..5fc84d4848aa 100644 --- a/packages/muelu/src/Utils/MueLu_Utilities_kokkos_decl.hpp +++ b/packages/muelu/src/Utils/MueLu_Utilities_kokkos_decl.hpp @@ -304,6 +304,8 @@ namespace MueLu { */ static RCP > CuthillMcKee(const Matrix &Op); + static void ApplyOAZToMatrixRows(RCP& A, const Kokkos::View& dirichletRows); + }; // class Utils @@ -716,6 +718,8 @@ namespace MueLu { */ static RCP > CuthillMcKee(const Matrix &Op); + static void ApplyOAZToMatrixRows(RCP& A, const Kokkos::View& dirichletRows); + }; // class Utilities (specialization SC=double LO=GO=int) diff --git a/packages/muelu/src/Utils/MueLu_Utilities_kokkos_def.hpp b/packages/muelu/src/Utils/MueLu_Utilities_kokkos_def.hpp index a39e17cc030f..8144de4b0ff2 100644 --- a/packages/muelu/src/Utils/MueLu_Utilities_kokkos_def.hpp +++ b/packages/muelu/src/Utils/MueLu_Utilities_kokkos_def.hpp @@ -729,6 +729,64 @@ namespace MueLu { return MueLu::CuthillMcKee(Op); } + // Applies Ones-and-Zeros to matrix rows + // Takes a Boolean array. 
+ template + void + ApplyOAZToMatrixRows(Teuchos::RCP >& A, + const Kokkos::View& dirichletRows) { + TEUCHOS_ASSERT(A->isFillComplete()); + using ATS = Kokkos::ArithTraits; + using impl_ATS = Kokkos::ArithTraits; + using range_type = Kokkos::RangePolicy; + + RCP > domMap = A->getDomainMap(); + RCP > ranMap = A->getRangeMap(); + RCP > Rmap = A->getRowMap(); + RCP > Cmap = A->getColMap(); + + TEUCHOS_ASSERT(static_cast(dirichletRows.size()) == Rmap->getNodeNumElements()); + + const Scalar one = impl_ATS::one(); + const Scalar zero = impl_ATS::zero(); + + auto localMatrix = A->getLocalMatrix(); + auto localRmap = Rmap->getLocalMap(); + auto localCmap = Cmap->getLocalMap(); + + Kokkos::parallel_for("MueLu::Utils::ApplyOAZ",range_type(0,dirichletRows.extent(0)), + KOKKOS_LAMBDA(const LocalOrdinal row) { + if (dirichletRows(row)){ + auto rowView = localMatrix.row(row); + auto length = rowView.length; + auto row_gid = localRmap.getGlobalElement(row); + auto row_lid = localCmap.getLocalElement(row_gid); + + for (decltype(length) colID = 0; colID < length; colID++) + if (rowView.colidx(colID) == row_lid) + rowView.value(colID) = one; + else + rowView.value(colID) = zero; + } + }); + } + + template + void + Utilities_kokkos:: + ApplyOAZToMatrixRows(Teuchos::RCP >& A, + const Kokkos::View& dirichletRows) { + MueLu::ApplyOAZToMatrixRows(A, dirichletRows); + } + + template + void + Utilities_kokkos:: + ApplyOAZToMatrixRows(Teuchos::RCP >& A, + const Kokkos::View& dirichletRows) { + MueLu::ApplyOAZToMatrixRows(A, dirichletRows); + } + } //namespace MueLu #define MUELU_UTILITIES_KOKKOS_SHORT From b04428a4c0de8782a4c2d2490a870a1a21852e2f Mon Sep 17 00:00:00 2001 From: Mauro Perego Date: Fri, 22 May 2020 17:47:30 -0600 Subject: [PATCH 29/86] ROL: add missing calls to update functions --- packages/rol/src/function/constraint/ROL_ConstraintDef.hpp | 1 + packages/rol/src/step/ROL_AugmentedLagrangianStep.hpp | 1 + .../rol/src/step/augmentedlagrangian/ROL_QuadraticPenalty.hpp | 1 + 3 
files changed, 3 insertions(+) diff --git a/packages/rol/src/function/constraint/ROL_ConstraintDef.hpp b/packages/rol/src/function/constraint/ROL_ConstraintDef.hpp index 9e9645f22257..22e75d8a810b 100644 --- a/packages/rol/src/function/constraint/ROL_ConstraintDef.hpp +++ b/packages/rol/src/function/constraint/ROL_ConstraintDef.hpp @@ -592,6 +592,7 @@ Real Constraint::checkAdjointConsistencyJacobian(const Vector &w, ROL::Ptr > Jv = dualw.clone(); ROL::Ptr > Jw = dualv.clone(); + this->update(x); applyJacobian(*Jv,v,x,tol); applyAdjointJacobian(*Jw,w,x,tol); diff --git a/packages/rol/src/step/ROL_AugmentedLagrangianStep.hpp b/packages/rol/src/step/ROL_AugmentedLagrangianStep.hpp index b32976ff1817..9d29feb3ae6a 100644 --- a/packages/rol/src/step/ROL_AugmentedLagrangianStep.hpp +++ b/packages/rol/src/step/ROL_AugmentedLagrangianStep.hpp @@ -450,6 +450,7 @@ class AugmentedLagrangianStep : public Step { algo_state.snorm = s.norm(); algo_state.iter++; // Update objective function value + obj.update(x); algo_state.value = augLag.getObjectiveValue(x); // Update constraint value augLag.getConstraintVec(*(state->constraintVec),x); diff --git a/packages/rol/src/step/augmentedlagrangian/ROL_QuadraticPenalty.hpp b/packages/rol/src/step/augmentedlagrangian/ROL_QuadraticPenalty.hpp index 29dd5f426013..b9504bc34ebf 100644 --- a/packages/rol/src/step/augmentedlagrangian/ROL_QuadraticPenalty.hpp +++ b/packages/rol/src/step/augmentedlagrangian/ROL_QuadraticPenalty.hpp @@ -180,6 +180,7 @@ class QuadraticPenalty : public Objective { virtual void hessVec( Vector &hv, const Vector &v, const Vector &x, Real &tol ) { // Apply objective Hessian to a vector if (HessianApprox_ < 3) { + con_->update(x); con_->applyJacobian(*primalConVector_,v,x,tol); con_->applyAdjointJacobian(hv,primalConVector_->dual(),x,tol); if (!useScaling_) { From f37c63bfd7cb9ace5c77cf973670a93d35532d33 Mon Sep 17 00:00:00 2001 From: Mauro Perego Date: Thu, 21 May 2020 09:54:12 -0600 Subject: [PATCH 30/86] Intrepid2: 
Changes to Projection Tools - made dependency on KokkosKernels optional - improved solution of element projections by re-using matrix factorization when possible --- packages/intrepid2/cmake/Dependencies.cmake | 4 +- .../intrepid2/cmake/Intrepid2_config.h.in | 3 + .../Projection/Intrepid2_ProjectionTools.hpp | 331 ++++++++++++++---- .../Intrepid2_ProjectionToolsDefHCURL.hpp | 14 +- .../Intrepid2_ProjectionToolsDefHDIV.hpp | 9 +- .../Intrepid2_ProjectionToolsDefHGRAD.hpp | 14 +- .../Intrepid2_ProjectionToolsDefHVOL.hpp | 5 +- .../Intrepid2_ProjectionToolsDefL2.hpp | 12 +- 8 files changed, 290 insertions(+), 102 deletions(-) diff --git a/packages/intrepid2/cmake/Dependencies.cmake b/packages/intrepid2/cmake/Dependencies.cmake index 91ac866a6fe9..fdf06cf1d4d1 100644 --- a/packages/intrepid2/cmake/Dependencies.cmake +++ b/packages/intrepid2/cmake/Dependencies.cmake @@ -1,5 +1,5 @@ -SET(LIB_REQUIRED_DEP_PACKAGES TeuchosCore TeuchosNumerics Shards KokkosCore KokkosContainers KokkosAlgorithms KokkosKernels) -SET(LIB_OPTIONAL_DEP_PACKAGES Sacado) +SET(LIB_REQUIRED_DEP_PACKAGES TeuchosCore TeuchosNumerics Shards KokkosCore KokkosContainers KokkosAlgorithms) +SET(LIB_OPTIONAL_DEP_PACKAGES Sacado KokkosKernels) SET(TEST_REQUIRED_DEP_PACKAGES) SET(TEST_OPTIONAL_DEP_PACKAGES Sacado) SET(LIB_REQUIRED_DEP_TPLS) diff --git a/packages/intrepid2/cmake/Intrepid2_config.h.in b/packages/intrepid2/cmake/Intrepid2_config.h.in index d6e6614728e6..7e4f3ff2d41e 100644 --- a/packages/intrepid2/cmake/Intrepid2_config.h.in +++ b/packages/intrepid2/cmake/Intrepid2_config.h.in @@ -7,3 +7,6 @@ /* Define if want to build with sacado enabled */ #cmakedefine HAVE_INTREPID2_SACADO +/* Define if want to build with KokkosKernels enabled */ +#cmakedefine HAVE_INTREPID2_KOKKOSKERNELS + diff --git a/packages/intrepid2/src/Projection/Intrepid2_ProjectionTools.hpp b/packages/intrepid2/src/Projection/Intrepid2_ProjectionTools.hpp index 389d28b91e55..426fbd53e582 100644 --- 
a/packages/intrepid2/src/Projection/Intrepid2_ProjectionTools.hpp +++ b/packages/intrepid2/src/Projection/Intrepid2_ProjectionTools.hpp @@ -102,10 +102,12 @@ #include "Intrepid2_ProjectionStruct.hpp" +#ifdef HAVE_INTREPID2_KOKKOSKERNELS #include "KokkosBatched_QR_Serial_Internal.hpp" #include "KokkosBatched_ApplyQ_Serial_Internal.hpp" #include "KokkosBatched_Trsv_Serial_Internal.hpp" #include "KokkosBatched_Util.hpp" +#endif namespace Intrepid2 { @@ -504,94 +506,277 @@ class ProjectionTools { ProjectionStruct * projStruct); - - /** \brief Functor to solve a square system A x = b on each cell using QR method implemented in KokkosKernels + /** \brief Class to solve a square system A x = b on each cell A is expected to be saddle a point (KKT) matrix of the form [C B; B^T 0], where C has size nxn and B nxm, with n>0, m>=0. B^T is copied from B, so one does not have to define the B^T portion of A. b will contain the solution x. The first n-entries of x are copied into the provided basis coefficients using the provided indexing. + The system is solved either with a QR factorization implemented in KokkosKernels or + with Lapack GELS function. 
*/ - template - struct SolveSystem { - ViewType1 basisCoeffs_; // rank-2 view (C,F) containing the basis coefficients on each cell - ViewType2 elemMat_; // rank-3 view (C,P,P) containing the element matrix on each cell - ViewType2 elemRhs_; // rank-2 view (C,P) containing the element rhs on each cell - ViewType2 tau_; // rank-2 view (C,P) used to store the QR factorization - ViewType3 w_; // rank-2 view (C,P) used has a workspace (needs to be of Layout Right) + struct ElemSystem { - const ViewType4 elemDof_; // rank-1 view having dimension n, containing the basis numbering - ordinal_type n_, m_; // basis cardinality and dimension of the constraint of the KKT system + + std::string systemName_; + bool matrixIndependentOfCell_; /** \brief Functor constructor + \param systemName [in] - string containing the name of the system (passed to parallel for) + \param matrixIndependentOfCell [in] - bool: whether the local cell matrix of the system is the same for every cell + if true, the matrix factorization is performed only on the first cell + and reused on other cells. + */ + + ElemSystem (std::string systemName, bool matrixIndependentOfCell) : + systemName_(systemName), matrixIndependentOfCell_(matrixIndependentOfCell){}; + + + + /** \brief Solve the system and returns the basis coefficients + solve the system either using Kokkos Kernel QR or Lapack GELS + depending on whether Kokkos Kernel is enabled. + + \code + C - num. cells + P - num. evaluation points + \endcode + + + \param basisCoeffs [out] - rank-2 view (C,F) containing the basis coefficients + \param elemMat [in/out] - rank-3 view (C,P,P) containing the element matrix of size + numCells x (n+m)x(n+m) on each cell + it will be overwritten.
+ \param elemRhs [in/out] - rank-2 view (C,P) containing the element rhs on each cell + of size numCells x (n+m) + it will contain the solution of the system on output + \param tau [out] - rank-2 view (C,P) used to store the QR factorization + size: numCells x (n+m) + \param w [out] - rank-2 view (C,P) used as a workspace + Layout Right, size: numCells x (n+m) + \param elemDof [in] - rank-1 view having dimension n, containing the basis numbering + \param n [in] - ordinal_type, basis cardinality + \param m [in] - ordinal_type, dimension of the constraint of the KKT system + */ + template + void solve(ViewType1 basisCoeffs, ViewType2 elemMat, ViewType2 elemRhs, ViewType2 tau, + ViewType3 w,const ViewType4 elemDof, ordinal_type n, ordinal_type m=0) { +#ifdef HAVE_INTREPID2_KOKKOSKERNELS + solveParallel(basisCoeffs, elemMat, elemRhs, tau, + w, elemDof, n, m); +#else + solveSerial(basisCoeffs, elemMat, elemRhs, tau, + w, elemDof, n, m); +#endif + + } + + /** \brief Parallel implementation of solve, using Kokkos Kernels QR factorization + */ +#ifdef HAVE_INTREPID2_KOKKOSKERNELS + template + void solveParallel(ViewType1 basisCoeffs, ViewType2 elemMat, ViewType2 elemRhs, ViewType2 taul, + ViewType3 work,const ViewType4 elemDof, ordinal_type n, ordinal_type m) { + + ordinal_type numCells = basisCoeffs.extent(0); + + if(matrixIndependentOfCell_) { + auto A0 = Kokkos::subview(elemMat, 0, Kokkos::ALL(), Kokkos::ALL()); + auto tau0 = Kokkos::subview(taul, 0, Kokkos::ALL()); + + auto A0_host = Kokkos::create_mirror_view_and_copy(typename ExecSpaceType::memory_space(), A0); + auto tau0_host = Kokkos::create_mirror_view(typename ExecSpaceType::memory_space(), tau0); + + + for(ordinal_type i=n; i (0, numCells), + KOKKOS_LAMBDA (const size_t ic) { + auto w = Kokkos::subview(work, ic, Kokkos::ALL()); + + auto b = Kokkos::subview(elemRhs, ic, Kokkos::ALL()); + + //b'*Q0 -> b + KokkosBatched::SerialApplyQ_RightNoTransForwardInternal::invoke( + 1, A0.extent(0), A0.extent(1), +
A0.data(), A0.stride_0(), A0.stride_1(), + tau0.data(), tau0.stride_0(), + b.data(), 1, b.stride_0(), + w.data()); + + // R0^{-1} b -> b + KokkosBatched::SerialTrsvInternalUpper::invoke(false, + A0.extent(0), + 1.0, + A0.data(), A0.stride_0(), A0.stride_1(), + b.data(), b.stride_0()); + + //scattering b into the basis coefficients + for(ordinal_type i=0; i (0, numCells), + KOKKOS_LAMBDA (const size_t ic) { + + auto A = Kokkos::subview(elemMat, ic, Kokkos::ALL(), Kokkos::ALL()); + auto tau = Kokkos::subview(taul, ic, Kokkos::ALL()); + auto w = Kokkos::subview(work, ic, Kokkos::ALL()); + + for(ordinal_type i=n; i b + KokkosBatched::SerialApplyQ_RightNoTransForwardInternal::invoke( + 1, A.extent(0), A.extent(1), + A.data(), A.stride_0(), A.stride_1(), + tau.data(), tau.stride_0(), + b.data(), 1, b.stride_0(), + w.data()); + + // R^{-1} b -> b + KokkosBatched::SerialTrsvInternalUpper::invoke(false, + A.extent(0), + 1.0, + A.data(), A.stride_0(), A.stride_1(), + b.data(), b.stride_0()); + + //scattering b into the basis coefficients + for(ordinal_type i=0; i b - KokkosBatched::SerialApplyQ_RightNoTransForwardInternal::invoke( - 1, A.extent(0), A.extent(1), - A.data(), A.stride_0(), A.stride_1(), - tau.data(), tau.stride_0(), - b.data(), 1, b.stride_0(), - w.data()); - - // R^{-1} b -> b - KokkosBatched::SerialTrsvInternalUpper::invoke(false, - A.extent(0), - 1.0, - A.data(), A.stride_0(), A.stride_1(), - b.data(), b.stride_0()); - - //scattering b into the basis coefficients - for(ordinal_type i=0; i + void solveSerial(ViewType1 basisCoeffs, ViewType2 elemMat, ViewType2 elemRhs, ViewType2 , + ViewType3, const ViewType4 elemDof, ordinal_type n, ordinal_type m) { + using valueType = typename ViewType2::value_type; + using host_space_type = typename Kokkos::Impl::is_space::host_mirror_space::execution_space; + Kokkos::View + serialElemMat("serialElemMat", n+m, n+m); + Teuchos::LAPACK lapack_; + ordinal_type numCells = basisCoeffs.extent(0); + + if(matrixIndependentOfCell_) 
{ + ViewType2 elemRhsTrans("transRhs", elemRhs.extent(1), elemRhs.extent(0)); + Kokkos::View + pivVec("pivVec", m+n + std::max(m+n, numCells), 1); + + Kokkos::View serialElemRhs("serialElemRhs", n+m, numCells); + + auto A = Kokkos::create_mirror_view_and_copy(typename ExecSpaceType::memory_space(), + Kokkos::subview(elemMat, 0, Kokkos::ALL(), Kokkos::ALL())); + auto b = Kokkos::create_mirror_view_and_copy(typename ExecSpaceType::memory_space(), elemRhs); + + auto serialBasisCoeffs = Kokkos::create_mirror_view_and_copy( + typename ExecSpaceType::memory_space(), basisCoeffs); + + for(ordinal_type i=0; i pivVec("pivVec", 2*(m+n), 1); + Kokkos::View serialElemRhs("serialElemRhs", n+m, 1 ); + for (ordinal_type ic = 0; ic < numCells; ic++) { + auto A = Kokkos::create_mirror_view_and_copy(typename ExecSpaceType::memory_space(), + Kokkos::subview(elemMat, ic, Kokkos::ALL(), Kokkos::ALL())); + auto b = Kokkos::create_mirror_view_and_copy(typename ExecSpaceType::memory_space(), + Kokkos::subview(elemRhs, ic, Kokkos::ALL())); + auto basisCoeffs_ = Kokkos::subview(basisCoeffs, ic, Kokkos::ALL()); + auto serialBasisCoeffs = Kokkos::create_mirror_view_and_copy(typename ExecSpaceType::memory_space(), + basisCoeffs_); + + Kokkos::deep_copy(serialElemMat,valueType(0)); //LAPACK might overwrite the matrix + + for(ordinal_type i=0; i>> ERROR (Intrepid::ProjectionTools::getBasisCoeffs): " + << "LAPACK return with error code: " + << info; + INTREPID2_TEST_FOR_EXCEPTION( true, std::runtime_error, ss.str().c_str() ); + } + + for(ordinal_type i=0; i::getHCurlBasisCoeffs(Kokkos::DynRankView functorTypeCellSys; - Kokkos::parallel_for(policy, functorTypeCellSys( basisCoeffs, edgeMassMat_, edgeRhsMat_, t_, w_, edgeDofs, edgeCardinality, 1)); + ElemSystem edgeSystem("edgeSystem", false); + edgeSystem.solve(basisCoeffs, edgeMassMat_, edgeRhsMat_, t_, w_, edgeDofs, edgeCardinality, 1); for(ordinal_type i=0; igetDofOrdinal(edgeDim, ie, i); @@ -668,7 +668,6 @@ 
ProjectionTools::getHCurlBasisCoeffs(Kokkos::DynRankView>> ERROR (Intrepid2::ProjectionTools::getHCurlBasisCoeffs): " << "Method not implemented for basis " << name; INTREPID2_TEST_FOR_EXCEPTION( true, std::runtime_error, ss.str().c_str() ); - return; } { @@ -772,8 +771,8 @@ ProjectionTools::getHCurlBasisCoeffs(Kokkos::DynRankView functorTypeCellSys; - Kokkos::parallel_for(policy, functorTypeCellSys( basisCoeffs, faceMassMat_, faceRhsMat_, t_, w_, faceDofs, numFaceDofs, hgradCardinality)); + ElemSystem faceSystem( "faceSystem", false); + faceSystem.solve(basisCoeffs, faceMassMat_, faceRhsMat_, t_, w_, faceDofs, numFaceDofs, hgradCardinality); for(ordinal_type i=0; igetDofOrdinal(faceDim, iface, i); @@ -796,7 +795,6 @@ ProjectionTools::getHCurlBasisCoeffs(Kokkos::DynRankView>> ERROR (Intrepid2::ProjectionTools::getHCurlBasisCoeffs): " << "Method not implemented for basis " << name; INTREPID2_TEST_FOR_EXCEPTION( true, std::runtime_error, ss.str().c_str() ); - return; } range_type cellPointsRange = targetEPointsRange(dim, 0); @@ -871,8 +869,8 @@ ProjectionTools::getHCurlBasisCoeffs(Kokkos::DynRankView functorTypeCellSys; - Kokkos::parallel_for(policy, functorTypeCellSys( basisCoeffs, cellMassMat_, cellRhsMat_, t_, w_, cellDofs, numCellDofs, hgradCardinality)); + ElemSystem cellSystem( "cellSystem", true); + cellSystem.solve(basisCoeffs, cellMassMat_, cellRhsMat_, t_, w_, cellDofs, numCellDofs, hgradCardinality); delete hgradBasis; } diff --git a/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHDIV.hpp b/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHDIV.hpp index d155ef3866d5..66f74efd33a8 100644 --- a/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHDIV.hpp +++ b/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHDIV.hpp @@ -455,8 +455,8 @@ ProjectionTools::getHDivBasisCoeffs(Kokkos::DynRankView functorType; - Kokkos::parallel_for(policy, functorType( basisCoeffs, sideMassMat_, sideRhsMat_, t_, w_, sideDof, 
sideCardinality, 1)); + ElemSystem sideSystem("sideSystem", false); + sideSystem.solve(basisCoeffs, sideMassMat_, sideRhsMat_, t_, w_, sideDof, sideCardinality, 1); } @@ -506,6 +506,7 @@ ProjectionTools::getHDivBasisCoeffs(Kokkos::DynRankViewgetCardinality(); ordinal_type numCurlInteriorDOFs = hcurlBasis->getDofCount(dim,0); @@ -561,8 +562,8 @@ ProjectionTools::getHDivBasisCoeffs(Kokkos::DynRankView functorTypeCellSys; - Kokkos::parallel_for(policy, functorTypeCellSys( basisCoeffs, massMat_, rhsMatTrans, t_, w_, cellDofs, numCellDofs, numCurlInteriorDOFs)); + ElemSystem cellSystem("cellSystem", true); + cellSystem.solve(basisCoeffs, massMat_, rhsMatTrans, t_, w_, cellDofs, numCellDofs, numCurlInteriorDOFs); } diff --git a/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHGRAD.hpp b/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHGRAD.hpp index a171f8d2e50b..5a6f0ee0526e 100644 --- a/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHGRAD.hpp +++ b/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHGRAD.hpp @@ -564,8 +564,9 @@ ProjectionTools::getHGradBasisCoeffs(Kokkos::DynRankView functorTypeCellSys; - Kokkos::parallel_for(policy, functorTypeCellSys( basisCoeffs, edgeMassMat_, edgeRhsMat_, t_, w_, edgeDofs, edgeCardinality)); + + ElemSystem edgeSystem("edgeSystem", false); + edgeSystem.solve(basisCoeffs, edgeMassMat_, edgeRhsMat_, t_, w_, edgeDofs, edgeCardinality); for(ordinal_type i=0; i::getHGradBasisCoeffs(Kokkos::DynRankView functorTypeCellSys; - Kokkos::parallel_for(policy, functorTypeCellSys( basisCoeffs, faceMassMat_, faceRhsMat_, t_, w_, faceDofs, faceCardinality)); + + ElemSystem faceSystem("faceSystem", false); + faceSystem.solve(basisCoeffs, faceMassMat_, faceRhsMat_, t_, w_, faceDofs, faceCardinality); for(ordinal_type i=0; i::getHGradBasisCoeffs(Kokkos::DynRankView functorTypeCellSys; - Kokkos::parallel_for(policy, functorTypeCellSys( basisCoeffs, cellMassMat_, cellRhsMat_, t_, w_, cellDofs, 
numElemDofs)); + ElemSystem cellSystem("cellSystem", true); + cellSystem.solve(basisCoeffs, cellMassMat_, cellRhsMat_, t_, w_, cellDofs, numElemDofs); } } } diff --git a/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHVOL.hpp b/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHVOL.hpp index 3245dc4d02fd..eb9bfa6bcc12 100644 --- a/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHVOL.hpp +++ b/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefHVOL.hpp @@ -145,9 +145,8 @@ ProjectionTools::getHVolBasisCoeffs(Kokkos::DynRankView policy(0, numCells); - typedef SolveSystem functorType; - Kokkos::parallel_for(policy, functorType( basisCoeffs, massMat, rhsMat, t_, w_, cellDofs, basisCardinality)); + ElemSystem cellSystem("cellSystem", true); + cellSystem.solve(basisCoeffs, massMat, rhsMat, t_, w_, cellDofs, basisCardinality); } } diff --git a/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefL2.hpp b/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefL2.hpp index 876fb4a4dc8f..c69007800144 100644 --- a/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefL2.hpp +++ b/packages/intrepid2/src/Projection/Intrepid2_ProjectionToolsDefL2.hpp @@ -591,8 +591,8 @@ ProjectionTools::getL2BasisCoeffs(Kokkos::DynRankView functorType; - Kokkos::parallel_for(policy, functorType( basisCoeffs, edgeMassMat_, edgeRhsMat_, t_, w_, edgeDof, edgeCardinality)); + ElemSystem edgeSystem("edgeSystem", false); + edgeSystem.solve(basisCoeffs, edgeMassMat_, edgeRhsMat_, t_, w_, edgeDof, edgeCardinality); } ScalarViewType ortJacobian_("ortJacobian", numCells, faceDim, faceDim); @@ -663,8 +663,8 @@ ProjectionTools::getL2BasisCoeffs(Kokkos::DynRankView functorType; - Kokkos::parallel_for(policy, functorType( basisCoeffs, faceMassMat_, faceRhsMat_, t_, w_, faceDof, faceCardinality)); + ElemSystem faceSystem("faceSystem", false); + faceSystem.solve(basisCoeffs, faceMassMat_, faceRhsMat_, t_, w_, faceDof, faceCardinality); } 
ordinal_type numElemDofs = cellBasis->getDofCount(dim,0); @@ -710,8 +710,8 @@ ProjectionTools::getL2BasisCoeffs(Kokkos::DynRankView functorType2; - Kokkos::parallel_for(policy, functorType2( basisCoeffs, cellMassMat_, cellRhsMat_, t_, w_, cellDofs, numElemDofs)); + ElemSystem cellSystem("cellSystem", true); + cellSystem.solve(basisCoeffs, cellMassMat_, cellRhsMat_, t_, w_, cellDofs, numElemDofs); } } } From 59dbde0b8584d31a72f1cee8eb310279647cffb5 Mon Sep 17 00:00:00 2001 From: Mauro Perego Date: Fri, 22 May 2020 17:50:29 -0600 Subject: [PATCH 31/86] ROL: Changes to Thyra Adapters - improved performance by carefully avoiding recomputing objects - output messages useful for debug purposes - added ASSERTS in debug mode to ensure parameters are properly updated --- .../ROL_ThyraProductME_Constraint_SimOpt.hpp | 468 ++++++++++++------ .../function/ROL_ThyraProductME_Objective.hpp | 82 ++- .../ROL_ThyraProductME_Objective_SimOpt.hpp | 177 +++++-- 3 files changed, 533 insertions(+), 194 deletions(-) diff --git a/packages/rol/adapters/thyra/src/function/ROL_ThyraProductME_Constraint_SimOpt.hpp b/packages/rol/adapters/thyra/src/function/ROL_ThyraProductME_Constraint_SimOpt.hpp index da6a5e0ae984..087daa2f8012 100644 --- a/packages/rol/adapters/thyra/src/function/ROL_ThyraProductME_Constraint_SimOpt.hpp +++ b/packages/rol/adapters/thyra/src/function/ROL_ThyraProductME_Constraint_SimOpt.hpp @@ -52,6 +52,7 @@ #include "ROL_Constraint_SimOpt.hpp" #include "Tpetra_CrsMatrix.hpp" +#include "Teuchos_VerbosityLevel.hpp" using namespace ROL; @@ -61,16 +62,22 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { public: - ThyraProductME_Constraint_SimOpt(Thyra::ModelEvaluatorDefaultBase& thyra_model_, int g_index_, const std::vector& p_indices_,Teuchos::RCP params_ = Teuchos::null, bool recompute = false) : - thyra_model(thyra_model_), g_index(g_index_), p_indices(p_indices_), params(params_) { + ThyraProductME_Constraint_SimOpt(Thyra::ModelEvaluatorDefaultBase& 
thyra_model_, int g_index_, const std::vector& p_indices_, + Teuchos::RCP params_ = Teuchos::null, Teuchos::EVerbosityLevel verbLevel= Teuchos::VERB_HIGH) : + thyra_model(thyra_model_), g_index(g_index_), p_indices(p_indices_), params(params_), + out(Teuchos::VerboseObjectBase::getDefaultOStream()), + verbosityLevel(verbLevel){ thyra_solver = Teuchos::null; - updateValue = updateJacobian1 = true; - value_ = 0; + computeValue = computeJacobian1 = solveConstraint = true; num_responses = -1; - x_ptr = Teuchos::null; - grad_ptr = Teuchos::null; - if(params != Teuchos::null) + value_ptr_ = Teuchos::null; + rol_u_ptr = Teuchos::null; + rol_z_ptr = Teuchos::null; + jac1 = Teuchos::null; + if(params != Teuchos::null) { params->set("Optimizer Iteration Number", -1); + params->set > >("Optimization Variable", Teuchos::null); + } }; void setExternalSolver(Teuchos::RCP> thyra_solver_) { @@ -82,55 +89,83 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { } void value(Vector &c, const Vector &u, const Vector &z, Real &tol) { +#ifdef HAVE_ROL_DEBUG + //u and z should be updated in the update functions before calling value + TEUCHOS_ASSERT(!u_hasChanged(u)); + TEUCHOS_ASSERT(!z_hasChanged(z)); +#endif - const ThyraVector & thyra_p = dynamic_cast&>(z); - const ThyraVector & thyra_x = dynamic_cast&>(u); - ThyraVector & thyra_f = dynamic_cast&>(c); - Teuchos::RCP > thyra_prodvec_p = Teuchos::rcp_dynamic_cast>(thyra_p.getVector()); - - Thyra::ModelEvaluatorBase::InArgs inArgs = thyra_model.createInArgs(); - Thyra::ModelEvaluatorBase::OutArgs outArgs = thyra_model.createOutArgs(); - - outArgs.set_f(thyra_f.getVector()); - for(std::size_t i=0; igetVectorBlock(i)); - inArgs.set_x(thyra_x.getVector()); - - thyra_model.evalModel(inArgs, outArgs); - + if(verbosityLevel >= Teuchos::VERB_MEDIUM) + *out << "ROL::ThyraProductME_Constraint_SimOpt::value" << std::endl; - /* { - const ThyraVector & thyra_p = dynamic_cast&>(z); - const ThyraVector & thyra_x = dynamic_cast&>(u); 
- const ThyraVector & thyra_f = dynamic_cast&>(c); + if(!computeValue) { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Constraint_SimOpt::value, Skipping Value Computation" << std::endl; + TEUCHOS_ASSERT(Teuchos::nonnull(value_ptr_)); + c.set(*value_ptr_); + } + else { + const ThyraVector & thyra_p = dynamic_cast&>(z); + const ThyraVector & thyra_x = dynamic_cast&>(u); + ThyraVector & thyra_f = dynamic_cast&>(c); + Teuchos::RCP > thyra_prodvec_p = Teuchos::rcp_dynamic_cast>(thyra_p.getVector()); - Thyra::ConstDetachedVectorView x_view(thyra_x.getVector()); - Thyra::ConstDetachedVectorView p_view(thyra_p.getVector()); - Thyra::ConstDetachedVectorView f_view(thyra_f.getVector()); + Thyra::ModelEvaluatorBase::InArgs inArgs = thyra_model.createInArgs(); + Thyra::ModelEvaluatorBase::OutArgs outArgs = thyra_model.createOutArgs(); - std::cout << "\nEnd of value... x:" << " "; - for (std::size_t i=0; igetVectorBlock(i)); + inArgs.set_x(thyra_x.getVector()); - std::cout << "Norm: " << c.norm() < & thyra_p = dynamic_cast&>(z); + const ThyraVector & thyra_x = dynamic_cast&>(u); + const ThyraVector & thyra_f = dynamic_cast&>(c); + + Thyra::ConstDetachedVectorView x_view(thyra_x.getVector()); + Thyra::ConstDetachedVectorView p_view(thyra_p.getVector()); + Thyra::ConstDetachedVectorView f_view(thyra_f.getVector()); + + std::cout << "\nEnd of value... 
x:" << " "; + for (std::size_t i=0; iset(c); - updateValue = false; + computeValue = false; + } } void applyJacobian_1(Vector &jv, const Vector &v, const Vector &u, const Vector &z, Real &tol) { - if(updateJacobian1) { + if(verbosityLevel >= Teuchos::VERB_MEDIUM) + *out << "ROL::ThyraProductME_Constraint_SimOpt::applyJacobian_1" << std::endl; + +#ifdef HAVE_ROL_DEBUG + //u and z should be updated in the update functions before calling applyJacobian_1 + TEUCHOS_ASSERT(!u_hasChanged(u)); + TEUCHOS_ASSERT(!z_hasChanged(z)); +#endif + + if(computeJacobian1) { // Create Jacobian const ThyraVector & thyra_x = dynamic_cast&>(u); const ThyraVector & thyra_p = dynamic_cast&>(z); @@ -150,7 +185,11 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { outArgs.set_W_op(lop); thyra_model.evalModel(inArgs, outArgs); jac1 = lop; - updateJacobian1 = false; + + computeJacobian1 = false; + } else { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Constraint_SimOpt::applyJacobian_1, Skipping Jacobian Computation" << std::endl; } const ThyraVector & thyra_v = dynamic_cast&>(v); @@ -161,7 +200,15 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { void applyJacobian_2(Vector &jv, const Vector &v, const Vector &u, const Vector &z, Real &tol) { - //std::cout << "Jacobian 2: " << tol <= Teuchos::VERB_MEDIUM) + *out << "ROL::ThyraProductME_Constraint_SimOpt::applyJacobian_2" << std::endl; + +#ifdef HAVE_ROL_DEBUG + //u and z should be updated in the update functions before calling applyJacobian_1 + TEUCHOS_ASSERT(!u_hasChanged(u)); + TEUCHOS_ASSERT(!z_hasChanged(z)); +#endif const ThyraVector & thyra_p = dynamic_cast&>(z); const ThyraVector & thyra_x = dynamic_cast&>(u); @@ -181,59 +228,56 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { auto p_space = thyra_model.get_p_space(i); auto f_space = thyra_model.get_f_space(); - int num_params = p_space->dim(); - int num_resids = f_space->dim(); auto p_space_plus 
= Teuchos::rcp_dynamic_cast>(p_space); auto f_space_plus = Teuchos::rcp_dynamic_cast>(f_space); - bool p_dist = !p_space_plus->isLocallyReplicated();//p_space->DistributedGlobal(); - bool f_dist = !f_space_plus->isLocallyReplicated();//f_space->DistributedGlobal(); Thyra::ModelEvaluatorBase::DerivativeSupport ds = outArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DfDp,i); // Determine which layout to use for df/dp. Ideally one would look - // at num_params, num_resids, what is supported by the underlying + // at the parameter and residual dimensions, what is supported by the underlying // model evaluator, and the sensitivity method, and make the best // choice to minimze the number of solves. However this choice depends // also on what layout of dg/dx is supported (e.g., if only the operator // form is supported for forward sensitivities, then df/dp must be // DERIV_MV_BY_COL). For simplicity, we order the conditional tests // to get the right layout in most situations. - enum DerivativeLayout { OP, COL, ROW } dfdp_layout; - { // if (sensitivity_method == "Adjoint") - if (ds.supports(Thyra::ModelEvaluatorBase::DERIV_LINEAR_OP)) - dfdp_layout = OP; - else if (ds.supports(Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM) && !f_dist) - dfdp_layout = ROW; - else if (ds.supports(Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM) && !p_dist) - dfdp_layout = COL; + + if (ds.supports(Thyra::ModelEvaluatorBase::DERIV_LINEAR_OP)) { + auto dfdp_op = thyra_model.create_DfDp_op(i); + TEUCHOS_TEST_FOR_EXCEPTION( + dfdp_op == Teuchos::null, std::logic_error, + std::endl << "ROL::ThyraProductME_Constraint_SimOpt::applyJacobian_2(): " << + "Needed df/dp operator (" << i << ") is null!" 
<< std::endl); + outArgs.set_DfDp(i,dfdp_op); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(!ds.supports(Thyra::ModelEvaluatorBase::DERIV_LINEAR_OP), + std::logic_error, + std::endl << + "ROL::ThyraProductME_Constraint_SimOpt::applyJacobian_2(): " << + "The code related to df/dp multivector has been commented out because never tested. " << + std::endl); + + /* + if (ds.supports(Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM) && f_space_plus->isLocallyReplicated()) { + auto dfdp = Thyra::createMembers(p_space, f_space->dim()); + + Thyra::ModelEvaluatorBase::DerivativeMultiVector + dmv_dfdp(dfdp, Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM); + outArgs.set_DfDp(i,dmv_dfdp); + } else if (ds.supports(Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM) && p_space_plus->isLocallyReplicated()) { + auto dfdp = Thyra::createMembers(f_space, p_space->dim()); + Thyra::ModelEvaluatorBase::DerivativeMultiVector + dmv_dfdp(dfdp, Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM); + outArgs.set_DfDp(i,dmv_dfdp); + } else TEUCHOS_TEST_FOR_EXCEPTION( true, std::logic_error, - std::endl << "Piro::NOXSolver::evalModel(): " << + std::endl << "ROL::ThyraProductME_Constraint_SimOpt::applyJacobian_2(): " << "For df/dp(" << i <<") with adjoint sensitivities, " << "underlying ModelEvaluator must support DERIV_LINEAR_OP, " << "DERIV_MV_BY_COL with p not distributed, or " "DERIV_TRANS_MV_BY_ROW with f not distributed." 
<< std::endl); - } - if (dfdp_layout == COL) { - auto dfdp = Thyra::createMembers(f_space, num_params); - Thyra::ModelEvaluatorBase::DerivativeMultiVector - dmv_dfdp(dfdp, Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM); - outArgs.set_DfDp(i,dmv_dfdp); - } - else if (dfdp_layout == ROW) { - auto dfdp = Thyra::createMembers(p_space, num_resids); - - Thyra::ModelEvaluatorBase::DerivativeMultiVector - dmv_dfdp(dfdp, Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM); - outArgs.set_DfDp(i,dmv_dfdp); - } - else if (dfdp_layout == OP) { - auto dfdp_op = thyra_model.create_DfDp_op(i); - TEUCHOS_TEST_FOR_EXCEPTION( - dfdp_op == Teuchos::null, std::logic_error, - std::endl << "Piro::NOXSolver::evalModel(): " << - "Needed df/dp operator (" << i << ") is null!" << std::endl); - outArgs.set_DfDp(i,dfdp_op); + */ } } @@ -251,6 +295,14 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { dfdp_op->apply(Thyra::NOTRANS,*thyra_prodvec_v->getVectorBlock(i), temp_jv_ptr->getVector().ptr(),1.0, 0.0); thyra_jv.axpy(1.0, *temp_jv_ptr); } else { + TEUCHOS_TEST_FOR_EXCEPTION( + dfdp_op == Teuchos::null, + std::logic_error, + std::endl << + "ROL::ThyraProductME_Constraint_SimOpt::applyJacobian_2(): " << + "The code related to df/dp multivector has been commented out because never tested. 
" << + std::endl); + /* Thyra::ModelEvaluatorBase::EDerivativeMultiVectorOrientation dfdp_orient = outArgs.get_DfDp(i).getMultiVectorOrientation(); Thyra::ModelEvaluatorBase::EDerivativeMultiVectorOrientation dgdx_orient = @@ -284,16 +336,27 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { jv_view(ix) += v_view(0,ip)*dfdp_view(ix,ip); } } + */ } } } - virtual void applyInverseJacobian_1(Vector &ijv, + void applyInverseJacobian_1(Vector &ijv, const Vector &v, const Vector &u, const Vector &z, Real &tol) { + + if(verbosityLevel >= Teuchos::VERB_MEDIUM) + *out << "ROL::ThyraProductME_Constraint_SimOpt::applyInverseJacobian_1" << std::endl; + +#ifdef HAVE_ROL_DEBUG + //u and z should be updated in the update functions before calling applyJacobian_2 + TEUCHOS_ASSERT(!u_hasChanged(u)); + TEUCHOS_ASSERT(!z_hasChanged(z)); +#endif + const ThyraVector & thyra_p = dynamic_cast&>(z); const ThyraVector & thyra_x = dynamic_cast&>(u); const ThyraVector & thyra_v = dynamic_cast&>(v); @@ -314,10 +377,13 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { Teuchos::RCP< const Thyra::LinearOpWithSolveFactoryBase > lows_factory = thyra_model.get_W_factory(); TEUCHOS_ASSERT(Teuchos::nonnull(lows_factory)); Teuchos::RCP< Thyra::LinearOpBase > lop; - if(updateJacobian1) + if(computeJacobian1) lop = thyra_model.create_W_op(); - else + else { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Constraint_SimOpt::applyInverseJacobian_1, Skipping Jacobian Computation" << std::endl; lop = jac1; + } Teuchos::RCP< const ::Thyra::DefaultLinearOpSource > losb = Teuchos::rcp(new ::Thyra::DefaultLinearOpSource(lop)); @@ -331,14 +397,15 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { } const Teuchos::RCP > jacobian = lows_factory->createOp(); - if(updateJacobian1) + if(computeJacobian1) { outArgs.set_W_op(lop); thyra_model.evalModel(inArgs, outArgs); outArgs.set_W_op(Teuchos::null); inArgs.set_x(Teuchos::null); 
jac1 = lop; - updateJacobian1 = false; + + computeJacobian1 = false; } if (Teuchos::nonnull(prec_factory)) @@ -372,8 +439,18 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { void applyAdjointJacobian_1(Vector &ajv, const Vector &v, const Vector &u, const Vector &z, Real &tol) { +#ifdef HAVE_ROL_DEBUG + //u and z should be updated in the update functions before calling applyAdjointJacobian_1 + TEUCHOS_ASSERT(!u_hasChanged(u)); + TEUCHOS_ASSERT(!z_hasChanged(z)); +#endif + + if(verbosityLevel >= Teuchos::VERB_MEDIUM) + *out << "ROL::ThyraProductME_Constraint_SimOpt::applyAdjointJacobian_1" << std::endl; + + Teuchos::RCP< Thyra::LinearOpBase > lop; - if(updateJacobian1){ + if(computeJacobian1){ const ThyraVector & thyra_p = dynamic_cast&>(z); const ThyraVector & thyra_x = dynamic_cast&>(u); @@ -395,10 +472,15 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { outArgs.set_W_op(lop); thyra_model.evalModel(inArgs, outArgs); jac1 = lop; - updateJacobian1 = false; + + computeJacobian1 = false; } - else + else { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Constraint_SimOpt::applyAdjointJacobian_1, Skipping Jacobian Computation" << std::endl; lop = jac1; + } + const ThyraVector & thyra_v = dynamic_cast&>(v); ThyraVector & thyra_ajv = dynamic_cast&>(ajv); @@ -406,12 +488,21 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { } - virtual void applyInverseAdjointJacobian_1(Vector &iajv, + void applyInverseAdjointJacobian_1(Vector &iajv, const Vector &v, const Vector &u, const Vector &z, Real &tol) { + if(verbosityLevel >= Teuchos::VERB_MEDIUM) + *out << "ROL::ThyraProductME_Constraint_SimOpt::applyInverseAdjointJacobian_1" << std::endl; + +#ifdef HAVE_ROL_DEBUG + //u and z should be updated in the update functions before calling applyInverseAdjointJacobian_1 + TEUCHOS_ASSERT(!u_hasChanged(u)); + TEUCHOS_ASSERT(!z_hasChanged(z)); +#endif + const ThyraVector & thyra_p = dynamic_cast&>(z); 
const ThyraVector & thyra_x = dynamic_cast&>(u); const ThyraVector & thyra_v = dynamic_cast&>(v); @@ -431,10 +522,13 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { TEUCHOS_ASSERT(Teuchos::nonnull(lows_factory)); Teuchos::RCP< Thyra::LinearOpBase > lop; - if(updateJacobian1) + if(computeJacobian1) lop = thyra_model.create_W_op(); - else + else { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Constraint_SimOpt::applyInverseAdjointJacobian_1, Skipping Jacobian Computation" << std::endl; lop = jac1; + } Teuchos::RCP< const ::Thyra::DefaultLinearOpSource > losb = Teuchos::rcp(new ::Thyra::DefaultLinearOpSource(lop)); @@ -448,13 +542,14 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { } const Teuchos::RCP > jacobian = lows_factory->createOp(); - if(updateJacobian1) + if(computeJacobian1) { outArgs.set_W_op(lop); thyra_model.evalModel(inArgs, outArgs); outArgs.set_W_op(Teuchos::null); jac1 = lop; - updateJacobian1 = false; + + computeJacobian1 = false; } if (Teuchos::nonnull(prec_factory)) @@ -487,13 +582,21 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { void applyAdjointJacobian_2(Vector &ajv, const Vector &v, const Vector &u, const Vector &z, Real &tol) { - // std::cout << "Adjoint Jacobian 2" <= Teuchos::VERB_MEDIUM) + *out << "ROL::ThyraProductME_Constraint_SimOpt::applyAdjointJacobian_2" << std::endl; + +#ifdef HAVE_ROL_DEBUG + //u and z should be updated in the update functions before calling applyAdjointJacobian_2 + TEUCHOS_ASSERT(!u_hasChanged(u)); + TEUCHOS_ASSERT(!z_hasChanged(z)); +#endif const ThyraVector & thyra_p = dynamic_cast&>(z); const ThyraVector & thyra_x = dynamic_cast&>(u); const ThyraVector & thyra_v = dynamic_cast&>(v); + ThyraVector & thyra_ajv = dynamic_cast&>(ajv); - //Teuchos::RCP< Thyra::VectorBase > thyra_f = Thyra::createMember(thyra_model.get_f_space()); Teuchos::RCP > thyra_prodvec_p = Teuchos::rcp_dynamic_cast>(thyra_p.getVector()); 
Teuchos::RCP > thyra_prodvec_ajv = Teuchos::rcp_dynamic_cast>(thyra_ajv.getVector()); @@ -508,59 +611,50 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { auto p_space = thyra_model.get_p_space(i); auto f_space = thyra_model.get_f_space(); - int num_params = p_space->dim(); - int num_resids = f_space->dim(); auto p_space_plus = Teuchos::rcp_dynamic_cast>(p_space); auto f_space_plus = Teuchos::rcp_dynamic_cast>(f_space); - bool p_dist = !p_space_plus->isLocallyReplicated();//p_space->DistributedGlobal(); - bool f_dist = !f_space_plus->isLocallyReplicated();//f_space->DistributedGlobal(); Thyra::ModelEvaluatorBase::DerivativeSupport ds = outArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DfDp,i); - // Determine which layout to use for df/dp. Ideally one would look - // at num_params, num_resids, what is supported by the underlying - // model evaluator, and the sensitivity method, and make the best - // choice to minimze the number of solves. However this choice depends - // also on what layout of dg/dx is supported (e.g., if only the operator - // form is supported for forward sensitivities, then df/dp must be - // DERIV_MV_BY_COL). For simplicity, we order the conditional tests - // to get the right layout in most situations. - enum DerivativeLayout { OP, COL, ROW } dfdp_layout; - { // if (sensitivity_method == "Adjoint") - if (ds.supports(Thyra::ModelEvaluatorBase::DERIV_LINEAR_OP)) - dfdp_layout = OP; - else if (ds.supports(Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM) && !f_dist) - dfdp_layout = ROW; - else if (ds.supports(Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM) && !p_dist) - dfdp_layout = COL; + // Determine which layout to use for df/dp. 
+ + if (ds.supports(Thyra::ModelEvaluatorBase::DERIV_LINEAR_OP)) { + auto dfdp_op = thyra_model.create_DfDp_op(i); + TEUCHOS_TEST_FOR_EXCEPTION( + dfdp_op == Teuchos::null, std::logic_error, + std::endl << "ROL::ThyraProductME_Constraint_SimOpt::applyAdjointJacobian_2: " << + "Needed df/dp operator (" << i << ") is null!" << std::endl); + outArgs.set_DfDp(i,dfdp_op); + } else { + TEUCHOS_TEST_FOR_EXCEPTION( + !ds.supports(Thyra::ModelEvaluatorBase::DERIV_LINEAR_OP), + std::logic_error, + std::endl << + "ROL::ThyraProductME_Constraint_SimOpt::applyAdjointJacobian_2(): " << + "The code related to df/dp multivector has been commented out because never tested. " << + std::endl); + + /* + if (ds.supports(Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM) && f_space_plus->isLocallyReplicated()) { + auto dfdp = Thyra::createMembers(p_space, f_space->dim()); + + Thyra::ModelEvaluatorBase::DerivativeMultiVector + dmv_dfdp(dfdp, Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM); + outArgs.set_DfDp(i,dmv_dfdp); + } else if (ds.supports(Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM) && p_space_plus->isLocallyReplicated()) { + auto dfdp = Thyra::createMembers(f_space, p_space->dim()); + Thyra::ModelEvaluatorBase::DerivativeMultiVector + dmv_dfdp(dfdp, Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM); + outArgs.set_DfDp(i,dmv_dfdp); + } else TEUCHOS_TEST_FOR_EXCEPTION( true, std::logic_error, - std::endl << "Piro::NOXSolver::evalModel(): " << + std::endl << "ROL::ThyraProductME_Constraint_SimOpt::applyAdjointJacobian_2(): " << "For df/dp(" << i <<") with adjoint sensitivities, " << "underlying ModelEvaluator must support DERIV_LINEAR_OP, " << "DERIV_MV_BY_COL with p not distributed, or " "DERIV_TRANS_MV_BY_ROW with f not distributed." 
<< std::endl); - } - if (dfdp_layout == COL) { - auto dfdp = Thyra::createMembers(f_space, num_params); - Thyra::ModelEvaluatorBase::DerivativeMultiVector - dmv_dfdp(dfdp, Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM); - outArgs.set_DfDp(i,dmv_dfdp); - } - else if (dfdp_layout == ROW) { - auto dfdp = Thyra::createMembers(p_space, num_resids); - - Thyra::ModelEvaluatorBase::DerivativeMultiVector - dmv_dfdp(dfdp, Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM); - outArgs.set_DfDp(i,dmv_dfdp); - } - else if (dfdp_layout == OP) { - auto dfdp_op = thyra_model.create_DfDp_op(i); - TEUCHOS_TEST_FOR_EXCEPTION( - dfdp_op == Teuchos::null, std::logic_error, - std::endl << "Piro::NOXSolver::evalModel(): " << - "Needed df/dp operator (" << i << ") is null!" << std::endl); - outArgs.set_DfDp(i,dfdp_op); + */ } } @@ -576,6 +670,14 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { dfdp_op->apply(Thyra::TRANS,*thyra_v.getVector(), thyra_prodvec_ajv->getNonconstVectorBlock(i).ptr(),1.0, 0.0); // Thyra::update(1.0, *tmp, thyra_ajv.getMultiVector().ptr()); } else { + TEUCHOS_TEST_FOR_EXCEPTION( + dfdp_op == Teuchos::null, + std::logic_error, + std::endl << + "ROL::ThyraProductME_Constraint_SimOpt::applyAdjointJacobian_2(): " << + "The code related to df/dp multivector has been commented out because never tested. 
" << + std::endl); + /* Thyra::ModelEvaluatorBase::EDerivativeMultiVectorOrientation dfdp_orient = outArgs.get_DfDp(i).getMultiVectorOrientation(); Thyra::ModelEvaluatorBase::EDerivativeMultiVectorOrientation dgdx_orient = @@ -608,6 +710,7 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { ajv_view(ip) += v_view(0,ix)*dfdp_view(ix,ip); } } + */ } } } @@ -617,6 +720,17 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { const Vector &z, Real &tol) { + if(verbosityLevel >= Teuchos::VERB_MEDIUM) + *out << "ROL::ThyraProductME_Constraint_SimOpt::solve" << std::endl; + + + if(!solveConstraint) { + TEUCHOS_ASSERT(Teuchos::nonnull(rol_u_ptr)); + u.set(*rol_u_ptr); + value(c, u, z, tol); + return; + } + if(thyra_solver.is_null()) Constraint_SimOpt::solve(c,u,z,tol); else { @@ -654,43 +768,91 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { this->update_1(u); } - updateValue = false; + if (Teuchos::is_null(value_ptr_)) + value_ptr_ = c.clone(); + value_ptr_->set(c); + + computeValue = solveConstraint = false; } /** \brief Update constraint functions with respect to Sim variable. x is the optimization variable, - flag = true if optimization variable is changed, + flag = ??, iter is the outer algorithm iterations count. */ - void update_1( const Vector &u, bool flag = true, int iter = -1 ) { - if (flag == true) { - updateValue = true; - updateJacobian1 = true; + void update_1( const Vector &u, bool /*flag*/ = true, int iter = -1 ) { + if(u_hasChanged(u)) { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Constraint_SimOpt::update_1, The State Changed" << std::endl; + computeValue = computeJacobian1 = true; + + if (Teuchos::is_null(rol_u_ptr)) + rol_u_ptr = u.clone(); + rol_u_ptr->set(u); } + if(params != Teuchos::null) params->set("Optimizer Iteration Number", iter); } /** \brief Update constraint functions with respect to Opt variable. 
x is the optimization variable, - flag = true if optimization variable is changed, + flag = ??, iter is the outer algorithm iterations count. */ - void update_2( const Vector &z, bool flag = true, int iter = -1 ) { - if (flag == true) { - updateValue = true; - updateJacobian1 = true; + void update_2( const Vector &z, bool /*flag*/ = true, int iter = -1 ) { + if(z_hasChanged(z)) { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Constraint_SimOpt::update_2, The Parameter Changed" << std::endl; + computeValue = computeJacobian1 = solveConstraint = true; + + if (Teuchos::is_null(rol_z_ptr)) + rol_z_ptr = z.clone(); + rol_z_ptr->set(z); } + if(Teuchos::nonnull(params)) { + auto& z_stored_ptr = params->get > >("Optimization Variable"); + if(Teuchos::is_null(z_stored_ptr) || z_hasChanged(*z_stored_ptr)) { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Constraint_SimOpt::update_2, Signaling That Parameter Changed" << std::endl; + params->set("Optimization Variables Changed", true); + if(Teuchos::is_null(z_stored_ptr)) + z_stored_ptr = z.clone(); + z_stored_ptr->set(z); + } + params->set("Optimizer Iteration Number", iter); - if(flag == true) - params->set("Optimization Variables Changed",true); } } + bool z_hasChanged(const Vector &rol_z) const { + bool changed = true; + if (Teuchos::nonnull(rol_z_ptr)) { + auto diff = rol_z.clone(); + diff->set(*rol_z_ptr); + diff->axpy( -1.0, rol_z ); + Real norm = diff->norm(); + changed = (norm == 0) ? false : true; + } + return changed; + } + + bool u_hasChanged(const Vector &rol_u) const { + bool changed = true; + if (Teuchos::nonnull(rol_u_ptr)) { + auto diff = rol_u.clone(); + diff->set(*rol_u_ptr); + diff->axpy( -1.0, rol_u ); + Real norm = diff->norm(); + changed = (norm == 0) ? 
false : true; + } + return changed; + } + public: - bool updateValue, updateJacobian1; + bool computeValue, computeJacobian1, solveConstraint; private: Teuchos::RCP> thyra_solver; @@ -698,10 +860,12 @@ class ThyraProductME_Constraint_SimOpt : public Constraint_SimOpt { const int g_index; const std::vector p_indices; int num_responses; - Real value_; - Teuchos::RCP > x_ptr, grad_ptr; + Teuchos::RCP > value_ptr_; + Teuchos::RCP > rol_u_ptr, rol_z_ptr; Teuchos::RCP params; + Teuchos::RCP out; Teuchos::RCP< Thyra::LinearOpBase > jac1; + Teuchos::EVerbosityLevel verbosityLevel; }; diff --git a/packages/rol/adapters/thyra/src/function/ROL_ThyraProductME_Objective.hpp b/packages/rol/adapters/thyra/src/function/ROL_ThyraProductME_Objective.hpp index a5745b8a8b2a..0bb75042d7fd 100644 --- a/packages/rol/adapters/thyra/src/function/ROL_ThyraProductME_Objective.hpp +++ b/packages/rol/adapters/thyra/src/function/ROL_ThyraProductME_Objective.hpp @@ -50,7 +50,7 @@ //#include "Thyra_DefaultProductVectorSpace.hpp" #include "Thyra_ProductVectorBase.hpp" #include -#include +#include "Teuchos_VerbosityLevel.hpp" /** \class ROL::ThyraProductME_Objective \brief Implements the ROL::Objective interface for a Thyra Model Evaluator Objective. 
@@ -62,11 +62,14 @@ template class ThyraProductME_Objective : public Objective { public: - ThyraProductME_Objective(Thyra::ModelEvaluatorDefaultBase& thyra_model_, int g_index_, const std::vector& p_indices_,Teuchos::RCP params_ = Teuchos::null) : - thyra_model(thyra_model_), g_index(g_index_), p_indices(p_indices_), params(params_) { - computeValue = true; + ThyraProductME_Objective(Thyra::ModelEvaluatorDefaultBase& thyra_model_, int g_index_, const std::vector& p_indices_, + Teuchos::RCP params_ = Teuchos::null, Teuchos::EVerbosityLevel verbLevel= Teuchos::VERB_HIGH) : + thyra_model(thyra_model_), g_index(g_index_), p_indices(p_indices_), params(params_), + out(Teuchos::VerboseObjectBase::getDefaultOStream()), + verbosityLevel(verbLevel) { + computeValue = computeGradient = true; value_ = 0; - if(params != Teuchos::null) { + if(Teuchos::nonnull(params)) { params->set("Optimizer Iteration Number", -1); params->set("Compute State", true); } @@ -80,8 +83,19 @@ class ThyraProductME_Objective : public Objective { */ Real value( const Vector &rol_x, Real &tol ) { - if(!computeValue) - return value_; +#ifdef HAVE_ROL_DEBUG + //x should be updated in the update function before calling value + TEUCHOS_ASSERT(!x_hasChanged(rol_x)); +#endif + + if(verbosityLevel >= Teuchos::VERB_MEDIUM) + *out << "ROL::ThyraProductME_Objective::value" << std::endl; + + if(!computeValue) { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Objective::value, Skipping Value Computation" << std::endl; + return value_; + } // Real norm = rol_x.norm(); // std::cout << "Value norm: " << norm << std::endl; @@ -119,6 +133,20 @@ class ThyraProductME_Objective : public Objective { */ void gradient( Vector &rol_g, const Vector &rol_x, Real &tol ) { +#ifdef HAVE_ROL_DEBUG + //x should be updated in the update function before calling value + TEUCHOS_ASSERT(!x_hasChanged(rol_x)); +#endif + + if(verbosityLevel >= Teuchos::VERB_MEDIUM) + *out << 
"ROL::ThyraProductME_Objective::gradient" << std::endl; + + if(!computeGradient) { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Objective::gradient, Skipping Gradient Computation" << std::endl; + return rol_g.set(*grad_ptr_); + } + // Real norm = rol_x.norm(); // std::cout << "In Gradient, Value norm: " << norm << std::endl; @@ -139,6 +167,8 @@ class ThyraProductME_Objective : public Objective { Teuchos::RCP< Thyra::VectorBase > g; if(computeValue) { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Objective::gradient, Computing Value" << std::endl; g = Thyra::createMember(thyra_model.get_g_space(g_index)); outArgs.set_g(g_index, g); } @@ -164,28 +194,58 @@ class ThyraProductME_Objective : public Objective { value_ = ::Thyra::get_ele(*g,0); computeValue = false; } + + if (grad_ptr_ == Teuchos::null) + grad_ptr_ = rol_g.clone(); + grad_ptr_->set(rol_g); + + computeGradient = false; }; - void update( const Vector & /*x*/, bool flag = true, int iter = -1 ) { - computeValue = flag; + void update( const Vector & x, bool flag = true, int iter = -1 ) { if(Teuchos::nonnull(params)) { params->set("Optimizer Iteration Number", iter); - if(flag == true) { + } + + if(x_hasChanged(x)) { + + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Objective::update, The Parameter Changed" << std::endl; + computeValue = computeGradient = true; + + if(Teuchos::nonnull(params)) { params->set("Compute State", true); params->set("Optimization Variables Changed", true); } } } + + bool x_hasChanged(const Vector &rol_x) { + bool changed = true; + if (Teuchos::nonnull(rol_x_ptr)) { + rol_x_ptr->axpy( -1.0, rol_x ); + Real norm = rol_x_ptr->norm(); + changed = (norm == 0) ? 
false : true; + } else { + rol_x_ptr = rol_x.clone(); + } + rol_x_ptr->set(rol_x); + return changed; + } public: - bool computeValue; + bool computeValue, computeGradient; private: Thyra::ModelEvaluatorDefaultBase& thyra_model; const int g_index; const std::vector p_indices; Real value_; + Teuchos::RCP > grad_ptr_; + Teuchos::RCP > rol_x_ptr; Teuchos::RCP params; + Teuchos::RCP out; + Teuchos::EVerbosityLevel verbosityLevel; }; // class Objective diff --git a/packages/rol/adapters/thyra/src/function/ROL_ThyraProductME_Objective_SimOpt.hpp b/packages/rol/adapters/thyra/src/function/ROL_ThyraProductME_Objective_SimOpt.hpp index 3faec1f2b38d..fae187499e71 100644 --- a/packages/rol/adapters/thyra/src/function/ROL_ThyraProductME_Objective_SimOpt.hpp +++ b/packages/rol/adapters/thyra/src/function/ROL_ThyraProductME_Objective_SimOpt.hpp @@ -48,6 +48,7 @@ #include "ROL_StdVector.hpp" #include "ROL_Objective_SimOpt.hpp" #include "ROL_Types.hpp" +#include "Teuchos_VerbosityLevel.hpp" using namespace ROL; @@ -57,44 +58,77 @@ class ThyraProductME_Objective_SimOpt : public Objective_SimOpt { public: - ThyraProductME_Objective_SimOpt(Thyra::ModelEvaluatorDefaultBase& thyra_model_, int g_index_, const std::vector& p_indices_,Teuchos::RCP params_ = Teuchos::null) : - thyra_model(thyra_model_), g_index(g_index_), p_indices(p_indices_), params(params_) { - updateValue = true; + ThyraProductME_Objective_SimOpt(Thyra::ModelEvaluatorDefaultBase& thyra_model_, int g_index_, const std::vector& p_indices_, + Teuchos::RCP params_ = Teuchos::null, Teuchos::EVerbosityLevel verbLevel= Teuchos::VERB_HIGH) : + thyra_model(thyra_model_), g_index(g_index_), p_indices(p_indices_), params(params_), + out(Teuchos::VerboseObjectBase::getDefaultOStream()), + verbosityLevel(verbLevel) { + computeValue = computeGradient1 = computeGradient2 = true; value_ = 0; - x_ptr = Teuchos::null; + rol_u_ptr = rol_z_ptr = Teuchos::null; if(params != Teuchos::null) { params->set("Optimizer Iteration Number", -1); + 
params->set > >("Optimization Variable", Teuchos::null); } }; Real value(const Vector &u, const Vector &z, Real &tol ) { - if(updateValue) { - const ThyraVector & thyra_p = dynamic_cast&>(z); - const ThyraVector & thyra_x = dynamic_cast&>(u); - Teuchos::RCP< Thyra::VectorBase > g = Thyra::createMember(thyra_model.get_g_space(g_index)); - Teuchos::RCP > thyra_prodvec_p = Teuchos::rcp_dynamic_cast>(thyra_p.getVector()); +#ifdef HAVE_ROL_DEBUG + //u and z should be updated in the update functions before calling applyAdjointJacobian_2 + TEUCHOS_ASSERT(!u_hasChanged(u)); + TEUCHOS_ASSERT(!z_hasChanged(z)); +#endif - Thyra::ModelEvaluatorBase::InArgs inArgs = thyra_model.createInArgs(); - Thyra::ModelEvaluatorBase::OutArgs outArgs = thyra_model.createOutArgs(); + if(verbosityLevel >= Teuchos::VERB_MEDIUM) + *out << "ROL::ThyraProductME_Objective_SimOpt::value" << std::endl; - outArgs.set_g(g_index, g); - for(std::size_t i=0; igetVectorBlock(i)); - inArgs.set_x(thyra_x.getVector()); + if(!computeValue) { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Objective_SimOpt::value, Skipping Computation of Value" << std::endl; + return value_; + } - thyra_model.evalModel(inArgs, outArgs); + const ThyraVector & thyra_p = dynamic_cast&>(z); + const ThyraVector & thyra_x = dynamic_cast&>(u); + Teuchos::RCP< Thyra::VectorBase > g = Thyra::createMember(thyra_model.get_g_space(g_index)); + Teuchos::RCP > thyra_prodvec_p = Teuchos::rcp_dynamic_cast>(thyra_p.getVector()); - value_ = ::Thyra::get_ele(*g,0); + Thyra::ModelEvaluatorBase::InArgs inArgs = thyra_model.createInArgs(); + Thyra::ModelEvaluatorBase::OutArgs outArgs = thyra_model.createOutArgs(); + + outArgs.set_g(g_index, g); + for(std::size_t i=0; igetVectorBlock(i)); + inArgs.set_x(thyra_x.getVector()); + + thyra_model.evalModel(inArgs, outArgs); + + value_ = ::Thyra::get_ele(*g,0); + + computeValue = false; - updateValue = false; - } return value_; } void gradient_1(Vector &g, const Vector &u, const 
Vector &z, Real &tol ) { +#ifdef HAVE_ROL_DEBUG + //u and z should be updated in the update functions before calling gradient_1 + TEUCHOS_ASSERT(!u_hasChanged(u)); + TEUCHOS_ASSERT(!z_hasChanged(z)); +#endif + + if(verbosityLevel >= Teuchos::VERB_MEDIUM) + *out << "ROL::ThyraProductME_Objective_SimOpt::gradient_1" << std::endl; + + if(!computeGradient1) { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Objective_SimOpt::gradient_1, Skipping Computation of Gradient 1" << std::endl; + return g.set(*grad1_ptr_); + } + const ThyraVector & thyra_p = dynamic_cast&>(z); const ThyraVector & thyra_x = dynamic_cast&>(u); @@ -111,7 +145,7 @@ class ThyraProductME_Objective_SimOpt : public Objective_SimOpt { Teuchos::RCP< Thyra::VectorBase > thyra_g; - if(updateValue) { + if(computeValue) { thyra_g = Thyra::createMember(thyra_model.get_g_space(g_index)); outArgs.set_g(g_index, thyra_g); } @@ -133,14 +167,38 @@ class ThyraProductME_Objective_SimOpt : public Objective_SimOpt { } thyra_model.evalModel(inArgs, outArgs); - if(updateValue) { + if(computeValue) { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Objective_SimOpt::gradient_1, Computing Value" << std::endl; value_ = ::Thyra::get_ele(*thyra_g,0); - updateValue = false; + computeValue = false; } + + if (Teuchos::is_null(grad1_ptr_)) + grad1_ptr_ = g.clone(); + grad1_ptr_->set(g); + + computeGradient1 = false; } void gradient_2(Vector &g, const Vector &u, const Vector &z, Real &tol ) { +#ifdef HAVE_ROL_DEBUG + //u and z should be updated in the update functions before calling gradient_2 + TEUCHOS_ASSERT(!u_hasChanged(u)); + TEUCHOS_ASSERT(!z_hasChanged(z)); +#endif + + if(verbosityLevel >= Teuchos::VERB_MEDIUM) + *out << "ROL::ThyraProductME_Objective_SimOpt::gradient_2" << std::endl; + + + if(!computeGradient2) { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Objective_SimOpt::gradient_2, Skipping Computation of Gradient 2" << std::endl; + return 
g.set(*grad2_ptr_); + } + const ThyraVector & thyra_p = dynamic_cast&>(z); const ThyraVector & thyra_x = dynamic_cast&>(u); @@ -160,7 +218,9 @@ class ThyraProductME_Objective_SimOpt : public Objective_SimOpt { Teuchos::RCP< Thyra::VectorBase > thyra_g; - if(updateValue) { + if(computeValue) { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Objective_SimOpt::gradient_2, Computing Value" << std::endl; thyra_g = Thyra::createMember(thyra_model.get_g_space(g_index)); outArgs.set_g(g_index, thyra_g); } @@ -182,31 +242,86 @@ class ThyraProductME_Objective_SimOpt : public Objective_SimOpt { } thyra_model.evalModel(inArgs, outArgs); - if(updateValue) { + if(computeValue) { value_ = ::Thyra::get_ele(*thyra_g,0); - updateValue = false; + computeValue = false; } + + if (grad2_ptr_ == Teuchos::null) + grad2_ptr_ = g.clone(); + grad2_ptr_->set(g); + + computeGradient2 = false; } - void update( const Vector &/*u*/, const Vector &/*z*/, bool flag = true, int iter = -1) { - updateValue = flag; + void update( const Vector &u, const Vector &z, bool /*flag*/ = true, int iter = -1) { + if(z_hasChanged(z) || u_hasChanged(u)) { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Objective_SimOpt::update, Either The State Or The Parameters Changed" << std::endl; + computeValue = computeGradient1 = computeGradient2 = true; + + if (Teuchos::is_null(rol_z_ptr)) + rol_z_ptr = z.clone(); + rol_z_ptr->set(z); + + if (Teuchos::is_null(rol_u_ptr)) + rol_u_ptr = u.clone(); + rol_u_ptr->set(u); + } + if(params != Teuchos::null) { + auto& z_stored_ptr = params->get > >("Optimization Variable"); + if(Teuchos::is_null(z_stored_ptr) || z_hasChanged(*z_stored_ptr)) { + if(verbosityLevel >= Teuchos::VERB_HIGH) + *out << "ROL::ThyraProductME_Objective_SimOpt::update, Signaling That Parameter Changed" << std::endl; + params->set("Optimization Variables Changed", true); + if(Teuchos::is_null(z_stored_ptr)) + z_stored_ptr = z.clone(); + z_stored_ptr->set(z); + 
} params->set("Optimizer Iteration Number", iter); - if(flag == true) - params->set("Optimization Variables Changed",true); } } + bool z_hasChanged(const Vector &rol_z) const { + bool changed = true; + if (Teuchos::nonnull(rol_z_ptr)) { + auto diff = rol_z.clone(); + diff->set(*rol_z_ptr); + diff->axpy( -1.0, rol_z ); + Real norm = diff->norm(); + changed = (norm == 0) ? false : true; + } + return changed; + } + + bool u_hasChanged(const Vector &rol_u) const { + bool changed = true; + if (Teuchos::nonnull(rol_u_ptr)) { + auto diff = rol_u.clone(); + diff->set(*rol_u_ptr); + diff->axpy( -1.0, rol_u ); + Real norm = diff->norm(); + changed = (norm == 0) ? false : true; + } + return changed; + } + public: - bool updateValue; + bool computeValue, computeGradient1, computeGradient2; private: Thyra::ModelEvaluatorDefaultBase& thyra_model; const int g_index; const std::vector p_indices; Real value_; - Teuchos::RCP > x_ptr; + Teuchos::RCP > grad1_ptr_; + Teuchos::RCP > grad2_ptr_; + Teuchos::RCP > rol_z_ptr; + Teuchos::RCP > rol_u_ptr; Teuchos::RCP params; + Teuchos::RCP out; + Teuchos::EVerbosityLevel verbosityLevel; }; From d9fd96dc0ea42ca4f5116128e495b72eff4d092d Mon Sep 17 00:00:00 2001 From: Mauro Perego Date: Fri, 22 May 2020 17:54:42 -0600 Subject: [PATCH 32/86] Piro: pass verbosity level to ROL objective and Constraint classes. 
--- packages/piro/src/Piro_PerformAnalysis.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/packages/piro/src/Piro_PerformAnalysis.cpp b/packages/piro/src/Piro_PerformAnalysis.cpp index e20f99fdf37f..a3db2d7d0ade 100644 --- a/packages/piro/src/Piro_PerformAnalysis.cpp +++ b/packages/piro/src/Piro_PerformAnalysis.cpp @@ -210,8 +210,19 @@ Piro::PerformROLAnalysis( RCP< Thyra::VectorBase >& p) { auto rolParams = analysisParams.sublist("ROL"); + #ifdef HAVE_PIRO_ROL + int verbose = rolParams.get("Verbosity Level", 3); + Teuchos::EVerbosityLevel verbosityLevel; + switch(verbose) { + case 1: verbosityLevel= Teuchos::VERB_LOW; break; + case 2: verbosityLevel= Teuchos::VERB_MEDIUM; break; + case 3: verbosityLevel= Teuchos::VERB_HIGH; break; + case 4: verbosityLevel= Teuchos::VERB_EXTREME; break; + default: verbosityLevel= Teuchos::VERB_NONE; + } + if(rolParams.isParameter("Use Old Reduced Space Interface") && rolParams.get("Use Old Reduced Space Interface")) { using std::string; @@ -244,7 +255,7 @@ Piro::PerformROLAnalysis( ROL::ThyraVector rol_p(p_prod); - ROL::ThyraProductME_Objective obj(piroModel, g_index, p_indices, Teuchos::rcp(&analysisParams.sublist("Optimization Status"),false)); + ROL::ThyraProductME_Objective obj(piroModel, g_index, p_indices, Teuchos::rcp(&analysisParams.sublist("Optimization Status"),false),verbosityLevel); bool print = rolParams.get("Print Output", false); @@ -450,9 +461,8 @@ Piro::PerformROLAnalysis( Teuchos::RCP> lambda_vec = Thyra::createMember(x_space); ROL::ThyraVector rol_lambda(lambda_vec); - bool always_recompute = true; - ThyraProductME_Objective_SimOpt obj(*model, g_index, p_indices, Teuchos::rcp(&analysisParams.sublist("Optimization Status"),false)); - ThyraProductME_Constraint_SimOpt constr(*model, g_index, p_indices, Teuchos::rcp(&analysisParams.sublist("Optimization Status"),false),always_recompute); + ThyraProductME_Objective_SimOpt obj(*model, g_index, p_indices, 
Teuchos::rcp(&analysisParams.sublist("Optimization Status"),false),verbosityLevel); + ThyraProductME_Constraint_SimOpt constr(*model, g_index, p_indices, Teuchos::rcp(&analysisParams.sublist("Optimization Status"),false),verbosityLevel); constr.setSolveParameters(rolParams.sublist("ROL Options")); From 5893de8486038af53b6205aa0aadc13fb6b83384 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Fri, 22 May 2020 16:22:24 -0600 Subject: [PATCH 33/86] MueLu RefMaxwell: Apply BCs to A_nodal --- .../adapters/xpetra/MueLu_RefMaxwell_decl.hpp | 17 +- .../adapters/xpetra/MueLu_RefMaxwell_def.hpp | 246 +++++++++++++----- 2 files changed, 194 insertions(+), 69 deletions(-) diff --git a/packages/muelu/adapters/xpetra/MueLu_RefMaxwell_decl.hpp b/packages/muelu/adapters/xpetra/MueLu_RefMaxwell_decl.hpp index 3a22591f61cd..98a53a002a16 100644 --- a/packages/muelu/adapters/xpetra/MueLu_RefMaxwell_decl.hpp +++ b/packages/muelu/adapters/xpetra/MueLu_RefMaxwell_decl.hpp @@ -389,6 +389,15 @@ namespace MueLu { //! dump out real-valued multivector void dumpCoords(const RealValuedMultiVector& X, std::string name) const; + //! dump out boolean ArrayView + void dump(const Teuchos::ArrayRCP& v, std::string name) const; + +#ifdef HAVE_MUELU_KOKKOS_REFACTOR + //! dump out boolean Kokkos::View + void dump(const Kokkos::View& v, std::string name) const; +#endif + + //! get a (synced) timer Teuchos::RCP getTimer(std::string name, RCP > comm=Teuchos::null) const; //! set parameters @@ -413,12 +422,10 @@ namespace MueLu { Teuchos::RCP A_nodal_Matrix_, P11_, R11_, AH_, A22_, Addon_Matrix_; //! Vectors for BCs #ifdef HAVE_MUELU_KOKKOS_REFACTOR - Kokkos::View BCrowsKokkos_; - Kokkos::View BCcolsKokkos_; + Kokkos::View BCrowsKokkos_, BCcolsKokkos_, BCdomainKokkos_; #endif - int BCrowcount_, BCcolcount_; - Teuchos::ArrayRCP BCrows_; - Teuchos::ArrayRCP BCcols_; + int BCedges_, BCnodes_; + Teuchos::ArrayRCP BCrows_, BCcols_, BCdomain_; //! Nullspace Teuchos::RCP Nullspace_; //! 
Coordinates diff --git a/packages/muelu/adapters/xpetra/MueLu_RefMaxwell_def.hpp b/packages/muelu/adapters/xpetra/MueLu_RefMaxwell_def.hpp index 7faeb69df280..161c0a7935af 100644 --- a/packages/muelu/adapters/xpetra/MueLu_RefMaxwell_def.hpp +++ b/packages/muelu/adapters/xpetra/MueLu_RefMaxwell_def.hpp @@ -105,6 +105,120 @@ namespace MueLu { + template + void FindNonZeros(const Teuchos::ArrayRCP vals, + Teuchos::ArrayRCP nonzeros) { + TEUCHOS_ASSERT(vals.size() == nonzeros.size()); + typedef typename Teuchos::ScalarTraits::magnitudeType magnitudeType; + const magnitudeType eps = 2.0*Teuchos::ScalarTraits::eps(); + for(size_t i=0; i(vals.size()); i++) { + nonzeros[i] = (Teuchos::ScalarTraits::magnitude(vals[i]) > eps); + } + } + + + template + void DetectDirichletCols(const Xpetra::Matrix& A, + const Teuchos::ArrayRCP& dirichletRows, + Teuchos::ArrayRCP dirichletCols, + Teuchos::ArrayRCP dirichletDomain) { + const Scalar one = Teuchos::ScalarTraits::one(); + RCP > domMap = A.getDomainMap(); + RCP > rowMap = A.getRowMap(); + RCP > colMap = A.getColMap(); + TEUCHOS_ASSERT(static_cast(dirichletRows.size()) == rowMap->getNodeNumElements()); + TEUCHOS_ASSERT(static_cast(dirichletCols.size()) == colMap->getNodeNumElements()); + TEUCHOS_ASSERT(static_cast(dirichletDomain.size()) == domMap->getNodeNumElements()); + RCP > myColsToZero = Xpetra::MultiVectorFactory::Build(colMap, 1, /*zeroOut=*/true); + // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0 + for(size_t i=0; i<(size_t) dirichletRows.size(); i++) { + if (dirichletRows[i]) { + ArrayView indices; + ArrayView values; + A.getLocalRowView(i,indices,values); + for(size_t j=0; j(indices.size()); j++) + myColsToZero->replaceLocalValue(indices[j],0,one); + } + } + + RCP > globalColsToZero; + RCP > importer = A.getCrsGraph()->getImporter(); + if (!importer.is_null()) { + globalColsToZero = Xpetra::MultiVectorFactory::Build(domMap, 1, /*zeroOut=*/true); + // export to domain map + 
globalColsToZero->doExport(*myColsToZero,*importer,Xpetra::ADD); + // import to column map + myColsToZero->doImport(*globalColsToZero,*importer,Xpetra::INSERT); + } + else + globalColsToZero = myColsToZero; + + FindNonZeros(globalColsToZero->getData(0),dirichletDomain); + FindNonZeros(myColsToZero->getData(0),dirichletCols); + } + + +#ifdef HAVE_MUELU_KOKKOS_REFACTOR + + template + void FindNonZeros(const typename Xpetra::MultiVector::dual_view_type::t_dev_um vals, + Kokkos::View nonzeros) { + using ATS = Kokkos::ArithTraits; + using range_type = Kokkos::RangePolicy; + TEUCHOS_ASSERT(vals.extent(0) == nonzeros.extent(0)); + const typename ATS::magnitudeType eps = 2.0*ATS::eps(); + + Kokkos::parallel_for("MueLu:RefMaxwell::FindNonZeros", range_type(0,vals.extent(0)), + KOKKOS_LAMBDA (const size_t i) { + nonzeros(i) = (ATS::magnitude(vals(i,0)) > eps); + }); + } + + template + void DetectDirichletCols(const Xpetra::Matrix& A, + const Kokkos::View & dirichletRows, + Kokkos::View dirichletCols, + Kokkos::View dirichletDomain) { + using range_type = Kokkos::RangePolicy; + const Scalar one = Teuchos::ScalarTraits::one(); + RCP > domMap = A.getDomainMap(); + RCP > rowMap = A.getRowMap(); + RCP > colMap = A.getColMap(); + TEUCHOS_ASSERT(dirichletRows.extent(0) == rowMap->getNodeNumElements()); + TEUCHOS_ASSERT(dirichletCols.extent(0) == colMap->getNodeNumElements()); + TEUCHOS_ASSERT(dirichletDomain.extent(0) == domMap->getNodeNumElements()); + RCP > myColsToZero = Xpetra::VectorFactory::Build(colMap, /*zeroOut=*/true); + // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0 + auto myColsToZeroView = myColsToZero->template getLocalView(); + auto localMatrix = A.getLocalMatrix(); + Kokkos::parallel_for("MueLu:RefMaxwell::DetectDirichletCols", range_type(0,rowMap->getNodeNumElements()), + KOKKOS_LAMBDA(const LocalOrdinal row) { + if (dirichletRows(row)) { + auto rowView = localMatrix.row(row); + auto length = rowView.length; + + for 
(decltype(length) colID = 0; colID < length; colID++) + myColsToZeroView(rowView.colidx(colID),0) = one; + } + }); + + RCP > globalColsToZero; + RCP > importer = A.getCrsGraph()->getImporter(); + if (!importer.is_null()) { + globalColsToZero = Xpetra::VectorFactory::Build(domMap, /*zeroOut=*/true); + // export to domain map + globalColsToZero->doExport(*myColsToZero,*importer,Xpetra::ADD); + // import to column map + myColsToZero->doImport(*globalColsToZero,*importer,Xpetra::INSERT); + } + else + globalColsToZero = myColsToZero; + FindNonZeros(globalColsToZero->template getLocalView(),dirichletDomain); + FindNonZeros(myColsToZero->template getLocalView(),dirichletCols); + } + +#endif + template Teuchos::RCP > RefMaxwell::getDomainMap() const { return SM_Matrix_->getDomainMap(); @@ -303,6 +417,8 @@ namespace MueLu { // Find rows with only 1 or 2 nonzero entries, record them in BCrows_. // BCrows_[i] is true, iff i is a boundary row // BCcols_[i] is true, iff i is a boundary column + int BCedgesLocal = 0; + int BCnodesLocal = 0; #ifdef HAVE_MUELU_KOKKOS_REFACTOR if (useKokkos_) { BCrowsKokkos_ = Utilities_kokkos::DetectDirichletRows(*SM_Matrix_,Teuchos::ScalarTraits::eps(),/*count_twos_as_dirichlet=*/true); @@ -330,31 +446,22 @@ namespace MueLu { } } - BCcolsKokkos_ = Utilities_kokkos::DetectDirichletCols(*D0_Matrix_,BCrowsKokkos_); + BCcolsKokkos_ = Kokkos::View(Kokkos::ViewAllocateWithoutInitializing("dirichletCols"), D0_Matrix_->getColMap()->getNodeNumElements()); + BCdomainKokkos_ = Kokkos::View(Kokkos::ViewAllocateWithoutInitializing("dirichletCols"), D0_Matrix_->getDomainMap()->getNodeNumElements()); + DetectDirichletCols(*D0_Matrix_,BCrowsKokkos_,BCcolsKokkos_,BCdomainKokkos_); + + dump(BCrowsKokkos_, "BCrows.m"); + dump(BCcolsKokkos_, "BCcols.m"); + dump(BCdomainKokkos_, "BCdomain.m"); - int BCrowcountLocal = 0; for (size_t i = 0; igetRowMap()->getComm(), BCrowcountLocal, BCrowcount_); -#else - BCrowcount_ = BCrowcountLocal; -#endif - int BCcolcountLocal = 0; 
- for (size_t i = 0; igetRowMap()->getComm(), BCcolcountLocal, BCcolcount_); -#else - BCcolcount_ = BCcolcountLocal; -#endif - if (IsPrint(Statistics2)) { - GetOStream(Statistics2) << "MueLu::RefMaxwell::compute(): Detected " << BCrowcount_ << " BC rows and " << BCcolcount_ << " BC columns." << std::endl; - } + BCedgesLocal += 1; + for (size_t i = 0; i(Utilities::DetectDirichletRows(*SM_Matrix_,Teuchos::ScalarTraits::eps(),/*count_twos_as_dirichlet=*/true)); @@ -381,57 +488,36 @@ namespace MueLu { } } - BCcols_ = Utilities::DetectDirichletCols(*D0_Matrix_,BCrows_); - int BCrowcountLocal = 0; + BCcols_.resize(D0_Matrix_->getColMap()->getNodeNumElements()); + BCdomain_.resize(D0_Matrix_->getDomainMap()->getNodeNumElements()); + DetectDirichletCols(*D0_Matrix_,BCrows_,BCcols_,BCdomain_); + + dump(BCrows_, "BCrows.m"); + dump(BCcols_, "BCcols.m"); + dump(BCdomain_, "BCdomain.m"); + for (auto it = BCrows_.begin(); it != BCrows_.end(); ++it) if (*it) - BCrowcountLocal += 1; -#ifdef HAVE_MPI - MueLu_sumAll(SM_Matrix_->getRowMap()->getComm(), BCrowcountLocal, BCrowcount_); -#else - BCrowcount_ = BCrowcountLocal; -#endif - int BCcolcountLocal = 0; - for (auto it = BCcols_.begin(); it != BCcols_.end(); ++it) + BCedgesLocal += 1; + for (auto it = BCdomain_.begin(); it != BCdomain_.end(); ++it) if (*it) - BCcolcountLocal += 1; + BCnodesLocal += 1; + } + #ifdef HAVE_MPI - MueLu_sumAll(SM_Matrix_->getRowMap()->getComm(), BCcolcountLocal, BCcolcount_); + MueLu_sumAll(SM_Matrix_->getRowMap()->getComm(), BCedgesLocal, BCedges_); + MueLu_sumAll(SM_Matrix_->getRowMap()->getComm(), BCnodesLocal, BCnodes_); #else - BCcolcount_ = BCcolcountLocal; + BCedges_ = BCedgesLocal; + BCnodes_ = BCnodesLocal; #endif - if (IsPrint(Statistics2)) { - GetOStream(Statistics2) << "MueLu::RefMaxwell::compute(): Detected " << BCrowcount_ << " BC rows and " << BCcolcount_ << " BC columns." 
<< std::endl; - } - } + if (IsPrint(Statistics2)) { + GetOStream(Statistics2) << "MueLu::RefMaxwell::compute(): Detected " << BCedges_ << " BC rows and " << BCnodes_ << " BC columns." << std::endl; + } - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as(BCrowcount_) >= D0_Matrix_->getRangeMap()->getGlobalNumElements(), Exceptions::RuntimeError, + TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as(BCedges_) >= D0_Matrix_->getRangeMap()->getGlobalNumElements(), Exceptions::RuntimeError, "All edges are detected as boundary edges!"); - if (dump_matrices_) { - GetOStream(Runtime0) << "Dumping BCrows, BCcols" << std::endl; - std::ofstream outBCrows("BCrows.m"); - std::ofstream outBCcols("BCcols.m"); -#ifdef HAVE_MUELU_KOKKOS_REFACTOR - if (useKokkos_) { - auto BCrows = Kokkos::create_mirror_view (BCrowsKokkos_); - Kokkos::deep_copy(BCrows , BCrowsKokkos_); - for (size_t i = 0; i < BCrows.size(); i++) - outBCrows << BCrows[i] << "\n"; - - auto BCcols = Kokkos::create_mirror_view (BCcolsKokkos_); - Kokkos::deep_copy(BCcols , BCcolsKokkos_); - for (size_t i = 0; i < BCcols.size(); i++) - outBCcols << BCcols[i] << "\n"; - } else -#endif - { - for (size_t i = 0; i < Teuchos::as(BCrows_.size()); i++) - outBCrows << BCrows_[i] << "\n"; - for (size_t i = 0; i < Teuchos::as(BCcols_.size()); i++) - outBCcols << BCcols_[i] << "\n"; - } - } } //////////////////////////////////////////////////////////////////////////////// @@ -542,6 +628,14 @@ namespace MueLu { coarseLevel.Request("A", rapFact.get()); A_nodal_Matrix_ = coarseLevel.Get< RCP >("A", rapFact.get()); + + // Apply boundary conditions to A_nodal +#ifdef HAVE_MUELU_KOKKOS_REFACTOR + if (useKokkos_) + Utilities_kokkos::ApplyOAZToMatrixRows(A_nodal_Matrix_,BCdomainKokkos_); + else +#endif + Utilities::ApplyOAZToMatrixRows(A_nodal_Matrix_,BCdomain_); dump(*A_nodal_Matrix_, "A_nodal.m"); // build special prolongator @@ -997,7 +1091,7 @@ namespace MueLu { std::transform(coarseType.begin(), coarseType.end(), coarseType.begin(), ::tolower); 
std::transform(coarseType.begin(), ++coarseType.begin(), coarseType.begin(), ::toupper); } - if (BCrowcount_ == 0 && + if (BCedges_ == 0 && (coarseType == "" || coarseType == "Klu" || coarseType == "Klu2") && @@ -1258,6 +1352,30 @@ namespace MueLu { } + template + void RefMaxwell::dump(const Teuchos::ArrayRCP& v, std::string name) const { + if (dump_matrices_) { + GetOStream(Runtime0) << "Dumping to " << name << std::endl; + std::ofstream out(name); + for (size_t i = 0; i < Teuchos::as(v.size()); i++) + out << v[i] << "\n"; + } + } + +#ifdef HAVE_MUELU_KOKKOS_REFACTOR + template + void RefMaxwell::dump(const Kokkos::View& v, std::string name) const { + if (dump_matrices_) { + GetOStream(Runtime0) << "Dumping to " << name << std::endl; + std::ofstream out(name); + auto vH = Kokkos::create_mirror_view (v); + Kokkos::deep_copy(vH , v); + for (size_t i = 0; i < vH.size(); i++) + out << vH[i] << "\n"; + } + } +#endif + template Teuchos::RCP RefMaxwell::getTimer(std::string name, RCP > comm) const { if (IsPrint(Timings)) { From 0727899394e5630b10ef1e1a61e7fcbc75f0adcd Mon Sep 17 00:00:00 2001 From: "Roscoe A. 
Bartlett" Date: Sat, 23 May 2020 13:33:33 -0600 Subject: [PATCH 34/86] Write a package_subproject_list.cmake file for Python-only builds (#6697) --- cmake/std/PullRequestLinuxDriverTest.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmake/std/PullRequestLinuxDriverTest.py b/cmake/std/PullRequestLinuxDriverTest.py index 0cf33393a58e..1d5bca05c8d3 100755 --- a/cmake/std/PullRequestLinuxDriverTest.py +++ b/cmake/std/PullRequestLinuxDriverTest.py @@ -560,6 +560,10 @@ def createPackageEnables(arguments): PR_ENABLE_BOOL(Trilinos_ENABLE_''' + enable_map[arguments.job_base_name] + ''' ON) ''')) + with open ('package_subproject_list.cmake', 'w') as f_out: + f_out.write(dedent('''\ + set(CTEST_LABELS_FOR_SUBPROJECTS ''' + enable_map[arguments.job_base_name] + ''') + ''')) print('Enabled packages:') cmake_rstring = subprocess.check_output(['cmake', '-P', From fadca396b4be1df2134acc9e167271c01797d37e Mon Sep 17 00:00:00 2001 From: "Roscoe A. Bartlett" Date: Sat, 23 May 2020 17:40:24 -0600 Subject: [PATCH 35/86] Move shunit2 to be reused in TrilinosFrameworkTests (#6697) --- cmake/std/atdm/ats1/custom_builds_unit_tests.sh | 3 ++- cmake/std/atdm/cee-rhel6/custom_builds_unit_tests.sh | 3 ++- .../atdm/test/unit_tests/atdm_match_any_keyword_unit_tests.sh | 3 ++- .../std/atdm/test/unit_tests/atdm_match_keyword_unit_tests.sh | 3 ++- cmake/std/atdm/test/unit_tests/set_build_options_unit_tests.sh | 3 ++- cmake/std/atdm/van1-tx2/custom_builds_unit_tests.sh | 3 ++- {cmake/std/atdm => commonTools}/test/shunit2/.githooks/generic | 0 .../test/shunit2/.githooks/pre-commit.shellcheck | 0 {cmake/std/atdm => commonTools}/test/shunit2/.gitignore | 0 {cmake/std/atdm => commonTools}/test/shunit2/.travis.yml | 0 .../std/atdm => commonTools}/test/shunit2/CODE_OF_CONDUCT.md | 0 {cmake/std/atdm => commonTools}/test/shunit2/LICENSE | 0 {cmake/std/atdm => commonTools}/test/shunit2/README.md | 0 .../std/atdm => commonTools}/test/shunit2/doc/CHANGES-2.1.md | 0 
.../test/shunit2/doc/RELEASE_NOTES-2.1.0.txt | 0 .../test/shunit2/doc/RELEASE_NOTES-2.1.1.txt | 0 .../test/shunit2/doc/RELEASE_NOTES-2.1.2.txt | 0 .../test/shunit2/doc/RELEASE_NOTES-2.1.3.txt | 0 .../test/shunit2/doc/RELEASE_NOTES-2.1.4.txt | 0 .../test/shunit2/doc/RELEASE_NOTES-2.1.5.txt | 0 .../test/shunit2/doc/RELEASE_NOTES-2.1.6.txt | 0 .../test/shunit2/doc/RELEASE_NOTES-2.1.7.md | 0 .../test/shunit2/doc/RELEASE_NOTES-2.1.8.md | 0 {cmake/std/atdm => commonTools}/test/shunit2/doc/TODO.txt | 0 .../std/atdm => commonTools}/test/shunit2/doc/contributors.md | 0 .../std/atdm => commonTools}/test/shunit2/doc/design_doc.txt | 0 .../test/shunit2/examples/equality_test.sh | 0 .../atdm => commonTools}/test/shunit2/examples/lineno_test.sh | 0 {cmake/std/atdm => commonTools}/test/shunit2/examples/math.inc | 0 .../atdm => commonTools}/test/shunit2/examples/math_test.sh | 0 .../atdm => commonTools}/test/shunit2/examples/mkdir_test.sh | 0 .../atdm => commonTools}/test/shunit2/examples/mock_file.sh | 0 .../test/shunit2/examples/mock_file_test.sh | 0 .../atdm => commonTools}/test/shunit2/examples/party_test.sh | 0 .../atdm => commonTools}/test/shunit2/examples/suite_test.sh | 0 {cmake/std/atdm => commonTools}/test/shunit2/init_githooks.sh | 0 {cmake/std/atdm => commonTools}/test/shunit2/lib/shflags | 0 {cmake/std/atdm => commonTools}/test/shunit2/lib/versions | 0 {cmake/std/atdm => commonTools}/test/shunit2/shunit2 | 0 .../std/atdm => commonTools}/test/shunit2/shunit2_args_test.sh | 0 .../atdm => commonTools}/test/shunit2/shunit2_asserts_test.sh | 0 .../atdm => commonTools}/test/shunit2/shunit2_failures_test.sh | 0 .../atdm => commonTools}/test/shunit2/shunit2_macros_test.sh | 0 .../std/atdm => commonTools}/test/shunit2/shunit2_misc_test.sh | 0 .../test/shunit2/shunit2_standalone_test.sh | 0 .../std/atdm => commonTools}/test/shunit2/shunit2_test_helpers | 0 {cmake/std/atdm => commonTools}/test/shunit2/test_runner | 0 47 files changed, 12 insertions(+), 6 deletions(-) rename 
{cmake/std/atdm => commonTools}/test/shunit2/.githooks/generic (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/.githooks/pre-commit.shellcheck (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/.gitignore (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/.travis.yml (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/CODE_OF_CONDUCT.md (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/LICENSE (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/README.md (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/doc/CHANGES-2.1.md (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/doc/RELEASE_NOTES-2.1.0.txt (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/doc/RELEASE_NOTES-2.1.1.txt (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/doc/RELEASE_NOTES-2.1.2.txt (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/doc/RELEASE_NOTES-2.1.3.txt (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/doc/RELEASE_NOTES-2.1.4.txt (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/doc/RELEASE_NOTES-2.1.5.txt (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/doc/RELEASE_NOTES-2.1.6.txt (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/doc/RELEASE_NOTES-2.1.7.md (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/doc/RELEASE_NOTES-2.1.8.md (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/doc/TODO.txt (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/doc/contributors.md (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/doc/design_doc.txt (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/examples/equality_test.sh (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/examples/lineno_test.sh (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/examples/math.inc (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/examples/math_test.sh (100%) rename {cmake/std/atdm => 
commonTools}/test/shunit2/examples/mkdir_test.sh (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/examples/mock_file.sh (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/examples/mock_file_test.sh (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/examples/party_test.sh (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/examples/suite_test.sh (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/init_githooks.sh (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/lib/shflags (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/lib/versions (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/shunit2 (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/shunit2_args_test.sh (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/shunit2_asserts_test.sh (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/shunit2_failures_test.sh (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/shunit2_macros_test.sh (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/shunit2_misc_test.sh (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/shunit2_standalone_test.sh (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/shunit2_test_helpers (100%) rename {cmake/std/atdm => commonTools}/test/shunit2/test_runner (100%) diff --git a/cmake/std/atdm/ats1/custom_builds_unit_tests.sh b/cmake/std/atdm/ats1/custom_builds_unit_tests.sh index ba0394775339..54e69f419aba 100755 --- a/cmake/std/atdm/ats1/custom_builds_unit_tests.sh +++ b/cmake/std/atdm/ats1/custom_builds_unit_tests.sh @@ -67,4 +67,5 @@ testAll() { # Run the unit tests # -. ${ATDM_CONFIG_SCRIPT_DIR}/test/shunit2/shunit2 +SHUNIT2_DIR=`readlink -f ${ATDM_CONFIG_SCRIPT_DIR}/../../../commonTools/test/shunit2` +. 
${SHUNIT2_DIR}/shunit2 diff --git a/cmake/std/atdm/cee-rhel6/custom_builds_unit_tests.sh b/cmake/std/atdm/cee-rhel6/custom_builds_unit_tests.sh index 4d2625229255..e012085aa1cc 100755 --- a/cmake/std/atdm/cee-rhel6/custom_builds_unit_tests.sh +++ b/cmake/std/atdm/cee-rhel6/custom_builds_unit_tests.sh @@ -132,4 +132,5 @@ testAll() { # Run the unit tests # -. ${ATDM_CONFIG_SCRIPT_DIR}/test/shunit2/shunit2 +SHUNIT2_DIR=`readlink -f ${ATDM_CONFIG_SCRIPT_DIR}/../../../commonTools/test/shunit2` +. ${SHUNIT2_DIR}/shunit2 diff --git a/cmake/std/atdm/test/unit_tests/atdm_match_any_keyword_unit_tests.sh b/cmake/std/atdm/test/unit_tests/atdm_match_any_keyword_unit_tests.sh index d0d60754980c..fd2a394c58cb 100755 --- a/cmake/std/atdm/test/unit_tests/atdm_match_any_keyword_unit_tests.sh +++ b/cmake/std/atdm/test/unit_tests/atdm_match_any_keyword_unit_tests.sh @@ -38,4 +38,5 @@ testMatch_1() { # Run the unit tests # -. ${ATDM_CONFIG_SCRIPT_DIR}/test/shunit2/shunit2 +SHUNIT2_DIR=`readlink -f ${ATDM_CONFIG_SCRIPT_DIR}/../../../commonTools/test/shunit2` +. ${SHUNIT2_DIR}/shunit2 diff --git a/cmake/std/atdm/test/unit_tests/atdm_match_keyword_unit_tests.sh b/cmake/std/atdm/test/unit_tests/atdm_match_keyword_unit_tests.sh index 53c15fe7472f..be87afe19432 100755 --- a/cmake/std/atdm/test/unit_tests/atdm_match_keyword_unit_tests.sh +++ b/cmake/std/atdm/test/unit_tests/atdm_match_keyword_unit_tests.sh @@ -65,4 +65,5 @@ testBasicMatches() { # Run the unit tests # -. ${ATDM_CONFIG_SCRIPT_DIR}/test/shunit2/shunit2 +SHUNIT2_DIR=`readlink -f ${ATDM_CONFIG_SCRIPT_DIR}/../../../commonTools/test/shunit2` +. 
${SHUNIT2_DIR}/shunit2 diff --git a/cmake/std/atdm/test/unit_tests/set_build_options_unit_tests.sh b/cmake/std/atdm/test/unit_tests/set_build_options_unit_tests.sh index 2384a6bcee22..e377c48fea2b 100755 --- a/cmake/std/atdm/test/unit_tests/set_build_options_unit_tests.sh +++ b/cmake/std/atdm/test/unit_tests/set_build_options_unit_tests.sh @@ -477,4 +477,5 @@ testPrimaryTested() { # Run the unit tests # -. ${ATDM_CONFIG_SCRIPT_DIR}/test/shunit2/shunit2 +SHUNIT2_DIR=`readlink -f ${ATDM_CONFIG_SCRIPT_DIR}/../../../commonTools/test/shunit2` +. ${SHUNIT2_DIR}/shunit2 diff --git a/cmake/std/atdm/van1-tx2/custom_builds_unit_tests.sh b/cmake/std/atdm/van1-tx2/custom_builds_unit_tests.sh index 35b562c2c7fe..78fb6def891b 100755 --- a/cmake/std/atdm/van1-tx2/custom_builds_unit_tests.sh +++ b/cmake/std/atdm/van1-tx2/custom_builds_unit_tests.sh @@ -52,4 +52,5 @@ testAll() { # Run the unit tests # -. ${ATDM_CONFIG_SCRIPT_DIR}/test/shunit2/shunit2 +SHUNIT2_DIR=`readlink -f ${ATDM_CONFIG_SCRIPT_DIR}/../../../commonTools/test/shunit2` +. 
${SHUNIT2_DIR}/shunit2 diff --git a/cmake/std/atdm/test/shunit2/.githooks/generic b/commonTools/test/shunit2/.githooks/generic similarity index 100% rename from cmake/std/atdm/test/shunit2/.githooks/generic rename to commonTools/test/shunit2/.githooks/generic diff --git a/cmake/std/atdm/test/shunit2/.githooks/pre-commit.shellcheck b/commonTools/test/shunit2/.githooks/pre-commit.shellcheck similarity index 100% rename from cmake/std/atdm/test/shunit2/.githooks/pre-commit.shellcheck rename to commonTools/test/shunit2/.githooks/pre-commit.shellcheck diff --git a/cmake/std/atdm/test/shunit2/.gitignore b/commonTools/test/shunit2/.gitignore similarity index 100% rename from cmake/std/atdm/test/shunit2/.gitignore rename to commonTools/test/shunit2/.gitignore diff --git a/cmake/std/atdm/test/shunit2/.travis.yml b/commonTools/test/shunit2/.travis.yml similarity index 100% rename from cmake/std/atdm/test/shunit2/.travis.yml rename to commonTools/test/shunit2/.travis.yml diff --git a/cmake/std/atdm/test/shunit2/CODE_OF_CONDUCT.md b/commonTools/test/shunit2/CODE_OF_CONDUCT.md similarity index 100% rename from cmake/std/atdm/test/shunit2/CODE_OF_CONDUCT.md rename to commonTools/test/shunit2/CODE_OF_CONDUCT.md diff --git a/cmake/std/atdm/test/shunit2/LICENSE b/commonTools/test/shunit2/LICENSE similarity index 100% rename from cmake/std/atdm/test/shunit2/LICENSE rename to commonTools/test/shunit2/LICENSE diff --git a/cmake/std/atdm/test/shunit2/README.md b/commonTools/test/shunit2/README.md similarity index 100% rename from cmake/std/atdm/test/shunit2/README.md rename to commonTools/test/shunit2/README.md diff --git a/cmake/std/atdm/test/shunit2/doc/CHANGES-2.1.md b/commonTools/test/shunit2/doc/CHANGES-2.1.md similarity index 100% rename from cmake/std/atdm/test/shunit2/doc/CHANGES-2.1.md rename to commonTools/test/shunit2/doc/CHANGES-2.1.md diff --git a/cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.0.txt b/commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.0.txt similarity index 
100% rename from cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.0.txt rename to commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.0.txt diff --git a/cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.1.txt b/commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.1.txt similarity index 100% rename from cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.1.txt rename to commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.1.txt diff --git a/cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.2.txt b/commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.2.txt similarity index 100% rename from cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.2.txt rename to commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.2.txt diff --git a/cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.3.txt b/commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.3.txt similarity index 100% rename from cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.3.txt rename to commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.3.txt diff --git a/cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.4.txt b/commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.4.txt similarity index 100% rename from cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.4.txt rename to commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.4.txt diff --git a/cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.5.txt b/commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.5.txt similarity index 100% rename from cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.5.txt rename to commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.5.txt diff --git a/cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.6.txt b/commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.6.txt similarity index 100% rename from cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.6.txt rename to commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.6.txt diff --git a/cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.7.md b/commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.7.md similarity index 100% rename from 
cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.7.md rename to commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.7.md diff --git a/cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.8.md b/commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.8.md similarity index 100% rename from cmake/std/atdm/test/shunit2/doc/RELEASE_NOTES-2.1.8.md rename to commonTools/test/shunit2/doc/RELEASE_NOTES-2.1.8.md diff --git a/cmake/std/atdm/test/shunit2/doc/TODO.txt b/commonTools/test/shunit2/doc/TODO.txt similarity index 100% rename from cmake/std/atdm/test/shunit2/doc/TODO.txt rename to commonTools/test/shunit2/doc/TODO.txt diff --git a/cmake/std/atdm/test/shunit2/doc/contributors.md b/commonTools/test/shunit2/doc/contributors.md similarity index 100% rename from cmake/std/atdm/test/shunit2/doc/contributors.md rename to commonTools/test/shunit2/doc/contributors.md diff --git a/cmake/std/atdm/test/shunit2/doc/design_doc.txt b/commonTools/test/shunit2/doc/design_doc.txt similarity index 100% rename from cmake/std/atdm/test/shunit2/doc/design_doc.txt rename to commonTools/test/shunit2/doc/design_doc.txt diff --git a/cmake/std/atdm/test/shunit2/examples/equality_test.sh b/commonTools/test/shunit2/examples/equality_test.sh similarity index 100% rename from cmake/std/atdm/test/shunit2/examples/equality_test.sh rename to commonTools/test/shunit2/examples/equality_test.sh diff --git a/cmake/std/atdm/test/shunit2/examples/lineno_test.sh b/commonTools/test/shunit2/examples/lineno_test.sh similarity index 100% rename from cmake/std/atdm/test/shunit2/examples/lineno_test.sh rename to commonTools/test/shunit2/examples/lineno_test.sh diff --git a/cmake/std/atdm/test/shunit2/examples/math.inc b/commonTools/test/shunit2/examples/math.inc similarity index 100% rename from cmake/std/atdm/test/shunit2/examples/math.inc rename to commonTools/test/shunit2/examples/math.inc diff --git a/cmake/std/atdm/test/shunit2/examples/math_test.sh b/commonTools/test/shunit2/examples/math_test.sh similarity index 100% 
rename from cmake/std/atdm/test/shunit2/examples/math_test.sh rename to commonTools/test/shunit2/examples/math_test.sh diff --git a/cmake/std/atdm/test/shunit2/examples/mkdir_test.sh b/commonTools/test/shunit2/examples/mkdir_test.sh similarity index 100% rename from cmake/std/atdm/test/shunit2/examples/mkdir_test.sh rename to commonTools/test/shunit2/examples/mkdir_test.sh diff --git a/cmake/std/atdm/test/shunit2/examples/mock_file.sh b/commonTools/test/shunit2/examples/mock_file.sh similarity index 100% rename from cmake/std/atdm/test/shunit2/examples/mock_file.sh rename to commonTools/test/shunit2/examples/mock_file.sh diff --git a/cmake/std/atdm/test/shunit2/examples/mock_file_test.sh b/commonTools/test/shunit2/examples/mock_file_test.sh similarity index 100% rename from cmake/std/atdm/test/shunit2/examples/mock_file_test.sh rename to commonTools/test/shunit2/examples/mock_file_test.sh diff --git a/cmake/std/atdm/test/shunit2/examples/party_test.sh b/commonTools/test/shunit2/examples/party_test.sh similarity index 100% rename from cmake/std/atdm/test/shunit2/examples/party_test.sh rename to commonTools/test/shunit2/examples/party_test.sh diff --git a/cmake/std/atdm/test/shunit2/examples/suite_test.sh b/commonTools/test/shunit2/examples/suite_test.sh similarity index 100% rename from cmake/std/atdm/test/shunit2/examples/suite_test.sh rename to commonTools/test/shunit2/examples/suite_test.sh diff --git a/cmake/std/atdm/test/shunit2/init_githooks.sh b/commonTools/test/shunit2/init_githooks.sh similarity index 100% rename from cmake/std/atdm/test/shunit2/init_githooks.sh rename to commonTools/test/shunit2/init_githooks.sh diff --git a/cmake/std/atdm/test/shunit2/lib/shflags b/commonTools/test/shunit2/lib/shflags similarity index 100% rename from cmake/std/atdm/test/shunit2/lib/shflags rename to commonTools/test/shunit2/lib/shflags diff --git a/cmake/std/atdm/test/shunit2/lib/versions b/commonTools/test/shunit2/lib/versions similarity index 100% rename from 
cmake/std/atdm/test/shunit2/lib/versions rename to commonTools/test/shunit2/lib/versions diff --git a/cmake/std/atdm/test/shunit2/shunit2 b/commonTools/test/shunit2/shunit2 similarity index 100% rename from cmake/std/atdm/test/shunit2/shunit2 rename to commonTools/test/shunit2/shunit2 diff --git a/cmake/std/atdm/test/shunit2/shunit2_args_test.sh b/commonTools/test/shunit2/shunit2_args_test.sh similarity index 100% rename from cmake/std/atdm/test/shunit2/shunit2_args_test.sh rename to commonTools/test/shunit2/shunit2_args_test.sh diff --git a/cmake/std/atdm/test/shunit2/shunit2_asserts_test.sh b/commonTools/test/shunit2/shunit2_asserts_test.sh similarity index 100% rename from cmake/std/atdm/test/shunit2/shunit2_asserts_test.sh rename to commonTools/test/shunit2/shunit2_asserts_test.sh diff --git a/cmake/std/atdm/test/shunit2/shunit2_failures_test.sh b/commonTools/test/shunit2/shunit2_failures_test.sh similarity index 100% rename from cmake/std/atdm/test/shunit2/shunit2_failures_test.sh rename to commonTools/test/shunit2/shunit2_failures_test.sh diff --git a/cmake/std/atdm/test/shunit2/shunit2_macros_test.sh b/commonTools/test/shunit2/shunit2_macros_test.sh similarity index 100% rename from cmake/std/atdm/test/shunit2/shunit2_macros_test.sh rename to commonTools/test/shunit2/shunit2_macros_test.sh diff --git a/cmake/std/atdm/test/shunit2/shunit2_misc_test.sh b/commonTools/test/shunit2/shunit2_misc_test.sh similarity index 100% rename from cmake/std/atdm/test/shunit2/shunit2_misc_test.sh rename to commonTools/test/shunit2/shunit2_misc_test.sh diff --git a/cmake/std/atdm/test/shunit2/shunit2_standalone_test.sh b/commonTools/test/shunit2/shunit2_standalone_test.sh similarity index 100% rename from cmake/std/atdm/test/shunit2/shunit2_standalone_test.sh rename to commonTools/test/shunit2/shunit2_standalone_test.sh diff --git a/cmake/std/atdm/test/shunit2/shunit2_test_helpers b/commonTools/test/shunit2/shunit2_test_helpers similarity index 100% rename from 
cmake/std/atdm/test/shunit2/shunit2_test_helpers rename to commonTools/test/shunit2/shunit2_test_helpers diff --git a/cmake/std/atdm/test/shunit2/test_runner b/commonTools/test/shunit2/test_runner similarity index 100% rename from cmake/std/atdm/test/shunit2/test_runner rename to commonTools/test/shunit2/test_runner From f3868f36e1cfe3d10bbe1633145d0398982bcabc Mon Sep 17 00:00:00 2001 From: "Roscoe A. Bartlett" Date: Sat, 23 May 2020 19:10:20 -0600 Subject: [PATCH 36/86] Exclude some ST packages from PR testing selection (#6697) --- commonTools/framework/CMakeLists.txt | 45 ++--- ...ed-trilinos-packages-helpers-unit-tests.sh | 155 ++++++++++++++++++ .../get-changed-trilinos-packages-helpers.sh | 123 ++++++++++++++ .../get-changed-trilinos-packages.sh | 103 ++++++------ 4 files changed, 349 insertions(+), 77 deletions(-) create mode 100755 commonTools/framework/get-changed-trilinos-packages-helpers-unit-tests.sh create mode 100644 commonTools/framework/get-changed-trilinos-packages-helpers.sh diff --git a/commonTools/framework/CMakeLists.txt b/commonTools/framework/CMakeLists.txt index e559ed12d731..56b34fb8a3db 100644 --- a/commonTools/framework/CMakeLists.txt +++ b/commonTools/framework/CMakeLists.txt @@ -124,34 +124,39 @@ create_get_changed_trilinos_packages_test(ProjectsList_TeuchosCore "ALL_PACKAGES,TeuchosCore" "ALL_PACKAGES,TeuchosCore" "Setting Trilinos_ENABLE_ALL_PACKAGES = ON;Setting Trilinos_ENABLE_TeuchosCore = ON" - "TrilinosFrameworkTests TrilinosATDMConfigTests .*Kokkos Teuchos KokkosKernels .*EpetraExt Tpetra .*Domi Thyra .*SEACAS .*MueLu .*ShyLU_DD ShyLU .*Tempus Stokhos ROL Piro Panzer PyTrilinos Adelus TrilinosCouplings Pike" + "TrilinosFrameworkTests TrilinosATDMConfigTests .*Kokkos Teuchos KokkosKernels .*EpetraExt Tpetra .*Domi Thyra .*SEACAS .*Teko Intrepid .*MueLu .*ShyLU_DD ShyLU .*Tempus Stokhos ROL Piro Panzer Adelus TrilinosCouplings Pike" ) # The above test ensures that it can detect global build files as well as - # package files. 
Above, we check several of the parent packages i Trilinos + # package files. Above, we check several of the parent packages in Trilinos # and we want to ensure that no subpackages are listed in the array - # CTEST_LABELS_FOR_SUBPROJECTS. - - -create_get_changed_trilinos_packages_test(cmake_std_atdm_TeuchosCore_PyTrilinos - "cmake/std/atdm/anything\npackages/teuchos/core/CMakeLists.txt\npackages/PyTrilinos/anything" - "TrilinosATDMConfigTests,TeuchosCore,PyTrilinos" - "TrilinosATDMConfigTests,TeuchosCore,PyTrilinos" - "Setting Trilinos_ENABLE_TrilinosATDMConfigTests = ON;Setting Trilinos_ENABLE_TeuchosCore = ON;Setting Trilinos_ENABLE_PyTrilinos = ON" - "TrilinosATDMConfigTests TeuchosCore PyTrilinos" + # CTEST_LABELS_FOR_SUBPROJECTS. We also check to confirm that excluded + # packages TriKota and PyTrilinos are excluded. To do that, we list non EX + # packages before and after them as they currently appear in the + # Trilinos/PackagesList.cmake file. If the PackagesList.cmake file changes, + # then this test may have to change to accommodate that. Therefore, we don't + # want to list too many packages above in the regex. + + +create_get_changed_trilinos_packages_test(cmake_std_atdm_TeuchosCore_PyTrilinos_Pliris + "cmake/std/atdm/anything\npackages/teuchos/core/CMakeLists.txt\npackages/PyTrilinos/anything\npackages/pliris/CMakeLists.txt" + "TrilinosATDMConfigTests,TeuchosCore,PyTrilinos,Pliris" + "TrilinosATDMConfigTests,TeuchosCore,Pliris" + "Setting Trilinos_ENABLE_TrilinosATDMConfigTests = ON;Setting Trilinos_ENABLE_TeuchosCore = ON;Setting Trilinos_ENABLE_Pliris = ON" + "TrilinosATDMConfigTests TeuchosCore Pliris" ) # The above test ensures that cmake/std/atdm/ changes don't trigger global - # builds and it makes sure that PT (TeuchosCore) and ST packages - # (PyTrilinos) do get enabled. + # builds and it makes sure that PT (TeuchosCore) and ST packages (Pliris) do + # get enabled (but excluded ST package PyTrilinos does not get included). 
-create_get_changed_trilinos_packages_test(TeuchosCore_PyTrilinos_NewPackage - "packages/teuchos/core/CMakeLists.txt\npackages/PyTrilinos/anything\npackages/new_package/anything" - "TeuchosCore,PyTrilinos,NewPackage" - "TeuchosCore,PyTrilinos" - "Setting Trilinos_ENABLE_TeuchosCore = ON;Setting Trilinos_ENABLE_PyTrilinos = ON" - "TeuchosCore PyTrilinos" +create_get_changed_trilinos_packages_test(TeuchosCore_TriKota_NewPackage_Pliris + "packages/teuchos/core/CMakeLists.txt\npackages/TriKota/anything\npackages/new_package/anything\npackages/pliris/a.txt" + "TeuchosCore,TriKota,NewPackage,Pliris" + "TeuchosCore,Pliris" + "Setting Trilinos_ENABLE_TeuchosCore = ON;Setting Trilinos_ENABLE_Pliris = ON" + "TeuchosCore Pliris" ) - # The above test ensures PT (TeuchosCore) andST packages (PyTrilinos) do get + # The above test ensures PT (TeuchosCore) and ST packages (Pliris) do get # enabled but EX packages (NewPackage) don't. diff --git a/commonTools/framework/get-changed-trilinos-packages-helpers-unit-tests.sh b/commonTools/framework/get-changed-trilinos-packages-helpers-unit-tests.sh new file mode 100755 index 000000000000..c3548fed4425 --- /dev/null +++ b/commonTools/framework/get-changed-trilinos-packages-helpers-unit-tests.sh @@ -0,0 +1,155 @@ +#!/bin/bash + +CURRENT_SCRIPTS_DIR=`echo $BASH_SOURCE | sed "s/\(.*\)\/.*\.sh/\1/g"` + +source ${CURRENT_SCRIPTS_DIR}/get-changed-trilinos-packages-helpers.sh + +# +# Unit tests +# + +test_comma_list_to_list() { + + list=$(comma_list_to_list "a") + assertEquals "${list}" "a" + + list=$(comma_list_to_list "aaa,bbb,ccc") + assertEquals "${list}" "aaa bbb ccc" + +} + + +test_list_to_comma_list() { + + comma_list=$(list_to_comma_list "a") + assertEquals "${comma_list}" "a" + + comma_list=$(list_to_comma_list "aaa bbb ccc") + assertEquals "${comma_list}" "aaa,bbb,ccc" + +} + + +test_list_contains_ele() { + + list_contains_ele "aaa" "aaa bbb ccc" + ${_ASSERT_EQUALS_} $? 0 + + list_contains_ele "aa" "aaa bbb ccc" + ${_ASSERT_EQUALS_} $? 
1 + + list_contains_ele "bbb" "aaa bbb ccc" + ${_ASSERT_EQUALS_} $? 0 + + list_contains_ele "ccc" "aaa bbb ccc" + ${_ASSERT_EQUALS_} $? 0 + + list_contains_ele "aa" "" + ${_ASSERT_EQUALS_} $? 1 + + list_contains_ele "aa" "" + ${_ASSERT_EQUALS_} $? 1 + + list_contains_ele "" "" + ${_ASSERT_EQUALS_} $? 1 + + list_contains_ele "aaa" "bbb" + ${_ASSERT_EQUALS_} $? 1 + + list_contains_ele "bbb" "aa" + ${_ASSERT_EQUALS_} $? 1 + +} + + +test_comma_list_contains_ele() { + + comma_list_contains_ele "aaa" "aaa,bbb,ccc" + ${_ASSERT_EQUALS_} $? 0 + + comma_list_contains_ele "aa" "aaa,bbb,ccc" + ${_ASSERT_EQUALS_} $? 1 + + comma_list_contains_ele "bbb" "aaa,bbb,ccc" + ${_ASSERT_EQUALS_} $? 0 + + comma_list_contains_ele "ccc" "aaa,bbb,ccc" + ${_ASSERT_EQUALS_} $? 0 + + comma_list_contains_ele "aa" "" + ${_ASSERT_EQUALS_} $? 1 + + comma_list_contains_ele "aa" "" + ${_ASSERT_EQUALS_} $? 1 + + comma_list_contains_ele "" "" + ${_ASSERT_EQUALS_} $? 1 + + comma_list_contains_ele "aaa" "bbb" + ${_ASSERT_EQUALS_} $? 1 + + comma_list_contains_ele "aa" "bbb" + ${_ASSERT_EQUALS_} $? 
1 + +} + + +test_trilinos_filter_packages_to_test() { + + generate_trilinos_package_dependencies_xml_file + + TRILINOS_EXCLUDE_PACKAGES_FROM_PR_TESTING= + + filtered_packages=$(trilinos_filter_packages_to_test "") + assertEquals "${filtered_packages}" "" + + filtered_packages=$(trilinos_filter_packages_to_test "Teuchos,Tpetra") + ${_ASSERT_EQUALS_} "${filtered_packages}" "Teuchos,Tpetra" + + filtered_packages=$(trilinos_filter_packages_to_test "Teuchos,Tpetra,PyTrilinos,Panzer") + ${_ASSERT_EQUALS_} "${filtered_packages}" "Teuchos,Tpetra,PyTrilinos,Panzer" + + filtered_packages=$(trilinos_filter_packages_to_test "TriKota,Teuchos,Tpetra,PyTrilinos,Panzer") + ${_ASSERT_EQUALS_} "${filtered_packages}" "TriKota,Teuchos,Tpetra,PyTrilinos,Panzer" + + TRILINOS_EXCLUDE_PACKAGES_FROM_PR_TESTING=(TriKota PyTrilinos) + + filtered_packages=$(trilinos_filter_packages_to_test "") + assertEquals "${filtered_packages}" "" + + filtered_packages=$(trilinos_filter_packages_to_test "Teuchos,Tpetra") + ${_ASSERT_EQUALS_} "${filtered_packages}" "Teuchos,Tpetra" + + filtered_packages=$(trilinos_filter_packages_to_test "Teuchos,Tpetra,PyTrilinos,Panzer") + ${_ASSERT_EQUALS_} "${filtered_packages}" "Teuchos,Tpetra,Panzer" + + filtered_packages=$(trilinos_filter_packages_to_test "TriKota,Teuchos,Tpetra,PyTrilinos,Panzer") + ${_ASSERT_EQUALS_} "${filtered_packages}" "Teuchos,Tpetra,Panzer" + +} + + +test_trilinos_get_all_toplevel_packages() { + + generate_trilinos_package_dependencies_xml_file + + all_toplevel_packages=$(trilinos_get_all_toplevel_packages) + #echo "all_toplevel_packages='${all_toplevel_packages}'" + assertContains "${all_toplevel_packages}" "TrilinosFrameworkTests," + assertContains "${all_toplevel_packages}" ",TrilinosATDMConfigTests," + assertContains "${all_toplevel_packages}" ",Teuchos," + assertContains "${all_toplevel_packages}" ",Tpetra," + assertContains "${all_toplevel_packages}" ",TriKota," + assertContains "${all_toplevel_packages}" ",PyTrilinos," + assertContains 
"${all_toplevel_packages}" ",NewPackage," + assertContains "${all_toplevel_packages}" ",Panzer," + +} + + +# +# Run the unit tests +# + +SHUNIT2_DIR=`readlink -f ${CURRENT_SCRIPTS_DIR}/../test/shunit2` +. ${SHUNIT2_DIR}/shunit2 diff --git a/commonTools/framework/get-changed-trilinos-packages-helpers.sh b/commonTools/framework/get-changed-trilinos-packages-helpers.sh new file mode 100644 index 000000000000..fb8664af7a27 --- /dev/null +++ b/commonTools/framework/get-changed-trilinos-packages-helpers.sh @@ -0,0 +1,123 @@ +# +# Determine paths +# + +if [ "$TRILINOS_DIR" == "" ] ; then + # Grab from the symlink (only works on Linux) + _ABS_FILE_PATH=`readlink -f $0` || \ + echo "Could not follow symlink to set TRILINOS_DIR!" + if [ "$_ABS_FILE_PATH" != "" ] ; then + _SCRIPT_DIR=`dirname $_ABS_FILE_PATH` + TRILINOS_DIR=$_SCRIPT_DIR/../.. + fi +fi + +if [ "$TRILINOS_DIR" == "" ] ; then + echo "ERROR: Cannot determine TRILINOS_DIR! Please set env var TRILINOS_DIR!" + exit 4 +fi + +echo "TRILINOS_DIR=$TRILINOS_DIR" + +# Allow a different source tree for the Trilinos scripts +if [ "$TRILINOS_SCRIPTS_DIR" == "" ] ; then + TRILINOS_SCRIPTS_DIR=${TRILINOS_DIR} +fi + +echo "TRILINOS_SCRIPTS_DIR=$TRILINOS_SCRIPTS_DIR" + +ORIG_CWD=$PWD + +# Allow override of TriBITS for testing purposes +if [ "${GCTP_TRIBITS_DIR_OVERRIDE}" != "" ] ; then + TRIBITS_DIR=$GCTP_TRIBITS_DIR_OVERRIDE +else + TRIBITS_DIR=$TRILINOS_SCRIPTS_DIR/cmake/tribits +fi +echo "TRIBITS_DIR=$TRIBITS_DIR" + + +# +# Functions +# + + +function comma_list_to_list() { + echo "$1" | sed "s|,| |g" +} + + +function list_to_comma_list() { + echo "$@" | sed "s| |,|g" +} + + +# list_contains_ele ... +# +# Returns if is contained in list (0 is success) +function list_contains_ele() { + ele_to_find="$1" ; shift + list="$@" + #echo "ele_to_find='${ele_to_find}'" + for ELE in ${list} ; do + #echo "ELE='${ELE}'" + if [[ "${ELE}" == "${ele_to_find}" ]] ; then + #echo "Contains ${ele_to_find}!" 
+ return 0 + fi + done + return 1 +} + + +# comma_list_contains_ele ,,... +# +# Returns if is contained in list (0 is success) +function comma_list_contains_ele() { + ele="$1" + comma_list="$2" + list=$(comma_list_to_list "${comma_list}") + if list_contains_ele "${ele}" "${list}"; then + return 0 + else + return 1 + fi +} + + +# Generates TrilinosPackageDependencies.xml +function generate_trilinos_package_dependencies_xml_file() { + cmake \ + -D Trilinos_DEPS_XML_OUTPUT_FILE=TrilinosPackageDependencies.xml \ + -P $TRIBITS_DIR/ci_support/TribitsDumpDepsXmlScript.cmake \ + &> TribitsDumpDepsXmlScript.log + echo "Wrote the file 'TrilinosPackageDependencies.xml'" +} + + +# Take in and return a filtered comma-separated list of packages +# +# Non PT and ST packages as well as packages listed in +# TRILINOS_EXCLUDE_PACKAGES_FROM_PR_TESTING are filtered out. +function trilinos_filter_packages_to_test() { + input_packages_comma_list="$1" + fullFilteredPackagesCommaList=$( + ${TRIBITS_DIR}/ci_support/filter-packages-list.py \ + --deps-xml-file=TrilinosPackageDependencies.xml \ + --input-packages-list="${input_packages_comma_list}" \ + --keep-test-test-categories=PT,ST) + fullFilteredPackagesList=$(comma_list_to_list "$fullFilteredPackagesCommaList") + filteredPackagesList=() + for pkg in ${fullFilteredPackagesList} ; do + if ! 
list_contains_ele "${pkg}" "${TRILINOS_EXCLUDE_PACKAGES_FROM_PR_TESTING[@]}";then + filteredPackagesList+=($pkg) + fi + done + list_to_comma_list "${filteredPackagesList[@]}" +} + + +function trilinos_get_all_toplevel_packages() { + $TRIBITS_DIR/ci_support/get-tribits-packages.py \ + --deps-xml-file=TrilinosPackageDependencies.xml +} diff --git a/commonTools/framework/get-changed-trilinos-packages.sh b/commonTools/framework/get-changed-trilinos-packages.sh index 0a20034f66cf..cc43727b23f3 100755 --- a/commonTools/framework/get-changed-trilinos-packages.sh +++ b/commonTools/framework/get-changed-trilinos-packages.sh @@ -3,12 +3,14 @@ # Usage: # # get-changed-trilinos-packages.sh \ -# +# [] # # This script takes a range of git commits .. # and then generates a CMake fragment file which # provides the set of enables of Trilinos packages needed to test the changed -# files. +# files and optionally also a CMake fragment +# file provides a 'set(CTEST_LABELS_FOR_SUBPROJECTS ...)' statment which +# provides the list of subprojects (TriBITS packages) to display on CDash. # # For example, to generate a file for the set of enables to test changes in # the current version of Trilinos w.r.t. to develop branch, one would do: @@ -36,7 +38,17 @@ # giving the relative or absolute path. # -# Get command-line arguments +# A) Data that may change +# + +TRILINOS_EXCLUDE_PACKAGES_FROM_PR_TESTING=( + TriKota + PyTrilinos + ) + + +# +# B) Get command-line arguments # GIT_COMMIT_FROM=$1 @@ -59,54 +71,9 @@ if [ "$CMAKE_PACKAGE_ENABLES_OUT" == "" ] ; then exit 1 fi -# -# Functions -# - -function trilinos_filter_packages_to_test() { - $TRIBITS_DIR/ci_support/filter-packages-list.py \ - --deps-xml-file=TrilinosPackageDependencies.xml \ - --input-packages-list="$1" \ - --keep-test-test-categories=PT,ST -} - - -function trilinos_get_all_toplevel_packages() { - $TRIBITS_DIR/ci_support/get-tribits-packages.py \ - --deps-xml-file=TrilinosPackageDependencies.xml -} - -# Zero is success! 
-function trilinos_contains_all_packages() { - echo "$1" | sed -n 1'p' | tr ',' '\n' | while read PKG_NAME ; do - #echo "PKG_NAME='${PKG_NAME}'" - if [[ "${PKG_NAME}" == "ALL_PACKAGES" ]] ; then - #echo "Contains ALL_PACKAGES!" - return 0 - break - fi - return 1 -done - - -} - - -############################################ -# -# Executable script -# -############################################ - -echo -echo "***" -echo "*** Generating set of Trilinos enables given modified packages from" -echo "*** git commit ${GIT_COMMIT_FROM} to ${GIT_COMMIT_TO}" -echo "***" -echo # -# Determine TRILINOS_DIR +# C) Determine paths # if [ "$TRILINOS_DIR" == "" ] ; then @@ -143,16 +110,38 @@ else fi echo "TRIBITS_DIR=$TRIBITS_DIR" + +# +# D) Import functions and vars +# + +ABS_FILE_PATH=`readlink -f $0` || \ + echo "Could not follow symlink to set TRILINOS_DIR!" +if [ "$_ABS_FILE_PATH" != "" ] ; then + SCRIPT_DIR=`dirname $_ABS_FILE_PATH` +fi + +source "${TRILINOS_DIR}/commonTools/framework/get-changed-trilinos-packages-helpers.sh" + + +############################################ +# +# Executable script +# +############################################ + echo -echo "A) Generate the Trilinos Packages definition and depencencies XML file" +echo "***" +echo "*** Generating set of Trilinos enables given modified packages from" +echo "*** git commit ${GIT_COMMIT_FROM} to ${GIT_COMMIT_TO}" +echo "***" echo -cmake \ - -D Trilinos_DEPS_XML_OUTPUT_FILE=TrilinosPackageDependencies.xml \ - -P $TRIBITS_DIR/ci_support/TribitsDumpDepsXmlScript.cmake \ - &> TribitsDumpDepsXmlScript.log +echo +echo "A) Generate the Trilinos Packages definition and depencencies XML file" +echo -echo "Wrote the file 'TrilinosPackageDependencies.xml'" +generate_trilinos_package_dependencies_xml_file echo echo "B) Get the set of changed files" @@ -215,14 +204,14 @@ fi echo "Wrote file '$CMAKE_PACKAGE_ENABLES_OUT'" echo -echo "F) Generate the ${CTEST_LABELS_FOR_SUBPROJETS_OUT} enables file" +echo "F) Generate the 
${CTEST_LABELS_FOR_SUBPROJETS_OUT} file" echo printf "set(CTEST_LABELS_FOR_SUBPROJECTS" > $CTEST_LABELS_FOR_SUBPROJETS_OUT if [[ "$CHANGED_PACKAGES_ST_LIST" != "" ]] ; then - if trilinos_contains_all_packages "$CHANGED_PACKAGES_ST_LIST"; then + if comma_list_contains_ele "ALL_PACKAGES" "$CHANGED_PACKAGES_ST_LIST"; then ALL_PACKAGES=$(trilinos_get_all_toplevel_packages) PR_PACKAGES=$(trilinos_filter_packages_to_test "${ALL_PACKAGES}") else From 0767257231db739d8d676df3ac0c88dc9233144e Mon Sep 17 00:00:00 2001 From: "Roscoe A. Bartlett" Date: Sat, 23 May 2020 19:17:56 -0600 Subject: [PATCH 37/86] Fix tests for addition of package_subproject_list.cmake file (#6697) --- cmake/std/unittests/TestPullRequestLinuxDriverTest.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cmake/std/unittests/TestPullRequestLinuxDriverTest.py b/cmake/std/unittests/TestPullRequestLinuxDriverTest.py index 6c032110c769..ac238c275e47 100755 --- a/cmake/std/unittests/TestPullRequestLinuxDriverTest.py +++ b/cmake/std/unittests/TestPullRequestLinuxDriverTest.py @@ -222,7 +222,7 @@ def test_verifyTargetBranch_passes_with_master_target_mm_source(self): '-Dbuild_dir=/dev/null/workspace/pull_request_test', '-Dconfigure_script=/dev/null/workspace/Trilinos/cmake/std/dummyConfig.cmake', '-Dpackage_enables=../packageEnables.cmake', - '-Dsubprojects_file=../TFW_single_configure_support_scripts/package_subproject_list.cmake']) + '-Dsubprojects_file=../package_subproject_list.cmake']) def test_verifyTargetBranch_passes_with_develop_target(self): @@ -291,7 +291,7 @@ def test_verifyTargetBranch_passes_with_develop_target(self): '-Dbuild_dir=/dev/null/workspace/pull_request_test', '-Dconfigure_script=/dev/null/workspace/Trilinos/cmake/std/dummyConfig.cmake', '-Dpackage_enables=../packageEnables.cmake', - '-Dsubprojects_file=../TFW_single_configure_support_scripts/package_subproject_list.cmake']) + '-Dsubprojects_file=../package_subproject_list.cmake']) @@ -345,7 +345,8 @@ def 
test_call_success(self): 'get-changed-trilinos-packages.sh'), os.path.join('origin', self.target_branch), - 'HEAD', 'packageEnables.cmake']) + 'HEAD', 'packageEnables.cmake', + 'package_subproject_list.cmake']) self.assertEqual(expected_output, m_stdout.getvalue()) os.unlink('packageEnables.cmake') @@ -383,7 +384,8 @@ def test_call_failure(self): 'get-changed-trilinos-packages.sh'), os.path.join('origin', self.target_branch), - 'HEAD', 'packageEnables.cmake']) + 'HEAD', 'packageEnables.cmake', + 'package_subproject_list.cmake']) self.assertEqual(expected_output, m_stdout.getvalue()) From 7bff3cb481926e562fc12cd969d8e15e7dcf73e1 Mon Sep 17 00:00:00 2001 From: "Roscoe A. Bartlett" Date: Sat, 23 May 2020 19:33:15 -0600 Subject: [PATCH 38/86] Fix a space error in Python-only builds CTEST_LABELS_FOR_SUBPROJECTS var (#6697) --- cmake/std/PullRequestLinuxDriverTest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/std/PullRequestLinuxDriverTest.py b/cmake/std/PullRequestLinuxDriverTest.py index 1d5bca05c8d3..5daae006f411 100755 --- a/cmake/std/PullRequestLinuxDriverTest.py +++ b/cmake/std/PullRequestLinuxDriverTest.py @@ -562,7 +562,7 @@ def createPackageEnables(arguments): ''')) with open ('package_subproject_list.cmake', 'w') as f_out: f_out.write(dedent('''\ - set(CTEST_LABELS_FOR_SUBPROJECTS''' + enable_map[arguments.job_base_name] + ''') + set(CTEST_LABELS_FOR_SUBPROJECTS ''' + enable_map[arguments.job_base_name] + ''') ''')) print('Enabled packages:') cmake_rstring = subprocess.check_output(['cmake', From 81dbf7c5f18dc577354a3b014acb84c4de314443 Mon Sep 17 00:00:00 2001 From: "Roscoe A. Bartlett" Date: Sun, 24 May 2020 10:16:23 -0600 Subject: [PATCH 39/86] Add running of get-changed-trilinos-packages-helpers-unit-tests.sh (#6697) I forgot to add a driver to run these unit tests. 
--- commonTools/framework/CMakeLists.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/commonTools/framework/CMakeLists.txt b/commonTools/framework/CMakeLists.txt index 56b34fb8a3db..e1fb6da0ce22 100644 --- a/commonTools/framework/CMakeLists.txt +++ b/commonTools/framework/CMakeLists.txt @@ -7,6 +7,17 @@ TRIBITS_ADD_ADVANCED_TEST( ProjectCiFileChangeLogic_UnitTests TEST_0 CMND ${PYTHON_EXECUTABLE} ARGS ${CMAKE_CURRENT_SOURCE_DIR}/ProjectCiFileChangeLogic_UnitTests.py -v PASS_REGULAR_EXPRESSION "OK" + ALWAYS_FAIL_ON_NONZERO_RETURN + ) + + +TRIBITS_ADD_ADVANCED_TEST( get-changed-trilinos-packages-helpers-unit-tests + OVERALL_WORKING_DIRECTORY TEST_NAME + OVERALL_NUM_MPI_PROCS 1 + TEST_0 CMND + "${CMAKE_CURRENT_SOURCE_DIR}/get-changed-trilinos-packages-helpers-unit-tests.sh" + PASS_REGULAR_EXPRESSION "OK" + ALWAYS_FAIL_ON_NONZERO_RETURN ) From 3e9a1ac9faf4b595f768ec6785cf9d86a974d261 Mon Sep 17 00:00:00 2001 From: "Roscoe A. Bartlett" Date: Sun, 24 May 2020 16:50:52 -0600 Subject: [PATCH 40/86] Add test for ShyLU_NodeHTS and list all packages for CDash (#6697, #7229) I added an explicit test for the subpackage ShyLU_NodeHTS to make sure it will get tested if there are any changes (see #7229). I also changed the list of toplevel packages as CDash subproject to list all of the PR packages. This is because you can't just list the changed SE packages, you have to list all of the top-level packages that have any enabled subpackages downstream. That type of logic will require doing a full TriBITS dependency analysis in CMake code calling the macro TRIBITS_ADJUST_AND_PRINT_PACKAGE_DEPENDENCIES(). That will require some refactoring that I don't have the time to do now. But note that at least the list of toplevel packages will always be correct as there are changes to the Trilinos/PackagesList.cmake file. That is better than using a static list that never gets updated.
--- commonTools/framework/CMakeLists.txt | 30 ++++++++++++++++--- .../get-changed-trilinos-packages.sh | 8 ++--- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/commonTools/framework/CMakeLists.txt b/commonTools/framework/CMakeLists.txt index e1fb6da0ce22..620c9135828f 100644 --- a/commonTools/framework/CMakeLists.txt +++ b/commonTools/framework/CMakeLists.txt @@ -130,12 +130,16 @@ MOCK_PROGRAM_OUTPUT: ${FILES_CHANGED} ENDFUNCTION() +set(ALL_TOPLEVEL_PACKAGES_REGEX + "TrilinosFrameworkTests TrilinosATDMConfigTests .*Kokkos Teuchos KokkosKernels .*EpetraExt Tpetra .*Domi Thyra .*SEACAS .*Teko Intrepid .*MueLu .*ShyLU_DD ShyLU .*Tempus Stokhos ROL Piro Panzer Adelus TrilinosCouplings Pike") + + create_get_changed_trilinos_packages_test(ProjectsList_TeuchosCore "ProjectsList.cmake\npackages/teuchos/core/CMakeLists.txt" "ALL_PACKAGES,TeuchosCore" "ALL_PACKAGES,TeuchosCore" "Setting Trilinos_ENABLE_ALL_PACKAGES = ON;Setting Trilinos_ENABLE_TeuchosCore = ON" - "TrilinosFrameworkTests TrilinosATDMConfigTests .*Kokkos Teuchos KokkosKernels .*EpetraExt Tpetra .*Domi Thyra .*SEACAS .*Teko Intrepid .*MueLu .*ShyLU_DD ShyLU .*Tempus Stokhos ROL Piro Panzer Adelus TrilinosCouplings Pike" + "${ALL_TOPLEVEL_PACKAGES_REGEX}" ) # The above test ensures that it can detect global build files as well as # package files. 
Above, we check several of the parent packages in Trilinos @@ -153,7 +157,7 @@ create_get_changed_trilinos_packages_test(cmake_std_atdm_TeuchosCore_PyTrilinos_ "TrilinosATDMConfigTests,TeuchosCore,PyTrilinos,Pliris" "TrilinosATDMConfigTests,TeuchosCore,Pliris" "Setting Trilinos_ENABLE_TrilinosATDMConfigTests = ON;Setting Trilinos_ENABLE_TeuchosCore = ON;Setting Trilinos_ENABLE_Pliris = ON" - "TrilinosATDMConfigTests TeuchosCore Pliris" + "${ALL_TOPLEVEL_PACKAGES_REGEX}" ) # The above test ensures that cmake/std/atdm/ changes don't trigger global # builds and it makes sure that PT (TeuchosCore) and ST packages (Pliris) do @@ -165,7 +169,7 @@ create_get_changed_trilinos_packages_test(TeuchosCore_TriKota_NewPackage_Pliris "TeuchosCore,TriKota,NewPackage,Pliris" "TeuchosCore,Pliris" "Setting Trilinos_ENABLE_TeuchosCore = ON;Setting Trilinos_ENABLE_Pliris = ON" - "TeuchosCore Pliris" + "${ALL_TOPLEVEL_PACKAGES_REGEX}" ) # The above test ensures PT (TeuchosCore) and ST packages (Piro) do get # enabled but EX packages (NewPackage) don't. @@ -176,11 +180,29 @@ create_get_changed_trilinos_packages_test(cmake_std_atdm "TrilinosATDMConfigTests" "TrilinosATDMConfigTests" "Setting Trilinos_ENABLE_TrilinosATDMConfigTests = ON" - "TrilinosATDMConfigTests" + "${ALL_TOPLEVEL_PACKAGES_REGEX}" ) # The above test ensures changes to just cmake/std/atdm/ triggers the enable # of only the TrilinosATDMConfigTests package and does not trigger the # enable of every package. +create_get_changed_trilinos_packages_test(ShyLU_NodeHTS + "packages/shylu/shylu_node/hts/test/CMakeLists.txt" + "ShyLU_NodeHTS" + "ShyLU_NodeHTS" + "Setting Trilinos_ENABLE_ShyLU_NodeHTS = ON" + "${ALL_TOPLEVEL_PACKAGES_REGEX}" + ) + # The above test ensures changes to packages/shylu/shylu_node/hts/ trigger + # testing of the ST ShyLU_NodeHTS subpackage. (This will cause PRs like + # #7229 to actually test code.) + + +# NOTE: Above, currently every top-level package is listed as a CDash +# subproject.
Once we can implement TriBITS forward-package logic, then the +# above tests will be checking a subset of packages. + + + TRIBITS_PACKAGE_POSTPROCESS() diff --git a/commonTools/framework/get-changed-trilinos-packages.sh b/commonTools/framework/get-changed-trilinos-packages.sh index cc43727b23f3..2b5bcd52c3f8 100755 --- a/commonTools/framework/get-changed-trilinos-packages.sh +++ b/commonTools/framework/get-changed-trilinos-packages.sh @@ -211,12 +211,8 @@ printf "set(CTEST_LABELS_FOR_SUBPROJECTS" > $CTEST_LABELS_FOR_SUBPROJETS_OUT if [[ "$CHANGED_PACKAGES_ST_LIST" != "" ]] ; then - if comma_list_contains_ele "ALL_PACKAGES" "$CHANGED_PACKAGES_ST_LIST"; then - ALL_PACKAGES=$(trilinos_get_all_toplevel_packages) - PR_PACKAGES=$(trilinos_filter_packages_to_test "${ALL_PACKAGES}") - else - PR_PACKAGES="$CHANGED_PACKAGES_ST_LIST" - fi + ALL_PACKAGES=$(trilinos_get_all_toplevel_packages) + PR_PACKAGES=$(trilinos_filter_packages_to_test "${ALL_PACKAGES}") echo "$PR_PACKAGES" | sed -n 1'p' | tr ',' '\n' | while read PKG_NAME ; do #echo $PKG_NAME From 02ea8c7af6bd564d565b4de0d9a296344a21f195 Mon Sep 17 00:00:00 2001 From: Jennifer Loe Date: Mon, 25 May 2020 10:18:41 -0600 Subject: [PATCH 41/86] Update all solver managers to use the OrthoFactory. 
--- packages/belos/src/BelosBlockCGSolMgr.hpp | 118 +++++------- packages/belos/src/BelosBlockGmresSolMgr.hpp | 40 ++--- packages/belos/src/BelosGmresPolyOp.hpp | 31 +--- packages/belos/src/BelosGmresPolySolMgr.hpp | 14 +- .../belos/src/BelosOrthoManagerFactory.hpp | 2 +- packages/belos/src/BelosPCPGSolMgr.hpp | 119 +++++-------- .../belos/src/BelosPseudoBlockGmresSolMgr.hpp | 168 +++++------------- 7 files changed, 165 insertions(+), 327 deletions(-) diff --git a/packages/belos/src/BelosBlockCGSolMgr.hpp b/packages/belos/src/BelosBlockCGSolMgr.hpp index a1b0a1dda7c6..a11cf3e55772 100644 --- a/packages/belos/src/BelosBlockCGSolMgr.hpp +++ b/packages/belos/src/BelosBlockCGSolMgr.hpp @@ -55,9 +55,7 @@ #include "BelosCGIter.hpp" #include "BelosCGSingleRedIter.hpp" #include "BelosBlockCGIter.hpp" -#include "BelosDGKSOrthoManager.hpp" -#include "BelosICGSOrthoManager.hpp" -#include "BelosIMGSOrthoManager.hpp" +#include "BelosOrthoManagerFactory.hpp" #include "BelosStatusTestMaxIters.hpp" #include "BelosStatusTestGenResNorm.hpp" #include "BelosStatusTestCombo.hpp" @@ -548,54 +546,6 @@ setParameters (const Teuchos::RCP ¶ms) } } - // Check if the orthogonalization changed. 
- if (params->isParameter("Orthogonalization")) { - std::string tempOrthoType = params->get("Orthogonalization",orthoType_default_); - TEUCHOS_TEST_FOR_EXCEPTION( tempOrthoType != "DGKS" && tempOrthoType != "ICGS" && tempOrthoType != "IMGS", - std::invalid_argument, - "Belos::BlockCGSolMgr: \"Orthogonalization\" must be either \"DGKS\", \"ICGS\", or \"IMGS\"."); - if (tempOrthoType != orthoType_) { - orthoType_ = tempOrthoType; - params_->set("Orthogonalization", orthoType_); - // Create orthogonalization manager - if (orthoType_=="DGKS") { - if (orthoKappa_ <= 0) { - ortho_ = Teuchos::rcp( new DGKSOrthoManager( label_ ) ); - } - else { - ortho_ = Teuchos::rcp( new DGKSOrthoManager( label_ ) ); - Teuchos::rcp_dynamic_cast >(ortho_)->setDepTol( orthoKappa_ ); - } - } - else if (orthoType_=="ICGS") { - ortho_ = Teuchos::rcp( new ICGSOrthoManager( label_ ) ); - } - else if (orthoType_=="IMGS") { - ortho_ = Teuchos::rcp( new IMGSOrthoManager( label_ ) ); - } - } - } - - // Check which orthogonalization constant to use. - if (params->isParameter("Orthogonalization Constant")) { - if (params->isType ("Orthogonalization Constant")) { - orthoKappa_ = params->get ("Orthogonalization Constant", - static_cast (DefaultSolverParameters::orthoKappa)); - } - else { - orthoKappa_ = params->get ("Orthogonalization Constant", - DefaultSolverParameters::orthoKappa); - } - - // Update parameter in our list. - params_->set("Orthogonalization Constant",orthoKappa_); - if (orthoType_=="DGKS") { - if (orthoKappa_ > 0 && ortho_ != Teuchos::null) { - Teuchos::rcp_dynamic_cast >(ortho_)->setDepTol( orthoKappa_ ); - } - } - } - // Check for a change in verbosity level if (params->isParameter("Verbosity")) { if (Teuchos::isParameterType(*params,"Verbosity")) { @@ -650,6 +600,48 @@ setParameters (const Teuchos::RCP ¶ms) printer_ = Teuchos::rcp( new OutputManager(verbosity_, outputStream_) ); } + // Check if the orthogonalization changed. 
+ bool changedOrthoType = false; + if (params->isParameter("Orthogonalization")) { + std::string tempOrthoType = params->get("Orthogonalization",orthoType_default_); + if (tempOrthoType != orthoType_) { + orthoType_ = tempOrthoType; + changedOrthoType = true; + } + } + params_->set("Orthogonalization", orthoType_); + + // Check which orthogonalization constant to use. + if (params->isParameter("Orthogonalization Constant")) { + if (params->isType ("Orthogonalization Constant")) { + orthoKappa_ = params->get ("Orthogonalization Constant", + static_cast (DefaultSolverParameters::orthoKappa)); + } + else { + orthoKappa_ = params->get ("Orthogonalization Constant", + DefaultSolverParameters::orthoKappa); + } + + // Update parameter in our list. + params_->set("Orthogonalization Constant",orthoKappa_); + if (orthoType_=="DGKS") { + if (orthoKappa_ > 0 && ortho_ != Teuchos::null && !changedOrthoType) { + Teuchos::rcp_dynamic_cast >(ortho_)->setDepTol( orthoKappa_ ); + } + } + } + + // Create orthogonalization manager if we need to. + if (ortho_ == Teuchos::null || changedOrthoType) { + Belos::OrthoManagerFactory factory; + Teuchos::RCP paramsOrtho; // can be null + if (orthoType_=="DGKS" && orthoKappa_ > 0) { + paramsOrtho->set ("depTol", orthoKappa_ ); + } + + ortho_ = factory.makeMatOrthoManager (orthoType_, Teuchos::null, printer_, label_, paramsOrtho); + } + // Convergence typedef Belos::StatusTestCombo StatusTestCombo_t; typedef Belos::StatusTestGenResNorm StatusTestResNorm_t; @@ -752,30 +744,6 @@ setParameters (const Teuchos::RCP ¶ms) } - // Create orthogonalization manager if we need to. 
- if (ortho_ == Teuchos::null) { - params_->set("Orthogonalization", orthoType_); - if (orthoType_=="DGKS") { - if (orthoKappa_ <= 0) { - ortho_ = Teuchos::rcp( new DGKSOrthoManager( label_ ) ); - } - else { - ortho_ = Teuchos::rcp( new DGKSOrthoManager( label_ ) ); - Teuchos::rcp_dynamic_cast >(ortho_)->setDepTol( orthoKappa_ ); - } - } - else if (orthoType_=="ICGS") { - ortho_ = Teuchos::rcp( new ICGSOrthoManager( label_ ) ); - } - else if (orthoType_=="IMGS") { - ortho_ = Teuchos::rcp( new IMGSOrthoManager( label_ ) ); - } - else { - TEUCHOS_TEST_FOR_EXCEPTION(orthoType_!="ICGS"&&orthoType_!="DGKS"&&orthoType_!="IMGS",std::logic_error, - "Belos::BlockCGSolMgr(): Invalid orthogonalization type."); - } - } - // BelosCgIter accepts a parameter specifying whether to assert for the positivity of p^H*A*p in the CG iteration if (params->isParameter("Assert Positive Definiteness")) { assertPositiveDefiniteness_ = Teuchos::getParameter(*params,"Assert Positive Definiteness"); diff --git a/packages/belos/src/BelosBlockGmresSolMgr.hpp b/packages/belos/src/BelosBlockGmresSolMgr.hpp index 17fd28455a48..93d47e92b646 100644 --- a/packages/belos/src/BelosBlockGmresSolMgr.hpp +++ b/packages/belos/src/BelosBlockGmresSolMgr.hpp @@ -631,24 +631,15 @@ void BlockGmresSolMgr::setParameters( const Teuchos::RCPisParameter("Orthogonalization")) { std::string tempOrthoType = params->get("Orthogonalization",orthoType_default_); if (tempOrthoType != orthoType_) { orthoType_ = tempOrthoType; - params_->set("Orthogonalization", orthoType_); - // Create orthogonalization manager - Belos::OrthoManagerFactory factory; - Teuchos::RCP paramsOrtho; // can be null - if (orthoType_=="DGKS" && orthoKappa_ > 0) { - paramsOrtho->set ("depTol", orthoKappa_ ); - } - - ortho_ = factory.makeMatOrthoManager (orthoType_, Teuchos::null, printer_, "Belos", paramsOrtho); - TEUCHOS_TEST_FOR_EXCEPTION - (ortho_.get () == nullptr, std::runtime_error, "BlockGmres: Failed to " - "create (Mat)OrthoManager of type \"" 
<< orthoType_ << "\"."); + changedOrthoType = true; } } + params_->set("Orthogonalization", orthoType_); // Check which orthogonalization constant to use. if (params->isParameter("Orthogonalization Constant")) { @@ -664,12 +655,23 @@ void BlockGmresSolMgr::setParameters( const Teuchos::RCPset("Orthogonalization Constant",orthoKappa_); if (orthoType_=="DGKS") { - if (orthoKappa_ > 0 && ortho_ != Teuchos::null) { + if (orthoKappa_ > 0 && ortho_ != Teuchos::null && !changedOrthoType) { Teuchos::rcp_dynamic_cast >(ortho_)->setDepTol( orthoKappa_ ); } } } + // Create orthogonalization manager if we need to. + if (ortho_ == Teuchos::null || changedOrthoType) { + Belos::OrthoManagerFactory factory; + Teuchos::RCP paramsOrtho; // can be null + if (orthoType_=="DGKS" && orthoKappa_ > 0) { + paramsOrtho->set ("depTol", orthoKappa_ ); + } + + ortho_ = factory.makeMatOrthoManager (orthoType_, Teuchos::null, printer_, label_, paramsOrtho); + } + // Check for convergence tolerance if (params->isParameter("Convergence Tolerance")) { if (params->isType ("Convergence Tolerance")) { @@ -743,7 +745,6 @@ void BlockGmresSolMgr::setParameters( const Teuchos::RCPisParameter("Show Maximum Residual Norm Only")) { showMaxResNormOnly_ = Teuchos::getParameter(*params,"Show Maximum Residual Norm Only"); @@ -755,17 +756,6 @@ void BlockGmresSolMgr::setParameters( const Teuchos::RCPsetShowMaxResNormOnly( showMaxResNormOnly_ ); } - // Create orthogonalization manager if we need to. - if (ortho_ == Teuchos::null) { - params_->set("Orthogonalization", orthoType_); - Belos::OrthoManagerFactory factory; - Teuchos::RCP paramsOrtho; // can be null - if (orthoType_=="DGKS" && orthoKappa_ > 0) { - paramsOrtho->set ("depTol", orthoKappa_ ); - } - - ortho_ = factory.makeMatOrthoManager (orthoType_, Teuchos::null, printer_, "Belos", paramsOrtho); - } // Create the timer if we need to. 
if (timerSolve_ == Teuchos::null) { diff --git a/packages/belos/src/BelosGmresPolyOp.hpp b/packages/belos/src/BelosGmresPolyOp.hpp index 16c62fc93dad..f1364b045a3e 100644 --- a/packages/belos/src/BelosGmresPolyOp.hpp +++ b/packages/belos/src/BelosGmresPolyOp.hpp @@ -57,10 +57,7 @@ #include "BelosGmresIteration.hpp" #include "BelosBlockGmresIter.hpp" - -#include "BelosDGKSOrthoManager.hpp" -#include "BelosICGSOrthoManager.hpp" -#include "BelosIMGSOrthoManager.hpp" +#include "BelosOrthoManagerFactory.hpp" #include "BelosStatusTestMaxIters.hpp" #include "BelosStatusTestGenResNorm.hpp" @@ -394,9 +391,6 @@ namespace Belos { if (params_in->isParameter("Orthogonalization")) { orthoType_ = params_in->get("Orthogonalization",orthoType_default_); - TEUCHOS_TEST_FOR_EXCEPTION( orthoType_ != "DGKS" && orthoType_ != "ICGS" && orthoType_ != "IMGS", - std::invalid_argument, - "Belos::GmresPolyOp: \"Orthogonalization\" must be either \"DGKS\", \"ICGS\", or \"IMGS\"."); } // Check for timer label @@ -553,26 +547,17 @@ namespace Belos { polyList.set("Block Size",1); polyList.set("Keep Hessenberg", true); + // Create output manager. + printer_ = Teuchos::rcp( new OutputManager(verbosity_, outputStream_) ); + // Create orthogonalization manager if we need to. if (ortho_.is_null()) { params_->set("Orthogonalization", orthoType_); - if (orthoType_=="DGKS") { - ortho_ = Teuchos::rcp( new DGKSOrthoManager( polyLabel ) ); - } - else if (orthoType_=="ICGS") { - ortho_ = Teuchos::rcp( new ICGSOrthoManager( polyLabel ) ); - } - else if (orthoType_=="IMGS") { - ortho_ = Teuchos::rcp( new IMGSOrthoManager( polyLabel ) ); - } - else { - TEUCHOS_TEST_FOR_EXCEPTION(orthoType_!="ICGS"&&orthoType_!="DGKS"&&orthoType_!="IMGS",std::invalid_argument, - "Belos::GmresPolyOp(): Invalid orthogonalization type."); - } - } + Belos::OrthoManagerFactory factory; + Teuchos::RCP paramsOrtho; // can be null - // Create output manager. 
- printer_ = Teuchos::rcp( new OutputManager(verbosity_, outputStream_) ); + ortho_ = factory.makeMatOrthoManager (orthoType_, Teuchos::null, printer_, polyLabel, paramsOrtho); + } // Create a simple status test that either reaches the relative residual tolerance or maximum polynomial size. Teuchos::RCP > maxItrTst = diff --git a/packages/belos/src/BelosGmresPolySolMgr.hpp b/packages/belos/src/BelosGmresPolySolMgr.hpp index 4f3759e851a8..b2958ace47f8 100644 --- a/packages/belos/src/BelosGmresPolySolMgr.hpp +++ b/packages/belos/src/BelosGmresPolySolMgr.hpp @@ -54,6 +54,7 @@ #include "BelosSolverManager.hpp" #include "BelosGmresPolyOp.hpp" #include "BelosSolverFactory_Generic.hpp" +#include "BelosOrthoManagerFactory.hpp" #include "Teuchos_as.hpp" #ifdef BELOS_TEUCHOS_TIME_MONITOR #include "Teuchos_TimeMonitor.hpp" @@ -507,9 +508,16 @@ setParameters (const Teuchos::RCP& params) // Check if the orthogonalization changed. if (params->isParameter("Orthogonalization")) { std::string tempOrthoType = params->get("Orthogonalization",orthoType_default_); - TEUCHOS_TEST_FOR_EXCEPTION( tempOrthoType != "DGKS" && tempOrthoType != "ICGS" && tempOrthoType != "IMGS", - std::invalid_argument, - "Belos::GmresPolySolMgr: \"Orthogonalization\" must be either \"DGKS\", \"ICGS\", or \"IMGS\"."); + OrthoManagerFactory factory; + // Ensure that the specified orthogonalization type is valid. + if (! factory.isValidName (tempOrthoType)) { + std::ostringstream os; + os << "Belos::GCRODRSolMgr: Invalid orthogonalization name \"" + << tempOrthoType << "\". 
The following are valid options " + << "for the \"Orthogonalization\" name parameter: "; + factory.printValidNames (os); + throw std::invalid_argument (os.str()); + } if (tempOrthoType != orthoType_) { orthoType_ = tempOrthoType; } diff --git a/packages/belos/src/BelosOrthoManagerFactory.hpp b/packages/belos/src/BelosOrthoManagerFactory.hpp index 2054ed885e87..bc92642f48b0 100644 --- a/packages/belos/src/BelosOrthoManagerFactory.hpp +++ b/packages/belos/src/BelosOrthoManagerFactory.hpp @@ -112,9 +112,9 @@ namespace Belos { OrthoManagerFactory () : theList_ (numOrthoManagers()) { int index = 0; - theList_[index++] = "DGKS"; theList_[index++] = "ICGS"; theList_[index++] = "IMGS"; + theList_[index++] = "DGKS"; #ifdef HAVE_BELOS_TSQR theList_[index++] = "TSQR"; #endif // HAVE_BELOS_TSQR diff --git a/packages/belos/src/BelosPCPGSolMgr.hpp b/packages/belos/src/BelosPCPGSolMgr.hpp index 9780857a94ca..0bc7890d22d9 100644 --- a/packages/belos/src/BelosPCPGSolMgr.hpp +++ b/packages/belos/src/BelosPCPGSolMgr.hpp @@ -53,9 +53,7 @@ #include "BelosPCPGIter.hpp" -#include "BelosDGKSOrthoManager.hpp" -#include "BelosICGSOrthoManager.hpp" -#include "BelosIMGSOrthoManager.hpp" +#include "BelosOrthoManagerFactory.hpp" #include "BelosStatusTestMaxIters.hpp" #include "BelosStatusTestGenResNorm.hpp" #include "BelosStatusTestCombo.hpp" @@ -541,54 +539,6 @@ void PCPGSolMgr::setParameters( const Teuchos::RCPisParameter("Orthogonalization")) { - std::string tempOrthoType = params->get("Orthogonalization",orthoType_default_); - TEUCHOS_TEST_FOR_EXCEPTION( tempOrthoType != "DGKS" && tempOrthoType != "ICGS" && tempOrthoType != "IMGS", - std::invalid_argument, - "Belos::PCPGSolMgr: \"Orthogonalization\" must be either \"DGKS\", \"ICGS\", or \"IMGS\"."); - if (tempOrthoType != orthoType_) { - orthoType_ = tempOrthoType; - params_->set("Orthogonalization", orthoType_); - // Create orthogonalization manager - if (orthoType_=="DGKS") { - if (orthoKappa_ <= 0) { - ortho_ = Teuchos::rcp( new 
DGKSOrthoManager( label_ ) ); - } - else { - ortho_ = Teuchos::rcp( new DGKSOrthoManager( label_ ) ); - Teuchos::rcp_dynamic_cast >(ortho_)->setDepTol( orthoKappa_ ); - } - } - else if (orthoType_=="ICGS") { - ortho_ = Teuchos::rcp( new ICGSOrthoManager( label_ ) ); - } - else if (orthoType_=="IMGS") { - ortho_ = Teuchos::rcp( new IMGSOrthoManager( label_ ) ); - } - } - } - - // Check which orthogonalization constant to use. - if (params->isParameter("Orthogonalization Constant")) { - if (params->isType ("Orthogonalization Constant")) { - orthoKappa_ = params->get ("Orthogonalization Constant", - static_cast (DefaultSolverParameters::orthoKappa)); - } - else { - orthoKappa_ = params->get ("Orthogonalization Constant", - DefaultSolverParameters::orthoKappa); - } - - // Update parameter in our list. - params_->set("Orthogonalization Constant",orthoKappa_); - if (orthoType_=="DGKS") { - if (orthoKappa_ > 0 && ortho_ != Teuchos::null) { - Teuchos::rcp_dynamic_cast >(ortho_)->setDepTol( orthoKappa_ ); - } - } - } - // Check for a change in verbosity level if (params->isParameter("Verbosity")) { if (Teuchos::isParameterType(*params,"Verbosity")) { @@ -643,6 +593,48 @@ void PCPGSolMgr::setParameters( const Teuchos::RCP(verbosity_, outputStream_) ); } + // Check if the orthogonalization changed. + bool changedOrthoType = false; + if (params->isParameter("Orthogonalization")) { + std::string tempOrthoType = params->get("Orthogonalization",orthoType_default_); + if (tempOrthoType != orthoType_) { + orthoType_ = tempOrthoType; + changedOrthoType = true; + } + } + params_->set("Orthogonalization", orthoType_); + + // Check which orthogonalization constant to use. 
+ if (params->isParameter("Orthogonalization Constant")) { + if (params->isType ("Orthogonalization Constant")) { + orthoKappa_ = params->get ("Orthogonalization Constant", + static_cast (DefaultSolverParameters::orthoKappa)); + } + else { + orthoKappa_ = params->get ("Orthogonalization Constant", + DefaultSolverParameters::orthoKappa); + } + + // Update parameter in our list. + params_->set("Orthogonalization Constant",orthoKappa_); + if (orthoType_=="DGKS") { + if (orthoKappa_ > 0 && ortho_ != Teuchos::null && !changedOrthoType) { + Teuchos::rcp_dynamic_cast >(ortho_)->setDepTol( orthoKappa_ ); + } + } + } + + // Create orthogonalization manager if we need to. + if (ortho_ == Teuchos::null || changedOrthoType) { + Belos::OrthoManagerFactory factory; + Teuchos::RCP paramsOrtho; // can be null + if (orthoType_=="DGKS" && orthoKappa_ > 0) { + paramsOrtho->set ("depTol", orthoKappa_ ); + } + + ortho_ = factory.makeMatOrthoManager (orthoType_, Teuchos::null, printer_, label_, paramsOrtho); + } + // Convergence typedef Belos::StatusTestCombo StatusTestCombo_t; typedef Belos::StatusTestGenResNorm StatusTestResNorm_t; @@ -683,31 +675,6 @@ void PCPGSolMgr::setParameters( const Teuchos::RCPsetSolverDesc( solverDesc ); - - // Create orthogonalization manager if we need to. 
- if (ortho_ == Teuchos::null) { - params_->set("Orthogonalization", orthoType_); - if (orthoType_=="DGKS") { - if (orthoKappa_ <= 0) { - ortho_ = Teuchos::rcp( new DGKSOrthoManager( label_ ) ); - } - else { - ortho_ = Teuchos::rcp( new DGKSOrthoManager( label_ ) ); - Teuchos::rcp_dynamic_cast >(ortho_)->setDepTol( orthoKappa_ ); - } - } - else if (orthoType_=="ICGS") { - ortho_ = Teuchos::rcp( new ICGSOrthoManager( label_ ) ); - } - else if (orthoType_=="IMGS") { - ortho_ = Teuchos::rcp( new IMGSOrthoManager( label_ ) ); - } - else { - TEUCHOS_TEST_FOR_EXCEPTION(orthoType_!="ICGS"&&orthoType_!="DGKS"&&orthoType_!="IMGS",std::logic_error, - "Belos::PCPGSolMgr(): Invalid orthogonalization type."); - } - } - // Create the timer if we need to. if (timerSolve_ == Teuchos::null) { std::string solveLabel = label_ + ": PCPGSolMgr total solve time"; diff --git a/packages/belos/src/BelosPseudoBlockGmresSolMgr.hpp b/packages/belos/src/BelosPseudoBlockGmresSolMgr.hpp index ee50651808a0..0c485fbda3af 100644 --- a/packages/belos/src/BelosPseudoBlockGmresSolMgr.hpp +++ b/packages/belos/src/BelosPseudoBlockGmresSolMgr.hpp @@ -53,12 +53,7 @@ #include "BelosSolverManager.hpp" #include "BelosPseudoBlockGmresIter.hpp" -#include "BelosDGKSOrthoManager.hpp" -#include "BelosICGSOrthoManager.hpp" -#include "BelosIMGSOrthoManager.hpp" -#ifdef HAVE_BELOS_TSQR -# include "BelosTsqrOrthoManager.hpp" -#endif // HAVE_BELOS_TSQR +#include "BelosOrthoManagerFactory.hpp" #include "BelosStatusTestFactory.hpp" #include "BelosStatusTestOutputFactory.hpp" #include "BelosOutputManager.hpp" @@ -683,78 +678,6 @@ setParameters (const Teuchos::RCP& params) } } - // Check if the orthogonalization changed. 
- if (params->isParameter ("Orthogonalization")) { - std::string tempOrthoType = params->get ("Orthogonalization", orthoType_default_); -#ifdef HAVE_BELOS_TSQR - TEUCHOS_TEST_FOR_EXCEPTION( - tempOrthoType != "DGKS" && tempOrthoType != "ICGS" && - tempOrthoType != "IMGS" && tempOrthoType != "TSQR", - std::invalid_argument, - "Belos::PseudoBlockGmresSolMgr::setParameters: " - "The \"Orthogonalization\" parameter must be one of \"DGKS\", \"ICGS\", " - "\"IMGS\", or \"TSQR\"."); -#else - TEUCHOS_TEST_FOR_EXCEPTION( - tempOrthoType != "DGKS" && tempOrthoType != "ICGS" && - tempOrthoType != "IMGS", - std::invalid_argument, - "Belos::PseudoBlockGmresSolMgr::setParameters: " - "The \"Orthogonalization\" parameter must be one of \"DGKS\", \"ICGS\", " - "or \"IMGS\"."); -#endif // HAVE_BELOS_TSQR - - if (tempOrthoType != orthoType_) { - orthoType_ = tempOrthoType; - params_->set("Orthogonalization", orthoType_); - // Create orthogonalization manager - if (orthoType_ == "DGKS") { - typedef DGKSOrthoManager ortho_type; - if (orthoKappa_ <= 0) { - ortho_ = rcp (new ortho_type (label_)); - } - else { - ortho_ = rcp (new ortho_type (label_)); - rcp_dynamic_cast (ortho_)->setDepTol (orthoKappa_); - } - } - else if (orthoType_ == "ICGS") { - typedef ICGSOrthoManager ortho_type; - ortho_ = rcp (new ortho_type (label_)); - } - else if (orthoType_ == "IMGS") { - typedef IMGSOrthoManager ortho_type; - ortho_ = rcp (new ortho_type (label_)); - } -#ifdef HAVE_BELOS_TSQR - else if (orthoType_ == "TSQR") { - typedef TsqrMatOrthoManager ortho_type; - ortho_ = rcp (new ortho_type (label_)); - } -#endif // HAVE_BELOS_TSQR - } - } - - // Check which orthogonalization constant to use. 
- if (params->isParameter ("Orthogonalization Constant")) { - if (params->isType ("Orthogonalization Constant")) { - orthoKappa_ = params->get ("Orthogonalization Constant", - static_cast (DefaultSolverParameters::orthoKappa)); - } - else { - orthoKappa_ = params->get ("Orthogonalization Constant", - DefaultSolverParameters::orthoKappa); - } - - // Update parameter in our list. - params_->set ("Orthogonalization Constant", orthoKappa_); - if (orthoType_ == "DGKS") { - if (orthoKappa_ > 0 && ! ortho_.is_null ()) { - typedef DGKSOrthoManager ortho_type; - rcp_dynamic_cast (ortho_)->setDepTol (orthoKappa_); - } - } - } // Check for a change in verbosity level if (params->isParameter ("Verbosity")) { @@ -828,6 +751,49 @@ setParameters (const Teuchos::RCP& params) printer_ = rcp (new OutputManager (verbosity_, outputStream_)); } + // Check if the orthogonalization changed. + bool changedOrthoType = false; + if (params->isParameter ("Orthogonalization")) { + std::string tempOrthoType = params->get ("Orthogonalization", orthoType_default_); + if (tempOrthoType != orthoType_) { + orthoType_ = tempOrthoType; + changedOrthoType = true; + } + } + params_->set("Orthogonalization", orthoType_); + + // Check which orthogonalization constant to use. + if (params->isParameter ("Orthogonalization Constant")) { + if (params->isType ("Orthogonalization Constant")) { + orthoKappa_ = params->get ("Orthogonalization Constant", + static_cast (DefaultSolverParameters::orthoKappa)); + } + else { + orthoKappa_ = params->get ("Orthogonalization Constant", + DefaultSolverParameters::orthoKappa); + } + + // Update parameter in our list. + params_->set ("Orthogonalization Constant", orthoKappa_); + if (orthoType_ == "DGKS") { + if (orthoKappa_ > 0 && ! ortho_.is_null() && !changedOrthoType) { + typedef DGKSOrthoManager ortho_type; + rcp_dynamic_cast (ortho_)->setDepTol (orthoKappa_); + } + } + } + + // Create orthogonalization manager if we need to. 
+ if (ortho_.is_null() || changedOrthoType) { + Belos::OrthoManagerFactory factory; + Teuchos::RCP paramsOrtho; // can be null + if (orthoType_=="DGKS" && orthoKappa_ > 0) { + paramsOrtho->set ("depTol", orthoKappa_ ); + } + + ortho_ = factory.makeMatOrthoManager (orthoType_, Teuchos::null, printer_, label_, paramsOrtho); + } + // Convergence // Check for convergence tolerance @@ -1009,52 +975,6 @@ setParameters (const Teuchos::RCP& params) } } - // Create orthogonalization manager if we need to. - if (ortho_.is_null ()) { - params_->set("Orthogonalization", orthoType_); - if (orthoType_ == "DGKS") { - typedef DGKSOrthoManager ortho_type; - if (orthoKappa_ <= 0) { - ortho_ = rcp (new ortho_type (label_)); - } - else { - ortho_ = rcp (new ortho_type (label_)); - rcp_dynamic_cast (ortho_)->setDepTol (orthoKappa_); - } - } - else if (orthoType_ == "ICGS") { - typedef ICGSOrthoManager ortho_type; - ortho_ = rcp (new ortho_type (label_)); - } - else if (orthoType_ == "IMGS") { - typedef IMGSOrthoManager ortho_type; - ortho_ = rcp (new ortho_type (label_)); - } -#ifdef HAVE_BELOS_TSQR - else if (orthoType_ == "TSQR") { - typedef TsqrMatOrthoManager ortho_type; - ortho_ = rcp (new ortho_type (label_)); - } -#endif // HAVE_BELOS_TSQR - else { -#ifdef HAVE_BELOS_TSQR - TEUCHOS_TEST_FOR_EXCEPTION( - orthoType_ != "ICGS" && orthoType_ != "DGKS" && - orthoType_ != "IMGS" && orthoType_ != "TSQR", - std::logic_error, - "Belos::PseudoBlockGmresSolMgr::setParameters(): " - "Invalid orthogonalization type \"" << orthoType_ << "\"."); -#else - TEUCHOS_TEST_FOR_EXCEPTION( - orthoType_ != "ICGS" && orthoType_ != "DGKS" && - orthoType_ != "IMGS", - std::logic_error, - "Belos::PseudoBlockGmresSolMgr::setParameters(): " - "Invalid orthogonalization type \"" << orthoType_ << "\"."); -#endif // HAVE_BELOS_TSQR - } - } - // Create the timer if we need to. 
if (timerSolve_ == Teuchos::null) { std::string solveLabel = label_ + ": PseudoBlockGmresSolMgr total solve time"; From 4a8dddfe8c9db0c270d7a8a6f8ab20c263073f80 Mon Sep 17 00:00:00 2001 From: Paul Wolfenbarger Date: Tue, 26 May 2020 07:15:34 -0600 Subject: [PATCH 42/86] This simply unlinks the 'package_subproject_list.cmake' temporary created during testing I also repaired an issue that only showed up on smaller machines that what the PR testing runs on (my laptop in this case.) --- .../TestPullRequestLinuxDriverTest.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/cmake/std/unittests/TestPullRequestLinuxDriverTest.py b/cmake/std/unittests/TestPullRequestLinuxDriverTest.py index ac238c275e47..c0320458cbb0 100755 --- a/cmake/std/unittests/TestPullRequestLinuxDriverTest.py +++ b/cmake/std/unittests/TestPullRequestLinuxDriverTest.py @@ -207,6 +207,7 @@ def test_verifyTargetBranch_passes_with_master_target_mm_source(self): l_environ, \ mock.patch('PullRequestLinuxDriverTest.createPackageEnables'), \ mock.patch('PullRequestLinuxDriverTest.setBuildEnviron'), \ + mock.patch('PullRequestLinuxDriverTest.compute_n', return_value=20), \ mock.patch('PullRequestLinuxDriverTest.getCDashTrack') as m_cdtr: PullRequestLinuxDriverTest.run() @@ -319,14 +320,16 @@ def setUp(self): def success_side_effect(self): with open('packageEnables.cmake', 'w') as f_out: - f_out.write(''' -MACRO(PR_ENABLE_BOOL VAR_NAME VAR_VAL) - MESSAGE("-- Setting ${VAR_NAME} = ${VAR_VAL}") - SET(${VAR_NAME} ${VAR_VAL} CACHE BOOL "Set in $CMAKE_PACKAGE_ENABLES_OUT") -ENDMACRO() -''') + f_out.write(dedent('''\ + MACRO(PR_ENABLE_BOOL VAR_NAME VAR_VAL) + MESSAGE("-- Setting ${VAR_NAME} = ${VAR_VAL}") + SET(${VAR_NAME} ${VAR_VAL} CACHE BOOL "Set in $CMAKE_PACKAGE_ENABLES_OUT") + ENDMACRO() + ''')) f_out.write("PR_ENABLE_BOOL(Trilinos_ENABLE_FooPackageBar ON)") - + with open ('package_subproject_list.cmake', 'w') as f_out: + f_out.write(dedent('''\ + 
set(CTEST_LABELS_FOR_SUBPROJECTS TrilinosFrameworkTests ''')) def test_call_success(self): expected_output = '''Enabled packages: @@ -349,6 +352,7 @@ def test_call_success(self): 'package_subproject_list.cmake']) self.assertEqual(expected_output, m_stdout.getvalue()) os.unlink('packageEnables.cmake') + os.unlink('package_subproject_list.cmake') def test_call_python2(self): @@ -366,6 +370,7 @@ def test_call_python2(self): m_out.assert_not_called() self.assertEqual(expected_output, m_stdout.getvalue()) os.unlink('packageEnables.cmake') + os.unlink('package_subproject_list.cmake') def test_call_failure(self): From 60181b53a90aeca1cb6b7cb7a44bc64ca45d345f Mon Sep 17 00:00:00 2001 From: micheldemessieres Date: Wed, 4 Mar 2020 13:52:00 -0500 Subject: [PATCH 43/86] Amesos2: Refactor superlu to use Kokkos views --- packages/amesos2/src/Amesos2_Superlu.cpp | 3 + .../src/Amesos2_Superlu_FunctionMap.hpp | 40 ++- .../amesos2/src/Amesos2_Superlu_TypeMap.cpp | 16 +- .../amesos2/src/Amesos2_Superlu_TypeMap.hpp | 302 +----------------- packages/amesos2/src/Amesos2_Superlu_decl.hpp | 55 ++-- packages/amesos2/src/Amesos2_Superlu_def.hpp | 149 ++++----- .../src/Amesos2_TpetraMultiVecAdapter_def.hpp | 39 ++- .../amesos2/test/solvers/superlu_test.xml | 114 +++++-- 8 files changed, 269 insertions(+), 449 deletions(-) diff --git a/packages/amesos2/src/Amesos2_Superlu.cpp b/packages/amesos2/src/Amesos2_Superlu.cpp index 2145ba8752d2..4f9964e7dffb 100644 --- a/packages/amesos2/src/Amesos2_Superlu.cpp +++ b/packages/amesos2/src/Amesos2_Superlu.cpp @@ -65,4 +65,7 @@ TPETRA_ETI_MANGLING_TYPEDEFS() TPETRA_INSTANTIATE_SLGN_NO_ORDINAL_SCALAR(AMESOS2_SUPERLU_LOCAL_INSTANT) +#define AMESOS2_KOKKOS_IMPL_SOLVER_NAME Superlu +#include "Amesos2_Kokkos_Impl.hpp" + #endif // HAVE_AMESOS2_EXPLICIT_INSTANTIATION diff --git a/packages/amesos2/src/Amesos2_Superlu_FunctionMap.hpp b/packages/amesos2/src/Amesos2_Superlu_FunctionMap.hpp index 835532b05567..87b9b7a51a93 100644 --- 
a/packages/amesos2/src/Amesos2_Superlu_FunctionMap.hpp +++ b/packages/amesos2/src/Amesos2_Superlu_FunctionMap.hpp @@ -79,6 +79,19 @@ namespace SLU { #include "slu_util.h" #include "superlu_enum_consts.h" + #ifdef HAVE_TEUCHOS_COMPLEX + namespace C { + #undef __SUPERLU_SCOMPLEX // TODO #undef after include as well? + #undef SCOMPLEX_INCLUDE + #include "slu_scomplex.h" // single-precision complex data type definitions + } + + namespace Z { + #undef __SUPERLU_DCOMPLEX // TODO #undef after include as well? + #undef DCOMPLEX_INCLUDE + #include "slu_dcomplex.h" // double-precision complex data type definitions + } + #endif // HAVE_TEUCHOS_COMPLEX namespace S { // single-precision real definitions @@ -724,11 +737,8 @@ namespace Amesos2 { #ifdef HAVE_TEUCHOS_COMPLEX - /* The specializations for Teuchos::as<> for SLU::complex and - * SLU::doublecomplex are provided in Amesos2_Superlu_Type.hpp - */ template <> - struct FunctionMap + struct FunctionMap> { #ifdef HAVE_AMESOS2_SUPERLU5_API typedef typename SLU::C::GlobalLU_t GlobalLU_type; @@ -805,10 +815,11 @@ namespace Amesos2 { } static void create_CompCol_Matrix(SLU::SuperMatrix* A, int m, int n, int nnz, - SLU::C::complex* nzval, int* rowind, int* colptr, + void * nzval, int* rowind, int* colptr, SLU::Stype_t stype, SLU::Dtype_t dtype, SLU::Mtype_t mtype) { - SLU::C::cCreate_CompCol_Matrix(A, m, n, nnz, nzval, rowind, colptr, + SLU::C::complex * slu_nzval = reinterpret_cast(nzval); + SLU::C::cCreate_CompCol_Matrix(A, m, n, nnz, slu_nzval, rowind, colptr, stype, dtype, mtype); } @@ -821,10 +832,11 @@ namespace Amesos2 { } static void create_Dense_Matrix(SLU::SuperMatrix* X, int m, int n, - SLU::C::complex* x, int ldx, SLU::Stype_t stype, + void * x, int ldx, SLU::Stype_t stype, SLU::Dtype_t dtype, SLU::Mtype_t mtype) { - SLU::C::cCreate_Dense_Matrix(X, m, n, x, ldx, stype, dtype, mtype); + SLU::C::complex * slu_x = reinterpret_cast(x); + SLU::C::cCreate_Dense_Matrix(X, m, n, slu_x, ldx, stype, dtype, mtype); } static void 
gsequ(SLU::SuperMatrix* A, float* R, float* C, @@ -842,7 +854,7 @@ namespace Amesos2 { template <> - struct FunctionMap + struct FunctionMap> { #ifdef HAVE_AMESOS2_SUPERLU5_API typedef typename SLU::Z::GlobalLU_t GlobalLU_type; @@ -919,10 +931,11 @@ namespace Amesos2 { } static void create_CompCol_Matrix(SLU::SuperMatrix* A, int m, int n, int nnz, - SLU::Z::doublecomplex* nzval, int* rowind, int* colptr, + void * nzval, int* rowind, int* colptr, SLU::Stype_t stype, SLU::Dtype_t dtype, SLU::Mtype_t mtype) { - SLU::Z::zCreate_CompCol_Matrix(A, m, n, nnz, nzval, rowind, colptr, + SLU::Z::doublecomplex * slu_nzval = reinterpret_cast(nzval); + SLU::Z::zCreate_CompCol_Matrix(A, m, n, nnz, slu_nzval, rowind, colptr, stype, dtype, mtype); TEUCHOS_TEST_FOR_EXCEPTION( A == NULL, @@ -944,10 +957,11 @@ namespace Amesos2 { } static void create_Dense_Matrix(SLU::SuperMatrix* X, int m, int n, - SLU::Z::doublecomplex* x, int ldx, SLU::Stype_t stype, + void * x, int ldx, SLU::Stype_t stype, SLU::Dtype_t dtype, SLU::Mtype_t mtype) { - SLU::Z::zCreate_Dense_Matrix(X, m, n, x, ldx, stype, dtype, mtype); + SLU::Z::doublecomplex * slu_x = reinterpret_cast(x); + SLU::Z::zCreate_Dense_Matrix(X, m, n, slu_x, ldx, stype, dtype, mtype); } static void gsequ(SLU::SuperMatrix* A, double* R, double* C, diff --git a/packages/amesos2/src/Amesos2_Superlu_TypeMap.cpp b/packages/amesos2/src/Amesos2_Superlu_TypeMap.cpp index 808e73e2d9e0..8689a4a007c0 100644 --- a/packages/amesos2/src/Amesos2_Superlu_TypeMap.cpp +++ b/packages/amesos2/src/Amesos2_Superlu_TypeMap.cpp @@ -63,21 +63,9 @@ namespace Amesos2 { SLU::Dtype_t TypeMap >::dtype = SLU::SLU_Z; - SLU::Dtype_t TypeMap::dtype = SLU::SLU_C; + SLU::Dtype_t TypeMap>::dtype = SLU::SLU_C; - SLU::Dtype_t TypeMap::dtype = SLU::SLU_Z; + SLU::Dtype_t TypeMap>::dtype = SLU::SLU_Z; #endif } - -#ifdef HAVE_TEUCHOS_COMPLEX -namespace std { - ostream& operator<<(ostream& out, const SLU::Z::doublecomplex z){ - return (out << "(" << z.r << "," << z.i << ")"); - } - 
- ostream& operator<<(ostream& out, const SLU::C::complex c){ - return (out << "(" << c.r << "," << c.i << ")"); - } -} -#endif diff --git a/packages/amesos2/src/Amesos2_Superlu_TypeMap.hpp b/packages/amesos2/src/Amesos2_Superlu_TypeMap.hpp index 77a46a47070c..e3f596325d1b 100644 --- a/packages/amesos2/src/Amesos2_Superlu_TypeMap.hpp +++ b/packages/amesos2/src/Amesos2_Superlu_TypeMap.hpp @@ -66,11 +66,6 @@ #include "Amesos2_TypeMap.hpp" - -/* The SuperLU comples headers file only need to be included if - complex has been enabled in Teuchos. In addition we only need to - define the conversion and printing functions if complex has been - enabled. */ namespace SLU { typedef int int_t; @@ -79,294 +74,9 @@ extern "C" { #undef __SUPERLU_SUPERMATRIX #include "supermatrix.h" // for Dtype_t declaration - -#ifdef HAVE_TEUCHOS_COMPLEX -namespace C { -#undef __SUPERLU_SCOMPLEX -#undef SCOMPLEX_INCLUDE -#include "slu_scomplex.h" // single-precision complex data type definitions -} - -namespace Z { -#undef __SUPERLU_DCOMPLEX -#undef DCOMPLEX_INCLUDE -#include "slu_dcomplex.h" // double-precision complex data type definitions -} -#endif // HAVE_TEUCHOS_COMPLEX - } // end extern "C" - // Declare and specialize a std::binary_funtion class for - // multiplication of SuperLU types - template - struct slu_mult {}; - - // This specialization handles the generic case were the scalar and - // magnitude types are double or float. 
- template - struct slu_mult : std::multiplies {}; - -#ifdef HAVE_TEUCHOS_COMPLEX - - // For namespace/macro reasons, we prefix our variables with amesos_* - template <> - struct slu_mult - : std::binary_function { - C::complex operator()(C::complex amesos_c, float amesos_f) { - C::complex amesos_cr; - cs_mult(&amesos_cr, &amesos_c, amesos_f); // cs_mult is a macro, so no namespacing - return( amesos_cr ); - } - }; - - template <> - struct slu_mult - : std::binary_function { - C::complex operator()(C::complex amesos_c1, C::complex amesos_c2) { - C::complex amesos_cr; - cc_mult(&amesos_cr, &amesos_c1, &amesos_c2); // cc_mult is a macro, so no namespacing - return( amesos_cr ); - } - }; - - template <> - struct slu_mult - : std::binary_function { - Z::doublecomplex operator()(Z::doublecomplex amesos_z, double amesos_d) { - Z::doublecomplex amesos_zr; - zd_mult(&amesos_zr, &amesos_z, amesos_d); // zd_mult is a macro, so no namespacing - return( amesos_zr ); - } - }; - - template <> - struct slu_mult - : std::binary_function { - Z::doublecomplex operator()(Z::doublecomplex amesos_z1, Z::doublecomplex amesos_z2) { - Z::doublecomplex amesos_zr; - zz_mult(&amesos_zr, &amesos_z1, &amesos_z2); // zz_mult is a macro, so no namespacing - return( amesos_zr ); - } - }; - -#endif // HAVE_TEUCHOS_COMPLEX } // end namespace SLU -#ifdef HAVE_TEUCHOS_COMPLEX - -/* ==================== Conversion ==================== */ -namespace Teuchos { - -/** - * \defgroup slu_conversion Conversion definitions for SLU types. - * - * Define specializations of Teuchos::as<> for the SLU types. - * - * These specializations are meant to work with any complex data type that - * implements the same interface as the STL complex type. 
- * - * @{ - */ - -template <> -class ValueTypeConversionTraits> -{ -public: - static SLU::C::complex convert( const std::complex t ) - { - SLU::C::complex ret; - ret.r = Teuchos::as(t.real()); - ret.i = Teuchos::as(t.imag()); - return( ret ); - } - - static SLU::C::complex safeConvert( const std::complex t ) - { - SLU::C::complex ret; - ret.r = Teuchos::as(t.real()); - ret.i = Teuchos::as(t.imag()); - return( ret ); - } -}; - -template <> -class ValueTypeConversionTraits> -{ -public: - static SLU::C::complex convert( const std::complex t ) - { - SLU::C::complex ret; - ret.r = Teuchos::as(t.real()); - ret.i = Teuchos::as(t.imag()); - return( ret ); - } - - static SLU::C::complex safeConvert( const std::complex t ) - { - SLU::C::complex ret; - ret.r = Teuchos::as(t.real()); - ret.i = Teuchos::as(t.imag()); - return( ret ); - } -}; - - -template <> -class ValueTypeConversionTraits> -{ -public: - static SLU::Z::doublecomplex convert( const std::complex t ) - { - SLU::Z::doublecomplex ret; - ret.r = Teuchos::as(t.real()); - ret.i = Teuchos::as(t.imag()); - return( ret ); - } - - static SLU::Z::doublecomplex safeConvert( const std::complex t ) - { - SLU::Z::doublecomplex ret; - ret.r = Teuchos::as(t.real()); - ret.i = Teuchos::as(t.imag()); - return( ret ); - } -}; - -template <> -class ValueTypeConversionTraits> -{ -public: - static SLU::Z::doublecomplex convert( const std::complex t ) - { - SLU::Z::doublecomplex ret; - ret.r = Teuchos::as(t.real()); - ret.i = Teuchos::as(t.imag()); - return( ret ); - } - - static SLU::Z::doublecomplex safeConvert( const std::complex t ) - { - SLU::Z::doublecomplex ret; - ret.r = Teuchos::as(t.real()); - ret.i = Teuchos::as(t.imag()); - return( ret ); - } -}; - - -// Also convert from SLU types - -template <> -class ValueTypeConversionTraits, SLU::C::complex> -{ -public: - static std::complex convert( const SLU::C::complex t ) - { - typedef typename std::complex::value_type value_type; - value_type ret_r = Teuchos::as( t.r ); - 
value_type ret_i = Teuchos::as( t.i ); - return ( std::complex( ret_r, ret_i ) ); - } - - // No special checks for safe Convert - static std::complex safeConvert( const SLU::C::complex t ) - { - typedef typename std::complex::value_type value_type; - value_type ret_r = Teuchos::as( t.r ); - value_type ret_i = Teuchos::as( t.i ); - return ( std::complex( ret_r, ret_i ) ); - } -}; - -template <> -class ValueTypeConversionTraits, SLU::C::complex> -{ -public: - static std::complex convert( const SLU::C::complex t ) - { - typedef typename std::complex::value_type value_type; - value_type ret_r = Teuchos::as( t.r ); - value_type ret_i = Teuchos::as( t.i ); - return ( std::complex( ret_r, ret_i ) ); - } - - // No special checks for safe Convert - static std::complex safeConvert( const SLU::C::complex t ) - { - typedef typename std::complex::value_type value_type; - value_type ret_r = Teuchos::as( t.r ); - value_type ret_i = Teuchos::as( t.i ); - return ( std::complex( ret_r, ret_i ) ); - } -}; - - -template <> -class ValueTypeConversionTraits, SLU::Z::doublecomplex> -{ -public: - static std::complex convert( const SLU::Z::doublecomplex t ) - { - typedef typename std::complex::value_type value_type; - value_type ret_r = Teuchos::as( t.r ); - value_type ret_i = Teuchos::as( t.i ); - return ( std::complex( ret_r, ret_i ) ); - } - - // No special checks for safe Convert - static std::complex safeConvert( const SLU::Z::doublecomplex t ) - { - typedef typename std::complex::value_type value_type; - value_type ret_r = Teuchos::as( t.r ); - value_type ret_i = Teuchos::as( t.i ); - return ( std::complex( ret_r, ret_i ) ); - } -}; - -template <> -class ValueTypeConversionTraits, SLU::Z::doublecomplex> -{ -public: - static std::complex convert( const SLU::Z::doublecomplex t ) - { - typedef typename std::complex::value_type value_type; - value_type ret_r = Teuchos::as( t.r ); - value_type ret_i = Teuchos::as( t.i ); - return ( std::complex( ret_r, ret_i ) ); - } - - // No special 
checks for safe Convert - static std::complex safeConvert( const SLU::Z::doublecomplex t ) - { - typedef typename std::complex::value_type value_type; - value_type ret_r = Teuchos::as( t.r ); - value_type ret_i = Teuchos::as( t.i ); - return ( std::complex( ret_r, ret_i ) ); - } -}; - -template -class SerializationTraits - : public DirectSerializationTraits -{}; - -template -class SerializationTraits - : public DirectSerializationTraits -{}; - -//@} End Conversion group - -} // end namespace Teuchos - -// C++-style output functions for Superlu complex types -namespace std { - ostream& operator<<(ostream& out, const SLU::Z::doublecomplex z); - - ostream& operator<<(ostream& out, const SLU::C::complex c); -} - -#endif // HAVE_TEUCHOS_COMPLEX - namespace Amesos2 { @@ -400,7 +110,7 @@ template <> struct TypeMap > { static SLU::Dtype_t dtype; - typedef SLU::C::complex type; + typedef Kokkos::complex type; typedef float magnitude_type; }; @@ -409,25 +119,25 @@ template <> struct TypeMap > { static SLU::Dtype_t dtype; - typedef SLU::Z::doublecomplex type; + typedef Kokkos::complex type; typedef double magnitude_type; }; template <> -struct TypeMap +struct TypeMap > { static SLU::Dtype_t dtype; - typedef SLU::C::complex type; + typedef Kokkos::complex type; typedef float magnitude_type; }; template <> -struct TypeMap +struct TypeMap > { static SLU::Dtype_t dtype; - typedef SLU::Z::doublecomplex type; + typedef Kokkos::complex type; typedef double magnitude_type; }; diff --git a/packages/amesos2/src/Amesos2_Superlu_decl.hpp b/packages/amesos2/src/Amesos2_Superlu_decl.hpp index c21b23b6ea1a..fecdabbfcdd2 100644 --- a/packages/amesos2/src/Amesos2_Superlu_decl.hpp +++ b/packages/amesos2/src/Amesos2_Superlu_decl.hpp @@ -232,6 +232,7 @@ class Superlu : public SolverCore */ bool loadA_impl(EPhase current_phase); + typedef Kokkos::DefaultHostExecutionSpace HostExecSpaceType; // struct holds all data necessary to make a superlu factorization or solve call mutable struct SLUData { 
@@ -245,13 +246,17 @@ class Superlu : public SolverCore #endif SLU::SuperLUStat_t stat; - Teuchos::Array berr; ///< backward error bounds - Teuchos::Array ferr; ///< forward error bounds - Teuchos::Array perm_r; - Teuchos::Array perm_c; - Teuchos::Array etree; - Teuchos::Array R; - Teuchos::Array C; + + + typedef Kokkos::View host_mag_array; + typedef Kokkos::View host_int_array; + host_mag_array berr; ///< backward error bounds + host_mag_array ferr; ///< forward error bounds + host_int_array perm_r; + host_int_array perm_c; + host_int_array etree; + host_mag_array R; + host_mag_array C; char equed; bool rowequ, colequ; // flags what type of equilibration @@ -261,18 +266,28 @@ class Superlu : public SolverCore int panel_size; } data_; + typedef Kokkos::View host_size_type_array; + typedef Kokkos::View host_ordinal_type_array; + typedef Kokkos::View host_value_type_array; + // The following Arrays are persisting storage arrays for A, X, and B /// Stores the values of the nonzero entries for SuperLU - Teuchos::Array nzvals_; + host_value_type_array host_nzvals_view_; /// Stores the location in \c Ai_ and Aval_ that starts row j - Teuchos::Array rowind_; + host_size_type_array host_rows_view_; /// Stores the row indices of the nonzero entries - Teuchos::Array colptr_; + host_ordinal_type_array host_col_ptr_view_; + + typedef typename Kokkos::View + host_solve_array_t; /// Persisting 1D store for X - Teuchos::Array xvals_; int ldx_; + mutable host_solve_array_t xValues_; + int ldx_; + /// Persisting 1D store for B - Teuchos::Array bvals_; int ldb_; + mutable host_solve_array_t bValues_; + int ldb_; /* Note: In the above, must use "Amesos2::Superlu" rather than * "Superlu" because otherwise the compiler references the @@ -312,17 +327,21 @@ class Superlu : public SolverCore template <> struct solver_traits { #ifdef HAVE_TEUCHOS_COMPLEX - typedef Meta::make_list6, - std::complex, - SLU::C::complex, - SLU::Z::doublecomplex> supported_scalars; + typedef Meta::make_list6, 
std::complex, + Kokkos::complex, Kokkos::complex> + supported_scalars; #else typedef Meta::make_list2 supported_scalars; #endif }; +template +struct solver_supports_matrix> { + static const bool value = true; +}; + } // end namespace Amesos2 #endif // AMESOS2_SUPERLU_DECL_HPP diff --git a/packages/amesos2/src/Amesos2_Superlu_def.hpp b/packages/amesos2/src/Amesos2_Superlu_def.hpp index 3beab666fee7..a8d6faee2527 100644 --- a/packages/amesos2/src/Amesos2_Superlu_def.hpp +++ b/packages/amesos2/src/Amesos2_Superlu_def.hpp @@ -69,9 +69,6 @@ Superlu::Superlu( Teuchos::RCP X, Teuchos::RCP B ) : SolverCore(A, X, B) - , nzvals_() // initialize to empty arrays - , rowind_() - , colptr_() , is_contiguous_(true) { // ilu_set_default_options is called later in set parameter list if required. @@ -85,11 +82,11 @@ Superlu::Superlu( SLU::StatInit(&(data_.stat)); - data_.perm_r.resize(this->globalNumRows_); - data_.perm_c.resize(this->globalNumCols_); - data_.etree.resize(this->globalNumCols_); - data_.R.resize(this->globalNumRows_); - data_.C.resize(this->globalNumCols_); + Kokkos::resize(data_.perm_r, this->globalNumRows_); + Kokkos::resize(data_.perm_c, this->globalNumCols_); + Kokkos::resize(data_.etree, this->globalNumCols_); + Kokkos::resize(data_.R, this->globalNumRows_); + Kokkos::resize(data_.C, this->globalNumCols_); data_.relax = SLU::sp_ienv(2); // Query optimal relax param from superlu data_.panel_size = SLU::sp_ienv(1); // Query optimal panel size @@ -199,7 +196,7 @@ Superlu::preOrdering_impl() Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_); #endif - SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.getRawPtr()); + SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.data()); } return(0); @@ -263,8 +260,8 @@ Superlu::numericFactorization_impl() int info2 = 0; // calculate row and column scalings - function_map::gsequ(&(data_.A), data_.R.getRawPtr(), - data_.C.getRawPtr(), &rowcnd, &colcnd, + function_map::gsequ(&(data_.A), data_.R.data(), + 
data_.C.data(), &rowcnd, &colcnd, &amax, &info2); TEUCHOS_TEST_FOR_EXCEPTION (info2 < 0, std::runtime_error, @@ -295,8 +292,8 @@ Superlu::numericFactorization_impl() } // apply row and column scalings if necessary - function_map::laqgs(&(data_.A), data_.R.getRawPtr(), - data_.C.getRawPtr(), rowcnd, colcnd, + function_map::laqgs(&(data_.A), data_.R.data(), + data_.C.data(), rowcnd, colcnd, amax, &(data_.equed)); // // check what types of equilibration was actually done @@ -306,8 +303,8 @@ Superlu::numericFactorization_impl() // Apply the column permutation computed in preOrdering. Place the // column-permuted matrix in AC - SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.getRawPtr(), - data_.etree.getRawPtr(), &(data_.AC)); + SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.data(), + data_.etree.data(), &(data_.AC)); { // Do factorization #ifdef HAVE_AMESOS2_TIMERS @@ -323,8 +320,8 @@ Superlu::numericFactorization_impl() if(ILU_Flag_==false) { function_map::gstrf(&(data_.options), &(data_.AC), - data_.relax, data_.panel_size, data_.etree.getRawPtr(), - NULL, 0, data_.perm_c.getRawPtr(), data_.perm_r.getRawPtr(), + data_.relax, data_.panel_size, data_.etree.data(), + NULL, 0, data_.perm_c.data(), data_.perm_r.data(), &(data_.L), &(data_.U), #ifdef HAVE_AMESOS2_SUPERLU5_API &(data_.lu), @@ -333,8 +330,8 @@ Superlu::numericFactorization_impl() } else { function_map::gsitrf(&(data_.options), &(data_.AC), - data_.relax, data_.panel_size, data_.etree.getRawPtr(), - NULL, 0, data_.perm_c.getRawPtr(), data_.perm_r.getRawPtr(), + data_.relax, data_.panel_size, data_.etree.data(), + NULL, 0, data_.perm_c.data(), data_.perm_r.data(), &(data_.L), &(data_.U), #ifdef HAVE_AMESOS2_SUPERLU5_API &(data_.lu), @@ -379,35 +376,46 @@ Superlu::solve_impl(const Teuchos::Ptr > const global_size_type ld_rhs = this->root_ ? 
X->getGlobalLength() : 0; const size_t nrhs = X->getGlobalNumVectors(); - const size_t val_store_size = as(ld_rhs * nrhs); - Teuchos::Array xValues(val_store_size); - Teuchos::Array bValues(val_store_size); - { // Get values from RHS B #ifdef HAVE_AMESOS2_TIMERS Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_); Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ ); #endif - if ( is_contiguous_ == true ) { - Util::get_1d_copy_helper, - slu_type>::do_get(B, bValues(), - as(ld_rhs), - ROOTED, this->rowIndexBase_); - } - else { - Util::get_1d_copy_helper, - slu_type>::do_get(B, bValues(), - as(ld_rhs), - CONTIGUOUS_AND_ROOTED, this->rowIndexBase_); - } + + Util::get_1d_copy_helper_kokkos_view, + host_solve_array_t>::do_get(B, bValues_, + as(ld_rhs), + (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED, + this->rowIndexBase_); + + // In general we may want to write directly to the x space without a copy. + // So we 'get' x which may be a direct view assignment to the MV. + Util::get_1d_copy_helper_kokkos_view, + host_solve_array_t>::do_get(X, xValues_, + as(ld_rhs), + (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED, + this->rowIndexBase_); + } + + // If equilibration was applied at numeric, then gssvx and gsisx are going to + // modify B, so we can't use the optimized assignment to B since we'll change + // the source test vector and then fail the subsequent cycle. We need a copy. + // TODO: If above get_1d_copy_helper_kokkos_view already copied then we can + // skip this. Generally need an API which tells us what happened internally + // in above get_1d_copy_helper_kokkos_view - whether is was copy or assign. 
+ if(data_.equed != 'N') { + host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"), + bValues_.extent(0), bValues_.extent(1)); + Kokkos::deep_copy(copyB, bValues_); + bValues_ = copyB; } int ierr = 0; // returned error code magnitude_type rpg, rcond; if ( this->root_ ) { - data_.ferr.resize(nrhs); - data_.berr.resize(nrhs); + Kokkos::resize(data_.ferr, nrhs); + Kokkos::resize(data_.berr, nrhs); { #ifdef HAVE_AMESOS2_TIMERS @@ -416,10 +424,10 @@ Superlu::solve_impl(const Teuchos::Ptr > SLU::Dtype_t dtype = type_map::dtype; int i_ld_rhs = as(ld_rhs); function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as(nrhs), - bValues.getRawPtr(), i_ld_rhs, + bValues_.data(), i_ld_rhs, SLU::SLU_DN, dtype, SLU::SLU_GE); function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as(nrhs), - xValues.getRawPtr(), i_ld_rhs, + xValues_.data(), i_ld_rhs, SLU::SLU_DN, dtype, SLU::SLU_GE); } @@ -433,11 +441,11 @@ Superlu::solve_impl(const Teuchos::Ptr > if(ILU_Flag_==false) { function_map::gssvx(&(data_.options), &(data_.A), - data_.perm_c.getRawPtr(), data_.perm_r.getRawPtr(), - data_.etree.getRawPtr(), &(data_.equed), data_.R.getRawPtr(), - data_.C.getRawPtr(), &(data_.L), &(data_.U), NULL, 0, &(data_.B), - &(data_.X), &rpg, &rcond, data_.ferr.getRawPtr(), - data_.berr.getRawPtr(), + data_.perm_c.data(), data_.perm_r.data(), + data_.etree.data(), &(data_.equed), data_.R.data(), + data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B), + &(data_.X), &rpg, &rcond, data_.ferr.data(), + data_.berr.data(), #ifdef HAVE_AMESOS2_SUPERLU5_API &(data_.lu), #endif @@ -445,9 +453,9 @@ Superlu::solve_impl(const Teuchos::Ptr > } else { function_map::gsisx(&(data_.options), &(data_.A), - data_.perm_c.getRawPtr(), data_.perm_r.getRawPtr(), - data_.etree.getRawPtr(), &(data_.equed), data_.R.getRawPtr(), - data_.C.getRawPtr(), &(data_.L), &(data_.U), NULL, 0, &(data_.B), + data_.perm_c.data(), data_.perm_r.data(), + data_.etree.data(), &(data_.equed), data_.R.data(), + 
data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B), &(data_.X), &rpg, &rcond, #ifdef HAVE_AMESOS2_SUPERLU5_API &(data_.lu), @@ -485,18 +493,11 @@ Superlu::solve_impl(const Teuchos::Ptr > Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_); #endif - if ( is_contiguous_ == true ) { - Util::put_1d_data_helper< - MultiVecAdapter,slu_type>::do_put(X, xValues(), - as(ld_rhs), - ROOTED, this->rowIndexBase_); - } - else { - Util::put_1d_data_helper< - MultiVecAdapter,slu_type>::do_put(X, xValues(), - as(ld_rhs), - CONTIGUOUS_AND_ROOTED, this->rowIndexBase_); - } + Util::put_1d_data_helper_kokkos_view< + MultiVecAdapter,host_solve_array_t>::do_put(X, xValues_, + as(ld_rhs), + (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED, + this->rowIndexBase_); } @@ -747,9 +748,9 @@ Superlu::loadA_impl(EPhase current_phase) // Only the root image needs storage allocated if( this->root_ ){ - nzvals_.resize(this->globalNumNonZeros_); - rowind_.resize(this->globalNumNonZeros_); - colptr_.resize(this->globalNumCols_ + 1); + Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_); + Kokkos::resize(host_rows_view_, this->globalNumNonZeros_); + Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1); } int nnz_ret = 0; @@ -763,18 +764,20 @@ Superlu::loadA_impl(EPhase current_phase) "Row and column maps have different indexbase "); if ( is_contiguous_ == true ) { - Util::get_ccs_helper< - MatrixAdapter,slu_type,int,int>::do_get(this->matrixA_.ptr(), - nzvals_(), rowind_(), - colptr_(), nnz_ret, ROOTED, + Util::get_ccs_helper_kokkos_view< + MatrixAdapter,host_value_type_array,host_ordinal_type_array, + host_size_type_array>::do_get(this->matrixA_.ptr(), + host_nzvals_view_, host_rows_view_, + host_col_ptr_view_, nnz_ret, ROOTED, ARBITRARY, this->rowIndexBase_); } else { - Util::get_ccs_helper< - MatrixAdapter,slu_type,int,int>::do_get(this->matrixA_.ptr(), - nzvals_(), rowind_(), - colptr_(), nnz_ret, CONTIGUOUS_AND_ROOTED, + 
Util::get_ccs_helper_kokkos_view< + MatrixAdapter,host_value_type_array,host_ordinal_type_array, + host_size_type_array>::do_get(this->matrixA_.ptr(), + host_nzvals_view_, host_rows_view_, + host_col_ptr_view_, nnz_ret, CONTIGUOUS_AND_ROOTED, ARBITRARY, this->rowIndexBase_); } @@ -791,9 +794,9 @@ Superlu::loadA_impl(EPhase current_phase) function_map::create_CompCol_Matrix( &(data_.A), this->globalNumRows_, this->globalNumCols_, nnz_ret, - nzvals_.getRawPtr(), - rowind_.getRawPtr(), - colptr_.getRawPtr(), + host_nzvals_view_.data(), + host_rows_view_.data(), + host_col_ptr_view_.data(), SLU::SLU_NC, dtype, SLU::SLU_GE); } diff --git a/packages/amesos2/src/Amesos2_TpetraMultiVecAdapter_def.hpp b/packages/amesos2/src/Amesos2_TpetraMultiVecAdapter_def.hpp index dd3bd3c98d9d..13f68698ec4e 100644 --- a/packages/amesos2/src/Amesos2_TpetraMultiVecAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_TpetraMultiVecAdapter_def.hpp @@ -302,6 +302,11 @@ namespace Amesos2 { redist_mv.doExport (*mv_, *exporter_, Tpetra::REPLACE); if ( distribution != CONTIGUOUS_AND_ROOTED ) { + // Do this if GIDs contiguous - existing functionality + // Copy the imported (multi)vector's data into the Kokkos View. 
+ deep_copy_or_assign_view(kokkos_view, redist_mv.getLocalViewDevice()); + } + else { if(redist_mv.isConstantStride()) { redist_mv.sync_device(); // no testing of this right now - since UVM on deep_copy_or_assign_view(kokkos_view, redist_mv.getLocalViewDevice()); @@ -310,9 +315,6 @@ namespace Amesos2 { TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "Kokkos adapter non-constant stride not imlemented."); } } - else { - TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "Kokkos adapter CONTIGUOUS_AND_ROOTED path not implemented for get1dCopy_kokkos_view()."); - } } } @@ -564,7 +566,36 @@ namespace Amesos2 { mv_->doImport (source_mv, *importer_, Tpetra::REPLACE); } else { - TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "Kokkos adapter CONTIGUOUS_AND_ROOTED not implemented for put1dData_kokkos_view."); + multivec_t redist_mv (srcMap, num_vecs); // unused for ROOTED case + typedef typename multivec_t::dual_view_type dual_view_type; + typedef typename dual_view_type::host_mirror_space host_execution_space; + redist_mv.template modify< host_execution_space > (); + + // Cuda solvers won't currently use this path since they are just serial + // right now, so this mirror should be harmless (and not strictly necessary). + // Adding it for future possibilities though we may then refactor this + // for better efficiency if the kokkos_new_data view is on device. 
+ auto host_kokkos_new_data = Kokkos::create_mirror_view(kokkos_new_data); + Kokkos::deep_copy(host_kokkos_new_data, kokkos_new_data); + if ( redist_mv.isConstantStride() ) { + auto contig_local_view_2d = redist_mv.template getLocalView(); + for ( size_t j = 0; j < num_vecs; ++j) { + auto av_j = Kokkos::subview(host_kokkos_new_data, Kokkos::ALL, j); + for ( size_t i = 0; i < lda; ++i ) { + contig_local_view_2d(i,j) = av_j(i); + } + } + } + else { + TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "Kokkos adapter " + "CONTIGUOUS_AND_ROOTED not implemented for put1dData_kokkos_view " + "with non constant stride."); + } + + typedef typename multivec_t::node_type::memory_space memory_space; + redist_mv.template sync (); + + mv_->doImport (redist_mv, *importer_, Tpetra::REPLACE); } } diff --git a/packages/amesos2/test/solvers/superlu_test.xml b/packages/amesos2/test/solvers/superlu_test.xml index 80b3544f4c55..cfc4b7d9ccb9 100644 --- a/packages/amesos2/test/solvers/superlu_test.xml +++ b/packages/amesos2/test/solvers/superlu_test.xml @@ -28,13 +28,13 @@ - + - + @@ -43,35 +43,60 @@ - + - - - - - - - - - - - + - + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -89,31 +114,58 @@ - - - - - - - - + - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From b9ae885e4ed2b5a3dcb9fbfcdb0f99c59e991b65 Mon Sep 17 00:00:00 2001 From: micheldemessieres Date: Sun, 26 Apr 2020 17:12:46 -0400 Subject: [PATCH 44/86] Amesos2: Clean up loading for Kokkos adapter No longer loads via Tpetra and converts to Kokkos. Just load directly via Kokkos calls. Only loads data for the root rank and empty for others. This mimics the behavior you would get for Tpetra and something like SuperLU if the test runs on more than 1 rank. For Tpetra we load the matrix distributed but then the adapter collects it all to the root rank and the others are empty. 
For kokkos just load everything initially on the root rank and leave the others empty, This fixes the crash that would occur if you tried to run SuperLU on multiple ranks. Also resolves refactor setup was not getting randomized for any of the tests.. --- packages/amesos2/test/solvers/Solver_Test.cpp | 257 +++++------------- 1 file changed, 63 insertions(+), 194 deletions(-) diff --git a/packages/amesos2/test/solvers/Solver_Test.cpp b/packages/amesos2/test/solvers/Solver_Test.cpp index a4077d5f525d..d4deab60cef4 100644 --- a/packages/amesos2/test/solvers/Solver_Test.cpp +++ b/packages/amesos2/test/solvers/Solver_Test.cpp @@ -52,6 +52,9 @@ */ #include "KokkosBlas.hpp" +#include "Kokkos_Random.hpp" +#include "KokkosSparse_spmv.hpp" +#include "KokkosKernels_IOUtils.hpp" #include #include @@ -847,6 +850,7 @@ bool do_epetra_test(const string& mm_file, } A2->ReplaceMyValues(0, l_fst_row_nnz, values.getRawPtr(), indices.getRawPtr()); + x2->Random(); A2->Multiply(transpose, *x2, *b2); } // else A2 is never read @@ -1031,6 +1035,7 @@ bool do_tpetra_test_with_types(const string& mm_file, A2->replaceLocalValues (0, indices, values); A2->fillComplete (A->getDomainMap (), A->getRangeMap ()); + x2->randomize(); A2->apply (*x2, *b2, trans); } // else A2 is never read @@ -1417,218 +1422,84 @@ bool test_tpetra(const string& mm_file, template + typename Node> bool do_kokkos_test_with_types(const string& mm_file, const string& solver_name, ParameterList solve_params) { - // Here I am using Tpetra as a helper to load and make solutions. 
- typedef DefaultNode TpetraNode; - - using Tpetra::CrsMatrix; - using Tpetra::MultiVector; - using Teuchos::Comm; - using Teuchos::ScalarTraits; using std::endl; using std::flush; - typedef Kokkos::Device device_t; + typedef typename Node::execution_space execution_space; + typedef typename Node::memory_space memory_space; + typedef Kokkos::Device device_t; typedef KokkosSparse::CrsMatrix MAT; typedef Kokkos::View view_t; - const size_t numVecs = 5; // arbitrary number - const size_t numRHS = 5; // also arbitrary + RCP > comm = Tpetra::getDefaultComm(); - bool transpose = solve_params.get("Transpose", false); + // Kokkos adapter doesn't support the distributed modes. + // We just load to the root rank which allows something like SuperLU to + // run the kokkos tests even if the test is set to more than 1 rank. + bool bEmptyLoad = (comm->getRank() != 0); - RCP > comm = Tpetra::getDefaultComm(); + bool transpose = solve_params.get("Transpose", false); if (verbosity > 2) { *fos << endl << " Reading matrix from " << mm_file << " ... 
" << flush; } std::string path = filedir + mm_file; - // not sure about the loading schemes for kokkos - just use Tpetra right now - // and load it into the Kokkos CrsMatrix - typedef Tpetra::Map<>::global_ordinal_type TpetraGO; - typedef Tpetra::CrsMatrix tpetra_crsmatrix_t; - RCP tpetraM = - Tpetra::MatrixMarket::Reader::readSparseFile (path, comm); - - Teuchos::ArrayRCP rowPointers; - Teuchos::ArrayRCP columnIndices; - Teuchos::ArrayRCP values; - tpetraM->getAllValues(rowPointers, columnIndices, values); - - // convert Tpetra size_t row ptrs to kokkos crsmatrix LO type - Teuchos::ArrayRCP kokkosRowPointers = Teuchos::arcp(new LocalOrdinal[rowPointers.size()], 0, rowPointers.size()); - for(int n = 0; n < rowPointers.size(); ++n) { - kokkosRowPointers[n] = Teuchos::as(rowPointers[n]); - } + RCP A = rcp(new MAT); - // convert Tpetra values to kokkos - for complex this will be converting - // std::complex to Kokkos::complex - Teuchos::ArrayRCP kokkosValues = Teuchos::arcp(new Scalar[values.size()], 0, values.size()); - for(int n = 0; n < values.size(); ++n) { - kokkosValues[n] = Teuchos::as(values[n]); - } + const size_t numVecs = bEmptyLoad ? 
0 : 5; // arbitrary number + const size_t numRHS = 5; // also arbitrary - auto num_rows = tpetraM->getNodeNumRows(); - auto num_cols = tpetraM->getNodeNumCols(); - - // Kokkos CrsMatrix builds with non const ptrs while Tpetra loads into const - Teuchos::ArrayRCP non_const_values = Teuchos::arcp_const_cast(kokkosValues); - Teuchos::ArrayRCP non_const_kokkosRowPointers = Teuchos::arcp_const_cast(kokkosRowPointers); - Teuchos::ArrayRCP non_const_columnIndices = Teuchos::arcp_const_cast(columnIndices); - RCP A = rcp(new MAT("Kokkos CrsMatrix", - num_rows, - num_cols, - tpetraM->getGlobalNumEntries(), - non_const_values.getRawPtr(), - non_const_kokkosRowPointers.getRawPtr(), - non_const_columnIndices.getRawPtr())); + if(!bEmptyLoad) { + *A = KokkosKernels::Impl::read_kokkos_crst_matrix(path.c_str()); + } if (verbosity > 2) { *fos << endl << " Creating right-hand side and solution vectors" << endl; } - ETransp trans = transpose ? CONJ_TRANS : NO_TRANS; - typedef Tpetra::MultiVector MV; - RCP > dmnmap = tpetraM->getDomainMap(); - RCP > rngmap = tpetraM->getRangeMap(); - Array > xMV(numRHS); - Array > bMV(numRHS); - for( size_t i = 0; i < numRHS; ++i ){ - if( transpose ){ - xMV[i] = rcp(new MV(dmnmap,numVecs)); - bMV[i] = rcp(new MV(rngmap,numVecs)); - } else { - xMV[i] = rcp(new MV(rngmap,numVecs)); - bMV[i] = rcp(new MV(dmnmap,numVecs)); - } - std::ostringstream xlabel, blabel; - xlabel << "x[" << i << "]"; - blabel << "b[" << i << "]"; - xMV[i]->setObjectLabel(xlabel.str()); - bMV[i]->setObjectLabel(blabel.str()); + auto num_rows = bEmptyLoad ? 
0 : A->graph.numRows(); - xMV[i]->randomize(); - tpetraM->apply(*xMV[i], *bMV[i], trans); - } + RCP A2 = rcp(new MAT); + RCP Xhat = rcp(new view_t(Kokkos::ViewAllocateWithoutInitializing("Xhat"), num_rows, numVecs)); Array> x(numRHS); Array> b(numRHS); - for( size_t i = 0; i < numRHS; ++i ){ - std::ostringstream xlabel, blabel; - xlabel << "x[" << i << "]"; - blabel << "b[" << i << "]"; - if( transpose ){ - x[i] = rcp(new view_t(xlabel.str(), num_cols, numRHS)); - b[i] = rcp(new view_t(blabel.str(), num_cols, numRHS)); - } else { - x[i] = rcp(new view_t(xlabel.str(), num_rows, numRHS)); - b[i] = rcp(new view_t(blabel.str(), num_rows, numRHS)); - } - - // MDM Right now I'm employing the Tpetra version to generate the random - // values. But probably should make a pure kokkos version though I'd like - // them all to be the same. - RCP xMV, bMV; - if( transpose ){ - xMV = rcp(new MV(rngmap,numVecs)); - bMV = rcp(new MV(dmnmap,numVecs)); - } - else { - xMV = rcp(new MV(dmnmap,numVecs)); - bMV = rcp(new MV(rngmap,numVecs)); - } - xMV->randomize(); - tpetraM->apply(*xMV, *bMV, trans); - - Kokkos::deep_copy(*x[i], xMV->getLocalViewHost()); - Kokkos::deep_copy(*b[i], bMV->getLocalViewHost()); - } - - RCP temp_tpetraM = - Tpetra::MatrixMarket::Reader::readSparseFile (path, comm); - RCP A2; - RCP Xhat, x2, b2; - - if (transpose) { - Xhat = rcp(new view_t("Xhat", num_rows, numVecs)); - if (refactor) { - x2 = rcp(new view_t("x2", num_rows, numVecs)); - b2 = rcp(new view_t("b2", num_cols, numVecs)); - } - } else { - Xhat = rcp(new view_t("Xhat", num_cols, numVecs)); - if (refactor) { - x2 = rcp(new view_t("x2", num_cols, numVecs)); - b2 = rcp(new view_t ("b2", num_rows, numVecs)); + uint64_t random_seed = 28713; + Kokkos::Random_XorShift64_Pool random(random_seed); + for( size_t i = 0; i < numRHS; ++i ){ + x[i] = rcp(new view_t(Kokkos::ViewAllocateWithoutInitializing("x"), num_rows, numVecs)); + b[i] = rcp(new view_t(Kokkos::ViewAllocateWithoutInitializing("b"), num_rows, 
numVecs)); + if(!bEmptyLoad) { + Kokkos::fill_random(*x[i], random, Scalar(1)); + KokkosSparse::spmv(transpose?"T":"N", Scalar(1.0), *A, *x[i], Scalar(0.0), *b[i]); } } + RCP x2, b2; if (refactor) { if (verbosity > 2) { *fos << endl << " Creating near-copy of matrix for refactor test" << endl; } - - RCP tpetraM2 = - Tpetra::MatrixMarket::Reader::readSparseFile (path, comm); - - // perturb the values just a bit (element-wise square of first row) - size_t l_fst_row_nnz = tpetraM2->getNumEntriesInLocalRow(0); - Array indices(l_fst_row_nnz); - Array values(l_fst_row_nnz); - tpetraM2->getLocalRowCopy(0, indices, values, l_fst_row_nnz); - for( size_t i = 0; i < l_fst_row_nnz; ++i ){ - values[i] = values[i] * values[i]; - } - tpetraM2->resumeFill (); - tpetraM2->replaceLocalValues (0, indices, values); - tpetraM2->fillComplete (tpetraM->getDomainMap (), tpetraM->getRangeMap ()); - - // Get Tpetra ahain - Teuchos::ArrayRCP rowPointers2; - Teuchos::ArrayRCP columnIndices2; - Teuchos::ArrayRCP values2; - tpetraM2->getAllValues(rowPointers2, columnIndices2, values2); - - Teuchos::ArrayRCP kokkosRowPointers2 = Teuchos::arcp(new LocalOrdinal[rowPointers2.size()], 0, rowPointers2.size()); - for(int n = 0; n < rowPointers2.size(); ++n) { - kokkosRowPointers2[n] = Teuchos::as(rowPointers2[n]); + x2 = rcp(new view_t(Kokkos::ViewAllocateWithoutInitializing("x2"), num_rows, numVecs)); + b2 = rcp(new view_t(Kokkos::ViewAllocateWithoutInitializing("b2"), num_rows, numVecs)); + if(!bEmptyLoad) { + *A2 = KokkosKernels::Impl::read_kokkos_crst_matrix(path.c_str()); + auto vals = A2->values; // don't use RCP in kernel + // perturb the values just a bit (element-wise square of first row) + Kokkos::RangePolicy policy(0, vals.size()); + Kokkos::parallel_for(policy, KOKKOS_LAMBDA(size_t i) { + vals(i) = vals(i) * vals(i); + }); + Kokkos::fill_random(*x2, random, Scalar(1)); + KokkosSparse::spmv(transpose?"T":"N", Scalar(1.0), *A2, *x2, Scalar(0.0), *b2); } - - auto num_rows2 = 
tpetraM2->getNodeNumRows(); - auto num_cols2 = tpetraM2->getNodeNumCols(); - - A2 = rcp(new MAT("Kokkos CrsMatrix 2", - num_rows2, - num_cols2, - tpetraM2->getGlobalNumEntries(), - (Scalar*)values2.getRawPtr(), - (LocalOrdinal*)kokkosRowPointers2.getRawPtr(), - (LocalOrdinal*)columnIndices2.getRawPtr())); - - RCP x2MV, b2MV; - if (transpose) { - if (refactor) { - x2MV = rcp(new MV(dmnmap,numVecs)); - b2MV = rcp(new MV(rngmap,numVecs)); - } - } else { - if (refactor) { - x2MV = rcp(new MV(rngmap,numVecs)); - b2MV = rcp(new MV(dmnmap,numVecs)); - } - } - - tpetraM2->apply (*x2MV, *b2MV, trans); - - Kokkos::deep_copy(*x2, x2MV->getLocalViewHost()); - Kokkos::deep_copy(*b2, b2MV->getLocalViewHost()); } // else A2 is never read return do_solve_routine(solver_name, A, A2, @@ -1715,7 +1586,7 @@ bool test_kokkos(const string& mm_file, bool test_done = false; -#define AMESOS2_SOLVER_KOKKOS_TEST(S,LO,N,TpetraScalar) \ +#define AMESOS2_SOLVER_KOKKOS_TEST(S,LO,N) \ test_done = true; \ if (verbosity > 1) { \ *fos << std::endl \ @@ -1726,7 +1597,7 @@ bool test_kokkos(const string& mm_file, << std::endl; \ } \ bool run_success = \ - do_kokkos_test_with_types \ + do_kokkos_test_with_types \ (mm_file,solver_name, solve_params_copy); \ if (verbosity > 1) { \ if (!run_success) \ @@ -1748,12 +1619,12 @@ bool test_kokkos(const string& mm_file, if( scalar == "float" ) { #ifdef HAVE_TPETRA_INST_FLOAT// Because of Tpetra maps this is currently needed for Kokkos adapter if( node == "default" ) { - AMESOS2_SOLVER_KOKKOS_TEST(float,int,DefaultNode,float); + AMESOS2_SOLVER_KOKKOS_TEST(float,int,DefaultNode); } else if( node == "serial" ) { #ifdef KOKKOS_ENABLE_SERIAL *fos << "KokkosSerialWrapperNode float "; - AMESOS2_SOLVER_KOKKOS_TEST(float,int,Kokkos::Serial,float); + AMESOS2_SOLVER_KOKKOS_TEST(float,int,Kokkos::Serial); #else *fos << "node=serial was not enabled at configure time" << std::endl; #endif @@ -1761,7 +1632,7 @@ bool test_kokkos(const string& mm_file, else if( node == "cuda" ) 
{ #ifdef KOKKOS_ENABLE_CUDA *fos << "KokkosCudaWrapperNode float "; - AMESOS2_SOLVER_KOKKOS_TEST(float,int,Kokkos::Cuda,float); + AMESOS2_SOLVER_KOKKOS_TEST(float,int,Kokkos::Cuda); #else *fos << "node=cuda was not enabled at configure time" << std::endl; #endif @@ -1769,7 +1640,7 @@ bool test_kokkos(const string& mm_file, else if( node == "cudauvmoff" ) { #ifdef KOKKOS_ENABLE_CUDA *fos << "KokkosCudaUVMOffWrapperNode float "; - AMESOS2_SOLVER_KOKKOS_TEST(float,int,uvm_off_node_t,float); + AMESOS2_SOLVER_KOKKOS_TEST(float,int,uvm_off_node_t); #else *fos << "node=cudauvmoff was not enabled at configure time" << std::endl; #endif @@ -1781,12 +1652,12 @@ bool test_kokkos(const string& mm_file, else if( scalar == "double" ) { #ifdef HAVE_TPETRA_INST_DOUBLE // Because of Tpetra maps this is currently needed for Kokkos adapter if( node == "default" ) { - AMESOS2_SOLVER_KOKKOS_TEST(double,int,DefaultNode,double); + AMESOS2_SOLVER_KOKKOS_TEST(double,int,DefaultNode); } else if( node == "serial" ) { #ifdef KOKKOS_ENABLE_SERIAL *fos << "KokkosSerialWrapperNode double "; - AMESOS2_SOLVER_KOKKOS_TEST(double,int,Kokkos::Serial,double); + AMESOS2_SOLVER_KOKKOS_TEST(double,int,Kokkos::Serial); #else *fos << "node=serial was not enabled at configure time" << std::endl; #endif @@ -1794,7 +1665,7 @@ bool test_kokkos(const string& mm_file, else if( node == "cuda" ) { #ifdef KOKKOS_ENABLE_CUDA *fos << "KokkosCudaWrapperNode double "; - AMESOS2_SOLVER_KOKKOS_TEST(double,int,Kokkos::Cuda,double); + AMESOS2_SOLVER_KOKKOS_TEST(double,int,Kokkos::Cuda); #else *fos << "node=cuda was not enabled at configure time" << std::endl; #endif @@ -1802,7 +1673,7 @@ bool test_kokkos(const string& mm_file, else if( node == "cudauvmoff" ) { #ifdef KOKKOS_ENABLE_CUDA *fos << "KokkosCudaUVMOffWrapperNode double "; - AMESOS2_SOLVER_KOKKOS_TEST(double,int,uvm_off_node_t,double); + AMESOS2_SOLVER_KOKKOS_TEST(double,int,uvm_off_node_t); #else *fos << "node=cudauvmoff was not enabled at configure time" << 
std::endl; #endif @@ -1821,15 +1692,14 @@ bool test_kokkos(const string& mm_file, if( mag == "float" ){ #ifdef HAVE_TPETRA_INST_COMPLEX_FLOAT typedef Kokkos::complex cmplx_float; - typedef std::complex tpetra_cmplx_float; if( lo == "int" ){ if( node == "default" ) { - AMESOS2_SOLVER_KOKKOS_TEST(cmplx_float,int,DefaultNode,tpetra_cmplx_float); + AMESOS2_SOLVER_KOKKOS_TEST(cmplx_float,int,DefaultNode); } else if( node == "serial" ) { #ifdef KOKKOS_ENABLE_SERIAL *fos << "KokkosSerialWrapperNode complex "; - AMESOS2_SOLVER_KOKKOS_TEST(cmplx_float,int,Kokkos::Serial,tpetra_cmplx_float); + AMESOS2_SOLVER_KOKKOS_TEST(cmplx_float,int,Kokkos::Serial); #else *fos << "node=serial was not enabled at configure time" << std::endl; #endif @@ -1837,7 +1707,7 @@ bool test_kokkos(const string& mm_file, else if( node == "cuda" ) { #ifdef KOKKOS_ENABLE_CUDA *fos << "KokkosCudaWrapperNode complex "; - AMESOS2_SOLVER_KOKKOS_TEST(cmplx_float,int,Kokkos::Cuda,tpetra_cmplx_float); + AMESOS2_SOLVER_KOKKOS_TEST(cmplx_float,int,Kokkos::Cuda); #else *fos << "node=cuda was not enabled at configure time" << std::endl; #endif @@ -1845,7 +1715,7 @@ bool test_kokkos(const string& mm_file, else if( node == "cudauvmoff" ) { #ifdef KOKKOS_ENABLE_CUDA *fos << "KokkosCudaUVMOffWrapperNode complex "; - AMESOS2_SOLVER_KOKKOS_TEST(cmplx_float,int,uvm_off_node_t,tpetra_cmplx_float); + AMESOS2_SOLVER_KOKKOS_TEST(cmplx_float,int,uvm_off_node_t); #else *fos << "node=cudauvmoff was not enabled at configure time" << std::endl; #endif @@ -1866,15 +1736,14 @@ bool test_kokkos(const string& mm_file, if( mag == "double" ){ #ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE typedef Kokkos::complex cmplx_double; - typedef std::complex tpetra_cmplx_double; if( lo == "int" ){ if( node == "default" ) { - AMESOS2_SOLVER_KOKKOS_TEST(cmplx_double,int,DefaultNode,tpetra_cmplx_double); + AMESOS2_SOLVER_KOKKOS_TEST(cmplx_double,int,DefaultNode); } else if( node == "serial" ) { #ifdef KOKKOS_ENABLE_SERIAL *fos << "KokkosSerialWrapperNode 
complex "; - AMESOS2_SOLVER_KOKKOS_TEST(cmplx_double,int,Kokkos::Serial,tpetra_cmplx_double); + AMESOS2_SOLVER_KOKKOS_TEST(cmplx_double,int,Kokkos::Serial); #else *fos << "node=serial was not enabled at configure time" << std::endl; #endif @@ -1882,7 +1751,7 @@ bool test_kokkos(const string& mm_file, else if( node == "cuda" ) { #ifdef KOKKOS_ENABLE_CUDA *fos << "KokkosCudaWrapperNode complex "; - AMESOS2_SOLVER_KOKKOS_TEST(cmplx_double,int,Kokkos::Cuda,tpetra_cmplx_double); + AMESOS2_SOLVER_KOKKOS_TEST(cmplx_double,int,Kokkos::Cuda); #else *fos << "node=cuda was not enabled at configure time" << std::endl; #endif @@ -1890,7 +1759,7 @@ bool test_kokkos(const string& mm_file, else if( node == "cudauvmoff" ) { #ifdef KOKKOS_ENABLE_CUDA *fos << "KokkosCudaUVMOffWrapperNode complex "; - AMESOS2_SOLVER_KOKKOS_TEST(cmplx_double,int,uvm_off_node_t,tpetra_cmplx_double); + AMESOS2_SOLVER_KOKKOS_TEST(cmplx_double,int,uvm_off_node_t); #else *fos << "node=cudauvmoff was not enabled at configure time" << std::endl; #endif From 70e87244f64fa1f65e9005bf80b3bf331cbd0e6b Mon Sep 17 00:00:00 2001 From: Michel de Messieres Date: Thu, 30 Apr 2020 19:19:37 -0600 Subject: [PATCH 45/86] Amesos2: Fix cuSOLVER refactor Now that refactor is fixed to test a random vector, and not all 0's vector, this code needed to be fixed to properly update the sort for a modified matrix. 
--- packages/amesos2/src/Amesos2_Util.hpp | 93 +- .../amesos2/src/Amesos2_cuSOLVER_decl.hpp | 1 + packages/amesos2/src/Amesos2_cuSOLVER_def.hpp | 22 +- packages/amesos2/test/solvers/simple.cpp | 1928 +++++++++++++++++ 4 files changed, 2006 insertions(+), 38 deletions(-) create mode 100644 packages/amesos2/test/solvers/simple.cpp diff --git a/packages/amesos2/src/Amesos2_Util.hpp b/packages/amesos2/src/Amesos2_Util.hpp index c5f87b662075..e53c1173036a 100644 --- a/packages/amesos2/src/Amesos2_Util.hpp +++ b/packages/amesos2/src/Amesos2_Util.hpp @@ -1024,11 +1024,10 @@ namespace Amesos2 { } } - template + template void - reorder(values_view_t & values, row_ptr_view_t & row_ptr, cols_view_t & cols, - per_view_t & perm, per_view_t & peri) + reorder(row_ptr_view_t & row_ptr, cols_view_t & cols, + per_view_t & perm, per_view_t & peri, size_t & nnz) { #ifndef HAVE_AMESOS2_METIS TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, @@ -1080,36 +1079,24 @@ namespace Amesos2 { TEUCHOS_TEST_FOR_EXCEPTION(err != METIS_OK, std::runtime_error, "METIS_NodeND failed to sort matrix."); - // exec space to resort the matrix - // right now we're assuming all three vectors are in fact on the same - // memory space but this is not necessarily true for future use. Then - // we'll need to make the exec space an option to set and they'll all - // need to be mirrored into that space. Add a compilation failure here - // to make that clear to a future user. - typedef typename values_view_t::execution_space exec_space_t; - typedef typename cols_view_t::execution_space cols_exec_space_t; - typedef typename row_ptr_view_t::execution_space row_ptr_exec_space_t; - static_assert(std::is_same::value && - std::is_same::value, - "Amesos2 reorder method is currently assuming all three " - "matrix vectors are on the same memory space. 
If this " - "requirement is changing it will need some minor updates " - "to make it work with differing memory spaces."); - // put the permutations on our saved device ptrs // these will be used to permute x and b when we solve + typedef typename cols_view_t::execution_space exec_space_t; auto device_perm = Kokkos::create_mirror_view(exec_space_t(), host_perm); auto device_peri = Kokkos::create_mirror_view(exec_space_t(), host_peri); deep_copy(device_perm, host_perm); deep_copy(device_peri, host_peri); + // also set the permutation which may need to convert the type from + // metis to the native ordinal_type + deep_copy_or_assign_view(perm, device_perm); + deep_copy_or_assign_view(peri, device_peri); + // we'll permute matrix on device to a new set of arrays row_ptr_view_t new_row_ptr( Kokkos::ViewAllocateWithoutInitializing("new_row_ptr"), row_ptr.size()); cols_view_t new_cols( Kokkos::ViewAllocateWithoutInitializing("new_cols"), cols.size() - new_nnz/2); - values_view_t new_values( - Kokkos::ViewAllocateWithoutInitializing("new_values"), values.size() - new_nnz/2); // permute row indices Kokkos::RangePolicy policy_row(0, row_ptr.size()); @@ -1143,7 +1130,6 @@ namespace Amesos2 { const ordinal_type j = device_peri(cols(sk)); if(i >= j) { new_cols(tk) = j; - new_values(tk) = values(sk); ++t; } } @@ -1152,23 +1138,63 @@ namespace Amesos2 { // finally set the inputs to the new sorted arrays row_ptr = new_row_ptr; cols = new_cols; - values = new_values; - // also set the permutation which may need to convert the type from - // metis to the native ordinal_type - deep_copy_or_assign_view(perm, device_perm); - deep_copy_or_assign_view(peri, device_peri); - #endif + nnz = new_nnz; + #endif // HAVE_AMESOS2_METIS + } + + template + void + reorder_values(values_view_t & values, const row_ptr_view_t & orig_row_ptr, + const row_ptr_view_t & new_row_ptr, + const cols_view_t & orig_cols, const per_view_t & perm, const per_view_t & peri, + size_t nnz) + { + typedef typename 
cols_view_t::value_type ordinal_type; + typedef typename row_ptr_view_t::value_type size_type; + + typedef typename cols_view_t::execution_space exec_space_t; + + auto device_perm = Kokkos::create_mirror_view(exec_space_t(), perm); + auto device_peri = Kokkos::create_mirror_view(exec_space_t(), peri); + deep_copy(device_perm, perm); + deep_copy(device_peri, peri); + + const ordinal_type size = orig_row_ptr.size() - 1; + + auto host_orig_row_ptr = Kokkos::create_mirror_view(orig_row_ptr); + auto new_nnz = host_orig_row_ptr(size); // TODO: Maybe optimize this by caching + + values_view_t new_values( + Kokkos::ViewAllocateWithoutInitializing("new_values"), values.size() - new_nnz/2); + + // permute col indices + Kokkos::RangePolicy policy_col(0, size); + Kokkos::parallel_for(policy_col, KOKKOS_LAMBDA(ordinal_type i) { + const ordinal_type kbeg = new_row_ptr(i); + const ordinal_type row = device_perm(i); + const ordinal_type col_beg = orig_row_ptr(row); + const ordinal_type col_end = orig_row_ptr(row + 1); + const ordinal_type nk = col_end - col_beg; + for(ordinal_type k = 0, t = 0; k < nk; ++k) { + const ordinal_type tk = kbeg + t; + const ordinal_type sk = col_beg + k; + const ordinal_type j = device_peri(orig_cols(sk)); + if(i >= j) { + new_values(tk) = values(sk); + ++t; + } + } + }); + + values = new_values; } template void apply_reorder_permutation(const array_view_t & array, array_view_t & permuted_array, const per_view_t & permutation) { - #ifndef HAVE_AMESOS2_METIS - TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, - "Cannot apply_reorder_permutation for cuSolver because no METIS is available."); - #else if(permuted_array.extent(0) != array.extent(0) || permuted_array.extent(1) != array.extent(1)) { permuted_array = array_view_t( Kokkos::ViewAllocateWithoutInitializing("permuted_array"), @@ -1181,7 +1207,6 @@ namespace Amesos2 { permuted_array(i, j) = array(permutation(i), j); } }); - #endif } /** @} */ diff --git 
a/packages/amesos2/src/Amesos2_cuSOLVER_decl.hpp b/packages/amesos2/src/Amesos2_cuSOLVER_decl.hpp index 765b26756916..a33624d4e45f 100644 --- a/packages/amesos2/src/Amesos2_cuSOLVER_decl.hpp +++ b/packages/amesos2/src/Amesos2_cuSOLVER_decl.hpp @@ -203,6 +203,7 @@ class cuSOLVER : public SolverCore device_value_type_array device_nzvals_view_; device_size_type_array device_row_ptr_view_; device_ordinal_type_array device_cols_view_; + size_t sorted_nnz; // data for reordering typedef Kokkos::View permute_array_t; diff --git a/packages/amesos2/src/Amesos2_cuSOLVER_def.hpp b/packages/amesos2/src/Amesos2_cuSOLVER_def.hpp index aa3831a049bf..2da60eb10cc0 100644 --- a/packages/amesos2/src/Amesos2_cuSOLVER_def.hpp +++ b/packages/amesos2/src/Amesos2_cuSOLVER_def.hpp @@ -91,13 +91,12 @@ cuSOLVER::preOrdering_impl() if(do_optimization()) { this->matrixA_->returnRowPtr_kokkos_view(device_row_ptr_view_); this->matrixA_->returnColInd_kokkos_view(device_cols_view_); - this->matrixA_->returnValues_kokkos_view(device_nzvals_view_); // reorder to optimize cuSolver if(data_.bReorder) { Amesos2::Util::reorder( - device_nzvals_view_, device_row_ptr_view_, device_cols_view_, - device_perm_, device_peri_); + device_row_ptr_view_, device_cols_view_, + device_perm_, device_peri_, sorted_nnz); } } @@ -135,7 +134,22 @@ int cuSOLVER::numericFactorization_impl() { int err = 0; - if ( this->root_ ) { + if(do_optimization()) { // just supporting one rank right now + this->matrixA_->returnValues_kokkos_view(device_nzvals_view_); + + // reorder to optimize cuSolver + if(data_.bReorder) { + // must have original row and cols - maybe cache this from 1st symbiolic setup + // this setup exists to support the refactor option + device_size_type_array orig_device_row_ptr_view; + device_ordinal_type_array orig_device_cols_view; + this->matrixA_->returnRowPtr_kokkos_view(orig_device_row_ptr_view); + this->matrixA_->returnColInd_kokkos_view(orig_device_cols_view); + Amesos2::Util::reorder_values( + 
device_nzvals_view_, orig_device_row_ptr_view, device_row_ptr_view_, orig_device_cols_view, + device_perm_, device_peri_, sorted_nnz); + } + const int size = this->globalNumRows_; const int nnz = device_cols_view_.size(); // reorder may have changed this const cusolver_type * values = device_nzvals_view_.data(); diff --git a/packages/amesos2/test/solvers/simple.cpp b/packages/amesos2/test/solvers/simple.cpp new file mode 100644 index 000000000000..a4077d5f525d --- /dev/null +++ b/packages/amesos2/test/solvers/simple.cpp @@ -0,0 +1,1928 @@ +// @HEADER +// +// *********************************************************************** +// +// Amesos2: Templated Direct Sparse Solver Package +// Copyright 2011 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// *********************************************************************** +// +// @HEADER + +/** + * \file Solver_Test.cpp + * \author Eric Bavier + * \date Wed May 25 12:17:25 2011 + * + * \brief Tests Amesos2 solver interfaces using various matrix/vector + * objects, scalar/ordinal types, and input matrices. Test + * parameters are specified by an input XML file. + */ + +#include "KokkosBlas.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include // For reading matrix-market files + +#include "Amesos2.hpp" // includes everything from Amesos2 + +// #ifdef HAVE_TPETRA_INST_INT_INT +#if defined(HAVE_AMESOS2_EPETRA) && defined(HAVE_AMESOS2_EPETRAEXT) +#ifdef HAVE_MPI +#include +#include +#else +#include +#endif +#include +#include +#include +#include +#endif // HAVE_AMESOS2_EPETRAEXT +//#endif + +using std::string; + +using Teuchos::rcp; +using Teuchos::RCP; +using Teuchos::ptrInArg; +using Teuchos::outArg; +using Teuchos::ETransp; +using Teuchos::TRANS; +using Teuchos::NO_TRANS; +using Teuchos::CONJ_TRANS; +using Teuchos::ParameterList; +using Teuchos::Time; +using Teuchos::TimeMonitor; +using Teuchos::Array; +using Teuchos::ArrayView; + +/* + * An example input xml file can be found in the default: "solvers_test.xml" + */ + +// TODO: flesh out the timers +RCP