trilinos · trilinos-autotester · Dec 5, 2018 · Dec 5, 2018 · Dec 5, 2018 · Dec 5, 2018
@@ -388,7 +388,8 @@ int main (int argc, char* argv[])
 
   Kokkos::initialize(argc,argv);
 
-  {
+  int status = 0;
+  try {
 
     Teuchos::GlobalMPISession mpiSession(&argc, &argv);
     RCP<const Teuchos::MpiComm<int> > tComm = Teuchos::rcp(new Teuchos::MpiComm<int>(MPI_COMM_WORLD));
@@ -604,11 +605,11 @@ int main (int argc, char* argv[])
       if (has_interface_condition)
         checkInterfaceConnections(conn_manager, dofManager->getComm());
     }
-  
+
     // construct some linear algebra object, build object to pass to evaluators
     Teuchos::RCP<panzer::LinearObjFactory<panzer::Traits> > linObjFactory
       = Teuchos::rcp(new panzer::BlockedEpetraLinearObjFactory<panzer::Traits,int>(tComm.getConst(),dofManager));
-  
+
     std::vector<std::string> names;
     std::vector<std::vector<std::string> > eblocks;
     const int c_name_start = 3;
@@ -642,7 +643,7 @@ int main (int argc, char* argv[])
         errorResponseLibrary->addResponse(names[i] + " L2 Error", eblocks[i], builder);
       }
     }
-  
+
     // setup closure model
     /////////////////////////////////////////////////////////////
 
@@ -671,7 +672,7 @@ int main (int argc, char* argv[])
     }
 
     Teuchos::ParameterList user_data("User Data"); // user data can be empty here
-  
+
     // setup field manager builder
     /////////////////////////////////////////////////////////////
 
@@ -697,7 +698,7 @@ int main (int argc, char* argv[])
     user_data.set<int>("Workset Size", workset_size);
     if (po.check_error)
       errorResponseLibrary->buildResponseEvaluators(physicsBlocks, cm_factory, closure_models, user_data);
-  
+
     // assemble and solve
     /////////////////////////////////////////////////////////////
     Teuchos::RCP<panzer::EpetraLinearObjContainer> ep_container;
@@ -714,19 +715,20 @@ int main (int argc, char* argv[])
       linObjFactory->initializeGhostedContainer(panzer::LinearObjContainer::X |
                                                 panzer::LinearObjContainer::F |
                                                 panzer::LinearObjContainer::Mat,*ghost_container);
+
       linObjFactory->initializeContainer(panzer::LinearObjContainer::X |
                                          panzer::LinearObjContainer::F |
                                          panzer::LinearObjContainer::Mat,*container);
       ghost_container->initialize();
       container->initialize();
-  
+
       panzer::AssemblyEngineInArgs input(ghost_container,container);
       input.alpha = 0;
       input.beta = 1;
 
       // evaluate physics: This does both the Jacobian and residual at once
       ae_tm.getAsObject<panzer::Traits::Jacobian>()->evaluate(input);
-  
+
       // solve linear system
       /////////////////////////////////////////////////////////////
       // convert generic linear object container to epetra container
@@ -745,7 +747,7 @@ int main (int argc, char* argv[])
 
       // solve the linear system
       solver.Iterate(1000,1e-5);
-  
+
       // we have now solved for the residual correction from
       // zero in the context of a Newton solve.
       //     J*e = -r = -(f - J*0) where f = J*u
@@ -755,13 +757,14 @@ int main (int argc, char* argv[])
     } else { // Some analysis and an outer iteration if necessary.
       Teuchos::RCP<Epetra_CrsMatrix> J_fd;
       assembleAndSolve(ae_tm, linObjFactory, ep_container, ghost_container, po);
+
       if (po.test_Jacobian) {
         const double nwre = testJacobian(ae_tm, linObjFactory, ep_container->get_x());
         out << "TEST JACOBIAN " << nwre << "\n";
         if (nwre < 0 || nwre > 1e-5) pass = false;
       }
     }
-    
+
     // output data (optional)
     /////////////////////////////////////////////////////////////
 
@@ -771,7 +774,7 @@ int main (int argc, char* argv[])
       EpetraExt::VectorToMatrixMarketFile("x_vec.mm",*ep_container->get_x());
       EpetraExt::VectorToMatrixMarketFile("b_vec.mm",*ep_container->get_f());
     }
-  
+
     if (po.check_error) {
       std::vector<Teuchos::RCP<panzer::Response_Functional<panzer::Traits::Residual> > > rfs(names.size());
       for (std::size_t i = po.nonlinear_Robin ? c_name_start : 0; i < names.size(); ++i) {
@@ -815,13 +818,37 @@ int main (int argc, char* argv[])
       panzer_stk::write_solution_data(*dofManager,*mesh,*ep_ghost_container->get_x());
       mesh->writeToExodus("output.exo");
     }
-  
+
     // all done!
     /////////////////////////////////////////////////////////////
     out << (pass ? "PASS" : "FAIL") << " BASICS\n";
   }
+  catch (std::exception& e) {
+    std::cout << "*********** Caught Exception: Begin Error Report ***********" << std::endl;
+    std::cout << e.what() << std::endl;
+    std::cout << "************ Caught Exception: End Error Report ************" << std::endl;
+    status = -1;
+  }
+  catch (std::string& msg) {
+    std::cout << "*********** Caught Exception: Begin Error Report ***********" << std::endl;
+    std::cout << msg << std::endl;
+    std::cout << "************ Caught Exception: End Error Report ************" << std::endl;
+    status = -1;
+  }
+  catch (char* msg) {
+    std::cout << "*********** Caught Exception: Begin Error Report ***********" << std::endl;
+    std::cout << *msg << std::endl;
+    std::cout << "************ Caught Exception: End Error Report ************" << std::endl;
+    status = -1;
+  }
+  catch (...) {
+    std::cout << "*********** Caught Exception: Begin Error Report ***********" << std::endl;
+    std::cout << "Caught UNKOWN exception" << std::endl;
+    std::cout << "************ Caught Exception: End Error Report ************" << std::endl;
+    status = -1;
+  }
 
-  return 0;
+  return status;
 }
 
 void testInitialization(const Teuchos::RCP<Teuchos::ParameterList>& ipb, std::vector<panzer::BC>& bcs,

@@ -124,10 +124,10 @@ class GreedyTieBreak : public Tpetra::Details::TieBreak<LocalOrdinal,GlobalOrdin
                                     const std::vector<std::pair<int,LocalOrdinal> > & pid_and_lid) const
   {
     // always choose index of pair with smallest pid
-    auto numLids = pid_and_lid.size();
-    decltype(numLids) idx = 0;
-    auto minpid = pid_and_lid[0].first;
-    decltype(minpid) minidx = 0;
+    const std::size_t numLids = pid_and_lid.size();
+    std::size_t idx = 0;
+    int minpid = pid_and_lid[0].first;
+    std::size_t minidx = 0;
     for (idx = 0; idx < numLids; ++idx) {
       if (pid_and_lid[idx].first < minpid) {
         minpid = pid_and_lid[idx].first;
@@ -569,17 +569,22 @@ void DOFManager<LO,GO>::buildGlobalUnknowns(const Teuchos::RCP<const FieldPatter
     HashTable isOwned, remainingOwned;
 
     // owned_ is made up of owned_ids.: This doesn't work for high order
-    Teuchos::ArrayRCP<const GO> nvals = non_overlap_mv->get1dView();
-    Teuchos::ArrayRCP<const GO> tagged_vals = tagged_non_overlap_mv->get1dView();
+    non_overlap_mv->sync_host();
+    tagged_non_overlap_mv->sync_host();
+    PHX::Device::fence();
+    auto nvals = non_overlap_mv->template getLocalView<PHX::Device>();
+    auto tagged_vals = tagged_non_overlap_mv->template getLocalView<PHX::Device>();
     TEUCHOS_ASSERT(nvals.size()==tagged_vals.size());
-    for (int j = 0; j < nvals.size(); ++j) {
-      if (nvals[j] != -1) {
-        for(GO offset=0;offset<tagged_vals[j];offset++)
-          isOwned.insert(nvals[j]+offset);
-      }
-      else {
-        // sanity check
-        TEUCHOS_ASSERT(tagged_vals[j]==0)
+    for (size_t i = 0; i < nvals.extent(1); ++i) {
+      for (size_t j = 0; j < nvals.extent(0); ++j) {
+	if (nvals(j,i) != -1) {
+	  for(GO offset=0;offset<tagged_vals(j,i);++offset)
+	    isOwned.insert(nvals(j,i)+offset);
+	}
+	else {
+	  // sanity check
+	  TEUCHOS_ASSERT(tagged_vals(j,i)==0);
+	}
       }
     }
     remainingOwned = isOwned;
@@ -776,7 +781,6 @@ DOFManager<LO,GO>::buildGlobalUnknowns_GUN(const Tpetra::MultiVector<GO,LO,GO,pa
     non_overlap_mv = rcp(new MultiVector(*tagged_non_overlap_mv,Teuchos::Copy));
   }
 
-
  /* 10. Compute the local sum using Kokkos.
    */
 
@@ -792,8 +796,9 @@ DOFManager<LO,GO>::buildGlobalUnknowns_GUN(const Tpetra::MultiVector<GO,LO,GO,pa
     KV values = non_overlap_mv->template getLocalView<DMS>();
     auto mv_size = values.extent(0);
     Kokkos::parallel_reduce(mv_size,panzer::dof_functors::SumRank2<GO,KV>(values),localsum);
+    PHX::Device::fence();
   }
-
+  
  /* 11. Create a map using local sums to generate final GIDs.
    */
 
@@ -805,7 +810,7 @@ DOFManager<LO,GO>::buildGlobalUnknowns_GUN(const Tpetra::MultiVector<GO,LO,GO,pa
 
     // do a prefix sum
     GO scanResult = 0;
-    Teuchos::scan<int, GO> (*getComm(), Teuchos::REDUCE_SUM, static_cast<size_t> (localsum), Teuchos::outArg (scanResult));
+    Teuchos::scan<int, GO> (*getComm(), Teuchos::REDUCE_SUM, static_cast<int> (localsum), Teuchos::outArg (scanResult));
     myOffset = scanResult - localsum;
   }
 
@@ -820,24 +825,25 @@ DOFManager<LO,GO>::buildGlobalUnknowns_GUN(const Tpetra::MultiVector<GO,LO,GO,pa
 
   {
     PANZER_DOFMGR_FUNC_TIME_MONITOR("panzer::DOFManager::buildGlobalUnknowns_GUN::line_13-21 gid_assignment");
-
-    // ArrayView<const GO> owned_ids = gid_map->getNodeElementList();
     int which_id=0;
-    ArrayRCP<ArrayRCP<GO> > editnonoverlap = non_overlap_mv->get2dViewNonConst();
+    auto editnonoverlap = non_overlap_mv->getLocalViewHost();
     for(size_t i=0; i<non_overlap_mv->getLocalLength(); ++i){
       for(int j=0; j<numFields_; ++j){
-        if(editnonoverlap[j][i]!=0){
+        if(editnonoverlap(i,j)!=0){
           // editnonoverlap[j][i]=myOffset+which_id;
-          int ndof = Teuchos::as<int>(editnonoverlap[j][i]);
-          editnonoverlap[j][i]=myOffset+which_id;
+          int ndof = Teuchos::as<int>(editnonoverlap(i,j));
+          editnonoverlap(i,j)=myOffset+which_id;
           which_id+=ndof;
         }
         else{
-          editnonoverlap[j][i]=-1;
+          editnonoverlap(i,j)=-1;
         }
 
       }
     }
+    non_overlap_mv->modify_host();
+    non_overlap_mv->sync_device();
+    PHX::Device::fence();
   }
 
   // LINE 22: In the GUN paper. Were performed above, and the overlaped_mv is
@@ -909,6 +915,7 @@ DOFManager<LO,GO>::buildTaggedMultiVector(const ElementBlockAccess & ownedAccess
   }
 
   RCP<const Map> overlapmap       = buildOverlapMapFromElements(ownedAccess);
+  PHX::Device::fence();
 
   // LINE 22: In the GUN paper...the overlap_mv is reused for the tagged multivector.
   //          This is a bit of a practical abuse of the algorithm presented in the paper.
@@ -919,6 +926,7 @@ DOFManager<LO,GO>::buildTaggedMultiVector(const ElementBlockAccess & ownedAccess
 
     overlap_mv = Tpetra::createMultiVector<GO>(overlapmap,(size_t)numFields_);
     overlap_mv->putScalar(0); // if tpetra is not initialized with zeros
+    PHX::Device::fence();
   }
 
   /* 5.  Iterate through all local elements again, checking with the FP
@@ -930,7 +938,10 @@ DOFManager<LO,GO>::buildTaggedMultiVector(const ElementBlockAccess & ownedAccess
 
     // temporary working vector to fill each row in tagged array
     std::vector<int> working(overlap_mv->getNumVectors());
-    ArrayRCP<ArrayRCP<GO> > edittwoview = overlap_mv->get2dViewNonConst();
+    auto dual_view = overlap_mv->getDualView();
+    dual_view.sync_host();
+    PHX::Device::fence();
+    auto edittwoview_host = dual_view.view_host();
     for (size_t b = 0; b < blockOrder_.size(); ++b) {
       // there has to be a field pattern assocaited with the block
       if(fa_fps_[b]==Teuchos::null)
@@ -956,13 +967,16 @@ DOFManager<LO,GO>::buildTaggedMultiVector(const ElementBlockAccess & ownedAccess
             offset++;
           }
           for(std::size_t i=0;i<working.size();i++) {
-            auto current = edittwoview[i][lid];
-            edittwoview[i][lid] = (current > working[i]) ? current : working[i];
+            auto current = edittwoview_host(lid,i);
+            edittwoview_host(lid,i) = (current > working[i]) ? current : working[i];
           }
 
         }
       }
     }
+    overlap_mv->modify_host();
+    overlap_mv->sync_device();
+    PHX::Device::fence();
 
     // // verbose output for inspecting overlap_mv
     // for(int i=0;i<overlap_mv->getLocalLength(); i++) {
@@ -1284,6 +1298,10 @@ buildOverlapMapFromElements(const ElementBlockAccess & access) const
     }
   }
 
+
+
+
+
   Array<GO> overlapVector;
   for (typename std::set<GO>::const_iterator itr = overlapset.begin(); itr!=overlapset.end(); ++itr) {
     overlapVector.push_back(*itr);
@@ -1304,7 +1322,10 @@ fillGIDsFromOverlappedMV(const ElementBlockAccess & access,
   using Teuchos::ArrayRCP;
 
   //To generate elementGIDs we need to go through all of the local elements.
-  ArrayRCP<ArrayRCP<const GO> > twoview = overlap_mv.get2dView();
+  auto dual_view = overlap_mv.getDualView();
+  dual_view.sync_host();
+  PHX::Device::fence();
+  auto twoview_host = dual_view.view_host();
 
   //And for each of the things in fa_fp.fieldIds we go to that column. To the the row,
   //we move from globalID to localID in the map and use our local value for something.
@@ -1338,7 +1359,7 @@ fillGIDsFromOverlappedMV(const ElementBlockAccess & access,
           offset++;
           //Row will be lid. column will be whichField.
           //Shove onto local ordering
-          localOrdering.push_back(twoview[whichField][lid]+dofsPerField[whichField]);
+          localOrdering.push_back(twoview_host(lid,whichField)+dofsPerField[whichField]);
 
           dofsPerField[whichField]++;
         }

@@ -51,6 +51,7 @@
 
 #include "Kokkos_Core.hpp"
 #include "Kokkos_ArithTraits.hpp"
+#include "impl/Kokkos_Atomic_Generic.hpp"
 #include <sstream>
 #include <stdexcept>
 
@@ -736,23 +737,22 @@ outOfBounds (const IntegerType x, const IntegerType exclusiveUpperBound)
   };
 #endif // KOKKOS_ENABLE_SERIAL
 
+  template<class Scalar1, class Scalar2>
+  struct AbsMaxOper {
+    KOKKOS_FORCEINLINE_FUNCTION
+    static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
+      const auto val1_abs = Kokkos::Details::ArithTraits<Scalar1>::abs(val1);
+      const auto val2_abs = Kokkos::Details::ArithTraits<Scalar2>::abs(val2);
+      return val1_abs > val2_abs ? Scalar1(val1_abs) : Scalar1(val2_abs);
+    }
+  };
+
   template<class ExecutionSpace>
   struct AbsMaxOp {
-    // ETP:  Is this really what we want?  This seems very odd if
-    // Scalar != SCT::mag_type (e.g., Scalar == std::complex<T>)
-    //
-    // mfh: I didn't write this code, but note that we don't use T =
-    // Scalar here, we use T = ArithTraits<Scalar>::mag_type.  That
-    // makes this code reasonable.
-    template <typename T>
-    KOKKOS_INLINE_FUNCTION
-    T max(const T& a, const T& b) const { return a > b ? a : b; }
-
     template <typename Scalar>
     KOKKOS_INLINE_FUNCTION
     void operator() (Scalar& dest, const Scalar& src) const {
-      typedef Kokkos::Details::ArithTraits<Scalar> SCT;
-      Kokkos::atomic_assign(&dest, Scalar(max(SCT::abs(dest),SCT::abs(src))));
+      Kokkos::Impl::atomic_fetch_oper (AbsMaxOper<Scalar,Scalar>(), &dest, src);
     }
   };