Skip to content

Commit

Permalink
Merge 'trilinos/Trilinos:develop' (dbde32c) into 'tcad-charon/Trilino…
Browse files Browse the repository at this point in the history
…s:develop' (b8a1b49).

* trilinos-develop:
  Fix suggested vector size when nr == 0
  ML: Oops
  Tpetra: Make CrsMatrix unpack benchmark only build in an MPI build
  Tpetra: Fix trilinos#6300 (build warnings)
  Tpetra: Improve CrsMatrix dense row unpack benchmark
  Tpetra: Finish CrsMatrix dense row unpack benchmark
  Tpetra::unpackCrsMatrixAndCombine: Fix build warnings
  ML: Moving output to higher verbosity level
  ML: Fixes to RefMaxwell default behavior
  Tpetra: Start work on benchmark for CrsMatrix dense row unpack
  Tpetra,Stokhos: Refactor PackTraits
  Tpetra: Simplify unpackRow(Count)
  Tpetra: Fix 'inconsistent override' warnings
  • Loading branch information
Jenkins Pipeline committed Nov 20, 2019
2 parents b8a1b49 + dbde32c commit 1683f23
Show file tree
Hide file tree
Showing 20 changed files with 1,052 additions and 360 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,8 @@ inline int kk_get_suggested_vector_size(
break;
case Exec_CUDA:

suggested_vector_size_ = nnz / double (nr) + 0.5;
if (nr > 0)
suggested_vector_size_ = nnz / double (nr) + 0.5;
if (suggested_vector_size_ < 3){
suggested_vector_size_ = 2;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ int ML_Epetra::EdgeMatrixFreePreconditioner::FormCoarseMatrix()
// Epetra_CrsMatrix_Wrap_ML_Operator(CoarseMat_ML,*Comm_,*CoarseMap_,&CoarseMatrix);
int nnz=100;
double time;
ML_Operator2EpetraCrsMatrix(CoarseMat_ML,CoarseMatrix,nnz,true,time,0,verbose_);
ML_Operator2EpetraCrsMatrix(CoarseMat_ML,CoarseMatrix,nnz,true,time,0,very_verbose_);
// NTS: This is a hack to get around the sticking ones on the diagonal issue;

/* Cleanup */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ int ML_Epetra::FaceMatrixFreePreconditioner::FormCoarseMatrix()
/* Wrap to Epetra-land */
int nnz=100;
double time;
ML_Operator2EpetraCrsMatrix(CoarseMat_ML,CoarseMatrix,nnz,true,time,0,verbose_);
ML_Operator2EpetraCrsMatrix(CoarseMat_ML,CoarseMatrix,nnz,true,time,0,very_verbose_);
// NTS: This is a hack to get around the sticking ones on the diagonal issue;

/* Cleanup */
Expand Down
4 changes: 2 additions & 2 deletions packages/ml/src/RefMaxwell/ml_RefMaxwell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ void FindLocalDirichletLikeRowsFromOnesAndZeros(const Epetra_CrsMatrix & Matrix,
}/*end for*/
// printf("[%2d] Dirichlet Rows Detected 11 %5d/%5d 22 %5d/%5d\n",Matrix.Comm().MyPID(),numBCRows11,Nrows,numBCRows22,Nrows);
dirichletRows11_rcp.resize(numBCRows11);
dirichletRows22_rcp.resize(numBCRows22);
dirichletRows22_rcp.resize(numBCRows22);
}/*end FindLocalDirichletLikeRowsFromOnesAndZeros*/


Expand Down Expand Up @@ -360,7 +360,7 @@ int ML_Epetra::RefMaxwellPreconditioner::ComputePreconditioner(const bool /* Che
}
else {
if(verbose_ && !Comm_->MyPID()) printf("EMFP: Using normal dirichlet conditions\n");
Apply_BCsToMatrixRows(dirichletNodes11.data(),dirichletNodes11.size(),*TMT_Agg_Matrix_);
// Apply_BCsToMatrixRows(dirichletNodes11.data(),dirichletNodes11.size(),*TMT_Agg_Matrix_);
}

Remove_Zeroed_Rows(*TMT_Agg_Matrix_);
Expand Down
13 changes: 8 additions & 5 deletions packages/ml/src/RefMaxwell/ml_RefMaxwell_Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -886,12 +886,12 @@ int ML_Epetra::RefMaxwell_Aggregate_Nodes(const Epetra_CrsMatrix & A, Teuchos::P
double Threshold = List.get("aggregation: threshold", 0.0);
double RowSum_Threshold = List.get("aggregation: rowsum threshold", -1.0);
double DampingFactor = List.get("aggregation: damping factor", 0.0);
int PSmSweeps = List.get("aggregation: smoothing sweeps", 1);
int PSmSweeps = List.get("aggregation: smoothing sweeps", DampingFactor == 0.0 ? 0 : 1);
std::string EigType = List.get("eigen-analysis: type","cg");
int NumEigenIts = List.get("eigen-analysis: iterations",10);
int NodesPerAggr = List.get("aggregation: nodes per aggregate",
ML_Aggregate_Get_OptimalNumberOfNodesPerAggregate());
int doQR = (int) List.get("aggregation: do qr",false);
int doQR = (int) List.get("aggregation: do qr",true);

bool UseAux = List.get("aggregation: aux: enable",false);
double AuxThreshold = List.get("aggregation: aux: threshold",0.0);
Expand All @@ -913,14 +913,17 @@ int ML_Epetra::RefMaxwell_Aggregate_Nodes(const Epetra_CrsMatrix & A, Teuchos::P
ML_Aggregate_Set_MaxLevels(MLAggr, 2);
ML_Aggregate_Set_StartLevel(MLAggr, 0);
ML_Aggregate_Set_Threshold(MLAggr, Threshold);
ML_Aggregate_Set_RowSum_Threshold(MLAggr, RowSum_Threshold);
if(RowSum_Threshold > 0.0) ML_Aggregate_Set_RowSum_Threshold(MLAggr, RowSum_Threshold);
ML_Aggregate_Set_MaxCoarseSize(MLAggr,1);
MLAggr->cur_level = 0;
ML_Aggregate_Set_Reuse(MLAggr);
ML_Aggregate_Set_Do_QR(MLAggr,doQR);

ML_Aggregate_Set_DampingFactor(MLAggr,DampingFactor);
ML_Aggregate_Set_DampingSweeps(MLAggr,PSmSweeps,0);
if(DampingFactor > 0.0) {
ML_Aggregate_Set_DampingFactor(MLAggr,DampingFactor);
ML_Aggregate_Set_DampingSweeps(MLAggr,PSmSweeps,0);
}

if( EigType == "cg" ) ML_Operator_Set_SpectralNormScheme_Calc(A_ML);
else if( EigType == "Anorm" ) ML_Operator_Set_SpectralNormScheme_Anorm(A_ML);
else if( EigType == "Anasazi" ) ML_Operator_Set_SpectralNormScheme_Anasazi(A_ML);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,56 +135,44 @@ struct DeviceForNode2< Kokkos::Compat::KokkosDeviceWrapperNode<Device> > {
}

#include "Tpetra_Details_PackTraits.hpp"
#include "Tpetra_Details_ScalarViewTraits.hpp"

namespace Tpetra {
namespace Details {

/// \brief Partial specialization of PackTraits for Sacado's PCE UQ type.
///
/// \tparam S The underlying scalar type in the PCE UQ type.
/// \tparam D The Kokkos "device" type.
template<typename S, typename D>
struct PackTraits< Sacado::UQ::PCE<S>, D > {
typedef Sacado::UQ::PCE<S> value_type;
typedef typename D::execution_space execution_space;
typedef D device_type;
typedef typename execution_space::size_type size_type;
template<class S>
struct PackTraits<Sacado::UQ::PCE<S>> {
using value_type = Sacado::UQ::PCE<S>;

/// \brief Whether the number of bytes required to pack one instance
/// of \c value_type is fixed at compile time.
static const bool compileTimeSize = false;

typedef Kokkos::View<const char*, device_type, Kokkos::MemoryUnmanaged> input_buffer_type;
typedef Kokkos::View<char*, device_type, Kokkos::MemoryUnmanaged> output_buffer_type;
typedef Kokkos::View<const value_type*, device_type, Kokkos::MemoryUnmanaged> input_array_type;
typedef Kokkos::View<value_type*, device_type, Kokkos::MemoryUnmanaged> output_array_type;
using input_buffer_type = Kokkos::View<const char*, Kokkos::AnonymousSpace>;
using output_buffer_type = Kokkos::View<char*, Kokkos::AnonymousSpace>;
using input_array_type = Kokkos::View<const value_type*, Kokkos::AnonymousSpace>;
using output_array_type = Kokkos::View<value_type*, Kokkos::AnonymousSpace>;

typedef typename value_type::value_type scalar_value_type;
typedef PackTraits< scalar_value_type, device_type > SPT;
typedef typename SPT::input_array_type scalar_input_array_type;
typedef typename SPT::output_array_type scalar_output_array_type;
using scalar_value_type = typename value_type::value_type;
using SPT = PackTraits<scalar_value_type>;
using scalar_input_array_type = typename SPT::input_array_type;
using scalar_output_array_type = typename SPT::output_array_type;

KOKKOS_INLINE_FUNCTION
static size_t numValuesPerScalar (const value_type& x) {
return x.size ();
}

static Kokkos::View<value_type*, device_type>
allocateArray (const value_type& x, const size_t numEnt, const std::string& label = "")
{
typedef Kokkos::View<value_type*, device_type> view_type;

const size_type numVals = numValuesPerScalar (x);
return view_type (label, static_cast<size_type> (numEnt), numVals);
}

KOKKOS_INLINE_FUNCTION
static Kokkos::pair<int, size_t>
packArray (char outBuf[],
const value_type inBuf[],
const size_t numEnt)
{
typedef Kokkos::pair<int, size_t> return_type;
using return_type = Kokkos::pair<int, size_t>;
size_t numBytes = 0;
int errorCode = 0;

Expand Down Expand Up @@ -232,7 +220,7 @@ struct PackTraits< Sacado::UQ::PCE<S>, D > {
const char inBuf[],
const size_t numEnt)
{
typedef Kokkos::pair<int, size_t> return_type;
using return_type = Kokkos::pair<int, size_t>;
size_t numBytes = 0;
int errorCode = 0;

Expand All @@ -242,7 +230,7 @@ struct PackTraits< Sacado::UQ::PCE<S>, D > {
else {
// Check whether output array is contiguously allocated based on the size
// of the first entry. We have a simpler method to unpack in this case
const size_type scalar_size = numValuesPerScalar (outBuf[0]);
const size_t scalar_size = numValuesPerScalar (outBuf[0]);
const scalar_value_type* last_coeff = outBuf[numEnt - 1].coeff ();
const scalar_value_type* last_coeff_expected =
outBuf[0].coeff () + (numEnt - 1) * scalar_size;
Expand Down Expand Up @@ -308,6 +296,27 @@ struct PackTraits< Sacado::UQ::PCE<S>, D > {
}
}; // struct PackTraits

/// \brief ScalarViewTraits partial specialization covering
///   Sacado's PCE UQ scalar type.
///
/// \tparam S The underlying scalar type in the PCE UQ type.
/// \tparam D The Kokkos "device" type.
template<typename S, typename D>
struct ScalarViewTraits<Sacado::UQ::PCE<S>, D> {
  using value_type = Sacado::UQ::PCE<S>;
  using device_type = D;

  /// \brief Construct a Kokkos::View<value_type*, device_type>.
  ///
  /// \param x [in] Prototype entry; it is only handed to
  ///   PackTraits<value_type>::numValuesPerScalar to obtain the
  ///   per-scalar value count.
  /// \param numEnt [in] First extent passed to the View constructor.
  /// \param label [in] Label passed to the View constructor.
  ///
  /// \return The newly constructed View.
  static Kokkos::View<value_type*, device_type>
  allocateArray (const value_type& x,
                 const size_t numEnt,
                 const std::string& label = "")
  {
    using array_view_type = Kokkos::View<value_type*, device_type>;
    // The per-scalar value count is forwarded to the View
    // constructor as an extra argument after numEnt.
    const size_t valuesPerEntry =
      PackTraits<value_type>::numValuesPerScalar (x);
    return array_view_type (label, numEnt, valuesPerEntry);
  }
};

} // namespace Details
} // namespace Tpetra

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,12 @@ TRIBITS_ADD_EXECUTABLE(
SOURCES CrsMatrix_sumIntoLocalValues.cpp
COMM serial mpi
)

# Benchmark executable built from CrsMatrixDenseRowUnpack.cpp.
# This benchmark needs at least 2 MPI processes.
# It builds fine without MPI, but there's no point
# in building it for that case, so it is restricted
# to MPI builds via "COMM mpi" below.
TRIBITS_ADD_EXECUTABLE(
CrsMatrixDenseRowUnpack
SOURCES CrsMatrixDenseRowUnpack.cpp
COMM mpi
)
Loading

0 comments on commit 1683f23

Please sign in to comment.