Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MueLu: Prep for CoalesceDrop refactor #13278

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions packages/muelu/src/Graph/Containers/MueLu_LWGraphBase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,10 @@ class LWGraphBase {
return graph;
}

const std::string& getObjectLabel() const {
return objectLabel_;
}

private:
//! Underlying graph (with label)
mutable local_graph_type graph_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "MueLu_LWGraphBase.hpp"
#include "MueLu_LWGraph_kokkos_fwd.hpp"
#include "MueLu_LWGraph_fwd.hpp"

namespace MueLu {

Expand All @@ -28,6 +29,9 @@ namespace MueLu {
template <class LocalOrdinal, class GlobalOrdinal, class Node>
class LWGraph_kokkos : public MueLu::LWGraphBase<LocalOrdinal, GlobalOrdinal, Node, false> {
using LWGraphBase<LocalOrdinal, GlobalOrdinal, Node, false>::LWGraphBase;

public:
RCP<MueLu::LWGraph<LocalOrdinal, GlobalOrdinal, Node> > copyToHost();
};

} // namespace MueLu
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,35 @@
#ifndef MUELU_LWGRAPH_KOKKOS_DEF_HPP
#define MUELU_LWGRAPH_KOKKOS_DEF_HPP

#include "MueLu_LWGraph.hpp"
#include "MueLu_LWGraph_kokkos_decl.hpp"

namespace MueLu {

template <class LocalOrdinal, class GlobalOrdinal, class Node>
RCP<MueLu::LWGraph<LocalOrdinal, GlobalOrdinal, Node> > MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::copyToHost() {
auto graph = this->getGraph();

auto row_map_h = Kokkos::create_mirror_view(graph.row_map);
auto entries_h = Kokkos::create_mirror_view(graph.entries);
Kokkos::deep_copy(row_map_h, graph.row_map);
Kokkos::deep_copy(entries_h, graph.entries);

using local_graph_type_host = typename MueLu::LWGraphBase<LocalOrdinal, GlobalOrdinal, Node, true>::local_graph_type;
auto graph_h = local_graph_type_host(entries_h, row_map_h);

auto lw_h = rcp(new MueLu::LWGraph<LocalOrdinal, GlobalOrdinal, Node>(graph_h, this->GetDomainMap(), this->GetImportMap(), this->getObjectLabel()));

using bndry_nodes_type = typename MueLu::LWGraphBase<LocalOrdinal, GlobalOrdinal, Node, true>::boundary_nodes_type;

auto bndry = this->GetBoundaryNodeMap();
auto bndry_h = bndry_nodes_type("boundary_nodes", bndry.extent(0));
Kokkos::deep_copy(bndry_h, bndry);
lw_h->SetBoundaryNodeMap(bndry_h);

return lw_h;
}

} // namespace MueLu

#endif // MUELU_LWGRAPH_KOKKOS_DEF_HPP
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,6 @@ void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level
typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));

RCP<LocalOrdinalVector> ghostedBlockNumber;
ArrayRCP<const LO> g_block_id;

if (algo == "distance laplacian") {
// Grab the coordinates for distance laplacian
Expand All @@ -193,7 +192,6 @@ void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level
} else {
ghostedBlockNumber = BlockNumber;
}
g_block_id = ghostedBlockNumber->getData(0);
// }
if (algo == "block diagonal colored signed classical")
generateColoringGraph = true;
Expand Down Expand Up @@ -443,11 +441,13 @@ void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level
// RS style needs the max negative off-diagonal, SA style needs the diagonal
if (useSignedClassicalRS) {
if (ghostedBlockNumber.is_null()) {
negMaxOffDiagonal = MueLu::Utilities<SC, LO, GO, NO>::GetMatrixMaxMinusOffDiagonal(*A);
auto negMaxOffDiagonalVec = MueLu::Utilities<SC, LO, GO, NO>::GetMatrixMaxMinusOffDiagonal(*A);
negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
if (GetVerbLevel() & Statistics1)
GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl;
} else {
negMaxOffDiagonal = MueLu::Utilities<SC, LO, GO, NO>::GetMatrixMaxMinusOffDiagonal(*A, *ghostedBlockNumber);
auto negMaxOffDiagonalVec = MueLu::Utilities<SC, LO, GO, NO>::GetMatrixMaxMinusOffDiagonal(*A, *ghostedBlockNumber);
negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
if (GetVerbLevel() & Statistics1)
GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl;
}
Expand All @@ -468,6 +468,10 @@ void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level
}
}

ArrayRCP<const LO> g_block_id;
if (!ghostedBlockNumber.is_null())
g_block_id = ghostedBlockNumber->getData(0);

LO realnnz = 0;
rows(0) = 0;
for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ class ScalarFunctor {
LO col = rowView.colidx(colID);
impl_Scalar val = rowView.value(colID);

if (!dropFunctor(row, col, rowView.value(colID)) || row == col) {
if ((!bndNodes(row) && !dropFunctor(row, col, rowView.value(colID))) || row == col) {
colsAux(offset + rownnz) = col;

LO valID = (reuseGraph ? colID : rownnz);
Expand Down Expand Up @@ -216,7 +216,7 @@ class ScalarFunctor {
// boundaryNodes[row] = true;
// We do not do it this way now because there is no framework for distinguishing isolated
// and boundary nodes in the aggregation algorithms
bndNodes(row) = (rownnz == 1 && aggregationMayCreateDirichlet);
bndNodes(row) |= (rownnz == 1 && aggregationMayCreateDirichlet);

nnz += rownnz;
}
Expand Down Expand Up @@ -521,6 +521,9 @@ void CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
} else if (blkSize == 1 && threshold != zero) {
// Scalar problem with dropping

// Detect and record rows that correspond to Dirichlet boundary conditions
boundaryNodes = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold);

typedef typename Matrix::local_matrix_type local_matrix_type;
typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type;
typedef typename kokkos_graph_type::row_map_type::non_const_type rows_type;
Expand Down Expand Up @@ -566,8 +569,6 @@ void CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
valsAux = vals_type(Kokkos::ViewAllocateWithoutInitializing("FA_aux_vals"), nnzA);
}

typename boundary_nodes_type::non_const_type bndNodes(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numRows);

LO nnzFA = 0;
{
if (algo == "classical") {
Expand All @@ -587,8 +588,8 @@ void CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
auto ghostedDiagView = ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadWrite);

CoalesceDrop_Kokkos_Details::ClassicalDropFunctor<LO, decltype(ghostedDiagView)> dropFunctor(ghostedDiagView, threshold);
CoalesceDrop_Kokkos_Details::ScalarFunctor<typename ATS::val_type, LO, local_matrix_type, decltype(bndNodes), decltype(dropFunctor)>
scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, aggregationMayCreateDirichlet);
CoalesceDrop_Kokkos_Details::ScalarFunctor<typename ATS::val_type, LO, local_matrix_type, decltype(boundaryNodes), decltype(dropFunctor)>
scalarFunctor(kokkosMatrix, boundaryNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, aggregationMayCreateDirichlet);

Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0, numRows),
scalarFunctor, nnzFA);
Expand Down Expand Up @@ -659,8 +660,8 @@ void CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::

CoalesceDrop_Kokkos_Details::DistanceLaplacianDropFunctor<LO, decltype(ghostedLaplDiagView), decltype(distFunctor)>
dropFunctor(ghostedLaplDiagView, distFunctor, threshold);
CoalesceDrop_Kokkos_Details::ScalarFunctor<SC, LO, local_matrix_type, decltype(bndNodes), decltype(dropFunctor)>
scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, true);
CoalesceDrop_Kokkos_Details::ScalarFunctor<SC, LO, local_matrix_type, decltype(boundaryNodes), decltype(dropFunctor)>
scalarFunctor(kokkosMatrix, boundaryNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, true);

Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0, numRows),
scalarFunctor, nnzFA);
Expand All @@ -669,8 +670,6 @@ void CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
}
numDropped = nnzA - nnzFA;

boundaryNodes = bndNodes;

{
SubFactoryMonitor m2(*this, "CompressRows", currentLevel);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <Xpetra_Matrix_fwd.hpp>

#include "MueLu_LWGraph_fwd.hpp"
#include "MueLu_LWGraph_kokkos_fwd.hpp"
#include "MueLu_Exceptions.hpp"
#include "MueLu_SingleLevelFactoryBase.hpp"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

#include "MueLu_Aggregates.hpp"
#include "MueLu_LWGraph.hpp"
#include "MueLu_LWGraph_kokkos.hpp"
#include "MueLu_Level.hpp"
#include "MueLu_MasterList.hpp"
#include "MueLu_Monitor.hpp"
Expand Down Expand Up @@ -60,10 +61,10 @@ RCP<const ParameterList> NotayAggregationFactory<Scalar, LocalOrdinal, GlobalOrd
#undef SET_VALID_ENTRY

// general variables needed in AggregationFactory
validParamList->set<RCP<const FactoryBase> >("A", null, "Generating factory of the matrix");
validParamList->set<RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
validParamList->set<RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
validParamList->set<RCP<const FactoryBase> >("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'");
validParamList->set<RCP<const FactoryBase>>("A", null, "Generating factory of the matrix");
validParamList->set<RCP<const FactoryBase>>("Graph", null, "Generating factory of the graph");
validParamList->set<RCP<const FactoryBase>>("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
validParamList->set<RCP<const FactoryBase>>("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'");

return validParamList;
}
Expand Down Expand Up @@ -113,8 +114,14 @@ void NotayAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(L
"NotayAggregationFactory::Build(): \"aggregation: pairwise: size\""
" must be a strictly positive integer");

RCP<const LWGraph> graph = Get<RCP<LWGraph> >(currentLevel, "Graph");
RCP<const Matrix> A = Get<RCP<Matrix> >(currentLevel, "A");
RCP<const LWGraph> graph;
if (IsType<RCP<LWGraph>>(currentLevel, "Graph"))
graph = Get<RCP<LWGraph>>(currentLevel, "Graph");
else {
auto graph_k = Get<RCP<LWGraph_kokkos>>(currentLevel, "Graph");
graph = graph_k->copyToHost();
}
RCP<const Matrix> A = Get<RCP<Matrix>>(currentLevel, "A");

// Setup aggregates & aggStat objects
RCP<Aggregates> aggregates = rcp(new Aggregates(*graph));
Expand Down Expand Up @@ -161,8 +168,8 @@ void NotayAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(L
if (ordering == O_RANDOM)
MueLu::NotayUtils::RandomReorder(orderingVector);
else if (ordering == O_CUTHILL_MCKEE) {
RCP<Xpetra::Vector<LO, LO, GO, NO> > rcmVector = MueLu::Utilities<SC, LO, GO, NO>::CuthillMcKee(*A);
auto localVector = rcmVector->getData(0);
RCP<Xpetra::Vector<LO, LO, GO, NO>> rcmVector = MueLu::Utilities<SC, LO, GO, NO>::CuthillMcKee(*A);
auto localVector = rcmVector->getData(0);
for (LO i = 0; i < numRows; i++)
orderingVector[i] = localVector[i];
}
Expand Down Expand Up @@ -198,7 +205,7 @@ void NotayAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(L
// Directly compute rowsum from A, rather than coarseA
row_sum_type rowSum("rowSum", numLocalAggregates);
{
std::vector<std::vector<LO> > agg2vertex(numLocalAggregates);
std::vector<std::vector<LO>> agg2vertex(numLocalAggregates);
auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
for (LO i = 0; i < (LO)numRows; i++) {
if (aggStat[i] != AGGREGATED)
Expand Down Expand Up @@ -248,8 +255,8 @@ void NotayAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(L
if (ordering == O_RANDOM)
MueLu::NotayUtils::RandomReorder(localOrderingVector);
else if (ordering == O_CUTHILL_MCKEE) {
RCP<Xpetra::Vector<LO, LO, GO, NO> > rcmVector = MueLu::Utilities<SC, LO, GO, NO>::CuthillMcKee(*A);
auto localVector = rcmVector->getData(0);
RCP<Xpetra::Vector<LO, LO, GO, NO>> rcmVector = MueLu::Utilities<SC, LO, GO, NO>::CuthillMcKee(*A);
auto localVector = rcmVector->getData(0);
for (LO i = 0; i < numRows; i++)
localOrderingVector[i] = localVector[i];
}
Expand Down
4 changes: 2 additions & 2 deletions packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,9 @@ class UtilitiesBase {
* @ret: vector containing max_{i\not=k}(-a_ik)
*/

static Teuchos::ArrayRCP<Magnitude> GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A);
static Teuchos::RCP<Xpetra::Vector<Magnitude, LocalOrdinal, GlobalOrdinal, Node>> GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A);

static Teuchos::ArrayRCP<Magnitude> GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, const Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>& BlockNumber);
static Teuchos::RCP<Xpetra::Vector<Magnitude, LocalOrdinal, GlobalOrdinal, Node>> GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, const Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>& BlockNumber);

/*! @brief Return vector containing inverse of input vector
*
Expand Down
103 changes: 67 additions & 36 deletions packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -584,53 +584,84 @@ UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
}

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
Teuchos::ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>
Teuchos::RCP<Xpetra::Vector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LocalOrdinal, GlobalOrdinal, Node>>
UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A) {
size_t numRows = A.getRowMap()->getLocalNumElements();
Magnitude ZERO = Teuchos::ScalarTraits<Magnitude>::zero();
Teuchos::ArrayRCP<Magnitude> maxvec(numRows);
Teuchos::ArrayView<const LocalOrdinal> cols;
Teuchos::ArrayView<const Scalar> vals;
for (size_t i = 0; i < numRows; ++i) {
A.getLocalRowView(i, cols, vals);
Magnitude mymax = ZERO;
for (LocalOrdinal j = 0; j < cols.size(); ++j) {
if (Teuchos::as<size_t>(cols[j]) != i) {
mymax = std::max(mymax, -Teuchos::ScalarTraits<Scalar>::real(vals[j]));
}
}
maxvec[i] = mymax;
}
return maxvec;
// Get/Create distributed objects
RCP<const Map> rowMap = A.getRowMap();
auto diag = Xpetra::VectorFactory<Magnitude, LocalOrdinal, GlobalOrdinal, Node>::Build(rowMap, false);

// Implement using Kokkos
using local_vector_type = typename Vector::dual_view_type::t_dev_um;
using local_matrix_type = typename Matrix::local_matrix_type;
using execution_space = typename local_vector_type::execution_space;
using values_type = typename local_matrix_type::values_type;
using scalar_type = typename values_type::non_const_value_type;
using mag_type = typename Kokkos::ArithTraits<scalar_type>::mag_type;
using KAT_S = typename Kokkos::ArithTraits<scalar_type>;
using KAT_M = typename Kokkos::ArithTraits<mag_type>;
using size_type = typename local_matrix_type::non_const_size_type;

auto diag_dev = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll);
auto local_mat_dev = A.getLocalMatrixDevice();
Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));

Kokkos::parallel_for(
"GetMatrixMaxMinusOffDiagonal", my_policy,
KOKKOS_LAMBDA(const LocalOrdinal rowIdx) {
auto mymax = KAT_M::zero();
auto row = local_mat_dev.row(rowIdx);
for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
if (rowIdx != row.colidx(entryIdx)) {
mymax = std::max(mymax, -KAT_S::magnitude(row.value(entryIdx)));
}
}
diag_dev(rowIdx, 0) = mymax;
});

return diag;
}

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
Teuchos::ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>
Teuchos::RCP<Xpetra::Vector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LocalOrdinal, GlobalOrdinal, Node>>
UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, const Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>& BlockNumber) {
TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "GetMatrixMaxMinusOffDiagonal: BlockNumber must match's A's column map.");

Teuchos::ArrayRCP<const LocalOrdinal> block_id = BlockNumber.getData(0);
// Get/Create distributed objects
RCP<const Map> rowMap = A.getRowMap();
auto diag = Xpetra::VectorFactory<Magnitude, LocalOrdinal, GlobalOrdinal, Node>::Build(rowMap, false);

size_t numRows = A.getRowMap()->getLocalNumElements();
Magnitude ZERO = Teuchos::ScalarTraits<Magnitude>::zero();
Teuchos::ArrayRCP<Magnitude> maxvec(numRows);
Teuchos::ArrayView<const LocalOrdinal> cols;
Teuchos::ArrayView<const Scalar> vals;
for (size_t i = 0; i < numRows; ++i) {
A.getLocalRowView(i, cols, vals);
Magnitude mymax = ZERO;
for (LocalOrdinal j = 0; j < cols.size(); ++j) {
if (Teuchos::as<size_t>(cols[j]) != i && block_id[i] == block_id[cols[j]]) {
mymax = std::max(mymax, -Teuchos::ScalarTraits<Scalar>::real(vals[j]));
}
}
// printf("A(%d,:) row_scale(block) = %6.4e\n",(int)i,mymax);
// Implement using Kokkos
using local_vector_type = typename Vector::dual_view_type::t_dev_um;
using local_matrix_type = typename Matrix::local_matrix_type;
using execution_space = typename local_vector_type::execution_space;
using values_type = typename local_matrix_type::values_type;
using scalar_type = typename values_type::non_const_value_type;
using mag_type = typename Kokkos::ArithTraits<scalar_type>::mag_type;
using KAT_S = typename Kokkos::ArithTraits<scalar_type>;
using KAT_M = typename Kokkos::ArithTraits<mag_type>;
using size_type = typename local_matrix_type::non_const_size_type;

maxvec[i] = mymax;
}
return maxvec;
auto diag_dev = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll);
auto local_mat_dev = A.getLocalMatrixDevice();
auto local_block_dev = BlockNumber.getDeviceLocalView(Xpetra::Access::ReadOnly);
Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));

Kokkos::parallel_for(
"GetMatrixMaxMinusOffDiagonal", my_policy,
KOKKOS_LAMBDA(const LocalOrdinal rowIdx) {
auto mymax = KAT_M::zero();
auto row = local_mat_dev.row(rowIdx);
for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
if ((rowIdx != row.colidx(entryIdx)) && (local_block_dev(rowIdx, 0) == local_block_dev(row.colidx(entryIdx), 0))) {
mymax = std::max(mymax, -KAT_S::magnitude(row.value(entryIdx)));
}
}
diag_dev(rowIdx, 0) = mymax;
});

return diag;
}

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
Expand Down
Loading
Loading