Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve crs/bsr sorting performance #2293

Merged
merged 15 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions common/src/KokkosKernels_SimpleUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -358,13 +358,19 @@ struct ReduceMaxFunctor {
};

// Runs a Kokkos::parallel_reduce labelled "KokkosKernels::Common::ReduceMax"
// over the index range [0, num_elements) with ReduceMaxFunctor<view_type>
// constructed from view_to_reduce, storing the result in max_reduction.
// The form taking `exec` builds its RangePolicy from that execution space
// instance.
// NOTE(review): the two signature lines and the two typedef/parallel_reduce
// pairs below appear to be the removed and added sides of one diff hunk
// rendered back to back; only one of each belongs in the real header.
template <typename view_type, typename MyExecSpace>
void kk_view_reduce_max(size_t num_elements, view_type view_to_reduce,
void kk_view_reduce_max(const MyExecSpace &exec, size_t num_elements, view_type view_to_reduce,
typename view_type::non_const_value_type &max_reduction) {
typedef Kokkos::RangePolicy<MyExecSpace> my_exec_space;
Kokkos::parallel_reduce("KokkosKernels::Common::ReduceMax", my_exec_space(0, num_elements),
typedef Kokkos::RangePolicy<MyExecSpace> policy_t;
Kokkos::parallel_reduce("KokkosKernels::Common::ReduceMax", policy_t(exec, 0, num_elements),
ReduceMaxFunctor<view_type>(view_to_reduce), max_reduction);
}

// Convenience overload without an execution space argument: it
// default-constructs a MyExecSpace instance and forwards all other arguments
// to the kk_view_reduce_max overload whose first parameter is an execution
// space. MyExecSpace cannot be deduced from the arguments here, so callers
// have to name it explicitly.
template <typename view_type, typename MyExecSpace>
void kk_view_reduce_max(size_t num_elements, view_type view_to_reduce,
typename view_type::non_const_value_type &max_reduction) {
kk_view_reduce_max(MyExecSpace(), num_elements, view_to_reduce, max_reduction);
}

// xorshift hash/pseudorandom function (supported for 32- and 64-bit integer
// types only)
template <typename Value>
Expand Down Expand Up @@ -429,10 +435,14 @@ struct SequentialFillFunctor {
val_type start;
};

template <typename ExecSpace, typename V>
void sequential_fill(const ExecSpace &exec, const V &v, typename V::non_const_value_type start = 0) {
Kokkos::parallel_for(Kokkos::RangePolicy<ExecSpace>(exec, 0, v.extent(0)), SequentialFillFunctor<V>(v, start));
}

// Form of sequential_fill that takes no execution space argument.
// NOTE(review): the two-line Kokkos::parallel_for call and the single
// forwarding call below appear to be the removed and added sides of one diff
// hunk rendered back to back. The forwarding call passes a default-constructed
// typename V::execution_space, together with v and start, to the
// sequential_fill overload that takes an execution space instance.
template <typename V>
void sequential_fill(const V &v, typename V::non_const_value_type start = 0) {
Kokkos::parallel_for(Kokkos::RangePolicy<typename V::execution_space>(0, v.extent(0)),
SequentialFillFunctor<V>(v, start));
sequential_fill(typename V::execution_space(), v, start);
}

} // namespace Impl
Expand Down
6 changes: 6 additions & 0 deletions common/src/KokkosKernels_Utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1076,6 +1076,12 @@ void view_reduce_max(size_t num_elements, view_type view_to_reduce,
kk_view_reduce_max<view_type, MyExecSpace>(num_elements, view_to_reduce, max_reduction);
}

// Overload of view_reduce_max that accepts an execution space instance.
// It forwards exec, num_elements, view_to_reduce and max_reduction unchanged
// to kk_view_reduce_max<view_type, MyExecSpace>; the result is delivered
// through the max_reduction reference.
template <typename view_type, typename MyExecSpace>
void view_reduce_max(const MyExecSpace &exec, size_t num_elements, view_type view_to_reduce,
typename view_type::non_const_value_type &max_reduction) {
kk_view_reduce_max<view_type, MyExecSpace>(exec, num_elements, view_to_reduce, max_reduction);
}

template <typename size_type>
struct ReduceRowSizeFunctor {
const size_type *rowmap_view_begins;
Expand Down
9 changes: 9 additions & 0 deletions perf_test/sparse/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,15 @@ KOKKOSKERNELS_ADD_EXECUTABLE(
SOURCES KokkosSparse_mdf.cpp
)

# For the sake of build times, don't build this CRS sorting perf test by default.
# It can be enabled if needed by setting -DKokkosKernels_ENABLE_SORT_CRS_PERFTEST=ON.
# When enabled, this adds the sparse_sort_crs executable, built from
# KokkosSparse_sort_crs.cpp.
if (KokkosKernels_ENABLE_SORT_CRS_PERFTEST)
KOKKOSKERNELS_ADD_EXECUTABLE(
sparse_sort_crs
SOURCES KokkosSparse_sort_crs.cpp
)
endif ()

if (KokkosKernels_ENABLE_BENCHMARK)
KOKKOSKERNELS_ADD_BENCHMARK(
sparse_par_ilut
Expand Down
103 changes: 103 additions & 0 deletions perf_test/sparse/KokkosSparse_sort_crs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#include <algorithm>
#include <iostream>
#include <random>
#include <string>
#include "KokkosKernels_config.h"
#include "KokkosSparse_IOUtils.hpp"
#include "KokkosKernels_perf_test_utilities.hpp"

#include "KokkosSparse_CrsMatrix.hpp"
#include "KokkosSparse_SortCrs.hpp"

using perf_test::CommonInputParams;

// Command line parameters specific to this perf test, filled in by
// parse_inputs().
struct LocalParams {
// Path supplied with --mtx; run_experiment() passes it to the matrix reader.
std::string mtxFile;
};

// Writes the usage text to stderr: an "Options" heading, the option list
// returned by perf_test::list_common_options(), and then the options that are
// specific to this perf test.
void print_options() {
  std::ostream& err = std::cerr;
  err << "Options\n" << std::endl;
  err << perf_test::list_common_options();
  err << "\t[Required] --mtx <path> :: matrix to sort\n"
      << "\t[Optional] --repeat :: how many times to repeat sorting\n";
}

// Parses the arguments specific to this perf test into `params`.
//
//   params - receives the matrix path given with --mtx
//   argc   - number of entries in argv
//   argv   - argument vector; argv[0] is skipped
//
// Returns 0 on success. Returns 1, after printing a diagnostic and the usage
// text to stderr, when an argument is not recognized or when the required
// --mtx option was not supplied.
int parse_inputs(LocalParams& params, int argc, char** argv) {
  for (int i = 1; i < argc; ++i) {
    if (perf_test::check_arg_str(i, argc, argv, "--mtx", params.mtxFile)) {
      // Step over the option's value.
      // NOTE(review): presumably check_arg_str reads argv[i + 1] without
      // advancing i itself - confirm against the perf_test utilities.
      ++i;
    } else {
      std::cerr << "Unrecognized command line argument #" << i << ": " << argv[i] << std::endl;
      print_options();
      return 1;
    }
  }
  // --mtx is documented as required; fail here with a clear message instead
  // of handing an empty path to the matrix reader.
  if (params.mtxFile.empty()) {
    std::cerr << "Missing required argument: --mtx <path>" << std::endl;
    print_options();
    return 1;
  }
  return 0;
}

template <typename exec_space>
void run_experiment(int argc, char** argv, const CommonInputParams& common_params) {
using namespace KokkosSparse;

using mem_space = typename exec_space::memory_space;
using device_t = typename Kokkos::Device<exec_space, mem_space>;
using size_type = default_size_type;
using lno_t = default_lno_t;
using scalar_t = default_scalar;
using crsMat_t = KokkosSparse::CrsMatrix<scalar_t, lno_t, device_t, void, size_type>;

using graph_t = typename crsMat_t::StaticCrsGraphType;

LocalParams params;
if (parse_inputs(params, argc, argv)) return;

crsMat_t A = KokkosSparse::Impl::read_kokkos_crst_matrix<crsMat_t>(params.mtxFile.c_str());
std::cout << "Loaded matrix: " << A.numRows() << "x" << A.numCols() << " with " << A.nnz() << " entries.\n";
// This first sort call serves as a warm-up
KokkosSparse::sort_crs_matrix(A);
lno_t m = A.numRows();
lno_t n = A.numCols();
auto rowmapHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map);
auto entriesHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries);
typename crsMat_t::index_type shuffledEntries("shuffled entries", A.nnz());
// Randomly shuffle the entries within each row, so that the rows aren't
// already sorted. Leave the values alone; this changes the matrix numerically
// but this doesn't affect sorting.
for (lno_t i = 0; i < m; i++) {
std::random_shuffle(entriesHost.data() + i, entriesHost.data() + i + 1);
}
Kokkos::deep_copy(shuffledEntries, entriesHost);
exec_space exec;
Kokkos::Timer timer;
double totalTime = 0;
for (int rep = 0; rep < common_params.repeat; rep++) {
Kokkos::deep_copy(exec, A.graph.entries, shuffledEntries);
exec.fence();
timer.reset();
KokkosSparse::sort_crs_matrix(exec, A);
exec.fence();
totalTime += timer.seconds();
}
std::cout << "Mean sort_crs_matrix time over " << common_params.repeat << " trials: ";
std::cout << totalTime / common_params.repeat << "\n";
}

// KOKKOSKERNELS_PERF_TEST_NAME is set to the run_experiment template defined
// above before the instantiation header is included.
// NOTE(review): presumably KokkosKernels_perf_test_instantiation.hpp uses this
// macro to define main_instantiation(), which would dispatch to
// run_experiment<exec_space> for the selected backend - confirm in that header.
#define KOKKOSKERNELS_PERF_TEST_NAME run_experiment
#include "KokkosKernels_perf_test_instantiation.hpp"
// Program entry point: forwards the command line to main_instantiation() and
// returns its result as the exit status.
int main(int argc, char** argv) { return main_instantiation(argc, argv); } // main
Loading
Loading