Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kokkos snapshot #9958

Merged
merged 2 commits into from
Nov 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/kokkos-kernels/master_history.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ tag: 3.3.00 date: 12/16/2020 master: 42defc56 release: e5279e55
tag: 3.3.01 date: 01/18/2021 master: f64b1c57 release: 4e1cc00b
tag: 3.4.00 date: 04/26/2021 master: fe439b21 release: d3c33910
tag: 3.4.01 date: 05/20/2021 master: 564dccb3 release: 4c62eb86
tag: 3.5.00 date: 10/27/2021 master: 00189c0b release: ffe069e7
tag: 3.5.00 date: 11/19/2021 master: 00189c0b release: f171533d
113 changes: 80 additions & 33 deletions packages/kokkos/algorithms/src/Kokkos_Sort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,10 @@ class BinSort {
//----------------------------------------
// Constructor: takes the keys, the binning_operator and optionally whether to
// sort within bins (default false)
BinSort(const_key_view_type keys_, int range_begin_, int range_end_,
BinSortOp bin_op_, bool sort_within_bins_ = false)
template <typename ExecutionSpace>
BinSort(const ExecutionSpace& exec, const_key_view_type keys_,
int range_begin_, int range_end_, BinSortOp bin_op_,
bool sort_within_bins_ = false)
: keys(keys_),
keys_rnd(keys_),
bin_op(bin_op_),
Expand All @@ -222,50 +224,63 @@ class BinSort {
"Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins());
bin_count_const = bin_count_atomic;
bin_offsets =
offset_type(view_alloc(WithoutInitializing,
offset_type(view_alloc(exec, WithoutInitializing,
"Kokkos::SortImpl::BinSortFunctor::bin_offsets"),
bin_op.max_bins());
sort_order =
offset_type(view_alloc(WithoutInitializing,
offset_type(view_alloc(exec, WithoutInitializing,
"Kokkos::SortImpl::BinSortFunctor::sort_order"),
range_end - range_begin);
}

// Constructor for a sub-range of the keys that does not take an execution
// space instance. Delegates to the execution-space-aware overload, passing a
// default-constructed execution_space.
BinSort(const_key_view_type keys_, int range_begin_, int range_end_,
BinSortOp bin_op_, bool sort_within_bins_ = false)
: BinSort(execution_space{}, keys_, range_begin_, range_end_, bin_op_,
sort_within_bins_) {}

// Constructor covering the whole key view on a caller-supplied execution
// space instance. Delegates to the ranged overload with range_begin_ = 0 and
// range_end_ = keys_.extent(0).
template <typename ExecutionSpace>
BinSort(const ExecutionSpace& exec, const_key_view_type keys_,
BinSortOp bin_op_, bool sort_within_bins_ = false)
: BinSort(exec, keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {}

BinSort(const_key_view_type keys_, BinSortOp bin_op_,
bool sort_within_bins_ = false)
: BinSort(keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {}
: BinSort(execution_space{}, keys_, bin_op_, sort_within_bins_) {}

//----------------------------------------
// Create the permutation vector, the bin_offset array and the bin_count
// array. Can be called again if keys changed
void create_permute_vector() {
template <class ExecutionSpace = execution_space>
void create_permute_vector(const ExecutionSpace& exec = execution_space{}) {
const size_t len = range_end - range_begin;
Kokkos::parallel_for(
"Kokkos::Sort::BinCount",
Kokkos::RangePolicy<execution_space, bin_count_tag>(0, len), *this);
Kokkos::RangePolicy<ExecutionSpace, bin_count_tag>(exec, 0, len),
*this);
Kokkos::parallel_scan("Kokkos::Sort::BinOffset",
Kokkos::RangePolicy<execution_space, bin_offset_tag>(
0, bin_op.max_bins()),
Kokkos::RangePolicy<ExecutionSpace, bin_offset_tag>(
exec, 0, bin_op.max_bins()),
*this);

Kokkos::deep_copy(bin_count_atomic, 0);
Kokkos::deep_copy(exec, bin_count_atomic, 0);
Kokkos::parallel_for(
"Kokkos::Sort::BinBinning",
Kokkos::RangePolicy<execution_space, bin_binning_tag>(0, len), *this);
Kokkos::RangePolicy<ExecutionSpace, bin_binning_tag>(exec, 0, len),
*this);

if (sort_within_bins)
Kokkos::parallel_for(
"Kokkos::Sort::BinSort",
Kokkos::RangePolicy<execution_space, bin_sort_bins_tag>(
0, bin_op.max_bins()),
Kokkos::RangePolicy<ExecutionSpace, bin_sort_bins_tag>(
exec, 0, bin_op.max_bins()),
*this);
}

// Sort a subset of a view with respect to the first dimension using the
// permutation array
template <class ValuesViewType>
void sort(ValuesViewType const& values, int values_range_begin,
int values_range_end) const {
template <class ExecutionSpace, class ValuesViewType>
void sort(const ExecutionSpace& exec, ValuesViewType const& values,
int values_range_begin, int values_range_end) const {
using scratch_view_type =
Kokkos::View<typename ValuesViewType::data_type,
typename ValuesViewType::array_layout,
Expand All @@ -279,7 +294,7 @@ class BinSort {
}

scratch_view_type sorted_values(
view_alloc(WithoutInitializing,
view_alloc(exec, WithoutInitializing,
"Kokkos::SortImpl::BinSortFunctor::sorted_values"),
values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
values.rank_dynamic > 1 ? values.extent(1)
Expand Down Expand Up @@ -308,18 +323,31 @@ class BinSort {
values_range_begin - range_begin);

parallel_for("Kokkos::Sort::CopyPermute",
Kokkos::RangePolicy<execution_space>(0, len), functor);
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, len), functor);
}

{
copy_functor<ValuesViewType, scratch_view_type> functor(
values, range_begin, sorted_values);

parallel_for("Kokkos::Sort::Copy",
Kokkos::RangePolicy<execution_space>(0, len), functor);
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, len), functor);
}
}

// Sort a subset of a view with respect to the first dimension using the
// permutation array.
// Overload without an execution space argument: forwards to the overload
// taking an execution space, using a default-constructed execution_space
// instance, and fences that instance afterwards.
template <class ValuesViewType>
void sort(ValuesViewType const& values, int values_range_begin,
int values_range_end) const {
execution_space exec;
sort(exec, values, values_range_begin, values_range_end);
// Fence the local instance after the sort has been issued on it.
exec.fence("Kokkos::Sort: fence after sorting");
}

execution_space().fence("Kokkos::Sort: fence after sorting");
// Sort values on the given execution space instance by forwarding to the
// ranged overload with values_range_begin = 0 and values_range_end equal to
// the length of this object's key range (range_end - range_begin), rather
// than values.extent(0).
template <class ExecutionSpace, class ValuesViewType>
void sort(ExecutionSpace const& exec, ValuesViewType const& values) const {
this->sort(exec, values, 0, /*values.extent(0)*/ range_end - range_begin);
}

template <class ValuesViewType>
Expand Down Expand Up @@ -485,8 +513,8 @@ struct BinOp3D {

namespace Impl {

template <class ViewType>
bool try_std_sort(ViewType view) {
template <class ViewType, class ExecutionSpace>
bool try_std_sort(ViewType view, const ExecutionSpace& exec) {
bool possible = true;
size_t stride[8] = {view.stride_0(), view.stride_1(), view.stride_2(),
view.stride_3(), view.stride_4(), view.stride_5(),
Expand All @@ -497,6 +525,7 @@ bool try_std_sort(ViewType view) {
possible = possible && (ViewType::Rank == 1);
possible = possible && (stride[0] == 1);
if (possible) {
exec.fence("Kokkos::sort: Fence before sorting on the host");
std::sort(view.data(), view.data() + view.extent(0));
}
return possible;
Expand All @@ -519,45 +548,63 @@ struct min_max_functor {

} // namespace Impl

template <class ViewType>
void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
template <class ExecutionSpace, class ViewType>
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort(
const ExecutionSpace& exec, ViewType const& view,
bool const always_use_kokkos_sort = false) {
if (!always_use_kokkos_sort) {
if (Impl::try_std_sort(view)) return;
if (Impl::try_std_sort(view, exec)) return;
}
using CompType = BinOp1D<ViewType>;

Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
parallel_reduce("Kokkos::Sort::FindExtent",
Kokkos::RangePolicy<typename ViewType::execution_space>(
0, view.extent(0)),
exec, 0, view.extent(0)),
Impl::min_max_functor<ViewType>(view), reducer);
if (result.min_val == result.max_val) return;
BinSort<ViewType, CompType> bin_sort(
view, CompType(view.extent(0) / 2, result.min_val, result.max_val), true);
bin_sort.create_permute_vector();
bin_sort.sort(view);
bin_sort.create_permute_vector(exec);
bin_sort.sort(exec, view);
}

template <class ViewType>
void sort(ViewType view, size_t const begin, size_t const end) {
void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
typename ViewType::execution_space exec;
sort(exec, view, always_use_kokkos_sort);
exec.fence("Kokkos::Sort: fence after sorting");
}

template <class ExecutionSpace, class ViewType>
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort(
const ExecutionSpace& exec, ViewType view, size_t const begin,
size_t const end) {
using range_policy = Kokkos::RangePolicy<typename ViewType::execution_space>;
using CompType = BinOp1D<ViewType>;

Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);

parallel_reduce("Kokkos::Sort::FindExtent", range_policy(begin, end),
parallel_reduce("Kokkos::Sort::FindExtent", range_policy(exec, begin, end),
Impl::min_max_functor<ViewType>(view), reducer);

if (result.min_val == result.max_val) return;

BinSort<ViewType, CompType> bin_sort(
view, begin, end,
exec, view, begin, end,
CompType((end - begin) / 2, result.min_val, result.max_val), true);

bin_sort.create_permute_vector();
bin_sort.sort(view, begin, end);
bin_sort.create_permute_vector(exec);
bin_sort.sort(exec, view, begin, end);
}

// Sort the part of view delimited by begin and end without a caller-supplied
// execution space: forwards to the overload taking an execution space, using
// a default-constructed instance of the view's execution space, and fences
// that instance afterwards.
template <class ViewType>
void sort(ViewType view, size_t const begin, size_t const end) {
typename ViewType::execution_space exec;
sort(exec, view, begin, end);
// Fence the local instance after the sort has been issued on it.
exec.fence("Kokkos::Sort: fence after sorting");
}

} // namespace Kokkos
Expand Down
92 changes: 51 additions & 41 deletions packages/kokkos/algorithms/unit_tests/TestSort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,9 @@ void test_1D_sort_impl(unsigned int n, bool force_kokkos) {
KeyViewType keys("Keys", n);

// Test sorting array with all numbers equal
Kokkos::deep_copy(keys, KeyType(1));
Kokkos::sort(keys, force_kokkos);
ExecutionSpace exec;
Kokkos::deep_copy(exec, keys, KeyType(1));
Kokkos::sort(exec, keys, force_kokkos);

Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
Kokkos::fill_random(keys, g,
Expand All @@ -147,13 +148,16 @@ void test_1D_sort_impl(unsigned int n, bool force_kokkos) {
double sum_after = 0.0;
unsigned int sort_fails = 0;

Kokkos::parallel_reduce(n, sum<ExecutionSpace, KeyType>(keys), sum_before);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
sum<ExecutionSpace, KeyType>(keys), sum_before);

Kokkos::sort(keys, force_kokkos);
Kokkos::sort(exec, keys, force_kokkos);

Kokkos::parallel_reduce(n, sum<ExecutionSpace, KeyType>(keys), sum_after);
Kokkos::parallel_reduce(
n - 1, is_sorted_struct<ExecutionSpace, KeyType>(keys), sort_fails);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
sum<ExecutionSpace, KeyType>(keys), sum_after);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n - 1),
is_sorted_struct<ExecutionSpace, KeyType>(keys),
sort_fails);

double ratio = sum_before / sum_after;
double epsilon = 1e-10;
Expand All @@ -177,8 +181,10 @@ void test_3D_sort_impl(unsigned int n) {
double sum_after = 0.0;
unsigned int sort_fails = 0;

Kokkos::parallel_reduce(keys.extent(0), sum3D<ExecutionSpace, KeyType>(keys),
sum_before);
ExecutionSpace exec;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, keys.extent(0)),
sum3D<ExecutionSpace, KeyType>(keys), sum_before);

int bin_1d = 1;
while (bin_1d * bin_1d * bin_1d * 4 < (int)keys.extent(0)) bin_1d *= 2;
Expand All @@ -189,15 +195,17 @@ void test_3D_sort_impl(unsigned int n) {
using BinOp = Kokkos::BinOp3D<KeyViewType>;
BinOp bin_op(bin_max, min, max);
Kokkos::BinSort<KeyViewType, BinOp> Sorter(keys, bin_op, false);
Sorter.create_permute_vector();
Sorter.template sort<KeyViewType>(keys);

Kokkos::parallel_reduce(keys.extent(0), sum3D<ExecutionSpace, KeyType>(keys),
sum_after);
Kokkos::parallel_reduce(keys.extent(0) - 1,
bin3d_is_sorted_struct<ExecutionSpace, KeyType>(
keys, bin_1d, min[0], max[0]),
sort_fails);
Sorter.create_permute_vector(exec);
Sorter.sort(exec, keys);

Kokkos::parallel_reduce(
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, keys.extent(0)),
sum3D<ExecutionSpace, KeyType>(keys), sum_after);
Kokkos::parallel_reduce(
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, keys.extent(0) - 1),
bin3d_is_sorted_struct<ExecutionSpace, KeyType>(keys, bin_1d, min[0],
max[0]),
sort_fails);

double ratio = sum_before / sum_after;
double epsilon = 1e-10;
Expand Down Expand Up @@ -229,36 +237,36 @@ void test_dynamic_view_sort_impl(unsigned int n) {
KeyViewType keys_view("KeysTmp", n);

// Test sorting array with all numbers equal
Kokkos::deep_copy(keys_view, KeyType(1));
ExecutionSpace exec;
Kokkos::deep_copy(exec, keys_view, KeyType(1));
Kokkos::deep_copy(keys, keys_view);
Kokkos::sort(keys, 0 /* begin */, n /* end */);
Kokkos::sort(exec, keys, 0 /* begin */, n /* end */);

Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
Kokkos::fill_random(keys_view, g,
Kokkos::Random_XorShift64_Pool<
ExecutionSpace>::generator_type::MAX_URAND);

ExecutionSpace().fence();
exec.fence();
Kokkos::deep_copy(keys, keys_view);
// ExecutionSpace().fence();

double sum_before = 0.0;
double sum_after = 0.0;
unsigned int sort_fails = 0;

Kokkos::parallel_reduce(n, sum<ExecutionSpace, KeyType>(keys_view),
sum_before);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
sum<ExecutionSpace, KeyType>(keys_view), sum_before);

Kokkos::sort(keys, 0 /* begin */, n /* end */);
Kokkos::sort(exec, keys, 0 /* begin */, n /* end */);

ExecutionSpace().fence(); // Need this fence to prevent BusError with Cuda
exec.fence(); // Need this fence to prevent BusError with Cuda
Kokkos::deep_copy(keys_view, keys);
// ExecutionSpace().fence();

Kokkos::parallel_reduce(n, sum<ExecutionSpace, KeyType>(keys_view),
sum_after);
Kokkos::parallel_reduce(
n - 1, is_sorted_struct<ExecutionSpace, KeyType>(keys_view), sort_fails);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
sum<ExecutionSpace, KeyType>(keys_view), sum_after);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n - 1),
is_sorted_struct<ExecutionSpace, KeyType>(keys_view),
sort_fails);

double ratio = sum_before / sum_after;
double epsilon = 1e-10;
Expand Down Expand Up @@ -301,9 +309,10 @@ void test_issue_1160_impl() {
for (int i = 0; i < 10; ++i) {
h_v.access(i, 0) = h_x.access(i, 0) = double(h_element(i));
}
Kokkos::deep_copy(element_, h_element);
Kokkos::deep_copy(x_, h_x);
Kokkos::deep_copy(v_, h_v);
ExecutionSpace exec;
Kokkos::deep_copy(exec, element_, h_element);
Kokkos::deep_copy(exec, x_, h_x);
Kokkos::deep_copy(exec, v_, h_v);

using KeyViewType = decltype(element_);
using BinOp = Kokkos::BinOp1D<KeyViewType>;
Expand All @@ -316,15 +325,16 @@ void test_issue_1160_impl() {

Kokkos::BinSort<KeyViewType, BinOp> Sorter(element_, begin, end, binner,
false);
Sorter.create_permute_vector();
Sorter.sort(element_, begin, end);
Sorter.create_permute_vector(exec);
Sorter.sort(exec, element_, begin, end);

Sorter.sort(x_, begin, end);
Sorter.sort(v_, begin, end);
Sorter.sort(exec, x_, begin, end);
Sorter.sort(exec, v_, begin, end);

Kokkos::deep_copy(h_element, element_);
Kokkos::deep_copy(h_x, x_);
Kokkos::deep_copy(h_v, v_);
Kokkos::deep_copy(exec, h_element, element_);
Kokkos::deep_copy(exec, h_x, x_);
Kokkos::deep_copy(exec, h_v, v_);
exec.fence();

ASSERT_EQ(h_element(0), 9);
ASSERT_EQ(h_element(1), 8);
Expand Down
Loading