Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update column-based cuspatial::quadtree_on_points to use new header-only impl #640

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#pragma once

namespace cuspatial {
namespace detail {
namespace utility {

namespace {
Expand Down Expand Up @@ -93,4 +94,5 @@ __device__ inline uint16_t z_order_y(uint32_t const index)
}

} // namespace utility
} // namespace detail
} // namespace cuspatial
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,7 @@

#include "utilities.cuh"

#include <utility/z_order.cuh>

#include <cudf/column/column_factories.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/type_dispatcher.hpp>
#include <cuspatial/detail/utility/z_order.cuh>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
Expand Down Expand Up @@ -58,43 +54,42 @@ namespace detail {
* @brief Compute Morton codes (z-order) for each point, and the sorted indices
* mapping original point index to sorted z-order.
*/
template <typename T>
inline std::pair<rmm::device_uvector<uint32_t>, std::unique_ptr<cudf::column>>
compute_point_keys_and_sorted_indices(cudf::column_view const& x,
cudf::column_view const& y,
T x_min,
T x_max,
T y_min,
T y_max,
T scale,
int8_t max_depth,
template <class PointIt, class Coord>
inline std::pair<rmm::device_uvector<uint32_t>, rmm::device_uvector<uint32_t>>
compute_point_keys_and_sorted_indices(PointIt points_first,
PointIt points_last,
Coord x_min,
Coord x_max,
Coord y_min,
Coord y_max,
Coord scale,
uint8_t max_depth,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
rmm::device_uvector<uint32_t> keys(x.size(), stream);
auto num_points = thrust::distance(points_first, points_last);
rmm::device_uvector<uint32_t> keys(num_points, stream);
thrust::transform(rmm::exec_policy(stream),
thrust::make_zip_iterator(x.begin<T>(), y.begin<T>()),
thrust::make_zip_iterator(x.begin<T>(), y.begin<T>()) + x.size(),
points_first,
points_last,
keys.begin(),
[=] __device__(auto const& point) {
T x, y;
thrust::tie(x, y) = point;
auto const& x = point.x;
auto const& y = point.y;
if (x < x_min || x > x_max || y < y_min || y > y_max) {
// If the point is outside the bbox, return a max_level key
return static_cast<uint32_t>((1 << (2 * max_depth)) - 1);
}
return cuspatial::utility::z_order((x - x_min) / scale, (y - y_min) / scale);
return cuspatial::detail::utility::z_order((x - x_min) / scale,
(y - y_min) / scale);
});

auto indices = make_fixed_width_column<uint32_t>(keys.size(), stream, mr);
rmm::device_uvector<uint32_t> indices(keys.size(), stream, mr);

thrust::sequence(rmm::exec_policy(stream),
indices->mutable_view().begin<uint32_t>(),
indices->mutable_view().end<uint32_t>());
thrust::sequence(rmm::exec_policy(stream), indices.begin(), indices.end());

// Sort the codes and point indices
thrust::stable_sort_by_key(
rmm::exec_policy(stream), keys.begin(), keys.end(), indices->mutable_view().begin<int32_t>());
thrust::stable_sort_by_key(rmm::exec_policy(stream), keys.begin(), keys.end(), indices.begin());

return std::make_pair(std::move(keys), std::move(indices));
}
Expand All @@ -109,13 +104,13 @@ template <typename InputIterator1,
typename OutputIterator1,
typename OutputIterator2,
typename BinaryOp>
inline cudf::size_type build_tree_level(InputIterator1 keys_begin,
InputIterator1 keys_end,
InputIterator2 vals_in,
OutputIterator1 keys_out,
OutputIterator2 vals_out,
BinaryOp binary_op,
rmm::cuda_stream_view stream)
inline int32_t build_tree_level(InputIterator1 keys_begin,
InputIterator1 keys_end,
InputIterator2 vals_in,
OutputIterator1 keys_out,
OutputIterator2 vals_out,
BinaryOp binary_op,
rmm::cuda_stream_view stream)
{
auto result = thrust::reduce_by_key(rmm::exec_policy(stream),
keys_begin,
Expand All @@ -133,22 +128,19 @@ inline cudf::size_type build_tree_level(InputIterator1 keys_begin,
* starting from the leaf levels and working up to the root node.
*/
template <typename KeysIterator, typename ValsIterator>
inline std::tuple<cudf::size_type,
cudf::size_type,
std::vector<cudf::size_type>,
std::vector<cudf::size_type>>
build_tree_levels(int8_t max_depth,
cudf::size_type num_top_quads,
KeysIterator keys_begin,
ValsIterator quad_point_count_begin,
ValsIterator quad_child_count_begin,
rmm::cuda_stream_view stream)
inline std::tuple<int32_t, int32_t, std::vector<int32_t>, std::vector<int32_t>> build_tree_levels(
uint8_t max_depth,
int32_t num_top_quads,
KeysIterator keys_begin,
ValsIterator quad_point_count_begin,
ValsIterator quad_child_count_begin,
rmm::cuda_stream_view stream)
{
// begin/end offsets
cudf::size_type begin{0};
cudf::size_type end{num_top_quads};
std::vector<cudf::size_type> begin_pos(max_depth);
std::vector<cudf::size_type> end_pos(max_depth);
int32_t begin{0};
int32_t end{num_top_quads};
std::vector<int32_t> begin_pos(max_depth);
std::vector<int32_t> end_pos(max_depth);

// iterator for the parent level's quad node keys
auto parent_keys = thrust::make_transform_iterator(
Expand All @@ -161,7 +153,7 @@ build_tree_levels(int8_t max_depth,
auto child_values =
thrust::make_zip_iterator(quad_point_count_begin, thrust::make_constant_iterator<uint32_t>(1));

for (cudf::size_type level = max_depth - 1; level >= 0; --level) {
for (int32_t level = max_depth - 1; level >= 0; --level) {
auto num_full_quads = build_tree_level(parent_keys + begin,
parent_keys + end,
child_values + begin,
Expand Down Expand Up @@ -199,21 +191,21 @@ inline std::tuple<rmm::device_uvector<uint32_t>,
reverse_tree_levels(rmm::device_uvector<uint32_t> const& quad_keys_in,
rmm::device_uvector<uint32_t> const& quad_point_count_in,
rmm::device_uvector<uint32_t> const& quad_child_count_in,
std::vector<cudf::size_type> const& begin_pos,
std::vector<cudf::size_type> const& end_pos,
int8_t max_depth,
std::vector<int32_t> const& begin_pos,
std::vector<int32_t> const& end_pos,
uint8_t max_depth,
rmm::cuda_stream_view stream)
{
rmm::device_uvector<uint32_t> quad_keys(quad_keys_in.size(), stream);
rmm::device_uvector<uint8_t> quad_levels(quad_keys_in.size(), stream);
rmm::device_uvector<uint32_t> quad_point_count(quad_point_count_in.size(), stream);
rmm::device_uvector<uint32_t> quad_child_count(quad_child_count_in.size(), stream);
cudf::size_type offset{0};
int32_t offset{0};

for (cudf::size_type level{0}; level < max_depth; ++level) {
cudf::size_type level_end = end_pos[level];
cudf::size_type level_begin = begin_pos[level];
cudf::size_type num_quads = level_end - level_begin;
for (int32_t level{0}; level < max_depth; ++level) {
int32_t level_end = end_pos[level];
int32_t level_begin = begin_pos[level];
int32_t num_quads = level_end - level_begin;
thrust::fill(rmm::exec_policy(stream),
quad_levels.begin() + offset,
quad_levels.begin() + offset + num_quads,
Expand Down Expand Up @@ -254,31 +246,32 @@ reverse_tree_levels(rmm::device_uvector<uint32_t> const& quad_keys_in,
* are freed on return if we end up swapping the levels.
*
*/
template <typename T>
inline auto make_full_levels(cudf::column_view const& x,
cudf::column_view const& y,
T x_min,
T x_max,
T y_min,
T y_max,
T scale,
int8_t max_depth,
cudf::size_type min_size,
template <class PointIt, class Coord>
inline auto make_full_levels(PointIt points_first,
PointIt points_last,
Coord x_min,
Coord x_max,
Coord y_min,
Coord y_max,
Coord scale,
uint8_t max_depth,
uint32_t min_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto num_points = thrust::distance(points_first, points_last);
// Compute point keys and sort into bottom-level quadrants
// (i.e. quads at level `max_depth - 1`)

// Compute Morton code (z-order) keys for each point
auto keys_and_indices = compute_point_keys_and_sorted_indices<T>(
x, y, x_min, x_max, y_min, y_max, scale, max_depth, stream, mr);
auto keys_and_indices = compute_point_keys_and_sorted_indices(
points_first, points_last, x_min, x_max, y_min, y_max, scale, max_depth, stream, mr);

auto& point_keys = keys_and_indices.first;
auto& point_indices = keys_and_indices.second;

rmm::device_uvector<uint32_t> quad_keys(x.size(), stream);
rmm::device_uvector<uint32_t> quad_point_count(x.size(), stream);
rmm::device_uvector<uint32_t> quad_keys(num_points, stream);
rmm::device_uvector<uint32_t> quad_point_count(num_points, stream);

// Construct quadrants at the finest level of detail, i.e. the quadrants
// furthest from the root. Reduces points with common z-order codes into
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,6 @@

#include "utilities.cuh"

#include <cudf/column/column_factories.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>
Expand Down Expand Up @@ -56,15 +52,16 @@
namespace cuspatial {
namespace detail {

inline rmm::device_uvector<uint32_t> compute_leaf_positions(cudf::column_view const& indicator,
cudf::size_type num_valid_nodes,
rmm::cuda_stream_view stream)
inline rmm::device_uvector<uint32_t> compute_leaf_positions(
rmm::device_uvector<uint8_t> const& indicator,
int32_t num_valid_nodes,
rmm::cuda_stream_view stream)
{
rmm::device_uvector<uint32_t> leaf_pos(num_valid_nodes, stream);
auto result = thrust::copy_if(rmm::exec_policy(stream),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(0) + num_valid_nodes,
indicator.begin<bool>(),
indicator.begin(),
leaf_pos.begin(),
!thrust::placeholders::_1);
// Shrink leaf_pos's underlying device allocation
Expand All @@ -76,14 +73,14 @@ inline rmm::device_uvector<uint32_t> compute_leaf_positions(cudf::column_view co
inline rmm::device_uvector<uint32_t> flatten_point_keys(
rmm::device_uvector<uint32_t> const& quad_keys,
rmm::device_uvector<uint8_t> const& quad_level,
cudf::column_view const& indicator,
cudf::size_type num_valid_nodes,
int8_t max_depth,
rmm::device_uvector<uint8_t> const& indicator,
int32_t num_valid_nodes,
uint8_t max_depth,
rmm::cuda_stream_view stream)
{
rmm::device_uvector<uint32_t> flattened_keys(num_valid_nodes, stream);
auto keys_and_levels =
thrust::make_zip_iterator(quad_keys.begin(), quad_level.begin(), indicator.begin<bool>());
thrust::make_zip_iterator(quad_keys.begin(), quad_level.begin(), indicator.begin());
thrust::transform(rmm::exec_policy(stream),
keys_and_levels,
keys_and_levels + num_valid_nodes,
Expand All @@ -110,9 +107,9 @@ inline rmm::device_uvector<uint32_t> compute_flattened_first_point_positions(
rmm::device_uvector<uint32_t> const& quad_keys,
rmm::device_uvector<uint8_t> const& quad_level,
rmm::device_uvector<uint32_t>& quad_point_count,
cudf::column_view const& indicator,
cudf::size_type num_valid_nodes,
int8_t max_depth,
rmm::device_uvector<uint8_t> const& indicator,
int32_t num_valid_nodes,
uint8_t max_depth,
rmm::cuda_stream_view stream)
{
// Sort initial indices and temporary point counts by the flattened keys
Expand Down Expand Up @@ -192,8 +189,8 @@ inline rmm::device_uvector<uint32_t> compute_flattened_first_point_positions(

inline rmm::device_uvector<uint32_t> compute_parent_positions(
rmm::device_uvector<uint32_t> const& quad_child_count,
cudf::size_type num_parent_nodes,
cudf::size_type num_child_nodes,
int32_t num_parent_nodes,
int32_t num_child_nodes,
rmm::cuda_stream_view stream)
{
// Compute parent node start positions
Expand Down Expand Up @@ -245,10 +242,10 @@ inline std::pair<uint32_t, uint32_t> remove_unqualified_quads(
rmm::device_uvector<uint32_t>& quad_point_count,
rmm::device_uvector<uint32_t>& quad_child_count,
rmm::device_uvector<uint8_t>& quad_levels,
cudf::size_type num_parent_nodes,
cudf::size_type num_child_nodes,
cudf::size_type min_size,
cudf::size_type level_1_size,
int32_t num_parent_nodes,
int32_t num_child_nodes,
uint32_t min_size,
int32_t level_1_size,
rmm::cuda_stream_view stream)
{
// compute parent node start positions
Expand Down Expand Up @@ -311,42 +308,39 @@ inline std::pair<uint32_t, uint32_t> remove_unqualified_quads(
* @param min_size
* @param mr
* @param stream
* @return std::unique_ptr<cudf::column>
* @return rmm::device_uvector<uint8_t>
*/
inline std::unique_ptr<cudf::column> construct_non_leaf_indicator(
inline rmm::device_uvector<uint8_t> construct_non_leaf_indicator(
rmm::device_uvector<uint32_t>& quad_point_count,
cudf::size_type num_parent_nodes,
cudf::size_type num_valid_nodes,
cudf::size_type min_size,
int32_t num_parent_nodes,
int32_t num_valid_nodes,
uint32_t min_size,
rmm::mr::device_memory_resource* mr,
rmm::cuda_stream_view stream)
{
//
// Construct the indicator output column
auto is_quad = make_fixed_width_column<bool>(num_valid_nodes, stream, mr);
rmm::device_uvector<uint8_t> is_quad(num_valid_nodes, stream, mr);

// line 6 of algorithm in Fig. 5 in ref.
thrust::transform(rmm::exec_policy(stream),
quad_point_count.begin(),
quad_point_count.begin() + num_parent_nodes,
is_quad->mutable_view().begin<bool>(),
is_quad.begin(),
thrust::placeholders::_1 > min_size);

// line 7 of algorithm in Fig. 5 in ref.
thrust::replace_if(rmm::exec_policy(stream),
quad_point_count.begin(),
quad_point_count.begin() + num_parent_nodes,
is_quad->view().begin<bool>(),
is_quad.begin(),
thrust::placeholders::_1,
0);

if (num_valid_nodes > num_parent_nodes) {
// zero-fill the rest of the indicator column because
// device_memory_resources aren't required to initialize allocations
thrust::fill(rmm::exec_policy(stream),
is_quad->mutable_view().begin<bool>() + num_parent_nodes,
is_quad->mutable_view().end<bool>(),
0);
thrust::fill(rmm::exec_policy(stream), is_quad.begin() + num_parent_nodes, is_quad.end(), 0);
}

return is_quad;
Expand Down
Loading