rapidsai · trxcllnt · Jul 19, 2022 · Jul 25, 2022 · Aug 11, 2022 · Aug 11, 2022
@@ -17,6 +17,7 @@
 #pragma once
 
 namespace cuspatial {
+namespace detail {
 namespace utility {
 
 namespace {
@@ -93,4 +94,5 @@ __device__ inline uint16_t z_order_y(uint32_t const index)
 }
 
 }  // namespace utility
+}  // namespace detail
 }  // namespace cuspatial
@@ -18,11 +18,7 @@
 
 #include "utilities.cuh"
 
-#include <utility/z_order.cuh>
-
-#include <cudf/column/column_factories.hpp>
-#include <cudf/types.hpp>
-#include <cudf/utilities/type_dispatcher.hpp>
+#include <cuspatial/detail/utility/z_order.cuh>
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
@@ -58,43 +54,42 @@ namespace detail {
  * @brief Compute Morton codes (z-order) for each point, and the sorted indices
  * mapping original point index to sorted z-order.
  */
-template <typename T>
-inline std::pair<rmm::device_uvector<uint32_t>, std::unique_ptr<cudf::column>>
-compute_point_keys_and_sorted_indices(cudf::column_view const& x,
-                                      cudf::column_view const& y,
-                                      T x_min,
-                                      T x_max,
-                                      T y_min,
-                                      T y_max,
-                                      T scale,
-                                      int8_t max_depth,
+template <class PointIt, class Coord>
+inline std::pair<rmm::device_uvector<uint32_t>, rmm::device_uvector<uint32_t>>
+compute_point_keys_and_sorted_indices(PointIt points_first,
+                                      PointIt points_last,
+                                      Coord x_min,
+                                      Coord x_max,
+                                      Coord y_min,
+                                      Coord y_max,
+                                      Coord scale,
+                                      uint8_t max_depth,
                                       rmm::cuda_stream_view stream,
                                       rmm::mr::device_memory_resource* mr)
 {
-  rmm::device_uvector<uint32_t> keys(x.size(), stream);
+  auto num_points = thrust::distance(points_first, points_last);
+  rmm::device_uvector<uint32_t> keys(num_points, stream);
   thrust::transform(rmm::exec_policy(stream),
-                    thrust::make_zip_iterator(x.begin<T>(), y.begin<T>()),
-                    thrust::make_zip_iterator(x.begin<T>(), y.begin<T>()) + x.size(),
+                    points_first,
+                    points_last,
                     keys.begin(),
                     [=] __device__(auto const& point) {
-                      T x, y;
-                      thrust::tie(x, y) = point;
+                      auto const& x = point.x;
+                      auto const& y = point.y;
                       if (x < x_min || x > x_max || y < y_min || y > y_max) {
                         // If the point is outside the bbox, return a max_level key
                         return static_cast<uint32_t>((1 << (2 * max_depth)) - 1);
                       }
-                      return cuspatial::utility::z_order((x - x_min) / scale, (y - y_min) / scale);
+                      return cuspatial::detail::utility::z_order((x - x_min) / scale,
+                                                                 (y - y_min) / scale);
                     });
 
-  auto indices = make_fixed_width_column<uint32_t>(keys.size(), stream, mr);
+  rmm::device_uvector<uint32_t> indices(keys.size(), stream, mr);
 
-  thrust::sequence(rmm::exec_policy(stream),
-                   indices->mutable_view().begin<uint32_t>(),
-                   indices->mutable_view().end<uint32_t>());
+  thrust::sequence(rmm::exec_policy(stream), indices.begin(), indices.end());
 
   // Sort the codes and point indices
-  thrust::stable_sort_by_key(
-    rmm::exec_policy(stream), keys.begin(), keys.end(), indices->mutable_view().begin<int32_t>());
+  thrust::stable_sort_by_key(rmm::exec_policy(stream), keys.begin(), keys.end(), indices.begin());
 
   return std::make_pair(std::move(keys), std::move(indices));
 }
@@ -109,13 +104,13 @@ template <typename InputIterator1,
           typename OutputIterator1,
           typename OutputIterator2,
           typename BinaryOp>
-inline cudf::size_type build_tree_level(InputIterator1 keys_begin,
-                                        InputIterator1 keys_end,
-                                        InputIterator2 vals_in,
-                                        OutputIterator1 keys_out,
-                                        OutputIterator2 vals_out,
-                                        BinaryOp binary_op,
-                                        rmm::cuda_stream_view stream)
+inline int32_t build_tree_level(InputIterator1 keys_begin,
+                                InputIterator1 keys_end,
+                                InputIterator2 vals_in,
+                                OutputIterator1 keys_out,
+                                OutputIterator2 vals_out,
+                                BinaryOp binary_op,
+                                rmm::cuda_stream_view stream)
 {
   auto result = thrust::reduce_by_key(rmm::exec_policy(stream),
                                       keys_begin,
@@ -133,22 +128,19 @@ inline cudf::size_type build_tree_level(InputIterator1 keys_begin,
  * starting from the leaf levels and working up to the root node.
  */
 template <typename KeysIterator, typename ValsIterator>
-inline std::tuple<cudf::size_type,
-                  cudf::size_type,
-                  std::vector<cudf::size_type>,
-                  std::vector<cudf::size_type>>
-build_tree_levels(int8_t max_depth,
-                  cudf::size_type num_top_quads,
-                  KeysIterator keys_begin,
-                  ValsIterator quad_point_count_begin,
-                  ValsIterator quad_child_count_begin,
-                  rmm::cuda_stream_view stream)
+inline std::tuple<int32_t, int32_t, std::vector<int32_t>, std::vector<int32_t>> build_tree_levels(
+  uint8_t max_depth,
+  int32_t num_top_quads,
+  KeysIterator keys_begin,
+  ValsIterator quad_point_count_begin,
+  ValsIterator quad_child_count_begin,
+  rmm::cuda_stream_view stream)
 {
   // begin/end offsets
-  cudf::size_type begin{0};
-  cudf::size_type end{num_top_quads};
-  std::vector<cudf::size_type> begin_pos(max_depth);
-  std::vector<cudf::size_type> end_pos(max_depth);
+  int32_t begin{0};
+  int32_t end{num_top_quads};
+  std::vector<int32_t> begin_pos(max_depth);
+  std::vector<int32_t> end_pos(max_depth);
 
   // iterator for the parent level's quad node keys
   auto parent_keys = thrust::make_transform_iterator(
@@ -161,7 +153,7 @@ build_tree_levels(int8_t max_depth,
   auto child_values =
     thrust::make_zip_iterator(quad_point_count_begin, thrust::make_constant_iterator<uint32_t>(1));
 
-  for (cudf::size_type level = max_depth - 1; level >= 0; --level) {
+  for (int32_t level = max_depth - 1; level >= 0; --level) {
     auto num_full_quads = build_tree_level(parent_keys + begin,
                                            parent_keys + end,
                                            child_values + begin,
@@ -199,21 +191,21 @@ inline std::tuple<rmm::device_uvector<uint32_t>,
 reverse_tree_levels(rmm::device_uvector<uint32_t> const& quad_keys_in,
                     rmm::device_uvector<uint32_t> const& quad_point_count_in,
                     rmm::device_uvector<uint32_t> const& quad_child_count_in,
-                    std::vector<cudf::size_type> const& begin_pos,
-                    std::vector<cudf::size_type> const& end_pos,
-                    int8_t max_depth,
+                    std::vector<int32_t> const& begin_pos,
+                    std::vector<int32_t> const& end_pos,
+                    uint8_t max_depth,
                     rmm::cuda_stream_view stream)
 {
   rmm::device_uvector<uint32_t> quad_keys(quad_keys_in.size(), stream);
   rmm::device_uvector<uint8_t> quad_levels(quad_keys_in.size(), stream);
   rmm::device_uvector<uint32_t> quad_point_count(quad_point_count_in.size(), stream);
   rmm::device_uvector<uint32_t> quad_child_count(quad_child_count_in.size(), stream);
-  cudf::size_type offset{0};
+  int32_t offset{0};
 
-  for (cudf::size_type level{0}; level < max_depth; ++level) {
-    cudf::size_type level_end   = end_pos[level];
-    cudf::size_type level_begin = begin_pos[level];
-    cudf::size_type num_quads   = level_end - level_begin;
+  for (int32_t level{0}; level < max_depth; ++level) {
+    int32_t level_end   = end_pos[level];
+    int32_t level_begin = begin_pos[level];
+    int32_t num_quads   = level_end - level_begin;
     thrust::fill(rmm::exec_policy(stream),
                  quad_levels.begin() + offset,
                  quad_levels.begin() + offset + num_quads,
@@ -254,31 +246,32 @@ reverse_tree_levels(rmm::device_uvector<uint32_t> const& quad_keys_in,
  * are freed on return if we end up swapping the levels.
  *
  */
-template <typename T>
-inline auto make_full_levels(cudf::column_view const& x,
-                             cudf::column_view const& y,
-                             T x_min,
-                             T x_max,
-                             T y_min,
-                             T y_max,
-                             T scale,
-                             int8_t max_depth,
-                             cudf::size_type min_size,
+template <class PointIt, class Coord>
+inline auto make_full_levels(PointIt points_first,
+                             PointIt points_last,
+                             Coord x_min,
+                             Coord x_max,
+                             Coord y_min,
+                             Coord y_max,
+                             Coord scale,
+                             uint8_t max_depth,
+                             uint32_t min_size,
                              rmm::cuda_stream_view stream,
                              rmm::mr::device_memory_resource* mr)
 {
+  auto num_points = thrust::distance(points_first, points_last);
   // Compute point keys and sort into bottom-level quadrants
   // (i.e. quads at level `max_depth - 1`)
 
   // Compute Morton code (z-order) keys for each point
-  auto keys_and_indices = compute_point_keys_and_sorted_indices<T>(
-    x, y, x_min, x_max, y_min, y_max, scale, max_depth, stream, mr);
+  auto keys_and_indices = compute_point_keys_and_sorted_indices(
+    points_first, points_last, x_min, x_max, y_min, y_max, scale, max_depth, stream, mr);
 
   auto& point_keys    = keys_and_indices.first;
   auto& point_indices = keys_and_indices.second;
 
-  rmm::device_uvector<uint32_t> quad_keys(x.size(), stream);
-  rmm::device_uvector<uint32_t> quad_point_count(x.size(), stream);
+  rmm::device_uvector<uint32_t> quad_keys(num_points, stream);
+  rmm::device_uvector<uint32_t> quad_point_count(num_points, stream);
 
   // Construct quadrants at the finest level of detail, i.e. the quadrants
   // furthest from the root. Reduces points with common z-order codes into

@@ -18,10 +18,6 @@
 
 #include "utilities.cuh"
 
-#include <cudf/column/column_factories.hpp>
-#include <cudf/types.hpp>
-#include <cudf/utilities/type_dispatcher.hpp>
-
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
@@ -56,15 +52,16 @@
 namespace cuspatial {
 namespace detail {
 
-inline rmm::device_uvector<uint32_t> compute_leaf_positions(cudf::column_view const& indicator,
-                                                            cudf::size_type num_valid_nodes,
-                                                            rmm::cuda_stream_view stream)
+inline rmm::device_uvector<uint32_t> compute_leaf_positions(
+  rmm::device_uvector<uint8_t> const& indicator,
+  int32_t num_valid_nodes,
+  rmm::cuda_stream_view stream)
 {
   rmm::device_uvector<uint32_t> leaf_pos(num_valid_nodes, stream);
   auto result = thrust::copy_if(rmm::exec_policy(stream),
                                 thrust::make_counting_iterator(0),
                                 thrust::make_counting_iterator(0) + num_valid_nodes,
-                                indicator.begin<bool>(),
+                                indicator.begin(),
                                 leaf_pos.begin(),
                                 !thrust::placeholders::_1);
   // Shrink leaf_pos's underlying device allocation
@@ -76,14 +73,14 @@ inline rmm::device_uvector<uint32_t> compute_leaf_positions(cudf::column_view co
 inline rmm::device_uvector<uint32_t> flatten_point_keys(
   rmm::device_uvector<uint32_t> const& quad_keys,
   rmm::device_uvector<uint8_t> const& quad_level,
-  cudf::column_view const& indicator,
-  cudf::size_type num_valid_nodes,
-  int8_t max_depth,
+  rmm::device_uvector<uint8_t> const& indicator,
+  int32_t num_valid_nodes,
+  uint8_t max_depth,
   rmm::cuda_stream_view stream)
 {
   rmm::device_uvector<uint32_t> flattened_keys(num_valid_nodes, stream);
   auto keys_and_levels =
-    thrust::make_zip_iterator(quad_keys.begin(), quad_level.begin(), indicator.begin<bool>());
+    thrust::make_zip_iterator(quad_keys.begin(), quad_level.begin(), indicator.begin());
   thrust::transform(rmm::exec_policy(stream),
                     keys_and_levels,
                     keys_and_levels + num_valid_nodes,
@@ -110,9 +107,9 @@ inline rmm::device_uvector<uint32_t> compute_flattened_first_point_positions(
   rmm::device_uvector<uint32_t> const& quad_keys,
   rmm::device_uvector<uint8_t> const& quad_level,
   rmm::device_uvector<uint32_t>& quad_point_count,
-  cudf::column_view const& indicator,
-  cudf::size_type num_valid_nodes,
-  int8_t max_depth,
+  rmm::device_uvector<uint8_t> const& indicator,
+  int32_t num_valid_nodes,
+  uint8_t max_depth,
   rmm::cuda_stream_view stream)
 {
   // Sort initial indices and temporary point counts by the flattened keys
@@ -192,8 +189,8 @@ inline rmm::device_uvector<uint32_t> compute_flattened_first_point_positions(
 
 inline rmm::device_uvector<uint32_t> compute_parent_positions(
   rmm::device_uvector<uint32_t> const& quad_child_count,
-  cudf::size_type num_parent_nodes,
-  cudf::size_type num_child_nodes,
+  int32_t num_parent_nodes,
+  int32_t num_child_nodes,
   rmm::cuda_stream_view stream)
 {
   // Compute parent node start positions
@@ -245,10 +242,10 @@ inline std::pair<uint32_t, uint32_t> remove_unqualified_quads(
   rmm::device_uvector<uint32_t>& quad_point_count,
   rmm::device_uvector<uint32_t>& quad_child_count,
   rmm::device_uvector<uint8_t>& quad_levels,
-  cudf::size_type num_parent_nodes,
-  cudf::size_type num_child_nodes,
-  cudf::size_type min_size,
-  cudf::size_type level_1_size,
+  int32_t num_parent_nodes,
+  int32_t num_child_nodes,
+  uint32_t min_size,
+  int32_t level_1_size,
   rmm::cuda_stream_view stream)
 {
   // compute parent node start positions
@@ -311,42 +308,39 @@ inline std::pair<uint32_t, uint32_t> remove_unqualified_quads(
  * @param min_size
  * @param mr
  * @param stream
- * @return std::unique_ptr<cudf::column>
+ * @return rmm::device_uvector<uint8_t>
  */
-inline std::unique_ptr<cudf::column> construct_non_leaf_indicator(
+inline rmm::device_uvector<uint8_t> construct_non_leaf_indicator(
   rmm::device_uvector<uint32_t>& quad_point_count,
-  cudf::size_type num_parent_nodes,
-  cudf::size_type num_valid_nodes,
-  cudf::size_type min_size,
+  int32_t num_parent_nodes,
+  int32_t num_valid_nodes,
+  uint32_t min_size,
   rmm::mr::device_memory_resource* mr,
   rmm::cuda_stream_view stream)
 {
   //
   // Construct the indicator output column
-  auto is_quad = make_fixed_width_column<bool>(num_valid_nodes, stream, mr);
+  rmm::device_uvector<uint8_t> is_quad(num_valid_nodes, stream, mr);
 
   // line 6 of algorithm in Fig. 5 in ref.
   thrust::transform(rmm::exec_policy(stream),
                     quad_point_count.begin(),
                     quad_point_count.begin() + num_parent_nodes,
-                    is_quad->mutable_view().begin<bool>(),
+                    is_quad.begin(),
                     thrust::placeholders::_1 > min_size);
 
   // line 7 of algorithm in Fig. 5 in ref.
   thrust::replace_if(rmm::exec_policy(stream),
                      quad_point_count.begin(),
                      quad_point_count.begin() + num_parent_nodes,
-                     is_quad->view().begin<bool>(),
+                     is_quad.begin(),
                      thrust::placeholders::_1,
                      0);
 
   if (num_valid_nodes > num_parent_nodes) {
     // zero-fill the rest of the indicator column because
     // device_memory_resources aren't required to initialize allocations
-    thrust::fill(rmm::exec_policy(stream),
-                 is_quad->mutable_view().begin<bool>() + num_parent_nodes,
-                 is_quad->mutable_view().end<bool>(),
-                 0);
+    thrust::fill(rmm::exec_policy(stream), is_quad.begin() + num_parent_nodes, is_quad.end(), 0);
   }
 
   return is_quad;