Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Bug fixes and tests for subset_trajectory_id() #16

Merged
merged 7 commits into from
Sep 5, 2019
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@
## Improvements

## Bug Fixes

- PR #16 `cuspatial::subset_trajectory_id` test improvements and bug fixes
25 changes: 17 additions & 8 deletions cpp/include/cuspatial/trajectory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#pragma once

typedef struct gdf_column_ gdf_column; // forward declaration
#include <cudf/types.h>

namespace cuspatial {

Expand Down Expand Up @@ -116,17 +116,26 @@ void trajectory_spatial_bounds(const gdf_column& x, const gdf_column& y,
* @param[out] out_timestamp: output timestamp ordered by (in_id,in_ts)
*
* @return number of trajectories returned
*
* @note the output columns are allocated by this function but they must
* be deallocated by the caller.
*
* @note this function is likely to be removed in the future since it is
* redundant to cuDF functionality
*
* Note: the API is useful for integrating with cuDF and serial Python APIs,
* @note: the API is useful for integrating with cuDF and serial Python APIs,
* e.g., query based on trajectory level information using serial Python APIs or
* cuDF APIs and identify a subset of trajectory IDs. These IDs can then be used
* to retrieve x/y/len/pos data for futher processing.
*/
uint32_t subset_trajectory_id(const gdf_column& id,
const gdf_column& in_x, const gdf_column& in_y,
const gdf_column& in_id,
const gdf_column& in_timestamp,
gdf_column& out_x, gdf_column& out_y,
gdf_column& out_id, gdf_column& out_timestamp);
gdf_size_type subset_trajectory_id(const gdf_column& id,
const gdf_column& in_x,
const gdf_column& in_y,
const gdf_column& in_id,
const gdf_column& in_timestamp,
gdf_column& out_x,
gdf_column& out_y,
gdf_column& out_id,
gdf_column& out_timestamp);

} // namespace cuspatial
164 changes: 82 additions & 82 deletions cpp/src/trajectory/subset_trajectories.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include <cudf/utilities/legacy/type_dispatcher.hpp>
#include <cudf/copying.hpp>
#include <utilities/cuda_utils.hpp>
#include <type_traits>
#include <thrust/device_vector.h>
Expand Down Expand Up @@ -45,72 +46,67 @@ struct subset_functor {
}

template <typename T, std::enable_if_t< is_supported<T>() >* = nullptr>
uint32_t operator()(const gdf_column& id,
const gdf_column& in_x, const gdf_column& in_y,
const gdf_column& in_id, const gdf_column& in_timestamp,
gdf_column& out_x, gdf_column& out_y,
gdf_column& out_id, gdf_column& out_timestamp)
gdf_size_type operator()(const gdf_column& id,
const gdf_column& in_x, const gdf_column& in_y,
const gdf_column& in_id,
const gdf_column& in_timestamp,
gdf_column& out_x, gdf_column& out_y,
gdf_column& out_id, gdf_column& out_timestamp)
{
T* in_x_ptr = static_cast<T*>(in_x.data);
T* in_y_ptr = static_cast<T*>(in_y.data);
uint32_t* in_id_ptr = static_cast<uint32_t*>(in_id.data);
cuspatial::its_timestamp* in_ts_ptr =
static_cast<cuspatial::its_timestamp*>(in_timestamp.data);
uint32_t* id_ptr = static_cast<uint32_t*>(id.data);

cudaStream_t stream{0};
auto exec_policy = rmm::exec_policy(stream)->on(stream);

gdf_size_type num_hit{0};
gdf_size_type num_id{id.size};
gdf_size_type num_rec{in_id.size};

rmm::device_vector<uint32_t> temp_id(id_ptr, id_ptr + num_id);
thrust::sort(exec_policy, temp_id.begin(), temp_id.end());
thrust::device_vector<bool> hit_vec(num_rec);
thrust::binary_search(exec_policy, temp_id.cbegin(), temp_id.cend(),
in_id_ptr, in_id_ptr + num_rec, hit_vec.begin());


uint32_t num_hit = thrust::count(exec_policy, hit_vec.begin(),
hit_vec.end(), true);
if (num_id > 0 && id.data != nullptr && num_rec > 0) {
int32_t* in_id_ptr = static_cast<int32_t*>(in_id.data);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The CUDF_EXPECTS for in_id.dtype allows it to be int8/16/32/64, but this is always casting to int32_t.

Something should change here, and I'm guessing it's making the CUDF_EXPECTS more restrictive.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

int32_t* id_ptr = static_cast<int32_t*>(id.data);

cudaStream_t stream{0};
auto exec_policy = rmm::exec_policy(stream)->on(stream);

rmm::device_vector<int32_t> temp_id(id_ptr, id_ptr + num_id);
thrust::sort(exec_policy, temp_id.begin(), temp_id.end());
thrust::device_vector<bool> hit_vec(num_rec);
thrust::binary_search(exec_policy, temp_id.cbegin(), temp_id.cend(),
in_id_ptr, in_id_ptr + num_rec, hit_vec.begin());

num_hit = thrust::count(exec_policy, hit_vec.begin(),
hit_vec.end(), true);

if (num_hit > 0) {
out_x = cudf::allocate_like(in_x, num_hit);
out_y = cudf::allocate_like(in_y, num_hit);
out_id = cudf::allocate_like(in_id, num_hit);
out_timestamp = cudf::allocate_like(in_timestamp, num_hit);

auto in_itr = thrust::make_zip_iterator(thrust::make_tuple(
static_cast<T*>(in_x.data), static_cast<T*>(in_y.data),
static_cast<int32_t*>(in_id.data),
static_cast<cuspatial::its_timestamp*>(in_timestamp.data)));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this supposed to be cudf::timestamp?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

auto out_itr = thrust::make_zip_iterator(thrust::make_tuple(
static_cast<T*>(out_x.data), static_cast<T*>(out_y.data),
static_cast<int32_t*>(out_id.data),
static_cast<cuspatial::its_timestamp*>(out_timestamp.data)));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likewise, is this supposed to be cudf::timestamp?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.


gdf_size_type num_keep = thrust::copy_if(exec_policy, in_itr,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd break this in to two lines. It's potentially confusing to have the subtraction of out_itr in the same line.

Suggested change
gdf_size_type num_keep = thrust::copy_if(exec_policy, in_itr,
auto end = thrust::copy_if(exec_policy, in_itr,
gdf_size_type num_keep = end - out_itr;

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea. Fixed.

in_itr + num_rec,
hit_vec.begin(), out_itr,
is_true()) - out_itr;

CUDF_EXPECTS(num_hit == num_keep,
"count_if and copy_if result mismatch");
}
}

RMM_TRY( RMM_ALLOC(&out_x.data, num_hit * sizeof(double), 0) );
RMM_TRY( RMM_ALLOC(&out_y.data, num_hit * sizeof(double), 0) );
RMM_TRY( RMM_ALLOC(&out_id.data, num_hit * sizeof(uint32_t), 0) );
RMM_TRY( RMM_ALLOC(&out_timestamp.data,
num_hit * sizeof(cuspatial::its_timestamp), 0) );

T* out_x_ptr = static_cast<T*>(out_x.data);
T* out_y_ptr = static_cast<T*>(out_y.data);
uint32_t* out_id_ptr = static_cast<uint32_t*>(out_id.data);
cuspatial::its_timestamp* out_ts_ptr =
static_cast<cuspatial::its_timestamp*>(out_timestamp.data);

auto in_itr = thrust::make_zip_iterator(thrust::make_tuple(in_x_ptr,
in_y_ptr,
in_id_ptr,
in_ts_ptr));
auto out_itr = thrust::make_zip_iterator(thrust::make_tuple(out_x_ptr,
out_y_ptr,
out_id_ptr,
out_ts_ptr));

uint32_t num_keep = thrust::copy_if(exec_policy, in_itr, in_itr+num_rec,
hit_vec.begin(), out_itr,
is_true()) - out_itr;

CUDF_EXPECTS(num_hit == num_keep,
"count_if and copy_if result mismatch");

return num_hit;
}

template <typename T, std::enable_if_t< !is_supported<T>() >* = nullptr>
uint32_t operator()(const gdf_column& ids,
const gdf_column& in_x, const gdf_column& in_y,
const gdf_column& in_id, const gdf_column& in_ts,
gdf_column& out_x, gdf_column& out_y,
gdf_column& out_id, gdf_column& out_ts)
gdf_size_type operator()(const gdf_column& ids,
const gdf_column& in_x, const gdf_column& in_y,
const gdf_column& in_id, const gdf_column& in_ts,
gdf_column& out_x, gdf_column& out_y,
gdf_column& out_id, gdf_column& out_ts)
{
CUDF_FAIL("Non-floating point operation is not supported");
}
Expand All @@ -120,37 +116,41 @@ struct subset_functor {

namespace cuspatial {

uint32_t subset_trajectory_id(const gdf_column& id,
const gdf_column& in_x, const gdf_column& in_y,
const gdf_column& in_id,
const gdf_column& in_timestamp,
gdf_column& out_x, gdf_column& out_y,
gdf_column& out_id, gdf_column& out_timestamp)
{
struct timeval t0,t1;
gettimeofday(&t0, nullptr);
gdf_size_type subset_trajectory_id(const gdf_column& id,
const gdf_column& in_x,
const gdf_column& in_y,
const gdf_column& in_id,
const gdf_column& in_timestamp,
gdf_column& out_x,
gdf_column& out_y,
gdf_column& out_id,
gdf_column& out_timestamp)
{
CUDF_EXPECTS(in_x.data != nullptr && in_x.data != nullptr &&
in_id.data != nullptr && in_timestamp.data != nullptr,
"x/y/in_id/in_ts data cannot be null");
"Null input data");
CUDF_EXPECTS(in_x.size == in_y.size && in_x.size == in_id.size &&
in_x.size == in_timestamp.size ,
"x/y/in_id/timestamp must have equal size");

// future versions might allow x/y/in_id/timestamp to have null_count > 0,
// which might be useful for taking query results as inputs
in_x.size == in_timestamp.size,
"Data size mismatch");
CUDF_EXPECTS(in_id.dtype == GDF_INT8 || in_id.dtype == GDF_INT16 ||
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In subset_functor, in_id is always being casted to int32_t, which isn't valid if the type is anything other than GDF_INT32.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

in_id.dtype == GDF_INT32 || in_id.dtype == GDF_INT64,
"Invalid trajectory ID datatype");
CUDF_EXPECTS(id.dtype == in_id.dtype,
"Trajectory ID datatype mismatch");
CUDF_EXPECTS(in_timestamp.dtype == GDF_TIMESTAMP,
"Invalid timestamp datatype");
CUDF_EXPECTS(in_x.null_count == 0 && in_y.null_count == 0 &&
in_id.null_count==0 && in_timestamp.null_count==0,
"NULL support unimplemented");
uint32_t num_hit=cudf::type_dispatcher(in_x.dtype, subset_functor(), id,
in_x, in_y, in_id, in_timestamp,
out_x, out_y, out_id, out_timestamp);
std::cout<<"number of resulting points:"<<num_hit<<std::endl;
gettimeofday(&t1, nullptr);
float subset_end2end_time =
cuspatial::calc_time("trajectory subset end-to-end time in ms=", t0, t1);
return num_hit;

out_x = cudf::empty_like(in_x);
out_y = cudf::empty_like(in_y);
out_id = cudf::empty_like(in_id);
out_timestamp = cudf::empty_like(in_timestamp);

return cudf::type_dispatcher(in_x.dtype, subset_functor(), id,
in_x, in_y, in_id, in_timestamp,
out_x, out_y, out_id, out_timestamp);
}

}// namespace cuspatial
5 changes: 3 additions & 2 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ endif()

add_library(cudftestutil STATIC
"${CUDF_TEST_INCLUDE}/tests/utilities/cudf_test_utils.cu"
"${CUDF_TEST_INCLUDE}/tests/utilities/valid_vectors.cpp")
"${CUDF_TEST_INCLUDE}/tests/utilities/valid_vectors.cpp"
"${CUDF_TEST_INCLUDE}/tests/utilities/nvcategory_utils.cu")

target_link_libraries(cudftestutil cudf cuspatial rmm cudart cuda nvrtc NVCategory NVStrings)

Expand Down Expand Up @@ -125,6 +126,6 @@ set(TRAJECTORY_SPATIAL_BOUND_SRC
ConfigureTest(TRAJECTORY_SPATIAL_BOUND_TEST "${TRAJECTORY_SPATIAL_BOUND_SRC}")

set(TRAJECTORY_SUBSET_TEST_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/trajectory/trajectory_subset_toy.cu"
"${CMAKE_CURRENT_SOURCE_DIR}/trajectory/test_trajectory_subset.cu"
)
ConfigureTest(TRAJECTORY_SUBSET_TEST "${TRAJECTORY_SUBSET_TEST_SRC}")
Loading