-
Notifications
You must be signed in to change notification settings - Fork 158
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[REVIEW] Bug fixes and tests for subset_trajectory_id() #16
Changes from 5 commits
a28da2e
b7b1727
056fc5b
d3fdeb2
456fae7
16cf431
4435ff9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,3 +7,5 @@ | |
## Improvements | ||
|
||
## Bug Fixes | ||
|
||
- PR #16 `cuspatial::subset_trajectory_id` test improvements and bug fixes |
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -15,6 +15,7 @@ | |||||||
*/ | ||||||||
|
||||||||
#include <cudf/utilities/legacy/type_dispatcher.hpp> | ||||||||
#include <cudf/copying.hpp> | ||||||||
#include <utilities/cuda_utils.hpp> | ||||||||
#include <type_traits> | ||||||||
#include <thrust/device_vector.h> | ||||||||
|
@@ -45,72 +46,67 @@ struct subset_functor { | |||||||
} | ||||||||
|
||||||||
template <typename T, std::enable_if_t< is_supported<T>() >* = nullptr> | ||||||||
uint32_t operator()(const gdf_column& id, | ||||||||
const gdf_column& in_x, const gdf_column& in_y, | ||||||||
const gdf_column& in_id, const gdf_column& in_timestamp, | ||||||||
gdf_column& out_x, gdf_column& out_y, | ||||||||
gdf_column& out_id, gdf_column& out_timestamp) | ||||||||
gdf_size_type operator()(const gdf_column& id, | ||||||||
const gdf_column& in_x, const gdf_column& in_y, | ||||||||
const gdf_column& in_id, | ||||||||
const gdf_column& in_timestamp, | ||||||||
gdf_column& out_x, gdf_column& out_y, | ||||||||
gdf_column& out_id, gdf_column& out_timestamp) | ||||||||
{ | ||||||||
T* in_x_ptr = static_cast<T*>(in_x.data); | ||||||||
T* in_y_ptr = static_cast<T*>(in_y.data); | ||||||||
uint32_t* in_id_ptr = static_cast<uint32_t*>(in_id.data); | ||||||||
cuspatial::its_timestamp* in_ts_ptr = | ||||||||
static_cast<cuspatial::its_timestamp*>(in_timestamp.data); | ||||||||
uint32_t* id_ptr = static_cast<uint32_t*>(id.data); | ||||||||
|
||||||||
cudaStream_t stream{0}; | ||||||||
auto exec_policy = rmm::exec_policy(stream)->on(stream); | ||||||||
|
||||||||
gdf_size_type num_hit{0}; | ||||||||
gdf_size_type num_id{id.size}; | ||||||||
gdf_size_type num_rec{in_id.size}; | ||||||||
|
||||||||
rmm::device_vector<uint32_t> temp_id(id_ptr, id_ptr + num_id); | ||||||||
thrust::sort(exec_policy, temp_id.begin(), temp_id.end()); | ||||||||
thrust::device_vector<bool> hit_vec(num_rec); | ||||||||
thrust::binary_search(exec_policy, temp_id.cbegin(), temp_id.cend(), | ||||||||
in_id_ptr, in_id_ptr + num_rec, hit_vec.begin()); | ||||||||
|
||||||||
|
||||||||
uint32_t num_hit = thrust::count(exec_policy, hit_vec.begin(), | ||||||||
hit_vec.end(), true); | ||||||||
if (num_id > 0 && id.data != nullptr && num_rec > 0) { | ||||||||
int32_t* in_id_ptr = static_cast<int32_t*>(in_id.data); | ||||||||
int32_t* id_ptr = static_cast<int32_t*>(id.data); | ||||||||
|
||||||||
cudaStream_t stream{0}; | ||||||||
auto exec_policy = rmm::exec_policy(stream)->on(stream); | ||||||||
|
||||||||
rmm::device_vector<int32_t> temp_id(id_ptr, id_ptr + num_id); | ||||||||
thrust::sort(exec_policy, temp_id.begin(), temp_id.end()); | ||||||||
thrust::device_vector<bool> hit_vec(num_rec); | ||||||||
thrust::binary_search(exec_policy, temp_id.cbegin(), temp_id.cend(), | ||||||||
in_id_ptr, in_id_ptr + num_rec, hit_vec.begin()); | ||||||||
|
||||||||
num_hit = thrust::count(exec_policy, hit_vec.begin(), | ||||||||
hit_vec.end(), true); | ||||||||
|
||||||||
if (num_hit > 0) { | ||||||||
out_x = cudf::allocate_like(in_x, num_hit); | ||||||||
out_y = cudf::allocate_like(in_y, num_hit); | ||||||||
out_id = cudf::allocate_like(in_id, num_hit); | ||||||||
out_timestamp = cudf::allocate_like(in_timestamp, num_hit); | ||||||||
|
||||||||
auto in_itr = thrust::make_zip_iterator(thrust::make_tuple( | ||||||||
static_cast<T*>(in_x.data), static_cast<T*>(in_y.data), | ||||||||
static_cast<int32_t*>(in_id.data), | ||||||||
static_cast<cuspatial::its_timestamp*>(in_timestamp.data))); | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this supposed to be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||||||||
auto out_itr = thrust::make_zip_iterator(thrust::make_tuple( | ||||||||
static_cast<T*>(out_x.data), static_cast<T*>(out_y.data), | ||||||||
static_cast<int32_t*>(out_id.data), | ||||||||
static_cast<cuspatial::its_timestamp*>(out_timestamp.data))); | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Likewise, is this supposed to be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||||||||
|
||||||||
gdf_size_type num_keep = thrust::copy_if(exec_policy, in_itr, | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd break this in to two lines. It's potentially confusing to have the subtraction of
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good idea. Fixed. |
||||||||
in_itr + num_rec, | ||||||||
hit_vec.begin(), out_itr, | ||||||||
is_true()) - out_itr; | ||||||||
|
||||||||
CUDF_EXPECTS(num_hit == num_keep, | ||||||||
"count_if and copy_if result mismatch"); | ||||||||
} | ||||||||
} | ||||||||
|
||||||||
RMM_TRY( RMM_ALLOC(&out_x.data, num_hit * sizeof(double), 0) ); | ||||||||
RMM_TRY( RMM_ALLOC(&out_y.data, num_hit * sizeof(double), 0) ); | ||||||||
RMM_TRY( RMM_ALLOC(&out_id.data, num_hit * sizeof(uint32_t), 0) ); | ||||||||
RMM_TRY( RMM_ALLOC(&out_timestamp.data, | ||||||||
num_hit * sizeof(cuspatial::its_timestamp), 0) ); | ||||||||
|
||||||||
T* out_x_ptr = static_cast<T*>(out_x.data); | ||||||||
T* out_y_ptr = static_cast<T*>(out_y.data); | ||||||||
uint32_t* out_id_ptr = static_cast<uint32_t*>(out_id.data); | ||||||||
cuspatial::its_timestamp* out_ts_ptr = | ||||||||
static_cast<cuspatial::its_timestamp*>(out_timestamp.data); | ||||||||
|
||||||||
auto in_itr = thrust::make_zip_iterator(thrust::make_tuple(in_x_ptr, | ||||||||
in_y_ptr, | ||||||||
in_id_ptr, | ||||||||
in_ts_ptr)); | ||||||||
auto out_itr = thrust::make_zip_iterator(thrust::make_tuple(out_x_ptr, | ||||||||
out_y_ptr, | ||||||||
out_id_ptr, | ||||||||
out_ts_ptr)); | ||||||||
|
||||||||
uint32_t num_keep = thrust::copy_if(exec_policy, in_itr, in_itr+num_rec, | ||||||||
hit_vec.begin(), out_itr, | ||||||||
is_true()) - out_itr; | ||||||||
|
||||||||
CUDF_EXPECTS(num_hit == num_keep, | ||||||||
"count_if and copy_if result mismatch"); | ||||||||
|
||||||||
return num_hit; | ||||||||
} | ||||||||
|
||||||||
template <typename T, std::enable_if_t< !is_supported<T>() >* = nullptr> | ||||||||
uint32_t operator()(const gdf_column& ids, | ||||||||
const gdf_column& in_x, const gdf_column& in_y, | ||||||||
const gdf_column& in_id, const gdf_column& in_ts, | ||||||||
gdf_column& out_x, gdf_column& out_y, | ||||||||
gdf_column& out_id, gdf_column& out_ts) | ||||||||
gdf_size_type operator()(const gdf_column& ids, | ||||||||
const gdf_column& in_x, const gdf_column& in_y, | ||||||||
const gdf_column& in_id, const gdf_column& in_ts, | ||||||||
gdf_column& out_x, gdf_column& out_y, | ||||||||
gdf_column& out_id, gdf_column& out_ts) | ||||||||
{ | ||||||||
CUDF_FAIL("Non-floating point operation is not supported"); | ||||||||
} | ||||||||
|
@@ -120,37 +116,41 @@ struct subset_functor { | |||||||
|
||||||||
namespace cuspatial { | ||||||||
|
||||||||
uint32_t subset_trajectory_id(const gdf_column& id, | ||||||||
const gdf_column& in_x, const gdf_column& in_y, | ||||||||
const gdf_column& in_id, | ||||||||
const gdf_column& in_timestamp, | ||||||||
gdf_column& out_x, gdf_column& out_y, | ||||||||
gdf_column& out_id, gdf_column& out_timestamp) | ||||||||
{ | ||||||||
struct timeval t0,t1; | ||||||||
gettimeofday(&t0, nullptr); | ||||||||
gdf_size_type subset_trajectory_id(const gdf_column& id, | ||||||||
const gdf_column& in_x, | ||||||||
const gdf_column& in_y, | ||||||||
const gdf_column& in_id, | ||||||||
const gdf_column& in_timestamp, | ||||||||
gdf_column& out_x, | ||||||||
gdf_column& out_y, | ||||||||
gdf_column& out_id, | ||||||||
gdf_column& out_timestamp) | ||||||||
{ | ||||||||
CUDF_EXPECTS(in_x.data != nullptr && in_x.data != nullptr && | ||||||||
in_id.data != nullptr && in_timestamp.data != nullptr, | ||||||||
"x/y/in_id/in_ts data cannot be null"); | ||||||||
"Null input data"); | ||||||||
CUDF_EXPECTS(in_x.size == in_y.size && in_x.size == in_id.size && | ||||||||
in_x.size == in_timestamp.size , | ||||||||
"x/y/in_id/timestamp must have equal size"); | ||||||||
|
||||||||
// future versions might allow x/y/in_id/timestamp to have null_count > 0, | ||||||||
// which might be useful for taking query results as inputs | ||||||||
in_x.size == in_timestamp.size, | ||||||||
"Data size mismatch"); | ||||||||
CUDF_EXPECTS(in_id.dtype == GDF_INT8 || in_id.dtype == GDF_INT16 || | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||||||||
in_id.dtype == GDF_INT32 || in_id.dtype == GDF_INT64, | ||||||||
"Invalid trajectory ID datatype"); | ||||||||
CUDF_EXPECTS(id.dtype == in_id.dtype, | ||||||||
"Trajectory ID datatype mismatch"); | ||||||||
CUDF_EXPECTS(in_timestamp.dtype == GDF_TIMESTAMP, | ||||||||
"Invalid timestamp datatype"); | ||||||||
CUDF_EXPECTS(in_x.null_count == 0 && in_y.null_count == 0 && | ||||||||
in_id.null_count==0 && in_timestamp.null_count==0, | ||||||||
"NULL support unimplemented"); | ||||||||
uint32_t num_hit=cudf::type_dispatcher(in_x.dtype, subset_functor(), id, | ||||||||
in_x, in_y, in_id, in_timestamp, | ||||||||
out_x, out_y, out_id, out_timestamp); | ||||||||
std::cout<<"number of resulting points:"<<num_hit<<std::endl; | ||||||||
gettimeofday(&t1, nullptr); | ||||||||
float subset_end2end_time = | ||||||||
cuspatial::calc_time("trajectory subset end-to-end time in ms=", t0, t1); | ||||||||
return num_hit; | ||||||||
|
||||||||
out_x = cudf::empty_like(in_x); | ||||||||
out_y = cudf::empty_like(in_y); | ||||||||
out_id = cudf::empty_like(in_id); | ||||||||
out_timestamp = cudf::empty_like(in_timestamp); | ||||||||
|
||||||||
return cudf::type_dispatcher(in_x.dtype, subset_functor(), id, | ||||||||
in_x, in_y, in_id, in_timestamp, | ||||||||
out_x, out_y, out_id, out_timestamp); | ||||||||
} | ||||||||
|
||||||||
}// namespace cuspatial |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The
CUDF_EXPECTS
forin_id.dtype
allows it to be int8/16/32/64, but this is always casting toint32_t
.Something should change here, and I'm guessing it's making the
CUDF_EXPECTS
more restrictive.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.