Skip to content

Commit

Permalink
Review feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed Feb 2, 2022
1 parent 272e7a6 commit 19cc6ee
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 2 deletions.
34 changes: 33 additions & 1 deletion arrow/benches/filter_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use arrow::util::bench_util::*;

use arrow::array::*;
use arrow::compute::filter;
use arrow::datatypes::{Field, Float32Type, Schema, UInt8Type};
use arrow::datatypes::{Field, Float32Type, Int32Type, Schema, UInt8Type};

use criterion::{criterion_group, criterion_main, Criterion};

Expand Down Expand Up @@ -80,6 +80,38 @@ fn add_benchmark(c: &mut Criterion) {
b.iter(|| bench_built_filter(&sparse_filter, &data_array))
});

let data_array = create_primitive_array::<Int32Type>(size, 0.0);
c.bench_function("filter i32", |b| {
b.iter(|| bench_filter(&data_array, &filter_array))
});
c.bench_function("filter i32 high selectivity", |b| {
b.iter(|| bench_filter(&data_array, &dense_filter_array))
});
c.bench_function("filter i32 low selectivity", |b| {
b.iter(|| bench_filter(&data_array, &sparse_filter_array))
});

c.bench_function("filter context i32", |b| {
b.iter(|| bench_built_filter(&filter, &data_array))
});
c.bench_function("filter context i32 high selectivity", |b| {
b.iter(|| bench_built_filter(&dense_filter, &data_array))
});
c.bench_function("filter context i32 low selectivity", |b| {
b.iter(|| bench_built_filter(&sparse_filter, &data_array))
});

let data_array = create_primitive_array::<Int32Type>(size, 0.5);
c.bench_function("filter context i32 w NULLs", |b| {
b.iter(|| bench_built_filter(&filter, &data_array))
});
c.bench_function("filter context i32 w NULLs high selectivity", |b| {
b.iter(|| bench_built_filter(&dense_filter, &data_array))
});
c.bench_function("filter context i32 w NULLs low selectivity", |b| {
b.iter(|| bench_built_filter(&sparse_filter, &data_array))
});

let data_array = create_primitive_array::<UInt8Type>(size, 0.5);
c.bench_function("filter context u8 w NULLs", |b| {
b.iter(|| bench_built_filter(&filter, &data_array))
Expand Down
10 changes: 9 additions & 1 deletion arrow/src/compute/kernels/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ fn filter_count(filter: &BooleanArray) -> usize {
}

/// Function that can filter arbitrary arrays
///
/// A boxed closure that takes a borrowed [`ArrayData`] and returns a new, owned
/// [`ArrayData`]. The closure may borrow from its environment for the lifetime `'a`.
///
/// Deprecated: Use [`FilterPredicate`] instead
#[deprecated(note = "Use FilterPredicate instead")]
pub type Filter<'a> = Box<dyn Fn(&ArrayData) -> ArrayData + 'a>;

Expand All @@ -194,6 +196,8 @@ pub type Filter<'a> = Box<dyn Fn(&ArrayData) -> ArrayData + 'a>;
/// same filter needs to be applied to multiple arrays (e.g. a multi-column `RecordBatch`).
/// WARNING: the nulls of `filter` are ignored and the value on its slot is considered.
/// Therefore, it is considered undefined behavior to pass `filter` with null values.
///
/// Deprecated: Use [`FilterBuilder`] instead
#[deprecated]
#[allow(deprecated)]
pub fn build_filter(filter: &BooleanArray) -> Result<Filter> {
Expand Down Expand Up @@ -281,7 +285,7 @@ pub struct FilterBuilder {
}

impl FilterBuilder {
/// Create a new [`FilterBuilder`] that can be used construct [`FilterPredicate`]
/// Create a new [`FilterBuilder`] that can be used to construct a [`FilterPredicate`]
pub fn new(filter: &BooleanArray) -> Self {
let filter = match filter.null_count() {
0 => BooleanArray::from(filter.data().clone()),
Expand Down Expand Up @@ -485,6 +489,9 @@ fn filter_array(values: &dyn Array, predicate: &FilterPredicate) -> Result<Array
}
}

/// Computes a new null mask for `data` based on `predicate`
///
/// Returns `None` if the result contains no nulls
fn filter_null_mask(
data: &ArrayData,
predicate: &FilterPredicate,
Expand All @@ -503,6 +510,7 @@ fn filter_null_mask(
Some((null_count, nulls))
}

/// Filter the packed bitmask `buffer`, with `predicate` starting at bit offset `offset`
fn filter_bits(buffer: &Buffer, offset: usize, predicate: &FilterPredicate) -> Buffer {
let src = buffer.as_slice();

Expand Down

0 comments on commit 19cc6ee

Please sign in to comment.