Skip to content

Commit

Permalink
Add specialized interleave implementation for primitives (#2898)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold authored Oct 21, 2022
1 parent f629a2e commit 9e5e477
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 4 deletions.
63 changes: 59 additions & 4 deletions arrow-select/src/interleave.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,22 @@
// specific language governing permissions and limitations
// under the License.

use arrow_array::{make_array, new_empty_array, Array, ArrayRef};
use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
use arrow_array::cast::as_primitive_array;
use arrow_array::{
downcast_primitive, make_array, new_empty_array, Array, ArrayRef, ArrowPrimitiveType,
PrimitiveArray,
};
use arrow_data::transform::MutableArrayData;
use arrow_schema::ArrowError;
use arrow_data::ArrayDataBuilder;
use arrow_schema::{ArrowError, DataType};
use std::sync::Arc;

macro_rules! primitive_helper {
($t:ty, $values:ident, $indices:ident, $data_type:ident) => {
interleave_primitive::<$t>($values, $indices, $data_type)
};
}

///
/// Takes elements by index from a list of [`Array`], creating a new [`Array`] from those values.
Expand Down Expand Up @@ -70,9 +83,51 @@ pub fn interleave(
return Ok(new_empty_array(data_type));
}

// TODO: Add specialized implementations (#2864)
downcast_primitive! {
data_type => (primitive_helper, values, indices, data_type),
_ => interleave_fallback(values, indices)
}
}

fn interleave_primitive<T: ArrowPrimitiveType>(
values: &[&dyn Array],
indices: &[(usize, usize)],
data_type: &DataType,
) -> Result<ArrayRef, ArrowError> {
let mut has_nulls = false;
let cast: Vec<_> = values
.iter()
.map(|x| {
has_nulls = has_nulls || x.null_count() != 0;
as_primitive_array::<T>(*x)
})
.collect();

let mut values = BufferBuilder::<T::Native>::new(indices.len());
for (a, b) in indices {
let v = cast[*a].value(*b);
values.append(v)
}

let mut null_count = 0;
let nulls = has_nulls.then(|| {
let mut builder = BooleanBufferBuilder::new(indices.len());
for (a, b) in indices {
let v = cast[*a].is_valid(*b);
null_count += !v as usize;
builder.append(v)
}
builder.finish()
});

let builder = ArrayDataBuilder::new(data_type.clone())
.len(indices.len())
.add_buffer(values.finish())
.null_bit_buffer(nulls)
.null_count(null_count);

interleave_fallback(values, indices)
let data = unsafe { builder.build_unchecked() };
Ok(Arc::new(PrimitiveArray::<T>::from(data)))
}

/// Fallback implementation of interleave using [`MutableArrayData`]
Expand Down
5 changes: 5 additions & 0 deletions arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,11 @@ name = "take_kernels"
harness = false
required-features = ["test_utils"]

[[bench]]
name = "interleave_kernels"
harness = false
required-features = ["test_utils"]

[[bench]]
name = "length_kernel"
harness = false
Expand Down
91 changes: 91 additions & 0 deletions arrow/benches/interleave_kernels.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#[macro_use]
extern crate criterion;

use criterion::Criterion;
use std::ops::Range;

use rand::Rng;

extern crate arrow;

use arrow::datatypes::*;
use arrow::util::test_util::seedable_rng;
use arrow::{array::*, util::bench_util::*};
use arrow_select::interleave::interleave;

fn do_bench(
c: &mut Criterion,
prefix: &str,
len: usize,
base: &dyn Array,
slices: &[Range<usize>],
) {
let mut rng = seedable_rng();

let arrays: Vec<_> = slices
.iter()
.map(|r| base.slice(r.start, r.end - r.start))
.collect();
let values: Vec<_> = arrays.iter().map(|x| x.as_ref()).collect();

let indices: Vec<_> = (0..len)
.map(|_| {
let array_idx = rng.gen_range(0..values.len());
let value_idx = rng.gen_range(0..values[array_idx].len());
(array_idx, value_idx)
})
.collect();

c.bench_function(
&format!("interleave {} {} {:?}", prefix, len, slices),
|b| b.iter(|| criterion::black_box(interleave(&values, &indices).unwrap())),
);
}

fn add_benchmark(c: &mut Criterion) {
let a = create_primitive_array::<Int32Type>(1024, 0.);

do_bench(c, "i32(0.0)", 100, &a, &[0..100, 100..230, 450..1000]);
do_bench(c, "i32(0.0)", 400, &a, &[0..100, 100..230, 450..1000]);
do_bench(c, "i32(0.0)", 1024, &a, &[0..100, 100..230, 450..1000]);
do_bench(
c,
"i32(0.0)",
1024,
&a,
&[0..100, 100..230, 450..1000, 0..1000],
);

let a = create_primitive_array::<Int32Type>(1024, 0.5);

do_bench(c, "i32(0.5)", 100, &a, &[0..100, 100..230, 450..1000]);
do_bench(c, "i32(0.5)", 400, &a, &[0..100, 100..230, 450..1000]);
do_bench(c, "i32(0.5)", 1024, &a, &[0..100, 100..230, 450..1000]);
do_bench(
c,
"i32(0.5)",
1024,
&a,
&[0..100, 100..230, 450..1000, 0..1000],
);
}

criterion_group!(benches, add_benchmark);
criterion_main!(benches);

0 comments on commit 9e5e477

Please sign in to comment.