Skip to content

Commit

Permalink
ARROW-11108: [Rust] Fixed performance issue in mutableBuffer.
Browse files Browse the repository at this point in the history
This PR refactors `MutableBuffer::extend_from_slice` to remove the need to use `to_byte_slice` on every call, thereby removing its level of indirection, that does not allow the compiler to optimize out some code.

This is the second performance improvement originally presented in apache#8796 and, together with apache#9027 , brings the performance of "MutableBuffer" to the same level as `Vec<u8>`, in particular to building buffers on the fly.

Basically, when converting to a byte slice `&[u8]`, the compiler loses the type size information, and thus needs to perform extra checks and can't just optimize out the code.

This PR adopts the same API as `Vec<T>::extend_from_slice`, but since our buffers are in `u8` (i.e. a la `Vec<u8>`), I made the signature

```
pub fn extend_from_slice<T: ToByteSlice>(&mut self, items: &[T])
pub fn push<T: ToByteSlice>(&mut self, item: &T)
```

i.e. it consumes something that can be converted to a byte slice, but internally makes the conversion to bytes (as `to_byte_slice` was doing).

Credits for the root cause analysis that lead to this PR go to @Dandandan, [originally fielded here](apache#9016 (comment)).

> [...] current conversion to a byte slice may add some overhead? - @Dandandan

Benches (against master, so, both this PR and apache#9044 ):

```
Switched to branch 'perf_buffer'
Your branch and 'origin/perf_buffer' have diverged,
and have 6 and 1 different commits each, respectively.
  (use "git pull" to merge the remote branch into yours)
   Compiling arrow v3.0.0-SNAPSHOT (/Users/jorgecarleitao/projects/arrow/rust/arrow)
    Finished bench [optimized] target(s) in 1m 00s
     Running /Users/jorgecarleitao/projects/arrow/rust/target/release/deps/buffer_create-915da5f1abaf0471
Gnuplot not found, using plotters backend
mutable                 time:   [463.11 us 463.57 us 464.07 us]
                        change: [-19.508% -18.571% -17.526%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 10 outliers among 100 measurements (10.00%)
  1 (1.00%) high mild
  9 (9.00%) high severe

mutable prepared        time:   [527.84 us 528.46 us 529.14 us]
                        change: [-13.356% -12.522% -11.790%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 12 outliers among 100 measurements (12.00%)
  5 (5.00%) high mild
  7 (7.00%) high severe

Benchmarking from_slice: Warming up for 3.0000 s
Warning: Unable to complete 100 samples in 5.0s. You may wish to increase target time to 6.1s, enable flat sampling, or reduce sample count to 60.
from_slice              time:   [1.1968 ms 1.1979 ms 1.1991 ms]
                        change: [-6.8697% -6.2029% -5.5812%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 10 outliers among 100 measurements (10.00%)
  3 (3.00%) high mild
  7 (7.00%) high severe

from_slice prepared     time:   [917.49 us 918.89 us 920.60 us]
                        change: [-6.5111% -5.9102% -5.3038%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 10 outliers among 100 measurements (10.00%)
  4 (4.00%) high mild
  6 (6.00%) high severe
```

Closes apache#9076 from jorgecarleitao/perf_buffer

Authored-by: Jorge C. Leitao <[email protected]>
Signed-off-by: Jorge C. Leitao <[email protected]>
  • Loading branch information
jorgecarleitao authored and GeorgeAp committed Jun 7, 2021
1 parent bc03219 commit 5647a30
Show file tree
Hide file tree
Showing 38 changed files with 492 additions and 552 deletions.
3 changes: 1 addition & 2 deletions rust/arrow/benches/buffer_create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ fn mutable_buffer(data: &[Vec<u32>], capacity: usize) -> Buffer {
criterion::black_box({
let mut result = MutableBuffer::new(capacity);

data.iter()
.for_each(|vec| result.extend_from_slice(vec.to_byte_slice()));
data.iter().for_each(|vec| result.extend_from_slice(vec));

result.into()
})
Expand Down
54 changes: 27 additions & 27 deletions rust/arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ use super::{
array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, ArrayDataRef,
FixedSizeListArray, GenericBinaryIter, GenericListArray, OffsetSizeTrait,
};
use crate::buffer::Buffer;
use crate::util::bit_util;
use crate::{buffer::Buffer, datatypes::ToByteSlice};
use crate::{buffer::MutableBuffer, datatypes::DataType};

/// Like OffsetSizeTrait, but specialized for Binary
Expand Down Expand Up @@ -110,8 +110,8 @@ impl<OffsetSize: BinaryOffsetSizeTrait> GenericBinaryArray<OffsetSize> {
}
let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
.len(v.len())
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_buffer(Buffer::from(&values[..]))
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
.build();
GenericBinaryArray::<OffsetSize>::from(array_data)
}
Expand Down Expand Up @@ -245,8 +245,8 @@ where

let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
.len(data_len)
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_buffer(Buffer::from(&values[..]))
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
.null_bit_buffer(null_buf.into())
.build();
Self::from(array_data)
Expand Down Expand Up @@ -368,7 +368,7 @@ impl From<Vec<Option<Vec<u8>>>> for FixedSizeBinaryArray {
.all(|item| item.len() == size));

let num_bytes = bit_util::ceil(len, 8);
let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false);
let mut null_buf = MutableBuffer::from_len_zeroed(num_bytes);
let null_slice = null_buf.as_slice_mut();

data.iter().enumerate().for_each(|(i, entry)| {
Expand Down Expand Up @@ -641,8 +641,8 @@ mod tests {
// Array data: ["hello", "", "parquet"]
let array_data = ArrayData::builder(DataType::Binary)
.len(3)
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_buffer(Buffer::from(&values[..]))
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
.build();
let binary_array = BinaryArray::from(array_data);
assert_eq!(3, binary_array.len());
Expand All @@ -664,8 +664,8 @@ mod tests {
let array_data = ArrayData::builder(DataType::Binary)
.len(4)
.offset(1)
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_buffer(Buffer::from(&values[..]))
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
.build();
let binary_array = BinaryArray::from(array_data);
assert_eq!(
Expand All @@ -688,8 +688,8 @@ mod tests {
// Array data: ["hello", "", "parquet"]
let array_data = ArrayData::builder(DataType::LargeBinary)
.len(3)
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_buffer(Buffer::from(&values[..]))
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
.build();
let binary_array = LargeBinaryArray::from(array_data);
assert_eq!(3, binary_array.len());
Expand All @@ -711,8 +711,8 @@ mod tests {
let array_data = ArrayData::builder(DataType::LargeBinary)
.len(4)
.offset(1)
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_buffer(Buffer::from(&values[..]))
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
.build();
let binary_array = LargeBinaryArray::from(array_data);
assert_eq!(
Expand All @@ -739,14 +739,14 @@ mod tests {
// Array data: ["hello", "", "parquet"]
let array_data1 = ArrayData::builder(DataType::Binary)
.len(3)
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_buffer(Buffer::from(&values[..]))
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
.build();
let binary_array1 = BinaryArray::from(array_data1);

let array_data2 = ArrayData::builder(DataType::Binary)
.len(3)
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_child_data(values_data)
.build();
let list_array = ListArray::from(array_data2);
Expand Down Expand Up @@ -778,14 +778,14 @@ mod tests {
// Array data: ["hello", "", "parquet"]
let array_data1 = ArrayData::builder(DataType::LargeBinary)
.len(3)
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_buffer(Buffer::from(&values[..]))
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
.build();
let binary_array1 = LargeBinaryArray::from(array_data1);

let array_data2 = ArrayData::builder(DataType::Binary)
.len(3)
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_child_data(values_data)
.build();
let list_array = LargeListArray::from(array_data2);
Expand Down Expand Up @@ -838,13 +838,13 @@ mod tests {
let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
let values_data = ArrayData::builder(DataType::UInt32)
.len(12)
.add_buffer(Buffer::from(values[..].to_byte_slice()))
.add_buffer(Buffer::from_slice_ref(&values))
.build();
let offsets: [i32; 4] = [0, 5, 5, 12];

let array_data = ArrayData::builder(DataType::Utf8)
.len(3)
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_child_data(values_data)
.build();
let list_array = ListArray::from(array_data);
Expand All @@ -860,14 +860,14 @@ mod tests {
let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
let values_data = ArrayData::builder(DataType::UInt32)
.len(12)
.add_buffer(Buffer::from(values[..].to_byte_slice()))
.add_buffer(Buffer::from_slice_ref(&values))
.add_child_data(ArrayData::builder(DataType::Boolean).build())
.build();
let offsets: [i32; 4] = [0, 5, 5, 12];

let array_data = ArrayData::builder(DataType::Utf8)
.len(3)
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_child_data(values_data)
.build();
let list_array = ListArray::from(array_data);
Expand Down Expand Up @@ -934,7 +934,7 @@ mod tests {
let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
let values_data = ArrayData::builder(DataType::UInt32)
.len(12)
.add_buffer(Buffer::from(values[..].to_byte_slice()))
.add_buffer(Buffer::from_slice_ref(&values))
.add_child_data(ArrayData::builder(DataType::Boolean).build())
.build();

Expand All @@ -957,8 +957,8 @@ mod tests {
let offsets: [i32; 4] = [0, 5, 5, 12];
let array_data = ArrayData::builder(DataType::Binary)
.len(3)
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_buffer(Buffer::from(&values[..]))
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
.build();
let binary_array = BinaryArray::from(array_data);
binary_array.value(4);
Expand Down
4 changes: 2 additions & 2 deletions rust/arrow/src/array/array_boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,8 @@ impl<Ptr: Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.

let num_bytes = bit_util::ceil(data_len, 8);
let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false);
let mut val_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false);
let mut null_buf = MutableBuffer::from_len_zeroed(num_bytes);
let mut val_buf = MutableBuffer::from_len_zeroed(num_bytes);

let data = val_buf.as_slice_mut();

Expand Down
75 changes: 16 additions & 59 deletions rust/arrow/src/array/array_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,11 +298,7 @@ impl fmt::Debug for FixedSizeListArray {
#[cfg(test)]
mod tests {
use crate::{
array::ArrayData,
array::Int32Array,
buffer::Buffer,
datatypes::{Field, ToByteSlice},
memory,
array::ArrayData, array::Int32Array, buffer::Buffer, datatypes::Field,
util::bit_util,
};

Expand All @@ -313,12 +309,12 @@ mod tests {
// Construct a value array
let value_data = ArrayData::builder(DataType::Int32)
.len(8)
.add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
.build();

// Construct a buffer for value offsets, for the nested array:
// [[0, 1, 2], [3, 4, 5], [6, 7]]
let value_offsets = Buffer::from(&[0, 3, 6, 8].to_byte_slice());
let value_offsets = Buffer::from_slice_ref(&[0, 3, 6, 8]);

// Construct a list array from the above two
let list_data_type =
Expand Down Expand Up @@ -383,12 +379,12 @@ mod tests {
// Construct a value array
let value_data = ArrayData::builder(DataType::Int32)
.len(8)
.add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
.build();

// Construct a buffer for value offsets, for the nested array:
// [[0, 1, 2], [3, 4, 5], [6, 7]]
let value_offsets = Buffer::from(&[0i64, 3, 6, 8].to_byte_slice());
let value_offsets = Buffer::from_slice_ref(&[0i64, 3, 6, 8]);

// Construct a list array from the above two
let list_data_type =
Expand Down Expand Up @@ -453,7 +449,7 @@ mod tests {
// Construct a value array
let value_data = ArrayData::builder(DataType::Int32)
.len(9)
.add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7, 8].to_byte_slice()))
.add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7, 8]))
.build();

// Construct a list array from the above two
Expand Down Expand Up @@ -522,7 +518,7 @@ mod tests {
// Construct a value array
let value_data = ArrayData::builder(DataType::Int32)
.len(8)
.add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
.build();

// Construct a list array from the above two
Expand All @@ -542,15 +538,12 @@ mod tests {
// Construct a value array
let value_data = ArrayData::builder(DataType::Int32)
.len(10)
.add_buffer(Buffer::from(
&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9].to_byte_slice(),
))
.add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
.build();

// Construct a buffer for value offsets, for the nested array:
// [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
let value_offsets =
Buffer::from(&[0, 2, 2, 2, 4, 6, 6, 9, 9, 10].to_byte_slice());
let value_offsets = Buffer::from_slice_ref(&[0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
// 01011001 00000001
let mut null_bits: [u8; 2] = [0; 2];
bit_util::set_bit(&mut null_bits, 0);
Expand Down Expand Up @@ -607,15 +600,12 @@ mod tests {
// Construct a value array
let value_data = ArrayData::builder(DataType::Int32)
.len(10)
.add_buffer(Buffer::from(
&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9].to_byte_slice(),
))
.add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
.build();

// Construct a buffer for value offsets, for the nested array:
// [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
let value_offsets =
Buffer::from(&[0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10].to_byte_slice());
let value_offsets = Buffer::from_slice_ref(&[0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
// 01011001 00000001
let mut null_bits: [u8; 2] = [0; 2];
bit_util::set_bit(&mut null_bits, 0);
Expand Down Expand Up @@ -674,9 +664,7 @@ mod tests {
// Construct a value array
let value_data = ArrayData::builder(DataType::Int32)
.len(10)
.add_buffer(Buffer::from(
&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9].to_byte_slice(),
))
.add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
.build();

// Set null buts for the nested array:
Expand Down Expand Up @@ -737,7 +725,7 @@ mod tests {
fn test_list_array_invalid_buffer_len() {
let value_data = ArrayData::builder(DataType::Int32)
.len(8)
.add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
.build();
let list_data_type =
DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
Expand All @@ -753,7 +741,7 @@ mod tests {
expected = "ListArray should contain a single child array (values array)"
)]
fn test_list_array_invalid_child_array_len() {
let value_offsets = Buffer::from(&[0, 2, 5, 7].to_byte_slice());
let value_offsets = Buffer::from_slice_ref(&[0, 2, 5, 7]);
let list_data_type =
DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
Expand All @@ -768,10 +756,10 @@ mod tests {
fn test_list_array_invalid_value_offset_start() {
let value_data = ArrayData::builder(DataType::Int32)
.len(8)
.add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
.build();

let value_offsets = Buffer::from(&[2, 2, 5, 7].to_byte_slice());
let value_offsets = Buffer::from_slice_ref(&[2, 2, 5, 7]);

let list_data_type =
DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
Expand All @@ -782,35 +770,4 @@ mod tests {
.build();
ListArray::from(list_data);
}

#[test]
#[should_panic(expected = "memory is not aligned")]
fn test_primitive_array_alignment() {
let ptr = memory::allocate_aligned(8);
let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) };
let buf2 = buf.slice(1);
let array_data = ArrayData::builder(DataType::Int32).add_buffer(buf2).build();
Int32Array::from(array_data);
}

#[test]
#[should_panic(expected = "memory is not aligned")]
fn test_list_array_alignment() {
let ptr = memory::allocate_aligned(8);
let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) };
let buf2 = buf.slice(1);

let values: [i32; 8] = [0; 8];
let value_data = ArrayData::builder(DataType::Int32)
.add_buffer(Buffer::from(values.to_byte_slice()))
.build();

let list_data_type =
DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
.add_buffer(buf2)
.add_child_data(value_data)
.build();
ListArray::from(list_data);
}
}
Loading

0 comments on commit 5647a30

Please sign in to comment.