Skip to content

Commit

Permalink
Add CI check for full validation mode (#1546)
Browse files Browse the repository at this point in the history
* Add force_validate feature

* Disable some redundant checks

* Add issue link

* Add test with force_validate feature flag

* fix up message

* disable due to #1547

* disable ipc test failure

* fix clippy

* Fix doctest to pass with force_validate enabled
  • Loading branch information
alamb authored Apr 14, 2022
1 parent d701939 commit 3fed612
Show file tree
Hide file tree
Showing 10 changed files with 54 additions and 5 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,9 @@ jobs:
# Switch to arrow crate
cd arrow
# re-run tests on arrow crate with additional features
# re-run tests on arrow crate to ensure
# all arrays are created correctly
cargo test --features=force_validate
cargo test --features=prettyprint
# run test on arrow crate with minimal set of features
cargo test --no-default-features
Expand Down
4 changes: 4 additions & 0 deletions arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ prettyprint = ["comfy-table"]
# target without assuming an environment containing JavaScript.
test_utils = ["rand"]
pyarrow = ["pyo3"]
# force_validate runs full data validation for all arrays that are created
# this is not enabled by default as it is too computationally expensive
# but is run as part of our CI checks
force_validate = []

[dev-dependencies]
rand = "0.8"
Expand Down
3 changes: 3 additions & 0 deletions arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1426,6 +1426,9 @@ mod tests {
#[should_panic(
expected = "FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays"
)]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_fixed_size_binary_array_from_incorrect_list_array() {
let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
let values_data = ArrayData::builder(DataType::UInt32)
Expand Down
3 changes: 3 additions & 0 deletions arrow/src/array/array_boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,9 @@ mod tests {
#[test]
#[should_panic(expected = "BooleanArray data should contain a single buffer only \
(values buffer)")]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_boolean_array_invalid_buffer_len() {
let data = unsafe {
ArrayData::builder(DataType::Boolean)
Expand Down
12 changes: 12 additions & 0 deletions arrow/src/array/array_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,9 @@ mod tests {
#[should_panic(
expected = "FixedSizeListArray child array length should be a multiple of 3"
)]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_fixed_size_list_array_unequal_children() {
// Construct a value array
let value_data = ArrayData::builder(DataType::Int32)
Expand Down Expand Up @@ -1065,6 +1068,9 @@ mod tests {
#[should_panic(
expected = "ListArray data should contain a single buffer only (value offsets)"
)]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_list_array_invalid_buffer_len() {
let value_data = unsafe {
ArrayData::builder(DataType::Int32)
Expand All @@ -1087,6 +1093,9 @@ mod tests {
#[should_panic(
expected = "ListArray should contain a single child array (values array)"
)]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_list_array_invalid_child_array_len() {
let value_offsets = Buffer::from_slice_ref(&[0, 2, 5, 7]);
let list_data_type =
Expand Down Expand Up @@ -1137,6 +1146,9 @@ mod tests {

#[test]
#[should_panic(expected = "memory is not aligned")]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_list_array_alignment() {
let ptr = alloc::allocate_aligned::<u8>(8);
let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) };
Expand Down
3 changes: 3 additions & 0 deletions arrow/src/array/array_primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,9 @@ mod tests {
#[test]
#[should_panic(expected = "PrimitiveArray data should contain a single buffer only \
(values buffer)")]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_primitive_array_invalid_buffer_len() {
let buffer = Buffer::from_slice_ref(&[0i32, 1, 2, 3, 4]);
let data = unsafe {
Expand Down
12 changes: 9 additions & 3 deletions arrow/src/array/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ impl ArrayData {
/// Note: This is a low level API and most users of the arrow
/// crate should create arrays using the methods in the `array`
/// module.
#[allow(clippy::let_and_return)]
pub unsafe fn new_unchecked(
data_type: DataType,
len: usize,
Expand All @@ -286,15 +287,20 @@ impl ArrayData {
Some(null_count) => null_count,
};
let null_bitmap = null_bit_buffer.map(Bitmap::from);
Self {
let new_self = Self {
data_type,
len,
null_count,
offset,
buffers,
child_data,
null_bitmap,
}
};

// Provide a force_validate mode
#[cfg(feature = "force_validate")]
new_self.validate_full().unwrap();
new_self
}

/// Create a new ArrayData, validating that the provided buffers
Expand Down Expand Up @@ -2340,7 +2346,7 @@ mod tests {

#[test]
#[should_panic(
expected = "child #0 invalid: Invalid argument error: Value at position 1 out of bounds: -1 (should be in [0, 1])"
expected = "Value at position 1 out of bounds: -1 (should be in [0, 1])"
)]
/// test that children are validated recursively (aka bugs in child data of struct also are flagged)
fn test_validate_recursive() {
Expand Down
6 changes: 6 additions & 0 deletions arrow/src/compute/kernels/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1692,6 +1692,9 @@ mod tests {
}

#[test]
// Fails when validation enabled
// https://github.com/apache/arrow-rs/issues/1547
#[cfg(not(feature = "force_validate"))]
fn test_filter_union_array_sparse() {
let mut builder = UnionBuilder::new_sparse(3);
builder.append::<Int32Type>("A", 1).unwrap();
Expand All @@ -1703,6 +1706,9 @@ mod tests {
}

#[test]
// Fails when validation enabled
// https://github.com/apache/arrow-rs/issues/1547
#[cfg(not(feature = "force_validate"))]
fn test_filter_union_array_sparse_with_nulls() {
let mut builder = UnionBuilder::new_sparse(4);
builder.append::<Int32Type>("A", 1).unwrap();
Expand Down
10 changes: 9 additions & 1 deletion arrow/src/compute/kernels/substring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,23 @@ fn generic_substring<OffsetSize: StringOffsetSizeTrait>(
///
/// # Warning
///
/// This function **might** return in invalid utf-8 format if the character length falls on a non-utf8 boundary.
/// This function **might** return in invalid utf-8 format if the
/// character length falls on a non-utf8 boundary, which we
/// [hope to fix](https://github.com/apache/arrow-rs/issues/1531)
/// in a future release.
///
/// ## Example of getting an invalid substring
/// ```
/// # // Doesn't pass due to https://github.com/apache/arrow-rs/issues/1531
/// # #[cfg(not(feature = "force_validate"))]
/// # {
/// # use arrow::array::StringArray;
/// # use arrow::compute::kernels::substring::substring;
/// let array = StringArray::from(vec![Some("E=mc²")]);
/// let result = substring(&array, -1, &None).unwrap();
/// let result = result.as_any().downcast_ref::<StringArray>().unwrap();
/// assert_eq!(result.value(0).as_bytes(), &[0x00B2]); // invalid utf-8 format
/// # }
/// ```
///
/// # Error
Expand Down
2 changes: 2 additions & 0 deletions arrow/src/ipc/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1107,6 +1107,8 @@ mod tests {
}

#[test]
// https://github.com/apache/arrow-rs/issues/1548
#[cfg(not(feature = "force_validate"))]
fn projection_should_work() {
// complementary to the previous test
let testdata = crate::util::test_util::arrow_test_data();
Expand Down

0 comments on commit 3fed612

Please sign in to comment.