Skip to content

Commit

Permalink
fix: Field nullability mismatch when converting struct arrays to arrow (
Browse files Browse the repository at this point in the history
#2174)

seems to be part of issues I ran into as part of #2155.
  • Loading branch information
AdamGS authored Jan 31, 2025
1 parent cdc01d7 commit 6bb4149
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 9 deletions.
1 change: 0 additions & 1 deletion vortex-array/src/array/bool/compute/to_arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ impl ToArrowFn<BoolArray> for BoolEncoding {
if data_type != &DataType::Boolean {
vortex_bail!("Unsupported data type: {data_type}");
}

Ok(Some(Arc::new(ArrowBoolArray::new(
array.boolean_buffer(),
array.logical_validity()?.to_null_buffer(),
Expand Down
67 changes: 62 additions & 5 deletions vortex-array/src/array/struct_/compute/to_arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use itertools::Itertools;
use vortex_error::{vortex_bail, VortexResult};

use crate::array::{StructArray, StructEncoding};
use crate::compute::{to_arrow, ToArrowFn};
use crate::compute::{to_arrow, try_cast, ToArrowFn};
use crate::variants::StructArrayTrait;

impl ToArrowFn<StructArray> for StructEncoding {
Expand All @@ -24,6 +24,14 @@ impl ToArrowFn<StructArray> for StructEncoding {
.iter()
.zip_eq(array.children())
.map(|(field, arr)| {
// We check that the Vortex array nullability is compatible with the field
// nullability. In other words, make sure we don't return any nulls for a
// non-nullable field.
let _ = try_cast(
&arr,
&arr.dtype().with_nullability(field.is_nullable().into()),
)?;

to_arrow(arr, field.data_type()).map_err(|err| {
err.with_context(format!("Failed to canonicalize field {}", field))
})
Expand All @@ -42,12 +50,12 @@ impl ToArrowFn<StructArray> for StructEncoding {
.names()
.iter()
.zip(field_arrays.iter())
.zip(array.dtypes().iter())
.map(|((name, arrow_field), vortex_field)| {
.zip(target_fields.iter())
.map(|((name, field_array), target_field)| {
Field::new(
&**name,
arrow_field.data_type().clone(),
vortex_field.is_nullable(),
field_array.data_type().clone(),
target_field.is_nullable(),
)
})
.map(Arc::new)
Expand All @@ -61,3 +69,52 @@ impl ToArrowFn<StructArray> for StructEncoding {
}
}
}

#[cfg(test)]
mod tests {
use vortex_buffer::buffer;
use vortex_dtype::FieldNames;

use super::*;
use crate::array::PrimitiveArray;
use crate::arrow::IntoArrowArray;
use crate::validity::Validity;
use crate::IntoArray as _;

#[test]
fn nullable_non_null_to_arrow() {
let xs = PrimitiveArray::new(buffer![0i64, 1, 2, 3, 4], Validity::AllValid);

let struct_a = StructArray::try_new(
FieldNames::from(["xs".into()]),
vec![xs.into_array()],
5,
Validity::AllValid,
)
.unwrap();

let fields = vec![Field::new("xs", DataType::Int64, false)];
let arrow_dt = DataType::Struct(fields.into());

struct_a.into_array().into_arrow(&arrow_dt).unwrap();
}

#[test]
fn nullable_with_nulls_to_arrow() {
let xs =
PrimitiveArray::from_option_iter(vec![Some(0_i64), Some(1), Some(2), None, Some(3)]);

let struct_a = StructArray::try_new(
FieldNames::from(["xs".into()]),
vec![xs.into_array()],
5,
Validity::AllValid,
)
.unwrap();

let fields = vec![Field::new("xs", DataType::Int64, false)];
let arrow_dt = DataType::Struct(fields.into());

assert!(struct_a.into_array().into_arrow(&arrow_dt).is_err());
}
}
5 changes: 2 additions & 3 deletions vortex-datafusion/src/persistent/opener.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@ use datafusion_physical_expr::{split_conjunction, PhysicalExpr};
use futures::{FutureExt as _, StreamExt, TryStreamExt};
use object_store::ObjectStore;
use tokio::runtime::Handle;
use vortex_array::array::StructArray;
use vortex_array::arrow::FromArrowType;
use vortex_array::ContextRef;
use vortex_array::{ContextRef, IntoArrayVariant};
use vortex_dtype::{DType, FieldNames};
use vortex_error::VortexResult;
use vortex_expr::datafusion::convert_expr_to_vortex;
Expand Down Expand Up @@ -98,7 +97,7 @@ impl FileOpener for VortexFileOpener {
Ok(vxf
.scan(Scan::new(this.projection.clone()).with_some_filter(this.filter.clone()))?
.map_ok(move |array| {
let st = StructArray::try_from(array)?;
let st = array.into_struct()?;
st.into_record_batch_with_schema(projected_arrow_schema.as_ref())
})
.map(|r| r.and_then(|inner| inner))
Expand Down

0 comments on commit 6bb4149

Please sign in to comment.