diff --git a/arrow-array/src/array/binary_array.rs b/arrow-array/src/array/binary_array.rs index ccce3cda9989..be861474f659 100644 --- a/arrow-array/src/array/binary_array.rs +++ b/arrow-array/src/array/binary_array.rs @@ -683,7 +683,7 @@ mod tests { let data = vec![None]; let array = BinaryArray::from(data); array - .data() + .into_data() .validate_full() .expect("All null array has valid array data"); } @@ -693,7 +693,7 @@ mod tests { let data = vec![None]; let array = LargeBinaryArray::from(data); array - .data() + .into_data() .validate_full() .expect("All null array has valid array data"); } diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs index 343fed76846a..dd6213d543ea 100644 --- a/arrow-array/src/array/dictionary_array.rs +++ b/arrow-array/src/array/dictionary_array.rs @@ -252,11 +252,11 @@ impl DictionaryArray { // Note: This use the ArrayDataBuilder::build_unchecked and afterwards // call the new function which only validates that the keys are in bounds. - let data = keys.data().clone(); + let data = keys.to_data(); let builder = data .into_builder() .data_type(dict_data_type) - .add_child_data(values.data().clone()); + .add_child_data(values.to_data()); // Safety: `validate` ensures key type is correct, and // `validate_values` ensures all offsets are within range @@ -397,7 +397,7 @@ impl DictionaryArray { Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()), )) - .child_data(vec![values.data().clone()]); + .child_data(vec![values.to_data()]); // SAFETY: // Offsets were valid before and verified length is greater than or equal @@ -789,7 +789,7 @@ mod tests { let dict_array = Int16DictionaryArray::from(dict_data); let values = dict_array.values(); - assert_eq!(&value_data, values.data()); + assert_eq!(value_data, values.to_data()); assert_eq!(DataType::Int8, dict_array.value_type()); assert_eq!(3, dict_array.len()); @@ -809,7 +809,7 @@ mod tests { let dict_array = Int16DictionaryArray::from(dict_data); let values = dict_array.values(); - assert_eq!(&value_data, values.data()); + assert_eq!(value_data, values.to_data()); assert_eq!(DataType::Int8, dict_array.value_type()); assert_eq!(2, dict_array.len()); assert_eq!(dict_array.keys(), &Int16Array::from(vec![3_i16, 4])); @@ -911,7 +911,7 @@ mod tests { let test = vec![None, None, None]; let array: DictionaryArray = test.into_iter().collect(); array - .data() + .into_data() .validate_full() .expect("All null array has valid array data"); } @@ -987,7 +987,7 @@ mod tests { assert_eq!(array.keys().data_type(), &DataType::Int32); assert_eq!(array.values().data_type(), &DataType::Utf8); - assert_eq!(array.data().null_count(), 1); + assert_eq!(array.null_count(), 1); assert!(array.keys().is_valid(0)); assert!(array.keys().is_valid(1)); @@ -1076,7 +1076,7 @@ mod tests { let boxed: ArrayRef = Arc::new(dict_array); let col: DictionaryArray = - DictionaryArray::::from(boxed.data().clone()); + DictionaryArray::::from(boxed.to_data()); let err = col.into_primitive_dict_builder::(); let returned = err.unwrap_err(); diff --git a/arrow-array/src/array/fixed_size_binary_array.rs b/arrow-array/src/array/fixed_size_binary_array.rs index fa303b4a8dbc..f8d2f04dee69 100644 --- a/arrow-array/src/array/fixed_size_binary_array.rs +++ b/arrow-array/src/array/fixed_size_binary_array.rs @@ -792,7 +792,7 @@ mod tests { FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.into_iter(), 0) .unwrap(); array - .data() + .into_data() .validate_full() .expect("All null array has valid array data"); } diff --git a/arrow-array/src/array/fixed_size_list_array.rs b/arrow-array/src/array/fixed_size_list_array.rs index 4a592d869437..a56bb017f6b0 100644 --- a/arrow-array/src/array/fixed_size_list_array.rs +++ b/arrow-array/src/array/fixed_size_list_array.rs @@ -256,6 +256,8 @@ impl std::fmt::Debug for FixedSizeListArray { #[cfg(test)] mod tests { use super::*; + use crate::cast::AsArray; + use crate::types::Int32Type; use crate::Int32Array; use arrow_buffer::{bit_util, Buffer}; use arrow_schema::Field; @@ -281,7 +283,7 @@ mod tests { .unwrap(); let list_array = FixedSizeListArray::from(list_data); - assert_eq!(&value_data, list_array.values().data()); + assert_eq!(value_data, list_array.values().to_data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(3, list_array.len()); assert_eq!(0, list_array.null_count()); @@ -310,19 +312,11 @@ mod tests { .unwrap(); let list_array = FixedSizeListArray::from(list_data); - assert_eq!(&value_data, list_array.values().data()); + assert_eq!(value_data, list_array.values().to_data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(3, list_array.len()); assert_eq!(0, list_array.null_count()); - assert_eq!( - 3, - list_array - .value(0) - .as_any() - .downcast_ref::() - .unwrap() - .value(0) - ); + assert_eq!(3, list_array.value(0).as_primitive::().value(0)); assert_eq!(6, list_array.value_offset(1)); assert_eq!(3, list_array.value_length()); } @@ -386,7 +380,7 @@ mod tests { .unwrap(); let list_array = FixedSizeListArray::from(list_data); - assert_eq!(&value_data, list_array.values().data()); + assert_eq!(value_data, list_array.values().to_data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(5, list_array.len()); assert_eq!(2, list_array.null_count()); diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 8b314596d959..fb94fe12c87c 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -442,7 +442,7 @@ mod tests { let list_array = ListArray::from(list_data); let values = list_array.values(); - assert_eq!(&value_data, values.data()); + assert_eq!(value_data, values.to_data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(3, list_array.len()); assert_eq!(0, list_array.null_count()); @@ -482,7 +482,7 @@ mod tests { let list_array = ListArray::from(list_data); let values = list_array.values(); - assert_eq!(&value_data, values.data()); + assert_eq!(value_data, values.to_data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(2, list_array.len()); assert_eq!(0, list_array.null_count()); @@ -532,7 +532,7 @@ mod tests { let list_array = LargeListArray::from(list_data); let values = list_array.values(); - assert_eq!(&value_data, values.data()); + assert_eq!(value_data, values.to_data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(3, list_array.len()); assert_eq!(0, list_array.null_count()); @@ -572,7 +572,7 @@ mod tests { let list_array = LargeListArray::from(list_data); let values = list_array.values(); - assert_eq!(&value_data, values.data()); + assert_eq!(value_data, values.to_data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(2, list_array.len()); assert_eq!(0, list_array.null_count()); @@ -630,7 +630,7 @@ mod tests { let list_array = ListArray::from(list_data); let values = list_array.values(); - assert_eq!(&value_data, values.data()); + assert_eq!(value_data, values.to_data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(9, list_array.len()); assert_eq!(4, list_array.null_count()); @@ -694,7 +694,7 @@ mod tests { let list_array = LargeListArray::from(list_data); let values = list_array.values(); - assert_eq!(&value_data, values.data()); + assert_eq!(value_data, values.to_data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(9, list_array.len()); assert_eq!(4, list_array.null_count()); diff --git a/arrow-array/src/array/map_array.rs b/arrow-array/src/array/map_array.rs index 3d78387cdf50..22ebbe533a2f 100644 --- a/arrow-array/src/array/map_array.rs +++ b/arrow-array/src/array/map_array.rs @@ -52,6 +52,11 @@ impl MapArray { &self.values } + /// Returns a reference to the [`StructArray`] entries of this map + pub fn entries(&self) -> &ArrayRef { + &self.entries + } + /// Returns the data type of the map's keys. pub fn key_type(&self) -> &DataType { self.keys.data_type() @@ -189,7 +194,7 @@ impl MapArray { let entry_struct = StructArray::from(vec![ (keys_field, Arc::new(keys_data) as ArrayRef), - (values_field, make_array(values.data().clone())), + (values_field, make_array(values.to_data())), ]); let map_data_type = DataType::Map( @@ -369,7 +374,7 @@ mod tests { .unwrap(); let map_array = MapArray::from(map_data); - assert_eq!(&value_data, map_array.values().data()); + assert_eq!(value_data, map_array.values().to_data()); assert_eq!(&DataType::UInt32, map_array.value_type()); assert_eq!(3, map_array.len()); assert_eq!(0, map_array.null_count()); @@ -400,16 +405,9 @@ mod tests { } // Now test with a non-zero offset - let map_data = ArrayData::builder(map_array.data_type().clone()) - .len(2) - .offset(1) - .add_buffer(map_array.data().buffers()[0].clone()) - .add_child_data(map_array.data().child_data()[0].clone()) - .build() - .unwrap(); - let map_array = MapArray::from(map_data); + let map_array = map_array.slice(1, 2); - assert_eq!(&value_data, map_array.values().data()); + assert_eq!(value_data, map_array.values().to_data()); assert_eq!(&DataType::UInt32, map_array.value_type()); assert_eq!(2, map_array.len()); assert_eq!(0, map_array.null_count()); @@ -446,7 +444,7 @@ mod tests { let sliced_array = map_array.slice(1, 2); assert_eq!(2, sliced_array.len()); assert_eq!(1, sliced_array.offset()); - let sliced_array_data = sliced_array.data(); + let sliced_array_data = sliced_array.to_data(); for array_data in sliced_array_data.child_data() { assert_eq!(array_data.offset(), 1); } diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index 589cf1eaf4aa..41d5c8bebe29 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -290,7 +290,7 @@ impl Array for ArrayRef { } fn into_data(self) -> ArrayData { - self.data().clone() + self.to_data() } #[allow(deprecated)] @@ -357,7 +357,7 @@ impl<'a, T: Array> Array for &'a T { } fn into_data(self) -> ArrayData { - self.data().clone() + self.to_data() } #[allow(deprecated)] diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index f857e26c7f89..5dfcb4da4d16 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -466,9 +466,7 @@ impl PrimitiveArray { O: ArrowPrimitiveType, F: Fn(T::Native) -> O::Native, { - let data = self.data(); - - let nulls = data.nulls().cloned(); + let nulls = self.nulls().cloned(); let values = self.values().iter().map(|v| op(*v)); // JUSTIFICATION // Benefit @@ -593,9 +591,8 @@ impl PrimitiveArray { O: ArrowPrimitiveType, F: Fn(T::Native) -> Option, { - let data = self.data(); - let len = data.len(); - let (nulls, null_count, offset) = match data.nulls() { + let len = self.len(); + let (nulls, null_count, offset) = match self.nulls() { Some(n) => (Some(n.validity()), n.null_count(), n.offset()), None => (None, 0, 0), }; @@ -1185,7 +1182,7 @@ impl PrimitiveArray { pub fn precision(&self) -> u8 { match T::BYTE_LENGTH { 16 => { - if let DataType::Decimal128(p, _) = self.data().data_type() { + if let DataType::Decimal128(p, _) = self.data_type() { *p } else { unreachable!( @@ -1195,7 +1192,7 @@ impl PrimitiveArray { } } 32 => { - if let DataType::Decimal256(p, _) = self.data().data_type() { + if let DataType::Decimal256(p, _) = self.data_type() { *p } else { unreachable!( @@ -1212,7 +1209,7 @@ impl PrimitiveArray { pub fn scale(&self) -> i8 { match T::BYTE_LENGTH { 16 => { - if let DataType::Decimal128(_, s) = self.data().data_type() { + if let DataType::Decimal128(_, s) = self.data_type() { *s } else { unreachable!( @@ -1222,7 +1219,7 @@ impl PrimitiveArray { } } 32 => { - if let DataType::Decimal256(_, s) = self.data().data_type() { + if let DataType::Decimal256(_, s) = self.data_type() { *s } else { unreachable!( @@ -1874,7 +1871,7 @@ mod tests { let array = PrimitiveArray::::from(values.clone()); assert_eq!(array.values(), &values); - let array = PrimitiveArray::::from(array.data().clone()); + let array = PrimitiveArray::::from(array.to_data()); assert_eq!(array.values(), &values); } @@ -1894,7 +1891,7 @@ mod tests { let array = PrimitiveArray::::from(values.clone()); assert_eq!(array.values(), &values); - let array = PrimitiveArray::::from(array.data().clone()); + let array = PrimitiveArray::::from(array.to_data()); assert_eq!(array.values(), &values); } @@ -2190,7 +2187,7 @@ mod tests { let boxed: ArrayRef = Arc::new(array); - let col: Int32Array = PrimitiveArray::::from(boxed.data().clone()); + let col: Int32Array = PrimitiveArray::::from(boxed.to_data()); let err = col.into_builder(); match err { diff --git a/arrow-array/src/array/run_array.rs b/arrow-array/src/array/run_array.rs index ada34b47f8a5..0754913e9d3e 100644 --- a/arrow-array/src/array/run_array.rs +++ b/arrow-array/src/array/run_array.rs @@ -104,8 +104,8 @@ impl RunArray { let len = RunArray::logical_len(run_ends); let builder = ArrayDataBuilder::new(ree_array_type) .len(len) - .add_child_data(run_ends.data().clone()) - .add_child_data(values.data().clone()); + .add_child_data(run_ends.to_data()) + .add_child_data(values.to_data()); // `build_unchecked` is used to avoid recursive validation of child arrays. let array_data = unsafe { builder.build_unchecked() }; @@ -665,7 +665,7 @@ mod tests { assert_eq!(ree_array.null_count(), 0); let values = ree_array.values(); - assert_eq!(&value_data.into_data(), values.data()); + assert_eq!(value_data.into_data(), values.to_data()); assert_eq!(&DataType::Int8, values.data_type()); let run_ends = ree_array.run_ends(); diff --git a/arrow-array/src/array/string_array.rs b/arrow-array/src/array/string_array.rs index 304f0ab3eee9..e042f29c22d1 100644 --- a/arrow-array/src/array/string_array.rs +++ b/arrow-array/src/array/string_array.rs @@ -456,7 +456,7 @@ mod tests { let data: Vec> = vec![None]; let array = StringArray::from(data); array - .data() + .into_data() .validate_full() .expect("All null array has valid array data"); } @@ -466,7 +466,7 @@ mod tests { let data: Vec> = vec![None]; let array = LargeStringArray::from(data); array - .data() + .into_data() .validate_full() .expect("All null array has valid array data"); } diff --git a/arrow-array/src/array/struct_array.rs b/arrow-array/src/array/struct_array.rs index e31594d4b073..27e10a31fd00 100644 --- a/arrow-array/src/array/struct_array.rs +++ b/arrow-array/src/array/struct_array.rs @@ -464,7 +464,7 @@ mod tests { StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) .unwrap(); - let struct_data = arr.data(); + let struct_data = arr.into_data(); assert_eq!(4, struct_data.len()); assert_eq!(1, struct_data.null_count()); assert_eq!( @@ -488,8 +488,8 @@ mod tests { .build() .unwrap(); - assert_eq!(expected_string_data, *arr.column(0).data()); - assert_eq!(expected_int_data, *arr.column(1).data()); + assert_eq!(expected_string_data, struct_data.child_data()[0]); + assert_eq!(expected_int_data, struct_data.child_data()[1]); } #[test] @@ -579,8 +579,8 @@ mod tests { assert!(struct_array.is_valid(2)); assert!(struct_array.is_null(3)); assert!(struct_array.is_valid(4)); - assert_eq!(&boolean_data, struct_array.column(0).data()); - assert_eq!(&int_data, struct_array.column(1).data()); + assert_eq!(boolean_data, struct_array.column(0).to_data()); + assert_eq!(int_data, struct_array.column(1).to_data()); let c0 = struct_array.column(0); let c0 = c0.as_any().downcast_ref::().unwrap(); diff --git a/arrow-array/src/array/union_array.rs b/arrow-array/src/array/union_array.rs index 67848b4a85cb..7b818f3130b7 100644 --- a/arrow-array/src/array/union_array.rs +++ b/arrow-array/src/array/union_array.rs @@ -219,7 +219,7 @@ impl UnionArray { let new_self = unsafe { Self::new_unchecked(field_type_ids, type_ids, value_offsets, child_arrays) }; - new_self.data().validate()?; + new_self.to_data().validate()?; Ok(new_self) } diff --git a/arrow-array/src/builder/generic_list_builder.rs b/arrow-array/src/builder/generic_list_builder.rs index 5f726a5b121c..b6d0707982be 100644 --- a/arrow-array/src/builder/generic_list_builder.rs +++ b/arrow-array/src/builder/generic_list_builder.rs @@ -17,7 +17,7 @@ use crate::builder::null_buffer_builder::NullBufferBuilder; use crate::builder::{ArrayBuilder, BufferBuilder}; -use crate::{ArrayRef, GenericListArray, OffsetSizeTrait}; +use crate::{Array, ArrayRef, GenericListArray, OffsetSizeTrait}; use arrow_buffer::Buffer; use arrow_data::ArrayData; use arrow_schema::Field; @@ -228,7 +228,7 @@ where pub fn finish(&mut self) -> GenericListArray { let len = self.len(); let values_arr = self.values_builder.finish(); - let values_data = values_arr.data(); + let values_data = values_arr.to_data(); let offset_buffer = self.offsets_builder.finish(); let null_bit_buffer = self.null_buffer_builder.finish(); @@ -242,7 +242,7 @@ where let array_data_builder = ArrayData::builder(data_type) .len(len) .add_buffer(offset_buffer) - .add_child_data(values_data.clone()) + .add_child_data(values_data) .null_bit_buffer(null_bit_buffer); let array_data = unsafe { array_data_builder.build_unchecked() }; @@ -254,7 +254,7 @@ where pub fn finish_cloned(&self) -> GenericListArray { let len = self.len(); let values_arr = self.values_builder.finish_cloned(); - let values_data = values_arr.data(); + let values_data = values_arr.to_data(); let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice()); let null_bit_buffer = self @@ -270,7 +270,7 @@ where let array_data_builder = ArrayData::builder(data_type) .len(len) .add_buffer(offset_buffer) - .add_child_data(values_data.clone()) + .add_child_data(values_data) .null_bit_buffer(null_bit_buffer); let array_data = unsafe { array_data_builder.build_unchecked() }; @@ -311,7 +311,6 @@ mod tests { use crate::cast::AsArray; use crate::types::Int32Type; use crate::{Array, Int32Array}; - use arrow_buffer::Buffer; use arrow_schema::DataType; fn _test_generic_list_array_builder() { @@ -332,12 +331,9 @@ mod tests { builder.append(true); let list_array = builder.finish(); - let values = list_array.values().data().buffers()[0].clone(); - assert_eq!(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]), values); - assert_eq!( - Buffer::from_slice_ref([0, 3, 6, 8].map(|n| O::from_usize(n).unwrap())), - list_array.data().buffers()[0].clone() - ); + let list_values = list_array.values().as_primitive::(); + assert_eq!(list_values.values(), &[0, 1, 2, 3, 4, 5, 6, 7]); + assert_eq!(list_array.value_offsets(), [0, 3, 6, 8].map(O::usize_as)); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(3, list_array.len()); assert_eq!(0, list_array.null_count()); @@ -469,28 +465,22 @@ mod tests { builder.values().append(true); builder.append(true); - let list_array = builder.finish(); + let l1 = builder.finish(); - assert_eq!(4, list_array.len()); - assert_eq!(1, list_array.null_count()); - assert_eq!( - Buffer::from_slice_ref([0, 2, 5, 5, 6]), - list_array.data().buffers()[0].clone() - ); - - assert_eq!(6, list_array.values().data().len()); - assert_eq!(1, list_array.values().data().null_count()); - assert_eq!( - Buffer::from_slice_ref([0, 2, 4, 7, 7, 8, 10]), - list_array.values().data().buffers()[0].clone() - ); - - assert_eq!(10, list_array.values().data().child_data()[0].len()); - assert_eq!(0, list_array.values().data().child_data()[0].null_count()); - assert_eq!( - Buffer::from_slice_ref([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), - list_array.values().data().child_data()[0].buffers()[0].clone() - ); + assert_eq!(4, l1.len()); + assert_eq!(1, l1.null_count()); + + assert_eq!(l1.value_offsets(), &[0, 2, 5, 5, 6]); + let l2 = l1.values().as_list::(); + + assert_eq!(6, l2.len()); + assert_eq!(1, l2.null_count()); + assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8, 10]); + + let i1 = l2.values().as_primitive::(); + assert_eq!(10, i1.len()); + assert_eq!(0, i1.null_count()); + assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); } #[test] diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index 499ae183f3e9..ebffeafcf75f 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -233,7 +233,7 @@ impl StructBuilder { let mut child_data = Vec::with_capacity(self.field_builders.len()); for f in &mut self.field_builders { let arr = f.finish(); - child_data.push(arr.data().clone()); + child_data.push(arr.to_data()); } let length = self.len(); let null_bit_buffer = self.null_buffer_builder.finish(); @@ -254,7 +254,7 @@ impl StructBuilder { let mut child_data = Vec::with_capacity(self.field_builders.len()); for f in &self.field_builders { let arr = f.finish_cloned(); - child_data.push(arr.data().clone()); + child_data.push(arr.to_data()); } let length = self.len(); let null_bit_buffer = self @@ -330,9 +330,8 @@ mod tests { builder.append_null(); builder.append(true); - let arr = builder.finish(); + let struct_data = builder.finish().into_data(); - let struct_data = arr.data(); assert_eq!(4, struct_data.len()); assert_eq!(1, struct_data.null_count()); assert_eq!(&[11_u8], struct_data.nulls().unwrap().validity()); @@ -352,8 +351,8 @@ mod tests { .build() .unwrap(); - assert_eq!(expected_string_data, *arr.column(0).data()); - assert_eq!(expected_int_data, *arr.column(1).data()); + assert_eq!(expected_string_data, struct_data.child_data()[0]); + assert_eq!(expected_int_data, struct_data.child_data()[1]); } #[test] diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 5d7bea0e9d0f..0ea6332a7ea5 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -634,7 +634,7 @@ pub fn cast_with_options( let from_type = array.data_type(); // clone array if types are the same if from_type == to_type { - return Ok(make_array(array.data().clone())); + return Ok(make_array(array.to_data())); } match (from_type, to_type) { ( @@ -3108,7 +3108,7 @@ fn dictionary_cast( })?; let keys_array: ArrayRef = - Arc::new(PrimitiveArray::::from(dict_array.keys().data().clone())); + Arc::new(PrimitiveArray::::from(dict_array.keys().to_data())); let values_array = dict_array.values(); let cast_keys = cast_with_options(&keys_array, to_index_type, cast_options)?; let cast_values = @@ -3182,7 +3182,7 @@ where // Note take requires first casting the indices to u32 let keys_array: ArrayRef = - Arc::new(PrimitiveArray::::from(dict_array.keys().data().clone())); + Arc::new(PrimitiveArray::::from(dict_array.keys().to_data())); let indices = cast_with_options(&keys_array, &DataType::UInt32, cast_options)?; let u32_indices = indices @@ -3379,7 +3379,7 @@ fn cast_list_inner( to_type: &DataType, cast_options: &CastOptions, ) -> Result { - let data = array.data().clone(); + let data = array.to_data(); let underlying_array = make_array(data.child_data()[0].clone()); let cast_array = cast_with_options(underlying_array.as_ref(), to.data_type(), cast_options)?; diff --git a/arrow-ipc/src/reader.rs b/arrow-ipc/src/reader.rs index 75d078456f0a..03e427243b58 100644 --- a/arrow-ipc/src/reader.rs +++ b/arrow-ipc/src/reader.rs @@ -1570,7 +1570,7 @@ mod tests { let union1 = rb.column(0); let union2 = rb2.column(0); - assert_eq!(union1.data().buffers(), union2.data().buffers()); + assert_eq!(union1, union2); } #[test] @@ -1724,14 +1724,14 @@ mod tests { let values = StringArray::from(vec![Some("a"), None, Some("c"), None]); let keys = Int8Array::from_iter_values([0, 0, 1, 2, 0, 1, 3]); let dict_array = DictionaryArray::::try_new(&keys, &values).unwrap(); - let dict_data = dict_array.data(); + let dict_data = dict_array.to_data(); let value_offsets = Buffer::from_slice_ref(offsets); let list_data = ArrayData::builder(list_data_type) .len(4) .add_buffer(value_offsets) - .add_child_data(dict_data.clone()) + .add_child_data(dict_data) .build() .unwrap(); let list_array = GenericListArray::::from(list_data); @@ -1787,7 +1787,7 @@ mod tests { let values = StringArray::from(vec![Some("a"), None, Some("c"), None]); let keys = Int8Array::from_iter_values([0, 0, 1, 2, 0, 1, 3, 1, 2]); let dict_array = DictionaryArray::::try_new(&keys, &values).unwrap(); - let dict_data = dict_array.data(); + let dict_data = dict_array.to_data(); let list_data_type = DataType::FixedSizeList( Arc::new(Field::new_dict( @@ -1801,7 +1801,7 @@ mod tests { ); let list_data = ArrayData::builder(list_data_type) .len(3) - .add_child_data(dict_data.clone()) + .add_child_data(dict_data) .build() .unwrap(); let list_array = FixedSizeListArray::from(list_data); diff --git a/arrow-json/src/reader.rs b/arrow-json/src/reader.rs index f5bf884fb2ca..39f829052f59 100644 --- a/arrow-json/src/reader.rs +++ b/arrow-json/src/reader.rs @@ -2257,16 +2257,9 @@ mod tests { assert_eq!(10, bb.len()); assert_eq!(4.0, bb.value(9)); - let cc = batch - .column(c.0) - .as_any() - .downcast_ref::() - .unwrap(); + let cc = batch.column(c.0).as_list::(); // test that the list offsets are correct - assert_eq!( - *cc.data().buffers()[0], - Buffer::from_slice_ref([0i32, 2, 2, 4, 5]) - ); + assert_eq!(cc.value_offsets(), &[0, 2, 2, 4, 5]); let cc = cc.values().as_boolean(); let cc_expected = BooleanArray::from(vec![ Some(false), @@ -2275,18 +2268,11 @@ mod tests { None, Some(false), ]); - assert_eq!(cc.data_ref(), cc_expected.data_ref()); + assert_eq!(cc, &cc_expected); - let dd: &ListArray = batch - .column(d.0) - .as_any() - .downcast_ref::() - .unwrap(); + let dd = batch.column(d.0).as_list::(); // test that the list offsets are correct - assert_eq!( - *dd.data().buffers()[0], - Buffer::from_slice_ref([0i32, 1, 1, 2, 6]) - ); + assert_eq!(dd.value_offsets(), &[0, 1, 1, 2, 6]); let dd = dd.values().as_string::(); // values are 6 because a `d: null` is treated as a null slot @@ -2342,12 +2328,7 @@ mod tests { // compare `a` with result from json reader let batch = reader.next().unwrap().unwrap(); let read = batch.column(0); - assert!( - expected.data_ref() == read.data_ref(), - "{:?} != {:?}", - expected.data(), - read.data(), - ); + assert_eq!(&expected, read); } #[test] @@ -2425,12 +2406,9 @@ mod tests { let read = batch.column(0); assert_eq!(read.len(), 6); // compare the arrays the long way around, to better detect differences - let read: &ListArray = read.as_any().downcast_ref::().unwrap(); - let expected = expected.as_any().downcast_ref::().unwrap(); - assert_eq!( - *read.data().buffers()[0], - Buffer::from_slice_ref([0i32, 2, 3, 6, 6, 6, 7]) - ); + let read: &ListArray = read.as_list::(); + let expected = expected.as_list::(); + assert_eq!(read.value_offsets(), &[0, 2, 3, 6, 6, 6, 7]); // compare list null buffers assert_eq!(read.nulls(), expected.nulls()); // build struct from list @@ -2525,10 +2503,10 @@ mod tests { assert_eq!(batch.num_rows(), 3); assert_eq!(batch.num_columns(), 2); let col1 = batch.column(0); - assert_eq!(col1.data(), expected_accounts.data()); + assert_eq!(col1.as_ref(), &expected_accounts); // Compare the map let col2 = batch.column(1); - assert_eq!(col2.data(), expected_stocks.data()); + assert_eq!(col2.as_ref(), &expected_stocks); } #[test] diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs index d66d32017c26..cf65e8a9356b 100644 --- a/arrow-json/src/writer.rs +++ b/arrow-json/src/writer.rs @@ -290,9 +290,9 @@ fn set_column_for_json_rows( | DataType::Duration(_) => { let options = FormatOptions::default(); let formatter = ArrayFormatter::try_new(array.as_ref(), &options)?; - let data = array.data(); + let nulls = array.nulls(); rows.iter_mut().enumerate().for_each(|(idx, row)| { - if data.is_valid(idx) { + if nulls.map(|x| x.is_valid(idx)).unwrap_or(true) { row.insert( col_name.to_string(), formatter.value(idx).to_string().into(), diff --git a/arrow-ord/src/comparison.rs b/arrow-ord/src/comparison.rs index e68e064c775d..2927354da291 100644 --- a/arrow-ord/src/comparison.rs +++ b/arrow-ord/src/comparison.rs @@ -1192,7 +1192,7 @@ where { // TODO: Use take_boolean (#2967) let array = take(&dict_comparison, dict.keys(), None)?; - Ok(BooleanArray::from(array.data().clone())) + Ok(BooleanArray::from(array.to_data())) } /// Helper function to perform boolean lambda function on values from two arrays using @@ -3382,10 +3382,7 @@ mod tests { let array_b: PrimitiveArray = vec![2; item_count].into(); let result_mask = gt_eq(&array_a, &array_b).unwrap(); - assert_eq!( - result_mask.data().buffers()[0].len(), - select_mask.data().buffers()[0].len() - ); + assert_eq!(result_mask.values().len(), select_mask.values().len()); } // Expected behaviour: diff --git a/arrow-ord/src/ord.rs b/arrow-ord/src/ord.rs index db1fff6d3e2f..bfe74d9e3e7a 100644 --- a/arrow-ord/src/ord.rs +++ b/arrow-ord/src/ord.rs @@ -17,6 +17,7 @@ //! Contains functions and function factories to compare arrays. +use arrow_array::cast::AsArray; use arrow_array::types::*; use arrow_array::*; use arrow_buffer::ArrowNativeType; @@ -33,21 +34,21 @@ fn compare_primitives( where T::Native: ArrowNativeTypeOp, { - let left: PrimitiveArray = PrimitiveArray::from(left.data().clone()); - let right: PrimitiveArray = PrimitiveArray::from(right.data().clone()); + let left: PrimitiveArray = PrimitiveArray::from(left.to_data()); + let right: PrimitiveArray = PrimitiveArray::from(right.to_data()); Box::new(move |i, j| left.value(i).compare(right.value(j))) } fn compare_boolean(left: &dyn Array, right: &dyn Array) -> DynComparator { - let left: BooleanArray = BooleanArray::from(left.data().clone()); - let right: BooleanArray = BooleanArray::from(right.data().clone()); + let left: BooleanArray = BooleanArray::from(left.to_data()); + let right: BooleanArray = BooleanArray::from(right.to_data()); Box::new(move |i, j| left.value(i).cmp(&right.value(j))) } fn compare_string(left: &dyn Array, right: &dyn Array) -> DynComparator { - let left: StringArray = StringArray::from(left.data().clone()); - let right: StringArray = StringArray::from(right.data().clone()); + let left: StringArray = StringArray::from(left.to_data()); + let right: StringArray = StringArray::from(right.to_data()); Box::new(move |i, j| left.value(i).cmp(right.value(j))) } @@ -58,15 +59,13 @@ where V: ArrowPrimitiveType, V::Native: ArrowNativeTypeOp, { - let left = left.as_any().downcast_ref::>().unwrap(); - let right = right.as_any().downcast_ref::>().unwrap(); + let left = left.as_dictionary::(); + let right = right.as_dictionary::(); - let left_keys: PrimitiveArray = PrimitiveArray::from(left.keys().data().clone()); - let right_keys: PrimitiveArray = PrimitiveArray::from(right.keys().data().clone()); - let left_values: PrimitiveArray = - PrimitiveArray::from(left.values().data().clone()); - let right_values: PrimitiveArray = - PrimitiveArray::from(right.values().data().clone()); + let left_keys: PrimitiveArray = PrimitiveArray::from(left.keys().to_data()); + let right_keys: PrimitiveArray = PrimitiveArray::from(right.keys().to_data()); + let left_values: PrimitiveArray = left.values().to_data().into(); + let right_values: PrimitiveArray = right.values().to_data().into(); Box::new(move |i: usize, j: usize| { let key_left = left_keys.value(i).as_usize(); @@ -81,13 +80,13 @@ fn compare_dict_string(left: &dyn Array, right: &dyn Array) -> DynComparator where T: ArrowDictionaryKeyType, { - let left = left.as_any().downcast_ref::>().unwrap(); - let right = right.as_any().downcast_ref::>().unwrap(); + let left = left.as_dictionary::(); + let right = right.as_dictionary::(); - let left_keys: PrimitiveArray = PrimitiveArray::from(left.keys().data().clone()); - let right_keys: PrimitiveArray = PrimitiveArray::from(right.keys().data().clone()); - let left_values = StringArray::from(left.values().data().clone()); - let right_values = StringArray::from(right.values().data().clone()); + let left_keys: PrimitiveArray = PrimitiveArray::from(left.keys().to_data()); + let right_keys: PrimitiveArray = PrimitiveArray::from(right.keys().to_data()); + let left_values = StringArray::from(left.values().to_data()); + let right_values = StringArray::from(right.values().to_data()); Box::new(move |i: usize, j: usize| { let key_left = left_keys.value(i).as_usize(); @@ -264,10 +263,8 @@ pub fn build_compare( } } (FixedSizeBinary(_), FixedSizeBinary(_)) => { - let left: FixedSizeBinaryArray = - FixedSizeBinaryArray::from(left.data().clone()); - let right: FixedSizeBinaryArray = - FixedSizeBinaryArray::from(right.data().clone()); + let left: FixedSizeBinaryArray = left.to_data().into(); + let right: FixedSizeBinaryArray = right.to_data().into(); Box::new(move |i, j| left.value(i).cmp(right.value(j))) } diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs index 56b3ec2b36b0..9cc7b4f301cb 100644 --- a/arrow-row/src/lib.rs +++ b/arrow-row/src/lib.rs @@ -1651,9 +1651,9 @@ mod tests { DataType::Dictionary(_, v) if !exact => { assert_eq!(a.data_type(), v.as_ref()); let b = arrow_cast::cast(b, v).unwrap(); - assert_eq!(a.data(), b.data()) + assert_eq!(a, b.as_ref()) } - _ => assert_eq!(a.data(), b.data()), + _ => assert_eq!(a, b), } } @@ -1767,8 +1767,7 @@ mod tests { // Test struct nullability let data = s1 - .data() - .clone() + .to_data() .into_builder() .null_bit_buffer(Some(Buffer::from_slice_ref([0b00001010]))) .null_count(2) @@ -1786,7 +1785,7 @@ mod tests { assert_eq!(back.len(), 1); assert_eq!(&back[0], &s2); - back[0].data().validate_full().unwrap(); + back[0].to_data().validate_full().unwrap(); } #[test] @@ -1910,7 +1909,7 @@ mod tests { let back = converter.convert_rows(&rows).unwrap(); assert_eq!(back.len(), 1); - back[0].data().validate_full().unwrap(); + back[0].to_data().validate_full().unwrap(); assert_eq!(&back[0], &list); let options = SortOptions { @@ -1930,7 +1929,7 @@ mod tests { let back = converter.convert_rows(&rows).unwrap(); assert_eq!(back.len(), 1); - back[0].data().validate_full().unwrap(); + back[0].to_data().validate_full().unwrap(); assert_eq!(&back[0], &list); let options = SortOptions { @@ -1950,7 +1949,7 @@ mod tests { let back = converter.convert_rows(&rows).unwrap(); assert_eq!(back.len(), 1); - back[0].data().validate_full().unwrap(); + back[0].to_data().validate_full().unwrap(); assert_eq!(&back[0], &list); let options = SortOptions { @@ -1970,7 +1969,7 @@ mod tests { let back = converter.convert_rows(&rows).unwrap(); assert_eq!(back.len(), 1); - back[0].data().validate_full().unwrap(); + back[0].to_data().validate_full().unwrap(); assert_eq!(&back[0], &list); } @@ -2033,7 +2032,7 @@ mod tests { let back = converter.convert_rows(&rows).unwrap(); assert_eq!(back.len(), 1); - back[0].data().validate_full().unwrap(); + back[0].to_data().validate_full().unwrap(); assert_eq!(&back[0], &list); let options = SortOptions { @@ -2052,7 +2051,7 @@ mod tests { let back = converter.convert_rows(&rows).unwrap(); assert_eq!(back.len(), 1); - back[0].data().validate_full().unwrap(); + back[0].to_data().validate_full().unwrap(); assert_eq!(&back[0], &list); let options = SortOptions { @@ -2071,7 +2070,7 @@ mod tests { let back = converter.convert_rows(&rows).unwrap(); assert_eq!(back.len(), 1); - back[0].data().validate_full().unwrap(); + back[0].to_data().validate_full().unwrap(); assert_eq!(&back[0], &list); } @@ -2171,7 +2170,7 @@ mod tests { .into_data() .into_builder() .data_type(data_type) - .add_child_data(values.data().clone()) + .add_child_data(values.to_data()) .build() .unwrap(); diff --git a/arrow-row/src/list.rs b/arrow-row/src/list.rs index e232e717c9e8..e4ff878dd135 100644 --- a/arrow-row/src/list.rs +++ b/arrow-row/src/list.rs @@ -164,7 +164,7 @@ pub unsafe fn decode( let child = converter.convert_raw(&mut child_rows, validate_utf8)?; assert_eq!(child.len(), 1); - let child_data = child[0].data().clone(); + let child_data = child[0].to_data(); let builder = ArrayDataBuilder::new(field.data_type.clone()) .len(rows.len()) diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs index 1cab72b6d9f2..ba8fc4a2cc1a 100644 --- a/arrow-select/src/filter.rs +++ b/arrow-select/src/filter.rs @@ -23,7 +23,7 @@ use arrow_array::builder::BooleanBufferBuilder; use arrow_array::cast::AsArray; use arrow_array::types::{ArrowDictionaryKeyType, ByteArrayType}; use arrow_array::*; -use arrow_buffer::bit_util; +use arrow_buffer::{bit_util, BooleanBuffer, NullBuffer}; use arrow_buffer::{Buffer, MutableBuffer}; use arrow_data::bit_iterator::{BitIndexIterator, BitSliceIterator}; use arrow_data::transform::MutableArrayData; @@ -317,7 +317,7 @@ fn filter_array( match predicate.strategy { IterationStrategy::None => Ok(new_empty_array(values.data_type())), - IterationStrategy::All => Ok(make_array(values.data().slice(0, predicate.count))), + IterationStrategy::All => Ok(values.slice(0, predicate.count)), // actually filter _ => downcast_primitive_array! { values => Ok(Arc::new(filter_primitive(values, predicate))), @@ -386,15 +386,15 @@ fn filter_array( /// in the filtered output, and `null_buffer` is the filtered null buffer /// fn filter_null_mask( - data: &ArrayData, + nulls: Option<&NullBuffer>, predicate: &FilterPredicate, ) -> Option<(usize, Buffer)> { - if data.null_count() == 0 { + let nulls = nulls?; + if nulls.null_count() == 0 { return None; } - let nulls = data.nulls()?; - let nulls = filter_bits(nulls.buffer(), nulls.offset(), predicate); + let nulls = filter_bits(nulls.inner(), predicate); // The filtered `nulls` has a length of `predicate.count` bits and // therefore the null count is this minus the number of valid bits let null_count = predicate.count - nulls.count_set_bits_offset(0, predicate.count); @@ -407,8 +407,9 @@ fn filter_null_mask( } /// Filter the packed bitmask `buffer`, with `predicate` starting at bit offset `offset` -fn filter_bits(buffer: &Buffer, offset: usize, predicate: &FilterPredicate) -> Buffer { - let src = buffer.as_slice(); +fn filter_bits(buffer: &BooleanBuffer, predicate: &FilterPredicate) -> Buffer { + let src = buffer.values(); + let offset = buffer.offset(); match &predicate.strategy { IterationStrategy::IndexIterator => { @@ -447,18 +448,14 @@ fn filter_bits(buffer: &Buffer, offset: usize, predicate: &FilterPredicate) -> B } /// `filter` implementation for boolean buffers -fn filter_boolean(values: &BooleanArray, predicate: &FilterPredicate) -> BooleanArray { - let data = values.data(); - assert_eq!(data.buffers().len(), 1); - assert_eq!(data.child_data().len(), 0); - - let values = filter_bits(data.buffers()[0], data.offset(), predicate); +fn filter_boolean(array: &BooleanArray, predicate: &FilterPredicate) -> BooleanArray { + let values = filter_bits(array.values(), predicate); let mut builder = ArrayDataBuilder::new(DataType::Boolean) .len(predicate.count) .add_buffer(values); - if let Some((null_count, nulls)) = filter_null_mask(data, predicate) { + if let Some((null_count, nulls)) = filter_null_mask(array.nulls(), predicate) { builder = builder.null_count(null_count).null_bit_buffer(Some(nulls)); } @@ -468,17 +465,13 @@ fn filter_boolean(values: &BooleanArray, predicate: &FilterPredicate) -> Boolean /// `filter` implementation for primitive arrays fn filter_primitive( - values: &PrimitiveArray, + array: &PrimitiveArray, predicate: &FilterPredicate, ) -> PrimitiveArray where T: ArrowPrimitiveType, { - let data = values.data(); - assert_eq!(data.buffers().len(), 1); - assert_eq!(data.child_data().len(), 0); - - let values = data.buffer::(0); + let values = array.values(); assert!(values.len() >= predicate.filter.len()); let buffer = match &predicate.strategy { @@ -514,11 +507,11 @@ where IterationStrategy::All | IterationStrategy::None => unreachable!(), }; - let mut builder = ArrayDataBuilder::new(data.data_type().clone()) + let mut builder = ArrayDataBuilder::new(array.data_type().clone()) .len(predicate.count) .add_buffer(buffer.into()); - if let Some((null_count, nulls)) = filter_null_mask(data, predicate) { + if let Some((null_count, nulls)) = filter_null_mask(array.nulls(), predicate) { builder = builder.null_count(null_count).null_bit_buffer(Some(nulls)); } @@ -554,7 +547,7 @@ where Self { src_offsets: array.value_offsets(), - src_values: array.data().buffers()[1], + src_values: array.value_data(), dst_offsets, dst_values, cur_offset, @@ -617,9 +610,6 @@ fn filter_bytes( where T: ByteArrayType, { - let data = array.data(); - assert_eq!(data.buffers().len(), 2); - assert_eq!(data.child_data().len(), 0); let mut filter = FilterBytes::new(predicate.count, array); match &predicate.strategy { @@ -639,7 +629,7 @@ where .add_buffer(filter.dst_offsets.into()) .add_buffer(filter.dst_values.into()); - if let Some((null_count, nulls)) = filter_null_mask(data, predicate) { + if let Some((null_count, nulls)) = filter_null_mask(array.nulls(), predicate) { builder = builder.null_count(null_count).null_bit_buffer(Some(nulls)); } diff --git a/arrow-select/src/nullif.rs b/arrow-select/src/nullif.rs index 0895b99c7f59..6039d53eaedc 100644 --- a/arrow-select/src/nullif.rs +++ b/arrow-select/src/nullif.rs @@ -27,8 +27,8 @@ use arrow_schema::ArrowError; /// /// Typically used to implement NULLIF. pub fn nullif(left: &dyn Array, right: &BooleanArray) -> Result { - let left_data = left.data(); - let right_data = right.data(); + let left_data = left.to_data(); + let right_data = right.to_data(); if left_data.len() != right_data.len() { return Err(ArrowError::ComputeError( @@ -40,7 +40,7 @@ pub fn nullif(left: &dyn Array, right: &BooleanArray) -> Result Result>() .unwrap(); - let result_values: StringArray = result.values().data().clone().into(); + let result_values: StringArray = result.values().to_data().into(); // dictionary values should stay the same let expected_values = StringArray::from(vec!["foo", "bar", ""]); diff --git a/arrow-string/src/like.rs b/arrow-string/src/like.rs index 7b6c7d50cac3..383ac5fd11c6 100644 --- a/arrow-string/src/like.rs +++ b/arrow-string/src/like.rs @@ -152,7 +152,7 @@ pub fn $fn_name( let dict_comparison = $fn_name(left.values().as_ref(), right)?; // TODO: Use take_boolean (#2967) let array = take(&dict_comparison, left.keys(), None)?; - Ok(BooleanArray::from(array.data().clone())) + Ok(BooleanArray::from(array.to_data())) } t => Err(ArrowError::ComputeError(format!( "Should be DictionaryArray but got: {}", t diff --git a/arrow/benches/array_data_validate.rs b/arrow/benches/array_data_validate.rs index 68fc66a635bc..529205e7e28f 100644 --- a/arrow/benches/array_data_validate.rs +++ b/arrow/benches/array_data_validate.rs @@ -37,8 +37,8 @@ fn create_binary_array_data(length: i32) -> ArrayData { .unwrap() } -fn validate_utf8_array(arr: &StringArray) { - arr.data().validate_values().unwrap(); +fn validate_utf8_array(arr: &ArrayData) { + arr.validate_values().unwrap(); } fn validate_benchmark(c: &mut Criterion) { @@ -48,7 +48,7 @@ fn validate_benchmark(c: &mut Criterion) { }); //Utf8 Array - let str_arr = StringArray::from(vec!["test"; 20000]); + let str_arr = StringArray::from(vec!["test"; 20000]).to_data(); c.bench_function("validate_utf8_array_data 20000", |b| { b.iter(|| validate_utf8_array(&str_arr)) }); diff --git a/arrow/src/array/ffi.rs b/arrow/src/array/ffi.rs index 0249a70d168f..d4c284ad2cd1 100644 --- a/arrow/src/array/ffi.rs +++ b/arrow/src/array/ffi.rs @@ -57,8 +57,8 @@ pub unsafe fn export_array_into_raw( out_array: *mut ffi::FFI_ArrowArray, out_schema: *mut ffi::FFI_ArrowSchema, ) -> Result<()> { - let data = src.data(); - let array = ffi::FFI_ArrowArray::new(data); + let data = src.to_data(); + let array = ffi::FFI_ArrowArray::new(&data); let schema = ffi::FFI_ArrowSchema::try_from(data.data_type())?; std::ptr::write_unaligned(out_array, array); @@ -101,22 +101,22 @@ mod tests { #[test] fn test_u32() -> Result<()> { let array = UInt32Array::from(vec![Some(2), None, Some(1), None]); - let data = array.data(); - test_round_trip(data) + let data = array.into_data(); + test_round_trip(&data) } #[test] fn test_u64() -> Result<()> { let array = UInt64Array::from(vec![Some(2), None, Some(1), None]); - let data = array.data(); - test_round_trip(data) + let data = array.into_data(); + test_round_trip(&data) } #[test] fn test_i64() -> Result<()> { let array = Int64Array::from(vec![Some(2), None, Some(1), None]); - let data = array.data(); - test_round_trip(data) + let data = array.into_data(); + test_round_trip(&data) } #[test] @@ -148,8 +148,8 @@ mod tests { Arc::new(UInt32Array::from(vec![42, 28, 19, 31])), ), ]); - let data = array.data(); - test_round_trip(data) + let data = array.into_data(); + test_round_trip(&data) } #[test] @@ -169,8 +169,8 @@ mod tests { ]); let array = DictionaryArray::try_new(&keys, &values)?; - let data = array.data(); - test_round_trip(data) + let data = array.into_data(); + test_round_trip(&data) } #[test] @@ -178,8 +178,8 @@ mod tests { let values = vec![vec![10, 10, 10], vec![20, 20, 20], vec![30, 30, 30]]; let array = FixedSizeBinaryArray::try_from_iter(values.into_iter())?; - let data = array.data(); - test_round_trip(data) + let data = array.into_data(); + test_round_trip(&data) } #[test] @@ -195,8 +195,8 @@ mod tests { let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?; - let data = array.data(); - test_round_trip(data) + let data = array.into_data(); + test_round_trip(&data) } #[test] @@ -214,8 +214,8 @@ mod tests { .build()?; let array = FixedSizeListArray::from(list_data); - let data = array.data(); - test_round_trip(data) + let data = array.into_data(); + test_round_trip(&data) } #[test] @@ -240,8 +240,8 @@ mod tests { .build()?; let array = FixedSizeListArray::from(list_data); - let data = array.data(); - test_round_trip(data) + let data = array.into_data(); + test_round_trip(&data) } #[test] @@ -278,7 +278,7 @@ mod tests { let array = FixedSizeListArray::from(list_data); - let data = array.data(); - test_round_trip(data) + let data = array.into_data(); + test_round_trip(&data) } } diff --git a/arrow/src/compute/kernels/limit.rs b/arrow/src/compute/kernels/limit.rs index 74cbd2096bfd..097b8e949443 100644 --- a/arrow/src/compute/kernels/limit.rs +++ b/arrow/src/compute/kernels/limit.rs @@ -172,8 +172,8 @@ mod tests { assert_eq!(5, struct_array.len()); assert_eq!(1, struct_array.null_count()); - assert_eq!(&boolean_data, struct_array.column(0).data()); - assert_eq!(&int_data, struct_array.column(1).data()); + assert_eq!(boolean_data, struct_array.column(0).to_data()); + assert_eq!(int_data, struct_array.column(1).to_data()); let array: ArrayRef = Arc::new(struct_array); diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs index 7b26cf7f25a5..0af1b1111ca4 100644 --- a/arrow/src/ffi.rs +++ b/arrow/src/ffi.rs @@ -1104,7 +1104,7 @@ mod tests { )]); // export it - let array = ArrowArray::try_from(struct_array.data().clone())?; + let array = ArrowArray::try_from(struct_array.to_data())?; // (simulate consumer) import it let data = ArrayData::try_from(array)?; @@ -1128,7 +1128,7 @@ mod tests { let union = builder.build().unwrap(); // export it - let array = ArrowArray::try_from(union.data().clone())?; + let array = ArrowArray::try_from(union.to_data())?; // (simulate consumer) import it let data = ArrayData::try_from(array)?; diff --git a/arrow/src/util/data_gen.rs b/arrow/src/util/data_gen.rs index 0b0a06875432..c1094b127bba 100644 --- a/arrow/src/util/data_gen.rs +++ b/arrow/src/util/data_gen.rs @@ -289,8 +289,8 @@ mod tests { } // Test that the list's child values are non-null let b_array = batch.column(1); - let list_array = b_array.as_any().downcast_ref::().unwrap(); - let child_array = make_array(list_array.data().child_data()[0].clone()); + let list_array = b_array.as_list::(); + let child_array = list_array.values(); assert_eq!(child_array.null_count(), 0); // There should be more values than the list, to show that it's a list assert!(child_array.len() > list_array.len()); diff --git a/arrow/tests/array_equal.rs b/arrow/tests/array_equal.rs index 37968ec6a055..93296c3b0e43 100644 --- a/arrow/tests/array_equal.rs +++ b/arrow/tests/array_equal.rs @@ -372,7 +372,7 @@ fn test_empty_offsets_list_equal() { )))) .len(0) .add_buffer(Buffer::from(&empty_offsets)) - .add_child_data(values.data().clone()) + .add_child_data(values.to_data()) .null_bit_buffer(Some(Buffer::from(&empty_offsets))) .build() .unwrap() @@ -385,7 +385,7 @@ fn test_empty_offsets_list_equal() { )))) .len(0) .add_buffer(Buffer::from(&empty_offsets)) - .add_child_data(values.data().clone()) + .add_child_data(values.to_data()) .null_bit_buffer(Some(Buffer::from(&empty_offsets))) .build() .unwrap() @@ -400,11 +400,7 @@ fn test_empty_offsets_list_equal() { )))) .len(0) .add_buffer(Buffer::from(vec![0i32, 2, 3, 4, 6, 7, 8].to_byte_slice())) - .add_child_data( - Int32Array::from(vec![1, 2, -1, -2, 3, 4, -3, -4]) - .data() - .clone(), - ) + .add_child_data(Int32Array::from(vec![1, 2, -1, -2, 3, 4, -3, -4]).into_data()) .null_bit_buffer(Some(Buffer::from(vec![0b00001001]))) .build() .unwrap() diff --git a/arrow/tests/array_transform.rs b/arrow/tests/array_transform.rs index 97869544ddd0..30a8bad60368 100644 --- a/arrow/tests/array_transform.rs +++ b/arrow/tests/array_transform.rs @@ -59,8 +59,8 @@ fn test_decimal() { fn test_decimal_offset() { let decimal_array = create_decimal_array(vec![Some(1), Some(2), None, Some(3)], 10, 3); - let decimal_array = decimal_array.slice(1, 3); // 2, null, 3 - let arrays = vec![decimal_array.data()]; + let decimal_array = decimal_array.slice(1, 3).into_data(); // 2, null, 3 + let arrays = vec![&decimal_array]; let mut a = MutableArrayData::new(arrays, true, 2); a.extend(0, 0, 2); // 2, null let result = a.freeze(); @@ -74,8 +74,8 @@ fn test_decimal_offset() { fn test_decimal_null_offset_nulls() { let decimal_array = create_decimal_array(vec![Some(1), Some(2), None, Some(3)], 10, 3); - let decimal_array = decimal_array.slice(1, 3); // 2, null, 3 - let arrays = vec![decimal_array.data()]; + let decimal_array = decimal_array.slice(1, 3).into_data(); // 2, null, 3 + let arrays = vec![&decimal_array]; let mut a = MutableArrayData::new(arrays, true, 2); a.extend(0, 0, 2); // 2, null a.extend_nulls(3); // 2, null, null, null, null @@ -90,8 +90,8 @@ fn test_decimal_null_offset_nulls() { /// tests extending from a primitive array w/ offset nor nulls #[test] fn test_primitive() { - let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]); - let arrays = vec![b.data()]; + let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]).into_data(); + let arrays = vec![&b]; let mut a = MutableArrayData::new(arrays, false, 3); a.extend(0, 0, 2); let result = a.freeze(); @@ -103,9 +103,9 @@ fn test_primitive() { /// tests extending from a primitive array with offset w/ nulls #[test] fn test_primitive_offset() { - let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]); + let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]).into_data(); let b = b.slice(1, 2); - let arrays = vec![b.data()]; + let arrays = vec![&b]; let mut a = MutableArrayData::new(arrays, false, 2); a.extend(0, 0, 2); let result = a.freeze(); @@ -118,8 +118,8 @@ fn test_primitive_offset() { #[test] fn test_primitive_null_offset() { let b = UInt8Array::from(vec![Some(1), None, Some(3)]); - let b = b.slice(1, 2); - let arrays = vec![b.data()]; + let b = b.slice(1, 2).into_data(); + let arrays = vec![&b]; let mut a = MutableArrayData::new(arrays, false, 2); a.extend(0, 0, 2); let result = a.freeze(); @@ -130,9 +130,9 @@ fn test_primitive_null_offset() { #[test] fn test_primitive_null_offset_nulls() { - let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]); + let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]).into_data(); let b = b.slice(1, 2); - let arrays = vec![b.data()]; + let arrays = vec![&b]; let mut a = MutableArrayData::new(arrays, true, 2); a.extend(0, 0, 2); a.extend_nulls(3); @@ -153,8 +153,8 @@ fn test_list_null_offset() { builder.append(true); builder.values().append_slice(&[6, 7, 8]); builder.append(true); - let array = builder.finish(); - let arrays = vec![array.data()]; + let array = builder.finish().into_data(); + let arrays = vec![&array]; let mut mutable = MutableArrayData::new(arrays, false, 0); mutable.extend(0, 0, 1); @@ -174,8 +174,9 @@ fn test_list_null_offset() { /// tests extending from a variable-sized (strings and binary) array w/ offset with nulls #[test] fn test_variable_sized_nulls() { - let array = StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]); - let arrays = vec![array.data()]; + let array = + StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]).into_data(); + let arrays = vec![&array]; let mut mutable = MutableArrayData::new(arrays, false, 0); @@ -193,9 +194,9 @@ fn test_variable_sized_nulls() { #[test] fn test_variable_sized_offsets() { let array = StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]); - let array = array.slice(1, 3); + let array = array.into_data().slice(1, 3); - let arrays = vec![array.data()]; + let arrays = vec![&array]; let mut mutable = MutableArrayData::new(arrays, false, 0); @@ -211,9 +212,9 @@ fn test_variable_sized_offsets() { #[test] fn test_string_offsets() { let array = StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]); - let array = array.slice(1, 3); + let array = array.into_data().slice(1, 3); - let arrays = vec![array.data()]; + let arrays = vec![&array]; let mut mutable = MutableArrayData::new(arrays, false, 0); @@ -228,10 +229,10 @@ fn test_string_offsets() { #[test] fn test_multiple_with_nulls() { - let array1 = StringArray::from(vec!["hello", "world"]); - let array2 = StringArray::from(vec![Some("1"), None]); + let array1 = StringArray::from(vec!["hello", "world"]).into_data(); + let array2 = StringArray::from(vec![Some("1"), None]).into_data(); - let arrays = vec![array1.data(), array2.data()]; + let arrays = vec![&array1, &array2]; let mut mutable = MutableArrayData::new(arrays, false, 5); @@ -248,9 +249,9 @@ fn test_multiple_with_nulls() { #[test] fn test_string_null_offset_nulls() { let array = StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]); - let array = array.slice(1, 3); + let array = array.into_data().slice(1, 3); - let arrays = vec![array.data()]; + let arrays = vec![&array]; let mut mutable = MutableArrayData::new(arrays, true, 0); @@ -266,8 +267,9 @@ fn test_string_null_offset_nulls() { #[test] fn test_bool() { - let array = BooleanArray::from(vec![Some(false), Some(true), None, Some(false)]); - let arrays = vec![array.data()]; + let array = + BooleanArray::from(vec![Some(false), Some(true), None, Some(false)]).into_data(); + let arrays = vec![&array]; let mut mutable = MutableArrayData::new(arrays, false, 0); @@ -282,9 +284,9 @@ fn test_bool() { #[test] fn test_null() { - let array1 = NullArray::new(10); - let array2 = NullArray::new(5); - let arrays = vec![array1.data(), array2.data()]; + let array1 = NullArray::new(10).into_data(); + let array2 = NullArray::new(5).into_data(); + let arrays = vec![&array1, &array2]; let mut mutable = MutableArrayData::new(arrays, false, 0); @@ -352,8 +354,9 @@ fn test_struct() { let array = StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) - .unwrap(); - let arrays = vec![array.data()]; + .unwrap() + .into_data(); + let arrays = vec![&array]; let mut mutable = MutableArrayData::new(arrays, false, 0); mutable.extend(0, 1, 3); @@ -388,8 +391,9 @@ fn test_struct_offset() { let array = StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) .unwrap() + .into_data() .slice(1, 3); - let arrays = vec![array.data()]; + let arrays = vec![&array]; let mut mutable = MutableArrayData::new(arrays, false, 0); mutable.extend(0, 1, 3); @@ -424,8 +428,9 @@ fn test_struct_nulls() { let array = StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) - .unwrap(); - let arrays = vec![array.data()]; + .unwrap() + .into_data(); + let arrays = vec![&array]; let mut mutable = MutableArrayData::new(arrays, false, 0); @@ -462,8 +467,9 @@ fn test_struct_many() { let array = StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) - .unwrap(); - let arrays = vec![array.data(), array.data()]; + .unwrap() + .into_data(); + let arrays = vec![&array, &array]; let mut mutable = MutableArrayData::new(arrays, false, 0); mutable.extend(0, 1, 3); @@ -488,10 +494,10 @@ fn test_binary_fixed_sized_offsets() { vec![vec![0, 0], vec![0, 1], vec![0, 2]].into_iter(), ) .expect("Failed to create FixedSizeBinaryArray from iterable"); - let array = array.slice(1, 2); + let array = array.slice(1, 2).into_data(); // = [[0, 1], [0, 2]] due to the offset = 1 - let arrays = vec![array.data()]; + let arrays = vec![&array]; let mut mutable = MutableArrayData::new(arrays, false, 0); @@ -517,7 +523,7 @@ fn test_list_append() { builder.values().append_slice(&[6, 7, 8]); builder.values().append_slice(&[9, 10, 11]); builder.append(true); - let a = builder.finish(); + let a = builder.finish().into_data(); let a_builder = Int64Builder::with_capacity(24); let mut a_builder = ListBuilder::::new(a_builder); @@ -526,11 +532,11 @@ fn test_list_append() { a_builder.append(true); a_builder.values().append_slice(&[14, 15]); a_builder.append(true); - let b = a_builder.finish(); + let b = a_builder.finish().into_data(); let c = b.slice(1, 2); - let mut mutable = MutableArrayData::new(vec![a.data(), b.data(), c.data()], false, 1); + let mut mutable = MutableArrayData::new(vec![&a, &b, &c], false, 1); mutable.extend(0, 0, a.len()); mutable.extend(1, 0, b.len()); mutable.extend(2, 0, c.len()); @@ -584,8 +590,7 @@ fn test_list_nulls_append() { builder.values().append_null(); builder.values().append_slice(&[9, 10, 11]); builder.append(true); - let a = builder.finish(); - let a = a.data(); + let a = builder.finish().into_data(); let mut builder = ListBuilder::::new(Int64Builder::with_capacity(32)); builder.values().append_slice(&[12, 13]); @@ -596,12 +601,11 @@ fn test_list_nulls_append() { builder.values().append_null(); builder.values().append_slice(&[14, 15]); builder.append(true); - let b = builder.finish(); - let b = b.data(); + let b = builder.finish().into_data(); let c = b.slice(1, 2); let d = b.slice(2, 2); - let mut mutable = MutableArrayData::new(vec![a, b, &c, &d], false, 10); + let mut mutable = MutableArrayData::new(vec![&a, &b, &c, &d], false, 10); mutable.extend(0, 0, a.len()); mutable.extend(1, 0, b.len()); @@ -671,8 +675,7 @@ fn test_map_nulls_append() { builder.values().append_slice(&[9, 10, 11]); builder.append(true).unwrap(); - let a = builder.finish(); - let a = a.data(); + let a = builder.finish().into_data(); let mut builder = MapBuilder::::new( None, @@ -691,12 +694,11 @@ fn test_map_nulls_append() { builder.values().append_slice(&[14, 15]); builder.append(true).unwrap(); - let b = builder.finish(); - let b = b.data(); + let b = builder.finish().into_data(); let c = b.slice(1, 2); let d = b.slice(2, 2); - let mut mutable = MutableArrayData::new(vec![a, b, &c, &d], false, 10); + let mut mutable = MutableArrayData::new(vec![&a, &b, &c, &d], false, 10); mutable.extend(0, 0, a.len()); mutable.extend(1, 0, b.len()); @@ -804,7 +806,7 @@ fn test_list_of_strings_append() { builder.values().append_value("Arrow"); builder.values().append_null(); builder.append(true); - let a = builder.finish(); + let a = builder.finish().into_data(); // [["alpha", "beta"], [None], ["gamma", "delta", None]] let mut builder = ListBuilder::new(StringBuilder::new()); @@ -817,9 +819,9 @@ fn test_list_of_strings_append() { builder.values().append_value("delta"); builder.values().append_null(); builder.append(true); - let b = builder.finish(); + let b = builder.finish().into_data(); - let mut mutable = MutableArrayData::new(vec![a.data(), b.data()], false, 10); + let mut mutable = MutableArrayData::new(vec![&a, &b], false, 10); mutable.extend(0, 0, a.len()); mutable.extend(1, 0, b.len()); @@ -869,7 +871,8 @@ fn test_list_of_strings_append() { fn test_fixed_size_binary_append() { let a = vec![Some(vec![1, 2]), Some(vec![3, 4]), Some(vec![5, 6])]; let a = FixedSizeBinaryArray::try_from_sparse_iter_with_size(a.into_iter(), 2) - .expect("Failed to create FixedSizeBinaryArray from iterable"); + .expect("Failed to create FixedSizeBinaryArray from iterable") + .into_data(); let b = vec![ None, @@ -880,9 +883,10 @@ fn test_fixed_size_binary_append() { None, ]; let b = FixedSizeBinaryArray::try_from_sparse_iter_with_size(b.into_iter(), 2) - .expect("Failed to create FixedSizeBinaryArray from iterable"); + .expect("Failed to create FixedSizeBinaryArray from iterable") + .into_data(); - let mut mutable = MutableArrayData::new(vec![a.data(), b.data()], false, 10); + let mut mutable = MutableArrayData::new(vec![&a, &b], false, 10); mutable.extend(0, 0, a.len()); mutable.extend(1, 0, b.len()); @@ -913,8 +917,9 @@ fn test_fixed_size_binary_append() { ]; let expected = FixedSizeBinaryArray::try_from_sparse_iter_with_size(expected.into_iter(), 2) - .expect("Failed to create FixedSizeBinaryArray from iterable"); - assert_eq!(&result, expected.data()); + .expect("Failed to create FixedSizeBinaryArray from iterable") + .into_data(); + assert_eq!(result, expected); } /* diff --git a/arrow/tests/array_validation.rs b/arrow/tests/array_validation.rs index 082d020ca462..67960ada6c98 100644 --- a/arrow/tests/array_validation.rs +++ b/arrow/tests/array_validation.rs @@ -606,7 +606,7 @@ fn test_validate_dictionary_index_too_large() { 2, None, 0, - vec![keys.data().buffers()[0].clone()], + vec![keys.into_data().buffers()[0].clone()], vec![values.into_data()], ) .unwrap(); @@ -630,7 +630,7 @@ fn test_validate_dictionary_index_negative() { 2, None, 0, - vec![keys.data().buffers()[0].clone()], + vec![keys.into_data().buffers()[0].clone()], vec![values.into_data()], ) .unwrap(); @@ -655,7 +655,7 @@ fn test_validate_dictionary_index_negative_but_not_referenced() { 1, None, 0, - vec![keys.data().buffers()[0].clone()], + vec![keys.into_data().buffers()[0].clone()], vec![values.into_data()], ) .unwrap(); @@ -681,7 +681,7 @@ fn test_validate_dictionary_index_giant_negative() { 2, None, 0, - vec![keys.data().buffers()[0].clone()], + vec![keys.into_data().buffers()[0].clone()], vec![values.into_data()], ) .unwrap(); @@ -1016,7 +1016,7 @@ fn test_decimal_validation() { builder.append_value(20000); let array = builder.finish(); - array.data().validate_full().unwrap(); + array.into_data().validate_full().unwrap(); } #[test] diff --git a/parquet/src/arrow/array_reader/byte_array_dictionary.rs b/parquet/src/arrow/array_reader/byte_array_dictionary.rs index c4ed7e9070cc..763a6ccee2c3 100644 --- a/parquet/src/arrow/array_reader/byte_array_dictionary.rs +++ b/parquet/src/arrow/array_reader/byte_array_dictionary.rs @@ -355,7 +355,8 @@ where assert_eq!(dict.data_type(), &self.value_type); - let dict_buffers = dict.data().buffers(); + let data = dict.to_data(); + let dict_buffers = data.buffers(); let dict_offsets = dict_buffers[0].typed_data::(); let dict_values = dict_buffers[1].as_slice(); @@ -391,8 +392,8 @@ where #[cfg(test)] mod tests { - use arrow_array::{Array, StringArray}; use arrow::compute::cast; + use arrow_array::{Array, StringArray}; use crate::arrow::array_reader::test_util::{ byte_array_all_encodings, encode_dictionary, utf8_column, diff --git a/parquet/src/arrow/array_reader/list_array.rs b/parquet/src/arrow/array_reader/list_array.rs index 504591c0ca89..a6b354f902df 100644 --- a/parquet/src/arrow/array_reader/list_array.rs +++ b/parquet/src/arrow/array_reader/list_array.rs @@ -143,11 +143,9 @@ impl ArrayReader for ListArrayReader { let mut skipped = 0; // Builder used to construct the filtered child data, skipping empty lists and nulls - let mut child_data_builder = MutableArrayData::new( - vec![next_batch_array.data()], - false, - next_batch_array.len(), - ); + let data = next_batch_array.to_data(); + let mut child_data_builder = + MutableArrayData::new(vec![&data], false, next_batch_array.len()); def_levels.iter().zip(rep_levels).try_for_each(|(d, r)| { match r.cmp(&self.rep_level) { @@ -201,7 +199,7 @@ impl ArrayReader for ListArrayReader { let child_data = if skipped == 0 { // No filtered values - can reuse original array - next_batch_array.data().clone() + next_batch_array.to_data() } else { // One or more filtered values - must build new array if let Some(start) = filter_start.take() { diff --git a/parquet/src/arrow/array_reader/map_array.rs b/parquet/src/arrow/array_reader/map_array.rs index d7645a593505..9bfc047322a7 100644 --- a/parquet/src/arrow/array_reader/map_array.rs +++ b/parquet/src/arrow/array_reader/map_array.rs @@ -96,7 +96,7 @@ impl ArrayReader for MapArrayReader { // A MapArray is just a ListArray with a StructArray child // we can therefore just alter the ArrayData let array = self.reader.consume_batch().unwrap(); - let data = array.data().clone(); + let data = array.to_data(); let builder = data.into_builder().data_type(self.data_type.clone()); // SAFETY - we can assume that ListArrayReader produces valid ListArray diff --git a/parquet/src/arrow/array_reader/struct_array.rs b/parquet/src/arrow/array_reader/struct_array.rs index 0670701a0375..11e019f29a59 100644 --- a/parquet/src/arrow/array_reader/struct_array.rs +++ b/parquet/src/arrow/array_reader/struct_array.rs @@ -17,7 +17,7 @@ use crate::arrow::array_reader::ArrayReader; use crate::errors::{ParquetError, Result}; -use arrow_array::{builder::BooleanBufferBuilder, ArrayRef, StructArray}; +use arrow_array::{builder::BooleanBufferBuilder, ArrayRef, StructArray, Array}; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::DataType as ArrowType; use std::any::Any; @@ -130,7 +130,7 @@ impl ArrayReader for StructArrayReader { .child_data( children_array .iter() - .map(|x| x.data().clone()) + .map(|x| x.to_data()) .collect::>(), ); diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 4b88a33f3a25..57741283a2f9 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -1761,7 +1761,7 @@ mod tests { let b = Arc::clone(batch.column(0)); assert_eq!(a.data_type(), b.data_type()); - assert_eq!(a.data(), b.data(), "{:#?} vs {:#?}", a.data(), b.data()); + assert_eq!(a.to_data(), b.to_data()); assert_eq!( a.as_any().type_id(), b.as_any().type_id(), @@ -1960,7 +1960,7 @@ mod tests { let batch = reader.into_iter().next().unwrap().unwrap(); assert_eq!(batch.schema().as_ref(), &expected_schema); assert_eq!(batch.num_rows(), 4); - assert_eq!(batch.column(0).data().null_count(), 2); + assert_eq!(batch.column(0).null_count(), 2); } #[test] @@ -2077,7 +2077,7 @@ mod tests { ); let get_dict = - |batch: &RecordBatch| batch.column(0).data().child_data()[0].clone(); + |batch: &RecordBatch| batch.column(0).to_data().child_data()[0].clone(); // First and second batch in same row group -> same dictionary assert_eq!(get_dict(&batches[0]), get_dict(&batches[1])); diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs index 4239f3fba59b..680d31480939 100644 --- a/parquet/src/arrow/arrow_writer/levels.rs +++ b/parquet/src/arrow/arrow_writer/levels.rs @@ -41,10 +41,9 @@ //! \[1\] [parquet-format#nested-encoding](https://github.com/apache/parquet-format#nested-encoding) use crate::errors::{ParquetError, Result}; -use arrow_array::{ - make_array, Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, StructArray, -}; -use arrow_data::ArrayData; +use arrow_array::cast::AsArray; +use arrow_array::{Array, ArrayRef, OffsetSizeTrait, StructArray}; +use arrow_buffer::NullBuffer; use arrow_schema::{DataType, Field}; use std::ops::Range; @@ -183,29 +182,37 @@ impl LevelInfoBuilder { self.write_leaf(array, range) } DataType::Struct(_) => { - let array = array.as_any().downcast_ref::().unwrap(); + let array = array.as_struct(); self.write_struct(array, range) } DataType::List(_) => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - self.write_list(array.value_offsets(), array.data(), range) + let array = array.as_list::(); + self.write_list( + array.value_offsets(), + array.nulls(), + array.values(), + range, + ) } DataType::LargeList(_) => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - - self.write_list(array.value_offsets(), array.data(), range) + let array = array.as_list::(); + self.write_list( + array.value_offsets(), + array.nulls(), + array.values(), + range, + ) } DataType::Map(_, _) => { - let array = array.as_any().downcast_ref::().unwrap(); + let array = array.as_map(); // A Map is just as ListArray with a StructArray child, we therefore // treat it as such to avoid code duplication - self.write_list(array.value_offsets(), array.data(), range) + self.write_list( + array.value_offsets(), + array.nulls(), + array.entries(), + range, + ) } _ => unreachable!(), } @@ -217,7 +224,8 @@ impl LevelInfoBuilder { fn write_list( &mut self, offsets: &[O], - list_data: &ArrayData, + nulls: Option<&NullBuffer>, + values: &ArrayRef, range: Range, ) { let (child, ctx) = match self { @@ -226,11 +234,10 @@ impl LevelInfoBuilder { }; let offsets = &offsets[range.start..range.end + 1]; - let child_array = make_array(list_data.child_data()[0].clone()); let write_non_null_slice = |child: &mut LevelInfoBuilder, start_idx: usize, end_idx: usize| { - child.write(&child_array, start_idx..end_idx); + child.write(values, start_idx..end_idx); child.visit_leaves(|leaf| { let rep_levels = leaf.rep_levels.as_mut().unwrap(); let mut rev = rep_levels.iter_mut().rev(); @@ -270,7 +277,7 @@ impl LevelInfoBuilder { }) }; - match list_data.nulls() { + match nulls { Some(nulls) => { let null_offset = range.start; // TODO: Faster bitmask iteration (#1757) @@ -485,7 +492,7 @@ mod tests { use arrow_array::*; use arrow_buffer::{Buffer, ToByteSlice}; use arrow_cast::display::array_value_to_string; - use arrow_data::ArrayDataBuilder; + use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::{Fields, Schema}; #[test] @@ -1243,7 +1250,7 @@ mod tests { let array = Arc::new(list_builder.finish()); - let values_len = array.data().child_data()[0].len(); + let values_len = array.values().len(); assert_eq!(values_len, 5); let schema = Arc::new(Schema::new(vec![list_field])); @@ -1278,7 +1285,7 @@ mod tests { ]); // This test assumes that nulls don't take up space - assert_eq!(inner.data().child_data()[0].len(), 7); + assert_eq!(inner.values().len(), 7); let field = Field::new("list", inner.data_type().clone(), true); let array = Arc::new(inner) as ArrayRef; diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index 86f7764ec4cf..4cf54dc8897e 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -22,7 +22,7 @@ use std::io::Write; use std::sync::Arc; use arrow_array::cast::AsArray; -use arrow_array::types::Decimal128Type; +use arrow_array::types::{Decimal128Type, Int32Type, Int64Type, UInt32Type, UInt64Type}; use arrow_array::{types, Array, ArrayRef, RecordBatch}; use arrow_schema::{DataType as ArrowDataType, IntervalUnit, SchemaRef}; @@ -33,11 +33,12 @@ use super::schema::{ use crate::arrow::arrow_writer::byte_array::ByteArrayWriter; use crate::column::writer::{ColumnWriter, ColumnWriterImpl}; +use crate::data_type::{ByteArray, DataType, FixedLenByteArray}; use crate::errors::{ParquetError, Result}; use crate::file::metadata::{KeyValue, RowGroupMetaDataPtr}; use crate::file::properties::WriterProperties; +use crate::file::writer::SerializedFileWriter; use crate::file::writer::SerializedRowGroupWriter; -use crate::{data_type::*, file::writer::SerializedFileWriter}; use levels::{calculate_array_levels, LevelInfo}; mod byte_array; @@ -292,16 +293,21 @@ fn write_leaves( } col_writer.close() } - ArrowDataType::List(_) | ArrowDataType::LargeList(_) => { + ArrowDataType::List(_) => { let arrays: Vec<_> = arrays.iter().map(|array|{ - // write the child list - let data = array.data(); - arrow_array::make_array(data.child_data()[0].clone()) + array.as_list::().values().clone() }).collect(); write_leaves(row_group_writer, &arrays, levels)?; Ok(()) } + ArrowDataType::LargeList(_) => { + let arrays: Vec<_> = arrays.iter().map(|array|{ + array.as_list::().values().clone() + }).collect(); + write_leaves(row_group_writer, &arrays, levels)?; + Ok(()) + } ArrowDataType::Struct(fields) => { // Groups child arrays by field let mut field_arrays = vec![Vec::with_capacity(arrays.len()); fields.len()]; @@ -384,19 +390,15 @@ fn write_leaf( let array = arrow_cast::cast(column, &ArrowDataType::Date32)?; let array = arrow_cast::cast(&array, &ArrowDataType::Int32)?; - let array = array - .as_any() - .downcast_ref::() - .expect("Unable to get int32 array"); + let array = array.as_primitive::(); write_primitive(typed, array.values(), levels)? } ArrowDataType::UInt32 => { - let data = column.data(); - let offset = data.offset(); + let values = column.as_primitive::().values(); // follow C++ implementation and use overflow/reinterpret cast from u32 to i32 which will map // `(i32::MAX as u32)..u32::MAX` to `i32::MIN..0` - let array: &[i32] = data.buffers()[0].typed_data(); - write_primitive(typed, &array[offset..offset + data.len()], levels)? + let array = values.inner().typed_data::(); + write_primitive(typed, array, levels)? } ArrowDataType::Decimal128(_, _) => { // use the int32 to represent the decimal with low precision @@ -407,19 +409,13 @@ fn write_leaf( } _ => { let array = arrow_cast::cast(column, &ArrowDataType::Int32)?; - let array = array - .as_any() - .downcast_ref::() - .expect("Unable to get i32 array"); + let array = array.as_primitive::(); write_primitive(typed, array.values(), levels)? } } } ColumnWriter::BoolColumnWriter(ref mut typed) => { - let array = column - .as_any() - .downcast_ref::() - .expect("Unable to get boolean array"); + let array = column.as_boolean(); typed.write_batch( get_bool_array_slice(array, indices).as_slice(), levels.def_levels(), @@ -429,19 +425,15 @@ fn write_leaf( ColumnWriter::Int64ColumnWriter(ref mut typed) => { match column.data_type() { ArrowDataType::Int64 => { - let array = column - .as_any() - .downcast_ref::() - .expect("Unable to get i64 array"); + let array = column.as_primitive::(); write_primitive(typed, array.values(), levels)? } ArrowDataType::UInt64 => { + let values = column.as_primitive::().values(); // follow C++ implementation and use overflow/reinterpret cast from u64 to i64 which will map // `(i64::MAX as u64)..u64::MAX` to `i64::MIN..0` - let data = column.data(); - let offset = data.offset(); - let array: &[i64] = data.buffers()[0].typed_data(); - write_primitive(typed, &array[offset..offset + data.len()], levels)? + let array = values.inner().typed_data::(); + write_primitive(typed, array, levels)? } ArrowDataType::Decimal128(_, _) => { // use the int64 to represent the decimal with low precision @@ -452,10 +444,7 @@ fn write_leaf( } _ => { let array = arrow_cast::cast(column, &ArrowDataType::Int64)?; - let array = array - .as_any() - .downcast_ref::() - .expect("Unable to get i64 array"); + let array = array.as_primitive::(); write_primitive(typed, array.values(), levels)? } } @@ -642,6 +631,7 @@ mod tests { use arrow_schema::Fields; use crate::basic::Encoding; + use crate::data_type::AsBytes; use crate::file::metadata::ParquetMetaData; use crate::file::page_index::index_reader::read_pages_locations; use crate::file::properties::{ReaderProperties, WriterVersion}; @@ -723,8 +713,8 @@ mod tests { assert_eq!(expected_batch.num_columns(), actual_batch.num_columns()); assert_eq!(expected_batch.num_rows(), actual_batch.num_rows()); for i in 0..expected_batch.num_columns() { - let expected_data = expected_batch.column(i).data().clone(); - let actual_data = actual_batch.column(i).data().clone(); + let expected_data = expected_batch.column(i).to_data(); + let actual_data = actual_batch.column(i).to_data(); assert_eq!(expected_data, actual_data); } @@ -779,7 +769,7 @@ mod tests { // build a record batch let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap(); - assert_eq!(batch.column(0).data().null_count(), 1); + assert_eq!(batch.column(0).null_count(), 1); // This test fails if the max row group size is less than the batch's length // see https://github.com/apache/arrow-rs/issues/518 @@ -821,7 +811,7 @@ mod tests { // This test fails if the max row group size is less than the batch's length // see https://github.com/apache/arrow-rs/issues/518 - assert_eq!(batch.column(0).data().null_count(), 0); + assert_eq!(batch.column(0).null_count(), 0); roundtrip(batch, None); } @@ -928,7 +918,7 @@ mod tests { let g_list_data = ArrayData::builder(struct_field_g.data_type().clone()) .len(5) .add_buffer(g_value_offsets.clone()) - .add_child_data(g_value.data().clone()) + .add_child_data(g_value.to_data()) .build() .unwrap(); let g = ListArray::from(g_list_data); @@ -936,7 +926,7 @@ mod tests { let h_list_data = ArrayData::builder(struct_field_h.data_type().clone()) .len(5) .add_buffer(g_value_offsets) - .add_child_data(g_value.data().clone()) + .add_child_data(g_value.to_data()) .null_bit_buffer(Some(Buffer::from(vec![0b00011011]))) .build() .unwrap(); @@ -1251,9 +1241,9 @@ mod tests { assert_eq!(expected_batch.num_columns(), actual_batch.num_columns()); assert_eq!(expected_batch.num_rows(), actual_batch.num_rows()); for i in 0..expected_batch.num_columns() { - let expected_data = expected_batch.column(i).data(); - let actual_data = actual_batch.column(i).data(); - validate(expected_data, actual_data); + let expected_data = expected_batch.column(i).to_data(); + let actual_data = actual_batch.column(i).to_data(); + validate(&expected_data, &actual_data); } file diff --git a/parquet/src/arrow/buffer/dictionary_buffer.rs b/parquet/src/arrow/buffer/dictionary_buffer.rs index 23ebea57b5b2..529c28872642 100644 --- a/parquet/src/arrow/buffer/dictionary_buffer.rs +++ b/parquet/src/arrow/buffer/dictionary_buffer.rs @@ -107,7 +107,8 @@ impl Self::Values { values } => Ok(values), Self::Dict { keys, values } => { let mut spilled = OffsetBuffer::default(); - let dict_buffers = values.data().buffers(); + let data = values.to_data(); + let dict_buffers = data.buffers(); let dict_offsets = dict_buffers[0].typed_data::(); let dict_values = dict_buffers[1].as_slice();