diff --git a/CHANGELOG.md b/CHANGELOG.md
index aaf80832..76b602e1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
  - `ArraySubset::{extract_elements/extract_elements_unchecked}` and `ArrayExtractElementsError`
 
+### Changed
+ - Add `ArraySubset::{overlap,overlap_unchecked}` and `ArraySubset::{relative_to,relative_to_unchecked}`
+   - These replace `ArraySubset::{in_subset,in_subset_unchecked}`, which are now deprecated
+
 ### Fixed
  - Fix `cargo test` with `async` crate feature disabled
diff --git a/src/array/array_async.rs b/src/array/array_async.rs
index 88cf37c4..c8542cbe 100644
--- a/src/array/array_async.rs
+++ b/src/array/array_async.rs
@@ -197,8 +197,9 @@ impl Array {
         };
 
         // Decode the subset of the chunk which intersects array_subset
+        let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset_in_array) };
         let array_subset_in_chunk_subset =
-            unsafe { array_subset.in_subset_unchecked(&chunk_subset_in_array) };
+            unsafe { overlap.relative_to_unchecked(chunk_subset_in_array.start()) };
         let decoded_bytes = self
             .async_retrieve_chunk_subset(chunk_indices, &array_subset_in_chunk_subset)
             .await?;
@@ -206,7 +207,7 @@
         // Copy decoded bytes to the output
         let element_size = self.data_type().size() as u64;
         let chunk_subset_in_array_subset =
-            unsafe { chunk_subset_in_array.in_subset_unchecked(array_subset) };
+            unsafe { overlap.relative_to_unchecked(array_subset.start()) };
         let mut decoded_offset = 0;
         for (array_subset_element_index, num_elements) in unsafe {
             chunk_subset_in_array_subset
@@ -309,8 +310,11 @@
                 };
 
                 // Decode the subset of the chunk which intersects array_subset
+                let overlap = unsafe {
+                    array_subset.overlap_unchecked(&chunk_subset_in_array)
+                };
                 let array_subset_in_chunk_subset = unsafe {
-                    array_subset.in_subset_unchecked(&chunk_subset_in_array)
+                    overlap.relative_to_unchecked(chunk_subset_in_array.start())
                 };
 
                 let storage_handle = Arc::new(StorageHandle::new(&*self.storage));
@@ -342,9 +346,8 @@
 
                 // Copy decoded bytes to the output
                 let element_size = self.data_type().size() as u64;
-                let chunk_subset_in_array_subset = unsafe {
-                    chunk_subset_in_array.in_subset_unchecked(array_subset)
-                };
+                let chunk_subset_in_array_subset =
+                    unsafe { overlap.relative_to_unchecked(array_subset.start()) };
                 let mut decoded_offset = 0;
                 for (array_subset_element_index, num_elements) in unsafe {
                     chunk_subset_in_array_subset
@@ -918,8 +921,9 @@ impl
             // This skips the internal decoding occurring in store_chunk_subset
             self.async_store_chunk(chunk_indices, subset_bytes).await?;
         } else {
+            let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset_in_array) };
             let chunk_subset_in_array_subset =
-                unsafe { chunk_subset_in_array.in_subset_unchecked(array_subset) };
+                unsafe { overlap.relative_to_unchecked(array_subset.start()) };
             let chunk_subset_bytes = unsafe {
                 chunk_subset_in_array_subset.extract_bytes_unchecked(
                     &subset_bytes,
@@ -930,8 +934,7 @@
 
             // Store the chunk subset
             let array_subset_in_chunk_subset =
-                unsafe { array_subset.in_subset_unchecked(&chunk_subset_in_array) };
-
+                unsafe { overlap.relative_to_unchecked(chunk_subset_in_array.start()) };
             self.async_store_chunk_subset(
                 chunk_indices,
                 &array_subset_in_chunk_subset,
@@ -948,10 +951,11 @@
                         .subset_unchecked(&chunk_indices, self.shape())
                         .unwrap()
                 };
+                let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset_in_array) };
                 let chunk_subset_in_array_subset =
-                    unsafe { chunk_subset_in_array.in_subset_unchecked(array_subset) };
+                    unsafe { overlap.relative_to_unchecked(array_subset.start()) };
                 let array_subset_in_chunk_subset =
-                    unsafe { array_subset.in_subset_unchecked(&chunk_subset_in_array) };
+                    unsafe { overlap.relative_to_unchecked(chunk_subset_in_array.start()) };
                 (
                     chunk_indices,
                     chunk_subset_in_array_subset,
@@ -992,8 +996,9 @@ impl
                         .subset_unchecked(&chunk_indices, self.shape())
                         .unwrap()
                 };
+                let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset_in_array) };
                 let chunk_subset_in_array_subset =
-                    unsafe { chunk_subset_in_array.in_subset_unchecked(array_subset) };
+                    unsafe { overlap.relative_to_unchecked(array_subset.start()) };
                 let chunk_subset_bytes = unsafe {
                     chunk_subset_in_array_subset.extract_bytes_unchecked(
                         &subset_bytes,
@@ -1002,7 +1007,7 @@ impl
                     )
                 };
                 let array_subset_in_chunk_subset =
-                    unsafe { array_subset.in_subset_unchecked(&chunk_subset_in_array) };
+                    unsafe { overlap.relative_to_unchecked(chunk_subset_in_array.start()) };
                 self.async_store_chunk_subset(
                     &chunk_indices,
                     &array_subset_in_chunk_subset,
diff --git a/src/array/array_sync.rs b/src/array/array_sync.rs
index 54fe3c00..e262e253 100644
--- a/src/array/array_sync.rs
+++ b/src/array/array_sync.rs
@@ -190,15 +190,16 @@ impl Array {
         };
 
         // Decode the subset of the chunk which intersects array_subset
+        let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset_in_array) };
         let array_subset_in_chunk_subset =
-            unsafe { array_subset.in_subset_unchecked(&chunk_subset_in_array) };
+            unsafe { overlap.relative_to_unchecked(chunk_subset_in_array.start()) };
         let decoded_bytes =
             self.retrieve_chunk_subset(chunk_indices, &array_subset_in_chunk_subset)?;
 
         // Copy decoded bytes to the output
         let element_size = self.data_type().size() as u64;
         let chunk_subset_in_array_subset =
-            unsafe { chunk_subset_in_array.in_subset_unchecked(array_subset) };
+            unsafe { overlap.relative_to_unchecked(array_subset.start()) };
         let mut decoded_offset = 0;
         for (array_subset_element_index, num_elements) in unsafe {
             chunk_subset_in_array_subset
@@ -820,8 +821,9 @@ impl Array Array Array Array {
                 let offset = shard_index[shard_index_index];
                 let size = shard_index[shard_index_index + 1];
+                let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset) };
                 let decoded_bytes = if offset == u64::MAX && size == u64::MAX {
                     // The chunk is just the fill value
                     fill_value.repeat(chunk_subset.num_elements_usize())
@@ -189,7 +190,7 @@ impl ArrayPartialDecoderTraits for ShardingPartialDecoder<'_> {
                         &chunk_representation,
                     )?;
                     let array_subset_in_chunk_subset =
-                        unsafe { array_subset.in_subset_unchecked(&chunk_subset) };
+                        unsafe { overlap.relative_to_unchecked(chunk_subset.start()) };
                     // Partial decoding is actually really slow with the blosc codec! Assume sharded chunks are small, and just decode the whole thing and extract bytes
                     // TODO: Make this behaviour optional?
@@ -208,7 +209,7 @@ impl ArrayPartialDecoderTraits for ShardingPartialDecoder<'_> {
 
                     // Copy decoded bytes to the output
                     let chunk_subset_in_array_subset =
-                        unsafe { chunk_subset.in_subset_unchecked(array_subset) };
+                        unsafe { overlap.relative_to_unchecked(array_subset.start()) };
                     let mut decoded_offset = 0;
                     for (array_subset_element_index, num_elements) in unsafe {
                         chunk_subset_in_array_subset
@@ -280,8 +281,9 @@ impl ArrayPartialDecoderTraits for ShardingPartialDecoder<'_> {
                     let size = shard_index[shard_index_idx + 1];
 
                     // Get the subset of bytes from the chunk which intersect the array
+                    let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset) };
                     let array_subset_in_chunk_subset =
-                        unsafe { array_subset.in_subset_unchecked(&chunk_subset) };
+                        unsafe { overlap.relative_to_unchecked(chunk_subset.start()) };
 
                     let decoded_bytes = if offset == u64::MAX && size == u64::MAX {
                         // The chunk is just the fill value
@@ -304,7 +306,7 @@ impl ArrayPartialDecoderTraits for ShardingPartialDecoder<'_> {
 
                     // Copy decoded bytes to the output
                     let chunk_subset_in_array_subset =
-                        unsafe { chunk_subset.in_subset_unchecked(array_subset) };
+                        unsafe { overlap.relative_to_unchecked(array_subset.start()) };
                     let mut decoded_offset = 0;
                     for (array_subset_element_index, num_elements) in unsafe {
                         chunk_subset_in_array_subset
@@ -485,6 +487,7 @@ impl AsyncArrayPartialDecoderTraits for AsyncShardingPartialDecoder<'_> {
                 let offset = shard_index[shard_index_index];
                 let size = shard_index[shard_index_index + 1];
+                let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset) };
                 let decoded_bytes = if offset == u64::MAX && size == u64::MAX {
                     // The chunk is just the fill value
                     fill_value.repeat(chunk_subset.num_elements_usize())
@@ -502,7 +505,7 @@ impl AsyncArrayPartialDecoderTraits for AsyncShardingPartialDecoder<'_> {
                         )
                         .await?;
                     let array_subset_in_chunk_subset =
-                        unsafe { array_subset.in_subset_unchecked(&chunk_subset) };
+                        unsafe { overlap.relative_to_unchecked(chunk_subset.start()) };
                     partial_decoder
                         .partial_decode(&[array_subset_in_chunk_subset.clone()])
                         .await?
@@ -511,7 +514,7 @@ impl AsyncArrayPartialDecoderTraits for AsyncShardingPartialDecoder<'_> {
 
                 // Copy decoded bytes to the output
                 let chunk_subset_in_array_subset =
-                    unsafe { chunk_subset.in_subset_unchecked(array_subset) };
+                    unsafe { overlap.relative_to_unchecked(array_subset.start()) };
                 let mut decoded_offset = 0;
                 for (array_subset_element_index, num_elements) in unsafe {
                     chunk_subset_in_array_subset
@@ -614,8 +617,9 @@ impl AsyncArrayPartialDecoderTraits for AsyncShardingPartialDecoder<'_> {
                             &chunk_representation,
                         )
                         .await?;
+                    let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset) };
                     let array_subset_in_chunk_subset =
-                        unsafe { array_subset.in_subset_unchecked(chunk_subset) };
+                        unsafe { overlap.relative_to_unchecked(chunk_subset.start()) };
                     // Partial decoding is actually really slow with the blosc codec! Assume sharded chunks are small, and just decode the whole thing and extract bytes
                     // TODO: Investigate further
                     // let decoded_chunk = partial_decoder
@@ -632,7 +636,7 @@ impl AsyncArrayPartialDecoderTraits for AsyncShardingPartialDecoder<'_> {
                         .extract_bytes(&decoded_chunk, chunk_subset.shape(), element_size)
                         .unwrap();
                     let chunk_subset_in_array_subset =
-                        unsafe { chunk_subset.in_subset_unchecked(array_subset) };
+                        unsafe { overlap.relative_to_unchecked(array_subset.start()) };
                     Ok::<_, CodecError>((chunk_subset_in_array_subset, decoded_chunk))
                 }),
             )
@@ -678,8 +682,9 @@ impl AsyncArrayPartialDecoderTraits for AsyncShardingPartialDecoder<'_> {
 
         // Write filled chunks
         filled_chunks.par_iter().for_each(|chunk_subset| {
+            let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset) };
             let chunk_subset_in_array_subset =
-                unsafe { chunk_subset.in_subset_unchecked(array_subset) };
+                unsafe { overlap.relative_to_unchecked(array_subset.start()) };
             let mut data_idx = 0;
             let element_size = self.decoded_representation.element_size() as u64;
             let shard_slice = unsafe { shard_slice.get() };
diff --git a/src/array_subset.rs b/src/array_subset.rs
index 5e9aebbd..9ab049fc 100644
--- a/src/array_subset.rs
+++ b/src/array_subset.rs
@@ -421,7 +421,7 @@ impl ArraySubset {
         array_shape: &[u64],
     ) -> Vec {
         debug_assert_eq!(elements.len() as u64, array_shape.iter().product::<u64>());
-        let num_elements = self.num_elements() as usize;
+        let num_elements = usize::try_from(self.num_elements()).unwrap();
         let mut bytes_subset = vec![core::mem::MaybeUninit::::uninit(); num_elements];
         let bytes_subset_slice = unsafe {
             std::slice::from_raw_parts_mut(
@@ -651,6 +651,10 @@ impl ArraySubset {
         ChunksIterator::new_unchecked(self, chunk_shape)
     }
 
+    #[deprecated(
+        since = "0.7.2",
+        note = "please use `overlap` and `relative_to` instead"
+    )]
     /// Return the subset of this array subset in `subset_other`.
     /// The start of the returned array subset is from the start of this array subset.
     ///
     /// # Errors
     /// Returns [`IncompatibleDimensionalityError`] if the dimensionality of `subset_other` does not match the dimensionality of this array subset.
     pub fn in_subset(&self, subset_other: &Self) -> Result<Self, IncompatibleDimensionalityError> {
         if subset_other.dimensionality() == self.dimensionality() {
+            #[allow(deprecated)]
             Ok(unsafe { self.in_subset_unchecked(subset_other) })
         } else {
             Err(IncompatibleDimensionalityError::new(
@@ -668,6 +673,10 @@ impl ArraySubset {
         }
     }
 
+    #[deprecated(
+        since = "0.7.2",
+        note = "please use `overlap` and `relative_to` instead"
+    )]
     /// Return the subset of this array subset in `subset_other`.
     /// The start of the returned array subset is from the start of this array subset.
     ///
@@ -696,6 +705,82 @@ impl ArraySubset {
         unsafe { Self::new_with_start_shape_unchecked(starts, shapes) }
     }
 
+    /// Return the overlapping subset between this array subset and `subset_other`.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`IncompatibleDimensionalityError`] if the dimensionality of `subset_other` does not match the dimensionality of this array subset.
+    pub fn overlap(&self, subset_other: &Self) -> Result<Self, IncompatibleDimensionalityError> {
+        if subset_other.dimensionality() == self.dimensionality() {
+            Ok(unsafe { self.overlap_unchecked(subset_other) })
+        } else {
+            Err(IncompatibleDimensionalityError::new(
+                subset_other.dimensionality(),
+                self.dimensionality(),
+            ))
+        }
+    }
+
+    /// Return the overlapping subset between this array subset and `subset_other`.
+    ///
+    /// # Safety
+    ///
+    /// Panics if the dimensionality of `subset_other` does not match the dimensionality of this array subset.
+    #[doc(hidden)]
+    #[must_use]
+    pub unsafe fn overlap_unchecked(&self, subset_other: &Self) -> Self {
+        debug_assert_eq!(subset_other.dimensionality(), self.dimensionality());
+        let mut starts = Vec::with_capacity(self.start.len());
+        let mut shapes = Vec::with_capacity(self.start.len());
+        for (start, size, other_start, other_size) in izip!(
+            &self.start,
+            &self.shape,
+            subset_other.start(),
+            subset_other.shape(),
+        ) {
+            let overlap_start = *std::cmp::max(start, other_start);
+            let overlap_end = std::cmp::min(start + size, other_start + other_size);
+            let overlap_size = overlap_end - overlap_start;
+            starts.push(overlap_start);
+            shapes.push(overlap_size);
+        }
+        unsafe { Self::new_with_start_shape_unchecked(starts, shapes) }
+    }
+
+    /// Return the subset relative to `start`.
+    ///
+    /// Creates an array subset starting at [`ArraySubset::start()`] - `start`.
+    ///
+    /// # Errors
+    /// Returns [`IncompatibleDimensionalityError`] if the length of `start` does not match the dimensionality of this array subset.
+    pub fn relative_to(&self, start: &[u64]) -> Result<Self, IncompatibleDimensionalityError> {
+        if start.len() == self.dimensionality() {
+            Ok(unsafe { self.relative_to_unchecked(start) })
+        } else {
+            Err(IncompatibleDimensionalityError::new(
+                start.len(),
+                self.dimensionality(),
+            ))
+        }
+    }
+
+    /// Return the subset relative to `start`.
+    ///
+    /// Creates an array subset starting at [`ArraySubset::start()`] - `start`.
+    ///
+    /// # Safety
+    /// Panics if the length of `start` does not match the dimensionality of this array subset.
+    #[must_use]
+    pub unsafe fn relative_to_unchecked(&self, start: &[u64]) -> Self {
+        debug_assert_eq!(start.len(), self.dimensionality());
+        Self {
+            start: std::iter::zip(self.start(), start)
+                .map(|(a, b)| a - b)
+                .collect::<Vec<u64>>(),
+            shape: self.shape().to_vec(),
+        }
+    }
+
     /// Returns true if the array subset is within the bounds of `array_shape`.
     #[must_use]
     pub fn inbounds(&self, array_shape: &[u64]) -> bool {
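Below is a minimal usage sketch of the pattern this diff adopts: compute the intersection once with `overlap`, then express it relative to the chunk origin and relative to the requested subset origin with `relative_to`, replacing the pair of deprecated `in_subset` calls. The crate path `zarrs::array_subset`, the `new_with_start_shape` constructor, and the concrete start/shape values are assumptions for illustration; `overlap`, `relative_to`, and `start` are the methods added or used in the patch above.

```rust
// Sketch only: crate/module path and constructor are assumed.
use zarrs::array_subset::ArraySubset;

fn main() {
    // Requested region of the array: [1..5, 1..5].
    let array_subset = ArraySubset::new_with_start_shape(vec![1, 1], vec![4, 4]).unwrap();
    // A chunk covering [2..6, 0..4] of the array.
    let chunk_subset_in_array = ArraySubset::new_with_start_shape(vec![2, 0], vec![4, 4]).unwrap();

    // Intersection of the two subsets, in array coordinates (computed once).
    let overlap = array_subset.overlap(&chunk_subset_in_array).unwrap();
    assert_eq!(overlap.start(), &[2, 1]);
    assert_eq!(overlap.shape(), &[3, 3]);

    // The overlap relative to the chunk origin: the region to decode from the chunk.
    // Replaces the deprecated `array_subset.in_subset(&chunk_subset_in_array)`.
    let array_subset_in_chunk_subset = overlap.relative_to(chunk_subset_in_array.start()).unwrap();
    assert_eq!(array_subset_in_chunk_subset.start(), &[0, 1]);

    // The overlap relative to the requested subset origin: where the decoded
    // bytes land in the output. Replaces `chunk_subset_in_array.in_subset(array_subset)`.
    let chunk_subset_in_array_subset = overlap.relative_to(array_subset.start()).unwrap();
    assert_eq!(chunk_subset_in_array_subset.start(), &[1, 0]);
}
```

A single `overlap` result feeds both `relative_to` calls, so the intersection is computed once instead of twice as with the deprecated `in_subset`/`in_subset_unchecked` pair.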