Skip to content

Commit

Permalink
Add ArraySubset::{overlap,relative_to} and deprecate `ArraySubset::…
Browse files Browse the repository at this point in the history
…in_subset`
  • Loading branch information
LDeakin committed Dec 17, 2023
1 parent 1c6e870 commit c8f2755
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 29 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- `ArraySubset::{extract_elements/extract_elements_unchecked}` and `ArrayExtractElementsError`

### Changed
- Add `ArraySubset::{overlap,overlap_unchecked}` and `ArraySubset::{relative_to,relative_to_unchecked}`
- These replace `ArraySubset::{in_subset,in_subset_unchecked}`, which are now deprecated

### Fixed
- Fix `cargo test` with `async` crate feature disabled

Expand Down
31 changes: 18 additions & 13 deletions src/array/array_async.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,16 +197,17 @@ impl<TStorage: ?Sized + AsyncReadableStorageTraits> Array<TStorage> {
};

// Decode the subset of the chunk which intersects array_subset
let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset_in_array) };
let array_subset_in_chunk_subset =
unsafe { array_subset.in_subset_unchecked(&chunk_subset_in_array) };
unsafe { overlap.relative_to_unchecked(chunk_subset_in_array.start()) };
let decoded_bytes = self
.async_retrieve_chunk_subset(chunk_indices, &array_subset_in_chunk_subset)
.await?;

// Copy decoded bytes to the output
let element_size = self.data_type().size() as u64;
let chunk_subset_in_array_subset =
unsafe { chunk_subset_in_array.in_subset_unchecked(array_subset) };
unsafe { overlap.relative_to_unchecked(array_subset.start()) };
let mut decoded_offset = 0;
for (array_subset_element_index, num_elements) in unsafe {
chunk_subset_in_array_subset
Expand Down Expand Up @@ -309,8 +310,11 @@ impl<TStorage: ?Sized + AsyncReadableStorageTraits> Array<TStorage> {
};

// Decode the subset of the chunk which intersects array_subset
let overlap = unsafe {
array_subset.overlap_unchecked(&chunk_subset_in_array)
};
let array_subset_in_chunk_subset = unsafe {
array_subset.in_subset_unchecked(&chunk_subset_in_array)
overlap.relative_to_unchecked(chunk_subset_in_array.start())
};

let storage_handle = Arc::new(StorageHandle::new(&*self.storage));
Expand Down Expand Up @@ -342,9 +346,8 @@ impl<TStorage: ?Sized + AsyncReadableStorageTraits> Array<TStorage> {

// Copy decoded bytes to the output
let element_size = self.data_type().size() as u64;
let chunk_subset_in_array_subset = unsafe {
chunk_subset_in_array.in_subset_unchecked(array_subset)
};
let chunk_subset_in_array_subset =
unsafe { overlap.relative_to_unchecked(array_subset.start()) };
let mut decoded_offset = 0;
for (array_subset_element_index, num_elements) in unsafe {
chunk_subset_in_array_subset
Expand Down Expand Up @@ -918,8 +921,9 @@ impl<TStorage: ?Sized + AsyncReadableStorageTraits + AsyncWritableStorageTraits>
// This skips the internal decoding occurring in store_chunk_subset
self.async_store_chunk(chunk_indices, subset_bytes).await?;
} else {
let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset_in_array) };
let chunk_subset_in_array_subset =
unsafe { chunk_subset_in_array.in_subset_unchecked(array_subset) };
unsafe { overlap.relative_to_unchecked(array_subset.start()) };
let chunk_subset_bytes = unsafe {
chunk_subset_in_array_subset.extract_bytes_unchecked(
&subset_bytes,
Expand All @@ -930,8 +934,7 @@ impl<TStorage: ?Sized + AsyncReadableStorageTraits + AsyncWritableStorageTraits>

// Store the chunk subset
let array_subset_in_chunk_subset =
unsafe { array_subset.in_subset_unchecked(&chunk_subset_in_array) };

unsafe { overlap.relative_to_unchecked(chunk_subset_in_array.start()) };
self.async_store_chunk_subset(
chunk_indices,
&array_subset_in_chunk_subset,
Expand All @@ -948,10 +951,11 @@ impl<TStorage: ?Sized + AsyncReadableStorageTraits + AsyncWritableStorageTraits>
.subset_unchecked(&chunk_indices, self.shape())
.unwrap()
};
let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset_in_array) };
let chunk_subset_in_array_subset =
unsafe { chunk_subset_in_array.in_subset_unchecked(array_subset) };
unsafe { overlap.relative_to_unchecked(array_subset.start()) };
let array_subset_in_chunk_subset =
unsafe { array_subset.in_subset_unchecked(&chunk_subset_in_array) };
unsafe { overlap.relative_to_unchecked(chunk_subset_in_array.start()) };
(
chunk_indices,
chunk_subset_in_array_subset,
Expand Down Expand Up @@ -992,8 +996,9 @@ impl<TStorage: ?Sized + AsyncReadableStorageTraits + AsyncWritableStorageTraits>
.subset_unchecked(&chunk_indices, self.shape())
.unwrap()
};
let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset_in_array) };
let chunk_subset_in_array_subset =
unsafe { chunk_subset_in_array.in_subset_unchecked(array_subset) };
unsafe { overlap.relative_to_unchecked(array_subset.start()) };
let chunk_subset_bytes = unsafe {
chunk_subset_in_array_subset.extract_bytes_unchecked(
&subset_bytes,
Expand All @@ -1002,7 +1007,7 @@ impl<TStorage: ?Sized + AsyncReadableStorageTraits + AsyncWritableStorageTraits>
)
};
let array_subset_in_chunk_subset =
unsafe { array_subset.in_subset_unchecked(&chunk_subset_in_array) };
unsafe { overlap.relative_to_unchecked(chunk_subset_in_array.start()) };
self.async_store_chunk_subset(
&chunk_indices,
&array_subset_in_chunk_subset,
Expand Down
15 changes: 9 additions & 6 deletions src/array/array_sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,15 +190,16 @@ impl<TStorage: ?Sized + ReadableStorageTraits> Array<TStorage> {
};

// Decode the subset of the chunk which intersects array_subset
let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset_in_array) };
let array_subset_in_chunk_subset =
unsafe { array_subset.in_subset_unchecked(&chunk_subset_in_array) };
unsafe { overlap.relative_to_unchecked(chunk_subset_in_array.start()) };
let decoded_bytes =
self.retrieve_chunk_subset(chunk_indices, &array_subset_in_chunk_subset)?;

// Copy decoded bytes to the output
let element_size = self.data_type().size() as u64;
let chunk_subset_in_array_subset =
unsafe { chunk_subset_in_array.in_subset_unchecked(array_subset) };
unsafe { overlap.relative_to_unchecked(array_subset.start()) };
let mut decoded_offset = 0;
for (array_subset_element_index, num_elements) in unsafe {
chunk_subset_in_array_subset
Expand Down Expand Up @@ -820,8 +821,9 @@ impl<TStorage: ?Sized + ReadableStorageTraits + WritableStorageTraits> Array<TSt
// This skips the internal decoding occurring in store_chunk_subset
self.store_chunk(chunk_indices, subset_bytes)?;
} else {
let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset_in_array) };
let chunk_subset_in_array_subset =
unsafe { chunk_subset_in_array.in_subset_unchecked(array_subset) };
unsafe { overlap.relative_to_unchecked(array_subset.start()) };
let chunk_subset_bytes = unsafe {
chunk_subset_in_array_subset.extract_bytes_unchecked(
&subset_bytes,
Expand All @@ -832,7 +834,7 @@ impl<TStorage: ?Sized + ReadableStorageTraits + WritableStorageTraits> Array<TSt

// Store the chunk subset
let array_subset_in_chunk_subset =
unsafe { array_subset.in_subset_unchecked(&chunk_subset_in_array) };
unsafe { overlap.relative_to_unchecked(chunk_subset_in_array.start()) };

self.store_chunk_subset(
chunk_indices,
Expand All @@ -847,8 +849,9 @@ impl<TStorage: ?Sized + ReadableStorageTraits + WritableStorageTraits> Array<TSt
.subset_unchecked(&chunk_indices, self.shape())
.unwrap()
};
let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset_in_array) };
let chunk_subset_in_array_subset =
unsafe { chunk_subset_in_array.in_subset_unchecked(array_subset) };
unsafe { overlap.relative_to_unchecked(array_subset.start()) };
let chunk_subset_bytes = unsafe {
chunk_subset_in_array_subset.extract_bytes_unchecked(
&subset_bytes,
Expand All @@ -859,7 +862,7 @@ impl<TStorage: ?Sized + ReadableStorageTraits + WritableStorageTraits> Array<TSt

// Store the chunk subset
let array_subset_in_chunk_subset =
unsafe { array_subset.in_subset_unchecked(&chunk_subset_in_array) };
unsafe { overlap.relative_to_unchecked(chunk_subset_in_array.start()) };

self.store_chunk_subset(
&chunk_indices,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ impl ArrayPartialDecoderTraits for ShardingPartialDecoder<'_> {
let offset = shard_index[shard_index_index];
let size = shard_index[shard_index_index + 1];

let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset) };
let decoded_bytes = if offset == u64::MAX && size == u64::MAX {
// The chunk is just the fill value
fill_value.repeat(chunk_subset.num_elements_usize())
Expand All @@ -189,7 +190,7 @@ impl ArrayPartialDecoderTraits for ShardingPartialDecoder<'_> {
&chunk_representation,
)?;
let array_subset_in_chunk_subset =
unsafe { array_subset.in_subset_unchecked(&chunk_subset) };
unsafe { overlap.relative_to_unchecked(chunk_subset.start()) };

// Partial decoding is actually really slow with the blosc codec! Assume sharded chunks are small, and just decode the whole thing and extract bytes
// TODO: Make this behaviour optional?
Expand All @@ -208,7 +209,7 @@ impl ArrayPartialDecoderTraits for ShardingPartialDecoder<'_> {

// Copy decoded bytes to the output
let chunk_subset_in_array_subset =
unsafe { chunk_subset.in_subset_unchecked(array_subset) };
unsafe { overlap.relative_to_unchecked(array_subset.start()) };
let mut decoded_offset = 0;
for (array_subset_element_index, num_elements) in unsafe {
chunk_subset_in_array_subset
Expand Down Expand Up @@ -280,8 +281,9 @@ impl ArrayPartialDecoderTraits for ShardingPartialDecoder<'_> {
let size = shard_index[shard_index_idx + 1];

// Get the subset of bytes from the chunk which intersect the array
let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset) };
let array_subset_in_chunk_subset =
unsafe { array_subset.in_subset_unchecked(&chunk_subset) };
unsafe { overlap.relative_to_unchecked(chunk_subset.start()) };

let decoded_bytes = if offset == u64::MAX && size == u64::MAX {
// The chunk is just the fill value
Expand All @@ -304,7 +306,7 @@ impl ArrayPartialDecoderTraits for ShardingPartialDecoder<'_> {

// Copy decoded bytes to the output
let chunk_subset_in_array_subset =
unsafe { chunk_subset.in_subset_unchecked(array_subset) };
unsafe { overlap.relative_to_unchecked(array_subset.start()) };
let mut decoded_offset = 0;
for (array_subset_element_index, num_elements) in unsafe {
chunk_subset_in_array_subset
Expand Down Expand Up @@ -485,6 +487,7 @@ impl AsyncArrayPartialDecoderTraits for AsyncShardingPartialDecoder<'_> {
let offset = shard_index[shard_index_index];
let size = shard_index[shard_index_index + 1];

let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset) };
let decoded_bytes = if offset == u64::MAX && size == u64::MAX {
// The chunk is just the fill value
fill_value.repeat(chunk_subset.num_elements_usize())
Expand All @@ -502,7 +505,7 @@ impl AsyncArrayPartialDecoderTraits for AsyncShardingPartialDecoder<'_> {
)
.await?;
let array_subset_in_chunk_subset =
unsafe { array_subset.in_subset_unchecked(&chunk_subset) };
unsafe { overlap.relative_to_unchecked(chunk_subset.start()) };
partial_decoder
.partial_decode(&[array_subset_in_chunk_subset.clone()])
.await?
Expand All @@ -511,7 +514,7 @@ impl AsyncArrayPartialDecoderTraits for AsyncShardingPartialDecoder<'_> {

// Copy decoded bytes to the output
let chunk_subset_in_array_subset =
unsafe { chunk_subset.in_subset_unchecked(array_subset) };
unsafe { overlap.relative_to_unchecked(array_subset.start()) };
let mut decoded_offset = 0;
for (array_subset_element_index, num_elements) in unsafe {
chunk_subset_in_array_subset
Expand Down Expand Up @@ -614,8 +617,9 @@ impl AsyncArrayPartialDecoderTraits for AsyncShardingPartialDecoder<'_> {
&chunk_representation,
)
.await?;
let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset) };
let array_subset_in_chunk_subset =
unsafe { array_subset.in_subset_unchecked(chunk_subset) };
unsafe { overlap.relative_to_unchecked(chunk_subset.start()) };
// Partial decoding is actually really slow with the blosc codec! Assume sharded chunks are small, and just decode the whole thing and extract bytes
// TODO: Investigate further
// let decoded_chunk = partial_decoder
Expand All @@ -632,7 +636,7 @@ impl AsyncArrayPartialDecoderTraits for AsyncShardingPartialDecoder<'_> {
.extract_bytes(&decoded_chunk, chunk_subset.shape(), element_size)
.unwrap();
let chunk_subset_in_array_subset =
unsafe { chunk_subset.in_subset_unchecked(array_subset) };
unsafe { overlap.relative_to_unchecked(array_subset.start()) };
Ok::<_, CodecError>((chunk_subset_in_array_subset, decoded_chunk))
}),
)
Expand Down Expand Up @@ -678,8 +682,9 @@ impl AsyncArrayPartialDecoderTraits for AsyncShardingPartialDecoder<'_> {

// Write filled chunks
filled_chunks.par_iter().for_each(|chunk_subset| {
let overlap = unsafe { array_subset.overlap_unchecked(&chunk_subset) };
let chunk_subset_in_array_subset =
unsafe { chunk_subset.in_subset_unchecked(array_subset) };
unsafe { overlap.relative_to_unchecked(array_subset.start()) };
let mut data_idx = 0;
let element_size = self.decoded_representation.element_size() as u64;
let shard_slice = unsafe { shard_slice.get() };
Expand Down
Loading

0 comments on commit c8f2755

Please sign in to comment.