Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VZV on FZV #2035

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion utils/zerovec/src/flexzerovec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,5 @@ pub(crate) mod slice;
pub(crate) mod vec;

pub use owned::FlexZeroVecOwned;
pub(crate) use slice::chunk_to_usize;
pub use slice::FlexZeroSlice;
pub use vec::FlexZeroVec;
142 changes: 129 additions & 13 deletions utils/zerovec/src/flexzerovec/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,11 @@ impl FlexZeroSlice {
/// ```
#[inline]
pub fn get(&self, index: usize) -> Option<usize> {
let w = self.get_width();
self.get_chunk(index).map(|chunk| chunk_to_usize(chunk, w))
if index >= self.len() {
None
} else {
Some(unsafe { self.get_unchecked(index) })
}
}

/// Gets the element at `index` as a chunk of bytes, or `None` if `index >= self.len()`.
Expand All @@ -213,10 +216,21 @@ impl FlexZeroSlice {
/// `index` must be in-range.
#[inline]
pub unsafe fn get_unchecked(&self, index: usize) -> usize {
let w = self.get_width();
let mut bytes = [0; USIZE_WIDTH];
core::ptr::copy_nonoverlapping(self.data.as_ptr().add(index * w), bytes.as_mut_ptr(), w);
usize::from_le_bytes(bytes)
match self.width {
1 => *self.data.get_unchecked(index) as usize,
2 => {
let ptr = self.data.as_ptr().add(index * 2);
u16::from_le_bytes(core::ptr::read(ptr as *const [u8; 2])) as usize
}
_ => {
let mut bytes = [0; USIZE_WIDTH];
let w = self.get_width();
assert!(w <= USIZE_WIDTH);
let ptr = self.data.as_ptr().add(index * w);
core::ptr::copy_nonoverlapping(ptr, bytes.as_mut_ptr(), w);
usize::from_le_bytes(bytes)
}
}
}

/// Gets the first element of the slice, or `None` if the slice is empty.
Expand Down Expand Up @@ -249,6 +263,31 @@ impl FlexZeroSlice {
.map(move |chunk| chunk_to_usize(chunk, w))
}

/// Returns an iterator that pairs every element with the element that follows it.
///
/// The last element has no successor, so the second element of the final pair is `None`.
///
/// # Examples
///
/// ```
/// use zerovec::vecs::FlexZeroVec;
///
/// let nums: &[usize] = &[211, 281, 421, 461];
/// let fzv: FlexZeroVec = nums.iter().copied().collect();
///
/// let mut pairs_it = fzv.iter_pairs();
///
/// assert_eq!(pairs_it.next(), Some((211, Some(281))));
/// assert_eq!(pairs_it.next(), Some((281, Some(421))));
/// assert_eq!(pairs_it.next(), Some((421, Some(461))));
/// assert_eq!(pairs_it.next(), Some((461, None)));
/// assert_eq!(pairs_it.next(), None);
/// ```
pub fn iter_pairs(&self) -> impl Iterator<Item = (usize, Option<usize>)> + '_ {
    // Successor stream: everything after the first element, followed by one trailing `None`
    // so that the final element still gets a partner when zipped.
    let successors = self
        .iter()
        .skip(1)
        .map(Some)
        .chain(core::iter::once(None));
    self.iter().zip(successors)
}

/// Creates a `Vec<usize>` from a [`FlexZeroSlice`] (or `FlexZeroVec`).
///
/// # Examples
Expand Down Expand Up @@ -294,7 +333,7 @@ impl FlexZeroSlice {

/// Binary searches a sorted range of a `FlexZeroSlice` for the given `usize` value.
///
/// Indices are returned relative to the start of the range.
/// The indices in the return value are relative to the start of the range.
///
/// # Examples
///
Expand Down Expand Up @@ -347,7 +386,7 @@ impl FlexZeroSlice {

/// Binary searches a sorted range of a `FlexZeroSlice` according to a predicate function.
///
/// Indices are returned relative to the start of the range.
/// The indices in the return value are relative to the start of the range.
#[inline]
pub fn binary_search_in_range_by(
&self,
Expand All @@ -363,22 +402,99 @@ impl FlexZeroSlice {
Some(self.binary_search_impl(predicate, scaled_slice))
}

/// Binary searches the whole `FlexZeroSlice`, driving the search by element index
/// rather than by element value.
///
/// `predicate` is called with in-bounds indices into the `FlexZeroSlice` and decides
/// how the element at that index orders relative to the target.
#[inline]
pub fn binary_search_with_index(
    &self,
    predicate: impl FnMut(usize) -> Ordering,
) -> Result<usize, usize> {
    let len = self.len();
    debug_assert!(len <= self.data.len());
    // Safety: relies on `self.len() <= self.data.len()` (checked above in debug builds),
    // so the first `len` bytes of `self.data` are in bounds.
    let index_bytes = unsafe { self.data.get_unchecked(..len) };
    self.binary_search_with_index_impl(predicate, index_bytes)
}

/// Binary searches a sub-range of a `FlexZeroSlice`, driving the search by element index.
///
/// `predicate` is called with in-bounds indices into the `FlexZeroSlice`; those indices are
/// relative to the start of the entire slice, not to the start of `range`.
///
/// The indices in the return value are relative to the start of the range.
///
/// Returns `None` when `range.start >= self.len()`, when `range.end > self.len()`, or when
/// `range` cannot be used to index the underlying data.
#[inline]
pub fn binary_search_in_range_with_index(
    &self,
    predicate: impl FnMut(usize) -> Ordering,
    range: Range<usize>,
) -> Option<Result<usize, usize>> {
    // `self.data` is the upscaled byte array, so `self.data.get(range)` alone would not
    // reject a range that is out of bounds for the logical elements; check that first.
    let len = self.len();
    let within_elements = range.start < len && range.end <= len;
    if !within_elements {
        return None;
    }
    self.data
        .get(range)
        .map(|range_bytes| self.binary_search_with_index_impl(predicate, range_bytes))
}

/// Value-based binary search: adapts a predicate over element values into a predicate over
/// element indices and delegates to `binary_search_with_index_impl`.
///
/// # Safety
///
/// `scaled_slice` must be a subslice of `self.data`
#[inline]
fn binary_search_impl(
    &self,
    mut predicate: impl FnMut(usize) -> Ordering,
    scaled_slice: &[u8],
) -> Result<usize, usize> {
    // The index-recovery trick itself lives in `binary_search_with_index_impl`; here we only
    // translate each probed index into the element stored at that index.
    let probe_by_value = |index: usize| {
        // Safety: The contract of `binary_search_with_index_impl` says `index` is in bounds
        let element = unsafe { self.get_unchecked(index) };
        predicate(element)
    };
    self.binary_search_with_index_impl(probe_by_value, scaled_slice)
}

/// `predicate` is passed a valid index as an argument.
///
/// # Safety
///
/// `scaled_slice` must be a subslice of `self.data`
fn binary_search_with_index_impl(
&self,
mut predicate: impl FnMut(usize) -> Ordering,
scaled_slice: &[u8],
) -> Result<usize, usize> {
// This code is an absolute atrocity. This code is not a place of honor. This
// code is known to the State of California to cause cancer.
//
// Unfortunately, the stdlib's `binary_search*` functions can only operate on slices.
// We do not have a slice. We have something we can .get() and index on, but that is not
// a slice.
//
// The `binary_search*` functions also do not have a variant where they give you the element's
// index, which we could otherwise use to directly index `self`.
// We do have `self.data`, but it is a raw byte buffer whose contents cannot in
// isolation be used to recoup the logical index of the element being probed.
//
// However, `binary_search_by()` provides references to the elements of the slice being iterated.
// Since the layout of Rust slices is well-defined, we can do pointer arithmetic on these references
// to obtain the index being used by the search.
//
// It's worth noting that the slice we choose to search is irrelevant, as long as it has the appropriate
// length. `scaled_slice` is a byte subslice of `self.data` with one byte per logical element being
// searched, so it is convenient to use here and does not require additional allocations.
//
// The alternative to doing this is to implement our own binary search. This is significantly less fun.

// Note: We always use zero_index relative to the whole `self.data` byte array, even if we are
// only searching a subslice of it.
let zero_index = self.data.as_ptr() as *const _ as usize;
scaled_slice.binary_search_by(|probe: &_| {
// Note: `scaled_slice` is a slice of u8
let index = probe as *const _ as usize - zero_index;
// Safety: we know this is in bounds
let actual_probe = unsafe { self.get_unchecked(index) };
predicate(actual_probe)
predicate(index)
})
}
}
Expand All @@ -390,7 +506,7 @@ impl fmt::Debug for &FlexZeroSlice {
}

#[inline]
fn get_item_width(item_bytes: &[u8; USIZE_WIDTH]) -> usize {
pub(crate) fn get_item_width(item_bytes: &[u8; USIZE_WIDTH]) -> usize {
USIZE_WIDTH - item_bytes.iter().rev().take_while(|b| **b == 0).count()
}

Expand Down
2 changes: 0 additions & 2 deletions utils/zerovec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,6 @@ pub use crate::map2d::map::ZeroMap2d;
pub use crate::varzerovec::{slice::VarZeroSlice, vec::VarZeroVec};
pub use crate::zerovec::{ZeroSlice, ZeroVec};

pub(crate) use flexzerovec::chunk_to_usize;

#[doc(hidden)]
pub mod __zerovec_internal_reexport {
pub use zerofrom::ZeroFrom;
Expand Down
4 changes: 2 additions & 2 deletions utils/zerovec/src/map/vecs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ impl<'a> ZeroVecLike<usize> for FlexZeroVec<'a> {

#[inline]
fn zvl_get_as_t<R>(g: &[u8], f: impl FnOnce(&usize) -> R) -> R {
f(&crate::chunk_to_usize(g, g.len()))
f(&crate::flexzerovec::slice::chunk_to_usize(g, g.len()))
}
}

Expand Down Expand Up @@ -576,7 +576,7 @@ impl ZeroVecLike<usize> for FlexZeroSlice {

#[inline]
fn zvl_get_as_t<R>(g: &Self::GetType, f: impl FnOnce(&usize) -> R) -> R {
f(&crate::chunk_to_usize(g, g.len()))
f(&crate::flexzerovec::slice::chunk_to_usize(g, g.len()))
}
}

Expand Down
Loading