Skip to content

Commit

Permalink
Adds ion-tests integration for the lazy reader (#639)
Browse files Browse the repository at this point in the history
  • Loading branch information
zslayton authored Sep 7, 2023
1 parent 7583129 commit 700e983
Show file tree
Hide file tree
Showing 19 changed files with 875 additions and 387 deletions.
221 changes: 152 additions & 69 deletions src/lazy/binary/immutable_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -552,106 +552,189 @@ impl<'a> ImmutableBuffer<'a> {

/// Reads a field ID and a value from the buffer.
pub(crate) fn peek_field(self) -> IonResult<Option<LazyRawBinaryValue<'a>>> {
self.peek_value(true)
}
let mut input = self;
if self.is_empty() {
// We're at the end of the struct
return Ok(None);
}
// Read the field ID
let (mut field_id_var_uint, mut input_after_field_id) = input.read_var_uint()?;
if input_after_field_id.is_empty() {
return IonResult::incomplete(
"found field name but no value",
input_after_field_id.offset(),
);
}

let mut type_descriptor = input_after_field_id.peek_type_descriptor()?;
if type_descriptor.is_nop() {
// Read past NOP fields until we find the first one that's an actual value
// or we run out of struct bytes. Note that we read the NOP field(s) from `self` (the
// initial input) rather than `input_after_field_id` because it simplifies
// the logic of `read_struct_field_nop_pad()`, which is very rarely called.
(field_id_var_uint, input_after_field_id) = match input.read_struct_field_nop_pad()? {
None => {
// There are no more fields, we're at the end of the struct.
return Ok(None);
}
Some((nop_length, field_id_var_uint, input_after_field_id)) => {
// Advance `input` beyond the NOP so that when we store it in the value it begins
// with the field ID.
input = input.consume(nop_length);
type_descriptor = input_after_field_id.peek_type_descriptor()?;
(field_id_var_uint, input_after_field_id)
}
};
}

let field_id_length = field_id_var_uint.size_in_bytes() as u8;
let field_id = field_id_var_uint.value();

/// Reads a value from the buffer.
pub(crate) fn peek_value_without_field_id(self) -> IonResult<Option<LazyRawBinaryValue<'a>>> {
self.peek_value(false)
let mut value = input_after_field_id.read_value(type_descriptor)?;
value.encoded_value.field_id = Some(field_id);
value.encoded_value.field_id_length = field_id_length;
value.encoded_value.total_length += field_id_length as usize;
value.input = input;
Ok(Some(value))
}

/// Reads a value from the buffer. If `has_field` is true, it will read a field ID first.
// This method consumes leading NOP bytes, but leaves the header representation in the buffer.
// The resulting LazyRawValue's buffer slice always starts with the first non-NOP byte in the
// header, which can be either a field ID, an annotations wrapper, or a type descriptor.
fn peek_value(self, has_field: bool) -> IonResult<Option<LazyRawBinaryValue<'a>>> {
let initial_input = self;
if initial_input.is_empty() {
return Ok(None);
}
let (field_id, field_id_length, mut input) = if has_field {
let (field_id_var_uint, input_after_field_id) = initial_input.read_var_uint()?;
#[cold]
/// Consumes (field ID, NOP pad) pairs until a non-NOP value is encountered in field position or
/// the buffer is empty. Returns a buffer starting at the field ID before the non-NOP value.
fn read_struct_field_nop_pad(self) -> IonResult<Option<(usize, VarUInt, ImmutableBuffer<'a>)>> {
let mut input_before_field_id = self;
loop {
if input_before_field_id.is_empty() {
return Ok(None);
}
let (field_id_var_uint, input_after_field_id) =
input_before_field_id.read_var_uint()?;
// If we're out of data (i.e. there's no field value) the struct is incomplete.
if input_after_field_id.is_empty() {
return IonResult::incomplete(
"found field name but no value",
"found a field name but no value",
input_after_field_id.offset(),
);
}
let field_id_length =
u8::try_from(field_id_var_uint.size_in_bytes()).map_err(|_| {
IonError::decoding_error("found a field id with length over 255 bytes")
})?;
(
Some(field_id_var_uint.value()),
field_id_length,
input_after_field_id,
)
} else {
(None, 0, initial_input)
};

let mut annotations_header_length = 0u8;
let mut annotations_sequence_length = 0u8;
let mut expected_value_length = None;
// Peek at the next value header. If it's a NOP, we need to repeat the process.
if input_after_field_id.peek_type_descriptor()?.is_nop() {
// Consume the NOP to position the buffer at the beginning of the next field ID.
(_, input_before_field_id) = input_after_field_id.read_nop_pad()?;
} else {
// If it isn't a NOP, return the field ID and the buffer slice containing the field
// value.
let nop_length = input_before_field_id.offset() - self.offset();
return Ok(Some((nop_length, field_id_var_uint, input_after_field_id)));
}
}
}

/// Reads a value without a field name from the buffer. This is applicable in lists, s-expressions,
/// and at the top level.
pub(crate) fn peek_sequence_value(self) -> IonResult<Option<LazyRawBinaryValue<'a>>> {
if self.is_empty() {
return Ok(None);
}
let mut input = self;
let mut type_descriptor = input.peek_type_descriptor()?;
if type_descriptor.is_annotation_wrapper() {
let (wrapper, input_after_annotations) =
input.read_annotations_wrapper(type_descriptor)?;
annotations_header_length = wrapper.header_length;
annotations_sequence_length = wrapper.sequence_length;
expected_value_length = Some(wrapper.expected_value_length);
input = input_after_annotations;
type_descriptor = input.peek_type_descriptor()?;
if type_descriptor.is_annotation_wrapper() {
return IonResult::decoding_error("found an annotations wrapper ");
// If we find a NOP...
if type_descriptor.is_nop() {
// ...skip through NOPs until we find the next non-NOP byte.
(_, input) = self.consume_nop_padding(type_descriptor)?;
// If there is no next byte, we're out of values.
if input.is_empty() {
return Ok(None);
}
} else if type_descriptor.is_nop() {
(_, input) = input.consume_nop_padding(type_descriptor)?;
// Otherwise, there's a value.
type_descriptor = input.peek_type_descriptor()?;
}
Ok(Some(input.read_value(type_descriptor)?))
}

/// Dispatches to the appropriate value-reading routine based on `type_descriptor`.
///
/// When `type_descriptor` begins an annotations wrapper, the work is delegated to
/// `read_annotated_value()`; in every other case it is delegated to
/// `read_value_without_annotations()`.
///
/// The caller is responsible for confirming that the buffer is not empty and that the next
/// byte (`type_descriptor`) is not a NOP.
fn read_value(self, type_descriptor: TypeDescriptor) -> IonResult<LazyRawBinaryValue<'a>> {
    // Unannotated values are handled first as a guard clause.
    if !type_descriptor.is_annotation_wrapper() {
        return self.read_value_without_annotations(type_descriptor);
    }
    self.read_annotated_value(type_descriptor)
}

/// Reads a value from the buffer. The caller must confirm that the buffer is not empty and that
/// the next byte (`type_descriptor`) is neither a NOP nor an annotations wrapper.
fn read_value_without_annotations(
self,
type_descriptor: TypeDescriptor,
) -> IonResult<LazyRawBinaryValue<'a>> {
let input = self;
let header = type_descriptor
.to_header()
.ok_or_else(|| IonError::decoding_error("found a non-value in value position"))?;

let header_offset = input.offset();
let (length, _) = input.consume(1).read_value_length(header)?;
let length_length = u8::try_from(length.size_in_bytes()).map_err(|_e| {
IonError::decoding_error("found a value with a header length field over 255 bytes long")
})?;
let length_length = length.size_in_bytes() as u8;
let value_length = length.value(); // ha
let total_length = field_id_length as usize
+ annotations_header_length as usize
+ 1 // Header byte
+ length_length as usize
+ value_length;

if let Some(expected_value_length) = expected_value_length {
let actual_value_length = 1 + length_length as usize + value_length;
if expected_value_length != actual_value_length {
println!("{} != {}", expected_value_length, actual_value_length);
return IonResult::decoding_error(
"value length did not match length declared by annotations wrapper",
);
}
}
let total_length = 1 // Header byte
+ length_length as usize
+ value_length;

let encoded_value = EncodedValue {
header,
field_id_length,
field_id,
annotations_header_length,
annotations_sequence_length,
// If applicable, these are populated by the caller: `peek_field()`
field_id_length: 0,
field_id: None,
// If applicable, these are populated by the caller: `read_annotated_value()`
annotations_header_length: 0,
annotations_sequence_length: 0,
header_offset,
length_length,
value_length,
total_length,
};
let lazy_value = LazyRawBinaryValue {
encoded_value,
input: initial_input,
// If this value has a field ID or annotations, this will be replaced by the caller.
input: self,
};
Ok(Some(lazy_value))
Ok(lazy_value)
}

/// Reads an annotations wrapper followed by the value it encloses.
///
/// The caller must confirm that the next byte in the buffer (`type_descriptor`) begins an
/// annotations wrapper.
///
/// Returns a decoding error if the wrapper's content starts with another annotations wrapper
/// or with a NOP, or if the length declared by the wrapper does not match the length of the
/// value that was actually read.
///
/// On success, the returned value's annotation header/sequence lengths are populated from the
/// wrapper, its total length is increased by the wrapper's header length, and its `input` is
/// the buffer this method was called on (so it includes the annotations).
fn read_annotated_value(
    self,
    type_descriptor: TypeDescriptor,
) -> IonResult<LazyRawBinaryValue<'a>> {
    let (wrapper, after_wrapper) = self.read_annotations_wrapper(type_descriptor)?;

    // The byte following the wrapper header must begin a real value.
    let value_descriptor = after_wrapper.peek_type_descriptor()?;
    if value_descriptor.is_annotation_wrapper() {
        return IonResult::decoding_error(
            "found an annotations wrapper inside an annotations wrapper",
        );
    }
    if value_descriptor.is_nop() {
        return IonResult::decoding_error("found a NOP inside an annotations wrapper");
    }

    let mut annotated = after_wrapper.read_value_without_annotations(value_descriptor)?;

    // The length check happens before the wrapper's header length is folded into the total.
    if wrapper.expected_value_length != annotated.encoded_value.total_length() {
        return IonResult::decoding_error(
            "value length did not match length declared by annotations wrapper",
        );
    }

    let encoded = &mut annotated.encoded_value;
    encoded.annotations_header_length = wrapper.header_length;
    encoded.annotations_sequence_length = wrapper.sequence_length;
    encoded.total_length += wrapper.header_length as usize;

    // Point the value back at the original buffer so that its bytes include the annotations.
    annotated.input = self;

    Ok(annotated)
}
}

Expand Down
6 changes: 4 additions & 2 deletions src/lazy/binary/raw/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ impl<'data> LazyRawBinaryReader<'data> {
&mut self,
buffer: ImmutableBuffer<'data>,
) -> IonResult<RawStreamItem<'data, BinaryEncoding>> {
let lazy_value = match ImmutableBuffer::peek_value_without_field_id(buffer)? {
let lazy_value = match ImmutableBuffer::peek_sequence_value(buffer)? {
Some(lazy_value) => lazy_value,
None => return Ok(RawStreamItem::EndOfStream),
};
Expand Down Expand Up @@ -142,7 +142,9 @@ impl<'data> DataSource<'data> {
Err(e) => return Err(e),
};

self.buffer = buffer;
// If the value we read doesn't start where we began reading, there was a NOP.
let num_nop_bytes = lazy_value.input.offset() - buffer.offset();
self.buffer = buffer.consume(num_nop_bytes);
self.bytes_to_skip = lazy_value.encoded_value.total_length();
Ok(Some(lazy_value))
}
Expand Down
2 changes: 1 addition & 1 deletion src/lazy/binary/raw/sequence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ impl<'data> Iterator for RawBinarySequenceIterator<'data> {

fn next(&mut self) -> Option<Self::Item> {
self.source
.try_parse_next(ImmutableBuffer::peek_value_without_field_id)
.try_parse_next(ImmutableBuffer::peek_sequence_value)
.transpose()
}
}
4 changes: 2 additions & 2 deletions src/lazy/binary/raw/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,8 @@ impl<'data> LazyRawBinaryValue<'data> {
return Ok(RawValueRef::Decimal(Decimal::new(0i32, 0i64)));
}

// Skip the type descriptor
let input = self.input.consume(1);
// Skip the type descriptor and length bytes
let input = ImmutableBuffer::new(self.value_body()?);

let (exponent_var_int, remaining) = input.read_var_int()?;
let coefficient_size_in_bytes =
Expand Down
32 changes: 21 additions & 11 deletions src/lazy/reader.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
use crate::binary::constants::v1_0::IVM;
use crate::element::reader::ElementReader;
use crate::element::Element;
use crate::lazy::any_encoding::AnyEncoding;
use crate::lazy::decoder::LazyDecoder;
use crate::lazy::encoding::BinaryEncoding;
use crate::lazy::system_reader::LazySystemReader;
use crate::lazy::encoding::{BinaryEncoding, TextEncoding};
use crate::lazy::system_reader::{LazySystemAnyReader, LazySystemBinaryReader, LazySystemReader};
use crate::lazy::value::LazyValue;
use crate::result::IonFailure;
use crate::{IonError, IonResult};

/// A reader that only reads each value that it visits upon request (that is: lazily).
///
/// Each time [`LazyReader::next`] is called, the reader will advance to the next top-level value
/// Each time [`LazyApplicationReader::next`] is called, the reader will advance to the next top-level value
/// in the input stream. Once positioned on a top-level value, users may visit nested values by
/// calling [`LazyValue::read`] and working with the resulting [`crate::lazy::value_ref::ValueRef`],
/// which may contain either a scalar value or a lazy container that may itself be traversed.
///
/// The values that the reader yields ([`LazyValue`],
/// [`LazyBinarySequence`](crate::lazy::sequence::LazyBinarySequence), and
/// [`LazyBinaryStruct`](crate::lazy::struct::LazyStruct)) are
/// immutable references to the data stream, and remain valid until [`LazyReader::next`] is called
/// immutable references to the data stream, and remain valid until [`LazyApplicationReader::next`] is called
/// again to advance the reader to the next top level value. This means that these references can
/// be stored, read, and re-read as long as the reader remains on the same top-level value.
/// ```
Expand Down Expand Up @@ -55,11 +56,11 @@ use crate::{IonError, IonResult};
///# Ok(())
///# }
/// ```
pub struct LazyReader<'data, D: LazyDecoder<'data>> {
pub struct LazyApplicationReader<'data, D: LazyDecoder<'data>> {
system_reader: LazySystemReader<'data, D>,
}

impl<'data, D: LazyDecoder<'data>> LazyReader<'data, D> {
impl<'data, D: LazyDecoder<'data>> LazyApplicationReader<'data, D> {
/// Returns the next top-level value in the input stream as `Ok(Some(lazy_value))`.
/// If there are no more top-level values in the stream, returns `Ok(None)`.
/// If the next value is incomplete (that is: only part of it is in the input buffer) or if the
Expand All @@ -75,7 +76,16 @@ impl<'data, D: LazyDecoder<'data>> LazyReader<'data, D> {
}
}

pub type LazyBinaryReader<'data> = LazyReader<'data, BinaryEncoding>;
pub type LazyBinaryReader<'data> = LazyApplicationReader<'data, BinaryEncoding>;
pub type LazyTextReader<'data> = LazyApplicationReader<'data, TextEncoding>;
pub type LazyReader<'data> = LazyApplicationReader<'data, AnyEncoding>;

impl<'data> LazyReader<'data> {
    /// Constructs a `LazyReader` over `ion_data` by wrapping a new `LazySystemAnyReader`.
    ///
    /// Unlike `LazyBinaryReader::new`, this constructor is infallible: it returns the reader
    /// directly rather than an `IonResult`.
    pub fn new(ion_data: &'data [u8]) -> LazyReader<'data> {
        LazyApplicationReader {
            system_reader: LazySystemAnyReader::new(ion_data),
        }
    }
}

impl<'data> LazyBinaryReader<'data> {
pub fn new(ion_data: &'data [u8]) -> IonResult<LazyBinaryReader<'data>> {
Expand All @@ -85,13 +95,13 @@ impl<'data> LazyBinaryReader<'data> {
return IonResult::decoding_error("input does not begin with an Ion version marker");
}

let system_reader = LazySystemReader::new(ion_data);
Ok(LazyReader { system_reader })
let system_reader = LazySystemBinaryReader::new(ion_data);
Ok(LazyApplicationReader { system_reader })
}
}

pub struct LazyElementIterator<'iter, 'data, D: LazyDecoder<'data>> {
lazy_reader: &'iter mut LazyReader<'data, D>,
lazy_reader: &'iter mut LazyApplicationReader<'data, D>,
}

impl<'iter, 'data, D: LazyDecoder<'data>> Iterator for LazyElementIterator<'iter, 'data, D> {
Expand All @@ -106,7 +116,7 @@ impl<'iter, 'data, D: LazyDecoder<'data>> Iterator for LazyElementIterator<'iter
}
}

impl<'data, D: LazyDecoder<'data>> ElementReader for LazyReader<'data, D> {
impl<'data, D: LazyDecoder<'data>> ElementReader for LazyApplicationReader<'data, D> {
type ElementIterator<'a> = LazyElementIterator<'a, 'data, D> where Self: 'a,;

fn read_next_element(&mut self) -> IonResult<Option<Element>> {
Expand Down
Loading

0 comments on commit 700e983

Please sign in to comment.