Skip to content

Commit

Permalink
Adds LazyRawTextReader support for reading lists (#617)
Browse files Browse the repository at this point in the history
  • Loading branch information
zslayton authored Aug 23, 2023
1 parent dc8579d commit cb1042a
Show file tree
Hide file tree
Showing 10 changed files with 223 additions and 43 deletions.
18 changes: 9 additions & 9 deletions src/lazy/binary/raw/sequence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ impl<'data> LazyRawBinarySequence<'data> {
self.value.ion_type()
}

pub fn iter(&self) -> RawSequenceIterator<'data> {
pub fn iter(&self) -> RawBinarySequenceIterator<'data> {
// Get as much of the sequence's body as is available in the input buffer.
// Reading a child value may fail as `Incomplete`
let buffer_slice = self.value.available_body();
RawSequenceIterator::new(buffer_slice)
RawBinarySequenceIterator::new(buffer_slice)
}
}

Expand All @@ -33,7 +33,7 @@ impl<'data> LazyContainerPrivate<'data, BinaryEncoding> for LazyRawBinarySequenc
}

impl<'data> LazyRawSequence<'data, BinaryEncoding> for LazyRawBinarySequence<'data> {
type Iterator = RawSequenceIterator<'data>;
type Iterator = RawBinarySequenceIterator<'data>;

fn annotations(&self) -> RawBinaryAnnotationsIterator<'data> {
self.value.annotations()
Expand All @@ -54,7 +54,7 @@ impl<'data> LazyRawSequence<'data, BinaryEncoding> for LazyRawBinarySequence<'da

impl<'a, 'data> IntoIterator for &'a LazyRawBinarySequence<'data> {
type Item = IonResult<LazyRawBinaryValue<'data>>;
type IntoIter = RawSequenceIterator<'data>;
type IntoIter = RawBinarySequenceIterator<'data>;

fn into_iter(self) -> Self::IntoIter {
self.iter()
Expand Down Expand Up @@ -99,19 +99,19 @@ impl<'a> Debug for LazyRawBinarySequence<'a> {
}
}

pub struct RawSequenceIterator<'data> {
pub struct RawBinarySequenceIterator<'data> {
source: DataSource<'data>,
}

impl<'data> RawSequenceIterator<'data> {
pub(crate) fn new(input: ImmutableBuffer<'data>) -> RawSequenceIterator<'data> {
RawSequenceIterator {
impl<'data> RawBinarySequenceIterator<'data> {
pub(crate) fn new(input: ImmutableBuffer<'data>) -> RawBinarySequenceIterator<'data> {
RawBinarySequenceIterator {
source: DataSource::new(input),
}
}
}

impl<'data> Iterator for RawSequenceIterator<'data> {
impl<'data> Iterator for RawBinarySequenceIterator<'data> {
type Item = IonResult<LazyRawBinaryValue<'data>>;

fn next(&mut self) -> Option<Self::Item> {
Expand Down
35 changes: 4 additions & 31 deletions src/lazy/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ use crate::lazy::binary::raw::reader::LazyRawBinaryReader;
use crate::lazy::binary::raw::sequence::LazyRawBinarySequence;
use crate::lazy::binary::raw::value::LazyRawBinaryValue;
use crate::lazy::decoder::private::{LazyContainerPrivate, LazyRawFieldPrivate};
use crate::lazy::decoder::{LazyDecoder, LazyRawField, LazyRawSequence, LazyRawStruct};
use crate::lazy::decoder::{LazyDecoder, LazyRawField, LazyRawStruct};
use crate::lazy::raw_value_ref::RawValueRef;
use crate::lazy::text::raw::reader::LazyRawTextReader;
use crate::lazy::text::raw::sequence::LazyRawTextSequence;
use crate::lazy::text::value::LazyRawTextValue;
use crate::{IonResult, IonType, RawSymbolTokenRef};
use crate::{IonResult, RawSymbolTokenRef};
use std::marker::PhantomData;

// These types derive trait implementations in order to allow types that contain them
Expand All @@ -33,34 +34,6 @@ impl<'data> LazyDecoder<'data> for BinaryEncoding {
// === Placeholders ===
// The types below will need to be properly defined in order for the lazy text reader to be complete.
// They exist to satisfy various trait definitions.
#[derive(Debug, Clone)]
pub struct ToDoTextSequence;

impl<'data> LazyContainerPrivate<'data, TextEncoding> for ToDoTextSequence {
fn from_value(_value: LazyRawTextValue<'data>) -> Self {
todo!()
}
}

impl<'data> LazyRawSequence<'data, TextEncoding> for ToDoTextSequence {
type Iterator = Box<dyn Iterator<Item = IonResult<LazyRawTextValue<'data>>>>;

fn annotations(&self) -> ToDoTextAnnotationsIterator<'data> {
todo!()
}

fn ion_type(&self) -> IonType {
todo!()
}

fn iter(&self) -> Self::Iterator {
todo!()
}

fn as_value(&self) -> &<TextEncoding as LazyDecoder<'data>>::Value {
todo!()
}
}

#[derive(Debug, Clone)]
pub struct ToDoTextStruct;
Expand Down Expand Up @@ -127,7 +100,7 @@ impl<'data> Iterator for ToDoTextAnnotationsIterator<'data> {
impl<'data> LazyDecoder<'data> for TextEncoding {
type Reader = LazyRawTextReader<'data>;
type Value = LazyRawTextValue<'data>;
type Sequence = ToDoTextSequence;
type Sequence = LazyRawTextSequence<'data>;
type Struct = ToDoTextStruct;
type AnnotationsIterator = ToDoTextAnnotationsIterator<'data>;
}
37 changes: 37 additions & 0 deletions src/lazy/text/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,37 @@ impl<'data> TextBufferView<'data> {
))(self)
}

/// Matches a single value in a list OR the end of the list, allowing for leading whitespace
/// and comments in either case.
///
/// If a value is found, returns `Ok(Some(value))`. If the end of the list is found, returns
/// `Ok(None)`.
///
/// When a value is matched, its trailing delimiter is handled by
/// `match_delimiter_after_list_value`: a `,` is consumed, while a closing `]` is only peeked
/// so that the next call can report the end of the list. When the end of the list is matched
/// directly, the `]` itself is consumed.
// NOTE(review): because a consumed `,` may be followed directly by `]` on the next call, input
// such as `[1, 2,]` appears to be accepted -- confirm this matches the intended grammar.
pub fn match_list_value(self) -> IonParseResult<'data, Option<LazyRawTextValue<'data>>> {
preceded(
// Some amount of whitespace/comments...
Self::match_optional_comments_and_whitespace,
// ...followed by either the end of the list...
alt((
// (`value(None, ...)` discards the matched `]` and yields `None` instead)
value(None, tag("]")),
// ...or a value...
terminated(
// (wrapped in `Some` so both `alt` branches produce an `Option`)
Self::match_value.map(Some),
// ...followed by a comma or end-of-list
Self::match_delimiter_after_list_value,
),
)),
)(self)
}

/// Matches syntax that is expected to follow a value in a list: any amount of whitespace and/or
/// comments followed by either a comma (consumed) or an end-of-list `]` (not consumed).
///
/// The `]` is wrapped in `peek`, so it remains in the input for a later match to consume.
fn match_delimiter_after_list_value(self) -> IonMatchResult<'data> {
preceded(
// Skip any whitespace/comments between the value and its delimiter...
Self::match_optional_comments_and_whitespace,
// ...then require either a `,` (consumed) or a `]` (peeked, left in the input).
alt((tag(","), peek(tag("]")))),
)(self)
}

/// Matches a single top-level scalar value, the beginning of a container, or an IVM.
pub fn match_top_level(self) -> IonParseResult<'data, RawStreamItem<'data, TextEncoding>> {
let (remaining, value) = match self.match_value() {
Expand Down Expand Up @@ -285,6 +316,12 @@ impl<'data> TextBufferView<'data> {
)
},
),
map(
match_and_length(tag("[")),
|(_matched_list_start, length)| {
EncodedTextValue::new(MatchedValue::List, self.offset(), length)
},
),
// TODO: The other Ion types
))
.map(|encoded_value| LazyRawTextValue {
Expand Down
3 changes: 2 additions & 1 deletion src/lazy/text/encoded_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::ops::Range;
/// Each [`LazyRawTextValue`](crate::lazy::text::value::LazyRawTextValue) contains an `EncodedValue`,
/// allowing a user to re-read (that is: parse) the body of the value as many times as necessary
/// without re-parsing its header information each time.
#[derive(Clone, Copy, Debug, PartialEq)]
#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) struct EncodedTextValue {
// Each encoded text value has up to three components, appearing in the following order:
//
Expand Down Expand Up @@ -117,6 +117,7 @@ impl EncodedTextValue {
MatchedValue::Float(_) => IonType::Float,
MatchedValue::String(_) => IonType::String,
MatchedValue::Symbol(_) => IonType::Symbol,
MatchedValue::List => IonType::List,
}
}

Expand Down
1 change: 1 addition & 0 deletions src/lazy/text/matched.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ pub(crate) enum MatchedValue {
Float(MatchedFloat),
String(MatchedString),
Symbol(MatchedSymbol),
List,
// TODO: ...the other types
}

Expand Down
20 changes: 20 additions & 0 deletions src/lazy/text/parse_result.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,26 @@ impl<'data> ParseError<TextBufferView<'data>> for IonParseError<'data> {
}
}

/// `Result<Option<T>, _>` has a method called `transpose` that converts it into an `Option<Result<T, _>>`,
/// allowing it to be easily used in places like iterators that expect that return type.
/// This trait defines a similar extension method for `Result<(TextBufferView, Option<T>)>`.
pub(crate) trait ToIteratorOutput<'data, T> {
/// Consumes `self` and produces the `Option<IonResult<T>>` shape that an
/// `Iterator<Item = IonResult<T>>` returns from `next()`.
fn transpose(self) -> Option<IonResult<T>>;
}

impl<'data, T> ToIteratorOutput<'data, T> for IonResult<(TextBufferView<'data>, Option<T>)> {
    /// Drops the remaining-input half of a successful parse and reshapes the outcome:
    /// a matched value becomes `Some(Ok(value))`, a successful parse with no value becomes
    /// `None`, and an error becomes `Some(Err(error))`.
    fn transpose(self) -> Option<IonResult<T>> {
        // Surface errors immediately; the remaining input is not needed by the caller.
        let (_remaining, maybe_value) = match self {
            Ok(matched) => matched,
            Err(error) => return Some(Err(error)),
        };
        // `Some(value)` -> `Some(Ok(value))`, `None` -> `None`.
        maybe_value.map(Ok)
    }
}

/// Converts the output of a text Ion parser (any of `IonParseResult`, `IonParseError`,
/// or `nom::Err<IonParseError>`) into a general-purpose `IonResult`. If the implementing type
/// does not have its own `label` and `input`, the specified values will be used.
pub(crate) trait AddContext<'data, T> {
fn with_context(
self,
Expand Down
1 change: 1 addition & 0 deletions src/lazy/text/raw/mod.rs
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
pub mod reader;
pub mod sequence;
17 changes: 17 additions & 0 deletions src/lazy/text/raw/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,16 @@ mod tests {
$0
$10
$733
[
// First item
1,
// Second item
2 /*comment before comma*/,
// Third item
3
]
"#,
);

Expand Down Expand Up @@ -253,6 +263,13 @@ mod tests {
RawValueRef::Symbol(RawSymbolTokenRef::SymbolId(733)),
);

let list = reader.next()?.expect_value()?.read()?.expect_list()?;
let mut sum = 0;
for value in &list {
sum += value?.read()?.expect_i64()?;
}
assert_eq!(sum, 6);

Ok(())
}
}
126 changes: 126 additions & 0 deletions src/lazy/text/raw/sequence.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
use crate::lazy::decoder::private::LazyContainerPrivate;
use crate::lazy::decoder::{LazyDecoder, LazyRawSequence, LazyRawValue};
use crate::lazy::encoding::TextEncoding;
use crate::lazy::text::buffer::TextBufferView;
use crate::lazy::text::parse_result::AddContext;
use crate::lazy::text::parse_result::ToIteratorOutput;
use crate::lazy::text::value::LazyRawTextValue;
use crate::{IonResult, IonType};
use std::fmt;
use std::fmt::{Debug, Formatter};

/// A text-encoded sequence that wraps the [`LazyRawTextValue`] it was created from.
///
/// Child values are not parsed up front; they are matched one at a time as the iterator
/// returned by `iter` is advanced.
#[derive(Copy, Clone)]
pub struct LazyRawTextSequence<'data> {
// The container value itself; its `input` is sliced to produce the child iterator.
pub(crate) value: LazyRawTextValue<'data>,
}

impl<'data> LazyRawTextSequence<'data> {
    /// Returns the Ion type reported by the wrapped value.
    pub fn ion_type(&self) -> IonType {
        self.value.ion_type()
    }

    /// Returns an iterator over the child values of this sequence.
    pub fn iter(&self) -> RawTextSequenceIterator<'data> {
        // The first byte of the value's input is the opening `[`; the iterator reads
        // everything that comes after it.
        let body = self.value.input.slice_to_end(1);
        RawTextSequenceIterator::new(body)
    }
}

impl<'data> LazyContainerPrivate<'data, TextEncoding> for LazyRawTextSequence<'data> {
    /// Wraps `value` in a sequence without inspecting or parsing it.
    fn from_value(value: LazyRawTextValue<'data>) -> Self {
        Self { value }
    }
}

// Trait-facing view of the sequence. Apart from `annotations`, each method forwards to the
// wrapped value or to the inherent `LazyRawTextSequence::iter`.
impl<'data> LazyRawSequence<'data, TextEncoding> for LazyRawTextSequence<'data> {
type Iterator = RawTextSequenceIterator<'data>;

// Not implemented yet: calling this panics via `todo!`.
fn annotations(&self) -> <TextEncoding as LazyDecoder<'data>>::AnnotationsIterator {
todo!("lazy sequence annotations")
}

// Reports the Ion type of the wrapped value.
fn ion_type(&self) -> IonType {
self.value.ion_type()
}

// Named explicitly through the type path rather than written as `self.iter()`.
fn iter(&self) -> Self::Iterator {
LazyRawTextSequence::iter(self)
}

// Borrows the value this sequence was created from.
fn as_value(&self) -> &LazyRawTextValue<'data> {
&self.value
}
}

impl<'a, 'data> IntoIterator for &'a LazyRawTextSequence<'data> {
type Item = IonResult<LazyRawTextValue<'data>>;
type IntoIter = RawTextSequenceIterator<'data>;

fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}

impl<'a> Debug for LazyRawTextSequence<'a> {
    /// Writes the sequence as `(a b )` for s-expressions or `[a,b,]` for lists, reading each
    /// child value as it goes.
    ///
    /// Any failure (a child that cannot be parsed or read, or an error reported by the
    /// formatter) is returned as `fmt::Error`; this method does not panic on write failures.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // The two container kinds differ only in their delimiters and per-element suffix.
        let (opening, suffix, closing) = match self.value.encoded_value.ion_type() {
            IonType::SExp => ("(", " ", ")"),
            IonType::List => ("[", ",", "]"),
            _ => unreachable!("LazyRawSequence is only created for list and sexp"),
        };

        f.write_str(opening)?;
        for child in self {
            // `fmt::Error` carries no payload, so the underlying Ion error is dropped here.
            let child = child.map_err(|_| fmt::Error)?;
            let child_ref = child.read().map_err(|_| fmt::Error)?;
            write!(f, "{:?}{}", child_ref, suffix)?;
        }
        // Return the final write's result instead of unwrapping it, so a failing formatter
        // yields an `Err` rather than a panic.
        f.write_str(closing)
    }
}

/// Iterator that yields the child values of a text sequence one at a time.
pub struct RawTextSequenceIterator<'data> {
// The input that has not been matched yet; replaced with the parser's remaining input each
// time a value is successfully read.
input: TextBufferView<'data>,
}

impl<'data> RawTextSequenceIterator<'data> {
pub(crate) fn new(input: TextBufferView<'data>) -> RawTextSequenceIterator<'data> {
RawTextSequenceIterator { input }
}
}

impl<'data> Iterator for RawTextSequenceIterator<'data> {
    type Item = IonResult<LazyRawTextValue<'data>>;

    /// Matches the next list value in the remaining input.
    ///
    /// Returns `Some(Ok(value))` and advances past the value when one is matched, `None`
    /// when the end of the list is matched, and the parse error (with context attached)
    /// otherwise. The stored input is only advanced when a value is matched.
    fn next(&mut self) -> Option<Self::Item> {
        let (remaining, maybe_value) = match self.input.match_list_value() {
            Ok(matched) => matched,
            Err(e) => {
                return e
                    .with_context("reading the next list value", self.input)
                    .transpose()
            }
        };
        // End of list: report exhaustion without touching the stored input.
        let value = maybe_value?;
        self.input = remaining;
        Some(Ok(value))
    }
}
Loading

0 comments on commit cb1042a

Please sign in to comment.