diff --git a/src/lazy/binary/immutable_buffer.rs b/src/lazy/binary/immutable_buffer.rs
index 3778318d..59c2bb60 100644
--- a/src/lazy/binary/immutable_buffer.rs
+++ b/src/lazy/binary/immutable_buffer.rs
@@ -552,96 +552,142 @@ impl<'a> ImmutableBuffer<'a> {
/// Reads a field ID and a value from the buffer.
pub(crate) fn peek_field(self) -> IonResult<Option<LazyRawBinaryValue<'a>>> {
- self.peek_value(true)
- }
+ let mut input = self;
+ if self.is_empty() {
+ // We're at the end of the struct
+ return Ok(None);
+ }
+ // Read the field ID
+ let (mut field_id_var_uint, mut input_after_field_id) = input.read_var_uint()?;
+ if input_after_field_id.is_empty() {
+ return IonResult::incomplete(
+ "found field name but no value",
+ input_after_field_id.offset(),
+ );
+ }
+
+ let mut type_descriptor = input_after_field_id.peek_type_descriptor()?;
+ if type_descriptor.is_nop() {
+ // Read past NOP fields until we find the first one that's an actual value
+ // or we run out of struct bytes. Note that we read the NOP field(s) from `self` (the
+ // initial input) rather than `input_after_field_id` because it simplifies
+ // the logic of `read_struct_field_nop_pad()`, which is very rarely called.
+ (field_id_var_uint, input_after_field_id) = match input.read_struct_field_nop_pad()? {
+ None => {
+ // There are no more fields, we're at the end of the struct.
+ return Ok(None);
+ }
+ Some((nop_length, field_id_var_uint, input_after_field_id)) => {
+ // Advance `input` beyond the NOP so that when we store it in the value it begins
+ // with the field ID.
+ input = input.consume(nop_length);
+ type_descriptor = input_after_field_id.peek_type_descriptor()?;
+ (field_id_var_uint, input_after_field_id)
+ }
+ };
+ }
+
+ let field_id_length = field_id_var_uint.size_in_bytes() as u8;
+ let field_id = field_id_var_uint.value();
- /// Reads a value from the buffer.
- pub(crate) fn peek_value_without_field_id(self) -> IonResult<Option<LazyRawBinaryValue<'a>>> {
- self.peek_value(false)
+ let mut value = input_after_field_id.read_value(type_descriptor)?;
+ value.encoded_value.field_id = Some(field_id);
+ value.encoded_value.field_id_length = field_id_length;
+ value.encoded_value.total_length += field_id_length as usize;
+ value.input = input;
+ Ok(Some(value))
}
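// A minimal standalone sketch of the two primitives `peek_field` leans on, written against a
// plain byte slice rather than `ImmutableBuffer` so it can stand alone: decoding a VarUInt
// field ID (7 bits per byte, final byte has its high bit set) and recognizing a NOP pad type
// descriptor (high nibble 0, low nibble != 0xF, which would be null.null). The helper names
// below are illustrative, not this crate's API.
fn read_var_uint(bytes: &[u8]) -> Option<(u64, usize)> {
    let mut magnitude: u64 = 0;
    for (i, byte) in bytes.iter().enumerate() {
        magnitude = (magnitude << 7) | (byte & 0x7F) as u64;
        if byte & 0x80 != 0 {
            return Some((magnitude, i + 1)); // (value, bytes consumed)
        }
    }
    None // ran out of input mid-VarUInt
}

fn is_nop(type_descriptor: u8) -> bool {
    type_descriptor >> 4 == 0 && type_descriptor & 0x0F != 0x0F
}

#[test]
fn field_id_followed_by_nop_pad() {
    // Field ID 10 as a one-byte VarUInt (0x8A), followed by a one-byte NOP pad (0x00).
    let bytes = [0x8Au8, 0x00];
    let (field_id, consumed) = read_var_uint(&bytes).unwrap();
    assert_eq!((field_id, consumed), (10, 1));
    assert!(is_nop(bytes[consumed]));
}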
- /// Reads a value from the buffer. If `has_field` is true, it will read a field ID first.
- // This method consumes leading NOP bytes, but leaves the header representation in the buffer.
- // The resulting LazyRawValue's buffer slice always starts with the first non-NOP byte in the
- // header, which can be either a field ID, an annotations wrapper, or a type descriptor.
- fn peek_value(self, has_field: bool) -> IonResult<Option<LazyRawBinaryValue<'a>>> {
- let initial_input = self;
- if initial_input.is_empty() {
- return Ok(None);
- }
- let (field_id, field_id_length, mut input) = if has_field {
- let (field_id_var_uint, input_after_field_id) = initial_input.read_var_uint()?;
+ #[cold]
+ /// Consumes (field ID, NOP pad) pairs until a non-NOP value is encountered in field position or
+ /// the buffer is empty. Returns a buffer starting at the field ID before the non-NOP value.
+ fn read_struct_field_nop_pad(self) -> IonResult<Option<(usize, VarUInt, ImmutableBuffer<'a>)>> {
+ let mut input_before_field_id = self;
+ loop {
+ if input_before_field_id.is_empty() {
+ return Ok(None);
+ }
+ let (field_id_var_uint, input_after_field_id) =
+ input_before_field_id.read_var_uint()?;
+ // If we're out of data (i.e. there's no field value) the struct is incomplete.
if input_after_field_id.is_empty() {
return IonResult::incomplete(
- "found field name but no value",
+ "found a field name but no value",
input_after_field_id.offset(),
);
}
- let field_id_length =
- u8::try_from(field_id_var_uint.size_in_bytes()).map_err(|_| {
- IonError::decoding_error("found a field id with length over 255 bytes")
- })?;
- (
- Some(field_id_var_uint.value()),
- field_id_length,
- input_after_field_id,
- )
- } else {
- (None, 0, initial_input)
- };
-
- let mut annotations_header_length = 0u8;
- let mut annotations_sequence_length = 0u8;
- let mut expected_value_length = None;
+ // Peek at the next value header. If it's a NOP, we need to repeat the process.
+ if input_after_field_id.peek_type_descriptor()?.is_nop() {
+ // Consume the NOP to position the buffer at the beginning of the next field ID.
+ (_, input_before_field_id) = input_after_field_id.read_nop_pad()?;
+ } else {
+ // If it isn't a NOP, return the field ID and the buffer slice containing the field
+ // value.
+ let nop_length = input_before_field_id.offset() - self.offset();
+ return Ok(Some((nop_length, field_id_var_uint, input_after_field_id)));
+ }
+ }
+ }
+ /// Reads a value without a field name from the buffer. This is applicable in lists, s-expressions,
+ /// and at the top level.
+ pub(crate) fn peek_sequence_value(self) -> IonResult<Option<LazyRawBinaryValue<'a>>> {
+ if self.is_empty() {
+ return Ok(None);
+ }
+ let mut input = self;
let mut type_descriptor = input.peek_type_descriptor()?;
- if type_descriptor.is_annotation_wrapper() {
- let (wrapper, input_after_annotations) =
- input.read_annotations_wrapper(type_descriptor)?;
- annotations_header_length = wrapper.header_length;
- annotations_sequence_length = wrapper.sequence_length;
- expected_value_length = Some(wrapper.expected_value_length);
- input = input_after_annotations;
- type_descriptor = input.peek_type_descriptor()?;
- if type_descriptor.is_annotation_wrapper() {
- return IonResult::decoding_error("found an annotations wrapper ");
+ // If we find a NOP...
+ if type_descriptor.is_nop() {
+ // ...skip through NOPs until we find the next non-NOP byte.
+ (_, input) = self.consume_nop_padding(type_descriptor)?;
+ // If there is no next byte, we're out of values.
+ if input.is_empty() {
+ return Ok(None);
}
- } else if type_descriptor.is_nop() {
- (_, input) = input.consume_nop_padding(type_descriptor)?;
+ // Otherwise, there's a value.
+ type_descriptor = input.peek_type_descriptor()?;
+ }
+ Ok(Some(input.read_value(type_descriptor)?))
+ }
+
+ /// Reads a value from the buffer. The caller must confirm that the buffer is not empty and that
+ /// the next byte (`type_descriptor`) is not a NOP.
+ fn read_value(self, type_descriptor: TypeDescriptor) -> IonResult<LazyRawBinaryValue<'a>> {
+ if type_descriptor.is_annotation_wrapper() {
+ self.read_annotated_value(type_descriptor)
+ } else {
+ self.read_value_without_annotations(type_descriptor)
}
+ }
+ /// Reads a value from the buffer. The caller must confirm that the buffer is not empty and that
+ /// the next byte (`type_descriptor`) is neither a NOP nor an annotations wrapper.
+ fn read_value_without_annotations(
+ self,
+ type_descriptor: TypeDescriptor,
+ ) -> IonResult<LazyRawBinaryValue<'a>> {
+ let input = self;
let header = type_descriptor
.to_header()
.ok_or_else(|| IonError::decoding_error("found a non-value in value position"))?;
let header_offset = input.offset();
let (length, _) = input.consume(1).read_value_length(header)?;
- let length_length = u8::try_from(length.size_in_bytes()).map_err(|_e| {
- IonError::decoding_error("found a value with a header length field over 255 bytes long")
- })?;
+ let length_length = length.size_in_bytes() as u8;
let value_length = length.value(); // ha
- let total_length = field_id_length as usize
- + annotations_header_length as usize
- + 1 // Header byte
- + length_length as usize
- + value_length;
-
- if let Some(expected_value_length) = expected_value_length {
- let actual_value_length = 1 + length_length as usize + value_length;
- if expected_value_length != actual_value_length {
- println!("{} != {}", expected_value_length, actual_value_length);
- return IonResult::decoding_error(
- "value length did not match length declared by annotations wrapper",
- );
- }
- }
+ let total_length = 1 // Header byte
+ + length_length as usize
+ + value_length;
let encoded_value = EncodedValue {
header,
- field_id_length,
- field_id,
- annotations_header_length,
- annotations_sequence_length,
+ // If applicable, these are populated by the caller: `peek_field()`
+ field_id_length: 0,
+ field_id: None,
+ // If applicable, these are populated by the caller: `read_annotated_value()`
+ annotations_header_length: 0,
+ annotations_sequence_length: 0,
header_offset,
length_length,
value_length,
@@ -649,9 +695,46 @@ impl<'a> ImmutableBuffer<'a> {
};
let lazy_value = LazyRawBinaryValue {
encoded_value,
- input: initial_input,
+ // If this value has a field ID or annotations, this will be replaced by the caller.
+ input: self,
};
- Ok(Some(lazy_value))
+ Ok(lazy_value)
+ }
+
+ /// Reads an annotations wrapper and its associated value from the buffer. The caller must confirm
+ /// that the next byte in the buffer (`type_descriptor`) begins an annotations wrapper.
+ fn read_annotated_value(
+ self,
+ mut type_descriptor: TypeDescriptor,
+ ) -> IonResult<LazyRawBinaryValue<'a>> {
+ let input = self;
+ let (wrapper, input_after_annotations) = input.read_annotations_wrapper(type_descriptor)?;
+ type_descriptor = input_after_annotations.peek_type_descriptor()?;
+
+ // Confirm that the next byte begins a value, not a NOP or another annotations wrapper.
+ if type_descriptor.is_annotation_wrapper() {
+ return IonResult::decoding_error(
+ "found an annotations wrapper inside an annotations wrapper",
+ );
+ } else if type_descriptor.is_nop() {
+ return IonResult::decoding_error("found a NOP inside an annotations wrapper");
+ }
+
+ let mut lazy_value =
+ input_after_annotations.read_value_without_annotations(type_descriptor)?;
+ if wrapper.expected_value_length != lazy_value.encoded_value.total_length() {
+ return IonResult::decoding_error(
+ "value length did not match length declared by annotations wrapper",
+ );
+ }
+
+ lazy_value.encoded_value.annotations_header_length = wrapper.header_length;
+ lazy_value.encoded_value.annotations_sequence_length = wrapper.sequence_length;
+ lazy_value.encoded_value.total_length += wrapper.header_length as usize;
+ // Modify the input to include the annotations
+ lazy_value.input = input;
+
+ Ok(lazy_value)
}
}
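// A small worked sketch of the length bookkeeping above, using made-up byte counts.
// `read_value_without_annotations` computes total_length as 1 header byte plus the size of the
// length field plus the value body; `read_annotated_value` requires that the wrapper's declared
// value length equal that figure before folding the wrapper's own header length into the total.
#[test]
fn annotated_value_length_bookkeeping() {
    let (length_length, value_length) = (1usize, 20usize); // hypothetical encoded value
    let value_total_length = 1 + length_length + value_length; // 22
    let expected_value_length = 22usize; // hypothetically declared by the annotations wrapper
    assert_eq!(expected_value_length, value_total_length);
    let annotations_header_length = 3usize; // hypothetical wrapper header size
    assert_eq!(value_total_length + annotations_header_length, 25);
}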
diff --git a/src/lazy/binary/raw/reader.rs b/src/lazy/binary/raw/reader.rs
index 99c28d2f..f56c65ce 100644
--- a/src/lazy/binary/raw/reader.rs
+++ b/src/lazy/binary/raw/reader.rs
@@ -49,7 +49,7 @@ impl<'data> LazyRawBinaryReader<'data> {
&mut self,
buffer: ImmutableBuffer<'data>,
) -> IonResult<RawStreamItem<'data, BinaryEncoding>> {
- let lazy_value = match ImmutableBuffer::peek_value_without_field_id(buffer)? {
+ let lazy_value = match ImmutableBuffer::peek_sequence_value(buffer)? {
Some(lazy_value) => lazy_value,
None => return Ok(RawStreamItem::EndOfStream),
};
@@ -142,7 +142,9 @@ impl<'data> DataSource<'data> {
Err(e) => return Err(e),
};
- self.buffer = buffer;
+ // If the value we read doesn't start where we began reading, there was a NOP.
+ let num_nop_bytes = lazy_value.input.offset() - buffer.offset();
+ self.buffer = buffer.consume(num_nop_bytes);
self.bytes_to_skip = lazy_value.encoded_value.total_length();
Ok(Some(lazy_value))
}
diff --git a/src/lazy/binary/raw/sequence.rs b/src/lazy/binary/raw/sequence.rs
index 7bb86d45..0014e288 100644
--- a/src/lazy/binary/raw/sequence.rs
+++ b/src/lazy/binary/raw/sequence.rs
@@ -157,7 +157,7 @@ impl<'data> Iterator for RawBinarySequenceIterator<'data> {
fn next(&mut self) -> Option<Self::Item> {
self.source
- .try_parse_next(ImmutableBuffer::peek_value_without_field_id)
+ .try_parse_next(ImmutableBuffer::peek_sequence_value)
.transpose()
}
}
diff --git a/src/lazy/binary/raw/value.rs b/src/lazy/binary/raw/value.rs
index 245f4706..5bdc8641 100644
--- a/src/lazy/binary/raw/value.rs
+++ b/src/lazy/binary/raw/value.rs
@@ -257,8 +257,8 @@ impl<'data> LazyRawBinaryValue<'data> {
return Ok(RawValueRef::Decimal(Decimal::new(0i32, 0i64)));
}
- // Skip the type descriptor
- let input = self.input.consume(1);
+ // Skip the type descriptor and length bytes
+ let input = ImmutableBuffer::new(self.value_body()?);
let (exponent_var_int, remaining) = input.read_var_int()?;
let coefficient_size_in_bytes =
diff --git a/src/lazy/reader.rs b/src/lazy/reader.rs
index 2f3cfbb4..a8624193 100644
--- a/src/lazy/reader.rs
+++ b/src/lazy/reader.rs
@@ -1,16 +1,17 @@
use crate::binary::constants::v1_0::IVM;
use crate::element::reader::ElementReader;
use crate::element::Element;
+use crate::lazy::any_encoding::AnyEncoding;
use crate::lazy::decoder::LazyDecoder;
-use crate::lazy::encoding::BinaryEncoding;
-use crate::lazy::system_reader::LazySystemReader;
+use crate::lazy::encoding::{BinaryEncoding, TextEncoding};
+use crate::lazy::system_reader::{LazySystemAnyReader, LazySystemBinaryReader, LazySystemReader};
use crate::lazy::value::LazyValue;
use crate::result::IonFailure;
use crate::{IonError, IonResult};
/// A binary reader that only reads each value that it visits upon request (that is: lazily).
///
-/// Each time [`LazyReader::next`] is called, the reader will advance to the next top-level value
+/// Each time [`LazyApplicationReader::next`] is called, the reader will advance to the next top-level value
/// in the input stream. Once positioned on a top-level value, users may visit nested values by
/// calling [`LazyValue::read`] and working with the resulting [`crate::lazy::value_ref::ValueRef`],
/// which may contain either a scalar value or a lazy container that may itself be traversed.
@@ -18,7 +19,7 @@ use crate::{IonError, IonResult};
/// The values that the reader yields ([`LazyValue`],
/// [`LazyBinarySequence`](crate::lazy::sequence::LazyBinarySequence), and
/// [`LazyBinaryStruct`](crate::lazy::struct::LazyStruct)) are
-/// immutable references to the data stream, and remain valid until [`LazyReader::next`] is called
+/// immutable references to the data stream, and remain valid until [`LazyApplicationReader::next`] is called
/// again to advance the reader to the next top level value. This means that these references can
/// be stored, read, and re-read as long as the reader remains on the same top-level value.
/// ```
@@ -55,11 +56,11 @@ use crate::{IonError, IonResult};
///# Ok(())
///# }
/// ```
-pub struct LazyReader<'data, D: LazyDecoder<'data>> {
+pub struct LazyApplicationReader<'data, D: LazyDecoder<'data>> {
system_reader: LazySystemReader<'data, D>,
}
-impl<'data, D: LazyDecoder<'data>> LazyReader<'data, D> {
+impl<'data, D: LazyDecoder<'data>> LazyApplicationReader<'data, D> {
/// Returns the next top-level value in the input stream as `Ok(Some(lazy_value))`.
/// If there are no more top-level values in the stream, returns `Ok(None)`.
/// If the next value is incomplete (that is: only part of it is in the input buffer) or if the
@@ -75,7 +76,16 @@ impl<'data, D: LazyDecoder<'data>> LazyReader<'data, D> {
}
}
-pub type LazyBinaryReader<'data> = LazyReader<'data, BinaryEncoding>;
+pub type LazyBinaryReader<'data> = LazyApplicationReader<'data, BinaryEncoding>;
+pub type LazyTextReader<'data> = LazyApplicationReader<'data, TextEncoding>;
+pub type LazyReader<'data> = LazyApplicationReader<'data, AnyEncoding>;
+
+impl<'data> LazyReader<'data> {
+ pub fn new(ion_data: &'data [u8]) -> LazyReader<'data> {
+ let system_reader = LazySystemAnyReader::new(ion_data);
+ LazyApplicationReader { system_reader }
+ }
+}
impl<'data> LazyBinaryReader<'data> {
pub fn new(ion_data: &'data [u8]) -> IonResult<LazyBinaryReader<'data>> {
@@ -85,13 +95,13 @@ impl<'data> LazyBinaryReader<'data> {
return IonResult::decoding_error("input does not begin with an Ion version marker");
}
- let system_reader = LazySystemReader::new(ion_data);
- Ok(LazyReader { system_reader })
+ let system_reader = LazySystemBinaryReader::new(ion_data);
+ Ok(LazyApplicationReader { system_reader })
}
}
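// A hedged usage sketch of the renamed reader types. The exact module paths used in the `use`
// statements are assumptions; what the diff itself establishes is `LazyBinaryReader::new(&[u8])`
// and the `ElementReader::read_next_element()` loop shown here, which yields `Ok(Some(_))`
// until the stream is exhausted.
use ion_rs::element::reader::ElementReader;
use ion_rs::lazy::reader::LazyBinaryReader;
use ion_rs::IonResult;

fn count_top_level_values(binary_ion: &[u8]) -> IonResult<usize> {
    let mut reader = LazyBinaryReader::new(binary_ion)?;
    let mut count = 0;
    while let Some(_element) = reader.read_next_element()? {
        count += 1;
    }
    Ok(count)
}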
pub struct LazyElementIterator<'iter, 'data, D: LazyDecoder<'data>> {
- lazy_reader: &'iter mut LazyReader<'data, D>,
+ lazy_reader: &'iter mut LazyApplicationReader<'data, D>,
}
impl<'iter, 'data, D: LazyDecoder<'data>> Iterator for LazyElementIterator<'iter, 'data, D> {
@@ -106,7 +116,7 @@ impl<'iter, 'data, D: LazyDecoder<'data>> Iterator for LazyElementIterator<'iter
}
}
-impl<'data, D: LazyDecoder<'data>> ElementReader for LazyReader<'data, D> {
+impl<'data, D: LazyDecoder<'data>> ElementReader for LazyApplicationReader<'data, D> {
type ElementIterator<'a> = LazyElementIterator<'a, 'data, D> where Self: 'a,;
fn read_next_element(&mut self) -> IonResult<Option<Element>> {
diff --git a/src/lazy/str_ref.rs b/src/lazy/str_ref.rs
index 850d0556..d8afc4df 100644
--- a/src/lazy/str_ref.rs
+++ b/src/lazy/str_ref.rs
@@ -1,6 +1,6 @@
use crate::lazy::bytes_ref::BytesRef;
use crate::text::text_formatter::IonValueFormatter;
-use crate::Str;
+use crate::{RawSymbolTokenRef, Str};
use std::borrow::Cow;
use std::fmt::{Display, Formatter};
use std::ops::Deref;
@@ -90,3 +90,9 @@ impl<'data> From<StrRef<'data>> for BytesRef<'data> {
}
}
}
+
+impl<'data> From<StrRef<'data>> for RawSymbolTokenRef<'data> {
+ fn from(value: StrRef<'data>) -> Self {
+ RawSymbolTokenRef::Text(value.text)
+ }
+}
diff --git a/src/lazy/struct.rs b/src/lazy/struct.rs
index 4956728f..b02f86bb 100644
--- a/src/lazy/struct.rs
+++ b/src/lazy/struct.rs
@@ -185,7 +185,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazyStruct<'top, 'data, D> {
/// let lazy_struct = reader.expect_next()?.read()?.expect_struct()?;
///
/// assert_eq!(lazy_struct.get_expected("foo")?, ValueRef::String("hello".into()));
- /// assert!(dbg!(lazy_struct.get_expected("Ontario")).is_err());
+ /// assert!(lazy_struct.get_expected("Ontario").is_err());
///# Ok(())
///# }
/// ```
diff --git a/src/lazy/system_reader.rs b/src/lazy/system_reader.rs
index e36e4e38..5f5c3837 100644
--- a/src/lazy/system_reader.rs
+++ b/src/lazy/system_reader.rs
@@ -1,4 +1,5 @@
-use crate::lazy::encoding::BinaryEncoding;
+use crate::lazy::any_encoding::{AnyEncoding, LazyRawAnyReader};
+use crate::lazy::encoding::{BinaryEncoding, TextEncoding};
use crate::result::IonFailure;
use crate::{IonResult, IonType, RawSymbolTokenRef, SymbolTable};
@@ -21,7 +22,7 @@ const SYMBOLS: RawSymbolTokenRef = RawSymbolTokenRef::SymbolId(7);
/// A binary reader that only reads each value that it visits upon request (that is: lazily).
///
-/// Unlike [`crate::lazy::reader::LazyReader`], which only exposes values that are part
+/// Unlike [`crate::lazy::reader::LazyApplicationReader`], which only exposes values that are part
/// of the application data model, [`LazySystemReader`] also yields Ion version markers
/// (as [`SystemStreamItem::VersionMarker`]) and structs representing a symbol table (as
/// [`SystemStreamItem::SymbolTable`]).
@@ -76,7 +77,9 @@ pub struct LazySystemReader<'data, D: LazyDecoder<'data>> {
pending_lst: PendingLst,
}
-pub type LazyBinarySystemReader<'data> = LazySystemReader<'data, BinaryEncoding>;
+pub type LazySystemBinaryReader<'data> = LazySystemReader<'data, BinaryEncoding>;
+pub type LazySystemTextReader<'data> = LazySystemReader<'data, TextEncoding>;
+pub type LazySystemAnyReader<'data> = LazySystemReader<'data, AnyEncoding>;
// If the reader encounters a symbol table in the stream, it will store all of the symbols that
// the table defines in this structure so that they may be applied when the reader next advances.
@@ -85,8 +88,22 @@ struct PendingLst {
symbols: Vec>,
}
-impl<'data> LazyBinarySystemReader<'data> {
- pub(crate) fn new(ion_data: &'data [u8]) -> LazyBinarySystemReader<'data> {
+impl<'data> LazySystemAnyReader<'data> {
+ pub fn new(ion_data: &'data [u8]) -> LazySystemAnyReader<'data> {
+ let raw_reader = LazyRawAnyReader::new(ion_data);
+ LazySystemReader {
+ raw_reader,
+ symbol_table: SymbolTable::new(),
+ pending_lst: PendingLst {
+ is_lst_append: false,
+ symbols: Vec::new(),
+ },
+ }
+ }
+}
+
+impl<'data> LazySystemBinaryReader<'data> {
+ pub(crate) fn new(ion_data: &'data [u8]) -> LazySystemBinaryReader<'data> {
let raw_reader = LazyRawBinaryReader::new(ion_data);
LazySystemReader {
raw_reader,
@@ -107,7 +124,7 @@ impl<'data, D: LazyDecoder<'data>> LazySystemReader<'data, D> {
return Ok(false);
}
if let Some(symbol_ref) = lazy_value.annotations().next() {
- return Ok(symbol_ref? == ION_SYMBOL_TABLE);
+ return Ok(symbol_ref?.matches_sid_or_text(3, "$ion_symbol_table"));
};
Ok(false)
}
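// A standalone sketch of the `matches_sid_or_text` predicate relied on above, with a local enum
// standing in for `RawSymbolTokenRef`. The assumed semantics: a token matches if it carries the
// given symbol ID (what a binary reader produces) or the given text (what a text reader
// produces), which is what lets this system reader work over `AnyEncoding`.
enum TokenRef<'a> {
    SymbolId(usize),
    Text(&'a str),
}

fn matches_sid_or_text(token: &TokenRef<'_>, sid: usize, text: &str) -> bool {
    match token {
        TokenRef::SymbolId(id) => *id == sid,
        TokenRef::Text(t) => *t == text,
    }
}

#[test]
fn recognizes_symbol_table_annotation_in_either_encoding() {
    assert!(matches_sid_or_text(&TokenRef::SymbolId(3), 3, "$ion_symbol_table"));
    assert!(matches_sid_or_text(&TokenRef::Text("$ion_symbol_table"), 3, "$ion_symbol_table"));
    assert!(!matches_sid_or_text(&TokenRef::Text("imports"), 3, "$ion_symbol_table"));
}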
@@ -208,7 +225,7 @@ impl<'data, D: LazyDecoder<'data>> LazySystemReader<'data, D> {
for field_result in symbol_table.iter() {
let field = field_result?;
- if field.name() == SYMBOLS {
+ if field.name().matches_sid_or_text(7, "symbols") {
if found_symbols_field {
return IonResult::decoding_error(
"found symbol table with multiple 'symbols' fields",
@@ -217,7 +234,7 @@ impl<'data, D: LazyDecoder<'data>> LazySystemReader<'data, D> {
found_symbols_field = true;
Self::process_symbols(pending_lst, &field.value())?;
}
- if field.name() == IMPORTS {
+ if field.name().matches_sid_or_text(6, "imports") {
if found_imports_field {
return IonResult::decoding_error(
"found symbol table with multiple 'imports' fields",
@@ -250,7 +267,7 @@ impl<'data, D: LazyDecoder<'data>> LazySystemReader<'data, D> {
fn process_imports(pending_lst: &mut PendingLst, imports: &D::Value) -> IonResult<()> {
match imports.read()? {
RawValueRef::Symbol(symbol_ref) => {
- if symbol_ref == RawSymbolTokenRef::SymbolId(3) {
+ if symbol_ref.matches_sid_or_text(3, "$ion_symbol_table") {
pending_lst.is_lst_append = true;
}
// Any other symbol is ignored
@@ -291,7 +308,7 @@ mod tests {
hello
"#,
)?;
- let mut system_reader = LazySystemReader::new(&ion_data);
+ let mut system_reader = LazySystemBinaryReader::new(&ion_data);
loop {
match system_reader.next_item()? {
SystemStreamItem::VersionMarker(major, minor) => {
@@ -316,7 +333,7 @@ mod tests {
)
"#,
)?;
- let mut system_reader = LazySystemReader::new(&ion_data);
+ let mut system_reader = LazySystemBinaryReader::new(&ion_data);
loop {
match system_reader.next_item()? {
SystemStreamItem::Value(value) => {
@@ -343,7 +360,7 @@ mod tests {
}
"#,
)?;
- let mut system_reader = LazySystemReader::new(&ion_data);
+ let mut system_reader = LazySystemBinaryReader::new(&ion_data);
loop {
match system_reader.next_item()? {
SystemStreamItem::Value(value) => {
diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs
index 00306eef..f0bb3f6d 100644
--- a/src/lazy/text/buffer.rs
+++ b/src/lazy/text/buffer.rs
@@ -5,20 +5,28 @@ use std::slice::Iter;
use std::str::FromStr;
use nom::branch::alt;
-use nom::bytes::streaming::{is_a, is_not, tag, take_until, take_while1, take_while_m_n};
+use nom::bytes::complete::{
+ is_a as complete_is_a, is_not as complete_is_not, tag as complete_tag,
+ take_while as complete_take_while, take_while1 as complete_take_while1,
+};
+use nom::bytes::streaming::{is_a, tag, take_until, take_while_m_n};
+use nom::character::complete::{
+ char as complete_char, digit1 as complete_digit1, one_of as complete_one_of,
+};
use nom::character::streaming::{alphanumeric1, char, digit1, one_of, satisfy};
-use nom::combinator::{consumed, map, not, opt, peek, recognize, success, value};
+use nom::combinator::{consumed, eof, map, not, opt, peek, recognize, success, value};
use nom::error::{ErrorKind, ParseError};
-use nom::multi::{fold_many1, many0_count, many1_count};
+use nom::multi::{fold_many1, fold_many_m_n, many0_count, many1_count};
use nom::sequence::{delimited, pair, preceded, separated_pair, terminated, tuple};
-use nom::{CompareResult, IResult, InputLength, InputTake, Needed, Parser};
+use nom::{AsBytes, CompareResult, IResult, InputLength, InputTake, Needed, Parser};
use crate::lazy::encoding::TextEncoding;
use crate::lazy::raw_stream_item::RawStreamItem;
use crate::lazy::text::encoded_value::EncodedTextValue;
use crate::lazy::text::matched::{
- MatchedBlob, MatchedClob, MatchedDecimal, MatchedFloat, MatchedHoursAndMinutes, MatchedInt,
- MatchedString, MatchedSymbol, MatchedTimestamp, MatchedTimestampOffset, MatchedValue,
+ MatchedBlob, MatchedClob, MatchedDecimal, MatchedFieldName, MatchedFloat,
+ MatchedHoursAndMinutes, MatchedInt, MatchedString, MatchedSymbol, MatchedTimestamp,
+ MatchedTimestampOffset, MatchedValue,
};
use crate::lazy::text::parse_result::{InvalidInputError, IonParseError};
use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult};
@@ -67,7 +75,23 @@ const WHITESPACE_CHARACTERS: &[char] = &[
];
/// Same as [WHITESPACE_CHARACTERS], but formatted as a string for use in some `nom` APIs
-const WHITESPACE_CHARACTERS_AS_STR: &str = " \t\r\n\x09\x0B\x0C";
+pub(crate) const WHITESPACE_CHARACTERS_AS_STR: &str = " \t\r\n\x09\x0B\x0C";
+
+/// This helper function takes a parser and returns a closure that performs the same parsing
+/// but prints the Result before returning the output. This is handy for debugging.
+// A better implementation would use a macro to auto-generate the label from the file name and
+// line number.
+fn dbg_parse<I: std::fmt::Debug, O: std::fmt::Debug, E: std::fmt::Debug, P: Parser<I, O, E>>(
+ label: &'static str,
+ mut parser: P,
+) -> impl Parser<I, O, E> {
+ move |input: I| {
+ let result = parser.parse(input);
+ #[cfg(debug_assertions)]
+ println!("{}: {:?}", label, result);
+ result
+ }
+}
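// A minimal sketch of why this file swaps so many `nom::bytes::streaming` parsers for their
// `complete` counterparts: at the end of the buffer, a streaming parser answers `Err::Incomplete`
// ("more input might still arrive"), while a complete parser gives a definitive success or
// failure. The nom 7 calls below are standard; the inputs are arbitrary examples.
#[test]
fn streaming_vs_complete_at_end_of_input() {
    use nom::bytes::{complete, streaming};
    use nom::IResult;

    // "nul" could still become "null" if more bytes arrived, so streaming defers...
    let result: IResult<&str, &str> = streaming::tag("null")("nul");
    assert!(matches!(result, Err(nom::Err::Incomplete(_))));

    // ...while complete treats the buffer as the whole stream and rejects it.
    let result: IResult<&str, &str> = complete::tag("null")("nul");
    assert!(matches!(result, Err(nom::Err::Error(_))));

    // When the input is actually there, both behave identically.
    let result: IResult<&str, &str> = complete::tag("null")("null ");
    assert_eq!(result.unwrap(), (" ", "null"));
}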
/// A slice of unsigned bytes that can be cheaply copied and which defines methods for parsing
/// the various encoding elements of a text Ion stream.
@@ -165,7 +189,7 @@ impl<'data> TextBufferView<'data> {
}
pub fn match_whitespace(self) -> IonMatchResult<'data> {
- is_a(WHITESPACE_CHARACTERS_AS_STR)(self)
+ complete_is_a(WHITESPACE_CHARACTERS_AS_STR)(self)
}
/// Always succeeds and consumes none of the input. Returns an empty slice of the buffer.
@@ -209,13 +233,13 @@ impl<'data> TextBufferView<'data> {
fn match_rest_of_line_comment(self) -> IonMatchResult<'data> {
preceded(
// Matches a leading "//"...
- tag("//"),
+ complete_tag("//"),
// ...followed by either...
alt((
// ...one or more non-EOL characters...
- is_not("\r\n"),
+ complete_is_not("\r\n"),
// ...or any EOL character.
- peek(recognize(one_of("\r\n"))),
+ peek(recognize(complete_one_of("\r\n"))),
// In either case, the line ending will not be consumed.
)),
)(self)
@@ -225,18 +249,25 @@ impl<'data> TextBufferView<'data> {
fn match_multiline_comment(self) -> IonMatchResult<'data> {
recognize(delimited(
// Matches a leading "/*"...
- tag("/*"),
+ complete_tag("/*"),
// ...any number of non-"*/" characters...
take_until("*/"),
// ...and then a closing "*/"
- tag("*/"),
+ complete_tag("*/"),
))(self)
}
/// Matches an Ion version marker (e.g. `$ion_1_0` or `$ion_1_1`.)
pub fn match_ivm(self) -> IonParseResult<'data, RawStreamItem<'data, TextEncoding>> {
- let (remaining, (major, minor)) =
- preceded(tag("$ion_"), separated_pair(digit1, tag("_"), digit1))(self)?;
+ let (remaining, (major, minor)) = terminated(
+ preceded(
+ complete_tag("$ion_"),
+ separated_pair(complete_digit1, complete_tag("_"), complete_digit1),
+ ),
+ // Look ahead to make sure the IVM isn't followed by a '::'. If it is, then it's not
+ // an IVM, it's an annotation.
+ peek(whitespace_and_then(not(complete_tag("::")))),
+ )(self)?;
// `major` and `minor` are base 10 digits. Turning them into `&str`s is guaranteed to succeed.
let major_version = u8::from_str(major.as_text().unwrap()).map_err(|_| {
let error = InvalidInputError::new(major)
@@ -266,7 +297,7 @@ impl<'data> TextBufferView<'data> {
pub fn match_annotation(self) -> IonParseResult<'data, (MatchedSymbol, Range<usize>)> {
terminated(
whitespace_and_then(match_and_span(Self::match_symbol)),
- whitespace_and_then(tag("::")),
+ whitespace_and_then(complete_tag("::")),
)(self)
}
@@ -280,7 +311,7 @@ impl<'data> TextBufferView<'data> {
// int `3` while recognizing the input `-3` as the int `-3`. If `match_operator` runs before
// `match_value`, it will consume the sign (`-`) of negative number values, treating
// `-3` as an operator (`-`) and an int (`3`). Thus, we run `match_value` first.
- alt((Self::match_value, Self::match_operator)),
+ whitespace_and_then(alt((Self::match_value, Self::match_operator))),
)
.map(|(maybe_annotations, mut value)| {
if let Some(annotations) = maybe_annotations {
@@ -296,28 +327,6 @@ impl<'data> TextBufferView<'data> {
.parse(self)
}
- /// Matches a single value in a list OR the end of the list, allowing for leading whitespace
- /// and comments in either case.
- ///
- /// If a value is found, returns `Ok(Some(value))`. If the end of the list is found, returns
- /// `Ok(None)`.
- pub fn match_list_value(self) -> IonParseResult<'data, Option<LazyRawTextValue<'data>>> {
- preceded(
- // Some amount of whitespace/comments...
- Self::match_optional_comments_and_whitespace,
- // ...followed by either the end of the list...
- alt((
- value(None, tag("]")),
- // ...or a value...
- terminated(
- Self::match_annotated_value.map(Some),
- // ...followed by a comma or end-of-list
- Self::match_delimiter_after_list_value,
- ),
- )),
- )(self)
- }
-
/// Matches a struct field name/value pair.
///
/// If a pair is found, returns `Some(field)` and consumes the following comma if present.
@@ -357,7 +366,7 @@ impl<'data> TextBufferView<'data> {
/// input bytes where the field name is found, and the value.
pub fn match_struct_field_name_and_value(
self,
- ) -> IonParseResult<'data, ((MatchedSymbol, Range<usize>), LazyRawTextValue<'data>)> {
+ ) -> IonParseResult<'data, ((MatchedFieldName, Range<usize>), LazyRawTextValue<'data>)> {
terminated(
separated_pair(
whitespace_and_then(match_and_span(Self::match_struct_field_name)),
@@ -392,32 +401,13 @@ impl<'data> TextBufferView<'data> {
/// * An identifier
/// * A symbol ID
/// * A short-form string
- pub fn match_struct_field_name(self) -> IonParseResult<'data, MatchedSymbol> {
+ pub fn match_struct_field_name(self) -> IonParseResult<'data, MatchedFieldName> {
alt((
- Self::match_symbol,
- Self::match_short_string.map(|s| {
- // NOTE: We're "casting" the matched short string to a matched symbol here.
- // This relies on the fact that the MatchedSymbol logic ignores
- // the first and last matched byte, which are usually single
- // quotes but in this case are double quotes.
- match s {
- MatchedString::ShortWithoutEscapes => MatchedSymbol::QuotedWithoutEscapes,
- MatchedString::ShortWithEscapes => MatchedSymbol::QuotedWithEscapes,
- _ => unreachable!("field name parser matched long string"),
- }
- }),
+ Self::match_string.map(MatchedFieldName::String),
+ Self::match_symbol.map(MatchedFieldName::Symbol),
))(self)
}
- /// Matches syntax that is expected to follow a value in a list: any amount of whitespace and/or
- /// comments followed by either a comma (consumed) or an end-of-list `]` (not consumed).
- fn match_delimiter_after_list_value(self) -> IonMatchResult<'data> {
- preceded(
- Self::match_optional_comments_and_whitespace,
- alt((tag(","), peek(tag("]")))),
- )(self)
- }
-
/// Matches a single top-level value, an IVM, or the end of the stream.
pub fn match_top_level_item(self) -> IonParseResult<'data, RawStreamItem<'data, TextEncoding>> {
// If only whitespace/comments remain, we're at the end of the stream.
@@ -575,6 +565,37 @@ impl<'data> TextBufferView<'data> {
Ok((remaining, matched))
}
+ /// Matches a single value in a list OR the end of the list, allowing for leading whitespace
+ /// and comments in either case.
+ ///
+ /// If a value is found, returns `Ok(Some(value))`. If the end of the list is found, returns
+ /// `Ok(None)`.
+ pub fn match_list_value(self) -> IonParseResult<'data, Option<LazyRawTextValue<'data>>> {
+ preceded(
+ // Some amount of whitespace/comments...
+ Self::match_optional_comments_and_whitespace,
+ // ...followed by either the end of the list...
+ alt((
+ value(None, tag("]")),
+ // ...or a value...
+ terminated(
+ Self::match_annotated_value.map(Some),
+ // ...followed by a comma or end-of-list
+ Self::match_delimiter_after_list_value,
+ ),
+ )),
+ )(self)
+ }
+
+ /// Matches syntax that is expected to follow a value in a list: any amount of whitespace and/or
+ /// comments followed by either a comma (consumed) or an end-of-list `]` (not consumed).
+ fn match_delimiter_after_list_value(self) -> IonMatchResult<'data> {
+ preceded(
+ Self::match_optional_comments_and_whitespace,
+ alt((tag(","), peek(tag("]")))),
+ )(self)
+ }
+
/// Matches an s-expression (sexp).
///
/// If the input does not contain the entire s-expression, returns `IonError::Incomplete(_)`.
@@ -662,8 +683,8 @@ impl<'data> TextBufferView<'data> {
/// Matches and returns a null value.
pub fn read_null(self) -> IonParseResult<'data, IonType> {
delimited(
- tag("null"),
- opt(preceded(char('.'), Self::read_ion_type)),
+ complete_tag("null"),
+ opt(preceded(complete_char('.'), Self::read_ion_type)),
Self::peek_stop_character,
)
.map(|explicit_ion_type| explicit_ion_type.unwrap_or(IonType::Null))
@@ -673,25 +694,25 @@ impl<'data> TextBufferView<'data> {
/// Matches and returns an Ion type.
fn read_ion_type(self) -> IonParseResult<'data, IonType> {
alt((
- value(IonType::Null, tag("null")),
- value(IonType::Bool, tag("bool")),
- value(IonType::Int, tag("int")),
- value(IonType::Float, tag("float")),
- value(IonType::Decimal, tag("decimal")),
- value(IonType::Timestamp, tag("timestamp")),
- value(IonType::Symbol, tag("symbol")),
- value(IonType::String, tag("string")),
- value(IonType::Clob, tag("clob")),
- value(IonType::Blob, tag("blob")),
- value(IonType::List, tag("list")),
- value(IonType::SExp, tag("sexp")),
- value(IonType::Struct, tag("struct")),
+ value(IonType::Null, complete_tag("null")),
+ value(IonType::Bool, complete_tag("bool")),
+ value(IonType::Int, complete_tag("int")),
+ value(IonType::Float, complete_tag("float")),
+ value(IonType::Decimal, complete_tag("decimal")),
+ value(IonType::Timestamp, complete_tag("timestamp")),
+ value(IonType::Symbol, complete_tag("symbol")),
+ value(IonType::String, complete_tag("string")),
+ value(IonType::Clob, complete_tag("clob")),
+ value(IonType::Blob, complete_tag("blob")),
+ value(IonType::List, complete_tag("list")),
+ value(IonType::SExp, complete_tag("sexp")),
+ value(IonType::Struct, complete_tag("struct")),
))(self)
}
/// Matches any one of Ion's stop characters.
fn match_stop_character(self) -> IonMatchResult<'data> {
- recognize(one_of("{}[](),\"' \t\n\r\u{0b}\u{0c}")).parse(self)
+ alt((eof, recognize(one_of("{}[](),\"' \t\n\r\u{0b}\u{0c}"))))(self)
}
/// Matches--but does not consume--any one of Ion's stop characters.
@@ -701,7 +722,7 @@ impl<'data> TextBufferView<'data> {
/// Matches the three parts of an int--its base, its sign, and its digits--without actually
/// constructing an Int from them.
- fn match_int(self) -> IonParseResult<'data, MatchedInt> {
+ pub fn match_int(self) -> IonParseResult<'data, MatchedInt> {
terminated(
// We test for base 16 and base 2 so the '0x' or '0b' isn't confused for a leading zero
// in a base 10 number, which would be illegal.
@@ -719,7 +740,7 @@ impl<'data> TextBufferView<'data> {
fn match_base_2_int(self) -> IonParseResult<'data, MatchedInt> {
separated_pair(
opt(char('-')),
- alt((tag("0b"), tag("0B"))),
+ alt((complete_tag("0b"), complete_tag("0B"))),
Self::match_base_2_int_digits,
)
.map(|(maybe_sign, digits)| {
@@ -732,9 +753,9 @@ impl<'data> TextBufferView<'data> {
fn match_base_2_int_digits(self) -> IonMatchResult<'data> {
recognize(terminated(
// Zero or more digits-followed-by-underscores
- many0_count(pair(is_a("01"), char('_'))),
+ many0_count(pair(complete_is_a("01"), complete_tag("_"))),
// One or more digits
- is_a("01"),
+ complete_is_a("01"),
))(self)
}
@@ -750,12 +771,7 @@ impl<'data> TextBufferView<'data> {
/// Matches the digits of a base-10 integer. (i.e. An integer without a sign.)
fn match_base_10_int_digits(self) -> IonMatchResult<'data> {
- alt((
- // The number is either a zero...
- recognize(char('0')),
- // Or it's a non-zero followed by some number of '_'-separated digits
- Self::match_base_10_digits_before_dot,
- ))(self)
+ Self::match_base_10_digits_before_dot(self)
}
/// Matches either:
@@ -763,7 +779,9 @@ impl<'data> TextBufferView<'data> {
/// * a non-zero followed by some number of digits with optional underscores
fn match_base_10_digits_before_dot(self) -> IonMatchResult<'data> {
alt((
- tag("0"),
+ // The number is either a zero...
+ complete_tag("0"),
+ // Or it's a non-zero followed by some number of '_'-separated digits
recognize(pair(
Self::match_base_10_leading_digit,
Self::match_base_10_trailing_digits,
@@ -780,7 +798,7 @@ impl<'data> TextBufferView<'data> {
/// This parser accepts leading zeros, which is why it cannot be used for the beginning
/// of a number.
fn match_base_10_trailing_digits(self) -> IonMatchResult<'data> {
- recognize(many0_count(pair(opt(char('_')), digit1)))(self)
+ recognize(many0_count(pair(opt(complete_char('_')), complete_digit1)))(self)
}
/// Matches a base-16 notation integer (e.g. `0x0`, `0X20`, or `-0xCAFE`) and returns the
@@ -788,7 +806,7 @@ impl<'data> TextBufferView<'data> {
fn match_base_16_int(self) -> IonParseResult<'data, MatchedInt> {
separated_pair(
opt(char('-')),
- alt((tag("0x"), tag("0X"))),
+ alt((complete_tag("0x"), complete_tag("0X"))),
Self::match_base_16_int_trailing_digits,
)
.map(|(maybe_sign, digits)| {
@@ -801,7 +819,7 @@ impl<'data> TextBufferView<'data> {
fn match_base_16_int_trailing_digits(self) -> IonMatchResult<'data> {
recognize(terminated(
// Zero or more digits-followed-by-underscores
- many0_count(pair(Self::take_base_16_digits1, char('_'))),
+ many0_count(pair(Self::take_base_16_digits1, complete_tag("_"))),
// One or more digits
Self::take_base_16_digits1,
))(self)
@@ -810,35 +828,53 @@ impl<'data> TextBufferView<'data> {
/// Recognizes 1 or more consecutive base-16 digits.
// This function's "1" suffix is a style borrowed from `nom`.
fn take_base_16_digits1(self) -> IonMatchResult<'data> {
- take_while1(|b: u8| b.is_ascii_hexdigit())(self)
+ complete_take_while1(|b: u8| b.is_ascii_hexdigit())(self)
+ }
+
+ /// Matches `n` consecutive hex digits.
+ pub(crate) fn match_n_hex_digits(
+ count: usize,
+ ) -> impl Parser<TextBufferView<'data>, TextBufferView<'data>, IonParseError<'data>> {
+ // `fold_many_m_n` allows us to repeat the same parser between 'm' and 'n' times,
+ // specifying an operation to perform on each match. In our case, we just need the parser
+ // to run 'n' times exactly so `recognize` can return the accepted slice; our operation
+ // is a no-op.
+ recognize(fold_many_m_n(
+ count,
+ count,
+ satisfy(|c| c.is_ascii_hexdigit()),
+ || 0,
+ // no-op
+ |accum, _item| accum,
+ ))
}
/// Matches an Ion float of any syntax
fn match_float(self) -> IonParseResult<'data, MatchedFloat> {
- alt((
- Self::match_float_special_value,
- Self::match_float_numeric_value,
- ))(self)
+ terminated(
+ alt((
+ Self::match_float_special_value,
+ Self::match_float_numeric_value,
+ )),
+ Self::peek_stop_character,
+ )(self)
}
/// Matches special IEEE-754 values, including +/- infinity and NaN.
fn match_float_special_value(self) -> IonParseResult<'data, MatchedFloat> {
alt((
- value(MatchedFloat::NotANumber, tag("nan")),
- value(MatchedFloat::PositiveInfinity, tag("+inf")),
- value(MatchedFloat::NegativeInfinity, tag("-inf")),
+ value(MatchedFloat::NotANumber, complete_tag("nan")),
+ value(MatchedFloat::PositiveInfinity, complete_tag("+inf")),
+ value(MatchedFloat::NegativeInfinity, complete_tag("-inf")),
))(self)
}
/// Matches numeric IEEE-754 floating point values.
fn match_float_numeric_value(self) -> IonParseResult<'data, MatchedFloat> {
- terminated(
- recognize(pair(
- Self::match_number_with_optional_dot_and_digits,
- Self::match_float_exponent_marker_and_digits,
- )),
- Self::peek_stop_character,
- )
+ recognize(pair(
+ Self::match_number_with_optional_dot_and_digits,
+ Self::match_float_exponent_marker_and_digits,
+ ))
.map(|_matched| MatchedFloat::Numeric)
.parse(self)
}
@@ -851,7 +887,7 @@ impl<'data> TextBufferView<'data> {
/// -25.2
fn match_number_with_optional_dot_and_digits(self) -> IonMatchResult<'data> {
recognize(tuple((
- opt(tag("-")),
+ opt(complete_tag("-")),
Self::match_base_10_digits_before_dot,
opt(Self::match_dot_followed_by_base_10_digits),
)))(self)
@@ -861,7 +897,7 @@ impl<'data> TextBufferView<'data> {
/// This includes either a single zero, or a non-zero followed by any sequence of digits.
fn match_digits_before_dot(self) -> IonMatchResult<'data> {
alt((
- tag("0"),
+ complete_tag("0"),
recognize(pair(Self::match_leading_digit, Self::match_trailing_digits)),
))(self)
}
@@ -873,21 +909,27 @@ impl<'data> TextBufferView<'data> {
/// Matches any number of base 10 digits, allowing underscores at any position except the end.
fn match_trailing_digits(self) -> IonMatchResult<'data> {
- recognize(many0_count(preceded(opt(char('_')), digit1)))(self)
+ recognize(many0_count(preceded(
+ opt(complete_char('_')),
+ complete_digit1,
+ )))(self)
}
/// Recognizes a decimal point followed by any number of base-10 digits.
fn match_dot_followed_by_base_10_digits(self) -> IonMatchResult<'data> {
- recognize(preceded(tag("."), opt(Self::match_digits_after_dot)))(self)
+ recognize(preceded(
+ complete_tag("."),
+ opt(Self::match_digits_after_dot),
+ ))(self)
}
/// Like `match_digits_before_dot`, but allows leading zeros.
fn match_digits_after_dot(self) -> IonMatchResult<'data> {
recognize(terminated(
// Zero or more digits-followed-by-underscores
- many0_count(pair(digit1, char('_'))),
+ many0_count(pair(complete_digit1, complete_char('_'))),
// One or more digits
- digit1,
+ complete_digit1,
))(self)
}
@@ -895,7 +937,7 @@ impl<'data> TextBufferView<'data> {
/// base 10 digits.
fn match_float_exponent_marker_and_digits(self) -> IonMatchResult<'data> {
preceded(
- one_of("eE"),
+ complete_one_of("eE"),
recognize(Self::match_exponent_sign_and_digits),
)(self)
}
@@ -921,45 +963,50 @@ impl<'data> TextBufferView<'data> {
///
/// This is used for matching exponent signs; most places in Ion do not allow `+`.
pub fn match_any_sign(self) -> IonParseResult<'data, char> {
- one_of("-+")(self)
+ complete_one_of("-+")(self)
}
pub fn match_decimal_exponent(self) -> IonParseResult<'data, (bool, TextBufferView<'data>)> {
- preceded(one_of("dD"), Self::match_exponent_sign_and_digits)(self)
+ preceded(complete_one_of("dD"), Self::match_exponent_sign_and_digits)(self)
}
/// Match an optional sign (if present), digits before the decimal point, then digits after the
/// decimal point (if present).
pub fn match_decimal(self) -> IonParseResult<'data, MatchedDecimal> {
- tuple((
- opt(tag("-")),
- Self::match_digits_before_dot,
- alt((
- // Either a decimal point and digits and optional d/D and exponent
- preceded(
- tag("."),
- pair(
- alt((Self::match_digits_after_dot, Self::match_nothing)),
+ terminated(
+ tuple((
+ opt(complete_tag("-")),
+ Self::match_digits_before_dot,
+ alt((
+ // Either a decimal point and digits and optional d/D and exponent
+ tuple((
+ complete_tag("."),
+ opt(Self::match_digits_after_dot),
opt(Self::match_decimal_exponent),
- ),
- )
- .map(|(digits_after_dot, maybe_exponent)| {
- let (exp_is_negative, exp_digits) = match maybe_exponent {
- Some(exponent) => exponent,
- None => (false, digits_after_dot.slice(digits_after_dot.len(), 0)),
- };
- (digits_after_dot, exp_is_negative, exp_digits)
- }),
- // or just a d/D and exponent
- consumed(Self::match_decimal_exponent).map(
- |(matched, (exp_is_negative, exp_digits))| {
- // Make an empty slice to represent the (absent) digits after dot
- let digits_after_dot = matched.slice(0, 0);
+ ))
+ .map(|(dot, maybe_digits_after_dot, maybe_exponent)| {
+ let digits_after_dot = match maybe_digits_after_dot {
+ Some(digits) => digits,
+ None => dot.slice(1, 0),
+ };
+ let (exp_is_negative, exp_digits) = match maybe_exponent {
+ Some(exponent) => exponent,
+ None => (false, digits_after_dot.slice(digits_after_dot.len(), 0)),
+ };
(digits_after_dot, exp_is_negative, exp_digits)
- },
- ),
+ }),
+ // or just a d/D and exponent
+ consumed(Self::match_decimal_exponent).map(
+ |(matched, (exp_is_negative, exp_digits))| {
+ // Make an empty slice to represent the (absent) digits after dot
+ let digits_after_dot = matched.slice(0, 0);
+ (digits_after_dot, exp_is_negative, exp_digits)
+ },
+ ),
+ )),
)),
- ))
+ Self::peek_stop_character,
+ )
.map(
|(maybe_sign, leading_digits, (digits_after_dot, exponent_is_negative, exp_digits))| {
let is_negative = maybe_sign.is_some();
@@ -971,14 +1018,18 @@ impl<'data> TextBufferView<'data> {
(leading_digits.len() + 1 + trailing_digits_length) as u16
}
};
- let trailing_digits_length = digits_after_dot.len() as u16;
+ let num_trailing_digits = digits_after_dot
+ .bytes()
+ .iter()
+ .filter(|b| b.is_ascii_digit())
+ .count() as u16;
let exponent_digits_offset = (exp_digits.offset() - self.offset()) as u16;
let exponent_digits_length = exp_digits.len() as u16;
MatchedDecimal::new(
is_negative,
digits_offset,
digits_length,
- trailing_digits_length,
+ num_trailing_digits,
exponent_is_negative,
exponent_digits_offset,
exponent_digits_length,
@@ -1009,7 +1060,7 @@ impl<'data> TextBufferView<'data> {
/// Returns a matched buffer and a boolean indicating whether any escaped characters were
/// found in the short string.
pub(crate) fn match_short_string_body(self) -> IonParseResult<'data, (Self, bool)> {
- Self::match_text_until_unescaped(self, b'\"')
+ Self::match_text_until_unescaped(self, b'\"', false)
}
/// Matches a long string comprised of any number of `'''`-enclosed segments interleaved
@@ -1041,7 +1092,11 @@ impl<'data> TextBufferView<'data> {
/// Matches a single long string segment enclosed by `'''` delimiters.
pub fn match_long_string_segment(self) -> IonParseResult<'data, (Self, bool)> {
- delimited(tag("'''"), Self::match_long_string_segment_body, tag("'''"))(self)
+ delimited(
+ complete_tag("'''"),
+ Self::match_long_string_segment_body,
+ complete_tag("'''"),
+ )(self)
}
/// Matches all input up to (but not including) the first unescaped instance of `'''`.
@@ -1079,7 +1134,7 @@ impl<'data> TextBufferView<'data> {
// Note that symbol ID integers:
// * CANNOT have underscores in them. For example: `$1_0` is considered an identifier.
// * CAN have leading zeros. There's precedent for this in ion-java.
- preceded(tag("$"), digit1),
+ preceded(tag("$"), complete_digit1),
// Peek at the next character to make sure it's unrelated to the symbol ID.
// The spec does not offer a formal definition of what ends a symbol ID.
// This checks for either a stop_character (which performs its own `peek()`)
@@ -1103,7 +1158,7 @@ impl<'data> TextBufferView<'data> {
Self::identifier_initial_character,
Self::identifier_trailing_characters,
),
- not(Self::identifier_trailing_character),
+ Self::identifier_terminator,
))(self)?;
// Ion defines a number of keywords that are syntactically indistinguishable from
// identifiers. Keywords take precedence; we must ensure that any identifier we find
@@ -1125,6 +1180,13 @@ impl<'data> TextBufferView<'data> {
Ok((remaining, MatchedSymbol::Identifier))
}
+ fn identifier_terminator(self) -> IonMatchResult<'data> {
+ alt((
+ eof,
+ recognize(peek(not(Self::identifier_trailing_character))),
+ ))(self)
+ }
+
/// Matches any character that can appear at the start of an identifier.
fn identifier_initial_character(self) -> IonParseResult<'data, Self> {
recognize(alt((one_of("$_"), satisfy(|c| c.is_ascii_alphabetic()))))(self)
@@ -1137,7 +1199,7 @@ impl<'data> TextBufferView<'data> {
/// Matches characters that are legal in an identifier, though not necessarily at the beginning.
fn identifier_trailing_characters(self) -> IonParseResult<'data, Self> {
- recognize(many0_count(Self::identifier_trailing_character))(self)
+ complete_take_while(|c: u8| c.is_ascii_alphanumeric() || b"$_".contains(&c))(self)
}
/// Matches a quoted symbol (`'foo'`).
@@ -1156,41 +1218,76 @@ impl<'data> TextBufferView<'data> {
/// Returns a matched buffer and a boolean indicating whether any escaped characters were
/// found in the short string.
fn match_quoted_symbol_body(self) -> IonParseResult<'data, (Self, bool)> {
- Self::match_text_until_unescaped(self, b'\'')
+ Self::match_text_until_unescaped(self, b'\'', false)
}
/// A helper method for matching bytes until the specified delimiter. Ignores any byte
/// (including the delimiter) that is prefaced by the escape character `\`.
- fn match_text_until_unescaped(self, delimiter: u8) -> IonParseResult<'data, (Self, bool)> {
- let mut is_escaped = false;
+ fn match_text_until_unescaped(
+ self,
+ delimiter: u8,
+ allow_unescaped_newlines: bool,
+ ) -> IonParseResult<'data, (Self, bool)> {
let mut contains_escaped_chars = false;
- for (index, byte) in self.bytes().iter().enumerate() {
- if is_escaped {
- // If we're escaped, the previous byte was a \ and we ignore this one.
- is_escaped = false;
- continue;
- }
- if *byte == b'\\' {
- is_escaped = true;
- contains_escaped_chars = true;
- continue;
- }
- if *byte == b'\r' {
- // If the text contains an unescaped carriage return, we may need to normalize it.
- // In some narrow cases, setting this flag to true may result in a sanitization buffer
- // being allocated when it isn't strictly necessary.
+ // This de-sugared syntax allows us to modify `iter` mid-loop.
+ let mut iter = self.bytes().iter().copied().enumerate();
+ while let Some((index, byte)) = iter.next() {
+ if byte == b'\\' {
+ // It's an escape sequence. For the purposes of finding the end delimiter, we can
+ // skip the next 1 byte unless this is \r\n, in which case we need to skip two.
+ // Other escape sequences that are followed by more than one byte (e.g. \u and \U)
+ // are always followed by ASCII letters, which aren't used as delimiters.
contains_escaped_chars = true;
+ // Peek at the next two bytes to see if this is a \r\n
+ let next_two_bytes = self.bytes().get(index + 1..index + 3);
+ let bytes_to_skip = if next_two_bytes == Some(&[b'\r', b'\n']) {
+ 2
+ } else {
+ 1
+ };
+ // Eagerly skip the next iterator values
+ let _ = iter.nth(bytes_to_skip - 1);
continue;
}
- if *byte == delimiter {
+ if byte == delimiter {
let matched = self.slice(0, index);
let remaining = self.slice_to_end(index);
return Ok((remaining, (matched, contains_escaped_chars)));
}
+ // If this is a control character, make sure it's a legal one.
+ if byte < 0x20 {
+ if byte == b'\r' {
+ // Carriage returns are not actual escapes, but do require a substitution
+ // as part of newline normalization when the string is read.
+ contains_escaped_chars = true;
+ } else {
+ self.validate_string_control_character(byte, index, allow_unescaped_newlines)?;
+ }
+ }
}
Err(nom::Err::Incomplete(Needed::Unknown))
}
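// A standalone sketch of the scan above over a plain byte slice: skip one byte after a
// backslash (two when the escape is a `\` + CRLF line continuation) and stop at the first
// unescaped delimiter, reporting whether any escapes were seen. `None` stands in for the
// Incomplete case. Names here are illustrative, not this crate's API.
fn scan_until_unescaped(bytes: &[u8], delimiter: u8) -> Option<(&[u8], bool)> {
    let mut saw_escape = false;
    let mut i = 0;
    while i < bytes.len() {
        let byte = bytes[i];
        if byte == b'\\' {
            saw_escape = true;
            // An escaped CRLF spans two bytes; every other escape body spans one.
            let skip = if bytes.get(i + 1..i + 3) == Some(&b"\r\n"[..]) { 2 } else { 1 };
            i += 1 + skip;
            continue;
        }
        if byte == delimiter {
            return Some((&bytes[..i], saw_escape));
        }
        i += 1;
    }
    None
}

#[test]
fn escaped_quote_does_not_terminate_the_match() {
    let (matched, saw_escape) = scan_until_unescaped(br#"say \"hi\"" rest"#, b'"').unwrap();
    assert_eq!(matched, br#"say \"hi\""#);
    assert!(saw_escape);
}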
+ #[cold]
+ fn validate_string_control_character(
+ self,
+ byte: u8,
+ index: usize,
+ allow_unescaped_newlines: bool,
+ ) -> IonParseResult<'data, ()> {
+ if byte == b'\n' && !allow_unescaped_newlines {
+ let error = InvalidInputError::new(self.slice_to_end(index))
+ .with_description("unescaped newlines are not allowed in short string literals");
+ return Err(nom::Err::Failure(IonParseError::Invalid(error)));
+ }
+ if !WHITESPACE_CHARACTERS_AS_STR.as_bytes().contains(&byte) {
+ let error = InvalidInputError::new(self.slice_to_end(index))
+ .with_description("unescaped control characters are not allowed in text literals");
+ return Err(nom::Err::Failure(IonParseError::Invalid(error)));
+ }
+ Ok((self.slice_to_end(1), ()))
+ }
+
/// A helper method for matching bytes until the specified delimiter. Ignores any byte
/// that is prefaced by the escape character `\`.
///
@@ -1211,7 +1308,7 @@ impl<'data> TextBufferView<'data> {
// `match_text_until_escaped` does NOT include the delimiter byte in the match,
// so `remaining_after_match` starts at the delimiter byte.
let (remaining_after_match, (_, segment_contained_escapes)) =
- remaining.match_text_until_unescaped(delimiter_head)?;
+ remaining.match_text_until_unescaped(delimiter_head, true)?;
contained_escapes |= segment_contained_escapes;
remaining = remaining_after_match;
@@ -1249,7 +1346,7 @@ impl<'data> TextBufferView<'data> {
fn match_timestamp_y(self) -> IonParseResult<'data, MatchedTimestamp> {
terminated(
Self::match_timestamp_year,
- pair(tag("T"), Self::peek_stop_character),
+ pair(complete_tag("T"), Self::peek_stop_character),
)
.map(|_year| MatchedTimestamp::new(TimestampPrecision::Year))
.parse(self)
@@ -1259,7 +1356,7 @@ impl<'data> TextBufferView<'data> {
fn match_timestamp_ym(self) -> IonParseResult<'data, MatchedTimestamp> {
terminated(
pair(Self::match_timestamp_year, Self::match_timestamp_month),
- pair(tag("T"), Self::peek_stop_character),
+ pair(complete_tag("T"), Self::peek_stop_character),
)
.map(|(_year, _month)| MatchedTimestamp::new(TimestampPrecision::Month))
.parse(self)
@@ -1273,7 +1370,7 @@ impl<'data> TextBufferView<'data> {
Self::match_timestamp_month,
Self::match_timestamp_day,
)),
- pair(opt(tag("T")), Self::peek_stop_character),
+ pair(opt(complete_tag("T")), Self::peek_stop_character),
)
.map(|_| MatchedTimestamp::new(TimestampPrecision::Day))
.parse(self)
@@ -1344,10 +1441,10 @@ impl<'data> TextBufferView<'data> {
/// Matches the month component of a timestamp, including a leading `-`.
fn match_timestamp_month(self) -> IonMatchResult<'data> {
preceded(
- tag("-"),
+ complete_tag("-"),
recognize(alt((
- pair(char('0'), one_of("123456789")),
- pair(char('1'), one_of("012")),
+ pair(complete_char('0'), complete_one_of("123456789")),
+ pair(complete_char('1'), complete_one_of("012")),
))),
)(self)
}
@@ -1355,11 +1452,11 @@ impl<'data> TextBufferView<'data> {
/// Matches the day component of a timestamp, including a leading `-`.
fn match_timestamp_day(self) -> IonMatchResult<'data> {
preceded(
- tag("-"),
+ complete_tag("-"),
recognize(alt((
- pair(char('0'), one_of("123456789")),
- pair(one_of("12"), Self::match_any_digit),
- pair(char('3'), one_of("01")),
+ pair(complete_char('0'), complete_one_of("123456789")),
+ pair(complete_one_of("12"), Self::match_any_digit),
+ pair(complete_char('3'), complete_one_of("01")),
))),
)(self)
}
@@ -1374,13 +1471,13 @@ impl<'data> TextBufferView<'data> {
separated_pair(
// Hour
recognize(alt((
- pair(one_of("01"), Self::match_any_digit),
- pair(char('2'), one_of("0123")),
+ pair(complete_one_of("01"), Self::match_any_digit),
+ pair(complete_char('2'), complete_one_of("0123")),
))),
// Delimiter
- tag(":"),
+ complete_tag(":"),
// Minutes
- recognize(pair(one_of("012345"), Self::match_any_digit)),
+ recognize(pair(complete_one_of("012345"), Self::match_any_digit)),
),
)(self)
}
@@ -1388,24 +1485,27 @@ impl<'data> TextBufferView<'data> {
/// Matches a leading `:`, and any two-digit second component from `00` to `59` inclusive.
fn match_timestamp_seconds(self) -> IonMatchResult<'data> {
preceded(
- tag(":"),
- recognize(pair(one_of("012345"), Self::match_any_digit)),
+ complete_tag(":"),
+ recognize(pair(complete_one_of("012345"), Self::match_any_digit)),
)(self)
}
/// Matches the fractional seconds component of a timestamp, including a leading `.`.
fn match_timestamp_fractional_seconds(self) -> IonMatchResult<'data> {
- preceded(tag("."), digit1)(self)
+ preceded(complete_tag("."), digit1)(self)
}
/// Matches a timestamp offset of any format.
fn match_timestamp_offset(self) -> IonParseResult<'data, MatchedTimestampOffset> {
alt((
- value(MatchedTimestampOffset::Zulu, tag("Z")),
- value(MatchedTimestampOffset::Zulu, tag("+00:00")),
- value(MatchedTimestampOffset::Unknown, tag("-00:00")),
+ value(MatchedTimestampOffset::Zulu, complete_tag("Z")),
+ value(MatchedTimestampOffset::Zulu, complete_tag("+00:00")),
+ value(MatchedTimestampOffset::Unknown, complete_tag("-00:00")),
map(
- pair(one_of("-+"), Self::match_timestamp_offset_hours_and_minutes),
+ pair(
+ complete_one_of("-+"),
+ Self::match_timestamp_offset_hours_and_minutes,
+ ),
|(sign, (hours, _minutes))| {
let is_negative = sign == '-';
let hours_offset = hours.offset();
@@ -1424,13 +1524,13 @@ impl<'data> TextBufferView<'data> {
separated_pair(
// Hour
recognize(alt((
- pair(one_of("01"), Self::match_any_digit),
- pair(char('2'), one_of("0123")),
+ pair(complete_one_of("01"), Self::match_any_digit),
+ pair(complete_char('2'), complete_one_of("0123")),
))),
// Delimiter
- tag(":"),
+ complete_tag(":"),
// Minutes
- recognize(pair(one_of("012345"), Self::match_any_digit)),
+ recognize(pair(complete_one_of("012345"), Self::match_any_digit)),
)(self)
}
@@ -1489,7 +1589,7 @@ impl<'data> TextBufferView<'data> {
fn validate_clob_text(self) -> IonMatchResult<'data> {
for byte in self.bytes().iter().copied() {
if !Self::byte_is_legal_clob_ascii(byte) {
- let message = format!("found an illegal byte '{:0x}'in clob", byte);
+ let message = format!("found an illegal byte '{:0x}' in clob", byte);
let error = InvalidInputError::new(self).with_description(message);
return Err(nom::Err::Failure(IonParseError::Invalid(error)));
}
@@ -1759,7 +1859,7 @@ mod tests {
/// contents of the input are considered a complete token.
fn new(input: &str) -> Self {
MatchTest {
- input: format!("{input}\n0"), // add whitespace and a trailing value
+ input: input.to_string(),
}
}
@@ -1776,11 +1876,12 @@ mod tests {
P: Parser<TextBufferView<'data>, O, IonParseError<'data>>,
{
let result = self.try_match(parser);
- let (_remaining, match_length) = result.unwrap();
+ let (_remaining, match_length) = result
+ .unwrap_or_else(|_| panic!("Unexpected parse fail for input '{}'", self.input));
// Inputs have a trailing newline and `0` that should _not_ be part of the match
assert_eq!(
match_length,
- self.input.len() - 2,
+ self.input.len(),
"\nInput: '{}'\nMatched: '{}'\n",
self.input,
&self.input[..match_length]
@@ -1797,8 +1898,8 @@ mod tests {
if let Ok((_remaining, match_length)) = result {
assert_ne!(
match_length,
- self.input.len() - 1,
- "parser unexpectedly matched the complete input: '{:?}\nResult: {:?}",
+ self.input.len(),
+ "parser unexpectedly matched the complete input: {:?}\nResult: {:?}",
self.input,
result
);
@@ -1809,6 +1910,7 @@ mod tests {
#[test]
fn test_match_stop_char() {
MatchTest::new(" ").expect_match(match_length(TextBufferView::match_stop_character));
+ MatchTest::new("").expect_match(match_length(TextBufferView::match_stop_character));
}
#[test]
@@ -1961,7 +2063,7 @@ mod tests {
let good_inputs = &[
"0.0e0", "0E0", "0e0", "305e1", "305e+1", "305e-1", "305e100", "305e-100", "305e+100",
- "305.0e1", "0.279e3", "279e0", "279.5e0", "279.5E0",
+ "305.0e1", "0.279e3", "0.279e-3", "279e0", "279.5e0", "279.5E0",
];
for input in good_inputs {
match_float(input);
@@ -2147,7 +2249,7 @@ mod tests {
match_annotated_value(input);
}
- let bad_inputs = &["foo", "foo:bar", "foo:::bar"];
+ let bad_inputs = &["foo::", "foo:bar", "foo:::bar"];
for input in bad_inputs {
mismatch_annotated_value(input);
}
@@ -2169,7 +2271,8 @@ mod tests {
}
let bad_inputs = &[
- "5", "5d", "05d", "-5d", "5.d", "-5.d", "5.D", "-5.D", "-5.0+0",
+ "123._456", "5", "5d", "05d", "-5d", "5.d", "-5.d", "5.D", "-5.D", "5.1d", "-5.1d",
+ "5.1D", "-5.1D", "-5.0+0",
];
for input in bad_inputs {
mismatch_decimal(input);
diff --git a/src/lazy/text/encoded_value.rs b/src/lazy/text/encoded_value.rs
index 970c0aa7..066d4ab0 100644
--- a/src/lazy/text/encoded_value.rs
+++ b/src/lazy/text/encoded_value.rs
@@ -1,5 +1,5 @@
use crate::lazy::text::buffer::TextBufferView;
-use crate::lazy::text::matched::{MatchedSymbol, MatchedValue};
+use crate::lazy::text::matched::{MatchedFieldName, MatchedValue};
use crate::result::IonFailure;
use crate::{IonResult, IonType, RawSymbolTokenRef};
use std::ops::Range;
@@ -63,7 +63,7 @@ pub(crate) struct EncodedTextValue {
// recognized during matching and partial information like subfield offsets can be stored here.
matched_value: MatchedValue,
- field_name_syntax: Option<MatchedSymbol>,
+ field_name_syntax: Option<MatchedFieldName>,
}
impl EncodedTextValue {
@@ -93,7 +93,7 @@ impl EncodedTextValue {
// $10
pub(crate) fn with_field_name(
mut self,
- field_name_syntax: MatchedSymbol,
+ field_name_syntax: MatchedFieldName,
offset: usize,
length: usize,
) -> EncodedTextValue {
@@ -203,7 +203,7 @@ impl EncodedTextValue {
self.data_length + u32::max(self.annotations_offset, self.field_name_offset) as usize
}
- pub fn field_name_syntax(&self) -> Option<MatchedSymbol> {
+ pub fn field_name_syntax(&self) -> Option<MatchedFieldName> {
self.field_name_syntax
}
@@ -215,6 +215,7 @@ impl EncodedTextValue {
#[cfg(test)]
mod tests {
use super::*;
+ use crate::lazy::text::matched::MatchedSymbol;
#[test]
fn total_length_data_only() {
@@ -225,7 +226,7 @@ mod tests {
#[test]
fn total_length_data_with_field_name() {
let value = EncodedTextValue::new(MatchedValue::Null(IonType::Null), 100, 12)
- .with_field_name(MatchedSymbol::Identifier, 90, 4);
+ .with_field_name(MatchedFieldName::Symbol(MatchedSymbol::Identifier), 90, 4);
assert_eq!(value.total_length(), 22);
}
@@ -239,13 +240,13 @@ mod tests {
#[test]
fn total_length_data_with_field_name_and_annotations() {
let value = EncodedTextValue::new(MatchedValue::Null(IonType::Null), 100, 12)
- .with_field_name(MatchedSymbol::Identifier, 90, 4)
+ .with_field_name(MatchedFieldName::Symbol(MatchedSymbol::Identifier), 90, 4)
.with_annotations_sequence(94, 6);
assert_eq!(value.total_length(), 22);
// Same test but with extra whitespace between the components
let value = EncodedTextValue::new(MatchedValue::Null(IonType::Null), 100, 12)
- .with_field_name(MatchedSymbol::Identifier, 80, 4)
+ .with_field_name(MatchedFieldName::Symbol(MatchedSymbol::Identifier), 80, 4)
.with_annotations_sequence(91, 6);
assert_eq!(value.total_length(), 32, "{:?}", value);
}
diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs
index abd7c233..7c90df62 100644
--- a/src/lazy/text/matched.rs
+++ b/src/lazy/text/matched.rs
@@ -23,9 +23,11 @@ use std::borrow::Cow;
use std::num::IntErrorKind;
use std::str::FromStr;
+use nom::branch::alt;
+use nom::bytes::streaming::tag;
use nom::character::is_hex_digit;
use nom::sequence::preceded;
-use nom::AsChar;
+use nom::{AsChar, Parser};
use num_bigint::{BigInt, BigUint};
use num_traits::Num;
use smallvec::SmallVec;
@@ -61,6 +63,26 @@ pub(crate) enum MatchedValue {
Struct,
}
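+/// The syntax used to encode a struct field name in the text: either a symbol token
+/// or a string literal.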
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub(crate) enum MatchedFieldName {
+ Symbol(MatchedSymbol),
+ String(MatchedString),
+}
+
+impl MatchedFieldName {
+ pub fn read<'data>(
+ &self,
+ matched_input: TextBufferView<'data>,
+ ) -> IonResult<RawSymbolTokenRef<'data>> {
+ match self {
+ MatchedFieldName::Symbol(matched_symbol) => matched_symbol.read(matched_input),
+ MatchedFieldName::String(matched_string) => {
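+ // A quoted-string field name is read as text and converted into the same token
+ // type that a symbol field name produces.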
+ matched_string.read(matched_input).map(|s| s.into())
+ }
+ }
+ }
+}
+
/// A partially parsed Ion int.
#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) struct MatchedInt {
@@ -128,7 +150,11 @@ impl MatchedInt {
}
};
- Ok(int)
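+ // The digits above encode only the magnitude; apply the sign that was recorded
+ // when the int was matched.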
+ if self.is_negative {
+ Ok(-int)
+ } else {
+ Ok(int)
+ }
}
}
@@ -178,7 +204,7 @@ pub(crate) struct MatchedDecimal {
is_negative: bool,
digits_offset: u16,
digits_length: u16,
- trailing_digits_length: u16,
+ num_trailing_digits: u16,
exponent_is_negative: bool,
exponent_digits_offset: u16,
exponent_digits_length: u16,
@@ -192,7 +218,7 @@ impl MatchedDecimal {
is_negative: bool,
digits_offset: u16,
digits_length: u16,
- trailing_digits_length: u16,
+ num_trailing_digits: u16,
exponent_is_negative: bool,
exponent_offset: u16,
exponent_length: u16,
@@ -201,7 +227,7 @@ impl MatchedDecimal {
is_negative,
digits_offset,
digits_length,
- trailing_digits_length,
+ num_trailing_digits,
exponent_is_negative,
exponent_digits_offset: exponent_offset,
exponent_digits_length: exponent_length,
@@ -273,7 +299,7 @@ impl MatchedDecimal {
}
};
- exponent -= self.trailing_digits_length as i64;
+ exponent -= self.num_trailing_digits as i64;
Ok(Decimal::new(coefficient, exponent))
}
@@ -365,7 +391,7 @@ impl MatchedString {
// This is the same parser that matched the input initially, which means that the only
// reason it wouldn't succeed here is if the input is empty, meaning we're done reading.
while let Ok((remaining_after_match, (segment_body, _has_escapes))) = preceded(
- TextBufferView::match_optional_whitespace,
+ TextBufferView::match_optional_comments_and_whitespace,
TextBufferView::match_long_string_segment,
)(remaining)
{
@@ -519,25 +545,36 @@ fn decode_escape_into_bytes<'data>(
b'b' => 0x08u8, // backspace
b'v' => 0x0Bu8, // vertical tab
b'f' => 0x0Cu8, // form feed
- // If the byte following the '\' is a real newline (that is: 0x0A), we discard it.
- b'\n' => return Ok(input_after_escape),
+ // If the '\' is followed by a carriage return + line feed pair, discard both bytes.
+ b'\r' if input_after_escape.bytes().first() == Some(&b'\n') => {
+ return Ok(input_after_escape.slice_to_end(1))
+ }
+ // If the next byte is a CR or LF, discard it.
+ b'\r' | b'\n' => return Ok(input_after_escape),
// These cases require more sophisticated parsing, not just a 1-to-1 mapping of bytes
- b'x' => return decode_hex_digits_escape(2, input_after_escape, sanitized),
- // Clobs represent text of some encoding, but it may or may not be a flavor of Unicode.
- // As such, clob syntax does not support Unicode escape sequences like `\u` or `\U`.
- b'u' if support_unicode_escapes => {
- return decode_hex_digits_escape(4, input_after_escape, sanitized)
+ b'x' => {
+ return decode_hex_digits_escape(
+ 2,
+ input_after_escape,
+ sanitized,
+ support_unicode_escapes,
+ )
}
- b'U' if support_unicode_escapes => {
- return decode_hex_digits_escape(8, input_after_escape, sanitized)
+ b'u' => {
+ return decode_hex_digits_escape(
+ 4,
+ input_after_escape,
+ sanitized,
+ support_unicode_escapes,
+ )
}
- b'u' | b'U' => {
- return Err(IonError::Decoding(
- DecodingError::new(
- "Unicode escape sequences (\\u, \\U) are not legal in this context",
- )
- .with_position(input.offset()),
- ))
+ b'U' => {
+ return decode_hex_digits_escape(
+ 8,
+ input_after_escape,
+ sanitized,
+ support_unicode_escapes,
+ )
}
_ => {
return Err(IonError::Decoding(
@@ -557,6 +594,7 @@ fn decode_hex_digits_escape<'data>(
num_digits: usize,
input: TextBufferView<'data>,
sanitized: &mut Vec<u8>,
+ support_unicode_escapes: bool,
) -> IonResult<TextBufferView<'data>> {
if input.len() < num_digits {
return Err(IonError::Decoding(
@@ -569,6 +607,15 @@ fn decode_hex_digits_escape<'data>(
));
}
+ // Clobs represent text of some encoding, but it may or may not be a flavor of Unicode.
+ // As such, clob syntax does not support Unicode escape sequences like `\u` or `\U`.
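+ // (A 2-digit escape is `\x`, which is always legal; anything longer is `\u` or `\U`.)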
+ if num_digits != 2 && !support_unicode_escapes {
+ return Err(IonError::Decoding(
+ DecodingError::new("Unicode escape sequences (\\u, \\U) are not legal in this context")
+ .with_position(input.offset()),
+ ));
+ }
+
let hex_digit_bytes = &input.bytes()[..num_digits];
let all_are_hex_digits = hex_digit_bytes
@@ -588,38 +635,28 @@ fn decode_hex_digits_escape<'data>(
// Isolate the portion of the input that follows the hex digits so we can return it.
let remaining_input = input.slice_to_end(num_digits);
- // We just confirmed all of the digits are ASCII hex digits, so these steps cannot fail.
- // We can unwrap() in each case.
+ // We just confirmed all of the digits are ASCII hex digits, so this step cannot fail.
let hex_digits = std::str::from_utf8(hex_digit_bytes).unwrap();
- // If this was a '\x' escape, we cannot interpret the hex digits as a Unicode scalar. We treat
- // it as a byte literal instead.
- if num_digits == 2 {
- let byte = u8::from_str_radix(hex_digits, 16).unwrap();
- sanitized.push(byte);
+
+ if !support_unicode_escapes {
+ // Inside a clob, \x is a byte literal, not a Unicode code point.
+ let byte_literal = u8::from_str_radix(hex_digits, 16).unwrap();
+ sanitized.push(byte_literal);
return Ok(remaining_input);
}
- // From here on, we know that the escape was either `\u` or `\U`--a Unicode scalar.
- // Note that this means we are not processing a clob (which doesn't support Unicode) and can
- // further infer that we are working with UTF-8, the only supported encoding for strings/symbols.
let code_point = u32::from_str_radix(hex_digits, 16).unwrap();
// Check to see if this is a high surrogate; if it is, our code point isn't complete. Another
// unicode escape representing the low surrogate has to be next in the input to complete it.
- // See the docs for this helper function for details. (Note: this will only ever be true for
- // 4- and 8-digit escape sequences. `\x` escapes don't have enough digits to represent a
- // high surrogate.)
+ // See the docs for the `code_point_is_a_high_surrogate` helper function for details.
+ // (Note: this will only ever be true for 4- and 8-digit escape sequences. `\x` escapes don't
+ // have enough digits to represent a high surrogate.)
if code_point_is_a_high_surrogate(code_point) {
// The spec has MAY-style language around supporting high surrogates. Supporting them is
- // allowed but discouraged. For the time being, we will return an error. Other implementations
- // (notably ion-java) support high surrogates largely for resilience/debugging. We can consider
- // adding that support if there is demand for it.
- return Err(IonError::Decoding(
- DecodingError::new(
- "found a Unicode high surrogate; UTF-16 is not legal in Ion strings/symbols",
- )
- .with_position(input.offset()),
- ));
+ // allowed but discouraged. The ion-tests spec conformance tests include cases with UTF-16
+ // surrogates, so ion-rust supports them.
+ return complete_surrogate_pair(sanitized, code_point, remaining_input);
}
// A Rust `char` can represent any Unicode scalar value--a code point that is not part of a
@@ -634,6 +671,57 @@ fn decode_hex_digits_escape<'data>(
Ok(remaining_input)
}
+/// Reads another escaped code point from the buffer, treating it as the low surrogate to be paired
+/// with the specified high surrogate. Appends the UTF-8 encoding of the resulting Unicode scalar
+/// to `sanitized` and returns the remaining text in the buffer.
+fn complete_surrogate_pair<'data>(
+ sanitized: &mut Vec<u8>,
+ high_surrogate: u32,
+ input: TextBufferView<'data>,
+) -> IonResult<TextBufferView<'data>> {
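+ // Match the next escape sequence--`\xHH`, `\uHHHH`, or `\UHHHHHHHH`--and capture its hex digits.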
+ let mut match_next_codepoint = preceded(
+ tag("\\"),
+ alt((
+ preceded(tag("x"), TextBufferView::match_n_hex_digits(2)),
+ preceded(tag("u"), TextBufferView::match_n_hex_digits(4)),
+ preceded(tag("U"), TextBufferView::match_n_hex_digits(8)),
+ )),
+ );
+ let (remaining, hex_digits) = match match_next_codepoint.parse(input) {
+ Ok((remaining, hex_digits)) => (remaining, hex_digits),
+ Err(_) => {
+ let error =
+ DecodingError::new("found a high surrogate not followed by a low surrogate")
+ .with_position(input.offset());
+ return Err(IonError::Decoding(error));
+ }
+ };
+ let high_surrogate = high_surrogate as u16;
+
+ let hex_digits = std::str::from_utf8(hex_digits.bytes()).unwrap();
+ let low_surrogate = u16::from_str_radix(hex_digits, 16).map_err(|_| {
+ let error =
+ DecodingError::new("low surrogate did not fit in a u16").with_position(input.offset());
+ IonError::Decoding(error)
+ })?;
+
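+ // `char::decode_utf16` combines the high/low surrogate pair into a single Unicode scalar.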
+ let character = char::decode_utf16([high_surrogate, low_surrogate])
+ .next()
+ .unwrap()
+ .map_err(|_| {
+ let error = DecodingError::new("encountered invalid surrogate pair")
+ .with_position(input.offset());
+ IonError::Decoding(error)
+ })?;
+
+ let utf8_buffer: &mut [u8; 4] = &mut [0; 4];
+ let utf8_encoded = character.encode_utf8(utf8_buffer);
+ sanitized.extend_from_slice(utf8_encoded.as_bytes());
+ Ok(remaining)
+}
+
/// Returns `true` if the provided code point is a utf-16 high surrogate.
///
/// Terse primer: Unicode text is made up of a stream of unsigned integers called 'code points'.
@@ -1050,9 +1138,48 @@ impl MatchedClob {
#[cfg(test)]
mod tests {
+ use std::str::FromStr;
+
+ use num_bigint::BigInt;
+
use crate::lazy::bytes_ref::BytesRef;
use crate::lazy::text::buffer::TextBufferView;
- use crate::{Decimal, IonResult, Timestamp};
+ use crate::{Decimal, Int, IonResult, Timestamp};
+
+ #[test]
+ fn read_ints() -> IonResult<()> {
+ fn expect_int(data: &str, expected: impl Into<Int>) {
+ let expected: Int = expected.into();
+ let buffer = TextBufferView::new(data.as_bytes());
+ let (_remaining, matched) = buffer.match_int().unwrap();
+ let actual = matched.read(buffer).unwrap();
+ assert_eq!(
+ actual, expected,
+ "Actual didn't match expected for input '{}'.\n{:?}\n!=\n{:?}",
+ data, actual, expected
+ );
+ }
+
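+ // Note that Ion text allows `_` as a digit separator in integers.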
+ let tests = [
+ ("-5", Int::from(-5)),
+ ("0", Int::from(0)),
+ (
+ "1234567890_1234567890_1234567890_1234567890",
+ Int::from(BigInt::from_str("1234567890_1234567890_1234567890_1234567890").unwrap()),
+ ),
+ (
+ "-1234567890_1234567890_1234567890_1234567890",
+ Int::from(
+ BigInt::from_str("-1234567890_1234567890_1234567890_1234567890").unwrap(),
+ ),
+ ),
+ ];
+
+ for (input, expected) in tests {
+ expect_int(input, expected);
+ }
+ Ok(())
+ }
#[test]
fn read_timestamps() -> IonResult<()> {
@@ -1159,10 +1286,21 @@ mod tests {
#[test]
fn read_decimals() -> IonResult<()> {
fn expect_decimal(data: &str, expected: Decimal) {
- let data = format!("{data} "); // Append a space
let buffer = TextBufferView::new(data.as_bytes());
- let (_remaining, matched) = buffer.match_decimal().unwrap();
- let actual = matched.read(buffer).unwrap();
+ let result = buffer.match_decimal();
+ assert!(
+ result.is_ok(),
+ "Unexpected match error for input: '{data}': {:?}",
+ result
+ );
+ let (_remaining, matched) = result.unwrap();
+ let result = matched.read(buffer);
+ assert!(
+ result.is_ok(),
+ "Unexpected read error for input '{data}': {:?}",
+ result
+ );
+ let actual = result.unwrap();
assert_eq!(
actual, expected,
"Actual didn't match expected for input '{}'.\n{:?}\n!=\n{:?}",
@@ -1179,8 +1317,6 @@ mod tests {
("-5.d0", Decimal::new(-5, 0)),
("5.0", Decimal::new(50, -1)),
("-5.0", Decimal::new(-50, -1)),
- ("5.0d", Decimal::new(50, -1)),
- ("-5.0d", Decimal::new(-50, -1)),
("500d0", Decimal::new(5, 2)),
("-500d0", Decimal::new(-5, 2)),
("0.005", Decimal::new(5, -3)),
@@ -1259,14 +1395,11 @@ mod tests {
(r"'''he''' '''llo'''", "hello"),
(r#""😎🙂🙃""#, "😎🙂🙃"),
(r"'''😎🙂''' '''🙃'''", "😎🙂🙃"),
- // The below bytes are the UTF-8 encoding of Unicode code points: U+2764 U+FE0F
- (r#""\xe2\x9d\xa4\xef\xb8\x8f""#, "❤️"),
- (r"'''\xe2\x9d\xa4\xef\xb8\x8f'''", "❤️"),
(r"'''\u2764\uFE0F'''", "❤️"),
(r"'''\U00002764\U0000FE0F'''", "❤️"),
- // In short strings, unescaped newlines are not normalized.
- ("\"foo\rbar\r\nbaz\"", "foo\rbar\r\nbaz"),
- // In long-form strings, unescaped newlines converted to `\n`.
+ // In short strings, carriage returns are not normalized.
+ ("\"foo\rbar\rbaz\"", "foo\rbar\rbaz"),
+ // In long-form strings, all unescaped newlines are converted to `\n`.
("'''foo\rbar\r\nbaz'''", "foo\nbar\nbaz"),
];
@@ -1297,7 +1430,13 @@ mod tests {
}
fn expect_clob(data: &str, expected: &str) {
- let actual = read_clob(data).unwrap();
+ let result = read_clob(data);
+ assert!(
+ result.is_ok(),
+ "Unexpected read failure for input '{data}': {:?}",
+ result
+ );
+ let actual = result.unwrap();
assert_eq!(
actual,
expected.as_ref(),
@@ -1319,7 +1458,8 @@ mod tests {
(r#"{{"hello"}}"#, "hello"),
(r#"{{"\x4D"}}"#, "M"),
(r#"{{"\x4d \x4d \x4d"}}"#, "M M M"),
- // The below bytes are the UTF-8 encoding of Unicode code points: U+2764 U+FE0F
+ (r"{{'''\x4d''' '''\x4d''' '''\x4d'''}}", "MMM"),
+ // The below byte literals are the UTF-8 encoding of Unicode code points: U+2764 U+FE0F
(r#"{{"\xe2\x9d\xa4\xef\xb8\x8f"}}"#, "❤️"),
(r#"{{'''hel''' '''lo'''}}"#, "hello"),
(
@@ -1333,8 +1473,8 @@ mod tests {
),
// In a long-form clob, unescaped `\r` and `\r\n` are normalized into unescaped `\n`
("{{'''foo\rbar\r\nbaz'''}}", "foo\nbar\nbaz"),
- // In a short-form clob, newlines are not normalized.
- ("{{\"foo\rbar\r\nbaz\"}}", "foo\rbar\r\nbaz"),
+ // In a short-form clob, carriage returns are not normalized.
+ ("{{\"foo\rbar\rbaz\"}}", "foo\rbar\rbaz"),
];
for (input, expected) in tests {
diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs
index 5e6254e6..b8d497df 100644
--- a/src/lazy/text/raw/reader.rs
+++ b/src/lazy/text/raw/reader.rs
@@ -55,6 +55,14 @@ impl<'data> LazyRawTextReader<'data> {
let (remaining, matched) = buffer_after_whitespace
.match_top_level_item()
.with_context("reading a top-level value", buffer_after_whitespace)?;
+
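+ // This reader only understands Ion 1.0; any other version marker is an error.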
+ if let RawStreamItem::VersionMarker(major, minor) = matched {
+ if (major, minor) != (1, 0) {
+ return IonResult::decoding_error(format!(
+ "Ion version {major}.{minor} is not supported"
+ ));
+ }
+ }
// Since we successfully matched the next value, we'll update the buffer
// so a future call to `next()` will resume parsing the remaining input.
self.buffer = remaining;
@@ -191,8 +199,9 @@ mod tests {
// Second item
2 /*comment before comma*/,
// Third item
- 3
+ 3, // Final trailing comma
]
+
// S-Expression
(
diff --git a/src/lazy/text/raw/sequence.rs b/src/lazy/text/raw/sequence.rs
index aee5d58e..3a0269ef 100644
--- a/src/lazy/text/raw/sequence.rs
+++ b/src/lazy/text/raw/sequence.rs
@@ -26,8 +26,9 @@ impl<'data> LazyRawTextList<'data> {
}
pub fn iter(&self) -> RawTextListIterator<'data> {
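+ // `data_offset()` is an offset into the overall stream; subtracting the buffer's own
+ // offset converts it to the index of the opening `[` within `self.value.input`.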
+ let open_bracket_index = self.value.encoded_value.data_offset() - self.value.input.offset();
// Make an iterator over the input bytes that follow the initial `[`
- RawTextListIterator::new(self.value.input.slice_to_end(1))
+ RawTextListIterator::new(self.value.input.slice_to_end(open_bracket_index + 1))
}
}
@@ -114,9 +115,17 @@ impl<'data> RawTextListIterator<'data> {
// ...or there aren't values, so it's just the input after the opening delimiter.
self.input
};
- let (input_after_ws, _ws) = input_after_last
- .match_optional_comments_and_whitespace()
- .with_context("seeking the end of a list", input_after_last)?;
+ let (mut input_after_ws, _ws) =
+ input_after_last
+ .match_optional_comments_and_whitespace()
+ .with_context("seeking the end of a list", input_after_last)?;
+ // Skip an optional comma and more whitespace
+ if input_after_ws.bytes().first() == Some(&b',') {
+ (input_after_ws, _) = input_after_ws
+ .slice_to_end(1)
+ .match_optional_comments_and_whitespace()
+ .with_context("skipping a list's trailing comma", input_after_ws)?;
+ }
let (input_after_end, _end_delimiter) = satisfy(|c| c == ']')(input_after_ws)
.with_context("seeking the closing delimiter of a list", input_after_ws)?;
let end = input_after_end.offset();
@@ -136,7 +145,10 @@ impl<'data> Iterator for RawTextListIterator<'data> {
self.input = remaining;
Some(Ok(value))
}
- Ok((_remaining, None)) => None,
+ Ok((_remaining, None)) => {
+ // Don't advance `self.input`; subsequent calls will continue to return `None`.
+ None
+ }
Err(e) => {
self.has_returned_error = true;
e.with_context("reading the next list value", self.input)
@@ -159,8 +171,9 @@ impl<'data> LazyRawTextSExp<'data> {
}
pub fn iter(&self) -> RawTextSExpIterator<'data> {
+ let open_paren_index = self.value.encoded_value.data_offset() - self.value.input.offset();
// Make an iterator over the input bytes that follow the initial `(`
- RawTextSExpIterator::new(self.value.input.slice_to_end(1))
+ RawTextSExpIterator::new(self.value.input.slice_to_end(open_paren_index + 1))
}
}
diff --git a/src/lazy/text/raw/struct.rs b/src/lazy/text/raw/struct.rs
index 44ddf14b..f73030ef 100644
--- a/src/lazy/text/raw/struct.rs
+++ b/src/lazy/text/raw/struct.rs
@@ -39,9 +39,17 @@ impl<'data> RawTextStructIterator<'data> {
// ...or there aren't fields, so it's just the input after the opening delimiter.
self.input
};
- let (input_after_ws, _ws) = input_after_last
- .match_optional_comments_and_whitespace()
- .with_context("seeking the end of a struct", input_after_last)?;
+ let (mut input_after_ws, _ws) =
+ input_after_last
+ .match_optional_comments_and_whitespace()
+ .with_context("seeking the end of a struct", input_after_last)?;
+ // Skip an optional comma and more whitespace
+ if input_after_ws.bytes().first() == Some(&b',') {
+ (input_after_ws, _) = input_after_ws
+ .slice_to_end(1)
+ .match_optional_comments_and_whitespace()
+ .with_context("skipping a list's trailing comma", input_after_ws)?;
+ }
let (input_after_end, _end_delimiter) = satisfy(|c| c == b'}' as char)(input_after_ws)
.with_context("seeking the closing delimiter of a struct", input_after_ws)?;
let end = input_after_end.offset();
@@ -173,8 +181,9 @@ impl<'data> LazyRawStruct<'data, TextEncoding> for LazyRawTextStruct<'data> {
}
fn iter(&self) -> Self::Iterator {
+ let open_brace_index = self.value.encoded_value.data_offset() - self.value.input.offset();
// Slice the input to skip the opening `{`
- RawTextStructIterator::new(self.value.input.slice_to_end(1))
+ RawTextStructIterator::new(self.value.input.slice_to_end(open_brace_index + 1))
}
}
diff --git a/src/lazy/value.rs b/src/lazy/value.rs
index 26ed7ba5..7898bd1b 100644
--- a/src/lazy/value.rs
+++ b/src/lazy/value.rs
@@ -1,3 +1,5 @@
+use std::borrow::Cow;
+
use crate::lazy::decoder::{LazyDecoder, LazyRawValue};
use crate::lazy::encoding::BinaryEncoding;
use crate::lazy::r#struct::LazyStruct;
@@ -9,7 +11,6 @@ use crate::{
Annotations, Element, IntoAnnotatedElement, IonError, IonResult, IonType, RawSymbolTokenRef,
SymbolRef, SymbolTable, Value,
};
-use std::borrow::Cow;
/// A value in a binary Ion stream whose header has been parsed but whose body (i.e. its data) has
/// not. A `LazyValue` is immutable; its data can be read any number of times.
@@ -359,12 +360,13 @@ impl<'top, 'data, D: LazyDecoder<'data>> TryFrom IonResult<()> {
diff --git a/src/lazy/value_ref.rs b/src/lazy/value_ref.rs
index 2c47907d..5b6c3eb0 100644
--- a/src/lazy/value_ref.rs
+++ b/src/lazy/value_ref.rs
@@ -5,7 +5,7 @@ use crate::lazy::r#struct::LazyStruct;
use crate::lazy::sequence::{LazyList, LazySExp};
use crate::lazy::str_ref::StrRef;
use crate::result::IonFailure;
-use crate::{Decimal, Int, IonError, IonResult, IonType, SymbolRef, Timestamp};
+use crate::{Decimal, Element, Int, IonError, IonResult, IonType, SymbolRef, Timestamp};
use std::fmt::{Debug, Formatter};
/// A [ValueRef] represents a value that has been read from the input stream. Scalar variants contain
@@ -97,6 +97,15 @@ impl<'top, 'data, D: LazyDecoder<'data>> TryFrom<ValueRef<'top, 'data, D>> for V
}
}
+impl<'top, 'data, D: LazyDecoder<'data>> TryFrom<ValueRef<'top, 'data, D>> for Element {
+ type Error = IonError;
+
+ fn try_from(value_ref: ValueRef<'top, 'data, D>) -> Result<Self, Self::Error> {
+ let value: Value = value_ref.try_into()?;
+ Ok(value.into())
+ }
+}
+
impl<'top, 'data, D: LazyDecoder<'data>> ValueRef<'top, 'data, D> {
pub fn expect_null(self) -> IonResult<IonType> {
if let ValueRef::Null(ion_type) = self {
@@ -166,7 +175,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> ValueRef<'top, 'data, D> {
if let ValueRef::Symbol(s) = self {
Ok(s)
} else {
- IonResult::decoding_error("expected a symbol")
+ IonResult::decoding_error(format!("expected a symbol, found {:?}", self))
}
}
diff --git a/src/raw_symbol_token_ref.rs b/src/raw_symbol_token_ref.rs
index 961bab97..67b54ffe 100644
--- a/src/raw_symbol_token_ref.rs
+++ b/src/raw_symbol_token_ref.rs
@@ -9,6 +9,18 @@ pub enum RawSymbolTokenRef<'a> {
Text(Cow<'a, str>),
}
+impl<'a> RawSymbolTokenRef<'a> {
+ /// Returns `true` if this token matches either the specified symbol ID or text value.
+ /// This is useful for comparing tokens that represent system symbol values of an unknown
+ /// encoding.
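+ /// For example, `matches_sid_or_text(3, "$ion_symbol_table")` is `true` whether the token
+ /// was written as `$3` or as the text `$ion_symbol_table`.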
+ pub fn matches_sid_or_text(&self, symbol_id: SymbolId, symbol_text: &str) -> bool {
+ match self {
+ RawSymbolTokenRef::SymbolId(sid) => symbol_id == *sid,
+ RawSymbolTokenRef::Text(text) => symbol_text == text,
+ }
+ }
+}
+
/// Implemented by types that can be viewed as a [RawSymbolTokenRef] without allocations.
pub trait AsRawSymbolTokenRef {
fn as_raw_symbol_token_ref(&self) -> RawSymbolTokenRef;
diff --git a/src/text/parsers/clob.rs b/src/text/parsers/clob.rs
index 860d8d5b..3d322913 100644
--- a/src/text/parsers/clob.rs
+++ b/src/text/parsers/clob.rs
@@ -208,7 +208,7 @@ mod clob_parsing_tests {
// parse tests for long clob
parse_equals("{{'''Hello''' '''world'''}}", "Helloworld");
parse_equals("{{'''Hello world'''}}", "Hello world");
- parse_equals("{{'''\\xe2\\x9d\\xa4\\xef\\xb8\\x8f\'''}}", "❤️");
+ parse_equals(r"{{'''\xe2\x9d\xa4\xef\xb8\x8f'''}}", "❤️");
// Clobs represent text of some encoding, but it may or may not be a flavor of Unicode.
// As such, clob syntax does not support Unicode escape sequences like `\u` or `\U`.
diff --git a/tests/element_test_vectors.rs b/tests/element_test_vectors.rs
index ca4c3de8..0be6332e 100644
--- a/tests/element_test_vectors.rs
+++ b/tests/element_test_vectors.rs
@@ -693,3 +693,75 @@ mod token_native_element_tests {
non_equivs(TokenNativeElementApi, file_name)
}
}
+
+#[cfg(test)]
+mod lazy_element_tests {
+ use super::*;
+ use ion_rs::lazy::reader::LazyReader;
+
+ struct LazyReaderElementApi;
+
+ impl ElementApi for LazyReaderElementApi {
+ type ElementReader<'a> = LazyReader<'a>;
+
+ fn make_reader(data: &[u8]) -> IonResult<Self::ElementReader<'_>> {
+ Ok(LazyReader::new(data))
+ }
+
+ fn global_skip_list() -> SkipList {
+ ELEMENT_GLOBAL_SKIP_LIST
+ }
+
+ fn read_one_equivs_skip_list() -> SkipList {
+ &[]
+ }
+
+ fn round_trip_skip_list() -> SkipList {
+ ELEMENT_ROUND_TRIP_SKIP_LIST
+ }
+
+ fn equivs_skip_list() -> SkipList {
+ ELEMENT_EQUIVS_SKIP_LIST
+ }
+
+ fn non_equivs_skip_list() -> SkipList {
+ &[]
+ }
+ }
+
+ good_round_trip! {
+ use LazyReaderElementApi;
+ fn binary_compact(Format::Binary, Format::Text(TextKind::Compact));
+ fn binary_lines(Format::Binary, Format::Text(TextKind::Lines));
+ fn binary_pretty(Format::Binary, Format::Text(TextKind::Pretty));
+ fn compact_binary(Format::Text(TextKind::Compact), Format::Binary);
+ fn compact_lines(Format::Text(TextKind::Compact), Format::Text(TextKind::Lines));
+ fn compact_pretty(Format::Text(TextKind::Compact), Format::Text(TextKind::Pretty));
+ fn lines_binary(Format::Text(TextKind::Lines), Format::Binary);
+ fn lines_compact(Format::Text(TextKind::Lines), Format::Text(TextKind::Compact));
+ fn lines_pretty(Format::Text(TextKind::Lines), Format::Text(TextKind::Pretty));
+ fn pretty_binary(Format::Text(TextKind::Pretty), Format::Binary);
+ fn pretty_compact(Format::Text(TextKind::Pretty), Format::Text(TextKind::Compact));
+ fn pretty_lines(Format::Text(TextKind::Pretty), Format::Text(TextKind::Lines));
+ }
+
+ #[test_resources("ion-tests/iontestdata/bad/**/*.ion")]
+ #[test_resources("ion-tests/iontestdata/bad/**/*.10n")]
+ fn lazy_bad(file_name: &str) {
+ bad(LazyReaderElementApi, file_name)
+ }
+
+ #[test_resources("ion-tests/iontestdata/good/equivs/**/*.ion")]
+ #[test_resources("ion-tests/iontestdata/good/equivs/**/*.10n")]
+ fn lazy_equivs(file_name: &str) {
+ equivs(LazyReaderElementApi, file_name)
+ }
+
+ #[test_resources("ion-tests/iontestdata/good/non-equivs/**/*.ion")]
+ // no binary files exist and the macro doesn't like empty globs...
+ // see frehberg/test-generator#12
+ //#[test_resources("ion-tests/iontestdata/good/non-equivs/**/*.10n")]
+ fn lazy_non_equivs(file_name: &str) {
+ non_equivs(LazyReaderElementApi, file_name)
+ }
+}