-
Notifications
You must be signed in to change notification settings - Fork 36
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Initial raw lazy text reader (top-level nulls, bools, ints) #609
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ This file was moved to the parent directory, not deleted. It appears again later in the diff. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ This file was moved to its parent directory, not deleted. I'll call out its new location when it appears later in the diff. |
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
pub mod annotations_iterator; | ||
pub mod lazy_raw_sequence; | ||
pub mod reader; | ||
pub mod sequence; | ||
pub mod r#struct; | ||
pub mod value; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,13 @@ | ||
use crate::binary::int::DecodedInt; | ||
use crate::binary::uint::DecodedUInt; | ||
use crate::lazy::binary::encoded_value::EncodedValue; | ||
use crate::lazy::binary::encoding::BinaryEncoding; | ||
use crate::lazy::binary::immutable_buffer::ImmutableBuffer; | ||
use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; | ||
use crate::lazy::binary::raw::lazy_raw_sequence::LazyRawBinarySequence; | ||
use crate::lazy::binary::raw::r#struct::LazyRawBinaryStruct; | ||
use crate::lazy::binary::raw::sequence::LazyRawBinarySequence; | ||
use crate::lazy::decoder::private::LazyRawValuePrivate; | ||
use crate::lazy::decoder::LazyRawValue; | ||
use crate::lazy::encoding::BinaryEncoding; | ||
use crate::lazy::raw_value_ref::RawValueRef; | ||
use crate::result::IonFailure; | ||
use crate::types::SymbolId; | ||
|
@@ -35,7 +35,7 @@ impl<'a> Debug for LazyRawBinaryValue<'a> { | |
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { | ||
write!( | ||
f, | ||
"LazyRawValue {{\n val={:?},\n buf={:?}\n}}\n", | ||
"LazyRawBinaryValue {{\n val={:?},\n buf={:?}\n}}\n", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ |
||
self.encoded_value, self.input | ||
) | ||
} | ||
|
@@ -54,6 +54,10 @@ impl<'data> LazyRawValue<'data, BinaryEncoding> for LazyRawBinaryValue<'data> { | |
self.ion_type() | ||
} | ||
|
||
fn is_null(&self) -> bool { | ||
self.is_null() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ This arrangement produces a small amount of extra code (there are two |
||
} | ||
|
||
fn annotations(&self) -> RawBinaryAnnotationsIterator<'data> { | ||
self.annotations() | ||
} | ||
|
@@ -70,6 +74,10 @@ impl<'data> LazyRawBinaryValue<'data> { | |
self.encoded_value.ion_type() | ||
} | ||
|
||
pub fn is_null(&self) -> bool { | ||
self.encoded_value.header().is_null() | ||
} | ||
|
||
/// Returns `true` if this value has a non-empty annotations sequence; otherwise, returns `false`. | ||
fn has_annotations(&self) -> bool { | ||
self.encoded_value.has_annotations() | ||
|
@@ -118,7 +126,7 @@ impl<'data> LazyRawBinaryValue<'data> { | |
/// [`LazyRawBinarySequence`] or [`LazyStruct`](crate::lazy::struct::LazyStruct) | ||
/// that can be traversed to access the container's contents. | ||
pub fn read(&self) -> ValueParseResult<'data, BinaryEncoding> { | ||
if self.encoded_value.header().is_null() { | ||
if self.is_null() { | ||
let raw_value_ref = RawValueRef::Null(self.ion_type()); | ||
return Ok(raw_value_ref); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; | ||
use crate::lazy::binary::raw::r#struct::LazyRawBinaryStruct; | ||
use crate::lazy::binary::raw::reader::LazyRawBinaryReader; | ||
use crate::lazy::binary::raw::sequence::LazyRawBinarySequence; | ||
use crate::lazy::binary::raw::value::LazyRawBinaryValue; | ||
use crate::lazy::decoder::private::{LazyContainerPrivate, LazyRawFieldPrivate}; | ||
use crate::lazy::decoder::{LazyDecoder, LazyRawField, LazyRawSequence, LazyRawStruct}; | ||
use crate::lazy::raw_value_ref::RawValueRef; | ||
use crate::lazy::text::raw::reader::LazyRawTextReader; | ||
use crate::lazy::text::value::LazyRawTextValue; | ||
use crate::{IonResult, IonType, RawSymbolTokenRef}; | ||
use std::marker::PhantomData; | ||
|
||
// These types derive trait implementations in order to allow types that contain them | ||
// to also derive trait implementations. | ||
|
||
/// The Ion 1.0 binary encoding. | ||
#[derive(Clone, Debug)] | ||
pub struct BinaryEncoding; | ||
|
||
/// The Ion 1.0 text encoding. | ||
#[derive(Clone, Debug)] | ||
pub struct TextEncoding; | ||
|
||
impl<'data> LazyDecoder<'data> for BinaryEncoding { | ||
type Reader = LazyRawBinaryReader<'data>; | ||
type Value = LazyRawBinaryValue<'data>; | ||
type Sequence = LazyRawBinarySequence<'data>; | ||
type Struct = LazyRawBinaryStruct<'data>; | ||
type AnnotationsIterator = RawBinaryAnnotationsIterator<'data>; | ||
} | ||
Comment on lines
+25
to
+31
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ As mentioned earlier in the diff, |
||
|
||
// === Placeholders === | ||
// The types below will need to be properly defined in order for the lazy text reader to be complete. | ||
// They exist to satisfy various trait definitions. | ||
Comment on lines
+33
to
+35
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ At this point, we have a |
||
#[derive(Debug, Clone)] | ||
pub struct ToDoTextSequence; | ||
|
||
impl<'data> LazyContainerPrivate<'data, TextEncoding> for ToDoTextSequence { | ||
fn from_value(_value: LazyRawTextValue<'data>) -> Self { | ||
todo!() | ||
} | ||
} | ||
|
||
impl<'data> LazyRawSequence<'data, TextEncoding> for ToDoTextSequence { | ||
type Iterator = Box<dyn Iterator<Item = IonResult<LazyRawTextValue<'data>>>>; | ||
|
||
fn annotations(&self) -> ToDoTextAnnotationsIterator<'data> { | ||
todo!() | ||
} | ||
|
||
fn ion_type(&self) -> IonType { | ||
todo!() | ||
} | ||
|
||
fn iter(&self) -> Self::Iterator { | ||
todo!() | ||
} | ||
|
||
fn as_value(&self) -> &<TextEncoding as LazyDecoder<'data>>::Value { | ||
todo!() | ||
} | ||
} | ||
|
||
#[derive(Debug, Clone)] | ||
pub struct ToDoTextStruct; | ||
|
||
#[derive(Debug, Clone)] | ||
pub struct ToDoTextField; | ||
|
||
impl<'data> LazyRawFieldPrivate<'data, TextEncoding> for ToDoTextField { | ||
fn into_value(self) -> LazyRawTextValue<'data> { | ||
todo!() | ||
} | ||
} | ||
|
||
impl<'data> LazyRawField<'data, TextEncoding> for ToDoTextField { | ||
fn name(&self) -> RawSymbolTokenRef<'data> { | ||
todo!() | ||
} | ||
|
||
fn value(&self) -> &LazyRawTextValue<'data> { | ||
todo!() | ||
} | ||
} | ||
|
||
impl<'data> LazyContainerPrivate<'data, TextEncoding> for ToDoTextStruct { | ||
fn from_value(_value: <TextEncoding as LazyDecoder>::Value) -> Self { | ||
todo!() | ||
} | ||
} | ||
|
||
impl<'data> LazyRawStruct<'data, TextEncoding> for ToDoTextStruct { | ||
type Field = ToDoTextField; | ||
type Iterator = Box<dyn Iterator<Item = IonResult<ToDoTextField>>>; | ||
|
||
fn annotations(&self) -> ToDoTextAnnotationsIterator<'data> { | ||
todo!() | ||
} | ||
|
||
fn find(&self, _name: &str) -> IonResult<Option<LazyRawTextValue<'data>>> { | ||
todo!() | ||
} | ||
|
||
fn get(&self, _name: &str) -> IonResult<Option<RawValueRef<'data, TextEncoding>>> { | ||
todo!() | ||
} | ||
|
||
fn iter(&self) -> Self::Iterator { | ||
todo!() | ||
} | ||
} | ||
|
||
#[derive(Debug, Clone)] | ||
pub struct ToDoTextAnnotationsIterator<'data> { | ||
spooky: &'data PhantomData<()>, | ||
} | ||
|
||
impl<'data> Iterator for ToDoTextAnnotationsIterator<'data> { | ||
type Item = IonResult<RawSymbolTokenRef<'data>>; | ||
|
||
fn next(&mut self) -> Option<Self::Item> { | ||
todo!() | ||
} | ||
} | ||
|
||
impl<'data> LazyDecoder<'data> for TextEncoding { | ||
type Reader = LazyRawTextReader<'data>; | ||
type Value = LazyRawTextValue<'data>; | ||
type Sequence = ToDoTextSequence; | ||
type Struct = ToDoTextStruct; | ||
type AnnotationsIterator = ToDoTextAnnotationsIterator<'data>; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -69,6 +69,14 @@ impl<'data, D: LazyDecoder<'data>> RawValueRef<'data, D> { | |
} | ||
} | ||
|
||
pub fn expect_i64(self) -> IonResult<i64> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ Without this method, "get-int-as-i64-or-IonError" is expressed as: let int = raw_reader
.next()?
.expect_value()? // It's not an IVM or end-of-stream
.expect_int()? // The value is an Int
.expect_i64()?; // The Int fits in an i64 This reduces it to: let int = raw_reader
.next()?
.expect_value()?
.expect_i64()?; and is consistent with both |
||
if let RawValueRef::Int(i) = self { | ||
i.expect_i64() | ||
} else { | ||
IonResult::decoding_error("expected an i64 (int)") | ||
} | ||
} | ||
|
||
pub fn expect_float(self) -> IonResult<f64> { | ||
if let RawValueRef::Float(f) = self { | ||
Ok(f) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ This PR involves two types that are effectively wrappers around a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ This PR involves two types that are effectively wrappers around a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ This PR deals with two types that are effectively wrappers around a |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
use crate::lazy::text::buffer::TextBufferView; | ||
use crate::position::Position; | ||
use crate::result::DecodingError; | ||
use crate::{IonError, IonResult}; | ||
use smallvec::SmallVec; | ||
|
||
/// Attempts to validate a byte sequence as UTF-8 text. If the data is not valid UTF-8, returns | ||
/// an [`IonError`]. | ||
/// | ||
/// The provided `position` is added to the `IonError` that is constructed if the data is not valid. | ||
pub(crate) trait AsUtf8 { | ||
fn as_utf8(&self, position: impl Into<Position>) -> IonResult<&str>; | ||
} | ||
|
||
impl<const N: usize> AsUtf8 for SmallVec<[u8; N]> { | ||
fn as_utf8(&self, position: impl Into<Position>) -> IonResult<&str> { | ||
std::str::from_utf8(self.as_ref()).map_err(|_| { | ||
let decoding_error = | ||
DecodingError::new("encountered invalid UTF-8").with_position(position); | ||
IonError::Decoding(decoding_error) | ||
}) | ||
} | ||
} | ||
|
||
impl<'data> AsUtf8 for TextBufferView<'data> { | ||
fn as_utf8(&self, position: impl Into<Position>) -> IonResult<&str> { | ||
std::str::from_utf8(self.bytes()).map_err(|_| { | ||
let decoding_error = | ||
DecodingError::new("encountered invalid UTF-8").with_position(position); | ||
IonError::Decoding(decoding_error) | ||
}) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🗺️ The `const_generics` feature of the `smallvec` crate provides trait implementations for all sizes of backing array (`[u8; N]` in our case) rather than just 0-32 and several powers of two beyond. It's a feature because `smallvec` predates const generics and didn't want to force a breaking change.