Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into inline-text-setting
Browse files Browse the repository at this point in the history
  • Loading branch information
zslayton committed Aug 13, 2024
2 parents ef7c845 + d4ad0c5 commit c28eb37
Show file tree
Hide file tree
Showing 12 changed files with 594 additions and 108 deletions.
205 changes: 187 additions & 18 deletions src/lazy/binary/raw/v1_1/immutable_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,16 @@ use crate::lazy::binary::encoded_value::EncodedValue;
use crate::lazy::binary::raw::v1_1::e_expression::{
BinaryEExpArgsIterator_1_1, BinaryEExpression_1_1,
};
use crate::lazy::binary::raw::v1_1::r#struct::LazyRawBinaryFieldName_1_1;
use crate::lazy::binary::raw::v1_1::value::{
LazyRawBinaryValue_1_1, LazyRawBinaryVersionMarker_1_1,
DelimitedContents, LazyRawBinaryValue_1_1, LazyRawBinaryVersionMarker_1_1,
};
use crate::lazy::binary::raw::v1_1::{Header, LengthType, Opcode, OpcodeType, ION_1_1_OPCODES};
use crate::lazy::decoder::{LazyRawValueExpr, RawValueExpr};
use crate::lazy::decoder::{LazyRawFieldExpr, LazyRawValueExpr, RawValueExpr};
use crate::lazy::encoder::binary::v1_1::fixed_int::FixedInt;
use crate::lazy::encoder::binary::v1_1::fixed_uint::FixedUInt;
use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt;
use crate::lazy::encoder::binary::v1_1::flex_sym::FlexSym;
use crate::lazy::encoder::binary::v1_1::flex_sym::{FlexSym, FlexSymValue};
use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt;
use crate::lazy::expanded::macro_table::MacroRef;
use crate::lazy::expanded::template::ParameterEncoding;
Expand Down Expand Up @@ -70,6 +71,11 @@ impl<'a> PartialEq for ImmutableBuffer<'a> {
/// When `Ok`, contains the value that was matched/parsed and the remainder of the input buffer.
pub(crate) type ParseResult<'a, T> = IonResult<(T, ImmutableBuffer<'a>)>;

/// Possible outcomes of reading a struct field name.
///
/// NOTE(review): nothing in the visible part of this file constructs or matches on this enum,
/// and the variant descriptions below are inferred from their names only -- confirm against
/// the code that uses it (or remove the type if it is unused).
enum SymAddressFieldName<'top> {
/// Presumably signals a switch in field-name encoding mode rather than a name -- TODO confirm.
ModeChange,
/// A field name that was read from the input.
FieldName(LazyRawBinaryFieldName_1_1<'top>),
}

impl<'a> ImmutableBuffer<'a> {
/// Constructs a new `ImmutableBuffer` that wraps `data`.
#[inline]
Expand Down Expand Up @@ -316,13 +322,17 @@ impl<'a> ImmutableBuffer<'a> {
/// containing a [FlexUInt] representation of the value's length. If no additional bytes were
/// read, the returned `FlexUInt`'s `size_in_bytes()` method will return `0`.
#[inline]
pub fn read_value_length(self, header: Header) -> ParseResult<'a, FlexUInt> {
pub fn read_value_length(self, header: Header) -> ParseResult<'a, Option<FlexUInt>> {
match header.length_type() {
LengthType::InOpcode(n) => {
// FlexUInt represents the length, but is not physically present, hence the 0 size.
Ok((FlexUInt::new(0, n as u64), self))
Ok((Some(FlexUInt::new(0, n as u64)), self))
}
LengthType::Unknown => Ok((None, self)),
LengthType::FlexUIntFollows => {
let (flex, after) = self.read_flex_uint()?;
Ok((Some(flex), after))
}
LengthType::FlexUIntFollows => self.read_flex_uint(),
}
}

Expand Down Expand Up @@ -444,6 +454,153 @@ impl<'a> ImmutableBuffer<'a> {
IonResult::decoding_error("found a non-value, non-eexp after a nop pad")
}

/// Reads the contents of the delimited container at the head of this buffer.
///
/// The Ion type recorded in `opcode` selects the reader: a struct is handed to
/// `peek_delimited_struct`, and every other type is handed to `peek_delimited_sequence`.
///
/// Returns the container's contents along with the buffer position reported by the chosen
/// reader. Any error raised by that reader is propagated unchanged.
pub(crate) fn peek_delimited_container(
    self,
    opcode: Opcode,
) -> IonResult<(DelimitedContents<'a>, ImmutableBuffer<'a>)> {
    use crate::IonType;

    match opcode.ion_type {
        Some(IonType::Struct) => self.peek_delimited_struct(),
        _ => self.peek_delimited_sequence(),
    }
}

/// Reads the child value expressions of a delimited sequence.
///
/// The first byte of the buffer is skipped without inspection (the caller is expected to have
/// already identified it as the container's opening opcode). The remaining input is then
/// scanned one opcode at a time:
///   * a delimited-end opcode is consumed and terminates the scan,
///   * NOP padding is skipped,
///   * anything else is parsed with `read_sequence_value_expr` and, if it produced a value
///     expression, stored in a bump-allocated list.
///
/// Returns `DelimitedContents::Values` holding the collected expressions, plus the buffer
/// positioned immediately after the end marker.
///
/// # Errors
///
/// Propagates any error from `expect_opcode`, `consume_nop_padding`, or
/// `read_sequence_value_expr`.
pub(crate) fn peek_delimited_sequence(
    self,
) -> IonResult<(DelimitedContents<'a>, ImmutableBuffer<'a>)> {
    let mut input = self.consume(1);
    let mut values =
        BumpVec::<LazyRawValueExpr<'a, v1_1::Binary>>::new_in(self.context.allocator());

    loop {
        let opcode = input.expect_opcode()?;
        if opcode.is_delimited_end() {
            input = input.consume(1);
            break;
        }
        if opcode.opcode_type == OpcodeType::Nop {
            input = input.consume_nop_padding(opcode)?.1;
            continue;
        }

        let (maybe_value, after) = ImmutableBuffer::read_sequence_value_expr(input)?;
        if let Some(value) = maybe_value {
            values.push(value);
        }
        // Always adopt the position reported by the reader, even when it produced no value.
        // Discarding it would make the next iteration re-read exactly the same bytes.
        input = after;
    }

    Ok((DelimitedContents::Values(values.into_bump_slice()), input))
}

/// Reads the value half of a delimited struct field.
///
/// If the next opcode is a NOP, the padding is consumed and `None` is returned together with
/// the buffer that follows the padding; this holds whether or not any input remains after it,
/// so callers must check for an empty buffer themselves. Otherwise the value is parsed with
/// `read_value` and returned as `Some`, along with the buffer that follows it.
///
/// # Errors
///
/// Propagates any error from `expect_opcode`, `consume_nop_padding`, or `read_value`.
fn peek_delimited_struct_value(
    &self,
) -> IonResult<(Option<LazyRawBinaryValue_1_1<'a>>, ImmutableBuffer<'a>)> {
    let opcode = self.expect_opcode()?;
    if !opcode.is_nop() {
        let (value, remaining) = self.read_value(opcode)?;
        return Ok((Some(value), remaining));
    }

    // Padding occupies the value position; report "no value" and where the padding ended.
    let after_nops = self.consume_nop_padding(opcode)?.1;
    Ok((None, after_nops))
}

/// Reads the field name, as a flexsym, for the current delimited struct field.
fn peek_delimited_field_flexsym(
&self,
) -> IonResult<(Option<LazyRawBinaryFieldName_1_1<'a>>, ImmutableBuffer<'a>)> {
if self.is_empty() {
return Ok((None, *self));
}

let (flex_sym, after) = self.read_flex_sym()?;
let (sym, after) = match flex_sym.value() {
FlexSymValue::SymbolRef(sym_ref) => (sym_ref, after),
FlexSymValue::Opcode(o) if o.is_delimited_end() => return Ok((None, after)),
_ => unreachable!(),
};

let matched_field_id = self.slice(0, after.offset() - self.offset());
let field_name = LazyRawBinaryFieldName_1_1::new(sym, matched_field_id);
Ok((Some(field_name), after))
}

/// Reads the next (name, value) field of a delimited struct.
///
/// Field names are read with `peek_delimited_field_flexsym` and values with
/// `peek_delimited_struct_value`. When the value position holds only NOP padding, that name is
/// discarded and reading resumes at the following field name.
///
/// Returns `(None, after)` when the name reader reports that there are no more fields.
/// Otherwise returns `Some(LazyRawFieldExpr::NameValue(..))`, whose value is stored in the
/// context's allocator, together with the buffer that follows the value.
///
/// # Errors
///
/// Returns an `incomplete` error if the input ends after a field name, or after the NOP
/// padding that followed one. Errors from the name and value readers are propagated.
pub(crate) fn peek_delimited_field(
    &self,
) -> IonResult<(
    Option<LazyRawFieldExpr<'a, v1_1::Binary>>,
    ImmutableBuffer<'a>,
)> {
    let mut cursor = *self;
    loop {
        let (maybe_name, after_name) = cursor.peek_delimited_field_flexsym()?;
        let field_name = match maybe_name {
            Some(name) => name,
            None => return Ok((None, after_name)),
        };

        if after_name.is_empty() {
            return IonResult::incomplete("found field name but no value", after_name.offset());
        }

        let (maybe_value, after_value) = after_name.peek_delimited_struct_value()?;
        if let Some(value) = maybe_value {
            let bump = self.context().allocator();
            let value_ref = &*bump.alloc_with(|| value);
            return Ok((
                Some(LazyRawFieldExpr::NameValue(field_name, value_ref)),
                after_value,
            ));
        }

        // Only padding was found in the value position.
        if after_value.is_empty() {
            return IonResult::incomplete("found field name but no value", after_value.offset());
        }
        // Skip this field entirely and try the next one.
        cursor = after_value;
    }
}

/// Reads the fields of a delimited struct.
///
/// The first byte of the buffer is skipped without inspection (the caller is expected to have
/// already identified it as the struct's opening opcode). Fields are then read one at a time
/// with `peek_delimited_field` and gathered into a bump-allocated list until either the next
/// byte is a delimited-end opcode or the field reader reports that no fields remain.
///
/// Returns `DelimitedContents::Fields` holding the collected fields, plus the buffer position
/// at which reading stopped.
///
/// NOTE(review): when the loop stops because the next byte is a delimited-end opcode, that
/// byte is NOT consumed, whereas `peek_delimited_sequence` does consume its end marker.
/// Confirm whether this path is intentional (or reachable, given that struct field names are
/// encoded as FlexSyms).
///
/// # Errors
///
/// Propagates any error from `expect_opcode` or `peek_delimited_field`.
pub(crate) fn peek_delimited_struct(
    self,
) -> IonResult<(DelimitedContents<'a>, ImmutableBuffer<'a>)> {
    let mut input = self.consume(1);
    let mut fields =
        BumpVec::<LazyRawFieldExpr<'a, v1_1::Binary>>::new_in(self.context.allocator());

    loop {
        if input.expect_opcode()?.is_delimited_end() {
            break;
        }

        let (maybe_field, after) = input.peek_delimited_field()?;
        input = after;
        match maybe_field {
            Some(field) => fields.push(field),
            None => break,
        }
    }

    Ok((DelimitedContents::Fields(fields.into_bump_slice()), input))
}

/// Reads a value from the buffer. The caller must confirm that the buffer is not empty and that
/// the next byte (`type_descriptor`) is not a NOP.
pub fn read_value(self, opcode: Opcode) -> ParseResult<'a, LazyRawBinaryValue_1_1<'a>> {
Expand All @@ -459,29 +616,39 @@ impl<'a> ImmutableBuffer<'a> {
#[inline(always)]
fn read_value_without_annotations(
self,
type_descriptor: Opcode,
opcode: Opcode,
) -> ParseResult<'a, LazyRawBinaryValue_1_1<'a>> {
let input = self;
let header = type_descriptor.to_header().ok_or_else(|| {
let header = opcode.to_header().ok_or_else(|| {
IonError::decoding_error(format!(
"found a non-value in value position; buffer=<{:X?}>",
input.bytes_range(0, 16.min(input.bytes().len()))
))
})?;

let header_offset = input.offset();
let (total_length, length_length, value_body_length, delimited_contents) = if opcode.is_delimited_start() {
let (contents, after) = input.peek_delimited_container(opcode)?;
let total_length = after.offset() - self.offset();
let value_body_length = total_length - 1; // Total length - sizeof(opcode)
(total_length, 0, value_body_length, contents)
} else {
let length = match header.length_type() {
LengthType::InOpcode(n) => FlexUInt::new(0, n as u64),
LengthType::Unknown => FlexUInt::new(0, 0), // Delimited value, we do not know the size.
// This call to `read_value_length` is not always inlined, so we avoid the method call
// if possible.
LengthType::FlexUIntFollows => input.consume(1).read_flex_uint()?.0,
};

let length = match header.length_type() {
LengthType::InOpcode(n) => FlexUInt::new(0, n as u64),
// This call to `read_value_length` is not always inlined, so we avoid the method call
// if possible.
_ => input.consume(1).read_value_length(header)?.0,
};
let length_length = length.size_in_bytes() as u8;
let value_length = length.value() as usize; // ha
let total_length = 1 // Header byte

let length_length = length.size_in_bytes() as u8;
let value_length = length.value() as usize; // ha
let total_length = 1 // Header byte
+ length_length as usize
+ value_length;
(total_length, length_length, value_length, DelimitedContents::None)
};

if total_length > input.len() {
return IonResult::incomplete(
Expand All @@ -501,13 +668,15 @@ impl<'a> ImmutableBuffer<'a> {
// This is a tagged value, so its opcode length is always 1
opcode_length: 1,
length_length,
value_body_length: value_length,
value_body_length,
total_length,
};

let lazy_value = LazyRawBinaryValue_1_1 {
encoded_value,
// If this value has a field ID or annotations, this will be replaced by the caller.
input: self,
delimited_contents,
};
Ok((lazy_value, self.consume(total_length)))
}
Expand Down
Loading

0 comments on commit c28eb37

Please sign in to comment.