Skip to content

Commit

Permalink
Update length-prefixed structs to merge flexsym, and symbol address t…
Browse files Browse the repository at this point in the history
…ypes; Fix delimited struct delimiters
  • Loading branch information
nirosys committed Jun 18, 2024
1 parent e02d499 commit fd917bd
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 54 deletions.
38 changes: 18 additions & 20 deletions src/lazy/binary/raw/v1_1/immutable_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -367,28 +367,26 @@ impl<'a> ImmutableBuffer<'a> {
let header_offset = input.offset();

loop {
let opcode = input.peek_opcode()?;
if opcode.opcode_type == OpcodeType::DelimitedContainerClose {
offsets.push(input.offset());
break;
} else {
let (flexsym, after) = input.read_flex_sym()?;
let field_offset = match flexsym.value() {
FlexSymValue::SymbolRef(_sym) => input.offset(),
FlexSymValue::Opcode(_op) => todo!(),
};
input = after;

let mut opcode = input.peek_opcode()?;
if opcode.opcode_type == OpcodeType::Nop {
let res = input.consume_nop_padding(opcode)?;
input = res.1;
opcode = input.peek_opcode()?;
let (flexsym, after) = input.read_flex_sym()?;
let field_offset = match flexsym.value() {
FlexSymValue::SymbolRef(_sym) => input.offset(),
FlexSymValue::Opcode(Opcode { opcode_type: OpcodeType::DelimitedContainerClose, ..}) => {
offsets.push(after.offset() - 1);
break
}
let value = input.read_value(opcode)?;
input = input.consume(value.encoded_value.total_length());
offsets.push(field_offset);
_ => unreachable!(),
};
input = after;

let mut opcode = input.peek_opcode()?;
if opcode.opcode_type == OpcodeType::Nop {
let res = input.consume_nop_padding(opcode)?;
input = res.1;
opcode = input.peek_opcode()?;
}
let value = input.read_value(opcode)?;
input = input.consume(value.encoded_value.total_length());
offsets.push(field_offset);
}

let header = head_opcode
Expand Down
34 changes: 17 additions & 17 deletions src/lazy/binary/raw/v1_1/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -942,7 +942,6 @@ mod tests {
([0xEB, 0x0B], IonType::Struct), // null.struct
];

let allocator = BumpAllocator::new();
for (data, expected_type) in data {
let encoding_context = EncodingContext::empty();
let context = encoding_context.get_ref();
Expand All @@ -965,9 +964,9 @@ mod tests {
0xFB, 0x66, 0x6F, 0x6F, 0x61, 0x01, // "foo": 1
0x17, 0xF3, // 11: {
0xFB, 0x62, 0x61, 0x72, 0x61, 0x02, // "bar": 2
0xF0, // }
0x01, 0xF0, // }
0xFB, 0x62, 0x61, 0x7a, 0x61, 0x03, // "baz": 3
0xF0, // }
0x01, 0xF0, // }
];

let encoding_context = EncodingContext::empty();
Expand Down Expand Up @@ -1056,6 +1055,14 @@ mod tests {
(11usize.into(), IonType::Int),
],
),
(
// {"foo": 1, $11: 2} - FlexSym Mode
&[ 0xDA, 0x01, 0xFB, 0x66, 0x6F, 0x6F, 0x61, 0x01, 0x17, 0x61, 0x02 ],
&[
("foo".into(), IonType::Int),
(11.into(), IonType::Int)
],
),
(
// {}
&[ 0xFD, 0x01 ],
Expand All @@ -1073,50 +1080,43 @@ mod tests {
// FlexSym
(
// { "foo": 1, $11: 2 }
&[ 0xD9, 0xFB, 0x66, 0x6F, 0x6F, 0x61, 0x01, 0x17, 0x61, 0x02],
&[ 0xDA, 0x01, 0xFB, 0x66, 0x6F, 0x6F, 0x61, 0x01, 0x17, 0x61, 0x02],
&[ ("foo".into(), IonType::Int), (11usize.into(), IonType::Int)],
),
(
// { "foo": 1, $11: 2 }
&[ 0xFD, 0x13, 0xFB, 0x66, 0x6F, 0x6F, 0x61, 0x01, 0x17, 0x61, 0x02],
&[ 0xFD, 0x15, 0x01, 0xFB, 0x66, 0x6F, 0x6F, 0x61, 0x01, 0x17, 0x61, 0x02],
&[ ("foo".into(), IonType::Int), (11usize.into(), IonType::Int)],
),
(
// { "foo": <NOP>, $11: 2 }
&[ 0xFD, 0x11, 0xFB, 0x66, 0x6F, 0x6F, 0xEC, 0x17, 0x61, 0x02],
&[ 0xFD, 0x13, 0x01, 0xFB, 0x66, 0x6F, 0x6F, 0xEC, 0x17, 0x61, 0x02],
&[ (11usize.into(), IonType::Int) ],
),
(
// { "foo": 2, $11: <NOP> }
&[ 0xFD, 0x11, 0xFB, 0x66, 0x6F, 0x6F, 0x61, 0x02, 0x17, 0xEC],
&[ 0xFD, 0x13, 0x01, 0xFB, 0x66, 0x6F, 0x6F, 0x61, 0x02, 0x17, 0xEC],
&[ ("foo".into(), IonType::Int) ],
),
(
// { "foo": { $10: 2 }, "bar": 2 }
&[
0xFD, 0x1D, 0xFB, 0x66, 0x6F, 0x6F, 0xD3, 0x15, 0x61, 0x02,
0xFD, 0x1F, 0x01, 0xFB, 0x66, 0x6F, 0x6F, 0xD3, 0x15, 0x61, 0x02,
0xFB, 0x62, 0x61, 0x72, 0x61, 0x02,
],
&[
("foo".into(), IonType::Struct),
("bar".into(), IonType::Int),
],
),
(
// {}
&[0xFD, 0x01],
&[],
),
(
// {} - delimited
&[
0xF3, 0xF0,
],
&[ 0xF3, 0x01, 0xF0 ],
&[],
),
(
// { "foo": 1, $11: 2 } - delimited
&[ 0xF3, 0xFB, 0x66, 0x6F, 0x6F, 0x61, 0x01, 0x17, 0xE1, 0x02, 0xF0],
&[ 0xF3, 0xFB, 0x66, 0x6F, 0x6F, 0x61, 0x01, 0x17, 0xE1, 0x02, 0x01, 0xF0],
&[ ("foo".into(), IonType::Int), (11usize.into(), IonType::Symbol)],
),
];
Expand Down
55 changes: 38 additions & 17 deletions src/lazy/binary/raw/v1_1/struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,20 @@ impl<'top> LazyRawStruct<'top, BinaryEncoding_1_1> for LazyRawBinaryStruct_1_1<'
}
}

enum StructType {
/// Selects how the struct iterator decodes the next field name.
///
/// The initial mode is chosen from the struct's opcode type when the iterator is
/// constructed, and the iterator moves from `SymbolAddress` to `FlexSym` when it
/// reads a mode-change marker while peeking a field.
enum StructMode {
/// Field names are read with `peek_field_flexsym`, i.e. decoded as FlexSyms.
FlexSym,
/// Field names are read with `peek_field_symbol_addr`, i.e. decoded as FlexUInt
/// symbol addresses.
SymbolAddress,
}

/// Outcome of reading one field name while the struct is in symbol-address mode.
enum SymAddressFieldName<'top> {
/// The symbol address that was read is 0. This is not a field name; it tells the
/// iterator to switch to FlexSym mode for the field names that follow.
ModeChange,
/// A field name identified by a non-zero symbol address, paired with the bytes
/// that encoded it.
FieldName(LazyRawBinaryFieldName_1_1<'top>),
}

pub struct RawBinaryStructIterator_1_1<'top> {
source: ImmutableBuffer<'top>,
bytes_to_skip: usize,
struct_type: StructType,
mode: StructMode,
delimited_offsets: Option<&'top [usize]>,
}

Expand All @@ -149,9 +154,9 @@ impl<'top> RawBinaryStructIterator_1_1<'top> {
RawBinaryStructIterator_1_1 {
source: input,
bytes_to_skip: 0,
struct_type: match opcode_type {
OpcodeType::Struct => StructType::SymbolAddress,
OpcodeType::StructDelimited => StructType::FlexSym,
mode: match opcode_type {
OpcodeType::Struct => StructMode::SymbolAddress,
OpcodeType::StructDelimited => StructMode::FlexSym,
_ => unreachable!("Unexpected opcode for structure"),
},
delimited_offsets,
Expand All @@ -167,6 +172,7 @@ impl<'top> RawBinaryStructIterator_1_1<'top> {
buffer: ImmutableBuffer<'top>,
) -> IonResult<Option<(LazyRawBinaryFieldName_1_1<'top>, ImmutableBuffer<'top>)>> {
use crate::lazy::encoder::binary::v1_1::flex_sym::FlexSymValue;
use crate::lazy::binary::raw::v1_1::Opcode;

if buffer.is_empty() {
return Ok(None);
Expand All @@ -175,10 +181,13 @@ impl<'top> RawBinaryStructIterator_1_1<'top> {
let (flex_sym, after) = buffer.read_flex_sym()?;
let (sym, after) = match flex_sym.value() {
FlexSymValue::SymbolRef(sym_ref) => (sym_ref, after),
FlexSymValue::Opcode(_opcode) => todo!(),
FlexSymValue::Opcode(Opcode{ opcode_type: OpcodeType::DelimitedContainerClose, ..}) => {
return Ok(None)
}
_ => unreachable!(),
};

let matched_field_id = buffer.slice(0, flex_sym.size_in_bytes());
let matched_field_id = buffer.slice(0, after.offset() - buffer.offset());
let field_name = LazyRawBinaryFieldName_1_1::new(sym, matched_field_id);
Ok(Some((field_name, after)))
}
Expand All @@ -188,18 +197,23 @@ impl<'top> RawBinaryStructIterator_1_1<'top> {
/// [`ImmutableBuffer`] positioned after the field name is returned.
fn peek_field_symbol_addr(
buffer: ImmutableBuffer<'top>,
) -> IonResult<Option<(LazyRawBinaryFieldName_1_1<'top>, ImmutableBuffer<'top>)>> {
) -> IonResult<Option<(SymAddressFieldName<'top>, ImmutableBuffer<'top>)>> {
if buffer.is_empty() {
return Ok(None);
}

let (symbol_address, after) = buffer.read_flex_uint()?;

let field_id = symbol_address.value() as usize;
let matched_field_id = buffer.slice(0, symbol_address.size_in_bytes());
let field_name =
LazyRawBinaryFieldName_1_1::new(RawSymbolRef::SymbolId(field_id), matched_field_id);
Ok(Some((field_name, after)))

if field_id == 0 {
// Mode switch.
Ok(Some((SymAddressFieldName::ModeChange, after)))
} else {
let matched_field_id = buffer.slice(0, symbol_address.size_in_bytes());
let field_name =
LazyRawBinaryFieldName_1_1::new(RawSymbolRef::SymbolId(field_id), matched_field_id);
Ok(Some((SymAddressFieldName::FieldName(field_name), after)))
}
}

/// Helper function called by [`Self::peek_field`] in order to parse a struct field's value.
Expand Down Expand Up @@ -228,15 +242,22 @@ impl<'top> RawBinaryStructIterator_1_1<'top> {
/// struct. On success, returns both the field pair via [`LazyRawFieldExpr`] as well as the
/// total bytes needed to skip the field.
fn peek_field(
&self,
&mut self,
input: ImmutableBuffer<'top>,
) -> IonResult<Option<(LazyRawFieldExpr<'top, BinaryEncoding_1_1>, usize)>> {
let mut buffer = input;
loop {
// Peek at our field name.
let peek_result = match self.struct_type {
StructType::SymbolAddress => Self::peek_field_symbol_addr(buffer)?,
StructType::FlexSym => Self::peek_field_flexsym(buffer)?,
let peek_result = match self.mode {
StructMode::SymbolAddress => match Self::peek_field_symbol_addr(buffer)? {
Some((SymAddressFieldName::ModeChange, after)) => {
self.mode = StructMode::FlexSym;
Self::peek_field_flexsym(after)?
}
Some((SymAddressFieldName::FieldName(fieldname), after)) => Some((fieldname, after)),
None => None,
}
StructMode::FlexSym => Self::peek_field_flexsym(buffer)?,
};

let Some((field_name, after_name)) = peek_result else {
Expand Down

0 comments on commit fd917bd

Please sign in to comment.