Skip to content

Commit

Permalink
Adds read/write support for system eexps, placeholders for remaining …
Browse files Browse the repository at this point in the history
…system macros (#862)

* Macros now impleement `Send`, allowing them to be shared across
  threads. This makes it possible for all readers and writers to
  share the same system macro table instance instead of allocating
  their own copy.
* Adds a `SystemMacroAddress` type that is range-checked at creation.
* Adds a `SystemAddress` variant to the `MacroIdRef` enum.
* Adds `Opcode::from_byte` support for `SystemEExpression`.
* `Parameter` and `Macro` now use `CompactString` to store their names,
  skipping all heap allocations in the common case.
* Simplifies TDL used in read benchmark macro defs
* Adds named constructors to `Parameter`
  • Loading branch information
zslayton authored Dec 2, 2024
1 parent 137d79e commit cff0c7f
Show file tree
Hide file tree
Showing 15 changed files with 544 additions and 263 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ base64 = "0.12"
# chrono < 0.5 brings in a deprecated version of the `time` crate via `oldtime` feature by default
# this makes it explicitly not do this as there is an advisory warning against this:
# See: https://github.com/chronotope/chrono/issues/602
compact_str = "0.8.0"
chrono = { version = "0.4", default-features = false, features = ["clock", "std", "wasmbind"] }
delegate = "0.12.0"
thiserror = "1.0"
Expand Down
78 changes: 40 additions & 38 deletions benches/read_many_structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,23 @@ fn maximally_compact_1_1_data(num_values: usize) -> TestData_1_1 {
let template_definition_text: String = r#"
(macro event (timestamp thread_id thread_name client_num host_id parameters*)
{
'timestamp': timestamp,
'threadId': thread_id,
'threadName': (make_string "scheduler-thread-" thread_name),
'loggerName': "com.example.organization.product.component.ClassName",
'logLevel': (literal INFO),
'format': "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}",
'parameters': [
timestamp: (%timestamp),
threadId: (%thread_id),
threadName: (.make_string "scheduler-thread-" (%thread_name)),
loggerName: "com.example.organization.product.component.ClassName",
logLevel: INFO,
format: "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}",
parameters: [
"SUCCESS",
(make_string "example-client-" client_num),
(make_string "aws-us-east-5f-" host_id),
parameters
(.make_string "example-client-" (%client_num)),
(.make_string "aws-us-east-5f-" (%host_id)),
(%parameters)
]
}
)
"#.to_owned();

let text_1_1_data = r#"(:event 1670446800245 418 "6" "1" "abc123" (: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);
let text_1_1_data = r#"(:event 1670446800245 418 "6" "1" "abc123" (:: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);

let mut binary_1_1_data = vec![0xE0u8, 0x01, 0x01, 0xEA]; // IVM
#[rustfmt::skip]
Expand Down Expand Up @@ -90,23 +90,23 @@ fn moderately_compact_1_1_data(num_values: usize) -> TestData_1_1 {
let template_definition_text = r#"
(macro event (timestamp thread_id thread_name client_num host_id parameters*)
{
'timestamp': timestamp,
'threadId': thread_id,
'threadName': thread_name,
'loggerName': "com.example.organization.product.component.ClassName",
'logLevel': (literal INFO),
'format': "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}",
'parameters': [
timestamp: (%timestamp),
threadId: (%thread_id),
threadName: (%thread_name),
loggerName: "com.example.organization.product.component.ClassName",
logLevel: INFO,
format: "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}",
parameters: [
"SUCCESS",
client_num,
host_id,
parameters
(%client_num),
(%host_id),
(%parameters)
]
}
)
"#;

let text_1_1_data = r#"(:event 1670446800245 418 "scheduler-thread-6" "example-client-1" "aws-us-east-5f-abc123" (: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);
let text_1_1_data = r#"(:event 1670446800245 418 "scheduler-thread-6" "example-client-1" "aws-us-east-5f-abc123" (:: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);
let mut binary_1_1_data = vec![0xE0u8, 0x01, 0x01, 0xEA]; // IVM
#[rustfmt::skip]
let mut binary_1_1_data_body: Vec<u8> = [MacroTable::FIRST_USER_MACRO_ID as u8, // Macro ID
Expand Down Expand Up @@ -159,23 +159,23 @@ fn length_prefixed_moderately_compact_1_1_data(num_values: usize) -> TestData_1_
let template_definition_text = r#"
(macro event (timestamp thread_id thread_name client_num host_id parameters*)
{
'timestamp': timestamp,
'threadId': thread_id,
'threadName': thread_name,
'loggerName': "com.example.organization.product.component.ClassName",
'logLevel': (literal INFO),
'format': "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}",
'parameters': [
timestamp: (%timestamp),
threadId: (%thread_id),
threadName: (%thread_name),
loggerName: "com.example.organization.product.component.ClassName",
logLevel: INFO,
format: "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}",
parameters: [
"SUCCESS",
client_num,
host_id,
parameters
(%client_num),
(%host_id),
(%parameters)
]
}
)
"#;

let text_1_1_data = r#"(:event 1670446800245 418 "scheduler-thread-6" "example-client-1" "aws-us-east-5f-abc123" (: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);
let text_1_1_data = r#"(:event 1670446800245 418 "scheduler-thread-6" "example-client-1" "aws-us-east-5f-abc123" (:: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);
let mut binary_1_1_data = vec![0xE0u8, 0x01, 0x01, 0xEA]; // IVM
#[rustfmt::skip]
let mut binary_1_1_data_body: Vec<u8> = [0xF5, // LP invocation
Expand Down Expand Up @@ -307,15 +307,15 @@ mod benchmark {

// Load the Ion 1.0 values into a Sequence. We'll compare our Ion 1.1 streams' data to this
// sequence to make sure that all of the tests are working on equivalent data.
let seq_1_0 = Reader::new(v1_1::Text, text_1_0_data.as_slice())
let seq_1_0 = Reader::new(v1_0::Text, text_1_0_data.as_slice())
.unwrap()
.read_all_elements()?;

let mut text_1_0_group = c.benchmark_group("text 1.0");
// Visit each top level value in the stream without reading it.
text_1_0_group.bench_function("scan all", |b| {
b.iter(|| {
let mut reader = Reader::new(v1_1::Text, text_1_0_data.as_slice()).unwrap();
let mut reader = Reader::new(v1_0::Text, text_1_0_data.as_slice()).unwrap();
while let Some(item) = reader.next().unwrap() {
black_box(item);
}
Expand All @@ -324,7 +324,7 @@ mod benchmark {
// Read every value in the stream, however deeply nested.
text_1_0_group.bench_function("read all", |b| {
b.iter(|| {
let mut reader = Reader::new(v1_1::Text, text_1_0_data.as_slice()).unwrap();
let mut reader = Reader::new(v1_0::Text, text_1_0_data.as_slice()).unwrap();
let mut num_values = 0usize;
while let Some(item) = reader.next().unwrap() {
num_values += count_value_and_children(&item).unwrap();
Expand All @@ -335,7 +335,7 @@ mod benchmark {
// Read the 'format' field from each top-level struct in the stream.
text_1_0_group.bench_function("read 'format' field", |b| {
b.iter(|| {
let mut reader = Reader::new(v1_1::Text, text_1_0_data.as_slice()).unwrap();
let mut reader = Reader::new(v1_0::Text, text_1_0_data.as_slice()).unwrap();
let mut num_values = 0usize;
while let Some(value) = reader.next().unwrap() {
let s = value.read().unwrap().expect_struct().unwrap();
Expand Down Expand Up @@ -412,7 +412,9 @@ mod benchmark {
let seq_1_1 = reader_1_1.read_all_elements().unwrap();
assert!(
IonData::eq(seq_1_0, &seq_1_1),
"{name} binary Ion 1.1 sequence was not equal to the original Ion 1.0 sequence"
"{name} binary Ion 1.1 sequence was not equal to the original Ion 1.0 sequence: \n{:#?}\n !=\n{:#?}",
seq_1_0.elements().next().unwrap(),
seq_1_1.elements().next().unwrap(),
);

// === Text equivalence check ===
Expand Down
2 changes: 1 addition & 1 deletion src/lazy/binary/raw/v1_1/type_code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ pub enum OpcodeType {
TypedNull, // 0xEB -
Nop, // 0xEC-0xED -
SystemSymbolAddress, // 0xEE
SystemMacroInvoke, // 0xEF -
SystemMacroEExpression, // 0xEF -
DelimitedContainerClose, // 0xF0
ListDelimited, // 0xF1
SExpDelimited, // 0xF2
Expand Down
1 change: 1 addition & 0 deletions src/lazy/binary/raw/v1_1/type_descriptor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ impl Opcode {
InOpcode(1),
OpcodeKind::Value(IonType::Symbol),
),
(0xE, 0xF) => (SystemMacroEExpression, Unknown, OpcodeKind::EExp),
(0xF, 0x0) => (DelimitedContainerClose, InOpcode(0), OpcodeKind::Control),
(0xF, 0x1) => (ListDelimited, Unknown, OpcodeKind::Value(IonType::List)),
(0xF, 0x2) => (SExpDelimited, Unknown, OpcodeKind::Value(IonType::SExp)),
Expand Down
26 changes: 26 additions & 0 deletions src/lazy/encoder/binary/v1_1/value_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,10 @@ impl BinaryValueWriter_1_1<'_, '_> {
MacroIdRef::LocalAddress(_address) => {
todo!("macros with addresses greater than {MAX_20_BIT_ADDRESS}");
}
MacroIdRef::SystemAddress(address) => {
self.encoding_buffer
.extend_from_slice_copy(&[0xEF, address.as_u8()]); // System e-expression
}
};

Ok(BinaryEExpWriter_1_1::new(
Expand Down Expand Up @@ -955,6 +959,7 @@ mod tests {
use crate::lazy::encoder::value_writer::ValueWriter;
use crate::lazy::encoder::value_writer::{SequenceWriter, StructWriter};
use crate::lazy::encoder::write_as_ion::{WriteAsIon, WriteAsSExp};
use crate::lazy::text::raw::v1_1::reader::{system_macros, MacroIdRef};
use crate::raw_symbol_ref::AsRawSymbolRef;
use crate::types::float::{FloatRepr, SmallestFloatRepr};
use crate::{
Expand Down Expand Up @@ -2914,6 +2919,27 @@ mod tests {
Ok(())
}

#[test]
fn write_system_macro_invocation() -> IonResult<()> {
encoding_test(
|writer: &mut LazyRawBinaryWriter_1_1<&mut Vec<u8>>| {
let mut args =
writer.eexp_writer(MacroIdRef::SystemAddress(system_macros::MAKE_STRING))?;
args.write_symbol("foo")?
.write_symbol("bar")?
.write_symbol("baz")?;
args.close()
},
&[
0xEF, 0x03, // Invoke system macro address 3
0xA3, 0x66, 0x6f, 0x6f, // foo
0xA3, 0x62, 0x61, 0x72, // bar
0xA3, 0x62, 0x61, 0x7a, // baz
],
)?;
Ok(())
}

#[rstest]
#[case::boolean("true false")]
#[case::int("1 2 3 4 5")]
Expand Down
40 changes: 32 additions & 8 deletions src/lazy/encoder/text/v1_1/value_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ pub struct TextAnnotatedValueWriter_1_1<'value, W: Write> {
}

impl<'value, W: Write + 'value> AnnotatableWriter for TextValueWriter_1_1<'value, W> {
type AnnotatedValueWriter<'a> = TextAnnotatedValueWriter_1_1<'a, W> where Self: 'a;
type AnnotatedValueWriter<'a>
= TextAnnotatedValueWriter_1_1<'a, W>
where
Self: 'a;

fn with_annotations<'a>(
self,
Expand Down Expand Up @@ -83,14 +86,22 @@ impl<'value, W: Write + 'value> ValueWriter for TextValueWriter_1_1<'value, W> {
}

fn eexp_writer<'a>(self, macro_id: impl Into<MacroIdRef<'a>>) -> IonResult<Self::EExpWriter> {
let id = macro_id.into();
let opening_text = match id {
MacroIdRef::LocalName(name) => format!("(:{}", name),
MacroIdRef::LocalAddress(address) => format!("(:{}", address),
MacroIdRef::SystemAddress(system_address) => {
format!("(:$ion::{}", system_address.as_usize())
}
};
TextEExpWriter_1_1::new(
self.value_writer_1_0.writer,
self.value_writer_1_0.depth,
self.value_writer_1_0.parent_type,
// Pretend we're in a sexp for syntax purposes
ContainerType::SExp,
// TODO: Reusable buffer
format!("(:{}", macro_id.into()).as_str(),
opening_text.as_str(),
" ",
match self.value_writer_1_0.parent_type {
ParentType::Struct | ParentType::List => ",",
Expand All @@ -101,7 +112,10 @@ impl<'value, W: Write + 'value> ValueWriter for TextValueWriter_1_1<'value, W> {
}

impl<'value, W: Write + 'value> AnnotatableWriter for TextAnnotatedValueWriter_1_1<'value, W> {
type AnnotatedValueWriter<'a> = TextAnnotatedValueWriter_1_1<'a, W> where Self: 'a;
type AnnotatedValueWriter<'a>
= TextAnnotatedValueWriter_1_1<'a, W>
where
Self: 'a;

fn with_annotations<'a>(
self,
Expand Down Expand Up @@ -167,7 +181,10 @@ pub struct TextListWriter_1_1<'value, W: Write> {
}

impl<W: Write> MakeValueWriter for TextListWriter_1_1<'_, W> {
type ValueWriter<'a> = TextValueWriter_1_1<'a, W> where Self: 'a;
type ValueWriter<'a>
= TextValueWriter_1_1<'a, W>
where
Self: 'a;

fn make_value_writer(&mut self) -> Self::ValueWriter<'_> {
TextValueWriter_1_1 {
Expand All @@ -189,7 +206,10 @@ pub struct TextSExpWriter_1_1<'value, W: Write> {
}

impl<W: Write> MakeValueWriter for TextSExpWriter_1_1<'_, W> {
type ValueWriter<'a> = TextValueWriter_1_1<'a, W> where Self: 'a;
type ValueWriter<'a>
= TextValueWriter_1_1<'a, W>
where
Self: 'a;

fn make_value_writer(&mut self) -> Self::ValueWriter<'_> {
TextValueWriter_1_1 {
Expand Down Expand Up @@ -217,7 +237,10 @@ impl<W: Write> FieldEncoder for TextStructWriter_1_1<'_, W> {
}

impl<W: Write> MakeValueWriter for TextStructWriter_1_1<'_, W> {
type ValueWriter<'a> = TextValueWriter_1_1<'a, W> where Self: 'a;
type ValueWriter<'a>
= TextValueWriter_1_1<'a, W>
where
Self: 'a;

fn make_value_writer(&mut self) -> Self::ValueWriter<'_> {
TextValueWriter_1_1 {
Expand Down Expand Up @@ -274,9 +297,10 @@ impl<'value, W: Write + 'value> SequenceWriter for TextEExpWriter_1_1<'value, W>
}

impl<'value, W: Write + 'value> MakeValueWriter for TextEExpWriter_1_1<'value, W> {
type ValueWriter<'a> = TextValueWriter_1_1<'a, W>
type ValueWriter<'a>
= TextValueWriter_1_1<'a, W>
where
Self: 'a,;
Self: 'a;

fn make_value_writer(&mut self) -> Self::ValueWriter<'_> {
TextValueWriter_1_1 {
Expand Down
24 changes: 21 additions & 3 deletions src/lazy/encoder/text/v1_1/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ impl<W: Write> SequenceWriter for LazyRawTextWriter_1_1<W> {
}

impl<W: Write> MakeValueWriter for LazyRawTextWriter_1_1<W> {
type ValueWriter<'a> = TextValueWriter_1_1<'a, W>
type ValueWriter<'a>
= TextValueWriter_1_1<'a, W>
where
Self: 'a;

Expand Down Expand Up @@ -117,11 +118,11 @@ mod tests {
use crate::lazy::expanded::compiler::TemplateCompiler;
use crate::lazy::expanded::macro_evaluator::RawEExpression;
use crate::lazy::expanded::EncodingContext;
use crate::lazy::text::raw::v1_1::reader::{LazyRawTextReader_1_1, MacroIdRef};
use crate::lazy::text::raw::v1_1::reader::{system_macros, LazyRawTextReader_1_1, MacroIdRef};
use crate::symbol_ref::AsSymbolRef;
use crate::{
v1_1, Annotatable, Decimal, ElementReader, IonData, IonResult, IonType, Null, RawSymbolRef,
Reader, Timestamp,
Reader, Timestamp, Writer,
};

#[test]
Expand Down Expand Up @@ -355,4 +356,21 @@ mod tests {
assert!(IonData::eq(&expected, &actual));
Ok(())
}

#[test]
fn test_system_eexp() -> IonResult<()> {
let mut writer = Writer::new(v1_1::Text, vec![])?;
let mut macro_args = writer.eexp_writer(system_macros::MAKE_STRING)?;
macro_args
.write("foo")?
.write("bar")?
.write("baz")?
.write_symbol("+++")?;
macro_args.close()?;
let encoded_bytes = writer.close()?;
let mut reader = Reader::new(v1_1::Text, encoded_bytes)?;
let element = reader.read_next_element()?;
assert_eq!("foobarbaz+++", element.unwrap().as_string().unwrap());
Ok(())
}
}
Loading

0 comments on commit cff0c7f

Please sign in to comment.