Skip to content

Commit f4fdc16

Browse files
authored
Merge pull request #14 from ndsev/extract-as-ext-enhancements
Enhancements for extract_extern_as_yaml
2 parents d177d7c + 86a5f3f commit f4fdc16

File tree

2 files changed

+60
-7
lines changed

2 files changed

+60
-7
lines changed

requirements.txt

+3
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
11
PyYAML~=6.0
22
zserio>=2.15.0,<3
3+
zstandard
4+
lz4
5+
brotli

zs_yaml/built_in_transformations.py

+57-7
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,30 @@
66
import os
77
import zserio
88
import yaml
9+
import zlib
10+
import zstandard
11+
import lz4.frame
12+
import brotli
13+
from enum import Enum
914

1015
# Cache to store loaded YAML/JSON files
1116
_file_cache = {}
1217

18+
class CompressionType(Enum):
    """Compression schemes selectable for a binary payload.

    Members can be looked up by integer value (``CompressionType(2)``) or
    by case-insensitive name through :meth:`from_string`.
    """

    NO_COMPRESSION = 0
    ZLIB = 1
    ZSTD = 2
    LZ4 = 3
    BROTLI = 4

    @classmethod
    def from_string(cls, value: str) -> "CompressionType":
        """Convert a member name to its enum value, case-insensitively.

        Args:
            value: Member name such as ``"zstd"`` or ``"NO_COMPRESSION"``.

        Returns:
            The ``CompressionType`` member whose name matches ``value``.

        Raises:
            ValueError: If ``value`` does not name any member.
        """
        try:
            return cls[value.upper()]
        except KeyError:
            valid_names = ', '.join(cls.__members__)
            # ``from None``: the internal KeyError adds nothing for the
            # caller, so keep it out of the reported traceback.
            raise ValueError(
                f"Unknown compression type: {value}. Valid values are: {valid_names}"
            ) from None
32+
1333
def insert_yaml_as_extern(transformer, file, template_args=None):
1434
"""
1535
Include external YAML by transforming it to JSON and using zserio.
@@ -121,34 +141,53 @@ def repeat_node(transformer, node, count):
121141
return [copy.deepcopy(node) for _ in range(count)]
122142

123143

124-
def extract_extern_as_yaml(transformer, buffer, bitSize, schema_module, schema_type, file_name):
144+
def extract_extern_as_yaml(transformer, buffer, bitSize, schema_module, schema_type, file_name, compression_type=0, remove_nulls=False):
125145
"""
126146
Extract binary data and save as an external YAML file.
127147
128-
Note: This function contains some redundancy with the bin_to_yaml function
129-
in zs_yaml.convert to avoid circular imports. If modifying this function,
130-
please consider updating bin_to_yaml as well, and vice versa.
131-
132148
Args:
133149
transformer (YamlTransformer): The transformer instance.
134150
buffer (bytes): The binary data to be extracted.
135151
bitSize (int): The size of the binary data in bits.
136152
schema_module (str): The name of the schema module.
137153
schema_type (str): The name of the schema type.
138154
file_name (str): The name of the file to save the extracted data.
155+
compression_type (Union[CompressionType, str, int, None]): Type of compression used.
156+
Can be a CompressionType enum, string (e.g., 'zstd'), or integer value.
157+
Defaults to 0 (CompressionType.NO_COMPRESSION); None is also accepted and means no compression.
158+
remove_nulls (bool): If True, entries whose value is null are removed (recursively) from the extracted YAML. Defaults to False.
139159
140160
Returns:
141161
dict: A reference to the extracted file.
142162
"""
163+
# Convert compression_type to enum if needed
164+
if compression_type is not None:
165+
if isinstance(compression_type, str):
166+
compression_type = CompressionType.from_string(compression_type)
167+
elif isinstance(compression_type, int):
168+
compression_type = CompressionType(compression_type)
169+
elif not isinstance(compression_type, CompressionType):
170+
raise ValueError("compression_type must be a CompressionType enum, string, or integer value")
171+
143172
# Ensure the output directory exists
144173
output_dir = os.path.dirname(transformer.yaml_file_path)
145174
os.makedirs(output_dir, exist_ok=True)
146175

147176
# Generate the full path for the new file
148177
yaml_file_path = os.path.join(output_dir, file_name)
149178

150-
# Extract binary data
179+
# Extract and decompress binary data if needed
151180
buffer = bytes(buffer)
181+
if compression_type is not None:
182+
if compression_type == CompressionType.ZLIB:
183+
buffer = zlib.decompress(buffer)
184+
elif compression_type == CompressionType.ZSTD:
185+
dctx = zstandard.ZstdDecompressor()
186+
buffer = dctx.decompress(buffer)
187+
elif compression_type == CompressionType.LZ4:
188+
buffer = lz4.frame.decompress(buffer)
189+
elif compression_type == CompressionType.BROTLI:
190+
buffer = brotli.decompress(buffer)
152191

153192
# Import the module and get the type
154193
module = importlib.import_module(schema_module)
@@ -169,7 +208,18 @@ def extract_extern_as_yaml(transformer, buffer, bitSize, schema_module, schema_t
169208
**json_data
170209
}
171210

172-
# Save the extracted data to the new file
211+
# Clean the data if remove_nulls is True
212+
if remove_nulls:
213+
def rm_nulls(data):
214+
"""Remove null values from a dictionary recursively."""
215+
if isinstance(data, dict):
216+
return {k: rm_nulls(v) for k, v in data.items() if v is not None}
217+
elif isinstance(data, list):
218+
return [rm_nulls(item) for item in data if item is not None]
219+
return data
220+
221+
data_to_write = rm_nulls(data_to_write)
222+
173223
with open(yaml_file_path, 'w') as f:
174224
yaml.dump(data_to_write, f, default_flow_style=False, sort_keys=False)
175225

0 commit comments

Comments
 (0)