Skip to content

Commit

Permalink
Merge branch 'main' of github.com:tdixon97/legend-pydataobj into main
Browse files: browse the repository at this point in the history
  • Loading branch information
tdixon97 committed Jan 28, 2025
2 parents 563152d + b678606 commit fc0bc10
Show file tree
Hide file tree
Showing 21 changed files with 243 additions and 41 deletions.
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ ci:

repos:
- repo: https://github.com/adamchainz/blacken-docs
rev: "1.18.0"
rev: "1.19.1"
hooks:
- id: blacken-docs
additional_dependencies: [black==23.*]
Expand All @@ -29,7 +29,7 @@ repos:
- id: trailing-whitespace

- repo: https://github.com/kynan/nbstripout
rev: "0.7.1"
rev: "0.8.1"
hooks:
- id: nbstripout
args:
Expand All @@ -40,7 +40,7 @@ repos:
]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.6.9"
rev: "v0.8.6"
hooks:
- id: ruff
args: ["--fix", "--show-fixes"]
Expand Down Expand Up @@ -72,12 +72,12 @@ repos:
args: [--prose-wrap=always]

- repo: https://github.com/abravalheri/validate-pyproject
rev: v0.20.2
rev: v0.23
hooks:
- id: validate-pyproject

- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.29.3
rev: 0.30.0
hooks:
- id: check-dependabot
- id: check-github-workflows
Expand Down
12 changes: 6 additions & 6 deletions src/lgdo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,23 +62,23 @@
)

__all__ = [
"LGDO",
"Array",
"ArrayOfEqualSizedArrays",
"ArrayOfEncodedEqualSizedArrays",
"ArrayOfEqualSizedArrays",
"FixedSizeArray",
"Histogram",
"LGDO",
"LH5Iterator",
"LH5Store",
"Scalar",
"Struct",
"Table",
"VectorOfVectors",
"VectorOfEncodedVectors",
"VectorOfVectors",
"WaveformTable",
"__version__",
"load_dfs",
"load_nda",
"ls",
"show",
"LH5Iterator",
"LH5Store",
"__version__",
]
6 changes: 3 additions & 3 deletions src/lgdo/compression/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@
from .varlen import ULEB128ZigZagDiff

__all__ = [
"WaveformCodec",
"encode",
"decode",
"RadwareSigcompress",
"ULEB128ZigZagDiff",
"WaveformCodec",
"decode",
"encode",
]
4 changes: 2 additions & 2 deletions src/lgdo/lh5/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@
"LH5Store",
"load_dfs",
"load_nda",
"ls",
"read",
"write",
"read_as",
"ls",
"read_n_rows",
"show",
"write",
]
20 changes: 10 additions & 10 deletions src/lgdo/lh5/_serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,22 @@
from .write.vector_of_vectors import _h5_write_vector_of_vectors

__all__ = [
"_h5_read_lgdo",
"_h5_read_vector_of_vectors",
"_h5_read_ndarray",
"_h5_read_array",
"_h5_read_array_of_encoded_equalsized_arrays",
"_h5_read_array_of_equalsized_arrays",
"_h5_read_encoded_array",
"_h5_read_fixedsize_array",
"_h5_read_array_of_equalsized_arrays",
"_h5_read_struct",
"_h5_read_table",
"_h5_read_histogram",
"_h5_read_lgdo",
"_h5_read_ndarray",
"_h5_read_scalar",
"_h5_read_array_of_encoded_equalsized_arrays",
"_h5_read_struct",
"_h5_read_table",
"_h5_read_vector_of_encoded_vectors",
"_h5_write_scalar",
"_h5_read_vector_of_vectors",
"_h5_write_array",
"_h5_write_vector_of_vectors",
"_h5_write_struct",
"_h5_write_lgdo",
"_h5_write_scalar",
"_h5_write_struct",
"_h5_write_vector_of_vectors",
]
5 changes: 5 additions & 0 deletions src/lgdo/lh5/_serializers/write/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ def _h5_write_array(
if "hdf5_settings" in obj.attrs:
h5py_kwargs |= obj.attrs["hdf5_settings"]

# HACK: a tuple is strictly requested for the "chunks" setting, but
# we'd like to pass a list too in some situations
if "chunks" in h5py_kwargs and isinstance(h5py_kwargs["chunks"], list):
h5py_kwargs["chunks"] = tuple(h5py_kwargs["chunks"])

# create HDF5 dataset
ds = group.create_dataset(name, data=nda, **h5py_kwargs)

Expand Down
6 changes: 6 additions & 0 deletions src/lgdo/lh5/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ def __str__(self) -> str:
+ super().__str__()
)

def __reduce__(self) -> tuple:
    """Return the ``(callable, args)`` pair pickle uses to rebuild this exception.

    The arguments are the base exception ``args`` followed by the ``file``
    and ``obj`` attributes stored on the instance.
    """
    rebuild_args = (*self.args, self.file, self.obj)
    return self.__class__, rebuild_args


class LH5EncodeError(Exception):
def __init__(
Expand All @@ -32,3 +35,6 @@ def __str__(self) -> str:
f"while writing object {self.group}/{self.name} to file {self.file}: "
+ super().__str__()
)

def __reduce__(self) -> tuple:
    """Return the ``(callable, args)`` pair pickle uses to rebuild this exception.

    The arguments are the base exception ``args`` followed by the ``file``,
    ``group`` and ``name`` attributes stored on the instance.
    """
    rebuild_args = (*self.args, self.file, self.group, self.name)
    return self.__class__, rebuild_args
6 changes: 3 additions & 3 deletions src/lgdo/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@
from .waveformtable import WaveformTable

__all__ = [
"LGDO",
"Array",
"ArrayOfEqualSizedArrays",
"ArrayOfEncodedEqualSizedArrays",
"ArrayOfEqualSizedArrays",
"FixedSizeArray",
"Histogram",
"LGDO",
"Scalar",
"Struct",
"Table",
"VectorOfVectors",
"VectorOfEncodedVectors",
"VectorOfVectors",
"WaveformTable",
]

Expand Down
11 changes: 8 additions & 3 deletions src/lgdo/types/histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,13 +418,18 @@ def fill(self, data, w: NDArray = None, keys: Sequence[str] = None) -> None:

def __setitem__(self, name: str, obj: LGDO) -> None:
# do not allow for new attributes on this
msg = "histogram fields cannot be mutated"
raise TypeError(msg)
known_keys = ("binning", "weights", "isdensity")
if name in known_keys and not dict.__contains__(self, name):
# but allow initialization while unpickling (after __init__() this is unreachable)
dict.__setitem__(self, name, obj)
else:
msg = "histogram fields cannot be mutated "
raise TypeError(msg)

def __getattr__(self, name: str) -> None:
# do not allow for new attributes on this
msg = "histogram fields cannot be mutated"
raise TypeError(msg)
raise AttributeError(msg)

def add_field(self, name: str | int, obj: LGDO) -> None: # noqa: ARG002
"""
Expand Down
6 changes: 6 additions & 0 deletions src/lgdo/types/lgdo.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
class LGDO(ABC):
"""Abstract base class representing a LEGEND Data Object (LGDO)."""

def __new__(cls, *_args, **_kwargs):
    """Allocate the instance and attach an empty ``attrs`` dict to it.

    Positional and keyword arguments are accepted but ignored here.
    Setting ``attrs`` at allocation time is what allows LGDO objects to be
    (un-)pickled.
    """
    instance = super().__new__(cls)
    instance.attrs = {}
    return instance

@abstractmethod
def __init__(self, attrs: dict[str, Any] | None = None) -> None:
self.attrs = {} if attrs is None else dict(attrs)
Expand Down
6 changes: 6 additions & 0 deletions src/lgdo/types/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ class Table(Struct):
:meth:`__len__` to access valid data, which returns the ``size`` attribute.
"""

def __new__(cls, *args, **kwargs):
    """Allocate the instance via the parent class and preset ``size`` to ``None``.

    All arguments are forwarded unchanged to the parent ``__new__``.
    Defining ``size`` at allocation time is what allows these objects to be
    (un-)pickled.
    """
    instance = super().__new__(cls, *args, **kwargs)
    instance.size = None
    return instance

def __init__(
self,
col_dict: Mapping[str, LGDO] | pd.DataFrame | ak.Array | None = None,
Expand Down
23 changes: 15 additions & 8 deletions src/lgdo/types/vectorofvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import awkward_pandas as akpd
import numpy as np
import pandas as pd
from numba import jit
from numpy.typing import ArrayLike, DTypeLike, NDArray

from .. import utils
Expand Down Expand Up @@ -560,18 +561,16 @@ def to_aoesa(
compatible one.
"""
if self.ndim == 2:
ak_arr = self.view_as("ak")

if max_len is None:
max_len = int(ak.max(ak.count(ak_arr, axis=-1)))

nda = ak.fill_none(
ak.pad_none(ak_arr, max_len, clip=True), fill_val
).to_numpy(allow_missing=False)

lens = np.copy(self.cumulative_length)
lens[1:] = lens[1:] - lens[:-1]
max_len = int(np.max(lens))
nda = np.full((len(self), max_len), fill_val)
if preserve_dtype:
nda = nda.astype(self.flattened_data.dtype, copy=False)

_to_aoesa(self.flattened_data.nda, self.cumulative_length.nda, nda)

return aoesa.ArrayOfEqualSizedArrays(nda=nda, attrs=self.getattrs())

raise NotImplementedError
Expand Down Expand Up @@ -664,3 +663,11 @@ def view_as(

msg = f"{library} is not a supported third-party format."
raise ValueError(msg)


@jit
def _to_aoesa(flattened_array, cumulative_length, nda):
    """Copy each vector of a flattened vector-of-vectors into one row of `nda`.

    Row ``i`` of `nda` receives the elements of `flattened_array` between
    the previous cumulative length (0 for the first row) and
    ``cumulative_length[i]``, written from column 0 onwards. Columns past
    the end of a vector are not touched, so the caller is expected to
    pre-fill `nda` with the padding value. Vectors longer than a row of
    `nda` are truncated to the row width instead of failing with a shape
    mismatch in the slice assignment.

    Parameters
    ----------
    flattened_array
        1D array holding all vectors back to back.
    cumulative_length
        1D array with the end index (exclusive) of each vector in
        `flattened_array`; assumed non-decreasing.
    nda
        2D output array with one row per vector, modified in place.
    """
    n_cols = nda.shape[1]
    prev_cl = 0
    for i, cl in enumerate(cumulative_length):
        # never copy more elements than one output row can hold
        n = min(cl - prev_cl, n_cols)
        nda[i, :n] = flattened_array[prev_cl : prev_cl + n]
        prev_cl = cl
17 changes: 17 additions & 0 deletions tests/lh5/test_exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from __future__ import annotations

import pickle

from lgdo.lh5.exceptions import LH5DecodeError, LH5EncodeError


def test_pickle():
    """Round-trip both LH5 exception types through pickle and check their class."""
    # exceptions need to survive (un-)pickling, e.g. for multiprocessing use.
    encode_err = LH5EncodeError("message", "file", "group", "name")
    restored = pickle.loads(pickle.dumps(encode_err))
    assert isinstance(restored, LH5EncodeError)

    decode_err = LH5DecodeError("message", "file", "obj")
    restored = pickle.loads(pickle.dumps(decode_err))
    assert isinstance(restored, LH5DecodeError)
13 changes: 13 additions & 0 deletions tests/lh5/test_lh5_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,19 @@ def test_write_with_hdf5_compression(lgnd_file, tmptestdir):
assert h5f["/geds/raw/waveform/values"].compression is None
assert h5f["/geds/raw/waveform/values"].shuffle is False

store.write(
wft.values,
"/geds/raw/waveform/values",
f"{tmptestdir}/tmp-pygama-hdf5-compressed-wfs.lh5",
wo_mode="overwrite_file",
chunks=[1, 10],
compression=None,
shuffle=False,
)
with h5py.File(f"{tmptestdir}/tmp-pygama-hdf5-compressed-wfs.lh5") as h5f:
assert h5f["/geds/raw/waveform/values"].compression is None
assert h5f["/geds/raw/waveform/values"].shuffle is False


def test_write_empty_vov(tmptestdir):
vov = types.VectorOfVectors(flattened_data=[], cumulative_length=[])
Expand Down
13 changes: 13 additions & 0 deletions tests/types/test_array.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import pickle

import awkward as ak
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -61,3 +63,14 @@ def test_view():

with pytest.raises(ValueError):
a.view_as("ak", with_units=True)


def test_pickle():
    """Check that an Array keeps its data and attributes across a pickle round trip."""
    original = Array(nda=np.array([1, 2, 3, 4]))
    original.attrs["attr1"] = 1

    restored = pickle.loads(pickle.dumps(original))

    assert isinstance(restored, Array)
    # both the user-set attribute and the datatype attribute must be preserved
    assert restored.attrs["attr1"] == 1
    assert restored.attrs["datatype"] == original.attrs["datatype"]
    assert np.all(restored.nda == np.array([1, 2, 3, 4]))
50 changes: 50 additions & 0 deletions tests/types/test_encoded.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import pickle

import awkward as ak
import awkward_pandas as akpd
import numpy as np
Expand Down Expand Up @@ -285,3 +287,51 @@ def test_aoeesa_view_as():

with pytest.raises(TypeError):
df = voev.view_as("np")


def test_aoeesa_pickle():
    """Check that iterating a pickled-and-restored ArrayOfEncodedEqualSizedArrays yields the original vectors."""
    flat = Array(nda=np.array([1, 2, 3, 4, 5, 2, 4, 8, 9, 7, 5, 3, 1]))
    cumlen = Array(nda=np.array([2, 5, 6, 10, 13]))
    obj = ArrayOfEncodedEqualSizedArrays(
        encoded_data=VectorOfVectors(
            flattened_data=flat,
            cumulative_length=cumlen,
        ),
        decoded_size=99,
    )

    restored = pickle.loads(pickle.dumps(obj))

    # the flattened data split at the cumulative lengths
    expected = [
        [1, 2],
        [3, 4, 5],
        [2],
        [4, 8, 9, 7],
        [5, 3, 1],
    ]

    for row, vector in enumerate(restored):
        assert np.array_equal(vector, expected[row])


def test_voev_pickle():
    """Check that iterating a pickled-and-restored VectorOfEncodedVectors yields the original vectors and sizes."""
    flat = Array(nda=np.array([1, 2, 3, 4, 5, 2, 4, 8, 9, 7, 5, 3, 1]))
    cumlen = Array(nda=np.array([2, 5, 6, 10, 13]))
    obj = VectorOfEncodedVectors(
        encoded_data=VectorOfVectors(
            flattened_data=flat,
            cumulative_length=cumlen,
        ),
        decoded_size=Array(shape=5, fill_val=6),
        attrs={"units": "s"},
    )

    restored = pickle.loads(pickle.dumps(obj))

    # the flattened data split at the cumulative lengths
    expected = [
        [1, 2],
        [3, 4, 5],
        [2],
        [4, 8, 9, 7],
        [5, 3, 1],
    ]

    for row, (vector, size) in enumerate(restored):
        assert np.array_equal(vector, expected[row])
        assert size == 6
Loading

0 comments on commit fc0bc10

Please sign in to comment.