Skip to content

Commit

Permalink
style: pre-commit fixes
Browse files: browse the repository at this point in the history
  • Loading branch information
pre-commit-ci[bot] committed Oct 25, 2024
1 parent 1adb2c6 commit 841e364
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 58 deletions.
60 changes: 40 additions & 20 deletions src/lgdo/types/histogram.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import logging
from collections.abc import Iterable, Sequence, Mapping
from collections.abc import Iterable, Mapping, Sequence
from typing import Any

import hist
Expand Down Expand Up @@ -324,7 +324,7 @@ def binning(self) -> tuple[Histogram.Axis, ...]:
assert all(isinstance(v, Histogram.Axis) for k, v in bins)
return tuple(v for _, v in bins)

def fill(self, data, w: np.ndarray = None, keys:List[str] = None) -> None:
def fill(self, data, w: np.ndarray = None, keys: List[str] = None) -> None:
"""Fill histogram by incrementing bins with data points weighted by w
Parameters
Expand All @@ -335,55 +335,75 @@ def fill(self, data, w: np.ndarray = None, keys:List[str] = None) -> None:
w
weights used when incrementing the bins, one per data point. If None, every data point has weight 1
"""
if isinstance(data, np.ndarray) and len(data.shape)==1 and len(self.binning)==1:
if (
isinstance(data, np.ndarray)
and len(data.shape) == 1
and len(self.binning) == 1
):
N = len(data)
data = [data]
elif isinstance(data, np.ndarray) and len(data.shape)==2 and data.shape[1]==len(self.binning):
elif (
isinstance(data, np.ndarray)
and len(data.shape) == 2
and data.shape[1] == len(self.binning)
):
N = data.shape[0]
data = data.T
elif isinstance(data, pd.DataFrame) and data.ndim==len(self.binning):
elif isinstance(data, pd.DataFrame) and data.ndim == len(self.binning):
if keys is not None:
data = data[keys]
N = len(data)
data = data.values.T
elif isinstance(data, Sequence) and len(data)==len(self.binning):
elif isinstance(data, Sequence) and len(data) == len(self.binning):
data = [d if isinstance(d, np.ndarray) else np.array(d) for d in data]
N = len(data[0])
if not all(len(d)==N for d in data):
if not all(len(d) == N for d in data):
msg = "length of all data arrays must be equal"
raise ValueError(msg)
elif isinstance(data, Mapping) and len(data)==len(self.binning):
elif isinstance(data, Mapping) and len(data) == len(self.binning):
if not isinstance(keys, Sequence):
msg = "filling hist with Mapping data requires a list of keys"
raise ValueError(msg)
data = [data[k] if isinstance(data[k], np.ndarray) else np.array(data[k]) for k in keys]
data = [
data[k] if isinstance(data[k], np.ndarray) else np.array(data[k])
for k in keys
]
N = len(data[0])
if not all(len(d)==N for d in data):
if not all(len(d) == N for d in data):
msg = "length of all data arrays must be equal"
raise ValueError(msg)
else:
msg = "data must be 2D numpy array or list of 1D arrays with length equal to number of axes"
raise ValueError(msg)

idx = np.zeros(N, "float64") # bin indices for flattened array
oor_mask = np.ones(N, "bool") # mask for out of range values
stride = [s//self.weights.dtype.itemsize for s in self.weights.nda.strides]
idx = np.zeros(N, "float64") # bin indices for flattened array
oor_mask = np.ones(N, "bool") # mask for out of range values
stride = [s // self.weights.dtype.itemsize for s in self.weights.nda.strides]
for col, ax, s in zip(data, self.binning, stride):
if ax.is_range:
np.add(idx, s*np.floor((col - ax.first)/ax.step - int(not ax.closedleft)), idx)
np.add(
idx,
s * np.floor((col - ax.first) / ax.step - int(not ax.closedleft)),
idx,
)
if ax.closedleft:
oor_mask &= ( (ax.first <= col) & (col < ax.last) )
oor_mask &= (ax.first <= col) & (col < ax.last)
else:
oor_mask &= ( (ax.first < col) & (col <= ax.last) )
oor_mask &= (ax.first < col) & (col <= ax.last)
else:
idx += s*(np.searchsorted(ax.edges, col, side=("right" if ax.closedleft else "left")) - 1)
idx += s * (
np.searchsorted(
ax.edges, col, side=("right" if ax.closedleft else "left")
)
- 1
)
if ax.closedleft:
oor_mask &= ( (ax.edges[0] <= col) & (col < ax.edges[-1]) )
oor_mask &= (ax.edges[0] <= col) & (col < ax.edges[-1])
else:
oor_mask &= ( (ax.edges[0] < col) & (col <= ax.edges[-1]) )
oor_mask &= (ax.edges[0] < col) & (col <= ax.edges[-1])

# increment bin contents
idx = idx[oor_mask].astype('int64')
idx = idx[oor_mask].astype("int64")
w = w[oor_mask] if w is not None else 1
np.add.at(self.weights.nda.reshape(-1), idx, w)

Expand Down
100 changes: 62 additions & 38 deletions tests/types/test_histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,66 +298,90 @@ def test_read_histogram_multiple(lgnd_test_data):
with pytest.raises(LH5DecodeError):
lh5.read("test_histogram_range", [file, file])


def test_histogram_fill(lgnd_test_data):
# Test the basics with fixed width bins
h = Histogram(None, [ (0, 5, 1) ])
h.fill(np.array([0.5, 1.5, 1.1])) # add some data
assert all(h.weights.nda == np.array([1., 2., 0., 0., 0.]))
h.fill(np.array([0.5, 3.5, 4., 3.5])) # add more data
assert all(h.weights.nda == np.array([2., 2., 0., 2., 1.]))
h.fill(np.array([-1., 6., np.inf, np.nan])) # add out of range data
assert all(h.weights.nda == np.array([2., 2., 0., 2., 1.]))
h = Histogram(None, [(0, 5, 1)])
h.fill(np.array([0.5, 1.5, 1.1])) # add some data
assert all(h.weights.nda == np.array([1.0, 2.0, 0.0, 0.0, 0.0]))
h.fill(np.array([0.5, 3.5, 4.0, 3.5])) # add more data
assert all(h.weights.nda == np.array([2.0, 2.0, 0.0, 2.0, 1.0]))
h.fill(np.array([-1.0, 6.0, np.inf, np.nan])) # add out of range data
assert all(h.weights.nda == np.array([2.0, 2.0, 0.0, 2.0, 1.0]))

# Test the basics with variable width bins
h = Histogram(None, [ np.array([0., 0.75, 2., 4., 4.5, 5.]) ])
h.fill(np.array([0.5, 1.5, 1.1])) # add some data
assert all(h.weights.nda == np.array([1., 2., 0., 0., 0.]))
h.fill(np.array([0.5, 3.5, 4., 3.5])) # add more data
assert all(h.weights.nda == np.array([2., 2., 2., 1., 0.]))
h.fill(np.array([-1., 6., np.inf, np.nan])) # add out of range data
assert all(h.weights.nda == np.array([2., 2., 2., 1., 0.]))
h = Histogram(None, [np.array([0.0, 0.75, 2.0, 4.0, 4.5, 5.0])])
h.fill(np.array([0.5, 1.5, 1.1])) # add some data
assert all(h.weights.nda == np.array([1.0, 2.0, 0.0, 0.0, 0.0]))
h.fill(np.array([0.5, 3.5, 4.0, 3.5])) # add more data
assert all(h.weights.nda == np.array([2.0, 2.0, 2.0, 1.0, 0.0]))
h.fill(np.array([-1.0, 6.0, np.inf, np.nan])) # add out of range data
assert all(h.weights.nda == np.array([2.0, 2.0, 2.0, 1.0, 0.0]))

# Test bin edge behavior with fixed width bins
h = Histogram(None, [ Histogram.Axis(None, 0, 6, 1, closedleft=True) ])
h = Histogram(None, [Histogram.Axis(None, 0, 6, 1, closedleft=True)])
h.fill(np.array([0, 2, 4, 6]))
assert all(h.weights.nda == np.array([1., 0., 1., 0., 1., 0.]))
h = Histogram(None, [ Histogram.Axis(None, 0, 6, 1, closedleft=False) ])
assert all(h.weights.nda == np.array([1.0, 0.0, 1.0, 0.0, 1.0, 0.0]))
h = Histogram(None, [Histogram.Axis(None, 0, 6, 1, closedleft=False)])
h.fill(np.array([0, 2, 4, 6]))
assert all(h.weights.nda == np.array([0., 1., 0., 1., 0., 1.]))
assert all(h.weights.nda == np.array([0.0, 1.0, 0.0, 1.0, 0.0, 1.0]))

# Test bin edge behavior with variable width bins
h = Histogram(None, [ Histogram.Axis([0., 0.75, 2., 4., 4.5, 5., 6.], None, None, None, closedleft=True) ])
h = Histogram(
None,
[
Histogram.Axis(
[0.0, 0.75, 2.0, 4.0, 4.5, 5.0, 6.0], None, None, None, closedleft=True
)
],
)
h.fill(np.array([0, 2, 4, 6]))
assert all(h.weights.nda == np.array([1., 0., 1., 1., 0., 0.]))
h = Histogram(None, [ Histogram.Axis([0., 0.75, 2., 4., 4.5, 5., 6.], None, None, None, closedleft=False) ])
assert all(h.weights.nda == np.array([1.0, 0.0, 1.0, 1.0, 0.0, 0.0]))
h = Histogram(
None,
[
Histogram.Axis(
[0.0, 0.75, 2.0, 4.0, 4.5, 5.0, 6.0], None, None, None, closedleft=False
)
],
)
h.fill(np.array([0, 2, 4, 6]))
assert all(h.weights.nda == np.array([0., 1., 1., 0., 0., 1.]))
assert all(h.weights.nda == np.array([0.0, 1.0, 1.0, 0.0, 0.0, 1.0]))

# Test 2d histogram with numpy array data
h = Histogram(None, [ (0, 3, 1), (0, 3, 1) ])
data = np.array( [ [1, 1], [2, 2], [-1, 2], [2, -1] ])
h = Histogram(None, [(0, 3, 1), (0, 3, 1)])
data = np.array([[1, 1], [2, 2], [-1, 2], [2, -1]])
h.fill(data)
assert np.all(h.weights.nda == np.array([[0., 0., 0.], [0., 1., 0.], [0., 0., 1.]]))

assert np.all(
h.weights.nda == np.array([[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
)

# Test 2d histogram with pandas data
h = Histogram(None, [ (0, 3, 1), (0, 3, 1) ])
h = Histogram(None, [(0, 3, 1), (0, 3, 1)])
data = pd.DataFrame({"a": [1, 2, -1, 2], "b": [1, 2, 2, -1]})
h.fill(data)
assert np.all(h.weights.nda == np.array([[0., 0., 0.], [0., 1., 0.], [0., 0., 1.]]))
assert np.all(
h.weights.nda == np.array([[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
)
h.fill(data, keys=["a", "b"])
assert np.all(h.weights.nda == np.array([[0., 0., 0.], [0., 2., 0.], [0., 0., 2.]]))

assert np.all(
h.weights.nda == np.array([[0.0, 0.0, 0.0], [0.0, 2.0, 0.0], [0.0, 0.0, 2.0]])
)

# Test list of columnar data
h = Histogram(None, [ (0, 3, 1), (0, 3, 1) ])
h = Histogram(None, [(0, 3, 1), (0, 3, 1)])
data = [np.array([1, 2, -1, 2]), np.array([1, 2, 2, -1])]
h.fill(data)
assert np.all(h.weights.nda == np.array([[0., 0., 0.], [0., 1., 0.], [0., 0., 1.]]))
assert np.all(
h.weights.nda == np.array([[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
)

# Test dict (Mapping) of columnar data; filling from a Mapping requires an explicit list of keys
h = Histogram(None, [ (0, 3, 1), (0, 3, 1) ])
h = Histogram(None, [(0, 3, 1), (0, 3, 1)])
data = {"a": [1, 2, -1, 2], "b": [1, 2, 2, -1]}
with pytest.raises(ValueError, match="requires a list of keys"):
h.fill(data)
h.fill(data, keys=["a", "b"])
assert np.all(h.weights.nda == np.array([[0., 0., 0.], [0., 1., 0.], [0., 0., 1.]]))

assert np.all(
h.weights.nda == np.array([[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
)

0 comments on commit 841e364

Please sign in to comment.