style: pre-commit fixes

legend-exp · Oct 25, 2024 · 841e364
1 parent 1adb2c6
commit 841e364
Show file tree

Hide file tree

Showing 2 changed files with 102 additions and 58 deletions.
diff --git a/src/lgdo/types/histogram.py b/src/lgdo/types/histogram.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import logging
-from collections.abc import Iterable, Sequence, Mapping
+from collections.abc import Iterable, Mapping, Sequence
 from typing import Any
 
 import hist
@@ -324,7 +324,7 @@ def binning(self) -> tuple[Histogram.Axis, ...]:
         assert all(isinstance(v, Histogram.Axis) for k, v in bins)
         return tuple(v for _, v in bins)
 
-    def fill(self, data, w: np.ndarray = None, keys:List[str] = None) -> None:
+    def fill(self, data, w: np.ndarray = None, keys: List[str] = None) -> None:
         """Fill histogram by incrementing bins with data points weighted by w
 
         Parameters
@@ -335,55 +335,75 @@ def fill(self, data, w: np.ndarray = None, keys:List[str] = None) -> None:
         w
             weight to use for incrementing data points. If None, use 1 for all
         """
-        if isinstance(data, np.ndarray) and len(data.shape)==1 and len(self.binning)==1:
+        if (
+            isinstance(data, np.ndarray)
+            and len(data.shape) == 1
+            and len(self.binning) == 1
+        ):
             N = len(data)
             data = [data]
-        elif isinstance(data, np.ndarray) and len(data.shape)==2 and data.shape[1]==len(self.binning):
+        elif (
+            isinstance(data, np.ndarray)
+            and len(data.shape) == 2
+            and data.shape[1] == len(self.binning)
+        ):
             N = data.shape[0]
             data = data.T
-        elif isinstance(data, pd.DataFrame) and data.ndim==len(self.binning):
+        elif isinstance(data, pd.DataFrame) and data.ndim == len(self.binning):
             if keys is not None:
                 data = data[keys]
             N = len(data)
             data = data.values.T
-        elif isinstance(data, Sequence) and len(data)==len(self.binning):
+        elif isinstance(data, Sequence) and len(data) == len(self.binning):
             data = [d if isinstance(d, np.ndarray) else np.array(d) for d in data]
             N = len(data[0])
-            if not all(len(d)==N for d in data):
+            if not all(len(d) == N for d in data):
                 msg = "length of all data arrays must be equal"
                 raise ValueError(msg)
-        elif isinstance(data, Mapping) and len(data)==len(self.binning):
+        elif isinstance(data, Mapping) and len(data) == len(self.binning):
             if not isinstance(keys, Sequence):
                 msg = "filling hist with Mapping data requires a list of keys"
                 raise ValueError(msg)
-            data = [data[k] if isinstance(data[k], np.ndarray) else np.array(data[k]) for k in keys]
+            data = [
+                data[k] if isinstance(data[k], np.ndarray) else np.array(data[k])
+                for k in keys
+            ]
             N = len(data[0])
-            if not all(len(d)==N for d in data):
+            if not all(len(d) == N for d in data):
                 msg = "length of all data arrays must be equal"
                 raise ValueError(msg)
         else:
             msg = "data must be 2D numpy array or list of 1D arrays with length equal to number of axes"
             raise ValueError(msg)
 
-        idx = np.zeros(N, "float64") # bin indices for flattened array
-        oor_mask = np.ones(N, "bool") # mask for out of range values
-        stride = [s//self.weights.dtype.itemsize for s in self.weights.nda.strides]
+        idx = np.zeros(N, "float64")  # bin indices for flattened array
+        oor_mask = np.ones(N, "bool")  # mask for out of range values
+        stride = [s // self.weights.dtype.itemsize for s in self.weights.nda.strides]
         for col, ax, s in zip(data, self.binning, stride):
             if ax.is_range:
-                np.add(idx, s*np.floor((col - ax.first)/ax.step - int(not ax.closedleft)), idx)
+                np.add(
+                    idx,
+                    s * np.floor((col - ax.first) / ax.step - int(not ax.closedleft)),
+                    idx,
+                )
                 if ax.closedleft:
-                    oor_mask &= ( (ax.first <= col) & (col < ax.last) )
+                    oor_mask &= (ax.first <= col) & (col < ax.last)
                 else:
-                    oor_mask &= ( (ax.first < col) & (col <= ax.last) )
+                    oor_mask &= (ax.first < col) & (col <= ax.last)
             else:
-                idx += s*(np.searchsorted(ax.edges, col, side=("right" if ax.closedleft else "left")) - 1)
+                idx += s * (
+                    np.searchsorted(
+                        ax.edges, col, side=("right" if ax.closedleft else "left")
+                    )
+                    - 1
+                )
                 if ax.closedleft:
-                    oor_mask &= ( (ax.edges[0] <= col) & (col < ax.edges[-1]) )
+                    oor_mask &= (ax.edges[0] <= col) & (col < ax.edges[-1])
                 else:
-                    oor_mask &= ( (ax.edges[0] < col) & (col <= ax.edges[-1]) )
+                    oor_mask &= (ax.edges[0] < col) & (col <= ax.edges[-1])
 
         # increment bin contents
-        idx = idx[oor_mask].astype('int64')
+        idx = idx[oor_mask].astype("int64")
         w = w[oor_mask] if w is not None else 1
         np.add.at(self.weights.nda.reshape(-1), idx, w)
 

diff --git a/tests/types/test_histogram.py b/tests/types/test_histogram.py
@@ -298,66 +298,90 @@ def test_read_histogram_multiple(lgnd_test_data):
     with pytest.raises(LH5DecodeError):
         lh5.read("test_histogram_range", [file, file])
 
+
 def test_histogram_fill(lgnd_test_data):
     # Test the basics with fixed width bins
-    h = Histogram(None, [ (0, 5, 1) ])
-    h.fill(np.array([0.5, 1.5, 1.1])) # add some data
-    assert all(h.weights.nda == np.array([1., 2., 0., 0., 0.]))
-    h.fill(np.array([0.5, 3.5, 4., 3.5])) # add more data
-    assert all(h.weights.nda == np.array([2., 2., 0., 2., 1.]))
-    h.fill(np.array([-1., 6., np.inf, np.nan])) # add out of range data
-    assert all(h.weights.nda == np.array([2., 2., 0., 2., 1.]))
+    h = Histogram(None, [(0, 5, 1)])
+    h.fill(np.array([0.5, 1.5, 1.1]))  # add some data
+    assert all(h.weights.nda == np.array([1.0, 2.0, 0.0, 0.0, 0.0]))
+    h.fill(np.array([0.5, 3.5, 4.0, 3.5]))  # add more data
+    assert all(h.weights.nda == np.array([2.0, 2.0, 0.0, 2.0, 1.0]))
+    h.fill(np.array([-1.0, 6.0, np.inf, np.nan]))  # add out of range data
+    assert all(h.weights.nda == np.array([2.0, 2.0, 0.0, 2.0, 1.0]))
 
     # Test the basics with variable width bins
-    h = Histogram(None, [ np.array([0., 0.75, 2., 4., 4.5, 5.]) ])
-    h.fill(np.array([0.5, 1.5, 1.1])) # add some data
-    assert all(h.weights.nda == np.array([1., 2., 0., 0., 0.]))
-    h.fill(np.array([0.5, 3.5, 4., 3.5])) # add more data
-    assert all(h.weights.nda == np.array([2., 2., 2., 1., 0.]))
-    h.fill(np.array([-1., 6., np.inf, np.nan])) # add out of range data
-    assert all(h.weights.nda == np.array([2., 2., 2., 1., 0.]))
-    
+    h = Histogram(None, [np.array([0.0, 0.75, 2.0, 4.0, 4.5, 5.0])])
+    h.fill(np.array([0.5, 1.5, 1.1]))  # add some data
+    assert all(h.weights.nda == np.array([1.0, 2.0, 0.0, 0.0, 0.0]))
+    h.fill(np.array([0.5, 3.5, 4.0, 3.5]))  # add more data
+    assert all(h.weights.nda == np.array([2.0, 2.0, 2.0, 1.0, 0.0]))
+    h.fill(np.array([-1.0, 6.0, np.inf, np.nan]))  # add out of range data
+    assert all(h.weights.nda == np.array([2.0, 2.0, 2.0, 1.0, 0.0]))
+
     # Test bin edge behavior with fixed width bins
-    h = Histogram(None, [ Histogram.Axis(None, 0, 6, 1, closedleft=True) ])
+    h = Histogram(None, [Histogram.Axis(None, 0, 6, 1, closedleft=True)])
     h.fill(np.array([0, 2, 4, 6]))
-    assert all(h.weights.nda == np.array([1., 0., 1., 0., 1., 0.]))
-    h = Histogram(None, [ Histogram.Axis(None, 0, 6, 1, closedleft=False) ])
+    assert all(h.weights.nda == np.array([1.0, 0.0, 1.0, 0.0, 1.0, 0.0]))
+    h = Histogram(None, [Histogram.Axis(None, 0, 6, 1, closedleft=False)])
     h.fill(np.array([0, 2, 4, 6]))
-    assert all(h.weights.nda == np.array([0., 1., 0., 1., 0., 1.]))
-    
+    assert all(h.weights.nda == np.array([0.0, 1.0, 0.0, 1.0, 0.0, 1.0]))
+
     # Test bin edge behavior with variable width bins
-    h = Histogram(None, [ Histogram.Axis([0., 0.75, 2., 4., 4.5, 5., 6.], None, None, None, closedleft=True) ])
+    h = Histogram(
+        None,
+        [
+            Histogram.Axis(
+                [0.0, 0.75, 2.0, 4.0, 4.5, 5.0, 6.0], None, None, None, closedleft=True
+            )
+        ],
+    )
     h.fill(np.array([0, 2, 4, 6]))
-    assert all(h.weights.nda == np.array([1., 0., 1., 1., 0., 0.]))
-    h = Histogram(None, [ Histogram.Axis([0., 0.75, 2., 4., 4.5, 5., 6.], None, None, None, closedleft=False) ])
+    assert all(h.weights.nda == np.array([1.0, 0.0, 1.0, 1.0, 0.0, 0.0]))
+    h = Histogram(
+        None,
+        [
+            Histogram.Axis(
+                [0.0, 0.75, 2.0, 4.0, 4.5, 5.0, 6.0], None, None, None, closedleft=False
+            )
+        ],
+    )
     h.fill(np.array([0, 2, 4, 6]))
-    assert all(h.weights.nda == np.array([0., 1., 1., 0., 0., 1.]))
-    
+    assert all(h.weights.nda == np.array([0.0, 1.0, 1.0, 0.0, 0.0, 1.0]))
+
     # Test 2d histogram with numpy array data
-    h = Histogram(None, [ (0, 3, 1), (0, 3, 1) ])
-    data = np.array( [ [1, 1], [2, 2], [-1, 2], [2, -1] ])
+    h = Histogram(None, [(0, 3, 1), (0, 3, 1)])
+    data = np.array([[1, 1], [2, 2], [-1, 2], [2, -1]])
     h.fill(data)
-    assert np.all(h.weights.nda == np.array([[0., 0., 0.], [0., 1., 0.], [0., 0., 1.]]))
-
+    assert np.all(
+        h.weights.nda == np.array([[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
+    )
+
     # Test 2d histogram with pandas data
-    h = Histogram(None, [ (0, 3, 1), (0, 3, 1) ])
+    h = Histogram(None, [(0, 3, 1), (0, 3, 1)])
     data = pd.DataFrame({"a": [1, 2, -1, 2], "b": [1, 2, 2, -1]})
     h.fill(data)
-    assert np.all(h.weights.nda == np.array([[0., 0., 0.], [0., 1., 0.], [0., 0., 1.]]))
+    assert np.all(
+        h.weights.nda == np.array([[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
+    )
     h.fill(data, keys=["a", "b"])
-    assert np.all(h.weights.nda == np.array([[0., 0., 0.], [0., 2., 0.], [0., 0., 2.]]))
-
+    assert np.all(
+        h.weights.nda == np.array([[0.0, 0.0, 0.0], [0.0, 2.0, 0.0], [0.0, 0.0, 2.0]])
+    )
+
     # Test list of columnar data
-    h = Histogram(None, [ (0, 3, 1), (0, 3, 1) ])
+    h = Histogram(None, [(0, 3, 1), (0, 3, 1)])
     data = [np.array([1, 2, -1, 2]), np.array([1, 2, 2, -1])]
     h.fill(data)
-    assert np.all(h.weights.nda == np.array([[0., 0., 0.], [0., 1., 0.], [0., 0., 1.]]))
+    assert np.all(
+        h.weights.nda == np.array([[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
+    )
 
     # Test ordered dict of columnar data
-    h = Histogram(None, [ (0, 3, 1), (0, 3, 1) ])
+    h = Histogram(None, [(0, 3, 1), (0, 3, 1)])
     data = {"a": [1, 2, -1, 2], "b": [1, 2, 2, -1]}
     with pytest.raises(ValueError, match="requires a list of keys"):
         h.fill(data)
     h.fill(data, keys=["a", "b"])
-    assert np.all(h.weights.nda == np.array([[0., 0., 0.], [0., 1., 0.], [0., 0., 1.]]))
-
+    assert np.all(
+        h.weights.nda == np.array([[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
+    )