From 545d6dc39d68de96dd5ddfa95e1eb19a08961420 Mon Sep 17 00:00:00 2001
From: iguinn
Date: Fri, 11 Oct 2024 13:30:47 -0700
Subject: [PATCH 01/27] Array manages capacity separately from size

---
 src/lgdo/types/array.py | 72 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 61 insertions(+), 11 deletions(-)

diff --git a/src/lgdo/types/array.py b/src/lgdo/types/array.py
index ec555bcf..ae9409e7 100644
--- a/src/lgdo/types/array.py
+++ b/src/lgdo/types/array.py
@@ -78,11 +78,7 @@ def __init__(
         elif isinstance(nda, Array):
             nda = nda.nda

-        elif not isinstance(nda, np.ndarray):
-            nda = np.array(nda)
-
         self.nda = nda
-        self.dtype = self.nda.dtype

         super().__init__(attrs)

@@ -96,18 +92,72 @@ def form_datatype(self) -> str:
         return dt + "<" + nd + ">{" + et + "}"

     def __len__(self) -> int:
-        return len(self.nda)
+        return self._size
+
+    @property
+    def nda(self):
+        return self._nda[: self._size, ...] if self._nda.shape != () else self._nda
+
+    @nda.setter
+    def nda(self, value):
+        self._nda = value if isinstance(value, np.ndarray) else np.array(value)
+        self._size = len(self._nda) if self._nda.shape != () else 0
+
+    @property
+    def dtype(self):
+        return self._nda.dtype
+
+    @property
+    def shape(self):
+        return (len(self),) + self._nda.shape[1:]
+
+    def set_capacity(self, capacity: int) -> None:
+        "Set capacity (number of rows) of internal memory buffer"
+        if capacity < len(self):
+            msg = "Cannot reduce capacity below Array length"
+            raise ValueError(msg)
+        self._nda.resize((capacity,) + self._nda.shape[1:], refcheck=True)
+
+    def get_capacity(self) -> int:
+        "Get capacity (i.e. max size before memory must be re-allocated)"
+        return len(self._nda)
+
+    def trim_capacity(self) -> None:
+        "Set capacity to the minimum needed to support the Array size"
+        self.set_capacity(len(self))
+
+    def resize(self, new_size: int, trim=False) -> None:
+        """Set size of Array in rows. Only change capacity if it must be
+        increased to accommodate new rows; in this case, grow to the next
+        power of two that fits. If trim is True, set capacity to match size."""
+
+        self._size = new_size
+
+        if trim and new_size != self.get_capacity():
+            self.set_capacity(new_size)
+
+        # If capacity is not big enough, set to next power of 2 big enough
+        if new_size > self.get_capacity():
+            self.set_capacity(int(2 ** (np.ceil(np.log2(new_size)))))

-    def resize(self, new_size: int) -> None:
-        new_shape = (new_size,) + self.nda.shape[1:]
-        return self.nda.resize(new_shape, refcheck=True)

     def append(self, value: np.ndarray) -> None:
-        self.resize(len(self) + 1)
-        self.nda[-1] = value
+        "Append value to end of array (with copy)"
+        self.insert(len(self), value)

     def insert(self, i: int, value: int | float) -> None:
-        self.nda = np.insert(self.nda, i, value)
+        "Insert value into row i (with copy)"
+        value = np.array(value)
+        if value.shape == self.shape[1:]:
+            self.resize(len(self) + 1)
+            self[i + 1 :] = self[i:-1]
+            self[i] = value
+        elif value.shape[1:] == self.shape[1:]:
+            self.resize(len(self) + len(value))
+            self[i + len(value) :] = self[i : -len(value)]
+            self[i : i + len(value)] = value
+        else:
+            msg = f"Could not insert value with shape {value.shape} into Array with shape {self.shape}"
+            raise ValueError(msg)

     def __getitem__(self, key):
         return self.nda[key]

From 980ad6a863dfba0515d358d29b4236192fe9534b Mon Sep 17 00:00:00 2001
From: iguinn
Date: Fri, 11 Oct 2024 13:31:30 -0700
Subject: [PATCH 02/27] VectorOfVectors dtype is a property

---
 src/lgdo/types/vectorofvectors.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/lgdo/types/vectorofvectors.py b/src/lgdo/types/vectorofvectors.py
index c4f543d6..f6d5575f 100644
--- a/src/lgdo/types/vectorofvectors.py
+++ b/src/lgdo/types/vectorofvectors.py
@@ -209,9 +209,6 @@ def __init__(
         elif self.flattened_data is None:
             self.flattened_data = flattened_data

-        # finally set dtype
-        self.dtype = self.flattened_data.dtype
-
         # set ndim
         self.ndim = 2
         pointer = self.flattened_data
@@ -224,6 +221,10 @@ def __init__(

         super().__init__(attrs)

+    @property
+    def dtype(self):
+        return self.flattened_data.dtype
+
     def datatype_name(self) -> str:
         return "array"

From d0d8ce203f0dfcb34f05c967e705793a6dbba612 Mon Sep 17 00:00:00 2001
From: iguinn
Date: Sun, 13 Oct 2024 10:13:24 -0700
Subject: [PATCH 03/27] Raise error on insert if i>len

---
 src/lgdo/types/array.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/lgdo/types/array.py b/src/lgdo/types/array.py
index ae9409e7..32343074 100644
--- a/src/lgdo/types/array.py
+++ b/src/lgdo/types/array.py
@@ -146,6 +146,10 @@ def append(self, value: np.ndarray) -> None:

     def insert(self, i: int, value: int | float) -> None:
         "Insert value into row i (with copy)"
+        if i > len(self):
+            msg = f"index {i} is out of bounds for array with size {len(self)}"
+            raise IndexError(msg)
+
         value = np.array(value)
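
The three patches above decouple an Array's logical size from its buffer
capacity. A rough sketch of the intended usage pattern (illustrative only,
not part of the series; it assumes just the methods introduced above):

    import numpy as np
    from lgdo import Array

    a = Array(np.array([1, 2, 3]))  # size 3, capacity 3
    for i in range(4, 10):
        a.append(i)  # buffer reallocates only at powers of two: amortized O(1)
    assert len(a) == 9
    assert a.get_capacity() == 16  # next power of two that fits 9 rows
    a.trim_capacity()              # shrink the buffer to exactly len(a)
    assert a.get_capacity() == 9
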
From 075a4f1b7847629921e39eec1d1be5fa9090115b Mon Sep 17 00:00:00 2001
From: iguinn
Date: Sun, 13 Oct 2024 10:14:29 -0700
Subject: [PATCH 04/27] Add get/set_capacity to VoV and change modifiers to
 take advantage of changes to len/capacity management

---
 src/lgdo/types/vectorofvectors.py | 119 ++++++++++++------------------
 1 file changed, 47 insertions(+), 72 deletions(-)

diff --git a/src/lgdo/types/vectorofvectors.py b/src/lgdo/types/vectorofvectors.py
index f6d5575f..985e9427 100644
--- a/src/lgdo/types/vectorofvectors.py
+++ b/src/lgdo/types/vectorofvectors.py
@@ -209,20 +209,14 @@ def __init__(
         elif self.flattened_data is None:
             self.flattened_data = flattened_data

-        # set ndim
-        self.ndim = 2
-        pointer = self.flattened_data
-        while True:
-            if isinstance(pointer, Array):
-                break
-
-            self.ndim += 1
-            pointer = pointer.flattened_data
-
         super().__init__(attrs)

     @property
-    def dtype(self):
+    def ndim(self):
+        return 1 + (1 if isinstance(self.flattened_data, Array) else self.flattened_data.ndim)
+
+    @property
+    def dtype(self) -> np.dtype:
         return self.flattened_data.dtype

     def datatype_name(self) -> str:
         return "array"

@@ -276,7 +270,27 @@ def __setitem__(self, i: int, new: NDArray) -> None:
         else:
             raise NotImplementedError

-    def resize(self, new_size: int) -> None:
+    def set_capacity(self, cap_cl, *cap_args) -> None:
+        """Set capacity of internal data arrays. Expect number of args to
+        equal `self.ndim`. First arg is capacity of cumulative length array.
+        If `self.ndim` is 2, second argument is capacity of flattened data,
+        otherwise arguments are fed recursively to remaining dimensions.
+        """
+        self.cumulative_length.set_capacity(cap_cl)
+        self.flattened_data.set_capacity(*cap_args)
+
+    def get_capacity(self) -> tuple[int, ...]:
+        """Get tuple containing capacity of each dimension. First dimension
+        is cumulative length array. Last dimension is flattened data.
+        """
+        fd_cap = self.flattened_data.get_capacity()
+        if isinstance(fd_cap, int):
+            return (self.cumulative_length.get_capacity(), fd_cap)
+        return (self.cumulative_length.get_capacity(), *fd_cap)
+
+    def trim_capacity(self) -> None:
+        "Set capacity for all dimensions to minimum needed to hold data"
+        self.cumulative_length.trim_capacity()
+        self.flattened_data.trim_capacity()
+
+    def resize(self, new_size: int, trim: bool = False) -> None:
         """Resize vector along the first axis.

         `self.flattened_data` is resized only if `new_size` is smaller than the
         current vector length.

         If `new_size` is larger than the current vector length,
         `self.cumulative_length` is padded with its last element. This
         corresponds to appending empty vectors.
+
+        If `trim` is ``True``, resize capacity to match the new size.

         Examples
         --------
         >>> vov = VectorOfVectors([[1, 2, 3], [4, 5], [3], [1], [9, 8]])
         >>> vov.resize(3)
         >>> print(vov)
         [[1 2 3],
          [4 5],
          [3],
         ]
         """
-        vidx = self.cumulative_length
         old_s = len(self)
-        dlen = new_size - old_s
-        csum = vidx[-1] if len(self) > 0 else 0

         # first resize the cumulative length
-        self.cumulative_length.resize(new_size)
+        self.cumulative_length.resize(new_size, trim)

         # if new_size > size, new elements are filled with zeros, let's fix
         # that
-        if dlen > 0:
-            self.cumulative_length[old_s:] = csum
+        if new_size > old_s:
+            self.cumulative_length[old_s:] = (
+                self.cumulative_length[old_s - 1] if old_s > 0 else 0
+            )

-        # then resize the data array
-        # if dlen > 0 this has no effect
+        # then resize the data array to match the new cumulative length
         if len(self.cumulative_length) > 0:
-            self.flattened_data.resize(self.cumulative_length[-1])
+            self.flattened_data.resize(self.cumulative_length[-1], trim)

     def append(self, new: NDArray) -> None:
         """Append a 1D vector `new` at the end.

         Examples
         --------
         >>> from lgdo import VectorOfVectors
         >>> v = VectorOfVectors([[1, 2, 3], [4, 5]])
         >>> v.append([8, 9])
         >>> print(v)
         [[1 2 3],
          [4 5],
          [8 9],
         ]
         """
-        if self.ndim == 2:
-            # first extend cumulative_length by +1
-            self.cumulative_length.resize(len(self) + 1)
-            # set it at the right value
-            newlen = (
-                self.cumulative_length[-2] + len(new) if len(self) > 1 else len(new)
-            )
-            self.cumulative_length[-1] = newlen
-            # then resize flattened_data to accommodate the new vector
-            self.flattened_data.resize(len(self.flattened_data) + len(new))
-            # finally set it
-            self[-1] = new
-        else:
-            raise NotImplementedError
+        self.insert(len(self), new)

     def insert(self, i: int, new: NDArray) -> None:
         """Insert a vector at index `i`. 
@@ -364,23 +364,15 @@ def insert(self, i: int, new: NDArray) -> None: [8 9], [4 5], ] - - Warning - ------- - This method involves a significant amount of memory re-allocation and - is expected to perform poorly on large vectors. """ if self.ndim == 2: - if i >= len(self): - msg = f"index {i} is out of bounds for vector owith size {len(self)}" + if i > len(self): + msg = f"index {i} is out of bounds for vector with size {len(self)}" raise IndexError(msg) - self.flattened_data = Array( - np.insert(self.flattened_data, self.cumulative_length[i - 1], new) - ) - self.cumulative_length = Array( - np.insert(self.cumulative_length, i, self.cumulative_length[i - 1]) - ) + i_start = 0 if i==0 else self.cumulative_length[i-1] + self.flattened_data.insert(i_start, new) + self.cumulative_length.insert(i, i_start) self.cumulative_length[i:] += np.uint32(len(new)) else: raise NotImplementedError @@ -400,11 +392,6 @@ def replace(self, i: int, new: NDArray) -> None: [[8 9], [4 5], ] - - Warning - ------- - This method involves a significant amount of memory re-allocation and - is expected to perform poorly on large vectors. """ if self.ndim == 2: if i >= len(self): @@ -414,27 +401,15 @@ def replace(self, i: int, new: NDArray) -> None: vidx = self.cumulative_length dlen = len(new) - len(self[i]) - if dlen == 0: - # don't waste resources - self[i] = new - elif dlen < 0: - start = vidx[i - 1] - stop = start + len(new) - # set the already allocated indices - self.flattened_data[start:stop] = new - # then delete the extra indices - self.flattened_data = Array( - np.delete(self.flattened_data, np.s_[stop : vidx[i]]) - ) - else: - # set the already allocated indices - self.flattened_data[vidx[i - 1] : vidx[i]] = new[: len(self[i])] - # then insert the remaining - self.flattened_data = Array( - np.insert(self.flattened_data, vidx[i], new[len(self[i]) :]) - ) - - vidx[i:] = vidx[i:] + dlen + if dlen != 0: + # move the subsequent entries + vidx[i:] += dlen + self.flattened_data.resize(vidx[-1]) + self.flattened_data._nda[vidx[i]:vidx[-1]] = self.flattened_data._nda[vidx[i]-dlen:vidx[-1]-dlen] + + # set the already allocated indices + start = vidx[i - 1] if i>0 else 0 + self.flattened_data[start:vidx[i]] = new else: raise NotImplementedError From 1e1fddaba1f29177eeb1f304c9998ea846261e6a Mon Sep 17 00:00:00 2001 From: iguinn Date: Sun, 13 Oct 2024 10:19:15 -0700 Subject: [PATCH 05/27] Modify core.read and store.read to resize array when filling in place instead of returning n_read --- src/lgdo/cli.py | 6 +- src/lgdo/lh5/core.py | 42 +++--- src/lgdo/lh5/iterator.py | 33 ++--- src/lgdo/lh5/store.py | 78 ++--------- src/lgdo/types/histogram.py | 4 +- tests/compression/conftest.py | 2 +- tests/compression/test_radware_sigcompress.py | 2 +- tests/lh5/conftest.py | 2 +- tests/lh5/test_core.py | 2 +- tests/lh5/test_lh5_iterator.py | 17 ++- tests/lh5/test_lh5_store.py | 128 +++++++++--------- tests/lh5/test_lh5_write.py | 24 ++-- tests/test_cli.py | 18 +-- tests/types/test_histogram.py | 4 +- tests/types/test_vectorofvectors.py | 2 +- 15 files changed, 152 insertions(+), 212 deletions(-) diff --git a/src/lgdo/cli.py b/src/lgdo/cli.py index 6563fd66..73105738 100644 --- a/src/lgdo/cli.py +++ b/src/lgdo/cli.py @@ -227,10 +227,10 @@ def lh5concat(args=None): continue # read as little as possible - obj, _ = store.read(current, h5f0, n_rows=1) + obj = store.read(current, h5f0, n_rows=1) if isinstance(obj, (Table, Array, VectorOfVectors)): # read all! 
- obj, _ = store.read(current, h5f0) + obj = store.read(current, h5f0) lgdos[current] = obj break @@ -292,7 +292,7 @@ def _inplace_table_filter(name, table, obj_list): log.info(msg) for name in lgdos: - obj, _ = store.read(name, file) + obj = store.read(name, file) # need to remove nested LGDOs from obj too before appending if isinstance(obj, Table): _inplace_table_filter(name, obj, obj_list) diff --git a/src/lgdo/lh5/core.py b/src/lgdo/lh5/core.py index 80132376..fc97338d 100644 --- a/src/lgdo/lh5/core.py +++ b/src/lgdo/lh5/core.py @@ -119,11 +119,12 @@ def read( lh5_file = h5py.File(lh5_file, mode="r", locking=locking) lh5_obj = lh5_file[name] else: - lh5_files = list(lh5_file) - n_rows_read = 0 - obj_buf_is_new = False - - for i, h5f in enumerate(lh5_files): + if obj_buf is not None: + obj_buf.resize(obj_buf_start) + else: + obj_buf_start = 0 + + for i, h5f in enumerate(lh5_file): if ( isinstance(idx, (list, tuple)) and len(idx) > 0 @@ -145,33 +146,26 @@ def read( idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i else: idx_i = None - n_rows_i = n_rows - n_rows_read - obj_ret = read( + obj_buf_start_i = len(obj_buf) if obj_buf else 0 + n_rows_i = n_rows - (obj_buf_start_i - obj_buf_start) + + obj_buf = read( name, h5f, - start_row, + start_row if i==0 else 0, n_rows_i, idx_i, use_h5idx, field_mask, obj_buf, - obj_buf_start, + obj_buf_start_i, decompress, ) - if isinstance(obj_ret, tuple): - obj_buf, n_rows_read_i = obj_ret - obj_buf_is_new = True - else: - obj_buf = obj_ret - n_rows_read_i = len(obj_buf) - n_rows_read += n_rows_read_i - if n_rows_read >= n_rows or obj_buf is None: - return obj_buf, n_rows_read - start_row = 0 - obj_buf_start += n_rows_read_i - return obj_buf if obj_buf_is_new else (obj_buf, n_rows_read) + if obj_buf is None or (len(obj_buf) - obj_buf_start) >= n_rows: + return obj_buf + return obj_buf if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]): idx = idx[0] @@ -188,8 +182,12 @@ def read( obj_buf_start=obj_buf_start, decompress=decompress, ) + try: + obj.resize(obj_buf_start + n_rows_read) + except AttributeError: + pass - return obj if obj_buf is None else (obj, n_rows_read) + return obj def write( diff --git a/src/lgdo/lh5/iterator.py b/src/lgdo/lh5/iterator.py index efced94c..88c2573c 100644 --- a/src/lgdo/lh5/iterator.py +++ b/src/lgdo/lh5/iterator.py @@ -21,7 +21,7 @@ class LH5Iterator(typing.Iterator): This class can be used either for random access: - >>> lh5_obj, n_rows = lh5_it.read(entry) + >>> lh5_obj = lh5_it.read(entry) to read the block of entries starting at entry. In case of multiple files or the use of an event selection, entry refers to a global event index @@ -29,7 +29,7 @@ class LH5Iterator(typing.Iterator): This can also be used as an iterator: - >>> for lh5_obj, entry, n_rows in LH5Iterator(...): + >>> for lh5_obj, entry in LH5Iterator(...): >>> # do the thing! This is intended for if you are reading a large quantity of data but @@ -129,7 +129,6 @@ def __init__( msg = f"can't open any files from {lh5_files}" raise RuntimeError(msg) - self.n_rows = 0 self.current_entry = 0 self.next_entry = 0 @@ -235,11 +234,10 @@ def get_global_entrylist(self) -> np.ndarray: ) return self.global_entry_list - def read(self, entry: int) -> tuple[LGDO, int]: - """Read the nextlocal chunk of events, starting at entry. Return the - LH5 buffer and number of rows read.""" - self.n_rows = 0 + def read(self, entry: int) -> LGDO: + "Read the nextlocal chunk of events, starting at entry." 
i_file = np.searchsorted(self.entry_map, entry, "right") + self.lh5_buffer.resize(0) # if file hasn't been opened yet, search through files # sequentially until we find the right one @@ -250,10 +248,10 @@ def read(self, entry: int) -> tuple[LGDO, int]: i_file += 1 if i_file == len(self.lh5_files): - return (self.lh5_buffer, self.n_rows) + return self.lh5_buffer local_entry = entry - self._get_file_cumentries(i_file - 1) - while self.n_rows < self.buffer_len and i_file < len(self.file_map): + while len(self.lh5_buffer) < self.buffer_len and i_file < len(self.file_map): # Loop through files local_idx = self.get_file_entrylist(i_file) if local_idx is not None and len(local_idx) == 0: @@ -262,18 +260,17 @@ def read(self, entry: int) -> tuple[LGDO, int]: continue i_local = local_idx[local_entry] if local_idx is not None else local_entry - self.lh5_buffer, n_rows = self.lh5_st.read( + self.lh5_buffer = self.lh5_st.read( self.groups[i_file], self.lh5_files[i_file], start_row=i_local, - n_rows=self.buffer_len - self.n_rows, + n_rows=self.buffer_len - len(self.lh5_buffer), idx=local_idx, field_mask=self.field_mask, obj_buf=self.lh5_buffer, - obj_buf_start=self.n_rows, + obj_buf_start=len(self.lh5_buffer), ) - self.n_rows += n_rows i_file += 1 local_entry = 0 @@ -282,7 +279,7 @@ def read(self, entry: int) -> tuple[LGDO, int]: if self.friend is not None: self.friend.read(entry) - return (self.lh5_buffer, self.n_rows) + return self.lh5_buffer def reset_field_mask(self, mask): """Replaces the field mask of this iterator and any friends with mask""" @@ -307,8 +304,8 @@ def __iter__(self) -> typing.Iterator: def __next__(self) -> tuple[LGDO, int, int]: """Read next buffer_len entries and return lh5_table, iterator entry and n_rows read.""" - buf, n_rows = self.read(self.next_entry) - self.next_entry = self.current_entry + n_rows - if n_rows == 0: + buf = self.read(self.next_entry) + if len(buf) == 0: raise StopIteration - return (buf, self.current_entry, n_rows) + self.next_entry = self.current_entry + len(buf) + return (buf, self.current_entry) diff --git a/src/lgdo/lh5/store.py b/src/lgdo/lh5/store.py index eab09ed6..a1149e6b 100644 --- a/src/lgdo/lh5/store.py +++ b/src/lgdo/lh5/store.py @@ -19,6 +19,7 @@ from .. import types from . import _serializers, utils +from .core import read log = logging.getLogger(__name__) @@ -150,7 +151,7 @@ def get_buffer( """Returns an LH5 object appropriate for use as a pre-allocated buffer in a read loop. Sets size to `size` if object has a size. 
""" - obj, n_rows = self.read(name, lh5_file, n_rows=0, field_mask=field_mask) + obj = self.read(name, lh5_file, n_rows=0, field_mask=field_mask) if hasattr(obj, "resize") and size is not None: obj.resize(new_size=size) return obj @@ -177,69 +178,20 @@ def read( """ # grab files from store if isinstance(lh5_file, (str, h5py.File)): - lh5_obj = self.gimme_file(lh5_file, "r", **file_kwargs)[name] + h5f = self.gimme_file(lh5_file, "r", **file_kwargs) else: - lh5_files = list(lh5_file) - n_rows_read = 0 - - for i, h5f in enumerate(lh5_files): - if ( - isinstance(idx, (list, tuple)) - and len(idx) > 0 - and not np.isscalar(idx[0]) - ): - # a list of lists: must be one per file - idx_i = idx[i] - elif idx is not None: - # make idx a proper tuple if it's not one already - if not (isinstance(idx, tuple) and len(idx) == 1): - idx = (idx,) - # idx is a long continuous array - n_rows_i = utils.read_n_rows(name, h5f) - # find the length of the subset of idx that contains indices - # that are less than n_rows_i - n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i) - # now split idx into idx_i and the remainder - idx_i = np.array(idx[0])[:n_rows_to_read_i] - idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i - else: - idx_i = None - n_rows_i = n_rows - n_rows_read - - obj_buf, n_rows_read_i = self.read( - name, - h5f, - start_row, - n_rows_i, - idx_i, - use_h5idx, - field_mask, - obj_buf, - obj_buf_start, - decompress, - ) - - n_rows_read += n_rows_read_i - if n_rows_read >= n_rows or obj_buf is None: - return obj_buf, n_rows_read - start_row = 0 - obj_buf_start += n_rows_read_i - return obj_buf, n_rows_read - - if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]): - idx = idx[0] - return _serializers._h5_read_lgdo( - lh5_obj.id, - lh5_obj.file.filename, - lh5_obj.name, - start_row=start_row, - n_rows=n_rows, - idx=idx, - use_h5idx=use_h5idx, - field_mask=field_mask, - obj_buf=obj_buf, - obj_buf_start=obj_buf_start, - decompress=decompress, + h5f = [self.gimme_file(f, "r", **file_kwargs) for f in lh5_file] + return read( + name, + h5f, + start_row, + n_rows, + idx, + use_h5idx, + field_mask, + obj_buf, + obj_buf_start, + decompress, ) def write( diff --git a/src/lgdo/types/histogram.py b/src/lgdo/types/histogram.py index 7efde7d6..b2311306 100644 --- a/src/lgdo/types/histogram.py +++ b/src/lgdo/types/histogram.py @@ -318,12 +318,12 @@ def binning(self) -> tuple[Histogram.Axis, ...]: def __setitem__(self, name: str, obj: LGDO) -> None: # do not allow for new attributes on this msg = "histogram fields cannot be mutated" - raise TypeError(msg) + raise AttributeError(msg) def __getattr__(self, name: str) -> None: # do not allow for new attributes on this msg = "histogram fields cannot be mutated" - raise TypeError(msg) + raise AttributeError(msg) def add_field(self, name: str | int, obj: LGDO) -> None: # noqa: ARG002 """ diff --git a/tests/compression/conftest.py b/tests/compression/conftest.py index a2451579..cb96d622 100644 --- a/tests/compression/conftest.py +++ b/tests/compression/conftest.py @@ -8,7 +8,7 @@ @pytest.fixture() def wftable(lgnd_test_data): store = lh5.LH5Store() - wft, _ = store.read( + wft = store.read( "/geds/raw/waveform", lgnd_test_data.get_path("lh5/LDQTA_r117_20200110T105115Z_cal_geds_raw.lh5"), ) diff --git a/tests/compression/test_radware_sigcompress.py b/tests/compression/test_radware_sigcompress.py index 8387bea6..f54455dc 100644 --- a/tests/compression/test_radware_sigcompress.py +++ b/tests/compression/test_radware_sigcompress.py @@ -182,7 
+182,7 @@ def test_aoesa(wftable): def test_performance(lgnd_test_data): store = lh5.LH5Store() - obj, _ = store.read( + obj = store.read( "/geds/raw/waveform", lgnd_test_data.get_path("lh5/LDQTA_r117_20200110T105115Z_cal_geds_raw.lh5"), ) diff --git a/tests/lh5/conftest.py b/tests/lh5/conftest.py index 772c0a8a..fa3bf1c4 100644 --- a/tests/lh5/conftest.py +++ b/tests/lh5/conftest.py @@ -120,7 +120,7 @@ def lh5_file(tmptestdir): @pytest.fixture(scope="module") def enc_lgnd_file(lgnd_file, tmptestdir): store = lh5.LH5Store() - wft, n_rows = store.read("/geds/raw/waveform", lgnd_file) + wft = store.read("/geds/raw/waveform", lgnd_file) wft.values.attrs["compression"] = compression.RadwareSigcompress(codec_shift=-32768) store.write( wft, diff --git a/tests/lh5/test_core.py b/tests/lh5/test_core.py index 2db57634..7481feb7 100644 --- a/tests/lh5/test_core.py +++ b/tests/lh5/test_core.py @@ -30,7 +30,7 @@ def test_write(tmptestdir): def test_read_as(lh5_file): store = lh5.LH5Store() - obj1, _ = store.read("/data/struct/table", lh5_file, start_row=1) + obj1 = store.read("/data/struct/table", lh5_file, start_row=1) obj1 = obj1.view_as("pd", with_units=True) obj2 = lh5.read_as( diff --git a/tests/lh5/test_lh5_iterator.py b/tests/lh5/test_lh5_iterator.py index 0f934b3a..59b32cf3 100644 --- a/tests/lh5/test_lh5_iterator.py +++ b/tests/lh5/test_lh5_iterator.py @@ -23,17 +23,16 @@ def test_basics(lgnd_file): buffer_len=5, ) - lh5_obj, n_rows = lh5_it.read(4) - assert n_rows == 5 + lh5_obj = lh5_it.read(4) + assert len(lh5_obj) == 5 assert isinstance(lh5_obj, lgdo.Table) assert list(lh5_obj.keys()) == ["baseline"] assert ( lh5_obj["baseline"].nda == np.array([14353, 14254, 14525, 11656, 13576]) ).all() - for lh5_obj, entry, n_rows in lh5_it: + for lh5_obj, entry in lh5_it: assert len(lh5_obj) == 5 - assert n_rows == 5 assert entry % 5 == 0 @@ -73,7 +72,7 @@ def test_lgnd_waveform_table_fancy_idx(lgnd_file): buffer_len=5, ) - lh5_obj, n_rows = lh5_it.read(0) + lh5_obj = lh5_it.read(0) assert isinstance(lh5_obj, lgdo.WaveformTable) assert len(lh5_obj) == 5 @@ -115,9 +114,9 @@ def test_friend(more_lgnd_files): friend=lh5_raw_it, ) - lh5_obj, n_rows = lh5_it.read(0) + lh5_obj = lh5_it.read(0) - assert n_rows == 5 + assert len(lh5_obj) == 5 assert isinstance(lh5_obj, lgdo.Table) assert set(lh5_obj.keys()) == {"waveform", "baseline", "is_valid_0vbb"} @@ -133,7 +132,7 @@ def test_iterate(more_lgnd_files): buffer_len=5, ) - for lh5_out, entry, n_rows in lh5_it: + for lh5_out, entry in lh5_it: assert set(lh5_out.keys()) == {"is_valid_0vbb", "timestamp", "zacEmax_ctc_cal"} assert entry % 5 == 0 - assert n_rows == 5 + assert len(lh5_out) == 5 diff --git a/tests/lh5/test_lh5_store.py b/tests/lh5/test_lh5_store.py index 2e33ec9d..63931d44 100644 --- a/tests/lh5/test_lh5_store.py +++ b/tests/lh5/test_lh5_store.py @@ -63,10 +63,9 @@ def test_get_buffer(lh5_file): def test_read_scalar(lh5_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read("/data/struct/scalar", lh5_file) + lh5_obj = store.read("/data/struct/scalar", lh5_file) assert isinstance(lh5_obj, lgdo.Scalar) assert lh5_obj.value == 10 - assert n_rows == 1 assert lh5_obj.attrs["sth"] == 1 with h5py.File(lh5_file) as h5f: assert h5f["/data/struct/scalar"].compression is None @@ -74,63 +73,63 @@ def test_read_scalar(lh5_file): def test_read_array(lh5_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read("/data/struct/array", lh5_file) + lh5_obj = store.read("/data/struct/array", lh5_file) assert isinstance(lh5_obj, types.Array) assert (lh5_obj.nda 
== np.array([2, 3, 4])).all() - assert n_rows == 3 + assert len(lh5_obj) == 3 with h5py.File(lh5_file) as h5f: assert ( h5f["/data/struct/array"].compression is DEFAULT_HDF5_SETTINGS["compression"] ) - lh5_obj, n_rows = store.read("/data/struct_full/array2d", lh5_file) + lh5_obj = store.read("/data/struct_full/array2d", lh5_file) assert isinstance(lh5_obj, types.Array) assert lh5_obj == types.Array(shape=(23, 56), fill_val=69, dtype=int) def test_read_array_slice(lh5_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read( + lh5_obj = store.read( "/data/struct_full/array", lh5_file, start_row=1, n_rows=3 ) assert isinstance(lh5_obj, types.Array) - assert n_rows == 3 + assert len(lh5_obj) == 3 assert lh5_obj == lgdo.Array([2, 3, 4]) - lh5_obj, n_rows = store.read( + lh5_obj = store.read( "/data/struct_full/array", [lh5_file, lh5_file], start_row=1, n_rows=6 ) assert isinstance(lh5_obj, types.Array) - assert n_rows == 6 + assert len(lh5_obj) == 6 assert lh5_obj == lgdo.Array([2, 3, 4, 5, 1, 2]) def test_read_array_fancy_idx(lh5_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read("/data/struct_full/array", lh5_file, idx=[0, 3, 4]) + lh5_obj = store.read("/data/struct_full/array", lh5_file, idx=[0, 3, 4]) assert isinstance(lh5_obj, types.Array) - assert n_rows == 3 + assert len(lh5_obj) == 3 assert lh5_obj == lgdo.Array([1, 4, 5]) - lh5_obj, n_rows = store.read( + lh5_obj = store.read( "/data/struct_full/array", [lh5_file, lh5_file], idx=[[0, 3, 4], [0, 3, 4]] ) assert isinstance(lh5_obj, types.Array) - assert n_rows == 6 + assert len(lh5_obj) == 6 assert lh5_obj == lgdo.Array([1, 4, 5, 1, 4, 5]) def test_read_vov(lh5_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read("/data/struct/vov", lh5_file) + lh5_obj = store.read("/data/struct/vov", lh5_file) assert isinstance(lh5_obj, types.VectorOfVectors) assert lh5_obj == lgdo.VectorOfVectors( [[3, 4, 5], [2], [4, 8, 9, 7]], attrs={"myattr": 2} ) - assert n_rows == 3 + assert len(lh5_obj) == 3 assert lh5_obj.attrs["myattr"] == 2 with h5py.File(lh5_file) as h5f: @@ -143,7 +142,7 @@ def test_read_vov(lh5_file): is DEFAULT_HDF5_SETTINGS["compression"] ) - lh5_obj, n_rows = store.read("/data/struct/vov3d", lh5_file) + lh5_obj = store.read("/data/struct/vov3d", lh5_file) assert isinstance(lh5_obj, types.VectorOfVectors) assert ak.all( @@ -154,26 +153,26 @@ def test_read_vov(lh5_file): def test_read_vov_fancy_idx(lh5_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read("/data/struct_full/vov", lh5_file, idx=[0], n_rows=1) + lh5_obj = store.read("/data/struct_full/vov", lh5_file, idx=[0], n_rows=1) assert isinstance(lh5_obj, types.VectorOfVectors) - lh5_obj, n_rows = store.read("/data/struct_full/vov", lh5_file, idx=[0, 2]) + lh5_obj = store.read("/data/struct_full/vov", lh5_file, idx=[0, 2]) assert isinstance(lh5_obj, types.VectorOfVectors) assert lh5_obj == types.VectorOfVectors([[1, 2], [2]], attrs={"myattr": 2}) - assert n_rows == 2 + assert len(lh5_obj) == 2 - lh5_obj, n_rows = store.read("/data/struct_full/vov3d", lh5_file, idx=[0, 2]) + lh5_obj = store.read("/data/struct_full/vov3d", lh5_file, idx=[0, 2]) assert isinstance(lh5_obj, types.VectorOfVectors) print(lh5_obj) assert lh5_obj == types.VectorOfVectors([[[1, 2], [3, 4, 5]], [[5, 3, 1]]]) - assert n_rows == 2 + assert len(lh5_obj) == 2 def test_read_voev(lh5_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read("/data/struct/voev", lh5_file, decompress=False) + lh5_obj = store.read("/data/struct/voev", lh5_file, decompress=False) assert isinstance(lh5_obj, 
types.VectorOfEncodedVectors) desired = [np.array([3, 4, 5]), np.array([2]), np.array([4, 8, 9, 7])] @@ -181,13 +180,13 @@ def test_read_voev(lh5_file): for i in range(len(desired)): assert (desired[i] == lh5_obj[i][0]).all() - assert n_rows == 3 + assert len(lh5_obj) == 3 - lh5_obj, n_rows = store.read( + lh5_obj = store.read( "/data/struct/voev", [lh5_file, lh5_file], decompress=False ) assert isinstance(lh5_obj, types.VectorOfEncodedVectors) - assert n_rows == 6 + assert len(lh5_obj) == 6 with h5py.File(lh5_file) as h5f: assert h5f["/data/struct/voev/encoded_data/flattened_data"].compression is None @@ -203,7 +202,7 @@ def test_read_voev(lh5_file): def test_read_voev_fancy_idx(lh5_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read( + lh5_obj = store.read( "/data/struct_full/voev", lh5_file, idx=[0, 2], decompress=False ) assert isinstance(lh5_obj, types.VectorOfEncodedVectors) @@ -213,38 +212,38 @@ def test_read_voev_fancy_idx(lh5_file): for i in range(len(desired)): assert (desired[i] == lh5_obj[i][0]).all() - assert n_rows == 2 + assert len(lh5_obj) == 2 def test_read_aoesa(lh5_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read("/data/struct/aoesa", lh5_file) + lh5_obj = store.read("/data/struct/aoesa", lh5_file) assert isinstance(lh5_obj, types.ArrayOfEqualSizedArrays) assert (lh5_obj.nda == np.full((3, 5), fill_value=42)).all() def test_read_table(lh5_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read("/data/struct/table", lh5_file) + lh5_obj = store.read("/data/struct/table", lh5_file) assert isinstance(lh5_obj, types.Table) - assert n_rows == 3 + assert len(lh5_obj) == 3 - lh5_obj, n_rows = store.read("/data/struct/table", [lh5_file, lh5_file]) - assert n_rows == 6 + lh5_obj = store.read("/data/struct/table", [lh5_file, lh5_file]) + assert len(lh5_obj) == 6 assert lh5_obj.attrs["stuff"] == 5 assert lh5_obj["a"].attrs["attr"] == 9 def test_read_empty_struct(lh5_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read("/data/struct/empty_struct", lh5_file) + lh5_obj = store.read("/data/struct/empty_struct", lh5_file) assert isinstance(lh5_obj, types.Struct) assert list(lh5_obj.keys()) == [] def test_read_hdf5_compressed_data(lh5_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read("/data/struct/table", lh5_file) + lh5_obj = store.read("/data/struct/table", lh5_file) assert "compression" not in lh5_obj["b"].attrs with h5py.File(lh5_file) as h5f: @@ -260,12 +259,12 @@ def test_read_hdf5_compressed_data(lh5_file): def test_read_wftable(lh5_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read("/data/struct/wftable", lh5_file) + lh5_obj = store.read("/data/struct/wftable", lh5_file) assert isinstance(lh5_obj, types.WaveformTable) - assert n_rows == 3 + assert len(lh5_obj) == 3 - lh5_obj, n_rows = store.read("/data/struct/wftable", [lh5_file, lh5_file]) - assert n_rows == 6 + lh5_obj = store.read("/data/struct/wftable", [lh5_file, lh5_file]) + assert len(lh5_obj) == 6 assert lh5_obj.values.attrs["custom"] == 8 with h5py.File(lh5_file) as h5f: @@ -285,35 +284,35 @@ def test_read_wftable(lh5_file): def test_read_wftable_encoded(lh5_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read("/data/struct/wftable_enc", lh5_file, decompress=False) + lh5_obj = store.read("/data/struct/wftable_enc", lh5_file, decompress=False) assert isinstance(lh5_obj, types.WaveformTable) assert isinstance(lh5_obj.values, types.ArrayOfEncodedEqualSizedArrays) - assert n_rows == 3 + assert len(lh5_obj) == 3 assert lh5_obj.values.attrs["codec"] == 
"radware_sigcompress" assert "codec_shift" in lh5_obj.values.attrs - lh5_obj, n_rows = store.read("/data/struct/wftable_enc/values", lh5_file) + lh5_obj = store.read("/data/struct/wftable_enc/values", lh5_file) assert isinstance(lh5_obj, lgdo.ArrayOfEqualSizedArrays) - assert n_rows == 3 + assert len(lh5_obj) == 3 - lh5_obj, n_rows = store.read("/data/struct/wftable_enc", lh5_file) + lh5_obj = store.read("/data/struct/wftable_enc", lh5_file) assert isinstance(lh5_obj, lgdo.WaveformTable) assert isinstance(lh5_obj.values, lgdo.ArrayOfEqualSizedArrays) - assert n_rows == 3 + assert len(lh5_obj) == 3 - lh5_obj_chain, n_rows = store.read( + lh5_obj_chain = store.read( "/data/struct/wftable_enc", [lh5_file, lh5_file], decompress=False ) - assert n_rows == 6 + assert len(lh5_obj) == 6 assert isinstance(lh5_obj_chain.values, lgdo.ArrayOfEncodedEqualSizedArrays) - lh5_obj_chain, n_rows = store.read( + lh5_obj_chain = store.read( "/data/struct/wftable_enc", [lh5_file, lh5_file], decompress=True ) assert isinstance(lh5_obj_chain.values, lgdo.ArrayOfEqualSizedArrays) assert np.array_equal(lh5_obj_chain.values[:3], lh5_obj.values) assert np.array_equal(lh5_obj_chain.values[3:], lh5_obj.values) - assert n_rows == 6 + assert len(lh5_obj) == 6 with h5py.File(lh5_file) as h5f: assert ( @@ -336,20 +335,20 @@ def test_read_wftable_encoded(lh5_file): def test_read_with_field_mask(lh5_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read("/data/struct_full", lh5_file, field_mask=["array"]) + lh5_obj = store.read("/data/struct_full", lh5_file, field_mask=["array"]) assert list(lh5_obj.keys()) == ["array"] - lh5_obj, n_rows = store.read( + lh5_obj = store.read( "/data/struct_full", lh5_file, field_mask=("array", "table") ) assert sorted(lh5_obj.keys()) == ["array", "table"] - lh5_obj, n_rows = store.read( + lh5_obj = store.read( "/data/struct_full", lh5_file, field_mask={"array": True} ) assert list(lh5_obj.keys()) == ["array"] - lh5_obj, n_rows = store.read( + lh5_obj = store.read( "/data/struct_full", lh5_file, field_mask={"vov": False, "voev": False} ) assert sorted(lh5_obj.keys()) == [ @@ -368,23 +367,21 @@ def test_read_with_field_mask(lh5_file): def test_read_lgnd_array(lgnd_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read("/geds/raw/baseline", lgnd_file) + lh5_obj = store.read("/geds/raw/baseline", lgnd_file) assert isinstance(lh5_obj, types.Array) - assert n_rows == 100 assert len(lh5_obj) == 100 - lh5_obj, n_rows = store.read("/geds/raw/waveform/values", lgnd_file) + lh5_obj = store.read("/geds/raw/waveform/values", lgnd_file) assert isinstance(lh5_obj, types.ArrayOfEqualSizedArrays) def test_read_lgnd_array_fancy_idx(lgnd_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read( + lh5_obj = store.read( "/geds/raw/baseline", lgnd_file, idx=[2, 4, 6, 9, 11, 16, 68] ) assert isinstance(lh5_obj, types.Array) - assert n_rows == 7 assert len(lh5_obj) == 7 assert (lh5_obj.nda == [13508, 14353, 14525, 14341, 15079, 11675, 13995]).all() @@ -392,20 +389,18 @@ def test_read_lgnd_array_fancy_idx(lgnd_file): def test_read_lgnd_vov(lgnd_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read("/geds/raw/tracelist", lgnd_file) + lh5_obj = store.read("/geds/raw/tracelist", lgnd_file) assert isinstance(lh5_obj, types.VectorOfVectors) - assert n_rows == 100 assert len(lh5_obj) == 100 def test_read_lgnd_vov_fancy_idx(lgnd_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read( + lh5_obj = store.read( "/geds/raw/tracelist", lgnd_file, idx=[2, 4, 6, 9, 11, 16, 68] ) assert 
isinstance(lh5_obj, types.VectorOfVectors) - assert n_rows == 7 assert len(lh5_obj) == 7 assert (lh5_obj.cumulative_length.nda == [1, 2, 3, 4, 5, 6, 7]).all() assert (lh5_obj.flattened_data.nda == [40, 60, 64, 60, 64, 28, 60]).all() @@ -413,19 +408,18 @@ def test_read_lgnd_vov_fancy_idx(lgnd_file): def test_read_array_concatenation(lgnd_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read("/geds/raw/baseline", [lgnd_file, lgnd_file]) + lh5_obj = store.read("/geds/raw/baseline", [lgnd_file, lgnd_file]) assert isinstance(lh5_obj, types.Array) - assert n_rows == 200 assert len(lh5_obj) == 200 def test_read_lgnd_waveform_table(lgnd_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read("/geds/raw/waveform", lgnd_file) + lh5_obj = store.read("/geds/raw/waveform", lgnd_file) assert isinstance(lh5_obj, types.WaveformTable) - lh5_obj, n_rows = store.read( + lh5_obj = store.read( "/geds/raw/waveform", lgnd_file, start_row=10, @@ -441,7 +435,7 @@ def test_read_lgnd_waveform_table(lgnd_file): def test_read_lgnd_waveform_table_fancy_idx(lgnd_file): store = lh5.LH5Store() - lh5_obj, n_rows = store.read( + lh5_obj = store.read( "/geds/raw/waveform", lgnd_file, idx=[7, 9, 25, 27, 33, 38, 46, 52, 57, 59, 67, 71, 72, 82, 90, 92, 93, 94, 97], @@ -452,6 +446,6 @@ def test_read_lgnd_waveform_table_fancy_idx(lgnd_file): def test_read_compressed_lgnd_waveform_table(lgnd_file, enc_lgnd_file): store = lh5.LH5Store() - wft, _ = store.read("/geds/raw/waveform", enc_lgnd_file) + wft = store.read("/geds/raw/waveform", enc_lgnd_file) assert isinstance(wft.values, types.ArrayOfEqualSizedArrays) assert "compression" not in wft.values.attrs diff --git a/tests/lh5/test_lh5_write.py b/tests/lh5/test_lh5_write.py index fd9604d4..97065552 100644 --- a/tests/lh5/test_lh5_write.py +++ b/tests/lh5/test_lh5_write.py @@ -17,7 +17,7 @@ def test_write_compressed_lgnd_waveform_table(enc_lgnd_file): # noqa: ARG001 def test_write_with_hdf5_compression(lgnd_file, tmptestdir): store = lh5.LH5Store() - wft, n_rows = store.read("/geds/raw/waveform", lgnd_file) + wft = store.read("/geds/raw/waveform", lgnd_file) store.write( wft, "/geds/raw/waveform", @@ -55,7 +55,7 @@ def test_write_empty_vov(tmptestdir): group="/data", ) - obj, _ = store.read("/data/vov", f"{tmptestdir}/tmp-pygama-lgdo-empty-vov.lh5") + obj = store.read("/data/vov", f"{tmptestdir}/tmp-pygama-lgdo-empty-vov.lh5") assert obj == vov @@ -123,7 +123,7 @@ def test_write_object_overwrite_table_no_deletion(caplog, tmptestdir): ] # Now, check that the data were overwritten - tb_dat, _ = store.read("my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") + tb_dat = store.read("my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") assert np.array_equal(tb_dat["dset1"].nda, np.ones(10)) @@ -149,7 +149,7 @@ def test_write_object_overwrite_table_with_deletion(caplog, tmptestdir): ) # Now, try to overwrite with a different field # Now, check that the data were overwritten - tb_dat, _ = store.read("my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") + tb_dat = store.read("my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") assert np.array_equal(tb_dat["dset2"].nda, np.ones(10)) # Also make sure that the first table's fields aren't lurking around the lh5 file! 
@@ -180,7 +180,7 @@ def test_write_object_overwrite_table_with_deletion(caplog, tmptestdir): ) # Now, try to overwrite with a different field # Now, check that the data were overwritten - tb_dat, _ = store.read( + tb_dat = store.read( "my_group/my_table", f"{tmptestdir}/write_object_overwrite_test.lh5" ) assert np.array_equal(tb_dat["dset2"].nda, np.ones(10)) @@ -236,7 +236,7 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): ] # Now, check that the data were overwritten - tb_dat, _ = store.read( + tb_dat = store.read( "my_group/my_table", f"{tmptestdir}/write_object_overwrite_test.lh5" ) assert np.array_equal(tb_dat["values"].nda, np.ones((10, 10))) @@ -255,7 +255,7 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): ) # Now, check that the data were overwritten - array_dat, _ = store.read( + array_dat = store.read( "my_array", f"{tmptestdir}/write_object_overwrite_test.lh5" ) expected_out_array = np.append(np.zeros(5), np.ones(20)) @@ -275,7 +275,7 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): ) # Now, check that the data were overwritten - scalar_dat, _ = store.read( + scalar_dat = store.read( "my_scalar", f"{tmptestdir}/write_object_overwrite_test.lh5" ) @@ -294,7 +294,7 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): write_start=1, ) # start overwriting the second list of lists - vector_dat, _ = store.read( + vector_dat = store.read( "my_vector", f"{tmptestdir}/write_object_overwrite_test.lh5" ) @@ -385,7 +385,7 @@ def test_write_object_append_column(tmptestdir): ) # Now, check that the data were appended - tb_dat, _ = store.read( + tb_dat = store.read( "my_group/my_table", f"{tmptestdir}/write_object_append_column_test.lh5" ) assert isinstance(tb_dat, types.Table) @@ -440,7 +440,7 @@ def test_write_histogram(caplog, tmptestdir): ) # Now, check that the data were overwritten - h3, _ = store.read( + h3 = store.read( "my_group/my_histogram", f"{tmptestdir}/write_histogram_test.lh5" ) assert np.array_equal(h3.weights.nda, np.array([[10, 10], [10, 10]])) @@ -508,7 +508,7 @@ def test_write_histogram_variable(caplog, tmptestdir): ) # Now, check that the data were overwritten - h3, _ = store.read( + h3 = store.read( "my_group/my_histogram", f"{tmptestdir}/write_histogram_test.lh5" ) assert np.array_equal(h3.weights.nda, np.array([[10, 10], [10, 10]])) diff --git a/tests/test_cli.py b/tests/test_cli.py index ac054854..2b98b987 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -74,10 +74,10 @@ def test_lh5concat(lgnd_test_data, tmptestdir): ] store = lh5.LH5Store() - tbl1, size = store.read("ch1057600/raw", infile1) - tbl2, size = store.read("ch1057600/raw", infile2) - tbl, size = store.read("ch1057600/raw", outfile) - assert size == 20 + tbl1 = store.read("ch1057600/raw", infile1) + tbl2 = store.read("ch1057600/raw", infile2) + tbl = store.read("ch1057600/raw", outfile) + assert len(tbl) == 20 for i in range(10): assert tbl.packet_id[i] == tbl1.packet_id[i] @@ -111,7 +111,7 @@ def test_lh5concat(lgnd_test_data, tmptestdir): "ch1057600/raw/waveform/values", ] - tbl, _ = store.read("ch1057600/raw", outfile) + tbl = store.read("ch1057600/raw", outfile) assert isinstance(tbl, types.Table) arg_list[4] = "--exclude" @@ -136,10 +136,10 @@ def test_lh5concat(lgnd_test_data, tmptestdir): "ch1057600/raw/waveform/t0", ] - tbl1, size = store.read("ch1059201/raw", infile1) - tbl2, size = store.read("ch1059201/raw", infile2) - tbl, size = store.read("ch1059201/raw", outfile) - assert size == 20 + tbl1 = store.read("ch1059201/raw", infile1) + tbl2 = 
store.read("ch1059201/raw", infile2) + tbl = store.read("ch1059201/raw", outfile) + assert len(tbl) == 20 for i in range(10): assert tbl.packet_id[i] == tbl1.packet_id[i] diff --git a/tests/types/test_histogram.py b/tests/types/test_histogram.py index c77878af..d922eb75 100644 --- a/tests/types/test_histogram.py +++ b/tests/types/test_histogram.py @@ -265,9 +265,9 @@ def test_view_as_np(): def test_not_like_table(): h = Histogram(np.array([1, 1]), (np.array([0, 1, 2]),)) assert h.form_datatype() == "struct{binning,weights,isdensity}" - with pytest.raises(TypeError): + with pytest.raises(AttributeError): x = h.x # noqa: F841 - with pytest.raises(TypeError): + with pytest.raises(AttributeError): h["x"] = Scalar(1.0) with pytest.raises(TypeError): h.add_field("x", Scalar(1.0)) diff --git a/tests/types/test_vectorofvectors.py b/tests/types/test_vectorofvectors.py index 0948c7bc..8357a5c5 100644 --- a/tests/types/test_vectorofvectors.py +++ b/tests/types/test_vectorofvectors.py @@ -439,5 +439,5 @@ def test_lh5_iterator_view_as(lgnd_test_data): "ch1067205/dsp/energies", ) - for obj, _, _ in it: + for obj, _ in it: assert ak.is_valid(obj.view_as("ak")) From 23a03a6272f7be6a001037ef90b49af89aa7aab5 Mon Sep 17 00:00:00 2001 From: iguinn Date: Sun, 13 Oct 2024 11:27:34 -0700 Subject: [PATCH 06/27] Changed table to handle capacity and resizing similar to array --- src/lgdo/lh5/core.py | 11 +++------- src/lgdo/types/table.py | 44 ++++++++++++++++++++++++++++--------- tests/lh5/test_lh5_store.py | 6 +++++ tests/types/test_table.py | 42 +++++++++++++++++++++-------------- 4 files changed, 68 insertions(+), 35 deletions(-) diff --git a/src/lgdo/lh5/core.py b/src/lgdo/lh5/core.py index fc97338d..3a2c2cb1 100644 --- a/src/lgdo/lh5/core.py +++ b/src/lgdo/lh5/core.py @@ -92,8 +92,7 @@ def read( will be set to ``True``, while the rest will default to ``False``. obj_buf Read directly into memory provided in `obj_buf`. Note: the buffer - will be expanded to accommodate the data requested. To maintain the - buffer length, send in ``n_rows = len(obj_buf)``. + will be resized to accommodate the data retrieved. obj_buf_start Start location in ``obj_buf`` for read. For concatenating data to array-like objects. @@ -106,12 +105,8 @@ def read( Returns ------- - (object, n_rows_read) - `object` is the read-out object `n_rows_read` is the number of rows - successfully read out. Essential for arrays when the amount of data - is smaller than the object buffer. For scalars and structs - `n_rows_read` will be``1``. For tables it is redundant with - ``table.loc``. If `obj_buf` is ``None``, only `object` is returned. 
From 23a03a6272f7be6a001037ef90b49af89aa7aab5 Mon Sep 17 00:00:00 2001
From: iguinn
Date: Sun, 13 Oct 2024 11:27:34 -0700
Subject: [PATCH 06/27] Changed table to handle capacity and resizing similar
 to array

---
 src/lgdo/lh5/core.py        | 11 +++-------
 src/lgdo/types/table.py     | 44 ++++++++++++++++++++++---------
 tests/lh5/test_lh5_store.py |  6 +++++
 tests/types/test_table.py   | 42 +++++++++++++++++++--------------
 4 files changed, 68 insertions(+), 35 deletions(-)

diff --git a/src/lgdo/lh5/core.py b/src/lgdo/lh5/core.py
index fc97338d..3a2c2cb1 100644
--- a/src/lgdo/lh5/core.py
+++ b/src/lgdo/lh5/core.py
@@ -92,8 +92,7 @@ def read(
         will be set to ``True``, while the rest will default to ``False``.
     obj_buf
         Read directly into memory provided in `obj_buf`. Note: the buffer
-        will be expanded to accommodate the data requested. To maintain the
-        buffer length, send in ``n_rows = len(obj_buf)``.
+        will be resized to accommodate the data retrieved.
     obj_buf_start
         Start location in ``obj_buf`` for read. For concatenating data to
         array-like objects.
@@ -106,12 +105,8 @@ def read(

     Returns
     -------
-    (object, n_rows_read)
-        `object` is the read-out object `n_rows_read` is the number of rows
-        successfully read out. Essential for arrays when the amount of data
-        is smaller than the object buffer. For scalars and structs
-        `n_rows_read` will be``1``. For tables it is redundant with
-        ``table.loc``. If `obj_buf` is ``None``, only `object` is returned.
+    object
+        the read-out object
     """
     if isinstance(lh5_file, h5py.File):
         lh5_obj = lh5_file[name]
diff --git a/src/lgdo/types/table.py b/src/lgdo/types/table.py
index 81c43bf3..b1bef50a 100644
--- a/src/lgdo/types/table.py
+++ b/src/lgdo/types/table.py
@@ -100,7 +100,30 @@ def __len__(self) -> int:
         """Provides ``__len__`` for this array-like class."""
         return self.size

-    def resize(self, new_size: int | None = None, do_warn: bool = False) -> None:
+    def set_capacity(self, capacity: int | ArrayLike) -> None:
+        "Set capacity (number of rows) of internal memory buffer"
+        if isinstance(capacity, int):
+            for obj in self.values():
+                obj.set_capacity(capacity)
+        else:
+            if len(capacity) != len(self.keys()):
+                msg = "List of capacities must have same length as number of keys"
+                raise ValueError(msg)
+
+            for obj, cap in zip(self.values(), capacity):
+                obj.set_capacity(cap)
+
+    def get_capacity(self) -> list:
+        "Get list of capacities for each key"
+        return [v.get_capacity() for v in self.values()]
+
+    def trim_capacity(self) -> None:
+        "Set capacity to the minimum needed to support the current size"
+        for v in self.values():
+            v.trim_capacity()
+
+    def resize(self, new_size: int | None = None, do_warn: bool = False, trim: bool = False) -> None:
         # if new_size = None, use the size from the first field
         for field, obj in self.items():
             if new_size is None:
@@ -112,19 +135,20 @@ def resize(self, new_size: int | None = None, do_warn: bool = False) -> None:
                     f"with size {len(obj)} != {new_size}"
                 )
             if isinstance(obj, Table):
-                obj.resize(new_size)
+                obj.resize(new_size, trim)
             else:
-                obj.resize(new_size)
+                obj.resize(new_size, trim)

         self.size = new_size

-    def push_row(self) -> None:
-        self.loc += 1
-
-    def is_full(self) -> bool:
-        return self.loc >= self.size
-
-    def clear(self) -> None:
-        self.loc = 0
+    def append(self, vals: dict) -> None:
+        "Append vals to end of table. Vals is a mapping from table key to val"
+        self.insert(len(self), vals)
+
+    def insert(self, i: int, vals: dict) -> None:
+        "Insert vals into table at row i. Vals is a mapping from table key to val"
+        for k, ar in self.items():
+            ar.insert(i, vals[k])
+        self.size += 1

     def add_field(self, name: str, obj: LGDO, use_obj_size: bool = False) -> None:
         """Add a field (column) to the table.

         Use the name "field" here to match the terminology used in
diff --git a/tests/lh5/test_lh5_store.py b/tests/lh5/test_lh5_store.py
index 63931d44..99501832 100644
--- a/tests/lh5/test_lh5_store.py
+++ b/tests/lh5/test_lh5_store.py
@@ -132,6 +132,12 @@ def test_read_vov(lh5_file):
     assert len(lh5_obj) == 3
     assert lh5_obj.attrs["myattr"] == 2

+    lh5_obj = store.read("/data/struct/vov", [lh5_file, lh5_file])
+    assert len(lh5_obj) == 6
+    assert lh5_obj == lgdo.VectorOfVectors(
+        [[3, 4, 5], [2], [4, 8, 9, 7], [3, 4, 5], [2], [4, 8, 9, 7]], attrs={"myattr": 2}
+    )
+
     with h5py.File(lh5_file) as h5f:
         assert (
             h5f["/data/struct/vov/cumulative_length"].compression
diff --git a/tests/types/test_table.py b/tests/types/test_table.py
index efbb6234..f1cadbc1 100644
--- a/tests/types/test_table.py
+++ b/tests/types/test_table.py
@@ -78,25 +78,33 @@ def test_datatype_name():
     assert tbl.datatype_name() == "table"


-def test_push_row():
-    tbl = Table()
-    tbl.push_row()
-    assert tbl.loc == 1
-
-
-def test_is_full():
-    tbl = Table(size=2)
-    tbl.push_row()
-    assert tbl.is_full() is False
-    tbl.push_row()
-    assert tbl.is_full() is True
+def test_append():
+    col_dict = {
+        "a": lgdo.Array(nda=np.array([1, 2, 3, 4])),
+        "b": lgdo.Array(nda=np.array([5, 6, 7, 8])),
+    }

+    tbl = Table(col_dict=col_dict)
+    tbl.append({"a": -1, "b": -1})
+    assert len(tbl) == 5
+    assert tbl == Table(
+        {
+            "a": lgdo.Array(nda=np.array([1, 2, 3, 4, -1])),
+            "b": lgdo.Array(nda=np.array([5, 6, 7, 8, -1])),
+        }
+    )

-def test_clear():
-    tbl = Table()
-    tbl.push_row()
-    tbl.clear()
-    assert tbl.loc == 0
+
+def test_insert():
+    col_dict = {
+        "a": lgdo.Array(nda=np.array([1, 2, 3, 4])),
+        "b": lgdo.Array(nda=np.array([5, 6, 7, 8])),
+    }
+
+    tbl = Table(col_dict=col_dict)
+    tbl.insert(1, {"a": -1, "b": -1})
+    assert len(tbl) == 5
+    assert tbl == Table(
+        {
+            "a": lgdo.Array(nda=np.array([1, -1, 2, 3, 4])),
+            "b": lgdo.Array(nda=np.array([5, -1, 6, 7, 8])),
+        }
+    )


 def test_add_field():

From 32ceef9ef6592191d2877c38da426ce26bcc0b44 Mon Sep 17 00:00:00 2001
From: iguinn
Date: Sun, 13 Oct 2024 13:34:59 -0700
Subject: [PATCH 07/27] Fixed test

---
 tests/lh5/test_lh5_store.py | 4 ++--
 tests/types/test_table.py   | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/lh5/test_lh5_store.py b/tests/lh5/test_lh5_store.py
index 99501832..30077dac 100644
--- a/tests/lh5/test_lh5_store.py
+++ b/tests/lh5/test_lh5_store.py
@@ -309,7 +309,7 @@ def test_read_wftable_encoded(lh5_file):
     lh5_obj_chain = store.read(
         "/data/struct/wftable_enc", [lh5_file, lh5_file], decompress=False
     )
-    assert len(lh5_obj) == 6
+    assert len(lh5_obj_chain) == 6
     assert isinstance(lh5_obj_chain.values, lgdo.ArrayOfEncodedEqualSizedArrays)

     lh5_obj_chain = store.read(
@@ -318,7 +318,7 @@ def test_read_wftable_encoded(lh5_file):
     assert isinstance(lh5_obj_chain.values, lgdo.ArrayOfEqualSizedArrays)
     assert np.array_equal(lh5_obj_chain.values[:3], lh5_obj.values)
     assert np.array_equal(lh5_obj_chain.values[3:], lh5_obj.values)
-    assert len(lh5_obj) == 6
+    assert len(lh5_obj_chain) == 6

     with h5py.File(lh5_file) as h5f:
         assert (
diff --git a/tests/types/test_table.py b/tests/types/test_table.py
index f1cadbc1..cfdbe23d 100644
--- a/tests/types/test_table.py
+++ b/tests/types/test_table.py
@@ -14,7 +14,6 @@ def test_init():
     tbl = Table()
     assert not tbl.size
-    assert tbl.loc == 0

     tbl = Table(size=10)
     assert tbl.size == 10
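
Patches 06 and 07 replace the Table loc/push_row bookkeeping with explicit row
operations, which patch 08 below formalizes into an interface. A small usage
sketch (column names are arbitrary; it assumes only the Table methods added in
patch 06):

    import numpy as np
    from lgdo import Array, Table

    tbl = Table(col_dict={"a": Array(np.array([1, 2])), "b": Array(np.array([3.0, 4.0]))})
    tbl.append({"a": 5, "b": 6.0})     # grows every column by one row
    tbl.insert(0, {"a": 0, "b": 0.0})  # shifts existing rows, like Array.insert
    assert len(tbl) == 4
    assert tbl.get_capacity()[0] >= 4  # per-column capacities, as a list
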
From 8d7c1eb4af133fc585cc5aa415938cdc237beaf5 Mon Sep 17 00:00:00 2001
From: iguinn
Date: Sun, 13 Oct 2024 13:36:10 -0700
Subject: [PATCH 08/27] Added abstract base class for LGDO collections

---
 src/lgdo/lh5/_serializers/read/composite.py |  4 +-
 src/lgdo/types/array.py                     | 19 ++++--
 src/lgdo/types/encoded.py                   | 43 +++++++------
 src/lgdo/types/lgdo.py                      | 70 +++++++++++++++++++++
 src/lgdo/types/table.py                     | 27 ++++----
 src/lgdo/types/vectorofvectors.py           | 10 +--
 6 files changed, 125 insertions(+), 48 deletions(-)

diff --git a/src/lgdo/lh5/_serializers/read/composite.py b/src/lgdo/lh5/_serializers/read/composite.py
index 2674499e..d4c4464f 100644
--- a/src/lgdo/lh5/_serializers/read/composite.py
+++ b/src/lgdo/lh5/_serializers/read/composite.py
@@ -370,15 +370,13 @@ def _h5_read_table(
         table = Table(col_dict=col_dict, attrs=attrs)

         # set (write) loc to end of tree
-        table.loc = n_rows_read
+        table.resize(do_warn=True)
         return table, n_rows_read

     # We have read all fields into the object buffer. Run
     # checks: All columns should be the same size. So update
     # table's size as necessary, warn if any mismatches are found
     obj_buf.resize(do_warn=True)
-    # set (write) loc to end of tree
-    obj_buf.loc = obj_buf_start + n_rows_read

     # check attributes
     utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
diff --git a/src/lgdo/types/array.py b/src/lgdo/types/array.py
index 32343074..e567977c 100644
--- a/src/lgdo/types/array.py
+++ b/src/lgdo/types/array.py
@@ -17,12 +17,12 @@
 from .. import utils
 from ..units import default_units_registry as u
-from .lgdo import LGDO
+from .lgdo import LGDOCollection

 log = logging.getLogger(__name__)


-class Array(LGDO):
+class Array(LGDOCollection):
     r"""Holds an :class:`numpy.ndarray` and attributes.

     :class:`Array` (and the other various array types) holds an `nda` instead
@@ -111,7 +111,7 @@ def dtype(self):
     def shape(self):
         return (len(self),) + self._nda.shape[1:]

-    def set_capacity(self, capacity: int) -> None:
+    def reserve_capacity(self, capacity: int) -> None:
         "Set capacity (number of rows) of internal memory buffer"
         if capacity < len(self):
             msg = "Cannot reduce capacity below Array length"
             raise ValueError(msg)
@@ -124,7 +124,7 @@ def get_capacity(self) -> int:

     def trim_capacity(self) -> None:
         "Set capacity to the minimum needed to support the Array size"
-        self.set_capacity(len(self))
+        self.reserve_capacity(len(self))

     def resize(self, new_size: int, trim=False) -> None:
         """Set size of Array in rows. Only change capacity if it must be
         increased to accommodate new rows; in this case, grow to the next
         power of two that fits. If trim is True, set capacity to match size."""

         self._size = new_size

         if trim and new_size != self.get_capacity():
-            self.set_capacity(new_size)
+            self.reserve_capacity(new_size)

         # If capacity is not big enough, set to next power of 2 big enough
         if new_size > self.get_capacity():
-            self.set_capacity(int(2 ** (np.ceil(np.log2(new_size)))))
+            self.reserve_capacity(int(2 ** (np.ceil(np.log2(new_size)))))
@@ -163,6 +163,13 @@ def insert(self, i: int, value: int | float) -> None:
             msg = f"Could not insert value with shape {value.shape} into Array with shape {self.shape}"
             raise ValueError(msg)

+    def replace(self, i: int, value: int | float) -> None:
+        "Replace value at row i"
+        if i >= len(self):
+            msg = f"index {i} is out of bounds for array with size {len(self)}"
+            raise IndexError(msg)
+        self[i] = value
+
     def __getitem__(self, key):
         return self.nda[key]
diff --git a/src/lgdo/types/encoded.py b/src/lgdo/types/encoded.py
index 5fe38249..15d6dadb 100644
--- a/src/lgdo/types/encoded.py
+++ b/src/lgdo/types/encoded.py
@@ -11,12 +11,12 @@
 from .. import utils
 from .array import Array
-from .lgdo import LGDO
+from .lgdo import LGDOCollection
 from .scalar import Scalar
 from .vectorofvectors import VectorOfVectors


-class VectorOfEncodedVectors(LGDO):
+class VectorOfEncodedVectors(LGDOCollection):
     """An array of variable-length encoded arrays.

     Used to represent an encoded :class:`.VectorOfVectors`. In addition to an
@@ -92,6 +92,17 @@ def __eq__(self, other: VectorOfEncodedVectors) -> bool:

         return False

+    def reserve_capacity(self, *capacity: int) -> None:
+        self.encoded_data.reserve_capacity(*capacity)
+        self.decoded_size.reserve_capacity(capacity[0])
+
+    def get_capacity(self) -> tuple:
+        return (self.decoded_size.get_capacity(), *self.encoded_data.get_capacity())
+
+    def trim_capacity(self) -> None:
+        self.encoded_data.trim_capacity()
+        self.decoded_size.trim_capacity()
+
     def resize(self, new_size: int) -> None:
         """Resize vector along the first axis.

@@ -102,21 +113,6 @@ def resize(self, new_size: int) -> None:
         self.encoded_data.resize(new_size)
         self.decoded_size.resize(new_size)

-    def append(self, value: tuple[NDArray, int]) -> None:
-        """Append a 1D encoded vector at the end.
-
-        Parameters
-        ----------
-        value
-            a tuple holding the encoded array and its decoded size.
-
-        See Also
-        --------
-        .VectorOfVectors.append
-        """
-        self.encoded_data.append(value[0])
-        self.decoded_size.append(value[1])
-
     def insert(self, i: int, value: tuple[NDArray, int]) -> None:
         """Insert an encoded vector at index `i`.

@@ -282,7 +278,7 @@ def view_as(
         raise ValueError(msg)


-class ArrayOfEncodedEqualSizedArrays(LGDO):
+class ArrayOfEncodedEqualSizedArrays(LGDOCollection):
     """An array of encoded arrays with equal decoded size.

     Used to represent an encoded :class:`.ArrayOfEqualSizedArrays`. In addition
@@ -349,7 +345,16 @@ def __eq__(self, other: ArrayOfEncodedEqualSizedArrays) -> bool:

         return False

+    def reserve_capacity(self, *capacity: int) -> None:
+        self.encoded_data.reserve_capacity(*capacity)
+
+    def get_capacity(self) -> tuple:
+        return self.encoded_data.get_capacity()
+
+    def trim_capacity(self) -> None:
+        self.encoded_data.trim_capacity()
+
-    def resize(self, new_size: int) -> None:
+    def resize(self, new_size: int, trim: bool = False) -> None:
         """Resize array along the first axis.

         See Also
diff --git a/src/lgdo/types/lgdo.py b/src/lgdo/types/lgdo.py
index 4a965c04..18c7e7a4 100644
--- a/src/lgdo/types/lgdo.py
+++ b/src/lgdo/types/lgdo.py
@@ -86,3 +86,73 @@ def __str__(self) -> str:

     def __repr__(self) -> str:
         return self.__class__.__name__ + f"(attrs={self.attrs!r})"
+
+
+class LGDOCollection(LGDO):
+    """Abstract base class representing a LEGEND Collection Object (LGDO).
+    This defines the interface for classes used as table columns.
+    """
+
+    @abstractmethod
+    def __init__(self, attrs: dict[str, Any] | None = None) -> None:
+        super().__init__(attrs)
+
+    @abstractmethod
+    def __len__(self) -> int:
+        """Provides ``__len__`` for this array-like class."""
+
+    @abstractmethod
+    def reserve_capacity(self, capacity: int) -> None:
+        """Reserve capacity (in rows) for later use. Internal memory buffers
+        will have enough entries to store this many rows.
+ """ + + @abstractmethod + def get_capacity(self) -> int: + "get reserved capacity of internal memory buffers in rows" + + @abstractmethod + def trim_capacity(self) -> None: + """set capacity to only what is required to store current contents + of LGDOCollection + """ + + @abstractmethod + def resize(self, new_size: int, trim: bool = False) -> None: + """Return this LGDO's datatype attribute string.""" + + def append(self, val) -> None: + "append val to end of LGDOCollection" + self.insert(len(self), val) + + @abstractmethod + def insert(self, i: int, val) -> None: + "insert val into LGDOCollection at position i" + + @abstractmethod + def replace(self, i: int, val) -> None: + "replace item at position i with val in LGDOCollection" + + def clear(self, trim: bool = False) -> None: + "set size of LGDOCollection to zero" + self.resize(0, trim) + + def getattrs(self, datatype: bool = False) -> dict: + """Return a copy of the LGDO attributes dictionary. + + Parameters + ---------- + datatype + if ``False``, remove ``datatype`` attribute from the output + dictionary. + """ + d = dict(self.attrs) + if not datatype: + d.pop("datatype", None) + return d + + def __str__(self) -> str: + return repr(self) + + def __repr__(self) -> str: + return self.__class__.__name__ + f"(attrs={self.attrs!r})" diff --git a/src/lgdo/types/table.py b/src/lgdo/types/table.py index b1bef50a..4fe9103f 100644 --- a/src/lgdo/types/table.py +++ b/src/lgdo/types/table.py @@ -18,7 +18,7 @@ from .array import Array from .arrayofequalsizedarrays import ArrayOfEqualSizedArrays -from .lgdo import LGDO +from .lgdo import LGDOCollection from .scalar import Scalar from .struct import Struct from .vectorofvectors import VectorOfVectors @@ -26,7 +26,7 @@ log = logging.getLogger(__name__) -class Table(Struct): +class Table(Struct, LGDOCollection): """A special struct of arrays or subtable columns of equal length. Holds onto an internal read/write location ``loc`` that is useful in @@ -42,7 +42,7 @@ class Table(Struct): def __init__( self, - col_dict: Mapping[str, LGDO] | pd.DataFrame | ak.Array | None = None, + col_dict: Mapping[str, LGDOCollection] | pd.DataFrame | ak.Array | None = None, size: int | None = None, attrs: Mapping[str, Any] | None = None, ) -> None: @@ -58,7 +58,7 @@ def __init__( col_dict instantiate this table using the supplied mapping of column names and array-like objects. Supported input types are: mapping of - strings to LGDOs, :class:`pd.DataFrame` and :class:`ak.Array`. + strings to LGDOCollections, :class:`pd.DataFrame` and :class:`ak.Array`. Note 1: no copy is performed, the objects are used directly (unless :class:`ak.Array` is provided). Note 2: if `size` is not ``None``, all arrays will be resized to match it. 
Note 3: if the arrays have @@ -86,13 +86,10 @@ def __init__( if col_dict is not None and len(col_dict) > 0: self.resize(new_size=size, do_warn=(size is None)) - # if no col_dict, just set the size (default to 1024) + # if no col_dict, just set the size else: self.size = size if size is not None else None - # always start at loc=0 - self.loc = 0 - def datatype_name(self) -> str: return "table" @@ -100,18 +97,18 @@ def __len__(self) -> int: """Provides ``__len__`` for this array-like class.""" return self.size - def set_capacity(self, capacity: int | ArrayLike) -> None: + def reserve_capacity(self, capacity: int | ArrayLike) -> None: "Set size (number of rows) of internal memory buffer" if isinstance(capacity, int): for obj in self.values(): - obj.set_capacity(capacity) + obj.reserve_capacity(capacity) else: if len(capacity) != len(self.keys()): msg = "List of capacities must have same length as number of keys" raise ValueError(msg) for obj, cap in zip(self.values(), capacity): - obj.set_capacity(cap) + obj.reserve_capacity(cap) def get_capacity(self) -> int: "Get list of capacities for each key" @@ -150,7 +147,7 @@ def insert(self, i: int, vals: Dict) -> None: ar.insert(i, vals[k]) self.size += 1 - def add_field(self, name: str, obj: LGDO, use_obj_size: bool = False) -> None: + def add_field(self, name: str, obj: LGDOCollection, use_obj_size: bool = False) -> None: """Add a field (column) to the table. Use the name "field" here to match the terminology used in @@ -187,7 +184,7 @@ def add_field(self, name: str, obj: LGDO, use_obj_size: bool = False) -> None: new_size = len(obj) if use_obj_size else self.size self.resize(new_size=new_size) - def add_column(self, name: str, obj: LGDO, use_obj_size: bool = False) -> None: + def add_column(self, name: str, obj: LGDOCollection, use_obj_size: bool = False) -> None: """Alias for :meth:`.add_field` using table terminology 'column'.""" self.add_field(name, obj, use_obj_size=use_obj_size) @@ -218,8 +215,8 @@ def join( set to ``False`` to turn off warnings associated with mismatched `loc` parameter or :meth:`add_column` warnings. """ - if other_table.loc != self.loc and do_warn: - log.warning(f"other_table.loc ({other_table.loc}) != self.loc({self.loc})") + if len(other_table) != len(self) and do_warn: + log.warning(f"len(other_table) ({len(other_table)}) != len(self) ({len(self)})") if cols is None: cols = other_table.keys() for name in cols: diff --git a/src/lgdo/types/vectorofvectors.py b/src/lgdo/types/vectorofvectors.py index 985e9427..d28870f0 100644 --- a/src/lgdo/types/vectorofvectors.py +++ b/src/lgdo/types/vectorofvectors.py @@ -19,12 +19,12 @@ from . import arrayofequalsizedarrays as aoesa from . import vovutils from .array import Array -from .lgdo import LGDO +from .lgdo import LGDOCollection log = logging.getLogger(__name__) -class VectorOfVectors(LGDO): +class VectorOfVectors(LGDOCollection): """A n-dimensional variable-length 1D array of variable-length 1D arrays. If the vector is 2-dimensional, the internal representation is as two NumPy @@ -270,14 +270,14 @@ def __setitem__(self, i: int, new: NDArray) -> None: else: raise NotImplementedError - def set_capacity(self, cap_cl, *cap_args) -> None: + def reserve_capacity(self, cap_cl, *cap_args) -> None: """Set capacity of internal data arrays. Expect number of args to equal `self.n_dim`. First arg is capacity of cumulative length array. If `self.n_dim` is 2, second argument is capacity of flattened data, otherwise arguments are fed recursively to remaining dimensions. 
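(A sketch of this calling convention for a 2-dimensional vector; the capacities are illustrative and not part of the diff.)

    from lgdo import VectorOfVectors

    vov = VectorOfVectors([[1, 2], [3, 4, 5]])
    vov.reserve_capacity(10, 100)  # 10 rows of cumulative_length, 100 entries of flattened_data
    vov.append([6, 7])             # fits within the reserved buffers, no reallocation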
""" - self.cumulative_length.set_capacity(cap_cl) - self.flattened_data.set_capacity(*cap_args) + self.cumulative_length.reserve_capacity(cap_cl) + self.flattened_data.reserve_capacity(*cap_args) def get_capacity(self) -> Tuple[int]: """Get tuple containing capacity of each dimension. First dimension From 8a5dcb27be935b24a4fb11a8eb9cb49b8c08cba3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 13 Oct 2024 21:15:19 +0000 Subject: [PATCH 09/27] style: pre-commit fixes --- src/lgdo/lh5/core.py | 4 ++-- src/lgdo/lh5/store.py | 2 -- src/lgdo/types/array.py | 2 +- src/lgdo/types/encoded.py | 2 +- src/lgdo/types/table.py | 21 ++++++++++++++------- src/lgdo/types/vectorofvectors.py | 27 +++++++++++++++++---------- tests/lh5/test_lh5_store.py | 27 ++++++++------------------- tests/lh5/test_lh5_write.py | 12 +++--------- tests/types/test_table.py | 21 +++++++++++++-------- 9 files changed, 59 insertions(+), 59 deletions(-) diff --git a/src/lgdo/lh5/core.py b/src/lgdo/lh5/core.py index 3a2c2cb1..b6a52dc4 100644 --- a/src/lgdo/lh5/core.py +++ b/src/lgdo/lh5/core.py @@ -118,7 +118,7 @@ def read( obj_buf.resize(obj_buf_start) else: obj_buf_start = 0 - + for i, h5f in enumerate(lh5_file): if ( isinstance(idx, (list, tuple)) @@ -148,7 +148,7 @@ def read( obj_buf = read( name, h5f, - start_row if i==0 else 0, + start_row if i == 0 else 0, n_rows_i, idx_i, use_h5idx, diff --git a/src/lgdo/lh5/store.py b/src/lgdo/lh5/store.py index a1149e6b..424e230d 100644 --- a/src/lgdo/lh5/store.py +++ b/src/lgdo/lh5/store.py @@ -5,7 +5,6 @@ from __future__ import annotations -import bisect import logging import os import sys @@ -14,7 +13,6 @@ from typing import Any import h5py -import numpy as np from numpy.typing import ArrayLike from .. import types diff --git a/src/lgdo/types/array.py b/src/lgdo/types/array.py index e567977c..c1c720ff 100644 --- a/src/lgdo/types/array.py +++ b/src/lgdo/types/array.py @@ -149,7 +149,7 @@ def insert(self, i: int, value: int | float) -> None: if i > len(self): msg = f"index {i} is out of bounds for array with size {len(self)}" raise IndexError(msg) - + value = np.array(value) if value.shape == self.shape[1:]: self.resize(len(self) + 1) diff --git a/src/lgdo/types/encoded.py b/src/lgdo/types/encoded.py index 15d6dadb..b4caf7f8 100644 --- a/src/lgdo/types/encoded.py +++ b/src/lgdo/types/encoded.py @@ -102,7 +102,7 @@ def get_capacity(self) -> Tuple: def trim_capacity(self) -> None: self.encoded_data.trim_capacity() self.decoded_size.trim_capacity() - + def resize(self, new_size: int) -> None: """Resize vector along the first axis. 
diff --git a/src/lgdo/types/table.py b/src/lgdo/types/table.py index 4fe9103f..5fbe7c6b 100644 --- a/src/lgdo/types/table.py +++ b/src/lgdo/types/table.py @@ -106,21 +106,22 @@ def reserve_capacity(self, capacity: int | ArrayLike) -> None: if len(capacity) != len(self.keys()): msg = "List of capacities must have same length as number of keys" raise ValueError(msg) - + for obj, cap in zip(self.values(), capacity): obj.reserve_capacity(cap) def get_capacity(self) -> int: "Get list of capacities for each key" - return [ v.get_capacity() for v in self.values() ] + return [v.get_capacity() for v in self.values()] def trim_capacity(self) -> int: "Set capacity to be minimum needed to support Array size" for v in self.values(): v.trim_capacity() - - def resize(self, new_size: int | None = None, do_warn: bool = False, trim: bool = False) -> None: + def resize( + self, new_size: int | None = None, do_warn: bool = False, trim: bool = False + ) -> None: # if new_size = None, use the size from the first field for field, obj in self.items(): if new_size is None: @@ -147,7 +148,9 @@ def insert(self, i: int, vals: Dict) -> None: ar.insert(i, vals[k]) self.size += 1 - def add_field(self, name: str, obj: LGDOCollection, use_obj_size: bool = False) -> None: + def add_field( + self, name: str, obj: LGDOCollection, use_obj_size: bool = False + ) -> None: """Add a field (column) to the table. Use the name "field" here to match the terminology used in @@ -184,7 +187,9 @@ def add_field(self, name: str, obj: LGDOCollection, use_obj_size: bool = False) new_size = len(obj) if use_obj_size else self.size self.resize(new_size=new_size) - def add_column(self, name: str, obj: LGDOCollection, use_obj_size: bool = False) -> None: + def add_column( + self, name: str, obj: LGDOCollection, use_obj_size: bool = False + ) -> None: """Alias for :meth:`.add_field` using table terminology 'column'.""" self.add_field(name, obj, use_obj_size=use_obj_size) @@ -216,7 +221,9 @@ def join( `loc` parameter or :meth:`add_column` warnings. """ if len(other_table) != len(self) and do_warn: - log.warning(f"len(other_table) ({len(other_table)}) != len(self) ({len(self)})") + log.warning( + f"len(other_table) ({len(other_table)}) != len(self) ({len(self)})" + ) if cols is None: cols = other_table.keys() for name in cols: diff --git a/src/lgdo/types/vectorofvectors.py b/src/lgdo/types/vectorofvectors.py index d28870f0..5bed511e 100644 --- a/src/lgdo/types/vectorofvectors.py +++ b/src/lgdo/types/vectorofvectors.py @@ -213,8 +213,10 @@ def __init__( @property def ndim(self): - return 1 + (1 if isinstance(self.flattened_data, Array) else self.flattened_data.ndim) - + return 1 + ( + 1 if isinstance(self.flattened_data, Array) else self.flattened_data.ndim + ) + @property def dtype(self) -> np.dtype: return self.flattened_data.dtype @@ -283,7 +285,10 @@ def get_capacity(self) -> Tuple[int]: """Get tuple containing capacity of each dimension. First dimension is cumulative length array. Last dimension is flattened data. """ - return (self.cumulative_length.get_capacity(), *self.flattened_data.get_capacity()) + return ( + self.cumulative_length.get_capacity(), + *self.flattened_data.get_capacity(), + ) def trim_capacity(self) -> None: "Set capacity for all dimensions to minimum needed to hold data" @@ -299,7 +304,7 @@ def resize(self, new_size: int, trim: bool = False) -> None: If `new_size` is larger than the current vector length, `self.cumulative_length` is padded with its last element. This corresponds to appending empty vectors. 
- + If `trim` is ``True``, resize capacity to match new size Examples @@ -327,7 +332,7 @@ def resize(self, new_size: int, trim: bool = False) -> None: # if new_size > size, new elements are filled with zeros, let's fix # that if new_size > old_s: - self.cumulative_length[old_s:] = self.cumulative_length[old_s-1] + self.cumulative_length[old_s:] = self.cumulative_length[old_s - 1] # then resize the data array # if dlen > 0 this has no effect @@ -370,7 +375,7 @@ def insert(self, i: int, new: NDArray) -> None: msg = f"index {i} is out of bounds for vector with size {len(self)}" raise IndexError(msg) - i_start = 0 if i==0 else self.cumulative_length[i-1] + i_start = 0 if i == 0 else self.cumulative_length[i - 1] self.flattened_data.insert(i_start, new) self.cumulative_length.insert(i, i_start) self.cumulative_length[i:] += np.uint32(len(new)) @@ -405,11 +410,13 @@ def replace(self, i: int, new: NDArray) -> None: # move the subsequent entries vidx[i:] += dlen self.flattened_data.resize(vidx[-1]) - self.flattened_data._nda[vidx[i]:vidx[-1]] = self.flattened_data._nda[vidx[i]-dlen:vidx[-1]-dlen] - + self.flattened_data._nda[vidx[i] : vidx[-1]] = self.flattened_data._nda[ + vidx[i] - dlen : vidx[-1] - dlen + ] + # set the already allocated indices - start = vidx[i - 1] if i>0 else 0 - self.flattened_data[start:vidx[i]] = new + start = vidx[i - 1] if i > 0 else 0 + self.flattened_data[start : vidx[i]] = new else: raise NotImplementedError diff --git a/tests/lh5/test_lh5_store.py b/tests/lh5/test_lh5_store.py index 30077dac..711182e5 100644 --- a/tests/lh5/test_lh5_store.py +++ b/tests/lh5/test_lh5_store.py @@ -90,9 +90,7 @@ def test_read_array(lh5_file): def test_read_array_slice(lh5_file): store = lh5.LH5Store() - lh5_obj = store.read( - "/data/struct_full/array", lh5_file, start_row=1, n_rows=3 - ) + lh5_obj = store.read("/data/struct_full/array", lh5_file, start_row=1, n_rows=3) assert isinstance(lh5_obj, types.Array) assert len(lh5_obj) == 3 assert lh5_obj == lgdo.Array([2, 3, 4]) @@ -135,7 +133,8 @@ def test_read_vov(lh5_file): lh5_obj = store.read("/data/struct/vov", [lh5_file, lh5_file]) assert len(lh5_obj) == 6 assert lh5_obj == lgdo.VectorOfVectors( - [[3, 4, 5], [2], [4, 8, 9, 7], [3, 4, 5], [2], [4, 8, 9, 7]], attrs={"myattr": 2} + [[3, 4, 5], [2], [4, 8, 9, 7], [3, 4, 5], [2], [4, 8, 9, 7]], + attrs={"myattr": 2}, ) with h5py.File(lh5_file) as h5f: @@ -188,9 +187,7 @@ def test_read_voev(lh5_file): assert len(lh5_obj) == 3 - lh5_obj = store.read( - "/data/struct/voev", [lh5_file, lh5_file], decompress=False - ) + lh5_obj = store.read("/data/struct/voev", [lh5_file, lh5_file], decompress=False) assert isinstance(lh5_obj, types.VectorOfEncodedVectors) assert len(lh5_obj) == 6 @@ -344,14 +341,10 @@ def test_read_with_field_mask(lh5_file): lh5_obj = store.read("/data/struct_full", lh5_file, field_mask=["array"]) assert list(lh5_obj.keys()) == ["array"] - lh5_obj = store.read( - "/data/struct_full", lh5_file, field_mask=("array", "table") - ) + lh5_obj = store.read("/data/struct_full", lh5_file, field_mask=("array", "table")) assert sorted(lh5_obj.keys()) == ["array", "table"] - lh5_obj = store.read( - "/data/struct_full", lh5_file, field_mask={"array": True} - ) + lh5_obj = store.read("/data/struct_full", lh5_file, field_mask={"array": True}) assert list(lh5_obj.keys()) == ["array"] lh5_obj = store.read( @@ -384,9 +377,7 @@ def test_read_lgnd_array(lgnd_file): def test_read_lgnd_array_fancy_idx(lgnd_file): store = lh5.LH5Store() - lh5_obj = store.read( - "/geds/raw/baseline", lgnd_file, 
idx=[2, 4, 6, 9, 11, 16, 68] - ) + lh5_obj = store.read("/geds/raw/baseline", lgnd_file, idx=[2, 4, 6, 9, 11, 16, 68]) assert isinstance(lh5_obj, types.Array) assert len(lh5_obj) == 7 assert (lh5_obj.nda == [13508, 14353, 14525, 14341, 15079, 11675, 13995]).all() @@ -403,9 +394,7 @@ def test_read_lgnd_vov(lgnd_file): def test_read_lgnd_vov_fancy_idx(lgnd_file): store = lh5.LH5Store() - lh5_obj = store.read( - "/geds/raw/tracelist", lgnd_file, idx=[2, 4, 6, 9, 11, 16, 68] - ) + lh5_obj = store.read("/geds/raw/tracelist", lgnd_file, idx=[2, 4, 6, 9, 11, 16, 68]) assert isinstance(lh5_obj, types.VectorOfVectors) assert len(lh5_obj) == 7 assert (lh5_obj.cumulative_length.nda == [1, 2, 3, 4, 5, 6, 7]).all() diff --git a/tests/lh5/test_lh5_write.py b/tests/lh5/test_lh5_write.py index 97065552..28a5bd2e 100644 --- a/tests/lh5/test_lh5_write.py +++ b/tests/lh5/test_lh5_write.py @@ -255,9 +255,7 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): ) # Now, check that the data were overwritten - array_dat = store.read( - "my_array", f"{tmptestdir}/write_object_overwrite_test.lh5" - ) + array_dat = store.read("my_array", f"{tmptestdir}/write_object_overwrite_test.lh5") expected_out_array = np.append(np.zeros(5), np.ones(20)) assert np.array_equal(array_dat.nda, expected_out_array) @@ -440,9 +438,7 @@ def test_write_histogram(caplog, tmptestdir): ) # Now, check that the data were overwritten - h3 = store.read( - "my_group/my_histogram", f"{tmptestdir}/write_histogram_test.lh5" - ) + h3 = store.read("my_group/my_histogram", f"{tmptestdir}/write_histogram_test.lh5") assert np.array_equal(h3.weights.nda, np.array([[10, 10], [10, 10]])) assert h3.binning[0].edges[0] == 2 assert h3.binning[1].edges[-1] == 7 @@ -508,9 +504,7 @@ def test_write_histogram_variable(caplog, tmptestdir): ) # Now, check that the data were overwritten - h3 = store.read( - "my_group/my_histogram", f"{tmptestdir}/write_histogram_test.lh5" - ) + h3 = store.read("my_group/my_histogram", f"{tmptestdir}/write_histogram_test.lh5") assert np.array_equal(h3.weights.nda, np.array([[10, 10], [10, 10]])) assert np.array_equal(h3.binning[0].edges, np.array([2, 3.5, 4])) with pytest.raises(TypeError): diff --git a/tests/types/test_table.py b/tests/types/test_table.py index cfdbe23d..06e70a83 100644 --- a/tests/types/test_table.py +++ b/tests/types/test_table.py @@ -86,10 +86,13 @@ def test_append(): tbl = Table(col_dict=col_dict) tbl.append({"a": -1, "b": -1}) assert len(tbl) == 5 - assert tbl == Table( { - "a": lgdo.Array(nda=np.array([1, 2, 3, 4, -1])), - "b": lgdo.Array(nda=np.array([5, 6, 7, 8, -1])), - } ) + assert tbl == Table( + { + "a": lgdo.Array(nda=np.array([1, 2, 3, 4, -1])), + "b": lgdo.Array(nda=np.array([5, 6, 7, 8, -1])), + } + ) + def test_insert(): col_dict = { @@ -100,10 +103,12 @@ def test_insert(): tbl = Table(col_dict=col_dict) tbl.insert(1, {"a": -1, "b": -1}) assert len(tbl) == 5 - assert tbl == Table( { - "a": lgdo.Array(nda=np.array([1, -1, 2, 3, 4])), - "b": lgdo.Array(nda=np.array([5, -1, 6, 7, 8])), - } ) + assert tbl == Table( + { + "a": lgdo.Array(nda=np.array([1, -1, 2, 3, 4])), + "b": lgdo.Array(nda=np.array([5, -1, 6, 7, 8])), + } + ) def test_add_field(): From 8dd3d75a20630dc637e48de43d61ab56e78c76f1 Mon Sep 17 00:00:00 2001 From: iguinn Date: Sun, 13 Oct 2024 14:24:43 -0700 Subject: [PATCH 10/27] Appease the pre-commit bot --- src/lgdo/lh5/core.py | 5 ++--- src/lgdo/types/encoded.py | 6 +++--- src/lgdo/types/table.py | 8 ++++---- src/lgdo/types/vectorofvectors.py | 2 +- tests/compression/conftest.py | 
3 +-- 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/lgdo/lh5/core.py b/src/lgdo/lh5/core.py index b6a52dc4..4acd3ebc 100644 --- a/src/lgdo/lh5/core.py +++ b/src/lgdo/lh5/core.py @@ -4,6 +4,7 @@ import inspect import sys from collections.abc import Mapping, Sequence +from contextlib import suppress from typing import Any import h5py @@ -177,10 +178,8 @@ def read( obj_buf_start=obj_buf_start, decompress=decompress, ) - try: + with suppress(AttributeError): obj.resize(obj_buf_start + n_rows_read) - except AttributeError: - pass return obj diff --git a/src/lgdo/types/encoded.py b/src/lgdo/types/encoded.py index b4caf7f8..fe7b522e 100644 --- a/src/lgdo/types/encoded.py +++ b/src/lgdo/types/encoded.py @@ -96,7 +96,7 @@ def reserve_capacity(self, *capacity: int) -> None: self.encoded_data.reserve_capacity(*capacity) self.decoded_size.reserve_capacity(capacity[0]) - def get_capacity(self) -> Tuple: + def get_capacity(self) -> tuple: return (self.decoded_size.get_capacity, *self.encoded_data.get_capacity()) def trim_capacity(self) -> None: @@ -348,7 +348,7 @@ def __eq__(self, other: ArrayOfEncodedEqualSizedArrays) -> bool: def reserve_capacity(self, *capacity: int) -> None: self.encoded_data.reserve_capacity(capacity) - def get_capacity(self) -> Tuple: + def get_capacity(self) -> tuple: return self.encoded_data.get_capacity() def trim_capacity(self) -> None: @@ -361,7 +361,7 @@ def resize(self, new_size: int, trim: bool = False) -> None: -------- .VectorOfVectors.resize """ - self.encoded_data.resize(new_size) + self.encoded_data.resize(new_size, trim) def append(self, value: NDArray) -> None: """Append a 1D encoded array at the end. diff --git a/src/lgdo/types/table.py b/src/lgdo/types/table.py index 5fbe7c6b..43e7347d 100644 --- a/src/lgdo/types/table.py +++ b/src/lgdo/types/table.py @@ -18,7 +18,7 @@ from .array import Array from .arrayofequalsizedarrays import ArrayOfEqualSizedArrays -from .lgdo import LGDOCollection +from .lgdo import LGDO, LGDOCollection from .scalar import Scalar from .struct import Struct from .vectorofvectors import VectorOfVectors @@ -97,7 +97,7 @@ def __len__(self) -> int: """Provides ``__len__`` for this array-like class.""" return self.size - def reserve_capacity(self, capacity: int | ArrayLike) -> None: + def reserve_capacity(self, capacity: int | list) -> None: "Set size (number of rows) of internal memory buffer" if isinstance(capacity, int): for obj in self.values(): @@ -138,11 +138,11 @@ def resize( obj.resize(new_size, trim) self.size = new_size - def append(self, vals: Dict) -> None: + def append(self, vals: dict) -> None: "Append vals to end of table. Vals is a mapping from table key to val" self.insert(len(self), vals) - def insert(self, i: int, vals: Dict) -> None: + def insert(self, i: int, vals: dict) -> None: "Insert vals into table at row i. Vals is a mapping from table key to val" for k, ar in self.items(): ar.insert(i, vals[k]) diff --git a/src/lgdo/types/vectorofvectors.py b/src/lgdo/types/vectorofvectors.py index 5bed511e..51675631 100644 --- a/src/lgdo/types/vectorofvectors.py +++ b/src/lgdo/types/vectorofvectors.py @@ -281,7 +281,7 @@ def reserve_capacity(self, cap_cl, *cap_args) -> None: self.cumulative_length.reserve_capacity(cap_cl) self.flattened_data.reserve_capacity(*cap_args) - def get_capacity(self) -> Tuple[int]: + def get_capacity(self) -> tuple[int]: """Get tuple containing capacity of each dimension. First dimension is cumulative length array. Last dimension is flattened data. 
""" diff --git a/tests/compression/conftest.py b/tests/compression/conftest.py index cb96d622..75ab953a 100644 --- a/tests/compression/conftest.py +++ b/tests/compression/conftest.py @@ -8,8 +8,7 @@ @pytest.fixture() def wftable(lgnd_test_data): store = lh5.LH5Store() - wft = store.read( + return store.read( "/geds/raw/waveform", lgnd_test_data.get_path("lh5/LDQTA_r117_20200110T105115Z_cal_geds_raw.lh5"), ) - return wft From 5a2e402cfd5bdd1ed263ff08408fd61f118ff4c4 Mon Sep 17 00:00:00 2001 From: iguinn Date: Sun, 13 Oct 2024 17:46:40 -0700 Subject: [PATCH 11/27] Fixed tutorial --- docs/source/notebooks/LH5Files.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/notebooks/LH5Files.ipynb b/docs/source/notebooks/LH5Files.ipynb index 7cb97cf3..390d0efb 100644 --- a/docs/source/notebooks/LH5Files.ipynb +++ b/docs/source/notebooks/LH5Files.ipynb @@ -189,8 +189,8 @@ "source": [ "from lgdo.lh5 import LH5Iterator\n", "\n", - "for lh5_obj, entry, n_rows in LH5Iterator(lh5_file, \"geds/raw/energy\", buffer_len=20):\n", - " print(f\"entry {entry}, energy = {lh5_obj} ({n_rows} rows)\")" + "for lh5_obj, entry in LH5Iterator(lh5_file, \"geds/raw/energy\", buffer_len=20):\n", + " print(f\"entry {entry}, energy = {lh5_obj} ({len(lh5_obj)} rows)\")" ] }, { @@ -211,7 +211,7 @@ "from lgdo.lh5 import LH5Store\n", "\n", "store = LH5Store(keep_open=True) # with keep_open=True, files are kept open inside the store\n", - "store.read(\"geds/raw\", lh5_file) # returns a tuple: (obj, n_rows_read)" + "store.read(\"geds/raw\", lh5_file)\n" ] }, { From 0a24cf998015bc944bbd192c444e6fcbdd15419f Mon Sep 17 00:00:00 2001 From: iguinn Date: Sun, 13 Oct 2024 17:56:38 -0700 Subject: [PATCH 12/27] Fixed docstring error --- src/lgdo/lh5/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lgdo/lh5/core.py b/src/lgdo/lh5/core.py index 4acd3ebc..c4e76296 100644 --- a/src/lgdo/lh5/core.py +++ b/src/lgdo/lh5/core.py @@ -107,7 +107,7 @@ def read( Returns ------- object - `the read-out object + the read-out object """ if isinstance(lh5_file, h5py.File): lh5_obj = lh5_file[name] From 0fb6adf2e86a87b1521468302a2e83b8a19be785 Mon Sep 17 00:00:00 2001 From: iguinn Date: Sun, 13 Oct 2024 21:29:52 -0700 Subject: [PATCH 13/27] Added tests for capacity and fixed bugs --- src/lgdo/types/array.py | 4 ++-- src/lgdo/types/lgdo.py | 22 +--------------------- src/lgdo/types/table.py | 11 ++--------- src/lgdo/types/vectorofvectors.py | 11 +++++++---- tests/types/test_table.py | 28 ++++++++++++++++++++++++++++ tests/types/test_vectorofvectors.py | 22 +++++++++++++++++++++- 6 files changed, 61 insertions(+), 37 deletions(-) diff --git a/src/lgdo/types/array.py b/src/lgdo/types/array.py index c1c720ff..51540396 100644 --- a/src/lgdo/types/array.py +++ b/src/lgdo/types/array.py @@ -131,6 +131,8 @@ def resize(self, new_size: int, trim=False) -> None: increased to accommodate new rows; in this case double capacity. 
If trim is True, capacity will be set to match size.""" + self._size = new_size + if trim and new_size != self.get_capacity: self.reserve_capacity(new_size) @@ -138,8 +140,6 @@ def resize(self, new_size: int, trim=False) -> None: if new_size > self.get_capacity(): self.reserve_capacity(int(2 ** (np.ceil(np.log2(new_size))))) - self._size = new_size - def append(self, value: np.ndarray) -> None: "Append value to end of array (with copy)" self.insert(len(self), value) diff --git a/src/lgdo/types/lgdo.py b/src/lgdo/types/lgdo.py index 18c7e7a4..be8a9c85 100644 --- a/src/lgdo/types/lgdo.py +++ b/src/lgdo/types/lgdo.py @@ -135,24 +135,4 @@ def replace(self, i: int, val) -> None: def clear(self, trim: bool = False) -> None: "set size of LGDOCollection to zero" - self.resize(0, trim) - - def getattrs(self, datatype: bool = False) -> dict: - """Return a copy of the LGDO attributes dictionary. - - Parameters - ---------- - datatype - if ``False``, remove ``datatype`` attribute from the output - dictionary. - """ - d = dict(self.attrs) - if not datatype: - d.pop("datatype", None) - return d - - def __str__(self) -> str: - return repr(self) - - def __repr__(self) -> str: - return self.__class__.__name__ + f"(attrs={self.attrs!r})" + self.resize(0, trim=trim) diff --git a/src/lgdo/types/table.py b/src/lgdo/types/table.py index 43e7347d..58689249 100644 --- a/src/lgdo/types/table.py +++ b/src/lgdo/types/table.py @@ -29,10 +29,6 @@ class Table(Struct, LGDOCollection): """A special struct of arrays or subtable columns of equal length. - Holds onto an internal read/write location ``loc`` that is useful in - managing table I/O using functions like :meth:`push_row`, :meth:`is_full`, - and :meth:`clear`. - Note ---- If you write to a table and don't fill it up to its total size, be sure to @@ -78,7 +74,8 @@ def __init__( col_dict = _ak_to_lgdo_or_col_dict(col_dict) # call Struct constructor - super().__init__(obj_dict=col_dict, attrs=attrs) + Struct.__init__(self, obj_dict=col_dict) + LGDOCollection.__init__(self, attrs=attrs) # if col_dict is not empty, set size according to it # if size is also supplied, resize all fields to match it @@ -138,10 +135,6 @@ def resize( obj.resize(new_size, trim) self.size = new_size - def append(self, vals: dict) -> None: - "Append vals to end of table. Vals is a mapping from table key to val" - self.insert(len(self), vals) - def insert(self, i: int, vals: dict) -> None: "Insert vals into table at row i. Vals is a mapping from table key to val" for k, ar in self.items(): diff --git a/src/lgdo/types/vectorofvectors.py b/src/lgdo/types/vectorofvectors.py index 51675631..513804d0 100644 --- a/src/lgdo/types/vectorofvectors.py +++ b/src/lgdo/types/vectorofvectors.py @@ -285,10 +285,11 @@ def get_capacity(self) -> tuple[int]: """Get tuple containing capacity of each dimension. First dimension is cumulative length array. Last dimension is flattened data. 
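(A sketch of the resulting return values, assuming nested-list construction as in the tests added by this patch.)

    from lgdo import VectorOfVectors

    v2 = VectorOfVectors([[1, 2], [3]])           # flattened_data is an Array
    print(v2.get_capacity())                      # 2-tuple: (cumulative_length cap, flattened_data cap)

    v3 = VectorOfVectors([[[1], [2, 3]], [[4]]])  # flattened_data is itself a VectorOfVectors
    print(v3.get_capacity())                      # 3-tuple, one entry per dimension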
""" - return ( - self.cumulative_length.get_capacity(), - *self.flattened_data.get_capacity(), - ) + fd_cap = self.flattened_data.get_capacity() + if isinstance(fd_cap, int): + return (self.cumulative_length.get_capacity(), fd_cap) + else: + return (self.cumulative_length.get_capacity(), *fd_cap) def trim_capacity(self) -> None: "Set capacity for all dimensions to minimum needed to hold data" @@ -338,6 +339,8 @@ def resize(self, new_size: int, trim: bool = False) -> None: # if dlen > 0 this has no effect if len(self.cumulative_length) > 0: self.flattened_data.resize(self.cumulative_length[-1], trim) + else: + self.flattened_data.resize(0, trim) def append(self, new: NDArray) -> None: """Append a 1D vector `new` at the end. diff --git a/tests/types/test_table.py b/tests/types/test_table.py index 06e70a83..f5ec04f3 100644 --- a/tests/types/test_table.py +++ b/tests/types/test_table.py @@ -76,6 +76,34 @@ def test_datatype_name(): tbl = Table() assert tbl.datatype_name() == "table" +def test_resize_and_capacity(): + col_dict = { + "a": lgdo.Array(nda=np.array([1, 2, 3, 4])), + "b": lgdo.Array(nda=np.array([5, 6, 7, 8])), + } + tbl = Table(col_dict=col_dict) + + assert(len(tbl) == 4) + assert(tbl.get_capacity() == [4, 4]) + + tbl.reserve_capacity([5, 7]) + assert(len(tbl) == 4) + assert(tbl.get_capacity() == [5, 7]) + + tbl.resize(6) + assert(len(tbl) == 6) + assert(tbl.get_capacity()[0] >= 6 and tbl.get_capacity()[1] == 7) + + tbl.trim_capacity() + assert(len(tbl) == 6) + assert(tbl.get_capacity() == [6, 6]) + + with pytest.raises(ValueError): + tbl.reserve_capacity(3) + + tbl.clear(trim=True) + assert(len(tbl) == 0) + assert(tbl.get_capacity() == [0, 0]) def test_append(): col_dict = { diff --git a/tests/types/test_vectorofvectors.py b/tests/types/test_vectorofvectors.py index 8357a5c5..7df645ea 100644 --- a/tests/types/test_vectorofvectors.py +++ b/tests/types/test_vectorofvectors.py @@ -173,25 +173,45 @@ def test_getitem(testvov): assert np.array_equal(v[-1], [1, 2]) -def test_resize(testvov): +def test_resize_and_capacity(testvov): vov = testvov.v2d + assert vov.get_capacity() == (5, 13) + vov.resize(3) assert ak.is_valid(vov.view_as("ak")) + assert vov.get_capacity() == (5, 13) assert len(vov.cumulative_length) == 3 assert len(vov.flattened_data) == vov.cumulative_length[-1] assert vov == VectorOfVectors([[1, 2], [3, 4, 5], [2]]) + vov.trim_capacity() + assert ak.is_valid(vov.view_as("ak")) + assert vov.get_capacity() == (3, 6) + assert len(vov.cumulative_length) == 3 + assert len(vov.flattened_data) == vov.cumulative_length[-1] + assert vov == VectorOfVectors([[1, 2], [3, 4, 5], [2]]) + + vov.reserve_capacity(5, 10) vov.resize(5) assert ak.is_valid(vov.view_as("ak")) + assert vov.get_capacity()[0] >= 5 and vov.get_capacity()[1] >= 7 assert len(vov) == 5 assert len(vov[3]) == 0 assert len(vov[4]) == 0 assert vov == VectorOfVectors([[1, 2], [3, 4, 5], [2], [], []]) + vov.clear(trim=True) + assert ak.is_valid(vov.view_as("ak")) + assert vov.get_capacity() == (0, 0) + assert len(vov) == 0 + vov = testvov.v3d + assert vov.get_capacity() == (3, 5, 13) + vov.resize(3) + assert vov.get_capacity() == (3, 5, 13) assert ak.is_valid(vov.view_as("ak")) assert len(vov.cumulative_length) == 3 assert len(vov.flattened_data) == vov.cumulative_length[-1] From 03f5ce7cda9e8ed399214f0845b772b9994816ef Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 14 Oct 2024 04:30:16 +0000 Subject: [PATCH 14/27] style: pre-commit fixes --- 
tests/types/test_table.py | 24 +++++++++++++----------- tests/types/test_vectorofvectors.py | 6 +++--- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/tests/types/test_table.py b/tests/types/test_table.py index f5ec04f3..eabbb525 100644 --- a/tests/types/test_table.py +++ b/tests/types/test_table.py @@ -76,6 +76,7 @@ def test_datatype_name(): tbl = Table() assert tbl.datatype_name() == "table" + def test_resize_and_capacity(): col_dict = { "a": lgdo.Array(nda=np.array([1, 2, 3, 4])), @@ -83,27 +84,28 @@ def test_resize_and_capacity(): } tbl = Table(col_dict=col_dict) - assert(len(tbl) == 4) - assert(tbl.get_capacity() == [4, 4]) + assert len(tbl) == 4 + assert tbl.get_capacity() == [4, 4] tbl.reserve_capacity([5, 7]) - assert(len(tbl) == 4) - assert(tbl.get_capacity() == [5, 7]) + assert len(tbl) == 4 + assert tbl.get_capacity() == [5, 7] tbl.resize(6) - assert(len(tbl) == 6) - assert(tbl.get_capacity()[0] >= 6 and tbl.get_capacity()[1] == 7) + assert len(tbl) == 6 + assert tbl.get_capacity()[0] >= 6 and tbl.get_capacity()[1] == 7 tbl.trim_capacity() - assert(len(tbl) == 6) - assert(tbl.get_capacity() == [6, 6]) + assert len(tbl) == 6 + assert tbl.get_capacity() == [6, 6] with pytest.raises(ValueError): tbl.reserve_capacity(3) - + tbl.clear(trim=True) - assert(len(tbl) == 0) - assert(tbl.get_capacity() == [0, 0]) + assert len(tbl) == 0 + assert tbl.get_capacity() == [0, 0] + def test_append(): col_dict = { diff --git a/tests/types/test_vectorofvectors.py b/tests/types/test_vectorofvectors.py index 7df645ea..15a69390 100644 --- a/tests/types/test_vectorofvectors.py +++ b/tests/types/test_vectorofvectors.py @@ -177,7 +177,7 @@ def test_resize_and_capacity(testvov): vov = testvov.v2d assert vov.get_capacity() == (5, 13) - + vov.resize(3) assert ak.is_valid(vov.view_as("ak")) assert vov.get_capacity() == (5, 13) @@ -195,7 +195,7 @@ def test_resize_and_capacity(testvov): vov.reserve_capacity(5, 10) vov.resize(5) assert ak.is_valid(vov.view_as("ak")) - assert vov.get_capacity()[0] >= 5 and vov.get_capacity()[1] >= 7 + assert vov.get_capacity()[0] >= 5 and vov.get_capacity()[1] >= 7 assert len(vov) == 5 assert len(vov[3]) == 0 assert len(vov[4]) == 0 @@ -205,7 +205,7 @@ def test_resize_and_capacity(testvov): assert ak.is_valid(vov.view_as("ak")) assert vov.get_capacity() == (0, 0) assert len(vov) == 0 - + vov = testvov.v3d assert vov.get_capacity() == (3, 5, 13) From a1cf2b2113014ca8092fb26ea50506732cae4fc2 Mon Sep 17 00:00:00 2001 From: iguinn Date: Sun, 13 Oct 2024 21:36:15 -0700 Subject: [PATCH 15/27] Appease pre-commit bot --- src/lgdo/types/vectorofvectors.py | 3 +-- tests/types/test_table.py | 3 ++- tests/types/test_vectorofvectors.py | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/lgdo/types/vectorofvectors.py b/src/lgdo/types/vectorofvectors.py index 513804d0..3fa1d59a 100644 --- a/src/lgdo/types/vectorofvectors.py +++ b/src/lgdo/types/vectorofvectors.py @@ -288,8 +288,7 @@ def get_capacity(self) -> tuple[int]: fd_cap = self.flattened_data.get_capacity() if isinstance(fd_cap, int): return (self.cumulative_length.get_capacity(), fd_cap) - else: - return (self.cumulative_length.get_capacity(), *fd_cap) + return (self.cumulative_length.get_capacity(), *fd_cap) def trim_capacity(self) -> None: "Set capacity for all dimensions to minimum needed to hold data" diff --git a/tests/types/test_table.py b/tests/types/test_table.py index eabbb525..d60f00d9 100644 --- a/tests/types/test_table.py +++ b/tests/types/test_table.py @@ -93,7 +93,8 @@ def 
test_resize_and_capacity(): tbl.resize(6) assert len(tbl) == 6 - assert tbl.get_capacity()[0] >= 6 and tbl.get_capacity()[1] == 7 + assert tbl.get_capacity()[0] >= 6 + assert tbl.get_capacity()[1] == 7 tbl.trim_capacity() assert len(tbl) == 6 diff --git a/tests/types/test_vectorofvectors.py b/tests/types/test_vectorofvectors.py index 15a69390..c28dee56 100644 --- a/tests/types/test_vectorofvectors.py +++ b/tests/types/test_vectorofvectors.py @@ -195,7 +195,8 @@ def test_resize_and_capacity(testvov): vov.reserve_capacity(5, 10) vov.resize(5) assert ak.is_valid(vov.view_as("ak")) - assert vov.get_capacity()[0] >= 5 and vov.get_capacity()[1] >= 7 + assert vov.get_capacity()[0] >= 5 + assert vov.get_capacity()[1] >= 7 assert len(vov) == 5 assert len(vov[3]) == 0 assert len(vov[4]) == 0 From 0a0bffb70163f82968332e88e5a4a41bafbabfe1 Mon Sep 17 00:00:00 2001 From: iguinn Date: Sun, 13 Oct 2024 21:44:44 -0700 Subject: [PATCH 16/27] Improve test coverage --- tests/types/test_array.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/tests/types/test_array.py b/tests/types/test_array.py index b055aa34..26dcd51f 100644 --- a/tests/types/test_array.py +++ b/tests/types/test_array.py @@ -26,17 +26,45 @@ def test_init(): assert array.attrs == attrs | {"datatype": "array<1>{real}"} -def test_resize(): +def test_resize_and_capacity(): array = Array(nda=np.array([1, 2, 3, 4])) + assert array.get_capacity() == 4 + array.resize(3) + assert array.get_capacity() == 4 assert (array.nda == np.array([1, 2, 3])).all() + array.resize(5) + assert array.get_capacity() >= 5 + + array.clear(trim=True) + assert array.get_capacity() == 0 + assert len(array) == 0 + def test_insert(): a = Array(np.array([1, 2, 3, 4])) a.insert(2, [-1, -1]) assert a == Array([1, 2, -1, -1, 3, 4]) + with pytest.raises(IndexError): + a.insert(10, 10) + + +def test_append(): + a = Array(np.array([1, 2, 3, 4])) + a.append(-1) + assert a == Array([1, 2, 3, 4, -1]) + + +def test_replace(): + a = Array(np.array([1, 2, 3, 4])) + a.replace(2, -1) + assert a == Array([1, 2, -1, 4]) + + with pytest.raises(IndexError): + a.replace(10, 10) + def test_view(): a = Array(np.array([1, 2, 3, 4]), attrs={"units": "m"}) From ae4979f818ab010b49e88b5546b012c1c0e45ba5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 5 Nov 2024 16:25:02 +0000 Subject: [PATCH 17/27] style: pre-commit fixes --- docs/source/notebooks/LH5Files.ipynb | 6 ++++-- tests/lh5/test_lh5_store.py | 4 +--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/notebooks/LH5Files.ipynb b/docs/source/notebooks/LH5Files.ipynb index a9a24dcb..a03e29b0 100644 --- a/docs/source/notebooks/LH5Files.ipynb +++ b/docs/source/notebooks/LH5Files.ipynb @@ -212,8 +212,10 @@ "source": [ "from lgdo.lh5 import LH5Store\n", "\n", - "store = LH5Store(keep_open=True) # with keep_open=True, files are kept open inside the store\n", - "store.read(\"geds/raw\", lh5_file)\n" + "store = LH5Store(\n", + " keep_open=True\n", + ") # with keep_open=True, files are kept open inside the store\n", + "store.read(\"geds/raw\", lh5_file)" ] }, { diff --git a/tests/lh5/test_lh5_store.py b/tests/lh5/test_lh5_store.py index 87c033ca..f46a5d74 100644 --- a/tests/lh5/test_lh5_store.py +++ b/tests/lh5/test_lh5_store.py @@ -134,9 +134,7 @@ def test_read_array_fancy_idx(lh5_file): assert lh5_obj == lgdo.Array([1, 4, 5, 1, 4, 5]) # Test with out of range index - lh5_obj = store.read( - 
"/data/struct_full/array", lh5_file, idx=[0, 3, 4, 100] - ) + lh5_obj = store.read("/data/struct_full/array", lh5_file, idx=[0, 3, 4, 100]) assert isinstance(lh5_obj, types.Array) assert len(lh5_obj) == 3 assert lh5_obj == lgdo.Array([1, 4, 5]) From 7afab7228f1fa19b29ef4cbbde8d833ebed16c11 Mon Sep 17 00:00:00 2001 From: iguinn Date: Mon, 25 Nov 2024 12:24:59 -0800 Subject: [PATCH 18/27] Fixed test --- tests/test_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 36bce2d8..c5bd80dc 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -164,6 +164,6 @@ def test_lh5concat(lgnd_test_data, tmptestdir): outfile = f"{tmptestdir}/concat_test_struct_out.lh5" cli.lh5concat(["--output", outfile, "--", infile1, infile2]) - out_stp = store.read("stp", outfile)[0] + out_stp = store.read("stp", outfile) assert out_stp.attrs["datatype"] == "struct{x}" assert np.all(out_stp.x["col"].nda == np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1])) From 74cb2817e7db0bb4c7bb4b06212e0428a243877e Mon Sep 17 00:00:00 2001 From: iguinn Date: Tue, 26 Nov 2024 20:01:01 -0800 Subject: [PATCH 19/27] When filling VoV from AoesA, if length of V is longer than A use filler value --- src/lgdo/types/vectorofvectors.py | 10 +++++++++- src/lgdo/types/vovutils.py | 18 ++++++++++++++---- tests/types/test_vectorofvectors.py | 11 ++++++++++- 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/src/lgdo/types/vectorofvectors.py b/src/lgdo/types/vectorofvectors.py index a27461ce..5e7203c6 100644 --- a/src/lgdo/types/vectorofvectors.py +++ b/src/lgdo/types/vectorofvectors.py @@ -468,7 +468,15 @@ def _set_vector_unsafe( cum_lens = np.add(start, lens.cumsum(), dtype=int) # fill with fast vectorized routine - vovutils._nb_fill(vec, lens, self.flattened_data.nda[start : cum_lens[-1]]) + if np.issubdtype(self.flattened_data.dtype, np.unsignedinteger): + nan_val = np.iinfo(self.flattened_data.dtype).max + if np.issubdtype(self.flattened_data.dtype, np.integer): + nan_val = np.iinfo(self.flattened_data.dtype).min + else: + nan_val = np.nan + vovutils._nb_fill( + vec, lens, nan_val, self.flattened_data.nda[start : cum_lens[-1]] + ) # add new vector(s) length to cumulative_length self.cumulative_length[i : i + len(lens)] = cum_lens diff --git a/src/lgdo/types/vovutils.py b/src/lgdo/types/vovutils.py index c3862eec..abae760b 100644 --- a/src/lgdo/types/vovutils.py +++ b/src/lgdo/types/vovutils.py @@ -81,7 +81,7 @@ def _nb_build_cl(sorted_array_in: NDArray, cumulative_length_out: NDArray) -> ND @numba.guvectorize( [ - f"{data_type}[:,:],{size_type}[:],{data_type}[:]" + f"{data_type}[:,:],{size_type}[:],{data_type},{data_type}[:]" for data_type in [ "b1", "i1", @@ -99,10 +99,12 @@ def _nb_build_cl(sorted_array_in: NDArray, cumulative_length_out: NDArray) -> ND ] for size_type in ["i4", "i8", "u4", "u8"] ], - "(l,m),(l),(n)", + "(l,m),(l),(),(n)", **nb_kwargs, ) -def _nb_fill(aoa_in: NDArray, len_in: NDArray, flattened_array_out: NDArray): +def _nb_fill( + aoa_in: NDArray, len_in: NDArray, nan_val: int | float, flattened_array_out: NDArray +): """Vectorized function to fill flattened array from array of arrays and lengths. Values in aoa_in past lengths will not be copied. @@ -112,6 +114,9 @@ def _nb_fill(aoa_in: NDArray, len_in: NDArray, flattened_array_out: NDArray): array of arrays containing values to be copied len_in array of vector lengths for each row of aoa_in + nan_val + value to use when len_in is longer than aoa_in. Should use + np.nan for floating point, and 0xfff... 
for integer types flattened_array_out flattened array to copy values into. Must be longer than sum of lengths in len_in @@ -122,9 +127,14 @@ def _nb_fill(aoa_in: NDArray, len_in: NDArray, flattened_array_out: NDArray): raise ValueError(msg) start = 0 + max_len = aoa_in.shape[1] for i, ll in enumerate(len_in): stop = start + ll - flattened_array_out[start:stop] = aoa_in[i, :ll] + if ll > max_len: + flattened_array_out[start : start + max_len] = aoa_in[i, :] + flattened_array_out[start + max_len : stop] = nan_val + else: + flattened_array_out[start:stop] = aoa_in[i, :ll] start = stop diff --git a/tests/types/test_vectorofvectors.py b/tests/types/test_vectorofvectors.py index c03c1f24..f1b2e1b1 100644 --- a/tests/types/test_vectorofvectors.py +++ b/tests/types/test_vectorofvectors.py @@ -389,7 +389,7 @@ def test_set_vector_unsafe(testvov): np.array([4, 8, 9, 7], dtype=testvov.dtype), np.array([5, 3, 1], dtype=testvov.dtype), ] - desired_aoa = np.zeros(shape=(5, 5), dtype=testvov.dtype) + desired_aoa = np.zeros(shape=(5, 4), dtype=testvov.dtype) desired_lens = np.array([len(arr) for arr in desired]) # test sequential filling @@ -404,6 +404,15 @@ def test_set_vector_unsafe(testvov): third_vov._set_vector_unsafe(0, desired_aoa, desired_lens) assert testvov == third_vov + # test vectorized filling when len is longer than array + fourth_vov = lgdo.VectorOfVectors(shape_guess=(5, 5), dtype=testvov.dtype) + desired_lens[3] = 10 + fourth_vov._set_vector_unsafe(0, desired_aoa, desired_lens) + exp_entry_w_overflow = np.concatenate( + [desired[3], np.array([np.iinfo(testvov.dtype).min] * 6)] + ) + assert np.all(fourth_vov[3] == exp_entry_w_overflow) + def test_iter(testvov): testvov = testvov.v2d From 51137a72acd5f6e93433e01bf98e237d368b0dcf Mon Sep 17 00:00:00 2001 From: iguinn Date: Fri, 20 Dec 2024 10:31:58 -0800 Subject: [PATCH 20/27] Do not return current_i_entry when iterating --- src/lgdo/lh5/iterator.py | 8 +++++--- tests/lh5/test_lh5_iterator.py | 14 ++++++++------ tests/types/test_vectorofvectors.py | 2 +- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/lgdo/lh5/iterator.py b/src/lgdo/lh5/iterator.py index 0bae1d01..83793bc8 100644 --- a/src/lgdo/lh5/iterator.py +++ b/src/lgdo/lh5/iterator.py @@ -25,7 +25,7 @@ class LH5Iterator(typing.Iterator): This can be used as an iterator: - >>> for lh5_obj, entry in LH5Iterator(...): + >>> for lh5_obj in LH5Iterator(...): >>> # do the thing! This is intended for if you are reading a large quantity of data. This @@ -43,6 +43,8 @@ class LH5Iterator(typing.Iterator): In addition to accessing requested data via ``lh5_obj``, several properties exist to tell you where that data came from: + - lh5_it.current_i_entry: get the index within the entry list of the + first entry that is currently read - lh5_it.current_local_entries: get the entry numbers relative to the file the data came from - lh5_it.current_global_entries: get the entry number relative to the @@ -52,7 +54,7 @@ class LH5Iterator(typing.Iterator): This class can also be used for random access: - >>> lh5_obj, n_rows = lh5_it.read(i_entry) + >>> lh5_obj = lh5_it.read(i_entry) to read the block of entries starting at i_entry. 
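(A sketch of the iteration pattern after this change; the file and group names are hypothetical.)

    from lgdo.lh5 import LH5Iterator

    it = LH5Iterator("file.lh5", "geds/raw/energy", buffer_len=100)
    for lh5_obj in it:           # the loop now yields only the buffer
        i0 = it.current_i_entry  # the block's first entry is queried from the iterator
        print(i0, len(lh5_obj))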
In case of multiple files or the use of an event selection, i_entry refers to a global event index @@ -492,4 +494,4 @@ def __next__(self) -> tuple[LGDO, int, int]: if len(buf) == 0: raise StopIteration self.next_i_entry = self.current_i_entry + len(buf) - return (buf, self.current_i_entry) + return buf diff --git a/tests/lh5/test_lh5_iterator.py b/tests/lh5/test_lh5_iterator.py index a5f2f689..273612cf 100644 --- a/tests/lh5/test_lh5_iterator.py +++ b/tests/lh5/test_lh5_iterator.py @@ -31,7 +31,8 @@ def test_basics(lgnd_file): lh5_obj["baseline"].nda == np.array([14353, 14254, 14525, 11656, 13576]) ).all() - for lh5_obj, entry in lh5_it: + for lh5_obj in lh5_it: + entry = lh5_it.current_i_entry assert len(lh5_obj) == 5 assert entry % 5 == 0 assert all(lh5_it.current_local_entries == np.arange(entry, entry + 5)) @@ -161,8 +162,9 @@ def test_iterate(more_lgnd_files): ], ] - for lh5_out, entry in lh5_it: + for lh5_out in lh5_it: assert set(lh5_out.keys()) == {"is_valid_0vbb", "timestamp", "zacEmax_ctc_cal"} + entry = lh5_it.current_i_entry assert entry % 5 == 0 assert len(lh5_out) == 5 assert all(lh5_it.current_local_entries == exp_loc_entries[entry // 5]) @@ -178,9 +180,9 @@ def test_iterate(more_lgnd_files): buffer_len=5, ) - for lh5_out, entry in lh5_it: + for lh5_out in lh5_it: assert set(lh5_out.keys()) == {"is_valid_0vbb", "timestamp", "zacEmax_ctc_cal"} - assert entry % 5 == 0 + assert lh5_it.current_i_entry % 5 == 0 assert len(lh5_out) == 5 print(lh5_it.get_global_entrylist()) assert all( @@ -197,9 +199,9 @@ def test_iterate(more_lgnd_files): buffer_len=5, ) - for lh5_out, entry in lh5_it: + for lh5_out in lh5_it: assert set(lh5_out.keys()) == {"is_valid_0vbb", "timestamp", "zacEmax_ctc_cal"} - assert entry % 5 == 0 + assert lh5_it.current_i_entry % 5 == 0 assert len(lh5_out) == 5 with pytest.raises(ValueError): diff --git a/tests/types/test_vectorofvectors.py b/tests/types/test_vectorofvectors.py index f1b2e1b1..59b0f2fc 100644 --- a/tests/types/test_vectorofvectors.py +++ b/tests/types/test_vectorofvectors.py @@ -469,5 +469,5 @@ def test_lh5_iterator_view_as(lgnd_test_data): "ch1067205/dsp/energies", ) - for obj, _ in it: + for obj in it: assert ak.is_valid(obj.view_as("ak")) From 2e0f5973aab238ae9271a8b995a65caee75a4c72 Mon Sep 17 00:00:00 2001 From: iguinn Date: Fri, 20 Dec 2024 10:35:34 -0800 Subject: [PATCH 21/27] Fixed tests --- tests/lh5/test_lh5_store.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/lh5/test_lh5_store.py b/tests/lh5/test_lh5_store.py index 75352772..15082dcf 100644 --- a/tests/lh5/test_lh5_store.py +++ b/tests/lh5/test_lh5_store.py @@ -292,10 +292,9 @@ def test_read_table_fancy_idx(lh5_file): assert isinstance(lh5_obj, types.Table) assert len(lh5_obj) == 2 - lh5_obj, n_rows = store.read("/data/struct/table", lh5_file, idx=[]) + lh5_obj = store.read("/data/struct/table", lh5_file, idx=[]) assert isinstance(lh5_obj, types.Table) - assert n_rows == 0 - + assert len(lh5_obj) == 0 def test_read_empty_struct(lh5_file): store = lh5.LH5Store() @@ -460,14 +459,12 @@ def test_read_lgnd_vov_fancy_idx(lgnd_file): assert (lh5_obj.cumulative_length.nda == [1, 2, 3, 4, 5, 6, 7]).all() assert (lh5_obj.flattened_data.nda == [40, 60, 64, 60, 64, 28, 60]).all() - lh5_obj, n_rows = store.read("/geds/raw/tracelist", lgnd_file, idx=[]) + lh5_obj = store.read("/geds/raw/tracelist", lgnd_file, idx=[]) assert isinstance(lh5_obj, types.VectorOfVectors) - assert n_rows == 0 assert len(lh5_obj) == 0 - lh5_obj, n_rows = 
store.read("/geds/raw/tracelist", [lgnd_file] * 3, idx=[250]) + lh5_obj = store.read("/geds/raw/tracelist", [lgnd_file] * 3, idx=[250]) assert isinstance(lh5_obj, types.VectorOfVectors) - assert n_rows == 1 assert len(lh5_obj) == 1 From 10193a973b7c1f8e484e9ac99cfed580e1195d88 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 20 Dec 2024 18:35:55 +0000 Subject: [PATCH 22/27] style: pre-commit fixes --- tests/lh5/test_lh5_store.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/lh5/test_lh5_store.py b/tests/lh5/test_lh5_store.py index 15082dcf..01958928 100644 --- a/tests/lh5/test_lh5_store.py +++ b/tests/lh5/test_lh5_store.py @@ -296,6 +296,7 @@ def test_read_table_fancy_idx(lh5_file): assert isinstance(lh5_obj, types.Table) assert len(lh5_obj) == 0 + def test_read_empty_struct(lh5_file): store = lh5.LH5Store() lh5_obj = store.read("/data/struct/empty_struct", lh5_file) From 847c19cf5780bbd557b98bb9c12db840eb781bfc Mon Sep 17 00:00:00 2001 From: iguinn Date: Tue, 14 Jan 2025 11:10:13 -0800 Subject: [PATCH 23/27] Fixed broken test --- src/lgdo/types/histogram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lgdo/types/histogram.py b/src/lgdo/types/histogram.py index dc6cf1c2..061a1d1e 100644 --- a/src/lgdo/types/histogram.py +++ b/src/lgdo/types/histogram.py @@ -424,7 +424,7 @@ def __setitem__(self, name: str, obj: LGDO) -> None: dict.__setitem__(self, name, obj) else: msg = "histogram fields cannot be mutated " - raise TypeError(msg) + raise AttributeError(msg) def __getattr__(self, name: str) -> None: # do not allow for new attributes on this From 582960c448dee905a8530c1969dff6a96371e079 Mon Sep 17 00:00:00 2001 From: iguinn Date: Fri, 24 Jan 2025 09:06:39 -0800 Subject: [PATCH 24/27] Fixed tutorial notebook --- docs/source/notebooks/LH5Files.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/notebooks/LH5Files.ipynb b/docs/source/notebooks/LH5Files.ipynb index a03e29b0..1a13dffb 100644 --- a/docs/source/notebooks/LH5Files.ipynb +++ b/docs/source/notebooks/LH5Files.ipynb @@ -191,8 +191,8 @@ "source": [ "from lgdo.lh5 import LH5Iterator\n", "\n", - "for lh5_obj, entry in LH5Iterator(lh5_file, \"geds/raw/energy\", buffer_len=20):\n", - " print(f\"entry {entry}, energy = {lh5_obj} ({len(lh5_obj)} rows)\")" + "for lh5_obj in LH5Iterator(lh5_file, \"geds/raw/energy\", buffer_len=20):\n", + " print(f\"energy = {lh5_obj} ({len(lh5_obj)} rows)\")" ] }, { From d1e92e8811eab5d95995362e331a22cf7a94f547 Mon Sep 17 00:00:00 2001 From: iguinn Date: Fri, 14 Feb 2025 11:42:07 -0800 Subject: [PATCH 25/27] Added ability to specify start and number of entries for iteration --- src/lgdo/lh5/iterator.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/src/lgdo/lh5/iterator.py b/src/lgdo/lh5/iterator.py index 83793bc8..e023b614 100644 --- a/src/lgdo/lh5/iterator.py +++ b/src/lgdo/lh5/iterator.py @@ -68,6 +68,8 @@ def __init__( base_path: str = "", entry_list: list[int] | list[list[int]] | None = None, entry_mask: list[bool] | list[list[bool]] | None = None, + i_start: int = 0, + n_entries: int | None = None, field_mask: dict[str, bool] | list[str] | tuple[str] | None = None, buffer_len: int = "100*MB", file_cache: int = 10, @@ -92,6 +94,10 @@ def __init__( entry_mask mask of entries to read. If a list of arrays is provided, expect one for each file. Ignore if a selection list is provided. 
+ i_start + index of first entry to start at when iterating + n_entries + number of entries to read before terminating iteration field_mask mask of which fields to read. See :meth:`LH5Store.read` for more details. @@ -186,6 +192,8 @@ def __init__( msg = f"can't open any files from {lh5_files}" raise RuntimeError(msg) + self.i_start = i_start + self.n_entries = n_entries self.current_i_entry = 0 self.next_i_entry = 0 @@ -319,13 +327,22 @@ def get_global_entrylist(self) -> np.ndarray: ) return self.global_entry_list - def read(self, i_entry: int) -> LGDO: + def read(self, i_entry: int, n_entries: int | None = None) -> LGDO: "Read the next local chunk of events, starting at entry." - i_file = np.searchsorted(self.entry_map, i_entry, "right") self.lh5_buffer.resize(0) + + if n_entries is None: + n_entries = self.buffer_len + elif n_entries==0: + return self.lh5_buffer + elif n_entries > self.buffer_len: + msg = "n_entries cannot be larger than buffer_len" + raise ValueError(msg) + # if file hasn't been opened yet, search through files # sequentially until we find the right one + i_file = np.searchsorted(self.entry_map, i_entry, "right") if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("q").max: while i_file < len(self.lh5_files) and i_entry >= self._get_file_cumentries( i_file @@ -336,7 +353,7 @@ return self.lh5_buffer local_i_entry = i_entry - self._get_file_cumentries(i_file - 1) - while len(self.lh5_buffer) < self.buffer_len and i_file < len(self.file_map): + while len(self.lh5_buffer) < n_entries and i_file < len(self.file_map): # Loop through files local_idx = self.get_file_entrylist(i_file) if local_idx is not None and len(local_idx) == 0: @@ -349,7 +366,7 @@ self.groups[i_file], self.lh5_files[i_file], start_row=i_local, - n_rows=self.buffer_len - len(self.lh5_buffer), + n_rows=n_entries - len(self.lh5_buffer), idx=local_idx, field_mask=self.field_mask, obj_buf=self.lh5_buffer, @@ -485,12 +502,16 @@ def __len__(self) -> int: def __iter__(self) -> typing.Iterator: """Loop through entries in blocks of size buffer_len.""" self.current_i_entry = 0 - self.next_i_entry = 0 + self.next_i_entry = self.i_start return self def __next__(self) -> tuple[LGDO, int, int]: """Read next buffer_len entries and return lh5_table and iterator entry.""" - buf = self.read(self.next_i_entry) + n_entries = self.n_entries + if n_entries is not None: + n_entries = min(self.buffer_len, n_entries+self.i_start-self.next_i_entry) + + buf = self.read(self.next_i_entry, n_entries) if len(buf) == 0: raise StopIteration self.next_i_entry = self.current_i_entry + len(buf)
From 58d7872179c002731d79b2eb6e6d07b7d4538192 Mon Sep 17 00:00:00 2001 From: iguinn Date: Fri, 14 Feb 2025 11:42:53 -0800 Subject: [PATCH 26/27] Test use of start and n_entries for iterator --- tests/lh5/test_lh5_iterator.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/lh5/test_lh5_iterator.py b/tests/lh5/test_lh5_iterator.py index 273612cf..f3de6183 100644 --- a/tests/lh5/test_lh5_iterator.py +++ b/tests/lh5/test_lh5_iterator.py @@ -219,3 +219,37 @@ def test_iterate(more_lgnd_files): field_mask=["is_valid_0vbb", "timestamp", "zacEmax_ctc_cal"], buffer_len=5, ) + + +def test_range(lgnd_file): + lh5_it = lh5.LH5Iterator( + lgnd_file, + "/geds/raw", + field_mask=["baseline"], + buffer_len=5, + i_start = 7, + n_entries = 13 + ) + + # Test error when n_entries > buffer_len + with pytest.raises(ValueError): + lh5_obj = lh5_it.read(4, n_entries=7) + + lh5_obj = lh5_it.read(4, n_entries=3) + assert len(lh5_obj) == 3 + assert isinstance(lh5_obj, lgdo.Table) + assert list(lh5_obj.keys()) == ["baseline"] + assert ( + lh5_obj["baseline"].nda == np.array([14353, 14254, 14525]) + ).all() + + exp_i_entries = [7, 12, 17] + exp_lens = [5, 5, 3] + for lh5_obj, exp_i, exp_len in zip(lh5_it, exp_i_entries, exp_lens): + entry = lh5_it.current_i_entry + assert len(lh5_obj) == exp_len + assert entry == exp_i + assert all(lh5_it.current_local_entries == np.arange(entry, entry + exp_len)) + assert all(lh5_it.current_global_entries == np.arange(entry, entry + exp_len)) + assert all(lh5_it.current_files == [lgnd_file] * exp_len) + assert all(lh5_it.current_groups == ["/geds/raw"] * exp_len)
From ce1b6fe268e69ec71932368f3830abf791bf1d0e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 14 Feb 2025 19:43:25 +0000 Subject: [PATCH 27/27] style: pre-commit fixes --- src/lgdo/lh5/iterator.py | 11 ++++++----- tests/lh5/test_lh5_iterator.py | 10 ++++------ 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/lgdo/lh5/iterator.py b/src/lgdo/lh5/iterator.py index e023b614..6f4d4d46 100644 --- a/src/lgdo/lh5/iterator.py +++ b/src/lgdo/lh5/iterator.py @@ -330,15 +330,14 @@ def get_global_entrylist(self) -> np.ndarray: def read(self, i_entry: int, n_entries: int | None = None) -> LGDO: "Read the next local chunk of events, starting at entry." self.lh5_buffer.resize(0) - + if n_entries is None: n_entries = self.buffer_len - elif n_entries==0: + elif n_entries == 0: return self.lh5_buffer elif n_entries > self.buffer_len: msg = "n_entries cannot be larger than buffer_len" raise ValueError(msg) - # if file hasn't been opened yet, search through files # sequentially until we find the right one @@ -509,8 +508,10 @@ def __next__(self) -> tuple[LGDO, int, int]: """Read next buffer_len entries and return lh5_table and iterator entry.""" n_entries = self.n_entries if n_entries is not None: - n_entries = min(self.buffer_len, n_entries+self.i_start-self.next_i_entry) - + n_entries = min( + self.buffer_len, n_entries + self.i_start - self.next_i_entry + ) + buf = self.read(self.next_i_entry, n_entries) diff --git a/tests/lh5/test_lh5_iterator.py b/tests/lh5/test_lh5_iterator.py index f3de6183..6d5e3e98 100644 --- a/tests/lh5/test_lh5_iterator.py +++ b/tests/lh5/test_lh5_iterator.py @@ -227,19 +227,19 @@ def test_range(lgnd_file): "/geds/raw", field_mask=["baseline"], buffer_len=5, - i_start = 7, - n_entries = 13 + i_start=7, + n_entries=13, ) # Test error when n_entries > buffer_len with pytest.raises(ValueError): lh5_obj = lh5_it.read(4, n_entries=7) - + lh5_obj = lh5_it.read(4, n_entries=3) assert len(lh5_obj) == 3 assert isinstance(lh5_obj, lgdo.Table) assert list(lh5_obj.keys()) == ["baseline"] - assert ( - lh5_obj["baseline"].nda == np.array([14353, 14254, 14525]) - ).all() + assert (lh5_obj["baseline"].nda == np.array([14353, 14254, 14525])).all() exp_i_entries = [7, 12, 17] exp_lens = [5, 5, 3]
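Taken together, patches 25-27 let an iterator cover a sub-range of entries. A closing usage sketch (hypothetical file; the values mirror test_range above):

    from lgdo.lh5 import LH5Iterator

    it = LH5Iterator(
        "file.lh5",
        "geds/raw",
        field_mask=["baseline"],
        buffer_len=5,
        i_start=7,
        n_entries=13,
    )
    for lh5_obj in it:
        print(it.current_i_entry, len(lh5_obj))  # (7, 5), (12, 5), (17, 3)

    buf = it.read(4, n_entries=3)  # random access: 3 entries starting at entry 4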