Skip to content

Commit

Permalink
Move written flag from builder to HDF5IO, add tests (#381)
Browse files Browse the repository at this point in the history
  • Loading branch information
rly authored Jun 16, 2020
1 parent 3fa0587 commit 4739b5a
Show file tree
Hide file tree
Showing 5 changed files with 193 additions and 51 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# HDMF Changelog

## HDMF 2.0.0 (Upcoming)

### Internal improvements

#### Breaking changes
- `Builder` objects no longer have the `written` field which was used by `HDF5IO` to mark the object as written. This
is replaced by `HDF5IO.get_written`. @rly (#381)

## HDMF 1.6.4 (Upcoming)

### Bug fixes:
Expand Down
60 changes: 44 additions & 16 deletions src/hdmf/backends/hdf5/h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def __init__(self, **kwargs):
self.__ref_queue = deque() # a queue of the references that need to be added
self.__dci_queue = deque() # a queue of DataChunkIterators that need to be exhausted
ObjectMapper.no_convert(Dataset)
self._written_builders = dict() # keep track of which builders were written (or read) by this IO object

@property
def comm(self):
Expand Down Expand Up @@ -344,6 +345,33 @@ def read_builder(self):
self.__read[self.__file] = f_builder
return f_builder

def __set_written(self, builder):
"""
Mark this builder as written.
:param builder: Builder object to be marked as written
:type builder: Builder
"""
# currently all values in self._written_builders are True, so this could be a set but is a dict for
# future flexibility
builder_id = self.__builderhash(builder)
self._written_builders[builder_id] = True

def get_written(self, builder):
"""Return True if this builder has been written to (or read from) disk by this IO object, False otherwise.
:param builder: Builder object to get the written flag for
:type builder: Builder
:return: True if the builder is found in self._written_builders using the builder ID, False otherwise
"""
builder_id = self.__builderhash(builder)
return self._written_builders.get(builder_id, False)

def __builderhash(self, obj):
"""Return the ID of a builder for use as a unique hash."""
return id(obj)

def __set_built(self, fpath, id, builder):
"""
Update self.__built to cache the given builder for the given file and id.
Expand Down Expand Up @@ -440,7 +468,7 @@ def __read_group(self, h5obj, name=None, ignore=set()):
self.__set_built(sub_h5obj.file.filename, sub_h5obj.file[target_path].id, builder)
builder.location = parent_loc
link_builder = LinkBuilder(builder, k, source=h5obj.file.filename)
link_builder.written = True
self.__set_written(link_builder)
kwargs['links'][builder_name] = link_builder
else:
builder = self.__get_built(sub_h5obj.file.filename, sub_h5obj.id)
Expand All @@ -462,7 +490,7 @@ def __read_group(self, h5obj, name=None, ignore=set()):
continue
kwargs['source'] = h5obj.file.filename
ret = GroupBuilder(name, **kwargs)
ret.written = True
self.__set_written(ret)
return ret

def __read_dataset(self, h5obj, name=None):
Expand Down Expand Up @@ -515,7 +543,7 @@ def __read_dataset(self, h5obj, name=None):
else:
kwargs["data"] = h5obj
ret = DatasetBuilder(name, **kwargs)
ret.written = True
self.__set_written(ret)
return ret

def __read_attrs(self, h5obj):
Expand Down Expand Up @@ -573,6 +601,7 @@ def write_builder(self, **kwargs):
self.set_attributes(self.__file, f_builder.attributes)
self.__add_refs()
self.__exhaust_dcis()
self.__set_written(f_builder)

def __add_refs(self):
'''
Expand Down Expand Up @@ -726,7 +755,7 @@ def _filler():
def write_group(self, **kwargs):
parent, builder, exhaust_dci = getargs('parent', 'builder', 'exhaust_dci', kwargs)
self.logger.debug("Writing GroupBuilder '%s' to parent group '%s'" % (builder.name, parent.name))
if builder.written:
if self.get_written(builder):
group = parent[builder.name]
else:
group = parent.create_group(builder.name)
Expand All @@ -748,7 +777,7 @@ def write_group(self, **kwargs):
self.write_link(group, sub_builder)
attributes = builder.attributes
self.set_attributes(group, attributes)
builder.written = True
self.__set_written(builder)
return group

def __get_path(self, builder):
Expand All @@ -767,7 +796,7 @@ def __get_path(self, builder):
def write_link(self, **kwargs):
parent, builder = getargs('parent', 'builder', kwargs)
self.logger.debug("Writing LinkBuilder '%s' to parent group '%s'" % (builder.name, parent.name))
if builder.written:
if self.get_written(builder):
return None
name = builder.name
target_builder = builder.builder
Expand All @@ -790,7 +819,7 @@ def write_link(self, **kwargs):
msg = 'cannot create external link to %s' % path
raise ValueError(msg)
parent[name] = link_obj
builder.written = True
self.__set_written(builder)
return link_obj

@docval({'name': 'parent', 'type': Group, 'doc': 'the parent HDF5 object'},
Expand All @@ -808,7 +837,7 @@ def write_dataset(self, **kwargs): # noqa: C901
"""
parent, builder, link_data, exhaust_dci = getargs('parent', 'builder', 'link_data', 'exhaust_dci', kwargs)
self.logger.debug("Writing DatasetBuilder '%s' to parent group '%s'" % (builder.name, parent.name))
if builder.written:
if self.get_written(builder):
return None
name = builder.name
data = builder.data
Expand Down Expand Up @@ -860,7 +889,7 @@ def write_dataset(self, **kwargs): # noqa: C901
msg = 'cannot add %s to %s - could not determine type' % (name, parent.name)
raise Exception(msg) from exc
dset = parent.require_dataset(name, shape=(len(data),), dtype=_dtype, **options['io_settings'])
builder.written = True
self.__set_written(builder)
self.logger.debug("Queueing set attribute on dataset '%s' containing references. attributes: %s"
% (name, list(attributes.keys())))

Expand All @@ -887,7 +916,7 @@ def _filler():
# Write a scalar data region reference dataset
if isinstance(data, RegionBuilder):
dset = parent.require_dataset(name, shape=(), dtype=_dtype)
builder.written = True
self.__set_written(builder)
self.logger.debug("Queueing set attribute on dataset '%s' containing a region reference. "
"attributes: %s" % (name, list(attributes.keys())))

Expand All @@ -900,7 +929,7 @@ def _filler():
# Write a scalar object reference dataset
elif isinstance(data, ReferenceBuilder):
dset = parent.require_dataset(name, dtype=_dtype, shape=())
builder.written = True
self.__set_written(builder)
self.logger.debug("Queueing set attribute on dataset '%s' containing an object reference. "
"attributes: %s" % (name, list(attributes.keys())))

Expand All @@ -915,7 +944,7 @@ def _filler():
# Write a array of region references
if options['dtype'] == 'region':
dset = parent.require_dataset(name, dtype=_dtype, shape=(len(data),), **options['io_settings'])
builder.written = True
self.__set_written(builder)
self.logger.debug("Queueing set attribute on dataset '%s' containing region references. "
"attributes: %s" % (name, list(attributes.keys())))

Expand All @@ -929,8 +958,8 @@ def _filler():
self.set_attributes(dset, attributes)
# Write array of object references
else:
dset = parent.require_dataset(name, shape=(len(data),), dtype=_dtype, ** options['io_settings'])
builder.written = True
dset = parent.require_dataset(name, shape=(len(data),), dtype=_dtype, **options['io_settings'])
self.__set_written(builder)
self.logger.debug("Queueing set attribute on dataset '%s' containing object references. "
"attributes: %s" % (name, list(attributes.keys())))

Expand Down Expand Up @@ -964,10 +993,9 @@ def _filler():
# Validate the attributes on the linked dataset
elif len(attributes) > 0:
pass
builder.written = True
self.__set_written(builder)
if exhaust_dci:
self.__exhaust_dcis()
return

@classmethod
def __scalar_fill__(cls, parent, name, data, options=None):
Expand Down
12 changes: 0 additions & 12 deletions src/hdmf/build/builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ def __init__(self, **kwargs):
self.__source = parent.source
else:
self.__source = None
self.__written = False

@property
def path(self):
Expand All @@ -42,17 +41,6 @@ def path(self):
c = c.parent
return "/".join(s[::-1])

@property
def written(self):
''' The source of this Builder '''
return self.__written

@written.setter
def written(self, s):
if self.__written and not s:
raise ValueError("cannot change written to not written")
self.__written = s

@property
def name(self):
''' The name of this Builder '''
Expand Down
9 changes: 0 additions & 9 deletions tests/unit/test_io_hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,15 +221,6 @@ def test_read_builder(self):
self.assertBuilderEqual(builder, self.builder)
io.close()

def test_overwrite_written(self):
self.maxDiff = None
io = HDF5IO(self.path, manager=self.manager, mode='a')
io.write_builder(self.builder)
builder = io.read_builder()
with self.assertRaisesWith(ValueError, "cannot change written to not written"):
builder.written = False
io.close()

def test_dataset_shape(self):
self.maxDiff = None
io = HDF5IO(self.path, manager=self.manager, mode='a')
Expand Down
Loading

0 comments on commit 4739b5a

Please sign in to comment.