Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move written flag from builder to HDF5IO, add tests #381

Merged
merged 8 commits into from
Jun 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# HDMF Changelog

## HDMF 2.0.0 (Upcoming)

### Internal improvements

#### Breaking changes
- `Builder` objects no longer have the `written` field which was used by `HDF5IO` to mark the object as written. This
is replaced by `HDF5IO.get_written`. @rly (#381)

## HDMF 1.6.4 (Upcoming)

### Bug fixes:
Expand Down
60 changes: 44 additions & 16 deletions src/hdmf/backends/hdf5/h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def __init__(self, **kwargs):
self.__ref_queue = deque() # a queue of the references that need to be added
self.__dci_queue = deque() # a queue of DataChunkIterators that need to be exhausted
ObjectMapper.no_convert(Dataset)
self._written_builders = dict() # keep track of which builders were written (or read) by this IO object

@property
def comm(self):
Expand Down Expand Up @@ -344,6 +345,33 @@ def read_builder(self):
self.__read[self.__file] = f_builder
return f_builder

def __set_written(self, builder):
    """Record that the given builder has been written (or read) by this IO object.

    :param builder: Builder object to be marked as written
    :type builder: Builder
    """
    # All values stored here are currently True; a dict (rather than a set) is
    # kept deliberately so that extra per-builder state can be tracked later.
    self._written_builders[self.__builderhash(builder)] = True

def get_written(self, builder):
    """Check whether this IO object has already written (or read) the given builder to/from disk.

    :param builder: Builder object to look up
    :type builder: Builder

    :return: True if the builder's ID is present in self._written_builders, False otherwise
    """
    key = self.__builderhash(builder)
    if key in self._written_builders:
        return self._written_builders[key]
    return False

def __builderhash(self, obj):
    """Return the ID of a builder for use as a unique hash."""
    # Identity-based keying: two distinct builder instances never collide,
    # even if their contents happen to compare equal.
    return id(obj)

def __set_built(self, fpath, id, builder):
"""
Update self.__built to cache the given builder for the given file and id.
Expand Down Expand Up @@ -440,7 +468,7 @@ def __read_group(self, h5obj, name=None, ignore=set()):
self.__set_built(sub_h5obj.file.filename, sub_h5obj.file[target_path].id, builder)
builder.location = parent_loc
link_builder = LinkBuilder(builder, k, source=h5obj.file.filename)
link_builder.written = True
self.__set_written(link_builder)
kwargs['links'][builder_name] = link_builder
else:
builder = self.__get_built(sub_h5obj.file.filename, sub_h5obj.id)
Expand All @@ -462,7 +490,7 @@ def __read_group(self, h5obj, name=None, ignore=set()):
continue
kwargs['source'] = h5obj.file.filename
ret = GroupBuilder(name, **kwargs)
ret.written = True
self.__set_written(ret)
return ret

def __read_dataset(self, h5obj, name=None):
Expand Down Expand Up @@ -515,7 +543,7 @@ def __read_dataset(self, h5obj, name=None):
else:
kwargs["data"] = h5obj
ret = DatasetBuilder(name, **kwargs)
ret.written = True
self.__set_written(ret)
return ret

def __read_attrs(self, h5obj):
Expand Down Expand Up @@ -573,6 +601,7 @@ def write_builder(self, **kwargs):
self.set_attributes(self.__file, f_builder.attributes)
self.__add_refs()
self.__exhaust_dcis()
self.__set_written(f_builder)

def __add_refs(self):
'''
Expand Down Expand Up @@ -726,7 +755,7 @@ def _filler():
def write_group(self, **kwargs):
parent, builder, exhaust_dci = getargs('parent', 'builder', 'exhaust_dci', kwargs)
self.logger.debug("Writing GroupBuilder '%s' to parent group '%s'" % (builder.name, parent.name))
if builder.written:
if self.get_written(builder):
group = parent[builder.name]
else:
group = parent.create_group(builder.name)
Expand All @@ -748,7 +777,7 @@ def write_group(self, **kwargs):
self.write_link(group, sub_builder)
attributes = builder.attributes
self.set_attributes(group, attributes)
builder.written = True
self.__set_written(builder)
return group

def __get_path(self, builder):
Expand All @@ -767,7 +796,7 @@ def __get_path(self, builder):
def write_link(self, **kwargs):
parent, builder = getargs('parent', 'builder', kwargs)
self.logger.debug("Writing LinkBuilder '%s' to parent group '%s'" % (builder.name, parent.name))
if builder.written:
if self.get_written(builder):
return None
name = builder.name
target_builder = builder.builder
Expand All @@ -790,7 +819,7 @@ def write_link(self, **kwargs):
msg = 'cannot create external link to %s' % path
raise ValueError(msg)
parent[name] = link_obj
builder.written = True
self.__set_written(builder)
return link_obj

@docval({'name': 'parent', 'type': Group, 'doc': 'the parent HDF5 object'},
Expand All @@ -808,7 +837,7 @@ def write_dataset(self, **kwargs): # noqa: C901
"""
parent, builder, link_data, exhaust_dci = getargs('parent', 'builder', 'link_data', 'exhaust_dci', kwargs)
self.logger.debug("Writing DatasetBuilder '%s' to parent group '%s'" % (builder.name, parent.name))
if builder.written:
if self.get_written(builder):
return None
name = builder.name
data = builder.data
Expand Down Expand Up @@ -860,7 +889,7 @@ def write_dataset(self, **kwargs): # noqa: C901
msg = 'cannot add %s to %s - could not determine type' % (name, parent.name)
raise Exception(msg) from exc
dset = parent.require_dataset(name, shape=(len(data),), dtype=_dtype, **options['io_settings'])
builder.written = True
self.__set_written(builder)
self.logger.debug("Queueing set attribute on dataset '%s' containing references. attributes: %s"
% (name, list(attributes.keys())))

Expand All @@ -887,7 +916,7 @@ def _filler():
# Write a scalar data region reference dataset
if isinstance(data, RegionBuilder):
dset = parent.require_dataset(name, shape=(), dtype=_dtype)
builder.written = True
self.__set_written(builder)
self.logger.debug("Queueing set attribute on dataset '%s' containing a region reference. "
"attributes: %s" % (name, list(attributes.keys())))

Expand All @@ -900,7 +929,7 @@ def _filler():
# Write a scalar object reference dataset
elif isinstance(data, ReferenceBuilder):
dset = parent.require_dataset(name, dtype=_dtype, shape=())
builder.written = True
self.__set_written(builder)
self.logger.debug("Queueing set attribute on dataset '%s' containing an object reference. "
"attributes: %s" % (name, list(attributes.keys())))

Expand All @@ -915,7 +944,7 @@ def _filler():
# Write a array of region references
if options['dtype'] == 'region':
dset = parent.require_dataset(name, dtype=_dtype, shape=(len(data),), **options['io_settings'])
builder.written = True
self.__set_written(builder)
self.logger.debug("Queueing set attribute on dataset '%s' containing region references. "
"attributes: %s" % (name, list(attributes.keys())))

Expand All @@ -929,8 +958,8 @@ def _filler():
self.set_attributes(dset, attributes)
# Write array of object references
else:
dset = parent.require_dataset(name, shape=(len(data),), dtype=_dtype, ** options['io_settings'])
builder.written = True
dset = parent.require_dataset(name, shape=(len(data),), dtype=_dtype, **options['io_settings'])
self.__set_written(builder)
self.logger.debug("Queueing set attribute on dataset '%s' containing object references. "
"attributes: %s" % (name, list(attributes.keys())))

Expand Down Expand Up @@ -964,10 +993,9 @@ def _filler():
# Validate the attributes on the linked dataset
elif len(attributes) > 0:
pass
builder.written = True
self.__set_written(builder)
if exhaust_dci:
self.__exhaust_dcis()
return

@classmethod
def __scalar_fill__(cls, parent, name, data, options=None):
Expand Down
12 changes: 0 additions & 12 deletions src/hdmf/build/builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ def __init__(self, **kwargs):
self.__source = parent.source
else:
self.__source = None
self.__written = False

@property
def path(self):
Expand All @@ -42,17 +41,6 @@ def path(self):
c = c.parent
return "/".join(s[::-1])

@property
def written(self):
    ''' Whether this Builder has been written to (or read from) disk by an IO backend '''
    return self.__written

@written.setter
def written(self, s):
    # A builder cannot be "un-written": only the False -> True transition
    # (or re-setting the current value) is permitted.
    if self.__written and not s:
        raise ValueError("cannot change written to not written")
    self.__written = s

@property
def name(self):
''' The name of this Builder '''
Expand Down
9 changes: 0 additions & 9 deletions tests/unit/test_io_hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,15 +221,6 @@ def test_read_builder(self):
self.assertBuilderEqual(builder, self.builder)
io.close()

def test_overwrite_written(self):
    """Test that a builder's `written` flag cannot be reset to False once it has been written."""
    self.maxDiff = None  # show full diffs if an assertion fails
    io = HDF5IO(self.path, manager=self.manager, mode='a')
    io.write_builder(self.builder)
    # reading back yields builders whose written flag is True
    builder = io.read_builder()
    with self.assertRaisesWith(ValueError, "cannot change written to not written"):
        builder.written = False
    io.close()

def test_dataset_shape(self):
self.maxDiff = None
io = HDF5IO(self.path, manager=self.manager, mode='a')
Expand Down
Loading