From 694a8400f79c0cdd728b1147054288153260ac86 Mon Sep 17 00:00:00 2001 From: Carl Simon Adorf Date: Mon, 18 Feb 2019 21:15:32 -0500 Subject: [PATCH 01/15] Implement the H5StoreManager class. Manages multiple instances of H5Store within a specific directory. Exposed as job.stores and project.stores. --- changelog.txt | 2 +- signac/contrib/job.py | 26 +++++---- signac/contrib/project.py | 27 +++++----- signac/core/dict_manager.py | 99 +++++++++++++++++++++++++++++++++++ signac/core/h5store.py | 10 ++++ tests/test_h5store_manager.py | 94 +++++++++++++++++++++++++++++++++ 6 files changed, 230 insertions(+), 28 deletions(-) create mode 100644 signac/core/dict_manager.py create mode 100644 tests/test_h5store_manager.py diff --git a/changelog.txt b/changelog.txt index 3d706c268..f4ca40989 100644 --- a/changelog.txt +++ b/changelog.txt @@ -24,7 +24,7 @@ Highlights Added +++++ - - Adds an ``H5Store`` class, useful for storing array-like data with an HDF5 backend. Accessible via ``with job.data:`` or ``with project.data:``. + - Adds an ``H5Store`` and a ``H5StoreManager`` class, useful for storing array-like data with an HDF5 backend. Accessible via ``with job.data:`` or ``with project.data:``. - Adds the ``signac.get_job()`` and the ``signac.Project.get_job()`` functions which allow users to get a job handle by switching into or providing the job's workspace directory. - Add automatic cast of numpy arrays to lists when storing them within a `JSONDict`, e.g., a `job.statepoint` or `job.document`. - Enable `Collection` class to manage collections stored in gzip files. 
diff --git a/signac/contrib/job.py b/signac/contrib/job.py index 8c71c3ec0..19bbb76e6 100644 --- a/signac/contrib/job.py +++ b/signac/contrib/job.py @@ -11,7 +11,7 @@ from ..core.json import json, CustomJSONEncoder from ..core.attrdict import SyncedAttrDict from ..core.jsondict import JSONDict -from ..core.h5store import H5Store +from ..core.h5store import H5StoreManager from .hashing import calc_id from .utility import _mkdir_p from .errors import DestinationExistsError, JobsCorruptedError @@ -53,8 +53,7 @@ class Job(object): FN_DOCUMENT = 'signac_job_document.json' "The job's document filename." - FN_DATA = 'signac_data.h5' - "The job's datastore filename." + KEY_DATA = 'signac_data' def __init__(self, project, statepoint, _id=None): self._project = project @@ -77,10 +76,6 @@ def __init__(self, project, statepoint, _id=None): self._fn_doc = os.path.join(self._wd, self.FN_DOCUMENT) self._document = None - # Prepare job datastore - self._fn_data = os.path.join(self._wd, self.FN_DATA) - self._data = None - # Prepare current working directory for context management self._cwd = list() @@ -163,7 +158,6 @@ def reset_statepoint(self, new_statepoint): self._wd = dst._wd self._fn_doc = dst._fn_doc self._document = None - self._fn_data = dst._fn_data self._data = None self._cwd = list() logger.info("Moved '{}' -> '{}'.".format(self, dst)) @@ -263,21 +257,25 @@ def doc(self): def doc(self, new_doc): self.document = new_doc + @property + def stores(self): + return H5StoreManager(self._wd) + @property def data(self): """The data associated with this job. + Equivalent to: + + return job.store['signac_data'] + :return: An HDF5-backed datastore. 
:rtype: :class:`~signac.core.h5store.H5Store`""" - if self._data is None: - self.init() - self._data = H5Store(filename=self._fn_data) - return self._data + return self.init().stores[self.KEY_DATA] @data.setter def data(self, new_data): - self._data.clear() - self._data.update(new_data) + self.stores[self.KEY_DATA] = new_data def _init(self, force=False): fn_manifest = os.path.join(self._wd, self.FN_MANIFEST) diff --git a/signac/contrib/project.py b/signac/contrib/project.py index 50460ad24..12f6bccf0 100644 --- a/signac/contrib/project.py +++ b/signac/contrib/project.py @@ -18,7 +18,7 @@ from .. import syncutil from ..core.json import json from ..core.jsondict import JSONDict -from ..core.h5store import H5Store +from ..core.h5store import H5StoreManager from .collection import Collection from ..common import six from ..common.config import load_config @@ -124,8 +124,8 @@ class Project(object): FN_DOCUMENT = 'signac_project_document.json' "The project's document filename." - FN_DATA = 'signac_data.h5' - "The project's datastore filename." + KEY_DATA = 'signac_data' + "The project's datastore key." FN_STATEPOINTS = 'signac_statepoints.json' "The default filename to read from and write statepoints to." @@ -147,10 +147,6 @@ def __init__(self, config=None): self._fn_doc = os.path.join(self._rd, self.FN_DOCUMENT) self._document = None - # Prepare project datastore - self._fn_data = os.path.join(self._rd, self.FN_DATA) - self._data = None - # Internal caches self._index_cache = dict() self._sp_cache = dict() @@ -305,21 +301,26 @@ def doc(self): def doc(self, new_doc): self.document = new_doc + @property + def stores(self): + return H5StoreManager(self._rd) + @property def data(self): """The data associated with this project. + Equivalent to: + + return project.store['signac_data'] + :return: An HDF5-backed datastore. 
- :rtype: :class:`~signac.core.h5store.H5Store` + :rtype: :class:`~signac.core.h5store.H5DictManager` """ - if self._data is None: - self._data = H5Store(filename=self._fn_data) - return self._data + return self.stores[self.KEY_DATA] @data.setter def data(self, new_data): - self._data.clear() - self._data.update(new_data) + self.stores[self.KEY_DATA] = new_data def open_job(self, statepoint=None, id=None): """Get a job handle associated with a statepoint. diff --git a/signac/core/dict_manager.py b/signac/core/dict_manager.py new file mode 100644 index 000000000..f6f499ff9 --- /dev/null +++ b/signac/core/dict_manager.py @@ -0,0 +1,99 @@ +# Copyright (c) 2019 The Regents of the University of Michigan +# All rights reserved. +# This software is licensed under the BSD 3-Clause License. +"Basic wrapper to access multiple different data stores." +import os +import re +import errno +import uuid + +from ..common import six + + +class DictManager(object): + + cls = None + suffix = None + + __slots__ = ['prefix'] + + def __init__(self, prefix): + assert self.cls is not None + assert self.suffix is not None + self.prefix = os.path.abspath(prefix) + + def __eq__(self, other): + return os.path.realpath(self.prefix) == os.path.realpath(other.prefix) and \ + self.suffix == other.suffix + + def __repr__(self): + return "{}(prefix='{}')".format(type(self).__name__, os.path.relpath(self.prefix)) + + __str__ = __repr__ + + def __getitem__(self, key): + return self.cls(os.path.join(self.prefix, key) + self.suffix) + + def __setitem__(self, key, value): + tmp_key = str(uuid.uuid4()) + try: + self[tmp_key].update(value) + if six.PY2: + os.rename(self[tmp_key].filename, self[key].filename) + else: + os.replace(self[tmp_key].filename, self[key].filename) + except (IOError, OSError) as error: + if error.errno == errno.ENOENT and not len(value): + raise ValueError("Cannot asssign empty value!") + else: + raise error + except Exception: + try: + del self[tmp_key] + except KeyError: + pass + 
raise + + def __delitem__(self, key): + try: + os.unlink(self[key].filename) + except IOError as error: + if error.errno == errno.ENOENT: + raise KeyError(key) + else: + raise error + + def __getattr__(self, name): + try: + return super(DictManager, self).__getattribute__(name) + except AttributeError: + if name.startswith('__') or name in self.__slots__: + raise + try: + return self.__getitem__(name) + except KeyError: + raise AttributeError(name) + + def __setattr__(self, name, value): + if name.startswith('__') or name in self.__slots__: + super(DictManager, self).__setattr__(name, value) + else: + self.__setitem__(name, value) + + def __delattr__(self, name, value): + if name.startswith('__') or name in self.__slots__: + super(DictManager, self).__delattr__(name, value) + else: + self.__delitem__(name, value) + + def __iter__(self): + for fn in os.listdir(self.prefix): + m = re.match('(.*){}'.format(self.suffix), fn) + if m: + yield m.groups()[0] + + def keys(self): + return iter(self) + + def __len__(self): + return len(list(self.keys())) diff --git a/signac/core/h5store.py b/signac/core/h5store.py index 7f8fcbb02..d1954ac97 100644 --- a/signac/core/h5store.py +++ b/signac/core/h5store.py @@ -10,6 +10,7 @@ from threading import RLock from ..common import six +from .dict_manager import DictManager if six.PY2: from collections import Mapping @@ -284,6 +285,10 @@ def __init__(self, filename, **kwargs): self._file = None self._kwargs = kwargs + @property + def filename(self): + return self._filename + def __repr__(self): return "<{}(filename={})>".format(type(self).__name__, os.path.relpath(self._filename)) @@ -428,3 +433,8 @@ def clear(self): """ with _ensure_open(self): self._file.clear() + + +class H5StoreManager(DictManager): + cls = H5Store + suffix = '.h5' diff --git a/tests/test_h5store_manager.py b/tests/test_h5store_manager.py new file mode 100644 index 000000000..ac74e62ff --- /dev/null +++ b/tests/test_h5store_manager.py @@ -0,0 +1,94 @@ +# Copyright 
(c) 2019 The Regents of the University of Michigan +# All rights reserved. +# This software is licensed under the BSD 3-Clause License. +import os +import unittest + +from signac.core.h5store import H5StoreManager +from signac.common import six +if six.PY2: + from tempdir import TemporaryDirectory +else: + from tempfile import TemporaryDirectory + + +class TestH5StoreManager(unittest.TestCase): + + def setUp(self): + self._tmp_dir = TemporaryDirectory(prefix='h5store_') + self.addCleanup(self._tmp_dir.cleanup) + self.store = H5StoreManager(prefix=self._tmp_dir.name) + with open(os.path.join(self._tmp_dir.name, 'other_file.txt'), 'w') as file: + file.write(r'blank\n') + + def test_repr(self): + self.assertEqual(eval(repr(self.store)), self.store) + + def test_str(self): + self.assertEqual(eval(str(self.store)), self.store) + + def test_set(self): + self.assertEqual(len(self.store), 0) + self.assertNotIn('test', self.store) + for value in ('', [], {}): + with self.assertRaises(ValueError): + self.store['test'] = value + for value in (True, 0, 0.0, 1, 1.0, None): + with self.assertRaises(TypeError): + self.store['test'] = value + for value in ('abc'): + with self.assertRaises(ValueError): + self.store['test'] = value + + # Assigning a dictionary is the intended use case + self.store['test'] = dict(foo=True) + self.assertEqual(len(self.store), 1) + self.assertIn('test', self.store) + + def test_set_iterable(self): + self.assertEqual(len(self.store), 0) + self.assertNotIn('test', self.store) + self.store['test'] = list(dict(foo=True).items()) + self.assertEqual(len(self.store), 1) + self.assertIn('test', self.store) + + def test_set_get(self): + self.assertEqual(len(self.store), 0) + self.assertNotIn('test', self.store) + self.store['test']['foo'] = 'bar' + self.assertIn('test', self.store) + self.assertEqual(len(self.store), 1) + self.assertIn('foo', self.store['test']) + + def test_del(self): + self.assertEqual(len(self.store), 0) + self.assertNotIn('test', 
self.store) + self.store['test']['foo'] = 'bar' + self.assertIn('test', self.store) + self.assertEqual(len(self.store), 1) + self.assertIn('foo', self.store['test']) + with self.assertRaises(KeyError): + del self.store['invalid'] + del self.store['test'] + self.assertEqual(len(self.store), 0) + self.assertNotIn('test', self.store) + + def test_iteration(self): + keys = ['foo', 'bar', 'baz'] + for key in keys: + self.store[key] = dict(test=True) + self.assertEqual(list(sorted(keys)), list(sorted(self.store))) + self.assertEqual(list(sorted(keys)), list(sorted(self.store.keys()))) + + def test_contains(self): + keys = ['foo', 'bar', 'baz'] + for key in keys: + self.assertNotIn(key, self.store) + for key in keys: + self.store[key] = dict(test=True) + for key in keys: + self.assertIn(key, self.store) + + +if __name__ == '__main__': + unittest.main() From ccabe8f816b01d600298ae006e12a699cb71a79d Mon Sep 17 00:00:00 2001 From: Carl Simon Adorf Date: Mon, 18 Feb 2019 21:21:38 -0500 Subject: [PATCH 02/15] Fix Py27 issue. --- signac/core/dict_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signac/core/dict_manager.py b/signac/core/dict_manager.py index f6f499ff9..f502a6bdf 100644 --- a/signac/core/dict_manager.py +++ b/signac/core/dict_manager.py @@ -57,7 +57,7 @@ def __setitem__(self, key, value): def __delitem__(self, key): try: os.unlink(self[key].filename) - except IOError as error: + except (IOError, OSError) as error: if error.errno == errno.ENOENT: raise KeyError(key) else: From 965e0eb0392b70e3e24953cca1143680aa590acc Mon Sep 17 00:00:00 2001 From: Carl Simon Adorf Date: Mon, 18 Feb 2019 21:27:36 -0500 Subject: [PATCH 03/15] Fix exception handling bug in DictManager. 
--- signac/core/dict_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/signac/core/dict_manager.py b/signac/core/dict_manager.py index f502a6bdf..7c4c7ac30 100644 --- a/signac/core/dict_manager.py +++ b/signac/core/dict_manager.py @@ -47,12 +47,12 @@ def __setitem__(self, key, value): raise ValueError("Cannot asssign empty value!") else: raise error - except Exception: + except Exception as error: try: del self[tmp_key] except KeyError: pass - raise + raise error def __delitem__(self, key): try: From 0a699fd5b63edcac2ed2038a26442ba2354c32ac Mon Sep 17 00:00:00 2001 From: Carl Simon Adorf Date: Tue, 19 Feb 2019 11:47:32 -0500 Subject: [PATCH 04/15] Raise H5StoreClosedError when trying to flush a closed instance of H5Store. --- signac/core/h5store.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/signac/core/h5store.py b/signac/core/h5store.py index d1954ac97..f9942ebe5 100644 --- a/signac/core/h5store.py +++ b/signac/core/h5store.py @@ -364,7 +364,10 @@ def mode(self): def flush(self): """Flush the underlying HDF5-file.""" - self._file.flush() + if self._file is None: + raise H5StoreClosedError(self._filename) + else: + self._file.flush() def __getitem__(self, key): key = key if key.startswith('/') else '/' + key From 642ab512cdadd0d475f139a60b3e05d8f6c9fedc Mon Sep 17 00:00:00 2001 From: Carl Simon Adorf Date: Tue, 19 Feb 2019 11:48:58 -0500 Subject: [PATCH 05/15] Employ registry in DictManager for reference count management. Register returned dicts in internal registry to ensure persistence of returned dicts over the lifetime of the manager instance. 
--- signac/core/dict_manager.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/signac/core/dict_manager.py b/signac/core/dict_manager.py index 7c4c7ac30..aec2d846d 100644 --- a/signac/core/dict_manager.py +++ b/signac/core/dict_manager.py @@ -15,12 +15,17 @@ class DictManager(object): cls = None suffix = None - __slots__ = ['prefix'] + __slots__ = ['_prefix', '_dict_registry'] def __init__(self, prefix): assert self.cls is not None assert self.suffix is not None - self.prefix = os.path.abspath(prefix) + self._prefix = os.path.abspath(prefix) + self._dict_registry = dict() + + @property + def prefix(self): + return self._prefix def __eq__(self, other): return os.path.realpath(self.prefix) == os.path.realpath(other.prefix) and \ @@ -32,7 +37,9 @@ def __repr__(self): __str__ = __repr__ def __getitem__(self, key): - return self.cls(os.path.join(self.prefix, key) + self.suffix) + if key not in self._dict_registry: + self._dict_registry[key] = self.cls(os.path.join(self.prefix, key) + self.suffix) + return self._dict_registry[key] def __setitem__(self, key, value): tmp_key = str(uuid.uuid4()) @@ -53,6 +60,8 @@ def __setitem__(self, key, value): except KeyError: pass raise error + else: + del self._dict_registry[key] def __delitem__(self, key): try: From e2598e7da96cc8caad08401e6b447580e8f1e3cb Mon Sep 17 00:00:00 2001 From: Carl Simon Adorf Date: Tue, 19 Feb 2019 12:06:47 -0500 Subject: [PATCH 06/15] Add unit tests for custom data stores. In addition to testing 'job.data', we also test 'job.stores.test'. 
--- tests/test_job.py | 363 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 363 insertions(+) diff --git a/tests/test_job.py b/tests/test_job.py index dc5f80b6f..bfd678e60 100644 --- a/tests/test_job.py +++ b/tests/test_job.py @@ -1384,5 +1384,368 @@ def open_data(job): yield +class JobOpenCustomDataTest(BaseJobTest): + + @staticmethod + @contextmanager + def open_data(job): + with job.stores.test: + yield + + def test_get_set(self): + key = 'get_set' + d = testdata() + job = self.open_job(test_token) + with self.open_data(job): + self.assertFalse(bool(job.stores.test)) + self.assertEqual(len(job.stores.test), 0) + self.assertNotIn(key, job.stores.test) + job.stores.test[key] = d + self.assertTrue(bool(job.stores.test)) + self.assertEqual(len(job.stores.test), 1) + self.assertIn(key, job.stores.test) + self.assertEqual(job.stores.test[key], d) + self.assertEqual(job.stores.test.get(key), d) + self.assertEqual(job.stores.test.get('bs', d), d) + + def test_del(self): + key = 'del0' + key1 = 'del1' + d = testdata() + d1 = testdata() + job = self.open_job(test_token) + with self.open_data(job): + self.assertEqual(len(job.stores.test), 0) + self.assertNotIn(key, job.stores.test) + job.stores.test[key] = d + self.assertEqual(len(job.stores.test), 1) + self.assertIn(key, job.stores.test) + job.stores.test[key1] = d1 + self.assertEqual(len(job.stores.test), 2) + self.assertIn(key, job.stores.test) + self.assertIn(key1, job.stores.test) + self.assertEqual(job.stores.test[key], d) + self.assertEqual(job.stores.test[key1], d1) + del job.stores.test[key] + self.assertEqual(len(job.stores.test), 1) + self.assertIn(key1, job.stores.test) + self.assertNotIn(key, job.stores.test) + + def test_get_set_data(self): + key = 'get_set' + d = testdata() + job = self.open_job(test_token) + with self.open_data(job): + self.assertFalse(bool(job.stores.test)) + self.assertEqual(len(job.stores.test), 0) + self.assertNotIn(key, job.stores.test) + job.stores.test[key] = d + 
self.assertTrue(bool(job.stores.test)) + self.assertEqual(len(job.stores.test), 1) + self.assertIn(key, job.stores.test) + self.assertEqual(job.stores.test[key], d) + self.assertEqual(job.stores.test.get(key), d) + self.assertEqual(job.stores.test.get('bs', d), d) + + def test_set_set_data(self): + key0, key1 = 'set_set0', 'set_set1' + d0, d1 = testdata(), testdata() + job = self.open_job(test_token) + with self.open_data(job): + self.assertFalse(bool(job.stores.test)) + self.assertEqual(len(job.stores.test), 0) + self.assertNotIn(key0, job.stores.test) + job.stores.test[key0] = d0 + self.assertTrue(bool(job.stores.test)) + self.assertEqual(len(job.stores.test), 1) + self.assertIn(key0, job.stores.test) + self.assertEqual(job.stores.test[key0], d0) + job = self.open_job(test_token) + with self.open_data(job): + self.assertTrue(bool(job.stores.test)) + self.assertEqual(len(job.stores.test), 1) + self.assertIn(key0, job.stores.test) + self.assertEqual(job.stores.test[key0], d0) + job = self.open_job(test_token) + with self.open_data(job): + job.stores.test[key1] = d1 + self.assertTrue(bool(job.stores.test)) + self.assertEqual(len(job.stores.test), 2) + self.assertIn(key0, job.stores.test) + self.assertIn(key1, job.stores.test) + self.assertEqual(job.stores.test[key0], d0) + self.assertEqual(job.stores.test[key1], d1) + + def test_get_set_nested(self): + d0 = testdata() + d1 = testdata() + d2 = testdata() + assert d0 != d1 != d2 + job = self.open_job(test_token) + with self.open_data(job): + self.assertEqual(len(job.stores.test), 0) + self.assertNotIn('key0', job.stores.test) + job.stores.test['key0'] = d0 + self.assertEqual(len(job.stores.test), 1) + self.assertIn('key0', job.stores.test) + self.assertEqual(job.stores.test['key0'], d0) + with self.assertRaises(AttributeError): + job.stores.test.key0.key1 + job.stores.test.key0 = {'key1': d0} + self.assertEqual(len(job.stores.test), 1) + self.assertIn('key0', job.stores.test) + self.assertEqual(dict(job.stores.test), 
{'key0': {'key1': d0}}) + self.assertEqual(job.stores.test['key0'], {'key1': d0}) + self.assertEqual(job.stores.test['key0']['key1'], d0) + self.assertEqual(job.stores.test.key0, {'key1': d0}) + self.assertEqual(job.stores.test.key0.key1, d0) + job.stores.test.key0.key1 = d1 + self.assertEqual(job.stores.test, {'key0': {'key1': d1}}) + self.assertEqual(job.stores.test['key0'], {'key1': d1}) + self.assertEqual(job.stores.test['key0']['key1'], d1) + self.assertEqual(job.stores.test.key0, {'key1': d1}) + self.assertEqual(job.stores.test.key0.key1, d1) + job.stores.test['key0']['key1'] = d2 + self.assertEqual(job.stores.test, {'key0': {'key1': d2}}) + self.assertEqual(job.stores.test['key0'], {'key1': d2}) + self.assertEqual(job.stores.test['key0']['key1'], d2) + self.assertEqual(job.stores.test.key0, {'key1': d2}) + self.assertEqual(job.stores.test.key0.key1, d2) + + def test_get_set_nested_data(self): + d0 = testdata() + d1 = testdata() + d2 = testdata() + assert d0 != d1 != d2 + job = self.open_job(test_token) + with self.open_data(job): + self.assertEqual(len(job.stores.test), 0) + self.assertNotIn('key0', job.stores.test) + job.stores.test['key0'] = d0 + self.assertEqual(len(job.stores.test), 1) + self.assertIn('key0', job.stores.test) + self.assertEqual(job.stores.test['key0'], d0) + with self.assertRaises(AttributeError): + job.stores.test.key0.key1 + job.stores.test.key0 = {'key1': d0} + self.assertEqual(len(job.stores.test), 1) + self.assertIn('key0', job.stores.test) + self.assertEqual(dict(job.stores.test), {'key0': {'key1': d0}}) + self.assertEqual(job.stores.test['key0'], {'key1': d0}) + self.assertEqual(job.stores.test['key0']['key1'], d0) + self.assertEqual(job.stores.test.key0, {'key1': d0}) + self.assertEqual(job.stores.test.key0.key1, d0) + job.stores.test.key0.key1 = d1 + self.assertEqual(job.stores.test, {'key0': {'key1': d1}}) + self.assertEqual(job.stores.test['key0'], {'key1': d1}) + self.assertEqual(job.stores.test['key0']['key1'], d1) + 
self.assertEqual(job.stores.test.key0, {'key1': d1}) + self.assertEqual(job.stores.test.key0.key1, d1) + job.stores.test['key0']['key1'] = d2 + self.assertEqual(job.stores.test, {'key0': {'key1': d2}}) + self.assertEqual(job.stores.test['key0'], {'key1': d2}) + self.assertEqual(job.stores.test['key0']['key1'], d2) + self.assertEqual(job.stores.test.key0, {'key1': d2}) + self.assertEqual(job.stores.test.key0.key1, d2) + + def test_assign(self): + key = 'assign' + d0 = testdata() + d1 = testdata() + job = self.open_job(test_token) + with self.open_data(job): + self.assertEqual(len(job.stores.test), 0) + job.stores.test[key] = d0 + self.assertEqual(len(job.stores.test), 1) + self.assertEqual(dict(job.stores.test), {key: d0}) + with self.assertRaises(ValueError): + job.stores.test = d1 + job.stores.test = {key: d1} + self.assertEqual(len(job.stores.test), 1) + self.assertEqual(dict(job.stores.test), {key: d1}) + + def test_assign_data(self): + key = 'assign' + d0 = testdata() + d1 = testdata() + job = self.open_job(test_token) + with self.open_data(job): + self.assertEqual(len(job.stores.test), 0) + job.stores.test[key] = d0 + self.assertEqual(len(job.stores.test), 1) + self.assertEqual(dict(job.stores.test), {key: d0}) + with self.assertRaises(ValueError): + job.stores.test = d1 + job.stores.test = {key: d1} + self.assertEqual(len(job.stores.test), 1) + self.assertEqual(dict(job.stores.test), {key: d1}) + + def test_copy_data(self): + key = 'get_set' + d = testdata() + job = self.open_job(test_token) + with self.open_data(job): + job.stores.test[key] = d + self.assertTrue(bool(job.stores.test)) + self.assertEqual(len(job.stores.test), 1) + self.assertIn(key, job.stores.test) + self.assertEqual(job.stores.test[key], d) + self.assertEqual(job.stores.test.get(key), d) + self.assertEqual(job.stores.test.get('bs', d), d) + copy = dict(job.stores.test) + self.assertTrue(bool(copy)) + self.assertEqual(len(copy), 1) + self.assertIn(key, copy) + self.assertEqual(copy[key], d) 
+ self.assertEqual(copy.get(key), d) + self.assertEqual(copy.get('bs', d), d) + + def test_update(self): + key = 'get_set' + d = testdata() + job = self.open_job(test_token) + with self.open_data(job): + job.stores.test.update({key: d}) + self.assertIn(key, job.stores.test) + + def test_clear_data(self): + key = 'clear' + d = testdata() + job = self.open_job(test_token) + with self.open_data(job): + job.stores.test[key] = d + self.assertIn(key, job.stores.test) + self.assertEqual(len(job.stores.test), 1) + job.stores.test.clear() + self.assertNotIn(key, job.stores.test) + self.assertEqual(len(job.stores.test), 0) + + def test_reopen(self): + key = 'clear' + d = testdata() + job = self.open_job(test_token) + with self.open_data(job): + job.stores.test[key] = d + self.assertIn(key, job.stores.test) + self.assertEqual(len(job.stores.test), 1) + job2 = self.open_job(test_token) + with self.open_data(job2): + self.assertIn(key, job2.stores.test) + self.assertEqual(len(job2.stores.test), 1) + + def test_concurrency(self): + key = 'concurrent' + d = testdata() + job = self.open_job(test_token) + job2 = self.open_job(test_token) + with self.open_data(job): + with self.open_data(job2): + self.assertNotIn(key, job.stores.test) + self.assertNotIn(key, job2.stores.test) + job.stores.test[key] = d + self.assertIn(key, job.stores.test) + self.assertIn(key, job2.stores.test) + + def test_remove(self): + key = 'remove' + job = self.open_job(test_token) + job.remove() + d = testdata() + with self.open_data(job): + job.stores.test[key] = d + self.assertIn(key, job.stores.test) + self.assertEqual(len(job.stores.test), 1) + fn_test = os.path.join(job.workspace(), 'test') + with open(fn_test, 'w') as file: + file.write('test') + self.assertTrue(os.path.isfile(fn_test)) + job.remove() + with self.open_data(job): + self.assertNotIn(key, job.stores.test) + self.assertFalse(os.path.isfile(fn_test)) + + def test_clear_job(self): + key = 'clear' + job = self.open_job(test_token) + 
self.assertNotIn(job, self.project) + job.clear() + self.assertNotIn(job, self.project) + job.clear() + self.assertNotIn(job, self.project) + job.init() + self.assertIn(job, self.project) + job.clear() + self.assertIn(job, self.project) + job.clear() + job.clear() + self.assertIn(job, self.project) + d = testdata() + with self.open_data(job): + job.stores.test[key] = d + self.assertIn(job, self.project) + self.assertIn(key, job.stores.test) + self.assertEqual(len(job.stores.test), 1) + job.clear() + with self.open_data(job): + self.assertEqual(len(job.stores.test), 0) + with open(job.fn('test'), 'w') as file: + file.write('test') + self.assertTrue(job.isfile('test')) + self.assertIn(job, self.project) + job.clear() + self.assertFalse(job.isfile('test')) + with self.open_data(job): + self.assertEqual(len(job.stores.test), 0) + + def test_reset(self): + key = 'reset' + job = self.open_job(test_token) + self.assertNotIn(job, self.project) + job.reset() + self.assertIn(job, self.project) + with self.open_data(job): + self.assertEqual(len(job.stores.test), 0) + job.stores.test[key] = testdata() + self.assertEqual(len(job.stores.test), 1) + job.reset() + self.assertIn(job, self.project) + with self.open_data(job): + self.assertEqual(len(job.stores.test), 0) + + def test_data(self): + key = 'test_data' + job = self.open_job(test_token) + + def check_content(key, d): + self.assertEqual(job.stores.test[key], d) + self.assertEqual(getattr(job.stores.test, key), d) + self.assertEqual(dict(job.stores.test)[key], d) + self.assertEqual(job.stores.test[key], d) + self.assertEqual(getattr(job.stores.test, key), d) + self.assertEqual(dict(job.stores.test)[key], d) + + with self.open_data(job): + d = testdata() + job.stores.test[key] = d + check_content(key, d) + d2 = testdata() + job.stores.test[key] = d2 + check_content(key, d2) + d3 = testdata() + job.stores.test[key] = d3 + check_content(key, d3) + d4 = testdata() + setattr(job.stores.test, key, d4) + check_content(key, d4) + + 
+class JobClosedCustomDataTest(JobOpenCustomDataTest): + + @staticmethod + @contextmanager + def open_data(job): + yield + + if __name__ == '__main__': unittest.main() From 7f0aef249dbb5b79a01a35a62ab04c439697b771 Mon Sep 17 00:00:00 2001 From: Carl Simon Adorf Date: Tue, 19 Feb 2019 12:07:47 -0500 Subject: [PATCH 07/15] Fix dict persistency bug for job.stores. This patch fixes an issue that required users to explicitly hold a reference to each store returned from `job.stores`, otherwise the returned container would be immediately deleted prior to use. --- signac/contrib/job.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/signac/contrib/job.py b/signac/contrib/job.py index 19bbb76e6..9d577778a 100644 --- a/signac/contrib/job.py +++ b/signac/contrib/job.py @@ -76,6 +76,9 @@ def __init__(self, project, statepoint, _id=None): self._fn_doc = os.path.join(self._wd, self.FN_DOCUMENT) self._document = None + # Prepare job h5-stores + self._stores = H5StoreManager(self._wd) + # Prepare current working directory for context management self._cwd = list() @@ -259,7 +262,7 @@ def doc(self, new_doc): @property def stores(self): - return H5StoreManager(self._wd) + return self.init()._stores @property def data(self): @@ -271,7 +274,7 @@ def data(self): :return: An HDF5-backed datastore. :rtype: :class:`~signac.core.h5store.H5Store`""" - return self.init().stores[self.KEY_DATA] + return self.stores[self.KEY_DATA] @data.setter def data(self, new_data): From 8c10735d7a71b10864727679ba7fcbac80d31b6b Mon Sep 17 00:00:00 2001 From: Carl Simon Adorf Date: Tue, 19 Feb 2019 12:19:19 -0500 Subject: [PATCH 08/15] Fix pickling issue in DictManager. 
--- signac/core/dict_manager.py | 7 +++++++ tests/test_h5store_manager.py | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/signac/core/dict_manager.py b/signac/core/dict_manager.py index aec2d846d..31c8c404e 100644 --- a/signac/core/dict_manager.py +++ b/signac/core/dict_manager.py @@ -106,3 +106,10 @@ def keys(self): def __len__(self): return len(list(self.keys())) + + def __getstate__(self): + return dict(_prefix=self._prefix, _dict_registry=self._dict_registry) + + def __setstate__(self, d): + self._prefix = d['_prefix'] + self._dict_registry = d['_dict_registry'] diff --git a/tests/test_h5store_manager.py b/tests/test_h5store_manager.py index ac74e62ff..dd40ae2ad 100644 --- a/tests/test_h5store_manager.py +++ b/tests/test_h5store_manager.py @@ -3,6 +3,7 @@ # This software is licensed under the BSD 3-Clause License. import os import unittest +import pickle from signac.core.h5store import H5StoreManager from signac.common import six @@ -89,6 +90,9 @@ def test_contains(self): for key in keys: self.assertIn(key, self.store) + def test_pickle(self): + self.assertEqual(pickle.loads(pickle.dumps(self.store)), self.store) + if __name__ == '__main__': unittest.main() From dae9ea91910143aa9c42c3fb1677531c11e607f2 Mon Sep 17 00:00:00 2001 From: Carl Simon Adorf Date: Tue, 19 Feb 2019 16:09:01 -0500 Subject: [PATCH 09/15] Fix bug causing the DictManager.__delattr__ to fail. 
--- signac/core/dict_manager.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/signac/core/dict_manager.py b/signac/core/dict_manager.py index 31c8c404e..e098a5901 100644 --- a/signac/core/dict_manager.py +++ b/signac/core/dict_manager.py @@ -89,11 +89,11 @@ def __setattr__(self, name, value): else: self.__setitem__(name, value) - def __delattr__(self, name, value): + def __delattr__(self, name): if name.startswith('__') or name in self.__slots__: - super(DictManager, self).__delattr__(name, value) + super(DictManager, self).__delattr__(name) else: - self.__delitem__(name, value) + self.__delitem__(name) def __iter__(self): for fn in os.listdir(self.prefix): From ed4ddde09599a29e3d91b0c1212c697753b64e4b Mon Sep 17 00:00:00 2001 From: Carl Simon Adorf Date: Wed, 20 Feb 2019 10:54:47 -0500 Subject: [PATCH 10/15] Implement H5Store.__delattr__ method. --- signac/core/h5store.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/signac/core/h5store.py b/signac/core/h5store.py index f9942ebe5..10f022afb 100644 --- a/signac/core/h5store.py +++ b/signac/core/h5store.py @@ -400,6 +400,12 @@ def __setattr__(self, key, value): else: self.__setitem__(key, value) + def __delattr__(self, key): + if key.startswith('__') or key in self.__slots__: + super(H5Store, self).__delattr__(key) + else: + self.__delitem__(key) + def __iter__(self): with _ensure_open(self): # The generator below should be refactored to use 'yield from' From ce814b4246b0423623c385a1c96e178c677f77b2 Mon Sep 17 00:00:00 2001 From: Carl Simon Adorf Date: Sun, 24 Feb 2019 11:19:26 -0500 Subject: [PATCH 11/15] Update changelog. --- changelog.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.txt b/changelog.txt index f4ca40989..3e38c997e 100644 --- a/changelog.txt +++ b/changelog.txt @@ -24,7 +24,7 @@ Highlights Added +++++ - - Adds an ``H5Store`` and a ``H5StoreManager`` class, useful for storing array-like data with an HDF5 backend. 
Accessible via ``with job.data:`` or ``with project.data:``. + - Adds an ``H5Store`` and a ``H5StoreManager`` class, useful for storing array-like data with an HDF5-backend. Those classes are exposed as part of the ``job.data``, ``job.stores``, ``project.data``, and ``project.stores`` properties. - Adds the ``signac.get_job()`` and the ``signac.Project.get_job()`` functions which allow users to get a job handle by switching into or providing the job's workspace directory. - Add automatic cast of numpy arrays to lists when storing them within a `JSONDict`, e.g., a `job.statepoint` or `job.document`. - Enable `Collection` class to manage collections stored in gzip files. From 32b1b1b9e4f9b2a62c6ac8745ec8accf174bc320 Mon Sep 17 00:00:00 2001 From: Carl Simon Adorf Date: Sun, 24 Feb 2019 16:44:57 -0500 Subject: [PATCH 12/15] Update Job.data/stores and Project.data/stores doc strings. --- signac/contrib/job.py | 26 ++++++++++++++++++++++++-- signac/contrib/project.py | 24 +++++++++++++++++++++++- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/signac/contrib/job.py b/signac/contrib/job.py index 9d577778a..3ddb44e11 100644 --- a/signac/contrib/job.py +++ b/signac/contrib/job.py @@ -262,6 +262,28 @@ def doc(self, new_doc): @property def stores(self): + """Access HDF5-stores associated with this job. + + Use this property to access an HDF5-file within the job's workspace + directory using the H5Store dict-like interface. + + This is an example for accessing an HDF5-file called 'my_data.h5' within + the job's workspace: + + job.stores['my_data']['array'] = np.random.random((32, 4)) + + This is equivalent to: + + H5Store(job.fn('my_data.h5'))['array'] = np.random.random((32, 4)) + + Both the `job.stores` and the `H5Store` itself support attribute + access. The above example could therefore also be expressed as + + job.stores.my_data.array = np.random.random((32, 4)) + + :return: The HDF5-Store manager for this job.
+ :rtype: :class:`~..core.h5store.H5StoreManager` + """ return self.init()._stores @property @@ -270,10 +292,10 @@ def data(self): Equivalent to: - return job.store['signac_data'] + return job.stores['signac_data'] :return: An HDF5-backed datastore. - :rtype: :class:`~signac.core.h5store.H5Store`""" + :rtype: :class:`~..core.h5store.H5Store`""" return self.stores[self.KEY_DATA] @data.setter diff --git a/signac/contrib/project.py b/signac/contrib/project.py index 12f6bccf0..7ad679fe0 100644 --- a/signac/contrib/project.py +++ b/signac/contrib/project.py @@ -303,6 +303,28 @@ def doc(self, new_doc): @property def stores(self): + """Access HDF5-stores associated with this project. + + Use this property to access an HDF5-file within the project's root + directory using the H5Store dict-like interface. + + This is an example for accessing an HDF5-file called 'my_data.h5' within + the project's root directory: + + project.stores['my_data']['array'] = np.random.random((32, 4)) + + This is equivalent to: + + H5Store(project.fn('my_data.h5'))['array'] = np.random.random((32, 4)) + + Both the `project.stores` and the `H5Store` itself support attribute + access. The above example could therefore also be expressed as + + project.stores.my_data.array = np.random.random((32, 4)) + + :return: The HDF5-Store manager for this project. + :rtype: :class:`~..core.h5store.H5StoreManager` + """ return H5StoreManager(self._rd) @property @@ -314,7 +336,7 @@ def data(self): return project.store['signac_data'] :return: An HDF5-backed datastore. - :rtype: :class:`~signac.core.h5store.H5DictManager` + :rtype: :class:`~..core.h5store.H5Store` """ return self.stores[self.KEY_DATA] From eb5bd7369ef0c98e590eebd40424daaa983186b6 Mon Sep 17 00:00:00 2001 From: Carl Simon Adorf Date: Sun, 24 Feb 2019 16:46:14 -0500 Subject: [PATCH 13/15] Improve documentation of DictManager constructor.
--- signac/core/dict_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/signac/core/dict_manager.py b/signac/core/dict_manager.py index e098a5901..eb6293f8d 100644 --- a/signac/core/dict_manager.py +++ b/signac/core/dict_manager.py @@ -18,8 +18,8 @@ class DictManager(object): __slots__ = ['_prefix', '_dict_registry'] def __init__(self, prefix): - assert self.cls is not None - assert self.suffix is not None + assert self.cls is not None, "Subclasses of DictManager must define the cls variable." + assert self.suffix is not None, "Subclasses of DictManager must define the suffix variable." self._prefix = os.path.abspath(prefix) self._dict_registry = dict() From fa32152f938dbf57181de1c2bba1bea6faf96bd2 Mon Sep 17 00:00:00 2001 From: Carl Simon Adorf Date: Sun, 24 Feb 2019 17:01:46 -0500 Subject: [PATCH 14/15] Add unit tests for job.stores implicit initialization. --- tests/test_job.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_job.py b/tests/test_job.py index bfd678e60..be24925b2 100644 --- a/tests/test_job.py +++ b/tests/test_job.py @@ -1383,6 +1383,19 @@ class JobClosedDataTest(JobOpenDataTest): def open_data(job): yield + def test_implicit_initialization(self): + job = self.open_job(test_token) + self.assertNotIn('test', job.stores) + self.assertNotIn('foo', job.stores.test) + self.assertEqual(list(job.stores.keys()), []) + self.assertEqual(list(job.stores), []) + self.assertNotIn('test', job.stores) + job.stores.test.foo = True + self.assertIn('test', job.stores) + self.assertIn('foo', job.stores.test) + self.assertEqual(list(job.stores.keys()), ['test']) + self.assertEqual(list(job.stores), ['test']) + class JobOpenCustomDataTest(BaseJobTest): From f42ed84d3e44ef803b4a52d17e4991986b7c2553 Mon Sep 17 00:00:00 2001 From: Carl Simon Adorf Date: Sun, 24 Feb 2019 17:11:06 -0500 Subject: [PATCH 15/15] Fix a key name in a job unit test. 
--- tests/test_job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_job.py b/tests/test_job.py index be24925b2..87dad6926 100644 --- a/tests/test_job.py +++ b/tests/test_job.py @@ -1634,7 +1634,7 @@ def test_clear_data(self): self.assertEqual(len(job.stores.test), 0) def test_reopen(self): - key = 'clear' + key = 'reopen' d = testdata() job = self.open_job(test_token) with self.open_data(job):