From a91fb16a013f2f79b6196cd9439d132ffc7c159c Mon Sep 17 00:00:00 2001
From: Oliver Ruebel
Date: Thu, 18 Jul 2019 01:24:40 -0700
Subject: [PATCH 1/9] Add NWBZarrIO class

---
 src/pynwb/__init__.py | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py
index 30a39ecb8..b9588374f 100644
--- a/src/pynwb/__init__.py
+++ b/src/pynwb/__init__.py
@@ -13,6 +13,7 @@
 from hdmf.utils import docval, getargs, popargs, call_docval_func  # noqa: E402
 from hdmf.backends.io import HDMFIO  # noqa: E402
 from hdmf.backends.hdf5 import HDF5IO as _HDF5IO  # noqa: E402
+from hdmf.backends.zarr.zarr_tools import ZarrIO as _ZarrIO
 from hdmf.validate import ValidatorMap  # noqa: E402
 from hdmf.build import BuildManager  # noqa: E402
 
@@ -231,6 +232,44 @@ def __init__(self, **kwargs):
             manager = get_manager()
         super(NWBHDF5IO, self).__init__(path, manager=manager, mode=mode, file=file_obj, comm=comm)
 
+
+class NWBZarrIO(_ZarrIO):
+
+    @docval({'name': 'path', 'type': str, 'doc': 'the path to the Zarr file'},
+            {'name': 'mode', 'type': str,
+             'doc': 'the mode to open the Zarr file with, one of ("w", "r", "r+", "a", "w-")'},
+            {'name': 'load_namespaces', 'type': bool,
+             'doc': 'whether or not to load cached namespaces from given path - not applicable in write mode',
+             'default': False},
+            {'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager to use for I/O', 'default': None},
+            {'name': 'extensions', 'type': (str, TypeMap, list),
+             'doc': 'a path to a namespace, a TypeMap, or a list consisting paths \
+to namespaces and TypeMaps', 'default': None},
+            {'name': 'comm', 'type': 'Intracomm',
+             'doc': 'the MPI communicator to use for parallel I/O', 'default': None})
+    def __init__(self, **kwargs):
+        path, mode, manager, extensions, load_namespaces, file_obj, comm =\
+            popargs('path', 'mode', 'manager', 'extensions', 'load_namespaces', 'file', 'comm', kwargs)
+        if load_namespaces:
+            if manager is not None:
+                warn("loading namespaces from file - ignoring 'manager'")
+            if extensions is not None:
+                warn("loading namespaces from file - ignoring 'extensions' argument")
+            # namespaces are not loaded when creating an NWBHDF5IO object in write mode
+            if 'w' in mode or mode == 'x':
+                raise ValueError("cannot load namespaces from file when writing to it")
+
+            tm = get_type_map()
+            super(NWBZarrIO, self).load_namespaces(tm, path)
+            manager = BuildManager(tm)
+        else:
+            if manager is not None and extensions is not None:
+                raise ValueError("'manager' and 'extensions' cannot be specified together")
+            elif extensions is not None:
+                manager = get_manager(extensions=extensions)
+            elif manager is None:
+                manager = get_manager()
+        super(NWBZarrIO, self).__init__(path, manager=manager, mode=mode, file=file_obj, comm=comm)
 
 from . import io as __io  # noqa: F401,E402
 from .core import NWBContainer, NWBData  # noqa: F401,E402
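For illustration, a minimal usage sketch of the NWBZarrIO class added in this patch. It assumes the matching hdmf Zarr branch is installed and that hdmf's ZarrIO supports the usual HDMFIO write/read/context-manager interface; the file name and NWBFile contents are placeholders, not part of the patch.

```python
from datetime import datetime
from dateutil.tz import tzutc
from pynwb import NWBFile, NWBZarrIO

# Placeholder NWBFile; any valid file works here.
nwbfile = NWBFile('NWBZarrIO demo', 'NWBZARR_DEMO',
                  datetime(2019, 7, 18, 12, tzinfo=tzutc()))

# Write to a Zarr store (a directory on disk rather than a single file) ...
with NWBZarrIO('demo_zarr.nwb', mode='w') as io:
    io.write(nwbfile)

# ... and read it back with the same backend.
with NWBZarrIO('demo_zarr.nwb', mode='r') as io:
    read_nwbfile = io.read()
```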
From 25127b400539e312ec05a095ca27df8febf5ad68 Mon Sep 17 00:00:00 2001
From: Oliver Ruebel
Date: Thu, 18 Jul 2019 01:25:39 -0700
Subject: [PATCH 2/9] Add roundtrip write-only test for ZarrIO

---
 tests/integration/ui_write/base.py | 43 +++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/tests/integration/ui_write/base.py b/tests/integration/ui_write/base.py
index d5d1e315b..f6c396794 100644
--- a/tests/integration/ui_write/base.py
+++ b/tests/integration/ui_write/base.py
@@ -4,10 +4,12 @@
 import os
 import numpy as np
 import h5py
+import shutil
 
 import numpy.testing as npt
 
-from pynwb import NWBContainer, get_manager, NWBFile, NWBData, NWBHDF5IO, validate as pynwb_validate
+from pynwb import NWBContainer, get_manager, NWBFile, NWBData, NWBHDF5IO, NWBZarrIO, validate as pynwb_validate
 from hdmf.backends.hdf5 import HDF5IO
+from hdmf.backends.zarr import ZarrIO
 
 CORE_NAMESPACE = 'core'
@@ -147,7 +149,11 @@ def setUp(self):
         self.start_time = datetime(1971, 1, 1, 12, tzinfo=tzutc())
         self.create_date = datetime(2018, 4, 15, 12, tzinfo=tzlocal())
         self.container_type = self.container.__class__.__name__
-        self.filename = 'test_%s.nwb' % self.container_type
+        test_case_name = str(self.id()).split(".")[-1]
+        if test_case_name == "test_zarr_roundtrip":
+            self.filename = 'test_zarrio_%s' % self.container_type
+        else:
+            self.filename = 'test_%s.nwb' % self.container_type
         self.writer = None
         self.reader = None
 
@@ -157,7 +163,10 @@ def tearDown(self):
         if self.reader is not None:
             self.reader.close()
         if os.path.exists(self.filename) and os.getenv("CLEAN_NWB", '1') not in ('0', 'false', 'FALSE', 'False'):
-            os.remove(self.filename)
+            if os.path.isfile(self.filename):
+                os.remove(self.filename)
+            elif os.path.isdir(self.filename):
+                shutil.rmtree(self.filename)
 
     def roundtripContainer(self, cache_spec=False):
         description = 'a file to test writing and reading a %s' % self.container_type
@@ -187,6 +196,34 @@ def test_roundtrip(self):
         self.assertContainerEqual(self.read_container, self.container)
         self.validate()
 
+    def roundtripContainerZarrIO(self, cache_spec=False):
+        description = 'a file to test writing and reading a %s' % self.container_type
+        identifier = 'TEST_%s' % self.container_type
+        nwbfile = NWBFile(description, identifier, self.start_time, file_create_date=self.create_date)
+        self.addContainer(nwbfile)
+
+        self.writer = ZarrIO(self.filename, manager=get_manager(), mode='w')
+        self.writer.write(nwbfile, cache_spec=cache_spec)
+        self.writer.close()
+        #self.reader = ZarrIO(self.filename, manager=get_manager(), mode='r')
+        #self.read_nwbfile = self.reader.read()
+
+        #try:
+        #    tmp = self.getContainer(self.read_nwbfile)
+        #    return tmp
+        #except Exception as e:
+        #    self.reader.close()
+        #    self.reader = None
+        #    raise e
+
+    def test_zarr_roundtrip(self):
+        self.read_container = self.roundtripContainerZarrIO()
+        # make sure we get a completely new object
+        #str(self.container)  # added as a test to make sure printing works
+        #self.assertNotEqual(id(self.container), id(self.read_container))
+        #self.assertContainerEqual(self.read_container, self.container)
+        #self.validate()
+
     def validate(self):
         # validate created file
         if os.path.exists(self.filename):

From 05172a0dc652aed7f641aa2d3264420937d10ab1 Mon Sep 17 00:00:00 2001
From: Oliver Ruebel
Date: Thu, 18 Jul 2019 03:32:27 -0700
Subject: [PATCH 3/9] Fix bad param in NWBZarrIO

---
 src/pynwb/__init__.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py
index b9588374f..6c090b32e 100644
--- a/src/pynwb/__init__.py
+++ b/src/pynwb/__init__.py
@@ -247,8 +247,8 @@ class NWBZarrIO(_ZarrIO):
             {'name': 'comm', 'type': 'Intracomm',
              'doc': 'the MPI communicator to use for parallel I/O', 'default': None})
     def __init__(self, **kwargs):
-        path, mode, manager, extensions, load_namespaces, file_obj, comm =\
-            popargs('path', 'mode', 'manager', 'extensions', 'load_namespaces', 'file', 'comm', kwargs)
+        path, mode, manager, extensions, load_namespaces, comm =\
+            popargs('path', 'mode', 'manager', 'extensions', 'load_namespaces', 'comm', kwargs)
         if load_namespaces:
             if manager is not None:
                 warn("loading namespaces from file - ignoring 'manager'")
@@ -268,7 +268,7 @@ def __init__(self, **kwargs):
                 manager = get_manager(extensions=extensions)
             elif manager is None:
                 manager = get_manager()
-        super(NWBZarrIO, self).__init__(path, manager=manager, mode=mode, file=file_obj, comm=comm)
+        super(NWBZarrIO, self).__init__(path, manager=manager, mode=mode, comm=comm)
 
 
 from . import io as __io  # noqa: F401,E402
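The "bad param" fixed in PATCH 3 comes from popargs asking for a 'file' argument that the @docval decorator never declares, so it can never be present in kwargs. A small sketch of that failure mode, using a hypothetical Example class that is not part of the patch:

```python
from hdmf.utils import docval, popargs

class Example:
    @docval({'name': 'path', 'type': str, 'doc': 'a path'})
    def __init__(self, **kwargs):
        # 'file' is not declared in the docval above, so it never appears in
        # kwargs and popping it raises an error at construction time -- the
        # same mistake the patch removes from NWBZarrIO.__init__.
        path, file_obj = popargs('path', 'file', kwargs)

# Example(path='demo')  # would raise because 'file' is missing from kwargs
```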
From d4cf86b3f9e951f91cbcdc37cf0019b56284c9a9 Mon Sep 17 00:00:00 2001
From: Oliver Ruebel
Date: Thu, 18 Jul 2019 03:33:21 -0700
Subject: [PATCH 4/9] Enable zarr read tests for roundtrip

---
 tests/integration/ui_write/base.py | 53 +++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 19 deletions(-)

diff --git a/tests/integration/ui_write/base.py b/tests/integration/ui_write/base.py
index f6c396794..46524ecb4 100644
--- a/tests/integration/ui_write/base.py
+++ b/tests/integration/ui_write/base.py
@@ -11,6 +11,8 @@
 from hdmf.backends.hdf5 import HDF5IO
 from hdmf.backends.zarr import ZarrIO
 
+from zarr.core import Array as ZarrArray
+
 CORE_NAMESPACE = 'core'
 
 container_tests = dict()
@@ -93,6 +95,8 @@ def assertContainerEqual(self, container1, container2):  # noqa: C901
             f2 = getattr(container2, nwbfield)
             if isinstance(f1, h5py.Dataset):
                 f1 = f1[()]
+            if isinstance(f1, ZarrArray):
+                f1 = f1[:]
             if isinstance(f1, (tuple, list, np.ndarray)):
                 if len(f1) > 0:
                     if isinstance(f1[0], NWBContainer):
@@ -196,6 +200,15 @@ def test_roundtrip(self):
         self.assertContainerEqual(self.read_container, self.container)
         self.validate()
 
+    def validate(self):
+        # validate created file
+        if os.path.exists(self.filename):
+            with NWBHDF5IO(self.filename, mode='r') as io:
+                errors = pynwb_validate(io)
+                if errors:
+                    for err in errors:
+                        raise Exception(err)
+
     def roundtripContainerZarrIO(self, cache_spec=False):
         description = 'a file to test writing and reading a %s' % self.container_type
         identifier = 'TEST_%s' % self.container_type
@@ -205,33 +218,35 @@ def roundtripContainerZarrIO(self, cache_spec=False):
         self.writer = ZarrIO(self.filename, manager=get_manager(), mode='w')
         self.writer.write(nwbfile, cache_spec=cache_spec)
         self.writer.close()
-        #self.reader = ZarrIO(self.filename, manager=get_manager(), mode='r')
-        #self.read_nwbfile = self.reader.read()
+        self.reader = ZarrIO(self.filename, manager=get_manager(), mode='r')
+        self.read_nwbfile = self.reader.read()
 
-        #try:
-        #    tmp = self.getContainer(self.read_nwbfile)
-        #    return tmp
-        #except Exception as e:
-        #    self.reader.close()
-        #    self.reader = None
-        #    raise e
+        try:
+            tmp = self.getContainer(self.read_nwbfile)
+            return tmp
+        except Exception as e:
+            self.reader.close()
+            self.reader = None
+            raise e
 
     def test_zarr_roundtrip(self):
         self.read_container = self.roundtripContainerZarrIO()
         # make sure we get a completely new object
-        #str(self.container)  # added as a test to make sure printing works
-        #self.assertNotEqual(id(self.container), id(self.read_container))
-        #self.assertContainerEqual(self.read_container, self.container)
-        #self.validate()
+        str(self.container)  # added as a test to make sure printing works
+        self.assertNotEqual(id(self.container), id(self.read_container))
+        self.assertContainerEqual(self.read_container, self.container)
+        self.validate_zarr()
 
-    def validate(self):
+    def validate_zarr(self):
         # validate created file
         if os.path.exists(self.filename):
-            with NWBHDF5IO(self.filename, mode='r') as io:
-                errors = pynwb_validate(io)
-                if errors:
-                    for err in errors:
-                        raise Exception(err)
+            with NWBZarrIO(self.filename, mode='r') as io:
+                # TODO need to update the validator to support Zarr. For now we'll just read the file instead
+                #errors = pynwb_validate(io)
+                #if errors:
+                #    for err in errors:
+                #        raise Exception(err)
+                temp = io.read()
 
     def addContainer(self, nwbfile):
         ''' Should take an NWBFile object and add the container to it '''
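The assertContainerEqual change in PATCH 4 special-cases Zarr arrays the same way h5py datasets are already handled. The idea, restated as a standalone helper (a sketch for illustration, not part of the test suite):

```python
import numpy as np
import h5py
from zarr.core import Array as ZarrArray

def to_ndarray(value):
    """Normalize backend-specific dataset handles to plain numpy arrays so the
    same equality checks work for HDF5- and Zarr-backed containers."""
    if isinstance(value, h5py.Dataset):
        return value[()]   # read the whole HDF5 dataset into memory
    if isinstance(value, ZarrArray):
        return value[:]    # read the whole Zarr array into memory
    return np.asarray(value)
```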
From 82646e268927ae741cca1dafb2a906d700d981ef Mon Sep 17 00:00:00 2001
From: Oliver Ruebel
Date: Thu, 18 Jul 2019 03:36:14 -0700
Subject: [PATCH 5/9] Skip modular storage tests for ZarrIO

---
 tests/integration/ui_write/test_modular_storage.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/integration/ui_write/test_modular_storage.py b/tests/integration/ui_write/test_modular_storage.py
index f033a7c02..260bf52df 100644
--- a/tests/integration/ui_write/test_modular_storage.py
+++ b/tests/integration/ui_write/test_modular_storage.py
@@ -22,6 +22,10 @@ def remove_file(self, path):
             os.remove(path)
 
     def setUp(self):
+        test_case_name = str(self.id()).split(".")[-1]
+        if test_case_name == "test_zarr_roundtrip":
+            self.skipTest("Modular storage testing does not apply to ZarrIO")
+
         self.__manager = get_manager()
         self.start_time = datetime(1971, 1, 1, 12, tzinfo=tzutc())
 

From 5b0334890e60063d3ec3e75021d84e3985245f86 Mon Sep 17 00:00:00 2001
From: Oliver Ruebel
Date: Thu, 1 Aug 2019 14:34:33 -0700
Subject: [PATCH 6/9] Allow pass-through of global chunking option to Zarr

---
 src/pynwb/__init__.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py
index 6c090b32e..68838c7e2 100644
--- a/src/pynwb/__init__.py
+++ b/src/pynwb/__init__.py
@@ -245,10 +245,11 @@ class NWBZarrIO(_ZarrIO):
              'doc': 'a path to a namespace, a TypeMap, or a list consisting paths \
 to namespaces and TypeMaps', 'default': None},
             {'name': 'comm', 'type': 'Intracomm',
-             'doc': 'the MPI communicator to use for parallel I/O', 'default': None})
+             'doc': 'the MPI communicator to use for parallel I/O', 'default': None},
+            {'name': 'chunking', 'type': bool, 'doc': "Enable/Disable chunking of datasets by default", 'default': True})
     def __init__(self, **kwargs):
-        path, mode, manager, extensions, load_namespaces, comm =\
-            popargs('path', 'mode', 'manager', 'extensions', 'load_namespaces', 'comm', kwargs)
+        path, mode, manager, extensions, load_namespaces, comm, chunking =\
+            popargs('path', 'mode', 'manager', 'extensions', 'load_namespaces', 'comm','chunking', kwargs)
         if load_namespaces:
             if manager is not None:
                 warn("loading namespaces from file - ignoring 'manager'")
@@ -268,7 +269,7 @@ def __init__(self, **kwargs):
                 manager = get_manager(extensions=extensions)
             elif manager is None:
                 manager = get_manager()
-        super(NWBZarrIO, self).__init__(path, manager=manager, mode=mode, comm=comm)
+        super(NWBZarrIO, self).__init__(path, manager=manager, mode=mode, comm=comm, chunking=chunking)
 
 
 from . import io as __io  # noqa: F401,E402
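With the chunking flag passed through in PATCH 6, callers can toggle Zarr's default chunking at the I/O level. A hedged sketch follows; how the flag is ultimately applied to individual datasets is decided by the hdmf ZarrIO branch, and the file name and NWBFile contents are placeholders:

```python
from datetime import datetime
from dateutil.tz import tzutc
from pynwb import NWBFile, NWBZarrIO

nwbfile = NWBFile('chunking demo', 'CHUNKING_DEMO',
                  datetime(2019, 8, 1, 12, tzinfo=tzutc()))

# chunking defaults to True; pass False to request unchunked datasets.
with NWBZarrIO('demo_chunked.nwb', mode='w', chunking=False) as io:
    io.write(nwbfile)
```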
From ebf3b2f93bca056cb9a978e721a740fc484963df Mon Sep 17 00:00:00 2001
From: Oliver Ruebel
Date: Thu, 1 Aug 2019 14:40:05 -0700
Subject: [PATCH 7/9] Pass cache_spec=True explicitly and make tests work with older version of HDMF

---
 tests/integration/test_io.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tests/integration/test_io.py b/tests/integration/test_io.py
index 054a0b2d1..8078b85f0 100644
--- a/tests/integration/test_io.py
+++ b/tests/integration/test_io.py
@@ -7,7 +7,10 @@
 
 from pynwb import NWBFile, TimeSeries, get_manager, NWBHDF5IO, validate
 
-from hdmf.backends.io import UnsupportedOperation
+try:  # In case we are using an older version of HDMF
+    from hdmf.backends.io import UnsupportedOperation
+except ImportError:
+    UnsupportedOperation = ValueError
 from hdmf.backends.hdf5 import HDF5IO, H5DataIO
 from hdmf.data_utils import DataChunkIterator
 from hdmf.build import GroupBuilder, DatasetBuilder
@@ -97,7 +100,7 @@ def test_write_cache_spec(self):
         Round-trip test for writing spec and reading it back in
         '''
         with HDF5IO(self.path, manager=self.manager, mode="a") as io:
-            io.write(self.container)
+            io.write(self.container, cache_spec=True)
 
         with File(self.path) as f:
             self.assertIn('specifications', f)
@@ -211,7 +214,7 @@ def test_write_cache_spec(self):
 
         with File(self.path) as fil:
             with HDF5IO(self.path, manager=self.manager, file=fil, mode='a') as io:
-                io.write(self.container)
+                io.write(self.container, cache_spec=True)
 
         with File(self.path) as f:
             self.assertIn('specifications', f)

From f286011a4996533c7c1f933a4381b9621b68c743 Mon Sep 17 00:00:00 2001
From: Oliver Ruebel
Date: Wed, 16 Feb 2022 16:52:45 -0800
Subject: [PATCH 8/9] Update NWBZarrIO class

---
 src/pynwb/__init__.py | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py
index 05cf96c57..2b92d3561 100644
--- a/src/pynwb/__init__.py
+++ b/src/pynwb/__init__.py
@@ -255,6 +255,7 @@ def export(self, **kwargs):
         kwargs['container'] = nwbfile
         call_docval_func(super().export, kwargs)
 
+
 try:
     from hdmf.backends.zarr.zarr_tools import ZarrIO as _ZarrIO
     import zarr
@@ -267,27 +268,31 @@ class NWBZarrIO(_ZarrIO):
             {'name': 'load_namespaces', 'type': bool,
              'doc': 'whether or not to load cached namespaces from given path - not applicable in write mode',
              'default': False},
-            {'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager to use for I/O', 'default': None},
+            {'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager to use for I/O',
+             'default': None},
             {'name': 'extensions', 'type': (str, TypeMap, list),
-             'doc': 'a path to a namespace, a TypeMap, or a list consisting paths to namespaces and TypeMaps',
+             'doc': 'a path to a namespace, a TypeMap, or a list consisting paths to namespaces and TypeMaps',
              'default': None},
-            {'name': 'comm', 'type': 'Intracomm',
-             'doc': 'the MPI communicator to use for parallel I/O', 'default': None},
-            {'name': 'chunking', 'type': bool, 'doc': 'Enable/Disable chunking of datasets by default',
+            {'name': 'synchronizer', 'type': (zarr.ProcessSynchronizer, zarr.ThreadSynchronizer, bool),
+             'doc': 'Zarr synchronizer to use for parallel I/O. If set to True a ProcessSynchronizer is used.',
+             'default': None},
+            {'name': 'chunking', 'type': bool, 'doc': "Enable/Disable chunking of datasets by default",
              'default': True})
     def __init__(self, **kwargs):
-        path, mode, manager, extensions, load_namespaces, comm, chunking =\
-            popargs('path', 'mode', 'manager', 'extensions', 'load_namespaces', 'comm', 'chunking', kwargs)
+        path, mode, manager, extensions, load_namespaces, synchronizer, chunking = \
+            popargs('path', 'mode', 'manager', 'extensions',
+                    'load_namespaces', 'synchronizer', 'chunking', kwargs)
         if load_namespaces:
             if manager is not None:
                 warn("loading namespaces from file - ignoring 'manager'")
             if extensions is not None:
                 warn("loading namespaces from file - ignoring 'extensions' argument")
-            if 'w' in mode or mode == 'x':  # namespaces are not loaded in write mode
+            # namespaces are not loaded when creating an NWBZarrIO object in write mode
+            if 'w' in mode or mode == 'x':
                 raise ValueError("cannot load namespaces from file when writing to it")
 
             tm = get_type_map()
-            super().load_namespaces(tm, path)
+            super(NWBZarrIO, self).load_namespaces(tm, path)
             manager = BuildManager(tm)
         else:
             if manager is not None and extensions is not None:
@@ -296,8 +301,11 @@ def __init__(self, **kwargs):
                 manager = get_manager(extensions=extensions)
             elif manager is None:
                 manager = get_manager()
-        super(NWBZarrIO, self).__init__(path, manager=manager, mode=mode, comm=comm, chunking=chunking)
-
+        super(NWBZarrIO, self).__init__(path,
+                                        manager=manager,
+                                        mode=mode,
+                                        synchronizer=synchronizer,
+                                        chunking=chunking)
 
     @docval({'name': 'src_io', 'type': HDMFIO, 'doc': 'the HDMFIO object for reading the data to export'},
             {'name': 'nwbfile', 'type': 'NWBFile',
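The synchronizer argument introduced in PATCH 8 accepts a Zarr synchronizer object (or True to let the backend create a ProcessSynchronizer itself). A hedged sketch using zarr.ProcessSynchronizer; actual parallel-write behavior depends on the hdmf ZarrIO implementation, and the paths are placeholders:

```python
import zarr
from pynwb import NWBZarrIO

# File locks written under 'demo_zarr.sync' coordinate concurrent writers.
sync = zarr.ProcessSynchronizer('demo_zarr.sync')
io = NWBZarrIO('demo_zarr.nwb', mode='w', synchronizer=sync)
# ... io.write(nwbfile); io.close()
```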
From c5c07b460aff5f2b2b64dd0730fea0ece15137d9 Mon Sep 17 00:00:00 2001
From: Oliver Ruebel
Date: Wed, 16 Feb 2022 17:35:44 -0800
Subject: [PATCH 9/9] Added notebook to illustrate the use of the zarr backend

---
 .../notebooks/zarr_file_conversion_test.ipynb | 291 ++++++++++++++++++
 1 file changed, 291 insertions(+)
 create mode 100644 docs/notebooks/zarr_file_conversion_test.ipynb

diff --git a/docs/notebooks/zarr_file_conversion_test.ipynb b/docs/notebooks/zarr_file_conversion_test.ipynb
new file mode 100644
index 000000000..25e59b47a
--- /dev/null
+++ b/docs/notebooks/zarr_file_conversion_test.ipynb
@@ -0,0 +1,291 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "b03b7582",
+   "metadata": {},
+   "source": [
+    "# Installing the Zarr Backend for NWB"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6bd6b4c2",
+   "metadata": {},
+   "source": [
+    "```\n",
+    "conda create -n nwbzarr python=3.9\n",
+    "conda activate nwbzarr\n",
+    "conda install Cython\n",
+    "conda install numpy==1.21.0\n",
+    "conda install pkgconfig\n",
+    "conda install h5py=3.3.0\n",
+    "conda install pandas==1.3.0\n",
+    "conda install python-dateutil==2.8.1\n",
+    "git clone --recurse-submodules https://github.com/NeurodataWithoutBorders/pynwb.git\n",
+    "cd pynwb\n",
+    "git checkout add/zarrio\n",
+    "pip install -e .\n",
+    "pip uninstall hdmf\n",
+    "git clone --recurse-submodules https://github.com/hdmf-dev/hdmf.git\n",
+    "cd hdmf\n",
+    "git checkout 1.0.3-zarr\n",
+    "conda install --file requirements.txt\n",
+    "pip install -e .\n",
+    "\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "96a1c56c",
+   "metadata": {},
+   "source": [
+    "To use this notebook, the following optional package should also be installed\n",
+    "\n",
+    "```\n",
+    "pip install dandi\n",
+    "conda install jupyter\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
"execution_count": 1, + "id": "41b2187e", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import shutil" + ] + }, + { + "cell_type": "markdown", + "id": "d5841dfd", + "metadata": {}, + "source": [ + "# Download a file from DANDI" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac7df3b3", + "metadata": {}, + "outputs": [], + "source": [ + "from dandi.dandiapi import DandiAPIClient\n", + "\n", + "dandiset_id = '000207' \n", + "filepath = \"sub-1/sub-1_ses-1_ecephys+image.nwb\" # 5 MB file\n", + "with DandiAPIClient() as client:\n", + " asset = client.get_dandiset(dandiset_id, 'draft').get_asset_by_path(filepath)\n", + " s3_path = asset.get_content_url(follow_redirects=1, strip_query=True)\n", + " filename = os.path.basename(asset.path)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7d2b53df", + "metadata": {}, + "outputs": [], + "source": [ + "asset.download(filename)" + ] + }, + { + "cell_type": "markdown", + "id": "7b37360b", + "metadata": {}, + "source": [ + "# Define output settings and clean up old files " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "854dc0ae", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Removing test_hdf5_sub-1_ses-1_ecephys+image.nwb\n", + "Removing test_zarr_sub-1_ses-1_ecephys+image.nwb\n" + ] + } + ], + "source": [ + "zarr_filename = \"test_zarr_\" + filename\n", + "hdf_filename = \"test_hdf5_\" + filename\n", + "\n", + "# Delete our converted HDF5 file from previous runs of this notebook\n", + "if os.path.exists(hdf_filename):\n", + " print(\"Removing %s\" % hdf_filename)\n", + " os.remove(hdf_filename)\n", + "# Delete our converted Zarr file from previous runs of this notebook\n", + "if os.path.exists(zarr_filename):\n", + " print(\"Removing %s\" % zarr_filename)\n", + " shutil.rmtree(zarr_filename)" + ] + }, + { + "cell_type": "markdown", + "id": "240691c5", + "metadata": {}, + "source": [ + "# Convert the file to Zarr" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "981df005", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/oruebel/Devel/nwb/zarr/hdmf/src/hdmf/backends/zarr/zarr_tools.py:78: UserWarning: \u001b[91mThe ZarrIO backend is experimental. It is under active development. The ZarrIO backend may change or be removed at any time and backward compatibility is not guaranteed.\u001b[0m\n", + " warnings.warn(warn_msg)\n" + ] + } + ], + "source": [ + "from pynwb import NWBHDF5IO, NWBZarrIO\n", + "\n", + "with NWBHDF5IO(filename , 'r', load_namespaces=False) as read_io:\n", + " with NWBZarrIO(zarr_filename, mode='w', chunking=True) as export_io:\n", + " export_io.export(src_io=read_io, write_args=dict(link_data=False))" + ] + }, + { + "cell_type": "markdown", + "id": "cd15edcc", + "metadata": {}, + "source": [ + "# Read the Zarr file back in" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "2d8aa004", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/oruebel/Devel/nwb/zarr/pynwb/src/pynwb/base.py:167: UserWarning: Length of data does not match length of timestamps. Your data may be transposed. Time should be on the 0th dimension\n", + " warn(\"Length of data does not match length of timestamps. Your data may be transposed. 
Time should be on \"\n" + ] + } + ], + "source": [ + "zr = NWBZarrIO(zarr_filename, 'r')\n", + "zf = zr.read()" + ] + }, + { + "cell_type": "markdown", + "id": "b5b97c8f", + "metadata": {}, + "source": [ + "# Convert the Zarr file back to HDF5" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c74a470e", + "metadata": {}, + "outputs": [], + "source": [ + "with NWBZarrIO(zarr_filename, mode='r') as read_io:\n", + " with NWBHDF5IO(hdf_filename , 'w') as export_io:\n", + " export_io.export(src_io=read_io, write_args=dict(link_data=False))" + ] + }, + { + "cell_type": "markdown", + "id": "f1cc2427", + "metadata": {}, + "source": [ + "# Read the new HDF5 file back\n", + "\n", + "Now our file has been converted from HDF5 to Zarr and back again to HDF5. Here we check that we can stil read that file" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "51f008f2", + "metadata": {}, + "outputs": [], + "source": [ + "with NWBHDF5IO(hdf_filename , 'r') as hr:\n", + " hf = hr.read()" + ] + }, + { + "cell_type": "markdown", + "id": "a6a56195", + "metadata": {}, + "source": [ + "# Notes\n", + "\n", + "The ZarrIO backend for NWB is under development as part of the following PRs on GitHub:\n", + "\n", + "* **HDMF**: https://github.com/hdmf-dev/hdmf/pull/696\n", + "\n", + " * Related PR: https://github.com/hdmf-dev/hdmf/pull/697 This PR includes all of the general changes to HDMF that we did to implement the Zarr backend. Once #697 is merged #696 should be agains synced with dev, so that the PR then only includes the changes to add Zarr itself.\n", + " \n", + "* **PyNWB**: https://github.com/NeurodataWithoutBorders/pynwb/pull/1018" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e6bbed78", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ffda2b3", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}
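As a quick check after running the notebook, the converted store can also be opened directly with the zarr library. A short sketch for illustration; the exact on-disk layout is determined by the hdmf Zarr backend, and the file name comes from the notebook's conversion step:

```python
import zarr

# The converted NWB "file" is a Zarr group backed by a directory on disk.
root = zarr.open('test_zarr_sub-1_ses-1_ecephys+image.nwb', mode='r')
print(root.tree())  # print the group/dataset hierarchy
```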