From d3ec5f1bfa327ab1f5976f65f7e7518bbf7f8a62 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 23 Nov 2022 13:24:07 -0500 Subject: [PATCH 01/37] Unravelling `ls` issues --- dandi/cli/cmd_ls.py | 3 + dandi/files/__init__.py | 1 + dandi/metadata.py | 124 ++++++++++++++++++++++++++-------------- 3 files changed, 84 insertions(+), 44 deletions(-) diff --git a/dandi/cli/cmd_ls.py b/dandi/cli/cmd_ls.py index 7e45f0fe5..c3d956118 100644 --- a/dandi/cli/cmd_ls.py +++ b/dandi/cli/cmd_ls.py @@ -340,6 +340,8 @@ def get_metadata_ls( def fn(): rec = {} # No need for calling get_metadata if no keys are needed from it + print(path) + print("jsfkfjskl") if keys is None or list(keys) != ["nwb_version"]: try: if schema is not None: @@ -358,6 +360,7 @@ def fn(): digest=Digest.dandi_etag(digest), ).json_dict() else: + print("99999999999") rec = get_metadata(path) except Exception as exc: _add_exc_error(path, rec, errors, exc) diff --git a/dandi/files/__init__.py b/dandi/files/__init__.py index 67f8b7e38..8172195f1 100644 --- a/dandi/files/__init__.py +++ b/dandi/files/__init__.py @@ -97,6 +97,7 @@ def find_dandi_files( # BIDS dataset_description.json file at the path (if a directory) or in a # parent path path_queue: deque[tuple[Path, Optional[BIDSDatasetDescriptionAsset]]] = deque() + print(paths) for p in map(Path, paths): if dandiset_path is not None: try: diff --git a/dandi/metadata.py b/dandi/metadata.py index cfc93b037..125496d92 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -38,7 +38,12 @@ metadata_cache, nwb_has_external_links, ) -from .utils import ensure_datetime, get_mime_type, get_utcnow_datetime +from .utils import ( + ensure_datetime, + find_parent_directory_containing, + get_mime_type, + get_utcnow_datetime, +) lgr = get_logger() @@ -59,11 +64,14 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]: ------- dict """ + from .files import find_dandi_files + # when we run in parallel, these annoying warnings appear ignore_benign_pynwb_warnings() path = str(path) # for Path meta = dict() + print("00000000000000000") if op.isdir(path): try: dandiset = Dandiset(path) @@ -72,50 +80,78 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]: lgr.debug("Failed to get metadata for %s: %s", path, exc) return None - if nwb_has_external_links(path): - raise NotImplementedError( - f"NWB files with external links are not supported: {path}" - ) - - # First read out possibly available versions of specifications for NWB(:N) - meta["nwb_version"] = get_nwb_version(path) - - # PyNWB might fail to load because of missing extensions. - # There is a new initiative of establishing registry of such extensions. - # Not yet sure if PyNWB is going to provide "native" support for needed - # functionality: https://github.com/NeurodataWithoutBorders/pynwb/issues/1143 - # So meanwhile, hard-coded workaround for data types we care about - ndtypes_registry = { - "AIBS_ecephys": "allensdk.brain_observatory.ecephys.nwb", - "ndx-labmetadata-abf": "ndx_dandi_icephys", - } - tried_imports = set() - while True: - try: - meta.update(_get_pynwb_metadata(path)) - break - except KeyError as exc: # ATM there is - lgr.debug("Failed to read %s: %s", path, exc) - res = re.match(r"^['\"\\]+(\S+). not a namespace", str(exc)) - if not res: - raise - ndtype = res.groups()[0] - if ndtype not in ndtypes_registry: - raise ValueError( - "We do not know which extension provides %s. " - "Original exception was: %s. 
" % (ndtype, exc) - ) - import_mod = ndtypes_registry[ndtype] - lgr.debug("Importing %r which should provide %r", import_mod, ndtype) - if import_mod in tried_imports: - raise RuntimeError( - "We already tried importing %s to provide %s, but it seems it didn't help" - % (import_mod, ndtype) - ) - tried_imports.add(import_mod) - __import__(import_mod) + # Clunky test to determine whether this is NWB + if path.endswith(("NWB", "nwb")): + if nwb_has_external_links(path): + raise NotImplementedError( + f"NWB files with external links are not supported: {path}" + ) - meta["nd_types"] = get_neurodata_types(path) + # First read out possibly available versions of specifications for NWB(:N) + print("99999999999") + print(meta) + print("1111111111") + meta["nwb_version"] = get_nwb_version(path) + print("99999999999") + print(meta) + print("1111111111") + + # PyNWB might fail to load because of missing extensions. + # There is a new initiative of establishing registry of such extensions. + # Not yet sure if PyNWB is going to provide "native" support for needed + # functionality: https://github.com/NeurodataWithoutBorders/pynwb/issues/1143 + # So meanwhile, hard-coded workaround for data types we care about + ndtypes_registry = { + "AIBS_ecephys": "allensdk.brain_observatory.ecephys.nwb", + "ndx-labmetadata-abf": "ndx_dandi_icephys", + } + tried_imports = set() + while True: + try: + meta.update(_get_pynwb_metadata(path)) + break + except KeyError as exc: # ATM there is + lgr.debug("Failed to read %s: %s", path, exc) + res = re.match(r"^['\"\\]+(\S+). not a namespace", str(exc)) + if not res: + raise + ndtype = res.groups()[0] + if ndtype not in ndtypes_registry: + raise ValueError( + "We do not know which extension provides %s. " + "Original exception was: %s. 
" % (ndtype, exc) + ) + import_mod = ndtypes_registry[ndtype] + lgr.debug("Importing %r which should provide %r", import_mod, ndtype) + if import_mod in tried_imports: + raise RuntimeError( + "We already tried importing %s to provide %s, but it seems it didn't help" + % (import_mod, ndtype) + ) + tried_imports.add(import_mod) + __import__(import_mod) + + meta["nd_types"] = get_neurodata_types(path) + else: + # dataset_path = find_parent_directory_containing( + # "dataset_description.json", path + # ) + dandiset_path = find_parent_directory_containing("dandiset.yaml", path) + print("99999999999") + print(path) + df = list( + find_dandi_files( + path, + dandiset_path=dandiset_path, + allow_all=True, + ) + ) + assert len(df) == 1 + df = df[0] + print("aaaaaaaaaaaaa") + # a = df.get_metadata() + a = df.get_metadata() + print(a) return meta From dc34ab002ff8a5d363577371b893c0c0ea9e461c Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 23 Nov 2022 18:17:33 -0500 Subject: [PATCH 02/37] strange `Cannot creat weak reference to PosixPath object` --- dandi/files/__init__.py | 8 ++++++++ dandi/metadata.py | 38 +++++++++++++++++++++++++++++--------- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/dandi/files/__init__.py b/dandi/files/__init__.py index 8172195f1..03e83bad6 100644 --- a/dandi/files/__init__.py +++ b/dandi/files/__init__.py @@ -97,7 +97,9 @@ def find_dandi_files( # BIDS dataset_description.json file at the path (if a directory) or in a # parent path path_queue: deque[tuple[Path, Optional[BIDSDatasetDescriptionAsset]]] = deque() + print("=^.^=") print(paths) + print("=^_^=") for p in map(Path, paths): if dandiset_path is not None: try: @@ -107,8 +109,13 @@ def find_dandi_files( "Path {str(p)!r} is not inside Dandiset path {str(dandiset_path)!r}" ) path_queue.append((p, None)) + print(path_queue) while path_queue: p, bidsdd = path_queue.popleft() + print(p) + print("hhhhhhhhhhhh") + print(bidsdd) + print("lölölölölö") if p.name.startswith("."): continue if p.is_dir(): @@ -187,6 +194,7 @@ def dandi_file( factory = DandiFileFactory() else: factory = BIDSFileFactory(bids_dataset_description) + print("hhhhhhhhhhhh") return factory(filepath, path) diff --git a/dandi/metadata.py b/dandi/metadata.py index 125496d92..469cbaae0 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -64,7 +64,8 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]: ------- dict """ - from .files import find_dandi_files + # from .files import dandi_file, find_dandi_files + from .files import dandi_file # when we run in parallel, these annoying warnings appear ignore_benign_pynwb_warnings() @@ -133,19 +134,38 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]: meta["nd_types"] = get_neurodata_types(path) else: - # dataset_path = find_parent_directory_containing( - # "dataset_description.json", path - # ) + dataset_path = find_parent_directory_containing( + "dataset_description.json", path + ) dandiset_path = find_parent_directory_containing("dandiset.yaml", path) - print("99999999999") + print("ſſſſſſſſſſſſſſſſſſſſ") print(path) + # df = list( + # find_dandi_files( + # path, + # dataset_path, + # dandiset_path=dandiset_path, + # allow_all=True, + # ) + # ) + print(type(path)) + p = Path(path) + print(type(p)) + print(type(dandiset_path)) + print(type(dataset_path)) + # df = dandi_file(pathlib.PosixPath(path), dandiset_path, + # bids_dataset_description=dataset_path, + # ) df = list( - find_dandi_files( - path, - dandiset_path=dandiset_path, - allow_all=True, + dandi_file( + 
+                p,
+                dandiset_path,
+                bids_dataset_description=dataset_path,
             )
         )
+        print("ăăăăăăăăăăăăăăăăă")
+        for i in df:
+            print(i)
         assert len(df) == 1
         df = df[0]
         print("aaaaaaaaaaaaa")
         # a = df.get_metadata()
         a = df.get_metadata()
         print(a)

From 8aef54104d13474de2d4b46710cebfee9dcf22e5 Mon Sep 17 00:00:00 2001
From: Horea Christian
Date: Mon, 28 Nov 2022 07:34:29 -0500
Subject: [PATCH 03/37] Fixed weakref issue
---
 dandi/files/bids.py | 10 ++++++++--
 dandi/metadata.py   | 37 +++++++++++++++++++++++--------------
 2 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/dandi/files/bids.py b/dandi/files/bids.py
index 7806f80a2..159a2d4e9 100644
--- a/dandi/files/bids.py
+++ b/dandi/files/bids.py
@@ -54,7 +54,7 @@ def bids_root(self) -> Path:
         """
         The directory on the filesystem in which the BIDS dataset is located
         """
-        return self.filepath.parent
+        return self.filepath.parent.absolute()

     def _validate(self) -> None:
         with self._lock:
@@ -79,7 +79,9 @@ def _validate(self) -> None:
                     self._dataset_errors.append(result)
                 elif result.id == "BIDS.MATCH":
                     assert result.path
+                    print("mimimimimim")
                     bids_path = result.path.relative_to(self.bids_root).as_posix()
+                    print("lililililil")
                     assert result.metadata is not None
                     self._asset_metadata[bids_path] = prepare_metadata(
                         result.metadata
                     )
@@ -154,7 +156,11 @@ def bids_path(self) -> str:
         """
         ``/``-separated path to the asset from the root of the BIDS dataset
         """
-        return self.filepath.relative_to(self.bids_root).as_posix()
+        print("111111")
+        a = self.filepath.absolute().relative_to(self.bids_root).as_posix()
+        print("222222")
+        return a
+        # return self.filepath.relative_to(self.bids_root).as_posix()

     def get_validation_errors(
         self,

diff --git a/dandi/metadata.py b/dandi/metadata.py
index 469cbaae0..765a8fb1b 100644
--- a/dandi/metadata.py
+++ b/dandi/metadata.py
@@ -65,7 +65,7 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]:
     dict
     """
     # from .files import dandi_file, find_dandi_files
-    from .files import dandi_file
+    from .files import dandi_file, find_bids_dataset_description

     # when we run in parallel, these annoying warnings appear
     ignore_benign_pynwb_warnings()
@@ -138,6 +138,8 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]:
             "dataset_description.json", path
         )
         dandiset_path = find_parent_directory_containing("dandiset.yaml", path)
+        print("ßßßßßßßßßßßßßßßßßßßßßßßß")
+        bids_dataset_description = find_bids_dataset_description(path)
         print("ſſſſſſſſſſſſſſſſſſſſ")
         print(path)
         # df = list(
         #     find_dandi_files(
         #         path,
         #         dataset_path,
         #         dandiset_path=dandiset_path,
         #         allow_all=True,
         #     )
         # )
         print(type(path))
         p = Path(path)
-        print(type(p))
-        print(type(dandiset_path))
         print(type(dataset_path))
         # df = dandi_file(pathlib.PosixPath(path), dandiset_path,
         #     bids_dataset_description=dataset_path,
         # )
+        print("łłłłłłłłłłłłłłłłłłłłłłłłłłłłłłłłłłł")
+        print(p, type(p))
+        print(dandiset_path, type(p))
+        print(bids_dataset_description)
+        # df = list(
+        #     dandi_file(
+        #         p,
+        #         dandiset_path,
+        #         bids_dataset_description=bids_dataset_description,
+        #     )
+        # )
+        df = dandi_file(
+            p,
+            dandiset_path,
+            bids_dataset_description=bids_dataset_description,
+        )
         print("ăăăăăăăăăăăăăăăăă")
         # for i in df:
         #     print(i)
         # assert len(df) == 1
         # df = df[0]
         # print("aaaaaaaaaaaaa")
         # a = df.get_metadata()
         a = df.get_metadata()
         print(a)
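The `Cannot create weak reference to PosixPath object` failure that patch 02 records and patch 03 works around is a property of pathlib itself: on the Python versions contemporary with this series, PurePath defines __slots__ without a __weakref__ slot, so any caching or memoization layer that takes weak references to its arguments breaks as soon as it is handed a Path instead of a str. A minimal reproduction, independent of dandi-cli (the file name is illustrative):

import weakref
from pathlib import Path

try:
    weakref.ref(Path("dataset_description.json"))
except TypeError as exc:
    print(exc)  # cannot create weak reference to 'PosixPath' object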
From b3a4b9a6483580f510169af790a76fea9a8955b8 Mon Sep 17 00:00:00 2001
From: Horea Christian
Date: Mon, 28 Nov 2022 08:31:40 -0500
Subject: [PATCH 04/37] Added readme listing and removed some diagnostic print
 calls
---
 dandi/cli/cmd_ls.py |  4 ++--
 dandi/files/bids.py | 22 +++++++++++++------
 dandi/metadata.py   | 52 ++++++++++-----------------------------------
 3 files changed, 28 insertions(+), 50 deletions(-)

diff --git a/dandi/cli/cmd_ls.py b/dandi/cli/cmd_ls.py
index c3d956118..ceed81f86 100644
--- a/dandi/cli/cmd_ls.py
+++ b/dandi/cli/cmd_ls.py
@@ -341,7 +341,7 @@ def fn():
         rec = {}
         # No need for calling get_metadata if no keys are needed from it
         print(path)
-        print("jsfkfjskl")
+        print("QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ")
         if keys is None or list(keys) != ["nwb_version"]:
             try:
                 if schema is not None:
@@ -360,7 +360,7 @@ def fn():
                     digest=Digest.dandi_etag(digest),
                 ).json_dict()
             else:
-                print("99999999999")
+                print("000000000000000000000000000000000")
                 rec = get_metadata(path)
         except Exception as exc:
             _add_exc_error(path, rec, errors, exc)
diff --git a/dandi/files/bids.py b/dandi/files/bids.py
index 159a2d4e9..376882919 100644
--- a/dandi/files/bids.py
+++ b/dandi/files/bids.py
@@ -65,6 +65,20 @@ def _validate(self) -> None:
             bids_paths = [str(self.filepath)] + [
                 str(asset.filepath) for asset in self.dataset_files
             ]
+            # This is an ad-hoc fix which should be removed once bidsschematools greater than
+            # 0.6.0 is released.
+            # It won't cause any trouble afterwards, but it will no longer fulfill any
+            # purpose. The issue is that README* is still required and if we don't
+            # include it explicitly in the listing validation will implicitly fail, even
+            # if the file is present.
+            readme_extensions = ["", "md", "rst", "txt"]
+            for ext in readme_extensions:
+                ds_root = self.filepath.parent
+                readme_candidate = ds_root / Path("README" + ext)
+                if readme_candidate.exists():
+                    bids_paths += [readme_candidate]
+            # end of ad-hoc fix.
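# The workaround above, restated as a self-contained helper for readability.
# This is an illustrative sketch only: readme_listing is not a dandi-cli name,
# and the dotted extensions are an assumption matching where later patches in
# this series land (the empty string covers a bare "README").
from pathlib import Path

def readme_listing(ds_root: Path, bids_paths: list[str]) -> list[str]:
    # Explicitly include an existing README so that validating a partial file
    # listing does not fail implicitly on the dataset-level README requirement.
    for ext in ("", ".md", ".rst", ".txt"):
        candidate = ds_root / f"README{ext}"
        if candidate.exists() and str(candidate) not in bids_paths:
            bids_paths.append(str(candidate))
    return bids_paths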
+
             results = validate_bids(*bids_paths)
             self._dataset_errors: list[ValidationResult] = []
             self._asset_errors: dict[str, list[ValidationResult]] = defaultdict(
                 list
             )
@@ -79,9 +93,7 @@ def _validate(self) -> None:
                     self._dataset_errors.append(result)
                 elif result.id == "BIDS.MATCH":
                     assert result.path
-                    print("mimimimimim")
                     bids_path = result.path.relative_to(self.bids_root).as_posix()
-                    print("lililililil")
                     assert result.metadata is not None
                     self._asset_metadata[bids_path] = prepare_metadata(
                         result.metadata
                     )
@@ -156,11 +168,7 @@ def bids_path(self) -> str:
         """
         ``/``-separated path to the asset from the root of the BIDS dataset
         """
-        print("111111")
-        a = self.filepath.absolute().relative_to(self.bids_root).as_posix()
-        print("222222")
-        return a
-        # return self.filepath.relative_to(self.bids_root).as_posix()
+        return self.filepath.relative_to(self.bids_root).as_posix()

     def get_validation_errors(
         self,
diff --git a/dandi/metadata.py b/dandi/metadata.py
index 765a8fb1b..d1e957179 100644
--- a/dandi/metadata.py
+++ b/dandi/metadata.py
@@ -65,11 +65,13 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]:
     dict
     """
     # from .files import dandi_file, find_dandi_files
+    import os
+
     from .files import dandi_file, find_bids_dataset_description

     # when we run in parallel, these annoying warnings appear
     ignore_benign_pynwb_warnings()
-    path = str(path)  # for Path
+    path = os.path.abspath(str(path))  # for Path
     meta = dict()
     print("00000000000000000")

     if op.isdir(path):
         try:
             dandiset = Dandiset(path)
@@ -89,13 +91,13 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]:
             )

         # First read out possibly available versions of specifications for NWB(:N)
-        print("99999999999")
-        print(meta)
         print("1111111111")
+        print(meta)
+        print("2222222222")
         meta["nwb_version"] = get_nwb_version(path)
-        print("99999999999")
+        print("3333333333")
         print(meta)
-        print("1111111111")
+        print("4444444444")

         # PyNWB might fail to load because of missing extensions.
         # There is a new initiative of establishing registry of such extensions.
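Both branches of get_metadata() lean on find_parent_directory_containing() to locate the enclosing Dandiset (dandiset.yaml) or BIDS dataset (dataset_description.json). Roughly, it behaves like the following sketch (for orientation only; the real implementation lives in dandi/utils.py and may differ in detail):

from pathlib import Path
from typing import Optional, Union

def find_parent_directory_containing(
    filename: str, path: Union[str, Path]
) -> Optional[Path]:
    # Walk upward from `path` until a directory containing `filename` is found.
    path = Path(path).absolute()
    start = path if path.is_dir() else path.parent
    for candidate in (start, *start.parents):
        if (candidate / filename).exists():
            return candidate
    return None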
@@ -134,52 +136,20 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]: meta["nd_types"] = get_neurodata_types(path) else: - dataset_path = find_parent_directory_containing( - "dataset_description.json", path - ) + # dataset_path = find_parent_directory_containing( + # "dataset_description.json", path + # ) dandiset_path = find_parent_directory_containing("dandiset.yaml", path) - print("ßßßßßßßßßßßßßßßßßßßßßßßß") bids_dataset_description = find_bids_dataset_description(path) - print("ſſſſſſſſſſſſſſſſſſſſ") - print(path) - # df = list( - # find_dandi_files( - # path, - # dataset_path, - # dandiset_path=dandiset_path, - # allow_all=True, - # ) - # ) - print(type(path)) p = Path(path) - print(type(dataset_path)) - # df = dandi_file(pathlib.PosixPath(path), dandiset_path, - # bids_dataset_description=dataset_path, - # ) - print("łłłłłłłłłłłłłłłłłłłłłłłłłłłłłłłłłłł") - print(p, type(p)) - print(dandiset_path, type(p)) - print(bids_dataset_description) - # df = list( - # dandi_file( - # p, - # dandiset_path, - # bids_dataset_description=bids_dataset_description, - # ) - # ) df = dandi_file( p, dandiset_path, bids_dataset_description=bids_dataset_description, ) print("ăăăăăăăăăăăăăăăăă") - # for i in df: - # print(i) - # assert len(df) == 1 - # df = df[0] - # print("aaaaaaaaaaaaa") - # a = df.get_metadata() a = df.get_metadata() + print("ßßßßßßßßßßßßßßßßßßßßßßßß") print(a) return meta From 3272c868477ac46ea80f72a313c3a9c69e2bff14 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 28 Nov 2022 08:52:44 -0500 Subject: [PATCH 05/37] Removed more debugging print calls --- dandi/cli/cmd_ls.py | 3 +-- dandi/files/__init__.py | 9 --------- dandi/files/bids.py | 6 ++++++ dandi/metadata.py | 7 ------- 4 files changed, 7 insertions(+), 18 deletions(-) diff --git a/dandi/cli/cmd_ls.py b/dandi/cli/cmd_ls.py index ceed81f86..d4af13850 100644 --- a/dandi/cli/cmd_ls.py +++ b/dandi/cli/cmd_ls.py @@ -340,8 +340,7 @@ def get_metadata_ls( def fn(): rec = {} # No need for calling get_metadata if no keys are needed from it - print(path) - print("QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ") + print("Debugging ls for the following path: ", path) if keys is None or list(keys) != ["nwb_version"]: try: if schema is not None: diff --git a/dandi/files/__init__.py b/dandi/files/__init__.py index 03e83bad6..67f8b7e38 100644 --- a/dandi/files/__init__.py +++ b/dandi/files/__init__.py @@ -97,9 +97,6 @@ def find_dandi_files( # BIDS dataset_description.json file at the path (if a directory) or in a # parent path path_queue: deque[tuple[Path, Optional[BIDSDatasetDescriptionAsset]]] = deque() - print("=^.^=") - print(paths) - print("=^_^=") for p in map(Path, paths): if dandiset_path is not None: try: @@ -109,13 +106,8 @@ def find_dandi_files( "Path {str(p)!r} is not inside Dandiset path {str(dandiset_path)!r}" ) path_queue.append((p, None)) - print(path_queue) while path_queue: p, bidsdd = path_queue.popleft() - print(p) - print("hhhhhhhhhhhh") - print(bidsdd) - print("lölölölölö") if p.name.startswith("."): continue if p.is_dir(): @@ -194,7 +186,6 @@ def dandi_file( factory = DandiFileFactory() else: factory = BIDSFileFactory(bids_dataset_description) - print("hhhhhhhhhhhh") return factory(filepath, path) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 376882919..dca47b56c 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -113,6 +113,9 @@ def get_asset_metadata(self, asset: BIDSAsset) -> dict[str, Any]: """:meta private:""" self._validate() assert self._asset_metadata is not None + 
print("qqqqqqqqqqqqqqqqqqqqqq get_asset_metadata") + print(self._asset_metadata) + print(self._asset_metadata[asset.bids_path]) return self._asset_metadata[asset.bids_path] def get_validation_errors( @@ -186,6 +189,9 @@ def get_metadata( start_time = end_time = datetime.now().astimezone() add_common_metadata(metadata, self.filepath, start_time, end_time, digest) metadata["path"] = self.path + print("fffffffffffffffffffffffff get_metadata") + print(metadata) + print(**metadata) return BareAsset(**metadata) diff --git a/dandi/metadata.py b/dandi/metadata.py index d1e957179..7d0f69337 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -74,7 +74,6 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]: path = os.path.abspath(str(path)) # for Path meta = dict() - print("00000000000000000") if op.isdir(path): try: dandiset = Dandiset(path) @@ -91,13 +90,7 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]: ) # First read out possibly available versions of specifications for NWB(:N) - print("1111111111") - print(meta) - print("2222222222") meta["nwb_version"] = get_nwb_version(path) - print("3333333333") - print(meta) - print("4444444444") # PyNWB might fail to load because of missing extensions. # There is a new initiative of establishing registry of such extensions. From ba9914649747eac85adcb391cdd925801d393ec6 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 28 Nov 2022 08:55:02 -0500 Subject: [PATCH 06/37] Removed more debugging print calls --- dandi/files/bids.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index dca47b56c..2573eb380 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -54,7 +54,7 @@ def bids_root(self) -> Path: """ The directory on the filesystem in which the BIDS dataset is located """ - return self.filepath.parent.absolute() + return self.filepath.parent def _validate(self) -> None: with self._lock: @@ -189,9 +189,8 @@ def get_metadata( start_time = end_time = datetime.now().astimezone() add_common_metadata(metadata, self.filepath, start_time, end_time, digest) metadata["path"] = self.path - print("fffffffffffffffffffffffff get_metadata") + print("zzzzzzzzzzzzzzzzzzzzzzzzz get_metadata") print(metadata) - print(**metadata) return BareAsset(**metadata) From 64677c10813d1bbd46a9eaa3a2ce93091a78013e Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 28 Nov 2022 13:02:14 -0500 Subject: [PATCH 07/37] Creating and passing digests --- dandi/cli/cmd_ls.py | 8 +++++++- dandi/files/bids.py | 2 +- dandi/metadata.py | 8 ++++++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/dandi/cli/cmd_ls.py b/dandi/cli/cmd_ls.py index d4af13850..5303b6610 100644 --- a/dandi/cli/cmd_ls.py +++ b/dandi/cli/cmd_ls.py @@ -341,6 +341,7 @@ def fn(): rec = {} # No need for calling get_metadata if no keys are needed from it print("Debugging ls for the following path: ", path) + print(keys) if keys is None or list(keys) != ["nwb_version"]: try: if schema is not None: @@ -359,8 +360,13 @@ def fn(): digest=Digest.dandi_etag(digest), ).json_dict() else: + if use_fake_digest: + digest = "0" * 32 + "-1" + else: + lgr.info("Calculating digest for %s", path) + digest = get_digest(path, digest="dandi-etag") print("000000000000000000000000000000000") - rec = get_metadata(path) + rec = get_metadata(path, Digest.dandi_etag(digest)) except Exception as exc: _add_exc_error(path, rec, errors, exc) if flatten: diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 
2573eb380..7784fcec5 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -71,7 +71,7 @@ def _validate(self) -> None: # purpose. The issue is that README* is still required and if we don't # include it explicitly in the listing validation will implicitly fail, even # if the file is present. - readme_extensions = ["", "md", "rst", "txt"] + readme_extensions = ["", ".md", ".rst", ".txt"] for ext in readme_extensions: ds_root = self.filepath.parent readme_candidate = ds_root / Path("README" + ext) diff --git a/dandi/metadata.py b/dandi/metadata.py index 7d0f69337..a984d9f7f 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -50,7 +50,10 @@ # Disable this for clean hacking @metadata_cache.memoize_path -def get_metadata(path: Union[str, Path]) -> Optional[dict]: +def get_metadata( + path: Union[str, Path], + digest: Optional[Digest] = None, +) -> Optional[dict]: """ Get "flatdata" from a .nwb file or a Dandiset directory @@ -141,7 +144,8 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]: bids_dataset_description=bids_dataset_description, ) print("ăăăăăăăăăăăăăăăăă") - a = df.get_metadata() + print(df, type(df)) + a = df.get_metadata(digest=digest) print("ßßßßßßßßßßßßßßßßßßßßßßßß") print(a) From f8a0da8b1617544788a702f5b9df21d4fab5aa4e Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 28 Nov 2022 17:12:35 -0500 Subject: [PATCH 08/37] Added validation bids schema version to result object metadata --- dandi/validate.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dandi/validate.py b/dandi/validate.py index 85b155a1c..ba350ef84 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -120,6 +120,7 @@ def validate_bids( for meta in validation_result["match_listing"]: file_path = meta.pop("path") meta = {BIDS_TO_DANDI[k]: v for k, v in meta.items() if k in BIDS_TO_DANDI} + meta["bids_schema_version"] = validation_result["bids_version"] if parent_path != os.path.dirname(file_path): parent_path = os.path.dirname(file_path) dataset_path = find_parent_directory_containing( From 430fb0690418a6e4640a8c4d46346e7863dcc2e6 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 28 Nov 2022 18:16:16 -0500 Subject: [PATCH 09/37] Assigning bids schema version for validation to metadata dict --- dandi/files/bids.py | 7 ++++++- dandi/metadata.py | 13 ++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 7784fcec5..542d0f2d6 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -98,6 +98,11 @@ def _validate(self) -> None: self._asset_metadata[bids_path] = prepare_metadata( result.metadata ) + # probably best done as part of `prepare_metadata()` + # pending figuring out how + self._asset_metadata[bids_path][ + "bids_version" + ] = result.metadata["wasAttributedTo"]["bids_schema_version"] def get_asset_errors(self, asset: BIDSAsset) -> list[ValidationResult]: """:meta private:""" @@ -115,7 +120,7 @@ def get_asset_metadata(self, asset: BIDSAsset) -> dict[str, Any]: assert self._asset_metadata is not None print("qqqqqqqqqqqqqqqqqqqqqq get_asset_metadata") print(self._asset_metadata) - print(self._asset_metadata[asset.bids_path]) + print(type(self._asset_metadata)) return self._asset_metadata[asset.bids_path] def get_validation_errors( diff --git a/dandi/metadata.py b/dandi/metadata.py index a984d9f7f..84b5e186b 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -28,6 +28,7 @@ import tenacity from . 
import __version__, get_logger +from .consts import metadata_all_fields from .dandiset import Dandiset from .misctypes import Digest from .pynwb_utils import ( @@ -143,11 +144,17 @@ def get_metadata( dandiset_path, bids_dataset_description=bids_dataset_description, ) - print("ăăăăăăăăăăăăăăăăă") - print(df, type(df)) a = df.get_metadata(digest=digest) - print("ßßßßßßßßßßßßßßßßßßßßßßßß") + print("UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU") print(a) + print(a.wasAttributedTo[0].identifier) + print(meta) + print("UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU") + for key in metadata_all_fields: + print(key) + value = getattr(a.wasAttributedTo[0], key) + meta[key] = value + print(meta) return meta From 279ecf966cd2d6418f1567a5078863fe6c2b1900 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 29 Nov 2022 13:03:57 -0500 Subject: [PATCH 10/37] Trying to insert bids_schema_version ... somewhere. --- dandi/cli/cmd_ls.py | 1 - dandi/files/bids.py | 18 ++++++++++++++---- dandi/metadata.py | 12 +++++++++--- dandi/validate.py | 2 +- dandi/validate_types.py | 1 + 5 files changed, 25 insertions(+), 9 deletions(-) diff --git a/dandi/cli/cmd_ls.py b/dandi/cli/cmd_ls.py index 5303b6610..2f709c331 100644 --- a/dandi/cli/cmd_ls.py +++ b/dandi/cli/cmd_ls.py @@ -365,7 +365,6 @@ def fn(): else: lgr.info("Calculating digest for %s", path) digest = get_digest(path, digest="dandi-etag") - print("000000000000000000000000000000000") rec = get_metadata(path, Digest.dandi_etag(digest)) except Exception as exc: _add_exc_error(path, rec, errors, exc) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 542d0f2d6..d429c6ebf 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -85,6 +85,7 @@ def _validate(self) -> None: list ) self._asset_metadata = defaultdict(dict) + # self._validation_bids_version = result.origin.bids_version for result in results: if result.id in BIDS_ASSET_ERRORS: assert result.path @@ -100,9 +101,16 @@ def _validate(self) -> None: ) # probably best done as part of `prepare_metadata()` # pending figuring out how - self._asset_metadata[bids_path][ - "bids_version" - ] = result.metadata["wasAttributedTo"]["bids_schema_version"] + print("wwwwww") + print(result.origin.bids_version) + print("mmmmmm") + # print(result.metadata["bids_schema_version"]) + # print("ĸĸĸĸĸĸĸ") + # bids_ver = {"bids_schema_version": result.metadata["bids_schema_version"]} + # self._asset_metadata[bids_path]["wasAttributedTo"].append(bids_ver) + # #self._asset_metadata[bids_path][ + # # "bids_schema_version" + # #] = result.metadata["bids_schema_version"] def get_asset_errors(self, asset: BIDSAsset) -> list[ValidationResult]: """:meta private:""" @@ -192,10 +200,12 @@ def get_metadata( ) -> BareAsset: metadata = self.bids_dataset_description.get_asset_metadata(self) start_time = end_time = datetime.now().astimezone() + print(metadata, "\n") add_common_metadata(metadata, self.filepath, start_time, end_time, digest) + print(metadata, "\n") metadata["path"] = self.path + print(metadata, "\n") print("zzzzzzzzzzzzzzzzzzzzzzzzz get_metadata") - print(metadata) return BareAsset(**metadata) diff --git a/dandi/metadata.py b/dandi/metadata.py index 84b5e186b..97185d6d1 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -145,15 +145,21 @@ def get_metadata( bids_dataset_description=bids_dataset_description, ) a = df.get_metadata(digest=digest) + print("kkkkkkkkkkkkk") + print(df._asset_metadata) print("UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU") + print(type(a)) print(a) print(a.wasAttributedTo[0].identifier) print(meta) 
print("UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU") for key in metadata_all_fields: - print(key) - value = getattr(a.wasAttributedTo[0], key) - meta[key] = value + try: + value = getattr(a.wasAttributedTo[0], key) + except AttributeError: + pass + else: + meta[key] = value print(meta) return meta diff --git a/dandi/validate.py b/dandi/validate.py index ba350ef84..53628d1a2 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -68,6 +68,7 @@ def validate_bids( origin = ValidationOrigin( name="bidsschematools", version=bidsschematools.__version__, + bids_version=validation_result["bids_version"], ) # Storing variable to not re-compute set paths for each individual file. @@ -120,7 +121,6 @@ def validate_bids( for meta in validation_result["match_listing"]: file_path = meta.pop("path") meta = {BIDS_TO_DANDI[k]: v for k, v in meta.items() if k in BIDS_TO_DANDI} - meta["bids_schema_version"] = validation_result["bids_version"] if parent_path != os.path.dirname(file_path): parent_path = os.path.dirname(file_path) dataset_path = find_parent_directory_containing( diff --git a/dandi/validate_types.py b/dandi/validate_types.py index dfd920e11..20a95eeb0 100644 --- a/dandi/validate_types.py +++ b/dandi/validate_types.py @@ -8,6 +8,7 @@ class ValidationOrigin: name: str version: str + bids_version: str class Severity(Enum): From 60b24082e35314e6b840a3278b71003503e0049c Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 29 Nov 2022 13:54:50 -0500 Subject: [PATCH 11/37] Added (get_validation)_bids_version to BIDS assets This might be better moved to metadata but that will require updating bids_schema. additionally removed most debugging print calls --- dandi/files/bids.py | 30 ++++++++++-------------------- dandi/metadata.py | 14 +++----------- 2 files changed, 13 insertions(+), 31 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index d429c6ebf..f27485e21 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -45,6 +45,12 @@ class BIDSDatasetDescriptionAsset(LocalFileAsset): #: populated by `_validate()` _asset_metadata: Optional[dict[str, dict[str, Any]]] = None + #: Version of BIDS used for the validation; + #: populated by `_validate()` + #: In future this might be removed and the information included in the + #: BareAsset via dandischema. 
+ _bids_version: Optional[str] = None + #: Threading lock needed in case multiple assets are validated in parallel #: during upload _lock: Lock = field(init=False, default_factory=Lock, repr=False, compare=False) @@ -85,7 +91,6 @@ def _validate(self) -> None: list ) self._asset_metadata = defaultdict(dict) - # self._validation_bids_version = result.origin.bids_version for result in results: if result.id in BIDS_ASSET_ERRORS: assert result.path @@ -99,18 +104,7 @@ def _validate(self) -> None: self._asset_metadata[bids_path] = prepare_metadata( result.metadata ) - # probably best done as part of `prepare_metadata()` - # pending figuring out how - print("wwwwww") - print(result.origin.bids_version) - print("mmmmmm") - # print(result.metadata["bids_schema_version"]) - # print("ĸĸĸĸĸĸĸ") - # bids_ver = {"bids_schema_version": result.metadata["bids_schema_version"]} - # self._asset_metadata[bids_path]["wasAttributedTo"].append(bids_ver) - # #self._asset_metadata[bids_path][ - # # "bids_schema_version" - # #] = result.metadata["bids_schema_version"] + self._bids_version = result.origin.bids_version def get_asset_errors(self, asset: BIDSAsset) -> list[ValidationResult]: """:meta private:""" @@ -126,9 +120,6 @@ def get_asset_metadata(self, asset: BIDSAsset) -> dict[str, Any]: """:meta private:""" self._validate() assert self._asset_metadata is not None - print("qqqqqqqqqqqqqqqqqqqqqq get_asset_metadata") - print(self._asset_metadata) - print(type(self._asset_metadata)) return self._asset_metadata[asset.bids_path] def get_validation_errors( @@ -200,14 +191,13 @@ def get_metadata( ) -> BareAsset: metadata = self.bids_dataset_description.get_asset_metadata(self) start_time = end_time = datetime.now().astimezone() - print(metadata, "\n") add_common_metadata(metadata, self.filepath, start_time, end_time, digest) - print(metadata, "\n") metadata["path"] = self.path - print(metadata, "\n") - print("zzzzzzzzzzzzzzzzzzzzzzzzz get_metadata") return BareAsset(**metadata) + def get_validation_bids_version(self) -> str: + return self.bids_dataset_description._bids_version + class NWBBIDSAsset(BIDSAsset, NWBAsset): """ diff --git a/dandi/metadata.py b/dandi/metadata.py index 97185d6d1..b7b4b070c 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -133,9 +133,6 @@ def get_metadata( meta["nd_types"] = get_neurodata_types(path) else: - # dataset_path = find_parent_directory_containing( - # "dataset_description.json", path - # ) dandiset_path = find_parent_directory_containing("dandiset.yaml", path) bids_dataset_description = find_bids_dataset_description(path) p = Path(path) @@ -145,14 +142,7 @@ def get_metadata( bids_dataset_description=bids_dataset_description, ) a = df.get_metadata(digest=digest) - print("kkkkkkkkkkkkk") - print(df._asset_metadata) - print("UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU") - print(type(a)) - print(a) - print(a.wasAttributedTo[0].identifier) - print(meta) - print("UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU") + meta["bids_version"] = df.get_validation_bids_version() for key in metadata_all_fields: try: value = getattr(a.wasAttributedTo[0], key) @@ -160,7 +150,9 @@ def get_metadata( pass else: meta[key] = value + print("aaaaaaaaaaaaaaaaaaa") print(meta) + print("bbbbbbbbbbbbbbbbbbb") return meta From 4a4530e478fa662d470aab103ee4a1b5e7c8d308 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 30 Nov 2022 03:24:20 -0500 Subject: [PATCH 12/37] Creating ZARR digest for ZARR files --- dandi/cli/cmd_ls.py | 5 ++++- dandi/metadata.py | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git 
a/dandi/cli/cmd_ls.py b/dandi/cli/cmd_ls.py index 2f709c331..4104362a4 100644 --- a/dandi/cli/cmd_ls.py +++ b/dandi/cli/cmd_ls.py @@ -364,7 +364,10 @@ def fn(): digest = "0" * 32 + "-1" else: lgr.info("Calculating digest for %s", path) - digest = get_digest(path, digest="dandi-etag") + if path.endswith((".zarr", ".ZARR")): + digest = get_digest(path, digest="zarr-checksum") + else: + digest = get_digest(path, digest="dandi-etag") rec = get_metadata(path, Digest.dandi_etag(digest)) except Exception as exc: _add_exc_error(path, rec, errors, exc) diff --git a/dandi/metadata.py b/dandi/metadata.py index b7b4b070c..e8948a4b4 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -78,7 +78,7 @@ def get_metadata( path = os.path.abspath(str(path)) # for Path meta = dict() - if op.isdir(path): + if op.isdir(path) and not path.endswith((".zarr", ".ZARR")): try: dandiset = Dandiset(path) return cast(dict, dandiset.metadata) @@ -141,6 +141,8 @@ def get_metadata( dandiset_path, bids_dataset_description=bids_dataset_description, ) + print("qqqqqqqqqqqqqqqqqqqqqqqqqqqqq") + print(type(df)) a = df.get_metadata(digest=digest) meta["bids_version"] = df.get_validation_bids_version() for key in metadata_all_fields: From 9df52ad053e4e1669423c0e0c0651e1b176738db Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 30 Nov 2022 19:31:22 -0500 Subject: [PATCH 13/37] Using correct ZARR digest class (still not working) included debug print calls --- dandi/cli/cmd_ls.py | 7 ++++--- dandi/files/bids.py | 1 + dandi/metadata.py | 2 ++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dandi/cli/cmd_ls.py b/dandi/cli/cmd_ls.py index 4104362a4..c8a2cc2ca 100644 --- a/dandi/cli/cmd_ls.py +++ b/dandi/cli/cmd_ls.py @@ -5,6 +5,7 @@ import click from .base import devel_option, lgr, map_to_click_exceptions +from ..consts import ZARR_EXTENSIONS, metadata_all_fields from ..dandiarchive import DandisetURL, _dandi_url_parser, parse_dandi_url from ..misctypes import Digest from ..utils import is_url @@ -92,7 +93,6 @@ def ls( PYOUTFormatter, YAMLFormatter, ) - from ..consts import metadata_all_fields # TODO: avoid from ..support.pyout import PYOUT_SHORT_NAMES_rev @@ -364,11 +364,12 @@ def fn(): digest = "0" * 32 + "-1" else: lgr.info("Calculating digest for %s", path) - if path.endswith((".zarr", ".ZARR")): + if path.endswith(tuple(ZARR_EXTENSIONS)): digest = get_digest(path, digest="zarr-checksum") + rec = get_metadata(path, Digest.dandi_zarr(digest)) else: digest = get_digest(path, digest="dandi-etag") - rec = get_metadata(path, Digest.dandi_etag(digest)) + rec = get_metadata(path, Digest.dandi_etag(digest)) except Exception as exc: _add_exc_error(path, rec, errors, exc) if flatten: diff --git a/dandi/files/bids.py b/dandi/files/bids.py index f27485e21..cdca8f6ea 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -193,6 +193,7 @@ def get_metadata( start_time = end_time = datetime.now().astimezone() add_common_metadata(metadata, self.filepath, start_time, end_time, digest) metadata["path"] = self.path + print("7777777777777777777") return BareAsset(**metadata) def get_validation_bids_version(self) -> str: diff --git a/dandi/metadata.py b/dandi/metadata.py index e8948a4b4..1652f3799 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -143,7 +143,9 @@ def get_metadata( ) print("qqqqqqqqqqqqqqqqqqqqqqqqqqqqq") print(type(df)) + print("666666666666666666666666") a = df.get_metadata(digest=digest) + print("8888888888888888888888") meta["bids_version"] = df.get_validation_bids_version() for key in 
metadata_all_fields: try: From 0e0ff8e5d332c776fa4a64b6bf5798bcc2893ead Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 30 Nov 2022 19:59:28 -0500 Subject: [PATCH 14/37] More debugging print calls trying to track down digest error --- dandi/metadata.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dandi/metadata.py b/dandi/metadata.py index 1652f3799..fdd0a3e5c 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -980,7 +980,10 @@ def add_common_metadata( NWB assets and non-NWB assets """ if digest is not None: + print("$$$$$$$$$$$$$$$$$$$$$") + print(type(digest)) metadata["digest"] = digest.asdict() + print("èèèèèèèèèèèèèèèèèèèèè") else: metadata["digest"] = {} metadata["dateModified"] = get_utcnow_datetime() From cc28fbba22d4ba835fd87cb11442ee30855da33a Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 30 Nov 2022 20:00:35 -0500 Subject: [PATCH 15/37] Removed unneeded import --- dandi/metadata.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dandi/metadata.py b/dandi/metadata.py index fdd0a3e5c..2a806f54a 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -69,7 +69,6 @@ def get_metadata( dict """ # from .files import dandi_file, find_dandi_files - import os from .files import dandi_file, find_bids_dataset_description From 5d59ae802edb53baf5e86479e1aa7e3108148d44 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 30 Nov 2022 20:09:10 -0500 Subject: [PATCH 16/37] Using zarr extensions variable from constants module --- dandi/metadata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/metadata.py b/dandi/metadata.py index 2a806f54a..7131caa06 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -28,7 +28,7 @@ import tenacity from . import __version__, get_logger -from .consts import metadata_all_fields +from .consts import ZARR_EXTENSIONS, metadata_all_fields from .dandiset import Dandiset from .misctypes import Digest from .pynwb_utils import ( @@ -77,7 +77,7 @@ def get_metadata( path = os.path.abspath(str(path)) # for Path meta = dict() - if op.isdir(path) and not path.endswith((".zarr", ".ZARR")): + if op.isdir(path) and not path.endswith(tuple(ZARR_EXTENSIONS)): try: dandiset = Dandiset(path) return cast(dict, dandiset.metadata) From 5d62fcc56e5699ce349580329782759a4058764b Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 1 Dec 2022 05:29:50 -0500 Subject: [PATCH 17/37] Reinstated fake digest after introducing ZARR conditional --- dandi/cli/cmd_ls.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/dandi/cli/cmd_ls.py b/dandi/cli/cmd_ls.py index c8a2cc2ca..4fbe5b6b0 100644 --- a/dandi/cli/cmd_ls.py +++ b/dandi/cli/cmd_ls.py @@ -360,16 +360,20 @@ def fn(): digest=Digest.dandi_etag(digest), ).json_dict() else: - if use_fake_digest: - digest = "0" * 32 + "-1" - else: - lgr.info("Calculating digest for %s", path) - if path.endswith(tuple(ZARR_EXTENSIONS)): + if path.endswith(tuple(ZARR_EXTENSIONS)): + if use_fake_digest: + digest = "0" * 32 + "-1" + else: + lgr.info("Calculating digest for %s", path) digest = get_digest(path, digest="zarr-checksum") - rec = get_metadata(path, Digest.dandi_zarr(digest)) + rec = get_metadata(path, Digest.dandi_zarr(digest)) + else: + if use_fake_digest: + digest = "0" * 32 + "-1" else: + lgr.info("Calculating digest for %s", path) digest = get_digest(path, digest="dandi-etag") - rec = get_metadata(path, Digest.dandi_etag(digest)) + rec = get_metadata(path, Digest.dandi_etag(digest)) except Exception as exc: _add_exc_error(path, rec, 
errors, exc) if flatten: From f0de1035ba50d68cc1b62211561348c4f621c0d3 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 1 Dec 2022 05:30:15 -0500 Subject: [PATCH 18/37] Attempting to fix digest type conflict (still not working) --- dandi/files/bids.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index cdca8f6ea..ba852d328 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -12,6 +12,7 @@ from .bases import GenericAsset, LocalFileAsset, NWBAsset from .zarr import ZarrAsset +from ..consts import ZARR_MIME_TYPE from ..metadata import add_common_metadata, prepare_metadata from ..misctypes import Digest from ..validate_types import ValidationResult @@ -244,6 +245,19 @@ def get_validation_errors( self, schema_version, devel_debug ) + BIDSAsset.get_validation_errors(self) + def get_metadata( + self, + digest: Optional[Digest] = None, + ignore_errors: bool = True, + ) -> BareAsset: + metadata = self.bids_dataset_description.get_asset_metadata(self) + start_time = end_time = datetime.now().astimezone() + add_common_metadata(metadata, self.filepath, start_time, end_time, digest) + metadata["path"] = self.path + metadata["encodingFormat"] = ZARR_MIME_TYPE + print("u5u5u5u5u5u5u5u") + return BareAsset(**metadata) + class GenericBIDSAsset(BIDSAsset, GenericAsset): """ From 86ae9c0c589d06106a85fb21aff61a4f2fdc9192 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Thu, 1 Dec 2022 14:05:21 -0500 Subject: [PATCH 19/37] Use Zarr checksum to set metadata contentSize --- dandi/metadata.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dandi/metadata.py b/dandi/metadata.py index 7131caa06..bf31fb6fc 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -992,6 +992,12 @@ def add_common_metadata( "mtime %s of %s is in the future", metadata["blobDateModified"], path ) metadata["contentSize"] = os.path.getsize(path) + if digest is not None and digest.algorithm is models.DigestType.dandi_zarr_checksum: + m = re.fullmatch( + r"(?P[0-9a-f]{32})-(?P[0-9]+)--(?P[0-9]+)", digest.value + ) + if m: + metadata["contentSize"] = int(m["size"]) metadata.setdefault("wasGeneratedBy", []).append( get_generator(start_time, end_time) ) From 9866d0c7881341c3689676b53562b9c1e604878c Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 2 Dec 2022 07:08:13 -0500 Subject: [PATCH 20/37] Removed debugging print calls --- dandi/cli/cmd_ls.py | 2 -- dandi/files/bids.py | 2 -- dandi/metadata.py | 11 ----------- 3 files changed, 15 deletions(-) diff --git a/dandi/cli/cmd_ls.py b/dandi/cli/cmd_ls.py index 4fbe5b6b0..bd284a47d 100644 --- a/dandi/cli/cmd_ls.py +++ b/dandi/cli/cmd_ls.py @@ -340,8 +340,6 @@ def get_metadata_ls( def fn(): rec = {} # No need for calling get_metadata if no keys are needed from it - print("Debugging ls for the following path: ", path) - print(keys) if keys is None or list(keys) != ["nwb_version"]: try: if schema is not None: diff --git a/dandi/files/bids.py b/dandi/files/bids.py index ba852d328..19afc1a8c 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -194,7 +194,6 @@ def get_metadata( start_time = end_time = datetime.now().astimezone() add_common_metadata(metadata, self.filepath, start_time, end_time, digest) metadata["path"] = self.path - print("7777777777777777777") return BareAsset(**metadata) def get_validation_bids_version(self) -> str: @@ -255,7 +254,6 @@ def get_metadata( add_common_metadata(metadata, self.filepath, start_time, end_time, digest) metadata["path"] = self.path 
metadata["encodingFormat"] = ZARR_MIME_TYPE - print("u5u5u5u5u5u5u5u") return BareAsset(**metadata) diff --git a/dandi/metadata.py b/dandi/metadata.py index bf31fb6fc..8e11bd138 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -140,11 +140,7 @@ def get_metadata( dandiset_path, bids_dataset_description=bids_dataset_description, ) - print("qqqqqqqqqqqqqqqqqqqqqqqqqqqqq") - print(type(df)) - print("666666666666666666666666") a = df.get_metadata(digest=digest) - print("8888888888888888888888") meta["bids_version"] = df.get_validation_bids_version() for key in metadata_all_fields: try: @@ -153,10 +149,6 @@ def get_metadata( pass else: meta[key] = value - print("aaaaaaaaaaaaaaaaaaa") - print(meta) - print("bbbbbbbbbbbbbbbbbbb") - return meta @@ -979,10 +971,7 @@ def add_common_metadata( NWB assets and non-NWB assets """ if digest is not None: - print("$$$$$$$$$$$$$$$$$$$$$") - print(type(digest)) metadata["digest"] = digest.asdict() - print("èèèèèèèèèèèèèèèèèèèèè") else: metadata["digest"] = {} metadata["dateModified"] = get_utcnow_datetime() From 785dec0006335a9bd7caeac58f636fb2485ae6f1 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 2 Dec 2022 07:13:12 -0500 Subject: [PATCH 21/37] Not requiring bids_version for ValidatorOrigin since NWB objects will not necessarily have this --- dandi/metadata.py | 5 ++++- dandi/validate_types.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/dandi/metadata.py b/dandi/metadata.py index 8e11bd138..fedde1e2b 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -85,7 +85,9 @@ def get_metadata( lgr.debug("Failed to get metadata for %s: %s", path, exc) return None - # Clunky test to determine whether this is NWB + # We assume that non-NWB data is BIDS. + # This is currently the case, and is slated to change only when we have NWB data which + # is *also* BIDS. if path.endswith(("NWB", "nwb")): if nwb_has_external_links(path): raise NotImplementedError( @@ -142,6 +144,7 @@ def get_metadata( ) a = df.get_metadata(digest=digest) meta["bids_version"] = df.get_validation_bids_version() + # there might be a more elegant way to do this: for key in metadata_all_fields: try: value = getattr(a.wasAttributedTo[0], key) diff --git a/dandi/validate_types.py b/dandi/validate_types.py index 20a95eeb0..ade146f38 100644 --- a/dandi/validate_types.py +++ b/dandi/validate_types.py @@ -8,7 +8,7 @@ class ValidationOrigin: name: str version: str - bids_version: str + bids_version: Optional[str] = None class Severity(Enum): From 2059dd796872917d404020a3d8360b7c7f98c39d Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 2 Dec 2022 12:08:37 -0500 Subject: [PATCH 22/37] Typing fixes and improved variable name --- dandi/files/bids.py | 3 ++- dandi/metadata.py | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 19afc1a8c..42b783cd1 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -83,7 +83,7 @@ def _validate(self) -> None: ds_root = self.filepath.parent readme_candidate = ds_root / Path("README" + ext) if readme_candidate.exists(): - bids_paths += [readme_candidate] + bids_paths += [str(readme_candidate)] # end of ad-hoc fix. 
results = validate_bids(*bids_paths) @@ -197,6 +197,7 @@ def get_metadata( return BareAsset(**metadata) def get_validation_bids_version(self) -> str: + assert self.bids_dataset_description._bids_version return self.bids_dataset_description._bids_version diff --git a/dandi/metadata.py b/dandi/metadata.py index fedde1e2b..bc2ee190a 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -136,18 +136,17 @@ def get_metadata( else: dandiset_path = find_parent_directory_containing("dandiset.yaml", path) bids_dataset_description = find_bids_dataset_description(path) - p = Path(path) df = dandi_file( - p, + Path(path), dandiset_path, bids_dataset_description=bids_dataset_description, ) - a = df.get_metadata(digest=digest) + path_metadata = df.get_metadata(digest=digest) meta["bids_version"] = df.get_validation_bids_version() # there might be a more elegant way to do this: for key in metadata_all_fields: try: - value = getattr(a.wasAttributedTo[0], key) + value = getattr(path_metadata.wasAttributedTo[0], key) except AttributeError: pass else: From a2119ec5f97b0731ab74b656f4eb2deb88024f84 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 2 Dec 2022 12:13:58 -0500 Subject: [PATCH 23/37] Attemting type check fix for missing attribute get_validation_bids_version --- dandi/metadata.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dandi/metadata.py b/dandi/metadata.py index bc2ee190a..de18fb140 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -70,7 +70,7 @@ def get_metadata( """ # from .files import dandi_file, find_dandi_files - from .files import dandi_file, find_bids_dataset_description + from .files import bids, dandi_file, find_bids_dataset_description # when we run in parallel, these annoying warnings appear ignore_benign_pynwb_warnings() @@ -142,6 +142,8 @@ def get_metadata( bids_dataset_description=bids_dataset_description, ) path_metadata = df.get_metadata(digest=digest) + # This seems like a really bad idea, but without it type checks fail: + assert type(df) in [bids.GenericBIDSAsset, bids.ZarrBIDSAsset] meta["bids_version"] = df.get_validation_bids_version() # there might be a more elegant way to do this: for key in metadata_all_fields: From 722e804553f760293ea3bb183836e2b9fe748578 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 2 Dec 2022 12:17:06 -0500 Subject: [PATCH 24/37] Removed commented imports --- dandi/metadata.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dandi/metadata.py b/dandi/metadata.py index de18fb140..815255448 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -68,7 +68,6 @@ def get_metadata( ------- dict """ - # from .files import dandi_file, find_dandi_files from .files import bids, dandi_file, find_bids_dataset_description From 3649b5ec5b6c1502ea79872d2bd3120f064d1bc6 Mon Sep 17 00:00:00 2001 From: "John T. 
Wodder II" Date: Fri, 2 Dec 2022 12:25:55 -0500 Subject: [PATCH 25/37] Actually fix typing --- dandi/metadata.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dandi/metadata.py b/dandi/metadata.py index 815255448..282a39b02 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -141,8 +141,7 @@ def get_metadata( bids_dataset_description=bids_dataset_description, ) path_metadata = df.get_metadata(digest=digest) - # This seems like a really bad idea, but without it type checks fail: - assert type(df) in [bids.GenericBIDSAsset, bids.ZarrBIDSAsset] + assert isinstance(df, bids.BIDSAsset) meta["bids_version"] = df.get_validation_bids_version() # there might be a more elegant way to do this: for key in metadata_all_fields: From 32d0379324ee0863b8adbf2be53ee133d2b83f6d Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 2 Dec 2022 16:00:11 -0500 Subject: [PATCH 26/37] Debugging upload tests --- dandi/tests/test_upload.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/tests/test_upload.py b/dandi/tests/test_upload.py index d8af7871d..07dd96eab 100644 --- a/dandi/tests/test_upload.py +++ b/dandi/tests/test_upload.py @@ -220,9 +220,9 @@ def test_upload_bids(mocker: MockerFixture, bids_dandiset: SampleDandiset) -> No # Check existence of assets: dandiset = bids_dandiset.dandiset # file we created? - dandiset.get_asset_by_path("CHANGES") + dandiset.get_asset_by_path("README") # BIDS descriptor file? - dandiset.get_asset_by_path("dataset_description.json") + #dandiset.get_asset_by_path("dataset_description.json") # actual data file? dandiset.get_asset_by_path("sub-Sub1/anat/sub-Sub1_T1w.nii.gz") From ba7d60cbb3a940689904dae8419014e2dd3c4641 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 5 Dec 2022 10:21:41 -0500 Subject: [PATCH 27/37] Reinstating test --- dandi/tests/test_upload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/tests/test_upload.py b/dandi/tests/test_upload.py index 07dd96eab..c5d5de590 100644 --- a/dandi/tests/test_upload.py +++ b/dandi/tests/test_upload.py @@ -222,7 +222,7 @@ def test_upload_bids(mocker: MockerFixture, bids_dandiset: SampleDandiset) -> No # file we created? dandiset.get_asset_by_path("README") # BIDS descriptor file? - #dandiset.get_asset_by_path("dataset_description.json") + dandiset.get_asset_by_path("dataset_description.json") # actual data file? dandiset.get_asset_by_path("sub-Sub1/anat/sub-Sub1_T1w.nii.gz") From e8a64e1356fd8769d507dbe2e0ec15d95cc53a5b Mon Sep 17 00:00:00 2001 From: "John T. 
Wodder II" Date: Mon, 5 Dec 2022 12:38:44 -0500 Subject: [PATCH 28/37] Associate BIDS asset errors with the actual assets --- dandi/files/bids.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 42b783cd1..e388bb420 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -95,7 +95,8 @@ def _validate(self) -> None: for result in results: if result.id in BIDS_ASSET_ERRORS: assert result.path - self._asset_errors[str(result.path)].append(result) + bids_path = result.path.relative_to(self.bids_root).as_posix() + self._asset_errors[bids_path].append(result) elif result.id in BIDS_DATASET_ERRORS: self._dataset_errors.append(result) elif result.id == "BIDS.MATCH": From ccda9eca801e181f729729002c018b068c2f44a4 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 5 Dec 2022 15:27:14 -0500 Subject: [PATCH 29/37] Fixed novel README validation error The ad-hoc fix for README listing when validating single files was adding it twice. big fail. --- dandi/files/bids.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index e388bb420..428e9a139 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -82,7 +82,10 @@ def _validate(self) -> None: for ext in readme_extensions: ds_root = self.filepath.parent readme_candidate = ds_root / Path("README" + ext) - if readme_candidate.exists(): + if ( + readme_candidate.exists() + and str(readme_candidate) not in bids_paths + ): bids_paths += [str(readme_candidate)] # end of ad-hoc fix. From 44a8ab4b4e5c8e59c50ec6e8013c464abbd883cc Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 5 Dec 2022 15:38:54 -0500 Subject: [PATCH 30/37] Checking ls output subject identifier Closes: https://github.com/dandi/dandi-cli/issues/1097 --- dandi/cli/tests/{test_ls.py => test_cmd_ls.py} | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) rename dandi/cli/tests/{test_ls.py => test_cmd_ls.py} (96%) diff --git a/dandi/cli/tests/test_ls.py b/dandi/cli/tests/test_cmd_ls.py similarity index 96% rename from dandi/cli/tests/test_ls.py rename to dandi/cli/tests/test_cmd_ls.py index da08bac92..b8f21e6f8 100644 --- a/dandi/cli/tests/test_ls.py +++ b/dandi/cli/tests/test_cmd_ls.py @@ -50,7 +50,6 @@ def load(s): @mark.skipif_no_network -@pytest.mark.xfail(reason="https://github.com/dandi/dandi-cli/issues/1097") def test_ls_bids_file(bids_examples): bids_file_path = "asl003/sub-Sub1/anat/sub-Sub1_T1w.nii.gz" bids_file_path = os.path.join(bids_examples, bids_file_path) @@ -58,7 +57,7 @@ def test_ls_bids_file(bids_examples): assert r.exit_code == 0, r.output data = yaml_load(r.stdout, "safe") assert len(data) == 1 - assert data[0]["subject_id"] == "Sub1" + assert data[0]["identifier"] == "Sub1" @mark.skipif_no_network From efab43ed5b2a8faed2003014fb23effdcfa92492 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 5 Dec 2022 15:44:45 -0500 Subject: [PATCH 31/37] Added ls command tests for zarrBIDS file Closes: https://github.com/dandi/dandi-cli/issues/1038 --- dandi/cli/tests/test_cmd_ls.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/dandi/cli/tests/test_cmd_ls.py b/dandi/cli/tests/test_cmd_ls.py index b8f21e6f8..65dbed85c 100644 --- a/dandi/cli/tests/test_cmd_ls.py +++ b/dandi/cli/tests/test_cmd_ls.py @@ -60,6 +60,19 @@ def test_ls_bids_file(bids_examples): assert data[0]["identifier"] == "Sub1" +@mark.skipif_no_network +def test_ls_zarrbids_file(bids_examples): + bids_file_path = ( + 
"micr_SEMzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-A_SPIM.ome.zarr" + ) + bids_file_path = os.path.join(bids_examples, bids_file_path) + r = CliRunner().invoke(ls, ["-f", "yaml", bids_file_path]) + assert r.exit_code == 0, r.output + data = yaml_load(r.stdout, "safe") + assert len(data) == 1 + assert data[0]["identifier"] == "01" + + @mark.skipif_no_network def test_ls_dandiset_url(): r = CliRunner().invoke( From 1ed0f5fd4cf844745482bba04e5bd9b78368b707 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 6 Dec 2022 12:28:43 -0500 Subject: [PATCH 32/37] ZARR-appropriate fake checksum Co-authored-by: John T. Wodder II --- dandi/cli/cmd_ls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/cli/cmd_ls.py b/dandi/cli/cmd_ls.py index bd284a47d..6d185d965 100644 --- a/dandi/cli/cmd_ls.py +++ b/dandi/cli/cmd_ls.py @@ -360,7 +360,7 @@ def fn(): else: if path.endswith(tuple(ZARR_EXTENSIONS)): if use_fake_digest: - digest = "0" * 32 + "-1" + digest = "0" * 32 + "-0--0" else: lgr.info("Calculating digest for %s", path) digest = get_digest(path, digest="zarr-checksum") From 99008412eac37e8f6b3b11c881d7b436528f5edf Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 6 Dec 2022 12:29:04 -0500 Subject: [PATCH 33/37] More compact code Co-authored-by: John T. Wodder II --- dandi/files/bids.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 428e9a139..59b25f731 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -80,8 +80,7 @@ def _validate(self) -> None: # if the file is present. readme_extensions = ["", ".md", ".rst", ".txt"] for ext in readme_extensions: - ds_root = self.filepath.parent - readme_candidate = ds_root / Path("README" + ext) + readme_candidate = self.bids_root / Path("README" + ext) if ( readme_candidate.exists() and str(readme_candidate) not in bids_paths From 3384ef5d14bb8ac88f39946c25f3aa6b66694ebe Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 6 Dec 2022 12:29:16 -0500 Subject: [PATCH 34/37] Dot in extension Co-authored-by: John T. Wodder II --- dandi/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/metadata.py b/dandi/metadata.py index 282a39b02..1d3956391 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -87,7 +87,7 @@ def get_metadata( # We assume that non-NWB data is BIDS. # This is currently the case, and is slated to change only when we have NWB data which # is *also* BIDS. - if path.endswith(("NWB", "nwb")): + if path.endswith((".NWB", ".nwb")): if nwb_has_external_links(path): raise NotImplementedError( f"NWB files with external links are not supported: {path}" From d9ccf8bc8e7b5bead5e142c9ae93356ba4fe0164 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 6 Dec 2022 13:19:48 -0500 Subject: [PATCH 35/37] Safer assert satatement Co-authored-by: John T. 
From 99008412eac37e8f6b3b11c881d7b436528f5edf Mon Sep 17 00:00:00 2001
From: Horea Christian
Date: Tue, 6 Dec 2022 12:29:04 -0500
Subject: [PATCH 33/37] More compact code

Co-authored-by: John T. Wodder II
---
 dandi/files/bids.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dandi/files/bids.py b/dandi/files/bids.py
index 428e9a139..59b25f731 100644
--- a/dandi/files/bids.py
+++ b/dandi/files/bids.py
@@ -80,8 +80,7 @@ def _validate(self) -> None:
         # if the file is present.
         readme_extensions = ["", ".md", ".rst", ".txt"]
         for ext in readme_extensions:
-            ds_root = self.filepath.parent
-            readme_candidate = ds_root / Path("README" + ext)
+            readme_candidate = self.bids_root / Path("README" + ext)
             if (
                 readme_candidate.exists()
                 and str(readme_candidate) not in bids_paths

From 3384ef5d14bb8ac88f39946c25f3aa6b66694ebe Mon Sep 17 00:00:00 2001
From: Horea Christian
Date: Tue, 6 Dec 2022 12:29:16 -0500
Subject: [PATCH 34/37] Dot in extension

Co-authored-by: John T. Wodder II
---
 dandi/metadata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dandi/metadata.py b/dandi/metadata.py
index 282a39b02..1d3956391 100644
--- a/dandi/metadata.py
+++ b/dandi/metadata.py
@@ -87,7 +87,7 @@ def get_metadata(
     # We assume that non-NWB data is BIDS.
     # This is currently the case, and is slated to change only when we have NWB data which
     # is *also* BIDS.
-    if path.endswith(("NWB", "nwb")):
+    if path.endswith((".NWB", ".nwb")):
         if nwb_has_external_links(path):
             raise NotImplementedError(
                 f"NWB files with external links are not supported: {path}"

From d9ccf8bc8e7b5bead5e142c9ae93356ba4fe0164 Mon Sep 17 00:00:00 2001
From: Horea Christian
Date: Tue, 6 Dec 2022 13:19:48 -0500
Subject: [PATCH 35/37] Safer assert statement

Co-authored-by: John T. Wodder II
---
 dandi/files/bids.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dandi/files/bids.py b/dandi/files/bids.py
index 59b25f731..67e49b5cc 100644
--- a/dandi/files/bids.py
+++ b/dandi/files/bids.py
@@ -200,7 +200,7 @@ def get_metadata(
         return BareAsset(**metadata)
 
     def get_validation_bids_version(self) -> str:
-        assert self.bids_dataset_description._bids_version
+        assert self.bids_dataset_description._bids_version is not None
         return self.bids_dataset_description._bids_version
 
 

From 375947de21b88361670f39424f50e009cc146460 Mon Sep 17 00:00:00 2001
From: Horea Christian
Date: Tue, 6 Dec 2022 15:40:15 -0500
Subject: [PATCH 36/37] Making sure the method which sets the attribute has
 been run

---
 dandi/files/bids.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dandi/files/bids.py b/dandi/files/bids.py
index 67e49b5cc..d2238c332 100644
--- a/dandi/files/bids.py
+++ b/dandi/files/bids.py
@@ -200,6 +200,7 @@ def get_metadata(
         return BareAsset(**metadata)
 
     def get_validation_bids_version(self) -> str:
+        self.bids_dataset_description._validate()
         assert self.bids_dataset_description._bids_version is not None
         return self.bids_dataset_description._bids_version
 
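Note on PATCH 35/37 and PATCH 36/37 above: the two changes work together. The explicit `is not None` lets a type checker narrow Optional[str] to str, and calling _validate() first guarantees the attribute has actually been populated before the assert runs. A self-contained sketch of the pattern (the class is a hypothetical stand-in for BIDSDatasetDescriptionAsset):

    from typing import Optional

    class DatasetDescription:
        # Hypothetical stand-in; the real class derives the version
        # during BIDS validation.

        def __init__(self) -> None:
            self._bids_version: Optional[str] = None

        def _validate(self) -> None:
            # The real method is idempotent; here it just populates
            # the attribute if that has not happened yet.
            if self._bids_version is None:
                self._bids_version = "1.7.0"

        def get_validation_bids_version(self) -> str:
            self._validate()  # ensure the attribute-setting method has run
            assert self._bids_version is not None  # narrows Optional[str] to str
            return self._bids_version

    print(DatasetDescription().get_validation_bids_version())  # 1.7.0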
From 11c18f5a329c745301c7510f0a697fcfa4c3a59a Mon Sep 17 00:00:00 2001
From: Horea Christian
Date: Wed, 7 Dec 2022 16:23:03 -0500
Subject: [PATCH 37/37] Updated logic to better accommodate future BIDS NWB
 examples

---
 dandi/cli/tests/test_cmd_ls.py |  9 +++++++
 dandi/metadata.py              | 45 +++++++++++++++++-----------------
 2 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/dandi/cli/tests/test_cmd_ls.py b/dandi/cli/tests/test_cmd_ls.py
index 65dbed85c..13dbde12d 100644
--- a/dandi/cli/tests/test_cmd_ls.py
+++ b/dandi/cli/tests/test_cmd_ls.py
@@ -49,6 +49,15 @@ def load(s):
         assert metadata[f] == simple1_nwb_metadata[f]
 
 
+def test_ls_nwb_file(simple2_nwb):
+    bids_file_path = "simple2.nwb"
+    bids_file_path = os.path.join(simple2_nwb, bids_file_path)
+    r = CliRunner().invoke(ls, ["-f", "yaml", bids_file_path])
+    assert r.exit_code == 0, r.output
+    data = yaml_load(r.stdout, "safe")
+    assert len(data) == 1
+
+
 @mark.skipif_no_network
 def test_ls_bids_file(bids_examples):
     bids_file_path = "asl003/sub-Sub1/anat/sub-Sub1_T1w.nii.gz"

diff --git a/dandi/metadata.py b/dandi/metadata.py
index 1d3956391..ab4c40e08 100644
--- a/dandi/metadata.py
+++ b/dandi/metadata.py
@@ -84,10 +84,28 @@ def get_metadata(
         lgr.debug("Failed to get metadata for %s: %s", path, exc)
         return None
 
-    # We assume that non-NWB data is BIDS.
-    # This is currently the case, and is slated to change only when we have NWB data which
-    # is *also* BIDS.
-    if path.endswith((".NWB", ".nwb")):
+    # Is the data BIDS (as defined by the presence of a BIDS dataset descriptor)
+    bids_dataset_description = find_bids_dataset_description(path)
+    if bids_dataset_description:
+        dandiset_path = find_parent_directory_containing("dandiset.yaml", path)
+        bids_dataset_description = find_bids_dataset_description(path)
+        df = dandi_file(
+            Path(path),
+            dandiset_path,
+            bids_dataset_description=bids_dataset_description,
+        )
+        path_metadata = df.get_metadata(digest=digest)
+        assert isinstance(df, bids.BIDSAsset)
+        meta["bids_version"] = df.get_validation_bids_version()
+        # there might be a more elegant way to do this:
+        for key in metadata_all_fields:
+            try:
+                value = getattr(path_metadata.wasAttributedTo[0], key)
+            except AttributeError:
+                pass
+            else:
+                meta[key] = value
+    elif path.endswith((".NWB", ".nwb")):
         if nwb_has_external_links(path):
             raise NotImplementedError(
                 f"NWB files with external links are not supported: {path}"
@@ -133,24 +151,7 @@ def get_metadata(
 
         meta["nd_types"] = get_neurodata_types(path)
     else:
-        dandiset_path = find_parent_directory_containing("dandiset.yaml", path)
-        bids_dataset_description = find_bids_dataset_description(path)
-        df = dandi_file(
-            Path(path),
-            dandiset_path,
-            bids_dataset_description=bids_dataset_description,
-        )
-        path_metadata = df.get_metadata(digest=digest)
-        assert isinstance(df, bids.BIDSAsset)
-        meta["bids_version"] = df.get_validation_bids_version()
-        # there might be a more elegant way to do this:
-        for key in metadata_all_fields:
-            try:
-                value = getattr(path_metadata.wasAttributedTo[0], key)
-            except AttributeError:
-                pass
-            else:
-                meta[key] = value
+        raise RuntimeError("Unable to get metadata from non-BIDS, non-NWB asset.")
 
     return meta
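Note on PATCH 37/37 above: after this patch, get_metadata() dispatches on evidence instead of assuming that everything non-NWB is BIDS. A BIDS dataset descriptor wins (so future BIDS datasets containing NWB files take the BIDS branch), a bare .nwb extension falls back to the pynwb reader, and anything else is an error. A condensed, runnable sketch of just that control flow (the branch bodies are placeholders, and find_bids_dataset_description below is a simplified stand-in for dandi's helper):

    from pathlib import Path
    from typing import Optional

    def find_bids_dataset_description(path: str) -> Optional[Path]:
        # Simplified stand-in: walk up from `path` looking for the
        # BIDS dataset descriptor.
        for parent in Path(path).absolute().parents:
            candidate = parent / "dataset_description.json"
            if candidate.exists():
                return candidate
        return None

    def get_metadata_sketch(path: str) -> dict:
        # BIDS takes precedence over the file extension, so an NWB file
        # inside a BIDS dataset still goes down the BIDS branch.
        if find_bids_dataset_description(path):
            return {"branch": "bids"}  # placeholder for the BIDS metadata branch
        elif path.endswith((".NWB", ".nwb")):
            return {"branch": "nwb"}   # placeholder for the pynwb metadata branch
        raise RuntimeError("Unable to get metadata from non-BIDS, non-NWB asset.")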