diff --git a/dandi/files/bases.py b/dandi/files/bases.py
index 56dc9f642..485e78a33 100644
--- a/dandi/files/bases.py
+++ b/dandi/files/bases.py
@@ -10,7 +10,7 @@
 from pathlib import Path
 import re
 from threading import Lock
-from typing import Any, BinaryIO, Generic, Optional
+from typing import Any, BinaryIO, Generic, Optional, cast
 from xml.etree.ElementTree import fromstring

 import dandischema
@@ -28,7 +28,7 @@
 import dandi
 from dandi.dandiapi import RemoteAsset, RemoteDandiset, RESTFullAPIClient
 from dandi.metadata import get_default_metadata, nwb2asset
-from dandi.misctypes import DUMMY_DIGEST, Digest, P
+from dandi.misctypes import DUMMY_DANDI_ETAG, Digest, P
 from dandi.organize import validate_organized_path
 from dandi.pynwb_utils import validate as pynwb_validate
 from dandi.support.digests import get_dandietag, get_digest
@@ -130,7 +130,9 @@ def get_validation_errors(
         except Exception as e:
             if devel_debug:
                 raise
-            return _pydantic_errors_to_validation_results(e, str(self.filepath))
+            return _pydantic_errors_to_validation_results(
+                [e], self.filepath, scope=Scope.DANDISET
+            )
         return []


@@ -145,6 +147,8 @@ class LocalAsset(DandiFile):
     #: (i.e., relative to the Dandiset's root)
     path: str

+    _DUMMY_DIGEST = DUMMY_DANDI_ETAG
+
     @abstractmethod
     def get_digest(self) -> Digest:
         """
@@ -168,64 +172,48 @@ def get_validation_errors(
         schema_version: Optional[str] = None,
         devel_debug: bool = False,
     ) -> list[ValidationResult]:
-        if schema_version is not None:
-            current_version = get_schema_version()
-            if schema_version != current_version:
-                raise ValueError(
-                    f"Unsupported schema version: {schema_version}; expected {current_version}"
-                )
-            try:
-                asset = self.get_metadata(digest=DUMMY_DIGEST)
-                BareAsset(**asset.dict())
-            except ValidationError as e:
-                if devel_debug:
-                    raise
-                # TODO: how do we get **all** errors from validation - there must be a way
-                return [
-                    ValidationResult(
-                        origin=ValidationOrigin(
-                            name="dandischema",
-                            version=dandischema.__version__,
-                        ),
-                        severity=Severity.ERROR,
-                        id="dandischema.TODO",
-                        scope=Scope.FILE,
-                        # metadata=metadata,
-                        path=self.filepath,  # note that it is not relative .path
-                        message=str(e),
-                        # TODO? dataset_path=dataset_path,
-                        dandiset_path=self.dandiset_path,
-                    )
-                ]
-            except Exception as e:
-                if devel_debug:
-                    raise
-                lgr.warning(
-                    "Unexpected validation error for %s: %s",
-                    self.filepath,
-                    e,
-                    extra={"validating": True},
+        current_version = get_schema_version()
+        if schema_version is None:
+            schema_version = current_version
+        if schema_version != current_version:
+            raise ValueError(
+                f"Unsupported schema version: {schema_version}; expected {current_version}"
+            )
+        try:
+            asset = self.get_metadata(digest=self._DUMMY_DIGEST)
+            BareAsset(**asset.dict())
+        except ValidationError as e:
+            if devel_debug:
+                raise
+            return _pydantic_errors_to_validation_results(
+                e, self.filepath, scope=Scope.FILE
+            )
+        except Exception as e:
+            if devel_debug:
+                raise
+            lgr.warning(
+                "Unexpected validation error for %s: %s",
+                self.filepath,
+                e,
+                extra={"validating": True},
+            )
+            return [
+                ValidationResult(
+                    origin=ValidationOrigin(
+                        name="dandi",
+                        version=dandi.__version__,
+                    ),
+                    severity=Severity.ERROR,
+                    id="dandi.SOFTWARE_ERROR",
+                    scope=Scope.FILE,
+                    # metadata=metadata,
+                    path=self.filepath,  # note that it is not relative .path
+                    message=f"Failed to read metadata: {e}",
+                    # TODO? dataset_path=dataset_path,
+                    dandiset_path=self.dandiset_path,
                 )
-                return [
-                    ValidationResult(
-                        origin=ValidationOrigin(
-                            name="dandi",
-                            version=dandi.__version__,
-                        ),
-                        severity=Severity.ERROR,
-                        id="dandi.SOFTWARE_ERROR",
-                        scope=Scope.FILE,
-                        # metadata=metadata,
-                        path=self.filepath,  # note that it is not relative .path
-                        message=f"Failed to read metadata: {e}",
-                        # TODO? dataset_path=dataset_path,
-                        dandiset_path=self.dandiset_path,
-                    )
-                ]
-            return []
-        else:
-            # TODO: Do something else?
-            return []
+            ]
+        return []

     def upload(
         self,
         dandiset: RemoteDandiset,
@@ -488,6 +476,12 @@ def get_validation_errors(
         schema_version: Optional[str] = None,
         devel_debug: bool = False,
     ) -> list[ValidationResult]:
+        """Validate NWB asset
+
+        If ``schema_version`` was provided, we only validate basic metadata,
+        and completely skip validation using nwbinspector.inspect_nwb
+
+        """
         errors: list[ValidationResult] = pynwb_validate(
             self.filepath, devel_debug=devel_debug
         )
@@ -540,7 +534,9 @@ def get_validation_errors(
             if devel_debug:
                 raise
             # TODO: might reraise instead of making it into an error
-            return _pydantic_errors_to_validation_results([e], str(self.filepath))
+            return _pydantic_errors_to_validation_results(
+                [e], self.filepath, scope=Scope.FILE
+            )


 from .bids import NWBBIDSAsset
@@ -743,12 +739,15 @@ def _get_nwb_inspector_version():


 def _pydantic_errors_to_validation_results(
-    errors: Any[list[dict], Exception],
-    file_path: str,
+    errors: list[dict | Exception] | ValidationError,
+    file_path: Path,
+    scope: Scope,
 ) -> list[ValidationResult]:
     """Convert list of dict from pydantic into our custom object."""
     out = []
-    for e in errors:
+    for e in (
+        errors.errors() if isinstance(errors, ValidationError) else cast(list, errors)
+    ):
         if isinstance(e, Exception):
             message = getattr(e, "message", str(e))
             id = "exception"
@@ -764,8 +763,7 @@
                     ),
                 )
             )
-            message = e.get("message", None)
-        scope = Scope.DANDISET
+            message = e.get("message", e.get("msg", None))
         out.append(
             ValidationResult(
                 origin=ValidationOrigin(
@@ -775,7 +773,7 @@
                 severity=Severity.ERROR,
                 id=id,
                 scope=scope,
-                path=Path(file_path),
+                path=file_path,
                 message=message,
                 # TODO? dataset_path=dataset_path,
                 # TODO? dandiset_path=dandiset_path,
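Aside (illustrative only, not part of the patch): with the bases.py hunks above, _pydantic_errors_to_validation_results() accepts either a pydantic ValidationError (it now calls .errors() itself) or an explicit list of dicts/exceptions, and the caller chooses the scope instead of it being hard-coded to Scope.DANDISET. A rough sketch of the two call shapes, assuming it runs inside dandi/files/bases.py where the helper and Scope are available; the Toy model and file names are made up:

    from pathlib import Path
    from pydantic import BaseModel, ValidationError

    class Toy(BaseModel):  # hypothetical stand-in for BareAsset
        contentSize: int

    try:
        Toy()  # missing required field -> ValidationError
    except ValidationError as e:
        # shape 1: pass the ValidationError itself; the helper iterates e.errors()
        results = _pydantic_errors_to_validation_results(
            e, Path("sub-01.nwb"), scope=Scope.FILE
        )

    # shape 2: wrap an arbitrary exception in a list, as the dandiset.yaml code path above does
    results = _pydantic_errors_to_validation_results(
        [RuntimeError("could not read metadata")],
        Path("dandiset.yaml"),
        scope=Scope.DANDISET,
    )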
diff --git a/dandi/files/zarr.py b/dandi/files/zarr.py
index caee97bc8..67150b9e7 100644
--- a/dandi/files/zarr.py
+++ b/dandi/files/zarr.py
@@ -31,7 +31,7 @@
     RESTFullAPIClient,
 )
 from dandi.metadata import get_default_metadata
-from dandi.misctypes import BasePath, Digest
+from dandi.misctypes import DUMMY_DANDI_ZARR_CHECKSUM, BasePath, Digest
 from dandi.support.digests import get_digest, get_zarr_checksum, md5file_nocache
 from dandi.utils import chunked, exclude_from_zarr, pluralize

@@ -133,6 +133,8 @@ class ZarrStat:
 class ZarrAsset(LocalDirectoryAsset[LocalZarrEntry]):
     """Representation of a local Zarr directory"""

+    _DUMMY_DIGEST = DUMMY_DANDI_ZARR_CHECKSUM
+
     @property
     def filetree(self) -> LocalZarrEntry:
         """
@@ -188,12 +190,13 @@ def get_validation_errors(
         schema_version: Optional[str] = None,
         devel_debug: bool = False,
     ) -> list[ValidationResult]:
+        errors: list[ValidationResult] = []
         try:
-            data = zarr.open(self.filepath)
+            data = zarr.open(str(self.filepath))
         except Exception:
             if devel_debug:
                 raise
-            return [
+            errors.append(
                 ValidationResult(
                     origin=ValidationOrigin(
                         name="zarr",
@@ -205,9 +208,10 @@ def get_validation_errors(
                     path=self.filepath,
                     message="Error opening file.",
                 )
-            ]
+            )
+            data = None
         if isinstance(data, zarr.Group) and not data:
-            return [
+            errors.append(
                 ValidationResult(
                     origin=ValidationOrigin(
                         name="zarr",
@@ -219,12 +223,12 @@ def get_validation_errors(
                     path=self.filepath,
                     message="Zarr group is empty.",
                 )
-            ]
+            )
         if self._is_too_deep():
             msg = f"Zarr directory tree more than {MAX_ZARR_DEPTH} directories deep"
             if devel_debug:
                 raise ValueError(msg)
-            return [
+            errors.append(
                 ValidationResult(
                     origin=ValidationOrigin(
                         name="zarr",
@@ -236,9 +240,8 @@ def get_validation_errors(
                     path=self.filepath,
                     message=msg,
                 )
-            ]
-        # TODO: Should this be appended to the above errors?
-        return super().get_validation_errors(
+            )
+        return errors + super().get_validation_errors(
             schema_version=schema_version, devel_debug=devel_debug
         )
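Aside (not part of the patch): the zarr.py hunks stop returning on the first problem; issues are collected in errors and the generic LocalAsset checks are appended via errors + super().get_validation_errors(...), with data = None keeping the later isinstance(data, zarr.Group) test well-defined when zarr.open() fails. The _DUMMY_DIGEST class attribute relies on plain attribute lookup through self, so each asset class supplies the placeholder digest matching its digest algorithm. A toy illustration of that override pattern (made-up classes, not the real dandi ones):

    class Base:
        _DUMMY_DIGEST = "dandi-etag placeholder"

        def placeholder(self) -> str:
            # resolved via self, so a subclass can swap in its own value
            return self._DUMMY_DIGEST

    class ZarrLike(Base):
        _DUMMY_DIGEST = "zarr-checksum placeholder"

    print(Base().placeholder())      # dandi-etag placeholder
    print(ZarrLike().placeholder())  # zarr-checksum placeholder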
diff --git a/dandi/misctypes.py b/dandi/misctypes.py
index fc7610969..4a3341a49 100644
--- a/dandi/misctypes.py
+++ b/dandi/misctypes.py
@@ -50,7 +50,11 @@ def asdict(self) -> Dict[DigestType, str]:

 #: Placeholder digest used in some situations where a digest is required but
 #: not actually relevant and would be too expensive to calculate
-DUMMY_DIGEST = Digest(algorithm=DigestType.dandi_etag, value=32 * "d" + "-1")
+DUMMY_DANDI_ETAG = Digest(algorithm=DigestType.dandi_etag, value=32 * "d" + "-1")
+DUMMY_DANDI_ZARR_CHECKSUM = Digest(
+    algorithm=DigestType.dandi_zarr_checksum,
+    value=32 * "d" + "-1--1",
+)


 P = TypeVar("P", bound="BasePath")
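Aside (not part of the patch): splitting DUMMY_DIGEST into two constants lets regular assets keep a dandi-etag-shaped placeholder while Zarr directories get one in the dandi-zarr-checksum format. A quick sketch of what the new constants carry, assuming the existing Digest definition in dandi/misctypes.py and DigestType from dandischema (the asdict() output shape is an assumption):

    from dandi.misctypes import DUMMY_DANDI_ETAG, DUMMY_DANDI_ZARR_CHECKSUM
    from dandischema.models import DigestType

    assert DUMMY_DANDI_ETAG.algorithm is DigestType.dandi_etag
    assert DUMMY_DANDI_ETAG.value == 32 * "d" + "-1"
    assert DUMMY_DANDI_ZARR_CHECKSUM.algorithm is DigestType.dandi_zarr_checksum
    assert DUMMY_DANDI_ZARR_CHECKSUM.value == 32 * "d" + "-1--1"
    # asdict() presumably maps algorithm to value, e.g. {DigestType.dandi_etag: "dd...d-1"}
    print(DUMMY_DANDI_ETAG.asdict())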
diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py
index ca92f544e..76ad3cc93 100644
--- a/dandi/pynwb_utils.py
+++ b/dandi/pynwb_utils.py
@@ -346,7 +346,6 @@ def validate(
         else:
             # Fallback if an older version
             with pynwb.NWBHDF5IO(path=path, mode="r", load_namespaces=True) as reader:
                 error_outputs = pynwb.validate(io=reader)
-            # TODO: return ValidationResult structs
             for error_output in error_outputs:
                 errors.append(
                     ValidationResult(
@@ -364,20 +363,18 @@ def validate(
     except Exception as exc:
         if devel_debug:
             raise
-        errors.extend(
-            [
-                ValidationResult(
-                    origin=ValidationOrigin(
-                        name="pynwb",
-                        version=pynwb.__version__,
-                    ),
-                    severity=Severity.ERROR,
-                    id="pywnb.GENERIC",
-                    scope=Scope.FILE,
-                    path=Path(path),
-                    message=f"{exc}",
-                )
-            ]
+        errors.append(
+            ValidationResult(
+                origin=ValidationOrigin(
+                    name="pynwb",
+                    version=pynwb.__version__,
+                ),
+                severity=Severity.ERROR,
+                id="pywnb.GENERIC",
+                scope=Scope.FILE,
+                path=Path(path),
+                message=f"{exc}",
+            )
         )

     # To overcome