Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BF+RF validation #1209

Merged
merged 11 commits into from
Feb 9, 2023
Merged
132 changes: 65 additions & 67 deletions dandi/files/bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pathlib import Path
import re
from threading import Lock
from typing import Any, BinaryIO, Generic, Optional
from typing import Any, BinaryIO, Generic, Optional, cast
from xml.etree.ElementTree import fromstring

import dandischema
Expand All @@ -28,7 +28,7 @@
import dandi
from dandi.dandiapi import RemoteAsset, RemoteDandiset, RESTFullAPIClient
from dandi.metadata import get_default_metadata, nwb2asset
from dandi.misctypes import DUMMY_DIGEST, Digest, P
from dandi.misctypes import DUMMY_DANDI_ETAG, Digest, P
from dandi.organize import validate_organized_path
from dandi.pynwb_utils import validate as pynwb_validate
from dandi.support.digests import get_dandietag, get_digest
Expand Down Expand Up @@ -130,7 +130,9 @@ def get_validation_errors(
except Exception as e:
if devel_debug:
raise
return _pydantic_errors_to_validation_results(e, str(self.filepath))
return _pydantic_errors_to_validation_results(
[e], self.filepath, scope=Scope.DANDISET
)
return []


Expand All @@ -145,6 +147,8 @@ class LocalAsset(DandiFile):
#: (i.e., relative to the Dandiset's root)
path: str

_DUMMY_DIGEST = DUMMY_DANDI_ETAG

@abstractmethod
def get_digest(self) -> Digest:
"""
Expand All @@ -168,64 +172,48 @@ def get_validation_errors(
schema_version: Optional[str] = None,
devel_debug: bool = False,
) -> list[ValidationResult]:
if schema_version is not None:
current_version = get_schema_version()
if schema_version != current_version:
raise ValueError(
f"Unsupported schema version: {schema_version}; expected {current_version}"
)
try:
asset = self.get_metadata(digest=DUMMY_DIGEST)
BareAsset(**asset.dict())
except ValidationError as e:
if devel_debug:
raise
# TODO: how do we get **all** errors from validation - there must be a way
return [
ValidationResult(
origin=ValidationOrigin(
name="dandischema",
version=dandischema.__version__,
),
severity=Severity.ERROR,
id="dandischema.TODO",
scope=Scope.FILE,
# metadata=metadata,
path=self.filepath, # note that it is not relative .path
message=str(e),
# TODO? dataset_path=dataset_path,
dandiset_path=self.dandiset_path,
)
]
except Exception as e:
if devel_debug:
raise
lgr.warning(
"Unexpected validation error for %s: %s",
self.filepath,
e,
extra={"validating": True},
current_version = get_schema_version()
if schema_version is None:
schema_version = current_version
if schema_version != current_version:
raise ValueError(
f"Unsupported schema version: {schema_version}; expected {current_version}"
)
try:
asset = self.get_metadata(digest=self._DUMMY_DIGEST)
BareAsset(**asset.dict())
except ValidationError as e:
if devel_debug:
raise
return _pydantic_errors_to_validation_results(
e, self.filepath, scope=Scope.FILE
)
except Exception as e:
if devel_debug:
raise
lgr.warning(
"Unexpected validation error for %s: %s",
self.filepath,
e,
extra={"validating": True},
)
return [
ValidationResult(
origin=ValidationOrigin(
name="dandi",
version=dandi.__version__,
),
severity=Severity.ERROR,
id="dandi.SOFTWARE_ERROR",
scope=Scope.FILE,
# metadata=metadata,
path=self.filepath, # note that it is not relative .path
message=f"Failed to read metadata: {e}",
# TODO? dataset_path=dataset_path,
dandiset_path=self.dandiset_path,
)
return [
ValidationResult(
origin=ValidationOrigin(
name="dandi",
version=dandi.__version__,
),
severity=Severity.ERROR,
id="dandi.SOFTWARE_ERROR",
scope=Scope.FILE,
# metadata=metadata,
path=self.filepath, # note that it is not relative .path
message=f"Failed to read metadata: {e}",
# TODO? dataset_path=dataset_path,
dandiset_path=self.dandiset_path,
)
]
return []
else:
# TODO: Do something else?
return []
]
return []

def upload(
self,
Expand Down Expand Up @@ -488,6 +476,12 @@ def get_validation_errors(
schema_version: Optional[str] = None,
devel_debug: bool = False,
) -> list[ValidationResult]:
"""Validate NWB asset

If ``schema_version`` was provided, we only validate basic metadata,
and completely skip validation using nwbinspector.inspect_nwb

"""
errors: list[ValidationResult] = pynwb_validate(
self.filepath, devel_debug=devel_debug
)
Expand Down Expand Up @@ -540,7 +534,9 @@ def get_validation_errors(
if devel_debug:
raise
# TODO: might reraise instead of making it into an error
return _pydantic_errors_to_validation_results([e], str(self.filepath))
return _pydantic_errors_to_validation_results(
[e], self.filepath, scope=Scope.FILE
)

from .bids import NWBBIDSAsset

Expand Down Expand Up @@ -743,12 +739,15 @@ def _get_nwb_inspector_version():


def _pydantic_errors_to_validation_results(
errors: Any[list[dict], Exception],
file_path: str,
errors: list[dict | Exception] | ValidationError,
file_path: Path,
scope: Scope,
) -> list[ValidationResult]:
"""Convert list of dict from pydantic into our custom object."""
out = []
for e in errors:
for e in (
errors.errors() if isinstance(errors, ValidationError) else cast(list, errors)
):
if isinstance(e, Exception):
message = getattr(e, "message", str(e))
id = "exception"
Expand All @@ -764,8 +763,7 @@ def _pydantic_errors_to_validation_results(
),
)
)
message = e.get("message", None)
scope = Scope.DANDISET
message = e.get("message", e.get("msg", None))
out.append(
ValidationResult(
origin=ValidationOrigin(
Expand All @@ -775,7 +773,7 @@ def _pydantic_errors_to_validation_results(
severity=Severity.ERROR,
id=id,
scope=scope,
path=Path(file_path),
path=file_path,
message=message,
# TODO? dataset_path=dataset_path,
# TODO? dandiset_path=dandiset_path,
Expand Down
23 changes: 13 additions & 10 deletions dandi/files/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
RESTFullAPIClient,
)
from dandi.metadata import get_default_metadata
from dandi.misctypes import BasePath, Digest
from dandi.misctypes import DUMMY_DANDI_ZARR_CHECKSUM, BasePath, Digest
from dandi.support.digests import get_digest, get_zarr_checksum, md5file_nocache
from dandi.utils import chunked, exclude_from_zarr, pluralize

Expand Down Expand Up @@ -133,6 +133,8 @@ class ZarrStat:
class ZarrAsset(LocalDirectoryAsset[LocalZarrEntry]):
"""Representation of a local Zarr directory"""

_DUMMY_DIGEST = DUMMY_DANDI_ZARR_CHECKSUM

@property
def filetree(self) -> LocalZarrEntry:
"""
Expand Down Expand Up @@ -188,12 +190,13 @@ def get_validation_errors(
schema_version: Optional[str] = None,
devel_debug: bool = False,
) -> list[ValidationResult]:
errors: list[ValidationResult] = []
try:
data = zarr.open(self.filepath)
data = zarr.open(str(self.filepath))
except Exception:
if devel_debug:
raise
return [
errors.append(
ValidationResult(
origin=ValidationOrigin(
name="zarr",
Expand All @@ -205,9 +208,10 @@ def get_validation_errors(
path=self.filepath,
message="Error opening file.",
)
]
)
data = None
if isinstance(data, zarr.Group) and not data:
return [
errors.append(
ValidationResult(
origin=ValidationOrigin(
name="zarr",
Expand All @@ -219,12 +223,12 @@ def get_validation_errors(
path=self.filepath,
message="Zarr group is empty.",
)
]
)
if self._is_too_deep():
msg = f"Zarr directory tree more than {MAX_ZARR_DEPTH} directories deep"
if devel_debug:
raise ValueError(msg)
return [
errors.append(
ValidationResult(
origin=ValidationOrigin(
name="zarr",
Expand All @@ -236,9 +240,8 @@ def get_validation_errors(
path=self.filepath,
message=msg,
)
]
# TODO: Should this be appended to the above errors?
return super().get_validation_errors(
)
return errors + super().get_validation_errors(
schema_version=schema_version, devel_debug=devel_debug
)

Expand Down
6 changes: 5 additions & 1 deletion dandi/misctypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,11 @@ def asdict(self) -> Dict[DigestType, str]:

#: Placeholder digest used in some situations where a digest is required but
#: not actually relevant and would be too expensive to calculate
DUMMY_DIGEST = Digest(algorithm=DigestType.dandi_etag, value=32 * "d" + "-1")
DUMMY_DANDI_ETAG = Digest(algorithm=DigestType.dandi_etag, value=32 * "d" + "-1")
DUMMY_DANDI_ZARR_CHECKSUM = Digest(
algorithm=DigestType.dandi_zarr_checksum,
value=32 * "d" + "-1--1",
)

P = TypeVar("P", bound="BasePath")

Expand Down
27 changes: 12 additions & 15 deletions dandi/pynwb_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,6 @@ def validate(
else: # Fallback if an older version
with pynwb.NWBHDF5IO(path=path, mode="r", load_namespaces=True) as reader:
error_outputs = pynwb.validate(io=reader)
# TODO: return ValidationResult structs
for error_output in error_outputs:
errors.append(
ValidationResult(
Expand All @@ -364,20 +363,18 @@ def validate(
except Exception as exc:
if devel_debug:
raise
errors.extend(
[
ValidationResult(
origin=ValidationOrigin(
name="pynwb",
version=pynwb.__version__,
),
severity=Severity.ERROR,
id="pywnb.GENERIC",
scope=Scope.FILE,
path=Path(path),
message=f"{exc}",
)
]
errors.append(
ValidationResult(
origin=ValidationOrigin(
name="pynwb",
version=pynwb.__version__,
),
severity=Severity.ERROR,
id="pywnb.GENERIC",
scope=Scope.FILE,
path=Path(path),
message=f"{exc}",
)
)

# To overcome
Expand Down