Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BF+RF validation #1209

Merged
merged 11 commits into from
Feb 9, 2023
Merged
128 changes: 63 additions & 65 deletions dandi/files/bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pathlib import Path
import re
from threading import Lock
from typing import Any, BinaryIO, Generic, Optional
from typing import Any, BinaryIO, Generic, Optional, cast
from xml.etree.ElementTree import fromstring

import dandischema
Expand All @@ -28,7 +28,7 @@
import dandi
from dandi.dandiapi import RemoteAsset, RemoteDandiset, RESTFullAPIClient
from dandi.metadata import get_default_metadata, nwb2asset
from dandi.misctypes import DUMMY_DIGEST, Digest, P
from dandi.misctypes import DUMMY_DANDI_ETAG, Digest, P
from dandi.organize import validate_organized_path
from dandi.pynwb_utils import validate as pynwb_validate
from dandi.support.digests import get_dandietag, get_digest
Expand Down Expand Up @@ -130,7 +130,9 @@ def get_validation_errors(
except Exception as e:
if devel_debug:
raise
return _pydantic_errors_to_validation_results(e, str(self.filepath))
return _pydantic_errors_to_validation_results(
[e], str(self.filepath), scope=Scope.DANDISET
)
return []


Expand All @@ -145,6 +147,8 @@ class LocalAsset(DandiFile):
#: (i.e., relative to the Dandiset's root)
path: str

_DUMMY_DIGEST = DUMMY_DANDI_ETAG

@abstractmethod
def get_digest(self) -> Digest:
"""
Expand All @@ -168,64 +172,48 @@ def get_validation_errors(
schema_version: Optional[str] = None,
devel_debug: bool = False,
) -> list[ValidationResult]:
if schema_version is not None:
current_version = get_schema_version()
if schema_version != current_version:
raise ValueError(
f"Unsupported schema version: {schema_version}; expected {current_version}"
)
try:
asset = self.get_metadata(digest=DUMMY_DIGEST)
BareAsset(**asset.dict())
except ValidationError as e:
if devel_debug:
raise
# TODO: how do we get **all** errors from validation - there must be a way
return [
ValidationResult(
origin=ValidationOrigin(
name="dandischema",
version=dandischema.__version__,
),
severity=Severity.ERROR,
id="dandischema.TODO",
scope=Scope.FILE,
# metadata=metadata,
path=self.filepath, # note that it is not relative .path
message=str(e),
# TODO? dataset_path=dataset_path,
dandiset_path=self.dandiset_path,
)
]
except Exception as e:
if devel_debug:
raise
lgr.warning(
"Unexpected validation error for %s: %s",
self.filepath,
e,
extra={"validating": True},
current_version = get_schema_version()
if schema_version is None:
schema_version = current_version
if schema_version != current_version:
raise ValueError(
f"Unsupported schema version: {schema_version}; expected {current_version}"
)
try:
asset = self.get_metadata(digest=self._DUMMY_DIGEST)
BareAsset(**asset.dict())
except ValidationError as e:
if devel_debug:
raise
return _pydantic_errors_to_validation_results(
e, str(self.filepath), scope=Scope.FILE
)
except Exception as e:
if devel_debug:
raise
lgr.warning(
"Unexpected validation error for %s: %s",
self.filepath,
e,
extra={"validating": True},
)
return [
ValidationResult(
origin=ValidationOrigin(
name="dandi",
version=dandi.__version__,
),
severity=Severity.ERROR,
id="dandi.SOFTWARE_ERROR",
scope=Scope.FILE,
# metadata=metadata,
path=self.filepath, # note that it is not relative .path
message=f"Failed to read metadata: {e}",
# TODO? dataset_path=dataset_path,
dandiset_path=self.dandiset_path,
)
return [
ValidationResult(
origin=ValidationOrigin(
name="dandi",
version=dandi.__version__,
),
severity=Severity.ERROR,
id="dandi.SOFTWARE_ERROR",
scope=Scope.FILE,
# metadata=metadata,
path=self.filepath, # note that it is not relative .path
message=f"Failed to read metadata: {e}",
# TODO? dataset_path=dataset_path,
dandiset_path=self.dandiset_path,
)
]
return []
else:
# TODO: Do something else?
return []
]
return []

def upload(
self,
Expand Down Expand Up @@ -488,6 +476,12 @@ def get_validation_errors(
schema_version: Optional[str] = None,
devel_debug: bool = False,
) -> list[ValidationResult]:
"""Validate NWB asset

If `schema_version` was provided, we only validate basic metadata,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
If `schema_version` was provided, we only validate basic metadata,
If ``schema_version`` was provided, we only validate basic metadata,

One backtick for things that can be linked to (like classes and functions), two backticks for other code.

and completely skip validation using nwbinspector.inspect_nwb

"""
errors: list[ValidationResult] = pynwb_validate(
self.filepath, devel_debug=devel_debug
)
Expand Down Expand Up @@ -540,7 +534,9 @@ def get_validation_errors(
if devel_debug:
raise
# TODO: might reraise instead of making it into an error
return _pydantic_errors_to_validation_results([e], str(self.filepath))
return _pydantic_errors_to_validation_results(
[e], str(self.filepath), scope=Scope.FILE
)

from .bids import NWBBIDSAsset

Expand Down Expand Up @@ -743,12 +739,15 @@ def _get_nwb_inspector_version():


def _pydantic_errors_to_validation_results(
errors: Any[list[dict], Exception],
errors: list[dict | Exception] | ValidationError,
file_path: str,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the only use of file_path in this function converts it to a Path, it would be better to make this argument a Path and remove the str() call when passing it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indeed! thanks, done in b6f8180

scope: Scope,
) -> list[ValidationResult]:
"""Convert list of dict from pydantic into our custom object."""
out = []
for e in errors:
for e in (
errors.errors() if isinstance(errors, ValidationError) else cast(list, errors)
):
if isinstance(e, Exception):
message = getattr(e, "message", str(e))
id = "exception"
Expand All @@ -764,8 +763,7 @@ def _pydantic_errors_to_validation_results(
),
)
)
message = e.get("message", None)
scope = Scope.DANDISET
message = e.get("message", e.get("msg", None))
out.append(
ValidationResult(
origin=ValidationOrigin(
Expand Down
23 changes: 13 additions & 10 deletions dandi/files/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
RESTFullAPIClient,
)
from dandi.metadata import get_default_metadata
from dandi.misctypes import BasePath, Digest
from dandi.misctypes import DUMMY_DANDI_ZARR_CHECKSUM, BasePath, Digest
from dandi.support.digests import get_digest, get_zarr_checksum, md5file_nocache
from dandi.utils import chunked, exclude_from_zarr, pluralize

Expand Down Expand Up @@ -133,6 +133,8 @@ class ZarrStat:
class ZarrAsset(LocalDirectoryAsset[LocalZarrEntry]):
"""Representation of a local Zarr directory"""

_DUMMY_DIGEST = DUMMY_DANDI_ZARR_CHECKSUM

@property
def filetree(self) -> LocalZarrEntry:
"""
Expand Down Expand Up @@ -188,12 +190,13 @@ def get_validation_errors(
schema_version: Optional[str] = None,
devel_debug: bool = False,
) -> list[ValidationResult]:
errors: list[ValidationResult] = []
try:
data = zarr.open(self.filepath)
data = zarr.open(str(self.filepath))
except Exception:
if devel_debug:
raise
return [
errors.append(
ValidationResult(
origin=ValidationOrigin(
name="zarr",
Expand All @@ -205,9 +208,10 @@ def get_validation_errors(
path=self.filepath,
message="Error opening file.",
)
]
)
data = None
if isinstance(data, zarr.Group) and not data:
return [
errors.append(
ValidationResult(
origin=ValidationOrigin(
name="zarr",
Expand All @@ -219,12 +223,12 @@ def get_validation_errors(
path=self.filepath,
message="Zarr group is empty.",
)
]
)
if self._is_too_deep():
msg = f"Zarr directory tree more than {MAX_ZARR_DEPTH} directories deep"
if devel_debug:
raise ValueError(msg)
return [
errors.append(
ValidationResult(
origin=ValidationOrigin(
name="zarr",
Expand All @@ -236,9 +240,8 @@ def get_validation_errors(
path=self.filepath,
message=msg,
)
]
# TODO: Should this be appended to the above errors?
return super().get_validation_errors(
)
return errors + super().get_validation_errors(
schema_version=schema_version, devel_debug=devel_debug
)

Expand Down
6 changes: 5 additions & 1 deletion dandi/misctypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,11 @@ def asdict(self) -> Dict[DigestType, str]:

#: Placeholder digest used in some situations where a digest is required but
#: not actually relevant and would be too expensive to calculate
DUMMY_DIGEST = Digest(algorithm=DigestType.dandi_etag, value=32 * "d" + "-1")
DUMMY_DANDI_ETAG = Digest(algorithm=DigestType.dandi_etag, value=32 * "d" + "-1")
DUMMY_DANDI_ZARR_CHECKSUM = Digest(
algorithm=DigestType.dandi_zarr_checksum,
value="2ed39fd5ae56fd4177c4eb503d163528-1--1",
)

P = TypeVar("P", bound="BasePath")

Expand Down
26 changes: 12 additions & 14 deletions dandi/pynwb_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,20 +364,18 @@ def validate(
except Exception as exc:
if devel_debug:
raise
errors.extend(
[
ValidationResult(
origin=ValidationOrigin(
name="pynwb",
version=pynwb.__version__,
),
severity=Severity.ERROR,
id="pywnb.GENERIC",
scope=Scope.FILE,
path=Path(path),
message=f"{exc}",
)
]
errors.append(
ValidationResult(
origin=ValidationOrigin(
name="pynwb",
version=pynwb.__version__,
),
severity=Severity.ERROR,
id="pywnb.GENERIC",
scope=Scope.FILE,
path=Path(path),
message=f"{exc}",
)
)

# To overcome
Expand Down