Update for dandischema 0.8.0 #1241

Merged: 2 commits, Mar 10, 2023
dandi/download.py (34 changes: 24 additions & 10 deletions)

@@ -53,7 +53,7 @@
 from .support.digests import get_digest, get_zarr_checksum
 from .support.iterators import IteratorWithAggregation
 from .support.pyout import naturalsize
-from .support.typing import Literal
+from .support.typing import Literal, Protocol
 from .utils import (
     abbrev_prompt,
     ensure_datetime,
@@ -520,6 +520,14 @@ def _populate_dandiset_yaml(
 }


+class Hasher(Protocol):
+    def update(self, data: bytes) -> None:
+        ...
+
+    def hexdigest(self) -> str:
+        ...
+
+
 def _download_file(
     downloader: Callable[[int], Iterator[bytes]],
     path: Path,

Check notice (Code scanning / CodeQL): Statement has no effect (reported for each `...` placeholder in the Protocol body).
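The new `Hasher` protocol is checked structurally rather than by inheritance, so `hashlib` objects and `ETagHashlike` instances can both be passed where a `Hasher` is annotated without subclassing anything. A minimal stand-alone sketch (the `hash_chunks` helper is made up for illustration; `Protocol` needs Python 3.8+ or the `typing_extensions` shim updated later in this PR):

```python
import hashlib
from typing import Iterable, Protocol


class Hasher(Protocol):
    def update(self, data: bytes) -> None:
        ...

    def hexdigest(self) -> str:
        ...


def hash_chunks(hasher: Hasher, chunks: Iterable[bytes]) -> str:
    # Any object providing matching update()/hexdigest() methods type-checks
    # here, without inheriting from Hasher.
    for chunk in chunks:
        hasher.update(chunk)
    return hasher.hexdigest()


print(hash_chunks(hashlib.sha256(), [b"hello ", b"world"]))
```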
@@ -646,20 +654,26 @@ def _download_file(

     yield {"status": "downloading"}

-    algo, digester, digest, downloaded_digest = None, None, None, None
+    algo: Optional[str] = None
+    digester: Optional[Callable[[], Hasher]] = None
+    digest: Optional[str] = None
+    downloaded_digest: Optional[Hasher] = None
     if digests:
         # choose first available for now.
         # TODO: reuse that sorting based on speed
         for algo, digest in digests.items():
-            if algo == "dandi-etag":
+            if algo == "dandi-etag" and size is not None:
                 from dandischema.digests.dandietag import ETagHashlike

-                digester = lambda: ETagHashlike(size)  # noqa: E731
+                # Instantiate outside the lambda so that mypy is assured that
+                # `size` is not None:
+                hasher = ETagHashlike(size)
+                digester = lambda: hasher  # noqa: E731
             else:
                 digester = getattr(hashlib, algo, None)
-            if digester:
+            if digester is not None:
                 break
-        if not digester:
+        if digester is None:
             lgr.warning("Found no digests in hashlib for any of %s", str(digests))

     # TODO: how do we discover the total size????
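The "instantiate outside the lambda" comment reflects a general mypy behavior: a narrowing check such as `size is not None` is not applied inside the body of a lambda or nested function, since the captured variable could change before the closure runs. A stand-alone sketch of that workaround (names here are illustrative, not taken from dandi):

```python
from typing import Callable, Optional


def make_factory(size: Optional[int]) -> Optional[Callable[[], bytearray]]:
    if size is None:
        return None
    # mypy would reject `lambda: bytearray(size)` here: inside the lambda,
    # `size` is still seen as Optional[int] despite the check above.
    buffer = bytearray(size)  # narrowed to int at this point
    return lambda: buffer
```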
@@ -725,12 +739,12 @@ def _download_file(

     if downloaded_digest and not resuming:
         assert downloaded_digest is not None
-        downloaded_digest = downloaded_digest.hexdigest()  # we care only about hex
+        final_digest = downloaded_digest.hexdigest()  # we care only about hex
         if digest_callback is not None:
             assert isinstance(algo, str)
-            digest_callback(algo, downloaded_digest)
-        if digest != downloaded_digest:
-            msg = f"{algo}: downloaded {downloaded_digest} != {digest}"
+            digest_callback(algo, final_digest)
+        if digest != final_digest:
+            msg = f"{algo}: downloaded {final_digest} != {digest}"
             yield {"checksum": "differs", "status": "error", "message": msg}
             lgr.debug("%s is different: %s.", path, msg)
             return
dandi/files/bids.py (4 changes: 3 additions & 1 deletion)

@@ -92,7 +92,9 @@ def _validate(self) -> None:
                 self._asset_errors: dict[str, list[ValidationResult]] = defaultdict(
                     list
                 )
-                self._asset_metadata = defaultdict(BareAsset.unvalidated)
+                # Don't apply eta-reduction to the lambda, as mypy needs to be
+                # assured that defaultdict's argument takes no parameters.
+                self._asset_metadata = defaultdict(lambda: BareAsset.unvalidated())
                 for result in results:
                     if result.id in BIDS_ASSET_ERRORS:
                         assert result.path
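The eta-reduction comment concerns `defaultdict` typing: mypy expects the default factory to be a zero-argument callable returning the value type, and wrapping the call in a lambda states that contract explicitly. A small stand-alone sketch using a plain dataclass in place of dandischema's `BareAsset`:

```python
from collections import defaultdict
from dataclasses import dataclass, field
from typing import DefaultDict, List


@dataclass
class AssetMeta:
    contributors: List[str] = field(default_factory=list)


# The default factory must be a callable taking no arguments and returning
# the value type; the lambda makes that contract explicit to mypy.
metadata: DefaultDict[str, AssetMeta] = defaultdict(lambda: AssetMeta())

metadata["sub-01/anat.nii.gz"].contributors.append("Sub1")
print(metadata["sub-01/anat.nii.gz"])
```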
dandi/metadata.py (32 changes: 17 additions & 15 deletions)

@@ -18,18 +18,18 @@
     Type,
     TypeVar,
     Union,
-    cast,
 )
 from uuid import uuid4
 from xml.dom.minidom import parseString

 from dandischema import models
+from pydantic import ByteSize, parse_obj_as
 import requests
 import tenacity

 from . import __version__, get_logger
 from .consts import metadata_all_fields
-from .misctypes import Digest, LocalReadableFile, Readable
+from .misctypes import DUMMY_DANDI_ETAG, Digest, LocalReadableFile, Readable
 from .pynwb_utils import (
     _get_pynwb_metadata,
     get_neurodata_types,
@@ -90,20 +90,21 @@ def get_metadata(
             dandiset_path,
             bids_dataset_description=bids_dataset_description,
         )
+        assert isinstance(df, bids.BIDSAsset)
         if not digest:
-            _digest = "0" * 32 + "-1"
-            digest = Digest.dandi_etag(_digest)
+            digest = DUMMY_DANDI_ETAG
         path_metadata = df.get_metadata(digest=digest)
-        assert isinstance(df, bids.BIDSAsset)
         meta["bids_version"] = df.get_validation_bids_version()
         # there might be a more elegant way to do this:
-        for key in metadata_all_fields:
-            try:
-                value = getattr(path_metadata.wasAttributedTo[0], key)
-            except AttributeError:
-                pass
-            else:
-                meta[key] = value
+        if path_metadata.wasAttributedTo is not None:
+            attributed = path_metadata.wasAttributedTo[0]
+            for key in metadata_all_fields:
+                try:
+                    value = getattr(attributed, key)
+                except AttributeError:
+                    pass
+                else:
+                    meta[key] = value

     if r.get_filename().endswith((".NWB", ".nwb")):
         if nwb_has_external_links(r):
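Several of the metadata changes (here and in the test below) follow the same pattern required by dandischema 0.8.0: fields such as `wasAttributedTo` are typed `Optional`, so code must guard against `None` before indexing. A minimal sketch with a generic pydantic model standing in for dandischema's classes:

```python
from typing import List, Optional

from pydantic import BaseModel


class Participant(BaseModel):
    identifier: Optional[str] = None


class AssetMeta(BaseModel):
    wasAttributedTo: Optional[List[Participant]] = None


meta = AssetMeta(wasAttributedTo=[Participant(identifier="Sub1")])

# Guard before indexing: the field may legitimately be None.
if meta.wasAttributedTo is not None:
    print(meta.wasAttributedTo[0].identifier)
```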
@@ -623,7 +624,7 @@ def extract_anatomy(metadata: dict) -> Optional[List[models.Anatomy]]:


 def extract_model(modelcls: Type[M], metadata: dict, **kwargs: Any) -> M:
-    m = cast(M, modelcls.unvalidated())
+    m = modelcls.unvalidated()
     for field in m.__fields__.keys():
         value = kwargs.get(field, extract_field(field, metadata))
         if value is not None:
@@ -1002,13 +1003,14 @@ def add_common_metadata(
         metadata.blobDateModified = mtime
         if mtime > metadata.dateModified:
             lgr.warning("mtime %s of %s is in the future", mtime, r)
-    metadata.contentSize = r.get_size()
+    size = r.get_size()
     if digest is not None and digest.algorithm is models.DigestType.dandi_zarr_checksum:
         m = re.fullmatch(
             r"(?P<hash>[0-9a-f]{32})-(?P<files>[0-9]+)--(?P<size>[0-9]+)", digest.value
         )
         if m:
-            metadata.contentSize = int(m["size"])
+            size = int(m["size"])
+    metadata.contentSize = parse_obj_as(ByteSize, size)
     if metadata.wasGeneratedBy is None:
         metadata.wasGeneratedBy = []
     metadata.wasGeneratedBy.append(get_generator(start_time, end_time))
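`contentSize` now goes through `parse_obj_as` because the 0.8.0 models validate assigned values, and the field is presumably typed as pydantic's `ByteSize`. A short illustration of the pydantic v1 API involved (the values are only examples):

```python
from pydantic import ByteSize, parse_obj_as

# parse_obj_as validates/coerces an arbitrary object against a type.
size = parse_obj_as(ByteSize, 18_554_258)
print(int(size))               # 18554258
print(size.human_readable())   # e.g. "17.7MiB"

# ByteSize also understands human-friendly strings:
print(int(parse_obj_as(ByteSize, "1.5 GiB")))  # 1610612736
```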
dandi/support/typing.py (4 changes: 2 additions & 2 deletions)

@@ -1,6 +1,6 @@
 import sys

 if sys.version_info >= (3, 8):
-    from typing import Literal, TypedDict  # noqa: F401
+    from typing import Literal, Protocol, TypedDict  # noqa: F401
 else:
-    from typing_extensions import Literal, TypedDict  # noqa: F401
+    from typing_extensions import Literal, Protocol, TypedDict  # noqa: F401

Check notice (Code scanning / CodeQL): Unused import. Import of 'Literal' is not used. Import of 'Protocol' is not used. Import of 'TypedDict' is not used.
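This shim lets the rest of the codebase import `Protocol` (along with `Literal` and `TypedDict`) from one place and still run on Python 3.7, where these names live only in `typing_extensions`. A brief usage sketch (the `SupportsRead` protocol here is made up for illustration):

```python
from dandi.support.typing import Literal, Protocol


class SupportsRead(Protocol):
    def read(self, size: int = -1) -> bytes:
        ...


Mode = Literal["rb", "wb"]
```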
dandi/tests/test_upload.py (1 change: 1 addition & 0 deletions)

@@ -225,6 +225,7 @@ def test_upload_bids_metadata(
         if "sub-" in apath:
             metadata = dandiset.get_asset_by_path(apath).get_metadata()
             # Hard-coded check for the subject identifier set in the fixture:
+            assert metadata.wasAttributedTo is not None
             assert metadata.wasAttributedTo[0].identifier == "Sub1"


setup.cfg (2 changes: 1 addition & 1 deletion)

@@ -34,7 +34,7 @@ install_requires =
     bidsschematools >= 0.5.0
     click
     click-didyoumean
-    dandischema ~= 0.7.0
+    dandischema ~= 0.8.0
     etelemetry >= 0.2.2
     fasteners
     fscacher >= 0.3.0