Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Models: define id, add various additional types (genotype, etc), boost model version to 0.3.0 #560

Merged
merged 16 commits into from
Apr 15, 2021
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dandi/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,4 +156,4 @@ class routes(object):
dandiset_draft = "{dandi_instance.redirector}/dandiset/{dandiset[identifier]}/draft"


DANDI_SCHEMA_VERSION = "0.2.0"
DANDI_SCHEMA_VERSION = "0.3.0"
10 changes: 5 additions & 5 deletions dandi/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,11 +270,11 @@ def extract_wasDerivedFrom(metadata):


extract_wasAttributedTo = extract_model_list(
models.Participant, "identifier", "subject_id"
models.Participant, "identifier", "subject_id", id=...
)

extract_wasGeneratedBy = extract_model_list(
models.Session, "name", "session_id", identifier=None
models.Session, "name", "session_id", id=...
)


Expand Down Expand Up @@ -358,7 +358,7 @@ def get_default_metadata(path, digest=None, digest_type=None) -> models.BareAsse

def get_generator(start_time: datetime, end_time: datetime) -> models.Activity:
return models.Activity(
identifier=str(uuid4()),
id=uuid4().urn,
name="Metadata generation",
description="Metadata generated by DANDI cli",
wasAssociatedWith=[
Expand Down Expand Up @@ -483,7 +483,7 @@ def convertv1(data):
]
if oldkey == "license":
value = [
getattr(models.LicenseType, value.replace("-", "").replace(".", ""))
getattr(models.LicenseType, value.replace("-", "_").replace(".", ""))
]
if oldkey == "identifier":
value = f"DANDI:{value}"
Expand Down Expand Up @@ -641,7 +641,7 @@ def generate_context():
name = klass.__name__
fields[name] = f'{klass._ldmeta["nskey"]}:{name}'
for name, field in klass.__fields__.items():
if name == "identifier":
if name == "id":
fields[name] = "@id"
elif name == "schemaKey":
fields[name] = "@type"
Expand Down
15 changes: 9 additions & 6 deletions dandi/model_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,20 +133,23 @@
"rdfs:subClassOf": {"@id": "schema:Enumeration"},
},
{
"@id": "dandi:CC0",
"sameAs": "https://creativecommons.org/publicdomain/zero/1.0/",
"@id": "dandi:CC0-1.0",
"sameAs": "http://spdx.org/licenses/CC0-1.0",
"rdfs:seeAlso": "https://creativecommons.org/publicdomain/zero/1.0/legalcode",
"@type": "dandi:LicenseType",
"rdfs:label": "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication",
},
{
"@id": "dandi:CCBY40",
"sameAs": "https://creativecommons.org/licenses/by/4.0/",
"@id": "dandi:CC-BY-4.0",
"sameAs": "http://spdx.org/licenses/CC-BY-4.0",
"rdfs:seeAlso": "https://creativecommons.org/licenses/by/4.0/legalcode",
"@type": "dandi:LicenseType",
"rdfs:label": "Attribution 4.0 International (CC BY 4.0)",
},
{
"@id": "dandi:CCBYNC40",
"sameAs": "https://creativecommons.org/licenses/by-nc/4.0",
"@id": "dandi:CC-BY-NC-4.0",
"sameAs": "http://spdx.org/licenses/CC-BY-NC-4.0",
"rdfs:seeAlso": "https://creativecommons.org/licenses/by-nc/4.0/legalcode",
"@type": "dandi:LicenseType",
"rdfs:label": "Attribution-NonCommercial 4.0 International (CC BY-NC 4.0)",
},
Expand Down
80 changes: 66 additions & 14 deletions dandi/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import sys
from typing import Any, Dict, List, Optional, Type, Union

from pydantic import UUID4, BaseModel, ByteSize, EmailStr, Field, HttpUrl, validator
from pydantic import BaseModel, ByteSize, EmailStr, Field, HttpUrl, validator
from ruamel import yaml

from .consts import DANDI_SCHEMA_VERSION
Expand Down Expand Up @@ -42,7 +42,7 @@ def create_enum(data):
key = item["@id"].split(":")[-1]
if key in items:
key = item["@id"].replace(":", "_")
items[key.replace("-", "_")] = item["@id"]
items[key.replace("-", "_").replace(".", "")] = item["@id"]
if klass is None or len(items) == 0:
raise ValueError(f"Could not generate a klass or items from {data}")
newklass = Enum(klass, items)
Expand Down Expand Up @@ -129,6 +129,8 @@ def encode(self, o):


class DandiBaseModel(BaseModel):
id: Optional[str] = Field(description="Uniform resource identifier", readOnly=True)

@classmethod
def unvalidated(__pydantic_cls__: Type[BaseModel], **data: Any) -> BaseModel:
"""Allow model to be returned without validation"""
Expand Down Expand Up @@ -274,7 +276,7 @@ class SpeciesType(TypeModel):
class Disorder(TypeModel):
"""Biolink, SNOMED, or other identifier for disorder studied"""

dxdate: Optional[List[date]] = Field(
dxdate: Optional[List[Union[date, datetime]]] = Field(
None,
title="Dates of diagnosis",
description="Dates of diagnosis",
Expand Down Expand Up @@ -408,6 +410,23 @@ class Software(DandiBaseModel):
}


class Agent(DandiBaseModel):
    # Generic agent record. Accepted by Activity.wasAssociatedWith alongside
    # Person, Organization and Software.

    # Optional external identifier for the agent.
    identifier: Optional[Identifier] = Field(
        None,
        title="Identifier",
        description="Identifier for an agent",
        nskey="schema",
    )
    # Required name of the agent.
    name: str = Field(nskey="schema")
    # Optional web address for the agent.
    url: Optional[HttpUrl] = Field(None, nskey="schema")
    # Type discriminator. The Literal must equal the default value: it was
    # previously Literal["Software"] (copied from the Software model), which
    # rejected records carrying schemaKey "Agent" and accepted "Software".
    schemaKey: Literal["Agent"] = Field("Agent", readOnly=True)

    # Linked-data metadata: this class is declared a subclass of prov:Agent
    # and its term lives in the "dandi" namespace.
    _ldmeta = {
        "rdfs:subClassOf": ["prov:Agent"],
        "nskey": "dandi",
    }


class EthicsApproval(DandiBaseModel):
"""Information about ethics committee approval for project"""

Expand Down Expand Up @@ -520,14 +539,14 @@ class Activity(DandiBaseModel):
description: Optional[str] = Field(
None, description="The description of the activity.", nskey="schema"
)
startDate: Optional[date] = Field(None, nskey="schema")
endDate: Optional[date] = Field(None, nskey="schema")
startDate: Optional[datetime] = Field(None, nskey="schema")
endDate: Optional[datetime] = Field(None, nskey="schema")

# isPartOf: Optional["Activity"] = Field(None, nskey="schema")
# hasPart: Optional["Activity"] = Field(None, nskey="schema")
wasAssociatedWith: Optional[List[Union[Person, Organization, Software]]] = Field(
None, nskey="prov"
)
wasAssociatedWith: Optional[
List[Union[Person, Organization, Software, Agent]]
] = Field(None, nskey="prov")

schemaKey: Literal["Activity"] = Field("Activity", readOnly=True)

Expand Down Expand Up @@ -560,6 +579,35 @@ class Session(Activity):
schemaKey: Literal["Session"] = Field("Session", readOnly=True)


class Locus(DandiBaseModel):
    # Genotyping locus record; used as the type of GenotypeInfo.locus.

    # One identifier or a list of identifiers (no default is passed to Field).
    identifier: Union[Identifier, List[Identifier]] = Field(
        description="Identifier for genotyping locus",
    )
    # Plain-string fields; no default is passed to Field().
    locus_type: str = Field()
    symbol: str = Field()
    # Type discriminator; literal and default both equal the class name.
    schemaKey: Literal["Locus"] = Field("Locus", readOnly=True)

    # Linked-data metadata: the term lives in the "dandi" namespace.
    _ldmeta = {
        "nskey": "dandi",
    }


class Allele(DandiBaseModel):
    # Genotyping allele record; used as the type of GenotypeInfo.allele1 and
    # GenotypeInfo.allele2.

    # One identifier or a list of identifiers (no default is passed to Field).
    identifier: Union[Identifier, List[Identifier]] = Field(
        description="Identifier for genotyping allele",
    )
    # Plain-string fields; no default is passed to Field().
    allele_type: str = Field()
    symbol: str = Field()
    # Type discriminator; literal and default both equal the class name.
    schemaKey: Literal["Allele"] = Field("Allele", readOnly=True)

    # Linked-data metadata: the term lives in the "dandi" namespace.
    _ldmeta = {
        "nskey": "dandi",
    }


class GenotypeInfo(DandiBaseModel):
    # Genotype observation at a single locus: the locus plus two alleles.
    # Participant.genotype accepts a list of these records.

    locus: Locus = Field(
        description="Locus at which information was extracted",
    )
    allele1: Allele = Field(
        description="Information about one allele",
    )
    allele2: Allele = Field(
        description="Information about other allele",
    )
    # Optional list of sessions; "Session" is kept as a string reference,
    # exactly as in the original declaration.
    wasGeneratedBy: Optional[List["Session"]] = Field(
        None,
        nskey="prov",
    )
    # Type discriminator; literal and default both equal the class name.
    schemaKey: Literal["GenotypeInfo"] = Field("GenotypeInfo", readOnly=True)

    # Linked-data metadata: the term lives in the "dandi" namespace.
    _ldmeta = {
        "nskey": "dandi",
    }


class RelatedParticipant(DandiBaseModel):
identifier: Optional[Identifier] = Field(None, nskey="schema")
name: Optional[str] = Field(None, title="A name of the Participant", nskey="schema")
Expand Down Expand Up @@ -608,7 +656,7 @@ class Participant(DandiBaseModel):
description="OBI based identifier for sex of the sample if available",
nskey="dandi",
)
genotype: Optional[Identifier] = Field(
genotype: Optional[Union[List[GenotypeInfo], Identifier]] = Field(
None, description="Genotype descriptor of biosample if available", nskey="dandi"
)
species: Optional[SpeciesType] = Field(
Expand All @@ -628,7 +676,7 @@ class Participant(DandiBaseModel):
schemaKey: Literal["Participant"] = Field("Participant", readOnly=True)

_ldmeta = {
"rdfs:subClassOf": ["schema:Person", "prov:Agent"],
"rdfs:subClassOf": ["prov:Agent"],
"rdfs:label": "Information about the participant.",
"nskey": "dandi",
}
Expand All @@ -638,7 +686,6 @@ class BioSample(DandiBaseModel):
"""Description of the sample that was studied"""

identifier: Optional[Identifier] = Field(nskey="schema")
altName: Optional[List[Identifier]] = Field(None, nskey="dandi")
sampleType: Optional[SampleType] = Field(
None, description="OBI based identifier for the sample used", nskey="dandi"
)
Expand Down Expand Up @@ -672,7 +719,7 @@ class BioSample(DandiBaseModel):


class Identifiable(DandiBaseModel):
identifier: Identifier = Field(readOnly=True, nskey="schema")
identifier: Optional[Identifier] = Field(readOnly=True, nskey="schema")


class CommonModel(DandiBaseModel):
Expand Down Expand Up @@ -773,7 +820,6 @@ def check_data(cls, values):
max_length=150,
nskey="schema",
)

description: str = Field(
description="A description of the Dandiset", max_length=3000, nskey="schema"
)
Expand All @@ -783,6 +829,12 @@ def check_data(cls, values):
nskey="schema",
min_items=1,
)
dateCreated: Optional[datetime] = Field(
nskey="schema", title="Dandiset creation date and time", readOnly=True
)
dateModified: Optional[datetime] = Field(
nskey="schema", title="Last modification date and time", readOnly=True
)

citation: TempOptional[str] = Field(readOnly=True, nskey="schema")

Expand Down Expand Up @@ -888,7 +940,7 @@ class BareAssetMeta(CommonModel):
class AssetMeta(BareAssetMeta, Identifiable):
"""Metadata used to describe an asset on the server."""

identifier: UUID4 = Field(readOnly=True, nskey="schema")
id: str = Field(readOnly=True, description="URN from UUID4")

# on publish or set by server
contentUrl: Optional[List[HttpUrl]] = Field(None, readOnly=True, nskey="schema")
Expand Down
2 changes: 1 addition & 1 deletion dandi/tests/data/metadata/dandimeta_migration.new.json
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@
"single-neurons"
],
"license": [
"dandi:CCBY40"
"dandi:CC-BY-4.0"
],
"name": "A NWB-based dataset and processing pipeline of human single-neuron activity during a declarative memory task",
"schemaVersion": null,
Expand Down
2 changes: 1 addition & 1 deletion dandi/tests/data/metadata/metadata2asset.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"identifier": "0b0a1a0b-e3ea-4cf6-be94-e02c830d54be",
"id": "urn:uuid:0b0a1a0b-e3ea-4cf6-be94-e02c830d54be",
"schemaVersion": null,
"keywords": [
"test",
Expand Down
2 changes: 1 addition & 1 deletion dandi/tests/data/metadata/metadata2asset_simple1.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"schemaVersion": null,
"identifier": "bfc23fb6192b41c083a7257e09a3702b",
"id": "urn:uuid:bfc23fb6192b41c083a7257e09a3702b",
"keywords": [
"keyword1",
"keyword 2"
Expand Down
6 changes: 2 additions & 4 deletions dandi/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def test_metadata2asset(schema_dir):
"encodingFormat": "application/x-nwb",
"experiment_description": "Experiment Description",
"experimenter": "Joe Q. Experimenter",
"identifier": "6a42c273881f45e8ad4d538f7ede1437",
"id": "urn:uuid:0b0a1a0b-e3ea-4cf6-be94-e02c830d54be",
"institution": "University College",
"keywords": ["test", "sample", "example", "test-case"],
"lab": "Retriever Laboratory",
Expand Down Expand Up @@ -112,7 +112,6 @@ def test_metadata2asset(schema_dir):
data_as_dict["schemaVersion"] = DANDI_SCHEMA_VERSION
assert data == BareAssetMeta(**data_as_dict)
bare_dict = deepcopy(data_as_dict)
bare_dict.pop("identifier")
assert data.json_dict() == bare_dict
validate_asset_json(data_as_dict, schema_dir)

Expand All @@ -127,7 +126,7 @@ def test_metadata2asset_simple1(schema_dir):
"nwb_version": "2.2.5",
"experiment_description": "experiment_description1",
"experimenter": ("experimenter1",),
"identifier": "bfc23fb6192b41c083a7257e09a3702b",
"id": "urn:uuid:bfc23fb6192b41c083a7257e09a3702b",
satra marked this conversation as resolved.
Show resolved Hide resolved
"institution": "institution1",
"keywords": ["keyword1", "keyword 2"],
"lab": "lab1",
Expand All @@ -152,7 +151,6 @@ def test_metadata2asset_simple1(schema_dir):
data_as_dict["schemaVersion"] = DANDI_SCHEMA_VERSION
assert data == BareAssetMeta(**data_as_dict)
bare_dict = deepcopy(data_as_dict)
bare_dict.pop("identifier")
assert data.json_dict() == bare_dict
validate_asset_json(data_as_dict, schema_dir)

Expand Down
6 changes: 3 additions & 3 deletions dandi/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@ def test_asset():
(
LicenseType,
{
"CC0": "dandi:CC0",
"CCBY40": "dandi:CCBY40",
"CCBYNC40": "dandi:CCBYNC40",
"CC0_10": "dandi:CC0-1.0",
"CC_BY_40": "dandi:CC-BY-4.0",
"CC_BY_NC_40": "dandi:CC-BY-NC-4.0",
satra marked this conversation as resolved.
Show resolved Hide resolved
},
),
(
Expand Down