diff --git a/dandi/consts.py b/dandi/consts.py index e192f0c30..53f228749 100644 --- a/dandi/consts.py +++ b/dandi/consts.py @@ -156,4 +156,4 @@ class routes(object): dandiset_draft = "{dandi_instance.redirector}/dandiset/{dandiset[identifier]}/draft" -DANDI_SCHEMA_VERSION = "0.2.0" +DANDI_SCHEMA_VERSION = "0.3.0" diff --git a/dandi/metadata.py b/dandi/metadata.py index cb00e8c88..c734b49a2 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -270,11 +270,11 @@ def extract_wasDerivedFrom(metadata): extract_wasAttributedTo = extract_model_list( - models.Participant, "identifier", "subject_id" + models.Participant, "identifier", "subject_id", id=... ) extract_wasGeneratedBy = extract_model_list( - models.Session, "name", "session_id", identifier=None + models.Session, "name", "session_id", id=... ) @@ -358,7 +358,7 @@ def get_default_metadata(path, digest=None, digest_type=None) -> models.BareAsse def get_generator(start_time: datetime, end_time: datetime) -> models.Activity: return models.Activity( - identifier=str(uuid4()), + id=uuid4().urn, name="Metadata generation", description="Metadata generated by DANDI cli", wasAssociatedWith=[ @@ -483,7 +483,10 @@ def convertv1(data): ] if oldkey == "license": value = [ - getattr(models.LicenseType, value.replace("-", "").replace(".", "")) + getattr( + models.LicenseType, + value.replace("dandi", "spdx").replace("-", "_").replace(".", ""), + ) ] if oldkey == "identifier": value = f"DANDI:{value}" @@ -603,6 +606,10 @@ def convertv1(data): def migrate2newschema(meta): newmeta = convertv1(meta) + if "version" in newmeta: + newmeta["id"] = f"{newmeta['identifier']}/{newmeta['version']}" + else: + newmeta["id"] = f"{newmeta['identifier']}/draft" dandimeta = models.DandisetMeta.unvalidated(**newmeta) return dandimeta @@ -613,6 +620,7 @@ def generate_context(): fields = { "@version": 1.1, "dandi": "http://schema.dandiarchive.org/", + "dandiasset": "http://iri.dandiarchive.org/", "DANDI": "http://identifiers.org/DANDI:", "dct": "http://purl.org/dc/terms/", "owl": "http://www.w3.org/2002/07/owl#", @@ -631,6 +639,7 @@ def generate_context(): "ORCID": "https://orcid.org/", "ROR": "https://ror.org/", "PATO": "http://purl.obolibrary.org/obo/PATO_", + "spdx": "http://spdx.org/licenses/", } for val in dir(models): klass = getattr(models, val) @@ -641,7 +650,7 @@ def generate_context(): name = klass.__name__ fields[name] = f'{klass._ldmeta["nskey"]}:{name}' for name, field in klass.__fields__.items(): - if name == "identifier": + if name == "id": fields[name] = "@id" elif name == "schemaKey": fields[name] = "@type" diff --git a/dandi/model_types.py b/dandi/model_types.py index fbdcb9d19..08f54b0ae 100644 --- a/dandi/model_types.py +++ b/dandi/model_types.py @@ -133,20 +133,20 @@ "rdfs:subClassOf": {"@id": "schema:Enumeration"}, }, { - "@id": "dandi:CC0", - "sameAs": "https://creativecommons.org/publicdomain/zero/1.0/", + "@id": "spdx:CC0-1.0", + "rdfs:seeAlso": "https://creativecommons.org/publicdomain/zero/1.0/legalcode", "@type": "dandi:LicenseType", "rdfs:label": "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication", }, { - "@id": "dandi:CCBY40", - "sameAs": "https://creativecommons.org/licenses/by/4.0/", + "@id": "spdx:CC-BY-4.0", + "rdfs:seeAlso": "https://creativecommons.org/licenses/by/4.0/legalcode", "@type": "dandi:LicenseType", "rdfs:label": "Attribution 4.0 International (CC BY 4.0)", }, { - "@id": "dandi:CCBYNC40", - "sameAs": "https://creativecommons.org/licenses/by-nc/4.0", + "@id": "spdx:CC-BY-NC-4.0", + "rdfs:seeAlso": "https://creativecommons.org/licenses/by-nc/4.0/legalcode", "@type": "dandi:LicenseType", "rdfs:label": "Attribution-NonCommercial 4.0 International (CC BY-NC 4.0)", }, diff --git a/dandi/models.py b/dandi/models.py index 645fdf8bd..2901113c0 100644 --- a/dandi/models.py +++ b/dandi/models.py @@ -42,7 +42,7 @@ def create_enum(data): key = item["@id"].split(":")[-1] if key in items: key = item["@id"].replace(":", "_") - items[key.replace("-", "_")] = item["@id"] + items[key.replace("-", "_").replace(".", "")] = item["@id"] if klass is None or len(items) == 0: raise ValueError(f"Could not generate a klass or items from {data}") newklass = Enum(klass, items) @@ -129,6 +129,8 @@ def encode(self, o): class DandiBaseModel(BaseModel): + id: Optional[str] = Field(description="Uniform resource identifier", readOnly=True) + @classmethod def unvalidated(__pydantic_cls__: Type[BaseModel], **data: Any) -> BaseModel: """Allow model to be returned without validation""" @@ -166,6 +168,8 @@ def schema_extra(schema: Dict[str, Any], model) -> None: for prop, value in schema.get("properties", {}).items(): if value.get("title") is None or value["title"] == prop.title(): value["title"] = name2title(prop) + if value.get("format", None) == "uri": + value["maxLength"] = 1000 allOf = value.get("allOf") anyOf = value.get("anyOf") items = value.get("items") @@ -274,7 +278,7 @@ class SpeciesType(TypeModel): class Disorder(TypeModel): """Biolink, SNOMED, or other identifier for disorder studied""" - dxdate: Optional[List[date]] = Field( + dxdate: Optional[List[Union[date, datetime]]] = Field( None, title="Dates of diagnosis", description="Dates of diagnosis", @@ -408,6 +412,23 @@ class Software(DandiBaseModel): } +class Agent(DandiBaseModel): + identifier: Optional[Identifier] = Field( + None, + title="Identifier", + description="Identifier for an agent", + nskey="schema", + ) + name: str = Field(nskey="schema") + url: Optional[HttpUrl] = Field(None, nskey="schema") + schemaKey: Literal["Software"] = Field("Agent", readOnly=True) + + _ldmeta = { + "rdfs:subClassOf": ["prov:Agent"], + "nskey": "dandi", + } + + class EthicsApproval(DandiBaseModel): """Information about ethics committee approval for project""" @@ -520,14 +541,14 @@ class Activity(DandiBaseModel): description: Optional[str] = Field( None, description="The description of the activity.", nskey="schema" ) - startDate: Optional[date] = Field(None, nskey="schema") - endDate: Optional[date] = Field(None, nskey="schema") + startDate: Optional[datetime] = Field(None, nskey="schema") + endDate: Optional[datetime] = Field(None, nskey="schema") # isPartOf: Optional["Activity"] = Field(None, nskey="schema") # hasPart: Optional["Activity"] = Field(None, nskey="schema") - wasAssociatedWith: Optional[List[Union[Person, Organization, Software]]] = Field( - None, nskey="prov" - ) + wasAssociatedWith: Optional[ + List[Union[Person, Organization, Software, Agent]] + ] = Field(None, nskey="prov") schemaKey: Literal["Activity"] = Field("Activity", readOnly=True) @@ -560,6 +581,40 @@ class Session(Activity): schemaKey: Literal["Session"] = Field("Session", readOnly=True) +class PublishActivity(Activity): + schemaKey: Literal["PublishActivity"] = Field("PublishActivity", readOnly=True) + + +class Locus(DandiBaseModel): + identifier: Union[Identifier, List[Identifier]] = Field( + description="Identifier for genotyping locus" + ) + locus_type: str = Field() + symbol: str = Field() + schemaKey: Literal["Locus"] = Field("Locus", readOnly=True) + _ldmeta = {"nskey": "dandi"} + + +class Allele(DandiBaseModel): + identifier: Union[Identifier, List[Identifier]] = Field( + description="Identifier for genotyping allele" + ) + allele_type: str = Field() + symbol: str = Field() + schemaKey: Literal["Allele"] = Field("Allele", readOnly=True) + _ldmeta = {"nskey": "dandi"} + + +class GenotypeInfo(DandiBaseModel): + locus: Locus = Field(description="Locus at which information was extracted") + alleles: List[Allele] = Field( + max_items=3, description="Information about one allele" + ) + wasGeneratedBy: Optional[List["Session"]] = Field(None, nskey="prov") + schemaKey: Literal["GenotypeInfo"] = Field("GenotypeInfo", readOnly=True) + _ldmeta = {"nskey": "dandi"} + + class RelatedParticipant(DandiBaseModel): identifier: Optional[Identifier] = Field(None, nskey="schema") name: Optional[str] = Field(None, title="A name of the Participant", nskey="schema") @@ -608,7 +663,7 @@ class Participant(DandiBaseModel): description="OBI based identifier for sex of the sample if available", nskey="dandi", ) - genotype: Optional[Identifier] = Field( + genotype: Optional[Union[List[GenotypeInfo], Identifier]] = Field( None, description="Genotype descriptor of biosample if available", nskey="dandi" ) species: Optional[SpeciesType] = Field( @@ -628,7 +683,7 @@ class Participant(DandiBaseModel): schemaKey: Literal["Participant"] = Field("Participant", readOnly=True) _ldmeta = { - "rdfs:subClassOf": ["schema:Person", "prov:Agent"], + "rdfs:subClassOf": ["prov:Agent"], "rdfs:label": "Information about the participant.", "nskey": "dandi", } @@ -638,7 +693,6 @@ class BioSample(DandiBaseModel): """Description of the sample that was studied""" identifier: Optional[Identifier] = Field(nskey="schema") - altName: Optional[List[Identifier]] = Field(None, nskey="dandi") sampleType: Optional[SampleType] = Field( None, description="OBI based identifier for the sample used", nskey="dandi" ) @@ -672,7 +726,7 @@ class BioSample(DandiBaseModel): class Identifiable(DandiBaseModel): - identifier: Identifier = Field(readOnly=True, nskey="schema") + identifier: Optional[Identifier] = Field(readOnly=True, nskey="schema") class CommonModel(DandiBaseModel): @@ -760,6 +814,8 @@ def check_data(cls, values): raise ValueError("At least one contributor must have role ContactPerson") return values + id: str = Field(description="Uniform resource identifier", readOnly=True) + identifier: DANDI = Field( readOnly=True, title="Dandiset identifier", @@ -773,7 +829,6 @@ def check_data(cls, values): max_length=150, nskey="schema", ) - description: str = Field( description="A description of the Dandiset", max_length=3000, nskey="schema" ) @@ -783,6 +838,12 @@ def check_data(cls, values): nskey="schema", min_items=1, ) + dateCreated: Optional[datetime] = Field( + nskey="schema", title="Dandiset creation date and time", readOnly=True + ) + dateModified: Optional[datetime] = Field( + nskey="schema", title="Last modification date and time", readOnly=True + ) citation: TempOptional[str] = Field(readOnly=True, nskey="schema") @@ -816,15 +877,6 @@ def check_data(cls, values): } -class PublishedDandisetMeta(DandisetMeta): - publishedBy: HttpUrl = Field( - description="The URL should contain the provenance of the publishing process.", - readOnly=True, - nskey="dandi", - ) # TODO: formalize "publish" activity to at least the Actor - datePublished: date = Field(readOnly=True, nskey="schema") - - class BareAssetMeta(CommonModel): """Metadata used to describe an asset anywhere (local or server). @@ -888,19 +940,38 @@ class BareAssetMeta(CommonModel): class AssetMeta(BareAssetMeta, Identifiable): """Metadata used to describe an asset on the server.""" - identifier: UUID4 = Field(readOnly=True, nskey="schema") - + id: Optional[str] = Field(readOnly=True, description="Uniform resource identifier") + identifier: Optional[UUID4] = Field(readOnly=True, nskey="schema") # on publish or set by server contentUrl: Optional[List[HttpUrl]] = Field(None, readOnly=True, nskey="schema") -class PublishedAssetMeta(AssetMeta): - publishedBy: HttpUrl = Field( +class Publishable(DandiBaseModel): + id: str = Field(readOnly=True, description="Uniform resource identifier") + publishedBy: Union[HttpUrl, PublishActivity] = Field( description="The URL should contain the provenance of the publishing process.", readOnly=True, nskey="dandi", ) # TODO: formalize "publish" activity to at least the Actor - datePublished: date = Field(readOnly=True, nskey="schema") + datePublished: datetime = Field(readOnly=True, nskey="schema") + url: HttpUrl = Field( + readOnly=True, description="permalink to the item", nskey="schema" + ) + + +class PublishedDandisetMeta(DandisetMeta, Publishable): + version: str = Field(readOnly=True, nskey="schema") + doi: str = Field( + None, + title="DOI", + readOnly=True, + pattern=r"^10\.[A-Za-z0-9.\/-]+", + nskey="dandi", + ) + + +class PublishedAssetMeta(AssetMeta, Publishable): + pass def get_schema_version(): diff --git a/dandi/tests/data/metadata/dandimeta_migration.new.json b/dandi/tests/data/metadata/dandimeta_migration.new.json index 013ec0d78..4e6e040fa 100644 --- a/dandi/tests/data/metadata/dandimeta_migration.new.json +++ b/dandi/tests/data/metadata/dandimeta_migration.new.json @@ -389,6 +389,7 @@ } ], "description": "A challenge for data sharing in systems neuroscience is the multitude of different data formats used. Neurodata Without Borders: Neurophysiology 2.0 (NWB:N) has emerged as a standardized data format for the storage of cellular-level data together with meta-data, stimulus information, and behavior. A key next step to facilitate NWB:N adoption is to provide easy to use processing pipelines to import/export data from/to NWB:N. Here, we present a NWB-formatted dataset of 1863 single neurons recorded from the medial temporal lobes of 59 human subjects undergoing intracranial monitoring while they performed a recognition memory task. We provide code to analyze and export/import stimuli, behavior, and electrophysiological recordings to/from NWB in both MATLAB and Python. The data files are NWB:N compliant, which affords interoperability between programming languages and operating systems. This combined data and code release is a case study for how to utilize NWB:N for human single-neuron recordings and enables easy re-use of this hard-to-obtain data for both teaching and research on the mechanisms of human memory.", + "id": "DANDI:000004/draft", "identifier": "DANDI:000004", "keywords": [ "cognitive neuroscience", @@ -402,7 +403,7 @@ "single-neurons" ], "license": [ - "dandi:CCBY40" + "spdx:CC-BY-4.0" ], "name": "A NWB-based dataset and processing pipeline of human single-neuron activity during a declarative memory task", "schemaVersion": null, diff --git a/dandi/tests/data/metadata/metadata2asset.json b/dandi/tests/data/metadata/metadata2asset.json index 097108a85..d52274ba6 100644 --- a/dandi/tests/data/metadata/metadata2asset.json +++ b/dandi/tests/data/metadata/metadata2asset.json @@ -1,5 +1,5 @@ { - "identifier": "0b0a1a0b-e3ea-4cf6-be94-e02c830d54be", + "id": "dandiasset:0b0a1a0b-e3ea-4cf6-be94-e02c830d54be", "schemaVersion": null, "keywords": [ "test", diff --git a/dandi/tests/data/metadata/metadata2asset_simple1.json b/dandi/tests/data/metadata/metadata2asset_simple1.json index 1d6843e17..abd431569 100644 --- a/dandi/tests/data/metadata/metadata2asset_simple1.json +++ b/dandi/tests/data/metadata/metadata2asset_simple1.json @@ -1,6 +1,6 @@ { "schemaVersion": null, - "identifier": "bfc23fb6192b41c083a7257e09a3702b", + "id": "dandiasset:bfc23fb6192b41c083a7257e09a3702b", "keywords": [ "keyword1", "keyword 2" diff --git a/dandi/tests/test_metadata.py b/dandi/tests/test_metadata.py index 4b486510c..2023fe222 100644 --- a/dandi/tests/test_metadata.py +++ b/dandi/tests/test_metadata.py @@ -77,7 +77,7 @@ def test_metadata2asset(schema_dir): "encodingFormat": "application/x-nwb", "experiment_description": "Experiment Description", "experimenter": "Joe Q. Experimenter", - "identifier": "6a42c273881f45e8ad4d538f7ede1437", + "id": "dandiasset:0b0a1a0b-e3ea-4cf6-be94-e02c830d54be", "institution": "University College", "keywords": ["test", "sample", "example", "test-case"], "lab": "Retriever Laboratory", @@ -112,7 +112,6 @@ def test_metadata2asset(schema_dir): data_as_dict["schemaVersion"] = DANDI_SCHEMA_VERSION assert data == BareAssetMeta(**data_as_dict) bare_dict = deepcopy(data_as_dict) - bare_dict.pop("identifier") assert data.json_dict() == bare_dict validate_asset_json(data_as_dict, schema_dir) @@ -127,7 +126,7 @@ def test_metadata2asset_simple1(schema_dir): "nwb_version": "2.2.5", "experiment_description": "experiment_description1", "experimenter": ("experimenter1",), - "identifier": "bfc23fb6192b41c083a7257e09a3702b", + "id": "dandiasset:bfc23fb6192b41c083a7257e09a3702b", "institution": "institution1", "keywords": ["keyword1", "keyword 2"], "lab": "lab1", @@ -152,7 +151,6 @@ def test_metadata2asset_simple1(schema_dir): data_as_dict["schemaVersion"] = DANDI_SCHEMA_VERSION assert data == BareAssetMeta(**data_as_dict) bare_dict = deepcopy(data_as_dict) - bare_dict.pop("identifier") assert data.json_dict() == bare_dict validate_asset_json(data_as_dict, schema_dir) diff --git a/dandi/tests/test_models.py b/dandi/tests/test_models.py index 817e50512..0fe78a2c1 100644 --- a/dandi/tests/test_models.py +++ b/dandi/tests/test_models.py @@ -116,9 +116,9 @@ def test_asset(): ( LicenseType, { - "CC0": "dandi:CC0", - "CCBY40": "dandi:CCBY40", - "CCBYNC40": "dandi:CCBYNC40", + "CC0_10": "spdx:CC0-1.0", + "CC_BY_40": "spdx:CC-BY-4.0", + "CC_BY_NC_40": "spdx:CC-BY-NC-4.0", }, ), (