diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9b73c2a6..26716756 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,9 +35,10 @@ repos: additional_dependencies: - "backoff>=2.2.1,<3" - "click>=8.1.7,<9" + - "jinja2>=3.1.2,<4" - "pandas-stubs>=2.1.1,<3" - "pydantic-settings>=2.1.0,<3" - - "pydantic>=2.5.2,<3" + - "pydantic[email]>=2.5.2,<3" - "pytest>=7.4.3,<8" - "types-ldap3>=2.9.13,<3" - "types-pytz>=2023.3.1,<2024" diff --git a/mex/common/ldap/models/person.py b/mex/common/ldap/models/person.py index 12784e73..ecef8e1b 100644 --- a/mex/common/ldap/models/person.py +++ b/mex/common/ldap/models/person.py @@ -7,14 +7,14 @@ class LDAPPerson(LDAPActor): """Model class for LDAP persons.""" - company: str | None = Field(None) - department: str | None = Field(None) - departmentNumber: str | None = Field(None) - displayName: str | None = Field(None) - employeeID: str = Field(...) - givenName: list[str] = Field(..., min_length=1) - ou: list[str] = Field([]) - sn: str = Field(...) 
+ company: str | None = None + department: str | None = None + departmentNumber: str | None = None + displayName: str | None = None + employeeID: str + givenName: list[str] = Field(min_length=1) + ou: list[str] = [] + sn: str @classmethod def get_ldap_fields(cls) -> tuple[str, ...]: diff --git a/mex/common/models/access_platform.py b/mex/common/models/access_platform.py index fbc24e91..339f51f9 100644 --- a/mex/common/models/access_platform.py +++ b/mex/common/models/access_platform.py @@ -1,3 +1,5 @@ +from typing import Annotated + from pydantic import Field from mex.common.models.base import BaseModel @@ -5,27 +7,16 @@ from mex.common.models.merged_item import MergedItem from mex.common.types import ( AccessPlatformID, + APIType, ContactPointID, Link, OrganizationalUnitID, PersonID, + TechnicalAccessibility, Text, - VocabularyEnum, ) -class TechnicalAccessibility(VocabularyEnum): - """Technical accessibility within RKI and outside of RKI.""" - - __vocabulary__ = "technical-accessibility" - - -class APIType(VocabularyEnum): - """Technical standard or style of a network API.""" - - __vocabulary__ = "api-type" - - class BaseAccessPlatform(BaseModel): """A way of physically accessing the Resource for re-use.""" @@ -34,14 +25,15 @@ class BaseAccessPlatform(BaseModel): contact: list[OrganizationalUnitID | PersonID | ContactPointID] = [] description: list[Text] = [] endpointDescription: Link | None = None - endpointType: APIType | None = Field( - None, examples=["https://mex.rki.de/item/api-type-1"] - ) + endpointType: Annotated[ + APIType, Field(examples=["https://mex.rki.de/item/api-type-1"]) + ] | None = None endpointURL: Link | None = None landingPage: list[Link] = [] - technicalAccessibility: TechnicalAccessibility = Field( - ..., examples=["https://mex.rki.de/item/technical-accessibility-1"] - ) + technicalAccessibility: Annotated[ + TechnicalAccessibility, + Field(examples=["https://mex.rki.de/item/technical-accessibility-1"]), + ] title: list[Text] = [] 
unitInCharge: list[OrganizationalUnitID] = [] diff --git a/mex/common/models/activity.py b/mex/common/models/activity.py index 68312f06..f095aeeb 100644 --- a/mex/common/models/activity.py +++ b/mex/common/models/activity.py @@ -7,8 +7,8 @@ from mex.common.models.merged_item import MergedItem from mex.common.types import ( ActivityID, + ActivityType, ContactPointID, - Identifier, Link, OrganizationalUnitID, OrganizationID, @@ -16,16 +16,9 @@ Text, Theme, Timestamp, - VocabularyEnum, ) -class ActivityType(VocabularyEnum): - """The activity type.""" - - __vocabulary__ = "activity-type" - - class BaseActivity(BaseModel): """The context a resource was generated in. @@ -40,12 +33,9 @@ class BaseActivity(BaseModel): ] ] = [] alternativeTitle: list[Text] = [] - contact: list[ - Annotated[ - OrganizationalUnitID | PersonID | ContactPointID, - Field(examples=[Identifier.generate(seed=42)]), - ] - ] = Field(..., min_length=1) + contact: Annotated[ + list[OrganizationalUnitID | PersonID | ContactPointID,], Field(min_length=1) + ] documentation: list[Link] = [] end: list[ Annotated[Timestamp, Field(examples=["2024-01-17", "2024", "2024-01"])] @@ -57,7 +47,7 @@ class BaseActivity(BaseModel): involvedUnit: list[OrganizationalUnitID] = [] isPartOfActivity: list[ActivityID] = [] publication: list[Link] = [] - responsibleUnit: list[OrganizationalUnitID] = Field(..., min_length=1) + responsibleUnit: Annotated[list[OrganizationalUnitID], Field(min_length=1)] shortName: list[Text] = [] start: list[ Annotated[Timestamp, Field(examples=["2023-01-16", "2023", "2023-02"])] @@ -66,7 +56,7 @@ class BaseActivity(BaseModel): theme: list[ Annotated[Theme, Field(examples=["https://mex.rki.de/item/theme-1"])] ] = [] - title: list[Text] = Field(..., min_length=1) + title: Annotated[list[Text], Field(min_length=1)] website: list[Link] = [] diff --git a/mex/common/models/base.py b/mex/common/models/base.py index 4d3a660a..2e29e9e9 100644 --- a/mex/common/models/base.py +++ 
b/mex/common/models/base.py @@ -2,17 +2,39 @@ import pickle # nosec from abc import abstractmethod from functools import cache -from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union, get_args, get_origin +from typing import TYPE_CHECKING, Annotated, Any, TypeVar, Union, get_args, get_origin -from pydantic import BaseModel as PydanticBaseModel +from pydantic import ( + BaseModel as PydanticBaseModel, +) from pydantic import ConfigDict, Field, TypeAdapter, ValidationError, model_validator from pydantic.fields import FieldInfo +from pydantic.json_schema import DEFAULT_REF_TEMPLATE, JsonSchemaMode, JsonSchemaValue +from pydantic.json_schema import ( + GenerateJsonSchema as PydanticJsonSchemaGenerator, +) from mex.common.types import Identifier ModelValuesT = TypeVar("ModelValuesT", bound=dict[str, Any]) +class JsonSchemaGenerator(PydanticJsonSchemaGenerator): + """Customization of the pydantic class for generating JSON schemas.""" + + def handle_ref_overrides(self, json_schema: JsonSchemaValue) -> JsonSchemaValue: + """Disable pydantic behavior to wrap top-level `$ref` keys in an `allOf`. + + For example, pydantic would convert + {"$ref": "#/$defs/APIType", "examples": ["api-type-1"]} + into + {"allOf": [{"$ref": "#/$defs/APIType"}], "examples": ["api-type-1"]} + which is in fact recommended by JSON schema, but we need to disable this + to stay compatible with mex-editor and mex-model. + """ + return json_schema + + class BaseModel(PydanticBaseModel): """Common base class for all MEx model classes.""" @@ -27,6 +49,32 @@ class BaseModel(PydanticBaseModel): validate_assignment=True, ) + @classmethod + def model_json_schema( + cls, + by_alias: bool = True, + ref_template: str = DEFAULT_REF_TEMPLATE, + schema_generator: type[PydanticJsonSchemaGenerator] = JsonSchemaGenerator, + mode: JsonSchemaMode = "validation", + ) -> dict[str, Any]: + """Generates a JSON schema for a model class. + + Args: + by_alias: Whether to use attribute aliases or not. 
+ ref_template: The reference template. + schema_generator: Class overriding the logic used to generate the JSON schema. + mode: The mode in which to generate the schema. + + Returns: + The JSON schema for the given model class. + """ + return super().model_json_schema( + by_alias=by_alias, + ref_template=ref_template, + schema_generator=schema_generator, + mode=mode, + ) + @classmethod @cache def _get_alias_lookup(cls) -> dict[str, str]: @@ -73,7 +121,7 @@ def _get_field_names_allowing_none(cls) -> list[str]: @classmethod def _convert_non_list_to_list( cls, name: str, field: FieldInfo, value: Any - ) -> Optional[list[Any]]: + ) -> list[Any] | None: """Convert a non-list value to a list value by wrapping it in a list.""" if value is None: if name in cls._get_field_names_allowing_none(): @@ -169,15 +217,16 @@ class MExModel(BaseModel): # also used as the foreign key for all fields containing references. stableTargetId: Any - identifier: Identifier = Field( - ..., - description=( - "A globally unique identifier for this item. Regardless of the entity-type " - "or whether this item was extracted, merged, etc. identifiers will be " - "assigned just once." + identifier: Annotated[ + Identifier, + Field( + description=( + "A globally unique identifier for this item. Regardless of the " + "entity-type or whether this item was extracted, merged, etc. " + "identifiers will be assigned just once." 
+ ), ), - examples=[Identifier.generate(seed=42)], - ) + ] @classmethod @abstractmethod diff --git a/mex/common/models/contact_point.py b/mex/common/models/contact_point.py index de61d98c..5b315bd3 100644 --- a/mex/common/models/contact_point.py +++ b/mex/common/models/contact_point.py @@ -12,17 +12,7 @@ class BaseContactPoint(BaseModel): """A contact point - for example, an interdepartmental project.""" stableTargetId: ContactPointID - email: list[ - Annotated[ - Email, - Field( - examples=["info@rki.de"], - ), - ] - ] = Field( - ..., - min_length=1, - ) + email: Annotated[list[Email], Field(min_length=1)] class ExtractedContactPoint(BaseContactPoint, ExtractedData): diff --git a/mex/common/models/distribution.py b/mex/common/models/distribution.py index bd27571d..15b32943 100644 --- a/mex/common/models/distribution.py +++ b/mex/common/models/distribution.py @@ -1,3 +1,5 @@ +from typing import Annotated + from pydantic import Field from mex.common.models.base import BaseModel @@ -9,28 +11,22 @@ DistributionID, License, Link, + MIMEType, OrganizationID, PersonID, Timestamp, - VocabularyEnum, ) -class MIMEType(VocabularyEnum): - """The mime type.""" - - __vocabulary__ = "mime-type" - - class BaseDistribution(BaseModel): """A specific representation of a dataset.""" stableTargetId: DistributionID accessService: AccessPlatformID | None = None - accessRestriction: AccessRestriction = Field( - ..., - examples=["https://mex.rki.de/item/access-restriction-1"], - ) + accessRestriction: Annotated[ + AccessRestriction, + Field(examples=["https://mex.rki.de/item/access-restriction-1"]), + ] accessURL: Link | None = None author: list[PersonID] = [] contactPerson: list[PersonID] = [] @@ -38,24 +34,28 @@ class BaseDistribution(BaseModel): dataManager: list[PersonID] = [] downloadURL: Link | None = None issued: Timestamp - license: License | None = Field( - None, examples=["https://mex.rki.de/item/license-1"] - ) - mediaType: MIMEType | None = Field( - None, - 
examples=["https://mex.rki.de/item/mime-type-1"], - ) + license: Annotated[ + License, Field(examples=["https://mex.rki.de/item/license-1"]) + ] | None = None + mediaType: Annotated[ + MIMEType, + Field( + examples=["https://mex.rki.de/item/mime-type-1"], + ), + ] | None = None modified: Timestamp | None = None otherContributor: list[PersonID] = [] projectLeader: list[PersonID] = [] projectManager: list[PersonID] = [] - publisher: list[OrganizationID] = Field(..., min_length=1) + publisher: Annotated[list[OrganizationID], Field(min_length=1)] researcher: list[PersonID] = [] - title: str = Field( - ..., - examples=["theNameOfTheFile"], - min_length=1, - ) + title: Annotated[ + str, + Field( + examples=["theNameOfTheFile"], + min_length=1, + ), + ] class ExtractedDistribution(BaseDistribution, ExtractedData): diff --git a/mex/common/models/extracted_data.py b/mex/common/models/extracted_data.py index 3dd9b8ea..8fc1d339 100644 --- a/mex/common/models/extracted_data.py +++ b/mex/common/models/extracted_data.py @@ -1,4 +1,4 @@ -from typing import Any +from typing import Annotated, Any from pydantic import Field, model_validator @@ -19,31 +19,34 @@ class BaseExtractedData(MExModel): correct type, e.g. `PersonID`. """ - hadPrimarySource: PrimarySourceID = Field( - ..., - description=( - "The stableTargetID of the primary source, that this item was extracted " - "from. This field is mandatory for all extracted items to aid with data " - "provenance. Extracted primary sources also have this field and are all " - "extracted from a primary source called MEx, which is its own primary " - "source and has the static stableTargetID: " - f"{MEX_PRIMARY_SOURCE_STABLE_TARGET_ID}" + hadPrimarySource: Annotated[ + PrimarySourceID, + Field( + description=( + "The stableTargetID of the primary source, that this item was " + "extracted from. This field is mandatory for all extracted items to " + "aid with data provenance. 
Extracted primary sources also have this " + "field and are all extracted from a primary source called MEx, which " + "is its own primary source and has the static stableTargetID: " + f"{MEX_PRIMARY_SOURCE_STABLE_TARGET_ID}" + ), ), - examples=[PrimarySourceID.generate(seed=42)], - ) - identifierInPrimarySource: str = Field( - ..., - description=( - "This is the identifier the original item had in its source system. " - "It is only unique amongst items coming from the same system, because " - "identifier formats are likely to overlap between systems. " - "The value for `identifierInPrimarySource` is therefore only unique in " - "composition with `hadPrimarySource`. MEx uses this composite key " - "to assign a stable and globally unique `identifier` to each item." + ] + identifierInPrimarySource: Annotated[ + str, + Field( + description=( + "This is the identifier the original item had in its source system. " + "It is only unique amongst items coming from the same system, because " + "identifier formats are likely to overlap between systems. " + "The value for `identifierInPrimarySource` is therefore only unique in " + "composition with `hadPrimarySource`. MEx uses this composite key " + "to assign a stable and globally unique `identifier` to each item." 
+ ), + examples=["123456", "item-501", "D7/x4/zz.final3"], + min_length=1, ), - examples=["123456", "item-501", "D7/x4/zz.final3"], - min_length=1, - ) + ] @classmethod def get_entity_type(cls) -> str: diff --git a/mex/common/models/organization.py b/mex/common/models/organization.py index ffcbb09b..3b522e84 100644 --- a/mex/common/models/organization.py +++ b/mex/common/models/organization.py @@ -46,7 +46,7 @@ class BaseOrganization(BaseModel): ), ] ] = [] - officialName: list[Text] = Field(..., min_length=1) + officialName: Annotated[list[Text], Field(min_length=1)] rorId: list[ Annotated[ str, diff --git a/mex/common/models/organizational_unit.py b/mex/common/models/organizational_unit.py index 8db28c5c..6c24269d 100644 --- a/mex/common/models/organizational_unit.py +++ b/mex/common/models/organizational_unit.py @@ -13,15 +13,8 @@ class BaseOrganizationalUnit(BaseModel): stableTargetId: OrganizationalUnitID alternativeName: list[Text] = [] - email: list[ - Annotated[ - Email, - Field( - examples=["info@rki.de"], - ), - ] - ] = [] - name: list[Text] = Field(..., min_length=1) + email: list[Email] = [] + name: Annotated[list[Text], Field(min_length=1)] parentUnit: OrganizationalUnitID | None = None shortName: list[Text] = [] unitOf: list[OrganizationID] = [] diff --git a/mex/common/models/person.py b/mex/common/models/person.py index 91113953..4203c27b 100644 --- a/mex/common/models/person.py +++ b/mex/common/models/person.py @@ -13,14 +13,7 @@ class BasePerson(BaseModel): stableTargetId: PersonID affiliation: list[OrganizationID] = [] - email: list[ - Annotated[ - Email, - Field( - examples=["info@rki.de"], - ), - ] - ] = [] + email: list[Email] = [] familyName: list[ Annotated[ str, diff --git a/mex/common/models/primary_source.py b/mex/common/models/primary_source.py index d8491757..288a9be8 100644 --- a/mex/common/models/primary_source.py +++ b/mex/common/models/primary_source.py @@ -1,3 +1,5 @@ +from typing import Annotated + from pydantic import Field from 
mex.common.models.base import BaseModel @@ -24,10 +26,12 @@ class BasePrimarySource(BaseModel): locatedAt: list[Link] = [] title: list[Text] = [] unitInCharge: list[OrganizationalUnitID] = [] - version: str | None = Field( - None, - examples=["v1", "2023-01-16", "Schema 9"], - ) + version: Annotated[ + str, + Field( + examples=["v1", "2023-01-16", "Schema 9"], + ), + ] | None = None class ExtractedPrimarySource(BasePrimarySource, ExtractedData): diff --git a/mex/common/models/resource.py b/mex/common/models/resource.py index 98626b30..7ac1e7e5 100644 --- a/mex/common/models/resource.py +++ b/mex/common/models/resource.py @@ -9,63 +9,39 @@ AccessPlatformID, AccessRestriction, ActivityID, + AnonymizationPseudonymization, ContactPointID, + DataProcessingState, DistributionID, + Frequency, + Language, License, Link, OrganizationalUnitID, OrganizationID, PersonID, ResourceID, + ResourceTypeGeneral, Text, Theme, Timestamp, - VocabularyEnum, ) -class ResourceTypeGeneral(VocabularyEnum): - """The general type of a resource.""" - - __vocabulary__ = "resource-type-general" - - -class AnonymizationPseudonymization(VocabularyEnum): - """Whether the resource is anonymized/pseudonymized.""" - - __vocabulary__ = "anonymization-pseudonymization" - - -class DataProcessingState(VocabularyEnum): - """Type for state of data processing.""" - - __vocabulary__ = "data-processing-state" - - -class Frequency(VocabularyEnum): - """Frequency type.""" - - __vocabulary__ = "frequency" - - -class Language(VocabularyEnum): - """Language type.""" - - __vocabulary__ = "language" - - class BaseResource(BaseModel): """A defined piece or collection of information.""" stableTargetId: ResourceID accessPlatform: list[AccessPlatformID] = [] - accessRestriction: AccessRestriction = Field( - ..., - examples=["https://mex.rki.de/item/access-restriction-1"], - ) - accrualPeriodicity: Frequency | None = Field( - None, examples=["https://mex.rki.de/item/frequency-1"] - ) + accessRestriction: Annotated[ + 
AccessRestriction, + Field( + examples=["https://mex.rki.de/item/access-restriction-1"], + ), + ] + accrualPeriodicity: Annotated[ + Frequency, Field(examples=["https://mex.rki.de/item/frequency-1"]) + ] | None = None alternativeTitle: list[Text] = [] anonymizationPseudonymization: list[ Annotated[ @@ -75,9 +51,9 @@ class BaseResource(BaseModel): ), ] ] = [] - contact: list[OrganizationalUnitID | PersonID | ContactPointID] = Field( - ..., min_length=1 - ) + contact: Annotated[ + list[OrganizationalUnitID | PersonID | ContactPointID], Field(min_length=1) + ] contributingUnit: list[OrganizationalUnitID] = [] contributor: list[PersonID] = [] created: Timestamp | None = None @@ -93,9 +69,9 @@ class BaseResource(BaseModel): language: list[ Annotated[Language, Field(examples=["https://mex.rki.de/item/language-1"])] ] = [] - license: License | None = Field( - None, examples=["https://mex.rki.de/item/license-1"] - ) + license: Annotated[ + License, Field(examples=["https://mex.rki.de/item/license-1"]) + ] | None = None loincId: list[str] = [] meshId: list[ Annotated[ @@ -133,12 +109,18 @@ class BaseResource(BaseModel): ), ] ] = [] - temporal: Timestamp | str | None = None - theme: list[ - Annotated[Theme, Field(examples=["https://mex.rki.de/item/theme-1"])] - ] = Field(..., min_length=1) - title: list[Text] = Field(..., min_length=1) - unitInCharge: list[OrganizationalUnitID] = Field(..., min_length=1) + temporal: Timestamp | Annotated[ + str, + Field( + examples=["2022-01 bis 2022-03", "Sommer 2023", "nach 2013", "1998-2008"] + ), + ] | None = None + theme: Annotated[ + list[Annotated[Theme, Field(examples=["https://mex.rki.de/item/theme-1"])]], + Field(min_length=1), + ] + title: Annotated[list[Text], Field(min_length=1)] + unitInCharge: Annotated[list[OrganizationalUnitID], Field(min_length=1)] wasGeneratedBy: ActivityID | None = None diff --git a/mex/common/models/variable.py b/mex/common/models/variable.py index 2b7e1594..ce16a3c2 100644 --- 
a/mex/common/models/variable.py +++ b/mex/common/models/variable.py @@ -6,48 +6,46 @@ from mex.common.models.extracted_data import ExtractedData from mex.common.models.merged_item import MergedItem from mex.common.types import ( + DataType, ResourceID, Text, VariableGroupID, VariableID, - VocabularyEnum, ) -class DataType(VocabularyEnum): - """The type of the single piece of information within a datum.""" - - __vocabulary__ = "data-type" - - class BaseVariable(BaseModel): """A single piece of information within a resource.""" stableTargetId: VariableID belongsTo: list[VariableGroupID] = [] - codingSystem: str | None = Field( - None, - examples=["SF-36 Version 1"], - ) - dataType: DataType | None = Field( - None, - examples=["https://mex.rki.de/item/data-type-1"], - ) + codingSystem: Annotated[ + str, + Field( + examples=["SF-36 Version 1"], + ), + ] | None = None + dataType: Annotated[ + DataType, + Field( + examples=["https://mex.rki.de/item/data-type-1"], + ), + ] | None = None description: list[Text] = [] - label: list[ - Annotated[ - Text, - Field( - examples=[ - {"language": "de", "value": "Mehrere Treppenabsätze steigen"} - ], - ), - ] - ] = Field( - ..., - min_length=1, - ) - usedIn: list[ResourceID] = Field(..., min_length=1) + label: Annotated[ + list[ + Annotated[ + Text, + Field( + examples=[ + {"language": "de", "value": "Mehrere Treppenabsätze steigen"} + ], + ), + ] + ], + Field(min_length=1), + ] + usedIn: Annotated[list[ResourceID], Field(min_length=1)] valueSet: list[ Annotated[ str, @@ -59,9 +57,7 @@ class BaseVariable(BaseModel): ], ), ] - ] = Field( - [], - ) + ] = [] class ExtractedVariable(BaseVariable, ExtractedData): diff --git a/mex/common/models/variable_group.py b/mex/common/models/variable_group.py index 7fb25fe6..d5a2f09e 100644 --- a/mex/common/models/variable_group.py +++ b/mex/common/models/variable_group.py @@ -1,3 +1,5 @@ +from typing import Annotated + from pydantic import Field from mex.common.models.base import BaseModel @@ 
-10,8 +12,8 @@ class BaseVariableGroup(BaseModel): """The grouping of variables according to a certain aspect.""" stableTargetId: VariableGroupID - containedBy: list[ResourceID] = Field(..., min_length=1) - label: list[Text] = Field(..., min_length=1) + containedBy: Annotated[list[ResourceID], Field(min_length=1)] + label: Annotated[list[Text], Field(min_length=1)] class ExtractedVariableGroup(BaseVariableGroup, ExtractedData): diff --git a/mex/common/public_api/models.py b/mex/common/public_api/models.py index b665f6c4..2668b2b8 100644 --- a/mex/common/public_api/models.py +++ b/mex/common/public_api/models.py @@ -66,7 +66,7 @@ class PublicApiItem(PublicApiBaseModel): entityType: str itemId: UUID | None = Field(None, exclude=True) - businessId: str = Field(..., exclude=True) + businessId: str = Field(exclude=True) values: list[PublicApiField] @property @@ -96,7 +96,7 @@ class PublicApiItemWithoutValues(PublicApiBaseModel): entityType: str itemId: UUID | None = Field(None, exclude=True) - businessId: str = Field(..., exclude=True) + businessId: str = Field(exclude=True) @property def stableTargetId(self) -> Identifier: # noqa: N802 diff --git a/mex/common/transform.py b/mex/common/transform.py index 4341776c..e824878d 100644 --- a/mex/common/transform.py +++ b/mex/common/transform.py @@ -40,12 +40,10 @@ def default(self, obj: Any) -> Any: @cache def snake_to_dromedary(string: str) -> str: """Convert the given string from `snake_case` into `dromedaryCase`.""" - if len(tokens := re.split(r"_", string)) > 1: - return "".join( - word.capitalize() if index else word.lower() - for index, word in enumerate(tokens) - ) - return string + return "".join( + word.capitalize() if index else word.lower() + for index, word in enumerate(re.split(r"_+", string)) + ) @cache @@ -66,3 +64,9 @@ def dromedary_to_kebab(string: str) -> str: for word in re.split(r"([A-Z]+(?![a-z])|[a-z]+|[A-Z][a-z]+)", string) if word.strip("-") ) + + +@cache +def kebab_to_camel(string: str) -> str: + 
"""Convert the given string from `kebab-case` into `CamelCase`.""" + return "".join(word.title() for word in re.split(r"\-+", string)) diff --git a/mex/common/types/__init__.py b/mex/common/types/__init__.py index 1e08a8c8..a2032f09 100644 --- a/mex/common/types/__init__.py +++ b/mex/common/types/__init__.py @@ -26,7 +26,17 @@ ) from mex.common.types.vocabulary import ( AccessRestriction, + ActivityType, + AnonymizationPseudonymization, + APIType, + DataProcessingState, + DataType, + Frequency, + Language, License, + MIMEType, + ResourceTypeGeneral, + TechnicalAccessibility, Theme, VocabularyEnum, VocabularyLoader, @@ -34,7 +44,17 @@ ) __all__ = ( + "DataType", "AccessPlatformID", + "ActivityType", + "ResourceTypeGeneral", + "AnonymizationPseudonymization", + "DataProcessingState", + "Frequency", + "Language", + "APIType", + "MIMEType", + "TechnicalAccessibility", "AccessRestriction", "ActivityID", "AssetsPath", diff --git a/mex/common/types/email.py b/mex/common/types/email.py index 55b4dc48..fd4416bd 100644 --- a/mex/common/types/email.py +++ b/mex/common/types/email.py @@ -23,4 +23,5 @@ def __get_pydantic_json_schema__( field_schema = handler(core_schema_) field_schema["title"] = cls.__name__ field_schema["format"] = "email" + field_schema["examples"] = ["info@rki.de"] return field_schema diff --git a/mex/common/types/link.py b/mex/common/types/link.py index 7ef3d902..fd2fd183 100644 --- a/mex/common/types/link.py +++ b/mex/common/types/link.py @@ -41,7 +41,6 @@ class Link(BaseModel): language: LinkLanguage | None = None title: str | None = None url: str = Field( - ..., pattern=r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?", min_length=1, examples=["https://hello-world.org", "file://S:/OE/MF4/Projekte/MEx"], diff --git a/mex/common/types/text.py b/mex/common/types/text.py index 3d55a0d6..99927521 100644 --- a/mex/common/types/text.py +++ b/mex/common/types/text.py @@ -26,7 +26,7 @@ class Text(BaseModel): Text(value="foo") == 
Text.model_validate("foo") """ - value: str = Field(..., min_length=1) + value: str = Field(min_length=1) language: TextLanguage | None = None @model_validator(mode="before") diff --git a/mex/common/types/vocabulary.py b/mex/common/types/vocabulary.py index ade1bb10..e901118f 100644 --- a/mex/common/types/vocabulary.py +++ b/mex/common/types/vocabulary.py @@ -124,3 +124,63 @@ class License(VocabularyEnum): """License type.""" __vocabulary__ = "license" + + +class TechnicalAccessibility(VocabularyEnum): + """Technical accessibility within RKI and outside of RKI.""" + + __vocabulary__ = "technical-accessibility" + + +class APIType(VocabularyEnum): + """Technical standard or style of a network API.""" + + __vocabulary__ = "api-type" + + +class ActivityType(VocabularyEnum): + """The activity type.""" + + __vocabulary__ = "activity-type" + + +class MIMEType(VocabularyEnum): + """The mime type.""" + + __vocabulary__ = "mime-type" + + +class ResourceTypeGeneral(VocabularyEnum): + """The general type of a resource.""" + + __vocabulary__ = "resource-type-general" + + +class AnonymizationPseudonymization(VocabularyEnum): + """Whether the resource is anonymized/pseudonymized.""" + + __vocabulary__ = "anonymization-pseudonymization" + + +class DataProcessingState(VocabularyEnum): + """Type for state of data processing.""" + + __vocabulary__ = "data-processing-state" + + +class Frequency(VocabularyEnum): + """Frequency type.""" + + __vocabulary__ = "frequency" + + +class Language(VocabularyEnum): + """Language type.""" + + __vocabulary__ = "language" + + +class DataType(VocabularyEnum): + """The type of the single piece of information within a datum.""" + + __vocabulary__ = "data-type" diff --git a/poetry.lock b/poetry.lock index 957e9d73..339774cf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand. 
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "accessible-pygments" @@ -56,18 +56,15 @@ test = ["astroid (>=1,<2)", "astroid (>=2,<4)", "pytest"] [[package]] name = "babel" -version = "2.13.1" +version = "2.14.0" description = "Internationalization utilities" optional = false python-versions = ">=3.7" files = [ - {file = "Babel-2.13.1-py3-none-any.whl", hash = "sha256:7077a4984b02b6727ac10f1f7294484f737443d7e2e66c5e4380e41a3ae0b4ed"}, - {file = "Babel-2.13.1.tar.gz", hash = "sha256:33e0952d7dd6374af8dbf6768cc4ddf3ccfefc244f9986d4074704f2fbd18900"}, + {file = "Babel-2.14.0-py3-none-any.whl", hash = "sha256:efb1a25b7118e67ce3a259bed20545c29cb68be8ad2c784c83689981b7a57287"}, + {file = "Babel-2.14.0.tar.gz", hash = "sha256:6919867db036398ba21eb5c7a0f6b28ab8cbc3ae7a73a44ebe34ae74a4e7d363"}, ] -[package.dependencies] -setuptools = {version = "*", markers = "python_version >= \"3.12\""} - [package.extras] dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] @@ -102,29 +99,33 @@ lxml = ["lxml"] [[package]] name = "black" -version = "23.11.0" +version = "23.12.0" description = "The uncompromising code formatter." 
optional = false python-versions = ">=3.8" files = [ - {file = "black-23.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dbea0bb8575c6b6303cc65017b46351dc5953eea5c0a59d7b7e3a2d2f433a911"}, - {file = "black-23.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:412f56bab20ac85927f3a959230331de5614aecda1ede14b373083f62ec24e6f"}, - {file = "black-23.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d136ef5b418c81660ad847efe0e55c58c8208b77a57a28a503a5f345ccf01394"}, - {file = "black-23.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:6c1cac07e64433f646a9a838cdc00c9768b3c362805afc3fce341af0e6a9ae9f"}, - {file = "black-23.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cf57719e581cfd48c4efe28543fea3d139c6b6f1238b3f0102a9c73992cbb479"}, - {file = "black-23.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:698c1e0d5c43354ec5d6f4d914d0d553a9ada56c85415700b81dc90125aac244"}, - {file = "black-23.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:760415ccc20f9e8747084169110ef75d545f3b0932ee21368f63ac0fee86b221"}, - {file = "black-23.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:58e5f4d08a205b11800332920e285bd25e1a75c54953e05502052738fe16b3b5"}, - {file = "black-23.11.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:45aa1d4675964946e53ab81aeec7a37613c1cb71647b5394779e6efb79d6d187"}, - {file = "black-23.11.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4c44b7211a3a0570cc097e81135faa5f261264f4dfaa22bd5ee2875a4e773bd6"}, - {file = "black-23.11.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a9acad1451632021ee0d146c8765782a0c3846e0e0ea46659d7c4f89d9b212b"}, - {file = "black-23.11.0-cp38-cp38-win_amd64.whl", hash = "sha256:fc7f6a44d52747e65a02558e1d807c82df1d66ffa80a601862040a43ec2e3142"}, - {file = "black-23.11.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7f622b6822f02bfaf2a5cd31fdb7cd86fcf33dab6ced5185c35f5db98260b055"}, - {file = 
"black-23.11.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:250d7e60f323fcfc8ea6c800d5eba12f7967400eb6c2d21ae85ad31c204fb1f4"}, - {file = "black-23.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5133f5507007ba08d8b7b263c7aa0f931af5ba88a29beacc4b2dc23fcefe9c06"}, - {file = "black-23.11.0-cp39-cp39-win_amd64.whl", hash = "sha256:421f3e44aa67138ab1b9bfbc22ee3780b22fa5b291e4db8ab7eee95200726b07"}, - {file = "black-23.11.0-py3-none-any.whl", hash = "sha256:54caaa703227c6e0c87b76326d0862184729a69b73d3b7305b6288e1d830067e"}, - {file = "black-23.11.0.tar.gz", hash = "sha256:4c68855825ff432d197229846f971bc4d6666ce90492e5b02013bcaca4d9ab05"}, + {file = "black-23.12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:67f19562d367468ab59bd6c36a72b2c84bc2f16b59788690e02bbcb140a77175"}, + {file = "black-23.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bbd75d9f28a7283b7426160ca21c5bd640ca7cd8ef6630b4754b6df9e2da8462"}, + {file = "black-23.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:593596f699ca2dcbbbdfa59fcda7d8ad6604370c10228223cd6cf6ce1ce7ed7e"}, + {file = "black-23.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:12d5f10cce8dc27202e9a252acd1c9a426c83f95496c959406c96b785a92bb7d"}, + {file = "black-23.12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e73c5e3d37e5a3513d16b33305713237a234396ae56769b839d7c40759b8a41c"}, + {file = "black-23.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ba09cae1657c4f8a8c9ff6cfd4a6baaf915bb4ef7d03acffe6a2f6585fa1bd01"}, + {file = "black-23.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ace64c1a349c162d6da3cef91e3b0e78c4fc596ffde9413efa0525456148873d"}, + {file = "black-23.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:72db37a2266b16d256b3ea88b9affcdd5c41a74db551ec3dd4609a59c17d25bf"}, + {file = "black-23.12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:fdf6f23c83078a6c8da2442f4d4eeb19c28ac2a6416da7671b72f0295c4a697b"}, + {file = "black-23.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:39dda060b9b395a6b7bf9c5db28ac87b3c3f48d4fdff470fa8a94ab8271da47e"}, + {file = "black-23.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7231670266ca5191a76cb838185d9be59cfa4f5dd401b7c1c70b993c58f6b1b5"}, + {file = "black-23.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:193946e634e80bfb3aec41830f5d7431f8dd5b20d11d89be14b84a97c6b8bc75"}, + {file = "black-23.12.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bcf91b01ddd91a2fed9a8006d7baa94ccefe7e518556470cf40213bd3d44bbbc"}, + {file = "black-23.12.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:996650a89fe5892714ea4ea87bc45e41a59a1e01675c42c433a35b490e5aa3f0"}, + {file = "black-23.12.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdbff34c487239a63d86db0c9385b27cdd68b1bfa4e706aa74bb94a435403672"}, + {file = "black-23.12.0-cp38-cp38-win_amd64.whl", hash = "sha256:97af22278043a6a1272daca10a6f4d36c04dfa77e61cbaaf4482e08f3640e9f0"}, + {file = "black-23.12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ead25c273adfad1095a8ad32afdb8304933efba56e3c1d31b0fee4143a1e424a"}, + {file = "black-23.12.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c71048345bdbced456cddf1622832276d98a710196b842407840ae8055ade6ee"}, + {file = "black-23.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81a832b6e00eef2c13b3239d514ea3b7d5cc3eaa03d0474eedcbbda59441ba5d"}, + {file = "black-23.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:6a82a711d13e61840fb11a6dfecc7287f2424f1ca34765e70c909a35ffa7fb95"}, + {file = "black-23.12.0-py3-none-any.whl", hash = "sha256:a7c07db8200b5315dc07e331dda4d889a56f6bf4db6a9c2a526fa3166a81614f"}, + {file = "black-23.12.0.tar.gz", hash = "sha256:330a327b422aca0634ecd115985c1c7fd7bdb5b5a2ef8aa9888a82e2ebe9437a"}, ] [package.dependencies] @@ -136,7 +137,7 @@ platformdirs 
= ">=2" [package.extras] colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4)"] +d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] @@ -597,7 +598,7 @@ traitlets = "*" [[package]] name = "mex-model" -version = "2.1.1" +version = "2.2.0" description = "RKI MEx metadata model" optional = false python-versions = "*" @@ -607,8 +608,8 @@ develop = false [package.source] type = "git" url = "https://github.com/robert-koch-institut/mex-model.git" -reference = "2.1.1" -resolved_reference = "e40f1e44db2ccf7f4f78f0995905e93bad0efce3" +reference = "feature/mx-1478-more-model-alignments" +resolved_reference = "f9d35adc745fe118962b59ae1e6852d1440c6e3f" [[package]] name = "mypy" @@ -725,36 +726,36 @@ files = [ [[package]] name = "pandas" -version = "2.1.3" +version = "2.1.4" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" files = [ - {file = "pandas-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:acf08a73b5022b479c1be155d4988b72f3020f308f7a87c527702c5f8966d34f"}, - {file = "pandas-2.1.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3cc4469ff0cf9aa3a005870cb49ab8969942b7156e0a46cc3f5abd6b11051dfb"}, - {file = "pandas-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35172bff95f598cc5866c047f43c7f4df2c893acd8e10e6653a4b792ed7f19bb"}, - {file = "pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59dfe0e65a2f3988e940224e2a70932edc964df79f3356e5f2997c7d63e758b4"}, - {file = "pandas-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0296a66200dee556850d99b24c54c7dfa53a3264b1ca6f440e42bad424caea03"}, - {file = "pandas-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:465571472267a2d6e00657900afadbe6097c8e1dc43746917db4dfc862e8863e"}, - {file = "pandas-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:04d4c58e1f112a74689da707be31cf689db086949c71828ef5da86727cfe3f82"}, - {file = "pandas-2.1.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7fa2ad4ff196768ae63a33f8062e6838efed3a319cf938fdf8b95e956c813042"}, - {file = "pandas-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4441ac94a2a2613e3982e502ccec3bdedefe871e8cea54b8775992485c5660ef"}, - {file = "pandas-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5ded6ff28abbf0ea7689f251754d3789e1edb0c4d0d91028f0b980598418a58"}, - {file = "pandas-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fca5680368a5139d4920ae3dc993eb5106d49f814ff24018b64d8850a52c6ed2"}, - {file = "pandas-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:de21e12bf1511190fc1e9ebc067f14ca09fccfb189a813b38d63211d54832f5f"}, - {file = "pandas-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a5d53c725832e5f1645e7674989f4c106e4b7249c1d57549023ed5462d73b140"}, - {file = "pandas-2.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7cf4cf26042476e39394f1f86868d25b265ff787c9b2f0d367280f11afbdee6d"}, - {file = "pandas-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:72c84ec1b1d8e5efcbff5312abe92bfb9d5b558f11e0cf077f5496c4f4a3c99e"}, - {file = "pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f539e113739a3e0cc15176bf1231a553db0239bfa47a2c870283fd93ba4f683"}, - {file = "pandas-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fc77309da3b55732059e484a1efc0897f6149183c522390772d3561f9bf96c00"}, - {file = "pandas-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:08637041279b8981a062899da0ef47828df52a1838204d2b3761fbd3e9fcb549"}, - {file = "pandas-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b99c4e51ef2ed98f69099c72c75ec904dd610eb41a32847c4fcbc1a975f2d2b8"}, - {file = "pandas-2.1.3-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:f7ea8ae8004de0381a2376662c0505bb0a4f679f4c61fbfd122aa3d1b0e5f09d"}, - {file = "pandas-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fcd76d67ca2d48f56e2db45833cf9d58f548f97f61eecd3fdc74268417632b8a"}, - {file = "pandas-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1329dbe93a880a3d7893149979caa82d6ba64a25e471682637f846d9dbc10dd2"}, - {file = "pandas-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:321ecdb117bf0f16c339cc6d5c9a06063854f12d4d9bc422a84bb2ed3207380a"}, - {file = "pandas-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:11a771450f36cebf2a4c9dbd3a19dfa8c46c4b905a3ea09dc8e556626060fe71"}, - {file = "pandas-2.1.3.tar.gz", hash = "sha256:22929f84bca106921917eb73c1521317ddd0a4c71b395bcf767a106e3494209f"}, + {file = "pandas-2.1.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bdec823dc6ec53f7a6339a0e34c68b144a7a1fd28d80c260534c39c62c5bf8c9"}, + {file = "pandas-2.1.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:294d96cfaf28d688f30c918a765ea2ae2e0e71d3536754f4b6de0ea4a496d034"}, + {file = "pandas-2.1.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b728fb8deba8905b319f96447a27033969f3ea1fea09d07d296c9030ab2ed1d"}, + {file = "pandas-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00028e6737c594feac3c2df15636d73ace46b8314d236100b57ed7e4b9ebe8d9"}, + {file = "pandas-2.1.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:426dc0f1b187523c4db06f96fb5c8d1a845e259c99bda74f7de97bd8a3bb3139"}, + {file = "pandas-2.1.4-cp310-cp310-win_amd64.whl", hash = "sha256:f237e6ca6421265643608813ce9793610ad09b40154a3344a088159590469e46"}, + {file = "pandas-2.1.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b7d852d16c270e4331f6f59b3e9aa23f935f5c4b0ed2d0bc77637a8890a5d092"}, + {file = "pandas-2.1.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7d5f2f54f78164b3d7a40f33bf79a74cdee72c31affec86bfcabe7e0789821"}, + 
{file = "pandas-2.1.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0aa6e92e639da0d6e2017d9ccff563222f4eb31e4b2c3cf32a2a392fc3103c0d"}, + {file = "pandas-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d797591b6846b9db79e65dc2d0d48e61f7db8d10b2a9480b4e3faaddc421a171"}, + {file = "pandas-2.1.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d2d3e7b00f703aea3945995ee63375c61b2e6aa5aa7871c5d622870e5e137623"}, + {file = "pandas-2.1.4-cp311-cp311-win_amd64.whl", hash = "sha256:dc9bf7ade01143cddc0074aa6995edd05323974e6e40d9dbde081021ded8510e"}, + {file = "pandas-2.1.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:482d5076e1791777e1571f2e2d789e940dedd927325cc3cb6d0800c6304082f6"}, + {file = "pandas-2.1.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8a706cfe7955c4ca59af8c7a0517370eafbd98593155b48f10f9811da440248b"}, + {file = "pandas-2.1.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0513a132a15977b4a5b89aabd304647919bc2169eac4c8536afb29c07c23540"}, + {file = "pandas-2.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9f17f2b6fc076b2a0078862547595d66244db0f41bf79fc5f64a5c4d635bead"}, + {file = "pandas-2.1.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:45d63d2a9b1b37fa6c84a68ba2422dc9ed018bdaa668c7f47566a01188ceeec1"}, + {file = "pandas-2.1.4-cp312-cp312-win_amd64.whl", hash = "sha256:f69b0c9bb174a2342818d3e2778584e18c740d56857fc5cdb944ec8bbe4082cf"}, + {file = "pandas-2.1.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3f06bda01a143020bad20f7a85dd5f4a1600112145f126bc9e3e42077c24ef34"}, + {file = "pandas-2.1.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ab5796839eb1fd62a39eec2916d3e979ec3130509930fea17fe6f81e18108f6a"}, + {file = "pandas-2.1.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edbaf9e8d3a63a9276d707b4d25930a262341bca9874fcb22eff5e3da5394732"}, + {file = 
"pandas-2.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ebfd771110b50055712b3b711b51bee5d50135429364d0498e1213a7adc2be8"}, + {file = "pandas-2.1.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8ea107e0be2aba1da619cc6ba3f999b2bfc9669a83554b1904ce3dd9507f0860"}, + {file = "pandas-2.1.4-cp39-cp39-win_amd64.whl", hash = "sha256:d65148b14788b3758daf57bf42725caa536575da2b64df9964c563b015230984"}, + {file = "pandas-2.1.4.tar.gz", hash = "sha256:fcb68203c833cc735321512e13861358079a96c174a61f5116a1de89c58c0ef7"}, ] [package.dependencies] @@ -822,13 +823,13 @@ testing = ["docopt", "pytest (<6.0.0)"] [[package]] name = "pathspec" -version = "0.11.2" +version = "0.12.1" description = "Utility library for gitignore style pattern matching of file paths." optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pathspec-0.11.2-py3-none-any.whl", hash = "sha256:1d6ed233af05e679efb96b1851550ea95bbb64b7c490b0f5aa52996c11e92a20"}, - {file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"}, + {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, + {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, ] [[package]] @@ -877,13 +878,13 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "prompt-toolkit" -version = "3.0.41" +version = "3.0.42" description = "Library for building powerful interactive command lines in Python" optional = false python-versions = ">=3.7.0" files = [ - {file = "prompt_toolkit-3.0.41-py3-none-any.whl", hash = "sha256:f36fe301fafb7470e86aaf90f036eef600a3210be4decf461a5b1ca8403d3cb2"}, - {file = "prompt_toolkit-3.0.41.tar.gz", hash = "sha256:941367d97fc815548822aa26c2a269fdc4eb21e9ec05fc5d447cf09bad5d75f0"}, + {file = "prompt_toolkit-3.0.42-py3-none-any.whl", hash = 
"sha256:3b50b5fc50660dc8e39dfe464b170959ad82ff185ffa53bfd3be02222e7156a1"}, + {file = "prompt_toolkit-3.0.42.tar.gz", hash = "sha256:bfbf7d6ea9744e4ec94c9a69539e8106c77a2a607d728ded87c9182a4aec39be"}, ] [package.dependencies] @@ -1242,22 +1243,6 @@ files = [ {file = "ruff-0.1.7.tar.gz", hash = "sha256:dffd699d07abf54833e5f6cc50b85a6ff043715da8788c4a79bcd4ab4734d306"}, ] -[[package]] -name = "setuptools" -version = "69.0.2" -description = "Easily download, build, install, upgrade, and uninstall Python packages" -optional = false -python-versions = ">=3.8" -files = [ - {file = "setuptools-69.0.2-py3-none-any.whl", hash = "sha256:1e8fdff6797d3865f37397be788a4e3cba233608e9b509382a2777d25ebde7f2"}, - {file = "setuptools-69.0.2.tar.gz", hash = "sha256:735896e78a4742605974de002ac60562d286fa8051a7e2299445e8e8fbb01aa6"}, -] - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] -testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] - [[package]] name = "six" version = "1.16.0" @@ -1546,13 +1531,13 @@ files = [ [[package]] name = "typing-extensions" -version = "4.8.0" +version = "4.9.0" description = 
"Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.8.0-py3-none-any.whl", hash = "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0"}, - {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"}, + {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"}, + {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, ] [[package]] @@ -1596,4 +1581,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "1087865cb12f57b22cf88c0a6a8f4f7c754aeb073be41608a035af33cafa4c51" +content-hash = "ac053c7c468ca3f5ed2645f3ff25f31030886b3a39c235df9ceb53dcbe45ebe7" diff --git a/pyproject.toml b/pyproject.toml index d031b663..a30eaced 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ requests = "^2.31.0" [tool.poetry.group.dev.dependencies] black = "^23.11.0" ipdb = "^0.13.13" -mex-model = { git = "https://github.com/robert-koch-institut/mex-model.git", rev = "2.1.1"} +mex-model = { git = "https://github.com/robert-koch-institut/mex-model.git", rev = "feature/mx-1478-more-model-alignments"} mypy = "^1.7.1" pandas-stubs = "^2.1.1" pytest = "^7.4.3" diff --git a/tests/models/test_schema.py b/tests/models/test_schema.py index ce46cd2e..b2feae50 100644 --- a/tests/models/test_schema.py +++ b/tests/models/test_schema.py @@ -1,13 +1,14 @@ import json from copy import deepcopy +from itertools import zip_longest from pathlib import Path from typing import Any import pytest from mex.common.models import EXTRACTED_MODEL_CLASSES_BY_NAME -from mex.common.transform import dromedary_to_kebab -from mex.common.types.identifier import MEX_ID_PATTERN, Identifier +from mex.common.transform import dromedary_to_kebab, kebab_to_camel +from 
mex.common.types.identifier import MEX_ID_PATTERN # TODO: find a cleaner way to get to the mex-model JSON schemas SPECIFIED_SCHEMA_PATH = Path(".venv", "src", "mex-model", "schema", "entities") @@ -15,7 +16,9 @@ GENERATED_SCHEMAS = dict( sorted( { - name.removeprefix("Extracted"): model.model_json_schema() + name.removeprefix("Extracted"): model.model_json_schema( + ref_template="/schema/fields/{model}" + ) for name, model in EXTRACTED_MODEL_CLASSES_BY_NAME.items() }.items() ) @@ -43,7 +46,7 @@ def test_entity_types_match_spec() -> None: @pytest.mark.parametrize( ("generated", "specified"), - zip(GENERATED_SCHEMAS.values(), SPECIFIED_SCHEMAS.values()), + zip_longest(GENERATED_SCHEMAS.values(), SPECIFIED_SCHEMAS.values()), ids=GENERATED_SCHEMAS, ) def test_field_names_match_spec( @@ -54,7 +57,7 @@ def test_field_names_match_spec( @pytest.mark.parametrize( ("generated", "specified"), - zip(GENERATED_SCHEMAS.values(), SPECIFIED_SCHEMAS.values()), + zip_longest(GENERATED_SCHEMAS.values(), SPECIFIED_SCHEMAS.values()), ids=GENERATED_SCHEMAS, ) def test_required_fields_match_spec( @@ -63,96 +66,90 @@ def test_required_fields_match_spec( assert set(generated["required"]) == set(specified["required"]) +def deduplicate_dicts(dicts: list[dict[str, Any]]) -> list[dict[str, Any]]: + return [json.loads(s) for s in dict.fromkeys(json.dumps(d) for d in dicts)] + + +def prepare_field(field: str, obj: list[Any] | dict[str, Any]) -> None: + # prepare each item in a list + if isinstance(obj, list): + for item in obj: + prepare_field(field, item) + obj[:] = [item for item in obj if item] + return + + # discard annotations that we fully ignore because they have no use case yet + obj.pop("sameAs", None) # only in spec + obj.pop("subPropertyOf", None) # only in spec + obj.pop("description", None) # only in model (mostly implementation hints) + + # pop annotations that we don't compare directly but use for other comparisons + title = obj.pop("title", "") # only in model (autogenerated by 
pydantic) + use_scheme = obj.pop("useScheme", "") # only in spec (needed to select vocabulary) + + # ignore differences between dates and datetimes + # (we only have `Timestamp` as a date-time implementation, but no type for `date`, + # but we might/should add that in the future) + if obj.get("format") in ("date", "date-time"): + obj.pop("examples", None) + obj.pop("pattern", None) + obj["format"] = "date-time" + + # align reference paths + if obj.get("pattern") == MEX_ID_PATTERN: + obj.pop("pattern") + obj.pop("type", None) + if field in ("identifier", "stableTargetId"): + obj["$ref"] = "/schema/fields/identifier" + else: + title = dromedary_to_kebab(title.removesuffix("ID")) + obj["$ref"] = f"/schema/entities/{title}#/identifier" + + # align concept/enum annotations + # (the spec uses the `useScheme` annotation to specify a vocabulary and the models + # use vocabulary enums referenced by name, so we derive that name from the scheme) + if obj.get("$ref") == "/schema/entities/concept#/identifier": + name = kebab_to_camel(use_scheme.removeprefix("https://mex.rki.de/item/")) + obj["$ref"] = f"/schema/fields/{name}" + + # recurse into the field definitions for array items + if obj.get("type") == "array": + prepare_field(field, obj["items"]) + + for quantifier in {"anyOf", "allOf"} & set(obj): + # prepare choices + prepare_field(field, obj[quantifier]) + + # deduplicate items, used for date/times + obj[quantifier] = deduplicate_dicts(obj[quantifier]) + + # collapse non-choices + if len(obj[quantifier]) == 1: + obj.update(obj.pop(quantifier)[0]) + + @pytest.mark.parametrize( ("entity_type", "field_name"), ENTITY_TYPES_AND_FIELD_NAMES_BY_FQN.values(), ids=ENTITY_TYPES_AND_FIELD_NAMES_BY_FQN.keys(), ) -def test_field_defs_match_spec(entity_type: str, field_name: str) -> None: # noqa: C901 - def prepare_field(obj: list[Any] | dict[str, Any]) -> None: # noqa: C901 - if isinstance(obj, list): # prepare each item in a list - for item in obj: - prepare_field(item) - obj[:] = [item for item in obj if item] - return - obj.pop("sameAs",
None) # not in models - obj.pop("subPropertyOf", None) # not in models - obj.pop("description", None) # not in spec - obj.pop("default", None) # not in spec - if obj.get("type") == "null": # pydantic 2 sets these - obj.pop("type") - if obj.get("examples") == [str(Identifier.generate(seed=42))]: # not in spec - obj.pop("examples") - if field_name == "temporal": # this field's examples are too complicated - obj.pop("examples", None) - if obj.get("format") == "date": # we mash up dates and datetimes in models - obj["format"] = "date-time" - if obj.get("pattern") in ( - r"^[1-9]\d{3}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$", - r"^\d{4}(-\d{2})?(-\d{2})?$", # ignore date/time pattern differences - ): - obj["pattern"] = r"^\d{4}(-\d{2}(-\d{2}(T\d{2}:\d{2}:\d{2}Z)?)?)?$" - title = obj.pop("title", None) - if obj.get("pattern") == MEX_ID_PATTERN: # align concept/enum annotations - obj.pop("pattern") - obj.pop("type") - if title and field_name not in ("identifier", "stableTargetId"): - title = dromedary_to_kebab(title.removesuffix("ID")) - obj["$ref"] = f"/schema/entities/{title}#/identifier" - else: - obj["$ref"] = "/schema/fields/identifier" - if "$ref" in obj: # align reference paths - obj["$ref"] = obj["$ref"].replace("#/$defs/", "/schema/fields/").lower() - if obj["$ref"] == "/schema/entities/concept#/identifier" and ( - scheme := obj.pop("useScheme", None) - ): - name = scheme.replace("-", "").removeprefix("https://mex.rki.de/item/") - obj["$ref"] = f"/schema/fields/{name}" - if obj.get("type") == "array": - obj.pop("default", None) # ignore empty list defaults - if isinstance(obj["items"], dict): - obj["items"] = [obj["items"]] - if scheme := obj.pop("useScheme", None): # pull down scheme into items - for item in obj["items"]: - item["useScheme"] = scheme - prepare_field(obj["items"]) - if any( - item.get("examples") if isinstance(item, dict) else None - for item in obj["items"] - ): # pull up examples onto field level - obj["examples"] = [ - e - for item in obj["items"] - 
for e in ( - item.pop("examples", []) if isinstance(item, dict) else [] - ) - ] - if "anyOf" in obj: # ignore any/one difference - obj["oneOf"] = obj.pop("anyOf") - for quantifier in ({"oneOf", "allOf"} & set(obj)): - prepare_field(obj[quantifier]) # prepare choices - obj[quantifier] = list( - json.loads(s) for s in {json.dumps(o) for o in obj[quantifier]} - ) # deduplicate items, used for date/times - if len(obj[quantifier]) == 1: # collapse non-choices - obj.update(obj.pop(quantifier)[0]) - - generated_properties = GENERATED_SCHEMAS[entity_type]["properties"] +def test_field_defs_match_spec(entity_type: str, field_name: str) -> None: specified_properties = SPECIFIED_SCHEMAS[entity_type]["properties"] - generated = deepcopy(generated_properties[field_name]) + generated_properties = GENERATED_SCHEMAS[entity_type]["properties"] specified = deepcopy(specified_properties[field_name]) + generated = deepcopy(generated_properties[field_name]) - prepare_field(generated) - prepare_field(specified) + prepare_field(field_name, specified) + prepare_field(field_name, generated) assert ( generated == specified ), f""" {entity_type}.{field_name} -generated: -{json.dumps(generated_properties[field_name], indent=2, sort_keys=True)} - specified: -{json.dumps(specified_properties[field_name], indent=2, sort_keys=True)} +{json.dumps(specified_properties[field_name], indent=4, sort_keys=True)} + +generated: +{json.dumps(generated_properties[field_name], indent=4, sort_keys=True)} """