diff --git a/CHANGELOG.md b/CHANGELOG.md index 38aa009f..48c41186 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,15 +9,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- add validator to base model that verifies computed fields can be set but not altered +- new class hierarchy for identifiers: ExtractedIdentifier and MergedIdentifier + ### Changes - improve typing for methods using `Self` - make local type variables private +- use json instead of pickle to calculate checksum of models +- replace `set_identifiers` validator with computed fields on each extracted model ### Deprecated ### Removed +- remove custom stringify method on base entities that included the `identifier` field + ### Fixed - fix typing for `__eq__` arguments diff --git a/mex/common/backend_api/connector.py b/mex/common/backend_api/connector.py index def9560a..96eeac4c 100644 --- a/mex/common/backend_api/connector.py +++ b/mex/common/backend_api/connector.py @@ -3,9 +3,9 @@ from mex.common.backend_api.models import BulkInsertResponse from mex.common.connector import HTTPConnector -from mex.common.models import ExtractedData +from mex.common.models import AnyExtractedModel from mex.common.settings import BaseSettings -from mex.common.types import Identifier +from mex.common.types import AnyExtractedIdentifier class BackendApiConnector(HTTPConnector): @@ -27,7 +27,9 @@ def _set_url(self) -> None: settings = BaseSettings.get() self.url = urljoin(str(settings.backend_api_url), self.API_VERSION) - def post_models(self, models: list[ExtractedData]) -> list[Identifier]: + def post_models( + self, models: list[AnyExtractedModel] + ) -> list[AnyExtractedIdentifier]: """Post models to Backend API in a bulk insertion request. 
Args: diff --git a/mex/common/backend_api/models.py b/mex/common/backend_api/models.py index 0f539beb..adc59785 100644 --- a/mex/common/backend_api/models.py +++ b/mex/common/backend_api/models.py @@ -1,8 +1,8 @@ from mex.common.models import BaseModel -from mex.common.types import Identifier +from mex.common.types import AnyExtractedIdentifier class BulkInsertResponse(BaseModel): """Response body for the bulk ingestion endpoint.""" - identifiers: list[Identifier] + identifiers: list[AnyExtractedIdentifier] diff --git a/mex/common/extract.py b/mex/common/extract.py index 6d948018..3ee37200 100644 --- a/mex/common/extract.py +++ b/mex/common/extract.py @@ -1,6 +1,6 @@ from collections import defaultdict from collections.abc import Generator -from pathlib import Path +from os import PathLike from typing import TYPE_CHECKING, Any, TypeVar, Union import numpy as np @@ -38,7 +38,7 @@ def get_dtypes_for_model(model: type["BaseModel"]) -> dict[str, "Dtype"]: def parse_csv( - path_or_buffer: Union[str, Path, "ReadCsvBuffer[Any]"], + path_or_buffer: Union[str, PathLike[str], "ReadCsvBuffer[Any]"], into: type[_BaseModelT], chunksize: int = 10, **kwargs: Any, diff --git a/mex/common/ldap/transform.py b/mex/common/ldap/transform.py index 01253f31..106c361d 100644 --- a/mex/common/ldap/transform.py +++ b/mex/common/ldap/transform.py @@ -105,7 +105,7 @@ def transform_ldap_person_to_mex_person( f"'{ldap_person.department}' or departmentNumber " f"'{ldap_person.departmentNumber}'" ) - return ExtractedPerson( # type: ignore[call-arg] + return ExtractedPerson( identifierInPrimarySource=str(ldap_person.objectGUID), hadPrimarySource=primary_source.stableTargetId, affiliation=[], # TODO resolve organization for person.company/RKI @@ -132,7 +132,7 @@ def transform_ldap_actor_to_mex_contact_point( Returns: Extracted contact point """ - return ExtractedContactPoint( # type: ignore[call-arg] + return ExtractedContactPoint( identifierInPrimarySource=str(ldap_actor.objectGUID), 
hadPrimarySource=primary_source.stableTargetId, email=ldap_actor.mail, diff --git a/mex/common/models/access_platform.py b/mex/common/models/access_platform.py index ed22d8ba..cb5f95a4 100644 --- a/mex/common/models/access_platform.py +++ b/mex/common/models/access_platform.py @@ -2,7 +2,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field +from pydantic import Field, computed_field from mex.common.models.base import BaseModel from mex.common.models.extracted_data import ExtractedData @@ -92,8 +92,18 @@ class ExtractedAccessPlatform(BaseAccessPlatform, ExtractedData): entityType: Annotated[ Literal["ExtractedAccessPlatform"], Field(alias="$type", frozen=True) ] = "ExtractedAccessPlatform" - identifier: Annotated[ExtractedAccessPlatformIdentifier, Field(frozen=True)] - stableTargetId: MergedAccessPlatformIdentifier + + @computed_field # type: ignore[misc] + @property + def identifier(self) -> ExtractedAccessPlatformIdentifier: + """Return the computed identifier for this extracted data item.""" + return self._get_identifier(ExtractedAccessPlatformIdentifier) + + @computed_field # type: ignore[misc] + @property + def stableTargetId(self) -> MergedAccessPlatformIdentifier: # noqa: N802 + """Return the computed stableTargetId for this extracted data item.""" + return self._get_stable_target_id(MergedAccessPlatformIdentifier) class MergedAccessPlatform(BaseAccessPlatform, MergedItem): diff --git a/mex/common/models/activity.py b/mex/common/models/activity.py index 94f5752c..9551c520 100644 --- a/mex/common/models/activity.py +++ b/mex/common/models/activity.py @@ -5,7 +5,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field +from pydantic import Field, computed_field from mex.common.models.base import BaseModel from mex.common.models.extracted_data import ExtractedData @@ -97,8 +97,18 @@ class ExtractedActivity(BaseActivity, ExtractedData): entityType: Annotated[ Literal["ExtractedActivity"], Field(alias="$type", 
frozen=True) ] = "ExtractedActivity" - identifier: Annotated[ExtractedActivityIdentifier, Field(frozen=True)] - stableTargetId: MergedActivityIdentifier + + @computed_field # type: ignore[misc] + @property + def identifier(self) -> ExtractedActivityIdentifier: + """Return the computed identifier for this extracted data item.""" + return self._get_identifier(ExtractedActivityIdentifier) + + @computed_field # type: ignore[misc] + @property + def stableTargetId(self) -> MergedActivityIdentifier: # noqa: N802 + """Return the computed stableTargetId for this extracted data item.""" + return self._get_stable_target_id(MergedActivityIdentifier) class MergedActivity(BaseActivity, MergedItem): diff --git a/mex/common/models/base.py b/mex/common/models/base.py index 93d8129d..10e709e5 100644 --- a/mex/common/models/base.py +++ b/mex/common/models/base.py @@ -1,29 +1,27 @@ import hashlib -import pickle # nosec +import json from collections.abc import MutableMapping from functools import cache from types import UnionType -from typing import ( - Any, - TypeVar, - Union, -) +from typing import Any, Union -from pydantic import BaseModel as PydanticBaseModel +from pydantic import ( + BaseModel as PydanticBaseModel, +) from pydantic import ( ConfigDict, TypeAdapter, ValidationError, + ValidatorFunctionWrapHandler, model_validator, ) from pydantic.json_schema import DEFAULT_REF_TEMPLATE, JsonSchemaMode from pydantic.json_schema import GenerateJsonSchema as PydanticJsonSchemaGenerator from mex.common.models.schema import JsonSchemaGenerator +from mex.common.transform import MExEncoder from mex.common.utils import get_inner_types -_RawModelDataT = TypeVar("_RawModelDataT") - class BaseModel(PydanticBaseModel): """Common base class for all MEx model classes.""" @@ -142,7 +140,7 @@ def _fix_value_listyness_for_field(cls, field_name: str, value: Any) -> Any: @model_validator(mode="before") @classmethod - def fix_listyness(cls, data: _RawModelDataT) -> _RawModelDataT: + def 
fix_listyness(cls, data: Any) -> Any: """Adjust the listyness of to-be-parsed data to match the desired shape. If that data is a Mapping and the model defines a list[T] field but the raw data @@ -156,7 +154,7 @@ def fix_listyness(cls, data: _RawModelDataT) -> _RawModelDataT: entry however, an error is raised, because we would not know which to choose. Args: - data: Raw data to be parsed + data: Raw data or instance to be parsed Returns: data with fixed list shapes @@ -168,9 +166,46 @@ def fix_listyness(cls, data: _RawModelDataT) -> _RawModelDataT: data[name] = cls._fix_value_listyness_for_field(field_name, value) return data + @model_validator(mode="wrap") + def verify_computed_field_consistency( + cls, data: Any, handler: ValidatorFunctionWrapHandler + ) -> Any: + """Validate that parsed values for computed fields are consistent. + + Parsing a dictionary with a value for a computed field that is consistent with + what that field would have computed anyway is allowed. Omitting values for + computed fields is perfectly valid as well. However, if the parsed value is + different from the computed value, a validation error is raised. + + Args: + data: Raw data or instance to be parsed + handler: Validator function wrap handler + + Returns: + data with consistent computed fields. + """ + if not cls.model_computed_fields: + return handler(data) + if not isinstance(data, MutableMapping): + raise AssertionError( + "Input should be a valid dictionary, validating other types is not " + "supported for models with computed fields." 
+ ) + custom_values = { + field: value + for field in cls.model_computed_fields + if (value := data.pop(field, None)) + } + result = handler(data) + computed_values = result.model_dump(include=set(custom_values)) + if computed_values != custom_values: + raise ValueError("Cannot set computed fields to custom values!") + return result + def checksum(self) -> str: """Calculate md5 checksum for this model.""" - return hashlib.md5(pickle.dumps(self)).hexdigest() # noqa: S324 + json_str = json.dumps(self, sort_keys=True, cls=MExEncoder) + return hashlib.md5(json_str.encode()).hexdigest() # noqa: S324 def __str__(self) -> str: """Format this model as a string for logging.""" diff --git a/mex/common/models/contact_point.py b/mex/common/models/contact_point.py index 4bf2abc8..f2174d15 100644 --- a/mex/common/models/contact_point.py +++ b/mex/common/models/contact_point.py @@ -2,7 +2,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field +from pydantic import Field, computed_field from mex.common.models.base import BaseModel from mex.common.models.extracted_data import ExtractedData @@ -44,8 +44,18 @@ class ExtractedContactPoint(BaseContactPoint, ExtractedData): entityType: Annotated[ Literal["ExtractedContactPoint"], Field(alias="$type", frozen=True) ] = "ExtractedContactPoint" - identifier: Annotated[ExtractedContactPointIdentifier, Field(frozen=True)] - stableTargetId: MergedContactPointIdentifier + + @computed_field # type: ignore[misc] + @property + def identifier(self) -> ExtractedContactPointIdentifier: + """Return the computed identifier for this extracted data item.""" + return self._get_identifier(ExtractedContactPointIdentifier) + + @computed_field # type: ignore[misc] + @property + def stableTargetId(self) -> MergedContactPointIdentifier: # noqa: N802 + """Return the computed stableTargetId for this extracted data item.""" + return self._get_stable_target_id(MergedContactPointIdentifier) class MergedContactPoint(BaseContactPoint, 
MergedItem): diff --git a/mex/common/models/distribution.py b/mex/common/models/distribution.py index e45560ba..67929a29 100644 --- a/mex/common/models/distribution.py +++ b/mex/common/models/distribution.py @@ -2,7 +2,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field +from pydantic import Field, computed_field from mex.common.models.base import BaseModel from mex.common.models.extracted_data import ExtractedData @@ -136,8 +136,18 @@ class ExtractedDistribution(BaseDistribution, ExtractedData): entityType: Annotated[ Literal["ExtractedDistribution"], Field(alias="$type", frozen=True) ] = "ExtractedDistribution" - identifier: Annotated[ExtractedDistributionIdentifier, Field(frozen=True)] - stableTargetId: MergedDistributionIdentifier + + @computed_field # type: ignore[misc] + @property + def identifier(self) -> ExtractedDistributionIdentifier: + """Return the computed identifier for this extracted data item.""" + return self._get_identifier(ExtractedDistributionIdentifier) + + @computed_field # type: ignore[misc] + @property + def stableTargetId(self) -> MergedDistributionIdentifier: # noqa: N802 + """Return the computed stableTargetId for this extracted data item.""" + return self._get_stable_target_id(MergedDistributionIdentifier) class MergedDistribution(BaseDistribution, MergedItem): diff --git a/mex/common/models/entity.py b/mex/common/models/entity.py index 569c232b..f77e872c 100644 --- a/mex/common/models/entity.py +++ b/mex/common/models/entity.py @@ -1,10 +1,11 @@ from typing import TYPE_CHECKING, ClassVar +from pydantic import ConfigDict + from mex.common.models.base import BaseModel -from mex.common.types import Identifier -class BaseEntity(BaseModel, extra="forbid"): +class BaseEntity(BaseModel): """Abstract base model for extracted data, merged item and rule set classes. 
This class gives type hints for an `identifier` field, the frozen `entityType` field @@ -13,6 +14,10 @@ class BaseEntity(BaseModel, extra="forbid"): type as well as the correct literal values for the entity and stem types. """ + model_config = ConfigDict( + extra="forbid", + ) + if TYPE_CHECKING: # pragma: no cover # The frozen `entityType` field is added to all `BaseEntity` subclasses to # help with assigning the correct class when reading raw JSON entities. @@ -26,13 +31,3 @@ class BaseEntity(BaseModel, extra="forbid"): # type of items. E.g. `ExtractedPerson`, `MergedPerson` and `PreventivePerson` # all share the same `stemType` of `Person`. stemType: ClassVar - - # A globally unique identifier is added to all `BaseEntity` subclasses and - # should be typed to the correct identifier type. Regardless of the entity-type - # or whether this item was extracted, merged, etc., identifiers will be assigned - # just once and should be declared as `frozen` on subclasses. - identifier: Identifier - - def __str__(self) -> str: - """Format this instance as a string for logging.""" - return f"{self.entityType}: {self.identifier}" diff --git a/mex/common/models/extracted_data.py b/mex/common/models/extracted_data.py index 7798158a..a97f4ded 100644 --- a/mex/common/models/extracted_data.py +++ b/mex/common/models/extracted_data.py @@ -1,10 +1,12 @@ -from typing import TYPE_CHECKING, Annotated, Any +from typing import Annotated, TypeVar -from pydantic import Field, model_validator, validate_call +from pydantic import Field from mex.common.models.entity import BaseEntity from mex.common.types import ( + ExtractedIdentifier, ExtractedPrimarySourceIdentifier, + MergedIdentifier, MergedPrimarySourceIdentifier, ) @@ -12,6 +14,9 @@ MEX_PRIMARY_SOURCE_IDENTIFIER_IN_PRIMARY_SOURCE = "mex" MEX_PRIMARY_SOURCE_STABLE_TARGET_ID = MergedPrimarySourceIdentifier("00000000000000") +_MergedIdentifierT = TypeVar("_MergedIdentifierT", bound=MergedIdentifier) +_ExtractedIdentifierT = 
TypeVar("_ExtractedIdentifierT", bound=ExtractedIdentifier) + class ExtractedData(BaseEntity): """Base model for all extracted data classes. @@ -26,20 +31,6 @@ class ExtractedData(BaseEntity): See below, for a full description. """ - if TYPE_CHECKING: # pragma: no cover - # Sometimes multiple primary sources describe the same activity, resource, etc. - # and a complete metadata item can only be created by merging these fragments. - # The `stableTargetId` is part of all extracted models to allow MEx to identify - # which items describe the same thing and should be merged to create a complete - # metadata item. The name `stableTargetId` might be a bit misleading, because - # the "stability" is only guaranteed for one "real world" or "digital world" - # thing having the same ID in MEx over time. But it is not a guarantee, that the - # same metadata sources contribute to the complete metadata item. The naming has - # its historical reasons, but we plan to change it in the near future. - # Because we anticipate that items have to be merged, the `stableTargetId` is - # also used as the foreign key for all fields containing references. - stableTargetId: Any - hadPrimarySource: Annotated[ MergedPrimarySourceIdentifier, Field( @@ -71,127 +62,40 @@ class ExtractedData(BaseEntity): ), ] - # TODO make stable_target_id and identifier computed fields (MX-1435) - @model_validator(mode="before") - @classmethod - @validate_call - def set_identifiers(cls, values: dict[str, Any]) -> dict[str, Any]: # noqa: C901 - """Ensure identifiers and provenance attributes are set for this instance. - - All extracted data classes have four important identifiers that are defined - by `BaseEntity`, `ExtractedData` and the concrete classes themselves. - - - identifierInPrimarySource - - hadPrimarySource - - identifier - - stableTargetId - - Every time we create a new instance of an extracted item, we automatically - validate that these identifiers are set correctly. 
+ def _get_identifier( + self, identifier_type: type[_ExtractedIdentifierT] + ) -> _ExtractedIdentifierT: + """Consult the identity provider to get the `identifier` for this item. - We check that `identifierInPrimarySource` and `hadPrimarySource` are provided, - because otherwise we cannot reliably determine the origin of this item. - These two identifiers are the only two that need to be set during extraction. + Args: + identifier_type: ExtractedIdentifier-subclass to cast the identifier to - Next, we query the configured `IdentityProvider` to determine whether this item - already has an `identifier` and `stableTargetId`. If not, we let the identity - provider generate new identifiers. + Returns: + Identifier of the correct type + """ + from mex.common.identity import get_provider # break import cycle, sigh - If an `identifier` has been passed to the constructor, we check that it matches - with what we got from the identity provider, because we don't allow any system - to change the association from `identifierInPrimarySource` and - `hadPrimarySource` to the `identifier`. - A use-case for passing a matching `identifier` to the constructor would be - parsing an already extracted item from an NDJSON file or an API endpoint. + return identifier_type( + get_provider() + .assign(self.hadPrimarySource, self.identifierInPrimarySource) + .identifier + ) - If a `stableTargetId` has been passed to the constructor, we use that as the - new value, because changes to the stable target ID are generally allowed. - A use-case for changing the `stableTargetId` will be the matching of - multiple extracted items (see `BaseEntity.stableTargetId` for details). + def _get_stable_target_id( + self, identifier_type: type[_MergedIdentifierT] + ) -> _MergedIdentifierT: + """Consult the identity provider to get the `stableTargetId` for this item. 
Args: - values: Raw values to validate - - Raises: - ValueError: If `identifier` was supplied but does not match the id provider - ValueError: If `identifierInPrimarySource` was missing - ValueError: If `hadPrimarySource` was missing + identifier_type: MergedIdentifier-subclass to cast the identifier to Returns: - Values with identifier and provenance attributes + StableTargetId of the correct type """ - # break import cycle, sigh - from mex.common.identity import get_provider - - # validate ID in primary source and primary source ID - if identifier_in_primary_source := values.get("identifierInPrimarySource"): - if isinstance(identifier_in_primary_source, list): - if len(identifier_in_primary_source) == 1: - identifier_in_primary_source = str(identifier_in_primary_source[0]) - else: - raise ValueError( - f"Expected one value for identifierInPrimarySource, " - f"got {len(identifier_in_primary_source)}" - ) - else: - identifier_in_primary_source = str(identifier_in_primary_source) - else: - raise ValueError("Missing value for `identifierInPrimarySource`.") - - if had_primary_source := values.get("hadPrimarySource"): - if isinstance(had_primary_source, list): - if len(had_primary_source) == 1: - had_primary_source = MergedPrimarySourceIdentifier( - had_primary_source[0] - ) - else: - raise ValueError( - f"Expected one value for hadPrimarySource, " - f"got {len(had_primary_source)}" - ) - else: - had_primary_source = MergedPrimarySourceIdentifier(had_primary_source) - else: - raise ValueError("Missing value for `hadPrimarySource`.") - - provider = get_provider() - identity = provider.assign(had_primary_source, identifier_in_primary_source) - - # In case an identity was already found and it differs from the identifier - # provided to the constructor, we raise an error because it should not be - # allowed to change the identifier of an existing item. 
- if identifier := values.get("identifier"): - if isinstance(identifier, list): - if len(identifier) == 1: - identifier = identifier[0] - else: - raise ValueError( - f"Expected one value for Identifier, got {len(identifier)}" - ) - if identity.identifier != str(identifier): - raise ValueError("Identifier cannot be set manually to new value.") - - # In case an identity was found, we allow assigning a new stable target ID - # for the purpose of merging two items, except for the MEx - # primary source itself. - if stable_target_id := values.get("stableTargetId"): - if isinstance(stable_target_id, list): - if len(stable_target_id) == 1: - stable_target_id = stable_target_id[0] - else: - raise ValueError( - f"Expected one value for stableTargetId, " - f"got {len(stable_target_id)}" - ) - if ( - identity.stableTargetId != str(stable_target_id) - and stable_target_id != MEX_PRIMARY_SOURCE_STABLE_TARGET_ID - ): - raise ValueError( - "Cannot change `stableTargetId` of MEx primary source." - ) - - # update instance values - values["identifier"] = identity.identifier - values["stableTargetId"] = identity.stableTargetId - return values + from mex.common.identity import get_provider # break import cycle, sigh + + return identifier_type( + get_provider() + .assign(self.hadPrimarySource, self.identifierInPrimarySource) + .stableTargetId + ) diff --git a/mex/common/models/organization.py b/mex/common/models/organization.py index 9fd1a5ff..3f30542f 100644 --- a/mex/common/models/organization.py +++ b/mex/common/models/organization.py @@ -5,7 +5,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field +from pydantic import Field, computed_field from mex.common.models.base import BaseModel from mex.common.models.extracted_data import ExtractedData @@ -108,8 +108,18 @@ class ExtractedOrganization(BaseOrganization, ExtractedData): entityType: Annotated[ Literal["ExtractedOrganization"], Field(alias="$type", frozen=True) ] = "ExtractedOrganization" - identifier: 
Annotated[ExtractedOrganizationIdentifier, Field(frozen=True)] - stableTargetId: MergedOrganizationIdentifier + + @computed_field # type: ignore[misc] + @property + def identifier(self) -> ExtractedOrganizationIdentifier: + """Return the computed identifier for this extracted data item.""" + return self._get_identifier(ExtractedOrganizationIdentifier) + + @computed_field # type: ignore[misc] + @property + def stableTargetId(self) -> MergedOrganizationIdentifier: # noqa: N802 + """Return the computed stableTargetId for this extracted data item.""" + return self._get_stable_target_id(MergedOrganizationIdentifier) class MergedOrganization(BaseOrganization, MergedItem): diff --git a/mex/common/models/organizational_unit.py b/mex/common/models/organizational_unit.py index 8ae55ae7..dda9cf32 100644 --- a/mex/common/models/organizational_unit.py +++ b/mex/common/models/organizational_unit.py @@ -2,7 +2,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field +from pydantic import Field, computed_field from mex.common.models.base import BaseModel from mex.common.models.extracted_data import ExtractedData @@ -59,8 +59,18 @@ class ExtractedOrganizationalUnit(BaseOrganizationalUnit, ExtractedData): entityType: Annotated[ Literal["ExtractedOrganizationalUnit"], Field(alias="$type", frozen=True) ] = "ExtractedOrganizationalUnit" - identifier: Annotated[ExtractedOrganizationalUnitIdentifier, Field(frozen=True)] - stableTargetId: MergedOrganizationalUnitIdentifier + + @computed_field # type: ignore[misc] + @property + def identifier(self) -> ExtractedOrganizationalUnitIdentifier: + """Return the computed identifier for this extracted data item.""" + return self._get_identifier(ExtractedOrganizationalUnitIdentifier) + + @computed_field # type: ignore[misc] + @property + def stableTargetId(self) -> MergedOrganizationalUnitIdentifier: # noqa: N802 + """Return the computed stableTargetId for this extracted data item.""" + return 
self._get_stable_target_id(MergedOrganizationalUnitIdentifier) class MergedOrganizationalUnit(BaseOrganizationalUnit, MergedItem): diff --git a/mex/common/models/person.py b/mex/common/models/person.py index ec16ef5e..a3e7de73 100644 --- a/mex/common/models/person.py +++ b/mex/common/models/person.py @@ -2,7 +2,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field +from pydantic import Field, computed_field from mex.common.models.base import BaseModel from mex.common.models.extracted_data import ExtractedData @@ -82,8 +82,18 @@ class ExtractedPerson(BasePerson, ExtractedData): entityType: Annotated[ Literal["ExtractedPerson"], Field(alias="$type", frozen=True) ] = "ExtractedPerson" - identifier: Annotated[ExtractedPersonIdentifier, Field(frozen=True)] - stableTargetId: MergedPersonIdentifier + + @computed_field # type: ignore[misc] + @property + def identifier(self) -> ExtractedPersonIdentifier: + """Return the computed identifier for this extracted data item.""" + return self._get_identifier(ExtractedPersonIdentifier) + + @computed_field # type: ignore[misc] + @property + def stableTargetId(self) -> MergedPersonIdentifier: # noqa: N802 + """Return the computed stableTargetId for this extracted data item.""" + return self._get_stable_target_id(MergedPersonIdentifier) class MergedPerson(BasePerson, MergedItem): diff --git a/mex/common/models/primary_source.py b/mex/common/models/primary_source.py index dfaf469f..ed0e48af 100644 --- a/mex/common/models/primary_source.py +++ b/mex/common/models/primary_source.py @@ -2,7 +2,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field +from pydantic import Field, computed_field from mex.common.models.base import BaseModel from mex.common.models.extracted_data import ExtractedData @@ -72,8 +72,18 @@ class ExtractedPrimarySource(BasePrimarySource, ExtractedData): entityType: Annotated[ Literal["ExtractedPrimarySource"], Field(alias="$type", frozen=True) ] = 
"ExtractedPrimarySource" - identifier: Annotated[ExtractedPrimarySourceIdentifier, Field(frozen=True)] - stableTargetId: MergedPrimarySourceIdentifier + + @computed_field # type: ignore[misc] + @property + def identifier(self) -> ExtractedPrimarySourceIdentifier: + """Return the computed identifier for this extracted data item.""" + return self._get_identifier(ExtractedPrimarySourceIdentifier) + + @computed_field # type: ignore[misc] + @property + def stableTargetId(self) -> MergedPrimarySourceIdentifier: # noqa: N802 + """Return the computed stableTargetId for this extracted data item.""" + return self._get_stable_target_id(MergedPrimarySourceIdentifier) class MergedPrimarySource(BasePrimarySource, MergedItem): diff --git a/mex/common/models/resource.py b/mex/common/models/resource.py index 8d3bccac..bb56651a 100644 --- a/mex/common/models/resource.py +++ b/mex/common/models/resource.py @@ -2,7 +2,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field +from pydantic import Field, computed_field from mex.common.models.base import BaseModel from mex.common.models.extracted_data import ExtractedData @@ -232,8 +232,18 @@ class ExtractedResource(BaseResource, ExtractedData): entityType: Annotated[ Literal["ExtractedResource"], Field(alias="$type", frozen=True) ] = "ExtractedResource" - identifier: Annotated[ExtractedResourceIdentifier, Field(frozen=True)] - stableTargetId: MergedResourceIdentifier + + @computed_field # type: ignore[misc] + @property + def identifier(self) -> ExtractedResourceIdentifier: + """Return the computed identifier for this extracted data item.""" + return self._get_identifier(ExtractedResourceIdentifier) + + @computed_field # type: ignore[misc] + @property + def stableTargetId(self) -> MergedResourceIdentifier: # noqa: N802 + """Return the computed stableTargetId for this extracted data item.""" + return self._get_stable_target_id(MergedResourceIdentifier) class MergedResource(BaseResource, MergedItem): diff --git 
a/mex/common/models/variable.py b/mex/common/models/variable.py index 26cad656..2bb6a9d4 100644 --- a/mex/common/models/variable.py +++ b/mex/common/models/variable.py @@ -2,7 +2,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field +from pydantic import Field, computed_field from mex.common.models.base import BaseModel from mex.common.models.extracted_data import ExtractedData @@ -121,8 +121,18 @@ class ExtractedVariable(BaseVariable, ExtractedData): entityType: Annotated[ Literal["ExtractedVariable"], Field(alias="$type", frozen=True) ] = "ExtractedVariable" - identifier: Annotated[ExtractedVariableIdentifier, Field(frozen=True)] - stableTargetId: MergedVariableIdentifier + + @computed_field # type: ignore[misc] + @property + def identifier(self) -> ExtractedVariableIdentifier: + """Return the computed identifier for this extracted data item.""" + return self._get_identifier(ExtractedVariableIdentifier) + + @computed_field # type: ignore[misc] + @property + def stableTargetId(self) -> MergedVariableIdentifier: # noqa: N802 + """Return the computed stableTargetId for this extracted data item.""" + return self._get_stable_target_id(MergedVariableIdentifier) class MergedVariable(BaseVariable, MergedItem): diff --git a/mex/common/models/variable_group.py b/mex/common/models/variable_group.py index 8ffb36b3..a7e77cd7 100644 --- a/mex/common/models/variable_group.py +++ b/mex/common/models/variable_group.py @@ -2,7 +2,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field +from pydantic import Field, computed_field from mex.common.models.base import BaseModel from mex.common.models.extracted_data import ExtractedData @@ -43,8 +43,18 @@ class ExtractedVariableGroup(BaseVariableGroup, ExtractedData): entityType: Annotated[ Literal["ExtractedVariableGroup"], Field(alias="$type", frozen=True) ] = "ExtractedVariableGroup" - identifier: Annotated[ExtractedVariableGroupIdentifier, Field(frozen=True)] - stableTargetId: 
MergedVariableGroupIdentifier + + @computed_field # type: ignore[misc] + @property + def identifier(self) -> ExtractedVariableGroupIdentifier: + """Return the computed identifier for this extracted data item.""" + return self._get_identifier(ExtractedVariableGroupIdentifier) + + @computed_field # type: ignore[misc] + @property + def stableTargetId(self) -> MergedVariableGroupIdentifier: # noqa: N802 + """Return the computed stableTargetId for this extracted data item.""" + return self._get_stable_target_id(MergedVariableGroupIdentifier) class MergedVariableGroup(BaseVariableGroup, MergedItem): diff --git a/mex/common/organigram/transform.py b/mex/common/organigram/transform.py index eb675a79..5e0defcf 100644 --- a/mex/common/organigram/transform.py +++ b/mex/common/organigram/transform.py @@ -25,7 +25,7 @@ def transform_organigram_units_to_organizational_units( parent_id_in_primary_source_by_id_in_primary_source: dict[str, str] = {} for unit in units: - extracted_unit = ExtractedOrganizationalUnit( # type: ignore[call-arg] + extracted_unit = ExtractedOrganizationalUnit( identifierInPrimarySource=unit.identifier, hadPrimarySource=primary_source.stableTargetId, alternativeName=unit.alternativeName if unit.alternativeName else [], @@ -48,7 +48,14 @@ def transform_organigram_units_to_organizational_units( if parent_unit := extracted_unit_by_id_in_primary_source.get( parent_identifier_in_primary_source ): - extracted_unit.parentUnit = MergedOrganizationalUnitIdentifier( - parent_unit.stableTargetId + # Create a copy, since extracted data instances are made immutable + # by `BaseModel.verify_computed_field_consistency` + extracted_unit = ExtractedOrganizationalUnit.model_validate( + { + **extracted_unit.model_dump(), + "parentUnit": MergedOrganizationalUnitIdentifier( + parent_unit.stableTargetId + ), + } ) yield extracted_unit diff --git a/mex/common/primary_source/transform.py b/mex/common/primary_source/transform.py index 085a1f66..c23ce50a 100644 --- 
a/mex/common/primary_source/transform.py +++ b/mex/common/primary_source/transform.py @@ -2,7 +2,6 @@ from mex.common.logging import watch from mex.common.models import ( - MEX_PRIMARY_SOURCE_IDENTIFIER_IN_PRIMARY_SOURCE, MEX_PRIMARY_SOURCE_STABLE_TARGET_ID, ExtractedPrimarySource, ) @@ -22,17 +21,10 @@ def transform_seed_primary_sources_to_extracted_primary_sources( Generator for ExtractedPrimarySource """ for primary_source in primary_sources: - if primary_source.identifier == MEX_PRIMARY_SOURCE_IDENTIFIER_IN_PRIMARY_SOURCE: - set_stable_target_id = dict( - stableTargetId=MEX_PRIMARY_SOURCE_STABLE_TARGET_ID - ) - else: - set_stable_target_id = dict() yield ExtractedPrimarySource( identifierInPrimarySource=primary_source.identifier, title=primary_source.title, hadPrimarySource=MEX_PRIMARY_SOURCE_STABLE_TARGET_ID, - **set_stable_target_id, ) diff --git a/mex/common/sinks/backend_api.py b/mex/common/sinks/backend_api.py index e4a0906f..1871c066 100644 --- a/mex/common/sinks/backend_api.py +++ b/mex/common/sinks/backend_api.py @@ -3,14 +3,14 @@ from mex.common.backend_api.connector import BackendApiConnector from mex.common.logging import watch from mex.common.models import AnyExtractedModel -from mex.common.types import Identifier +from mex.common.types import AnyExtractedIdentifier from mex.common.utils import grouper @watch def post_to_backend_api( models: Iterable[AnyExtractedModel], chunk_size: int = 100 -) -> Generator[Identifier, None, None]: +) -> Generator[AnyExtractedIdentifier, None, None]: """Load models to the Backend API using bulk insertion. 
Args: @@ -22,5 +22,5 @@ def post_to_backend_api( """ connector = BackendApiConnector.get() for chunk in grouper(chunk_size, models): - model_list = list(filter(None, chunk)) + model_list = [model for model in chunk if model is not None] yield from connector.post_models(model_list) diff --git a/mex/common/sinks/ndjson.py b/mex/common/sinks/ndjson.py index c3d05866..e00df0f8 100644 --- a/mex/common/sinks/ndjson.py +++ b/mex/common/sinks/ndjson.py @@ -8,13 +8,13 @@ from mex.common.models import AnyExtractedModel from mex.common.settings import BaseSettings from mex.common.transform import MExEncoder -from mex.common.types import Identifier +from mex.common.types import AnyExtractedIdentifier @watch def write_ndjson( models: Iterable[AnyExtractedModel], -) -> Generator[Identifier, None, None]: +) -> Generator[AnyExtractedIdentifier, None, None]: """Write the incoming models into a new-line delimited JSON file. Args: diff --git a/mex/common/types/__init__.py b/mex/common/types/__init__.py index 0509b5b7..be360e05 100644 --- a/mex/common/types/__init__.py +++ b/mex/common/types/__init__.py @@ -6,6 +6,7 @@ ExtractedActivityIdentifier, ExtractedContactPointIdentifier, ExtractedDistributionIdentifier, + ExtractedIdentifier, ExtractedOrganizationalUnitIdentifier, ExtractedOrganizationIdentifier, ExtractedPersonIdentifier, @@ -18,6 +19,7 @@ MergedActivityIdentifier, MergedContactPointIdentifier, MergedDistributionIdentifier, + MergedIdentifier, MergedOrganizationalUnitIdentifier, MergedOrganizationIdentifier, MergedPersonIdentifier, @@ -65,6 +67,7 @@ "AccessRestriction", "ActivityType", "AnonymizationPseudonymization", + "AnyExtractedIdentifier", "AnyMergedIdentifier", "AnyNestedModel", "AnyPrimitiveType", @@ -78,6 +81,7 @@ "ExtractedActivityIdentifier", "ExtractedContactPointIdentifier", "ExtractedDistributionIdentifier", + "ExtractedIdentifier", "ExtractedOrganizationalUnitIdentifier", "ExtractedOrganizationIdentifier", "ExtractedPersonIdentifier", @@ -97,6 +101,7 @@ 
"MergedActivityIdentifier", "MergedContactPointIdentifier", "MergedDistributionIdentifier", + "MergedIdentifier", "MergedOrganizationalUnitIdentifier", "MergedOrganizationIdentifier", "MergedPersonIdentifier", diff --git a/mex/common/types/identifier.py b/mex/common/types/identifier.py index 065b951e..2c60b513 100644 --- a/mex/common/types/identifier.py +++ b/mex/common/types/identifier.py @@ -80,89 +80,97 @@ def __repr__(self) -> str: # precise JSON schema definitions and to derive database queries from the models. -class ExtractedAccessPlatformIdentifier(Identifier): +class ExtractedIdentifier(Identifier): + """Base class for all extracted identifiers.""" + + +class ExtractedAccessPlatformIdentifier(ExtractedIdentifier): """Identifier for extracted access platforms.""" -class ExtractedActivityIdentifier(Identifier): +class ExtractedActivityIdentifier(ExtractedIdentifier): """Identifier for extracted activities.""" -class ExtractedContactPointIdentifier(Identifier): +class ExtractedContactPointIdentifier(ExtractedIdentifier): """Identifier for extracted contact points.""" -class ExtractedDistributionIdentifier(Identifier): +class ExtractedDistributionIdentifier(ExtractedIdentifier): """Identifier for extracted distributions.""" -class ExtractedOrganizationIdentifier(Identifier): +class ExtractedOrganizationIdentifier(ExtractedIdentifier): """Identifier for extracted organizations.""" -class ExtractedOrganizationalUnitIdentifier(Identifier): +class ExtractedOrganizationalUnitIdentifier(ExtractedIdentifier): """Identifier for extracted organizational units.""" -class ExtractedPersonIdentifier(Identifier): +class ExtractedPersonIdentifier(ExtractedIdentifier): """Identifier for extracted persons.""" -class ExtractedPrimarySourceIdentifier(Identifier): +class ExtractedPrimarySourceIdentifier(ExtractedIdentifier): """Identifier for extracted primary sources.""" -class ExtractedResourceIdentifier(Identifier): +class ExtractedResourceIdentifier(ExtractedIdentifier): 
"""Identifier for extracted resources.""" -class ExtractedVariableIdentifier(Identifier): +class ExtractedVariableIdentifier(ExtractedIdentifier): """Identifier for extracted variables.""" -class ExtractedVariableGroupIdentifier(Identifier): +class ExtractedVariableGroupIdentifier(ExtractedIdentifier): """Identifier for extracted variable groups.""" -class MergedAccessPlatformIdentifier(Identifier): +class MergedIdentifier(Identifier): + """Base class for all merged identifiers.""" + + +class MergedAccessPlatformIdentifier(MergedIdentifier): """Identifier for merged access platforms.""" -class MergedActivityIdentifier(Identifier): +class MergedActivityIdentifier(MergedIdentifier): """Identifier for merged activities.""" -class MergedContactPointIdentifier(Identifier): +class MergedContactPointIdentifier(MergedIdentifier): """Identifier for merged contact points.""" -class MergedDistributionIdentifier(Identifier): +class MergedDistributionIdentifier(MergedIdentifier): """Identifier for merged distributions.""" -class MergedOrganizationIdentifier(Identifier): +class MergedOrganizationIdentifier(MergedIdentifier): """Identifier for merged organizations.""" -class MergedOrganizationalUnitIdentifier(Identifier): +class MergedOrganizationalUnitIdentifier(MergedIdentifier): """Identifier for merged organizational units.""" -class MergedPersonIdentifier(Identifier): +class MergedPersonIdentifier(MergedIdentifier): """Identifier for merged persons.""" -class MergedPrimarySourceIdentifier(Identifier): +class MergedPrimarySourceIdentifier(MergedIdentifier): """Identifier for merged primary sources.""" -class MergedResourceIdentifier(Identifier): +class MergedResourceIdentifier(MergedIdentifier): """Identifier for merged resources.""" -class MergedVariableIdentifier(Identifier): +class MergedVariableIdentifier(MergedIdentifier): """Identifier for merged variables.""" -class MergedVariableGroupIdentifier(Identifier): +class MergedVariableGroupIdentifier(MergedIdentifier): 
"""Identifier for merged variable groups.""" diff --git a/mex/common/wikidata/models/organization.py b/mex/common/wikidata/models/organization.py index e7ee72ea..9845845b 100644 --- a/mex/common/wikidata/models/organization.py +++ b/mex/common/wikidata/models/organization.py @@ -1,6 +1,6 @@ from typing import Annotated -from pydantic import ConfigDict, Field, model_validator +from pydantic import Field, model_validator from mex.common.models import BaseModel @@ -93,8 +93,6 @@ class Aliases(BaseModel): class WikidataOrganization(BaseModel): """Model class for Wikidata sources.""" - model_config = ConfigDict(extra="ignore") - identifier: Annotated[str, Field(alias="id")] labels: Labels claims: Claims diff --git a/mex/common/wikidata/transform.py b/mex/common/wikidata/transform.py index 83f88f72..7708c4fa 100644 --- a/mex/common/wikidata/transform.py +++ b/mex/common/wikidata/transform.py @@ -52,7 +52,7 @@ def transform_wikidata_organization_to_extracted_organization( labels = _get_clean_labels(wikidata_organization.labels) if not labels: return None - return ExtractedOrganization( # type: ignore[call-arg] + return ExtractedOrganization( wikidataId=f"https://www.wikidata.org/entity/{wikidata_organization.identifier}", officialName=labels, shortName=_get_clean_short_names(wikidata_organization.claims.short_name), diff --git a/tests/backend_api/test_connector.py b/tests/backend_api/test_connector.py index a7b2b7bd..7d6f6e45 100644 --- a/tests/backend_api/test_connector.py +++ b/tests/backend_api/test_connector.py @@ -36,10 +36,10 @@ def test_post_models_mocked( assert json.loads(mocked_send_request.call_args_list[-1].kwargs["data"]) == { "ExtractedPerson": [ { - "identifier": "bFQoRhcVH5DH3i", + "identifier": "e3VhxMhEKyjqN5flzLpiEB", "hadPrimarySource": "bFQoRhcVH5DHXE", "identifierInPrimarySource": "00000000-0000-4000-8000-0000000003de", - "stableTargetId": "bFQoRhcVH5DH8y", + "stableTargetId": "NGwfzG8ROsrvIiQIVDVy", "affiliation": ["bFQoRhcVH5DHZg"], "email": 
["TintzmannM@rki.de"], "familyName": ["Tintzmann"], diff --git a/tests/models/test_base.py b/tests/models/test_base.py index 3f6a12f9..622679ba 100644 --- a/tests/models/test_base.py +++ b/tests/models/test_base.py @@ -1,11 +1,10 @@ from enum import Enum -from typing import Annotated, Any, Literal +from typing import Any import pytest -from pydantic import Field, ValidationError +from pydantic import ValidationError, computed_field -from mex.common.models import BaseModel, MergedItem -from mex.common.types import Identifier +from mex.common.models import BaseModel class ComplexDummyModel(BaseModel): @@ -90,13 +89,38 @@ class Shelter(Pet): Shelter(inhabitants="foo") # type: ignore +def test_verify_computed_field_consistency() -> None: + class Computer(BaseModel): + + @computed_field # type: ignore[misc] + @property + def cpus(self) -> int: + return 42 + + computer = Computer.model_validate({"cpus": 42}) + assert computer.cpus == 42 + + with pytest.raises( + ValidationError, + match="Input should be a valid dictionary, validating other types is not " + "supported for models with computed fields.", + ): + Computer.model_validate(computer) + + with pytest.raises(ValidationError, match="Cannot set computed fields"): + Computer.model_validate({"cpus": 1}) + + with pytest.raises(ValidationError, match="Cannot set computed fields"): + Computer(cpus=99) + + class DummyBaseModel(BaseModel): foo: str | None = None def test_base_model_checksum() -> None: model_1 = DummyBaseModel() - assert model_1.checksum() == "69d67f58c6948849283e78d7b3f1a51e" + assert model_1.checksum() == "da8e081aa63fd2fd5b909dd86c6dfa6c" model_2 = DummyBaseModel(foo="bar") assert model_1.checksum() != model_2.checksum() @@ -104,16 +128,4 @@ def test_base_model_checksum() -> None: def test_base_model_str() -> None: model = DummyBaseModel(foo="bar") - assert str(model) == "DummyBaseModel: ab794a793aad8fa45b0f85ac05ee2126" - - -def test_mex_model_str() -> None: - class MergedDummy(MergedItem): - entityType: 
Annotated[ - Literal["MergedDummy"], Field(alias="$type", frozen=True) - ] = "MergedDummy" - identifier: Identifier - - model = MergedDummy(identifier=Identifier.generate(seed=99)) - - assert str(model) == "MergedDummy: bFQoRhcVH5DHV1" + assert str(model) == "DummyBaseModel: 94232c5b8fc9272f6f73a1e36eb68fcf" diff --git a/tests/models/test_extracted_data.py b/tests/models/test_extracted_data.py index ac256cbd..80dbb80d 100644 --- a/tests/models/test_extracted_data.py +++ b/tests/models/test_extracted_data.py @@ -2,16 +2,10 @@ from typing import Annotated, Literal import pytest -from pydantic import Field, ValidationError +from pydantic import Field, ValidationError, computed_field from mex.common.identity import get_provider -from mex.common.models import ( - MEX_PRIMARY_SOURCE_IDENTIFIER, - MEX_PRIMARY_SOURCE_IDENTIFIER_IN_PRIMARY_SOURCE, - MEX_PRIMARY_SOURCE_STABLE_TARGET_ID, - BaseModel, - ExtractedData, -) +from mex.common.models import BaseModel, ExtractedData from mex.common.types import Identifier, MergedPrimarySourceIdentifier @@ -42,8 +36,18 @@ class ExtractedThing(BaseThing, ExtractedData): entityType: Annotated[ Literal["ExtractedThing"], Field(alias="$type", frozen=True) ] = "ExtractedThing" - identifier: Annotated[ExtractedThingIdentifier, Field(frozen=True)] - stableTargetId: MergedThingIdentifier + + @computed_field # type: ignore[misc] + @property + def identifier(self) -> ExtractedThingIdentifier: + """Return the computed identifier for this extracted data item.""" + return self._get_identifier(ExtractedThingIdentifier) + + @computed_field # type: ignore[misc] + @property + def stableTargetId(self) -> MergedThingIdentifier: # noqa: N802 + """Return the computed stableTargetId for this extracted data item.""" + return self._get_stable_target_id(MergedThingIdentifier) def test_extracted_data_requires_dict_for_construction() -> None: @@ -65,44 +69,13 @@ def test_extracted_data_requires_had_primary_source() -> None: ) -def 
test_extracted_data_does_not_allow_setting_identifier() -> None: - with pytest.raises(ValidationError, match="Identifier cannot be set manually"): - ExtractedThing( - identifier=Identifier.generate(seed=0), - hadPrimarySource=MergedPrimarySourceIdentifier.generate(seed=1), - identifierInPrimarySource="0", - ) - - -def test_extracted_data_does_allow_setting_preexisting_identifiers() -> None: - thing_1 = ExtractedThing( - hadPrimarySource=MergedPrimarySourceIdentifier.generate(seed=1), - identifierInPrimarySource="0", - ) - thing_2 = ExtractedThing( - identifier=thing_1.identifier, - hadPrimarySource=MergedPrimarySourceIdentifier.generate(seed=1), - identifierInPrimarySource="0", - ) - - assert thing_1.identifier == thing_2.identifier - - -def test_extracted_data_does_not_allow_changing_mex_stable_target_id() -> None: - with pytest.raises(ValidationError, match="Cannot change `stableTargetId` of MEx"): - ExtractedThing( - identifier=MEX_PRIMARY_SOURCE_IDENTIFIER, - hadPrimarySource=MEX_PRIMARY_SOURCE_STABLE_TARGET_ID, - identifierInPrimarySource=MEX_PRIMARY_SOURCE_IDENTIFIER_IN_PRIMARY_SOURCE, - stableTargetId=MergedPrimarySourceIdentifier.generate(seed=12345), - ) - - def test_extracted_data_stores_identity_in_provider() -> None: thing = ExtractedThing( identifierInPrimarySource="12345", hadPrimarySource=MergedPrimarySourceIdentifier.generate(seed=12345), ) + assert thing.identifier + assert thing.stableTargetId provider = get_provider() identities = provider.fetch( diff --git a/tests/models/test_model_schemas.py b/tests/models/test_model_schemas.py index 53c70270..50aa0a05 100644 --- a/tests/models/test_model_schemas.py +++ b/tests/models/test_model_schemas.py @@ -8,19 +8,39 @@ import pytest -from mex.common.models import EXTRACTED_MODEL_CLASSES +from mex.common.models import EXTRACTED_MODEL_CLASSES, BaseModel from mex.common.transform import dromedary_to_kebab from mex.common.types.identifier import MEX_ID_PATTERN MEX_MODEL_ENTITIES = files("mex.model.entities") + 
+def model_to_schema(model: type[BaseModel]) -> dict[str, Any]: + # pydantic does not include computed fields in the validation schema + # and does not include validation rules in the serialization schema. + # so we need to mangle those two together here, to get a schema that is + # more comparable to what mex-model specifies. + + validation_schema = model.model_json_schema( + ref_template="/schema/fields/{model}", mode="validation" + ) + serialization_schema = model.model_json_schema( + ref_template="/schema/fields/{model}", mode="serialization" + ) + validation_schema["properties"] = { + **serialization_schema["properties"], + **validation_schema["properties"], + } + validation_schema["required"] = sorted( + {*serialization_schema["required"], *validation_schema["required"]} + ) + return validation_schema + + GENERATED_SCHEMAS = dict( sorted( { - model.stemType: model.model_json_schema( - ref_template="/schema/fields/{model}" - ) - for model in EXTRACTED_MODEL_CLASSES + model.stemType: model_to_schema(model) for model in EXTRACTED_MODEL_CLASSES }.items() ) ) @@ -113,6 +133,7 @@ def prepare_field(field: str, obj: list[Any] | dict[str, Any]) -> None: obj.pop("sameAs", None) # only in spec obj.pop("subPropertyOf", None) # only in spec obj.pop("description", None) # only in model (mostly implementation hints) + obj.pop("readOnly", None) # only in model (but could be in spec) # pop annotations that we don't compare directly but use for other comparisons title = obj.pop("title", "") # only in model (autogenerated by pydantic)