diff --git a/.cruft.json b/.cruft.json
index bc56a549..5ccdb06d 100644
--- a/.cruft.json
+++ b/.cruft.json
@@ -1,6 +1,6 @@
 {
   "checkout": null,
-  "commit": "6067fc53d1335a9bda900c5eff8dbf1c42bfe4ca",
+  "commit": "77ce51f46b0897c6fbd29782c32923075e296c2a",
   "context": {
     "cookiecutter": {
       "project_name": "common",
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index f1f53f88..929f9f87 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -3,14 +3,22 @@ updates:
   - package-ecosystem: "github-actions"
     allow:
       - dependency-type: "all"
+    assignees:
+      - ${{secrets.MEX_BOT_USER}}
     directory: "/"
+    labels:
+      - "dependabot"
     open-pull-requests-limit: 1
     schedule:
-      interval: "monthly"
+      interval: "weekly"
   - package-ecosystem: "pip"
     allow:
       - dependency-type: "all"
+    assignees:
+      - ${{secrets.MEX_BOT_USER}}
     directory: "/"
+    labels:
+      - "dependabot"
     open-pull-requests-limit: 1
     schedule:
       interval: "daily"
diff --git a/.github/workflows/cookiecutter.yml b/.github/workflows/cookiecutter.yml
index b6a3bacb..0d25076c 100644
--- a/.github/workflows/cookiecutter.yml
+++ b/.github/workflows/cookiecutter.yml
@@ -2,7 +2,8 @@ name: Cookiecutter

 on:
   push:
-  pull_request:
+    branches: ["main"]
+    tags: ["**"]
   schedule:
     - cron: '0 0 * * *'
   workflow_dispatch:
@@ -18,12 +19,13 @@ jobs:
     timeout-minutes: 10
     steps:
       - name: Checkout repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
-          fetch-depth: 1
+          fetch-depth: 0
+          token: ${{ secrets.WORKFLOW_TOKEN }}

       - name: Cache requirements
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         env:
           cache-name: cache-requirements
         with:
@@ -33,12 +35,57 @@
            ${{ env.cache-name }}-

       - name: Setup python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: 3.11

       - name: Install requirements
         run: make setup

-      - name: Check template
-        run: cruft check
+      - name: Configure git
+        env:
+          SIGNING_KEY: ${{ secrets.SIGNING_KEY }}
+          SIGNING_PUB: ${{ secrets.SIGNING_PUB }}
+        run: |
+          eval "$(ssh-agent -s)"
+          install --directory ~/.ssh --mode 700
+          base64 -d <<< '${{ secrets.SIGNING_KEY }}' > ~/.ssh/mex
+          base64 -d <<< '${{ secrets.SIGNING_PUB }}' > ~/.ssh/mex.pub
+          chmod 600 ~/.ssh/*
+          ssh-add ~/.ssh/mex
+          git config --local user.email ${{ vars.MEX_BOT_EMAIL }}
+          git config --local user.name ${{ vars.MEX_BOT_USER }}
+          git config --local gpg.format ssh
+          git config --local user.signingkey ~/.ssh/mex.pub
+          git config --local commit.gpgsign true
+
+      - name: Update template
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          if cruft check; then
+            echo template is up to date
+            exit 0
+          fi
+          if [[ $(gh pr list --label cruft | wc -c) -ne 0 ]]; then
+            echo already seeing pull request
+            exit 0
+          fi
+          template_url=$(python -c "print(__import__('json').load(open('.cruft.json'))['template'])")
+          template_ref=$(git ls-remote ${template_url} --heads main --exit-code | cut -c -6)
+          git checkout main
+          git checkout -b cruft/cookiecutter-template-${template_ref}
+          cruft update --skip-apply-ask
+          printf '# Changes\n\n- bumped cookiecutter template to %s/commit/%s\n' "$template_url" "$template_ref" > .cruft-pr-body
+          if [[ $(git status --porcelain | grep .rej | wc -c) -ne 0 ]]; then
+            printf '\n# Conflicts\n' >> .cruft-pr-body
+          fi
+          git status --porcelain | grep .rej | awk '{print $2;}' | while read -r line ; do
+            printf '\n```\n' >> .cruft-pr-body
+            cat ${line} >> .cruft-pr-body
+            printf '```\n' >> .cruft-pr-body
+          done
+          git add --all --verbose
+          git commit --message "Bump cookiecutter template to $template_ref" --verbose
+          git push
--set-upstream origin cruft/cookiecutter-template-${template_ref} --force --verbose + gh pr create --title "Bump cookiecutter template to $template_ref" --body-file .cruft-pr-body --label cruft --assignee ${{ vars.MEX_BOT_USER }} diff --git a/.github/workflows/cve-scan.yml b/.github/workflows/cve-scan.yml index 2390aab8..83f2a6b0 100644 --- a/.github/workflows/cve-scan.yml +++ b/.github/workflows/cve-scan.yml @@ -8,6 +8,10 @@ on: branches-ignore: - 'dependabot/**' pull_request: + types: + - opened + - reopened + - synchronize schedule: - cron: '0 0 * * *' workflow_dispatch: @@ -18,12 +22,12 @@ jobs: timeout-minutes: 10 steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 - name: Cache trivy - uses: actions/cache@v3 + uses: actions/cache@v4 env: cache-name: cache-trivy with: diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 356f88ea..07b565c1 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -25,12 +25,12 @@ jobs: timeout-minutes: 10 steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 - name: Cache requirements - uses: actions/cache@v3 + uses: actions/cache@v4 env: cache-name: cache-requirements with: @@ -40,7 +40,7 @@ jobs: ${{ env.cache-name }}- - name: Cache poetry - uses: actions/cache@v3 + uses: actions/cache@v4 env: cache-name: cache-poetry with: @@ -50,12 +50,12 @@ jobs: ${{ env.cache-name }}- - name: Setup python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.11 - name: Setup pages - uses: actions/configure-pages@v3 + uses: actions/configure-pages@v4 - name: Install requirements run: make install @@ -64,7 +64,7 @@ jobs: run: make docs - name: Upload artifact - uses: actions/upload-pages-artifact@v2 + uses: actions/upload-pages-artifact@v3 with: path: ./docs/dist @@ -77,4 +77,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v2 + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index de05b354..a57e9da8 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -3,6 +3,10 @@ name: Linting on: push: pull_request: + types: + - opened + - reopened + - synchronize workflow_dispatch: env: @@ -16,12 +20,12 @@ jobs: timeout-minutes: 10 steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 - name: Cache requirements - uses: actions/cache@v3 + uses: actions/cache@v4 env: cache-name: cache-requirements with: @@ -31,7 +35,7 @@ jobs: ${{ env.cache-name }}- - name: Cache poetry - uses: actions/cache@v3 + uses: actions/cache@v4 env: cache-name: cache-poetry with: @@ -41,7 +45,7 @@ jobs: ${{ env.cache-name }}- - name: Setup python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.11 diff --git a/.github/workflows/open-code.yml b/.github/workflows/open-code.yml index 7dc6ff95..d23d8546 100644 --- a/.github/workflows/open-code.yml +++ b/.github/workflows/open-code.yml @@ -12,7 +12,7 @@ jobs: timeout-minutes: 10 steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: 'main' fetch-depth: 0 diff --git a/.github/workflows/reviewing.yml b/.github/workflows/reviewing.yml new file mode 100644 index 00000000..02833fda --- /dev/null +++ b/.github/workflows/reviewing.yml @@ -0,0 +1,26 @@ +name: Review + +on: + pull_request: + types: 
+      - opened
+  workflow_dispatch:
+
+jobs:
+  assignee:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    if: >-
+      github.event.pull_request.user.login != ${{ vars.MEX_BOT_USER }} &&
+      github.event.pull_request.user.login != 'dependabot[bot]'
+    steps:
+      - name: Add assignee
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          if [[ -z "${{ github.event.pull_request.assignee.login }}" ]]; then
+            echo assigning to ${{ github.event.pull_request.user.login }}
+            gh pr edit ${{ github.event.pull_request.html_url }} --add-assignee ${{ github.event.pull_request.user.login }}
+          else
+            echo already assigned
+          fi
diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml
index e16a9f61..80229939 100644
--- a/.github/workflows/testing.yml
+++ b/.github/workflows/testing.yml
@@ -3,6 +3,10 @@ name: Testing
 on:
   push:
   pull_request:
+    types:
+      - opened
+      - reopened
+      - synchronize
   workflow_dispatch:

 env:
@@ -16,12 +20,12 @@ jobs:
     timeout-minutes: 10
     steps:
       - name: Checkout repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 1

       - name: Cache requirements
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         env:
           cache-name: cache-requirements
         with:
@@ -31,7 +35,7 @@
            ${{ env.cache-name }}-

       - name: Cache poetry
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         env:
           cache-name: cache-poetry
         with:
@@ -41,7 +45,7 @@
            ${{ env.cache-name }}-

       - name: Setup python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: 3.11

diff --git a/.gitignore b/.gitignore
index e97386ca..8f4b50c5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -100,6 +100,10 @@ dmypy.json
 # ruff
 .ruff_cache

+# cruft
+.cruft-pr-body
+*.rej
+
 # PyCharm
 .idea/
 .idea_modules/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b9c4aaee..0e4f9758 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,7 +3,7 @@ default_language_version:
   python: python3.11
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.2.1
+    rev: v0.2.2
     hooks:
       - id: ruff
        args: [--fix, --exit-non-zero-on-fix]
@@ -27,7 +27,7 @@ repos:
      - id: fix-byte-order-marker
        name: byte-order
   - repo: https://github.com/python-poetry/poetry
-    rev: 1.7.1
+    rev: 1.8.1
     hooks:
       - id: poetry-check
        name: poetry
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 41bf165c..d294fc4a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Security

+## [0.21.0] - 2024-03-04
+
+### Added
+
+- add `entityType` type hint to `MExModel` (now `BaseEntity`)
+- add types for `AnyBaseModel`, `AnyExtractedModel` and `AnyMergedModel`
+- create more specific subclasses of `Identifier` (for extracted and merged)
+- expose unions, lists and lookups for `Identifier` subclasses in `mex.common.types`
+
+### Changes
+
+- swap `contextvars.ContextVar` for `mex.common.context.ContextStore`
+- move `stableTargetId` property from base models to extracted models
+- update typing of identifiers to specific subclasses
+- use `Annotated[..., Field(...)]` notation for pydantic field configs
+- split up `mex.common.models.base` and move out `MExModel` and `JsonSchemaGenerator`
+- rename `MExModel` to `BaseEntity` with only type hints and model config
+- declare `hadPrimarySource`, `identifier` and `identifierInPrimarySource` as frozen
+
+### Removed
+
+- absorb unused `BaseExtractedData` into `ExtractedData`
+- remove `stableTargetId` property from merged models
+- drop support for sinks to accept merged items (now
only for extracted data) + ## [0.20.0] - 2024-02-22 ### Changes diff --git a/mex/common/backend_api/connector.py b/mex/common/backend_api/connector.py index 001b1846..def9560a 100644 --- a/mex/common/backend_api/connector.py +++ b/mex/common/backend_api/connector.py @@ -3,7 +3,7 @@ from mex.common.backend_api.models import BulkInsertResponse from mex.common.connector import HTTPConnector -from mex.common.models import MExModel +from mex.common.models import ExtractedData from mex.common.settings import BaseSettings from mex.common.types import Identifier @@ -27,7 +27,7 @@ def _set_url(self) -> None: settings = BaseSettings.get() self.url = urljoin(str(settings.backend_api_url), self.API_VERSION) - def post_models(self, models: list[MExModel]) -> list[Identifier]: + def post_models(self, models: list[ExtractedData]) -> list[Identifier]: """Post models to Backend API in a bulk insertion request. Args: @@ -37,7 +37,7 @@ def post_models(self, models: list[MExModel]) -> list[Identifier]: HTTPError: If insert was not accepted, crashes or times out Returns: - Identifiers of posted models + Identifiers of posted extracted models """ response = self.request( method="POST", diff --git a/mex/common/cli.py b/mex/common/cli.py index 14a0fbcf..a74f34d4 100644 --- a/mex/common/cli.py +++ b/mex/common/cli.py @@ -116,7 +116,7 @@ def _callback( # ensure connectors are closed on exit. context.call_on_close(reset_connector_context) - # load settings from parameters and store in ContextVar. + # load settings from parameters and store it globally. settings = settings_cls.model_validate( { key: value @@ -126,7 +126,7 @@ def _callback( ) SettingsContext.set(settings) - # otherwise print loaded settings in pretty way and continue + # otherwise print loaded settings in pretty way and continue. logger.info(click.style(dedent(f" {func.__doc__}"), fg="green")) logger.info(click.style(f"{settings.text()}\n", fg="bright_cyan")) @@ -142,7 +142,7 @@ def _callback( # if we are in debug mode, jump into interactive debugging. pdb.post_mortem(sys.exc_info()[2]) raise error - # if not in debug mode, exit with code 1 + # if not in debug mode, exit with code 1. 
echo("exit", fg="red") context.exit(1) diff --git a/mex/common/connector/base.py b/mex/common/connector/base.py index c0959273..e667cd55 100644 --- a/mex/common/connector/base.py +++ b/mex/common/connector/base.py @@ -1,14 +1,12 @@ from abc import ABCMeta, abstractmethod from contextlib import ExitStack -from contextvars import ContextVar from types import TracebackType from typing import Optional, TypeVar, cast, final +from mex.common.context import ContextStore + ConnectorType = TypeVar("ConnectorType", bound="BaseConnector") -ConnectorContextType = dict[type["BaseConnector"], "BaseConnector"] -ConnectorContext = ContextVar( - "ConnectorContext", default=cast(ConnectorContextType, {}) -) +ConnectorContext = ContextStore[dict[type["BaseConnector"], "BaseConnector"]]({}) def reset_connector_context() -> None: diff --git a/mex/common/context.py b/mex/common/context.py new file mode 100644 index 00000000..c0a1d480 --- /dev/null +++ b/mex/common/context.py @@ -0,0 +1,19 @@ +from typing import Generic, TypeVar + +ContextResource = TypeVar("ContextResource") + + +class ContextStore(Generic[ContextResource]): + """Thin wrapper for storing thread-local globals.""" + + def __init__(self, default: ContextResource) -> None: + """Create a new context store with a default value.""" + self._resource = default + + def get(self) -> ContextResource: + """Retrieve the current value stored in this context.""" + return self._resource + + def set(self, resource: ContextResource) -> None: + """Update the current value stored in this context.""" + self._resource = resource diff --git a/mex/common/identity/base.py b/mex/common/identity/base.py index 44b8621f..e1cd1a21 100644 --- a/mex/common/identity/base.py +++ b/mex/common/identity/base.py @@ -2,7 +2,7 @@ from mex.common.connector import BaseConnector from mex.common.identity.models import Identity -from mex.common.types import Identifier, PrimarySourceID +from mex.common.types import AnyMergedIdentifier, MergedPrimarySourceIdentifier class BaseProvider(BaseConnector): @@ -11,7 +11,7 @@ class BaseProvider(BaseConnector): @abstractmethod def assign( self, - had_primary_source: PrimarySourceID, + had_primary_source: MergedPrimarySourceIdentifier, identifier_in_primary_source: str, ) -> Identity: # pragma: no cover """Find an Identity in a database or assign a new one.""" @@ -21,9 +21,9 @@ def assign( def fetch( self, *, - had_primary_source: Identifier | None = None, + had_primary_source: MergedPrimarySourceIdentifier | None = None, identifier_in_primary_source: str | None = None, - stable_target_id: Identifier | None = None, + stable_target_id: AnyMergedIdentifier | None = None, ) -> list[Identity]: # pragma: no cover """Find Identity instances matching the given filters.""" ... 
diff --git a/mex/common/identity/memory.py b/mex/common/identity/memory.py index e5a91632..0c02a72b 100644 --- a/mex/common/identity/memory.py +++ b/mex/common/identity/memory.py @@ -4,7 +4,11 @@ MEX_PRIMARY_SOURCE_IDENTIFIER_IN_PRIMARY_SOURCE, MEX_PRIMARY_SOURCE_STABLE_TARGET_ID, ) -from mex.common.types import Identifier, PrimarySourceID +from mex.common.types import ( + AnyMergedIdentifier, + Identifier, + MergedPrimarySourceIdentifier, +) class MemoryIdentityProvider(BaseProvider): @@ -22,7 +26,9 @@ def __init__(self) -> None: ] def assign( - self, had_primary_source: PrimarySourceID, identifier_in_primary_source: str + self, + had_primary_source: MergedPrimarySourceIdentifier, + identifier_in_primary_source: str, ) -> Identity: """Find an Identity in the in-memory database or assign a new one. @@ -52,9 +58,9 @@ def assign( def fetch( self, *, - had_primary_source: Identifier | None = None, + had_primary_source: MergedPrimarySourceIdentifier | None = None, identifier_in_primary_source: str | None = None, - stable_target_id: Identifier | None = None, + stable_target_id: AnyMergedIdentifier | None = None, ) -> list[Identity]: """Find Identity instances in the in-memory database. diff --git a/mex/common/identity/models.py b/mex/common/identity/models.py index 3479f687..40ec890b 100644 --- a/mex/common/identity/models.py +++ b/mex/common/identity/models.py @@ -1,11 +1,15 @@ +from typing import Annotated + +from pydantic import Field + from mex.common.models import BaseModel -from mex.common.types import Identifier, PrimarySourceID +from mex.common.types import Identifier, MergedPrimarySourceIdentifier class Identity(BaseModel): """Model for identifier lookup.""" - identifier: Identifier - hadPrimarySource: PrimarySourceID - identifierInPrimarySource: str - stableTargetId: Identifier + identifier: Annotated[Identifier, Field(frozen=True)] + hadPrimarySource: Annotated[MergedPrimarySourceIdentifier, Field(frozen=True)] + identifierInPrimarySource: Annotated[str, Field(frozen=True)] + stableTargetId: Annotated[Identifier, Field(frozen=True)] diff --git a/mex/common/ldap/README.md b/mex/common/ldap/README.md index 045e06d8..4b94356b 100644 --- a/mex/common/ldap/README.md +++ b/mex/common/ldap/README.md @@ -23,7 +23,7 @@ The module `ldap.transform` contains functions for transforming LDAP data into M models. The `mex_person.stableTargetId` attribute can be used in any entity that requires a -`PersonID`. +`MergedPersonIdentifier`. # Convenience Functions diff --git a/mex/common/ldap/extract.py b/mex/common/ldap/extract.py index 180560f6..be6070f7 100644 --- a/mex/common/ldap/extract.py +++ b/mex/common/ldap/extract.py @@ -4,17 +4,17 @@ from mex.common.identity import get_provider from mex.common.ldap.models.person import LDAPPerson, LDAPPersonWithQuery from mex.common.models import ExtractedPrimarySource -from mex.common.types import PersonID +from mex.common.types import MergedPersonIdentifier def _get_merged_ids_by_attribute( attribute: str, persons: Iterable[LDAPPerson], primary_source: ExtractedPrimarySource, -) -> dict[str, list[PersonID]]: - """Return a mapping from a dynamic Person attribute to corresponding PersonIDs. +) -> dict[str, list[MergedPersonIdentifier]]: + """Return mapping from dynamic Person attribute to corresponding merged person ids. - PersonIDs are looked up in the identity provider and will be omitted + MergedPersonIdentifiers are looked up in the identity provider and will be omitted for any person that has not yet been assigned an `Identity` there. 
Args: @@ -23,7 +23,8 @@ def _get_merged_ids_by_attribute( primary_source: Primary source for LDAP Returns: - Mapping from a stringified `LDAPPerson[attribute]` to corresponding PersonIDs + Mapping from a stringified `LDAPPerson[attribute]` to corresponding + MergedPersonIdentifiers """ if attribute not in LDAPPerson.model_fields: raise RuntimeError(f"Not a valid LDAPPerson field: {attribute}") @@ -35,17 +36,17 @@ def _get_merged_ids_by_attribute( identifier_in_primary_source=str(person.objectGUID), ): merged_ids_by_attribute[str(getattr(person, attribute))].append( - PersonID(identities[0].stableTargetId) + MergedPersonIdentifier(identities[0].stableTargetId) ) return merged_ids_by_attribute def get_merged_ids_by_employee_ids( persons: Iterable[LDAPPerson], primary_source: ExtractedPrimarySource -) -> dict[str, list[PersonID]]: - """Return a mapping from a person's employeeID to their PersonIDs. +) -> dict[str, list[MergedPersonIdentifier]]: + """Return a mapping from a person's employeeID to their merged person ids. - PersonIDs are looked up in the identity provider and will be omitted + MergedPersonIdentifiers are looked up in the identity provider and will be omitted for any person that has not yet been assigned an `Identity` there. Args: @@ -53,17 +54,17 @@ def get_merged_ids_by_employee_ids( primary_source: Primary source for LDAP Returns: - Mapping from `LDAPPerson.employeeID` to corresponding PersonIDs + Mapping from `LDAPPerson.employeeID` to corresponding MergedPersonIdentifiers """ return _get_merged_ids_by_attribute("employeeID", persons, primary_source) def get_merged_ids_by_email( persons: Iterable[LDAPPerson], primary_source: ExtractedPrimarySource -) -> dict[str, list[PersonID]]: - """Return a mapping from a person's e-mail to their PersonIDs. +) -> dict[str, list[MergedPersonIdentifier]]: + """Return a mapping from a person's e-mail to their merged person ids. - PersonIDs are looked up in the identity provider and will be omitted + MergedPersonIdentifiers are looked up in the identity provider and will be omitted for any person that has not yet been assigned an `Identity` there. Args: @@ -71,7 +72,7 @@ def get_merged_ids_by_email( primary_source: Primary source for LDAP Returns: - Mapping from `LDAPPerson.mail` to corresponding PersonIDs + Mapping from `LDAPPerson.mail` to corresponding MergedPersonIdentifiers """ return _get_merged_ids_by_attribute("mail", persons, primary_source) @@ -79,10 +80,10 @@ def get_merged_ids_by_email( def get_merged_ids_by_query_string( persons_with_query: Iterable[LDAPPersonWithQuery], primary_source: ExtractedPrimarySource, -) -> dict[str, list[PersonID]]: - """Return a mapping from a person query string to their PersonIDs. +) -> dict[str, list[MergedPersonIdentifier]]: + """Return a mapping from a person query string to their merged person ids. - PersonIDs are looked up in the identity provider and will be omitted + MergedPersonIdentifiers are looked up in the identity provider and will be omitted for any person that has not yet been assigned an `Identity` there. 
Args: @@ -90,7 +91,8 @@ def get_merged_ids_by_query_string( primary_source: Primary source for LDAP Returns: - Mapping from `LDAPPersonWithQuery.query` to corresponding PersonIDs + Mapping from `LDAPPersonWithQuery.query` to corresponding + MergedPersonIdentifiers """ merged_ids_by_attribute = defaultdict(list) provider = get_provider() @@ -100,6 +102,6 @@ def get_merged_ids_by_query_string( identifier_in_primary_source=str(person_with_query.person.objectGUID), ): merged_ids_by_attribute[str(person_with_query.query)].append( - PersonID(identities[0].stableTargetId) + MergedPersonIdentifier(identities[0].stableTargetId) ) return merged_ids_by_attribute diff --git a/mex/common/ldap/models/person.py b/mex/common/ldap/models/person.py index ecef8e1b..54f46fa1 100644 --- a/mex/common/ldap/models/person.py +++ b/mex/common/ldap/models/person.py @@ -1,3 +1,5 @@ +from typing import Annotated + from pydantic import Field from mex.common.ldap.models.actor import LDAPActor @@ -12,7 +14,7 @@ class LDAPPerson(LDAPActor): departmentNumber: str | None = None displayName: str | None = None employeeID: str - givenName: list[str] = Field(min_length=1) + givenName: Annotated[list[str], Field(min_length=1)] ou: list[str] = [] sn: str diff --git a/mex/common/models/__init__.py b/mex/common/models/__init__.py index 75360bce..0edccea7 100644 --- a/mex/common/models/__init__.py +++ b/mex/common/models/__init__.py @@ -1,4 +1,4 @@ -from typing import Final +from typing import Final, Union, get_args from mex.common.models.access_platform import ( BaseAccessPlatform, @@ -6,7 +6,7 @@ MergedAccessPlatform, ) from mex.common.models.activity import BaseActivity, ExtractedActivity, MergedActivity -from mex.common.models.base import BaseModel, MExModel +from mex.common.models.base import BaseModel from mex.common.models.contact_point import ( BaseContactPoint, ExtractedContactPoint, @@ -21,7 +21,6 @@ MEX_PRIMARY_SOURCE_IDENTIFIER, MEX_PRIMARY_SOURCE_IDENTIFIER_IN_PRIMARY_SOURCE, MEX_PRIMARY_SOURCE_STABLE_TARGET_ID, - BaseExtractedData, ExtractedData, ) from mex.common.models.merged_item import MergedItem @@ -50,11 +49,15 @@ ) __all__ = ( + "AnyBaseModel", + "AnyExtractedModel", + "AnyMergedModel", + "BASE_MODEL_CLASSES_BY_NAME", + "BASE_MODEL_CLASSES", "BaseAccessPlatform", "BaseActivity", "BaseContactPoint", "BaseDistribution", - "BaseExtractedData", "BaseModel", "BaseOrganization", "BaseOrganizationalUnit", @@ -78,6 +81,7 @@ "ExtractedVariable", "ExtractedVariableGroup", "MERGED_MODEL_CLASSES_BY_NAME", + "MERGED_MODEL_CLASSES", "MergedAccessPlatform", "MergedActivity", "MergedContactPoint", @@ -93,10 +97,9 @@ "MEX_PRIMARY_SOURCE_IDENTIFIER_IN_PRIMARY_SOURCE", "MEX_PRIMARY_SOURCE_IDENTIFIER", "MEX_PRIMARY_SOURCE_STABLE_TARGET_ID", - "MExModel", ) -BASE_MODEL_CLASSES: Final[list[type[BaseModel]]] = [ +AnyBaseModel = Union[ BaseAccessPlatform, BaseActivity, BaseContactPoint, @@ -109,12 +112,12 @@ BaseVariable, BaseVariableGroup, ] - -BASE_MODEL_CLASSES_BY_NAME: Final[dict[str, type[BaseModel]]] = { +BASE_MODEL_CLASSES: Final[list[type[AnyBaseModel]]] = list(get_args(AnyBaseModel)) +BASE_MODEL_CLASSES_BY_NAME: Final[dict[str, type[AnyBaseModel]]] = { cls.__name__: cls for cls in BASE_MODEL_CLASSES } -EXTRACTED_MODEL_CLASSES: Final[list[type[ExtractedData]]] = [ +AnyExtractedModel = Union[ ExtractedAccessPlatform, ExtractedActivity, ExtractedContactPoint, @@ -127,12 +130,14 @@ ExtractedVariable, ExtractedVariableGroup, ] - -EXTRACTED_MODEL_CLASSES_BY_NAME: Final[dict[str, type[ExtractedData]]] = { +EXTRACTED_MODEL_CLASSES: 
Final[list[type[AnyExtractedModel]]] = list( + get_args(AnyExtractedModel) +) +EXTRACTED_MODEL_CLASSES_BY_NAME: Final[dict[str, type[AnyExtractedModel]]] = { cls.__name__: cls for cls in EXTRACTED_MODEL_CLASSES } -MERGED_MODEL_CLASSES: Final[list[type[MergedItem]]] = [ +AnyMergedModel = Union[ MergedAccessPlatform, MergedActivity, MergedContactPoint, @@ -145,7 +150,7 @@ MergedVariable, MergedVariableGroup, ] - -MERGED_MODEL_CLASSES_BY_NAME: Final[dict[str, type[MergedItem]]] = { +MERGED_MODEL_CLASSES: Final[list[type[AnyMergedModel]]] = list(get_args(AnyMergedModel)) +MERGED_MODEL_CLASSES_BY_NAME: Final[dict[str, type[AnyMergedModel]]] = { cls.__name__: cls for cls in MERGED_MODEL_CLASSES } diff --git a/mex/common/models/access_platform.py b/mex/common/models/access_platform.py index 8c41c21b..4fd21cd1 100644 --- a/mex/common/models/access_platform.py +++ b/mex/common/models/access_platform.py @@ -6,12 +6,13 @@ from mex.common.models.extracted_data import ExtractedData from mex.common.models.merged_item import MergedItem from mex.common.types import ( - AccessPlatformID, APIType, - ContactPointID, + ExtractedAccessPlatformIdentifier, Link, - OrganizationalUnitID, - PersonID, + MergedAccessPlatformIdentifier, + MergedContactPointIdentifier, + MergedOrganizationalUnitIdentifier, + MergedPersonIdentifier, TechnicalAccessibility, Text, ) @@ -20,9 +21,12 @@ class BaseAccessPlatform(BaseModel): """A way of physically accessing the Resource for re-use.""" - stableTargetId: AccessPlatformID alternativeTitle: list[Text] = [] - contact: list[OrganizationalUnitID | PersonID | ContactPointID] = [] + contact: list[ + MergedOrganizationalUnitIdentifier + | MergedPersonIdentifier + | MergedContactPointIdentifier + ] = [] description: list[Text] = [] endpointDescription: Link | None = None endpointType: ( @@ -36,20 +40,23 @@ class BaseAccessPlatform(BaseModel): Field(examples=["https://mex.rki.de/item/technical-accessibility-1"]), ] title: list[Text] = [] - unitInCharge: list[OrganizationalUnitID] = [] + unitInCharge: list[MergedOrganizationalUnitIdentifier] = [] class ExtractedAccessPlatform(BaseAccessPlatform, ExtractedData): """An automatically extracted metadata set describing an access platform.""" - entityType: Literal["ExtractedAccessPlatform"] = Field( - "ExtractedAccessPlatform", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["ExtractedAccessPlatform"], Field(alias="$type", frozen=True) + ] = "ExtractedAccessPlatform" + identifier: Annotated[ExtractedAccessPlatformIdentifier, Field(frozen=True)] + stableTargetId: MergedAccessPlatformIdentifier class MergedAccessPlatform(BaseAccessPlatform, MergedItem): """The result of merging all extracted data and rules for an access platform.""" - entityType: Literal["MergedAccessPlatform"] = Field( - "MergedAccessPlatform", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["MergedAccessPlatform"], Field(alias="$type", frozen=True) + ] = "MergedAccessPlatform" + identifier: Annotated[MergedAccessPlatformIdentifier, Field(frozen=True)] diff --git a/mex/common/models/activity.py b/mex/common/models/activity.py index 115daf03..d82410e7 100644 --- a/mex/common/models/activity.py +++ b/mex/common/models/activity.py @@ -6,13 +6,14 @@ from mex.common.models.extracted_data import ExtractedData from mex.common.models.merged_item import MergedItem from mex.common.types import ( - ActivityID, ActivityType, - ContactPointID, + ExtractedActivityIdentifier, Link, - OrganizationalUnitID, - OrganizationID, - PersonID, + 
MergedActivityIdentifier, + MergedContactPointIdentifier, + MergedOrganizationalUnitIdentifier, + MergedOrganizationIdentifier, + MergedPersonIdentifier, Text, Theme, Timestamp, @@ -25,7 +26,6 @@ class BaseActivity(BaseModel): This may be a project, an area of work or an administrative procedure. """ - stableTargetId: ActivityID abstract: list[Text] = [] activityType: list[ Annotated[ @@ -34,25 +34,32 @@ class BaseActivity(BaseModel): ] = [] alternativeTitle: list[Text] = [] contact: Annotated[ - list[OrganizationalUnitID | PersonID | ContactPointID,], Field(min_length=1) + list[ + MergedOrganizationalUnitIdentifier + | MergedPersonIdentifier + | MergedContactPointIdentifier, + ], + Field(min_length=1), ] documentation: list[Link] = [] end: list[ Annotated[Timestamp, Field(examples=["2024-01-17", "2024", "2024-01"])] ] = [] - externalAssociate: list[OrganizationID | PersonID] = [] - funderOrCommissioner: list[OrganizationID] = [] + externalAssociate: list[MergedOrganizationIdentifier | MergedPersonIdentifier] = [] + funderOrCommissioner: list[MergedOrganizationIdentifier] = [] fundingProgram: list[str] = [] - involvedPerson: list[PersonID] = [] - involvedUnit: list[OrganizationalUnitID] = [] - isPartOfActivity: list[ActivityID] = [] + involvedPerson: list[MergedPersonIdentifier] = [] + involvedUnit: list[MergedOrganizationalUnitIdentifier] = [] + isPartOfActivity: list[MergedActivityIdentifier] = [] publication: list[Link] = [] - responsibleUnit: Annotated[list[OrganizationalUnitID], Field(min_length=1)] + responsibleUnit: Annotated[ + list[MergedOrganizationalUnitIdentifier], Field(min_length=1) + ] shortName: list[Text] = [] start: list[ Annotated[Timestamp, Field(examples=["2023-01-16", "2023", "2023-02"])] ] = [] - succeeds: list[ActivityID] = [] + succeeds: list[MergedActivityIdentifier] = [] theme: list[ Annotated[Theme, Field(examples=["https://mex.rki.de/item/theme-1"])] ] = [] @@ -63,14 +70,17 @@ class BaseActivity(BaseModel): class ExtractedActivity(BaseActivity, ExtractedData): """An automatically extracted metadata set describing an activity.""" - entityType: Literal["ExtractedActivity"] = Field( - "ExtractedActivity", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["ExtractedActivity"], Field(alias="$type", frozen=True) + ] = "ExtractedActivity" + identifier: Annotated[ExtractedActivityIdentifier, Field(frozen=True)] + stableTargetId: MergedActivityIdentifier class MergedActivity(BaseActivity, MergedItem): """The result of merging all extracted data and rules for an activity.""" - entityType: Literal["MergedActivity"] = Field( - "MergedActivity", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["MergedActivity"], Field(alias="$type", frozen=True) + ] = "MergedActivity" + identifier: Annotated[MergedActivityIdentifier, Field(frozen=True)] diff --git a/mex/common/models/base.py b/mex/common/models/base.py index bccd41f3..56785992 100644 --- a/mex/common/models/base.py +++ b/mex/common/models/base.py @@ -3,8 +3,6 @@ from collections.abc import MutableMapping from functools import cache from typing import ( - TYPE_CHECKING, - Annotated, Any, TypeVar, Union, @@ -12,43 +10,22 @@ get_origin, ) -from pydantic import ( - BaseModel as PydanticBaseModel, -) +from pydantic import BaseModel as PydanticBaseModel from pydantic import ( ConfigDict, - Field, TypeAdapter, ValidationError, model_validator, ) from pydantic.fields import FieldInfo -from pydantic.json_schema import DEFAULT_REF_TEMPLATE, JsonSchemaMode, JsonSchemaValue -from pydantic.json_schema 
import ( - GenerateJsonSchema as PydanticJsonSchemaGenerator, -) +from pydantic.json_schema import DEFAULT_REF_TEMPLATE, JsonSchemaMode +from pydantic.json_schema import GenerateJsonSchema as PydanticJsonSchemaGenerator -from mex.common.types import Identifier +from mex.common.models.schema import JsonSchemaGenerator RawModelDataT = TypeVar("RawModelDataT") -class JsonSchemaGenerator(PydanticJsonSchemaGenerator): - """Customization of the pydantic class for generating JSON schemas.""" - - def handle_ref_overrides(self, json_schema: JsonSchemaValue) -> JsonSchemaValue: - """Disable pydantic behavior to wrap top-level `$ref` keys in an `allOf`. - - For example, pydantic would convert - {"$ref": "#/$defs/APIType", "examples": ["api-type-1"]} - into - {"allOf": {"$ref": "#/$defs/APIType"}, "examples": ["api-type-1"]} - which is in fact recommended by JSON schema, but we need to disable this - to stay compatible with mex-editor and mex-model. - """ - return json_schema - - class BaseModel(PydanticBaseModel): """Common base class for all MEx model classes.""" @@ -209,37 +186,3 @@ def checksum(self) -> str: def __str__(self) -> str: """Format this model as a string for logging.""" return f"{self.__class__.__name__}: {self.checksum()}" - - -class MExModel(BaseModel): - """Abstract base model for extracted data and merged item classes. - - This class only defines an `identifier` and gives a type hint for `stableTargetId`. - """ - - model_config = ConfigDict(extra="forbid") - - if TYPE_CHECKING: - # Sometimes multiple primary sources describe the same activity, resource, etc. - # and a complete metadata item can only be created by merging these fragments. - # The `stableTargetID` is part of all models in `mex.common.models` to allow - # MEx to identify which extracted items describe the same thing and should be - # merged to create a complete metadata item. - # The name might be a bit misleading (also due to historical reasons), but the - # "stability" is only guaranteed for one "real world" or "digital world" thing - # having the same ID in MEx over time. But not as a guarantee, that the same - # metadata sources contribute to the complete metadata item. - # Because we anticipate that items have to be merged, the `stableTargetID` is - # also used as the foreign key for all fields containing references. - stableTargetId: Any - - identifier: Annotated[ - Identifier, - Field( - description=( - "A globally unique identifier for this item. Regardless of the " - "entity-type or whether this item was extracted, merged, etc. " - "identifiers will be assigned just once." 
- ), - ), - ] diff --git a/mex/common/models/contact_point.py b/mex/common/models/contact_point.py index 7302952c..4c70966c 100644 --- a/mex/common/models/contact_point.py +++ b/mex/common/models/contact_point.py @@ -5,27 +5,33 @@ from mex.common.models.base import BaseModel from mex.common.models.extracted_data import ExtractedData from mex.common.models.merged_item import MergedItem -from mex.common.types import ContactPointID, Email +from mex.common.types import ( + Email, + ExtractedContactPointIdentifier, + MergedContactPointIdentifier, +) class BaseContactPoint(BaseModel): """A contact point - for example, an interdepartmental project.""" - stableTargetId: ContactPointID email: Annotated[list[Email], Field(min_length=1)] class ExtractedContactPoint(BaseContactPoint, ExtractedData): """An automatically extracted metadata set describing a contact point.""" - entityType: Literal["ExtractedContactPoint"] = Field( - "ExtractedContactPoint", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["ExtractedContactPoint"], Field(alias="$type", frozen=True) + ] = "ExtractedContactPoint" + identifier: Annotated[ExtractedContactPointIdentifier, Field(frozen=True)] + stableTargetId: MergedContactPointIdentifier class MergedContactPoint(BaseContactPoint, MergedItem): """The result of merging all extracted data and rules for a contact point.""" - entityType: Literal["MergedContactPoint"] = Field( - "MergedContactPoint", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["MergedContactPoint"], Field(alias="$type", frozen=True) + ] = "MergedContactPoint" + identifier: Annotated[MergedContactPointIdentifier, Field(frozen=True)] diff --git a/mex/common/models/distribution.py b/mex/common/models/distribution.py index 792c7035..30a67328 100644 --- a/mex/common/models/distribution.py +++ b/mex/common/models/distribution.py @@ -6,14 +6,15 @@ from mex.common.models.extracted_data import ExtractedData from mex.common.models.merged_item import MergedItem from mex.common.types import ( - AccessPlatformID, AccessRestriction, - DistributionID, + ExtractedDistributionIdentifier, License, Link, + MergedAccessPlatformIdentifier, + MergedDistributionIdentifier, + MergedOrganizationIdentifier, + MergedPersonIdentifier, MIMEType, - OrganizationID, - PersonID, Timestamp, ) @@ -21,17 +22,16 @@ class BaseDistribution(BaseModel): """A specific representation of a dataset.""" - stableTargetId: DistributionID - accessService: AccessPlatformID | None = None + accessService: MergedAccessPlatformIdentifier | None = None accessRestriction: Annotated[ AccessRestriction, Field(examples=["https://mex.rki.de/item/access-restriction-1"]), ] accessURL: Link | None = None - author: list[PersonID] = [] - contactPerson: list[PersonID] = [] - dataCurator: list[PersonID] = [] - dataManager: list[PersonID] = [] + author: list[MergedPersonIdentifier] = [] + contactPerson: list[MergedPersonIdentifier] = [] + dataCurator: list[MergedPersonIdentifier] = [] + dataManager: list[MergedPersonIdentifier] = [] downloadURL: Link | None = None issued: Timestamp license: ( @@ -47,11 +47,11 @@ class BaseDistribution(BaseModel): | None ) = None modified: Timestamp | None = None - otherContributor: list[PersonID] = [] - projectLeader: list[PersonID] = [] - projectManager: list[PersonID] = [] - publisher: Annotated[list[OrganizationID], Field(min_length=1)] - researcher: list[PersonID] = [] + otherContributor: list[MergedPersonIdentifier] = [] + projectLeader: list[MergedPersonIdentifier] = [] + projectManager: 
list[MergedPersonIdentifier] = [] + publisher: Annotated[list[MergedOrganizationIdentifier], Field(min_length=1)] + researcher: list[MergedPersonIdentifier] = [] title: Annotated[ str, Field( @@ -64,14 +64,17 @@ class BaseDistribution(BaseModel): class ExtractedDistribution(BaseDistribution, ExtractedData): """An automatically extracted metadata set describing a distribution.""" - entityType: Literal["ExtractedDistribution"] = Field( - "ExtractedDistribution", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["ExtractedDistribution"], Field(alias="$type", frozen=True) + ] = "ExtractedDistribution" + identifier: Annotated[ExtractedDistributionIdentifier, Field(frozen=True)] + stableTargetId: MergedDistributionIdentifier class MergedDistribution(BaseDistribution, MergedItem): """The result of merging all extracted data and rules for a distribution.""" - entityType: Literal["MergedDistribution"] = Field( - "MergedDistribution", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["MergedDistribution"], Field(alias="$type", frozen=True) + ] = "MergedDistribution" + identifier: Annotated[MergedDistributionIdentifier, Field(frozen=True)] diff --git a/mex/common/models/entity.py b/mex/common/models/entity.py new file mode 100644 index 00000000..fd32dc5e --- /dev/null +++ b/mex/common/models/entity.py @@ -0,0 +1,31 @@ +from typing import TYPE_CHECKING + +from mex.common.models.base import BaseModel +from mex.common.types import Identifier + + +class BaseEntity(BaseModel, extra="forbid"): + """Abstract base model for extracted data, merged item and rule set classes. + + This class gives type hints for an `identifier` field and the frozen class variable + `entityType`. Subclasses should implement both fields and set the correct identifier + type as well as the correct literal value for the entity type. + """ + + if TYPE_CHECKING: # pragma: no cover + # The `entityType` class variable is added to all `BaseEntity` subclasses to + # help with assigning the correct class when reading raw JSON entities. + # E.g.: https://docs.pydantic.dev/latest/concepts/fields/#discriminator + # Simple duck-typing would not work, because some entity-types have overlapping + # attributes, like `Person.email` and `ContactPoint.email`. + entityType: str + + # A globally unique identifier is added to all `BaseEntity` subclasses and + # should be typed to the correct identifier type. Regardless of the entity-type + # or whether this item was extracted, merged, etc., identifiers will be assigned + # just once and should be declared as `frozen` on subclasses. 
+ identifier: Identifier + + def __str__(self) -> str: + """Format this instance as a string for logging.""" + return f"{self.entityType}: {self.identifier}" diff --git a/mex/common/models/extracted_data.py b/mex/common/models/extracted_data.py index 968b64e1..eedfd272 100644 --- a/mex/common/models/extracted_data.py +++ b/mex/common/models/extracted_data.py @@ -1,36 +1,57 @@ -from typing import Annotated, Any +from typing import TYPE_CHECKING, Annotated, Any from pydantic import Field, model_validator, validate_call -from mex.common.models.base import MExModel -from mex.common.types import Identifier, PrimarySourceID +from mex.common.models.entity import BaseEntity +from mex.common.types import ( + ExtractedPrimarySourceIdentifier, + MergedPrimarySourceIdentifier, +) -MEX_PRIMARY_SOURCE_IDENTIFIER = Identifier("00000000000000") +MEX_PRIMARY_SOURCE_IDENTIFIER = ExtractedPrimarySourceIdentifier("00000000000000") MEX_PRIMARY_SOURCE_IDENTIFIER_IN_PRIMARY_SOURCE = "mex" -MEX_PRIMARY_SOURCE_STABLE_TARGET_ID = PrimarySourceID("00000000000000") +MEX_PRIMARY_SOURCE_STABLE_TARGET_ID = MergedPrimarySourceIdentifier("00000000000000") -class BaseExtractedData(MExModel): - """Base model class definition for all extracted data instances. +class ExtractedData(BaseEntity): + """Base model for all extracted data classes. This class adds two important attributes for metadata provenance: `hadPrimarySource` and `identifierInPrimarySource`, which are used to uniquely identify an item in its original primary source. The attribute `stableTargetId` has to be set by each concrete subclass, like `ExtractedPerson`, because it needs to have the - correct type, e.g. `PersonID`. + correct type, e.g. `MergedPersonIdentifier`. + + This class also adds a validator to automatically set identifiers for provenance. + See below, for a full description. """ + if TYPE_CHECKING: # pragma: no cover + # Sometimes multiple primary sources describe the same activity, resource, etc. + # and a complete metadata item can only be created by merging these fragments. + # The `stableTargetId` is part of all extracted models to allow MEx to identify + # which items describe the same thing and should be merged to create a complete + # metadata item. The name `stableTargetId` might be a bit misleading, because + # the "stability" is only guaranteed for one "real world" or "digital world" + # thing having the same ID in MEx over time. But it is not a guarantee, that the + # same metadata sources contribute to the complete metadata item. The naming has + # its historical reasons, but we plan to change it in the near future. + # Because we anticipate that items have to be merged, the `stableTargetId` is + # also used as the foreign key for all fields containing references. + stableTargetId: Any + hadPrimarySource: Annotated[ - PrimarySourceID, + MergedPrimarySourceIdentifier, Field( description=( - "The stableTargetID of the primary source, that this item was " + "The stableTargetId of the primary source, that this item was " "extracted from. This field is mandatory for all extracted items to " "aid with data provenance. Extracted primary sources also have this " - "field and are all extracted from a primary source called MEx, which " - "is its own primary source and has the static stableTargetID: " - f"{MEX_PRIMARY_SOURCE_STABLE_TARGET_ID}" + "field and are all extracted from a static primary source for MEx. " + "The extracted primary source for MEx has its own merged item as a " + "primary source." 
), + frozen=True, ), ] identifierInPrimarySource: Annotated[ @@ -41,40 +62,24 @@ class BaseExtractedData(MExModel): "It is only unique amongst items coming from the same system, because " "identifier formats are likely to overlap between systems. " "The value for `identifierInPrimarySource` is therefore only unique in " - "composition with `hadPrimarySource`. MEx uses this composite key " - "to assign a stable and globally unique `identifier` to each item." + "composition with `hadPrimarySource`. MEx uses this composite key to " + "assign a stable and globally unique `identifier` per extracted item." ), examples=["123456", "item-501", "D7/x4/zz.final3"], min_length=1, + frozen=True, ), ] - def __str__(self) -> str: - """Format this extracted data instance as a string for logging.""" - return ( - f"{self.__class__.__name__}: " - f"{self.identifierInPrimarySource} " - f"{self.identifier} " - f"{self.stableTargetId}" - ) - - -class ExtractedData(BaseExtractedData): - """Base model class for extracted data items that ensures identities. - - This base class does not add any attributes. It only adds the functionality - to automatically set identifiers for provenance. See below, for description. - """ - # TODO make stable_target_id and identifier computed fields (MX-1435) @model_validator(mode="before") @classmethod @validate_call def set_identifiers(cls, values: dict[str, Any]) -> dict[str, Any]: # noqa: C901 - """Ensure identifier and provenance attributes are set for this instance. + """Ensure identifiers and provenance attributes are set for this instance. All extracted data classes have four important identifiers that are defined - by `MExModel` and `BaseExtractedData`: + by `BaseEntity`, `ExtractedData` and the concrete classes themselves. - identifierInPrimarySource - hadPrimarySource @@ -88,7 +93,7 @@ def set_identifiers(cls, values: dict[str, Any]) -> dict[str, Any]: # noqa: C90 because otherwise we cannot reliably determine the origin of this item. These two identifiers are the only two that need to be set during extraction. - Next we query the configured `IdentityProvider` to determine whether this item + Next, we query the configured `IdentityProvider` to determine whether this item already has an `identifier` and `stableTargetId`. If not, we let the identity provider generate new identifiers. @@ -96,13 +101,13 @@ def set_identifiers(cls, values: dict[str, Any]) -> dict[str, Any]: # noqa: C90 with what we got from the identity provider, because we don't allow any system to change the association from `identifierInPrimarySource` and `hadPrimarySource` to the `identifier`. - A use case for passing a matching `identifier` to the constructor would be + A use-case for passing a matching `identifier` to the constructor would be parsing an already extracted item from an NDJSON file or an API endpoint. If a `stableTargetId` has been passed to the constructor, we use that as the new value, because changes to the stable target ID are generally allowed. - A use case for changing the `stableTargetId` will be the matching of - multiple extracted items (see `MExModel.stableTargetId` for details). + A use-case for changing the `stableTargetId` will be the matching of + multiple extracted items (see `BaseEntity.stableTargetId` for details). 
Args: values: Raw values to validate @@ -136,14 +141,16 @@ def set_identifiers(cls, values: dict[str, Any]) -> dict[str, Any]: # noqa: C90 if had_primary_source := values.get("hadPrimarySource"): if isinstance(had_primary_source, list): if len(had_primary_source) == 1: - had_primary_source = PrimarySourceID(had_primary_source[0]) + had_primary_source = MergedPrimarySourceIdentifier( + had_primary_source[0] + ) else: raise ValueError( f"Expected one value for hadPrimarySource, " f"got {len(had_primary_source)}" ) else: - had_primary_source = PrimarySourceID(had_primary_source) + had_primary_source = MergedPrimarySourceIdentifier(had_primary_source) else: raise ValueError("Missing value for `hadPrimarySource`.") diff --git a/mex/common/models/filter.py b/mex/common/models/filter.py index 4d1ffb7d..3343b64f 100644 --- a/mex/common/models/filter.py +++ b/mex/common/models/filter.py @@ -1,4 +1,4 @@ -from typing import Any, Optional +from typing import Annotated, Any, Optional from pydantic import BaseModel, Field, create_model @@ -18,7 +18,7 @@ class EntityFilter(BaseModel, extra="forbid"): fieldInPrimarySource: str locationInPrimarySource: Optional[str] = None examplesInPrimarySource: Optional[list[str]] = None - mappingRules: list[EntityFilterRule] = Field(..., min_length=1) + mappingRules: Annotated[list[EntityFilterRule], Field(min_length=1)] comment: Optional[str] = None diff --git a/mex/common/models/mapping.py b/mex/common/models/mapping.py index 63f24b57..00b81223 100644 --- a/mex/common/models/mapping.py +++ b/mex/common/models/mapping.py @@ -1,11 +1,11 @@ -from typing import Any, Optional, get_origin +from typing import Annotated, Any, Optional, get_origin from pydantic import BaseModel, Field, create_model from mex.common.models import EXTRACTED_MODEL_CLASSES, ExtractedData -class GenericRule(BaseModel, extra="forbid"): # forbid additional fields +class GenericRule(BaseModel, extra="forbid"): """Generic mapping rule model.""" forValues: Optional[list[str]] = None @@ -13,13 +13,13 @@ class GenericRule(BaseModel, extra="forbid"): # forbid additional fields rule: Optional[str] = None -class GenericField(BaseModel, extra="forbid"): # forbid additional fields +class GenericField(BaseModel, extra="forbid"): """Generic Field model.""" fieldInPrimarySource: str locationInPrimarySource: Optional[str] = None examplesInPrimarySource: Optional[list[str]] = None - mappingRules: list[GenericRule] = Field(..., min_length=1) + mappingRules: Annotated[list[GenericRule], Field(min_length=1)] comment: Optional[str] = None diff --git a/mex/common/models/merged_item.py b/mex/common/models/merged_item.py index dee80d25..5dadb310 100644 --- a/mex/common/models/merged_item.py +++ b/mex/common/models/merged_item.py @@ -1,9 +1,5 @@ -from mex.common.models.base import MExModel +from mex.common.models.entity import BaseEntity -class MergedItem(MExModel): - """Base model class definition for all merged items.""" - - def __str__(self) -> str: - """Format this merged item instance as a string for logging.""" - return f"{self.__class__.__name__}: {self.identifier}" +class MergedItem(BaseEntity): + """Base model for all merged item classes.""" diff --git a/mex/common/models/organization.py b/mex/common/models/organization.py index 9456ce71..8f9fbaf7 100644 --- a/mex/common/models/organization.py +++ b/mex/common/models/organization.py @@ -5,7 +5,11 @@ from mex.common.models.base import BaseModel from mex.common.models.extracted_data import ExtractedData from mex.common.models.merged_item import MergedItem -from 
mex.common.types import OrganizationID, Text +from mex.common.types import ( + ExtractedOrganizationIdentifier, + MergedOrganizationIdentifier, + Text, +) class BaseOrganization(BaseModel): @@ -14,7 +18,6 @@ class BaseOrganization(BaseModel): This can be any community or other social, commercial or political structure. """ - stableTargetId: OrganizationID alternativeName: list[Text] = [] geprisId: list[ Annotated[ @@ -83,14 +86,17 @@ class BaseOrganization(BaseModel): class ExtractedOrganization(BaseOrganization, ExtractedData): """An automatically extracted metadata set describing an organization.""" - entityType: Literal["ExtractedOrganization"] = Field( - "ExtractedOrganization", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["ExtractedOrganization"], Field(alias="$type", frozen=True) + ] = "ExtractedOrganization" + identifier: Annotated[ExtractedOrganizationIdentifier, Field(frozen=True)] + stableTargetId: MergedOrganizationIdentifier class MergedOrganization(BaseOrganization, MergedItem): """The result of merging all extracted data and rules for an organization.""" - entityType: Literal["MergedOrganization"] = Field( - "MergedOrganization", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["MergedOrganization"], Field(alias="$type", frozen=True) + ] = "MergedOrganization" + identifier: Annotated[MergedOrganizationIdentifier, Field(frozen=True)] diff --git a/mex/common/models/organizational_unit.py b/mex/common/models/organizational_unit.py index 3f52c3c5..67c295b5 100644 --- a/mex/common/models/organizational_unit.py +++ b/mex/common/models/organizational_unit.py @@ -5,33 +5,42 @@ from mex.common.models.base import BaseModel from mex.common.models.extracted_data import ExtractedData from mex.common.models.merged_item import MergedItem -from mex.common.types import Email, Link, OrganizationalUnitID, OrganizationID, Text +from mex.common.types import ( + Email, + ExtractedOrganizationalUnitIdentifier, + Link, + MergedOrganizationalUnitIdentifier, + MergedOrganizationIdentifier, + Text, +) class BaseOrganizationalUnit(BaseModel): """An organizational unit which is part of some larger organization.""" - stableTargetId: OrganizationalUnitID alternativeName: list[Text] = [] email: list[Email] = [] name: Annotated[list[Text], Field(min_length=1)] - parentUnit: OrganizationalUnitID | None = None + parentUnit: MergedOrganizationalUnitIdentifier | None = None shortName: list[Text] = [] - unitOf: list[OrganizationID] = [] + unitOf: list[MergedOrganizationIdentifier] = [] website: list[Link] = [] class ExtractedOrganizationalUnit(BaseOrganizationalUnit, ExtractedData): """An automatically extracted metadata set describing an organizational unit.""" - entityType: Literal["ExtractedOrganizationalUnit"] = Field( - "ExtractedOrganizationalUnit", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["ExtractedOrganizationalUnit"], Field(alias="$type", frozen=True) + ] = "ExtractedOrganizationalUnit" + identifier: Annotated[ExtractedOrganizationalUnitIdentifier, Field(frozen=True)] + stableTargetId: MergedOrganizationalUnitIdentifier class MergedOrganizationalUnit(BaseOrganizationalUnit, MergedItem): """The result of merging all extracted data and rules for an organizational unit.""" - entityType: Literal["MergedOrganizationalUnit"] = Field( - "MergedOrganizationalUnit", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["MergedOrganizationalUnit"], Field(alias="$type", frozen=True) + ] = "MergedOrganizationalUnit" + identifier: 
Annotated[MergedOrganizationalUnitIdentifier, Field(frozen=True)] diff --git a/mex/common/models/person.py b/mex/common/models/person.py index d6cb7eee..cfc25342 100644 --- a/mex/common/models/person.py +++ b/mex/common/models/person.py @@ -5,14 +5,19 @@ from mex.common.models.base import BaseModel from mex.common.models.extracted_data import ExtractedData from mex.common.models.merged_item import MergedItem -from mex.common.types import Email, OrganizationalUnitID, OrganizationID, PersonID +from mex.common.types import ( + Email, + ExtractedPersonIdentifier, + MergedOrganizationalUnitIdentifier, + MergedOrganizationIdentifier, + MergedPersonIdentifier, +) class BasePerson(BaseModel): """A person related to a source and/or resource, i.e. a project leader.""" - stableTargetId: PersonID - affiliation: list[OrganizationID] = [] + affiliation: list[MergedOrganizationIdentifier] = [] email: list[Email] = [] familyName: list[ Annotated[ @@ -48,7 +53,7 @@ class BasePerson(BaseModel): ), ] ] = [] - memberOf: list[OrganizationalUnitID] = [] + memberOf: list[MergedOrganizationalUnitIdentifier] = [] orcidId: list[ Annotated[ str, @@ -64,14 +69,17 @@ class BasePerson(BaseModel): class ExtractedPerson(BasePerson, ExtractedData): """An automatically extracted metadata set describing a person.""" - entityType: Literal["ExtractedPerson"] = Field( - "ExtractedPerson", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["ExtractedPerson"], Field(alias="$type", frozen=True) + ] = "ExtractedPerson" + identifier: Annotated[ExtractedPersonIdentifier, Field(frozen=True)] + stableTargetId: MergedPersonIdentifier class MergedPerson(BasePerson, MergedItem): """The result of merging all extracted data and rules for a person.""" - entityType: Literal["MergedPerson"] = Field( - "MergedPerson", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["MergedPerson"], Field(alias="$type", frozen=True) + ] = "MergedPerson" + identifier: Annotated[MergedPersonIdentifier, Field(frozen=True)] diff --git a/mex/common/models/primary_source.py b/mex/common/models/primary_source.py index 11cf2d03..ae02cb71 100644 --- a/mex/common/models/primary_source.py +++ b/mex/common/models/primary_source.py @@ -6,11 +6,12 @@ from mex.common.models.extracted_data import ExtractedData from mex.common.models.merged_item import MergedItem from mex.common.types import ( - ContactPointID, + ExtractedPrimarySourceIdentifier, Link, - OrganizationalUnitID, - PersonID, - PrimarySourceID, + MergedContactPointIdentifier, + MergedOrganizationalUnitIdentifier, + MergedPersonIdentifier, + MergedPrimarySourceIdentifier, Text, ) @@ -18,14 +19,17 @@ class BasePrimarySource(BaseModel): """A collection of information, that is managed and curated by an RKI unit.""" - stableTargetId: PrimarySourceID alternativeTitle: list[Text] = [] - contact: list[OrganizationalUnitID | PersonID | ContactPointID] = [] + contact: list[ + MergedOrganizationalUnitIdentifier + | MergedPersonIdentifier + | MergedContactPointIdentifier + ] = [] description: list[Text] = [] documentation: list[Link] = [] locatedAt: list[Link] = [] title: list[Text] = [] - unitInCharge: list[OrganizationalUnitID] = [] + unitInCharge: list[MergedOrganizationalUnitIdentifier] = [] version: ( Annotated[ str, @@ -40,14 +44,17 @@ class BasePrimarySource(BaseModel): class ExtractedPrimarySource(BasePrimarySource, ExtractedData): """An automatically extracted metadata set describing a primary source.""" - entityType: Literal["ExtractedPrimarySource"] = Field( - 
"ExtractedPrimarySource", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["ExtractedPrimarySource"], Field(alias="$type", frozen=True) + ] = "ExtractedPrimarySource" + identifier: Annotated[ExtractedPrimarySourceIdentifier, Field(frozen=True)] + stableTargetId: MergedPrimarySourceIdentifier class MergedPrimarySource(BasePrimarySource, MergedItem): """The result of merging all extracted data and rules for a primary source.""" - entityType: Literal["MergedPrimarySource"] = Field( - "MergedPrimarySource", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["MergedPrimarySource"], Field(alias="$type", frozen=True) + ] = "MergedPrimarySource" + identifier: Annotated[MergedPrimarySourceIdentifier, Field(frozen=True)] diff --git a/mex/common/models/resource.py b/mex/common/models/resource.py index c008a004..486950cb 100644 --- a/mex/common/models/resource.py +++ b/mex/common/models/resource.py @@ -6,21 +6,22 @@ from mex.common.models.extracted_data import ExtractedData from mex.common.models.merged_item import MergedItem from mex.common.types import ( - AccessPlatformID, AccessRestriction, - ActivityID, AnonymizationPseudonymization, - ContactPointID, DataProcessingState, - DistributionID, + ExtractedResourceIdentifier, Frequency, Language, License, Link, - OrganizationalUnitID, - OrganizationID, - PersonID, - ResourceID, + MergedAccessPlatformIdentifier, + MergedActivityIdentifier, + MergedContactPointIdentifier, + MergedDistributionIdentifier, + MergedOrganizationalUnitIdentifier, + MergedOrganizationIdentifier, + MergedPersonIdentifier, + MergedResourceIdentifier, ResourceTypeGeneral, Text, Theme, @@ -31,8 +32,7 @@ class BaseResource(BaseModel): """A defined piece or collection of information.""" - stableTargetId: ResourceID - accessPlatform: list[AccessPlatformID] = [] + accessPlatform: list[MergedAccessPlatformIdentifier] = [] accessRestriction: Annotated[ AccessRestriction, Field( @@ -53,19 +53,24 @@ class BaseResource(BaseModel): ] ] = [] contact: Annotated[ - list[OrganizationalUnitID | PersonID | ContactPointID], Field(min_length=1) + list[ + MergedOrganizationalUnitIdentifier + | MergedPersonIdentifier + | MergedContactPointIdentifier + ], + Field(min_length=1), ] - contributingUnit: list[OrganizationalUnitID] = [] - contributor: list[PersonID] = [] + contributingUnit: list[MergedOrganizationalUnitIdentifier] = [] + contributor: list[MergedPersonIdentifier] = [] created: Timestamp | None = None - creator: list[PersonID] = [] + creator: list[MergedPersonIdentifier] = [] description: list[Text] = [] - distribution: list[DistributionID] = [] + distribution: list[MergedDistributionIdentifier] = [] documentation: list[Link] = [] - externalPartner: list[OrganizationID] = [] + externalPartner: list[MergedOrganizationIdentifier] = [] icd10code: list[str] = [] instrumentToolOrApparatus: list[Text] = [] - isPartOf: list[ResourceID] = [] + isPartOf: list[MergedResourceIdentifier] = [] keyword: list[Text] = [] language: list[ Annotated[Language, Field(examples=["https://mex.rki.de/item/language-1"])] @@ -88,7 +93,7 @@ class BaseResource(BaseModel): methodDescription: list[Text] = [] modified: Timestamp | None = None publication: list[Link] = [] - publisher: list[OrganizationID] = [] + publisher: list[MergedOrganizationIdentifier] = [] qualityInformation: list[Text] = [] resourceTypeGeneral: list[ Annotated[ @@ -130,21 +135,26 @@ class BaseResource(BaseModel): Field(min_length=1), ] title: Annotated[list[Text], Field(min_length=1)] - unitInCharge: 
Annotated[list[OrganizationalUnitID], Field(min_length=1)] - wasGeneratedBy: ActivityID | None = None + unitInCharge: Annotated[ + list[MergedOrganizationalUnitIdentifier], Field(min_length=1) + ] + wasGeneratedBy: MergedActivityIdentifier | None = None class ExtractedResource(BaseResource, ExtractedData): """An automatically extracted metadata set describing a resource.""" - entityType: Literal["ExtractedResource"] = Field( - "ExtractedResource", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["ExtractedResource"], Field(alias="$type", frozen=True) + ] = "ExtractedResource" + identifier: Annotated[ExtractedResourceIdentifier, Field(frozen=True)] + stableTargetId: MergedResourceIdentifier class MergedResource(BaseResource, MergedItem): """The result of merging all extracted data and rules for a resource.""" - entityType: Literal["MergedResource"] = Field( - "MergedResource", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["MergedResource"], Field(alias="$type", frozen=True) + ] = "MergedResource" + identifier: Annotated[MergedResourceIdentifier, Field(frozen=True)] diff --git a/mex/common/models/schema.py b/mex/common/models/schema.py new file mode 100644 index 00000000..8f2752f1 --- /dev/null +++ b/mex/common/models/schema.py @@ -0,0 +1,20 @@ +from pydantic.json_schema import ( + GenerateJsonSchema as PydanticJsonSchemaGenerator, +) +from pydantic.json_schema import JsonSchemaValue + + +class JsonSchemaGenerator(PydanticJsonSchemaGenerator): + """Customization of the pydantic class for generating JSON schemas.""" + + def handle_ref_overrides(self, json_schema: JsonSchemaValue) -> JsonSchemaValue: + """Disable pydantic behavior to wrap top-level `$ref` keys in an `allOf`. + + For example, pydantic would convert + {"$ref": "#/$defs/APIType", "examples": ["api-type-1"]} + into + {"allOf": {"$ref": "#/$defs/APIType"}, "examples": ["api-type-1"]} + which is in fact recommended by JSON schema, but we need to disable this + to stay compatible with mex-editor and mex-model. 
+ """ + return json_schema diff --git a/mex/common/models/variable.py b/mex/common/models/variable.py index 7b630154..cec9f4fd 100644 --- a/mex/common/models/variable.py +++ b/mex/common/models/variable.py @@ -7,18 +7,18 @@ from mex.common.models.merged_item import MergedItem from mex.common.types import ( DataType, - ResourceID, + ExtractedVariableIdentifier, + MergedResourceIdentifier, + MergedVariableGroupIdentifier, + MergedVariableIdentifier, Text, - VariableGroupID, - VariableID, ) class BaseVariable(BaseModel): """A single piece of information within a resource.""" - stableTargetId: VariableID - belongsTo: list[VariableGroupID] = [] + belongsTo: list[MergedVariableGroupIdentifier] = [] codingSystem: ( Annotated[ str, @@ -51,7 +51,7 @@ class BaseVariable(BaseModel): ], Field(min_length=1), ] - usedIn: Annotated[list[ResourceID], Field(min_length=1)] + usedIn: Annotated[list[MergedResourceIdentifier], Field(min_length=1)] valueSet: list[ Annotated[ str, @@ -69,14 +69,17 @@ class BaseVariable(BaseModel): class ExtractedVariable(BaseVariable, ExtractedData): """An automatically extracted metadata set describing a variable.""" - entityType: Literal["ExtractedVariable"] = Field( - "ExtractedVariable", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["ExtractedVariable"], Field(alias="$type", frozen=True) + ] = "ExtractedVariable" + identifier: Annotated[ExtractedVariableIdentifier, Field(frozen=True)] + stableTargetId: MergedVariableIdentifier class MergedVariable(BaseVariable, MergedItem): """The result of merging all extracted data and rules for a variable.""" - entityType: Literal["MergedVariable"] = Field( - "MergedVariable", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["MergedVariable"], Field(alias="$type", frozen=True) + ] = "MergedVariable" + identifier: Annotated[MergedVariableIdentifier, Field(frozen=True)] diff --git a/mex/common/models/variable_group.py b/mex/common/models/variable_group.py index 22382876..bbc9a6cd 100644 --- a/mex/common/models/variable_group.py +++ b/mex/common/models/variable_group.py @@ -5,28 +5,35 @@ from mex.common.models.base import BaseModel from mex.common.models.extracted_data import ExtractedData from mex.common.models.merged_item import MergedItem -from mex.common.types import ResourceID, Text, VariableGroupID +from mex.common.types import ( + ExtractedVariableGroupIdentifier, + MergedResourceIdentifier, + MergedVariableGroupIdentifier, + Text, +) class BaseVariableGroup(BaseModel): """The grouping of variables according to a certain aspect.""" - stableTargetId: VariableGroupID - containedBy: Annotated[list[ResourceID], Field(min_length=1)] + containedBy: Annotated[list[MergedResourceIdentifier], Field(min_length=1)] label: Annotated[list[Text], Field(min_length=1)] class ExtractedVariableGroup(BaseVariableGroup, ExtractedData): """An automatically extracted metadata set describing a variable group.""" - entityType: Literal["ExtractedVariableGroup"] = Field( - "ExtractedVariableGroup", alias="$type", frozen=True - ) + entityType: Annotated[ + Literal["ExtractedVariableGroup"], Field(alias="$type", frozen=True) + ] = "ExtractedVariableGroup" + identifier: Annotated[ExtractedVariableGroupIdentifier, Field(frozen=True)] + stableTargetId: MergedVariableGroupIdentifier class MergedVariableGroup(BaseVariableGroup, MergedItem): """The result of merging all extracted data and rules for a variable group.""" - entityType: Literal["MergedVariableGroup"] = Field( - "MergedVariableGroup", alias="$type", frozen=True - ) + 
entityType: Annotated[ + Literal["MergedVariableGroup"], Field(alias="$type", frozen=True) + ] = "MergedVariableGroup" + identifier: Annotated[MergedVariableGroupIdentifier, Field(frozen=True)] diff --git a/mex/common/organigram/README.md b/mex/common/organigram/README.md index 109797bd..dc4f3c42 100644 --- a/mex/common/organigram/README.md +++ b/mex/common/organigram/README.md @@ -17,7 +17,7 @@ The module `organigram.extract` contains functions for data extraction. The module `organigram.transform` contains functions for data transformation. Use the `stableTargetId` attribute of the transformed objects to set attributes -requiring an `OrganizationalUnitID`. +requiring a `MergedOrganizationalUnitIdentifier`. # Convenience Functions diff --git a/mex/common/organigram/extract.py b/mex/common/organigram/extract.py index ec4f2290..17ec89fa 100644 --- a/mex/common/organigram/extract.py +++ b/mex/common/organigram/extract.py @@ -5,7 +5,7 @@ from mex.common.models import ExtractedOrganizationalUnit from mex.common.organigram.models import OrganigramUnit from mex.common.settings import BaseSettings -from mex.common.types import OrganizationalUnitID +from mex.common.types import MergedOrganizationalUnitIdentifier @watch @@ -47,7 +47,7 @@ def _get_synonyms( def get_unit_merged_ids_by_synonyms( extracted_units: Iterable[ExtractedOrganizationalUnit], -) -> dict[str, OrganizationalUnitID]: +) -> dict[str, MergedOrganizationalUnitIdentifier]: """Return a mapping from unit alt_label and label to their merged IDs. There will be multiple entries per unit mapping to the same merged ID. @@ -59,7 +59,7 @@ def get_unit_merged_ids_by_synonyms( Mapping from unit synonyms to stableTargetIds """ return { - synonym: OrganizationalUnitID(extracted_unit.stableTargetId) + synonym: MergedOrganizationalUnitIdentifier(extracted_unit.stableTargetId) for extracted_unit in extracted_units for synonym in _get_synonyms(extracted_unit) } @@ -67,7 +67,7 @@ def get_unit_merged_ids_by_emails( extracted_units: Iterable[ExtractedOrganizationalUnit], -) -> dict[str, OrganizationalUnitID]: +) -> dict[str, MergedOrganizationalUnitIdentifier]: """Return a mapping from unit emails to their merged IDs. There may be multiple emails per unit mapping to the same merged ID.
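Both lookup helpers above now return `MergedOrganizationalUnitIdentifier` values. A minimal usage sketch, not part of this change: the empty `extracted_units` list stands in for units produced by the organigram extraction and transform steps, and the synonym and email literals are placeholders.

```python
# Hedged sketch of the renamed lookup helpers; `extracted_units`, the synonym
# "MF4" and the email address below are placeholder assumptions.
from typing import Iterable

from mex.common.models import ExtractedOrganizationalUnit
from mex.common.organigram.extract import (
    get_unit_merged_ids_by_emails,
    get_unit_merged_ids_by_synonyms,
)

# stand-in for units coming out of the organigram extraction/transform steps
extracted_units: Iterable[ExtractedOrganizationalUnit] = []

ids_by_synonym = get_unit_merged_ids_by_synonyms(extracted_units)
ids_by_email = get_unit_merged_ids_by_emails(extracted_units)

# both mappings now hold MergedOrganizationalUnitIdentifier values, so results
# can be assigned directly to reference fields such as `parentUnit`
unit_id = ids_by_synonym.get("MF4") or ids_by_email.get("mf4@example.org")
```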
@@ -79,7 +79,7 @@ def get_unit_merged_ids_by_emails( Mapping from lowercased `email` to stableTargetIds """ return { - email.lower(): OrganizationalUnitID(extracted_unit.stableTargetId) + email.lower(): MergedOrganizationalUnitIdentifier(extracted_unit.stableTargetId) for extracted_unit in extracted_units for email in extracted_unit.email } diff --git a/mex/common/organigram/transform.py b/mex/common/organigram/transform.py index dd1f8b8e..1c9e14d4 100644 --- a/mex/common/organigram/transform.py +++ b/mex/common/organigram/transform.py @@ -3,7 +3,7 @@ from mex.common.logging import watch from mex.common.models import ExtractedOrganizationalUnit, ExtractedPrimarySource from mex.common.organigram.models import OrganigramUnit -from mex.common.types import Email, OrganizationalUnitID +from mex.common.types import Email, MergedOrganizationalUnitIdentifier @watch @@ -48,7 +48,7 @@ def transform_organigram_units_to_organizational_units( if parent_unit := extracted_unit_by_id_in_primary_source.get( parent_identifier_in_primary_source ): - extracted_unit.parentUnit = OrganizationalUnitID( + extracted_unit.parentUnit = MergedOrganizationalUnitIdentifier( parent_unit.stableTargetId ) yield extracted_unit diff --git a/mex/common/settings.py b/mex/common/settings.py index 814ca782..7ff1b356 100644 --- a/mex/common/settings.py +++ b/mex/common/settings.py @@ -1,4 +1,3 @@ -from contextvars import ContextVar from pathlib import Path from typing import Any, Optional, TypeVar, Union @@ -8,12 +7,11 @@ from pydantic_settings import SettingsConfigDict from pydantic_settings.sources import ENV_FILE_SENTINEL, DotenvType, EnvSettingsSource +from mex.common.context import ContextStore from mex.common.types import AssetsPath, IdentityProvider, Sink, WorkPath SettingsType = TypeVar("SettingsType", bound="BaseSettings") -SettingsContext: ContextVar[Optional["BaseSettings"]] = ContextVar( - "SettingsContext", default=None -) +SettingsContext = ContextStore[Optional["BaseSettings"]](None) class BaseSettings(PydanticBaseSettings): @@ -182,7 +180,7 @@ def get(cls: type[SettingsType]) -> SettingsType: wiki_api_url: AnyUrl = Field( Url("https://wikidata/"), description="URL of Wikidata API, this URL is used to send " - "wikidata organizatizion ID to get all the info about the organization, " + "wikidata organization ID to get all the info about the organization, " "which includes basic info, aliases, labels, descriptions, claims, and " "sitelinks", validation_alias="MEX_WIKI_API_URL", diff --git a/mex/common/sinks/backend_api.py b/mex/common/sinks/backend_api.py index d4c76304..76dbe49d 100644 --- a/mex/common/sinks/backend_api.py +++ b/mex/common/sinks/backend_api.py @@ -2,19 +2,19 @@ from mex.common.backend_api.connector import BackendApiConnector from mex.common.logging import watch -from mex.common.models import MExModel +from mex.common.models import AnyExtractedModel from mex.common.types import Identifier from mex.common.utils import grouper @watch def post_to_backend_api( - models: Iterable[MExModel], chunk_size: int = 100 + models: Iterable[AnyExtractedModel], chunk_size: int = 100 ) -> Generator[Identifier, None, None]: """Load models to the Backend API using bulk insertion. 
Args: - models: Iterable of extracted or merged models + models: Iterable of extracted models chunk_size: Optional size to chunks to post in one request Returns: diff --git a/mex/common/sinks/ndjson.py b/mex/common/sinks/ndjson.py index 11fa5d93..27676892 100644 --- a/mex/common/sinks/ndjson.py +++ b/mex/common/sinks/ndjson.py @@ -4,18 +4,20 @@ from typing import IO, Any, Generator, Iterable from mex.common.logging import echo, watch -from mex.common.models import MExModel +from mex.common.models import AnyExtractedModel from mex.common.settings import BaseSettings from mex.common.transform import MExEncoder from mex.common.types import Identifier @watch -def write_ndjson(models: Iterable[MExModel]) -> Generator[Identifier, None, None]: +def write_ndjson( + models: Iterable[AnyExtractedModel], +) -> Generator[Identifier, None, None]: """Write the incoming models into a new-line delimited JSON file. Args: - models: Iterable of models to write + models: Iterable of extracted models to write Settings: work_dir: Path to store the NDJSON files in diff --git a/mex/common/transform.py b/mex/common/transform.py index e4a59b1a..f0ef7cca 100644 --- a/mex/common/transform.py +++ b/mex/common/transform.py @@ -3,7 +3,7 @@ from enum import Enum from functools import cache from pathlib import PurePath -from typing import Any +from typing import Any, Iterable, cast from uuid import UUID from pydantic import AnyUrl, SecretStr @@ -73,3 +73,26 @@ def kebab_to_camel(string: str) -> str: if len(tokens := re.split(r"\-+", string)) > 1: return "".join(word.title() for word in tokens) return string[:1].upper() + string[1:] + + +def ensure_prefix(string: Any, prefix: Any) -> str: + """Return a str with the given prefix prepended if it is not present yet. + + If the string already starts with the prefix, return a copy. + This method is the inverse of `str.removeprefix`. 
+ """ + string = str(string) + prefix = str(prefix) + if string.startswith(prefix): + return cast(str, string) + return f"{prefix}{string}" + + +def to_key_and_values(dct: dict[str, Any]) -> Iterable[tuple[str, list[Any]]]: + """Return an iterable of dictionary items where the values are always lists.""" + for key, value in dct.items(): + if value is None: + value = [] + elif not isinstance(value, list): + value = [value] + yield key, value diff --git a/mex/common/types/__init__.py b/mex/common/types/__init__.py index 2dbc6286..1f758389 100644 --- a/mex/common/types/__init__.py +++ b/mex/common/types/__init__.py @@ -1,17 +1,30 @@ +from typing import Final, Union, get_args + from mex.common.types.email import Email from mex.common.types.identifier import ( - AccessPlatformID, - ActivityID, - ContactPointID, - DistributionID, + ExtractedAccessPlatformIdentifier, + ExtractedActivityIdentifier, + ExtractedContactPointIdentifier, + ExtractedDistributionIdentifier, + ExtractedOrganizationalUnitIdentifier, + ExtractedOrganizationIdentifier, + ExtractedPersonIdentifier, + ExtractedPrimarySourceIdentifier, + ExtractedResourceIdentifier, + ExtractedVariableGroupIdentifier, + ExtractedVariableIdentifier, Identifier, - OrganizationalUnitID, - OrganizationID, - PersonID, - PrimarySourceID, - ResourceID, - VariableGroupID, - VariableID, + MergedAccessPlatformIdentifier, + MergedActivityIdentifier, + MergedContactPointIdentifier, + MergedDistributionIdentifier, + MergedOrganizationalUnitIdentifier, + MergedOrganizationIdentifier, + MergedPersonIdentifier, + MergedPrimarySourceIdentifier, + MergedResourceIdentifier, + MergedVariableGroupIdentifier, + MergedVariableIdentifier, ) from mex.common.types.identity import IdentityProvider from mex.common.types.link import Link, LinkLanguage @@ -46,19 +59,27 @@ ) __all__ = ( - "AccessPlatformID", "AccessRestriction", - "ActivityID", "ActivityType", "AnonymizationPseudonymization", + "AnyNestedModel", "APIType", "AssetsPath", "CET", - "ContactPointID", "DataProcessingState", "DataType", - "DistributionID", "Email", + "ExtractedAccessPlatformIdentifier", + "ExtractedActivityIdentifier", + "ExtractedContactPointIdentifier", + "ExtractedDistributionIdentifier", + "ExtractedOrganizationalUnitIdentifier", + "ExtractedOrganizationIdentifier", + "ExtractedPersonIdentifier", + "ExtractedPrimarySourceIdentifier", + "ExtractedResourceIdentifier", + "ExtractedVariableGroupIdentifier", + "ExtractedVariableIdentifier", "Frequency", "Identifier", "IdentityProvider", @@ -66,13 +87,21 @@ "License", "Link", "LinkLanguage", + "MergedAccessPlatformIdentifier", + "MergedActivityIdentifier", + "MergedContactPointIdentifier", + "MergedDistributionIdentifier", + "MergedOrganizationalUnitIdentifier", + "MergedOrganizationIdentifier", + "MergedPersonIdentifier", + "MergedPrimarySourceIdentifier", + "MergedResourceIdentifier", + "MergedVariableGroupIdentifier", + "MergedVariableIdentifier", "MIMEType", - "OrganizationalUnitID", - "OrganizationID", + "NESTED_MODEL_CLASSES_BY_NAME", + "NESTED_MODEL_CLASSES", "PathWrapper", - "PersonID", - "PrimarySourceID", - "ResourceID", "ResourceTypeGeneral", "Sink", "split_to_caps", @@ -85,9 +114,56 @@ "Timestamp", "TimestampPrecision", "UTC", - "VariableGroupID", - "VariableID", "VocabularyEnum", "VocabularyLoader", "WorkPath", ) + +AnyNestedModel = Union[ + Link, + Text, +] +NESTED_MODEL_CLASSES: Final[list[type[AnyNestedModel]]] = list(get_args(AnyNestedModel)) +NESTED_MODEL_CLASSES_BY_NAME: Final[dict[str, type[AnyNestedModel]]] = { + cls.__name__: 
cls for cls in NESTED_MODEL_CLASSES +} + +AnyMergedIdentifier = Union[ + MergedAccessPlatformIdentifier, + MergedActivityIdentifier, + MergedContactPointIdentifier, + MergedDistributionIdentifier, + MergedOrganizationalUnitIdentifier, + MergedOrganizationIdentifier, + MergedPersonIdentifier, + MergedPrimarySourceIdentifier, + MergedResourceIdentifier, + MergedVariableGroupIdentifier, + MergedVariableIdentifier, +] +MERGED_IDENTIFIER_CLASSES: Final[list[type[AnyMergedIdentifier]]] = list( + get_args(AnyMergedIdentifier) +) +MERGED_IDENTIFIER_CLASSES_BY_NAME: Final[dict[str, type[AnyMergedIdentifier]]] = { + cls.__name__: cls for cls in MERGED_IDENTIFIER_CLASSES +} + +AnyExtractedIdentifier = Union[ + ExtractedAccessPlatformIdentifier, + ExtractedActivityIdentifier, + ExtractedContactPointIdentifier, + ExtractedDistributionIdentifier, + ExtractedOrganizationalUnitIdentifier, + ExtractedOrganizationIdentifier, + ExtractedPersonIdentifier, + ExtractedPrimarySourceIdentifier, + ExtractedResourceIdentifier, + ExtractedVariableGroupIdentifier, + ExtractedVariableIdentifier, +] +EXTRACTED_IDENTIFIER_CLASSES: Final[list[type[AnyExtractedIdentifier]]] = list( + get_args(AnyExtractedIdentifier) +) +EXTRACTED_IDENTIFIER_CLASSES_BY_NAME: Final[dict[str, type[AnyExtractedIdentifier]]] = { + cls.__name__: cls for cls in EXTRACTED_IDENTIFIER_CLASSES +} diff --git a/mex/common/types/identifier.py b/mex/common/types/identifier.py index 71f7b215..d5c42b6a 100644 --- a/mex/common/types/identifier.py +++ b/mex/common/types/identifier.py @@ -75,52 +75,96 @@ def __repr__(self) -> str: # We have technically-identical subclasses of identifier types (one per entity-type). # This allows us to annotate which entity-types are allowed on reference fields. -# For example `contact: PersonID | OrganizationID` tells us that a contact for an item -# has to be either a person or an organization. +# For example `contact: MergedPersonIdentifier | MergedOrganizationIdentifier` tells us +# that a contact for an item has to be either a merged person or merged organization. # We cannot validate this using pydantic, because all identifiers have the same -# format. But it helps for documentation purposes and allows us to generate a more -# precise JSON schema / swagger definition. +# format. But it helps for documentation purposes, allows us to generate more +# precise JSON schema definitions and to derive database queries from the models.
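The comment above describes the reference-field annotation pattern; a minimal sketch follows. `Contactable` is an illustrative model invented for this example and not part of mex-common; it only shows how the per-entity-type identifier classes document the allowed targets of a reference field and surface in the generated JSON schema.

```python
# Illustrative model, not part of mex-common; only the identifier classes are
# imported from the library.
from pydantic import BaseModel

from mex.common.types import (
    MergedOrganizationIdentifier,
    MergedPersonIdentifier,
)


class Contactable(BaseModel):
    """Hypothetical item with a typed contact reference."""

    contact: list[MergedPersonIdentifier | MergedOrganizationIdentifier] = []


# all identifier subclasses share the same string format, so validation cannot
# tell them apart, but the annotation documents the allowed entity types and is
# reflected in the JSON schema generated from the model
print(Contactable.model_json_schema())
```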
-class AccessPlatformID(Identifier): +class ExtractedAccessPlatformIdentifier(Identifier): + """Identifier for extracted access platforms.""" + + +class ExtractedActivityIdentifier(Identifier): + """Identifier for extracted activities.""" + + +class ExtractedContactPointIdentifier(Identifier): + """Identifier for extracted contact points.""" + + +class ExtractedDistributionIdentifier(Identifier): + """Identifier for extracted distributions.""" + + +class ExtractedOrganizationIdentifier(Identifier): + """Identifier for extracted organizations.""" + + +class ExtractedOrganizationalUnitIdentifier(Identifier): + """Identifier for extracted organizational units.""" + + +class ExtractedPersonIdentifier(Identifier): + """Identifier for extracted persons.""" + + +class ExtractedPrimarySourceIdentifier(Identifier): + """Identifier for extracted primary sources.""" + + +class ExtractedResourceIdentifier(Identifier): + """Identifier for extracted resources.""" + + +class ExtractedVariableIdentifier(Identifier): + """Identifier for extracted variables.""" + + +class ExtractedVariableGroupIdentifier(Identifier): + """Identifier for extracted variable groups.""" + + +class MergedAccessPlatformIdentifier(Identifier): """Identifier for merged access platforms.""" -class ActivityID(Identifier): +class MergedActivityIdentifier(Identifier): """Identifier for merged activities.""" -class ContactPointID(Identifier): +class MergedContactPointIdentifier(Identifier): """Identifier for merged contact points.""" -class DistributionID(Identifier): +class MergedDistributionIdentifier(Identifier): """Identifier for merged distributions.""" -class OrganizationID(Identifier): +class MergedOrganizationIdentifier(Identifier): """Identifier for merged organizations.""" -class OrganizationalUnitID(Identifier): +class MergedOrganizationalUnitIdentifier(Identifier): """Identifier for merged organizational units.""" -class PersonID(Identifier): +class MergedPersonIdentifier(Identifier): """Identifier for merged persons.""" -class PrimarySourceID(Identifier): +class MergedPrimarySourceIdentifier(Identifier): """Identifier for merged primary sources.""" -class ResourceID(Identifier): +class MergedResourceIdentifier(Identifier): """Identifier for merged resources.""" -class VariableID(Identifier): +class MergedVariableIdentifier(Identifier): """Identifier for merged variables.""" -class VariableGroupID(Identifier): +class MergedVariableGroupIdentifier(Identifier): """Identifier for merged variable groups.""" diff --git a/mex/common/types/link.py b/mex/common/types/link.py index 7f08c7bd..a29ff7cf 100644 --- a/mex/common/types/link.py +++ b/mex/common/types/link.py @@ -1,6 +1,6 @@ import re from enum import StrEnum -from typing import Any +from typing import Annotated, Any from pydantic import BaseModel, Field, model_validator @@ -40,12 +40,15 @@ class Link(BaseModel): language: LinkLanguage | None = None title: str | None = None - url: str = Field( - pattern=r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?", - min_length=1, - examples=["https://hello-world.org", "file://S:/OE/MF4/Projekte/MEx"], - json_schema_extra={"format": "uri"}, - ) + url: Annotated[ + str, + Field( + pattern=r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?", + min_length=1, + examples=["https://hello-world.org", "file://S:/OE/MF4/Projekte/MEx"], + json_schema_extra={"format": "uri"}, + ), + ] @model_validator(mode="before") @classmethod diff --git a/mex/common/types/text.py b/mex/common/types/text.py index 99927521..2f406fb8 100644 
--- a/mex/common/types/text.py +++ b/mex/common/types/text.py @@ -1,5 +1,5 @@ from enum import StrEnum -from typing import Any +from typing import Annotated, Any from langdetect.detector_factory import PROFILES_DIRECTORY, DetectorFactory from langdetect.lang_detect_exception import LangDetectException @@ -26,7 +26,7 @@ class Text(BaseModel): Text(value="foo") == Text.model_validate("foo") """ - value: str = Field(min_length=1) + value: Annotated[str, Field(min_length=1)] language: TextLanguage | None = None @model_validator(mode="before") diff --git a/mex/common/wikidata/models/organization.py b/mex/common/wikidata/models/organization.py index 788e3946..6ffea244 100644 --- a/mex/common/wikidata/models/organization.py +++ b/mex/common/wikidata/models/organization.py @@ -1,4 +1,4 @@ -from typing import Optional, Union +from typing import Annotated, Optional, Union from pydantic import ConfigDict, Field, model_validator @@ -51,15 +51,15 @@ class Claim(BaseModel): class Claims(BaseModel): """model class for Claims.""" - website: list[Claim] = Field([], alias="P856") - isni_id: list[Claim] = Field([], alias="P213") - ror_id: list[Claim] = Field([], alias="P6782") - official_name: list[Claim] = Field([], alias="P1448") - short_name: list[Claim] = Field([], alias="P1813") - native_label: list[Claim] = Field([], alias="P1705") - gepris_id: list[Claim] = Field([], alias="P4871") - gnd_id: list[Claim] = Field([], alias="P227") - viaf_id: list[Claim] = Field([], alias="P214") + website: Annotated[list[Claim], Field(alias="P856")] = [] + isni_id: Annotated[list[Claim], Field(alias="P213")] = [] + ror_id: Annotated[list[Claim], Field(alias="P6782")] = [] + official_name: Annotated[list[Claim], Field(alias="P1448")] = [] + short_name: Annotated[list[Claim], Field(alias="P1813")] = [] + native_label: Annotated[list[Claim], Field(alias="P1705")] = [] + gepris_id: Annotated[list[Claim], Field(alias="P4871")] = [] + gnd_id: Annotated[list[Claim], Field(alias="P227")] = [] + viaf_id: Annotated[list[Claim], Field(alias="P214")] = [] class Label(BaseModel): @@ -95,7 +95,7 @@ class WikidataOrganization(BaseModel): model_config = ConfigDict(extra="ignore") - identifier: str = Field(alias="id") + identifier: Annotated[str, Field(alias="id")] labels: Labels claims: Claims aliases: Aliases diff --git a/poetry.lock b/poetry.lock index 86fd2e39..73b5ffd4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. 
[[package]] name = "alabaster" @@ -408,13 +408,13 @@ ipython = {version = ">=7.31.1", markers = "python_version >= \"3.11\""} [[package]] name = "ipython" -version = "8.22.1" +version = "8.22.2" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.10" files = [ - {file = "ipython-8.22.1-py3-none-any.whl", hash = "sha256:869335e8cded62ffb6fac8928e5287a05433d6462e3ebaac25f4216474dd6bc4"}, - {file = "ipython-8.22.1.tar.gz", hash = "sha256:39c6f9efc079fb19bfb0f17eee903978fe9a290b1b82d68196c641cecb76ea22"}, + {file = "ipython-8.22.2-py3-none-any.whl", hash = "sha256:3c86f284c8f3d8f2b6c662f885c4889a91df7cd52056fd02b7d8d6195d7f56e9"}, + {file = "ipython-8.22.2.tar.gz", hash = "sha256:2dcaad9049f9056f1fef63514f176c7d41f930daa78d05b82a176202818f2c14"}, ] [package.dependencies] @@ -718,40 +718,40 @@ files = [ [[package]] name = "pandas" -version = "2.2.0" +version = "2.2.1" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" files = [ - {file = "pandas-2.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8108ee1712bb4fa2c16981fba7e68b3f6ea330277f5ca34fa8d557e986a11670"}, - {file = "pandas-2.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:736da9ad4033aeab51d067fc3bd69a0ba36f5a60f66a527b3d72e2030e63280a"}, - {file = "pandas-2.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38e0b4fc3ddceb56ec8a287313bc22abe17ab0eb184069f08fc6a9352a769b18"}, - {file = "pandas-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20404d2adefe92aed3b38da41d0847a143a09be982a31b85bc7dd565bdba0f4e"}, - {file = "pandas-2.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7ea3ee3f125032bfcade3a4cf85131ed064b4f8dd23e5ce6fa16473e48ebcaf5"}, - {file = "pandas-2.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f9670b3ac00a387620489dfc1bca66db47a787f4e55911f1293063a78b108df1"}, - {file = "pandas-2.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:5a946f210383c7e6d16312d30b238fd508d80d927014f3b33fb5b15c2f895430"}, - {file = "pandas-2.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a1b438fa26b208005c997e78672f1aa8138f67002e833312e6230f3e57fa87d5"}, - {file = "pandas-2.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8ce2fbc8d9bf303ce54a476116165220a1fedf15985b09656b4b4275300e920b"}, - {file = "pandas-2.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2707514a7bec41a4ab81f2ccce8b382961a29fbe9492eab1305bb075b2b1ff4f"}, - {file = "pandas-2.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85793cbdc2d5bc32620dc8ffa715423f0c680dacacf55056ba13454a5be5de88"}, - {file = "pandas-2.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:cfd6c2491dc821b10c716ad6776e7ab311f7df5d16038d0b7458bc0b67dc10f3"}, - {file = "pandas-2.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a146b9dcacc3123aa2b399df1a284de5f46287a4ab4fbfc237eac98a92ebcb71"}, - {file = "pandas-2.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbc1b53c0e1fdf16388c33c3cca160f798d38aea2978004dd3f4d3dec56454c9"}, - {file = "pandas-2.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a41d06f308a024981dcaa6c41f2f2be46a6b186b902c94c2674e8cb5c42985bc"}, - {file = "pandas-2.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:159205c99d7a5ce89ecfc37cb08ed179de7783737cea403b295b5eda8e9c56d1"}, - {file = "pandas-2.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:eb1e1f3861ea9132b32f2133788f3b14911b68102d562715d71bd0013bc45440"}, - {file = "pandas-2.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:761cb99b42a69005dec2b08854fb1d4888fdf7b05db23a8c5a099e4b886a2106"}, - {file = "pandas-2.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a20628faaf444da122b2a64b1e5360cde100ee6283ae8effa0d8745153809a2e"}, - {file = "pandas-2.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f5be5d03ea2073627e7111f61b9f1f0d9625dc3c4d8dda72cc827b0c58a1d042"}, - {file = "pandas-2.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:a626795722d893ed6aacb64d2401d017ddc8a2341b49e0384ab9bf7112bdec30"}, - {file = "pandas-2.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9f66419d4a41132eb7e9a73dcec9486cf5019f52d90dd35547af11bc58f8637d"}, - {file = "pandas-2.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:57abcaeda83fb80d447f28ab0cc7b32b13978f6f733875ebd1ed14f8fbc0f4ab"}, - {file = "pandas-2.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e60f1f7dba3c2d5ca159e18c46a34e7ca7247a73b5dd1a22b6d59707ed6b899a"}, - {file = "pandas-2.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb61dc8567b798b969bcc1fc964788f5a68214d333cade8319c7ab33e2b5d88a"}, - {file = "pandas-2.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:52826b5f4ed658fa2b729264d63f6732b8b29949c7fd234510d57c61dbeadfcd"}, - {file = "pandas-2.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bde2bc699dbd80d7bc7f9cab1e23a95c4375de615860ca089f34e7c64f4a8de7"}, - {file = "pandas-2.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:3de918a754bbf2da2381e8a3dcc45eede8cd7775b047b923f9006d5f876802ae"}, - {file = "pandas-2.2.0.tar.gz", hash = "sha256:30b83f7c3eb217fb4d1b494a57a2fda5444f17834f5df2de6b2ffff68dc3c8e2"}, + {file = "pandas-2.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8df8612be9cd1c7797c93e1c5df861b2ddda0b48b08f2c3eaa0702cf88fb5f88"}, + {file = "pandas-2.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0f573ab277252ed9aaf38240f3b54cfc90fff8e5cab70411ee1d03f5d51f3944"}, + {file = "pandas-2.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f02a3a6c83df4026e55b63c1f06476c9aa3ed6af3d89b4f04ea656ccdaaaa359"}, + {file = "pandas-2.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c38ce92cb22a4bea4e3929429aa1067a454dcc9c335799af93ba9be21b6beb51"}, + {file = "pandas-2.2.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c2ce852e1cf2509a69e98358e8458775f89599566ac3775e70419b98615f4b06"}, + {file = "pandas-2.2.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:53680dc9b2519cbf609c62db3ed7c0b499077c7fefda564e330286e619ff0dd9"}, + {file = "pandas-2.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:94e714a1cca63e4f5939cdce5f29ba8d415d85166be3441165edd427dc9f6bc0"}, + {file = "pandas-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f821213d48f4ab353d20ebc24e4faf94ba40d76680642fb7ce2ea31a3ad94f9b"}, + {file = "pandas-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c70e00c2d894cb230e5c15e4b1e1e6b2b478e09cf27cc593a11ef955b9ecc81a"}, + {file = "pandas-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e97fbb5387c69209f134893abc788a6486dbf2f9e511070ca05eed4b930b1b02"}, + {file = "pandas-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:101d0eb9c5361aa0146f500773395a03839a5e6ecde4d4b6ced88b7e5a1a6403"}, + {file = 
"pandas-2.2.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:7d2ed41c319c9fb4fd454fe25372028dfa417aacb9790f68171b2e3f06eae8cd"}, + {file = "pandas-2.2.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:af5d3c00557d657c8773ef9ee702c61dd13b9d7426794c9dfeb1dc4a0bf0ebc7"}, + {file = "pandas-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:06cf591dbaefb6da9de8472535b185cba556d0ce2e6ed28e21d919704fef1a9e"}, + {file = "pandas-2.2.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:88ecb5c01bb9ca927ebc4098136038519aa5d66b44671861ffab754cae75102c"}, + {file = "pandas-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:04f6ec3baec203c13e3f8b139fb0f9f86cd8c0b94603ae3ae8ce9a422e9f5bee"}, + {file = "pandas-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a935a90a76c44fe170d01e90a3594beef9e9a6220021acfb26053d01426f7dc2"}, + {file = "pandas-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c391f594aae2fd9f679d419e9a4d5ba4bce5bb13f6a989195656e7dc4b95c8f0"}, + {file = "pandas-2.2.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9d1265545f579edf3f8f0cb6f89f234f5e44ba725a34d86535b1a1d38decbccc"}, + {file = "pandas-2.2.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:11940e9e3056576ac3244baef2fedade891977bcc1cb7e5cc8f8cc7d603edc89"}, + {file = "pandas-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:4acf681325ee1c7f950d058b05a820441075b0dd9a2adf5c4835b9bc056bf4fb"}, + {file = "pandas-2.2.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9bd8a40f47080825af4317d0340c656744f2bfdb6819f818e6ba3cd24c0e1397"}, + {file = "pandas-2.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:df0c37ebd19e11d089ceba66eba59a168242fc6b7155cba4ffffa6eccdfb8f16"}, + {file = "pandas-2.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:739cc70eaf17d57608639e74d63387b0d8594ce02f69e7a0b046f117974b3019"}, + {file = "pandas-2.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9d3558d263073ed95e46f4650becff0c5e1ffe0fc3a015de3c79283dfbdb3df"}, + {file = "pandas-2.2.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4aa1d8707812a658debf03824016bf5ea0d516afdea29b7dc14cf687bc4d4ec6"}, + {file = "pandas-2.2.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:76f27a809cda87e07f192f001d11adc2b930e93a2b0c4a236fde5429527423be"}, + {file = "pandas-2.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:1ba21b1d5c0e43416218db63037dbe1a01fc101dc6e6024bcad08123e48004ab"}, + {file = "pandas-2.2.1.tar.gz", hash = "sha256:0ab90f87093c13f3e8fa45b48ba9f39181046e8f3317d3aadb2fffbb1b978572"}, ] [package.dependencies] @@ -782,6 +782,7 @@ parquet = ["pyarrow (>=10.0.1)"] performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] plot = ["matplotlib (>=3.6.3)"] postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +pyarrow = ["pyarrow (>=10.0.1)"] spss = ["pyreadstat (>=1.2.0)"] sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] @@ -972,18 +973,18 @@ files = [ [[package]] name = "pydantic" -version = "2.6.1" +version = "2.6.3" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.6.1-py3-none-any.whl", hash = "sha256:0b6a909df3192245cb736509a92ff69e4fef76116feffec68e93a567347bae6f"}, - {file = "pydantic-2.6.1.tar.gz", hash = 
"sha256:4fd5c182a2488dc63e6d32737ff19937888001e2a6d86e94b3f233104a5d1fa9"}, + {file = "pydantic-2.6.3-py3-none-any.whl", hash = "sha256:72c6034df47f46ccdf81869fddb81aade68056003900a8724a4f160700016a2a"}, + {file = "pydantic-2.6.3.tar.gz", hash = "sha256:e07805c4c7f5c6826e33a1d4c9d47950d7eaf34868e2690f8594d2e30241f11f"}, ] [package.dependencies] annotated-types = ">=0.4.0" -pydantic-core = "2.16.2" +pydantic-core = "2.16.3" typing-extensions = ">=4.6.1" [package.extras] @@ -991,90 +992,90 @@ email = ["email-validator (>=2.0.0)"] [[package]] name = "pydantic-core" -version = "2.16.2" +version = "2.16.3" description = "" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic_core-2.16.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3fab4e75b8c525a4776e7630b9ee48aea50107fea6ca9f593c98da3f4d11bf7c"}, - {file = "pydantic_core-2.16.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8bde5b48c65b8e807409e6f20baee5d2cd880e0fad00b1a811ebc43e39a00ab2"}, - {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2924b89b16420712e9bb8192396026a8fbd6d8726224f918353ac19c4c043d2a"}, - {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:16aa02e7a0f539098e215fc193c8926c897175d64c7926d00a36188917717a05"}, - {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:936a787f83db1f2115ee829dd615c4f684ee48ac4de5779ab4300994d8af325b"}, - {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:459d6be6134ce3b38e0ef76f8a672924460c455d45f1ad8fdade36796df1ddc8"}, - {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9ee4febb249c591d07b2d4dd36ebcad0ccd128962aaa1801508320896575ef"}, - {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40a0bd0bed96dae5712dab2aba7d334a6c67cbcac2ddfca7dbcc4a8176445990"}, - {file = "pydantic_core-2.16.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:870dbfa94de9b8866b37b867a2cb37a60c401d9deb4a9ea392abf11a1f98037b"}, - {file = "pydantic_core-2.16.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:308974fdf98046db28440eb3377abba274808bf66262e042c412eb2adf852731"}, - {file = "pydantic_core-2.16.2-cp310-none-win32.whl", hash = "sha256:a477932664d9611d7a0816cc3c0eb1f8856f8a42435488280dfbf4395e141485"}, - {file = "pydantic_core-2.16.2-cp310-none-win_amd64.whl", hash = "sha256:8f9142a6ed83d90c94a3efd7af8873bf7cefed2d3d44387bf848888482e2d25f"}, - {file = "pydantic_core-2.16.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:406fac1d09edc613020ce9cf3f2ccf1a1b2f57ab00552b4c18e3d5276c67eb11"}, - {file = "pydantic_core-2.16.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ce232a6170dd6532096cadbf6185271e4e8c70fc9217ebe105923ac105da9978"}, - {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a90fec23b4b05a09ad988e7a4f4e081711a90eb2a55b9c984d8b74597599180f"}, - {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8aafeedb6597a163a9c9727d8a8bd363a93277701b7bfd2749fbefee2396469e"}, - {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9957433c3a1b67bdd4c63717eaf174ebb749510d5ea612cd4e83f2d9142f3fc8"}, - {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash 
= "sha256:b0d7a9165167269758145756db43a133608a531b1e5bb6a626b9ee24bc38a8f7"}, - {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dffaf740fe2e147fedcb6b561353a16243e654f7fe8e701b1b9db148242e1272"}, - {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8ed79883b4328b7f0bd142733d99c8e6b22703e908ec63d930b06be3a0e7113"}, - {file = "pydantic_core-2.16.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:cf903310a34e14651c9de056fcc12ce090560864d5a2bb0174b971685684e1d8"}, - {file = "pydantic_core-2.16.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:46b0d5520dbcafea9a8645a8164658777686c5c524d381d983317d29687cce97"}, - {file = "pydantic_core-2.16.2-cp311-none-win32.whl", hash = "sha256:70651ff6e663428cea902dac297066d5c6e5423fda345a4ca62430575364d62b"}, - {file = "pydantic_core-2.16.2-cp311-none-win_amd64.whl", hash = "sha256:98dc6f4f2095fc7ad277782a7c2c88296badcad92316b5a6e530930b1d475ebc"}, - {file = "pydantic_core-2.16.2-cp311-none-win_arm64.whl", hash = "sha256:ef6113cd31411eaf9b39fc5a8848e71c72656fd418882488598758b2c8c6dfa0"}, - {file = "pydantic_core-2.16.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:88646cae28eb1dd5cd1e09605680c2b043b64d7481cdad7f5003ebef401a3039"}, - {file = "pydantic_core-2.16.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7b883af50eaa6bb3299780651e5be921e88050ccf00e3e583b1e92020333304b"}, - {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bf26c2e2ea59d32807081ad51968133af3025c4ba5753e6a794683d2c91bf6e"}, - {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:99af961d72ac731aae2a1b55ccbdae0733d816f8bfb97b41909e143de735f522"}, - {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02906e7306cb8c5901a1feb61f9ab5e5c690dbbeaa04d84c1b9ae2a01ebe9379"}, - {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5362d099c244a2d2f9659fb3c9db7c735f0004765bbe06b99be69fbd87c3f15"}, - {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ac426704840877a285d03a445e162eb258924f014e2f074e209d9b4ff7bf380"}, - {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b94cbda27267423411c928208e89adddf2ea5dd5f74b9528513f0358bba019cb"}, - {file = "pydantic_core-2.16.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:6db58c22ac6c81aeac33912fb1af0e930bc9774166cdd56eade913d5f2fff35e"}, - {file = "pydantic_core-2.16.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:396fdf88b1b503c9c59c84a08b6833ec0c3b5ad1a83230252a9e17b7dfb4cffc"}, - {file = "pydantic_core-2.16.2-cp312-none-win32.whl", hash = "sha256:7c31669e0c8cc68400ef0c730c3a1e11317ba76b892deeefaf52dcb41d56ed5d"}, - {file = "pydantic_core-2.16.2-cp312-none-win_amd64.whl", hash = "sha256:a3b7352b48fbc8b446b75f3069124e87f599d25afb8baa96a550256c031bb890"}, - {file = "pydantic_core-2.16.2-cp312-none-win_arm64.whl", hash = "sha256:a9e523474998fb33f7c1a4d55f5504c908d57add624599e095c20fa575b8d943"}, - {file = "pydantic_core-2.16.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:ae34418b6b389d601b31153b84dce480351a352e0bb763684a1b993d6be30f17"}, - {file = "pydantic_core-2.16.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:732bd062c9e5d9582a30e8751461c1917dd1ccbdd6cafb032f02c86b20d2e7ec"}, - {file = 
"pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b52776a2e3230f4854907a1e0946eec04d41b1fc64069ee774876bbe0eab55"}, - {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ef551c053692b1e39e3f7950ce2296536728871110e7d75c4e7753fb30ca87f4"}, - {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ebb892ed8599b23fa8f1799e13a12c87a97a6c9d0f497525ce9858564c4575a4"}, - {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aa6c8c582036275997a733427b88031a32ffa5dfc3124dc25a730658c47a572f"}, - {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4ba0884a91f1aecce75202473ab138724aa4fb26d7707f2e1fa6c3e68c84fbf"}, - {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7924e54f7ce5d253d6160090ddc6df25ed2feea25bfb3339b424a9dd591688bc"}, - {file = "pydantic_core-2.16.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69a7b96b59322a81c2203be537957313b07dd333105b73db0b69212c7d867b4b"}, - {file = "pydantic_core-2.16.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:7e6231aa5bdacda78e96ad7b07d0c312f34ba35d717115f4b4bff6cb87224f0f"}, - {file = "pydantic_core-2.16.2-cp38-none-win32.whl", hash = "sha256:41dac3b9fce187a25c6253ec79a3f9e2a7e761eb08690e90415069ea4a68ff7a"}, - {file = "pydantic_core-2.16.2-cp38-none-win_amd64.whl", hash = "sha256:f685dbc1fdadb1dcd5b5e51e0a378d4685a891b2ddaf8e2bba89bd3a7144e44a"}, - {file = "pydantic_core-2.16.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:55749f745ebf154c0d63d46c8c58594d8894b161928aa41adbb0709c1fe78b77"}, - {file = "pydantic_core-2.16.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b30b0dd58a4509c3bd7eefddf6338565c4905406aee0c6e4a5293841411a1286"}, - {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18de31781cdc7e7b28678df7c2d7882f9692ad060bc6ee3c94eb15a5d733f8f7"}, - {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5864b0242f74b9dd0b78fd39db1768bc3f00d1ffc14e596fd3e3f2ce43436a33"}, - {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8f9186ca45aee030dc8234118b9c0784ad91a0bb27fc4e7d9d6608a5e3d386c"}, - {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc6f6c9be0ab6da37bc77c2dda5f14b1d532d5dbef00311ee6e13357a418e646"}, - {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa057095f621dad24a1e906747179a69780ef45cc8f69e97463692adbcdae878"}, - {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6ad84731a26bcfb299f9eab56c7932d46f9cad51c52768cace09e92a19e4cf55"}, - {file = "pydantic_core-2.16.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3b052c753c4babf2d1edc034c97851f867c87d6f3ea63a12e2700f159f5c41c3"}, - {file = "pydantic_core-2.16.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e0f686549e32ccdb02ae6f25eee40cc33900910085de6aa3790effd391ae10c2"}, - {file = "pydantic_core-2.16.2-cp39-none-win32.whl", hash = "sha256:7afb844041e707ac9ad9acad2188a90bffce2c770e6dc2318be0c9916aef1469"}, - {file = "pydantic_core-2.16.2-cp39-none-win_amd64.whl", hash = "sha256:9da90d393a8227d717c19f5397688a38635afec89f2e2d7af0df037f3249c39a"}, - {file = 
"pydantic_core-2.16.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5f60f920691a620b03082692c378661947d09415743e437a7478c309eb0e4f82"}, - {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:47924039e785a04d4a4fa49455e51b4eb3422d6eaacfde9fc9abf8fdef164e8a"}, - {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6294e76b0380bb7a61eb8a39273c40b20beb35e8c87ee101062834ced19c545"}, - {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe56851c3f1d6f5384b3051c536cc81b3a93a73faf931f404fef95217cf1e10d"}, - {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9d776d30cde7e541b8180103c3f294ef7c1862fd45d81738d156d00551005784"}, - {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:72f7919af5de5ecfaf1eba47bf9a5d8aa089a3340277276e5636d16ee97614d7"}, - {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:4bfcbde6e06c56b30668a0c872d75a7ef3025dc3c1823a13cf29a0e9b33f67e8"}, - {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ff7c97eb7a29aba230389a2661edf2e9e06ce616c7e35aa764879b6894a44b25"}, - {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9b5f13857da99325dcabe1cc4e9e6a3d7b2e2c726248ba5dd4be3e8e4a0b6d0e"}, - {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a7e41e3ada4cca5f22b478c08e973c930e5e6c7ba3588fb8e35f2398cdcc1545"}, - {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60eb8ceaa40a41540b9acae6ae7c1f0a67d233c40dc4359c256ad2ad85bdf5e5"}, - {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7beec26729d496a12fd23cf8da9944ee338c8b8a17035a560b585c36fe81af20"}, - {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:22c5f022799f3cd6741e24f0443ead92ef42be93ffda0d29b2597208c94c3753"}, - {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:eca58e319f4fd6df004762419612122b2c7e7d95ffafc37e890252f869f3fb2a"}, - {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ed957db4c33bc99895f3a1672eca7e80e8cda8bd1e29a80536b4ec2153fa9804"}, - {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:459c0d338cc55d099798618f714b21b7ece17eb1a87879f2da20a3ff4c7628e2"}, - {file = "pydantic_core-2.16.2.tar.gz", hash = "sha256:0ba503850d8b8dcc18391f10de896ae51d37fe5fe43dbfb6a35c5c5cad271a06"}, + {file = "pydantic_core-2.16.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:75b81e678d1c1ede0785c7f46690621e4c6e63ccd9192af1f0bd9d504bbb6bf4"}, + {file = "pydantic_core-2.16.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9c865a7ee6f93783bd5d781af5a4c43dadc37053a5b42f7d18dc019f8c9d2bd1"}, + {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:162e498303d2b1c036b957a1278fa0899d02b2842f1ff901b6395104c5554a45"}, + {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2f583bd01bbfbff4eaee0868e6fc607efdfcc2b03c1c766b06a707abbc856187"}, + {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:b926dd38db1519ed3043a4de50214e0d600d404099c3392f098a7f9d75029ff8"}, + {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:716b542728d4c742353448765aa7cdaa519a7b82f9564130e2b3f6766018c9ec"}, + {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc4ad7f7ee1a13d9cb49d8198cd7d7e3aa93e425f371a68235f784e99741561f"}, + {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bd87f48924f360e5d1c5f770d6155ce0e7d83f7b4e10c2f9ec001c73cf475c99"}, + {file = "pydantic_core-2.16.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0df446663464884297c793874573549229f9eca73b59360878f382a0fc085979"}, + {file = "pydantic_core-2.16.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4df8a199d9f6afc5ae9a65f8f95ee52cae389a8c6b20163762bde0426275b7db"}, + {file = "pydantic_core-2.16.3-cp310-none-win32.whl", hash = "sha256:456855f57b413f077dff513a5a28ed838dbbb15082ba00f80750377eed23d132"}, + {file = "pydantic_core-2.16.3-cp310-none-win_amd64.whl", hash = "sha256:732da3243e1b8d3eab8c6ae23ae6a58548849d2e4a4e03a1924c8ddf71a387cb"}, + {file = "pydantic_core-2.16.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:519ae0312616026bf4cedc0fe459e982734f3ca82ee8c7246c19b650b60a5ee4"}, + {file = "pydantic_core-2.16.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b3992a322a5617ded0a9f23fd06dbc1e4bd7cf39bc4ccf344b10f80af58beacd"}, + {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d62da299c6ecb04df729e4b5c52dc0d53f4f8430b4492b93aa8de1f541c4aac"}, + {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2acca2be4bb2f2147ada8cac612f8a98fc09f41c89f87add7256ad27332c2fda"}, + {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1b662180108c55dfbf1280d865b2d116633d436cfc0bba82323554873967b340"}, + {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e7c6ed0dc9d8e65f24f5824291550139fe6f37fac03788d4580da0d33bc00c97"}, + {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6b1bb0827f56654b4437955555dc3aeeebeddc47c2d7ed575477f082622c49e"}, + {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e56f8186d6210ac7ece503193ec84104da7ceb98f68ce18c07282fcc2452e76f"}, + {file = "pydantic_core-2.16.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:936e5db01dd49476fa8f4383c259b8b1303d5dd5fb34c97de194560698cc2c5e"}, + {file = "pydantic_core-2.16.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:33809aebac276089b78db106ee692bdc9044710e26f24a9a2eaa35a0f9fa70ba"}, + {file = "pydantic_core-2.16.3-cp311-none-win32.whl", hash = "sha256:ded1c35f15c9dea16ead9bffcde9bb5c7c031bff076355dc58dcb1cb436c4721"}, + {file = "pydantic_core-2.16.3-cp311-none-win_amd64.whl", hash = "sha256:d89ca19cdd0dd5f31606a9329e309d4fcbb3df860960acec32630297d61820df"}, + {file = "pydantic_core-2.16.3-cp311-none-win_arm64.whl", hash = "sha256:6162f8d2dc27ba21027f261e4fa26f8bcb3cf9784b7f9499466a311ac284b5b9"}, + {file = "pydantic_core-2.16.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0f56ae86b60ea987ae8bcd6654a887238fd53d1384f9b222ac457070b7ac4cff"}, + {file = "pydantic_core-2.16.3-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:c9bd22a2a639e26171068f8ebb5400ce2c1bc7d17959f60a3b753ae13c632975"}, + {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4204e773b4b408062960e65468d5346bdfe139247ee5f1ca2a378983e11388a2"}, + {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f651dd19363c632f4abe3480a7c87a9773be27cfe1341aef06e8759599454120"}, + {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aaf09e615a0bf98d406657e0008e4a8701b11481840be7d31755dc9f97c44053"}, + {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8e47755d8152c1ab5b55928ab422a76e2e7b22b5ed8e90a7d584268dd49e9c6b"}, + {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:500960cb3a0543a724a81ba859da816e8cf01b0e6aaeedf2c3775d12ee49cade"}, + {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cf6204fe865da605285c34cf1172879d0314ff267b1c35ff59de7154f35fdc2e"}, + {file = "pydantic_core-2.16.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d33dd21f572545649f90c38c227cc8631268ba25c460b5569abebdd0ec5974ca"}, + {file = "pydantic_core-2.16.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:49d5d58abd4b83fb8ce763be7794d09b2f50f10aa65c0f0c1696c677edeb7cbf"}, + {file = "pydantic_core-2.16.3-cp312-none-win32.whl", hash = "sha256:f53aace168a2a10582e570b7736cc5bef12cae9cf21775e3eafac597e8551fbe"}, + {file = "pydantic_core-2.16.3-cp312-none-win_amd64.whl", hash = "sha256:0d32576b1de5a30d9a97f300cc6a3f4694c428d956adbc7e6e2f9cad279e45ed"}, + {file = "pydantic_core-2.16.3-cp312-none-win_arm64.whl", hash = "sha256:ec08be75bb268473677edb83ba71e7e74b43c008e4a7b1907c6d57e940bf34b6"}, + {file = "pydantic_core-2.16.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:b1f6f5938d63c6139860f044e2538baeee6f0b251a1816e7adb6cbce106a1f01"}, + {file = "pydantic_core-2.16.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2a1ef6a36fdbf71538142ed604ad19b82f67b05749512e47f247a6ddd06afdc7"}, + {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:704d35ecc7e9c31d48926150afada60401c55efa3b46cd1ded5a01bdffaf1d48"}, + {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d937653a696465677ed583124b94a4b2d79f5e30b2c46115a68e482c6a591c8a"}, + {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9803edf8e29bd825f43481f19c37f50d2b01899448273b3a7758441b512acf8"}, + {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:72282ad4892a9fb2da25defeac8c2e84352c108705c972db82ab121d15f14e6d"}, + {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f752826b5b8361193df55afcdf8ca6a57d0232653494ba473630a83ba50d8c9"}, + {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4384a8f68ddb31a0b0c3deae88765f5868a1b9148939c3f4121233314ad5532c"}, + {file = "pydantic_core-2.16.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a4b2bf78342c40b3dc830880106f54328928ff03e357935ad26c7128bbd66ce8"}, + {file = "pydantic_core-2.16.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:13dcc4802961b5f843a9385fc821a0b0135e8c07fc3d9949fd49627c1a5e6ae5"}, + {file = "pydantic_core-2.16.3-cp38-none-win32.whl", 
hash = "sha256:e3e70c94a0c3841e6aa831edab1619ad5c511199be94d0c11ba75fe06efe107a"}, + {file = "pydantic_core-2.16.3-cp38-none-win_amd64.whl", hash = "sha256:ecdf6bf5f578615f2e985a5e1f6572e23aa632c4bd1dc67f8f406d445ac115ed"}, + {file = "pydantic_core-2.16.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:bda1ee3e08252b8d41fa5537413ffdddd58fa73107171a126d3b9ff001b9b820"}, + {file = "pydantic_core-2.16.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:21b888c973e4f26b7a96491c0965a8a312e13be108022ee510248fe379a5fa23"}, + {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be0ec334369316fa73448cc8c982c01e5d2a81c95969d58b8f6e272884df0074"}, + {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b5b6079cc452a7c53dd378c6f881ac528246b3ac9aae0f8eef98498a75657805"}, + {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ee8d5f878dccb6d499ba4d30d757111847b6849ae07acdd1205fffa1fc1253c"}, + {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7233d65d9d651242a68801159763d09e9ec96e8a158dbf118dc090cd77a104c9"}, + {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6119dc90483a5cb50a1306adb8d52c66e447da88ea44f323e0ae1a5fcb14256"}, + {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:578114bc803a4c1ff9946d977c221e4376620a46cf78da267d946397dc9514a8"}, + {file = "pydantic_core-2.16.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d8f99b147ff3fcf6b3cc60cb0c39ea443884d5559a30b1481e92495f2310ff2b"}, + {file = "pydantic_core-2.16.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4ac6b4ce1e7283d715c4b729d8f9dab9627586dafce81d9eaa009dd7f25dd972"}, + {file = "pydantic_core-2.16.3-cp39-none-win32.whl", hash = "sha256:e7774b570e61cb998490c5235740d475413a1f6de823169b4cf94e2fe9e9f6b2"}, + {file = "pydantic_core-2.16.3-cp39-none-win_amd64.whl", hash = "sha256:9091632a25b8b87b9a605ec0e61f241c456e9248bfdcf7abdf344fdb169c81cf"}, + {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:36fa178aacbc277bc6b62a2c3da95226520da4f4e9e206fdf076484363895d2c"}, + {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:dcca5d2bf65c6fb591fff92da03f94cd4f315972f97c21975398bd4bd046854a"}, + {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a72fb9963cba4cd5793854fd12f4cfee731e86df140f59ff52a49b3552db241"}, + {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b60cc1a081f80a2105a59385b92d82278b15d80ebb3adb200542ae165cd7d183"}, + {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cbcc558401de90a746d02ef330c528f2e668c83350f045833543cd57ecead1ad"}, + {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:fee427241c2d9fb7192b658190f9f5fd6dfe41e02f3c1489d2ec1e6a5ab1e04a"}, + {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f4cb85f693044e0f71f394ff76c98ddc1bc0953e48c061725e540396d5c8a2e1"}, + {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b29eeb887aa931c2fcef5aa515d9d176d25006794610c264ddc114c053bf96fe"}, + {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", 
hash = "sha256:a425479ee40ff021f8216c9d07a6a3b54b31c8267c6e17aa88b70d7ebd0e5e5b"}, + {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:5c5cbc703168d1b7a838668998308018a2718c2130595e8e190220238addc96f"}, + {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99b6add4c0b39a513d323d3b93bc173dac663c27b99860dd5bf491b240d26137"}, + {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75f76ee558751746d6a38f89d60b6228fa174e5172d143886af0f85aa306fd89"}, + {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:00ee1c97b5364b84cb0bd82e9bbf645d5e2871fb8c58059d158412fee2d33d8a"}, + {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:287073c66748f624be4cef893ef9174e3eb88fe0b8a78dc22e88eca4bc357ca6"}, + {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ed25e1835c00a332cb10c683cd39da96a719ab1dfc08427d476bce41b92531fc"}, + {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:86b3d0033580bd6bbe07590152007275bd7af95f98eaa5bd36f3da219dcd93da"}, + {file = "pydantic_core-2.16.3.tar.gz", hash = "sha256:1cac689f80a3abab2d3c0048b29eea5751114054f032a941a32de4c852c59cad"}, ] [package.dependencies] @@ -1116,13 +1117,13 @@ windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pytest" -version = "8.0.1" +version = "8.0.2" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-8.0.1-py3-none-any.whl", hash = "sha256:3e4f16fe1c0a9dc9d9389161c127c3edc5d810c38d6793042fb81d9f48a59fca"}, - {file = "pytest-8.0.1.tar.gz", hash = "sha256:267f6563751877d772019b13aacbe4e860d73fe8f651f28112e9ac37de7513ae"}, + {file = "pytest-8.0.2-py3-none-any.whl", hash = "sha256:edfaaef32ce5172d5466b5127b42e0d6d35ebbe4453f0e3505d96afd93f6b096"}, + {file = "pytest-8.0.2.tar.gz", hash = "sha256:d4051d623a2e0b7e51960ba963193b09ce6daeb9759a451844a21e4ddedfc1bd"}, ] [package.dependencies] @@ -1188,13 +1189,13 @@ testing = ["filelock"] [[package]] name = "python-dateutil" -version = "2.8.2" +version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, ] [package.dependencies] @@ -1472,13 +1473,13 @@ types-pyasn1 = "*" [[package]] name = "types-pyasn1" -version = "0.5.0.20240205" +version = "0.5.0.20240301" description = "Typing stubs for pyasn1" optional = false python-versions = ">=3.8" files = [ - {file = "types-pyasn1-0.5.0.20240205.tar.gz", hash = "sha256:b42b4e967d2ad780bde2ce47d7627a00dfb11b37a451f3e73b264ec6e97e50c7"}, - {file = "types_pyasn1-0.5.0.20240205-py3-none-any.whl", hash = "sha256:40b205856c6a01d2ce6fa47a0be2a238a5556b04f47a2875a2aba680a65a959f"}, + {file = "types-pyasn1-0.5.0.20240301.tar.gz", hash = 
"sha256:da328f5771d54a2016863270b281047f9cc38e39f65a297ba9f987d5de3403f1"}, + {file = "types_pyasn1-0.5.0.20240301-py3-none-any.whl", hash = "sha256:d9989899184bbd6e2adf6f812c8f49c48197fceea251a6fb13666dae3203f80d"}, ] [[package]] @@ -1508,24 +1509,24 @@ urllib3 = ">=2" [[package]] name = "types-setuptools" -version = "69.1.0.20240217" +version = "69.1.0.20240302" description = "Typing stubs for setuptools" optional = false python-versions = ">=3.8" files = [ - {file = "types-setuptools-69.1.0.20240217.tar.gz", hash = "sha256:243fecc8850b6f7fbfa84bab18ec93407046a4e91130056fd5a7caef971aaff9"}, - {file = "types_setuptools-69.1.0.20240217-py3-none-any.whl", hash = "sha256:8b60e14a652b48bda292801c5a0c1251c190ad587c295f7839e901634913bb96"}, + {file = "types-setuptools-69.1.0.20240302.tar.gz", hash = "sha256:ed5462cf8470831d1bdbf300e1eeea876040643bfc40b785109a5857fa7d3c3f"}, + {file = "types_setuptools-69.1.0.20240302-py3-none-any.whl", hash = "sha256:99c1053920a6fa542b734c9ad61849c3993062f80963a4034771626528e192a0"}, ] [[package]] name = "typing-extensions" -version = "4.9.0" +version = "4.10.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"}, - {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, + {file = "typing_extensions-4.10.0-py3-none-any.whl", hash = "sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475"}, + {file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"}, ] [[package]] @@ -1570,4 +1571,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "c2a795d0d018d91ec0cce8f1c00ba3a46c2e5b29ec3f8978e9d0363ed49ce71d" +content-hash = "253993b04dfe967bfa65974cf26b00e8fe9853ddc45c5790bb665242cc161080" diff --git a/pyproject.toml b/pyproject.toml index 9374344c..684c8fc8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "mex-common" -version = "0.20.0" +version = "0.21.0" description = "Common library for MEx python projects." 
authors = ["RKI MEx Team "] readme = "README.md" @@ -27,11 +27,11 @@ ipdb = "^0.13.13" mex-model = { git = "https://github.com/robert-koch-institut/mex-model.git", rev = "2.2.0" } mypy = "^1.8.0" pandas-stubs = "^2.1.4" -pytest = "^8.0.0" +pytest = "^8.0.2" pytest-cov = "^4.1.0" pytest-random-order = "^1.1.1" pytest-xdist = "^3.5.0" -ruff = "^0.2.1" +ruff = "^0.2.2" sphinx = "^7.2.6" types-ldap3 = "^2.9.13" types-pytz = "^2024.1.0" diff --git a/tests/connector/test_http.py b/tests/connector/test_http.py index f463b0b8..5716c469 100644 --- a/tests/connector/test_http.py +++ b/tests/connector/test_http.py @@ -138,5 +138,5 @@ def set_mocked_session(self: DummyConnector) -> None: "https://www.example.com/things", None, timeout=DummyConnector.TIMEOUT, - **expected_kwargs + **expected_kwargs, ) diff --git a/tests/identity/test_memory.py b/tests/identity/test_memory.py index 79b3e3d6..4f7e1dcc 100644 --- a/tests/identity/test_memory.py +++ b/tests/identity/test_memory.py @@ -1,11 +1,11 @@ from mex.common.identity.memory import MemoryIdentityProvider from mex.common.testing import Joker -from mex.common.types import PrimarySourceID +from mex.common.types import MergedPrimarySourceIdentifier def test_assign() -> None: provider = MemoryIdentityProvider.get() - had_primary_source = PrimarySourceID("00000000000000") + had_primary_source = MergedPrimarySourceIdentifier("00000000000000") identifier_in_primary_source = "thing-1" new_identity = provider.assign(had_primary_source, identifier_in_primary_source) @@ -29,7 +29,7 @@ def test_assign() -> None: def test_fetch_empty() -> None: provider = MemoryIdentityProvider.get() - had_primary_source = PrimarySourceID("00000000000000") + had_primary_source = MergedPrimarySourceIdentifier("00000000000000") identifier_in_primary_source = "thing-1" # fetch something that the provider does not know @@ -43,7 +43,7 @@ def test_fetch_empty() -> None: def test_fetch_found() -> None: provider = MemoryIdentityProvider.get() - had_primary_source = PrimarySourceID("00000000000000") + had_primary_source = MergedPrimarySourceIdentifier("00000000000000") identifier_in_primary_source = "thing-1" # assign this identity first diff --git a/tests/models/test_base.py b/tests/models/test_base.py index 3f7877b0..88e4416c 100644 --- a/tests/models/test_base.py +++ b/tests/models/test_base.py @@ -1,10 +1,11 @@ from enum import Enum -from typing import Any, Optional, Union +from typing import Annotated, Any, Literal, Optional, Union import pytest -from pydantic import ValidationError +from pydantic import Field, ValidationError -from mex.common.models import BaseModel +from mex.common.models import BaseModel, MergedItem +from mex.common.types import Identifier class ComplexDummyModel(BaseModel): @@ -82,18 +83,30 @@ class Shelter(Pet): Shelter(inhabitants="foo") # type: ignore -class DummyModel(BaseModel): +class DummyBaseModel(BaseModel): foo: Optional[str] = None -def test_checksum() -> None: - model_1 = DummyModel() - assert model_1.checksum() == "6a48475b6851bc444c39abec23f8520e" +def test_base_model_checksum() -> None: + model_1 = DummyBaseModel() + assert model_1.checksum() == "69d67f58c6948849283e78d7b3f1a51e" - model_2 = DummyModel(foo="bar") + model_2 = DummyBaseModel(foo="bar") assert model_1.checksum() != model_2.checksum() -def test_model_str() -> None: - model = DummyModel(foo="bar") - assert str(model) == "DummyModel: 68008f92758ef95dd4de3319183c3fef" +def test_base_model_str() -> None: + model = DummyBaseModel(foo="bar") + assert str(model) == "DummyBaseModel: 
ab794a793aad8fa45b0f85ac05ee2126" + + +def test_mex_model_str() -> None: + class MergedDummy(MergedItem): + entityType: Annotated[ + Literal["MergedDummy"], Field(alias="$type", frozen=True) + ] = "MergedDummy" + identifier: Identifier + + model = MergedDummy(identifier=Identifier.generate(seed=99)) + + assert str(model) == "MergedDummy: bFQoRhcVH5DHV1" diff --git a/tests/models/test_extracted_data.py b/tests/models/test_extracted_data.py index 37120fa8..826fa2ed 100644 --- a/tests/models/test_extracted_data.py +++ b/tests/models/test_extracted_data.py @@ -1,7 +1,8 @@ from enum import Enum +from typing import Annotated, Literal import pytest -from pydantic import ValidationError +from pydantic import Field, ValidationError from mex.common.identity import get_provider from mex.common.models import ( @@ -10,7 +11,7 @@ BaseModel, ExtractedData, ) -from mex.common.types import Identifier, PrimarySourceID +from mex.common.types import Identifier, MergedPrimarySourceIdentifier class Animal(Enum): @@ -20,15 +21,29 @@ class Animal(Enum): DOG = "dog" +class ExtractedThingIdentifier(Identifier): + """Identifier for extracted things.""" + + +class MergedThingIdentifier(Identifier): + """Identifier for merged thing.""" + + class BaseThing(BaseModel): - """Dummy model defining a generic stableTargetId.""" + """Dummy model defining some arbitrary field.""" - stableTargetId: Identifier + someField: str = "someDefault" class ExtractedThing(BaseThing, ExtractedData): """Extracted version of a dummy thing model.""" + entityType: Annotated[ + Literal["ExtractedThing"], Field(alias="$type", frozen=True) + ] = "ExtractedThing" + identifier: Annotated[ExtractedThingIdentifier, Field(frozen=True)] + stableTargetId: MergedThingIdentifier + def test_extracted_data_requires_dict_for_construction() -> None: with pytest.raises(ValidationError, match="Input should be a valid dictionary"): @@ -38,7 +53,7 @@ def test_extracted_data_requires_dict_for_construction() -> None: def test_extracted_data_requires_identifier_in_primary_source() -> None: with pytest.raises(ValidationError, match="identifierInPrimarySource"): ExtractedThing( - hadPrimarySource=PrimarySourceID.generate(seed=1), + hadPrimarySource=MergedPrimarySourceIdentifier.generate(seed=1), ) @@ -53,19 +68,19 @@ def test_extracted_data_does_not_allow_setting_identifier() -> None: with pytest.raises(ValidationError, match="Identifier cannot be set manually"): ExtractedThing( identifier=Identifier.generate(seed=0), - hadPrimarySource=PrimarySourceID.generate(seed=1), + hadPrimarySource=MergedPrimarySourceIdentifier.generate(seed=1), identifierInPrimarySource="0", ) def test_extracted_data_does_allow_setting_preexisting_identifiers() -> None: thing_1 = ExtractedThing( - hadPrimarySource=PrimarySourceID.generate(seed=1), + hadPrimarySource=MergedPrimarySourceIdentifier.generate(seed=1), identifierInPrimarySource="0", ) thing_2 = ExtractedThing( identifier=thing_1.identifier, - hadPrimarySource=PrimarySourceID.generate(seed=1), + hadPrimarySource=MergedPrimarySourceIdentifier.generate(seed=1), identifierInPrimarySource="0", ) @@ -78,14 +93,14 @@ def test_extracted_data_does_not_allow_changing_mex_stable_target_id() -> None: identifier=MEX_PRIMARY_SOURCE_STABLE_TARGET_ID, hadPrimarySource=MEX_PRIMARY_SOURCE_STABLE_TARGET_ID, identifierInPrimarySource=MEX_PRIMARY_SOURCE_IDENTIFIER_IN_PRIMARY_SOURCE, - stableTargetId=PrimarySourceID.generate(seed=12345), + stableTargetId=MergedPrimarySourceIdentifier.generate(seed=12345), ) def 
test_extracted_data_stores_identity_in_provider() -> None: thing = ExtractedThing( identifierInPrimarySource="12345", - hadPrimarySource=PrimarySourceID.generate(seed=12345), + hadPrimarySource=MergedPrimarySourceIdentifier.generate(seed=12345), ) provider = get_provider() diff --git a/tests/models/test_filter.py b/tests/models/test_filter.py index 09630b5c..a51bcf28 100644 --- a/tests/models/test_filter.py +++ b/tests/models/test_filter.py @@ -1,20 +1,22 @@ +from typing import Annotated + from pydantic import Field from mex.common.models import ExtractedData from mex.common.models.filter import ( generate_entity_filter_schema, ) -from mex.common.types import OrganizationalUnitID +from mex.common.types import MergedOrganizationalUnitIdentifier from mex.common.types.email import Email class DummyClass(ExtractedData): - dummy_identifier: OrganizationalUnitID | None = None # not required + dummy_identifier: MergedOrganizationalUnitIdentifier | None = None # not required dummy_str: str dummy_int: int | None = None # not required dummy_email: Email dummy_list: list[str] = [] # not required - dummy_min_length_list: list[str] = Field(min_length=1) + dummy_min_length_list: Annotated[list[str], Field(min_length=1)] def test_entity_filter_schema() -> None: diff --git a/tests/models/test_mapping.py b/tests/models/test_mapping.py index 5e5f77f4..07a3b321 100644 --- a/tests/models/test_mapping.py +++ b/tests/models/test_mapping.py @@ -1,20 +1,34 @@ +from typing import Annotated, Literal + from pydantic import Field from mex.common.models import ExtractedData from mex.common.models.mapping import ( generate_mapping_schema_for_mex_class, ) -from mex.common.types import OrganizationalUnitID -from mex.common.types.email import Email +from mex.common.types import Email, Identifier, MergedOrganizationalUnitIdentifier + + +class ExtractedDummyIdentifier(Identifier): + pass + + +class MergedDummyIdentifier(Identifier): + pass class ExtractedDummyClass(ExtractedData): - dummy_identifier: OrganizationalUnitID | None = None # not required + entityType: Annotated[ + Literal["ExtractedDummyClass"], Field(alias="$type", frozen=True) + ] = "ExtractedDummyClass" + identifier: Annotated[ExtractedDummyIdentifier, Field(frozen=True)] + stableTargetId: MergedDummyIdentifier + dummy_unit: MergedOrganizationalUnitIdentifier | None = None # not required dummy_str: str dummy_int: int | None = None # not required dummy_email: Email dummy_list: list[str] = [] # not required - dummy_min_length_list: list[str] = Field(min_length=1) + dummy_min_length_list: Annotated[list[str], Field(min_length=1)] def test_generate_mapping_schema() -> None: @@ -24,17 +38,16 @@ def test_generate_mapping_schema() -> None: "$defs": { "Dummy_emailFieldsInPrimarySource": { "additionalProperties": False, - "description": "Mapping schema " - "for " - "Dummy_email " - "fields in " - "primary " - "source.", + "description": "Mapping schema for Dummy_email fields in primary source.", "properties": { - "comment": { + "fieldInPrimarySource": { + "title": "Fieldinprimarysource", + "type": "string", + }, + "locationInPrimarySource": { "anyOf": [{"type": "string"}, {"type": "null"}], "default": None, - "title": "Comment", + "title": "Locationinprimarysource", }, "examplesInPrimarySource": { "anyOf": [ @@ -44,21 +57,17 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Examplesinprimarysource", }, - "fieldInPrimarySource": { - "title": "Fieldinprimarysource", - "type": "string", - }, - "locationInPrimarySource": { - "anyOf": [{"type": 
"string"}, {"type": "null"}], - "default": None, - "title": "Locationinprimarysource", - }, "mappingRules": { "items": {"$ref": "#/$defs/Dummy_emailMappingRule"}, "minItems": 1, "title": "Mappingrules", "type": "array", }, + "comment": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Comment", + }, }, "required": ["fieldInPrimarySource", "mappingRules"], "title": "Dummy_emailFieldsInPrimarySource", @@ -66,7 +75,7 @@ def test_generate_mapping_schema() -> None: }, "Dummy_emailMappingRule": { "additionalProperties": False, - "description": "Mapping rule schema of " "field Dummy_email.", + "description": "Mapping rule schema of field Dummy_email.", "properties": { "forValues": { "anyOf": [ @@ -76,21 +85,13 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Forvalues", }, - "rule": { - "anyOf": [{"type": "string"}, {"type": "null"}], - "default": None, - "title": "Rule", - }, "setValues": { "anyOf": [ { "items": { "examples": ["info@rki.de"], "format": "email", - "pattern": "^[^@ " - "\\t\\r\\n]+@[^@ " - "\\t\\r\\n]+\\.[^@ " - "\\t\\r\\n]+$", + "pattern": "^[^@ \\t\\r\\n]+@[^@ \\t\\r\\n]+\\.[^@ \\t\\r\\n]+$", "title": "Email", "type": "string", }, @@ -101,24 +102,27 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Setvalues", }, + "rule": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Rule", + }, }, "title": "Dummy_emailMappingRule", "type": "object", }, - "Dummy_identifierFieldsInPrimarySource": { + "Dummy_intFieldsInPrimarySource": { "additionalProperties": False, - "description": "Mapping " - "schema " - "for " - "Dummy_identifier " - "fields in " - "primary " - "source.", + "description": "Mapping schema for Dummy_int fields in primary source.", "properties": { - "comment": { + "fieldInPrimarySource": { + "title": "Fieldinprimarysource", + "type": "string", + }, + "locationInPrimarySource": { "anyOf": [{"type": "string"}, {"type": "null"}], "default": None, - "title": "Comment", + "title": "Locationinprimarysource", }, "examplesInPrimarySource": { "anyOf": [ @@ -128,29 +132,25 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Examplesinprimarysource", }, - "fieldInPrimarySource": { - "title": "Fieldinprimarysource", - "type": "string", - }, - "locationInPrimarySource": { - "anyOf": [{"type": "string"}, {"type": "null"}], - "default": None, - "title": "Locationinprimarysource", - }, "mappingRules": { - "items": {"$ref": "#/$defs/Dummy_identifierMappingRule"}, + "items": {"$ref": "#/$defs/Dummy_intMappingRule"}, "minItems": 1, "title": "Mappingrules", "type": "array", }, + "comment": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Comment", + }, }, "required": ["fieldInPrimarySource", "mappingRules"], - "title": "Dummy_identifierFieldsInPrimarySource", + "title": "Dummy_intFieldsInPrimarySource", "type": "object", }, - "Dummy_identifierMappingRule": { + "Dummy_intMappingRule": { "additionalProperties": False, - "description": "Mapping rule schema " "of field " "Dummy_identifier.", + "description": "Mapping rule schema of field Dummy_int.", "properties": { "forValues": { "anyOf": [ @@ -160,23 +160,11 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Forvalues", }, - "rule": { - "anyOf": [{"type": "string"}, {"type": "null"}], - "default": None, - "title": "Rule", - }, "setValues": { "anyOf": [ { "items": { - "anyOf": [ - { - "pattern": "^[a-zA-Z0-9]{14,22}$", - "title": "OrganizationalUnitID", 
- "type": "string", - }, - {"type": "null"}, - ] + "anyOf": [{"type": "integer"}, {"type": "null"}] }, "type": "array", }, @@ -185,21 +173,27 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Setvalues", }, + "rule": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Rule", + }, }, - "title": "Dummy_identifierMappingRule", + "title": "Dummy_intMappingRule", "type": "object", }, - "Dummy_intFieldsInPrimarySource": { + "Dummy_listFieldsInPrimarySource": { "additionalProperties": False, - "description": "Mapping schema " - "for Dummy_int " - "fields in " - "primary source.", + "description": "Mapping schema for Dummy_list fields in primary source.", "properties": { - "comment": { + "fieldInPrimarySource": { + "title": "Fieldinprimarysource", + "type": "string", + }, + "locationInPrimarySource": { "anyOf": [{"type": "string"}, {"type": "null"}], "default": None, - "title": "Comment", + "title": "Locationinprimarysource", }, "examplesInPrimarySource": { "anyOf": [ @@ -209,29 +203,25 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Examplesinprimarysource", }, - "fieldInPrimarySource": { - "title": "Fieldinprimarysource", - "type": "string", - }, - "locationInPrimarySource": { - "anyOf": [{"type": "string"}, {"type": "null"}], - "default": None, - "title": "Locationinprimarysource", - }, "mappingRules": { - "items": {"$ref": "#/$defs/Dummy_intMappingRule"}, + "items": {"$ref": "#/$defs/Dummy_listMappingRule"}, "minItems": 1, "title": "Mappingrules", "type": "array", }, + "comment": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Comment", + }, }, "required": ["fieldInPrimarySource", "mappingRules"], - "title": "Dummy_intFieldsInPrimarySource", + "title": "Dummy_listFieldsInPrimarySource", "type": "object", }, - "Dummy_intMappingRule": { + "Dummy_listMappingRule": { "additionalProperties": False, - "description": "Mapping rule schema of " "field Dummy_int.", + "description": "Mapping rule schema of field Dummy_list.", "properties": { "forValues": { "anyOf": [ @@ -241,39 +231,35 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Forvalues", }, - "rule": { - "anyOf": [{"type": "string"}, {"type": "null"}], - "default": None, - "title": "Rule", - }, "setValues": { "anyOf": [ - { - "items": { - "anyOf": [{"type": "integer"}, {"type": "null"}] - }, - "type": "array", - }, + {"items": {"type": "string"}, "type": "array"}, {"type": "null"}, ], "default": None, "title": "Setvalues", }, + "rule": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Rule", + }, }, - "title": "Dummy_intMappingRule", + "title": "Dummy_listMappingRule", "type": "object", }, - "Dummy_listFieldsInPrimarySource": { + "Dummy_min_length_listFieldsInPrimarySource": { "additionalProperties": False, - "description": "Mapping schema " - "for Dummy_list " - "fields in " - "primary source.", + "description": "Mapping schema for Dummy_min_length_list fields in primary source.", "properties": { - "comment": { + "fieldInPrimarySource": { + "title": "Fieldinprimarysource", + "type": "string", + }, + "locationInPrimarySource": { "anyOf": [{"type": "string"}, {"type": "null"}], "default": None, - "title": "Comment", + "title": "Locationinprimarysource", }, "examplesInPrimarySource": { "anyOf": [ @@ -283,29 +269,25 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Examplesinprimarysource", }, - "fieldInPrimarySource": { - "title": 
"Fieldinprimarysource", - "type": "string", - }, - "locationInPrimarySource": { - "anyOf": [{"type": "string"}, {"type": "null"}], - "default": None, - "title": "Locationinprimarysource", - }, "mappingRules": { - "items": {"$ref": "#/$defs/Dummy_listMappingRule"}, + "items": {"$ref": "#/$defs/Dummy_min_length_listMappingRule"}, "minItems": 1, "title": "Mappingrules", "type": "array", }, + "comment": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Comment", + }, }, "required": ["fieldInPrimarySource", "mappingRules"], - "title": "Dummy_listFieldsInPrimarySource", + "title": "Dummy_min_length_listFieldsInPrimarySource", "type": "object", }, - "Dummy_listMappingRule": { + "Dummy_min_length_listMappingRule": { "additionalProperties": False, - "description": "Mapping rule schema of " "field Dummy_list.", + "description": "Mapping rule schema of field Dummy_min_length_list.", "properties": { "forValues": { "anyOf": [ @@ -315,11 +297,6 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Forvalues", }, - "rule": { - "anyOf": [{"type": "string"}, {"type": "null"}], - "default": None, - "title": "Rule", - }, "setValues": { "anyOf": [ {"items": {"type": "string"}, "type": "array"}, @@ -328,25 +305,27 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Setvalues", }, + "rule": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Rule", + }, }, - "title": "Dummy_listMappingRule", + "title": "Dummy_min_length_listMappingRule", "type": "object", }, - "Dummy_min_length_listFieldsInPrimarySource": { + "Dummy_strFieldsInPrimarySource": { "additionalProperties": False, - "description": "Mapping " - "schema " - "for " - "Dummy_min_length_list " - "fields " - "in " - "primary " - "source.", + "description": "Mapping schema for Dummy_str fields in primary source.", "properties": { - "comment": { + "fieldInPrimarySource": { + "title": "Fieldinprimarysource", + "type": "string", + }, + "locationInPrimarySource": { "anyOf": [{"type": "string"}, {"type": "null"}], "default": None, - "title": "Comment", + "title": "Locationinprimarysource", }, "examplesInPrimarySource": { "anyOf": [ @@ -356,32 +335,25 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Examplesinprimarysource", }, - "fieldInPrimarySource": { - "title": "Fieldinprimarysource", - "type": "string", - }, - "locationInPrimarySource": { - "anyOf": [{"type": "string"}, {"type": "null"}], - "default": None, - "title": "Locationinprimarysource", - }, "mappingRules": { - "items": {"$ref": "#/$defs/Dummy_min_length_listMappingRule"}, + "items": {"$ref": "#/$defs/Dummy_strMappingRule"}, "minItems": 1, "title": "Mappingrules", "type": "array", }, + "comment": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Comment", + }, }, "required": ["fieldInPrimarySource", "mappingRules"], - "title": "Dummy_min_length_listFieldsInPrimarySource", + "title": "Dummy_strFieldsInPrimarySource", "type": "object", }, - "Dummy_min_length_listMappingRule": { + "Dummy_strMappingRule": { "additionalProperties": False, - "description": "Mapping rule " - "schema of " - "field " - "Dummy_min_length_list.", + "description": "Mapping rule schema of field Dummy_str.", "properties": { "forValues": { "anyOf": [ @@ -391,11 +363,6 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Forvalues", }, - "rule": { - "anyOf": [{"type": "string"}, {"type": "null"}], - "default": None, - "title": "Rule", - }, 
"setValues": { "anyOf": [ {"items": {"type": "string"}, "type": "array"}, @@ -404,21 +371,27 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Setvalues", }, + "rule": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Rule", + }, }, - "title": "Dummy_min_length_listMappingRule", + "title": "Dummy_strMappingRule", "type": "object", }, - "Dummy_strFieldsInPrimarySource": { + "Dummy_unitFieldsInPrimarySource": { "additionalProperties": False, - "description": "Mapping schema " - "for Dummy_str " - "fields in " - "primary source.", + "description": "Mapping schema for Dummy_unit fields in primary source.", "properties": { - "comment": { + "fieldInPrimarySource": { + "title": "Fieldinprimarysource", + "type": "string", + }, + "locationInPrimarySource": { "anyOf": [{"type": "string"}, {"type": "null"}], "default": None, - "title": "Comment", + "title": "Locationinprimarysource", }, "examplesInPrimarySource": { "anyOf": [ @@ -428,29 +401,25 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Examplesinprimarysource", }, - "fieldInPrimarySource": { - "title": "Fieldinprimarysource", - "type": "string", - }, - "locationInPrimarySource": { - "anyOf": [{"type": "string"}, {"type": "null"}], - "default": None, - "title": "Locationinprimarysource", - }, "mappingRules": { - "items": {"$ref": "#/$defs/Dummy_strMappingRule"}, + "items": {"$ref": "#/$defs/Dummy_unitMappingRule"}, "minItems": 1, "title": "Mappingrules", "type": "array", }, + "comment": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Comment", + }, }, "required": ["fieldInPrimarySource", "mappingRules"], - "title": "Dummy_strFieldsInPrimarySource", + "title": "Dummy_unitFieldsInPrimarySource", "type": "object", }, - "Dummy_strMappingRule": { + "Dummy_unitMappingRule": { "additionalProperties": False, - "description": "Mapping rule schema of " "field Dummy_str.", + "description": "Mapping rule schema of field Dummy_unit.", "properties": { "forValues": { "anyOf": [ @@ -460,37 +429,47 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Forvalues", }, - "rule": { - "anyOf": [{"type": "string"}, {"type": "null"}], - "default": None, - "title": "Rule", - }, "setValues": { "anyOf": [ - {"items": {"type": "string"}, "type": "array"}, + { + "items": { + "anyOf": [ + { + "pattern": "^[a-zA-Z0-9]{14,22}$", + "title": "MergedOrganizationalUnitIdentifier", + "type": "string", + }, + {"type": "null"}, + ] + }, + "type": "array", + }, {"type": "null"}, ], "default": None, "title": "Setvalues", }, + "rule": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Rule", + }, }, - "title": "Dummy_strMappingRule", + "title": "Dummy_unitMappingRule", "type": "object", }, "HadprimarysourceFieldsInPrimarySource": { "additionalProperties": False, - "description": "Mapping " - "schema " - "for " - "Hadprimarysource " - "fields in " - "primary " - "source.", + "description": "Mapping schema for Hadprimarysource fields in primary source.", "properties": { - "comment": { + "fieldInPrimarySource": { + "title": "Fieldinprimarysource", + "type": "string", + }, + "locationInPrimarySource": { "anyOf": [{"type": "string"}, {"type": "null"}], "default": None, - "title": "Comment", + "title": "Locationinprimarysource", }, "examplesInPrimarySource": { "anyOf": [ @@ -500,21 +479,17 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Examplesinprimarysource", }, - "fieldInPrimarySource": { - 
"title": "Fieldinprimarysource", - "type": "string", - }, - "locationInPrimarySource": { - "anyOf": [{"type": "string"}, {"type": "null"}], - "default": None, - "title": "Locationinprimarysource", - }, "mappingRules": { "items": {"$ref": "#/$defs/HadprimarysourceMappingRule"}, "minItems": 1, "title": "Mappingrules", "type": "array", }, + "comment": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Comment", + }, }, "required": ["fieldInPrimarySource", "mappingRules"], "title": "HadprimarysourceFieldsInPrimarySource", @@ -522,7 +497,7 @@ def test_generate_mapping_schema() -> None: }, "HadprimarysourceMappingRule": { "additionalProperties": False, - "description": "Mapping rule schema " "of field " "Hadprimarysource.", + "description": "Mapping rule schema of field Hadprimarysource.", "properties": { "forValues": { "anyOf": [ @@ -532,17 +507,12 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Forvalues", }, - "rule": { - "anyOf": [{"type": "string"}, {"type": "null"}], - "default": None, - "title": "Rule", - }, "setValues": { "anyOf": [ { "items": { "pattern": "^[a-zA-Z0-9]{14,22}$", - "title": "PrimarySourceID", + "title": "MergedPrimarySourceIdentifier", "type": "string", }, "type": "array", @@ -552,21 +522,27 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Setvalues", }, + "rule": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Rule", + }, }, "title": "HadprimarysourceMappingRule", "type": "object", }, "IdentifierFieldsInPrimarySource": { "additionalProperties": False, - "description": "Mapping schema " - "for Identifier " - "fields in " - "primary source.", + "description": "Mapping schema for Identifier fields in primary source.", "properties": { - "comment": { + "fieldInPrimarySource": { + "title": "Fieldinprimarysource", + "type": "string", + }, + "locationInPrimarySource": { "anyOf": [{"type": "string"}, {"type": "null"}], "default": None, - "title": "Comment", + "title": "Locationinprimarysource", }, "examplesInPrimarySource": { "anyOf": [ @@ -576,21 +552,17 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Examplesinprimarysource", }, - "fieldInPrimarySource": { - "title": "Fieldinprimarysource", - "type": "string", - }, - "locationInPrimarySource": { - "anyOf": [{"type": "string"}, {"type": "null"}], - "default": None, - "title": "Locationinprimarysource", - }, "mappingRules": { "items": {"$ref": "#/$defs/IdentifierMappingRule"}, "minItems": 1, "title": "Mappingrules", "type": "array", }, + "comment": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Comment", + }, }, "required": ["fieldInPrimarySource", "mappingRules"], "title": "IdentifierFieldsInPrimarySource", @@ -598,7 +570,7 @@ def test_generate_mapping_schema() -> None: }, "IdentifierMappingRule": { "additionalProperties": False, - "description": "Mapping rule schema of " "field Identifier.", + "description": "Mapping rule schema of field Identifier.", "properties": { "forValues": { "anyOf": [ @@ -608,17 +580,12 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Forvalues", }, - "rule": { - "anyOf": [{"type": "string"}, {"type": "null"}], - "default": None, - "title": "Rule", - }, "setValues": { "anyOf": [ { "items": { "pattern": "^[a-zA-Z0-9]{14,22}$", - "title": "Identifier", + "title": "ExtractedDummyIdentifier", "type": "string", }, "type": "array", @@ -628,25 +595,27 @@ def test_generate_mapping_schema() -> None: 
"default": None, "title": "Setvalues", }, + "rule": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Rule", + }, }, "title": "IdentifierMappingRule", "type": "object", }, "IdentifierinprimarysourceFieldsInPrimarySource": { "additionalProperties": False, - "description": "Mapping " - "schema " - "for " - "Identifierinprimarysource " - "fields " - "in " - "primary " - "source.", + "description": "Mapping schema for Identifierinprimarysource fields in primary source.", "properties": { - "comment": { + "fieldInPrimarySource": { + "title": "Fieldinprimarysource", + "type": "string", + }, + "locationInPrimarySource": { "anyOf": [{"type": "string"}, {"type": "null"}], "default": None, - "title": "Comment", + "title": "Locationinprimarysource", }, "examplesInPrimarySource": { "anyOf": [ @@ -656,15 +625,6 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Examplesinprimarysource", }, - "fieldInPrimarySource": { - "title": "Fieldinprimarysource", - "type": "string", - }, - "locationInPrimarySource": { - "anyOf": [{"type": "string"}, {"type": "null"}], - "default": None, - "title": "Locationinprimarysource", - }, "mappingRules": { "items": { "$ref": "#/$defs/IdentifierinprimarysourceMappingRule" @@ -673,6 +633,11 @@ def test_generate_mapping_schema() -> None: "title": "Mappingrules", "type": "array", }, + "comment": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Comment", + }, }, "required": ["fieldInPrimarySource", "mappingRules"], "title": "IdentifierinprimarysourceFieldsInPrimarySource", @@ -680,11 +645,7 @@ def test_generate_mapping_schema() -> None: }, "IdentifierinprimarysourceMappingRule": { "additionalProperties": False, - "description": "Mapping " - "rule " - "schema of " - "field " - "Identifierinprimarysource.", + "description": "Mapping rule schema of field Identifierinprimarysource.", "properties": { "forValues": { "anyOf": [ @@ -694,36 +655,130 @@ def test_generate_mapping_schema() -> None: "default": None, "title": "Forvalues", }, + "setValues": { + "anyOf": [ + {"items": {"type": "string"}, "type": "array"}, + {"type": "null"}, + ], + "default": None, + "title": "Setvalues", + }, "rule": { "anyOf": [{"type": "string"}, {"type": "null"}], "default": None, "title": "Rule", }, - "setValues": { + }, + "title": "IdentifierinprimarysourceMappingRule", + "type": "object", + }, + "StabletargetidFieldsInPrimarySource": { + "additionalProperties": False, + "description": "Mapping schema for Stabletargetid fields in primary source.", + "properties": { + "fieldInPrimarySource": { + "title": "Fieldinprimarysource", + "type": "string", + }, + "locationInPrimarySource": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Locationinprimarysource", + }, + "examplesInPrimarySource": { "anyOf": [ {"items": {"type": "string"}, "type": "array"}, {"type": "null"}, ], "default": None, + "title": "Examplesinprimarysource", + }, + "mappingRules": { + "items": {"$ref": "#/$defs/StabletargetidMappingRule"}, + "minItems": 1, + "title": "Mappingrules", + "type": "array", + }, + "comment": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Comment", + }, + }, + "required": ["fieldInPrimarySource", "mappingRules"], + "title": "StabletargetidFieldsInPrimarySource", + "type": "object", + }, + "StabletargetidMappingRule": { + "additionalProperties": False, + "description": "Mapping rule schema of field Stabletargetid.", + "properties": { + "forValues": { + 
"anyOf": [ + {"items": {"type": "string"}, "type": "array"}, + {"type": "null"}, + ], + "default": None, + "title": "Forvalues", + }, + "setValues": { + "anyOf": [ + { + "items": { + "pattern": "^[a-zA-Z0-9]{14,22}$", + "title": "MergedDummyIdentifier", + "type": "string", + }, + "type": "array", + }, + {"type": "null"}, + ], + "default": None, "title": "Setvalues", }, + "rule": { + "anyOf": [{"type": "string"}, {"type": "null"}], + "default": None, + "title": "Rule", + }, }, - "title": "IdentifierinprimarysourceMappingRule", + "title": "StabletargetidMappingRule", "type": "object", }, }, - "description": "Schema for mapping the properties of the entity type " - "ExtractedDummyClass.", + "description": "Schema for mapping the properties of the entity type ExtractedDummyClass.", "properties": { - "dummy_email": { - "items": {"$ref": "#/$defs/Dummy_emailFieldsInPrimarySource"}, - "title": "Dummy Email", + "hadPrimarySource": { + "items": {"$ref": "#/$defs/HadprimarysourceFieldsInPrimarySource"}, + "title": "Hadprimarysource", + "type": "array", + }, + "identifierInPrimarySource": { + "items": { + "$ref": "#/$defs/IdentifierinprimarysourceFieldsInPrimarySource" + }, + "title": "Identifierinprimarysource", + "type": "array", + }, + "identifier": { + "items": {"$ref": "#/$defs/IdentifierFieldsInPrimarySource"}, + "title": "Identifier", "type": "array", }, - "dummy_identifier": { + "stableTargetId": { + "items": {"$ref": "#/$defs/StabletargetidFieldsInPrimarySource"}, + "title": "Stabletargetid", + "type": "array", + }, + "dummy_unit": { "default": None, - "items": {"$ref": "#/$defs/Dummy_identifierFieldsInPrimarySource"}, - "title": "Dummy Identifier", + "items": {"$ref": "#/$defs/Dummy_unitFieldsInPrimarySource"}, + "title": "Dummy Unit", + "type": "array", + }, + "dummy_str": { + "items": {"$ref": "#/$defs/Dummy_strFieldsInPrimarySource"}, + "title": "Dummy Str", "type": "array", }, "dummy_int": { @@ -732,6 +787,11 @@ def test_generate_mapping_schema() -> None: "title": "Dummy Int", "type": "array", }, + "dummy_email": { + "items": {"$ref": "#/$defs/Dummy_emailFieldsInPrimarySource"}, + "title": "Dummy Email", + "type": "array", + }, "dummy_list": { "default": None, "items": {"$ref": "#/$defs/Dummy_listFieldsInPrimarySource"}, @@ -743,33 +803,12 @@ def test_generate_mapping_schema() -> None: "title": "Dummy Min Length List", "type": "array", }, - "dummy_str": { - "items": {"$ref": "#/$defs/Dummy_strFieldsInPrimarySource"}, - "title": "Dummy Str", - "type": "array", - }, - "hadPrimarySource": { - "items": {"$ref": "#/$defs/HadprimarysourceFieldsInPrimarySource"}, - "title": "Hadprimarysource", - "type": "array", - }, - "identifier": { - "items": {"$ref": "#/$defs/IdentifierFieldsInPrimarySource"}, - "title": "Identifier", - "type": "array", - }, - "identifierInPrimarySource": { - "items": { - "$ref": "#/$defs/IdentifierinprimarysourceFieldsInPrimarySource" - }, - "title": "Identifierinprimarysource", - "type": "array", - }, }, "required": [ - "identifier", "hadPrimarySource", "identifierInPrimarySource", + "identifier", + "stableTargetId", "dummy_str", "dummy_email", "dummy_min_length_list", diff --git a/tests/models/test_merged_item.py b/tests/models/test_merged_item.py deleted file mode 100644 index ff8f0775..00000000 --- a/tests/models/test_merged_item.py +++ /dev/null @@ -1,7 +0,0 @@ -from mex.common.models import MergedItem -from mex.common.types import Identifier - - -def test_merged_item_str() -> None: - item = MergedItem(identifier=Identifier.generate(seed=99)) - assert str(item) == 
"MergedItem: bFQoRhcVH5DHV1" diff --git a/tests/models/test_model_schemas.py b/tests/models/test_model_schemas.py index 3dc61c2f..5c98b974 100644 --- a/tests/models/test_model_schemas.py +++ b/tests/models/test_model_schemas.py @@ -139,7 +139,11 @@ def prepare_field(field: str, obj: list[Any] | dict[str, Any]) -> None: if field in ("identifier", "stableTargetId"): obj["$ref"] = "/schema/fields/identifier" else: - obj["$ref"] = f"/schema/entities/{title.removesuffix('ID')}#/identifier" + obj["$ref"] = "/schema/entities/{}#/identifier".format( + title.removesuffix("Identifier") + .removeprefix("Merged") + .removeprefix("Extracted") + ) # align concept/enum annotations # (spec uses `useScheme` to specify vocabularies and models use enums) diff --git a/tests/sinks/test_ndjson.py b/tests/sinks/test_ndjson.py index c60827ee..76efdd82 100644 --- a/tests/sinks/test_ndjson.py +++ b/tests/sinks/test_ndjson.py @@ -3,17 +3,18 @@ from pydantic import UUID4 -from mex.common.models import MExModel +from mex.common.models import ExtractedData from mex.common.settings import BaseSettings from mex.common.sinks.ndjson import write_ndjson -from mex.common.types import Timestamp +from mex.common.types import Identifier, Timestamp class DummyEnum(Enum): NAME = "value" -class Thing(MExModel): +class Thing(ExtractedData): + identifier: Identifier str_attr: str enum_attr: DummyEnum | None = None uuid_attr: UUID4 | None = None diff --git a/tests/test_transform.py b/tests/test_transform.py index 7cc443d0..b4e36bb1 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -2,25 +2,28 @@ from datetime import timedelta, timezone from enum import Enum from pathlib import Path, PurePosixPath, PureWindowsPath -from typing import Any +from typing import Annotated, Any from uuid import UUID import pytest +from pydantic import AnyUrl, Field, SecretStr from pydantic import BaseModel as PydanticModel -from pydantic import Field, SecretStr from mex.common.transform import ( MExEncoder, dromedary_to_kebab, dromedary_to_snake, + ensure_prefix, kebab_to_camel, snake_to_dromedary, + to_key_and_values, ) from mex.common.types import Identifier, Timestamp +from mex.common.types.path import PathWrapper class DummyModel(PydanticModel): - string_field: str = Field("foo", alias="strField") + string_field: Annotated[str, Field("foo", alias="strField")] integer: int = 42 @@ -33,6 +36,10 @@ class DummyEnum(Enum): ("raw", "expected"), [ (DummyModel(strField="bar"), '{"integer": 42, "string_field": "bar"}'), + ( + AnyUrl("http://example:8000/path/?query=test"), + '"http://example:8000/path/?query=test"', + ), (SecretStr("str"), '"str"'), (DummyEnum.THAT, '"that"'), (UUID(int=4, version=4), '"00000000-0000-4000-8000-000000000004"'), @@ -48,6 +55,7 @@ class DummyEnum(Enum): (PureWindowsPath(r"C:\\System\\Win32\\exe.dll"), '"C:/System/Win32/exe.dll"'), (PurePosixPath(r"/dev/sys/etc/launch.ctl"), '"/dev/sys/etc/launch.ctl"'), (Path("relative", "path"), '"relative/path"'), + (PathWrapper("relative/path"), '"relative/path"'), ], ) def test_mex_json_encoder(raw: Any, expected: str) -> None: @@ -145,3 +153,44 @@ def test_dromedary_to_kebab(string: str, expected: str) -> None: def test_kebab_to_camel(string: str, expected: str) -> None: result = kebab_to_camel(string) assert result == expected + + +@pytest.mark.parametrize( + ("string", "prefix", "expected"), + [ + ("", "", ""), + ("banana", "ba", "banana"), + ("bar", "foo", "foobar"), + ( + -42, + UUID("{12345678-1234-5678-1234-567812345678}"), + "12345678-1234-5678-1234-567812345678-42", 
+ ), + ], + ids=["empty", "already-prefixed", "prefix-added", "stringified"], +) +def test_ensure_prefix(string: Any, prefix: Any, expected: str) -> None: + result = ensure_prefix(string, prefix) + + assert result == expected + + +@pytest.mark.parametrize( + ("dct", "expected"), + [ + ({}, {}), + ( + {"single": 32, "nested": {"foo": 42}, "empty": None}, + {"single": [32], "nested": [{"foo": 42}], "empty": []}, + ), + ( + {"one": [32], "three": [32, 42, 3.1], "empty": []}, + {"one": [32], "three": [32, 42, 3.1], "empty": []}, + ), + ], + ids=["empty", "singles", "lists"], +) +def test_to_key_and_values(dct: dict[str, Any], expected: dict[str, list[Any]]) -> None: + result = dict(to_key_and_values(dct)) + + assert result == expected