From 9ec130c0127d4dc7d40267c03bd981a9201a1c78 Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt Date: Thu, 22 Feb 2024 17:24:35 +0100 Subject: [PATCH] feature/mx-1562 code maintenance (#82) # PR Context - preparation for mx-1381 and to keep https://github.com/robert-koch-institut/mex-common/pull/69 smaller - already removing public-api module for mx-1562, not closing that ticket yet though # Changes - update cruft and dev dependencies - randomize test order by default # Removed - remove `mex.common.public_api` module and the correlating sinks - remove `PathWrapper.resolve` and `PathWrapper.raw` methods --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .cruft.json | 2 +- .pre-commit-config.yaml | 10 +- CHANGELOG.md | 45 ++-- mex.bat | 5 +- mex/common/exceptions.py | 2 +- mex/common/ldap/README.md | 20 +- mex/common/ldap/connector.py | 2 +- mex/common/organigram/README.md | 8 +- mex/common/organigram/extract.py | 2 +- mex/common/primary_source/extract.py | 2 +- mex/common/public_api/__init__.py | 0 mex/common/public_api/connector.py | 342 --------------------------- mex/common/public_api/extract.py | 23 -- mex/common/public_api/models.py | 118 --------- mex/common/public_api/transform.py | 100 -------- mex/common/settings.py | 30 +-- mex/common/sinks/public_api.py | 63 ----- mex/common/sinks/purge.py | 63 ----- mex/common/testing/plugin.py | 4 +- mex/common/types/path.py | 11 - mex/common/types/sink.py | 1 - mex/common/types/timestamp.py | 2 +- mex/common/wikidata/connector.py | 4 +- mex/common/wikidata/transform.py | 7 +- poetry.lock | 242 +++++++++++-------- pyproject.toml | 51 ++-- tests/identity/test_registry.py | 4 +- tests/models/test_base.py | 2 +- tests/public_api/__init__.py | 0 tests/public_api/conftest.py | 73 ------ tests/public_api/test_connector.py | 336 -------------------------- tests/public_api/test_extract.py | 56 ----- tests/public_api/test_transform.py | 173 
-------------- tests/sinks/test_ndjson.py | 14 +- tests/sinks/test_public_api.py | 70 ------ tests/test_cli.py | 2 +- tests/test_settings.py | 4 +- tests/types/test_path.py | 2 +- tests/wikidata/test_extract.py | 4 +- tests/wikidata/test_transform.py | 2 +- 40 files changed, 251 insertions(+), 1650 deletions(-) delete mode 100644 mex/common/public_api/__init__.py delete mode 100644 mex/common/public_api/connector.py delete mode 100644 mex/common/public_api/extract.py delete mode 100644 mex/common/public_api/models.py delete mode 100644 mex/common/public_api/transform.py delete mode 100644 mex/common/sinks/public_api.py delete mode 100644 mex/common/sinks/purge.py delete mode 100644 tests/public_api/__init__.py delete mode 100644 tests/public_api/conftest.py delete mode 100644 tests/public_api/test_connector.py delete mode 100644 tests/public_api/test_extract.py delete mode 100644 tests/public_api/test_transform.py delete mode 100644 tests/sinks/test_public_api.py diff --git a/.cruft.json b/.cruft.json index b1eb463f..bc56a549 100644 --- a/.cruft.json +++ b/.cruft.json @@ -1,6 +1,6 @@ { "checkout": null, - "commit": "d1a461de3c7ff099045b71a156ad667887c32368", + "commit": "6067fc53d1335a9bda900c5eff8dbf1c42bfe4ca", "context": { "cookiecutter": { "project_name": "common", diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2dc1256f..b9c4aaee 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,12 +3,12 @@ default_language_version: python: python3.11 repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.2.0 + rev: v0.2.1 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] - repo: https://github.com/psf/black - rev: 24.1.1 + rev: 24.2.0 hooks: - id: black - repo: https://github.com/pre-commit/pre-commit-hooks @@ -20,6 +20,12 @@ repos: exclude: .cruft.json - id: check-yaml name: yaml + - id: end-of-file-fixer + name: eof + - id: trailing-whitespace + name: whitespaces + - id: fix-byte-order-marker + name: 
byte-order - repo: https://github.com/python-poetry/poetry rev: 1.7.1 hooks: diff --git a/CHANGELOG.md b/CHANGELOG.md index 68e0dd35..41bf165c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,41 +19,42 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Security -## [0.19.4] - 2024-02-15 - -### Added +## [0.20.0] - 2024-02-22 ### Changes -- update cruft and minor dependencies - -### Deprecated +- update cruft and dev dependencies +- randomize test order by default ### Removed -- date-time format validation for mapping model generation +- remove `mex.common.public_api` module and the correlating sinks +- remove `PathWrapper.resolve` and `PathWrapper.raw` methods ### Fixed -### Security +- remove `pytest.mark` from fixture in `mex.common.testing.plugin` -## [0.19.3] - 2024-02-06 +## [0.19.4] - 2024-02-15 -### Added +### Changes + +- update cruft and minor dependencies + +### Removed + +- date-time format validation for mapping model generation + +## [0.19.3] - 2024-02-06 ### Changes - update cruft to apply new workflow trigger config - update poetry and pre-commit dependencies -### Deprecated - -### Removed - ### Fixed -- fix mex mapping model name -### Security +- fix mex mapping model name ## [0.19.2] - 2024-02-02 @@ -66,14 +67,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `mex.bat test` uses random order and xdist plugins by default -### Deprecated - -### Removed - -### Fixed - -### Security - ## [0.19.1] - 2024-01-19 ### Added @@ -133,8 +126,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Deprecated - deprecate `MExModel.get_entity_type`, use `cls.__name__` instead -- deprecate `mex.common.models.MODEL_CLASSES[_BY_ENTITY_TYPE]`, use the more precise lists or dicts like `EXTRACTED_MODEL_CLASSES_BY_NAME` instead - +- deprecate `mex.common.models.MODEL_CLASSES[_BY_ENTITY_TYPE]`, + use the more precise lists or dicts like 
`EXTRACTED_MODEL_CLASSES_BY_NAME` instead ## [0.17.1] - 2023-12-20 diff --git a/mex.bat b/mex.bat index 59e1a2b0..f00e1a0b 100644 --- a/mex.bat +++ b/mex.bat @@ -32,10 +32,9 @@ echo linting all files pre-commit run --all-files if %errorlevel% neq 0 exit /b %errorlevel% -@REM run the pytest test suite with unit and integration tests in random order -@REM distributed across all available CPU cores +@REM run pytest unit and integration tests distributed across available cores echo running all tests -poetry run pytest --random-order-bucket=global --numprocesses=auto --dist=worksteal +poetry run pytest --numprocesses=auto --dist=worksteal exit /b %errorlevel% diff --git a/mex/common/exceptions.py b/mex/common/exceptions.py index 7ab2199a..5b088191 100644 --- a/mex/common/exceptions.py +++ b/mex/common/exceptions.py @@ -5,7 +5,7 @@ def __str__(self) -> str: """Format this exception as a string for logging.""" return ( f"{self.__class__.__name__}: " - f"{(', '.join((str(arg) for arg in self.args)))} " + f"{(', '.join(str(arg) for arg in self.args))} " ) diff --git a/mex/common/ldap/README.md b/mex/common/ldap/README.md index fdc56242..045e06d8 100644 --- a/mex/common/ldap/README.md +++ b/mex/common/ldap/README.md @@ -1,31 +1,31 @@ -Helper extractor to extract data from Lightweight Directory Access Protocol (LDAP). +Helper extractor to extract data from Lightweight Directory Access Protocol (LDAP). Common use cases: -- extract employee accounts of your organization +- extract employee accounts of your organization - extract functional accounts of your organization -Possible queries are for example the account name, surname, given name, or email. +Possible queries are for example the account name, surname, given name, or email. 
# Configuration -For configuring the ldap connection, set the settings parameter `ldap_url` -(see `mex.common.settings` for further info) to an LDAP url (see -[LDAP URL definition](https://datatracker.ietf.org/doc/html/rfc2255#section-3) for +For configuring the ldap connection, set the settings parameter `ldap_url` +(see `mex.common.settings` for further info) to an LDAP url (see +[LDAP URL definition](https://datatracker.ietf.org/doc/html/rfc2255#section-3) for further information). # Extracting data -Use the `LDAPConnector` from the `ldap.connector` module to extract data. +Use the `LDAPConnector` from the `ldap.connector` module to extract data. # Transforming data -The module `ldap.transform` contains functions for transforming LDAP data into MEx +The module `ldap.transform` contains functions for transforming LDAP data into MEx models. -The `mex_person.stableTargetId` attribute can be used in any entity that requires a +The `mex_person.stableTargetId` attribute can be used in any entity that requires a `PersonID`. # Convenience Functions -The module `ldap.extract` holds convenience functions, e.g. for build a mapping from +The module `ldap.extract` holds convenience functions, e.g. for build a mapping from query strings to `stableTargetId`s. 
diff --git a/mex/common/ldap/connector.py b/mex/common/ldap/connector.py index f808573b..bb168180 100644 --- a/mex/common/ldap/connector.py +++ b/mex/common/ldap/connector.py @@ -74,7 +74,7 @@ def _fetch( if attributes := entry.get("attributes"): yield model_cls.model_validate(attributes) - @cache + @cache # noqa: B019 def _paged_ldap_search( self, fields: tuple[str], search_filter: str, search_base: str ) -> list[dict[str, str]]: diff --git a/mex/common/organigram/README.md b/mex/common/organigram/README.md index 3e512c8e..109797bd 100644 --- a/mex/common/organigram/README.md +++ b/mex/common/organigram/README.md @@ -5,7 +5,7 @@ Common use cases: # Configuration -The extractor reads data from a json file, whose path is set with the settings parameter +The extractor reads data from a json file, whose path is set with the settings parameter `organigram_path` (see `mex.common.settings` for further info). # Extracting data @@ -16,15 +16,15 @@ The module `organigram.extract` contains functions for data extraction. The module `organigram.transform` contains functions for data transformation. -Use the `stableTargetId` attribute of the transformed objects to set attributes +Use the `stableTargetId` attribute of the transformed objects to set attributes requiring an `OrganizationalUnitID`. # Convenience Functions -The module `organigram.extract` holds convenience functions, e.g. for building a mapping +The module `organigram.extract` holds convenience functions, e.g. for building a mapping from email addresses or synonyms to `ExtractedOrganizationalUnit`. # json file format -See example file in `assets/raw-data/organigram/organizational_units.json`. +See example file in `assets/raw-data/organigram/organizational_units.json`. For mandatory / optional attributes, see model in `mex/common/organigram/models.py`. 
diff --git a/mex/common/organigram/extract.py b/mex/common/organigram/extract.py index 4905da28..ec4f2290 100644 --- a/mex/common/organigram/extract.py +++ b/mex/common/organigram/extract.py @@ -19,7 +19,7 @@ def extract_organigram_units() -> Generator[OrganigramUnit, None, None]: Generator for organigram units """ settings = BaseSettings.get() - with open(settings.organigram_path, "r") as fh: + with open(settings.organigram_path) as fh: for raw in json.load(fh): yield OrganigramUnit.model_validate(raw) diff --git a/mex/common/primary_source/extract.py b/mex/common/primary_source/extract.py index b348c3b2..d38f2a18 100644 --- a/mex/common/primary_source/extract.py +++ b/mex/common/primary_source/extract.py @@ -17,6 +17,6 @@ def extract_seed_primary_sources() -> Generator[SeedPrimarySource, None, None]: Generator for seed primary sources """ settings = BaseSettings.get() - with open(settings.primary_sources_path, "r") as fh: + with open(settings.primary_sources_path) as fh: for raw in json.load(fh): yield SeedPrimarySource.model_validate(raw) diff --git a/mex/common/public_api/__init__.py b/mex/common/public_api/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/mex/common/public_api/connector.py b/mex/common/public_api/connector.py deleted file mode 100644 index 49ca195c..00000000 --- a/mex/common/public_api/connector.py +++ /dev/null @@ -1,342 +0,0 @@ -import json -from base64 import b64decode -from datetime import datetime, timedelta -from typing import Final, Generator, TypeVar, cast -from urllib.parse import urljoin -from uuid import UUID - -import backoff -import pandas as pd -import requests -from requests.exceptions import HTTPError - -from mex.common.connector import HTTPConnector -from mex.common.logging import echo -from mex.common.models import MExModel -from mex.common.public_api.models import ( - PublicApiAuthResponse, - PublicApiAxisConstraint, - PublicApiItem, - PublicApiItemWithoutValues, - PublicApiJobItemsResponse, - 
PublicApiMetadataItemsResponse, - PublicApiSearchRequest, - PublicApiSearchResponse, -) -from mex.common.public_api.transform import ( - transform_mex_model_to_public_api_item, - transform_public_api_item_to_mex_model, -) -from mex.common.settings import BaseSettings -from mex.common.types import Identifier - -ModelT = TypeVar("ModelT", bound=MExModel) -PublicApiItemT = TypeVar( - "PublicApiItemT", bound=PublicApiItem | PublicApiItemWithoutValues -) - - -class PublicApiConnector(HTTPConnector): # pragma: no cover - """Connector class to handle authentication and interaction with the public API.""" - - API_VERSION: Final[str] = "v0" - - def _set_session(self) -> None: - """Create and set request session.""" - settings = BaseSettings.get() - self.session = requests.Session() - self.session.verify = settings.public_api_verify_session # type: ignore - - def _set_url(self) -> None: - """Set url of the host with api version.""" - settings = BaseSettings.get() - self.url = urljoin(str(settings.public_api_url), self.API_VERSION) - - def _check_availability(self) -> None: - """Send an empty search request to verify the host is available.""" - self.request( - "POST", - "/query/search", - PublicApiSearchRequest(limit=0), - ) - - def _set_authentication(self) -> None: - """Generate JWT using secret payload and attach it to session.""" - settings = BaseSettings.get() - response = self.session.post( - str(settings.public_api_token_provider), - data=b64decode(settings.public_api_token_payload.get_secret_value()), - timeout=self.TIMEOUT, - headers={"Accept": "*/*", "Authorization": None}, - ) - response.raise_for_status() - auth_response = PublicApiAuthResponse.model_validate(response.json()) - expires_at = datetime.now() + timedelta(seconds=auth_response.expires_in) - echo( - f"authenticated with public api (expires {expires_at})", fg="bright_magenta" - ) - self.session.headers["Authorization"] = f"Bearer {auth_response.access_token}" - - def echo_job_logs(self, job_id: str) -> 
None: - """Echo the logs for the job with the given ID to the console. - - Args: - job_id: Public API job ID - """ - response = self.request("GET", f"jobs/{job_id}/logs") - raw_logs = cast(list[str], response.get("logs")) - now = str(datetime.now()) - for raw_log in raw_logs: - log = json.loads(raw_log) - if trace := log.get("trace-id"): - timestamp = pd.to_datetime(log.get("timestamp", now)) - message = f"[{trace}] {log.get('message', 'N/A')}" - echo(message, timestamp, fg="bright_magenta") - - @backoff.on_predicate( - backoff.fibo, lambda status: cast(str, status) == "RUNNING", max_time=180 - ) - def wait_for_job(self, job_id: str) -> str: - """Poll the status for this `job_id` until it is no longer 'RUNNING'.""" - response = self.request("GET", f"jobs/{job_id}") - return str(response.get("status", "NONE")) - - def get_job_items(self, job_id: str) -> Generator[Identifier, None, None]: - """Get the identifiers of the items created, updated or deleted during a job. - - Args: - job_id: Job to query for items - - Returns: - Generator for identifiers of manipulated items - """ - response = self.request("GET", f"jobs/{job_id}/items") - items_response = PublicApiJobItemsResponse.model_validate(response) - for item_id in items_response.itemIds: - if isinstance(item_id, UUID): - if item := self.get_item(item_id): - for field in item.values: - if field.fieldName == "identifier": - yield Identifier(field.fieldValue) - - def post_items( - self, items: list[PublicApiItem], wait_for_done: bool = True - ) -> list[Identifier]: - """Post a list of items them to the public API. 
- - Args: - items: Public API items to post - wait_for_done: If the return should block until the job is done - - Raises: - HTTPError: If the job was not accepted, crashes or times out - - Returns: - Identifiers of created or updated models - Empty list in case `wait_for_done` was `False` - """ - response = self.request("POST", "metadata/items_bulk", {"items": items}) - job_id = response.get("jobId", "N/A") - if wait_for_done: - self.wait_for_job(job_id) - return list(self.get_job_items(job_id)) - return [] - - def post_models( - self, models: list[MExModel], wait_for_done: bool = True - ) -> list[Identifier]: - """Convert models to public API items and post them. - - Args: - models: Extracted or merged models to post - wait_for_done: If the return should block until the job is done - - Raises: - HTTPError: If the job was not accepted, crashes or times out - - Returns: - Identifiers of created or updated items - Empty list in case `wait_for_done` was `False` - """ - return self.post_items( - [transform_mex_model_to_public_api_item(model) for model in models], - wait_for_done=wait_for_done, - ) - - def search_item( - self, model_cls: type[ModelT], identifier: Identifier - ) -> PublicApiItem | None: - """Search an item and retrieve it from public API. - - Uses the search endpoint of the public API, which covers only a subset of - (e.g. 
no persons or primary sources) - - Args: - model_cls: Class of the expected model - identifier: Identifier of the model - - Returns: - Public API item, if ID was found, else None - """ - request = PublicApiSearchRequest( - offset=0, - limit=1, - axisConstraints=[ - PublicApiAxisConstraint(values=[str(identifier)], axis="identifier"), - PublicApiAxisConstraint(values=[model_cls.__name__], axis="entityName"), - ], - fields=list(model_cls.model_fields), - ) - response = self.request( - "POST", - "query/search", - request, - ) - search_response = PublicApiSearchResponse.model_validate(response) - if search_response.numFound == 1 and len(search_response.items) == 1: - return search_response.items[0] - return None - - def get_item(self, identifier: Identifier | UUID) -> PublicApiItem | None: - """Get an item from Public API. - - Args: - identifier: Identifier of the Public API item - - Returns: - Public API item, if ID was found, else None - """ - try: - response = self.request("GET", f"metadata/items/{identifier}") - except HTTPError as error: - # no rows in result set, return None - if error.response and error.response.status_code == 404: - return None - # re-raise any unexpected errors - raise error - else: - return PublicApiItem.model_validate(response) - - def search_model( - self, model_cls: type[ModelT], identifier: Identifier - ) -> MExModel | None: - """Get an item from the Public API and convert it to a model. - - Args: - model_cls: Class of the expected model - identifier: Identifier of the model - - Returns: - Extracted or merged model instance, if ID was found, else None - """ - if item := self.search_item(model_cls, identifier): - return transform_public_api_item_to_mex_model(item) - return None - - def delete_item( - self, item: PublicApiItem | PublicApiItemWithoutValues - ) -> UUID | None: - """Delete item from Public API. 
- - Args: - item: Public API item to delete - - Raises: - HTTPError: If deletion failed - - Returns: - API-internal UUID of the deleted item, or None if item had no ID - """ - if item.itemId: - self.request( - "DELETE", - f"metadata/items/{item.itemId}", - ) - return item.itemId - return None - - def delete_model(self, model: MExModel) -> UUID | None: - """Get the Public API item for a given model and delete it. - - Requires that the model is findable via the Public API, which is not true for - Persons among other model types - - Args: - model: MEx model instance - - Raises: - HTTPError: If deletion failed - - Returns: - API-internal UUID of the deleted item, or None if model did not exist - """ - if item := self.search_item(type(model), model.identifier): - return self.delete_item(item) - return None - - def search_items( - self, - model_cls: type[MExModel], - offset: int = 0, - limit: int = 10, - ) -> list[PublicApiItem]: - """Get all Public API items corresponding to `model_cls` with pagination. - - Uses the search endpoint of the Public API, - which covers only a subset of entities (e.g. no persons or primary sources) - - Args: - model_cls: Model class to fetch - offset: Pagination offset, defaults to 0 - limit: Pagination limit, defaults to 10 - - Returns: - List of Public API items - """ - request = PublicApiSearchRequest( - offset=offset, - limit=limit, - axisConstraints=[ - PublicApiAxisConstraint(values=[model_cls.__name__], axis="entityName") - ], - fields=list(model_cls.model_fields), - ) - response = self.request( - "POST", - "query/search", - request, - ) - return PublicApiSearchResponse.model_validate(response).items - - def search_mex_model_items( - self, model_cls: type[ModelT], offset: int = 0, limit: int = 10 - ) -> list[ModelT]: - """Get all instances of a model class with pagination. 
- - Args: - model_cls: Model class to fetch - offset: Pagination offset, defaults to 0 - limit: Pagination limit, defaults to 10 - - Returns: - List of instances of `model_cls` - """ - return [ - model - for item in self.search_items(model_cls, offset, limit) - if (model := cast(ModelT, transform_public_api_item_to_mex_model(item))) - ] - - def get_all_items( - self, offset_item_id: UUID | None = None - ) -> PublicApiMetadataItemsResponse: - """Get all items from the Public API endpoint 'metadata/items'. - - Args: - offset_item_id: item UUID pagination offset, defaults to None - """ - endpoint = "metadata/items" - if offset_item_id: - endpoint += f"?next={offset_item_id}" - response = self.request("GET", endpoint) - return PublicApiMetadataItemsResponse.model_validate(response) diff --git a/mex/common/public_api/extract.py b/mex/common/public_api/extract.py deleted file mode 100644 index 0f5ad58c..00000000 --- a/mex/common/public_api/extract.py +++ /dev/null @@ -1,23 +0,0 @@ -from typing import Generator - -from mex.common.exceptions import MExError -from mex.common.logging import watch -from mex.common.public_api.connector import PublicApiConnector -from mex.common.public_api.models import PublicApiItemWithoutValues - - -@watch -def extract_mex_person_items() -> Generator[PublicApiItemWithoutValues, None, None]: - """Extract all person items from the Public Api.""" - connector = PublicApiConnector.get() - offset_item_id = None - for _ in range(100): - response = connector.get_all_items(offset_item_id=offset_item_id) - offset_item_id = response.next - for item in response.items: - if item.entityType in ["Person", "ExtractedPerson"]: - yield item - if not offset_item_id: - break - else: - raise MExError("Exceeded maximum fetchable amount of persons.") diff --git a/mex/common/public_api/models.py b/mex/common/public_api/models.py deleted file mode 100644 index 2668b2b8..00000000 --- a/mex/common/public_api/models.py +++ /dev/null @@ -1,118 +0,0 @@ -from enum import 
Enum -from typing import Any, Literal -from uuid import UUID - -from pydantic import ConfigDict, Field, field_validator - -from mex.common.models import BaseModel -from mex.common.types import ( - Identifier, - Link, - LinkLanguage, - Text, - TextLanguage, - Timestamp, -) - -PublicApiFieldValueTypes = UUID | Enum | Timestamp | str | Link | Text -PublicApiFieldValueTypesOrList = ( - PublicApiFieldValueTypes | list[PublicApiFieldValueTypes] -) - - -class PublicApiBaseModel(BaseModel): - """Common Public API base class.""" - - model_config = ConfigDict(extra="ignore", str_min_length=0) - - -class PublicApiAxisConstraint(PublicApiBaseModel): - """Axis constraints for Public API search requests.""" - - axis: str - combineOperator: str = "and" - type: str = "exact" - values: list[str] - - -class PublicApiSearchRequest(PublicApiBaseModel): - """Request body for Public API search requests.""" - - axisConstraints: list[PublicApiAxisConstraint] = [] - fields: list[str] = [] - limit: int = 10 - offset: int = 0 - query: str = "*" - - -class PublicApiField(PublicApiBaseModel): - """A single field of an item as represented in the Public API format.""" - - fieldName: str - fieldValue: PublicApiFieldValueTypesOrList - language: LinkLanguage | TextLanguage | None = None - - @field_validator("language", mode="before") - @classmethod - def fix_language(cls, value: Any) -> Any: - """Only try to parse languages when the string is non-empty.""" - if isinstance(value, str) and value.strip() == "": - return None - return value - - -class PublicApiItem(PublicApiBaseModel): - """Public API item representing an entity or extracted data model.""" - - entityType: str - itemId: UUID | None = Field(None, exclude=True) - businessId: str = Field(exclude=True) - values: list[PublicApiField] - - @property - def stableTargetId(self) -> Identifier: # noqa: N802 - """Return the stableTargetId of this item.""" - return Identifier(self.businessId.removesuffix("#")) - - -class 
PublicApiSearchResponse(PublicApiBaseModel): - """Response body that is expected for search results.""" - - items: list[PublicApiItem] - numFound: int - - -class PublicApiAuthResponse(PublicApiBaseModel): - """Response body that is expected for auth requests.""" - - access_token: str - refresh_token: str | None = None - token_type: str - expires_in: int - - -class PublicApiItemWithoutValues(PublicApiBaseModel): - """Public API item representing an entity or extracted data model.""" - - entityType: str - itemId: UUID | None = Field(None, exclude=True) - businessId: str = Field(exclude=True) - - @property - def stableTargetId(self) -> Identifier: # noqa: N802 - """Return the stableTargetId of this item.""" - return Identifier(self.businessId.removesuffix("#")) - - -class PublicApiMetadataItemsResponse(PublicApiBaseModel): - """Response body that is expected for the metadata items endpoint.""" - - items: list[PublicApiItemWithoutValues] - next: UUID | Literal[""] - - -class PublicApiJobItemsResponse(PublicApiBaseModel): - """Response body that is expected for the jobs items endpoint.""" - - jobId: UUID - itemIds: list[UUID | Literal["{}"]] diff --git a/mex/common/public_api/transform.py b/mex/common/public_api/transform.py deleted file mode 100644 index 0d2bbdbe..00000000 --- a/mex/common/public_api/transform.py +++ /dev/null @@ -1,100 +0,0 @@ -from collections import defaultdict -from typing import get_args - -from mex.common.models import ( - EXTRACTED_MODEL_CLASSES_BY_NAME, - MERGED_MODEL_CLASSES_BY_NAME, - MExModel, -) -from mex.common.public_api.models import ( - PublicApiField, - PublicApiFieldValueTypes, - PublicApiItem, -) -from mex.common.types import Link, LinkLanguage, Text, TextLanguage - - -def _is_type(type_: type, annotation: type | None) -> bool: - """Check if annotation is or contains the provided type.""" - return type_ in (annotation, *get_args(annotation)) - - -def transform_mex_model_to_public_api_item(model: MExModel) -> PublicApiItem: - 
"""Convert an ExtractedData instance into a Public API item. - - Args: - model: Instance of a subclass of ExtractedData - - Returns: - Public API item - """ - api_values = [] - model_dict = model.model_dump(exclude_none=True) - for field_name in sorted(model_dict): - field = model.model_fields[field_name] - is_text_or_link = _is_type(Text, field.annotation) or _is_type( - Link, field.annotation - ) - if is_text_or_link: - model_values = getattr(model, field_name) - else: - model_values = model_dict[field_name] - if not isinstance(model_values, list): - model_values = [model_values] - for value in model_values: - if is_text_or_link: - language = value.language - value = str(value) - else: - language = None - api_values.append( - PublicApiField( - fieldName=field_name, fieldValue=value, language=language - ) - ) - return PublicApiItem( - entityType=model.__class__.__name__, - businessId=model.stableTargetId, - values=api_values, - ) - - -def transform_public_api_item_to_mex_model( - api_item: PublicApiItem, -) -> MExModel | None: - """Try to convert a Public API item into an extracted data instance. 
- - Args: - api_item: Public API item - - Returns: - Transformed model or None if unknown type - """ - classes_by_name: dict[str, type[MExModel]] = dict( - **EXTRACTED_MODEL_CLASSES_BY_NAME, **MERGED_MODEL_CLASSES_BY_NAME - ) - cls = classes_by_name.get(api_item.entityType) - if cls is None: - return None - dct_to_parse: dict[str, list[PublicApiFieldValueTypes]] = defaultdict(list) - for value in api_item.values: - field_name = value.fieldName - annotation = cls.model_fields[field_name].annotation - is_link = _is_type(Link, annotation) - is_text = _is_type(Text, annotation) - if isinstance(value.fieldValue, list): - values = value.fieldValue - else: - values = [value.fieldValue] - for v in values: - if value.language and isinstance(v, str) and is_text: - dct_to_parse[field_name].append( - Text(value=v, language=TextLanguage(value.language)) - ) - elif value.language and isinstance(v, str) and is_link: - link = Link.model_validate(v) - link.language = LinkLanguage(value.language) - dct_to_parse[field_name].append(link) - else: - dct_to_parse[field_name].append(v) - return cls.model_validate(dct_to_parse) diff --git a/mex/common/settings.py b/mex/common/settings.py index b2ffdf8a..814ca782 100644 --- a/mex/common/settings.py +++ b/mex/common/settings.py @@ -1,4 +1,3 @@ -from base64 import b64encode from contextvars import ContextVar from pathlib import Path from typing import Any, Optional, TypeVar, Union @@ -110,7 +109,7 @@ def get(cls: type[SettingsType]) -> SettingsType: [Sink.NDJSON], description=( "Where to send data that is extracted or ingested. Defaults to writing " - "ndjson files, but can be set to backend or public APIs or to graph db." + "ndjson files, but can be configured to push to the backend or the graph." 
), validation_alias="MEX_SINK", ) @@ -132,7 +131,7 @@ def get(cls: type[SettingsType]) -> SettingsType: ) identity_provider: IdentityProvider = Field( IdentityProvider.MEMORY, - description="Provider to assign stableTargetIds to new model instances.", + description="Provider to assign identifiers to new model instances.", validation_alias="MEX_IDENTITY_PROVIDER", ) backend_api_url: AnyUrl = Field( @@ -154,31 +153,6 @@ def get(cls: type[SettingsType]) -> SettingsType: ), validation_alias="MEX_VERIFY_SESSION", ) - public_api_url: AnyUrl = Field( - Url("http://localhost:53000/"), - description="MEx public API url.", - validation_alias="MEX_PUBLIC_API_URL", - ) - public_api_token_provider: AnyUrl = Field( - Url("http://localhost:53000/api/v0/oauth/token"), - description="URL of the JSON Web Token provider for the public API.", - validation_alias="MEX_PUBLIC_API_TOKEN_PROVIDER", - ) - public_api_token_payload: SecretStr = Field( - SecretStr(b64encode(b"payload").decode()), - description=( - "Base64-encoded payload to send when requesting a JWT for the public API." - ), - validation_alias="MEX_PUBLIC_API_TOKEN_PAYLOAD", - ) - public_api_verify_session: Union[bool, AssetsPath] = Field( - True, - description=( - "Public API-specific session verification setting, " - "see `verify_session` for possible values." 
- ), - validation_alias="MEX_PUBLIC_API_VERIFY_SESSION", - ) organigram_path: AssetsPath = Field( AssetsPath("raw-data/organigram/organizational_units.json"), description=( diff --git a/mex/common/sinks/public_api.py b/mex/common/sinks/public_api.py deleted file mode 100644 index 7c161e36..00000000 --- a/mex/common/sinks/public_api.py +++ /dev/null @@ -1,63 +0,0 @@ -from typing import Generator, Iterable - -from mex.common.logging import watch -from mex.common.models import MExModel -from mex.common.public_api.connector import PublicApiConnector -from mex.common.public_api.models import PublicApiItem, PublicApiItemWithoutValues -from mex.common.types import Identifier -from mex.common.utils import grouper - - -@watch -def post_to_public_api( - models: Iterable[MExModel], chunk_size: int = 100 -) -> Generator[Identifier, None, None]: - """Load models to the Public API using bulk insertion. - - Args: - models: Iterable of extracted or merged models - chunk_size: Optional size to chunks to post in one request - - Returns: - Generator for identifiers of posted models - """ - connector = PublicApiConnector.get() - for chunk in grouper(chunk_size, models): - model_list = list(filter(None, chunk)) - yield from connector.post_models(model_list) - - -@watch -def purge_models_from_public_api( - models: Iterable[MExModel], -) -> Generator[str, None, None]: - """Purge models from Public API. - - Args: - models: list of MEx models - - Returns: - Generator for status messages per model - """ - connector = PublicApiConnector.get() - for model in models: - api_id = connector.delete_model(model) - yield f"purged item {api_id} for {model.__class__.__name__} {model.identifier}" - - -@watch -def purge_items_from_public_api( - items: Iterable[PublicApiItem | PublicApiItemWithoutValues], -) -> Generator[str, None, None]: - """Purge items from Public API. 
- - Args: - items: list of Public API items - - Returns: - Generator for status messages per model - """ - connector = PublicApiConnector.get() - for item in items: - api_id = connector.delete_item(item) - yield f"purged item {api_id} for {item.entityType}" diff --git a/mex/common/sinks/purge.py b/mex/common/sinks/purge.py deleted file mode 100644 index c171d909..00000000 --- a/mex/common/sinks/purge.py +++ /dev/null @@ -1,63 +0,0 @@ -from itertools import tee -from typing import Any, Callable, Iterable - -from mex.common.exceptions import MExError -from mex.common.models import MExModel -from mex.common.public_api.models import PublicApiItem, PublicApiItemWithoutValues -from mex.common.settings import BaseSettings -from mex.common.sinks.ndjson import write_ndjson -from mex.common.sinks.public_api import ( - purge_items_from_public_api, - purge_models_from_public_api, -) -from mex.common.types import Sink - -PublicApiItemOptionalValues = PublicApiItem | PublicApiItemWithoutValues - - -def purge_items(items: Iterable[PublicApiItemOptionalValues]) -> None: - """Purge items from the Public API or write to-be-purged items to NDJSON files. - - Args: - items: Iterable of public API items - - Settings: - sink: Where to purge the provided items - """ - settings = BaseSettings.get() - func: Callable[[Iterable[PublicApiItemOptionalValues]], Iterable[Any]] - - for sink, item_gen in zip(settings.sink, tee(items, len(settings.sink))): - if sink == Sink.PUBLIC: - func = purge_items_from_public_api - elif sink == Sink.NDJSON: - func = write_ndjson - else: - raise MExError(f"Cannot purge from {sink}.") - - for _ in func(item_gen): - continue # unpacking the generator - - -def purge_models(models: Iterable[MExModel]) -> None: - """Purge models from the Public API or write to-be-purged models to NDJSON files. 
- - Args: - models: Iterable of MEx models - - Settings: - sink: Where to purge the provided models - """ - settings = BaseSettings.get() - func: Callable[[MExModel], Iterable[Any]] - - for sink, model_gen in zip(settings.sink, tee(models, len(settings.sink))): - if sink == Sink.PUBLIC: - func = purge_models_from_public_api - elif sink == Sink.NDJSON: - func = write_ndjson - else: - raise MExError(f"Cannot purge from {sink}.") - - for _ in func(model_gen): - continue # unpacking the generator diff --git a/mex/common/testing/plugin.py b/mex/common/testing/plugin.py index a86a8f2e..e0958d30 100644 --- a/mex/common/testing/plugin.py +++ b/mex/common/testing/plugin.py @@ -28,7 +28,6 @@ class NoOpPytest: FixtureRequest = Any MonkeyPatch = Any fixture = MagicMock() - mark = MagicMock() try: @@ -71,7 +70,7 @@ def settings() -> BaseSettings: @pytest.fixture(autouse=True) def isolate_settings_context() -> Generator[None, None, None]: - """Automatically close all connectors and remove from context variable.""" + """Automatically reset the settings context variable.""" yield SettingsContext.set(None) @@ -108,7 +107,6 @@ def faker_session_locale() -> list[str]: @pytest.fixture() -@pytest.mark.usefixtures("settings") def extracted_primary_sources() -> dict[str, ExtractedPrimarySource]: """Return a mapping from `identifierInPrimarySource` to ExtractedPrimarySources.""" seed_primary_sources = extract_seed_primary_sources() diff --git a/mex/common/types/path.py b/mex/common/types/path.py index 7a349599..b85e6d56 100644 --- a/mex/common/types/path.py +++ b/mex/common/types/path.py @@ -1,7 +1,6 @@ from os import PathLike from pathlib import Path from typing import Any, Type, TypeVar, Union -from warnings import warn from pydantic_core import core_schema @@ -51,16 +50,6 @@ def is_relative(self) -> bool: """True if the underlying path is relative.""" return not self._path.is_absolute() - def resolve(self) -> Path: - """Return the resolved path which is the underlying path.""" - 
warn("deprecated", DeprecationWarning) - return self._path - - def raw(self) -> Path: - """Return the raw underlying path without resolving it.""" - warn("deprecated", DeprecationWarning) - return self._path - @classmethod def __get_pydantic_core_schema__(cls, _source: Type[Any]) -> core_schema.CoreSchema: """Set schema to str schema.""" diff --git a/mex/common/types/sink.py b/mex/common/types/sink.py index 623edf72..3a6964f8 100644 --- a/mex/common/types/sink.py +++ b/mex/common/types/sink.py @@ -7,4 +7,3 @@ class Sink(Enum): BACKEND = "backend" GRAPH = "graph" NDJSON = "ndjson" - PUBLIC = "public" diff --git a/mex/common/types/timestamp.py b/mex/common/types/timestamp.py index b5b85eb1..300f87e4 100644 --- a/mex/common/types/timestamp.py +++ b/mex/common/types/timestamp.py @@ -240,7 +240,7 @@ def __gt__(self, other: Any) -> bool: try: other = self.validate(other) except TypeError: - raise NotImplementedError() + raise NotImplementedError() from None return bool(self.date_time > other.date_time) def __str__(self) -> str: diff --git a/mex/common/wikidata/connector.py b/mex/common/wikidata/connector.py index ceb88596..d53ad723 100644 --- a/mex/common/wikidata/connector.py +++ b/mex/common/wikidata/connector.py @@ -18,7 +18,7 @@ def _check_availability(self) -> None: """Send a GET request to verify the host is available.""" self.request("GET", params={"format": "json"}) - @cache + @cache # noqa: B019 def get_data_by_query(self, query: str) -> list[dict[str, dict[str, str]]]: """Run provided query on wikidata using wikidata query service. @@ -49,7 +49,7 @@ def _check_availability(self) -> None: "GET", self.url, params={"format": "json", "action": "wbgetentities"} ) - @cache + @cache # noqa: B019 def get_wikidata_item_details_by_id(self, item_id: str) -> dict[str, str]: """Get details of a wikidata item by item id. 
diff --git a/mex/common/wikidata/transform.py b/mex/common/wikidata/transform.py index 22441b0e..501ce616 100644 --- a/mex/common/wikidata/transform.py +++ b/mex/common/wikidata/transform.py @@ -70,9 +70,10 @@ def _get_alternative_names( Returns: combined list of native labels and aliases in DE and EN """ - alternative_names = [] - for alias in all_aliases.en + all_aliases.de: - alternative_names.append(Text(value=alias.value, language=None)) + alternative_names = [ + Text(value=alias.value, language=None) + for alias in all_aliases.en + all_aliases.de + ] for native_label in native_labels: value = native_label.mainsnak.datavalue.value.text diff --git a/poetry.lock b/poetry.lock index efd28a61..6783ae07 100644 --- a/poetry.lock +++ b/poetry.lock @@ -246,63 +246,63 @@ files = [ [[package]] name = "coverage" -version = "7.4.1" +version = "7.4.2" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" files = [ - {file = "coverage-7.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:077d366e724f24fc02dbfe9d946534357fda71af9764ff99d73c3c596001bbd7"}, - {file = "coverage-7.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0193657651f5399d433c92f8ae264aff31fc1d066deee4b831549526433f3f61"}, - {file = "coverage-7.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d17bbc946f52ca67adf72a5ee783cd7cd3477f8f8796f59b4974a9b59cacc9ee"}, - {file = "coverage-7.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3277f5fa7483c927fe3a7b017b39351610265308f5267ac6d4c2b64cc1d8d25"}, - {file = "coverage-7.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dceb61d40cbfcf45f51e59933c784a50846dc03211054bd76b421a713dcdf19"}, - {file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6008adeca04a445ea6ef31b2cbaf1d01d02986047606f7da266629afee982630"}, - {file = 
"coverage-7.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c61f66d93d712f6e03369b6a7769233bfda880b12f417eefdd4f16d1deb2fc4c"}, - {file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b9bb62fac84d5f2ff523304e59e5c439955fb3b7f44e3d7b2085184db74d733b"}, - {file = "coverage-7.4.1-cp310-cp310-win32.whl", hash = "sha256:f86f368e1c7ce897bf2457b9eb61169a44e2ef797099fb5728482b8d69f3f016"}, - {file = "coverage-7.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:869b5046d41abfea3e381dd143407b0d29b8282a904a19cb908fa24d090cc018"}, - {file = "coverage-7.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b8ffb498a83d7e0305968289441914154fb0ef5d8b3157df02a90c6695978295"}, - {file = "coverage-7.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3cacfaefe6089d477264001f90f55b7881ba615953414999c46cc9713ff93c8c"}, - {file = "coverage-7.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d6850e6e36e332d5511a48a251790ddc545e16e8beaf046c03985c69ccb2676"}, - {file = "coverage-7.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18e961aa13b6d47f758cc5879383d27b5b3f3dcd9ce8cdbfdc2571fe86feb4dd"}, - {file = "coverage-7.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfd1e1b9f0898817babf840b77ce9fe655ecbe8b1b327983df485b30df8cc011"}, - {file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6b00e21f86598b6330f0019b40fb397e705135040dbedc2ca9a93c7441178e74"}, - {file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:536d609c6963c50055bab766d9951b6c394759190d03311f3e9fcf194ca909e1"}, - {file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7ac8f8eb153724f84885a1374999b7e45734bf93a87d8df1e7ce2146860edef6"}, - {file = "coverage-7.4.1-cp311-cp311-win32.whl", hash = 
"sha256:f3771b23bb3675a06f5d885c3630b1d01ea6cac9e84a01aaf5508706dba546c5"}, - {file = "coverage-7.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:9d2f9d4cc2a53b38cabc2d6d80f7f9b7e3da26b2f53d48f05876fef7956b6968"}, - {file = "coverage-7.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f68ef3660677e6624c8cace943e4765545f8191313a07288a53d3da188bd8581"}, - {file = "coverage-7.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23b27b8a698e749b61809fb637eb98ebf0e505710ec46a8aa6f1be7dc0dc43a6"}, - {file = "coverage-7.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e3424c554391dc9ef4a92ad28665756566a28fecf47308f91841f6c49288e66"}, - {file = "coverage-7.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0860a348bf7004c812c8368d1fc7f77fe8e4c095d661a579196a9533778e156"}, - {file = "coverage-7.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe558371c1bdf3b8fa03e097c523fb9645b8730399c14fe7721ee9c9e2a545d3"}, - {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3468cc8720402af37b6c6e7e2a9cdb9f6c16c728638a2ebc768ba1ef6f26c3a1"}, - {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:02f2edb575d62172aa28fe00efe821ae31f25dc3d589055b3fb64d51e52e4ab1"}, - {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ca6e61dc52f601d1d224526360cdeab0d0712ec104a2ce6cc5ccef6ed9a233bc"}, - {file = "coverage-7.4.1-cp312-cp312-win32.whl", hash = "sha256:ca7b26a5e456a843b9b6683eada193fc1f65c761b3a473941efe5a291f604c74"}, - {file = "coverage-7.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:85ccc5fa54c2ed64bd91ed3b4a627b9cce04646a659512a051fa82a92c04a448"}, - {file = "coverage-7.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8bdb0285a0202888d19ec6b6d23d5990410decb932b709f2b0dfe216d031d218"}, - {file = 
"coverage-7.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:918440dea04521f499721c039863ef95433314b1db00ff826a02580c1f503e45"}, - {file = "coverage-7.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:379d4c7abad5afbe9d88cc31ea8ca262296480a86af945b08214eb1a556a3e4d"}, - {file = "coverage-7.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b094116f0b6155e36a304ff912f89bbb5067157aff5f94060ff20bbabdc8da06"}, - {file = "coverage-7.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2f5968608b1fe2a1d00d01ad1017ee27efd99b3437e08b83ded9b7af3f6f766"}, - {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:10e88e7f41e6197ea0429ae18f21ff521d4f4490aa33048f6c6f94c6045a6a75"}, - {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a4a3907011d39dbc3e37bdc5df0a8c93853c369039b59efa33a7b6669de04c60"}, - {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6d224f0c4c9c98290a6990259073f496fcec1b5cc613eecbd22786d398ded3ad"}, - {file = "coverage-7.4.1-cp38-cp38-win32.whl", hash = "sha256:23f5881362dcb0e1a92b84b3c2809bdc90db892332daab81ad8f642d8ed55042"}, - {file = "coverage-7.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:a07f61fc452c43cd5328b392e52555f7d1952400a1ad09086c4a8addccbd138d"}, - {file = "coverage-7.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8e738a492b6221f8dcf281b67129510835461132b03024830ac0e554311a5c54"}, - {file = "coverage-7.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:46342fed0fff72efcda77040b14728049200cbba1279e0bf1188f1f2078c1d70"}, - {file = "coverage-7.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9641e21670c68c7e57d2053ddf6c443e4f0a6e18e547e86af3fad0795414a628"}, - {file = "coverage-7.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:aeb2c2688ed93b027eb0d26aa188ada34acb22dceea256d76390eea135083950"}, - {file = "coverage-7.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d12c923757de24e4e2110cf8832d83a886a4cf215c6e61ed506006872b43a6d1"}, - {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0491275c3b9971cdbd28a4595c2cb5838f08036bca31765bad5e17edf900b2c7"}, - {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8dfc5e195bbef80aabd81596ef52a1277ee7143fe419efc3c4d8ba2754671756"}, - {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1a78b656a4d12b0490ca72651fe4d9f5e07e3c6461063a9b6265ee45eb2bdd35"}, - {file = "coverage-7.4.1-cp39-cp39-win32.whl", hash = "sha256:f90515974b39f4dea2f27c0959688621b46d96d5a626cf9c53dbc653a895c05c"}, - {file = "coverage-7.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:64e723ca82a84053dd7bfcc986bdb34af8d9da83c521c19d6b472bc6880e191a"}, - {file = "coverage-7.4.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:32a8d985462e37cfdab611a6f95b09d7c091d07668fdc26e47a725ee575fe166"}, - {file = "coverage-7.4.1.tar.gz", hash = "sha256:1ed4b95480952b1a26d863e546fa5094564aa0065e1e5f0d4d0041f293251d04"}, + {file = "coverage-7.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bf54c3e089179d9d23900e3efc86d46e4431188d9a657f345410eecdd0151f50"}, + {file = "coverage-7.4.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fe6e43c8b510719b48af7db9631b5fbac910ade4bd90e6378c85ac5ac706382c"}, + {file = "coverage-7.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b98c89db1b150d851a7840142d60d01d07677a18f0f46836e691c38134ed18b"}, + {file = "coverage-7.4.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c5f9683be6a5b19cd776ee4e2f2ffb411424819c69afab6b2db3a0a364ec6642"}, + {file = 
"coverage-7.4.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78cdcbf7b9cb83fe047ee09298e25b1cd1636824067166dc97ad0543b079d22f"}, + {file = "coverage-7.4.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:2599972b21911111114100d362aea9e70a88b258400672626efa2b9e2179609c"}, + {file = "coverage-7.4.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ef00d31b7569ed3cb2036f26565f1984b9fc08541731ce01012b02a4c238bf03"}, + {file = "coverage-7.4.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:20a875bfd8c282985c4720c32aa05056f77a68e6d8bbc5fe8632c5860ee0b49b"}, + {file = "coverage-7.4.2-cp310-cp310-win32.whl", hash = "sha256:b3f2b1eb229f23c82898eedfc3296137cf1f16bb145ceab3edfd17cbde273fb7"}, + {file = "coverage-7.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:7df95fdd1432a5d2675ce630fef5f239939e2b3610fe2f2b5bf21fa505256fa3"}, + {file = "coverage-7.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a8ddbd158e069dded57738ea69b9744525181e99974c899b39f75b2b29a624e2"}, + {file = "coverage-7.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81a5fb41b0d24447a47543b749adc34d45a2cf77b48ca74e5bf3de60a7bd9edc"}, + {file = "coverage-7.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2412e98e70f16243be41d20836abd5f3f32edef07cbf8f407f1b6e1ceae783ac"}, + {file = "coverage-7.4.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ddb79414c15c6f03f56cc68fa06994f047cf20207c31b5dad3f6bab54a0f66ef"}, + {file = "coverage-7.4.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf89ab85027427d351f1de918aff4b43f4eb5f33aff6835ed30322a86ac29c9e"}, + {file = "coverage-7.4.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a178b7b1ac0f1530bb28d2e51f88c0bab3e5949835851a60dda80bff6052510c"}, + {file = "coverage-7.4.2-cp311-cp311-musllinux_1_1_i686.whl", 
hash = "sha256:06fe398145a2e91edaf1ab4eee66149c6776c6b25b136f4a86fcbbb09512fd10"}, + {file = "coverage-7.4.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:18cac867950943fe93d6cd56a67eb7dcd2d4a781a40f4c1e25d6f1ed98721a55"}, + {file = "coverage-7.4.2-cp311-cp311-win32.whl", hash = "sha256:f72cdd2586f9a769570d4b5714a3837b3a59a53b096bb954f1811f6a0afad305"}, + {file = "coverage-7.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:d779a48fac416387dd5673fc5b2d6bd903ed903faaa3247dc1865c65eaa5a93e"}, + {file = "coverage-7.4.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:adbdfcda2469d188d79771d5696dc54fab98a16d2ef7e0875013b5f56a251047"}, + {file = "coverage-7.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ac4bab32f396b03ebecfcf2971668da9275b3bb5f81b3b6ba96622f4ef3f6e17"}, + {file = "coverage-7.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:006d220ba2e1a45f1de083d5022d4955abb0aedd78904cd5a779b955b019ec73"}, + {file = "coverage-7.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3733545eb294e5ad274abe131d1e7e7de4ba17a144505c12feca48803fea5f64"}, + {file = "coverage-7.4.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42a9e754aa250fe61f0f99986399cec086d7e7a01dd82fd863a20af34cbce962"}, + {file = "coverage-7.4.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2ed37e16cf35c8d6e0b430254574b8edd242a367a1b1531bd1adc99c6a5e00fe"}, + {file = "coverage-7.4.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:b953275d4edfab6cc0ed7139fa773dfb89e81fee1569a932f6020ce7c6da0e8f"}, + {file = "coverage-7.4.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:32b4ab7e6c924f945cbae5392832e93e4ceb81483fd6dc4aa8fb1a97b9d3e0e1"}, + {file = "coverage-7.4.2-cp312-cp312-win32.whl", hash = "sha256:f5df76c58977bc35a49515b2fbba84a1d952ff0ec784a4070334dfbec28a2def"}, + {file = 
"coverage-7.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:34423abbaad70fea9d0164add189eabaea679068ebdf693baa5c02d03e7db244"}, + {file = "coverage-7.4.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5b11f9c6587668e495cc7365f85c93bed34c3a81f9f08b0920b87a89acc13469"}, + {file = "coverage-7.4.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:51593a1f05c39332f623d64d910445fdec3d2ac2d96b37ce7f331882d5678ddf"}, + {file = "coverage-7.4.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69f1665165ba2fe7614e2f0c1aed71e14d83510bf67e2ee13df467d1c08bf1e8"}, + {file = "coverage-7.4.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3c8bbb95a699c80a167478478efe5e09ad31680931ec280bf2087905e3b95ec"}, + {file = "coverage-7.4.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:175f56572f25e1e1201d2b3e07b71ca4d201bf0b9cb8fad3f1dfae6a4188de86"}, + {file = "coverage-7.4.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8562ca91e8c40864942615b1d0b12289d3e745e6b2da901d133f52f2d510a1e3"}, + {file = "coverage-7.4.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d9a1ef0f173e1a19738f154fb3644f90d0ada56fe6c9b422f992b04266c55d5a"}, + {file = "coverage-7.4.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f40ac873045db4fd98a6f40387d242bde2708a3f8167bd967ccd43ad46394ba2"}, + {file = "coverage-7.4.2-cp38-cp38-win32.whl", hash = "sha256:d1b750a8409bec61caa7824bfd64a8074b6d2d420433f64c161a8335796c7c6b"}, + {file = "coverage-7.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:b4ae777bebaed89e3a7e80c4a03fac434a98a8abb5251b2a957d38fe3fd30088"}, + {file = "coverage-7.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3ff7f92ae5a456101ca8f48387fd3c56eb96353588e686286f50633a611afc95"}, + {file = "coverage-7.4.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:861d75402269ffda0b33af94694b8e0703563116b04c681b1832903fac8fd647"}, + {file = 
"coverage-7.4.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3507427d83fa961cbd73f11140f4a5ce84208d31756f7238d6257b2d3d868405"}, + {file = "coverage-7.4.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bf711d517e21fb5bc429f5c4308fbc430a8585ff2a43e88540264ae87871e36a"}, + {file = "coverage-7.4.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c00e54f0bd258ab25e7f731ca1d5144b0bf7bec0051abccd2bdcff65fa3262c9"}, + {file = "coverage-7.4.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f8e845d894e39fb53834da826078f6dc1a933b32b1478cf437007367efaf6f6a"}, + {file = "coverage-7.4.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:840456cb1067dc350af9080298c7c2cfdddcedc1cb1e0b30dceecdaf7be1a2d3"}, + {file = "coverage-7.4.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c11ca2df2206a4e3e4c4567f52594637392ed05d7c7fb73b4ea1c658ba560265"}, + {file = "coverage-7.4.2-cp39-cp39-win32.whl", hash = "sha256:3ff5bdb08d8938d336ce4088ca1a1e4b6c8cd3bef8bb3a4c0eb2f37406e49643"}, + {file = "coverage-7.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:ac9e95cefcf044c98d4e2c829cd0669918585755dd9a92e28a1a7012322d0a95"}, + {file = "coverage-7.4.2-pp38.pp39.pp310-none-any.whl", hash = "sha256:f593a4a90118d99014517c2679e04a4ef5aee2d81aa05c26c734d271065efcb6"}, + {file = "coverage-7.4.2.tar.gz", hash = "sha256:1a5ee18e3a8d766075ce9314ed1cb695414bae67df6a4b0805f5137d93d6f1cb"}, ] [package.extras] @@ -408,13 +408,13 @@ ipython = {version = ">=7.31.1", markers = "python_version >= \"3.11\""} [[package]] name = "ipython" -version = "8.21.0" +version = "8.22.1" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.10" files = [ - {file = "ipython-8.21.0-py3-none-any.whl", hash = "sha256:1050a3ab8473488d7eee163796b02e511d0735cf43a04ba2a8348bd0f2eaf8a5"}, - {file = "ipython-8.21.0.tar.gz", hash = 
"sha256:48fbc236fbe0e138b88773fa0437751f14c3645fb483f1d4c5dee58b37e5ce73"}, + {file = "ipython-8.22.1-py3-none-any.whl", hash = "sha256:869335e8cded62ffb6fac8928e5287a05433d6462e3ebaac25f4216474dd6bc4"}, + {file = "ipython-8.22.1.tar.gz", hash = "sha256:39c6f9efc079fb19bfb0f17eee903978fe9a290b1b82d68196c641cecb76ea22"}, ] [package.dependencies] @@ -422,16 +422,16 @@ colorama = {version = "*", markers = "sys_platform == \"win32\""} decorator = "*" jedi = ">=0.16" matplotlib-inline = "*" -pexpect = {version = ">4.3", markers = "sys_platform != \"win32\""} +pexpect = {version = ">4.3", markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""} prompt-toolkit = ">=3.0.41,<3.1.0" pygments = ">=2.4.0" stack-data = "*" -traitlets = ">=5" +traitlets = ">=5.13.0" [package.extras] -all = ["black", "curio", "docrepr", "exceptiongroup", "ipykernel", "ipyparallel", "ipywidgets", "matplotlib", "matplotlib (!=3.2.0)", "nbconvert", "nbformat", "notebook", "numpy (>=1.23)", "pandas", "pickleshare", "pytest (<8)", "pytest-asyncio (<0.22)", "qtconsole", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "stack-data", "testpath", "trio", "typing-extensions"] +all = ["ipython[black,doc,kernel,nbconvert,nbformat,notebook,parallel,qtconsole,terminal]", "ipython[test,test-extra]"] black = ["black"] -doc = ["docrepr", "exceptiongroup", "ipykernel", "matplotlib", "pickleshare", "pytest (<8)", "pytest-asyncio (<0.22)", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "stack-data", "testpath", "typing-extensions"] +doc = ["docrepr", "exceptiongroup", "ipykernel", "ipython[test]", "matplotlib", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "sphinxcontrib-jquery", "stack-data", "typing-extensions"] kernel = ["ipykernel"] nbconvert = ["nbconvert"] nbformat = ["nbformat"] @@ -439,7 +439,7 @@ notebook = ["ipywidgets", "notebook"] parallel = ["ipyparallel"] qtconsole = ["qtconsole"] test = ["pickleshare", "pytest (<8)", "pytest-asyncio (<0.22)", 
"testpath"] -test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.23)", "pandas", "pickleshare", "pytest (<8)", "pytest-asyncio (<0.22)", "testpath", "trio"] +test-extra = ["curio", "ipython[test]", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.23)", "pandas", "trio"] [[package]] name = "jedi" @@ -789,13 +789,13 @@ xml = ["lxml (>=4.9.2)"] [[package]] name = "pandas-stubs" -version = "2.1.4.231227" +version = "2.2.0.240218" description = "Type annotations for pandas" optional = false python-versions = ">=3.9" files = [ - {file = "pandas_stubs-2.1.4.231227-py3-none-any.whl", hash = "sha256:211fc23e6ae87073bdf41dbf362c4a4d85e1e3477cb078dbac3da6c7fdaefba8"}, - {file = "pandas_stubs-2.1.4.231227.tar.gz", hash = "sha256:3ea29ef001e9e44985f5ebde02d4413f94891ef6ec7e5056fb07d125be796c23"}, + {file = "pandas_stubs-2.2.0.240218-py3-none-any.whl", hash = "sha256:e97478320add9b958391b15a56c5f1bf29da656d5b747d28bbe708454b3a1fe6"}, + {file = "pandas_stubs-2.2.0.240218.tar.gz", hash = "sha256:63138c12eec715d66d48611bdd922f31cd7c78bcadd19384c3bd61fd3720a11a"}, ] [package.dependencies] @@ -911,6 +911,54 @@ files = [ [package.extras] tests = ["pytest"] +[[package]] +name = "pyarrow" +version = "15.0.0" +description = "Python library for Apache Arrow" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyarrow-15.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:0a524532fd6dd482edaa563b686d754c70417c2f72742a8c990b322d4c03a15d"}, + {file = "pyarrow-15.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60a6bdb314affa9c2e0d5dddf3d9cbb9ef4a8dddaa68669975287d47ece67642"}, + {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:66958fd1771a4d4b754cd385835e66a3ef6b12611e001d4e5edfcef5f30391e2"}, + {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f500956a49aadd907eaa21d4fff75f73954605eaa41f61cb94fb008cf2e00c6"}, + {file = 
"pyarrow-15.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6f87d9c4f09e049c2cade559643424da84c43a35068f2a1c4653dc5b1408a929"}, + {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:85239b9f93278e130d86c0e6bb455dcb66fc3fd891398b9d45ace8799a871a1e"}, + {file = "pyarrow-15.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5b8d43e31ca16aa6e12402fcb1e14352d0d809de70edd185c7650fe80e0769e3"}, + {file = "pyarrow-15.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:fa7cd198280dbd0c988df525e50e35b5d16873e2cdae2aaaa6363cdb64e3eec5"}, + {file = "pyarrow-15.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8780b1a29d3c8b21ba6b191305a2a607de2e30dab399776ff0aa09131e266340"}, + {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe0ec198ccc680f6c92723fadcb97b74f07c45ff3fdec9dd765deb04955ccf19"}, + {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036a7209c235588c2f07477fe75c07e6caced9b7b61bb897c8d4e52c4b5f9555"}, + {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2bd8a0e5296797faf9a3294e9fa2dc67aa7f10ae2207920dbebb785c77e9dbe5"}, + {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e8ebed6053dbe76883a822d4e8da36860f479d55a762bd9e70d8494aed87113e"}, + {file = "pyarrow-15.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:17d53a9d1b2b5bd7d5e4cd84d018e2a45bc9baaa68f7e6e3ebed45649900ba99"}, + {file = "pyarrow-15.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9950a9c9df24090d3d558b43b97753b8f5867fb8e521f29876aa021c52fda351"}, + {file = "pyarrow-15.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:003d680b5e422d0204e7287bb3fa775b332b3fce2996aa69e9adea23f5c8f970"}, + {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f75fce89dad10c95f4bf590b765e3ae98bcc5ba9f6ce75adb828a334e26a3d40"}, + {file = 
"pyarrow-15.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca9cb0039923bec49b4fe23803807e4ef39576a2bec59c32b11296464623dc2"}, + {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ed5a78ed29d171d0acc26a305a4b7f83c122d54ff5270810ac23c75813585e4"}, + {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6eda9e117f0402dfcd3cd6ec9bfee89ac5071c48fc83a84f3075b60efa96747f"}, + {file = "pyarrow-15.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a3a6180c0e8f2727e6f1b1c87c72d3254cac909e609f35f22532e4115461177"}, + {file = "pyarrow-15.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:19a8918045993349b207de72d4576af0191beef03ea655d8bdb13762f0cd6eac"}, + {file = "pyarrow-15.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d0ec076b32bacb6666e8813a22e6e5a7ef1314c8069d4ff345efa6246bc38593"}, + {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5db1769e5d0a77eb92344c7382d6543bea1164cca3704f84aa44e26c67e320fb"}, + {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2617e3bf9df2a00020dd1c1c6dce5cc343d979efe10bc401c0632b0eef6ef5b"}, + {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:d31c1d45060180131caf10f0f698e3a782db333a422038bf7fe01dace18b3a31"}, + {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:c8c287d1d479de8269398b34282e206844abb3208224dbdd7166d580804674b7"}, + {file = "pyarrow-15.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:07eb7f07dc9ecbb8dace0f58f009d3a29ee58682fcdc91337dfeb51ea618a75b"}, + {file = "pyarrow-15.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:47af7036f64fce990bb8a5948c04722e4e3ea3e13b1007ef52dfe0aa8f23cf7f"}, + {file = "pyarrow-15.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93768ccfff85cf044c418bfeeafce9a8bb0cee091bd8fd19011aff91e58de540"}, + {file = 
"pyarrow-15.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6ee87fd6892700960d90abb7b17a72a5abb3b64ee0fe8db6c782bcc2d0dc0b4"}, + {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:001fca027738c5f6be0b7a3159cc7ba16a5c52486db18160909a0831b063c4e4"}, + {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:d1c48648f64aec09accf44140dccb92f4f94394b8d79976c426a5b79b11d4fa7"}, + {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:972a0141be402bb18e3201448c8ae62958c9c7923dfaa3b3d4530c835ac81aed"}, + {file = "pyarrow-15.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:f01fc5cf49081426429127aa2d427d9d98e1cb94a32cb961d583a70b7c4504e6"}, + {file = "pyarrow-15.0.0.tar.gz", hash = "sha256:876858f549d540898f927eba4ef77cd549ad8d24baa3207cf1b72e5788b50e83"}, +] + +[package.dependencies] +numpy = ">=1.16.6,<2" + [[package]] name = "pyasn1" version = "0.5.1" @@ -1034,19 +1082,23 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" [[package]] name = "pydantic-settings" -version = "2.1.0" +version = "2.2.1" description = "Settings management using Pydantic" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic_settings-2.1.0-py3-none-any.whl", hash = "sha256:7621c0cb5d90d1140d2f0ef557bdf03573aac7035948109adf2574770b77605a"}, - {file = "pydantic_settings-2.1.0.tar.gz", hash = "sha256:26b1492e0a24755626ac5e6d715e9077ab7ad4fb5f19a8b7ed7011d52f36141c"}, + {file = "pydantic_settings-2.2.1-py3-none-any.whl", hash = "sha256:0235391d26db4d2190cb9b31051c4b46882d28a51533f97440867f012d4da091"}, + {file = "pydantic_settings-2.2.1.tar.gz", hash = "sha256:00b9f6a5e95553590434c0fa01ead0b216c3e10bc54ae02e37f359948643c5ed"}, ] [package.dependencies] pydantic = ">=2.3.0" python-dotenv = ">=0.21.0" +[package.extras] +toml = ["tomli (>=2.0.1)"] +yaml = ["pyyaml (>=6.0.1)"] + [[package]] name = "pygments" version = "2.17.2" @@ -1064,13 +1116,13 @@ windows-terminal = 
["colorama (>=0.4.6)"] [[package]] name = "pytest" -version = "8.0.0" +version = "8.0.1" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-8.0.0-py3-none-any.whl", hash = "sha256:50fb9cbe836c3f20f0dfa99c565201fb75dc54c8d76373cd1bde06b06657bdb6"}, - {file = "pytest-8.0.0.tar.gz", hash = "sha256:249b1b0864530ba251b7438274c4d251c58d868edaaec8762893ad4a0d71c36c"}, + {file = "pytest-8.0.1-py3-none-any.whl", hash = "sha256:3e4f16fe1c0a9dc9d9389161c127c3edc5d810c38d6793042fb81d9f48a59fca"}, + {file = "pytest-8.0.1.tar.gz", hash = "sha256:267f6563751877d772019b13aacbe4e860d73fe8f651f28112e9ac37de7513ae"}, ] [package.dependencies] @@ -1196,28 +1248,28 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "ruff" -version = "0.2.1" +version = "0.2.2" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.2.1-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:dd81b911d28925e7e8b323e8d06951554655021df8dd4ac3045d7212ac4ba080"}, - {file = "ruff-0.2.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:dc586724a95b7d980aa17f671e173df00f0a2eef23f8babbeee663229a938fec"}, - {file = "ruff-0.2.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c92db7101ef5bfc18e96777ed7bc7c822d545fa5977e90a585accac43d22f18a"}, - {file = "ruff-0.2.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:13471684694d41ae0f1e8e3a7497e14cd57ccb7dd72ae08d56a159d6c9c3e30e"}, - {file = "ruff-0.2.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a11567e20ea39d1f51aebd778685582d4c56ccb082c1161ffc10f79bebe6df35"}, - {file = "ruff-0.2.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:00a818e2db63659570403e44383ab03c529c2b9678ba4ba6c105af7854008105"}, - {file = 
"ruff-0.2.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be60592f9d218b52f03384d1325efa9d3b41e4c4d55ea022cd548547cc42cd2b"}, - {file = "ruff-0.2.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fbd2288890b88e8aab4499e55148805b58ec711053588cc2f0196a44f6e3d855"}, - {file = "ruff-0.2.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3ef052283da7dec1987bba8d8733051c2325654641dfe5877a4022108098683"}, - {file = "ruff-0.2.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:7022d66366d6fded4ba3889f73cd791c2d5621b2ccf34befc752cb0df70f5fad"}, - {file = "ruff-0.2.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:0a725823cb2a3f08ee743a534cb6935727d9e47409e4ad72c10a3faf042ad5ba"}, - {file = "ruff-0.2.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:0034d5b6323e6e8fe91b2a1e55b02d92d0b582d2953a2b37a67a2d7dedbb7acc"}, - {file = "ruff-0.2.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:e5cb5526d69bb9143c2e4d2a115d08ffca3d8e0fddc84925a7b54931c96f5c02"}, - {file = "ruff-0.2.1-py3-none-win32.whl", hash = "sha256:6b95ac9ce49b4fb390634d46d6ece32ace3acdd52814671ccaf20b7f60adb232"}, - {file = "ruff-0.2.1-py3-none-win_amd64.whl", hash = "sha256:e3affdcbc2afb6f5bd0eb3130139ceedc5e3f28d206fe49f63073cb9e65988e0"}, - {file = "ruff-0.2.1-py3-none-win_arm64.whl", hash = "sha256:efababa8e12330aa94a53e90a81eb6e2d55f348bc2e71adbf17d9cad23c03ee6"}, - {file = "ruff-0.2.1.tar.gz", hash = "sha256:3b42b5d8677cd0c72b99fcaf068ffc62abb5a19e71b4a3b9cfa50658a0af02f1"}, + {file = "ruff-0.2.2-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:0a9efb032855ffb3c21f6405751d5e147b0c6b631e3ca3f6b20f917572b97eb6"}, + {file = "ruff-0.2.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d450b7fbff85913f866a5384d8912710936e2b96da74541c82c1b458472ddb39"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:ecd46e3106850a5c26aee114e562c329f9a1fbe9e4821b008c4404f64ff9ce73"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5e22676a5b875bd72acd3d11d5fa9075d3a5f53b877fe7b4793e4673499318ba"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1695700d1e25a99d28f7a1636d85bafcc5030bba9d0578c0781ba1790dbcf51c"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:b0c232af3d0bd8f521806223723456ffebf8e323bd1e4e82b0befb20ba18388e"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f63d96494eeec2fc70d909393bcd76c69f35334cdbd9e20d089fb3f0640216ca"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6a61ea0ff048e06de273b2e45bd72629f470f5da8f71daf09fe481278b175001"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e1439c8f407e4f356470e54cdecdca1bd5439a0673792dbe34a2b0a551a2fe3"}, + {file = "ruff-0.2.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:940de32dc8853eba0f67f7198b3e79bc6ba95c2edbfdfac2144c8235114d6726"}, + {file = "ruff-0.2.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:0c126da55c38dd917621552ab430213bdb3273bb10ddb67bc4b761989210eb6e"}, + {file = "ruff-0.2.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:3b65494f7e4bed2e74110dac1f0d17dc8e1f42faaa784e7c58a98e335ec83d7e"}, + {file = "ruff-0.2.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1ec49be4fe6ddac0503833f3ed8930528e26d1e60ad35c2446da372d16651ce9"}, + {file = "ruff-0.2.2-py3-none-win32.whl", hash = "sha256:d920499b576f6c68295bc04e7b17b6544d9d05f196bb3aac4358792ef6f34325"}, + {file = "ruff-0.2.2-py3-none-win_amd64.whl", hash = "sha256:cc9a91ae137d687f43a44c900e5d95e9617cb37d4c989e462980ba27039d239d"}, + {file = "ruff-0.2.2-py3-none-win_arm64.whl", hash = "sha256:c9d15fc41e6054bfc7200478720570078f0b41c9ae4f010bcc16bd6f4d1aacdd"}, + 
{file = "ruff-0.2.2.tar.gz", hash = "sha256:e62ed7f36b3068a30ba39193a14274cd706bc486fad521276458022f7bccb31d"}, ] [[package]] @@ -1442,13 +1494,13 @@ files = [ [[package]] name = "types-requests" -version = "2.31.0.20240125" +version = "2.31.0.20240218" description = "Typing stubs for requests" optional = false python-versions = ">=3.8" files = [ - {file = "types-requests-2.31.0.20240125.tar.gz", hash = "sha256:03a28ce1d7cd54199148e043b2079cdded22d6795d19a2c2a6791a4b2b5e2eb5"}, - {file = "types_requests-2.31.0.20240125-py3-none-any.whl", hash = "sha256:9592a9a4cb92d6d75d9b491a41477272b710e021011a2a3061157e2fb1f1a5d1"}, + {file = "types-requests-2.31.0.20240218.tar.gz", hash = "sha256:f1721dba8385958f504a5386240b92de4734e047a08a40751c1654d1ac3349c5"}, + {file = "types_requests-2.31.0.20240218-py3-none-any.whl", hash = "sha256:a82807ec6ddce8f00fe0e949da6d6bc1fbf1715420218a9640d695f70a9e5a9b"}, ] [package.dependencies] @@ -1489,13 +1541,13 @@ files = [ [[package]] name = "urllib3" -version = "2.2.0" +version = "2.2.1" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.8" files = [ - {file = "urllib3-2.2.0-py3-none-any.whl", hash = "sha256:ce3711610ddce217e6d113a2732fafad960a03fd0318c91faa79481e35c11224"}, - {file = "urllib3-2.2.0.tar.gz", hash = "sha256:051d961ad0c62a94e50ecf1af379c3aba230c66c710493493560c0c223c49f20"}, + {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, + {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, ] [package.extras] @@ -1518,4 +1570,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "97d2490300dbf924f1c2f6ea7298593b470de8d434605ef15d28351b459794b0" +content-hash = "c2a795d0d018d91ec0cce8f1c00ba3a46c2e5b29ec3f8978e9d0363ed49ce71d" diff --git a/pyproject.toml b/pyproject.toml index 0bac2552..9374344c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "mex-common" -version = "0.19.4" +version = "0.20.0" description = "Common library for MEx python projects." 
authors = ["RKI MEx Team "] readme = "README.md" @@ -15,13 +15,14 @@ click = "^8.1.7" langdetect = "^1.0.9" ldap3 = "^2.9.1" numpy = "^1.26.4" -pandas = "^2.1.4" +pandas = "^2.2.0" +pyarrow = "^15.0.0" pydantic = "^2.6.1" pydantic-settings = "^2.1.0" requests = "^2.31.0" [tool.poetry.group.dev.dependencies] -black = "^24.1.1" +black = "^24.2.0" ipdb = "^0.13.13" mex-model = { git = "https://github.com/robert-koch-institut/mex-model.git", rev = "2.2.0" } mypy = "^1.8.0" @@ -30,7 +31,7 @@ pytest = "^8.0.0" pytest-cov = "^4.1.0" pytest-random-order = "^1.1.1" pytest-xdist = "^3.5.0" -ruff = "^0.2.0" +ruff = "^0.2.1" sphinx = "^7.2.6" types-ldap3 = "^2.9.13" types-pytz = "^2024.1.0" @@ -66,11 +67,15 @@ addopts = [ "--cov-fail-under=95", "--cov-branch", "--pdbcls=IPython.terminal.debugger:TerminalPdb", + "--random-order-bucket=global", ] markers = "integration: mark a test as integration test" [tool.ruff] fix = true +show-fixes = true + +[tool.ruff.lint] ignore = [ "D100", # Allow missing module docstring for brevity "D104", # Allow missing package docstring for brevity @@ -85,24 +90,26 @@ ignore = [ "RUF012", # Allow mutable class attributes (pydantic compat) ] select = [ - "C90", # McCabe complexity checker - "D", # Python docstring style checker - "E", # Python code style errors - "ERA", # Commented-out code detector - "F", # Pyflakes passive python checker - "I", # Isort import utility - "N", # Pep8 naming conventions - "RET", # Flake8 return statement checker - "RUF", # Ruff-specific rules - "S", # Bandit automated security testing - "T10", # Flake8 debug statement checker - "T20", # Flake8 print statement checker - "W", # Python code style warnings + "A", # Flake8 builtin shadow + "B", # BugBear bug and issue finder + "C90", # McCabe complexity checker + "D", # Python docstring style checker + "E", # Python code style errors + "ERA", # Commented-out code detector + "F", # Pyflakes passive python checker + "I", # Isort import utility + "N", # Pep8 naming 
conventions + "PERF", # Lint performance anti-patterns + "RET", # Flake8 return statement checker + "RUF", # Ruff-specific rules + "S", # Bandit automated security testing + "T10", # Flake8 debug statement checker + "T20", # Flake8 print statement checker + "UP", # PyUpgrade syntax recommender + "W", # Python code style warnings ] -show-fixes = true -target-version = "py311" -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "tests/**" = [ "D101", # Allow missing docstring in public class for tests "D102", # Allow missing docstring in public method for tests @@ -113,10 +120,10 @@ target-version = "py311" "S101", # Allow use of `assert` in tests ] -[tool.ruff.isort] +[tool.ruff.lint.isort] known-first-party = ["mex", "tests"] -[tool.ruff.pydocstyle] +[tool.ruff.lint.pydocstyle] convention = "google" [build-system] diff --git a/tests/identity/test_registry.py b/tests/identity/test_registry.py index 09116247..bb16f869 100644 --- a/tests/identity/test_registry.py +++ b/tests/identity/test_registry.py @@ -23,10 +23,10 @@ class DummyProvider(BaseProvider): def __init__(self) -> None: pass - def assign(self, *_: str) -> Identity: + def assign(self, *_: str) -> Identity: # pragma: no cover raise RuntimeError() - def fetch(self, **_: str | None) -> list[Identity]: + def fetch(self, **_: str | None) -> list[Identity]: # pragma: no cover raise RuntimeError() def close(self) -> None: diff --git a/tests/models/test_base.py b/tests/models/test_base.py index bff6d295..3f7877b0 100644 --- a/tests/models/test_base.py +++ b/tests/models/test_base.py @@ -16,7 +16,7 @@ class ComplexDummyModel(BaseModel): required_list: list[str] = [] -def test__get_field_names_allowing_none() -> None: +def test_get_field_names_allowing_none() -> None: assert ComplexDummyModel._get_field_names_allowing_none() == [ "optional_str", "optional_list", diff --git a/tests/public_api/__init__.py b/tests/public_api/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git 
a/tests/public_api/conftest.py b/tests/public_api/conftest.py deleted file mode 100644 index cff689fb..00000000 --- a/tests/public_api/conftest.py +++ /dev/null @@ -1,73 +0,0 @@ -from unittest.mock import MagicMock, Mock - -import pytest -import requests -from pytest import MonkeyPatch - -from mex.common.public_api.connector import PublicApiConnector -from mex.common.public_api.models import PublicApiMetadataItemsResponse - - -@pytest.fixture -def mocked_api_session(monkeypatch: MonkeyPatch) -> MagicMock: - """Mock the PublicApiConnector with a MagicMock session and return that.""" - mocked_session = MagicMock(spec=requests.Session, name="public_api_session") - mocked_session.request = MagicMock( - return_value=Mock(spec=requests.Response, status_code=200) - ) - - def set_mocked_session(self: PublicApiConnector) -> None: - self.session = mocked_session - - monkeypatch.setattr(PublicApiConnector, "_set_session", set_mocked_session) - monkeypatch.setattr(PublicApiConnector, "wait_for_job", MagicMock()) - return mocked_session - - -@pytest.fixture -def mocked_api_session_authenticated(mocked_api_session: MagicMock) -> MagicMock: - """Get the authenticated MagicMock session.""" - mocked_api_session.post = mocked_post = MagicMock( - return_value=Mock(spec=requests.Response), - ) - mocked_api_session.headers = {} - mocked_post.return_value.json = MagicMock( - return_value={ - "access_token": "expected-jwt", - "expires_in": 300, - "token_type": "Bearer", - }, - ) - return mocked_api_session - - -@pytest.fixture -def mex_metadata_items_response() -> PublicApiMetadataItemsResponse: - """Return a dummy PublicApiMetadataItemsResponse for testing purposes.""" - return PublicApiMetadataItemsResponse.model_validate( - { - "items": [ - { - "itemId": "00005da9-f653-4c9c-b123-7b555d36b0fd", - "businessId": "bgmAz9QJ7IaHGNyMamwhUx", - "entityType": "Datum", - }, - { - "itemId": "000054a2-b16a-4f4e-82d2-1a222dba41e6", - "businessId": "hLRKpjTpCS06BniW1l2NcU", - "entityType": 
"ExtractedDatum", - }, - { - "itemId": "000054a2-b16a-4f4e-82d2-1a222fba41e6", - "businessId": "g5MAfZYmivhK1I2voBK5bO", - "entityType": "ExtractedPerson", - }, - { - "itemId": "000054a3-b16a-4f4e-82d2-1a222fbc41e6", - "businessId": "cOfkBGYSeIjcKCdDZJQ0yk", - "entityType": "Person", - }, - ], - "next": "", - } - ) diff --git a/tests/public_api/test_connector.py b/tests/public_api/test_connector.py deleted file mode 100644 index 50d6f7e7..00000000 --- a/tests/public_api/test_connector.py +++ /dev/null @@ -1,336 +0,0 @@ -import json -from base64 import b64decode -from unittest.mock import MagicMock, Mock -from uuid import UUID - -import pytest -import requests -from requests import HTTPError - -from mex.common.models import ExtractedActivity, ExtractedPerson -from mex.common.models.activity import ActivityType -from mex.common.public_api.connector import PublicApiConnector -from mex.common.public_api.models import PublicApiMetadataItemsResponse -from mex.common.settings import BaseSettings -from mex.common.types import Identifier, Link, Text, Timestamp - - -def test_authenticate_mocked(mocked_api_session: MagicMock) -> None: - settings = BaseSettings.get() - - mocked_api_session.post = mocked_post = MagicMock( - return_value=Mock(spec=requests.Response), - ) - mocked_api_session.headers = {} - mocked_post.return_value.json = MagicMock( - return_value={ - "access_token": "expected-jwt", - "expires_in": 300, - "token_type": "Bearer", - }, - ) - - connector = PublicApiConnector.get() - - mocked_post.assert_called_once_with( - str(settings.public_api_token_provider), - data=b64decode(settings.public_api_token_payload.get_secret_value()), - timeout=PublicApiConnector.TIMEOUT, - headers={"Accept": "*/*", "Authorization": None}, - ) - assert connector.session.headers["Authorization"] == "Bearer expected-jwt" - - -def test_post_models_mocked( - extracted_person: ExtractedPerson, mocked_api_session_authenticated: MagicMock -) -> None: - expected_payload = { - "items": [ - { 
- "entityType": "ExtractedPerson", - "values": [ - { - "fieldName": "affiliation", - "fieldValue": "bFQoRhcVH5DHZg", - "language": None, - }, - { - "fieldName": "email", - "fieldValue": "TintzmannM@rki.de", - "language": None, - }, - { - "fieldName": "entityType", - "fieldValue": "ExtractedPerson", - "language": None, - }, - { - "fieldName": "familyName", - "fieldValue": "Tintzmann", - "language": None, - }, - { - "fieldName": "fullName", - "fieldValue": "Meinrad I. Tintzmann", - "language": None, - }, - { - "fieldName": "givenName", - "fieldValue": "Meinrad", - "language": None, - }, - { - "fieldName": "hadPrimarySource", - "fieldValue": "bFQoRhcVH5DHXE", - "language": None, - }, - { - "fieldName": "identifier", - "fieldValue": "bFQoRhcVH5DH3i", - "language": None, - }, - { - "fieldName": "identifierInPrimarySource", - "fieldValue": "00000000-0000-4000-8000-0000000003de", - "language": None, - }, - { - "fieldName": "isniId", - "fieldValue": "https://isni.org/isni/0000000109403744", - "language": None, - }, - { - "fieldName": "memberOf", - "fieldValue": "bFQoRhcVH5DHV2", - "language": None, - }, - { - "fieldName": "memberOf", - "fieldValue": "bFQoRhcVH5DHV3", - "language": None, - }, - { - "fieldName": "orcidId", - "fieldValue": "https://orcid.org/0000-0002-9079-593X", - "language": None, - }, - { - "fieldName": "stableTargetId", - "fieldValue": "bFQoRhcVH5DH8y", - "language": None, - }, - ], - } - ] - } - - mocked_response = Mock(spec=requests.Response) - mocked_response.status_code = 201 - mocked_response.json = MagicMock(return_value={"jobId": "000332211bbb"}) - mocked_api_session_authenticated.request = MagicMock(return_value=mocked_response) - - connector = PublicApiConnector.get() - connector.post_models([extracted_person], wait_for_done=False) - payload = json.loads( - mocked_api_session_authenticated.request.call_args.kwargs["data"] - ) - - assert payload == expected_payload - - -@pytest.mark.skip(reason="public api is being deprecated") 
-@pytest.mark.integration -def test_search_model_that_does_not_exist() -> None: - random_id = Identifier.generate() - connector = PublicApiConnector.get() - - result = None - try: - result = connector.search_model(ExtractedActivity, random_id) - except HTTPError as error: - if error.response.json().get("message") == "could not create Solr query": - pytest.skip("integration test failed due to misconfiguration") - else: - raise error - - assert result is None - - -def test_search_model_mocked(mocked_api_session_authenticated: MagicMock) -> None: - item_id = UUID("00000000-0000-4000-8000-111111110999") - - activity = ExtractedActivity( - abstract=[ - Text(value="Dies ist ein deutscher Text."), - Text(value="And this is in english."), - ], - activityType=[ActivityType["SPECIAL_RESEARCH_PROJECT"]], - alternativeTitle=[Text(value="ᵗʰᵉ ˡᵃⁿᵍᵘᵃᵍᵉ ᵒᶠ ᵗʰᶦˢ ᵗᵉˣᵗ ᶦˢ ʰᵃʳᵈ ᵗᵒ ᵈᵉᵗᵉᶜᵗ")], - contact=[Identifier("00000000000590")], - documentation=[Link(url="https://docs.vsli.example.org/en/index.html")], - end=Timestamp("1970-06-16T16:20:00"), - externalAssociate=[Identifier("000000000008a1")], - funderOrCommissioner=[Identifier("00000000000be4")], - fundingProgram=[], - responsibleUnit=[ - Identifier("00000000000bf5"), - Identifier("00000000000bf6"), - ], - title=[Text(value="Aperiam debitis similique magnam ipsum neo.")], - identifierInPrimarySource="activity-1", - hadPrimarySource=Identifier("00000000100445"), - ) - - mocked_response = Mock(spec=requests.Response) - mocked_response.status_code = 200 - mocked_response.json = MagicMock( - return_value={ - "numFound": 1, - "items": [ - { - "itemId": item_id, - "entityType": "ExtractedActivity", - "businessId": "vhK1I2voBK5bO12MAfZYmi", - "values": [ - { - "fieldName": "abstract", - "fieldValue": "Dies ist ein deutscher Text.", - "language": "de", - }, - { - "fieldName": "abstract", - "fieldValue": "And this is in english.", - "language": "en", - }, - { - "fieldName": "activityType", - "fieldValue": 
"https://mex.rki.de/item/activity-type-5", - "language": None, - }, - { - "fieldName": "alternativeTitle", - "fieldValue": "ᵗʰᵉ ˡᵃⁿᵍᵘᵃᵍᵉ ᵒᶠ ᵗʰᶦˢ ᵗᵉˣᵗ ᶦˢ ʰᵃʳᵈ ᵗᵒ ᵈᵉᵗᵉᶜᵗ", - "language": None, - }, - { - "fieldName": "contact", - "fieldValue": Identifier("00000000000590"), - "language": None, - }, - { - "fieldName": "documentation", - "fieldValue": "https://docs.vsli.example.org/en/index.html", - "language": None, - }, - { - "fieldName": "end", - "fieldValue": Timestamp("1970-06-16T15:20:00Z"), - "language": None, - }, - { - "fieldName": "externalAssociate", - "fieldValue": Identifier("000000000008a1"), - "language": None, - }, - { - "fieldName": "funderOrCommissioner", - "fieldValue": Identifier("00000000000be4"), - "language": None, - }, - { - "fieldName": "hadPrimarySource", - "fieldValue": activity.hadPrimarySource, - "language": None, - }, - { - "fieldName": "identifier", - "fieldValue": activity.identifier, - "language": None, - }, - { - "fieldName": "identifierInPrimarySource", - "fieldValue": activity.identifierInPrimarySource, - "language": None, - }, - { - "fieldName": "responsibleUnit", - "fieldValue": Identifier("00000000000bf5"), - "language": None, - }, - { - "fieldName": "responsibleUnit", - "fieldValue": Identifier("00000000000bf6"), - "language": None, - }, - { - "fieldName": "stableTargetId", - "fieldValue": activity.stableTargetId, - "language": None, - }, - { - "fieldName": "title", - "fieldValue": "Aperiam debitis similique magnam ipsum neo.", - "language": None, - }, - ], - } - ], - } - ) - mocked_api_session_authenticated.request = MagicMock(return_value=mocked_response) - - connector = PublicApiConnector.get() - model = connector.search_model(ExtractedActivity, item_id) - - assert model is not None - assert model == activity - - -def test_get_all_items_mocked( - mocked_api_session_authenticated: MagicMock, - mex_metadata_items_response: PublicApiMetadataItemsResponse, -) -> None: - response_data = { - "items": [ - { - "itemId": 
"00005da9-f653-4c9c-b123-7b555d36b0fd", - "businessId": "bgmAz9QJ7IaHGNyMamwhUx", - "entityType": "Datum", - }, - { - "itemId": "000054a2-b16a-4f4e-82d2-1a222dba41e6", - "businessId": "hLRKpjTpCS06BniW1l2NcU", - "entityType": "ExtractedDatum", - }, - { - "itemId": "000054a2-b16a-4f4e-82d2-1a222fba41e6", - "businessId": "g5MAfZYmivhK1I2voBK5bO", - "entityType": "ExtractedPerson", - }, - { - "itemId": "000054a3-b16a-4f4e-82d2-1a222fbc41e6", - "businessId": "cOfkBGYSeIjcKCdDZJQ0yk", - "entityType": "Person", - }, - ], - "next": "", - } - mocked_response = Mock(spec=requests.Response) - mocked_response.status_code = 200 - mocked_response.json = MagicMock(return_value=response_data) - mocked_api_session_authenticated.request = MagicMock(return_value=mocked_response) - connector = PublicApiConnector.get() - items = connector.get_all_items() - assert items == mex_metadata_items_response - - -@pytest.mark.skip(reason="public api is being deprecated") -@pytest.mark.integration -def test_get_all_items() -> None: - connector = PublicApiConnector.get() - initial_items = connector.get_all_items() - - # check for correct first item on second page only if there is a second page - if next_item_id := initial_items.next: - next_items = connector.get_all_items(offset_item_id=next_item_id) - assert next_items.items[0].itemId == next_item_id diff --git a/tests/public_api/test_extract.py b/tests/public_api/test_extract.py deleted file mode 100644 index 39b134b5..00000000 --- a/tests/public_api/test_extract.py +++ /dev/null @@ -1,56 +0,0 @@ -from unittest.mock import MagicMock -from uuid import UUID - -import pytest -from pytest import MonkeyPatch - -from mex.common.exceptions import MExError -from mex.common.public_api.connector import PublicApiConnector -from mex.common.public_api.extract import extract_mex_person_items -from mex.common.public_api.models import PublicApiMetadataItemsResponse - - -@pytest.mark.skip(reason="public api is being deprecated") -@pytest.mark.integration -def 
test_extract_mex_person_items() -> None: - mex_persons = list(extract_mex_person_items()) - assert all(p.entityType in ["Person", "ExtractedPerson"] for p in mex_persons) - - -def test_extract_mex_person_items_mocked( - mex_metadata_items_response: PublicApiMetadataItemsResponse, - monkeypatch: MonkeyPatch, -) -> None: - def __init__(self: PublicApiConnector) -> None: - self.session = MagicMock() - - monkeypatch.setattr(PublicApiConnector, "__init__", __init__) - - mex_metadata_items_response_with_next = mex_metadata_items_response.model_copy() - mex_metadata_items_response_with_next.next = UUID( - "3fcce11e80e920b410efd0c919001a31" - ) - get_all_items = MagicMock( - side_effect=[mex_metadata_items_response_with_next, mex_metadata_items_response] - ) - monkeypatch.setattr(PublicApiConnector, "get_all_items", get_all_items) - - mex_persons = list(extract_mex_person_items()) - assert mex_persons == mex_metadata_items_response.items[2:4] * 2 - - -def test_extract_mex_person_items_mocked_limit_reached( - mex_metadata_items_response: PublicApiMetadataItemsResponse, - monkeypatch: MonkeyPatch, -) -> None: - def __init__(self: PublicApiConnector) -> None: - self.session = MagicMock() - - monkeypatch.setattr(PublicApiConnector, "__init__", __init__) - - mex_metadata_items_response.next = UUID("3fcce11e80e920b410efd0c919001a31") - get_all_items = MagicMock(side_effect=[mex_metadata_items_response] * 101) - monkeypatch.setattr(PublicApiConnector, "get_all_items", get_all_items) - - with pytest.raises(MExError): - list(extract_mex_person_items()) diff --git a/tests/public_api/test_transform.py b/tests/public_api/test_transform.py deleted file mode 100644 index c952288f..00000000 --- a/tests/public_api/test_transform.py +++ /dev/null @@ -1,173 +0,0 @@ -from typing import Any - -import pytest -from pytest import MonkeyPatch - -from mex.common.models import EXTRACTED_MODEL_CLASSES_BY_NAME, MExModel -from mex.common.public_api.models import PublicApiItem -from 
mex.common.public_api.transform import ( - _is_type, - transform_mex_model_to_public_api_item, - transform_public_api_item_to_mex_model, -) -from mex.common.types import ( - Identifier, - Link, - LinkLanguage, - OrganizationID, - PersonID, - Text, - TextLanguage, - Timestamp, -) - - -class DummyModel(MExModel): - stableTargetId: Identifier - optional: str | None = None - oneString: str - manyStrings: list[str] - oneText: Text - manyTexts: list[Text] - oneLink: Link - manyLinks: list[Link] - reference: Identifier - manyReferences: list[PersonID | OrganizationID] - timestamp: Timestamp - - -@pytest.fixture -def raw_mex_model() -> dict[str, Any]: - return { - "identifier": Identifier("0000000000046f"), - "stableTargetId": Identifier("00000000000fds"), - "manyLinks": [ - { - "title": "Example PDF", - "url": "file:///C:/Users/John%20Doe/example.pdf", - }, - {"language": LinkLanguage.DE, "url": "https://foo-bar-beispiel.de"}, - ], - "manyReferences": [ - Identifier("00000000001eac"), - Identifier("00000000001ead"), - ], - "manyStrings": ["red", "blue"], - "manyTexts": [ - {"value": "El burro patea."}, - {"language": TextLanguage.DE, "value": "Der Fuchs springt."}, - ], - "oneLink": {"language": LinkLanguage.EN, "url": "https://www.example.com"}, - "oneString": "grün", - "oneText": {"language": TextLanguage.EN, "value": "The lion sleeps."}, - "reference": Identifier("00000000001eab"), - "timestamp": Timestamp("2010-12-24T22:00"), - } - - -@pytest.fixture -def raw_api_item() -> dict[str, Any]: - return { - "entityType": "DummyModel", - "values": [ - { - "fieldName": "identifier", - "fieldValue": Identifier("0000000000046f"), - }, - { - "fieldName": "manyLinks", - "fieldValue": "[Example PDF](file:///C:/Users/John%20Doe/example\\.pdf)", - }, - { - "fieldName": "manyLinks", - "fieldValue": "https://foo-bar-beispiel.de", - "language": "de", - }, - { - "fieldName": "manyReferences", - "fieldValue": Identifier("00000000001eac"), - }, - { - "fieldName": "manyReferences", - 
"fieldValue": Identifier("00000000001ead"), - }, - {"fieldName": "manyStrings", "fieldValue": "red"}, - {"fieldName": "manyStrings", "fieldValue": "blue"}, - {"fieldName": "manyTexts", "fieldValue": "El burro patea."}, - { - "fieldName": "manyTexts", - "fieldValue": "Der Fuchs springt.", - "language": "de", - }, - { - "fieldName": "oneLink", - "fieldValue": "https://www.example.com", - "language": "en", - }, - {"fieldName": "oneString", "fieldValue": "grün"}, - { - "fieldName": "oneText", - "fieldValue": "The lion sleeps.", - "language": "en", - }, - { - "fieldName": "reference", - "fieldValue": Identifier("00000000001eab"), - }, - {"fieldName": "stableTargetId", "fieldValue": Identifier("00000000000fds")}, - { - "fieldName": "timestamp", - "fieldValue": Timestamp("2010-12-24T22:00"), - }, - ], - } - - -@pytest.mark.parametrize( - ("type_", "annotation", "expected"), - [ - (str, str, True), - (int, int, True), - (str, int, False), - (str, list[int], False), - (str, list[str], True), - (str, str | None, True), - ], -) -def test__is_type(type_: type, annotation: type | None, expected: bool) -> None: - assert _is_type(type_, annotation) is expected - - -def test_transform_mex_model_to_public_api_item( - raw_mex_model: dict[str, Any], raw_api_item: dict[str, Any] -) -> None: - # optional field will be omitted - dummy_model = DummyModel(optional=None, **raw_mex_model) - - dummy_item = transform_mex_model_to_public_api_item(dummy_model) - - assert dummy_item.model_dump(exclude_none=True) == raw_api_item - - -def test_transform_public_api_item_to_mex_model( - monkeypatch: MonkeyPatch, - raw_api_item: dict[str, Any], - raw_mex_model: dict[str, Any], -) -> None: - monkeypatch.setitem( - EXTRACTED_MODEL_CLASSES_BY_NAME, DummyModel.__name__, DummyModel - ) - dummy_item = PublicApiItem(**raw_api_item, businessId="00000000000fds") - - dummy_model = transform_public_api_item_to_mex_model(dummy_item) - - assert dummy_model - assert dummy_model.model_dump(exclude_none=True) == 
raw_mex_model - - -def test_transform_public_api_item_to_mex_model_unknown() -> None: - api_item = PublicApiItem( - entityType="UnknownModel", values=[], businessId="a00b02800211BD90" - ) - returned = transform_public_api_item_to_mex_model(api_item) - assert returned is None diff --git a/tests/sinks/test_ndjson.py b/tests/sinks/test_ndjson.py index df8ef95f..c60827ee 100644 --- a/tests/sinks/test_ndjson.py +++ b/tests/sinks/test_ndjson.py @@ -37,16 +37,16 @@ def test_write_ndjson() -> None: ids = list(write_ndjson(test_models)) assert len(ids) - with open(settings.work_dir / "Thing.ndjson", "r") as handle: + with open(settings.work_dir / "Thing.ndjson") as handle: output = handle.read() expected = """\ -{"enum_attr": null, "identifier": "%s", "str_attr": "foo", "ts_attr": null, "uuid_attr": null} -{"enum_attr": "value", "identifier": "%s", "str_attr": "bar", "ts_attr": null, "uuid_attr": null} -{"enum_attr": null, "identifier": "%s", "str_attr": "baz", "ts_attr": null, "uuid_attr": "00000000-0000-4000-8000-00000000002a"} -{"enum_attr": null, "identifier": "%s", "str_attr": "dat", "ts_attr": "2000-01-01", "uuid_attr": null} -""" % tuple( - m.identifier for m in test_models +{{"enum_attr": null, "identifier": "{}", "str_attr": "foo", "ts_attr": null, "uuid_attr": null}} +{{"enum_attr": "value", "identifier": "{}", "str_attr": "bar", "ts_attr": null, "uuid_attr": null}} +{{"enum_attr": null, "identifier": "{}", "str_attr": "baz", "ts_attr": null, "uuid_attr": "00000000-0000-4000-8000-00000000002a"}} +{{"enum_attr": null, "identifier": "{}", "str_attr": "dat", "ts_attr": "2000-01-01", "uuid_attr": null}} +""".format( + *[m.identifier for m in test_models] ) assert output == expected diff --git a/tests/sinks/test_public_api.py b/tests/sinks/test_public_api.py deleted file mode 100644 index ceac5787..00000000 --- a/tests/sinks/test_public_api.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Any -from unittest.mock import MagicMock, Mock -from uuid import UUID, uuid4 
- -import pytest -from pytest import MonkeyPatch -from requests import HTTPError - -from mex.common.models import ExtractedPerson, ExtractedPrimarySource -from mex.common.public_api.connector import PublicApiConnector -from mex.common.sinks.public_api import post_to_public_api, purge_models_from_public_api - - -def test_post_to_public_api_mocked( - extracted_person: ExtractedPerson, monkeypatch: MonkeyPatch -) -> None: - def __init__(self: PublicApiConnector) -> None: - self.session = MagicMock() - - monkeypatch.setattr(PublicApiConnector, "__init__", __init__) - - response = [UUID("00000000-0000-4000-8000-000000339191")] - post_models = Mock(return_value=response) - monkeypatch.setattr(PublicApiConnector, "post_models", post_models) - - model_ids = list(post_to_public_api([extracted_person])) - assert model_ids == response - post_models.assert_called_once_with([extracted_person]) - - -def test_purge_from_public_api_mocked( - extracted_person: ExtractedPerson, monkeypatch: MonkeyPatch -) -> None: - def __init__(self: PublicApiConnector) -> None: - self.session = MagicMock() - - monkeypatch.setattr(PublicApiConnector, "__init__", __init__) - - api_id = UUID("00000000-0000-4000-8000-000000339191") - delete_model = Mock(return_value=api_id) - monkeypatch.setattr(PublicApiConnector, "delete_model", delete_model) - - messages = list(purge_models_from_public_api([extracted_person])) - assert len(messages) == 1 - assert messages[0] == ( - f"purged item {api_id} for ExtractedPerson {extracted_person.identifier}" - ) - delete_model.assert_called_once_with(extracted_person) - - -@pytest.mark.skip(reason="public api is being deprecated") -@pytest.mark.integration -def test_public_api_post_and_purge_roundtrip( - extracted_primary_sources: dict[str, ExtractedPrimarySource] -) -> None: - extracted_person = ExtractedPerson( - identifierInPrimarySource=str(uuid4()), - hadPrimarySource=extracted_primary_sources["ldap"].stableTargetId, - fullName=["Roundtrip Test"], - ) - try: - 
results: list[Any] = list(post_to_public_api([extracted_person])) - assert len(results) == 1 - results = list(purge_models_from_public_api([extracted_person])) - assert len(results) == 1 - except HTTPError as error: - if error.response.json().get("message") == "could not create Solr query": - pytest.skip("integration test failed due to misconfiguration") - else: - raise error diff --git a/tests/test_cli.py b/tests/test_cli.py index 5d0422b8..57fa53d9 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -220,7 +220,7 @@ def good_entrypoint() -> None: def test_faulty_entrypoint_exits_non_zero() -> None: @entrypoint(BaseSettings) def faulty_entrypoint() -> None: - 1 / 0 + _ = 1 / 0 result = CliRunner().invoke(faulty_entrypoint, args=[]) assert result.exit_code == 1, result.stdout diff --git a/tests/test_settings.py b/tests/test_settings.py index b1f5ec5a..b0ae0449 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -22,7 +22,7 @@ def test_settings_text() -> None: assert len(text.splitlines()) == len(BaseSettings.model_fields) assert re.search(r"debug\s+False", text) - assert re.search(r"api_token_payload\s+\*+", text) # masked secret + assert re.search(r"backend_api_key\s+\*+", text) # masked secret class FooSettings(BaseSettings): @@ -78,7 +78,7 @@ class DummySettings(BaseSettings): if platform.system() == "Windows": # pragma: no cover absolute = WorkPath(r"C:\absolute\path") - else: + else: # pragma: no cover absolute = WorkPath("/absolute/path") relative = Path("relative", "path") diff --git a/tests/types/test_path.py b/tests/types/test_path.py index d2b027ad..ae9c4002 100644 --- a/tests/types/test_path.py +++ b/tests/types/test_path.py @@ -34,7 +34,7 @@ def test_path_wrapper_equality() -> None: assert PathWrapper(Path("foo", "bar")) != PathWrapper(Path("bar", "batz")) with pytest.raises(TypeError): - PathWrapper("foo") == 42 + _ = PathWrapper("foo") == 42 def test_path_wrapper_relative_absolute() -> None: diff --git 
a/tests/wikidata/test_extract.py b/tests/wikidata/test_extract.py index 04931db4..b437032b 100644 --- a/tests/wikidata/test_extract.py +++ b/tests/wikidata/test_extract.py @@ -175,7 +175,7 @@ def mocked_query_response(): ) def mocked_item_details_response(): - with open(TESTDATA_DIR / "items_details.json", "r", encoding="utf-8") as f: + with open(TESTDATA_DIR / "items_details.json", encoding="utf-8") as f: data = json.load(f) return data[0] @@ -396,7 +396,7 @@ def test_get_organization_details_mocked(monkeypatch: MonkeyPatch) -> None: } def mocked_item_details_response(): - with open(TESTDATA_DIR / "items_details.json", "r", encoding="utf-8") as f: + with open(TESTDATA_DIR / "items_details.json", encoding="utf-8") as f: data = json.load(f) return data[0] diff --git a/tests/wikidata/test_transform.py b/tests/wikidata/test_transform.py index 7b34aa77..1c9a48d0 100644 --- a/tests/wikidata/test_transform.py +++ b/tests/wikidata/test_transform.py @@ -51,7 +51,7 @@ def test_transform_wikidata_organization_to_organization( "wikidataId": ["https://www.wikidata.org/entity/Q26678"], } - with open(TESTDATA_DIR / "items_details.json", "r", encoding="utf-8") as f: + with open(TESTDATA_DIR / "items_details.json", encoding="utf-8") as f: wikidata_organizations = [ WikidataOrganization.model_validate(item) for item in json.load(f) ]