diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ee8e3f2a..26f77074 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ default_language_version: python: python3.11 repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.3 + rev: v0.8.3 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] @@ -25,7 +25,7 @@ repos: - id: fix-byte-order-marker name: byte-order - repo: https://github.com/pdm-project/pdm - rev: 2.20.1 + rev: 2.22.0 hooks: - id: pdm-lock-check name: pdm diff --git a/CHANGELOG.md b/CHANGELOG.md index 090593df..3e8070a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,10 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### Changes -- extractors now use wikidata helper function +- extractors now use wikidata helper function - BREAKING: rename artificial provider function `extracted_data` to `extracted_items` - prefer concrete unions over base classes for merged and extracted item typing +- update mex-common to 0.45.0 and mex-model to 3.4.0 ### Deprecated @@ -21,6 +22,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- fix coverage and linting issues + ### Security ## [0.22.0] - 2024-12-10 diff --git a/mex/extractors/artificial/main.py b/mex/extractors/artificial/main.py index 7307f867..7703b782 100644 --- a/mex/extractors/artificial/main.py +++ b/mex/extractors/artificial/main.py @@ -56,6 +56,6 @@ def artificial_data(factories: Faker, identities: IdentityMap) -> None: @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run the artificial data job in-process.""" run_job_in_process("artificial") diff --git a/mex/extractors/biospecimen/main.py b/mex/extractors/biospecimen/main.py index dfa159d9..d1f21c62 100644 --- a/mex/extractors/biospecimen/main.py +++ b/mex/extractors/biospecimen/main.py @@ -95,6 +95,6 @@ def extracted_biospecimen_resources( @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run the biospecimen extractor job in-process.""" run_job_in_process("biospecimen") diff --git a/mex/extractors/blueant/main.py b/mex/extractors/blueant/main.py index e525bbd3..7173a2e5 100644 --- a/mex/extractors/blueant/main.py +++ b/mex/extractors/blueant/main.py @@ -111,6 +111,6 @@ def extracted_blueant_activities( @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run the blueant extractor job in-process.""" run_job_in_process("blueant") diff --git a/mex/extractors/confluence_vvt/main.py b/mex/extractors/confluence_vvt/main.py index 0d4f5010..75574ce7 100644 --- a/mex/extractors/confluence_vvt/main.py +++ b/mex/extractors/confluence_vvt/main.py @@ -101,6 +101,6 @@ def extracted_confluence_vvt_activities( @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run the confluence-vvt extractor job in-process.""" run_job_in_process("confluence_vvt") diff --git a/mex/extractors/confluence_vvt/parse_html.py b/mex/extractors/confluence_vvt/parse_html.py index 52b0aa0e..0f57c6ce 100644 --- a/mex/extractors/confluence_vvt/parse_html.py +++ b/mex/extractors/confluence_vvt/parse_html.py @@ -1,3 +1,4 @@ +import contextlib import re from itertools import zip_longest from typing import Any, cast @@ -216,10 +217,8 @@ def get_interne_vorgangsnummer_from_title( ) for item in unwanted_elements: - try: + with contextlib.suppress(ValueError): interne_vorgangsnummers.remove(item) - except ValueError: - pass return [x for x in interne_vorgangsnummers if x] # clean empty strings diff --git a/mex/extractors/datscha_web/extract.py b/mex/extractors/datscha_web/extract.py index b7806228..63456419 100644 --- a/mex/extractors/datscha_web/extract.py +++ b/mex/extractors/datscha_web/extract.py @@ -69,9 +69,14 @@ def extract_datscha_web_organizations( partner_to_org_map = {} for item in datscha_web_items: for partner in item.get_partners(): - if partner and partner != "None": - if organization := get_wikidata_extracted_organization_id_by_name( - partner - ): - partner_to_org_map[partner] = organization + if ( + partner + and partner != "None" + and ( + organization := get_wikidata_extracted_organization_id_by_name( + partner + ) + ) + ): + partner_to_org_map[partner] = organization return partner_to_org_map diff --git a/mex/extractors/datscha_web/main.py b/mex/extractors/datscha_web/main.py index a8ba2dae..43068bbd 100644 --- a/mex/extractors/datscha_web/main.py +++ b/mex/extractors/datscha_web/main.py @@ -109,6 +109,6 @@ def extract_datscha_web( @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run the datscha-web extractor job in-process.""" run_job_in_process("datscha_web") diff --git a/mex/extractors/ff_projects/main.py b/mex/extractors/ff_projects/main.py index 863a9f9a..88440002 100644 --- a/mex/extractors/ff_projects/main.py +++ b/mex/extractors/ff_projects/main.py @@ -121,6 +121,6 @@ def extract_ff_projects( @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run the ff-projects extractor job in-process.""" run_job_in_process("ff_projects") diff --git a/mex/extractors/grippeweb/extract.py b/mex/extractors/grippeweb/extract.py index f1ba6dc0..ff4d3119 100644 --- a/mex/extractors/grippeweb/extract.py +++ b/mex/extractors/grippeweb/extract.py @@ -20,7 +20,7 @@ def extract_columns_by_table_and_column_name() -> dict[str, dict[str, list[Any]] connection = GrippewebConnector.get() return { table_name: connection.parse_columns_by_column_name(table_name) - for table_name in QUERY_BY_TABLE_NAME.keys() + for table_name in QUERY_BY_TABLE_NAME } diff --git a/mex/extractors/grippeweb/main.py b/mex/extractors/grippeweb/main.py index fe7ab2ad..7d08f2d8 100644 --- a/mex/extractors/grippeweb/main.py +++ b/mex/extractors/grippeweb/main.py @@ -242,6 +242,6 @@ def grippeweb_extracted_variable( @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run the Grippeweb extractor job in-process.""" run_job_in_process("grippeweb") diff --git a/mex/extractors/grippeweb/transform.py b/mex/extractors/grippeweb/transform.py index b18c5343..b61d00c1 100644 --- a/mex/extractors/grippeweb/transform.py +++ b/mex/extractors/grippeweb/transform.py @@ -227,7 +227,7 @@ def get_or_create_external_partner( """ if external_partner_dict := resource.externalPartner: external_partner_string = external_partner_dict[0].mappingRules[0].forValues[0] - if external_partner_string in resource.model_fields.keys(): + if external_partner_string in resource.model_fields: external_partner_identifier = [ grippeweb_organization_ids_by_query_string[external_partner_string] ] @@ -375,7 +375,7 @@ def transform_grippeweb_variable_to_extracted_variables( column_strings = {cell for cell in column if isinstance(cell, str)} value_set = ( column_strings - if column_name in valueset_locations_by_field.keys() + if column_name in valueset_locations_by_field else set() ) extracted_variables.append( diff --git a/mex/extractors/ifsg/main.py b/mex/extractors/ifsg/main.py index 8c259f2c..50a19883 100644 --- a/mex/extractors/ifsg/main.py +++ b/mex/extractors/ifsg/main.py @@ -279,6 +279,6 @@ def extracted_ifsg_variable( @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run the IFSG extractor job in-process.""" run_job_in_process("ifsg") diff --git a/mex/extractors/international_projects/main.py b/mex/extractors/international_projects/main.py index 5cfbfcb2..5e815689 100644 --- a/mex/extractors/international_projects/main.py +++ b/mex/extractors/international_projects/main.py @@ -127,6 +127,6 @@ def extracted_international_projects_activities( @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run the international-projects extractor job in-process.""" run_job_in_process("international_projects") diff --git a/mex/extractors/main.py b/mex/extractors/main.py index 1ce755e7..4b185aa7 100644 --- a/mex/extractors/main.py +++ b/mex/extractors/main.py @@ -4,6 +4,6 @@ @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run all extractor jobs in-process.""" run_job_in_process("all_extractors") diff --git a/mex/extractors/odk/main.py b/mex/extractors/odk/main.py index 20d25582..68e5d505 100644 --- a/mex/extractors/odk/main.py +++ b/mex/extractors/odk/main.py @@ -139,6 +139,6 @@ def extracted_variables_odk( @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run the odk extractor job in-process.""" run_job_in_process("odk") diff --git a/mex/extractors/organigram/main.py b/mex/extractors/organigram/main.py index 31d792b0..3f158040 100644 --- a/mex/extractors/organigram/main.py +++ b/mex/extractors/organigram/main.py @@ -45,6 +45,6 @@ def extract_organigram() -> None: @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run the organigram extractor job in-process.""" run_job_in_process("organigram") diff --git a/mex/extractors/pipeline/__init__.py b/mex/extractors/pipeline/__init__.py index dbf2fe21..878104db 100644 --- a/mex/extractors/pipeline/__init__.py +++ b/mex/extractors/pipeline/__init__.py @@ -77,4 +77,4 @@ def asset(**_: Any) -> Callable[[_AssetFn], _AssetFn]: from mex.extractors.pipeline.base import load_job_definitions, run_job_in_process -__all__ = ("asset", "run_job_in_process", "load_job_definitions") +__all__ = ("asset", "load_job_definitions", "run_job_in_process") diff --git a/mex/extractors/publisher/main.py b/mex/extractors/publisher/main.py index d2993eed..409d3896 100644 --- a/mex/extractors/publisher/main.py +++ b/mex/extractors/publisher/main.py @@ -22,6 +22,6 @@ def publish_merged_items(extract_and_filter_merged_items: MergedItemsResponse) - @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run the publisher job in-process.""" run_job_in_process("publisher") diff --git a/mex/extractors/rdmo/main.py b/mex/extractors/rdmo/main.py index 8c776340..9a18b2c0 100644 --- a/mex/extractors/rdmo/main.py +++ b/mex/extractors/rdmo/main.py @@ -74,6 +74,6 @@ def rdmo_activities( @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run the RDMO extractor job in-process.""" run_job_in_process("rdmo") diff --git a/mex/extractors/rdmo/transform.py b/mex/extractors/rdmo/transform.py index 207da92d..e2519a3e 100644 --- a/mex/extractors/rdmo/transform.py +++ b/mex/extractors/rdmo/transform.py @@ -100,6 +100,6 @@ def transform_rdmo_sources_to_extracted_activities( start=start, title=title, theme="https://mex.rki.de/item/theme-1", - # TODO: resolve contributor, units and funding organization + # TODO(ND): resolve contributor, units and funding organization website=None, ) diff --git a/mex/extractors/seq_repo/main.py b/mex/extractors/seq_repo/main.py index c8c104ea..fb134b4b 100644 --- a/mex/extractors/seq_repo/main.py +++ b/mex/extractors/seq_repo/main.py @@ -183,6 +183,6 @@ def seq_repo_resource( @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run the seq-repo extractor job in-process.""" run_job_in_process("seq_repo") diff --git a/mex/extractors/sumo/main.py b/mex/extractors/sumo/main.py index fe5f5773..0652fa82 100644 --- a/mex/extractors/sumo/main.py +++ b/mex/extractors/sumo/main.py @@ -369,6 +369,6 @@ def transformed_sumo_feat_projection_variables( @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run the sumo extractor job in-process.""" run_job_in_process("sumo") diff --git a/mex/extractors/sumo/transform.py b/mex/extractors/sumo/transform.py index c93159cb..7a92a6b1 100644 --- a/mex/extractors/sumo/transform.py +++ b/mex/extractors/sumo/transform.py @@ -594,7 +594,7 @@ def transform_sumo_activity_to_extracted_activity( hadPrimarySource=extracted_primary_source.stableTargetId, identifierInPrimarySource=identifier_in_primary_source, involvedUnit=involved_unit, - publication=[], # TODO: add bibliographic resource item + publication=[], # TODO(KA): add bibliographic resource item responsibleUnit=responsible_unit, shortName=short_name, start=start, diff --git a/mex/extractors/synopse/main.py b/mex/extractors/synopse/main.py index 3ae6e6f6..8f7de7f8 100644 --- a/mex/extractors/synopse/main.py +++ b/mex/extractors/synopse/main.py @@ -361,6 +361,6 @@ def extracted_synopse_variables( @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run the synopse extractor job in-process.""" run_job_in_process("synopse") diff --git a/mex/extractors/synopse/transform.py b/mex/extractors/synopse/transform.py index d53a592b..7015c0a3 100644 --- a/mex/extractors/synopse/transform.py +++ b/mex/extractors/synopse/transform.py @@ -669,7 +669,7 @@ def transform_synopse_project_to_activity( url=PureWindowsPath(path_line).as_uri(), title="\n".join(title_lines) ) except ValueError: - pass # TODO: handle relative paths + pass # TODO(HS): handle relative paths involved_units = [ merged_id for unit in (synopse_project.interne_partner or "").split(",") diff --git a/mex/extractors/voxco/main.py b/mex/extractors/voxco/main.py index 21a40db8..55a1520a 100644 --- a/mex/extractors/voxco/main.py +++ b/mex/extractors/voxco/main.py @@ -136,6 +136,6 @@ def extracted_variables_voxco( @entrypoint(Settings) -def run() -> None: +def run() -> None: # pragma: no cover """Run the voxco extractor job in-process.""" run_job_in_process("voxco") diff --git a/pdm.lock b/pdm.lock index 46c7db0d..7c58c245 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:12b999863f76aae1e4e39cde739cc7b7f6648f4170c61fe32cdc0da44354f438" +content_hash = "sha256:0765136f9110dff25280313486160aa36cf831398a49443f2dbc2bd08e653e81" [[metadata.targets]] requires_python = "==3.11.*" @@ -991,11 +991,11 @@ files = [ [[package]] name = "mex-common" -version = "0.43.0" +version = "0.45.0" requires_python = ">=3.11,<3.13" git = "https://github.com/robert-koch-institut/mex-common.git" -ref = "0.43.0" -revision = "ddded85a745d4c0f157c5043e5fb7584c2c006e7" +ref = "0.45.0" +revision = "f5ff330763011e1380d1321bf20bd7b346e1b484" summary = "Common library for MEx python projects." groups = ["default"] marker = "python_version == \"3.11\"" @@ -1004,7 +1004,7 @@ dependencies = [ "click<9,>=8", "langdetect<2,>=1", "ldap3<3,>=2", - "mex-model @ git+https://github.com/robert-koch-institut/mex-model.git@3.3.2", + "mex-model @ git+https://github.com/robert-koch-institut/mex-model.git@3.4.0", "numpy<3,>=2", "pandas<3,>=2", "pyarrow<19,>=18", @@ -1019,7 +1019,7 @@ name = "mex-model" version = "3.3.2" requires_python = ">=3.11,<3.13" git = "https://github.com/robert-koch-institut/mex-model.git" -ref = "3.3.2" +ref = "3.4.0" revision = "5e7c3dd6ee904e6727402cc1bf4663ea014cccbc" summary = "JSON schema files defining the MEx metadata model." groups = ["default"] diff --git a/pyproject.toml b/pyproject.toml index 4691eca5..8b73d96f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ dependencies = [ "dagster-webserver>=1,<2", "dagster>=1,<2", "faker>=33,<34", - "mex-common @ git+https://github.com/robert-koch-institut/mex-common.git@0.43.0", + "mex-common @ git+https://github.com/robert-koch-institut/mex-common.git@0.45.0", "numpy>=2,<3", "openpyxl>=3,<4", "pandas>=2,<3", @@ -175,27 +175,25 @@ select = ["ALL"] ] "mex/extractors/biospecimen/**" = ["PD003", "PLR0913", "PTH207"] "mex/extractors/blueant/**" = ["PLR0913", "PLR2004"] -"mex/extractors/confluence_vvt/**" = ["EM101", "PLR2004", "SIM105"] -"mex/extractors/datscha_web/**" = ["EM102", "PLR2004", "SIM102"] +"mex/extractors/confluence_vvt/**" = ["EM101", "PLR2004"] +"mex/extractors/datscha_web/**" = ["EM102", "PLR2004"] "mex/extractors/ff_projects/**" = ["EM101", "PD901", "PLR0911", "PLR0913"] -"mex/extractors/grippeweb/**" = ["PLR0913", "SIM118"] +"mex/extractors/grippeweb/**" = ["PLR0913"] "mex/extractors/ifsg/**" = ["C403", "C416", "PLR0913", "PLR1714", "PLR2004"] "mex/extractors/international_projects/**" = ["C414", "PD901", "PLR0913"] -"mex/extractors/rdmo/**" = ["EM102", "PLW0127", "TD002"] +"mex/extractors/rdmo/**" = ["EM102", "PLW0127"] "mex/extractors/seq_repo/**" = ["DTZ007", "EM102", "PLR0913", "SIM102"] -"mex/extractors/sumo/**" = ["FLY002", "PD002", "PLR0913", "TD002"] -"mex/extractors/synopse/**" = ["C416", "EM101", "PLR0913", "TD002"] +"mex/extractors/sumo/**" = ["FLY002", "PD002", "PLR0913"] +"mex/extractors/synopse/**" = ["C416", "EM101", "PLR0913"] "mex/extractors/voxco/**" = ["PLR0912", "PLR0913"] "tests/biospecimen/**" = ["PLR0913"] -"tests/drop/**" = ["ARG001"] "tests/ff_projects/**" = ["DTZ001"] "tests/grippeweb/**" = ["PLR0913"] "tests/ifsg/**" = ["DTZ001", "PLR0913"] -"tests/odk/**" = ["ARG001"] "tests/rdmo/**" = ["ARG001", "PT012"] "tests/seq_repo/**" = ["PLR0913"] "tests/sumo/**" = ["FLY002", "PLR0913"] -"tests/synopse/**" = ["C408", "C419", "PLR0913", "TD002"] +"tests/synopse/**" = ["C408", "C419", "PLR0913"] "tests/voxco/**" = ["PLR0913"] "tests/wikidata/**" = ["INP001", "INP001"] diff --git a/tests/drop/mocked_drop.py b/tests/drop/mocked_drop.py index 052f6e01..447e492d 100644 --- a/tests/drop/mocked_drop.py +++ b/tests/drop/mocked_drop.py @@ -1,5 +1,6 @@ import json from pathlib import Path +from typing import Any from unittest.mock import MagicMock import pytest @@ -31,7 +32,9 @@ def mocked_drop(monkeypatch: MonkeyPatch) -> None: ], ) - def get_file_mocked(self, x_system: str, file_id: str): + def get_file_mocked( + _self: DropApiConnector, x_system: str, file_id: str + ) -> dict[str, Any]: with open( ( Path(__file__).parents[2] diff --git a/tests/odk/test_main.py b/tests/odk/test_main.py index 4c5d3170..0c92205a 100644 --- a/tests/odk/test_main.py +++ b/tests/odk/test_main.py @@ -1,12 +1,9 @@ import pytest -from mex.common.models import ExtractedPrimarySource from mex.extractors.pipeline import run_job_in_process -@pytest.mark.usefixtures("mocked_ldap", "mocked_wikidata") -def test_job( - extracted_primary_sources: dict[str, ExtractedPrimarySource], -) -> None: +@pytest.mark.usefixtures("mocked_ldap", "mocked_wikidata", "extracted_primary_sources") +def test_job() -> None: result = run_job_in_process("odk") assert result.success