Skip to content

Commit

Permalink
feature/update mex-common to 0.45.0 (#305)
Browse files Browse the repository at this point in the history
# Changes
- update mex-common to 0.45.0 and mex-model to 3.4.0

# Fixed
- fix coverage and linting issues
  • Loading branch information
cutoffthetop authored Dec 18, 2024
1 parent 0107b34 commit 97f8a91
Show file tree
Hide file tree
Showing 32 changed files with 63 additions and 58 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ default_language_version:
python: python3.11
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.3
rev: v0.8.3
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
Expand All @@ -25,7 +25,7 @@ repos:
- id: fix-byte-order-marker
name: byte-order
- repo: https://github.com/pdm-project/pdm
rev: 2.20.1
rev: 2.22.0
hooks:
- id: pdm-lock-check
name: pdm
Expand Down
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

### Changes
- extractors now use wikidata helper function

- extractors now use wikidata helper function
- BREAKING: rename artificial provider function `extracted_data` to `extracted_items`
- prefer concrete unions over base classes for merged and extracted item typing
- update mex-common to 0.45.0 and mex-model to 3.4.0

### Deprecated

### Removed

### Fixed

- fix coverage and linting issues

### Security

## [0.22.0] - 2024-12-10
Expand Down
2 changes: 1 addition & 1 deletion mex/extractors/artificial/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,6 @@ def artificial_data(factories: Faker, identities: IdentityMap) -> None:


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run the artificial data job in-process."""
run_job_in_process("artificial")
2 changes: 1 addition & 1 deletion mex/extractors/biospecimen/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,6 @@ def extracted_biospecimen_resources(


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run the biospecimen extractor job in-process."""
run_job_in_process("biospecimen")
2 changes: 1 addition & 1 deletion mex/extractors/blueant/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,6 @@ def extracted_blueant_activities(


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run the blueant extractor job in-process."""
run_job_in_process("blueant")
2 changes: 1 addition & 1 deletion mex/extractors/confluence_vvt/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,6 @@ def extracted_confluence_vvt_activities(


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run the confluence-vvt extractor job in-process."""
run_job_in_process("confluence_vvt")
5 changes: 2 additions & 3 deletions mex/extractors/confluence_vvt/parse_html.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import contextlib
import re
from itertools import zip_longest
from typing import Any, cast
Expand Down Expand Up @@ -216,10 +217,8 @@ def get_interne_vorgangsnummer_from_title(
)

for item in unwanted_elements:
try:
with contextlib.suppress(ValueError):
interne_vorgangsnummers.remove(item)
except ValueError:
pass

return [x for x in interne_vorgangsnummers if x] # clean empty strings

Expand Down
15 changes: 10 additions & 5 deletions mex/extractors/datscha_web/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,14 @@ def extract_datscha_web_organizations(
partner_to_org_map = {}
for item in datscha_web_items:
for partner in item.get_partners():
if partner and partner != "None":
if organization := get_wikidata_extracted_organization_id_by_name(
partner
):
partner_to_org_map[partner] = organization
if (
partner
and partner != "None"
and (
organization := get_wikidata_extracted_organization_id_by_name(
partner
)
)
):
partner_to_org_map[partner] = organization
return partner_to_org_map
2 changes: 1 addition & 1 deletion mex/extractors/datscha_web/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,6 @@ def extract_datscha_web(


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run the datscha-web extractor job in-process."""
run_job_in_process("datscha_web")
2 changes: 1 addition & 1 deletion mex/extractors/ff_projects/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,6 @@ def extract_ff_projects(


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run the ff-projects extractor job in-process."""
run_job_in_process("ff_projects")
2 changes: 1 addition & 1 deletion mex/extractors/grippeweb/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def extract_columns_by_table_and_column_name() -> dict[str, dict[str, list[Any]]
connection = GrippewebConnector.get()
return {
table_name: connection.parse_columns_by_column_name(table_name)
for table_name in QUERY_BY_TABLE_NAME.keys()
for table_name in QUERY_BY_TABLE_NAME
}


Expand Down
2 changes: 1 addition & 1 deletion mex/extractors/grippeweb/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,6 @@ def grippeweb_extracted_variable(


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run the Grippeweb extractor job in-process."""
run_job_in_process("grippeweb")
4 changes: 2 additions & 2 deletions mex/extractors/grippeweb/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def get_or_create_external_partner(
"""
if external_partner_dict := resource.externalPartner:
external_partner_string = external_partner_dict[0].mappingRules[0].forValues[0]
if external_partner_string in resource.model_fields.keys():
if external_partner_string in resource.model_fields:
external_partner_identifier = [
grippeweb_organization_ids_by_query_string[external_partner_string]
]
Expand Down Expand Up @@ -375,7 +375,7 @@ def transform_grippeweb_variable_to_extracted_variables(
column_strings = {cell for cell in column if isinstance(cell, str)}
value_set = (
column_strings
if column_name in valueset_locations_by_field.keys()
if column_name in valueset_locations_by_field
else set()
)
extracted_variables.append(
Expand Down
2 changes: 1 addition & 1 deletion mex/extractors/ifsg/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,6 @@ def extracted_ifsg_variable(


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run the IFSG extractor job in-process."""
run_job_in_process("ifsg")
2 changes: 1 addition & 1 deletion mex/extractors/international_projects/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,6 @@ def extracted_international_projects_activities(


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run the international-projects extractor job in-process."""
run_job_in_process("international_projects")
2 changes: 1 addition & 1 deletion mex/extractors/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run all extractor jobs in-process."""
run_job_in_process("all_extractors")
2 changes: 1 addition & 1 deletion mex/extractors/odk/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,6 @@ def extracted_variables_odk(


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run the odk extractor job in-process."""
run_job_in_process("odk")
2 changes: 1 addition & 1 deletion mex/extractors/organigram/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,6 @@ def extract_organigram() -> None:


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run the organigram extractor job in-process."""
run_job_in_process("organigram")
2 changes: 1 addition & 1 deletion mex/extractors/pipeline/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,4 @@ def asset(**_: Any) -> Callable[[_AssetFn], _AssetFn]:

from mex.extractors.pipeline.base import load_job_definitions, run_job_in_process

__all__ = ("asset", "run_job_in_process", "load_job_definitions")
__all__ = ("asset", "load_job_definitions", "run_job_in_process")
2 changes: 1 addition & 1 deletion mex/extractors/publisher/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@ def publish_merged_items(extract_and_filter_merged_items: MergedItemsResponse) -


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run the publisher job in-process."""
run_job_in_process("publisher")
2 changes: 1 addition & 1 deletion mex/extractors/rdmo/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,6 @@ def rdmo_activities(


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run the RDMO extractor job in-process."""
run_job_in_process("rdmo")
2 changes: 1 addition & 1 deletion mex/extractors/rdmo/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,6 @@ def transform_rdmo_sources_to_extracted_activities(
start=start,
title=title,
theme="https://mex.rki.de/item/theme-1",
# TODO: resolve contributor, units and funding organization
# TODO(ND): resolve contributor, units and funding organization
website=None,
)
2 changes: 1 addition & 1 deletion mex/extractors/seq_repo/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,6 @@ def seq_repo_resource(


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run the seq-repo extractor job in-process."""
run_job_in_process("seq_repo")
2 changes: 1 addition & 1 deletion mex/extractors/sumo/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,6 @@ def transformed_sumo_feat_projection_variables(


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run the sumo extractor job in-process."""
run_job_in_process("sumo")
2 changes: 1 addition & 1 deletion mex/extractors/sumo/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,7 @@ def transform_sumo_activity_to_extracted_activity(
hadPrimarySource=extracted_primary_source.stableTargetId,
identifierInPrimarySource=identifier_in_primary_source,
involvedUnit=involved_unit,
publication=[], # TODO: add bibliographic resource item
publication=[], # TODO(KA): add bibliographic resource item
responsibleUnit=responsible_unit,
shortName=short_name,
start=start,
Expand Down
2 changes: 1 addition & 1 deletion mex/extractors/synopse/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,6 @@ def extracted_synopse_variables(


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run the synopse extractor job in-process."""
run_job_in_process("synopse")
2 changes: 1 addition & 1 deletion mex/extractors/synopse/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,7 @@ def transform_synopse_project_to_activity(
url=PureWindowsPath(path_line).as_uri(), title="\n".join(title_lines)
)
except ValueError:
pass # TODO: handle relative paths
pass # TODO(HS): handle relative paths
involved_units = [
merged_id
for unit in (synopse_project.interne_partner or "").split(",")
Expand Down
2 changes: 1 addition & 1 deletion mex/extractors/voxco/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,6 @@ def extracted_variables_voxco(


@entrypoint(Settings)
def run() -> None:
def run() -> None: # pragma: no cover
"""Run the voxco extractor job in-process."""
run_job_in_process("voxco")
12 changes: 6 additions & 6 deletions pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 8 additions & 10 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ dependencies = [
"dagster-webserver>=1,<2",
"dagster>=1,<2",
"faker>=33,<34",
"mex-common @ git+https://github.com/robert-koch-institut/mex-common.git@0.43.0",
"mex-common @ git+https://github.com/robert-koch-institut/mex-common.git@0.45.0",
"numpy>=2,<3",
"openpyxl>=3,<4",
"pandas>=2,<3",
Expand Down Expand Up @@ -175,27 +175,25 @@ select = ["ALL"]
]
"mex/extractors/biospecimen/**" = ["PD003", "PLR0913", "PTH207"]
"mex/extractors/blueant/**" = ["PLR0913", "PLR2004"]
"mex/extractors/confluence_vvt/**" = ["EM101", "PLR2004", "SIM105"]
"mex/extractors/datscha_web/**" = ["EM102", "PLR2004", "SIM102"]
"mex/extractors/confluence_vvt/**" = ["EM101", "PLR2004"]
"mex/extractors/datscha_web/**" = ["EM102", "PLR2004"]
"mex/extractors/ff_projects/**" = ["EM101", "PD901", "PLR0911", "PLR0913"]
"mex/extractors/grippeweb/**" = ["PLR0913", "SIM118"]
"mex/extractors/grippeweb/**" = ["PLR0913"]
"mex/extractors/ifsg/**" = ["C403", "C416", "PLR0913", "PLR1714", "PLR2004"]
"mex/extractors/international_projects/**" = ["C414", "PD901", "PLR0913"]
"mex/extractors/rdmo/**" = ["EM102", "PLW0127", "TD002"]
"mex/extractors/rdmo/**" = ["EM102", "PLW0127"]
"mex/extractors/seq_repo/**" = ["DTZ007", "EM102", "PLR0913", "SIM102"]
"mex/extractors/sumo/**" = ["FLY002", "PD002", "PLR0913", "TD002"]
"mex/extractors/synopse/**" = ["C416", "EM101", "PLR0913", "TD002"]
"mex/extractors/sumo/**" = ["FLY002", "PD002", "PLR0913"]
"mex/extractors/synopse/**" = ["C416", "EM101", "PLR0913"]
"mex/extractors/voxco/**" = ["PLR0912", "PLR0913"]
"tests/biospecimen/**" = ["PLR0913"]
"tests/drop/**" = ["ARG001"]
"tests/ff_projects/**" = ["DTZ001"]
"tests/grippeweb/**" = ["PLR0913"]
"tests/ifsg/**" = ["DTZ001", "PLR0913"]
"tests/odk/**" = ["ARG001"]
"tests/rdmo/**" = ["ARG001", "PT012"]
"tests/seq_repo/**" = ["PLR0913"]
"tests/sumo/**" = ["FLY002", "PLR0913"]
"tests/synopse/**" = ["C408", "C419", "PLR0913", "TD002"]
"tests/synopse/**" = ["C408", "C419", "PLR0913"]
"tests/voxco/**" = ["PLR0913"]
"tests/wikidata/**" = ["INP001", "INP001"]

Expand Down
5 changes: 4 additions & 1 deletion tests/drop/mocked_drop.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock

import pytest
Expand Down Expand Up @@ -31,7 +32,9 @@ def mocked_drop(monkeypatch: MonkeyPatch) -> None:
],
)

def get_file_mocked(self, x_system: str, file_id: str):
def get_file_mocked(
_self: DropApiConnector, x_system: str, file_id: str
) -> dict[str, Any]:
with open(
(
Path(__file__).parents[2]
Expand Down
7 changes: 2 additions & 5 deletions tests/odk/test_main.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
import pytest

from mex.common.models import ExtractedPrimarySource
from mex.extractors.pipeline import run_job_in_process


@pytest.mark.usefixtures("mocked_ldap", "mocked_wikidata")
def test_job(
extracted_primary_sources: dict[str, ExtractedPrimarySource],
) -> None:
@pytest.mark.usefixtures("mocked_ldap", "mocked_wikidata", "extracted_primary_sources")
def test_job() -> None:
result = run_job_in_process("odk")
assert result.success

0 comments on commit 97f8a91

Please sign in to comment.