feature/bump mex-common to 0.43.0 #300

Merged · 6 commits · Dec 11, 2024
Changes from all commits
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -11,6 +11,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changes

- BREAKING: rename artificial provider function `extracted_data` to `extracted_items`
- prefer concrete unions over base classes for merged and extracted item typing

### Deprecated

### Removed
@@ -21,6 +24,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [0.22.0] - 2024-12-10

### Changes

- wrap up ifsg model v3 update
- wrap up seq-repo model v3 update

## [0.21.0] - 2024-11-19

### Added
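The changelog line about preferring concrete unions is the thread running through the diffs below: the `ExtractedData` and `MergedItem` base classes give way to mex-common's `AnyExtractedModel` and `AnyMergedModel` unions. A minimal sketch of what the union buys for static narrowing — `MergedContactPoint` stands in as one example member of the union, and the `email` access is an assumption about that model's fields:

```python
from mex.common.models import AnyMergedModel, MergedContactPoint


def describe(item: AnyMergedModel) -> str:
    """Return a short label for a merged item (illustrative only)."""
    if isinstance(item, MergedContactPoint):
        # the union lets type checkers narrow to the concrete class here,
        # so its fields (email is assumed for this sketch) become visible
        return f"contact point with {len(item.email)} email address(es)"
    # any other member of the union falls through to a generic label
    return type(item).__name__
```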
2 changes: 1 addition & 1 deletion mex/extractors/artificial/main.py
@@ -52,7 +52,7 @@ def factories(faker: Faker, identities: IdentityMap) -> Faker:
def artificial_data(factories: Faker, identities: IdentityMap) -> None:
"""Create artificial data and load the models to the sinks."""
restore_identities(identities) # restore state of memory identity provider
load(m for c in EXTRACTED_MODEL_CLASSES for m in factories.extracted_data(c))
load(m for c in EXTRACTED_MODEL_CLASSES for m in factories.extracted_items(c))


@entrypoint(Settings)
10 changes: 6 additions & 4 deletions mex/extractors/artificial/provider.py
@@ -11,7 +11,7 @@
from pydantic.fields import FieldInfo

from mex.common.identity import Identity
from mex.common.models import ExtractedData
from mex.common.models import AnyExtractedModel
from mex.common.types import (
TEMPORAL_ENTITY_FORMATS_BY_PRECISION,
UTC,
@@ -102,8 +102,10 @@ def field_value(
raise RuntimeError(msg)
return [factory() for _ in range(self.pyint(*self.min_max_for_field(field)))]

def extracted_data(self, model: type[ExtractedData]) -> list[ExtractedData]:
"""Get a list of extracted data instances for the given model class."""
def extracted_items(
self, model: type[AnyExtractedModel]
) -> list[AnyExtractedModel]:
"""Get a list of extracted items for the given model class."""
models = []
for identity in cast(list[Identity], self.generator.identities(model)):
# manually set identity related fields
@@ -130,7 +132,7 @@ def __init__(self, factory: Any, identities: IdentityMap) -> None:
super().__init__(factory)
self._identities = identities

def identities(self, model: type[ExtractedData]) -> list[Identity]:
def identities(self, model: type[AnyExtractedModel]) -> list[Identity]:
"""Return a list of identities for the given model class."""
return self._identities[model.__name__.removeprefix("Extracted")]

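A hedged usage sketch of the renamed provider method: it assumes a Faker instance that already carries this module's custom providers and seeded identities, which is how `artificial_data` in mex/extractors/artificial/main.py and the test further down call it. The helper names are illustrative; only `extracted_items` comes from this diff, and the import paths follow mex-common's public module layout:

```python
from faker import Faker

from mex.common.models import (
    EXTRACTED_MODEL_CLASSES,
    AnyExtractedModel,
    ExtractedContactPoint,
)


def sample_contact_points(factories: Faker) -> list[AnyExtractedModel]:
    """Return fake contact points, one per seeded identity (sketch)."""
    # `factories` is assumed to have the artificial providers registered already
    return factories.extracted_items(ExtractedContactPoint)


def sample_everything(factories: Faker) -> list[AnyExtractedModel]:
    """Sweep the whole model registry, mirroring artificial_data() above."""
    return [m for c in EXTRACTED_MODEL_CLASSES for m in factories.extracted_items(c)]
```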
2 changes: 1 addition & 1 deletion mex/extractors/confluence_vvt/parse_html.py
@@ -170,7 +170,7 @@ def get_clean_current_row_all_cols_data(
def get_interne_vorgangsnummer_from_all_rows_data(
intnmr_dict: Any | None | list[str],
) -> list[str] | Any:
"""Get Interne Vorgangsnummer from the table extracted data.
"""Get Interne Vorgangsnummer from the extracted table.

Args:
intnmr_dict: Extracted dict or list of Interne Vorgangsnummer
4 changes: 2 additions & 2 deletions mex/extractors/publisher/extract.py
@@ -2,10 +2,10 @@

from mex.common.backend_api.connector import BackendApiConnector
from mex.common.logging import logger
from mex.common.models import MergedItem
from mex.common.models import AnyMergedModel


def get_merged_items() -> Generator[MergedItem, None, None]:
def get_merged_items() -> Generator[AnyMergedModel, None, None]:
"""Read merged items from backend."""
connector = BackendApiConnector.get()

6 changes: 3 additions & 3 deletions mex/extractors/publisher/filter.py
@@ -1,13 +1,13 @@
from collections.abc import Generator, Iterable

from mex.common.logging import logger
from mex.common.models import MergedItem
from mex.common.models import AnyMergedModel
from mex.extractors.settings import Settings


def filter_merged_items(
items: Iterable[MergedItem],
) -> Generator[MergedItem, None, None]:
items: Iterable[AnyMergedModel],
) -> Generator[AnyMergedModel, None, None]:
"""Filter to be published items by allow list."""
settings = Settings.get()

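The hunk only changes the type annotations; the allow-list logic itself sits outside the context lines. As a rough sketch of the shape of such a filter — the `allowed_identifiers` parameter and the matching rule are placeholders, not the project's actual `Settings` field:

```python
from collections.abc import Generator, Iterable

from mex.common.models import AnyMergedModel


def filter_by_allow_list(
    items: Iterable[AnyMergedModel], allowed_identifiers: set[str]
) -> Generator[AnyMergedModel, None, None]:
    """Yield only items whose identifier is on the allow list (sketch)."""
    for item in items:
        if str(item.identifier) in allowed_identifiers:  # placeholder rule
            yield item
```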
4 changes: 2 additions & 2 deletions mex/extractors/publisher/load.py
@@ -2,12 +2,12 @@
from collections.abc import Iterable

from mex.common.logging import logger
from mex.common.models import MergedItem
from mex.common.models import AnyMergedModel
from mex.common.settings import BaseSettings
from mex.common.transform import MExEncoder


def write_merged_items(items: Iterable[MergedItem]) -> None:
def write_merged_items(items: Iterable[AnyMergedModel]) -> None:
"""Write the incoming items into a new-line delimited JSON file."""
settings = BaseSettings.get()
ndjson_path = settings.work_dir / "publisher.ndjson"
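Only the annotations change here; for context, a minimal sketch of the NDJSON pattern the module is built around, assuming `MExEncoder` can serialize mex-common models end to end. The path handling below is illustrative — the real function derives its file name from settings, as the `publisher.ndjson` context line above shows:

```python
import json
from collections.abc import Iterable
from pathlib import Path

from mex.common.models import AnyMergedModel
from mex.common.transform import MExEncoder


def dump_ndjson(items: Iterable[AnyMergedModel], path: Path) -> int:
    """Write one JSON object per line and return how many items were written."""
    count = 0
    with path.open("w", encoding="utf-8") as handle:
        for item in items:
            handle.write(json.dumps(item, cls=MExEncoder) + "\n")
            count += 1
    return count
```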
14 changes: 7 additions & 7 deletions mex/extractors/publisher/main.py
@@ -1,5 +1,5 @@
from mex.common.backend_api.models import MergedItemsResponse
from mex.common.cli import entrypoint
from mex.common.models import MergedItem
from mex.extractors.pipeline import asset, run_job_in_process
from mex.extractors.publisher.extract import get_merged_items
from mex.extractors.publisher.filter import filter_merged_items
@@ -8,17 +8,17 @@


@asset(group_name="publisher")
def extract_and_filter_merged_items() -> list[MergedItem]:
def extract_and_filter_merged_items() -> MergedItemsResponse:
"""Get merged items from mex-backend and filter them by allow-list."""
items = get_merged_items()

return list(filter_merged_items(items))
filtered = list(filter_merged_items(items))
return MergedItemsResponse(items=filtered, total=len(filtered))


@asset(group_name="publisher")
def publish_merged_items(extract_and_filter_merged_items: list[MergedItem]) -> None:
"""Write recieved merged items to ndjson file."""
write_merged_items(extract_and_filter_merged_items)
def publish_merged_items(extract_and_filter_merged_items: MergedItemsResponse) -> None:
"""Write received merged items to ndjson file."""
write_merged_items(extract_and_filter_merged_items.items)


@entrypoint(Settings)
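The reason the second asset's parameter is literally named `extract_and_filter_merged_items` is Dagster's convention of wiring asset dependencies by argument name. A generic illustration of that mechanism, unrelated to this repository's models:

```python
from dagster import asset, materialize


@asset
def numbers() -> list[int]:
    return [1, 2, 3]


@asset
def total(numbers: list[int]) -> int:
    # depends on the `numbers` asset purely via the matching argument name
    return sum(numbers)


if __name__ == "__main__":
    materialize([numbers, total])
```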
6 changes: 3 additions & 3 deletions mex/extractors/sinks.py
@@ -2,14 +2,14 @@
from itertools import tee

from mex.common.exceptions import MExError
from mex.common.models import ExtractedData
from mex.common.models import AnyExtractedModel
from mex.common.settings import BaseSettings
from mex.common.sinks.backend_api import post_to_backend_api
from mex.common.sinks.ndjson import write_ndjson
from mex.common.types import Identifier, Sink


def load(models: Iterable[ExtractedData]) -> None:
def load(models: Iterable[AnyExtractedModel]) -> None:
"""Load models to the backend API or write to NDJSON files.

Args:
@@ -19,7 +19,7 @@ def load(models: Iterable[ExtractedData]) -> None:
sink: Where to load the provided models
"""
settings = BaseSettings.get()
func: Callable[[Iterable[ExtractedData]], Iterable[Identifier]]
func: Callable[[Iterable[AnyExtractedModel]], Iterable[Identifier]]

for sink, model_gen in zip(
settings.sink, tee(models, len(settings.sink)), strict=False
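The zip/tee combination at the bottom of this hunk is the part worth slowing down for: `tee` hands every configured sink its own lazy copy of the model stream. Isolated as a sketch, with placeholder sink names and writer callables instead of the real `Sink` enum:

```python
from collections.abc import Callable, Iterable
from itertools import tee


def fan_out(
    models: Iterable[object],
    writers: dict[str, Callable[[Iterable[object]], None]],
) -> None:
    """Give every registered writer its own iterator over the same models."""
    sinks = list(writers)
    # note: tee() buffers items for copies that have not consumed them yet,
    # so fully draining one copy keeps the remaining items in memory
    for sink, model_gen in zip(sinks, tee(models, len(sinks)), strict=True):
        writers[sink](model_gen)  # e.g. post_to_backend_api or write_ndjson
```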
8 changes: 4 additions & 4 deletions pdm.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -13,7 +13,7 @@ dependencies = [
"dagster-webserver>=1,<2",
"dagster>=1,<2",
"faker>=33,<34",
"mex-common @ git+https://github.com/robert-koch-institut/mex-common.git@0.42.0",
"mex-common @ git+https://github.com/robert-koch-institut/mex-common.git@0.43.0",
"numpy>=2,<3",
"openpyxl>=3,<4",
"pandas>=2,<3",
4 changes: 2 additions & 2 deletions tests/artificial/test_provider.py
@@ -163,8 +163,8 @@ def test_builder_provider_field_value_error(faker: Faker) -> None:
faker.field_value(field, identity)


def test_builder_provider_extracted_data(faker: Faker) -> None:
models = faker.extracted_data(ExtractedContactPoint)
def test_builder_provider_extracted_items(faker: Faker) -> None:
models = faker.extracted_items(ExtractedContactPoint)
assert models[0].model_dump(exclude_defaults=True) == {
"email": [
"[email protected]",
36 changes: 18 additions & 18 deletions tests/sumo/test_extract.py
@@ -33,19 +33,19 @@ def test_extract_cc1_data_model_nokeda() -> None:
element_label="Name des EDIS",
element_label_en="Name of EDIS",
)
extracted_data = list(extract_cc1_data_model_nokeda())
assert len(extracted_data) == 3
assert extracted_data[0] == expected
extracted = list(extract_cc1_data_model_nokeda())
assert len(extracted) == 3
assert extracted[0] == expected


def test_extract_cc1_data_valuesets() -> None:
expected = Cc1DataValuesets(
category_label_de="Herzstillstand (nicht traumatisch)",
sheet_name="nokeda_cedis",
)
extracted_data = list(extract_cc1_data_valuesets())
assert len(extracted_data) == 6
assert extracted_data[0] == expected
extracted = list(extract_cc1_data_valuesets())
assert len(extracted) == 6
assert extracted[0] == expected


def test_extract_cc2_aux_mapping(
@@ -54,9 +54,9 @@ def test_extract_cc2_aux_mapping(
expected = Cc2AuxMapping(
variable_name_column=["0", "1", "2"], sheet_name="nokeda_age21"
)
extracted_data = list(extract_cc2_aux_mapping(cc2_aux_model))
assert len(extracted_data) == 2
assert extracted_data[0] == expected
extracted = list(extract_cc2_aux_mapping(cc2_aux_model))
assert len(extracted) == 2
assert extracted[0] == expected


def test_extract_cc2_aux_model() -> None:
@@ -67,16 +67,16 @@ def test_extract_cc2_aux_model() -> None:
in_database_static=True,
variable_name="aux_age21_min",
)
extracted_data = list(extract_cc2_aux_model())
assert len(extracted_data) == 2
assert extracted_data[0] == expected
extracted = list(extract_cc2_aux_model())
assert len(extracted) == 2
assert extracted[0] == expected


def test_extract_cc2_aux_valuesets() -> None:
expected = Cc2AuxValuesets(label_de="Kardiovaskulär", label_en="Cardiovascular")
extracted_data = list(extract_cc2_aux_valuesets())
assert len(extracted_data) == 3
assert extracted_data[0] == expected
extracted = list(extract_cc2_aux_valuesets())
assert len(extracted) == 3
assert extracted[0] == expected


def test_extract_cc2_feat_projection() -> None:
@@ -88,9 +88,9 @@ def test_extract_cc2_feat_projection() -> None:
feature_name_de="Respiratorisches Syncytial-Virus, spezifisch",
feature_description="specific RSV-ICD-10 codes",
)
extracted_data = list(extract_cc2_feat_projection())
assert len(extracted_data) == 3
assert extracted_data[0] == expected
extracted = list(extract_cc2_feat_projection())
assert len(extracted) == 3
assert extracted[0] == expected


@pytest.mark.usefixtures("mocked_ldap")
12 changes: 6 additions & 6 deletions tests/sumo/test_filter.py
@@ -8,10 +8,10 @@
def test_filter_and_log_variables(
extracted_primary_sources: dict[str, ExtractedPrimarySource],
) -> None:
extracted_data = extract_cc2_aux_model()
extracted_data_gens = tee(extracted_data, 2)
assert len(list(extracted_data_gens[0])) == 2
extracted_data = filter_and_log_cc2_aux_model(
extracted_data_gens[1], extracted_primary_sources["nokeda"]
extracted_models = extract_cc2_aux_model()
extracted_model_gens = tee(extracted_models, 2)
assert len(list(extracted_model_gens[0])) == 2
extracted_models = filter_and_log_cc2_aux_model(
extracted_model_gens[1], extracted_primary_sources["nokeda"]
)
assert len(list(extracted_data)) == 1
assert len(list(extracted_models)) == 1