From 3669319d52906e0e017cd0f124de8a8291b3c06a Mon Sep 17 00:00:00 2001 From: erichesse Date: Mon, 17 Jun 2024 11:24:13 +0200 Subject: [PATCH] Feature/mx 1537 extract voxco data (#97) # Added - extract voxco data # Changes - consolidate mocked drop connector into one general mock --------- Signed-off-by: erichesse Signed-off-by: erichesse Co-authored-by: Kamran Ali <33874616+mr-kamran-ali@users.noreply.github.com> --- CHANGELOG.md | 5 +- README.md | 5 ++ .../primary-sources/primary-sources.json | 3 ++ mex/voxco/__init__.py | 0 mex/voxco/extract.py | 21 ++++++++ mex/voxco/main.py | 52 +++++++++++++++++++ mex/voxco/model.py | 13 +++++ mex/voxco/settings.py | 19 +++++++ pyproject.toml | 1 + tests/conftest.py | 2 +- tests/drop/__init__.py | 0 tests/drop/mocked_drop.py | 50 ++++++++++++++++++ tests/{ => drop}/test_drop.py | 0 tests/seq_repo/mocked_drop_for_seqrepo.py | 34 ------------ tests/seq_repo/test_extract.py | 2 +- tests/seq_repo/test_main.py | 2 +- tests/test_main.py | 2 +- tests/voxco/__init__.py | 0 tests/voxco/test_data/voxco_data.json | 22 ++++++++ tests/voxco/test_extract.py | 21 ++++++++ tests/voxco/test_main.py | 11 ++++ 21 files changed, 226 insertions(+), 39 deletions(-) create mode 100644 mex/voxco/__init__.py create mode 100644 mex/voxco/extract.py create mode 100644 mex/voxco/main.py create mode 100644 mex/voxco/model.py create mode 100644 mex/voxco/settings.py create mode 100644 tests/drop/__init__.py create mode 100644 tests/drop/mocked_drop.py rename tests/{ => drop}/test_drop.py (100%) delete mode 100644 tests/seq_repo/mocked_drop_for_seqrepo.py create mode 100644 tests/voxco/__init__.py create mode 100644 tests/voxco/test_data/voxco_data.json create mode 100644 tests/voxco/test_extract.py create mode 100644 tests/voxco/test_main.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e71f3f1d..cfa79e64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,12 +33,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - transform grippeweb resources - wikidata aux extractor into seq-repo - function `get_merged_organization_id_by_query_with_transform_and_load` to - wikidata.extract module +- wikidata.extract module +- extract voxco data + ### Changes - update mex-common to 0.27.1 - move `mex.pipeline` documentation to `__init__` to have it in sphinx +- consolidate mocked drop connector into one general mock ### Removed diff --git a/README.md b/README.md index 49b627fb..68e94877 100644 --- a/README.md +++ b/README.md @@ -177,3 +177,8 @@ components of the MEx project are open-sourced under the same license as well. - `pdm run synopse` extracts synopse data from report-server exports - based on synopse to MEx mapping commit 6472329 + +### voxco extractor + +- `pdm run voxco` extracts voxco data from voxco JSON files +- based on voxco to MEx mapping commit 307d275 diff --git a/assets/raw-data/primary-sources/primary-sources.json b/assets/raw-data/primary-sources/primary-sources.json index 0e4835ed..5199a802 100644 --- a/assets/raw-data/primary-sources/primary-sources.json +++ b/assets/raw-data/primary-sources/primary-sources.json @@ -47,6 +47,9 @@ { "identifier": "report-server" }, + { + "identifier": "voxco" + }, { "identifier": "wikidata" } diff --git a/mex/voxco/__init__.py b/mex/voxco/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/mex/voxco/extract.py b/mex/voxco/extract.py new file mode 100644 index 00000000..8ba41a3a --- /dev/null +++ b/mex/voxco/extract.py @@ -0,0 +1,21 @@ +from mex.drop import DropApiConnector +from mex.voxco.model import VoxcoVariable + + +def extract_voxco_variables() -> dict[str, list[VoxcoVariable]]: + """Extract voxco variables by loading data from mex-drop source json file. + + Returns: + lists of voxco variables by json file name + """ + connector = DropApiConnector.get() + files = connector.list_files("voxco") + data = { + file_name: connector.get_file("voxco", file_name) + for file_name in files + if "test_" not in file_name + } + return { + file_name: [VoxcoVariable.model_validate(item) for item in file_rows["value"]] + for file_name, file_rows in data.items() + } diff --git a/mex/voxco/main.py b/mex/voxco/main.py new file mode 100644 index 00000000..c491f6ff --- /dev/null +++ b/mex/voxco/main.py @@ -0,0 +1,52 @@ +from pathlib import Path +from typing import Any + +from mex.common.cli import entrypoint +from mex.common.models import ( + ExtractedPrimarySource, + ExtractedResource, +) +from mex.common.primary_source.transform import ( + get_primary_sources_by_name, +) +from mex.mapping.extract import extract_mapping_data +from mex.pipeline import asset, run_job_in_process +from mex.sinks import load +from mex.voxco.extract import extract_voxco_variables +from mex.voxco.model import VoxcoVariable +from mex.voxco.settings import VoxcoSettings + + +@asset(group_name="voxco", deps=["extracted_primary_source_mex"]) +def extracted_primary_source_voxco( + extracted_primary_sources: list[ExtractedPrimarySource], +) -> ExtractedPrimarySource: + """Load and return voxco primary source.""" + (extracted_primary_source,) = get_primary_sources_by_name( + extracted_primary_sources, "voxco" + ) + load([extracted_primary_source]) + + return extracted_primary_source + + +@asset(group_name="voxco") +def voxco_sources() -> dict[str, list[VoxcoVariable]]: + """Extract voxco variables by json file names.""" + return extract_voxco_variables() + + +@asset(group_name="voxco") +def voxco_resource_mappings() -> list[dict[str, Any]]: + """Extract voxco resource mappings.""" + settings = VoxcoSettings.get() + return [ + extract_mapping_data(file, ExtractedResource) + for file in Path(settings.mapping_path).glob("resource_*.yaml") + ] + + +@entrypoint(VoxcoSettings) +def run() -> None: + """Run the voxco extractor job in-process.""" + run_job_in_process("voxco") diff --git a/mex/voxco/model.py b/mex/voxco/model.py new file mode 100644 index 00000000..8545a8cb --- /dev/null +++ b/mex/voxco/model.py @@ -0,0 +1,13 @@ +from pydantic import Field + +from mex.common.models import BaseModel + + +class VoxcoVariable(BaseModel): + """Model class for Voxco Variable.""" + + Id: int + DataType: str + Type: str + QuestionText: str = Field(min_length=0) + Choices: list[str] diff --git a/mex/voxco/settings.py b/mex/voxco/settings.py new file mode 100644 index 00000000..5dd96339 --- /dev/null +++ b/mex/voxco/settings.py @@ -0,0 +1,19 @@ +from pydantic import Field +from pydantic_settings import SettingsConfigDict + +from mex.common.types import AssetsPath +from mex.settings import Settings + + +class VoxcoSettings(Settings): + """Settings for the Voxco extractor.""" + + model_config = SettingsConfigDict(env_prefix="voxco_") + + mapping_path: AssetsPath = Field( + AssetsPath("mappings/__final__/voxco"), + description=( + "Path to the directory with the voxco mapping files containing the " + "default values, absolute path or relative to `assets_dir`." + ), + ) diff --git a/pyproject.toml b/pyproject.toml index 4606ebeb..ec936c5e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,7 @@ seq-repo = "mex.seq_repo.main:run" sumo = "mex.sumo.main:run" sync-persons = "mex.ldap.main:run" synopse = "mex.synopse.main:run" +voxco = "mex.voxco.main:run" [tool.cruft] template = "https://github.com/robert-koch-institut/mex-template" diff --git a/tests/conftest.py b/tests/conftest.py index e48cae44..36f2110f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -29,7 +29,7 @@ "tests.grippeweb.mocked_grippeweb", "tests.ifsg.mocked_ifsg", "tests.rdmo.mocked_rdmo", - "tests.seq_repo.mocked_drop_for_seqrepo", + "tests.drop.mocked_drop", ) TEST_DATA_DIR = Path(__file__).parent / "test_data" diff --git a/tests/drop/__init__.py b/tests/drop/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/drop/mocked_drop.py b/tests/drop/mocked_drop.py new file mode 100644 index 00000000..c1a2c0a2 --- /dev/null +++ b/tests/drop/mocked_drop.py @@ -0,0 +1,50 @@ +import json +from pathlib import Path +from unittest.mock import MagicMock + +import pytest +import requests +from pytest import MonkeyPatch + +from mex.drop import DropApiConnector + + +@pytest.fixture +def mocked_drop(monkeypatch: MonkeyPatch) -> None: + """Mock the drop api connector to return dummy data.""" + monkeypatch.setattr( + DropApiConnector, + "__init__", + lambda self: setattr(self, "session", MagicMock(spec=requests.Session)), + ) + monkeypatch.setattr( + DropApiConnector, + "list_files", + lambda _, x_system: [ + path.stem + for path in ( + Path(__file__).parents[2] + / "tests" + / x_system.replace("-", "_") + / "test_data" + ).rglob("*.json") + ], + ) + + def get_file_mocked(self, x_system: str, file_id: str): + with open( + ( + Path(__file__).parents[2] + / "tests" + / x_system.replace("-", "_") + / "test_data" + / file_id + ).with_suffix(".json") + ) as handle: + return json.load(handle) + + monkeypatch.setattr( + DropApiConnector, + "get_file", + get_file_mocked, + ) diff --git a/tests/test_drop.py b/tests/drop/test_drop.py similarity index 100% rename from tests/test_drop.py rename to tests/drop/test_drop.py diff --git a/tests/seq_repo/mocked_drop_for_seqrepo.py b/tests/seq_repo/mocked_drop_for_seqrepo.py deleted file mode 100644 index 6578d29f..00000000 --- a/tests/seq_repo/mocked_drop_for_seqrepo.py +++ /dev/null @@ -1,34 +0,0 @@ -import json -from pathlib import Path -from unittest.mock import MagicMock - -import pytest -import requests -from pytest import MonkeyPatch - -from mex.drop import DropApiConnector - - -@pytest.fixture -def mocked_drop_for_seqrepo(monkeypatch: MonkeyPatch) -> None: - """Mock the drop api connector to return dummy data.""" - monkeypatch.setattr( - DropApiConnector, - "__init__", - lambda self: setattr(self, "session", MagicMock(spec=requests.Session)), - ) - monkeypatch.setattr( - DropApiConnector, - "list_files", - lambda *_, **__: ["one"], - ) - - def get_file_mocked(*_, **__): - with open(Path(__file__).parent / "test_data" / "default.json") as handle: - return json.load(handle) - - monkeypatch.setattr( - DropApiConnector, - "get_file", - get_file_mocked, - ) diff --git a/tests/seq_repo/test_extract.py b/tests/seq_repo/test_extract.py index f4bd879a..30193174 100644 --- a/tests/seq_repo/test_extract.py +++ b/tests/seq_repo/test_extract.py @@ -10,7 +10,7 @@ @pytest.mark.usefixtures( - "mocked_drop_for_seqrepo", + "mocked_drop", ) def test_extract_sources() -> None: sources = list(extract_sources()) diff --git a/tests/seq_repo/test_main.py b/tests/seq_repo/test_main.py index 20d55df2..6e5622c2 100644 --- a/tests/seq_repo/test_main.py +++ b/tests/seq_repo/test_main.py @@ -4,7 +4,7 @@ @pytest.mark.usefixtures( - "mocked_drop_for_seqrepo", + "mocked_drop", "mocked_ldap", "mocked_wikidata", ) diff --git a/tests/test_main.py b/tests/test_main.py index b0e76ce1..befe2e75 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -7,7 +7,7 @@ "mocked_blueant", "mocked_confluence_vvt", "mocked_datscha_web", - "mocked_drop_for_seqrepo", + "mocked_drop", "mocked_grippeweb", "mocked_ifsg", "mocked_ldap", diff --git a/tests/voxco/__init__.py b/tests/voxco/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/voxco/test_data/voxco_data.json b/tests/voxco/test_data/voxco_data.json new file mode 100644 index 00000000..1dfeb618 --- /dev/null +++ b/tests/voxco/test_data/voxco_data.json @@ -0,0 +1,22 @@ +{ + "Count": 3, + "value": [ + { + "Choices": [ + "@{Code=1; Text=Januar; Image=; HasOpenEnd=False; Visible=True; Default=False}", + "@{Code=1; Text=Februar; Image=; HasOpenEnd=False; Visible=True; Default=False}" + ], + "DataType": "Text", + "HasOpenEnd": false, + "Id": 50614, + "MaxAnswers": 1, + "MaxLength": 2, + "Name": "MONAT", + "QuestionId": 33302, + "QuestionName": "MONAT", + "QuestionText": "Monat ", + "Text": "", + "Type": "Discrete" + } + ] +} diff --git a/tests/voxco/test_extract.py b/tests/voxco/test_extract.py new file mode 100644 index 00000000..7be56bc9 --- /dev/null +++ b/tests/voxco/test_extract.py @@ -0,0 +1,21 @@ +import pytest + +from mex.voxco.extract import extract_voxco_variables + + +@pytest.mark.usefixtures( + "mocked_drop", +) +def test_extract_voxco_variables() -> None: + sources = extract_voxco_variables() + expected = { + "Id": 50614, + "DataType": "Text", + "Type": "Discrete", + "QuestionText": "Monat", + "Choices": [ + "@{Code=1; Text=Januar; Image=; HasOpenEnd=False; Visible=True; Default=False}", + "@{Code=1; Text=Februar; Image=; HasOpenEnd=False; Visible=True; Default=False}", + ], + } + assert sources["voxco_data"][0].model_dump() == expected diff --git a/tests/voxco/test_main.py b/tests/voxco/test_main.py new file mode 100644 index 00000000..f1a685ab --- /dev/null +++ b/tests/voxco/test_main.py @@ -0,0 +1,11 @@ +import pytest + +from mex.pipeline import run_job_in_process + + +@pytest.mark.usefixtures( + "mocked_drop", +) +def test_job() -> None: + result = run_job_in_process("voxco") + assert result.success