Skip to content

Commit

Permalink
Feature/mx 1537 extract voxco data (#97)
Browse files Browse the repository at this point in the history
# Added
- extract voxco data 

# Changes
- consolidate mocked drop connector into one general mock

---------

Signed-off-by: erichesse <[email protected]>
Signed-off-by: erichesse <[email protected]>
Co-authored-by: Kamran Ali <[email protected]>
  • Loading branch information
erichesse and mr-kamran-ali authored Jun 17, 2024
1 parent f439f25 commit 3669319
Show file tree
Hide file tree
Showing 21 changed files with 226 additions and 39 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- transform grippeweb resources
- wikidata aux extractor into seq-repo
- function `get_merged_organization_id_by_query_with_transform_and_load` to
wikidata.extract module
- wikidata.extract module
- extract voxco data


### Changes

- update mex-common to 0.27.1
- move `mex.pipeline` documentation to `__init__` to have it in sphinx
- consolidate mocked drop connector into one general mock

### Removed

Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,8 @@ components of the MEx project are open-sourced under the same license as well.

- `pdm run synopse` extracts synopse data from report-server exports
- based on synopse to MEx mapping commit 6472329

### voxco extractor

- `pdm run voxco` extracts voxco data from voxco JSON files
- based on voxco to MEx mapping commit 307d275
3 changes: 3 additions & 0 deletions assets/raw-data/primary-sources/primary-sources.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@
{
"identifier": "report-server"
},
{
"identifier": "voxco"
},
{
"identifier": "wikidata"
}
Expand Down
Empty file added mex/voxco/__init__.py
Empty file.
21 changes: 21 additions & 0 deletions mex/voxco/extract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from mex.drop import DropApiConnector
from mex.voxco.model import VoxcoVariable


def extract_voxco_variables() -> dict[str, list[VoxcoVariable]]:
    """Load voxco variables from the json files offered by mex-drop.

    Every file listed for the `voxco` system is fetched, except files whose
    name contains `test_`. The entries under the `value` key of each file are
    validated into `VoxcoVariable` models.

    Returns:
        lists of voxco variables by json file name
    """
    drop = DropApiConnector.get()

    # first pass: download the raw json content of all relevant files
    raw_by_name = {}
    for name in drop.list_files("voxco"):
        if "test_" in name:
            continue
        raw_by_name[name] = drop.get_file("voxco", name)

    # second pass: validate the rows of each file into models
    variables_by_name = {}
    for name, content in raw_by_name.items():
        variables_by_name[name] = [
            VoxcoVariable.model_validate(row) for row in content["value"]
        ]
    return variables_by_name
52 changes: 52 additions & 0 deletions mex/voxco/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from pathlib import Path
from typing import Any

from mex.common.cli import entrypoint
from mex.common.models import (
ExtractedPrimarySource,
ExtractedResource,
)
from mex.common.primary_source.transform import (
get_primary_sources_by_name,
)
from mex.mapping.extract import extract_mapping_data
from mex.pipeline import asset, run_job_in_process
from mex.sinks import load
from mex.voxco.extract import extract_voxco_variables
from mex.voxco.model import VoxcoVariable
from mex.voxco.settings import VoxcoSettings


@asset(group_name="voxco", deps=["extracted_primary_source_mex"])
def extracted_primary_source_voxco(
    extracted_primary_sources: list[ExtractedPrimarySource],
) -> ExtractedPrimarySource:
    """Select the voxco primary source, pass it to `load` and return it."""
    matches = get_primary_sources_by_name(extracted_primary_sources, "voxco")
    # single-element unpacking fails unless exactly one primary source matched
    (voxco_primary_source,) = matches
    load([voxco_primary_source])
    return voxco_primary_source


@asset(group_name="voxco")
def voxco_sources() -> dict[str, list[VoxcoVariable]]:
    """Provide the extracted voxco variables, keyed by json file name."""
    variables_by_file = extract_voxco_variables()
    return variables_by_file


@asset(group_name="voxco")
def voxco_resource_mappings() -> list[dict[str, Any]]:
    """Read all voxco resource mapping files from the configured mapping path.

    Returns:
        one extracted mapping per `resource_*.yaml` file found in the
        directory given by `VoxcoSettings.mapping_path`
    """
    mapping_dir = Path(VoxcoSettings.get().mapping_path)
    mappings = []
    for mapping_file in mapping_dir.glob("resource_*.yaml"):
        mappings.append(extract_mapping_data(mapping_file, ExtractedResource))
    return mappings


@entrypoint(VoxcoSettings)
def run() -> None:
    """Execute the `voxco` job in the current process."""
    job_name = "voxco"
    run_job_in_process(job_name)
13 changes: 13 additions & 0 deletions mex/voxco/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from pydantic import Field

from mex.common.models import BaseModel


class VoxcoVariable(BaseModel):
    """A single variable entry as found in a voxco json export.

    The attribute names deliberately mirror the keys of the source json.
    """

    # numeric id of the variable
    Id: int
    # data type and variable type as given by the source
    DataType: str
    Type: str
    # question text; empty strings are explicitly permitted here
    QuestionText: str = Field(min_length=0)
    # answer choices as raw strings, not parsed any further by this model
    Choices: list[str]
19 changes: 19 additions & 0 deletions mex/voxco/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from pydantic import Field
from pydantic_settings import SettingsConfigDict

from mex.common.types import AssetsPath
from mex.settings import Settings


class VoxcoSettings(Settings):
    """Configuration of the Voxco extractor, read with the `voxco_` env prefix."""

    model_config = SettingsConfigDict(env_prefix="voxco_")

    # location of the voxco mapping files inside (or outside) the assets dir
    mapping_path: AssetsPath = Field(
        default=AssetsPath("mappings/__final__/voxco"),
        description=(
            "Path to the directory with the voxco mapping files "
            "containing the default values, "
            "absolute path or relative to `assets_dir`."
        ),
    )
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ seq-repo = "mex.seq_repo.main:run"
sumo = "mex.sumo.main:run"
sync-persons = "mex.ldap.main:run"
synopse = "mex.synopse.main:run"
voxco = "mex.voxco.main:run"

[tool.cruft]
template = "https://github.com/robert-koch-institut/mex-template"
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"tests.grippeweb.mocked_grippeweb",
"tests.ifsg.mocked_ifsg",
"tests.rdmo.mocked_rdmo",
"tests.seq_repo.mocked_drop_for_seqrepo",
"tests.drop.mocked_drop",
)

TEST_DATA_DIR = Path(__file__).parent / "test_data"
Expand Down
Empty file added tests/drop/__init__.py
Empty file.
50 changes: 50 additions & 0 deletions tests/drop/mocked_drop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import json
from pathlib import Path
from unittest.mock import MagicMock

import pytest
import requests
from pytest import MonkeyPatch

from mex.drop import DropApiConnector


@pytest.fixture
def mocked_drop(monkeypatch: MonkeyPatch) -> None:
    """Mock the drop api connector to return dummy data.

    The connector is patched so that no HTTP session is created and both
    `list_files` and `get_file` are served from the json files located in
    `tests/<x_system>/test_data`, where dashes in the `x_system` name are
    replaced by underscores.
    """

    def test_data_dir(x_system: str) -> Path:
        """Return the test data directory belonging to the given x-system."""
        return (
            Path(__file__).parents[2]
            / "tests"
            / x_system.replace("-", "_")
            / "test_data"
        )

    def list_files_mocked(self: DropApiConnector, x_system: str) -> list[str]:
        """List the stems of all json files below the test data directory."""
        return [path.stem for path in test_data_dir(x_system).rglob("*.json")]

    def get_file_mocked(self: DropApiConnector, x_system: str, file_id: str):
        """Load and parse the json test file with the given id."""
        file_path = (test_data_dir(x_system) / file_id).with_suffix(".json")
        # read as UTF-8 explicitly so that decoding does not depend on the
        # locale of the machine running the tests
        with open(file_path, encoding="utf-8") as handle:
            return json.load(handle)

    # replace the constructor so that no real session is set up
    monkeypatch.setattr(
        DropApiConnector,
        "__init__",
        lambda self: setattr(self, "session", MagicMock(spec=requests.Session)),
    )
    monkeypatch.setattr(DropApiConnector, "list_files", list_files_mocked)
    monkeypatch.setattr(DropApiConnector, "get_file", get_file_mocked)
File renamed without changes.
34 changes: 0 additions & 34 deletions tests/seq_repo/mocked_drop_for_seqrepo.py

This file was deleted.

2 changes: 1 addition & 1 deletion tests/seq_repo/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


@pytest.mark.usefixtures(
"mocked_drop_for_seqrepo",
"mocked_drop",
)
def test_extract_sources() -> None:
sources = list(extract_sources())
Expand Down
2 changes: 1 addition & 1 deletion tests/seq_repo/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


@pytest.mark.usefixtures(
"mocked_drop_for_seqrepo",
"mocked_drop",
"mocked_ldap",
"mocked_wikidata",
)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"mocked_blueant",
"mocked_confluence_vvt",
"mocked_datscha_web",
"mocked_drop_for_seqrepo",
"mocked_drop",
"mocked_grippeweb",
"mocked_ifsg",
"mocked_ldap",
Expand Down
Empty file added tests/voxco/__init__.py
Empty file.
22 changes: 22 additions & 0 deletions tests/voxco/test_data/voxco_data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"Count": 3,
"value": [
{
"Choices": [
"@{Code=1; Text=Januar; Image=; HasOpenEnd=False; Visible=True; Default=False}",
"@{Code=1; Text=Februar; Image=; HasOpenEnd=False; Visible=True; Default=False}"
],
"DataType": "Text",
"HasOpenEnd": false,
"Id": 50614,
"MaxAnswers": 1,
"MaxLength": 2,
"Name": "MONAT",
"QuestionId": 33302,
"QuestionName": "MONAT",
"QuestionText": "Monat ",
"Text": "",
"Type": "Discrete"
}
]
}
21 changes: 21 additions & 0 deletions tests/voxco/test_extract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pytest

from mex.voxco.extract import extract_voxco_variables


@pytest.mark.usefixtures("mocked_drop")
def test_extract_voxco_variables() -> None:
    """Check the first variable extracted from the `voxco_data` test file."""
    variables_by_file = extract_voxco_variables()
    first_variable = variables_by_file["voxco_data"][0]

    # NOTE(review): the fixture contains "Monat " with a trailing space while
    # "Monat" is expected here; presumably the base model strips whitespace
    # during validation, confirm against mex-common
    assert first_variable.model_dump() == {
        "Id": 50614,
        "DataType": "Text",
        "Type": "Discrete",
        "QuestionText": "Monat",
        "Choices": [
            "@{Code=1; Text=Januar; Image=; HasOpenEnd=False; Visible=True; Default=False}",
            "@{Code=1; Text=Februar; Image=; HasOpenEnd=False; Visible=True; Default=False}",
        ],
    }
11 changes: 11 additions & 0 deletions tests/voxco/test_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import pytest

from mex.pipeline import run_job_in_process


@pytest.mark.usefixtures("mocked_drop")
def test_job() -> None:
    """Run the voxco job in-process against the mocked drop connector."""
    job_result = run_job_in_process("voxco")
    assert job_result.success

0 comments on commit 3669319

Please sign in to comment.