Skip to content

Commit

Permalink
Handle Table types and literalinclude the omop config
Browse files Browse the repository at this point in the history
  • Loading branch information
Iain-S committed Apr 28, 2023
1 parent e1e4fc2 commit 8d43f07
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 48 deletions.
45 changes: 9 additions & 36 deletions docs/source/tutorials.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,48 +26,21 @@ In the source database, remove the circular foreign key between `concept` and `v
alter table concept drop constraint concept.concept_vocabulary_id_fkey
and between `concept` and `domain` with, for example:

.. code-block:: sql
alter table concept drop constraint concept.concept_domain_id_fkey
Create a config file
++++++++++++++++++++

Make a config file called `omop.yaml`.
At the very least, our config file will need to specify the tables that need to be copied over in their entirety:

.. code-block:: yaml
tables:
# Standardized Vocabularies
concept:
vocabulary_table: true
concept_class:
vocabulary_table: true
concept_relationship:
vocabulary_table: true
concept_synonym:
vocabulary_table: true
domain:
vocabulary_table: true
drug_strength:
vocabulary_table: true
cohort_definition:
vocabulary_table: true
attribute_definition:
vocabulary_table: true
relationship:
vocabulary_table: true
source_to_concept_map:
vocabulary_table: true
vocabulary:
vocabulary_table: true
# Standardized meta-data
cdm_source:
vocabulary_table: true
# Standardized health system data
location:
vocabulary_table: true
care_site:
vocabulary_table: true
provider:
vocabulary_table: true
.. literalinclude:: ../../tests/examples/omop/config.yaml
:language: yaml

Make SQLAlchemy file
++++++++++++++++++++
Expand Down
2 changes: 2 additions & 0 deletions sqlsynthgen/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,7 @@ def load(self, connection: Any) -> None:
"r", newline="", encoding="utf-8"
) as yamlfile:
rows = yaml.load(yamlfile, Loader=yaml.Loader)
if not rows:
return
stmt = insert(self.table).values(list(rows))
connection.execute(stmt)
36 changes: 26 additions & 10 deletions sqlsynthgen/make.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import inspect
from sys import stderr
from types import ModuleType
from typing import Any, Final, Optional
from typing import Any, Final, Optional, Tuple

import snsql
from mimesis.providers.base import BaseProvider
Expand Down Expand Up @@ -48,12 +48,20 @@
}


def _orm_class_from_table_name(
    tables_module: Any, full_name: str
) -> Optional[Tuple[str, str]]:
    """Find the names under which a table is exposed in ``tables_module``.

    Despite the function's name, the result is a pair of *strings* (not a
    class), ready to be interpolated into generated source code.

    Args:
        tables_module: Module object to search. It must expose
            ``Base.registry.mappers``; it may also define module-level
            attributes named ``t_<full_name>``.
        full_name: Table name, compared against ``__table__.fullname`` of
            each mapped class.

    Returns:
        ``(class_name, table_expression)`` where ``table_expression`` is the
        attribute path, relative to ``tables_module``, that evaluates to the
        table: ``"<Class>.__table__"`` for a mapped ORM class, or the
        ``t_<full_name>`` attribute name itself (in which case both elements
        are equal). ``None`` if nothing matches.
    """
    # Case 1: the table is declared as a mapped ORM class.
    for mapper in tables_module.Base.registry.mappers:
        cls = mapper.class_
        if cls.__table__.fullname == full_name:
            return cls.__name__, cls.__name__ + ".__table__"

    # Case 2: the table is a module-level attribute named ``t_<full_name>``
    # (the original comment describes this as a SQLAlchemy Core Table).
    guess = "t_" + full_name
    if hasattr(tables_module, guess):
        return guess, guess
    return None


Expand Down Expand Up @@ -100,13 +108,16 @@ def _add_default_generator(content: str, tables_module: ModuleType, column: Any)
target_name_parts = fkey.target_fullname.split(".")
target_table_name = ".".join(target_name_parts[:-1])
target_column_name = target_name_parts[-1]
target_orm_class = _orm_class_from_table_name(tables_module, target_table_name)
if target_orm_class is None:
class_and_name = _orm_class_from_table_name(tables_module, target_table_name)
if not class_and_name:
raise ValueError(f"Could not find the ORM class for {target_table_name}.")

target_orm_class, _ = class_and_name

content += (
f"self.{column.name} = "
f"generic.column_value_provider.column_value(dst_db_conn, "
f"{tables_module.__name__}.{target_orm_class.__name__}, "
f"{tables_module.__name__}.{target_orm_class}, "
f'"{target_column_name}"'
")"
)
Expand Down Expand Up @@ -180,13 +191,18 @@ def make_generators_from_tables(

if table_config.get("vocabulary_table") is True:

orm_class = _orm_class_from_table_name(tables_module, table.fullname)
if not orm_class:
class_and_name = _orm_class_from_table_name(tables_module, table.fullname)

if not class_and_name:
raise RuntimeError(f"Couldn't find {table.fullname} in {tables_module}")
class_name = orm_class.__name__

class_name, table_name = class_and_name

the_table_to_download = f"{tables_module.__name__}.{table_name}"

new_content += (
f"\n\n{class_name.lower()}_vocab "
f"= FileUploader({tables_module.__name__}.{class_name}.__table__)"
f"= FileUploader({the_table_to_download})"
)
vocab_dict += f'{INDENTATION}"{table.name}": {class_name.lower()}_vocab,\n'

Expand Down
5 changes: 3 additions & 2 deletions sqlsynthgen/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ def download_table(table: Any, engine: Any) -> None:
yaml_file_name = table.fullname + ".yaml"
yaml_file_path = Path(yaml_file_name)
if yaml_file_path.exists():
print(f"{str(yaml_file_name)} already exists. Exiting...", file=stderr)
sys.exit(1)
# print(f"{str(yaml_file_name)} already exists. Exiting...", file=stderr)
# sys.exit(1)
print(f"Warning: {str(yaml_file_name)} already exists.", file=stderr)

stmt = select([table])
with engine.connect() as conn:
Expand Down
36 changes: 36 additions & 0 deletions tests/examples/omop/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
tables:
# Standardized Vocabularies
concept:
vocabulary_table: true
concept_ancestor:
vocabulary_table: true
concept_class:
vocabulary_table: true
concept_relationship:
vocabulary_table: true
concept_synonym:
vocabulary_table: true
domain:
vocabulary_table: true
drug_strength:
vocabulary_table: true
cohort_definition:
vocabulary_table: true
attribute_definition:
vocabulary_table: true
relationship:
vocabulary_table: true
source_to_concept_map:
vocabulary_table: true
vocabulary:
vocabulary_table: true
# Standardized meta-data
cdm_source:
vocabulary_table: true
# Standardized health system data
location:
vocabulary_table: true
care_site:
vocabulary_table: true
provider:
vocabulary_table: true
24 changes: 24 additions & 0 deletions tests/test_make.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,30 @@ def test_make_generators_from_tables(

self.assertEqual(expected, actual)

# Decorators are applied bottom-up, so the mocks are passed in the order
# download_table, create_engine, get_settings.
@patch("sqlsynthgen.make.get_settings")
@patch("sqlsynthgen.make.create_engine")
@patch("sqlsynthgen.make.download_table")
def test_make_generators_from_table(
self,
mock_download: MagicMock,
mock_create: MagicMock,
mock_get_settings: MagicMock,
) -> None:
"""Check that the generated generators source matches expected_ssg.py.

get_settings, create_engine and download_table are patched in
sqlsynthgen.make. The test asserts that the latter two are each called
exactly once and that the returned text equals the expected file.

NOTE(review): the name differs from test_make_generators_from_tables by a
single letter -- confirm this is an intentionally separate test case.
"""
mock_get_settings.return_value = get_test_settings()
# File names are relative paths, resolved against the current working
# directory when the test runs.
with open("expected_ssg.py", encoding="utf-8") as expected_output:
expected = expected_output.read()
conf_path = "example_config.yaml"
with open(conf_path, "r", encoding="utf8") as f:
config = yaml.safe_load(f)
stats_path = "example_stats.yaml"

actual = make_generators_from_tables(example_orm, config, stats_path)
mock_download.assert_called_once()
mock_create.assert_called_once()

self.assertEqual(expected, actual)


class TestMakeTables(SSGTestCase):
"""Test the make_tables function."""
Expand Down

0 comments on commit 8d43f07

Please sign in to comment.