From e625978ed48c5532a88bb04b0846b664f6216712 Mon Sep 17 00:00:00 2001 From: Jon Couldridge Date: Thu, 9 Jan 2025 10:30:48 +0000 Subject: [PATCH 1/7] initial repository root files --- .editorconfig | 16 +++++ .gitattributes | 1 + .gitignore | 171 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 188 insertions(+) create mode 100644 .editorconfig create mode 100644 .gitattributes create mode 100644 .gitignore diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..6d66202 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,16 @@ +# EditorConfig is awesome: https://EditorConfig.org + +# top-most EditorConfig file +root = true + +# Unix-style newlines with a newline ending every file +[*] +end_of_line = lf +insert_final_newline = true +charset = utf-8 +indent_style = space +indent_size = 2 + +[*.py] +indent_size = 4 # pep8 +profile = black diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..6313b56 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..15201ac --- /dev/null +++ b/.gitignore @@ -0,0 +1,171 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. 
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + +# PyPI configuration file +.pypirc From 2425bd336b6316800bb1940d960f3f3b14c76706 Mon Sep 17 00:00:00 2001 From: Jon Couldridge Date: Thu, 9 Jan 2025 13:27:59 +0000 Subject: [PATCH 2/7] python project files, initial dependencies --- .python-version | 1 + pyproject.toml | 38 ++++ src/hutch_bunny/__init__.py | 2 + uv.lock | 333 ++++++++++++++++++++++++++++++++++++ 4 files changed, 374 insertions(+) create mode 100644 .python-version create mode 100644 pyproject.toml create mode 100644 src/hutch_bunny/__init__.py create mode 100644 uv.lock diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..24ee5b1 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.13 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..64ac825 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,38 @@ +[project] +name = "hutch-bunny" +version = "1.0.0-alpha.1" +description = "A Cohort Discovery Task API worker" +license = { text = "MIT License" } +readme = "README.md" +authors = [ + { name = "Jon Couldridge", email = "jonathan.couldridge@nottingham.ac.uk" }, + { name = "Daniel Lea", email = "daniel.lea@nottingham.ac.uk" }, + { name = "Tri Thien Nguyen", email = "thien.nguyen1@nottingham.ac.uk" }, + { name = "James Mitchell-White", email = "james.mitchell-white1@nottingham.ac.uk" }, + { name = "Anwar Gaungoo", email = "anwar.gaungoo@nottingham.ac.uk" }, +] +maintainers = [ + { name = "Jon Couldridge", email = "jonathan.couldridge@nottingham.ac.uk" }, +] +requires-python = ">=3.13" +dependencies = [ + "numpy>=2.2.1", + "pandas>=2.2.3", + "psycopg[binary]>=3.2.3", + "requests>=2.32.3", # daemon-only + "sqlalchemy>=2.0.36", + # possible extras? + # "pyodbc>=5.2.0", # not sure exactly when this is or isn't needed? 
+ # mysqlclient="^2.1.1" # should be only when targeting mysql - seems to have system dependencies + # TODO: sqlserver driver for sqlalchemy +] + +[project.scripts] +bunny = "hutch_bunny:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[dependency-groups] +dev = ["ruff>=0.8.6", "pytest>=8.3.4"] diff --git a/src/hutch_bunny/__init__.py b/src/hutch_bunny/__init__.py new file mode 100644 index 0000000..31e8c19 --- /dev/null +++ b/src/hutch_bunny/__init__.py @@ -0,0 +1,2 @@ +def main() -> None: + print("Hello from hutch-bunny!") diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000..53f1ded --- /dev/null +++ b/uv.lock @@ -0,0 +1,333 @@ +version = 1 +requires-python = ">=3.13" + +[[package]] +name = "certifi" +version = "2024.12.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/bd/1d41ee578ce09523c81a15426705dd20969f5abf006d1afe8aeff0dd776a/certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db", size = 166010 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a5/32/8f6669fc4798494966bf446c8c4a162e0b5d893dff088afddf76414f70e1/certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56", size = 164927 }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/16/b0/572805e227f01586461c80e0fd25d65a2115599cc9dad142fee4b747c357/charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3", size = 123188 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/94/ce8e6f63d18049672c76d07d119304e1e2d7c6098f0841b51c666e9f44a0/charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = 
"sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda", size = 195698 }, + { url = "https://files.pythonhosted.org/packages/24/2e/dfdd9770664aae179a96561cc6952ff08f9a8cd09a908f259a9dfa063568/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313", size = 140162 }, + { url = "https://files.pythonhosted.org/packages/24/4e/f646b9093cff8fc86f2d60af2de4dc17c759de9d554f130b140ea4738ca6/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9", size = 150263 }, + { url = "https://files.pythonhosted.org/packages/5e/67/2937f8d548c3ef6e2f9aab0f6e21001056f692d43282b165e7c56023e6dd/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b", size = 142966 }, + { url = "https://files.pythonhosted.org/packages/52/ed/b7f4f07de100bdb95c1756d3a4d17b90c1a3c53715c1a476f8738058e0fa/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11", size = 144992 }, + { url = "https://files.pythonhosted.org/packages/96/2c/d49710a6dbcd3776265f4c923bb73ebe83933dfbaa841c5da850fe0fd20b/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f", size = 147162 }, + { url = "https://files.pythonhosted.org/packages/b4/41/35ff1f9a6bd380303dea55e44c4933b4cc3c4850988927d4082ada230273/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd", size = 140972 }, + { url = 
"https://files.pythonhosted.org/packages/fb/43/c6a0b685fe6910d08ba971f62cd9c3e862a85770395ba5d9cad4fede33ab/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2", size = 149095 }, + { url = "https://files.pythonhosted.org/packages/4c/ff/a9a504662452e2d2878512115638966e75633519ec11f25fca3d2049a94a/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886", size = 152668 }, + { url = "https://files.pythonhosted.org/packages/6c/71/189996b6d9a4b932564701628af5cee6716733e9165af1d5e1b285c530ed/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601", size = 150073 }, + { url = "https://files.pythonhosted.org/packages/e4/93/946a86ce20790e11312c87c75ba68d5f6ad2208cfb52b2d6a2c32840d922/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", size = 145732 }, + { url = "https://files.pythonhosted.org/packages/cd/e5/131d2fb1b0dddafc37be4f3a2fa79aa4c037368be9423061dccadfd90091/charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", size = 95391 }, + { url = "https://files.pythonhosted.org/packages/27/f2/4f9a69cc7712b9b5ad8fdb87039fd89abba997ad5cbe690d1835d40405b0/charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", size = 102702 }, + { url = "https://files.pythonhosted.org/packages/0e/f6/65ecc6878a89bb1c23a086ea335ad4bf21a588990c3f535a227b9eea9108/charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85", size = 49767 }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = "hutch-bunny" +version = "1.0.0a1" +source = { editable = "." } +dependencies = [ + { name = "numpy" }, + { name = "pandas" }, + { name = "psycopg", extra = ["binary"] }, + { name = "requests" }, + { name = "sqlalchemy" }, +] + +[package.dev-dependencies] +dev = [ + { name = "pytest" }, + { name = "ruff" }, +] + +[package.metadata] +requires-dist = [ + { name = "numpy", specifier = ">=2.2.1" }, + { name = "pandas", specifier = ">=2.2.3" }, + { name = "psycopg", extras = ["binary"], specifier = ">=3.2.3" }, + { name = "requests", specifier = ">=2.32.3" }, + { name = "sqlalchemy", specifier = ">=2.0.36" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "pytest", specifier = ">=8.3.4" }, + { name = "ruff", specifier = ">=0.8.6" }, +] + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/4b/cbd8e699e64a6f16ca3a8220661b5f83792b3017d0f79807cb8708d33913/iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", size = 4646 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, +] + +[[package]] +name = "numpy" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f2/a5/fdbf6a7871703df6160b5cf3dd774074b086d278172285c52c2758b76305/numpy-2.2.1.tar.gz", hash = "sha256:45681fd7128c8ad1c379f0ca0776a8b0c6583d2f69889ddac01559dfe4390918", size = 20227662 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/d6/91a26e671c396e0c10e327b763485ee295f5a5a7a48c553f18417e5a0ed5/numpy-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f1d09e520217618e76396377c81fba6f290d5f926f50c35f3a5f72b01a0da780", size = 20896464 }, + { url = "https://files.pythonhosted.org/packages/8c/40/5792ccccd91d45e87d9e00033abc4f6ca8a828467b193f711139ff1f1cd9/numpy-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3ecc47cd7f6ea0336042be87d9e7da378e5c7e9b3c8ad0f7c966f714fc10d821", size = 14111350 }, + { url = "https://files.pythonhosted.org/packages/c0/2a/fb0a27f846cb857cef0c4c92bef89f133a3a1abb4e16bba1c4dace2e9b49/numpy-2.2.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f419290bc8968a46c4933158c91a0012b7a99bb2e465d5ef5293879742f8797e", size = 5111629 }, + { url = "https://files.pythonhosted.org/packages/eb/e5/8e81bb9d84db88b047baf4e8b681a3e48d6390bc4d4e4453eca428ecbb49/numpy-2.2.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5b6c390bfaef8c45a260554888966618328d30e72173697e5cabe6b285fb2348", size = 6645865 }, + { url = 
"https://files.pythonhosted.org/packages/7a/1a/a90ceb191dd2f9e2897c69dde93ccc2d57dd21ce2acbd7b0333e8eea4e8d/numpy-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:526fc406ab991a340744aad7e25251dd47a6720a685fa3331e5c59fef5282a59", size = 14043508 }, + { url = "https://files.pythonhosted.org/packages/f1/5a/e572284c86a59dec0871a49cd4e5351e20b9c751399d5f1d79628c0542cb/numpy-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f74e6fdeb9a265624ec3a3918430205dff1df7e95a230779746a6af78bc615af", size = 16094100 }, + { url = "https://files.pythonhosted.org/packages/0c/2c/a79d24f364788386d85899dd280a94f30b0950be4b4a545f4fa4ed1d4ca7/numpy-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:53c09385ff0b72ba79d8715683c1168c12e0b6e84fb0372e97553d1ea91efe51", size = 15239691 }, + { url = "https://files.pythonhosted.org/packages/cf/79/1e20fd1c9ce5a932111f964b544facc5bb9bde7865f5b42f00b4a6a9192b/numpy-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f3eac17d9ec51be534685ba877b6ab5edc3ab7ec95c8f163e5d7b39859524716", size = 17856571 }, + { url = "https://files.pythonhosted.org/packages/be/5b/cc155e107f75d694f562bdc84a26cc930569f3dfdfbccb3420b626065777/numpy-2.2.1-cp313-cp313-win32.whl", hash = "sha256:9ad014faa93dbb52c80d8f4d3dcf855865c876c9660cb9bd7553843dd03a4b1e", size = 6270841 }, + { url = "https://files.pythonhosted.org/packages/44/be/0e5cd009d2162e4138d79a5afb3b5d2341f0fe4777ab6e675aa3d4a42e21/numpy-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:164a829b6aacf79ca47ba4814b130c4020b202522a93d7bff2202bfb33b61c60", size = 12606618 }, + { url = "https://files.pythonhosted.org/packages/a8/87/04ddf02dd86fb17c7485a5f87b605c4437966d53de1e3745d450343a6f56/numpy-2.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4dfda918a13cc4f81e9118dea249e192ab167a0bb1966272d5503e39234d694e", size = 20921004 }, + { url = 
"https://files.pythonhosted.org/packages/6e/3e/d0e9e32ab14005425d180ef950badf31b862f3839c5b927796648b11f88a/numpy-2.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:733585f9f4b62e9b3528dd1070ec4f52b8acf64215b60a845fa13ebd73cd0712", size = 14119910 }, + { url = "https://files.pythonhosted.org/packages/b5/5b/aa2d1905b04a8fb681e08742bb79a7bddfc160c7ce8e1ff6d5c821be0236/numpy-2.2.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:89b16a18e7bba224ce5114db863e7029803c179979e1af6ad6a6b11f70545008", size = 5153612 }, + { url = "https://files.pythonhosted.org/packages/ce/35/6831808028df0648d9b43c5df7e1051129aa0d562525bacb70019c5f5030/numpy-2.2.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:676f4eebf6b2d430300f1f4f4c2461685f8269f94c89698d832cdf9277f30b84", size = 6668401 }, + { url = "https://files.pythonhosted.org/packages/b1/38/10ef509ad63a5946cc042f98d838daebfe7eaf45b9daaf13df2086b15ff9/numpy-2.2.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27f5cdf9f493b35f7e41e8368e7d7b4bbafaf9660cba53fb21d2cd174ec09631", size = 14014198 }, + { url = "https://files.pythonhosted.org/packages/df/f8/c80968ae01df23e249ee0a4487fae55a4c0fe2f838dfe9cc907aa8aea0fa/numpy-2.2.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1ad395cf254c4fbb5b2132fee391f361a6e8c1adbd28f2cd8e79308a615fe9d", size = 16076211 }, + { url = "https://files.pythonhosted.org/packages/09/69/05c169376016a0b614b432967ac46ff14269eaffab80040ec03ae1ae8e2c/numpy-2.2.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:08ef779aed40dbc52729d6ffe7dd51df85796a702afbf68a4f4e41fafdc8bda5", size = 15220266 }, + { url = "https://files.pythonhosted.org/packages/f1/ff/94a4ce67ea909f41cf7ea712aebbe832dc67decad22944a1020bb398a5ee/numpy-2.2.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:26c9c4382b19fcfbbed3238a14abf7ff223890ea1936b8890f058e7ba35e8d71", size = 17852844 }, + { url = 
"https://files.pythonhosted.org/packages/46/72/8a5dbce4020dfc595592333ef2fbb0a187d084ca243b67766d29d03e0096/numpy-2.2.1-cp313-cp313t-win32.whl", hash = "sha256:93cf4e045bae74c90ca833cba583c14b62cb4ba2cba0abd2b141ab52548247e2", size = 6326007 }, + { url = "https://files.pythonhosted.org/packages/7b/9c/4fce9cf39dde2562584e4cfd351a0140240f82c0e3569ce25a250f47037d/numpy-2.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:bff7d8ec20f5f42607599f9994770fa65d76edca264a87b5e4ea5629bce12268", size = 12693107 }, +] + +[[package]] +name = "packaging" +version = "24.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 }, +] + +[[package]] +name = "pandas" +version = "2.2.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "tzdata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/22/3b8f4e0ed70644e85cfdcd57454686b9057c6c38d2f74fe4b8bc2527214a/pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015", size = 12477643 }, + { url = 
"https://files.pythonhosted.org/packages/e4/93/b3f5d1838500e22c8d793625da672f3eec046b1a99257666c94446969282/pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28", size = 11281573 }, + { url = "https://files.pythonhosted.org/packages/f5/94/6c79b07f0e5aab1dcfa35a75f4817f5c4f677931d4234afcd75f0e6a66ca/pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0", size = 15196085 }, + { url = "https://files.pythonhosted.org/packages/e8/31/aa8da88ca0eadbabd0a639788a6da13bb2ff6edbbb9f29aa786450a30a91/pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24", size = 12711809 }, + { url = "https://files.pythonhosted.org/packages/ee/7c/c6dbdb0cb2a4344cacfb8de1c5808ca885b2e4dcfde8008266608f9372af/pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659", size = 16356316 }, + { url = "https://files.pythonhosted.org/packages/57/b7/8b757e7d92023b832869fa8881a992696a0bfe2e26f72c9ae9f255988d42/pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb", size = 14022055 }, + { url = "https://files.pythonhosted.org/packages/3b/bc/4b18e2b8c002572c5a441a64826252ce5da2aa738855747247a971988043/pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d", size = 11481175 }, + { url = "https://files.pythonhosted.org/packages/76/a3/a5d88146815e972d40d19247b2c162e88213ef51c7c25993942c39dbf41d/pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468", size = 12615650 }, + { url = 
"https://files.pythonhosted.org/packages/9c/8c/f0fd18f6140ddafc0c24122c8a964e48294acc579d47def376fef12bcb4a/pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18", size = 11290177 }, + { url = "https://files.pythonhosted.org/packages/ed/f9/e995754eab9c0f14c6777401f7eece0943840b7a9fc932221c19d1abee9f/pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2", size = 14651526 }, + { url = "https://files.pythonhosted.org/packages/25/b0/98d6ae2e1abac4f35230aa756005e8654649d305df9a28b16b9ae4353bff/pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4", size = 11871013 }, + { url = "https://files.pythonhosted.org/packages/cc/57/0f72a10f9db6a4628744c8e8f0df4e6e21de01212c7c981d31e50ffc8328/pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d", size = 15711620 }, + { url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436 }, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 
}, +] + +[[package]] +name = "psycopg" +version = "3.2.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/ad/7ce016ae63e231575df0498d2395d15f005f05e32d3a2d439038e1bd0851/psycopg-3.2.3.tar.gz", hash = "sha256:a5764f67c27bec8bfac85764d23c534af2c27b893550377e37ce59c12aac47a2", size = 155550 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/21/534b8f5bd9734b7a2fcd3a16b1ee82ef6cad81a4796e95ebf4e0c6a24119/psycopg-3.2.3-py3-none-any.whl", hash = "sha256:644d3973fe26908c73d4be746074f6e5224b03c1101d302d9a53bf565ad64907", size = 197934 }, +] + +[package.optional-dependencies] +binary = [ + { name = "psycopg-binary", marker = "implementation_name != 'pypy'" }, +] + +[[package]] +name = "psycopg-binary" +version = "3.2.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/bf/717c5e51c68e2498b60a6e9f1476cc47953013275a54bf8e23fd5082a72d/psycopg_binary-3.2.3-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:41fdec0182efac66b27478ac15ef54c9ebcecf0e26ed467eb7d6f262a913318b", size = 3360874 }, + { url = "https://files.pythonhosted.org/packages/31/d5/6f9ad6fe5ef80ca9172bc3d028ebae8e9a1ee8aebd917c95c747a5efd85f/psycopg_binary-3.2.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:07d019a786eb020c0f984691aa1b994cb79430061065a694cf6f94056c603d26", size = 3502320 }, + { url = "https://files.pythonhosted.org/packages/fb/7b/c58dd26c27fe7a491141ca765c103e702872ff1c174ebd669d73d7fb0b5d/psycopg_binary-3.2.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c57615791a337378fe5381143259a6c432cdcbb1d3e6428bfb7ce59fff3fb5c", size = 4446950 }, + { url = "https://files.pythonhosted.org/packages/ed/75/acf6a81c788007b7bc0a43b02c22eff7cb19a6ace9e84c32838e86083a3f/psycopg_binary-3.2.3-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash 
= "sha256:e8eb9a4e394926b93ad919cad1b0a918e9b4c846609e8c1cfb6b743683f64da0", size = 4252409 }, + { url = "https://files.pythonhosted.org/packages/83/a5/8a01b923fe42acd185d53f24fb98ead717725ede76a4cd183ff293daf1f1/psycopg_binary-3.2.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5905729668ef1418bd36fbe876322dcb0f90b46811bba96d505af89e6fbdce2f", size = 4488121 }, + { url = "https://files.pythonhosted.org/packages/14/8f/b00e65e204340ab1259ecc8d4cc4c1f72c386be5ca7bfb90ae898a058d68/psycopg_binary-3.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd65774ed7d65101b314808b6893e1a75b7664f680c3ef18d2e5c84d570fa393", size = 4190653 }, + { url = "https://files.pythonhosted.org/packages/ce/fc/ba830fc6c9b02b66d1e2fb420736df4d78369760144169a9046f04d72ac6/psycopg_binary-3.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:700679c02f9348a0d0a2adcd33a0275717cd0d0aee9d4482b47d935023629505", size = 3118074 }, + { url = "https://files.pythonhosted.org/packages/b8/75/b62d06930a615435e909e05de126aa3d49f6ec2993d1aa6a99e7faab5570/psycopg_binary-3.2.3-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:96334bb64d054e36fed346c50c4190bad9d7c586376204f50bede21a913bf942", size = 3100457 }, + { url = "https://files.pythonhosted.org/packages/57/e5/32dc7518325d0010813853a87b19c784d8b11fdb17f5c0e0c148c5ac77af/psycopg_binary-3.2.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:9099e443d4cc24ac6872e6a05f93205ba1a231b1a8917317b07c9ef2b955f1f4", size = 3192788 }, + { url = "https://files.pythonhosted.org/packages/23/a3/d1aa04329253c024a2323051774446770d47b43073874a3de8cca797ed8e/psycopg_binary-3.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1985ab05e9abebfbdf3163a16ebb37fbc5d49aff2bf5b3d7375ff0920bbb54cd", size = 3234247 }, + { url = "https://files.pythonhosted.org/packages/03/20/b675af723b9a61d48abd6a3d64cbb9797697d330255d1f8105713d54ed8e/psycopg_binary-3.2.3-cp313-cp313-win_amd64.whl", hash = 
"sha256:e90352d7b610b4693fad0feea48549d4315d10f1eba5605421c92bb834e90170", size = 2913413 }, +] + +[[package]] +name = "pytest" +version = "8.3.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/35/30e0d83068951d90a01852cb1cef56e5d8a09d20c7f511634cc2f7e0372a/pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761", size = 1445919 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/92/76a1c94d3afee238333bc0a42b82935dd8f9cf8ce9e336ff87ee14d9e1cf/pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6", size = 343083 }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, +] + +[[package]] +name = "pytz" +version = "2024.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3a/31/3c70bf7603cc2dca0f19bdc53b4537a797747a58875b552c8c413d963a3f/pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a", size = 319692 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/11/c3/005fcca25ce078d2cc29fd559379817424e94885510568bc1bc53d7d5846/pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725", size = 508002 }, +] + +[[package]] +name = "requests" +version = "2.32.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, +] + +[[package]] +name = "ruff" +version = "0.8.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/da/00/089db7890ea3be5709e3ece6e46408d6f1e876026ec3fd081ee585fef209/ruff-0.8.6.tar.gz", hash = "sha256:dcad24b81b62650b0eb8814f576fc65cfee8674772a6e24c9b747911801eeaa5", size = 3473116 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/28/aa07903694637c2fa394a9f4fe93cf861ad8b09f1282fa650ef07ff9fe97/ruff-0.8.6-py3-none-linux_armv6l.whl", hash = "sha256:defed167955d42c68b407e8f2e6f56ba52520e790aba4ca707a9c88619e580e3", size = 10628735 }, + { url = "https://files.pythonhosted.org/packages/2b/43/827bb1448f1fcb0fb42e9c6edf8fb067ca8244923bf0ddf12b7bf949065c/ruff-0.8.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:54799ca3d67ae5e0b7a7ac234baa657a9c1784b48ec954a094da7c206e0365b1", size = 10386758 }, + { url = 
"https://files.pythonhosted.org/packages/df/93/fc852a81c3cd315b14676db3b8327d2bb2d7508649ad60bfdb966d60738d/ruff-0.8.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e88b8f6d901477c41559ba540beeb5a671e14cd29ebd5683903572f4b40a9807", size = 10007808 }, + { url = "https://files.pythonhosted.org/packages/94/e9/e0ed4af1794335fb280c4fac180f2bf40f6a3b859cae93a5a3ada27325ae/ruff-0.8.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0509e8da430228236a18a677fcdb0c1f102dd26d5520f71f79b094963322ed25", size = 10861031 }, + { url = "https://files.pythonhosted.org/packages/82/68/da0db02f5ecb2ce912c2bef2aa9fcb8915c31e9bc363969cfaaddbc4c1c2/ruff-0.8.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:91a7ddb221779871cf226100e677b5ea38c2d54e9e2c8ed847450ebbdf99b32d", size = 10388246 }, + { url = "https://files.pythonhosted.org/packages/ac/1d/b85383db181639019b50eb277c2ee48f9f5168f4f7c287376f2b6e2a6dc2/ruff-0.8.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:248b1fb3f739d01d528cc50b35ee9c4812aa58cc5935998e776bf8ed5b251e75", size = 11424693 }, + { url = "https://files.pythonhosted.org/packages/ac/b7/30bc78a37648d31bfc7ba7105b108cb9091cd925f249aa533038ebc5a96f/ruff-0.8.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:bc3c083c50390cf69e7e1b5a5a7303898966be973664ec0c4a4acea82c1d4315", size = 12141921 }, + { url = "https://files.pythonhosted.org/packages/60/b3/ee0a14cf6a1fbd6965b601c88d5625d250b97caf0534181e151504498f86/ruff-0.8.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52d587092ab8df308635762386f45f4638badb0866355b2b86760f6d3c076188", size = 11692419 }, + { url = "https://files.pythonhosted.org/packages/ef/d6/c597062b2931ba3e3861e80bd2b147ca12b3370afc3889af46f29209037f/ruff-0.8.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:61323159cf21bc3897674e5adb27cd9e7700bab6b84de40d7be28c3d46dc67cf", size = 12981648 }, + { url = 
"https://files.pythonhosted.org/packages/68/84/21f578c2a4144917985f1f4011171aeff94ab18dfa5303ac632da2f9af36/ruff-0.8.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ae4478b1471fc0c44ed52a6fb787e641a2ac58b1c1f91763bafbc2faddc5117", size = 11251801 }, + { url = "https://files.pythonhosted.org/packages/6c/aa/1ac02537c8edeb13e0955b5db86b5c050a1dcba54f6d49ab567decaa59c1/ruff-0.8.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0c000a471d519b3e6cfc9c6680025d923b4ca140ce3e4612d1a2ef58e11f11fe", size = 10849857 }, + { url = "https://files.pythonhosted.org/packages/eb/00/020cb222252d833956cb3b07e0e40c9d4b984fbb2dc3923075c8f944497d/ruff-0.8.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:9257aa841e9e8d9b727423086f0fa9a86b6b420fbf4bf9e1465d1250ce8e4d8d", size = 10470852 }, + { url = "https://files.pythonhosted.org/packages/00/56/e6d6578202a0141cd52299fe5acb38b2d873565f4670c7a5373b637cf58d/ruff-0.8.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:45a56f61b24682f6f6709636949ae8cc82ae229d8d773b4c76c09ec83964a95a", size = 10972997 }, + { url = "https://files.pythonhosted.org/packages/be/31/dd0db1f4796bda30dea7592f106f3a67a8f00bcd3a50df889fbac58e2786/ruff-0.8.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:496dd38a53aa173481a7d8866bcd6451bd934d06976a2505028a50583e001b76", size = 11317760 }, + { url = "https://files.pythonhosted.org/packages/d4/70/cfcb693dc294e034c6fed837fa2ec98b27cc97a26db5d049345364f504bf/ruff-0.8.6-py3-none-win32.whl", hash = "sha256:e169ea1b9eae61c99b257dc83b9ee6c76f89042752cb2d83486a7d6e48e8f764", size = 8799729 }, + { url = "https://files.pythonhosted.org/packages/60/22/ae6bcaa0edc83af42751bd193138bfb7598b2990939d3e40494d6c00698c/ruff-0.8.6-py3-none-win_amd64.whl", hash = "sha256:f1d70bef3d16fdc897ee290d7d20da3cbe4e26349f62e8a0274e7a3f4ce7a905", size = 9673857 }, + { url = "https://files.pythonhosted.org/packages/91/f8/3765e053acd07baa055c96b2065c7fab91f911b3c076dfea71006666f5b0/ruff-0.8.6-py3-none-win_arm64.whl", 
hash = "sha256:7d7fc2377a04b6e04ffe588caad613d0c460eb2ecba4c0ccbbfe2bc973cbc162", size = 9149556 }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, +] + +[[package]] +name = "sqlalchemy" +version = "2.0.36" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/50/65/9cbc9c4c3287bed2499e05033e207473504dc4df999ce49385fb1f8b058a/sqlalchemy-2.0.36.tar.gz", hash = "sha256:7f2767680b6d2398aea7082e45a774b2b0767b5c8d8ffb9c8b683088ea9b29c5", size = 9574485 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/5c/236398ae3678b3237726819b484f15f5c038a9549da01703a771f05a00d6/SQLAlchemy-2.0.36-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b5cc79df7f4bc3d11e4b542596c03826063092611e481fcf1c9dfee3c94355ef", size = 2087651 }, + { url = "https://files.pythonhosted.org/packages/a8/14/55c47420c0d23fb67a35af8be4719199b81c59f3084c28d131a7767b0b0b/SQLAlchemy-2.0.36-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3c01117dd36800f2ecaa238c65365b7b16497adc1522bf84906e5710ee9ba0e8", size = 2078132 }, + { url = "https://files.pythonhosted.org/packages/3d/97/1e843b36abff8c4a7aa2e37f9bea364f90d021754c2de94d792c2d91405b/SQLAlchemy-2.0.36-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9bc633f4ee4b4c46e7adcb3a9b5ec083bf1d9a97c1d3854b92749d935de40b9b", size = 3164559 }, + { url = 
"https://files.pythonhosted.org/packages/7b/c5/07f18a897b997f6d6b234fab2bf31dccf66d5d16a79fe329aefc95cd7461/SQLAlchemy-2.0.36-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e46ed38affdfc95d2c958de328d037d87801cfcbea6d421000859e9789e61c2", size = 3177897 }, + { url = "https://files.pythonhosted.org/packages/b3/cd/e16f3cbefd82b5c40b33732da634ec67a5f33b587744c7ab41699789d492/SQLAlchemy-2.0.36-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b2985c0b06e989c043f1dc09d4fe89e1616aadd35392aea2844f0458a989eacf", size = 3111289 }, + { url = "https://files.pythonhosted.org/packages/15/85/5b8a3b0bc29c9928aa62b5c91fcc8335f57c1de0a6343873b5f372e3672b/SQLAlchemy-2.0.36-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a121d62ebe7d26fec9155f83f8be5189ef1405f5973ea4874a26fab9f1e262c", size = 3139491 }, + { url = "https://files.pythonhosted.org/packages/a1/95/81babb6089938680dfe2cd3f88cd3fd39cccd1543b7cb603b21ad881bff1/SQLAlchemy-2.0.36-cp313-cp313-win32.whl", hash = "sha256:0572f4bd6f94752167adfd7c1bed84f4b240ee6203a95e05d1e208d488d0d436", size = 2060439 }, + { url = "https://files.pythonhosted.org/packages/c1/ce/5f7428df55660d6879d0522adc73a3364970b5ef33ec17fa125c5dbcac1d/SQLAlchemy-2.0.36-cp313-cp313-win_amd64.whl", hash = "sha256:8c78ac40bde930c60e0f78b3cd184c580f89456dd87fc08f9e3ee3ce8765ce88", size = 2084574 }, + { url = "https://files.pythonhosted.org/packages/b8/49/21633706dd6feb14cd3f7935fc00b60870ea057686035e1a99ae6d9d9d53/SQLAlchemy-2.0.36-py3-none-any.whl", hash = "sha256:fddbe92b4760c6f5d48162aef14824add991aeda8ddadb3c31d56eb15ca69f8e", size = 1883787 }, +] + +[[package]] +name = "typing-extensions" +version = "4.12.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321 } +wheels = 
[ + { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 }, +] + +[[package]] +name = "tzdata" +version = "2024.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e1/34/943888654477a574a86a98e9896bae89c7aa15078ec29f490fef2f1e5384/tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc", size = 193282 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a6/ab/7e5f53c3b9d14972843a647d8d7a853969a58aecc7559cb3267302c94774/tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd", size = 346586 }, +] + +[[package]] +name = "urllib3" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/63/e53da845320b757bf29ef6a9062f5c669fe997973f966045cb019c3f4b66/urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d", size = 307268 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 }, +] From 3b1df1b5dfd2fa6756d1bfd8188e4097d336543e Mon Sep 17 00:00:00 2001 From: Jon Couldridge Date: Fri, 10 Jan 2025 08:58:39 +0000 Subject: [PATCH 3/7] theoretically working bunny source --- .vscode/settings.json | 7 + pyproject.toml | 5 +- src/hutch_bunny/__init__.py | 2 - src/hutch_bunny/cli.py | 45 ++ src/hutch_bunny/core/__about__.py | 1 + src/hutch_bunny/core/__init__.py | 0 src/hutch_bunny/core/constants.py | 7 + src/hutch_bunny/core/db_manager.py | 216 ++++++++ 
src/hutch_bunny/core/entities.py | 225 ++++++++ src/hutch_bunny/core/enums.py | 23 + src/hutch_bunny/core/execute_query.py | 60 ++ src/hutch_bunny/core/logger.py | 14 + src/hutch_bunny/core/obfuscation.py | 127 +++++ src/hutch_bunny/core/parser.py | 30 + src/hutch_bunny/core/query_solvers.py | 516 ++++++++++++++++++ src/hutch_bunny/core/results_modifiers.py | 20 + src/hutch_bunny/core/rquest_dto/__init__.py | 1 + .../core/rquest_dto/activity_job.py | 41 ++ src/hutch_bunny/core/rquest_dto/base_dto.py | 7 + src/hutch_bunny/core/rquest_dto/cohort.py | 34 ++ src/hutch_bunny/core/rquest_dto/file.py | 32 ++ src/hutch_bunny/core/rquest_dto/group.py | 36 ++ src/hutch_bunny/core/rquest_dto/query.py | 118 ++++ src/hutch_bunny/core/rquest_dto/result.py | 49 ++ src/hutch_bunny/core/rquest_dto/rule.py | 85 +++ src/hutch_bunny/core/setting_database.py | 43 ++ src/hutch_bunny/core/settings.py | 33 ++ src/hutch_bunny/core/task_api_client.py | 83 +++ src/hutch_bunny/daemon.py | 70 +++ tests/test_return.py | 98 ++++ uv.lock | 78 ++- 31 files changed, 2084 insertions(+), 22 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 src/hutch_bunny/cli.py create mode 100644 src/hutch_bunny/core/__about__.py create mode 100644 src/hutch_bunny/core/__init__.py create mode 100644 src/hutch_bunny/core/constants.py create mode 100644 src/hutch_bunny/core/db_manager.py create mode 100644 src/hutch_bunny/core/entities.py create mode 100644 src/hutch_bunny/core/enums.py create mode 100644 src/hutch_bunny/core/execute_query.py create mode 100644 src/hutch_bunny/core/logger.py create mode 100644 src/hutch_bunny/core/obfuscation.py create mode 100644 src/hutch_bunny/core/parser.py create mode 100644 src/hutch_bunny/core/query_solvers.py create mode 100644 src/hutch_bunny/core/results_modifiers.py create mode 100644 src/hutch_bunny/core/rquest_dto/__init__.py create mode 100644 src/hutch_bunny/core/rquest_dto/activity_job.py create mode 100644 
src/hutch_bunny/core/rquest_dto/base_dto.py create mode 100644 src/hutch_bunny/core/rquest_dto/cohort.py create mode 100644 src/hutch_bunny/core/rquest_dto/file.py create mode 100644 src/hutch_bunny/core/rquest_dto/group.py create mode 100644 src/hutch_bunny/core/rquest_dto/query.py create mode 100644 src/hutch_bunny/core/rquest_dto/result.py create mode 100644 src/hutch_bunny/core/rquest_dto/rule.py create mode 100644 src/hutch_bunny/core/setting_database.py create mode 100644 src/hutch_bunny/core/settings.py create mode 100644 src/hutch_bunny/core/task_api_client.py create mode 100644 src/hutch_bunny/daemon.py create mode 100644 tests/test_return.py diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..fd9f527 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.testing.pytestArgs": [ + "test" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 64ac825..b15711d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,8 +19,10 @@ dependencies = [ "numpy>=2.2.1", "pandas>=2.2.3", "psycopg[binary]>=3.2.3", + "python-dotenv>=1.0.1", "requests>=2.32.3", # daemon-only "sqlalchemy>=2.0.36", + "trino>=0.331.0", # should become optional # possible extras? # "pyodbc>=5.2.0", # not sure exactly when this is or isn't needed? 
# mysqlclient="^2.1.1" # should be only when targeting mysql - seems to have system dependencies @@ -28,7 +30,8 @@ dependencies = [ ] [project.scripts] -bunny = "hutch_bunny:main" +bunny = "hutch_bunny.cli:main" +bunny-daemon = "hutch_bunny.daemon:main" [build-system] requires = ["hatchling"] diff --git a/src/hutch_bunny/__init__.py b/src/hutch_bunny/__init__.py index 31e8c19..e69de29 100644 --- a/src/hutch_bunny/__init__.py +++ b/src/hutch_bunny/__init__.py @@ -1,2 +0,0 @@ -def main() -> None: - print("Hello from hutch-bunny!") diff --git a/src/hutch_bunny/cli.py b/src/hutch_bunny/cli.py new file mode 100644 index 0000000..44d21de --- /dev/null +++ b/src/hutch_bunny/cli.py @@ -0,0 +1,45 @@ +import json +from hutch_bunny.core.obfuscation import get_results_modifiers_from_str +from hutch_bunny.core.execute_query import execute_query +from hutch_bunny.core.rquest_dto.result import RquestResult +from hutch_bunny.core.parser import parser +from hutch_bunny.core.logger import logger +from hutch_bunny.core.setting_database import setting_database + + +def save_to_output(result: RquestResult, destination: str) -> None: + """Save the result to a JSON file. + + Args: + result (RquestResult): The object containing the result of a query. + destination (str): The name of the JSON file to save the results. + + Raises: + ValueError: A path to a non-JSON file was passed as the destination. + """ + if not destination.endswith(".json"): + raise ValueError("Please specify a JSON file (ending in '.json').") + + try: + with open(destination, "w") as output_file: + file_body = json.dumps(result.to_dict()) + output_file.write(file_body) + except Exception as e: + logger.error(str(e), exc_info=True) + + +def main() -> None: + # Setting database connection + db_manager = setting_database(logger=logger) + # Bunny passed args. 
+ args = parser.parse_args() + + with open(args.body) as body: + query_dict = json.load(body) + results_modifier = get_results_modifiers_from_str(args.results_modifiers) + + result = execute_query( + query_dict, results_modifier, logger=logger, db_manager=db_manager + ) + save_to_output(result, args.output) + logger.info(f"Saved results to {args.output}") diff --git a/src/hutch_bunny/core/__about__.py b/src/hutch_bunny/core/__about__.py new file mode 100644 index 0000000..f102a9c --- /dev/null +++ b/src/hutch_bunny/core/__about__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/src/hutch_bunny/core/__init__.py b/src/hutch_bunny/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/hutch_bunny/core/constants.py b/src/hutch_bunny/core/constants.py new file mode 100644 index 0000000..cb60a09 --- /dev/null +++ b/src/hutch_bunny/core/constants.py @@ -0,0 +1,7 @@ +from hutch_bunny.core.enums import DistributionQueryType + + +DISTRIBUTION_TYPE_FILE_NAMES_MAP = { + DistributionQueryType.DEMOGRAPHICS: "demographics.distribution", + DistributionQueryType.GENERIC: "code.distribution", +} diff --git a/src/hutch_bunny/core/db_manager.py b/src/hutch_bunny/core/db_manager.py new file mode 100644 index 0000000..cc694a1 --- /dev/null +++ b/src/hutch_bunny/core/db_manager.py @@ -0,0 +1,216 @@ +from typing import Any, Optional + +from sqlalchemy import create_engine, inspect +from sqlalchemy.engine import URL as SQLAURL +from trino.sqlalchemy import URL as TrinoURL # TODO: how to do as optional? +from dotenv import load_dotenv + +load_dotenv() + + +class BaseDBManager: + def __init__( + self, + username: str, + password: str, + host: str, + port: int, + database: str, + drivername: str, + ) -> None: + """Constructor method for DBManager classes. + Creates the connection engine and the inspector for the database. + + Args: + username (str): The username for the database. + password (str): The password for the database.
+ host (str): The host for the database. + port (int): The port number for the database. + database (str): The name of the database. + drivername (str): The database driver e.g. "psycopg2", "pymysql", etc. + + Raises: + NotImplementedError: Raised when this method has not been implemented in subclass. + """ + raise NotImplementedError + + def execute_and_fetch(self, stmnt: Any) -> list: + """Execute a statement against the database and fetch the result. + + Args: + stmnt (Any): The statement object to be executed. + + Raises: + NotImplementedError: Raised when this method has not been implemented in subclass. + + Returns: + list: The list of rows returned. + """ + raise NotImplementedError + + def execute(self, stmnt: Any) -> None: + """Execute a statement against the database and don't fetch any results. + + Args: + stmnt (Any): The statement object to be executed. + + Raises: + NotImplementedError: Raised when this method has not been implemented in subclass. + """ + raise NotImplementedError + + def list_tables(self) -> list: + """List the tables in the database. + + Raises: + NotImplementedError: Raised when this method has not been implemented in subclass. + + Returns: + list: The list of tables in the database. 
+ """ + raise NotImplementedError + + +class SyncDBManager(BaseDBManager): + def __init__( + self, + username: str, + password: str, + host: str, + port: int, + database: str, + drivername: str, + schema: Optional[str] = None, + ) -> None: + if not isinstance(username, str): + raise TypeError("`username` must be a string") + if not isinstance(password, str): + raise TypeError("`password` must be a string") + if not isinstance(host, str): + raise TypeError("`host` must be a string") + if not isinstance(port, int): + raise TypeError("`port` must be an integer") + if not isinstance(database, str): + raise TypeError("`database` must be a string") + + url = SQLAURL.create( + drivername=drivername, + username=username, + password=password, + host=host, + port=port, + database=database, + ) + + if schema is not None: + self.engine = create_engine( + url=url, + connect_args={"options": "-csearch_path={}".format(schema)}, + ) + else: + self.engine = create_engine( + url=url, + ) + + self.inspector = inspect(self.engine) + + def execute_and_fetch(self, stmnt: Any) -> list: + with self.engine.begin() as conn: + result = conn.execute(statement=stmnt) + rows = result.all() + # Need to call `dispose` - not automatic + self.engine.dispose() + return rows + + def execute(self, stmnt: Any) -> None: + with self.engine.begin() as conn: + conn.execute(statement=stmnt) + # Need to call `dispose` - not automatic + self.engine.dispose() + + def list_tables(self) -> list: + return self.inspector.get_table_names() + + +class TrinoDBManager(BaseDBManager): + def __init__( + self, + username: str, + host: str, + port: int, + catalog: str, + password: Optional[str] = None, + drivername: Optional[str] = None, + schema: Optional[str] = None, + database: Optional[str] = None, + ) -> None: + """Create a DB manager that interacts with Trino. + + Args: + username (str): The username on the Trino server. + password (Union[str, None]): (optional) The password for the Trino server. 
+ host (str): The host of the Trino server. + port (int): The port of the Trino server. + database (Union[str, None]): Ignored. + drivername (Union[str, None]): Ignored. + schema (Union[str, None]): (optional) The schema in the database. + catalog (str): The catalog on the Trino server. + """ + # check required args + if not isinstance(username, str): + raise TypeError("`username` must be a string") + if not isinstance(host, str): + raise TypeError("`host` must be a string") + if not isinstance(port, int): + raise TypeError("`port` must be an integer") + if not isinstance(catalog, str): + raise TypeError("`catalog` must be a string") + + url = TrinoURL( + user=username, + password=password, + host=host, + port=port, + schema=schema, + catalog=catalog, + ) + + self.engine = create_engine(url, connect_args={"http_scheme": "http"}) + self.inspector = inspect(self.engine) + + def execute_and_fetch(self, stmnt: Any) -> list: + """Execute a SQL statement and return a list of rows containing the + results of the query. + + Args: + stmnt (Any): The SQL statement to be executed. + + Returns: + list: The results of the SQL statement. + """ + with self.engine.begin() as conn: + result = conn.execute(statement=stmnt) + rows = result.all() + # Need to call `dispose` - not automatic + self.engine.dispose() + return rows + + def execute(self, stmnt: Any) -> None: + """Execute a SQL statement. Useful for when results aren't expected back, such as + updating or deleting. + + Args: + stmnt (Any): The SQL statement to be executed. + """ + with self.engine.begin() as conn: + conn.execute(statement=stmnt) + # Need to call `dispose` - not automatic + self.engine.dispose() + + def list_tables(self) -> list: + """Get a list of tables in the database. + + Returns: + list: The tables in the database.
+ """ + return self.inspector.get_table_names() diff --git a/src/hutch_bunny/core/entities.py b/src/hutch_bunny/core/entities.py new file mode 100644 index 0000000..188b191 --- /dev/null +++ b/src/hutch_bunny/core/entities.py @@ -0,0 +1,225 @@ +from sqlalchemy import ( + BigInteger, + Column, + Date, + ForeignKey, + Integer, + Numeric, + String, + DateTime, + Text, +) +from sqlalchemy.ext.declarative import declarative_base + +Base = declarative_base() + + +class Concept(Base): + __tablename__ = "concept" + concept_id = Column(Integer, primary_key=True) + concept_name = Column(String(255), nullable=False) + domain_id = Column(String(20), nullable=False) + vocabulary_id = Column(String(20), nullable=False) + concept_class_id = Column(String(20), nullable=False) + standard_concept = Column(String(1), nullable=True) + concept_code = Column(String(50), nullable=False) + valid_start_date = Column(Date, nullable=False) + valid_end_date = Column(Date, nullable=False) + invalid_reason = Column(String(1), nullable=True) + + +class Person(Base): + __tablename__ = "person" + person_id = Column(Integer, primary_key=True) + gender_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=False + ) + year_of_birth = Column(Integer, nullable=False) + month_of_birth = Column(Integer, nullable=True) + day_of_birth = Column(Integer, nullable=True) + birth_datetime = Column(DateTime, nullable=True) + race_concept_id = Column(Integer, ForeignKey("concept.concept_id"), nullable=False) + ethnicity_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=False + ) + location_id = Column(Integer, nullable=True) + provider_id = Column(Integer, nullable=True) + care_site_id = Column(Integer, nullable=True) + person_source_value = Column(String(50), nullable=True) + gender_source_value = Column(String(50), nullable=True) + gender_source_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=True + ) + race_source_value = 
Column(String(50), nullable=True) + race_source_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=True + ) + ethnicity_source_value = Column(String(50), nullable=True) + ethnicity_source_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=True + ) + + +class Measurement(Base): + __tablename__ = "measurement" + measurement_id = Column(Integer, primary_key=True) + person_id = Column(Integer, ForeignKey("person.person_id"), nullable=False) + measurement_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=False + ) + measurement_date = Column(Date, nullable=False) + measurement_datetime = Column(DateTime, nullable=True) + measurement_time = Column(String(10), nullable=True) + measurement_type_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=False + ) + operator_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=True + ) + value_as_number = Column(Numeric, nullable=True) + value_as_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=True + ) + unit_concept_id = Column(Integer, ForeignKey("concept.concept_id"), nullable=True) + range_low = Column(Numeric, nullable=True) + range_high = Column(Numeric, nullable=True) + provider_id = Column(Integer, nullable=True) + visit_occurrence_id = Column(Integer, nullable=True) + visit_detail_id = Column(Integer, nullable=True) + measurement_source_value = Column(String(50), nullable=True) + measurement_source_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=True + ) + unit_source_value = Column(String(50), nullable=True) + unit_source_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=True + ) + value_source_value = Column(String(50), nullable=True) + measurement_event_id = Column(BigInteger, nullable=True) + meas_event_field_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=True + ) + + +class 
ConditionOccurrence(Base): + __tablename__ = "condition_occurrence" + condition_occurrence_id = Column(Integer, primary_key=True) + person_id = Column(Integer, ForeignKey("person.person_id"), nullable=False) + condition_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=False + ) + condition_start_date = Column(Date, nullable=False) + condition_start_datetime = Column(DateTime, nullable=True) + condition_end_date = Column(Date, nullable=True) + condition_end_datetime = Column(DateTime, nullable=True) + condition_type_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=False + ) + condition_status_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=True + ) + stop_reason = Column(String(20), nullable=True) + provider_id = Column(Integer, nullable=True) + visit_occurrence_id = Column(Integer, nullable=True) + visit_detail_id = Column(Integer, nullable=True) + condition_source_value = Column(String(50), nullable=True) + condition_source_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=True + ) + condition_status_source_value = Column(String(50), nullable=True) + + +class Observation(Base): + __tablename__ = "observation" + observation_id = Column(Integer, primary_key=True) + person_id = Column(Integer, ForeignKey("person.person_id"), nullable=False) + observation_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=False + ) + observation_date = Column(Date, nullable=False) + observation_datetime = Column(DateTime, nullable=True) + observation_type_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=False + ) + value_as_number = Column(Numeric, nullable=True) + value_as_string = Column(String(60), nullable=True) + value_as_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=True + ) + qualifier_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=True + ) + unit_concept_id = 
Column(Integer, ForeignKey("concept.concept_id"), nullable=True) + provider_id = Column(Integer, nullable=True) + visit_occurrence_id = Column(Integer, nullable=True) + visit_detail_id = Column(Integer, nullable=True) + observation_source_value = Column(String(50), nullable=True) + observation_source_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=True + ) + unit_source_value = Column(String(50), nullable=True) + qualifier_source_value = Column(String(50), nullable=True) + value_source_value = Column(String(50), nullable=True) + observation_event_id = Column(BigInteger, nullable=True) + obs_event_field_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=True + ) + + +class ProcedureOccurrence(Base): + __tablename__ = "procedure_occurrence" + procedure_occurrence_id = Column(Integer, primary_key=True) + person_id = Column(Integer, ForeignKey("person.person_id"), nullable=False) + procedure_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=False + ) + procedure_date = Column(Date, nullable=False) + procedure_datetime = Column(DateTime, nullable=True) + procedure_end_date = Column(Date, nullable=True) + procedure_end_datetime = Column(DateTime, nullable=True) + procedure_type_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=False + ) + modifier_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=True + ) + quantity = Column(Integer, nullable=True) + provider_id = Column(Integer, nullable=True) + visit_occurrence_id = Column(Integer, nullable=True) + visit_detail_id = Column(Integer, nullable=True) + procedure_source_value = Column(String(50), nullable=True) + procedure_source_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=True + ) + modifier_source_value = Column(String(50), nullable=True) + + +class DrugExposure(Base): + __tablename__ = "drug_exposure" + drug_exposure_id = Column(Integer, primary_key=True) + person_id 
= Column(Integer, ForeignKey("person.person_id"), nullable=False) + drug_concept_id = Column(Integer, ForeignKey("concept.concept_id"), nullable=False) + drug_exposure_start_date = Column(Date, nullable=False) + drug_exposure_start_datetime = Column(DateTime, nullable=True) + drug_exposure_end_date = Column(Date, nullable=False) + drug_exposure_end_datetime = Column(DateTime, nullable=True) + verbatim_end_date = Column(Date, nullable=True) + drug_type_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=False + ) + stop_reason = Column(String(20), nullable=True) + refills = Column(Integer, nullable=True) + quantity = Column(Numeric, nullable=True) + days_supply = Column(Integer, nullable=True) + sig = Column(Text, nullable=True) + route_concept_id = Column(Integer, ForeignKey("concept.concept_id"), nullable=True) + lot_number = Column(String(50), nullable=True) + provider_id = Column(Integer, nullable=True) + visit_occurrence_id = Column(Integer, nullable=True) + visit_detail_id = Column(Integer, nullable=True) + drug_source_value = Column(String(50), nullable=True) + drug_source_concept_id = Column( + Integer, ForeignKey("concept.concept_id"), nullable=True + ) + route_source_value = Column(String(50), nullable=True) + dose_unit_source_value = Column(String(50), nullable=True) diff --git a/src/hutch_bunny/core/enums.py b/src/hutch_bunny/core/enums.py new file mode 100644 index 0000000..71a93ba --- /dev/null +++ b/src/hutch_bunny/core/enums.py @@ -0,0 +1,23 @@ +from enum import Enum + + +class DistributionQueryType(str, Enum): + """Enum representing the distribution query types.""" + + DEMOGRAPHICS = "DEMOGRAPHICS" + GENERIC = "GENERIC" + ICD_MAIN = "ICD-MAIN" + + @classmethod + def get_value(cls, value: str): + """Get the enum value of the distribution query type. + + If no corresponding enum value exists, `None` will be returned. + + Args: + value (str): The value to get the enum value for.
+ + Returns: + Union[DistributionQueryType, None]: Return the enum value corresponding to `value` or `None`. + """ + return cls._value2member_map_.get(value) diff --git a/src/hutch_bunny/core/execute_query.py b/src/hutch_bunny/core/execute_query.py new file mode 100644 index 0000000..54f62fa --- /dev/null +++ b/src/hutch_bunny/core/execute_query.py @@ -0,0 +1,60 @@ +from typing import Dict, List +from logging import Logger +from hutch_bunny.core import query_solvers +from hutch_bunny.core.rquest_dto.query import AvailabilityQuery, DistributionQuery +from hutch_bunny.core.obfuscation import ( + apply_filters_v2, +) +from hutch_bunny.core.rquest_dto.result import RquestResult + + +def execute_query( + query_dict: Dict, + results_modifiers: List, + logger: Logger, + db_manager, +) -> RquestResult: + """ + Executes either an availability query or a distribution query, and returns results filtered by modifiers + + Parameters + ---------- + query_dict: Dict + A dictionary carrying the payload for the query. If there is an 'analysis' item in the query, it's a distribution query. 
Otherwise, it executes an availability query + results_modifers: List + A list of modifiers applied to the results of the query before returning them to Relay + + Returns + RquestResult + """ + + logger.info("Processing query...") + + if "analysis" in query_dict.keys(): + try: + query = DistributionQuery.from_dict(query_dict) + result = query_solvers.solve_distribution( + db_manager=db_manager, query=query + ) + return result + except TypeError as te: # raised if the distribution query json format is wrong + logger.error(str(te), exc_info=True) + except ValueError as ve: + # raised if there was an issue saving the output or + # the query json has incorrect values + logger.error(str(ve), exc_info=True) + + else: + try: + query = AvailabilityQuery.from_dict(query_dict) + result = query_solvers.solve_availability( + db_manager=db_manager, query=query + ) + result.count = apply_filters_v2(result.count, results_modifiers) + return result + except TypeError as te: # raised if the distribution query json format is wrong + logger.error(str(te), exc_info=True) + except ValueError as ve: + # raised if there was an issue saving the output or + # the query json has incorrect values + logger.error(str(ve), exc_info=True) diff --git a/src/hutch_bunny/core/logger.py b/src/hutch_bunny/core/logger.py new file mode 100644 index 0000000..24927c4 --- /dev/null +++ b/src/hutch_bunny/core/logger.py @@ -0,0 +1,14 @@ +import logging +import hutch_bunny.core.settings as settings +import sys + +logger = logging.getLogger(settings.LOGGER_NAME) +LOG_FORMAT = logging.Formatter( + settings.MSG_FORMAT, + datefmt=settings.DATE_FORMAT, +) +console_handler = logging.StreamHandler(sys.stdout) +console_handler.setFormatter(LOG_FORMAT) +logger = logging.getLogger(settings.LOGGER_NAME) +logger.setLevel(logging.INFO) +logger.addHandler(console_handler) diff --git a/src/hutch_bunny/core/obfuscation.py b/src/hutch_bunny/core/obfuscation.py new file mode 100644 index 0000000..5d2795f --- /dev/null +++ 
b/src/hutch_bunny/core/obfuscation.py @@ -0,0 +1,127 @@ +import json +import os +import requests +from typing import Union + + +def get_results_modifiers(activity_source_id: int) -> list: + """Get the results modifiers for a given activity source. + + Args: + activity_source_id (int): The acivity source ID. + + Returns: + list: The modifiers for the given activity source. + + Raises: + HTTPError: raised when this function can't get the results modifiers. + """ + res = requests.get( + f"{os.getenv('MANAGER_URL')}/api/activitysources/{activity_source_id}/resultsmodifiers", + verify=int(os.getenv("MANAGER_VERIFY_SSL", 1)), + ) + res.raise_for_status() + modifiers = res.json() + return modifiers + + +def get_results_modifiers_from_str(params: str) -> list: + """Deserialise a JSON list containing results modifiers + + Args: + params (str): + The JSON string containing list of parameter objects for results modifiers + + Raises: + ValueError: The parsed string does not produce a list + + Returns: + list: The list of parameter dicts of results modifiers + """ + deserialised_params = json.loads(params) + if not isinstance(deserialised_params, list): + raise ValueError( + f"{get_results_modifiers_from_str.__name__} requires a JSON list" + ) + return deserialised_params + + +def low_number_suppression( + value: Union[int, float], threshold: int = 10 +) -> Union[int, float]: + """Suppress values that fall below a given threshold. + + Args: + value (Union[int, float]): The value to evaluate. + threshold (int): The threshold to beat. + + Returns: + Union[int, float]: `value` if `value` > `threshold` else `0`. + + Examples: + >>> low_number_suppression(99, threshold=100) + 0 + >>> low_number_suppression(200, threshold=100) + 200 + """ + return value if value > threshold else 0 + + +def rounding(value: Union[int, float], nearest: int = 10) -> int: + """Round the value to the nearest base number, e.g. 10. 
+ + Args: + value (Union[int, float]): The value to be rounded + nearest (int, optional): Round value to this base. Defaults to 10. + + Returns: + int: The value rounded to the specified nearest interval. + + Examples: + >>> rounding(145, nearest=100) + 100 + >>> rounding(160, nearest=100) + 200 + """ + return nearest * round(value / nearest) + + +def apply_filters(value: Union[int, float], filters: list) -> Union[int, float]: + """Iterate over a list of filters from the Manager and apply them to the + supplied value. + + Args: + value (Union[int, float]): The value to be filtered. + filters (list): The filters applied to the value. + + Returns: + Union[int, float]: The filtered value. + """ + actions = {"Low Number Suppression": low_number_suppression, "Rounding": rounding} + result = value + for f in filters: + if action := actions.get(f["type"]["id"]): + result = action(result, **f["parameters"]) + if result == 0: + break # don't apply any more filters + return result + + +def apply_filters_v2(value: Union[int, float], filters: list) -> Union[int, float]: + """Iterate over a list of filters and apply them to the supplied value. + + Args: + value (Union[int, float]): The value to be filtered. + filters (list): The filters applied to the value. + + Returns: + Union[int, float]: The filtered value. 
+ """ + actions = {"Low Number Suppression": low_number_suppression, "Rounding": rounding} + result = value + for f in filters: + if action := actions.get(f.pop("id", None)): + result = action(result, **f) + if result == 0: + break # don't apply any more filters + return result diff --git a/src/hutch_bunny/core/parser.py b/src/hutch_bunny/core/parser.py new file mode 100644 index 0000000..15d472e --- /dev/null +++ b/src/hutch_bunny/core/parser.py @@ -0,0 +1,30 @@ +import argparse + +parser = argparse.ArgumentParser( + prog="bunny-cli", + description="This program takes a JSON string containing an RQuest query and solves it.", +) +parser.add_argument( + "--body", + dest="body", + required=True, + help="The JSON file containing the query", +) +parser.add_argument( + "-o", + "--output", + dest="output", + required=False, + type=str, + default="output.json", + help="The path to the output file", +) +parser.add_argument( + "-m", + "--modifiers", + dest="results_modifiers", + required=False, + type=str, + default="[]", # when parsed will produce an empty list + help="The results modifiers", +) diff --git a/src/hutch_bunny/core/query_solvers.py b/src/hutch_bunny/core/query_solvers.py new file mode 100644 index 0000000..00184cb --- /dev/null +++ b/src/hutch_bunny/core/query_solvers.py @@ -0,0 +1,516 @@ +import base64 +import os +import logging +from typing import Tuple +import pandas as pd +from sqlalchemy import ( + and_, + select, + func, +) +from hutch_bunny.core.db_manager import SyncDBManager +from hutch_bunny.core.entities import ( + Concept, + ConditionOccurrence, + Measurement, + Observation, + Person, + DrugExposure, + ProcedureOccurrence, +) +from hutch_bunny.core.rquest_dto.query import AvailabilityQuery, DistributionQuery +from hutch_bunny.core.rquest_dto.file import File +from hutch_bunny.core.rquest_dto.result import RquestResult +from hutch_bunny.core.enums import DistributionQueryType +import hutch_bunny.core.settings as settings +from 
hutch_bunny.core.constants import DISTRIBUTION_TYPE_FILE_NAMES_MAP + + +class AvailibilityQuerySolver: + subqueries = list() + concept_table_map = { + "Condition": ConditionOccurrence, + "Ethnicity": Person, + "Drug": DrugExposure, + "Gender": Person, + "Race": Person, + "Measurement": Measurement, + "Observation": Observation, + "Procedure": ProcedureOccurrence, + } + concept_time_column_map = { + "Condition": ConditionOccurrence.condition_start_date, + "Ethnicity": Person.birth_datetime, + "Drug": DrugExposure.drug_exposure_start_date, + "Gender": Person.birth_datetime, + "Race": Person.birth_datetime, + "Measurement": Measurement.measurement_date, + "Observation": Observation.observation_date, + "Procedure": ProcedureOccurrence.procedure_date, + } + numeric_rule_map = { + "Measurement": Measurement.value_as_number, + "Observation": Observation.value_as_number, + } + boolean_rule_map = { + "Condition": ConditionOccurrence.condition_concept_id, + "Ethnicity": Person.ethnicity_concept_id, + "Drug": DrugExposure.drug_concept_id, + "Gender": Person.gender_concept_id, + "Race": Person.race_concept_id, + "Measurement": Measurement.measurement_concept_id, + "Observation": Observation.observation_concept_id, + "Procedure": ProcedureOccurrence.procedure_concept_id, + } + + def __init__(self, db_manager: SyncDBManager, query: AvailabilityQuery) -> None: + self.db_manager = db_manager + self.query = query + + def _find_concepts(self) -> dict: + concept_ids = set() + for group in self.query.cohort.groups: + for rule in group.rules: + concept_ids.add(int(rule.value)) + concept_query = ( + # order must be .concept_id, .domain_id + select(Concept.concept_id, Concept.domain_id) + .where(Concept.concept_id.in_(concept_ids)) + .distinct() + ) + concepts_df = pd.read_sql_query( + concept_query, con=self.db_manager.engine.connect() + ) + concept_dict = { + str(concept_id): domain_id for concept_id, domain_id in concepts_df.values + } + return concept_dict + + def _solve_rules(self) 
-> None: + """Find all rows that match the rules' criteria.""" + concepts = self._find_concepts() + merge_method = lambda x: "inner" if x == "AND" else "outer" + for group in self.query.cohort.groups: + concept = concepts.get(group.rules[0].value) + concept_table = self.concept_table_map.get(concept) + boolean_rule_col = self.boolean_rule_map.get(concept) + numeric_rule_col = self.numeric_rule_map.get(concept) + if ( + group.rules[0].min_value is not None + and group.rules[0].max_value is not None + ): + stmnt = ( + select(concept_table.person_id) + .where( + and_( + boolean_rule_col == int(group.rules[0].value), + numeric_rule_col.between( + group.rules[0].min_value, group.rules[0].max_value + ), + ) + ) + .distinct() + ) + main_df = pd.read_sql_query( + sql=stmnt, con=self.db_manager.engine.connect() + ) + elif group.rules[0].operator == "=": + stmnt = ( + select(concept_table.person_id) + .where(boolean_rule_col == int(group.rules[0].value)) + .distinct() + ) + main_df = pd.read_sql_query( + sql=stmnt, con=self.db_manager.engine.connect() + ) + elif group.rules[0].operator == "!=": + stmnt = ( + select(concept_table.person_id) + .where(boolean_rule_col != int(group.rules[0].value)) + .distinct() + ) + main_df = pd.read_sql_query( + sql=stmnt, con=self.db_manager.engine.connect() + ) + for i, rule in enumerate(group.rules[1:], start=1): + concept = concepts.get(rule.value) + concept_table = self.concept_table_map.get(concept) + boolean_rule_col = self.boolean_rule_map.get(concept) + numeric_rule_col = self.numeric_rule_map.get(concept) + if rule.min_value is not None and rule.max_value is not None: + # numeric rule + stmnt = ( + select(concept_table.person_id.label(f"person_id_{i}")) + .where( + and_( + boolean_rule_col == int(rule.value), + numeric_rule_col.between( + rule.min_value, rule.max_value + ), + ) + ) + .distinct() + ) + rule_df = pd.read_sql_query( + sql=stmnt, con=self.db_manager.engine.connect() + ) + main_df = main_df.merge( + right=rule_df, + 
how=merge_method(group.rules_operator), + left_on="person_id", + right_on=f"person_id_{i}", + ) + # Text rules testing for inclusion + elif rule.operator == "=": + stmnt = ( + select(concept_table.person_id.label(f"person_id_{i}")) + .where(boolean_rule_col == int(rule.value)) + .distinct() + ) + rule_df = pd.read_sql_query( + sql=stmnt, con=self.db_manager.engine.connect() + ) + main_df = main_df.merge( + right=rule_df, + how=merge_method(group.rules_operator), + left_on="person_id", + right_on=f"person_id_{i}", + ) + # Text rules testing for exclusion + elif rule.operator == "!=": + stmnt = ( + select(concept_table.person_id.label(f"person_id_{i}")) + .where(boolean_rule_col != int(rule.value)) + .distinct() + ) + rule_df = pd.read_sql_query( + sql=stmnt, con=self.db_manager.engine.connect() + ) + main_df = main_df.merge( + right=rule_df, + how=merge_method(group.rules_operator), + left_on="person_id", + right_on=f"person_id_{i}", + ) + self.subqueries.append(main_df) + + def solve_query(self) -> int: + """Merge the groups and return the number of rows that matched all criteria.""" + self._solve_rules() + merge_method = lambda x: "inner" if x == "AND" else "outer" + group0_df = self.subqueries[0] + group0_df.rename({"person_id": "person_id_0"}, inplace=True, axis=1) + for i, df in enumerate(self.subqueries[1:], start=1): + df.rename({"person_id": f"person_id_{i}"}, inplace=True, axis=1) + group0_df = group0_df.merge( + right=df, + how=merge_method(self.query.cohort.groups_operator), + left_on="person_id_0", + right_on=f"person_id_{i}", + ) + self.subqueries.clear() + return group0_df.shape[0] # the number of rows + + +class BaseDistributionQuerySolver: + def solve_query(self) -> Tuple[str, int]: + raise NotImplementedError + + +class CodeDistributionQuerySolver(BaseDistributionQuerySolver): + allowed_domains_map = { + "Condition": ConditionOccurrence, + "Ethnicity": Person, + "Drug": DrugExposure, + "Gender": Person, + "Race": Person, + "Measurement": 
Measurement, + "Observation": Observation, + "Procedure": ProcedureOccurrence, + } + domain_concept_id_map = { + "Condition": ConditionOccurrence.condition_concept_id, + "Ethnicity": Person.ethnicity_concept_id, + "Drug": DrugExposure.drug_concept_id, + "Gender": Person.gender_concept_id, + "Race": Person.race_concept_id, + "Measurement": Measurement.measurement_concept_id, + "Observation": Observation.observation_concept_id, + "Procedure": ProcedureOccurrence.procedure_concept_id, + } + output_cols = [ + "BIOBANK", + "CODE", + "COUNT", + "DESCRIPTION", + "MIN", + "Q1", + "MEDIAN", + "MEAN", + "Q3", + "MAX", + "ALTERNATIVES", + "DATASET", + "OMOP", + "OMOP_DESCR", + "CATEGORY", + ] + + def __init__(self, db_manager: SyncDBManager, query: DistributionQuery) -> None: + self.db_manager = db_manager + self.query = query + + def solve_query(self) -> Tuple[str, int]: + """Build table of distribution query and return as a TAB separated string + along with the number of rows. + + Returns: + Tuple[str, int]: The table as a string and the number of rows. 
+ """ + # Prepare the empty results data frame + df = pd.DataFrame(columns=self.output_cols) + + # Get the counts for each concept ID + counts = list() + concepts = list() + categories = list() + biobanks = list() + for k in self.allowed_domains_map: + table = self.allowed_domains_map[k] + concept_col = self.domain_concept_id_map[k] + stmnt = select(func.count(table.person_id), concept_col).group_by( + concept_col + ) + res = pd.read_sql(stmnt, self.db_manager.engine.connect()) + counts.extend(res.iloc[:, 0]) + concepts.extend(res.iloc[:, 1]) + categories.extend([k] * len(res)) + biobanks.extend([self.query.collection] * len(res)) + + df["COUNT"] = counts + df["OMOP"] = concepts + df["CATEGORY"] = categories + df["CODE"] = df["OMOP"].apply(lambda x: f"OMOP:{x}") + df["BIOBANK"] = biobanks + + # Get descriptions + concept_query = select(Concept.concept_id, Concept.concept_name).where( + Concept.concept_id.in_(concepts) + ) + concepts_df = pd.read_sql_query( + concept_query, con=self.db_manager.engine.connect() + ) + for _, row in concepts_df.iterrows(): + df.loc[df["OMOP"] == row["concept_id"], "OMOP_DESCR"] = row["concept_name"] + + # Convert df to tab separated string + results = list(["\t".join(df.columns)]) + for _, row in df.iterrows(): + results.append("\t".join([str(r) for r in row.values])) + + return os.linesep.join(results), len(df) + + +class DemographicsDistributionQuerySolver(BaseDistributionQuerySolver): + allowed_domains_map = { + "Gender": Person, + } + domain_concept_id_map = { + "Gender": Person.gender_concept_id, + } + output_cols = [ + "BIOBANK", + "CODE", + "DESCRIPTION", + "COUNT", + "MIN", + "Q1", + "MEDIAN", + "MEAN", + "Q3", + "MAX", + "ALTERNATIVES", + "DATASET", + "OMOP", + "OMOP_DESCR", + "CATEGORY", + ] + + def __init__(self, db_manager: SyncDBManager, query: DistributionQuery) -> None: + self.db_manager = db_manager + self.query = query + + def solve_query(self) -> Tuple[str, int]: + """Build table of distribution query and return as a 
TAB separated string + along with the number of rows. + + Returns: + Tuple[str, int]: The table as a string and the number of rows. + """ + # Prepare the empty results data frame + df = pd.DataFrame(columns=self.output_cols) + + # Get the counts for each concept ID + counts = list() + concepts = list() + categories = list() + biobanks = list() + datasets = list() + codes = list() + descriptions = list() + alternatives = list() + for k in self.allowed_domains_map: + table = self.allowed_domains_map[k] + concept_col = self.domain_concept_id_map[k] + + # People count statement + stmnt = select(func.count(table.person_id), concept_col).group_by( + concept_col + ) + + # Concept description statement + concept_query = select(Concept.concept_id, Concept.concept_name).where( + Concept.concept_id.in_(concepts) + ) + + # Get the data + res = pd.read_sql(stmnt, self.db_manager.engine.connect()) + concepts_df = pd.read_sql_query( + concept_query, con=self.db_manager.engine.connect() + ) + combined = res.merge( + concepts_df, + left_on=concept_col.name, + right_on=Concept.concept_id.name, + how="left", + ) + + # Compile the data + counts.append(res.iloc[:, 0].sum()) + concepts.extend(res.iloc[:, 1]) + categories.append("DEMOGRAPHICS") + biobanks.append(self.query.collection) + datasets.append(table.__tablename__) + descriptions.append(k) + codes.append(k.upper()) + + alternative = "^" + for _, row in combined.iterrows(): + alternative += f"{row[Concept.concept_name.name]}|{row.iloc[0]}^" + alternatives.append(alternative) + + # Fill out the results table + df["COUNT"] = counts + df["CATEGORY"] = categories + df["CODE"] = codes + df["BIOBANK"] = biobanks + df["DATASET"] = datasets + df["DESCRIPTION"] = descriptions + df["ALTERNATIVES"] = alternatives + + # Convert df to tab separated string + results = list(["\t".join(df.columns)]) + for _, row in df.iterrows(): + results.append("\t".join([str(r) for r in row.values])) + + return os.linesep.join(results), len(df) + + +def 
solve_availability( + db_manager: SyncDBManager, query: AvailabilityQuery +) -> RquestResult: + """Solve RQuest availability queries. + + Args: + db_manager (SyncDBManager): The database manager + query (AvailabilityQuery): The availability query object + + Returns: + RquestResult: Result object for the query + """ + logger = logging.getLogger(settings.LOGGER_NAME) + solver = AvailibilityQuerySolver(db_manager, query) + try: + count_ = solver.solve_query() + result = RquestResult( + status="ok", count=count_, collection_id=query.collection, uuid=query.uuid + ) + logger.info("Solved availability query") + except Exception as e: + logger.error(str(e)) + result = RquestResult( + status="error", count=0, collection_id=query.collection, uuid=query.uuid + ) + + return result + + +def _get_distribution_solver( + db_manager: SyncDBManager, query: DistributionQuery +) -> BaseDistributionQuerySolver: + """Return a distribution query solver depending on the query. + If `query.code` is "GENERIC", return a `CodeDistributionQuerySolver`. + If `query.code` is "DEMOGRAPHICS", return a `DemographicsDistributionQuerySolver`. + + Args: + db_manager (SyncDBManager): The database manager. + query (DistributionQuery): The distribution query to solve. + + Returns: + BaseDistributionQuerySolver: The solver for the distribution query type. + """ + if query.code == DistributionQueryType.GENERIC: + return CodeDistributionQuerySolver(db_manager, query) + if query.code == DistributionQueryType.DEMOGRAPHICS: + return DemographicsDistributionQuerySolver(db_manager, query) + + +def solve_distribution( + db_manager: SyncDBManager, query: DistributionQuery +) -> RquestResult: + """Solve RQuest distribution queries. 
+ + Args: + db_manager (SyncDBManager): The database manager + query (DistributionQuery): The distribution query object + + Returns: + DistributionResult: Result object for the query + """ + logger = logging.getLogger(settings.LOGGER_NAME) + solver = _get_distribution_solver(db_manager, query) + try: + res, count = solver.solve_query() + # Convert file data to base64 + res_b64_bytes = base64.b64encode(res.encode("utf-8")) # bytes + size = len(res_b64_bytes) / 1000 # length of file data in KB + res_b64 = res_b64_bytes.decode("utf-8") # convert back to string, now base64 + result_file = File( + data=res_b64, + description="Result of code.distribution anaylsis", + name=DISTRIBUTION_TYPE_FILE_NAMES_MAP.get(query.analysis, ""), + sensitive=True, + reference="", + size=size, + type_="BCOS", + ) + result = RquestResult( + uuid=query.uuid, + status="ok", + count=count, + datasets_count=1, + files=[result_file], + collection_id=query.collection, + ) + except Exception as e: + logger.error(str(e)) + result = RquestResult( + uuid=query.uuid, + status="error", + count=0, + datasets_count=0, + files=[], + collection_id=query.collection, + ) + + return result diff --git a/src/hutch_bunny/core/results_modifiers.py b/src/hutch_bunny/core/results_modifiers.py new file mode 100644 index 0000000..76b6e17 --- /dev/null +++ b/src/hutch_bunny/core/results_modifiers.py @@ -0,0 +1,20 @@ +def results_modifiers( + low_number_suppression_threshold: int, + rounding_target: int, +) -> list: + results_modifiers = [] + if low_number_suppression_threshold: + results_modifiers.append( + { + "id": "Low Number Suppression", + "threshold": low_number_suppression_threshold, + } + ) + if rounding_target: + results_modifiers.append( + { + "id": "Rounding", + "nearest": rounding_target, + } + ) + return results_modifiers diff --git a/src/hutch_bunny/core/rquest_dto/__init__.py b/src/hutch_bunny/core/rquest_dto/__init__.py new file mode 100644 index 0000000..3dc1f76 --- /dev/null +++ 
b/src/hutch_bunny/core/rquest_dto/__init__.py @@ -0,0 +1 @@ +__version__ = "0.1.0" diff --git a/src/hutch_bunny/core/rquest_dto/activity_job.py b/src/hutch_bunny/core/rquest_dto/activity_job.py new file mode 100644 index 0000000..8b668f2 --- /dev/null +++ b/src/hutch_bunny/core/rquest_dto/activity_job.py @@ -0,0 +1,41 @@ +from hutch_bunny.core.rquest_dto.base_dto import BaseDto + + +class ActivityJob(BaseDto): + def __init__( + self, type_: str, job_id: str, activity_source_id: int, payload: dict + ) -> None: + self.type_ = type_ + self.job_id = job_id + self.activity_source_id = activity_source_id + self.payload = payload + + def to_dict(self) -> dict: + """Convert `AcitivityJob` to `dict` + + Returns: + dict: The `ActivityJob` as a `dict` + """ + return { + "type": self.type_, + "job_id": self.job_id, + "activity_source_id": self.activity_source_id, + "payload": self.payload, + } + + @classmethod + def from_dict(cls, dict_: dict): + """Build an `ActivityJob` from a `dict` + + Args: + dict_ (dict): The `dict` with the `ActivityJob`'s details + + Returns: + Self: an `ActivityJob` instance + """ + return cls( + type_=dict_.get("type"), + job_id=dict_.get("job_id"), + activity_source_id=dict_.get("activity_source_id"), + payload=dict_.get("payload"), + ) diff --git a/src/hutch_bunny/core/rquest_dto/base_dto.py b/src/hutch_bunny/core/rquest_dto/base_dto.py new file mode 100644 index 0000000..6f674f7 --- /dev/null +++ b/src/hutch_bunny/core/rquest_dto/base_dto.py @@ -0,0 +1,7 @@ +class BaseDto: + def to_dict(self) -> dict: + raise NotImplementedError + + @classmethod + def from_dict(cls, dict_: dict): + raise NotImplementedError diff --git a/src/hutch_bunny/core/rquest_dto/cohort.py b/src/hutch_bunny/core/rquest_dto/cohort.py new file mode 100644 index 0000000..89cde7a --- /dev/null +++ b/src/hutch_bunny/core/rquest_dto/cohort.py @@ -0,0 +1,34 @@ +from typing import List +from hutch_bunny.core.rquest_dto.base_dto import BaseDto +from hutch_bunny.core.rquest_dto.group 
import Group + + +class Cohort(BaseDto): + def __init__(self, groups: List[Group], groups_operator: str) -> None: + self.groups = groups + self.groups_operator = groups_operator + + def to_dict(self) -> dict: + """Convert `Cohort` to `dict` + + Returns: + dict: The `Cohort` as a `dict` + """ + return { + "groups": [g.to_dict() for g in self.groups], + "groups_oper": self.groups_operator, + } + + @classmethod + def from_dict(cls, dict_: dict): + """Build a `Cohort` from a `dict` + + Args: + dict_ (dict): The `dict with the cohort's details` + + Returns: + Self: a `Cohort` instance + """ + groups = [Group.from_dict(g) for g in dict_.get("groups", [])] + groups_operator = dict_.get("groups_oper", "") + return cls(groups=groups, groups_operator=groups_operator) diff --git a/src/hutch_bunny/core/rquest_dto/file.py b/src/hutch_bunny/core/rquest_dto/file.py new file mode 100644 index 0000000..ca0822f --- /dev/null +++ b/src/hutch_bunny/core/rquest_dto/file.py @@ -0,0 +1,32 @@ +from hutch_bunny.core.rquest_dto.base_dto import BaseDto + + +class File(BaseDto): + def __init__( + self, + data: str, + description: str, + name: str, + reference: str, + sensitive: bool, + size: float, + type_: str, + ) -> None: + self.data = data + self.description = description + self.name = name + self.reference = reference + self.sensitive = sensitive + self.size = size + self.type_ = type_ + + def to_dict(self) -> dict: + return { + "file_name": self.name, + "file_data": self.data, + "file_description": self.description, + "file_size": self.size, + "file_type": self.type_, + "file_sensitive": self.sensitive, + "file_reference": self.reference, + } diff --git a/src/hutch_bunny/core/rquest_dto/group.py b/src/hutch_bunny/core/rquest_dto/group.py new file mode 100644 index 0000000..dd68025 --- /dev/null +++ b/src/hutch_bunny/core/rquest_dto/group.py @@ -0,0 +1,36 @@ +from typing import List +from hutch_bunny.core.rquest_dto.base_dto import BaseDto +from hutch_bunny.core.rquest_dto.rule import 
Rule + + +class Group(BaseDto): + """Python representation of a group based on [ItemList](https://schema.org/ItemList).""" + + def __init__(self, rules: List[Rule], rules_operator: str, **kwargs) -> None: + self.rules = rules + self.rules_operator = rules_operator + + def to_dict(self) -> dict: + """Convert `Group` to `dict`. + + Returns: + dict: `Group` as a `dict`. + """ + return { + "rules": [r.to_dict() for r in self.rules], + "rules_oper": self.rules_operator, + } + + @classmethod + def from_dict(cls, dict_: dict): + """Create a `Group` from dict. + + Args: + dict_ (dict): Mapping containing the `Group`'s attributes. + + Returns: + Self: `Group` object. + """ + rules = [Rule.from_dict(r) for r in dict_.get("rules", [])] + rules_operator = dict_.get("rules_oper", "") + return cls(rules=rules, rules_operator=rules_operator) diff --git a/src/hutch_bunny/core/rquest_dto/query.py b/src/hutch_bunny/core/rquest_dto/query.py new file mode 100644 index 0000000..66eb98e --- /dev/null +++ b/src/hutch_bunny/core/rquest_dto/query.py @@ -0,0 +1,118 @@ +from hutch_bunny.core.enums import DistributionQueryType +from hutch_bunny.core.rquest_dto.base_dto import BaseDto +from hutch_bunny.core.rquest_dto.cohort import Cohort + + +class AvailabilityQuery(BaseDto): + """Python representation of an RQuest Availability Query""" + + def __init__( + self, + cohort: Cohort, + uuid: str, + owner: str, + collection: str, + protocol_version: str, + char_salt: str, + **kwargs, + ) -> None: + self.cohort = cohort + self.uuid = uuid + self.owner = owner + self.collection = collection + self.protocol_version = protocol_version + self.char_salt = char_salt + + def to_dict(self) -> dict: + """Convert `AvailabilityQuery` to `dict`. + + Returns: + dict: `AvailabilityQuery` as a `dict`. 
+ """ + return { + "cohort": self.cohort.to_dict(), + "uuid": self.uuid, + "owner": self.owner, + "collection": self.collection, + "protocol_version": self.protocol_version, + "char_salt": self.char_salt, + } + + @classmethod + def from_dict(cls, dict_: dict): + """Create a `AvailabilityQuery` from RQuest JSON. + + Args: + dict_ (dict): Mapping containing the `AvailabilityQuery`'s attributes. + + Returns: + Self: `AvailabilityQuery` object. + """ + cohort = Cohort.from_dict(dict_.pop("cohort", {})) + return cls(cohort=cohort, **dict_) + + +class DistributionQuery(BaseDto): + """Python representation of an RQuest Distribution Query""" + + def __init__( + self, + owner: str, + code: DistributionQueryType, + analysis: str, + uuid: str, + collection: str, + **kwargs, + ) -> None: + self.owner = owner + self.code = code + self.analysis = analysis + self.uuid = uuid + self.collection = collection + + def to_dict(self) -> dict: + """Convert `DistributionQuery` to `dict`. + + Returns: + dict: `DistributionQuery` as a `dict`. + """ + return { + "owner": self.owner, + "code": self.code.value, + "analysis": self.analysis, + "uuid": self.uuid, + "collection": self.collection, + } + + @classmethod + def from_dict(cls, dict_: dict): + """Create a `DistributionQuery` from RQuest JSON. + + Args: + dict_ (dict): Mapping containing the `DistributionQuery`'s attributes. + + Raises: + TypeError: "Distribution queries must have values for: 'owner', 'code', 'analysis', 'uuid' and 'collection'" + ValueError: `dict_` contains an incorrect value for `code`. + + Returns: + Self: `DistributionQuery` object. 
+ """ + + owner = dict_.get("owner") + code = dict_.get("code") + analysis = dict_.get("analysis") + uuid = dict_.get("uuid") + collection = dict_.get("collection") + + if any(v is None for v in [owner, code, analysis, uuid, collection]): + raise TypeError( + "Distribution queries must have values for: 'owner', 'code', 'analysis', 'uuid' and 'collection'" + ) + + if code_enum := DistributionQueryType.get_value(code): + return cls(owner, code_enum, analysis, uuid, collection) + else: + raise ValueError( + f"'{code}' is not a valid distribution query type. Valid values are: '{DistributionQueryType.DEMOGRAPHICS.value}', '{DistributionQueryType.GENERIC.value}' or '{DistributionQueryType.ICD_MAIN.value}'" + ) diff --git a/src/hutch_bunny/core/rquest_dto/result.py b/src/hutch_bunny/core/rquest_dto/result.py new file mode 100644 index 0000000..bf0ae6f --- /dev/null +++ b/src/hutch_bunny/core/rquest_dto/result.py @@ -0,0 +1,49 @@ +from typing import List, Union +from hutch_bunny.core.rquest_dto.base_dto import BaseDto +from hutch_bunny.core.rquest_dto.file import File + + +class RquestResult(BaseDto): + """ + This class represents the result of an RQuest query. + """ + + def __init__( + self, + uuid: str, + status: str, + collection_id: str, + count: int = 0, + datasets_count: int = 0, + files: List[File] = None, + message: str = "", + protocol_version: str = "v2", + ) -> None: + self.uuid = uuid + self.status = status + self.count = count + self.datasets_count = datasets_count + self.files = files if files is not None else list() + self.message = message + self.collection_id = collection_id + self.protocol_version = protocol_version + + def to_dict(self) -> dict: + """Convert this `DistributionResult` object to a JSON serialisable `dict`. + + Returns: + dict: + the `dict` representing the result of a distribution query. 
+ """ + return { + "status": self.status, + "protocolVersion": self.protocol_version, + "uuid": self.uuid, + "queryResult": { + "count": self.count, + "datasetCount": self.datasets_count, + "files": [f.to_dict() for f in self.files], + }, + "message": self.message, + "collection_id": self.collection_id, + } diff --git a/src/hutch_bunny/core/rquest_dto/rule.py b/src/hutch_bunny/core/rquest_dto/rule.py new file mode 100644 index 0000000..4190474 --- /dev/null +++ b/src/hutch_bunny/core/rquest_dto/rule.py @@ -0,0 +1,85 @@ +import re +from typing import Any, Tuple, Union + + +class Rule: + def __init__( + self, + value: Any = None, + type_: str = "", + time: Union[str, None] = None, + varname: str = "", + operator: str = "", + **kwargs, + ) -> None: + self.value = value + self.type_ = type_ + self.time = time + self.varname = varname + self.operator = operator + + if self.type_ == "NUM": + self.min_value, self.max_value = self._parse_numeric(self.value) + _, v = self.varname.split("=") + self.value = v + else: + self.min_value, self.max_value = None, None + + def to_dict(self) -> dict: + """Convert `Rule` to `dict`. + + Returns: + dict: `Rule` as a `dict`. + """ + varname = self.varname + value = self.value + if self.type_ == "NUM": + varname = f"OMOP={value}" + value = f"{self.min_value}..{self.max_value}" + dict_ = { + "varname": varname, + "type": self.type_, + "oper": self.operator, + "value": value, + } + return dict_ + + @classmethod + def from_dict(cls, dict_: dict): + """Create a `Rule` from RO-Crate JSON. + + Args: + dict_ (dict): Mapping containing the `Rule`'s attributes. + + Returns: + Self: `Rule` object. 
+ """ + type_ = dict_.get("type", "") + value = dict_.get("value") + time = dict_.get("time") + varname = dict_.get("varname", "") + operator = dict_.get("oper", "") + return cls( + type_=type_, value=value, time=time, varname=varname, operator=operator + ) + + def _parse_numeric( + self, value: str + ) -> Tuple[Union[float, None], Union[float, None]]: + pattern = re.compile(r"(-?\d*\.\d+|\d+|null)\.\.(-?\d*\.\d+|null)") + # Try and parse min and max values, then return them + if match := re.search(pattern, value): + lower, upper = match.groups() + # parse lower bound + try: + min_value = float(lower) + except ValueError: + min_value = None + # parse upper bound + try: + max_value = float(upper) + except ValueError: + max_value = None + return min_value, max_value + + return None, None diff --git a/src/hutch_bunny/core/setting_database.py b/src/hutch_bunny/core/setting_database.py new file mode 100644 index 0000000..2358048 --- /dev/null +++ b/src/hutch_bunny/core/setting_database.py @@ -0,0 +1,43 @@ +from logging import Logger +import os +from hutch_bunny.core.db_manager import SyncDBManager, TrinoDBManager +import hutch_bunny.core.settings as settings + + +def setting_database(logger: Logger): + logger.info("Setting up database connection...") + if bool(os.getenv("USE_TRINO", False)): + datasource_db_port = os.getenv("DATASOURCE_DB_PORT", 8080) + try: + db_manager = TrinoDBManager( + username=os.getenv("DATASOURCE_DB_USERNAME", "trino-user"), + password=os.getenv("DATASOURCE_DB_PASSWORD"), + host=os.getenv("DATASOURCE_DB_HOST"), + port=int(datasource_db_port), + schema=os.getenv("DATASOURCE_DB_SCHEMA"), + catalog=os.getenv("DATASOURCE_DB_CATALOG"), + ) + except TypeError as e: + logger.error(str(e)) + exit() + else: + datasource_db_port = os.getenv("DATASOURCE_DB_PORT") + try: + db_manager = SyncDBManager( + username=os.getenv("DATASOURCE_DB_USERNAME"), + password=os.getenv("DATASOURCE_DB_PASSWORD"), + host=os.getenv("DATASOURCE_DB_HOST"), + port=( + 
int(datasource_db_port) if datasource_db_port is not None else None + ), + database=os.getenv("DATASOURCE_DB_DATABASE"), + drivername=os.getenv( + "DATASOURCE_DB_DRIVERNAME", settings.DEFAULT_DB_DRIVER + ), + schema=os.getenv("DATASOURCE_DB_SCHEMA"), + ) + except TypeError as e: + logger.error(str(e)) + exit() + + return db_manager diff --git a/src/hutch_bunny/core/settings.py b/src/hutch_bunny/core/settings.py new file mode 100644 index 0000000..6aef610 --- /dev/null +++ b/src/hutch_bunny/core/settings.py @@ -0,0 +1,33 @@ +from os import environ +from dotenv import load_dotenv + +load_dotenv() + +DEFAULT_DB_DRIVER = "postgresql" + +# Logging configuration +LOGGER_NAME = "hutch" +BACKUP_LOGGER_NAME = "backup" +MSG_FORMAT = "%(levelname)s - %(asctime)s - %(message)s" +DATE_FORMAT = "%d-%b-%y %H:%M:%S" + +TASK_API_BASE_URL = environ.get("TASK_API_BASE_URL") +TASK_API_USERNAME = environ.get("TASK_API_USERNAME") +TASK_API_PASSWORD = environ.get("TASK_API_PASSWORD") +TASK_API_TYPE = environ.get("TASK_API_TYPE") +if TASK_API_TYPE and TASK_API_TYPE not in ["a", "b", "c"]: + raise TypeError("TASK_API_TYPE must be either 'a' or 'b' or 'c'") + +LOW_NUMBER_SUPPRESSION_THRESHOLD = environ.get("LOW_NUMBER_SUPPRESSION_THRESHOLD") +ROUNDING_TARGET = environ.get("ROUNDING_TARGET") + + +POLLING_INTERVAL_DEFAULT = 5 +### currently no guards to ensure that POLLING_INTERVAL and POLLING_TIMEOUT are >=0 +POLLING_INTERVAL = int(environ.get("POLLING_INTERVAL")) or POLLING_INTERVAL_DEFAULT + +if POLLING_INTERVAL < 0: + print("POLLING_INTERVAL must be a positive integer. 
Setting to default 5s...") + POLLING_INTERVAL = POLLING_INTERVAL_DEFAULT + +COLLECTION_ID = environ.get("COLLECTION_ID") diff --git a/src/hutch_bunny/core/task_api_client.py b/src/hutch_bunny/core/task_api_client.py new file mode 100644 index 0000000..8ce72da --- /dev/null +++ b/src/hutch_bunny/core/task_api_client.py @@ -0,0 +1,83 @@ +from requests.models import Response +from enum import Enum +import requests +from requests.auth import HTTPBasicAuth +import hutch_bunny.core.settings as settings +from typing import Optional + + +class SupportedMethod(Enum): + POST = "post" + GET = "get" + PUT = "put" + PATCH = "patch" + DELETE = "delete" + + +class TaskApiClient: + def __init__( + self, + base_url: Optional[str] = None, + username: Optional[str] = None, + password: Optional[str] = None, + ): + self.base_url = base_url or settings.TASK_API_BASE_URL + self.username = username or settings.TASK_API_USERNAME + self.password = password or settings.TASK_API_PASSWORD + + def request( + self, method: SupportedMethod, url: str, data: Optional[dict] = None, **kwargs + ) -> Response: + """ + Sends an HTTP request using the specified method to the given URL with optional data and additional parameters. + + Args: + method (SupportedMethod): The HTTP method to use for the request. Must be one of the SupportedMethod enum values. + url (str): The URL to which the request is sent. + data (dict, optional): The data to send in the body of the request. Defaults to None. + **kwargs: Additional keyword arguments to pass to the requests method. This can include parameters such as headers, params, verify, etc. + + Returns: + Response: The response object returned by the requests library. 
+ """ + basicAuth = HTTPBasicAuth(self.username, self.password) + response = requests.request( + method=method.value, url=url, json=data, auth=basicAuth, **kwargs + ) + return response + + def post( + self, endpoint: Optional[str] = None, data: dict = dict(), **kwargs + ) -> Response: + """ + Sends a POST request to the specified endpoint with data and additional parameters. + + Args: + endpoint (str): The endpoint to which the POST request is sent. + data (dict): The data to send in the body of the request. + **kwargs: Additional keyword arguments to pass to the requests method. + + Returns: + Response: The response object returned by the requests library. + """ + url = f"{self.base_url}/{endpoint}" + return self.request( + SupportedMethod.POST, + url, + data, + headers={"Content-Type": "application/json"}, + ) + + def get(self, endpoint: Optional[str] = None, **kwargs) -> Response: + """ + Sends a GET request to the specified endpoint with optional additional parameters. + + Args: + endpoint (str): The endpoint to which the GET request is sent. + **kwargs: Additional keyword arguments to pass to the requests method. This can include parameters such as headers, params, verify, etc. + + Returns: + Response: The response object returned by the requests library. 
+ """ + url = f"{self.base_url}/{endpoint}" + return self.request(SupportedMethod.GET, url, **kwargs) diff --git a/src/hutch_bunny/daemon.py b/src/hutch_bunny/daemon.py new file mode 100644 index 0000000..9d05f22 --- /dev/null +++ b/src/hutch_bunny/daemon.py @@ -0,0 +1,70 @@ +import time +import hutch_bunny.core.settings as settings +from hutch_bunny.core.execute_query import execute_query +from hutch_bunny.core.rquest_dto.result import RquestResult +from hutch_bunny.core.task_api_client import TaskApiClient +from hutch_bunny.core.results_modifiers import results_modifiers +from hutch_bunny.core.logger import logger +from hutch_bunny.core.setting_database import setting_database + + +def main() -> None: + # Setting database connection + db_manager = setting_database(logger=logger) + # Task Api Client class init. + client = TaskApiClient() + + # Building results modifiers + modifiers_list = results_modifiers( + low_number_suppression_threshold=int( + settings.LOW_NUMBER_SUPPRESSION_THRESHOLD or 0 + ), + rounding_target=int(settings.ROUNDING_TARGET or 0), + ) + polling_endpoint = ( + f"task/nextjob/{settings.COLLECTION_ID}.{settings.TASK_API_TYPE}" + if settings.TASK_API_TYPE + else f"task/nextjob/{settings.COLLECTION_ID}" + ) + # Polling forever to get query from Relay + while True: + response = client.get(endpoint=polling_endpoint) + if response.status_code == 200: + logger.info("Job received. 
Resolving...") + # Convert Response to Dict + query_dict: dict = response.json() + # Start querying + result = execute_query( + query_dict, + results_modifiers=modifiers_list, + logger=logger, + db_manager=db_manager, + ) + # Check the payload shape + if not isinstance(result, RquestResult): + raise TypeError("Payload does not match RQuest result schema.") + + # Build return endpoint after having result + return_endpoint = f"task/result/{result.uuid}/{result.collection_id}" + + # Try to send the results back to Relay + for _ in range(4): + response = client.post(endpoint=return_endpoint, data=result.to_dict()) + + # Bunny will stop retrying to post results when response was successful or there is a client error + if ( + 200 <= response.status_code < 300 + or 400 <= response.status_code < 500 + ): + logger.info("Job resolved.") + break + else: + logger.warning( + f"Bunny failed to post to {return_endpoint} at {time.time()}. Trying again..." + ) + time.sleep(5) + + elif response.status_code == 204: + logger.info("Looking for job...") + + time.sleep(settings.POLLING_INTERVAL) diff --git a/tests/test_return.py b/tests/test_return.py new file mode 100644 index 0000000..9e2212a --- /dev/null +++ b/tests/test_return.py @@ -0,0 +1,98 @@ +import pytest +from hutch_bunny.core.query_solvers import ( + AvailabilityQuery, + DistributionQuery, + solve_availability, +) +from hutch_bunny.core.db_manager import SyncDBManager +from hutch_bunny.core.rquest_dto import cohort +from hutch_bunny.core.rquest_dto.result import RquestResult +from hutch_bunny.core.rquest_dto.cohort import Cohort +from hutch_bunny.core.rquest_dto.group import Group +from hutch_bunny.core.rquest_dto.rule import Rule +from dotenv import load_dotenv +import os +import hutch_bunny.core.settings as settings + +load_dotenv() + + +@pytest.fixture +def db_manager(): + datasource_db_port = os.getenv("DATASOURCE_DB_PORT") + return SyncDBManager( + username=os.getenv("DATASOURCE_DB_USERNAME"), + 
password=os.getenv("DATASOURCE_DB_PASSWORD"), + host=os.getenv("DATASOURCE_DB_HOST"), + port=(int(datasource_db_port) if datasource_db_port is not None else None), + database=os.getenv("DATASOURCE_DB_DATABASE"), + drivername=os.getenv("DATASOURCE_DB_DRIVERNAME", settings.DEFAULT_DB_DRIVER), + schema=os.getenv("DATASOURCE_DB_SCHEMA"), + ) + + +@pytest.fixture +def availability_query(): + return AvailabilityQuery( + cohort=Cohort( + [ + Group( + rules=[ + Rule( + varname="OMOP", + varcat="Person", + type_="TEXT", + operator="=", + value="8507", + ) + ], + rules_operator="AND", + ), + ], + groups_operator="OR", + ), + uuid="unique_id", + protocol_version="v2", + char_salt="salt", + collection="collection_id", + owner="user1", + ) + + +@pytest.fixture +def availability_example(): + return RquestResult( + uuid="unique_id", + status="ok", + collection_id="collection_id", + count=6272, + datasets_count=0, + files=[], + message="", + protocol_version="v2", + ) + + +@pytest.fixture +def availability_result(db_manager, availability_query): + return solve_availability(db_manager=db_manager, query=availability_query) + + +def test_solve_availability_returns_result(availability_result): + assert isinstance(availability_result, RquestResult) + + +def test_solve_availability_fields_match_query( + availability_result, availability_example +): + assert availability_result.uuid == availability_example.uuid + assert availability_result.collection_id == availability_example.collection_id + assert availability_result.protocol_version == availability_example.protocol_version + + +def test_solve_availability_is_ok(availability_result): + assert availability_result.status == "ok" + + +def test_solve_availability_count_matches(availability_result, availability_example): + assert availability_result.count == availability_example.count diff --git a/uv.lock b/uv.lock index 53f1ded..9edc9a4 100644 --- a/uv.lock +++ b/uv.lock @@ -49,8 +49,10 @@ dependencies = [ { name = "numpy" }, { name = 
"pandas" }, { name = "psycopg", extra = ["binary"] }, + { name = "python-dotenv" }, { name = "requests" }, { name = "sqlalchemy" }, + { name = "trino" }, ] [package.dev-dependencies] @@ -64,8 +66,10 @@ requires-dist = [ { name = "numpy", specifier = ">=2.2.1" }, { name = "pandas", specifier = ">=2.2.3" }, { name = "psycopg", extras = ["binary"], specifier = ">=3.2.3" }, + { name = "python-dotenv", specifier = ">=1.0.1" }, { name = "requests", specifier = ">=2.32.3" }, { name = "sqlalchemy", specifier = ">=2.0.36" }, + { name = "trino", specifier = ">=0.331.0" }, ] [package.metadata.requires-dev] @@ -227,6 +231,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, ] +[[package]] +name = "python-dotenv" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bc/57/e84d88dfe0aec03b7a2d4327012c1627ab5f03652216c63d49846d7a6c58/python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca", size = 39115 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a", size = 19863 }, +] + [[package]] name = "pytz" version = "2024.2" @@ -253,27 +266,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.8.6" +version = "0.9.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/da/00/089db7890ea3be5709e3ece6e46408d6f1e876026ec3fd081ee585fef209/ruff-0.8.6.tar.gz", hash = "sha256:dcad24b81b62650b0eb8814f576fc65cfee8674772a6e24c9b747911801eeaa5", size = 3473116 } +sdist = { url = 
"https://files.pythonhosted.org/packages/75/48/385f276f41e89623a5ea8e4eb9c619a44fdfc2a64849916b3584eca6cb9f/ruff-0.9.0.tar.gz", hash = "sha256:143f68fa5560ecf10fc49878b73cee3eab98b777fcf43b0e62d43d42f5ef9d8b", size = 3489167 } wheels = [ - { url = "https://files.pythonhosted.org/packages/d7/28/aa07903694637c2fa394a9f4fe93cf861ad8b09f1282fa650ef07ff9fe97/ruff-0.8.6-py3-none-linux_armv6l.whl", hash = "sha256:defed167955d42c68b407e8f2e6f56ba52520e790aba4ca707a9c88619e580e3", size = 10628735 }, - { url = "https://files.pythonhosted.org/packages/2b/43/827bb1448f1fcb0fb42e9c6edf8fb067ca8244923bf0ddf12b7bf949065c/ruff-0.8.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:54799ca3d67ae5e0b7a7ac234baa657a9c1784b48ec954a094da7c206e0365b1", size = 10386758 }, - { url = "https://files.pythonhosted.org/packages/df/93/fc852a81c3cd315b14676db3b8327d2bb2d7508649ad60bfdb966d60738d/ruff-0.8.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e88b8f6d901477c41559ba540beeb5a671e14cd29ebd5683903572f4b40a9807", size = 10007808 }, - { url = "https://files.pythonhosted.org/packages/94/e9/e0ed4af1794335fb280c4fac180f2bf40f6a3b859cae93a5a3ada27325ae/ruff-0.8.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0509e8da430228236a18a677fcdb0c1f102dd26d5520f71f79b094963322ed25", size = 10861031 }, - { url = "https://files.pythonhosted.org/packages/82/68/da0db02f5ecb2ce912c2bef2aa9fcb8915c31e9bc363969cfaaddbc4c1c2/ruff-0.8.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:91a7ddb221779871cf226100e677b5ea38c2d54e9e2c8ed847450ebbdf99b32d", size = 10388246 }, - { url = "https://files.pythonhosted.org/packages/ac/1d/b85383db181639019b50eb277c2ee48f9f5168f4f7c287376f2b6e2a6dc2/ruff-0.8.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:248b1fb3f739d01d528cc50b35ee9c4812aa58cc5935998e776bf8ed5b251e75", size = 11424693 }, - { url = 
"https://files.pythonhosted.org/packages/ac/b7/30bc78a37648d31bfc7ba7105b108cb9091cd925f249aa533038ebc5a96f/ruff-0.8.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:bc3c083c50390cf69e7e1b5a5a7303898966be973664ec0c4a4acea82c1d4315", size = 12141921 }, - { url = "https://files.pythonhosted.org/packages/60/b3/ee0a14cf6a1fbd6965b601c88d5625d250b97caf0534181e151504498f86/ruff-0.8.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52d587092ab8df308635762386f45f4638badb0866355b2b86760f6d3c076188", size = 11692419 }, - { url = "https://files.pythonhosted.org/packages/ef/d6/c597062b2931ba3e3861e80bd2b147ca12b3370afc3889af46f29209037f/ruff-0.8.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:61323159cf21bc3897674e5adb27cd9e7700bab6b84de40d7be28c3d46dc67cf", size = 12981648 }, - { url = "https://files.pythonhosted.org/packages/68/84/21f578c2a4144917985f1f4011171aeff94ab18dfa5303ac632da2f9af36/ruff-0.8.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ae4478b1471fc0c44ed52a6fb787e641a2ac58b1c1f91763bafbc2faddc5117", size = 11251801 }, - { url = "https://files.pythonhosted.org/packages/6c/aa/1ac02537c8edeb13e0955b5db86b5c050a1dcba54f6d49ab567decaa59c1/ruff-0.8.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0c000a471d519b3e6cfc9c6680025d923b4ca140ce3e4612d1a2ef58e11f11fe", size = 10849857 }, - { url = "https://files.pythonhosted.org/packages/eb/00/020cb222252d833956cb3b07e0e40c9d4b984fbb2dc3923075c8f944497d/ruff-0.8.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:9257aa841e9e8d9b727423086f0fa9a86b6b420fbf4bf9e1465d1250ce8e4d8d", size = 10470852 }, - { url = "https://files.pythonhosted.org/packages/00/56/e6d6578202a0141cd52299fe5acb38b2d873565f4670c7a5373b637cf58d/ruff-0.8.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:45a56f61b24682f6f6709636949ae8cc82ae229d8d773b4c76c09ec83964a95a", size = 10972997 }, - { url = 
"https://files.pythonhosted.org/packages/be/31/dd0db1f4796bda30dea7592f106f3a67a8f00bcd3a50df889fbac58e2786/ruff-0.8.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:496dd38a53aa173481a7d8866bcd6451bd934d06976a2505028a50583e001b76", size = 11317760 }, - { url = "https://files.pythonhosted.org/packages/d4/70/cfcb693dc294e034c6fed837fa2ec98b27cc97a26db5d049345364f504bf/ruff-0.8.6-py3-none-win32.whl", hash = "sha256:e169ea1b9eae61c99b257dc83b9ee6c76f89042752cb2d83486a7d6e48e8f764", size = 8799729 }, - { url = "https://files.pythonhosted.org/packages/60/22/ae6bcaa0edc83af42751bd193138bfb7598b2990939d3e40494d6c00698c/ruff-0.8.6-py3-none-win_amd64.whl", hash = "sha256:f1d70bef3d16fdc897ee290d7d20da3cbe4e26349f62e8a0274e7a3f4ce7a905", size = 9673857 }, - { url = "https://files.pythonhosted.org/packages/91/f8/3765e053acd07baa055c96b2065c7fab91f911b3c076dfea71006666f5b0/ruff-0.8.6-py3-none-win_arm64.whl", hash = "sha256:7d7fc2377a04b6e04ffe588caad613d0c460eb2ecba4c0ccbbfe2bc973cbc162", size = 9149556 }, + { url = "https://files.pythonhosted.org/packages/e9/01/e0885e5519212efc7ab9d868bc39cb9781931c4c6f9b17becafa81193ec4/ruff-0.9.0-py3-none-linux_armv6l.whl", hash = "sha256:949b3513f931741e006cf267bf89611edff04e1f012013424022add3ce78f319", size = 10647069 }, + { url = "https://files.pythonhosted.org/packages/dd/69/510a9a5781dcf84c2ad513c2003936fefc802f39c745d5f2355d77fa45fd/ruff-0.9.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:99fbcb8c7fe94ae1e462ab2a1ef17cb20b25fb6438b9f198b1bcf5207a0a7916", size = 10401936 }, + { url = "https://files.pythonhosted.org/packages/07/9f/37fb86bfdf28c4cbfe94cbcc01fb9ab0cb8128548f243f34d5298b212562/ruff-0.9.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:0b022afd8eb0fcfce1e0adec84322abf4d6ce3cd285b3b99c4f17aae7decf749", size = 10010347 }, + { url = "https://files.pythonhosted.org/packages/30/0d/b95121f53c7f7bfb7ba427a35d25f983ed3b476620c5cd69f45caa5b294e/ruff-0.9.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:336567ce92c9ca8ec62780d07b5fa11fbc881dc7bb40958f93a7d621e7ab4589", size = 10882152 }, + { url = "https://files.pythonhosted.org/packages/d4/0b/a955cb6b19eb900c4c594707ab72132ce2d5cd8b5565137fb8fed21b8f08/ruff-0.9.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d338336c44bda602dc8e8766836ac0441e5b0dfeac3af1bd311a97ebaf087a75", size = 10405502 }, + { url = "https://files.pythonhosted.org/packages/1e/fa/9a6c70af74f20edd2519b89eb3322f4bfa399315cf306383443700f2d6b6/ruff-0.9.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d9b3ececf523d733e90b540e7afcc0494189e8999847f8855747acd5a9a8c45f", size = 11465069 }, + { url = "https://files.pythonhosted.org/packages/ee/8b/7effac8915470da496be009fe861060baff2692f92801976b2c01cdc8c54/ruff-0.9.0-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:a11c0872a31232e473e2e0e2107f3d294dbadd2f83fb281c3eb1c22a24866924", size = 12176850 }, + { url = "https://files.pythonhosted.org/packages/bd/ed/626179786889eca47b1e821c1582622ac0c1c8f01d60ac974f8b96867a57/ruff-0.9.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b5fd06220c17a9cc0dc7fc6552f2ac4db74e8e8bff9c401d160ac59d00566f54", size = 11700963 }, + { url = "https://files.pythonhosted.org/packages/75/79/094c34ddec47fd3c61a0bc5e83ca164344c592949cff91f05961fd40922e/ruff-0.9.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0457e775c74bf3976243f910805242b7dcd389e1d440deccbd1194ca17a5728c", size = 13096560 }, + { url = "https://files.pythonhosted.org/packages/e7/23/ec85dca0dcb329835197401734501bfa1d39e72343df64628c67b72bcbf5/ruff-0.9.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05415599bbcb318f730ea1b46a39e4fbf71f6a63fdbfa1dda92efb55f19d7ecf", size = 11278658 }, + { url = "https://files.pythonhosted.org/packages/6c/17/1b3ea5f06578ea1daa08ac35f9de099d1827eea6e116a8cabbf11235c925/ruff-0.9.0-py3-none-musllinux_1_2_aarch64.whl", hash = 
"sha256:fbf9864b009e43cfc1c8bed1a6a4c529156913105780af4141ca4342148517f5", size = 10879847 }, + { url = "https://files.pythonhosted.org/packages/a6/e5/00bc97d6f419da03c0d898e95cca77311494e7274dc7cc17d94976e32e52/ruff-0.9.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:37b3da222b12e2bb2ce628e02586ab4846b1ed7f31f42a5a0683b213453b2d49", size = 10494220 }, + { url = "https://files.pythonhosted.org/packages/cc/70/d0a23d94f3e40b7ffac0e5506f33bb504672569173781a6c7cab0db6a4ba/ruff-0.9.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:733c0fcf2eb0c90055100b4ed1af9c9d87305b901a8feb6a0451fa53ed88199d", size = 11004182 }, + { url = "https://files.pythonhosted.org/packages/20/8e/367cf8e401890f823d0e4eb33635d0113719d5660b6522b7295376dd95fd/ruff-0.9.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:8221a454bfe5ccdf8017512fd6bb60e6ec30f9ea252b8a80e5b73619f6c3cefd", size = 11345761 }, + { url = "https://files.pythonhosted.org/packages/fe/08/4b54e02da73060ebc29368ab15868613f7d2496bde3b01d284d5423646bc/ruff-0.9.0-py3-none-win32.whl", hash = "sha256:d345f2178afd192c7991ddee59155c58145e12ad81310b509bd2e25c5b0247b3", size = 8807005 }, + { url = "https://files.pythonhosted.org/packages/a1/a7/0b422971e897c51bf805f998d75bcfe5d4d858f5002203832875fc91b733/ruff-0.9.0-py3-none-win_amd64.whl", hash = "sha256:0cbc0905d94d21305872f7f8224e30f4bbcd532bc21b2225b2446d8fc7220d19", size = 9689974 }, + { url = "https://files.pythonhosted.org/packages/73/0e/c00f66731e514be3299801b1d9d54efae0abfe8f00a5c14155f2ab9e2920/ruff-0.9.0-py3-none-win_arm64.whl", hash = "sha256:7b1148771c6ca88f820d761350a053a5794bc58e0867739ea93eb5e41ad978cd", size = 9147729 }, ] [[package]] @@ -305,6 +318,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b8/49/21633706dd6feb14cd3f7935fc00b60870ea057686035e1a99ae6d9d9d53/SQLAlchemy-2.0.36-py3-none-any.whl", hash = "sha256:fddbe92b4760c6f5d48162aef14824add991aeda8ddadb3c31d56eb15ca69f8e", size = 1883787 }, ] +[[package]] +name = "trino" +version = "0.331.0" 
+source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "requests" }, + { name = "tzlocal" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/99/80/56716b9ee69e769cd2babc2dfe58d601ea7f244a2e67a7ef02f7fd3bd3df/trino-0.331.0.tar.gz", hash = "sha256:2d9acdf7b19d136c97c98e55599fb9a5556e124282b0a40afb93bdfbb5371708", size = 50851 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2d/aa/cce7a726e314fbeae9fed8ed5d6bd4af19796286d7ee82f120e3633da74c/trino-0.331.0-py3-none-any.whl", hash = "sha256:4f909e6c2966d23917e2538bc7f342d5dcc6e512102811fb1e53bdaf15bd49e3", size = 53771 }, +] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -323,6 +351,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a6/ab/7e5f53c3b9d14972843a647d8d7a853969a58aecc7559cb3267302c94774/tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd", size = 346586 }, ] +[[package]] +name = "tzlocal" +version = "5.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tzdata", marker = "platform_system == 'Windows'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/04/d3/c19d65ae67636fe63953b20c2e4a8ced4497ea232c43ff8d01db16de8dc0/tzlocal-5.2.tar.gz", hash = "sha256:8d399205578f1a9342816409cc1e46a93ebd5755e39ea2d85334bea911bf0e6e", size = 30201 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/97/3f/c4c51c55ff8487f2e6d0e618dba917e3c3ee2caae6cf0fbb59c9b1876f2e/tzlocal-5.2-py3-none-any.whl", hash = "sha256:49816ef2fe65ea8ac19d19aa7a1ae0551c834303d5014c6d5a62e4cbda8047b8", size = 17859 }, +] + [[package]] name = "urllib3" version = "2.3.0" From 0c91a4a116fd18898972eb077e3a76c912e013fa Mon Sep 17 00:00:00 2001 From: Jon Couldridge Date: Fri, 10 Jan 2025 12:43:54 +0000 Subject: [PATCH 4/7] unit tests passing --- tests/test_return.py | 2 -- 1 file changed, 2 deletions(-) 
diff --git a/tests/test_return.py b/tests/test_return.py index 9e2212a..1941305 100644 --- a/tests/test_return.py +++ b/tests/test_return.py @@ -1,11 +1,9 @@ import pytest from hutch_bunny.core.query_solvers import ( AvailabilityQuery, - DistributionQuery, solve_availability, ) from hutch_bunny.core.db_manager import SyncDBManager -from hutch_bunny.core.rquest_dto import cohort from hutch_bunny.core.rquest_dto.result import RquestResult from hutch_bunny.core.rquest_dto.cohort import Cohort from hutch_bunny.core.rquest_dto.group import Group From 0f823e55e822f1684294051f36c7b9659b980a97 Mon Sep 17 00:00:00 2001 From: Jon Couldridge Date: Fri, 10 Jan 2025 13:36:46 +0000 Subject: [PATCH 5/7] uv Dockerfile and sample dev compose files --- .dockerignore | 24 ++++++++ Dockerfile | 8 +++ dev.compose.yml | 76 ++++++++++++++++++++++++ dev.standalone.compose.yml | 22 +++++++ src/hutch_bunny/core/entities.py | 2 +- src/hutch_bunny/core/setting_database.py | 42 +++++++------ src/hutch_bunny/core/settings.py | 14 ++++- 7 files changed, 169 insertions(+), 19 deletions(-) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 dev.compose.yml create mode 100644 dev.standalone.compose.yml diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..2934f36 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,24 @@ +**/.dockerignore +**/.env +**/.venv +**/.git +**/.gitignore +**/.project +**/.settings +**/.toolstarget +**/.vs +**/.vscode +**/.idea +**/*.*proj.user +**/*.dbmdl +**/*.jfm +**/azds.yaml +**/bin +**/charts +**/docker-compose* +**/Dockerfile* +**/node_modules +**/npm-debug.log +**/obj +**/secrets.dev.yaml +**/values.dev.yaml diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..caa9e38 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,8 @@ +FROM ghcr.io/astral-sh/uv:bookworm-slim + +COPY . 
/app +WORKDIR /app + +RUN uv sync --frozen + +ENTRYPOINT ["uv", "run", "bunny-daemon"] diff --git a/dev.compose.yml b/dev.compose.yml new file mode 100644 index 0000000..da2f8e5 --- /dev/null +++ b/dev.compose.yml @@ -0,0 +1,76 @@ +name: hutch-bunny-dev + +services: + db: + image: postgres:16 + restart: always + ports: + - 5432:5432 + environment: + POSTGRES_PASSWORD: postgres + + adminer: + image: wodby/adminer + depends_on: + - db + restart: always + ports: + - 9000:9000 + environment: + ADMINER_DEFAULT_DB_DRIVER: pgsql + ADMINER_DEFAULT_DB_HOST: db + ADMINER_DESIGN: pepa-linha + + rabbitmq: + image: rabbitmq:3-management + ports: + - 5672:5672 + - 15672:15672 + environment: + RABBITMQ_DEFAULT_USER: user + RABBITMQ_DEFAULT_PASS: password + + relay: + image: ghcr.io/health-informatics-uon/hutch/relay:dev-latest + depends_on: + - rabbitmq + - db + restart: always + ports: + - 8080:8080 + - 8081:8081 + environment: + DOTNET_Environment: Development + ConnectionStrings__Default: Server=db;Port=5432;Database=hutch-relay;User Id=postgres;Password=postgres + RelayTaskQueue__ConnectionString: amqp://user:password@rabbitmq:5672 + Obfuscation__LowNumberSuppressionThreshold: 0 + Obfuscation__RoundingTarget: 0 + UpstreamTaskApi__BaseUrl: https:// + UpstreamTaskApi__CollectionId: collection_id + UpstreamTaskApi__Username: username + UpstreamTaskApi__Password: password + Database__ApplyMigrationsOnStartup: true + + bunny: + build: + context: . 
+ dockerfile: Dockerfile + depends_on: + - db + - relay + restart: always + environment: + DATASOURCE_DB_USERNAME: postgres + DATASOURCE_DB_PASSWORD: postgres + DATASOURCE_DB_DATABASE: hutch-omop + DATASOURCE_DB_DRIVERNAME: postgresql + DATASOURCE_DB_SCHEMA: public + DATASOURCE_DB_PORT: 5432 + DATASOURCE_DB_HOST: db + TASK_API_BASE_URL: http://relay:8080/ + TASK_API_USERNAME: username + TASK_API_PASSWORD: password + LOW_NUMBER_SUPPRESSION_THRESHOLD: + ROUNDING_TARGET: + POLLING_INTERVAL: 5 + COLLECTION_ID: collection_id diff --git a/dev.standalone.compose.yml b/dev.standalone.compose.yml new file mode 100644 index 0000000..5479989 --- /dev/null +++ b/dev.standalone.compose.yml @@ -0,0 +1,22 @@ +name: hutch-bunny-standalone-dev + +services: + bunny: + build: + context: . + dockerfile: Dockerfile + environment: + DATASOURCE_DB_USERNAME: postgres + DATASOURCE_DB_PASSWORD: postgres + DATASOURCE_DB_DATABASE: hutch-omop + DATASOURCE_DB_SCHEMA: public + DATASOURCE_DB_PORT: 5432 + DATASOURCE_DB_HOST: host.docker.internal + TASK_API_BASE_URL: https:///link_connector_api + TASK_API_USERNAME: username + TASK_API_PASSWORD: password + COLLECTION_ID: collection_id + TASK_API_TYPE: a + LOW_NUMBER_SUPPRESSION_THRESHOLD: + ROUNDING_TARGET: + POLLING_INTERVAL: 5 diff --git a/src/hutch_bunny/core/entities.py b/src/hutch_bunny/core/entities.py index 188b191..2e6b0f2 100644 --- a/src/hutch_bunny/core/entities.py +++ b/src/hutch_bunny/core/entities.py @@ -9,7 +9,7 @@ DateTime, Text, ) -from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import declarative_base Base = declarative_base() diff --git a/src/hutch_bunny/core/setting_database.py b/src/hutch_bunny/core/setting_database.py index 2358048..c500f6a 100644 --- a/src/hutch_bunny/core/setting_database.py +++ b/src/hutch_bunny/core/setting_database.py @@ -1,40 +1,48 @@ from logging import Logger -import os +from os import environ from hutch_bunny.core.db_manager import SyncDBManager, TrinoDBManager import 
hutch_bunny.core.settings as settings def setting_database(logger: Logger): logger.info("Setting up database connection...") - if bool(os.getenv("USE_TRINO", False)): - datasource_db_port = os.getenv("DATASOURCE_DB_PORT", 8080) + + # Trino has some different settings / defaults compared with SQLAlchemy + if settings.DATASOURCE_USE_TRINO: + datasource_db_port = environ.get("DATASOURCE_DB_PORT", 8080) try: db_manager = TrinoDBManager( - username=os.getenv("DATASOURCE_DB_USERNAME", "trino-user"), - password=os.getenv("DATASOURCE_DB_PASSWORD"), - host=os.getenv("DATASOURCE_DB_HOST"), + username=environ.get("DATASOURCE_DB_USERNAME", "trino-user"), + password=environ.get("DATASOURCE_DB_PASSWORD"), + host=environ.get("DATASOURCE_DB_HOST"), port=int(datasource_db_port), - schema=os.getenv("DATASOURCE_DB_SCHEMA"), - catalog=os.getenv("DATASOURCE_DB_CATALOG"), + schema=environ.get("DATASOURCE_DB_SCHEMA"), + catalog=environ.get("DATASOURCE_DB_CATALOG"), ) except TypeError as e: logger.error(str(e)) exit() else: - datasource_db_port = os.getenv("DATASOURCE_DB_PORT") + datasource_db_port = environ.get("DATASOURCE_DB_PORT") + datasource_db_drivername = environ.get( + "DATASOURCE_DB_DRIVERNAME", settings.DEFAULT_DB_DRIVER + ) + + # expand postgres to a full default driver, so we can override sqlalchemy + if datasource_db_drivername == "postgresql": + datasource_db_drivername = settings.DEFAULT_POSTGRES_DRIVER + try: db_manager = SyncDBManager( - username=os.getenv("DATASOURCE_DB_USERNAME"), - password=os.getenv("DATASOURCE_DB_PASSWORD"), - host=os.getenv("DATASOURCE_DB_HOST"), + username=environ.get("DATASOURCE_DB_USERNAME"), + password=environ.get("DATASOURCE_DB_PASSWORD"), + host=environ.get("DATASOURCE_DB_HOST"), port=( int(datasource_db_port) if datasource_db_port is not None else None ), - database=os.getenv("DATASOURCE_DB_DATABASE"), - drivername=os.getenv( - "DATASOURCE_DB_DRIVERNAME", settings.DEFAULT_DB_DRIVER - ), - schema=os.getenv("DATASOURCE_DB_SCHEMA"), +
database=environ.get("DATASOURCE_DB_DATABASE"), + drivername=datasource_db_drivername, + schema=environ.get("DATASOURCE_DB_SCHEMA"), ) except TypeError as e: logger.error(str(e)) diff --git a/src/hutch_bunny/core/settings.py b/src/hutch_bunny/core/settings.py index 6aef610..6b146a4 100644 --- a/src/hutch_bunny/core/settings.py +++ b/src/hutch_bunny/core/settings.py @@ -3,7 +3,19 @@ load_dotenv() -DEFAULT_DB_DRIVER = "postgresql" + +## +# DB Connection Settings +# Additional settings reads are in `setting_database.py` +## + +DATASOURCE_USE_TRINO = bool(environ.get("DATASOURCE_USE_TRINO", False)) + +# what unqualified `postgresql` will turn into. if left blank, will use SQLAlchemy's default of `postgresql+psycopg2` +DEFAULT_POSTGRES_DRIVER = "postgresql+psycopg" + +# what SQLAlchemy will use if DATASOURCE_DB_DRIVERNAME is not specified in the environment +DEFAULT_DB_DRIVER = DEFAULT_POSTGRES_DRIVER # Logging configuration LOGGER_NAME = "hutch" From 22532d508662bf5a23c7016f0cda0cf97b7aaafa Mon Sep 17 00:00:00 2001 From: Jon Couldridge Date: Fri, 10 Jan 2025 13:48:29 +0000 Subject: [PATCH 6/7] initial readme --- README.md | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b66f926..b77ab58 100644 --- a/README.md +++ b/README.md @@ -1 +1,23 @@ -# hutch-bunny \ No newline at end of file +[![Hutch][hutch-logo]][hutch-repo] + +# 🐇 Hutch Bunny ![MIT License][license-badge] + +| | | | +|-|-|-| +| ![Python][python-badge] | [![Bunny Docker Images][docker-badge]][bunny-containers] | [![Bunny Docs][docs-badge]][bunny-docs] | + +An HDR UK Cohort Discovery Task Resolver. + +Fetches and resolves Availability and Distribution Queries against an OMOP-CDM database.
+ +[hutch-logo]: https://raw.githubusercontent.com/HDRUK/hutch/main/assets/Hutch%20splash%20bg.svg +[hutch-repo]: https://github.com/health-informatics-uon/hutch + +[bunny-docs]: https://health-informatics-uon.github.io/hutch/bunny +[bunny-containers]: https://github.com/Health-Informatics-UoN/hutch-cohort-discovery/pkgs/container/hutch%2Fbunny + +[license-badge]: https://img.shields.io/github/license/health-informatics-uon/hutch-cohort-discovery.svg +[python-badge]: https://img.shields.io/badge/Python-3776AB?style=for-the-badge&logo=python&logoColor=white +[docker-badge]: https://img.shields.io/badge/docker-%230db7ed.svg?style=for-the-badge&logo=docker&logoColor=white +[docs-badge]: https://img.shields.io/badge/docs-black?style=for-the-badge&labelColor=%23222 +[readme-badge]: https://img.shields.io/badge/readme-lightgrey?style=for-the-badge&labelColor=%23222 From afe1e91eb23aea324e37225beeddea4234c02421 Mon Sep 17 00:00:00 2001 From: Jon Couldridge Date: Fri, 10 Jan 2025 16:05:02 +0000 Subject: [PATCH 7/7] tidying up some legacy module info --- src/hutch_bunny/core/__about__.py | 1 - src/hutch_bunny/core/rquest_dto/__init__.py | 1 - 2 files changed, 2 deletions(-) delete mode 100644 src/hutch_bunny/core/__about__.py diff --git a/src/hutch_bunny/core/__about__.py b/src/hutch_bunny/core/__about__.py deleted file mode 100644 index f102a9c..0000000 --- a/src/hutch_bunny/core/__about__.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = "0.0.1" diff --git a/src/hutch_bunny/core/rquest_dto/__init__.py b/src/hutch_bunny/core/rquest_dto/__init__.py index 3dc1f76..e69de29 100644 --- a/src/hutch_bunny/core/rquest_dto/__init__.py +++ b/src/hutch_bunny/core/rquest_dto/__init__.py @@ -1 +0,0 @@ -__version__ = "0.1.0"