Skip to content

Commit

Permalink
Added change log details and DHIS2 info in settings config file (#3)
Browse files Browse the repository at this point in the history
* Added change log details and DHIS2 info in settings config file

* Mock calls to the DHIS2 server in unit tests
  • Loading branch information
ginic authored Jul 9, 2024
1 parent 565a7bd commit c8e8c9f
Show file tree
Hide file tree
Showing 11 changed files with 75 additions and 254 deletions.
29 changes: 5 additions & 24 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

You should also add project tags for each release in Github, see [Managing releases in a repository](https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository).

## [2.0.0] - 2024-05-29
## [0.0.1] - 2024-07-03
### Added
- Added example auto-built Sphinx documentation in the `docs` folder
- Github workflow for running ruff linter
- A note about conda dependencies to README
- A note about using docker containers to README
- Ruff as a linter for development
### Changed
- All build and packaging switched to use only pyproject.toml
- Minimum python version changed to 3.10
- Github workflow checks python versions 3.10, 3.11, 3.12
- Updated DVC version to avoid `ImportError: cannot import name 'fsspec_loop'` in older versions
### Removed
- Removed setup.cfg

## [1.0.0] - 2022-05-23
### Added
- README and CHANGELOG
- cdstemplate packages for computing word count from input text
- corpus_counter_script.py as a user-facing script with argparse examples
- Tests of cdstemplate packages
- A github workflow to trigger tests on pull request to the main branch
- Sample text data from Project Gutenberg
- Data Version Control stage for the corpus_counter_script.py
- A sample Jupyter notebook that plots the most frequent words in the Gutenberg data
- Notebook for downloading sample test data
- msfocr.data.data_upload_DHIS2 created for sending key/value pairs to a DHIS2 server
- msfocr.docTR created to implement extracting tables from images using image2table and docTR
- Initial package structure created
2 changes: 1 addition & 1 deletion LICENSE.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2023 University of Massachusetts Amherst, Center for Data Science
Copyright (c) 2024 University of Massachusetts Amherst, Center for Data Science

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
219 changes: 5 additions & 214 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion data/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1 @@
/gutenberg_counts.csv
MSF_data
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@ dependencies = [

[project.optional-dependencies]
# Extra dependencies only needed for running tests go here
test = ["pytest"]
test = [
"pytest",
"requests_mock",
]

# Dependencies that are useful only to developers, like an autoformatter and support for visualizations in jupyter notebooks go here
dev = [
Expand Down
4 changes: 4 additions & 0 deletions settings.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[DHIS2Server]
username=
password=
server_url=
Empty file added src/msfocr/data/__init__.py
Empty file.
25 changes: 18 additions & 7 deletions src/msfocr/data/data_upload_DHIS2.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,21 @@
import configparser

import requests

dhis2_username = 'anju-santosh-kumar'
dhis2_password = 'AnjSOCR!01'
DHIS2_Test_Server_URL = 'https://ocr.twoca.org/'
# Connection settings for the DHIS2 server. They start out as None and are
# filled in by configure_DHIS2_server() from a settings file.
# Set these before trying to make requests
DHIS2_USERNAME = None
DHIS2_PASSWORD = None
DHIS2_SERVER_URL = None

# TODO It might be clearer to create a Server object class and have this be the __init__() function
def configure_DHIS2_server(config_path="settings.ini"):
    """Load the DHIS2 connection settings from an INI file into module globals.

    The file must contain a ``[DHIS2Server]`` section with the keys
    ``username``, ``password`` and ``server_url``. On success the module-level
    ``DHIS2_USERNAME``, ``DHIS2_PASSWORD`` and ``DHIS2_SERVER_URL`` are set to
    the values read from that section.

    Args:
        config_path: Path to the settings file (string or path-like object).

    Raises:
        KeyError: If the file cannot be read, has no ``[DHIS2Server]`` section,
            or the section lacks one of the required keys. In that case none
            of the module globals are modified.
    """
    global DHIS2_SERVER_URL, DHIS2_USERNAME, DHIS2_PASSWORD

    config = configparser.ConfigParser()
    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it actually parsed, so an empty result means nothing was
    # loaded. Report that with the path instead of a bare KeyError.
    files_read = config.read(config_path)
    if not files_read or not config.has_section("DHIS2Server"):
        raise KeyError(
            f"No [DHIS2Server] section could be read from {config_path}"
        )
    dhis2_section = config["DHIS2Server"]

    # Build the whole tuple before assigning so that a missing key raises
    # before any global changes, never leaving the module half-configured.
    DHIS2_USERNAME, DHIS2_PASSWORD, DHIS2_SERVER_URL = (
        dhis2_section["username"],
        dhis2_section["password"],
        dhis2_section["server_url"],
    )


# Command to get all fields that are organisationUnits
Expand All @@ -14,14 +27,12 @@
def getUID(item_type, search_items):
filter_param = 'filter=' + '&filter='.join([f'name:ilike:{term}' for term in search_items])

url = f'{DHIS2_Test_Server_URL}/api/{item_type}?{filter_param}'

response = requests.get(url, auth=(dhis2_username, dhis2_password))
url = f'{DHIS2_SERVER_URL}/api/{item_type}?{filter_param}'
response = requests.get(url, auth=(DHIS2_USERNAME, DHIS2_PASSWORD))
if response.status_code == 401:
raise ValueError("Authentication failed. Check your username and password.")

data = response.json()

items = data[item_type]
print(f"{len(data[item_type])} matches found for {search_items}")
print(items)
Expand Down
6 changes: 4 additions & 2 deletions src/msfocr/docTR/ocr_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,11 @@ def generate_key_value_pairs(table):
print(data_element, data_element_id)
category_id = data_upload_DHIS2.getUID('categoryOptions', [category])
# Append to the list of data elements to be pushed to DHIS2
data_element_pairs.append({ 'dataElement': data_element_id,
data_element_pairs.append(
{'dataElement': data_element_id,
'categoryOptions': category_id,
'value': cell_value})
'value': cell_value}
)

return data_element_pairs

Expand Down
27 changes: 25 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,31 @@
import pytest
import configparser
from pathlib import Path

import pytest

from msfocr.data.data_upload_DHIS2 import configure_DHIS2_server

@pytest.fixture
def datadir(request):
def datadir():
# Path to the directory containing test data
test_data_dir = Path(__file__).parent / 'data'
return test_data_dir


@pytest.fixture
def test_server_config(tmp_path):
    """Configure the DHIS2 module with fake server settings.

    Writes a throwaway settings file under ``tmp_path`` and loads it through
    ``configure_DHIS2_server``. This fixture does not intercept any HTTP
    traffic itself: you will still need to use requests_mock to imitate
    responses from http://test.com.
    """
    settings_file = tmp_path / "test_settings.ini"

    parser = configparser.ConfigParser()
    parser["DHIS2Server"] = {
        "username": "tester",
        "password": "testing_password",
        "server_url": "http://test.com",
    }
    with settings_file.open("w") as handle:
        parser.write(handle)

    configure_DHIS2_server(settings_file)


10 changes: 8 additions & 2 deletions tests/test_ocr_functions.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from msfocr.docTR import ocr_functions
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from img2table.document import Image
from img2table.ocr import DocTR
import pandas as pd

from msfocr.docTR import ocr_functions

def test_get_sheet_type(datadir):
"""
Tests if the tally sheet type (dataSet, orgUnit, period) detected for a sample image is correct.
Expand All @@ -21,7 +22,7 @@ def test_get_sheet_type(datadir):
assert sheet_type[2] == ["2024-06-25", "2024-06-30"]


def test_generate_key_value_pairs():
def test_generate_key_value_pairs(test_server_config, requests_mock):
"""
Tests if the dataElement value in the key-value pairs is correct by providing sample tabular data.
"""
Expand All @@ -41,6 +42,11 @@ def test_generate_key_value_pairs():
'5-14y': [None, None, None]
})

requests_mock.get("http://test.com/api/dataElements?filter=name:ilike:BCG", json={"dataElements":[{"id": 1, "displayName": "AVAC_002 BCG"}]})
requests_mock.get("http://test.com/api/categoryOptions?filter=name:ilike:0-11m", json={'categoryOptions': [{'id': 2, 'displayName': '0-11m'}]})
requests_mock.get("http://test.com/api/dataElements?filter=name:ilike:Polio (OPV) 1 (from 6 wks)", json={'dataElements': [{'id': 3, 'displayName': 'AVAC_006 Polio (OPV) 1 (from 6 wks)'}]})
requests_mock.get("http://test.com/api/categoryOptions?filter=name:ilike:12-59m", json={'categoryOptions': [{'id': 'tWRttYIzvBn', 'displayName': '12-59m'}]})

answer = [{'dataElement': '', 'categoryOptions': '', 'value': '45+29'},
{'dataElement': '', 'categoryOptions': '', 'value': '30+18'},
{'dataElement': '', 'categoryOptions': '', 'value': '55+29'}]
Expand Down

0 comments on commit c8e8c9f

Please sign in to comment.