Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature/mx-1664-improve-wikidata-convenience-function-for-use-in-extractors #339

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

### Changes
- wikidata helper `get_extracted_organization_from_wikidata` now optionally accepts the wikidata primary source as an argument

### Deprecated

Expand Down
7 changes: 5 additions & 2 deletions mex/common/wikidata/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from mex.common.exceptions import MExError
from mex.common.models.organization import ExtractedOrganization
from mex.common.models.primary_source import ExtractedPrimarySource
from mex.common.primary_source.helpers import get_extracted_primary_source_by_name
from mex.common.wikidata.extract import search_organization_by_label
from mex.common.wikidata.transform import (
Expand All @@ -12,14 +13,15 @@
@cache
def get_extracted_organization_from_wikidata(
query_string: str,
wikidata_primary_source: ExtractedPrimarySource | None = None,
) -> ExtractedOrganization | None:
"""Get extracted organization matching the query string.

Search wikidata for organization and transform it into an ExtractedOrganization.

Args:
query_string: query string to search in wikidata
wikidata_primary_source: wikidata primary source
wikidata_primary_source: optional wikidata primary source

Returns:
ExtractedOrganization if one matching organization is found in
Expand All @@ -31,7 +33,8 @@ def get_extracted_organization_from_wikidata(
if found_organization is None:
return None

wikidata_primary_source = get_extracted_primary_source_by_name("wikidata")
if not wikidata_primary_source:
cutoffthetop marked this conversation as resolved.
Show resolved Hide resolved
wikidata_primary_source = get_extracted_primary_source_by_name("wikidata")
if not wikidata_primary_source:
msg = "Primary source for wikidata not found"
raise MExError(msg)
Expand Down
15 changes: 11 additions & 4 deletions tests/wikidata/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,16 @@ def test_get_extracted_organization_from_wikidata(
)
)

# organization found and transformed
returned = get_extracted_organization_from_wikidata("Robert Koch-Institut")
assert returned == extracted_wikidata_organization
# test when passing the wikidata primary source: organization found and transformed
assert extracted_wikidata_organization == get_extracted_organization_from_wikidata(
"Robert Koch-Institut",
wikidata_primary_source,
)

# test without passing the wikidata primary source: organization found and transformed
assert extracted_wikidata_organization == get_extracted_organization_from_wikidata(
"Robert Koch-Institut",
)


@pytest.mark.integration
Expand All @@ -37,4 +44,4 @@ def test_get_extracted_organization_from_wikidata_for_nonsensequery_and_exceptio
assert returned is None

except MExError:
pytest.fail("Primary Source Wikidata not found")
pytest.fail("Primary source for wikidata not found")