Skip to content

Commit

Permalink
rework wikidata and add primary source helpers
Browse files Browse the repository at this point in the history
  • Loading branch information
erichesse committed Sep 12, 2024
1 parent 9833fb0 commit c9149ce
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 201 deletions.
43 changes: 43 additions & 0 deletions mex/common/primary_source/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from functools import cache

from mex.common.models import (
ExtractedPrimarySource,
)
from mex.common.primary_source.extract import extract_seed_primary_sources
from mex.common.primary_source.transform import (
transform_seed_primary_sources_to_extracted_primary_sources,
)


@cache
def get_all_extracted_primary_sources() -> list[ExtractedPrimarySource]:
    """Load every seed primary source and convert it to its extracted form.

    The seed primary sources are read via `extract_seed_primary_sources` and
    passed through `transform_seed_primary_sources_to_extracted_primary_sources`;
    the transformed items are collected into a list.

    Because the function is wrapped in `functools.cache`, the pipeline runs only
    on the first call and every later call receives the very same list object,
    so callers should treat the result as read-only.

    Returns:
        List of all ExtractedPrimarySources
    """
    # Unpack the transformed iterable into a fresh list exactly once; the
    # cache decorator keeps that list for the lifetime of the process.
    return [
        *transform_seed_primary_sources_to_extracted_primary_sources(
            extract_seed_primary_sources()
        )
    ]


@cache
def get_extracted_primary_source_by_name(name: str) -> ExtractedPrimarySource | None:
    """Look up a single extracted primary source by its name.

    The name is compared against the `identifierInPrimarySource` attribute of
    each item returned by `get_all_extracted_primary_sources`. Results are
    memoized per `name` by `functools.cache`.

    Args:
        name: Name (`identifierInPrimarySource`) of the primary source

    Returns:
        Extracted primary source if it was found, else None
    """
    matching_source: ExtractedPrimarySource | None = None
    for candidate in get_all_extracted_primary_sources():
        # Keep scanning after a hit: if several sources share the name, the
        # last one in iteration order is the one that gets returned.
        if candidate.identifierInPrimarySource == name:
            matching_source = candidate
    return matching_source
1 change: 1 addition & 0 deletions mex/common/primary_source/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def transform_seed_primary_sources_to_extracted_primary_sources(
)


# TODO: Remove this in MX-1698
def get_primary_sources_by_name(
extracted_primary_sources: Iterable[ExtractedPrimarySource], *names: str
) -> tuple[ExtractedPrimarySource, ...]:
Expand Down
71 changes: 0 additions & 71 deletions mex/common/wikidata/convenience.py

This file was deleted.

40 changes: 40 additions & 0 deletions mex/common/wikidata/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from functools import cache

from mex.common.models import ExtractedOrganization, ExtractedPrimarySource
from mex.common.wikidata.extract import search_organization_by_label
from mex.common.wikidata.transform import (
transform_wikidata_organization_to_extracted_organization,
)


@cache
def get_extracted_organization_from_wikidata(
    query_string: str,
    wikidata_primary_source: ExtractedPrimarySource,
) -> ExtractedOrganization | None:
    """Search wikidata for an organization and return it in extracted form.

    The query string is handed to `search_organization_by_label`; a hit is then
    converted with `transform_wikidata_organization_to_extracted_organization`.
    Results are memoized by `functools.cache`, keyed on both arguments, so both
    must be hashable.

    Args:
        query_string: query string to search in wikidata
        wikidata_primary_source: wikidata primary source

    Returns:
        ExtractedOrganization if one matching organization is found in
        Wikidata lookup.
        None if multiple matches / no organization is found.
    """
    wikidata_organization = search_organization_by_label(query_string)
    # Without a search hit there is nothing to transform; otherwise hand back
    # whatever the transformation yields (which may itself be None).
    return (
        None
        if wikidata_organization is None
        else transform_wikidata_organization_to_extracted_organization(
            wikidata_organization, wikidata_primary_source
        )
    )
130 changes: 0 additions & 130 deletions tests/wikidata/test_convenience.py

This file was deleted.

0 comments on commit c9149ce

Please sign in to comment.