-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
rework wikidata and add primary source helpers
- Loading branch information
Showing
5 changed files
with
84 additions
and
201 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from functools import cache | ||
|
||
from mex.common.models import ( | ||
ExtractedPrimarySource, | ||
) | ||
from mex.common.primary_source.extract import extract_seed_primary_sources | ||
from mex.common.primary_source.transform import ( | ||
transform_seed_primary_sources_to_extracted_primary_sources, | ||
) | ||
|
||
|
||
@cache
def get_all_extracted_primary_sources() -> list[ExtractedPrimarySource]:
    """Build the list of all extracted primary sources from the seed data.

    The seed primary sources are extracted first and then handed to the
    transform step; whatever the transform yields is collected into a list.
    Because the function is wrapped in `functools.cache`, this work happens
    on the first call only and later calls get the same list object back.

    Returns:
        List of all ExtractedPrimarySources
    """
    extracted_primary_sources = (
        transform_seed_primary_sources_to_extracted_primary_sources(
            extract_seed_primary_sources()
        )
    )
    return list(extracted_primary_sources)
|
||
|
||
@cache
def get_extracted_primary_source_by_name(name: str) -> ExtractedPrimarySource | None:
    """Look up a single extracted primary source by its name.

    Results are memoized per `name` via `functools.cache`.

    Args:
        name: Value to compare against the `identifierInPrimarySource` of each
            extracted primary source

    Returns:
        The matching extracted primary source, or None if no source has that name
    """
    match: ExtractedPrimarySource | None = None
    for primary_source in get_all_extracted_primary_sources():
        # Keep scanning instead of returning early: if several sources share
        # the same name, the last one in the list is the one that is returned.
        if primary_source.identifierInPrimarySource == name:
            match = primary_source
    return match
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
from functools import cache | ||
|
||
from mex.common.models import ExtractedOrganization, ExtractedPrimarySource | ||
from mex.common.wikidata.extract import search_organization_by_label | ||
from mex.common.wikidata.transform import ( | ||
transform_wikidata_organization_to_extracted_organization, | ||
) | ||
|
||
|
||
@cache
def get_extracted_organization_from_wikidata(
    query_string: str,
    wikidata_primary_source: ExtractedPrimarySource,
) -> ExtractedOrganization | None:
    """Search wikidata by label and return the result as an ExtractedOrganization.

    Results are memoized per argument pair via `functools.cache`, so both
    arguments need to be hashable.

    Args:
        query_string: label to search for in wikidata
        wikidata_primary_source: primary source passed on to the transform step

    Returns:
        The transformed ExtractedOrganization, or None when the search yields
        no organization or the transform itself returns None.
    """
    wikidata_organization = search_organization_by_label(query_string)
    if wikidata_organization is None:
        return None

    # The transform may return None as well; that value is passed through as is.
    return transform_wikidata_organization_to_extracted_organization(
        wikidata_organization, wikidata_primary_source
    )
This file was deleted.
Oops, something went wrong.