-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feature/mx-1381 rework database model (#25)
# Changes
- re-implemented queries as templated cql files
- updated graph connector for new queries
- improved isolation of neo4j dependency
- improved documentation and code-readability

# Removed
- trashed hydration module
- Loading branch information
1 parent
ade4cda
commit e8ec12a
Showing 51 changed files with 2,256 additions and 1,896 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,12 @@ | ||
from enum import Enum | ||
from typing import TYPE_CHECKING, Union | ||
from typing import Annotated | ||
|
||
from pydantic import Field | ||
|
||
from mex.backend.types import DynamicStrEnum | ||
from mex.common.models import ( | ||
EXTRACTED_MODEL_CLASSES_BY_NAME, | ||
BaseExtractedData, | ||
BaseModel, | ||
) | ||
|
||
|
||
class ExtractedType(Enum, metaclass=DynamicStrEnum): | ||
"""Enumeration of possible types for extracted items.""" | ||
|
||
__names__ = list(EXTRACTED_MODEL_CLASSES_BY_NAME) | ||
|
||
|
||
if TYPE_CHECKING: # pragma: no cover | ||
AnyExtractedModel = BaseExtractedData | ||
else: | ||
AnyExtractedModel = Union[*EXTRACTED_MODEL_CLASSES_BY_NAME.values()] | ||
from mex.common.models import AnyExtractedModel, BaseModel | ||
|
||
|
||
class ExtractedItemSearchResponse(BaseModel): | ||
"""Response body for the extracted item search endpoint.""" | ||
|
||
total: int | ||
items: list[AnyExtractedModel] = Field(discriminator="entityType") | ||
items: Annotated[list[AnyExtractedModel], Field(discriminator="entityType")] |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,60 +1,164 @@ | ||
from types import UnionType | ||
from typing import Annotated, Any, Generator, Union, get_args, get_origin | ||
from types import NoneType, UnionType | ||
from typing import ( | ||
Annotated, | ||
Any, | ||
Callable, | ||
Generator, | ||
Mapping, | ||
Union, | ||
get_args, | ||
get_origin, | ||
) | ||
|
||
from pydantic import BaseModel | ||
from pydantic.fields import FieldInfo | ||
|
||
from mex.backend.types import LiteralStringType | ||
from mex.common.models import EXTRACTED_MODEL_CLASSES_BY_NAME | ||
from mex.common.types import Identifier, Text | ||
from mex.common.types import MERGED_IDENTIFIER_CLASSES, Link, Text | ||
|
||
|
||
def _get_inner_types(annotation: Any) -> Generator[type, None, None]: | ||
"""Yield all inner types from Unions, lists and annotations.""" | ||
"""Yield all inner types from unions, lists and type annotations (except NoneType). | ||
Args: | ||
annotation: A valid python type annotation | ||
Returns: | ||
A generator for all (non-NoneType) types found in the annotation | ||
""" | ||
if get_origin(annotation) == Annotated: | ||
yield from _get_inner_types(get_args(annotation)[0]) | ||
elif get_origin(annotation) in (Union, UnionType, list): | ||
for arg in get_args(annotation): | ||
yield from _get_inner_types(arg) | ||
elif annotation is None: | ||
yield type(None) | ||
else: | ||
elif annotation not in (None, NoneType): | ||
yield annotation | ||
|
||
|
||
def is_reference_field(field: FieldInfo) -> bool: | ||
"""Return whether the given field contains a stable target id.""" | ||
return any( | ||
isinstance(t, type) and issubclass(t, Identifier) | ||
for t in _get_inner_types(field.annotation) | ||
) | ||
def _contains_only_types(field: FieldInfo, *types: type) -> bool: | ||
"""Return whether a `field` is annotated as one of the given `types`. | ||
Unions, lists and type annotations are checked for their inner types and only the | ||
non-`NoneType` types are considered for the type-check. | ||
def is_text_field(field: FieldInfo) -> bool: | ||
"""Return whether the given field is holding text objects.""" | ||
return any( | ||
isinstance(t, type) and issubclass(t, Text) | ||
for t in _get_inner_types(field.annotation) | ||
) | ||
Args: | ||
field: A pydantic `FieldInfo` object | ||
types: Types to look for in the field's annotation | ||
Returns: | ||
Whether the field contains any of the given types | ||
""" | ||
if inner_types := list(_get_inner_types(field.annotation)): | ||
return all(inner_type in types for inner_type in inner_types) | ||
return False | ||
|
||
REFERENCE_FIELDS_BY_CLASS_NAME = { | ||
name: { | ||
field_name | ||
for field_name, field_info in cls.model_fields.items() | ||
if field_name | ||
not in ( | ||
"identifier", | ||
"stableTargetId", | ||
|
||
def _group_fields_by_class_name( | ||
model_classes_by_name: Mapping[str, type[BaseModel]], | ||
predicate: Callable[[FieldInfo], bool], | ||
) -> dict[str, list[str]]: | ||
"""Group the field names by model class and filter them by the given predicate. | ||
Args: | ||
model_classes_by_name: Map from class names to model classes | ||
predicate: Function to filter the fields of the classes by | ||
Returns: | ||
Dictionary mapping class names to a list of field names filtered by `predicate` | ||
""" | ||
return { | ||
name: sorted( | ||
{ | ||
field_name | ||
for field_name, field_info in cls.model_fields.items() | ||
if predicate(field_info) | ||
} | ||
) | ||
and is_reference_field(field_info) | ||
for name, cls in model_classes_by_name.items() | ||
} | ||
|
||
|
||
# fields that are immutable and can only be set once | ||
FROZEN_FIELDS_BY_CLASS_NAME = _group_fields_by_class_name( | ||
EXTRACTED_MODEL_CLASSES_BY_NAME, lambda field_info: field_info.frozen is True | ||
) | ||
|
||
# static fields that are set once on class-level to a literal type | ||
LITERAL_FIELDS_BY_CLASS_NAME = _group_fields_by_class_name( | ||
EXTRACTED_MODEL_CLASSES_BY_NAME, | ||
lambda field_info: isinstance(field_info.annotation, LiteralStringType), | ||
) | ||
|
||
# fields typed as merged identifiers containing references to merged items | ||
REFERENCE_FIELDS_BY_CLASS_NAME = _group_fields_by_class_name( | ||
EXTRACTED_MODEL_CLASSES_BY_NAME, | ||
lambda field_info: _contains_only_types(field_info, *MERGED_IDENTIFIER_CLASSES), | ||
) | ||
|
||
# nested fields that contain `Text` objects | ||
TEXT_FIELDS_BY_CLASS_NAME = _group_fields_by_class_name( | ||
EXTRACTED_MODEL_CLASSES_BY_NAME, | ||
lambda field_info: _contains_only_types(field_info, Text), | ||
) | ||
|
||
# nested fields that contain `Link` objects | ||
LINK_FIELDS_BY_CLASS_NAME = _group_fields_by_class_name( | ||
EXTRACTED_MODEL_CLASSES_BY_NAME, | ||
lambda field_info: _contains_only_types(field_info, Link), | ||
) | ||
|
||
# fields annotated as `str` type | ||
STRING_FIELDS_BY_CLASS_NAME = _group_fields_by_class_name( | ||
EXTRACTED_MODEL_CLASSES_BY_NAME, | ||
lambda field_info: _contains_only_types(field_info, str), | ||
) | ||
|
||
# fields that should be indexed as searchable fields | ||
SEARCHABLE_FIELDS = sorted( | ||
{ | ||
field_name | ||
for field_names in STRING_FIELDS_BY_CLASS_NAME.values() | ||
for field_name in field_names | ||
} | ||
) | ||
|
||
# classes that have fields that should be searchable | ||
SEARCHABLE_CLASSES = sorted( | ||
{name for name, field_names in STRING_FIELDS_BY_CLASS_NAME.items() if field_names} | ||
) | ||
|
||
# fields with changeable values that are not nested objects or merged item references | ||
MUTABLE_FIELDS_BY_CLASS_NAME = { | ||
name: sorted( | ||
{ | ||
field_name | ||
for field_name in cls.model_fields | ||
if field_name | ||
not in ( | ||
*FROZEN_FIELDS_BY_CLASS_NAME[name], | ||
*REFERENCE_FIELDS_BY_CLASS_NAME[name], | ||
*TEXT_FIELDS_BY_CLASS_NAME[name], | ||
*LINK_FIELDS_BY_CLASS_NAME[name], | ||
) | ||
} | ||
) | ||
for name, cls in EXTRACTED_MODEL_CLASSES_BY_NAME.items() | ||
} | ||
|
||
TEXT_FIELDS_BY_CLASS_NAME = { | ||
name: { | ||
f"{field_name}_value" | ||
for field_name, field_info in cls.model_fields.items() | ||
if is_text_field(field_info) | ||
} | ||
# fields with values that should be set once but are neither literal nor references | ||
FINAL_FIELDS_BY_CLASS_NAME = { | ||
name: sorted( | ||
{ | ||
field_name | ||
for field_name in cls.model_fields | ||
if field_name in FROZEN_FIELDS_BY_CLASS_NAME[name] | ||
and field_name | ||
not in ( | ||
*LITERAL_FIELDS_BY_CLASS_NAME[name], | ||
*REFERENCE_FIELDS_BY_CLASS_NAME[name], | ||
) | ||
} | ||
) | ||
for name, cls in EXTRACTED_MODEL_CLASSES_BY_NAME.items() | ||
} |
Oops, something went wrong.