Skip to content

Commit

Permalink
feature/mx-1381 rework database model (#25)
Browse files Browse the repository at this point in the history
# Changes

- re-implemented queries as templated cql files
- updated graph connector for new queries
- improved isolation of neo4j dependency
- improved documentation and code-readability

# Removed

- trashed hydration module
  • Loading branch information
cutoffthetop authored Apr 8, 2024
1 parent ade4cda commit e8ec12a
Show file tree
Hide file tree
Showing 51 changed files with 2,256 additions and 1,896 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ default_language_version:
python: python3.11
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.2
rev: v0.3.5
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
- repo: https://github.com/psf/black
rev: 24.2.0
rev: 24.3.0
hooks:
- id: black
- repo: https://github.com/pre-commit/pre-commit-hooks
Expand Down
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changes

- re-implemented queries as templated cql files
- updated graph connector for new queries
- improved isolation of neo4j dependency
- improved documentation and code-readability

### Deprecated

### Removed

- trashed hydration module

### Fixed

### Security
Expand Down
14 changes: 6 additions & 8 deletions mex/backend/extracted/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,9 @@

from fastapi import APIRouter, Query

from mex.backend.extracted.models import ExtractedItemSearchResponse, ExtractedType
from mex.backend.extracted.transform import (
transform_graph_results_to_extracted_item_search_response,
)
from mex.backend.extracted.models import ExtractedItemSearchResponse
from mex.backend.graph.connector import GraphConnector
from mex.backend.types import ExtractedType
from mex.common.types import Identifier

router = APIRouter()
Expand All @@ -15,20 +13,20 @@
@router.get("/extracted-item", tags=["editor"])
def search_extracted_items(
    q: Annotated[str, Query(max_length=100)] = "",
    stableTargetId: Identifier | None = None,
    entityType: Annotated[
        Sequence[ExtractedType], Query(max_length=len(ExtractedType))
    ] = [],
    skip: Annotated[int, Query(ge=0, le=10e10)] = 0,
    limit: Annotated[int, Query(ge=1, le=100)] = 10,
) -> ExtractedItemSearchResponse:
    """Search for extracted items by query text or by type and id.

    Args:
        q: Query text, limited to 100 characters
        stableTargetId: Optional identifier that is passed to the graph query
        entityType: Extracted types to search for, all types when left empty
        skip: Non-negative offset that is passed to the graph query
        limit: Upper bound for the number of items, between 1 and 100

    Returns:
        Search response validated from the single record of the graph result
    """
    graph = GraphConnector.get()
    result = graph.fetch_extracted_data(
        q,
        stableTargetId,
        # an empty selection falls back to every member of `ExtractedType`
        [str(t.value) for t in entityType or ExtractedType],
        skip,
        limit,
    )
    return ExtractedItemSearchResponse.model_validate(result.one())
24 changes: 3 additions & 21 deletions mex/backend/extracted/models.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,12 @@
from enum import Enum
from typing import TYPE_CHECKING, Union
from typing import Annotated

from pydantic import Field

from mex.backend.types import DynamicStrEnum
from mex.common.models import (
EXTRACTED_MODEL_CLASSES_BY_NAME,
BaseExtractedData,
BaseModel,
)


class ExtractedType(Enum, metaclass=DynamicStrEnum):
"""Enumeration of possible types for extracted items."""

__names__ = list(EXTRACTED_MODEL_CLASSES_BY_NAME)


if TYPE_CHECKING: # pragma: no cover
AnyExtractedModel = BaseExtractedData
else:
AnyExtractedModel = Union[*EXTRACTED_MODEL_CLASSES_BY_NAME.values()]
from mex.common.models import AnyExtractedModel, BaseModel


class ExtractedItemSearchResponse(BaseModel):
    """Response body for the extracted item search endpoint."""

    # presumably the overall number of matching items -- verify against the query
    total: int
    # the `entityType` field of each item is declared as the union discriminator
    items: Annotated[list[AnyExtractedModel], Field(discriminator="entityType")]
40 changes: 0 additions & 40 deletions mex/backend/extracted/transform.py

This file was deleted.

172 changes: 138 additions & 34 deletions mex/backend/fields.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,164 @@
from types import UnionType
from typing import Annotated, Any, Generator, Union, get_args, get_origin
from types import NoneType, UnionType
from typing import (
Annotated,
Any,
Callable,
Generator,
Mapping,
Union,
get_args,
get_origin,
)

from pydantic import BaseModel
from pydantic.fields import FieldInfo

from mex.backend.types import LiteralStringType
from mex.common.models import EXTRACTED_MODEL_CLASSES_BY_NAME
from mex.common.types import Identifier, Text
from mex.common.types import MERGED_IDENTIFIER_CLASSES, Link, Text


def _get_inner_types(annotation: Any) -> Generator[type, None, None]:
"""Yield all inner types from Unions, lists and annotations."""
"""Yield all inner types from unions, lists and type annotations (except NoneType).
Args:
annotation: A valid python type annotation
Returns:
A generator for all (non-NoneType) types found in the annotation
"""
if get_origin(annotation) == Annotated:
yield from _get_inner_types(get_args(annotation)[0])
elif get_origin(annotation) in (Union, UnionType, list):
for arg in get_args(annotation):
yield from _get_inner_types(arg)
elif annotation is None:
yield type(None)
else:
elif annotation not in (None, NoneType):
yield annotation


def _contains_only_types(field: FieldInfo, *types: type) -> bool:
    """Return whether a `field` is annotated exclusively with the given `types`.

    Unions, lists and type annotations are checked for their inner types and only the
    non-`NoneType` types are considered for the type-check.

    Args:
        field: A pydantic `FieldInfo` object
        types: Types to look for in the field's annotation

    Returns:
        Whether the annotation has at least one non-`NoneType` inner type and
        every one of these inner types is among the given `types`
    """
    if inner_types := list(_get_inner_types(field.annotation)):
        return all(inner_type in types for inner_type in inner_types)
    # an annotation without any non-`NoneType` inner type never matches
    return False

def _group_fields_by_class_name(
    model_classes_by_name: Mapping[str, type[BaseModel]],
    predicate: Callable[[FieldInfo], bool],
) -> dict[str, list[str]]:
    """Group the field names by model class and filter them by the given predicate.

    Args:
        model_classes_by_name: Map from class names to model classes
        predicate: Function to filter the fields of the classes by

    Returns:
        Dictionary mapping class names to a list of field names filtered by `predicate`
    """
    return {
        # sorting makes the order of the field names deterministic
        name: sorted(
            {
                field_name
                for field_name, field_info in cls.model_fields.items()
                if predicate(field_info)
            }
        )
        for name, cls in model_classes_by_name.items()
    }


# immutable fields: flagged as frozen on the model, so they can only be set once
FROZEN_FIELDS_BY_CLASS_NAME = _group_fields_by_class_name(
    EXTRACTED_MODEL_CLASSES_BY_NAME,
    lambda info: info.frozen is True,
)

# static fields whose annotation is a literal type fixed on class-level
LITERAL_FIELDS_BY_CLASS_NAME = _group_fields_by_class_name(
    EXTRACTED_MODEL_CLASSES_BY_NAME,
    lambda info: isinstance(info.annotation, LiteralStringType),
)

# reference fields: annotated only with merged identifier classes
REFERENCE_FIELDS_BY_CLASS_NAME = _group_fields_by_class_name(
    EXTRACTED_MODEL_CLASSES_BY_NAME,
    lambda info: _contains_only_types(info, *MERGED_IDENTIFIER_CLASSES),
)

# nested fields: annotated only with `Text`
TEXT_FIELDS_BY_CLASS_NAME = _group_fields_by_class_name(
    EXTRACTED_MODEL_CLASSES_BY_NAME,
    lambda info: _contains_only_types(info, Text),
)

# nested fields: annotated only with `Link`
LINK_FIELDS_BY_CLASS_NAME = _group_fields_by_class_name(
    EXTRACTED_MODEL_CLASSES_BY_NAME,
    lambda info: _contains_only_types(info, Link),
)

# plain string fields: annotated only with `str`
STRING_FIELDS_BY_CLASS_NAME = _group_fields_by_class_name(
    EXTRACTED_MODEL_CLASSES_BY_NAME,
    lambda info: _contains_only_types(info, str),
)

# names of all string fields across all classes, to be indexed as searchable fields
SEARCHABLE_FIELDS = sorted(set().union(*STRING_FIELDS_BY_CLASS_NAME.values()))

# names of the classes that have at least one string field to search in
SEARCHABLE_CLASSES = sorted(
    class_name
    for class_name, string_fields in STRING_FIELDS_BY_CLASS_NAME.items()
    if string_fields
)

# changeable fields: everything that is not frozen, a reference, a text or a link
MUTABLE_FIELDS_BY_CLASS_NAME = {
    class_name: sorted(
        set(model_class.model_fields)
        - {
            *FROZEN_FIELDS_BY_CLASS_NAME[class_name],
            *REFERENCE_FIELDS_BY_CLASS_NAME[class_name],
            *TEXT_FIELDS_BY_CLASS_NAME[class_name],
            *LINK_FIELDS_BY_CLASS_NAME[class_name],
        }
    )
    for class_name, model_class in EXTRACTED_MODEL_CLASSES_BY_NAME.items()
}

# fields with values that should be set once but are neither literal nor references
FINAL_FIELDS_BY_CLASS_NAME = {
    name: sorted(
        {
            field_name
            for field_name in cls.model_fields
            # start from the frozen fields of the class ...
            if field_name in FROZEN_FIELDS_BY_CLASS_NAME[name]
            # ... and leave out those that are literals or references
            and field_name
            not in (
                *LITERAL_FIELDS_BY_CLASS_NAME[name],
                *REFERENCE_FIELDS_BY_CLASS_NAME[name],
            )
        }
    )
    for name, cls in EXTRACTED_MODEL_CLASSES_BY_NAME.items()
}
Loading

0 comments on commit e8ec12a

Please sign in to comment.