feature/mx-1702 prep editor types #168

Merged 17 commits on Oct 17, 2024

Changes from all commits
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -25,7 +25,7 @@ repos:
       - id: fix-byte-order-marker
         name: byte-order
   - repo: https://github.com/pdm-project/pdm
-    rev: 2.18.2
+    rev: 2.19.1
     hooks:
       - id: pdm-lock-check
         name: pdm

12 changes: 11 additions & 1 deletion CHANGELOG.md
@@ -11,12 +11,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Changes

+- silence neo4j missing label warnings, because we will likely never need all labels
+- sort search results by `identifier` and `entityType` to ensure a more stable order
+- improve handling of pydantic validation errors and uncaught errors
+
 ### Deprecated

 ### Removed

+- remove already obsolete module `mex.backend.serialization`
+  this is not needed any more with the new mex-common version
+
 ### Fixed

+- fix how merged edges are counted (currently only used for debugging)
+
 ### Security

 ## [0.19.1] - 2024-09-18

@@ -77,7 +86,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - remove stop-gaps for MX-1596

 ### Removed
-- removed class _BaseBulkIngestRequest for ingestion model
+
+- removed class _BaseBulkIngestRequest for ingestion model

 ## [0.17.0] - 2024-07-29

52 changes: 32 additions & 20 deletions mex/backend/exceptions.py
@@ -1,10 +1,10 @@
-from typing import Any
+from typing import Any, cast

 from fastapi.encoders import jsonable_encoder
-from fastapi.responses import JSONResponse
 from pydantic import BaseModel, ValidationError
 from starlette import status
 from starlette.requests import Request
+from starlette.responses import Response

 from mex.common.logging import logger

@@ -34,23 +34,35 @@ class ErrorResponse(BaseModel):
     debug: DebuggingInfo


-def handle_uncaught_exception(request: Request, exc: Exception) -> JSONResponse:
+def handle_validation_error(request: Request, exc: Exception) -> Response:
+    """Handle pydantic validation errors and provide debugging info."""
+    logger.exception("ValidationError %s", exc)
+    return Response(
+        content=ErrorResponse(
+            message=str(exc),
+            debug=DebuggingInfo(
+                errors=[
+                    jsonable_encoder(e) for e in cast(ValidationError, exc).errors()
+                ],
+                scope=DebuggingScope.model_validate(request.scope),
+            ),
+        ).model_dump_json(),
+        status_code=status.HTTP_400_BAD_REQUEST,
+        media_type="application/json",
+    )
+
+
+def handle_uncaught_exception(request: Request, exc: Exception) -> Response:
     """Handle uncaught errors and provide debugging info."""
-    logger.exception("Error %s", exc)
-    if isinstance(exc, ValidationError):
-        errors = [dict(error) for error in exc.errors()]
-        status_code = status.HTTP_400_BAD_REQUEST
-    else:
-        errors = [dict(type=type(exc).__name__)]
-        status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
-    return JSONResponse(
-        jsonable_encoder(
-            ErrorResponse(
-                message=str(exc),
-                debug=DebuggingInfo(
-                    errors=errors, scope=DebuggingScope.model_validate(request.scope)
-                ),
-            )
-        ),
-        status_code,
+    logger.exception("UncaughtError %s", exc)
+    return Response(
+        content=ErrorResponse(
+            message=str(exc),
+            debug=DebuggingInfo(
+                errors=[dict(type=type(exc).__name__)],
+                scope=DebuggingScope.model_validate(request.scope),
+            ),
+        ).model_dump_json(),
+        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+        media_type="application/json",
     )
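
For orientation, here is a minimal sketch (not part of the diff) of the 400 body the new handle_validation_error produces. The top-level shape follows the ErrorResponse model above; the exact keys inside errors come from pydantic's ValidationError.errors() and those inside scope from the DebuggingScope model, which this diff does not show. All concrete values below are hypothetical.

# Hypothetical 400 response body; field values are illustrative only.
{
    "message": "1 validation error for ExtractedItemSearch ...",
    "debug": {
        "errors": [
            {"type": "missing", "loc": ["items"], "msg": "Field required"}
        ],
        "scope": {"method": "POST", "path": "/v0/ingest"},
    },
}
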
18 changes: 16 additions & 2 deletions mex/backend/extracted/helpers.py
@@ -1,5 +1,9 @@
+from pydantic import ValidationError
+
 from mex.backend.extracted.models import ExtractedItemSearch
 from mex.backend.graph.connector import GraphConnector
+from mex.backend.graph.exceptions import InconsistentGraphError
+from mex.backend.utils import reraising
 from mex.common.models import AnyExtractedModel

@@ -19,6 +23,9 @@ def search_extracted_items_in_graph(
         skip: How many items to skip for pagination
         limit: How many items to return at most

+    Raises:
+        InconsistentGraphError: When the graph response cannot be parsed
+
     Returns:
         ExtractedItemSearch instance
     """
@@ -30,8 +37,12 @@ def search_extracted_items_in_graph(
         skip=skip,
         limit=limit,
     )
-    search_result = graph_result.one()
-    return ExtractedItemSearch.model_validate(search_result)
+    return reraising(
+        ValidationError,
+        InconsistentGraphError,
+        ExtractedItemSearch.model_validate,
+        graph_result.one(),
+    )


 def get_extracted_items_from_graph(
@@ -46,6 +57,9 @@ def get_extracted_items_from_graph(
         entity_type: Optional entity type filter
         limit: How many items to return at most

+    Raises:
+        InconsistentGraphError: When the graph response cannot be parsed
+
     Returns:
         List of extracted items
     """
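
The reraising utility imported from mex.backend.utils is not shown in this PR; judging from its call sites here and in mex/backend/ingest/helpers.py below, it presumably behaves like this sketch:

from typing import Callable, TypeVar

T = TypeVar("T")


def reraising(
    caught: type[Exception],
    raised: type[Exception],
    func: Callable[..., T],
    *args: object,
    **kwargs: object,
) -> T:
    """Call func with the given arguments, translating `caught` into `raised`."""
    try:
        return func(*args, **kwargs)
    except caught as error:
        raise raised(*error.args) from error

This keeps each call site to a single expression while converting pydantic's ValidationError into the domain-specific InconsistentGraphError.
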
16 changes: 8 additions & 8 deletions mex/backend/graph/connector.py
@@ -3,7 +3,7 @@
 from string import Template
 from typing import Annotated, Any, Literal, cast

-from neo4j import Driver, GraphDatabase
+from neo4j import Driver, GraphDatabase, NotificationMinimumSeverity
 from pydantic import Field

 from mex.backend.fields import (
@@ -17,7 +17,7 @@
 )
 from mex.backend.graph.models import Result
 from mex.backend.graph.query import QueryBuilder
-from mex.backend.graph.transform import expand_references_in_search_result, to_primitive
+from mex.backend.graph.transform import expand_references_in_search_result
 from mex.backend.settings import BackendSettings
 from mex.common.connector import BaseConnector
 from mex.common.exceptions import MExError
@@ -84,6 +84,7 @@ def _init_driver(self) -> Driver:
                 settings.graph_password.get_secret_value(),
             ),
             database=settings.graph_db,
+            warn_notification_severity=NotificationMinimumSeverity.OFF,
         )

     def _check_connectivity_and_authentication(self) -> Result:
@@ -379,12 +380,12 @@ def _merge_item(
         mutable_fields = set(MUTABLE_FIELDS_BY_CLASS_NAME[model.entityType])
         final_fields = set(FINAL_FIELDS_BY_CLASS_NAME[model.entityType])

-        mutable_values = to_primitive(model, include=mutable_fields)
-        final_values = to_primitive(model, include=final_fields)
+        mutable_values = model.model_dump(include=mutable_fields)
+        final_values = model.model_dump(include=final_fields)
         all_values = {**mutable_values, **final_values}

-        text_values = to_primitive(model, include=text_fields)
-        link_values = to_primitive(model, include=link_fields)
+        text_values = model.model_dump(include=text_fields)
+        link_values = model.model_dump(include=link_fields)

         nested_edge_labels: list[str] = []
         nested_node_labels: list[str] = []
@@ -445,7 +446,7 @@ def _merge_edges(
         query_builder = QueryBuilder.get()

         ref_fields = REFERENCE_FIELDS_BY_CLASS_NAME[model.entityType]
-        ref_values = to_primitive(model, include=set(ref_fields))
+        ref_values = model.model_dump(include=set(ref_fields))
         ref_values.update(extra_refs or {})

         ref_labels: list[str] = []
@@ -464,7 +465,6 @@
             merged_label=ensure_prefix(model.stemType, "Merged"),
             ref_labels=ref_labels,
         )
-
         return self.commit(
             query,
             **constraints,
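
The to_primitive calls removed here relied on pydantic's internal __pydantic_serializer__ with mode="json" (see the deletion in mex/backend/graph/transform.py below); the connector now uses plain model_dump(include=...). A minimal sketch of the new call pattern, using a hypothetical model:

from pydantic import BaseModel


class ExampleModel(BaseModel):
    """Hypothetical stand-in for an extracted model."""

    identifier: str
    version: str | None = None


model = ExampleModel(identifier="abc123")
# only the requested fields end up in the dump, mirroring include= above
assert model.model_dump(include={"identifier"}) == {"identifier": "abc123"}
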
2 changes: 1 addition & 1 deletion mex/backend/graph/cypher/fetch_extracted_or_rule_items.cql
@@ -59,7 +59,7 @@ CALL {
 }
 WITH n, collect(ref) as refs
 RETURN n{.*, entityType: head(labels(n)), _refs: refs}
-ORDER BY n.identifier ASC
+ORDER BY n.identifier, n.entityType ASC
 SKIP $skip
 LIMIT $limit
 }

2 changes: 1 addition & 1 deletion mex/backend/graph/cypher/fetch_merged_items.cql
@@ -60,7 +60,7 @@ CALL {
     } ELSE NULL END as ref
 }
 WITH merged, n, collect(ref) as refs
-ORDER BY merged.identifier, n.identifier ASC
+ORDER BY merged.identifier, n.identifier, head(labels(n)) ASC
 WITH merged, collect(n{.*, entityType: head(labels(n)), _refs: refs}) as n
 RETURN merged{entityType: head(labels(merged)), identifier: merged.identifier, components: n}
 SKIP $skip

4 changes: 2 additions & 2 deletions mex/backend/graph/cypher/merge_edges.cql
@@ -34,12 +34,12 @@ CALL {
     RETURN null as edge
 <%- endif %>
 }
-WITH source, collect(edge) as edges
+WITH source, count(edge) as merged, collect(edge) as edges
 CALL {
     WITH source, edges
     MATCH (source)-[outdated_edge]->(:<<merged_labels|join("|")>>)
     WHERE NOT outdated_edge IN edges
     DELETE outdated_edge
     RETURN count(outdated_edge) as pruned
 }
-RETURN count(edges) as merged, pruned, edges;
+RETURN merged, pruned, edges;
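
The counting fix works because count is an aggregating function: after WITH source, collect(edge) as edges there is only one row per source, so the old RETURN count(edges) evaluated to 1 per source row rather than the number of merged edges, which matches the "fix how merged edges are counted" changelog entry above. A Python analogy, illustrative only:

edges = ["e1", "e2", "e3"]  # stand-ins for the merged relationships
wrong = len([edges])  # 1 -- counts the single collected list, like count(edges)
right = len(edges)  # 3 -- counts the edges themselves, like count(edge)
assert (wrong, right) == (1, 3)
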
4 changes: 4 additions & 0 deletions mex/backend/graph/exceptions.py
@@ -7,3 +7,7 @@ class NoResultFoundError(MExError):

 class MultipleResultsFoundError(MExError):
     """A single database result was required but more than one were found."""
+
+
+class InconsistentGraphError(MExError):
+    """Exception raised for inconsistencies found in the graph database."""
25 changes: 0 additions & 25 deletions mex/backend/graph/transform.py
@@ -1,7 +1,5 @@
 from typing import Any, TypedDict, cast

-from pydantic import BaseModel
-

 class _SearchResultReference(TypedDict):
     """Helper class to show the structure of search result references."""
@@ -24,26 +22,3 @@ def expand_references_in_search_result(item: dict[str, Any]) -> None:
             length_needed = 1 + ref["position"] - len(target_list)
             target_list.extend([None] * length_needed)
             target_list[ref["position"]] = ref["value"]
-
-
-def to_primitive(
-    obj: BaseModel,
-    include: set[str] | None = None,
-    exclude: set[str] | None = None,
-    by_alias: bool = True,
-    exclude_unset: bool = False,
-    exclude_defaults: bool = False,
-    exclude_none: bool = False,
-) -> Any:
-    """Convert model object into python primitives compatible with graph ingestion."""
-    return obj.__pydantic_serializer__.to_python(
-        obj,
-        mode="json",
-        by_alias=by_alias,
-        include=include,
-        exclude=exclude,
-        exclude_unset=exclude_unset,
-        exclude_defaults=exclude_defaults,
-        exclude_none=exclude_none,
-        fallback=str,
-    )
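
For context, the surviving lines of expand_references_in_search_result (its opening is elided in this hunk) pad the target list with None up to the reference's recorded position and then slot in the value. An illustrative run of just the visible step, with hypothetical data:

target_list: list[str | None] = []
ref = {"position": 2, "value": "MergedPerson123"}  # hypothetical reference
length_needed = 1 + ref["position"] - len(target_list)
target_list.extend([None] * length_needed)
target_list[ref["position"]] = ref["value"]
assert target_list == [None, None, "MergedPerson123"]
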
31 changes: 31 additions & 0 deletions mex/backend/ingest/helpers.py
@@ -0,0 +1,31 @@
+from pydantic import ValidationError
+
+from mex.backend.graph.connector import GraphConnector
+from mex.backend.graph.exceptions import InconsistentGraphError
+from mex.backend.ingest.models import BulkIngestResponse
+from mex.backend.utils import reraising
+from mex.common.models import AnyExtractedModel
+
+
+def ingest_extracted_items_into_graph(
+    items: list[AnyExtractedModel],
+) -> BulkIngestResponse:
+    """Ingest a batch of extracted items and return their identifiers.
+
+    Args:
+        items: list of AnyExtractedModel
+
+    Raises:
+        InconsistentGraphError: When the graph response cannot be parsed
+
+    Returns:
+        List of identifiers of the ingested items
+    """
+    connector = GraphConnector.get()
+    identifiers = connector.ingest(items)
+    return reraising(
+        ValidationError,
+        InconsistentGraphError,
+        BulkIngestResponse,
+        identifiers=identifiers,
+    )
8 changes: 3 additions & 5 deletions mex/backend/ingest/main.py
@@ -1,15 +1,13 @@
 from fastapi import APIRouter
 from starlette import status

-from mex.backend.graph.connector import GraphConnector
+from mex.backend.ingest.helpers import ingest_extracted_items_into_graph
 from mex.backend.ingest.models import BulkIngestRequest, BulkIngestResponse

 router = APIRouter()


 @router.post("/ingest", status_code=status.HTTP_201_CREATED, tags=["extractors"])
 def ingest_extracted_items(request: BulkIngestRequest) -> BulkIngestResponse:
-    """Ingest batches of extracted items grouped by their type."""
-    connector = GraphConnector.get()
-    identifiers = connector.ingest(request.items)
-    return BulkIngestResponse(identifiers=identifiers)
+    """Ingest a batch of extracted items and return their identifiers."""
+    return ingest_extracted_items_into_graph(request.items)
9 changes: 5 additions & 4 deletions mex/backend/main.py
@@ -7,10 +7,10 @@
 from fastapi import APIRouter, Depends, FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.openapi.utils import get_openapi
-from pydantic import BaseModel
+from pydantic import BaseModel, ValidationError

 from mex.backend.auxiliary.wikidata import router as wikidata_router
-from mex.backend.exceptions import handle_uncaught_exception
+from mex.backend.exceptions import handle_uncaught_exception, handle_validation_error
 from mex.backend.extracted.main import router as extracted_router
 from mex.backend.identity.main import router as identity_router
 from mex.backend.ingest.main import router as ingest_router
@@ -24,8 +24,8 @@
 from mex.common.connector import CONNECTOR_STORE
 from mex.common.types import (
     EXTRACTED_IDENTIFIER_CLASSES,
+    IDENTIFIER_PATTERN,
     MERGED_IDENTIFIER_CLASSES,
-    MEX_ID_PATTERN,
 )

@@ -57,7 +57,7 @@ def create_openapi_schema() -> dict[str, Any]:
             "title": name,
             "type": "string",
             "description": identifier.__doc__,
-            "pattern": MEX_ID_PATTERN,
+            "pattern": IDENTIFIER_PATTERN,
         }

     app.openapi_schema = openapi_schema
@@ -110,6 +110,7 @@ def check_system_status() -> SystemStatus:

 app.include_router(router)
+app.add_exception_handler(ValidationError, handle_validation_error)
 app.add_exception_handler(Exception, handle_uncaught_exception)
 app.add_middleware(
     CORSMiddleware,