Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…mmon into feature/mx-1494-artificial-extractor-rework
  • Loading branch information
cutoffthetop committed Dec 20, 2023
2 parents 3efa687 + fe8d4f9 commit 4ab1d5a
Show file tree
Hide file tree
Showing 36 changed files with 841 additions and 479 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/linting.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
python-version: 3.11

- name: Install requirements
run: make setup
run: make install

- name: Run linters
run: make linter
32 changes: 11 additions & 21 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ default_language_version:
python: python3.11
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.6
rev: v0.1.8
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
Expand All @@ -20,26 +20,16 @@ repos:
- id: check-yaml
name: yaml
- repo: https://github.com/python-poetry/poetry
rev: 1.7.0
rev: 1.7.1
hooks:
- id: poetry-check
name: poetry
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.7.0
hooks: # configured according to mypy maintainer: https://github.com/python/mypy/issues/13916
- id: mypy
name: mypy
files: ^mex/
pass_filenames: false
args: [mex]
additional_dependencies:
- "backoff>=2.2.1,<3"
- "click>=8.1.7,<9"
- "pandas-stubs>=2.1.1,<3"
- "pydantic-settings>=2.1.0,<3"
- "pydantic>=2.5.1,<3"
- "pytest>=7.4.3,<8"
- "types-ldap3>=2.9.13.15,<3"
- "types-pytz>=2023.3.1.1,<2024"
- "types-requests>=2.31.0.10,<3"
- "types-setuptools>=68.2.0.1,<69"
- repo: local
hooks:
- id: mypy
name: mypy
entry: poetry run dmypy run --timeout 7200 -- mex
files: ^mex/
language: system
pass_filenames: false
types: [python]
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ MEx project will be released under the same license in the future.

- update global dependencies in `requirements.txt` manually
- update git hooks with `pre-commit autoupdate`
- update git hook additional dependencies manually
- show outdated dependencies with `poetry show --outdated`
- update dependencies in poetry using `poetry update --lock`
- update GitHub Actions manually in `.github/workflows/default.yml`
16 changes: 8 additions & 8 deletions mex/common/ldap/models/person.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
class LDAPPerson(LDAPActor):
"""Model class for LDAP persons."""

company: str | None = Field(None)
department: str | None = Field(None)
departmentNumber: str | None = Field(None)
displayName: str | None = Field(None)
employeeID: str = Field(...)
givenName: list[str] = Field(..., min_length=1)
ou: list[str] = Field([])
sn: str = Field(...)
company: str | None = None
department: str | None = None
departmentNumber: str | None = None
displayName: str | None = None
employeeID: str
givenName: list[str] = Field(min_length=1)
ou: list[str] = []
sn: str

@classmethod
def get_ldap_fields(cls) -> tuple[str, ...]:
Expand Down
30 changes: 11 additions & 19 deletions mex/common/models/access_platform.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,22 @@
from typing import Annotated

from pydantic import Field

from mex.common.models.base import BaseModel
from mex.common.models.extracted_data import ExtractedData
from mex.common.models.merged_item import MergedItem
from mex.common.types import (
AccessPlatformID,
APIType,
ContactPointID,
Link,
OrganizationalUnitID,
PersonID,
TechnicalAccessibility,
Text,
VocabularyEnum,
)


class TechnicalAccessibility(VocabularyEnum):
    """Technical accessibility within RKI and outside of RKI."""

    # Name of the vocabulary backing this enum -- presumably looked up by
    # VocabularyEnum when the members are built; verify in mex.common.types.
    __vocabulary__ = "technical-accessibility"


class APIType(VocabularyEnum):
    """Technical standard or style of a network API."""

    # Name of the vocabulary backing this enum -- presumably looked up by
    # VocabularyEnum when the members are built; verify in mex.common.types.
    __vocabulary__ = "api-type"


class BaseAccessPlatform(BaseModel):
"""A way of physically accessing the Resource for re-use."""

Expand All @@ -34,14 +25,15 @@ class BaseAccessPlatform(BaseModel):
contact: list[OrganizationalUnitID | PersonID | ContactPointID] = []
description: list[Text] = []
endpointDescription: Link | None = None
endpointType: APIType | None = Field(
None, examples=["https://mex.rki.de/item/api-type-1"]
)
endpointType: Annotated[
APIType, Field(examples=["https://mex.rki.de/item/api-type-1"])
] | None = None
endpointURL: Link | None = None
landingPage: list[Link] = []
technicalAccessibility: TechnicalAccessibility = Field(
..., examples=["https://mex.rki.de/item/technical-accessibility-1"]
)
technicalAccessibility: Annotated[
TechnicalAccessibility,
Field(examples=["https://mex.rki.de/item/technical-accessibility-1"]),
]
title: list[Text] = []
unitInCharge: list[OrganizationalUnitID] = []

Expand Down
37 changes: 8 additions & 29 deletions mex/common/models/activity.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,18 @@
from mex.common.models.merged_item import MergedItem
from mex.common.types import (
ActivityID,
ActivityType,
ContactPointID,
Identifier,
Link,
OrganizationalUnitID,
OrganizationID,
PersonID,
Text,
Theme,
Timestamp,
VocabularyEnum,
)


class ActivityType(VocabularyEnum):
    """The activity type."""

    # Name of the vocabulary backing this enum -- presumably looked up by
    # VocabularyEnum when the members are built; verify in mex.common.types.
    __vocabulary__ = "activity-type"


class BaseActivity(BaseModel):
"""The context a resource was generated in.
Expand All @@ -40,35 +33,21 @@ class BaseActivity(BaseModel):
]
] = []
alternativeTitle: list[Text] = []
contact: list[
Annotated[
OrganizationalUnitID | PersonID | ContactPointID,
Field(examples=[Identifier.generate(seed=42)]),
]
] = Field(
...,
min_length=1,
)
contact: Annotated[
list[OrganizationalUnitID | PersonID | ContactPointID,], Field(min_length=1)
]
documentation: list[Link] = []
end: list[
Annotated[
Timestamp,
Field(
examples=["2024-01-17", "2024", "2024-01"],
),
]
Annotated[Timestamp, Field(examples=["2024-01-17", "2024", "2024-01"])]
] = []
externalAssociate: list[OrganizationalUnitID | PersonID] = []
externalAssociate: list[OrganizationID | PersonID] = []
funderOrCommissioner: list[OrganizationID] = []
fundingProgram: list[str] = []
involvedPerson: list[PersonID] = []
involvedUnit: list[OrganizationalUnitID] = []
isPartOfActivity: list[ActivityID] = []
publication: list[Link] = []
responsibleUnit: list[OrganizationalUnitID] = Field(
...,
min_length=1,
)
responsibleUnit: Annotated[list[OrganizationalUnitID], Field(min_length=1)]
shortName: list[Text] = []
start: list[
Annotated[Timestamp, Field(examples=["2023-01-16", "2023", "2023-02"])]
Expand All @@ -77,7 +56,7 @@ class BaseActivity(BaseModel):
theme: list[
Annotated[Theme, Field(examples=["https://mex.rki.de/item/theme-1"])]
] = []
title: list[Text] = Field(..., min_length=1)
title: Annotated[list[Text], Field(min_length=1)]
website: list[Link] = []


Expand Down
71 changes: 60 additions & 11 deletions mex/common/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,39 @@
import pickle # nosec
from collections.abc import MutableMapping
from functools import cache
from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union, get_args, get_origin
from typing import TYPE_CHECKING, Annotated, Any, TypeVar, Union, get_args, get_origin

from pydantic import BaseModel as PydanticBaseModel
from pydantic import (
BaseModel as PydanticBaseModel,
)
from pydantic import ConfigDict, Field, TypeAdapter, ValidationError, model_validator
from pydantic.fields import FieldInfo
from pydantic.json_schema import DEFAULT_REF_TEMPLATE, JsonSchemaMode, JsonSchemaValue
from pydantic.json_schema import (
GenerateJsonSchema as PydanticJsonSchemaGenerator,
)

from mex.common.types import Identifier

RawModelDataT = TypeVar("RawModelDataT")


class JsonSchemaGenerator(PydanticJsonSchemaGenerator):
    """Customization of the pydantic class for generating JSON schemas."""

    def handle_ref_overrides(self, json_schema: JsonSchemaValue) -> JsonSchemaValue:
        """Disable pydantic behavior to wrap top-level `$ref` keys in an `allOf`.

        For example, pydantic would convert
            {"$ref": "#/$defs/APIType", "examples": ["api-type-1"]}
        into
            {"allOf": [{"$ref": "#/$defs/APIType"}], "examples": ["api-type-1"]}
        which is in fact recommended by JSON schema, but we need to disable this
        to stay compatible with mex-editor and mex-model.

        Args:
            json_schema: The JSON schema value that would otherwise be rewritten.

        Returns:
            The given `json_schema`, returned unchanged.
        """
        return json_schema


class BaseModel(PydanticBaseModel):
"""Common base class for all MEx model classes."""

Expand All @@ -27,6 +49,32 @@ class BaseModel(PydanticBaseModel):
validate_assignment=True,
)

@classmethod
def model_json_schema(
    cls,
    by_alias: bool = True,
    ref_template: str = DEFAULT_REF_TEMPLATE,
    schema_generator: type[PydanticJsonSchemaGenerator] = JsonSchemaGenerator,
    mode: JsonSchemaMode = "validation",
) -> dict[str, Any]:
    """Build the JSON schema for this model class.

    All arguments are forwarded to the parent implementation;
    `schema_generator` defaults to `JsonSchemaGenerator`.

    Args:
        by_alias: Whether to use attribute aliases or not.
        ref_template: The reference template.
        schema_generator: Class overriding the logic used to generate the
            JSON schema.
        mode: The mode in which to generate the schema.

    Returns:
        The JSON schema for the given model class.
    """
    parent = super()
    schema = parent.model_json_schema(
        by_alias=by_alias,
        ref_template=ref_template,
        schema_generator=schema_generator,
        mode=mode,
    )
    return schema

@classmethod
@cache
def _get_alias_lookup(cls) -> dict[str, str]:
Expand Down Expand Up @@ -73,7 +121,7 @@ def _get_field_names_allowing_none(cls) -> list[str]:
@classmethod
def _convert_non_list_to_list(
cls, name: str, field: FieldInfo, value: Any
) -> Optional[list[Any]]:
) -> list[Any] | None:
"""Convert a non-list value to a list value by wrapping it in a list."""
if value is None:
if name in cls._get_field_names_allowing_none():
Expand Down Expand Up @@ -171,12 +219,13 @@ class MExModel(BaseModel):
# also used as the foreign key for all fields containing references.
stableTargetId: Any

identifier: Identifier = Field(
...,
description=(
"A globally unique identifier for this item. Regardless of the entity-type "
"or whether this item was extracted, merged, etc. identifiers will be "
"assigned just once."
identifier: Annotated[
Identifier,
Field(
description=(
"A globally unique identifier for this item. Regardless of the "
"entity-type or whether this item was extracted, merged, etc. "
"identifiers will be assigned just once."
),
),
examples=[Identifier.generate(seed=42)],
)
]
12 changes: 1 addition & 11 deletions mex/common/models/contact_point.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,7 @@ class BaseContactPoint(BaseModel):
"""A contact point - for example, an interdepartmental project."""

stableTargetId: ContactPointID
email: list[
Annotated[
Email,
Field(
examples=["[email protected]"],
),
]
] = Field(
...,
min_length=1,
)
email: Annotated[list[Email], Field(min_length=1)]


class ExtractedContactPoint(BaseContactPoint, ExtractedData):
Expand Down
Loading

0 comments on commit 4ab1d5a

Please sign in to comment.