feature/mx-1470 Ensure correct types (#47)
- ensure correct types for the helper extract functions in ldap and organigram
- fix a tiny issue where pandas would not parse str subclasses with resolution
- upgrade to pydantic 2.5
- remove the mypy-extensions dependency
- upgrade all other dependencies
- add a yaml-checker pre-commit hook (for the .github folder)
cutoffthetop authored Nov 22, 2023
1 parent 97a1ef1 commit 5ef55be
Showing 6 changed files with 315 additions and 359 deletions.
20 changes: 11 additions & 9 deletions .pre-commit-config.yaml
@@ -3,12 +3,12 @@ default_language_version:
   python: python3.11
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.4
+    rev: v0.1.6
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]
   - repo: https://github.com/psf/black
-    rev: 23.10.1
+    rev: 23.11.0
     hooks:
       - id: black
   - repo: https://github.com/pre-commit/pre-commit-hooks
@@ -17,24 +17,26 @@ repos:
       - id: pretty-format-json
         name: json
         args: [--autofix]
+      - id: check-yaml
+        name: yaml
   - repo: https://github.com/python-poetry/poetry
-    rev: 1.6.1
+    rev: 1.7.0
     hooks:
       - id: poetry-check
         name: poetry
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.6.1
+    rev: v1.7.0
     hooks:
       - id: mypy
         name: mypy
         files: ^mex/
         additional_dependencies:
           - "backoff>=2.2.1,<3"
           - "click>=8.1.7,<9"
-          - "pandas-stubs>=2.0.3.230814"
-          - "pydantic>=2.1.1,<3"
-          - "pydantic-settings>=2.0.2,<3"
+          - "pandas-stubs>=2.1.1,<3"
+          - "pydantic-settings>=2.1.0,<3"
+          - "pydantic>=2.5.1,<3"
           - "pytest>=7.4.3,<8"
           - "types-pytz>=2023.3.1.1,<2024"
-          - "types-requests>=2.31.0.8,<3"
-          - "types-setuptools>=68.2.0.0,<69"
+          - "types-requests>=2.31.0.10,<3"
+          - "types-setuptools>=68.2.0.1,<69"
59 changes: 29 additions & 30 deletions mex/common/ldap/extract.py
@@ -1,29 +1,29 @@
 from collections import defaultdict
-from typing import Hashable, Iterable, cast
+from typing import Iterable
 
 from mex.common.identity import get_provider
 from mex.common.ldap.models.person import LDAPPerson, LDAPPersonWithQuery
 from mex.common.models import ExtractedPrimarySource
-from mex.common.types import Identifier
+from mex.common.types import PersonID
 
 
 def _get_merged_ids_by_attribute(
     attribute: str,
     persons: Iterable[LDAPPerson],
     primary_source: ExtractedPrimarySource,
-) -> dict[Hashable, list[Identifier]]:
-    """Return a mapping from a dynamic Person attribute to the merged IDs.
+) -> dict[str, list[PersonID]]:
+    """Return a mapping from a dynamic Person attribute to corresponding PersonIDs.
 
-    Merged IDs are looked up in the identity table and will be omitted
-    for any person that has not yet been transformed and indexed there.
+    PersonIDs are looked up in the identity provider and will be omitted
+    for any person that has not yet been assigned an `Identity` there.
 
     Args:
         attribute: The key to use for the resulting mapping
-        persons: Iterable of LDP persons
+        persons: Iterable of LDAP persons
         primary_source: Primary source for LDAP
 
     Returns:
-        Mapping from `LDAPPerson[attribute]` to corresponding `Identity.stableTargetId`
+        Mapping from a stringified `LDAPPerson[attribute]` to corresponding PersonIDs
     """
     if attribute not in LDAPPerson.model_fields:
         raise RuntimeError(f"Not a valid LDAPPerson field: {attribute}")
@@ -35,63 +35,62 @@ def _get_merged_ids_by_attribute(
             identifier_in_primary_source=str(person.objectGUID),
         ):
             merged_ids_by_attribute[str(getattr(person, attribute))].append(
-                Identifier(identities[0].stableTargetId)
+                PersonID(identities[0].stableTargetId)
             )
-    return cast(dict[Hashable, list[Identifier]], merged_ids_by_attribute)
+    return merged_ids_by_attribute
 
 
 def get_merged_ids_by_employee_ids(
     persons: Iterable[LDAPPerson], primary_source: ExtractedPrimarySource
-) -> dict[Hashable, list[Identifier]]:
-    """Return a mapping from person's employeeID to the merged IDs.
+) -> dict[str, list[PersonID]]:
+    """Return a mapping from a person's employeeID to their PersonIDs.
 
-    Merged IDs are looked up in the identity table and will be omitted
-    for any person that has not yet been transformed and indexed there.
+    PersonIDs are looked up in the identity provider and will be omitted
+    for any person that has not yet been assigned an `Identity` there.
 
     Args:
-        persons: Iterable of LDP persons
+        persons: Iterable of LDAP persons
         primary_source: Primary source for LDAP
 
     Returns:
-        Mapping from `LDAPPerson.employeeID` to corresponding `Identity.stableTargetId`
+        Mapping from `LDAPPerson.employeeID` to corresponding PersonIDs
     """
     return _get_merged_ids_by_attribute("employeeID", persons, primary_source)
 
 
 def get_merged_ids_by_email(
     persons: Iterable[LDAPPerson], primary_source: ExtractedPrimarySource
-) -> dict[Hashable, list[Identifier]]:
-    """Return a mapping from person's e-mail to the merged IDs.
+) -> dict[str, list[PersonID]]:
+    """Return a mapping from a person's e-mail to their PersonIDs.
 
-    Merged IDs are looked up in the identity table and will be omitted
-    for any person that has not yet been transformed and indexed there.
+    PersonIDs are looked up in the identity provider and will be omitted
+    for any person that has not yet been assigned an `Identity` there.
 
     Args:
         persons: Iterable of LDP persons
         primary_source: Primary source for LDAP
 
     Returns:
-        Mapping from `LDAPPerson.mail` to corresponding `Identity.stableTargetId`
+        Mapping from `LDAPPerson.mail` to corresponding PersonIDs
     """
     return _get_merged_ids_by_attribute("mail", persons, primary_source)
 
 
 def get_merged_ids_by_query_string(
     persons_with_query: Iterable[LDAPPersonWithQuery],
     primary_source: ExtractedPrimarySource,
-) -> dict[Hashable, list[Identifier]]:
-    """Return a mapping from an author query string to the resolved merged IDs.
+) -> dict[str, list[PersonID]]:
+    """Return a mapping from a person query string to their PersonIDs.
 
-    Merged IDs are looked up in the identity table and will be omitted
-    for any person that has not yet been transformed and indexed there.
+    PersonIDs are looked up in the identity provider and will be omitted
+    for any person that has not yet been assigned an `Identity` there.
 
     Args:
         persons_with_query: Iterable of LDP persons with query
         primary_source: Primary source for LDAP
 
     Returns:
-        Mapping from `LDAPPersonWithQuery.query` to corresponding
-        `Identity.stableTargetId`
+        Mapping from `LDAPPersonWithQuery.query` to corresponding PersonIDs
     """
     merged_ids_by_attribute = defaultdict(list)
     provider = get_provider()
@@ -100,7 +99,7 @@ def get_merged_ids_by_query_string(
             had_primary_source=primary_source.stableTargetId,
             identifier_in_primary_source=str(person_with_query.person.objectGUID),
         ):
-            merged_ids_by_attribute[person_with_query.query].append(
-                Identifier(identities[0].stableTargetId)
+            merged_ids_by_attribute[str(person_with_query.query)].append(
+                PersonID(identities[0].stableTargetId)
             )
-    return cast(dict[Hashable, list[Identifier]], merged_ids_by_attribute)
+    return merged_ids_by_attribute
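
For orientation, a minimal usage sketch of the retyped helpers. The variables `persons` and `primary_source` are illustrative stand-ins for the usual LDAP extraction and primary-source lookup steps and are not part of this diff:

from mex.common.ldap.extract import get_merged_ids_by_employee_ids
from mex.common.types import PersonID

# With the new annotations, callers get a concrete mapping type instead of
# dict[Hashable, list[Identifier]], so no cast() is needed on their side.
merged_ids: dict[str, list[PersonID]] = get_merged_ids_by_employee_ids(
    persons,  # Iterable[LDAPPerson] from a prior extraction step (assumed)
    primary_source,  # ExtractedPrimarySource for LDAP (assumed)
)
for employee_id, person_ids in merged_ids.items():
    ...  # every value is already a PersonID
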
4 changes: 2 additions & 2 deletions mex/common/organigram/extract.py
@@ -59,7 +59,7 @@ def get_unit_merged_ids_by_synonyms(
         Mapping from unit synonyms to stableTargetIds
     """
     return {
-        synonym: extracted_unit.stableTargetId
+        synonym: OrganizationalUnitID(extracted_unit.stableTargetId)
         for extracted_unit in extracted_units
         for synonym in _get_synonyms(extracted_unit)
     }
@@ -79,7 +79,7 @@ def get_unit_merged_ids_by_emails(
         Mapping from lowercased `email` to stableTargetIds
     """
     return {
-        email.lower(): extracted_unit.stableTargetId
+        email.lower(): OrganizationalUnitID(extracted_unit.stableTargetId)
         for extracted_unit in extracted_units
         for email in extracted_unit.email
     }
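
A similar sketch for the organigram helpers. The `OrganizationalUnitID` import path, the `extracted_units` variable, and the annotated return type are inferred from this hunk rather than shown in it:

from mex.common.organigram.extract import get_unit_merged_ids_by_synonyms
from mex.common.types import OrganizationalUnitID  # assumed import path

# Each synonym now maps to an explicitly wrapped unit identifier:
unit_ids_by_synonym: dict[str, OrganizationalUnitID] = get_unit_merged_ids_by_synonyms(
    extracted_units  # iterable of extracted organizational units (assumed)
)
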
2 changes: 1 addition & 1 deletion mex/common/types/timestamp.py
@@ -203,7 +203,7 @@ def _parse_timestamp(value: "Timestamp") -> tuple[datetime, TimestampPrecision]:
     def _parse_string(value: str) -> tuple[datetime, TimestampPrecision]:
         """Parse a string containing a timestamp using pandas' tslibs."""
         parsed, precision = parsing.parse_datetime_string_with_reso(  # type: ignore[attr-defined]
-            value, freq=None, dayfirst=False, yearfirst=True
+            str(value), freq=None, dayfirst=False, yearfirst=True
         )
         if parsed.tzinfo is None:
             parsed = parsed.replace(tzinfo=CET)
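
The `str(value)` coercion addresses the "tiny issue" from the commit message: pandas' low-level parser can reject instances of str subclasses. A minimal sketch of the failure mode, using a hypothetical subclass; the pandas call mirrors the private API used in the hunk above:

from pandas._libs.tslibs import parsing  # private pandas API, as used above

class TimestampString(str):
    """Hypothetical str subclass standing in for the value handed to _parse_string."""

raw = TimestampString("2014-08-24")
# Coercing to an exact str before parsing avoids the subclass problem:
parsed, precision = parsing.parse_datetime_string_with_reso(
    str(raw), freq=None, dayfirst=False, yearfirst=True
)
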

0 comments on commit 5ef55be
