Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature/mx-1470 Ensure correct types #47

Merged
merged 8 commits into from
Nov 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ default_language_version:
python: python3.11
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.4
rev: v0.1.6
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
- repo: https://github.com/psf/black
rev: 23.10.1
rev: 23.11.0
hooks:
- id: black
- repo: https://github.com/pre-commit/pre-commit-hooks
Expand All @@ -17,24 +17,26 @@ repos:
- id: pretty-format-json
name: json
args: [--autofix]
- id: check-yaml
name: yaml
- repo: https://github.com/python-poetry/poetry
rev: 1.6.1
rev: 1.7.0
hooks:
- id: poetry-check
name: poetry
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.6.1
rev: v1.7.0
hooks:
- id: mypy
name: mypy
files: ^mex/
additional_dependencies:
- "backoff>=2.2.1,<3"
- "click>=8.1.7,<9"
- "pandas-stubs>=2.0.3.230814"
- "pydantic>=2.1.1,<3"
- "pydantic-settings>=2.0.2,<3"
- "pandas-stubs>=2.1.1,<3"
- "pydantic-settings>=2.1.0,<3"
- "pydantic>=2.5.1,<3"
- "pytest>=7.4.3,<8"
- "types-pytz>=2023.3.1.1,<2024"
- "types-requests>=2.31.0.8,<3"
- "types-setuptools>=68.2.0.0,<69"
- "types-requests>=2.31.0.10,<3"
- "types-setuptools>=68.2.0.1,<69"
59 changes: 29 additions & 30 deletions mex/common/ldap/extract.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
from collections import defaultdict
from typing import Hashable, Iterable, cast
from typing import Iterable

from mex.common.identity import get_provider
from mex.common.ldap.models.person import LDAPPerson, LDAPPersonWithQuery
from mex.common.models import ExtractedPrimarySource
from mex.common.types import Identifier
from mex.common.types import PersonID


def _get_merged_ids_by_attribute(
attribute: str,
persons: Iterable[LDAPPerson],
primary_source: ExtractedPrimarySource,
) -> dict[Hashable, list[Identifier]]:
"""Return a mapping from a dynamic Person attribute to the merged IDs.
) -> dict[str, list[PersonID]]:
"""Return a mapping from a dynamic Person attribute to corresponding PersonIDs.

Merged IDs are looked up in the identity table and will be omitted
for any person that has not yet been transformed and indexed there.
PersonIDs are looked up in the identity provider and will be omitted
for any person that has not yet been assigned an `Identity` there.

Args:
attribute: The key to use for the resulting mapping
persons: Iterable of LDP persons
persons: Iterable of LDAP persons
primary_source: Primary source for LDAP

Returns:
Mapping from `LDAPPerson[attribute]` to corresponding `Identity.stableTargetId`
Mapping from a stringified `LDAPPerson[attribute]` to corresponding PersonIDs
"""
if attribute not in LDAPPerson.model_fields:
raise RuntimeError(f"Not a valid LDAPPerson field: {attribute}")
Expand All @@ -35,63 +35,62 @@ def _get_merged_ids_by_attribute(
identifier_in_primary_source=str(person.objectGUID),
):
merged_ids_by_attribute[str(getattr(person, attribute))].append(
Identifier(identities[0].stableTargetId)
PersonID(identities[0].stableTargetId)
)
return cast(dict[Hashable, list[Identifier]], merged_ids_by_attribute)
return merged_ids_by_attribute


def get_merged_ids_by_employee_ids(
persons: Iterable[LDAPPerson], primary_source: ExtractedPrimarySource
) -> dict[Hashable, list[Identifier]]:
"""Return a mapping from person's employeeID to the merged IDs.
) -> dict[str, list[PersonID]]:
"""Return a mapping from a person's employeeID to their PersonIDs.

Merged IDs are looked up in the identity table and will be omitted
for any person that has not yet been transformed and indexed there.
PersonIDs are looked up in the identity provider and will be omitted
for any person that has not yet been assigned an `Identity` there.

Args:
persons: Iterable of LDP persons
persons: Iterable of LDAP persons
primary_source: Primary source for LDAP

Returns:
Mapping from `LDAPPerson.employeeID` to corresponding `Identity.stableTargetId`
Mapping from `LDAPPerson.employeeID` to corresponding PersonIDs
"""
return _get_merged_ids_by_attribute("employeeID", persons, primary_source)


def get_merged_ids_by_email(
persons: Iterable[LDAPPerson], primary_source: ExtractedPrimarySource
) -> dict[Hashable, list[Identifier]]:
"""Return a mapping from person's e-mail to the merged IDs.
) -> dict[str, list[PersonID]]:
"""Return a mapping from a person's e-mail to their PersonIDs.

Merged IDs are looked up in the identity table and will be omitted
for any person that has not yet been transformed and indexed there.
PersonIDs are looked up in the identity provider and will be omitted
for any person that has not yet been assigned an `Identity` there.

Args:
persons: Iterable of LDAP persons
primary_source: Primary source for LDAP

Returns:
Mapping from `LDAPPerson.mail` to corresponding `Identity.stableTargetId`
Mapping from `LDAPPerson.mail` to corresponding PersonIDs
"""
return _get_merged_ids_by_attribute("mail", persons, primary_source)


def get_merged_ids_by_query_string(
persons_with_query: Iterable[LDAPPersonWithQuery],
primary_source: ExtractedPrimarySource,
) -> dict[Hashable, list[Identifier]]:
"""Return a mapping from an author query string to the resolved merged IDs.
) -> dict[str, list[PersonID]]:
"""Return a mapping from a person query string to their PersonIDs.

Merged IDs are looked up in the identity table and will be omitted
for any person that has not yet been transformed and indexed there.
PersonIDs are looked up in the identity provider and will be omitted
for any person that has not yet been assigned an `Identity` there.

Args:
persons_with_query: Iterable of LDAP persons with query
primary_source: Primary source for LDAP

Returns:
Mapping from `LDAPPersonWithQuery.query` to corresponding
`Identity.stableTargetId`
Mapping from `LDAPPersonWithQuery.query` to corresponding PersonIDs
"""
merged_ids_by_attribute = defaultdict(list)
provider = get_provider()
Expand All @@ -100,7 +99,7 @@ def get_merged_ids_by_query_string(
had_primary_source=primary_source.stableTargetId,
identifier_in_primary_source=str(person_with_query.person.objectGUID),
):
merged_ids_by_attribute[person_with_query.query].append(
Identifier(identities[0].stableTargetId)
merged_ids_by_attribute[str(person_with_query.query)].append(
PersonID(identities[0].stableTargetId)
)
return cast(dict[Hashable, list[Identifier]], merged_ids_by_attribute)
return merged_ids_by_attribute
4 changes: 2 additions & 2 deletions mex/common/organigram/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def get_unit_merged_ids_by_synonyms(
Mapping from unit synonyms to stableTargetIds
"""
return {
synonym: extracted_unit.stableTargetId
synonym: OrganizationalUnitID(extracted_unit.stableTargetId)
for extracted_unit in extracted_units
for synonym in _get_synonyms(extracted_unit)
}
Expand All @@ -79,7 +79,7 @@ def get_unit_merged_ids_by_emails(
Mapping from lowercased `email` to stableTargetIds
"""
return {
email.lower(): extracted_unit.stableTargetId
email.lower(): OrganizationalUnitID(extracted_unit.stableTargetId)
for extracted_unit in extracted_units
for email in extracted_unit.email
}
2 changes: 1 addition & 1 deletion mex/common/types/timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def _parse_timestamp(value: "Timestamp") -> tuple[datetime, TimestampPrecision]:
def _parse_string(value: str) -> tuple[datetime, TimestampPrecision]:
"""Parse a string containing a timestamp using pandas' tslibs."""
parsed, precision = parsing.parse_datetime_string_with_reso( # type: ignore[attr-defined]
value, freq=None, dayfirst=False, yearfirst=True
str(value), freq=None, dayfirst=False, yearfirst=True
)
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=CET)
Expand Down
Loading