diff --git a/CHANGELOG.md b/CHANGELOG.md index 72973411..256f6f25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changes +- make ruff linter config opt-out, instead of opt-in + ### Deprecated ### Removed diff --git a/mex/common/backend_api/connector.py b/mex/common/backend_api/connector.py index 94731488..2ee350c1 100644 --- a/mex/common/backend_api/connector.py +++ b/mex/common/backend_api/connector.py @@ -145,7 +145,7 @@ def get_merged_item( Returns: A single merged item """ - # XXX stop-gap until the backend has a proper get merged item endpoint (MX-1669) + # TODO(ND): stop-gap until backend has proper get merged item endpoint (MX-1669) response = self.request( method="GET", endpoint="merged-item", @@ -158,7 +158,8 @@ def get_merged_item( try: return response_model.items[0] except IndexError: - raise HTTPError("merged item was not found") from None + msg = "merged item was not found" + raise HTTPError(msg) from None def preview_merged_item( self, diff --git a/mex/common/cli.py b/mex/common/cli.py index d9424f18..d17a529e 100644 --- a/mex/common/cli.py +++ b/mex/common/cli.py @@ -137,13 +137,13 @@ def _callback( func() except (Abort, BdbQuit, Exit, KeyboardInterrupt): # pragma: no cover context.exit(130) - except Exception as error: + except Exception: # an error occurred, let's print the traceback logger.error(click.style(format_exc(), fg="red")) if settings.debug: # pragma: no cover # if we are in debug mode, jump into interactive debugging. pdb.post_mortem(sys.exc_info()[2]) - raise error + raise # if not in debug mode, exit with code 1. logger.error("exit") context.exit(1) diff --git a/mex/common/connector/http.py b/mex/common/connector/http.py index e6e1ef12..716de1de 100644 --- a/mex/common/connector/http.py +++ b/mex/common/connector/http.py @@ -5,7 +5,7 @@ import backoff import requests -from requests import HTTPError, RequestException, Response +from requests import HTTPError, RequestException, Response, codes from mex.common.connector import BaseConnector from mex.common.settings import BaseSettings @@ -30,7 +30,7 @@ def _set_session(self) -> None: """Create and set request session.""" settings = BaseSettings.get() self.session = requests.Session() - self.session.verify = settings.verify_session # type: ignore + self.session.verify = settings.verify_session # type: ignore[assignment] def _set_authentication(self) -> None: """Authenticate to the host.""" @@ -94,23 +94,25 @@ def request( response=response, ) from error - if response.status_code == 204: + if response.status_code == codes.no_content: return {} return cast(dict[str, Any], response.json()) @backoff.on_predicate( backoff.fibo, - lambda response: cast(Response, response).status_code >= 500, + lambda response: cast(Response, response).status_code + >= codes.internal_server_error, max_tries=4, ) @backoff.on_predicate( backoff.fibo, - lambda response: cast(Response, response).status_code == 429, + lambda response: cast(Response, response).status_code + == codes.too_many_requests, max_tries=10, ) @backoff.on_predicate( backoff.fibo, - lambda response: cast(Response, response).status_code == 403, + lambda response: cast(Response, response).status_code == codes.forbidden, max_tries=10, ) @backoff.on_exception(backoff.fibo, RequestException, max_tries=6) diff --git a/mex/common/context.py b/mex/common/context.py index ba6707cf..96857f38 100644 --- a/mex/common/context.py +++ b/mex/common/context.py @@ -51,11 +51,12 @@ def load(self, cls: type[_SingletonT]) -> _SingletonT: self._singleton = cls() return self._singleton if not issubclass(type(self._singleton), cls): - raise RuntimeError( + msg = ( f"requested class ({cls}) is not a parent class of loaded class " f"({type(self._singleton)}). " f"Did you initialize {cls} upon startup?" ) + raise RuntimeError(msg) # noqa: TRY004 return self._singleton def push(self, instance: _SingletonT) -> None: diff --git a/mex/common/extract.py b/mex/common/extract.py index 688806ad..cb9755c2 100644 --- a/mex/common/extract.py +++ b/mex/common/extract.py @@ -59,10 +59,12 @@ def parse_csv( ) as reader: for chunk in reader: for index, row in chunk.iterrows(): - row.replace(to_replace=np.nan, value=None, inplace=True) - row.replace(regex=r"^\s*$", value=None, inplace=True) try: - model = into.model_validate(row.to_dict()) + model = into.model_validate( + row.replace(to_replace=np.nan, value=None) + .replace(regex=r"^\s*$", value=None) + .to_dict() + ) logger.info( "parse_csv - %s %s - OK", into.__name__, diff --git a/mex/common/identity/registry.py b/mex/common/identity/registry.py index b133ea6e..0525aa9f 100644 --- a/mex/common/identity/registry.py +++ b/mex/common/identity/registry.py @@ -21,7 +21,8 @@ def register_provider(key: Hashable, provider_cls: type[BaseProvider]) -> None: RuntimeError: When the `key` is already registered """ if key in _PROVIDER_REGISTRY: - raise RuntimeError(f"Already registered identity provider: {key}") + msg = f"Already registered identity provider: {key}" + raise RuntimeError(msg) _PROVIDER_REGISTRY[key] = provider_cls @@ -41,9 +42,8 @@ def get_provider() -> BaseProvider: if settings.identity_provider in _PROVIDER_REGISTRY: provider_cls = _PROVIDER_REGISTRY[settings.identity_provider] return provider_cls.get() - raise RuntimeError( - f"Identity provider not implemented: {settings.identity_provider}" - ) + msg = f"Identity provider not implemented: {settings.identity_provider}" + raise RuntimeError(msg) # register the default providers shipped with mex-common diff --git a/mex/common/ldap/connector.py b/mex/common/ldap/connector.py index debaf93b..6f93e716 100644 --- a/mex/common/ldap/connector.py +++ b/mex/common/ldap/connector.py @@ -43,7 +43,8 @@ def __init__(self) -> None: ) self._connection = connection.__enter__() if not self._is_service_available(): - raise MExError(f"LDAP service not available at url: {host}:{port}") + msg = f"LDAP service not available at url: {host}:{port}" + raise MExError(msg) def _is_service_available(self) -> bool: try: @@ -197,15 +198,17 @@ def get_functional_account( ) ) if not functional_accounts: - raise EmptySearchResultError( + msg = ( "Cannot find AD functional account for filters " f"'objectGUID: {objectGUID}, {filters}'" ) + raise EmptySearchResultError(msg) if len(functional_accounts) > 1: - raise FoundMoreThanOneError( + msg = ( "Found multiple AD functional accounts for filters " f"'objectGUID: {objectGUID}, {filters}'" ) + raise FoundMoreThanOneError(msg) return functional_accounts[0] def get_person( @@ -235,15 +238,17 @@ def get_person( ) ) if not persons: - raise EmptySearchResultError( + msg = ( f"Cannot find AD person for filters 'objectGUID: {objectGUID}, " f"employeeID: {employeeID}, {filters}'" ) + raise EmptySearchResultError(msg) if len(persons) > 1: - raise FoundMoreThanOneError( + msg = ( f"Found multiple AD persons for filters 'objectGUID: {objectGUID}, " f"employeeID: {employeeID}, {filters}'" ) + raise FoundMoreThanOneError(msg) return persons[0] def get_unit(self, **filters: str) -> LDAPUnit: @@ -260,9 +265,9 @@ def get_unit(self, **filters: str) -> LDAPUnit: """ units = list(self.get_units(**filters)) if not units: - raise EmptySearchResultError(f"Cannot find AD unit for filters '{filters}'") + msg = f"Cannot find AD unit for filters '{filters}'" + raise EmptySearchResultError(msg) if len(units) > 1: - raise FoundMoreThanOneError( - f"Found multiple AD units for filters '{filters}'" - ) + msg = f"Found multiple AD units for filters '{filters}'" + raise FoundMoreThanOneError(msg) return units[0] diff --git a/mex/common/ldap/extract.py b/mex/common/ldap/extract.py index 89854dd0..7fa371e2 100644 --- a/mex/common/ldap/extract.py +++ b/mex/common/ldap/extract.py @@ -28,7 +28,8 @@ def _get_merged_ids_by_attribute( MergedPersonIdentifiers """ if attribute not in LDAPPerson.model_fields: - raise RuntimeError(f"Not a valid LDAPPerson field: {attribute}") + msg = f"Not a valid LDAPPerson field: {attribute}" + raise RuntimeError(msg) merged_ids_by_attribute = defaultdict(list) provider = get_provider() for person in persons: diff --git a/mex/common/ldap/transform.py b/mex/common/ldap/transform.py index 106c361d..29364450 100644 --- a/mex/common/ldap/transform.py +++ b/mex/common/ldap/transform.py @@ -100,15 +100,16 @@ def transform_ldap_person_to_mex_person( if d and (unit := units_by_identifier_in_primary_source.get(d.lower())) ] if not member_of: - raise MExError( + msg = ( "No unit or department found for LDAP department " f"'{ldap_person.department}' or departmentNumber " f"'{ldap_person.departmentNumber}'" ) + raise MExError(msg) return ExtractedPerson( identifierInPrimarySource=str(ldap_person.objectGUID), hadPrimarySource=primary_source.stableTargetId, - affiliation=[], # TODO resolve organization for person.company/RKI + affiliation=[], # TODO(HS): resolve organization for person.company/RKI email=ldap_person.mail, familyName=[ldap_person.sn], fullName=[ldap_person.displayName] if ldap_person.displayName else [], @@ -184,11 +185,11 @@ def analyse_person_string(string: str) -> list[PersonName]: return [name for strings in split for name in analyse_person_string(strings)] # split on comma if there is more than one - if len(split := re.split(r",", string)) > 2: + if len(split := re.split(r",", string)) > 2: # noqa: PLR2004 return [name for strings in split for name in analyse_person_string(strings)] # split on single commas only if there are more than three words - if len(split := re.split(r",", string)) == 2 and string.strip().count(" ") > 2: + if len(split := re.split(r",", string)) == 2 and string.strip().count(" ") > 2: # noqa: PLR2004 return [name for strings in split for name in analyse_person_string(strings)] # split into surname and given name @@ -209,7 +210,7 @@ def analyse_person_string(string: str) -> list[PersonName]: return [PersonName(surname=split[0], full_name=full_name)] # return surname and given name - if len(split) == 2: + if len(split) == 2: # noqa: PLR2004 return [PersonName(surname=split[1], given_name=split[0], full_name=full_name)] # found no one diff --git a/mex/common/models/base/model.py b/mex/common/models/base/model.py index e5ac4802..7092d140 100644 --- a/mex/common/models/base/model.py +++ b/mex/common/models/base/model.py @@ -143,7 +143,8 @@ def _convert_list_to_non_list(cls, field_name: str, value: list[Any]) -> Any: # if we have just one entry, we can safely unpack it return value[0] # we cannot unambiguously unpack more than one value - raise ValueError(f"got multiple values for {field_name}") + msg = f"got multiple values for {field_name}" + raise ValueError(msg) @classmethod def _fix_value_listyness_for_field(cls, field_name: str, value: Any) -> Any: @@ -186,10 +187,11 @@ def verify_computed_field_consistency( if not isinstance(data, MutableMapping): # data is not a dictionary: we can't "pop" values from that, # so we can't safely do a before/after comparison - raise AssertionError( + msg = ( "Input should be a valid dictionary, validating other types is not " "supported for models with computed fields." ) + raise AssertionError(msg) # noqa: TRY004 custom_values = { field: value for field in cls.model_computed_fields @@ -198,7 +200,8 @@ def verify_computed_field_consistency( result = handler(data) computed_values = result.model_dump(include=set(custom_values)) if computed_values != custom_values: - raise ValueError("Cannot set computed fields to custom values!") + msg = "Cannot set computed fields to custom values!" + raise ValueError(msg) return result @model_validator(mode="wrap") @@ -223,9 +226,9 @@ def fix_listyness(cls, data: Any, handler: ValidatorFunctionWrapHandler) -> Any: Returns: data with fixed list shapes """ - # XXX This needs to be a "wrap" validator that is defined *after* the computed - # field model validator, so it runs *before* the computed field validator. - # Sigh, see https://github.com/pydantic/pydantic/discussions/7434 + # TODO(ND): This needs to be a "wrap" validator that is defined *after* the + # computed field model validator, so it runs *before* the computed field + # validator. Sigh, see https://github.com/pydantic/pydantic/discussions/7434 if isinstance(data, MutableMapping): for name, value in data.items(): field_name = cls._get_alias_lookup().get(name, name) diff --git a/mex/common/models/consent.py b/mex/common/models/consent.py index 3e62ee0b..f7c6e233 100644 --- a/mex/common/models/consent.py +++ b/mex/common/models/consent.py @@ -1,3 +1,2 @@ -# XXX this is a forward-compatibility hint for feature/model-update-v3: -# when this gets merged with model v3, remove the -# `Annotated[..., Field(examples=["https://mex..."])]` from all enum fields +# TODO(ND): when this gets merged with feature/model-update-v3, remove the +# `Annotated[..., Field(examples=["https://mex..."])]` from all enum fields diff --git a/mex/common/organigram/transform.py b/mex/common/organigram/transform.py index e3957740..829bb347 100644 --- a/mex/common/organigram/transform.py +++ b/mex/common/organigram/transform.py @@ -42,7 +42,7 @@ def transform_organigram_units_to_organizational_units( for extracted_unit in extracted_unit_by_id_in_primary_source.values(): identifier_in_primary_source = extracted_unit.identifierInPrimarySource - if ( + if ( # noqa: SIM102 parent_identifier_in_primary_source := parent_id_in_primary_source_by_id_in_primary_source.get( identifier_in_primary_source diff --git a/mex/common/settings.py b/mex/common/settings.py index fbdd7db4..e5fd3b6d 100644 --- a/mex/common/settings.py +++ b/mex/common/settings.py @@ -208,7 +208,7 @@ def get_env_name(cls, name: str) -> str: case_sensitive=cls.model_config.get("case_sensitive", False), env_prefix=cls.model_config.get("env_prefix", ""), ) - env_info = env_settings._extract_field_info(field, name) + env_info = env_settings._extract_field_info(field, name) # noqa: SLF001 return env_info[0][1].upper() @model_validator(mode="after") diff --git a/mex/common/sinks/ndjson.py b/mex/common/sinks/ndjson.py index 565101e7..619b5b63 100644 --- a/mex/common/sinks/ndjson.py +++ b/mex/common/sinks/ndjson.py @@ -35,7 +35,7 @@ def write_ndjson( handle = file_handles[class_name] except KeyError: file_name = Path(settings.work_dir, f"{class_name}.ndjson") - writer = open(file_name, "a+", encoding="utf-8") + writer = open(file_name, "a+", encoding="utf-8") # noqa: SIM115 file_handles[class_name] = handle = stack.enter_context(writer) logger.info( "write_ndjson - writing %s to file %s", diff --git a/mex/common/transform.py b/mex/common/transform.py index 521421a6..e394894e 100644 --- a/mex/common/transform.py +++ b/mex/common/transform.py @@ -16,7 +16,7 @@ class MExEncoder(json.JSONEncoder): """Custom JSON encoder that can handle pydantic models, enums and UUIDs.""" - def default(self, obj: Any) -> Any: + def default(self, obj: Any) -> Any: # noqa: PLR0911 """Implement custom serialization rules.""" if isinstance(obj, PydanticModel): return obj.model_dump() @@ -106,7 +106,9 @@ def to_key_and_values(dct: dict[str, Any]) -> Iterable[tuple[str, list[Any]]]: """Return an iterable of dictionary items where the values are always lists.""" for key, value in dct.items(): if value is None: - value = [] - elif not isinstance(value, list): - value = [value] - yield key, value + list_of_values = [] + elif isinstance(value, list): + list_of_values = value + else: + list_of_values = [value] + yield key, list_of_values diff --git a/mex/common/types/email.py b/mex/common/types/email.py index 3a98df1b..32128d7c 100644 --- a/mex/common/types/email.py +++ b/mex/common/types/email.py @@ -9,6 +9,8 @@ class Email(str): """Email address of a person, organization or other entity.""" + __slots__ = () + @classmethod def __get_pydantic_core_schema__( cls, source_type: Any, handler: GetCoreSchemaHandler diff --git a/mex/common/types/identifier.py b/mex/common/types/identifier.py index c2537dee..483793c3 100644 --- a/mex/common/types/identifier.py +++ b/mex/common/types/identifier.py @@ -12,6 +12,8 @@ class Identifier(str): """Common identifier class based on UUID version 4.""" + __slots__ = () + @classmethod def generate(cls, seed: int | None = None) -> Self: """Generate a new identifier from a seed or random UUID version 4.""" diff --git a/mex/common/types/link.py b/mex/common/types/link.py index 7ed49d54..0fefddeb 100644 --- a/mex/common/types/link.py +++ b/mex/common/types/link.py @@ -42,7 +42,8 @@ def validate_strings(cls, value: Any) -> dict[str, Any]: return {"url": value} if isinstance(value, dict): return value - raise ValueError(f"Allowed input types are dict and str, got {type(value)}") + msg = f"Allowed input types are dict and str, got {type(value)}" + raise ValueError(msg) def __hash__(self) -> int: """Return the hash of this link.""" diff --git a/mex/common/types/path.py b/mex/common/types/path.py index 6a1539d1..1dd93363 100644 --- a/mex/common/types/path.py +++ b/mex/common/types/path.py @@ -16,7 +16,7 @@ def __init__(self, path: Union[str, Path, "PathWrapper"]) -> None: if isinstance(path, str): path = Path(path) elif isinstance(path, PathWrapper): - path = path._path + path = path._path # noqa: SLF001 self._path = path @classmethod @@ -52,7 +52,8 @@ def __eq__(self, other: object) -> bool: """Return true for two PathWrappers with equal paths.""" if isinstance(other, PathWrapper): return self._path.__eq__(other._path) - raise TypeError(f"Can't compare {type(other)} with {type(self)}") + msg = f"Can't compare {type(other)} with {type(self)}" + raise TypeError(msg) def is_absolute(self) -> bool: """True if the underlying path is absolute.""" diff --git a/mex/common/types/temporal_entity.py b/mex/common/types/temporal_entity.py index 3f591db0..f116d9cf 100644 --- a/mex/common/types/temporal_entity.py +++ b/mex/common/types/temporal_entity.py @@ -59,6 +59,7 @@ class TemporalEntityPrecision(Enum): YEAR_MONTH_DAY_TIME_REGEX = r"^[1-9]\d{3}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$" YEAR_MONTH_DAY_REGEX = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$" YEAR_MONTH_REGEX = r"^(?:\d{4}|(?:\d{4}-(?:0[1-9]|1[0-2])))$" +MAX_DATETIME_ARGUMENTS = 7 @total_ordering @@ -70,9 +71,7 @@ class TemporalEntity: precision: TemporalEntityPrecision date_time: datetime STR_SCHEMA_PATTERN = TEMPORAL_ENTITY_REGEX - ALLOWED_PRECISION_LEVELS = [ - key for key in TemporalEntityPrecision.__members__.values() - ] + ALLOWED_PRECISION_LEVELS = list(TemporalEntityPrecision.__members__.values()) JSON_SCHEMA_CONFIG: dict[str, str | list[str]] = { "examples": [ "2011", @@ -98,7 +97,7 @@ def __init__( tzinfo: tzinfo | None = None, ) -> None: ... # pragma: no cover - def __init__( + def __init__( # noqa: PLR0912 self, *args: Union[int, str, date, datetime, "TemporalEntity"], precision: TemporalEntityPrecision | None = None, @@ -123,18 +122,19 @@ def __init__( TemporalEntity(2009, 9, 30, 23, 59, 5, tzinfo=timezone("CET")) TemporalEntity(TemporalEntity(2000)) """ - if len(args) > 7: - raise TypeError( - f"Temporal entity takes at most 7 arguments ({len(args)} given)" + if len(args) > MAX_DATETIME_ARGUMENTS: + msg = ( + f"Temporal entity takes at most {MAX_DATETIME_ARGUMENTS} arguments " + f"({len(args)} given)" ) + raise TypeError(msg) if len(args) == 1 and isinstance( args[0], str | date | datetime | TemporalEntity ): if tzinfo: - raise TypeError( - "Temporal entity does not accept tzinfo in parsing mode" - ) + msg = "Temporal entity does not accept tzinfo in parsing mode" + raise TypeError(msg) if isinstance(args[0], TemporalEntity): date_time, parsed_precision = self._parse_temporal_entity(args[0]) elif isinstance(args[0], datetime): @@ -147,10 +147,11 @@ def __init__( args = cast(tuple[int, ...], args) date_time, parsed_precision = self._parse_integers(*args, tzinfo=tzinfo) else: - raise TypeError( + msg = ( "Temporal entity takes a single str, date, datetime or " "TemporalEntity argument or up to 7 integers" ) + raise TypeError(msg) if precision: self._validate_precision(precision) @@ -227,7 +228,7 @@ def _parse_integers( if tzinfo is None: tzinfo = CET padded = tuple(a or d for a, d in zip_longest(args, (1970, 1, 1, 0, 0, 0, 0))) - date_time = datetime(*padded, tzinfo=tzinfo) # type: ignore + date_time = datetime(*padded, tzinfo=tzinfo) # type: ignore[arg-type,misc] precision = TEMPORAL_ENTITY_PRECISIONS_BY_ARG_LENGTH[len(args)] return date_time, precision @@ -267,12 +268,14 @@ def _parse_date( value: date, ) -> tuple[datetime, TemporalEntityPrecision]: """Parse a date and assume the precision is days.""" - return datetime(value.year, value.month, value.day), TemporalEntityPrecision.DAY + return datetime( + value.year, value.month, value.day, tzinfo=CET + ), TemporalEntityPrecision.DAY - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: """Return whether the given other value is the same as this one.""" try: - other_temporal = TemporalEntity(other) + other_temporal = TemporalEntity(other) # type: ignore[call-overload] except TypeError: return False return bool( diff --git a/mex/common/types/text.py b/mex/common/types/text.py index 14f55f8f..ad4965b9 100644 --- a/mex/common/types/text.py +++ b/mex/common/types/text.py @@ -52,7 +52,8 @@ def validate_strings(cls, value: Any) -> dict[str, Any]: return {"value": value} if isinstance(value, dict): return value - raise ValueError(f"Allowed input types are dict and str, got {type(value)}") + msg = f"Allowed input types are dict and str, got {type(value)}" + raise ValueError(msg) def __hash__(self) -> int: """Return the hash of Text.""" diff --git a/mex/common/utils.py b/mex/common/utils.py index 2638077f..c507d54d 100644 --- a/mex/common/utils.py +++ b/mex/common/utils.py @@ -25,10 +25,7 @@ def contains_any(base: Container[T], tokens: Iterable[T]) -> bool: """Check if a given base contains any of the given tokens.""" - for token in tokens: - if token in base: - return True - return False + return any(token in base for token in tokens) def any_contains_any(bases: Iterable[Container[T] | None], tokens: Iterable[T]) -> bool: diff --git a/mex/common/wikidata/connector.py b/mex/common/wikidata/connector.py index a055b744..42f7cce6 100644 --- a/mex/common/wikidata/connector.py +++ b/mex/common/wikidata/connector.py @@ -1,8 +1,11 @@ from functools import cache +from typing import cast from mex.common.connector.http import HTTPConnector from mex.common.settings import BaseSettings +_PROPS = "info|aliases|labels|descriptions|datatype|claims|sitelinks|sitelinks/urls" + class WikidataQueryServiceConnector(HTTPConnector): """Connector class to handle requesting the Wikidata Query Service.""" @@ -34,10 +37,8 @@ def get_data_by_query(self, query: str) -> list[dict[str, dict[str, str]]]: "User-Agent": f"{settings.mex_web_user_agent}", "Api-User-Agent": f"{settings.mex_web_user_agent}", } - results = self.request("GET", params=params, headers=headers) - - return results["results"]["bindings"] # type: ignore + return cast(list[dict[str, dict[str, str]]], results["results"]["bindings"]) class WikidataAPIConnector(HTTPConnector): @@ -69,18 +70,7 @@ def get_wikidata_item_details_by_id(self, item_id: str) -> dict[str, str]: "action": "wbgetentities", "format": "json", "ids": item_id, - "props": "|".join( - [ - "info", - "aliases", - "labels", - "descriptions", - "datatype", - "claims", - "sitelinks", - "sitelinks/urls", - ] - ), + "props": _PROPS, "formatversion": "2", } headers = { @@ -88,4 +78,4 @@ def get_wikidata_item_details_by_id(self, item_id: str) -> dict[str, str]: "Api-User-Agent": f"{settings.mex_web_user_agent}", } results = self.request("GET", params=params, headers=headers) - return results["entities"][item_id] # type: ignore + return cast(dict[str, str], results["entities"][item_id]) diff --git a/mex/common/wikidata/extract.py b/mex/common/wikidata/extract.py index 1aa1e5ff..e60a560f 100644 --- a/mex/common/wikidata/extract.py +++ b/mex/common/wikidata/extract.py @@ -50,8 +50,9 @@ def search_organization_by_label( try: wd_item_id = results[0]["item"]["value"].split("/")[-1] - except KeyError as exc: - raise MExError(f"KeyError: Error processing results for {item_label}") from exc + except KeyError as error: + msg = f"KeyError: Error processing results for {item_label}" + raise MExError(msg) from error return _get_organization_details(wd_item_id) @@ -134,14 +135,12 @@ def search_organizations_by_label( for item in results: try: wd_item_id = item["item"]["value"].split("/")[-1] - except KeyError as exc: - raise MExError( - f"KeyError: Error processing results for {item_label}" - ) from exc - except IndexError as exc: - raise MExError( - f"IndexError: Error processing results for {item_label}" - ) from exc + except KeyError as error: + msg = f"KeyError: Error processing results for {item_label}" + raise MExError(msg) from error + except IndexError as error: + msg = f"IndexError: Error processing results for {item_label}" + raise MExError(msg) from error yield _get_organization_details(wd_item_id) diff --git a/pdm.lock b/pdm.lock index fcea341a..862a02cf 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:5b087c16b56627dbf2610d4bfb4bfc424949852d2f4c4487ea9b3d02ed1a0e6e" +content_hash = "sha256:e4266229127d28efe577af078ad22dd37650ba3323b215876c04fe81b42d87d3" [[metadata.targets]] requires_python = "==3.11.*" diff --git a/pyproject.toml b/pyproject.toml index 93b45aa1..0f22a4e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,17 +13,17 @@ dependencies = [ "langdetect>=1.0.9,<2", "ldap3>=2.9.1,<3", "mex-model@git+https://github.com/robert-koch-institut/mex-model.git@2.5.0", - "numpy>=2.0.1,<3", - "pandas>=2.2.2,<3", + "numpy>=2.1.2,<3", + "pandas>=2.2.3,<3", "pyarrow>=17.0.0,<18", - "pydantic-settings>=2.4.0,<3", - "pydantic>=2.8.2,<3", + "pydantic-settings>=2.5.2,<3", + "pydantic>=2.9.2,<3", "pytz>=2024.1,<2024.2", "requests>=2.32.3,<3", ] optional-dependencies.dev = [ "ipdb>=0.13.13,<1", - "pandas-stubs>=2.2.2,<3", + "pandas-stubs>=2.2.3,<3", "mypy>=1.11.2,<2", "pytest-cov>=5.0.0,<6", "pytest-random-order>=1.1.1,<2", @@ -97,49 +97,57 @@ docstring-code-format = true [tool.ruff.lint] ignore = [ - "D100", # Allow missing module docstring for brevity - "D104", # Allow missing package docstring for brevity - "D106", # Allow missing nested class docstring (eg pydantic Config) - "D203", # Disallow blank line before class docstring (inverse of D211) - "D213", # Disallow multi-line docstring starting at second line (inverse of D212) - "D406", # Allow section name ending with newline (google style compat) - "D407", # Allow missing dashed underline after section (google style compat) - "D413", # Allow missing blank line after last section (google style compat) - "N805", # Allow first argument of a method to be non-self (pydantic compat) - "N815", # Allow mixedCase variables in class scope (model compat) - "RUF012", # Allow mutable class attributes (pydantic compat) -] -select = [ - "A", # Flake8 builtin shaddow - "B", # BugBear bug and issue finder - "C90", # McCabe complexity checker - "D", # Python docstring style checker - "E", # Python code style errors - "ERA", # Commented-out code detector - "F", # Pyflakes passive python checker - "I", # Isort import utility - "N", # Pep8 naming conventions - "PERF", # Lint performance anti-patterns - "RET", # Flake8 return statement checker - "RUF", # Ruff-specific rules - "S", # Bandit automated security testing - "T10", # Flake8 debug statement checker - "T20", # Flake8 print statement checker - "UP", # PyUpgrade syntax recommender - "W", # Python code style warnings + "AIR", # Disable airflow specific rules (we are not using airflow) + "ANN", # Disable all annotations checks (handled by mypy) + "COM", # Disable flake8-commas checks (let ruff format handle that) + "CPY", # Disable copyright notice checks (we have LICENSE files) + "D100", # Allow missing module docstring (for brevity and speed) + "D104", # Allow missing package docstring (for brevity and speed) + "D203", # Disallow blank line before class docstring (inverse of D211) + "D213", # Disallow multi-line docstring starting at second line (inverse of D212) + "D406", # Allow section name ending with newline (google style compat) + "D407", # Allow missing dashed underline after section (google style compat) + "D413", # Allow missing blank line after last section (google style compat) + "DJ", # Disable django specific checks (we are not using django) + "FBT", # Disable boolean type hint checks (for more flexibility) + "FIX", # Allow committing with open TODOs (don't punish committers) + "N805", # Allow first argument of a method to be non-self (pydantic compat) + "N815", # Allow mixedCase variables in class scope (model compat) + "PTH123", # Allow using builtin open method (simpler than pathlib) + "RUF012", # Allow mutable class attributes (pydantic compat) + "SIM108", # Allow explicit if-else instead of ternary (easier to read) + "TD003", # Allow TODOs without ticket link (don't punish TODO writers) + "TRY003", # Allow long exception message at the raise site (for pydantic) ] +select = ["ALL"] [tool.ruff.lint.per-file-ignores] +"docs/**" = [ + "INP001", # Docs do not need to be a package +] +"mex/common/testing/**" = [ + "ARG001", # Allow unused function arguments for pytest plugin +] "tests/**" = [ - "D101", # Allow missing docstring in public class for tests - "D102", # Allow missing docstring in public method for tests - "D103", # Allow missing docstring in public function for tests - "D107", # Allow missing docstring in `__init__` for tests - "E501", # Allow line too long in tests - "N807", # Allow mocking `__init__` for tests - "S101", # Allow use of `assert` in tests + "ARG005", # Allow unused lambda arguments for mocking + "D101", # Allow missing docstring in public class + "D102", # Allow missing docstring in public method + "D103", # Allow missing docstring in public function + "D107", # Allow missing docstring in `__init__` + "E501", # Allow longer lines with test data + "ISC", # Allow implicitly concatenated strings + "N807", # Allow mocking `__init__` + "PLR0915", # Allow functions with many statements + "PLR2004", # Allow comparing with static values + "PT004", # Allow public fixtures without returns + "PT013", # Allow more flexible pytest imports + "S101", # Allow use of `assert` in tests + "SLF", # Allow private member access ] +[tool.ruff.lint.flake8-import-conventions.extend-aliases] +"reflex" = "rx" + [tool.ruff.lint.isort] known-first-party = ["mex", "tests"] diff --git a/tests/backend_api/conftest.py b/tests/backend_api/conftest.py index 6096e441..e2be02de 100644 --- a/tests/backend_api/conftest.py +++ b/tests/backend_api/conftest.py @@ -6,7 +6,7 @@ from mex.common.backend_api.connector import BackendApiConnector -@pytest.fixture() +@pytest.fixture def mocked_backend(monkeypatch: MonkeyPatch) -> MagicMock: """Return the mocked request dispatch method of backend connector.""" mocked_send_request = MagicMock( diff --git a/tests/conftest.py b/tests/conftest.py index 34573fe3..511a5189 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -64,7 +64,7 @@ def merged_person() -> MergedPerson: ) -@pytest.fixture() +@pytest.fixture def rule_set_request() -> PersonRuleSetRequest: """Return a dummy person rule set request for testing purposes.""" return PersonRuleSetRequest( @@ -74,7 +74,7 @@ def rule_set_request() -> PersonRuleSetRequest: ) -@pytest.fixture() +@pytest.fixture def rule_set_response() -> PersonRuleSetResponse: """Return a dummy person rule set response for testing purposes.""" return PersonRuleSetResponse( diff --git a/tests/identity/test_memory.py b/tests/identity/test_memory.py index dad73272..41e10a4e 100644 --- a/tests/identity/test_memory.py +++ b/tests/identity/test_memory.py @@ -26,32 +26,32 @@ def test_assign() -> None: new_identity = provider.assign(had_primary_source, identifier_in_primary_source) - assert new_identity.model_dump() == dict( - hadPrimarySource=had_primary_source, - identifierInPrimarySource=identifier_in_primary_source, - stableTargetId=Joker(), - identifier=Joker(), - ) + assert new_identity.model_dump() == { + "hadPrimarySource": had_primary_source, + "identifierInPrimarySource": identifier_in_primary_source, + "stableTargetId": Joker(), + "identifier": Joker(), + } found_identity = provider.assign(had_primary_source, identifier_in_primary_source) - assert found_identity.model_dump() == dict( - hadPrimarySource=had_primary_source, - identifierInPrimarySource=identifier_in_primary_source, - stableTargetId=new_identity.stableTargetId, - identifier=new_identity.identifier, - ) + assert found_identity.model_dump() == { + "hadPrimarySource": had_primary_source, + "identifierInPrimarySource": identifier_in_primary_source, + "stableTargetId": new_identity.stableTargetId, + "identifier": new_identity.identifier, + } provider.close() provider = MemoryIdentityProvider.get() fresh_identity = provider.assign(had_primary_source, identifier_in_primary_source) - assert fresh_identity.model_dump() == dict( - hadPrimarySource=had_primary_source, - identifierInPrimarySource=identifier_in_primary_source, - stableTargetId=new_identity.stableTargetId, - identifier=new_identity.identifier, - ) + assert fresh_identity.model_dump() == { + "hadPrimarySource": had_primary_source, + "identifierInPrimarySource": identifier_in_primary_source, + "stableTargetId": new_identity.stableTargetId, + "identifier": new_identity.identifier, + } def test_fetch_empty() -> None: diff --git a/tests/ldap/conftest.py b/tests/ldap/conftest.py index d137e03c..edea5fcb 100644 --- a/tests/ldap/conftest.py +++ b/tests/ldap/conftest.py @@ -12,43 +12,43 @@ LDAPMocker = Callable[[PagedSearchResults], None] -SAMPLE_PERSON_ATTRS = dict( - company=["RKI"], - department=["XY"], - departmentNumber=["XY2"], - displayName=["Sample, Sam"], - employeeID=["1024"], - givenName=["Sam"], - mail=["SampleS@mail.tld"], - objectGUID=["{00000000-0000-4000-8000-000000000000}"], - ou=["XY"], - sAMAccountName=["SampleS"], - sn=["Sample"], -) +SAMPLE_PERSON_ATTRS = { + "company": ["RKI"], + "department": ["XY"], + "departmentNumber": ["XY2"], + "displayName": ["Sample, Sam"], + "employeeID": ["1024"], + "givenName": ["Sam"], + "mail": ["SampleS@mail.tld"], + "objectGUID": ["{00000000-0000-4000-8000-000000000000}"], + "ou": ["XY"], + "sAMAccountName": ["SampleS"], + "sn": ["Sample"], +} -XY_DEPARTMENT_ATTRS = dict( - mail=["XY@mail.tld"], - objectGUID=["{00000000-0000-4000-8000-000000000042}"], - sAMAccountName=["XY"], -) +XY_DEPARTMENT_ATTRS = { + "mail": ["XY@mail.tld"], + "objectGUID": ["{00000000-0000-4000-8000-000000000042}"], + "sAMAccountName": ["XY"], +} -XY2_DEPARTMENT_ATTRS = dict( - mail=["XY2@mail.tld"], - objectGUID=["{00000000-0000-4000-8000-000000000043}"], - sAMAccountName=["XY2"], -) +XY2_DEPARTMENT_ATTRS = { + "mail": ["XY2@mail.tld"], + "objectGUID": ["{00000000-0000-4000-8000-000000000043}"], + "sAMAccountName": ["XY2"], +} -XY_FUNC_ACCOUNT_ATTRS = dict( - mail=["XY@mail.tld"], - objectGUID=["{00000000-0000-4000-8000-000000000044}"], - sAMAccountName=["XY"], -) +XY_FUNC_ACCOUNT_ATTRS = { + "mail": ["XY@mail.tld"], + "objectGUID": ["{00000000-0000-4000-8000-000000000044}"], + "sAMAccountName": ["XY"], +} -XY2_FUNC_ACCOUNT_ATTRS = dict( - mail=["XY2@mail.tld"], - objectGUID=["{00000000-0000-4000-8000-000000000045}"], - sAMAccountName=["XY2"], -) +XY2_FUNC_ACCOUNT_ATTRS = { + "mail": ["XY2@mail.tld"], + "objectGUID": ["{00000000-0000-4000-8000-000000000045}"], + "sAMAccountName": ["XY2"], +} @pytest.fixture @@ -60,7 +60,7 @@ def __init__(self: LDAPConnector) -> None: self._connection = MagicMock(spec=Connection, extend=Mock()) self._connection.extend.standard.paged_search = MagicMock( side_effect=[ - [dict(attributes=e) for e in entries] for entries in results + [{"attributes": e} for e in entries] for entries in results ] ) diff --git a/tests/models/test_base.py b/tests/models/test_base.py index f008e28e..76b8994b 100644 --- a/tests/models/test_base.py +++ b/tests/models/test_base.py @@ -106,8 +106,8 @@ def test_base_model_listyness_fix( ) -> None: try: model = ComplexDummyModel.model_validate(data) - except Exception as error: - assert str(expected) in str(error) + except Exception as error: # noqa: BLE001 + assert str(expected) in str(error) # noqa: PT017 else: actual = model.model_dump() for key, value in expected.items(): @@ -125,7 +125,7 @@ class Shelter(Pet): with pytest.raises( ValidationError, match="Input should be a valid dictionary or instance of Pet" ): - Shelter(inhabitants="foo") # type: ignore + Shelter(inhabitants="foo") # type: ignore[call-arg] class Computer(BaseModel): diff --git a/tests/test_cli.py b/tests/test_cli.py index 1385a417..7ade9a37 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -16,6 +16,8 @@ class MyStr(str): """Dummy string subclass for _field_to_option test.""" + __slots__ = () + class MyEnum(Enum): """Dummy enum class for _field_to_option test.""" @@ -110,7 +112,10 @@ class MyEnum(Enum): create_model( "OptionalFlagSettings", __base__=BaseSettings, - optional_flag=(bool, Field(False, description="This flag is optional")), + optional_flag=( + bool, + Field(default=False, description="This flag is optional"), + ), ), { "name": "optional_flag", @@ -169,7 +174,7 @@ class MyEnum(Enum): __base__=BaseSettings, union_field=( bool | str, - Field(True, description="String or boolean"), + Field(default=True, description="String or boolean"), ), ), { diff --git a/tests/test_utils.py b/tests/test_utils.py index aa3b047d..d379aea7 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -55,7 +55,7 @@ def test_any_contains_any(base: Any, tokens: Iterable[Any], expected: bool) -> N @pytest.mark.parametrize( ("annotation", "types", "expected"), - ( + [ (None, [str], False), (str, [str], True), (str, [Identifier], False), @@ -63,7 +63,7 @@ def test_any_contains_any(base: Any, tokens: Iterable[Any], expected: bool) -> N (list[str | int | list[str]], [str, float], False), (list[str | int | list[str]], [int, str], True), (MergedPersonIdentifier | None, MERGED_IDENTIFIER_CLASSES, True), - ), + ], ids=[ "static None", "simple str", @@ -85,7 +85,7 @@ class DummyModel(BaseModel): @pytest.mark.parametrize( ("annotation", "flags", "expected_types"), - ( + [ (str, {}, [str]), (None, {}, [NoneType]), (None, {"include_none": False}, []), @@ -103,7 +103,7 @@ class DummyModel(BaseModel): {"unpack_literal": False}, [Literal["okay"], NoneType], ), - ), + ], ids=[ "string", "None allowing None", @@ -141,7 +141,7 @@ class PseudoModel(BaseModel): @pytest.mark.parametrize( ("string", "expected"), - (("", ""), ("__XYZ__", "xyz"), ("/foo/BAR$42", "foo bar 42")), + [("", ""), ("__XYZ__", "xyz"), ("/foo/BAR$42", "foo bar 42")], ) def test_normalize(string: str, expected: str) -> None: assert normalize(string) == expected diff --git a/tests/testing/test_joker.py b/tests/testing/test_joker.py index 715fc40e..3af85287 100644 --- a/tests/testing/test_joker.py +++ b/tests/testing/test_joker.py @@ -2,7 +2,7 @@ def test_joker_eq() -> None: - assert Joker() == None # noqa + assert Joker() == None # noqa: E711 assert Joker() == 1 assert {"foo": Joker()} == {"foo": ["bar", Joker()]} diff --git a/tests/types/test_temporal_entity.py b/tests/types/test_temporal_entity.py index 53d7dd42..978fc773 100644 --- a/tests/types/test_temporal_entity.py +++ b/tests/types/test_temporal_entity.py @@ -20,7 +20,7 @@ ("args", "kwargs", "message"), [ ( - (datetime.now(),), + (datetime.now(tz=UTC),), {"tzinfo": UTC}, "Temporal entity does not accept tzinfo in parsing mode", ), @@ -155,7 +155,7 @@ def test_temporal_entity_value_errors( ), ( TemporalEntity, - (datetime(2020, 3, 22, 14, 30, 58),), + (datetime(2020, 3, 22, 14, 30, 58, tzinfo=CET),), {}, 'TemporalEntity("2020-03-22T13:30:58Z")', ), @@ -177,19 +177,19 @@ def test_temporal_entity_value_errors( ), ( YearMonthDayTime, - (YearMonthDayTime(2004, 11, 21, 19, 59, tzinfo=timezone("UTC")),), + (YearMonthDayTime(2004, 11, 21, 19, 59, tzinfo=UTC),), {}, 'YearMonthDayTime("2004-11-21T19:59:00Z")', ), ( TemporalEntity, - (datetime(2004, 11, 19, 00, 00),), + (datetime(2004, 11, 19, 00, 00, tzinfo=CET),), {"precision": TemporalEntityPrecision.DAY}, 'TemporalEntity("2004-11-19")', ), ( YearMonth, - (datetime(2004, 11, 19, 00, 00),), + (datetime(2004, 11, 19, 00, 00, tzinfo=CET),), {"precision": TemporalEntityPrecision.YEAR}, 'YearMonth("2004")', ), @@ -226,8 +226,8 @@ def test_temporal_entity_eq() -> None: assert TemporalEntity(2004) == TemporalEntity("2004") assert TemporalEntity(2004, 11) == TemporalEntity(2004, 11) assert TemporalEntity(2004, 11, 2) == "2004-11-02" - assert TemporalEntity(2020, 3, 22, 14, 30, 58, 0) == datetime( - 2020, 3, 22, 14, 30, 58, 0 + assert TemporalEntity(2020, 3, 22, 14, 30, 58, 0, tzinfo=UTC) == datetime( + 2020, 3, 22, 14, 30, 58, 0, tzinfo=UTC ) assert TemporalEntity(2005) != object() @@ -236,7 +236,9 @@ def test_temporal_entity_gt() -> None: assert TemporalEntity(2004) > TemporalEntity("2003") assert TemporalEntity(2004, 11) < "2013-10-02" assert TemporalEntity(2004, 11) <= TemporalEntity(2004, 12) - assert TemporalEntity(2020, 3, 22, 14, 30, 58) >= datetime(2020, 3, 22, 14, 29) + assert TemporalEntity(2020, 3, 22, 14, 30, 58, tzinfo=UTC) >= datetime( + 2020, 3, 22, 14, 29, tzinfo=UTC + ) with pytest.raises(NotImplementedError): assert TemporalEntity(2005) > object()