diff --git a/src/packg/__init__.py b/src/packg/__init__.py
index ac8ab57..bad276b 100644
--- a/src/packg/__init__.py
+++ b/src/packg/__init__.py
@@ -2,4 +2,4 @@
 from .misc import format_exception
 
 __all__ = ["Const", "format_exception"]
-__version__ = "0.22.2"
+__version__ = "0.22.3"
diff --git a/src/packg/constclass.py b/src/packg/constclass.py
index e95a365..0e79435 100644
--- a/src/packg/constclass.py
+++ b/src/packg/constclass.py
@@ -79,6 +79,16 @@ class Const(metaclass=InstanceToClassDelegator):
     # create the class properties with empty entries for the root parent
     _dict: Dict[str, Dict[str, Any]] = {"Const": {}}
 
+    @classmethod
+    def values_list(cls) -> List[Any]:
+        """Return the result of ``cls.values()`` as a list."""
+        return list(cls.values())
+
+    @classmethod
+    def keys_list(cls) -> List[str]:
+        """Return the result of ``cls.keys()`` as a list."""
+        return list(cls.keys())
+
     @classmethod
     def _get_dict(cls):
         return cls._dict[cls.__name__]
diff --git a/src/packg/iotools/__init__.py b/src/packg/iotools/__init__.py
index b1d4005..0a266aa 100644
--- a/src/packg/iotools/__init__.py
+++ b/src/packg/iotools/__init__.py
@@ -28,6 +28,11 @@
 )
 from .misc import (
     set_working_directory,
+    get_file_size,
+    get_file_size_in_mb,
+    format_b_in_mb,
+    format_b_in_gb,
+    format_bytes_human_readable,
 )
 from .pathspec_matcher import (
     make_git_pathspec,
@@ -37,6 +42,7 @@
     make_and_apply_pathspecs,
     make_pathspecs,
     PathSpecWithConversion,
+    PathSpecRepr,
 )
 
 from .yamlext import load_yaml, loads_yaml, dump_yaml, dumps_yaml
@@ -74,4 +80,11 @@
     "apply_pathspecs",
     "make_and_apply_pathspecs",
     "make_pathspecs",
+    "set_working_directory",
+    "get_file_size",
+    "get_file_size_in_mb",
+    "format_b_in_mb",
+    "format_b_in_gb",
+    "format_bytes_human_readable",
+    "PathSpecRepr",
 ]
diff --git a/src/packg/multiproc/multiproc_worker.py b/src/packg/multiproc/multiproc_worker.py
index afb0cec..4f3cc92 100644
--- a/src/packg/multiproc/multiproc_worker.py
+++ b/src/packg/multiproc/multiproc_worker.py
@@ -149,6 +149,8 @@ def get(self):
     def close(self):
         # note: output queue has to be empty before workers can be joined otherwise this will hang
         logger.debug(f"Joining workers")
+        for w in self.worker_list:
+            w.close()
         for p in self.process_list:
             p.join()
             self.update_pbar(1)
@@ -222,6 +224,9 @@ def multi_fn_no_output(
                 pbar.update(1)
         pbar.close()
 
+    def close(self):
+        """No-op by default; presumably overridden by workers that need cleanup (confirm)."""
+
 
 class ExampleWorkerWithOutput(Worker):
     def setup(self):
diff --git a/src/packg/strings/__init__.py b/src/packg/strings/__init__.py
index b8b95f1..e904b44 100644
--- a/src/packg/strings/__init__.py
+++ b/src/packg/strings/__init__.py
@@ -11,3 +11,4 @@
 from .hasher import hash_object
 from .quote_urlparse import quote_with_urlparse, unquote_with_urlparse
 from .tabul import format_pseudo_table
+from .formatters import clean_string_for_filename, dict_to_str_comma_equals
diff --git a/src/packg/strings/formatters.py b/src/packg/strings/formatters.py
index fe5f56d..98fc7e7 100644
--- a/src/packg/strings/formatters.py
+++ b/src/packg/strings/formatters.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+import re
+
 
 def dict_to_str_comma_equals(in_dict: dict[str, any] | list[tuple[any, any]]):
     try:
@@ -10,3 +12,17 @@ def dict_to_str_comma_equals(in_dict: dict[str, any] | list[tuple[any, any]]):
     for k, v in items:
         str_list.append(f"{k}={v}")
     return ", ".join(str_list)
+
+
+def clean_string_for_filename(input_str: str) -> str:
+    """
+    Clean string to make it less annoying when used as a filename.
+
+    Replace whitespace and some other special characters with underscores,
+    then collapse multiple underscores and strip underscores at the beginning or end.
+    """
+    # double quotes are part of the character class below, no separate replace needed
+    output_str = re.sub(r"\s+", "_", input_str)
+    output_str = re.sub(r"[()\[\]{}&*?!<>|\\/:;\"']", "_", output_str)
+    output_str = re.sub(r"_+", "_", output_str)
+    return output_str.strip("_")
diff --git a/src/packg/system/systemcall.py b/src/packg/system/systemcall.py
index 5cee796..e9397ec 100644
--- a/src/packg/system/systemcall.py
+++ b/src/packg/system/systemcall.py
@@ -1,11 +1,11 @@
 """Helper to run system commands and process their output."""
 
 import subprocess
-from typing import Tuple, Optional
+from typing import Tuple, Optional, Union
 
 
 def systemcall(
-    call: str, verbose: bool = False, decode: Optional[str] = "utf-8", shell: bool = True
+    call: Union[str, list[str]], verbose: bool = False, decode: Optional[str] = "utf-8", shell: bool = True
 ) -> Tuple[str, str, int]:
     """Run a command with subprocess.Popen and process the output.
     This call is synchronous so output will only returned once the command is done.
@@ -53,7 +53,7 @@ def assert_command_worked(errmsg: str, cmd: str, out: str, err: str, retcode: in
 
 
 def systemcall_with_assert(
-    call: str,
+    call: Union[str, list[str]],
     errmsg: str = "none",
     verbose: bool = False,
     decode: Optional[str] = "utf-8",
diff --git a/src/packg/web/robust_request.py b/src/packg/web/robust_request.py
new file mode 100644
index 0000000..4df5c45
--- /dev/null
+++ b/src/packg/web/robust_request.py
@@ -0,0 +1,64 @@
+from __future__ import annotations
+
+import time
+from typing import Any, Callable, Optional
+
+import requests
+
+
+def send_robust_post_request(
+    url: str,
+    data: Optional[Any] = None,
+    json: Optional[Any] = None,
+    n_trials: int = 3,
+    n_retry_sleep: int = 5,
+    print_fn: Callable[[str], Any] = print,
+    **kwargs,
+) -> requests.Response:
+    """
+    Send a POST request, retrying on failure and logging every failed attempt.
+
+    An attempt succeeds only if the response has HTTP status code 200. Any other status
+    code and any ``requests.exceptions.RequestException`` is reported through ``print_fn``
+    and followed by the next attempt.
+
+    Parameters:
+        url: The URL for the POST request.
+        data: dict, list of tuples, bytes, or file-like object to send in the body of the Request
+        json: A JSON serializable Python object to send in the body of the Request
+        n_trials: Total number of attempts, including the first one.
+        n_retry_sleep: Seconds to wait between attempts.
+        print_fn: Function to use for logging.
+        **kwargs: Optional arguments that the request takes, e.g., the headers dict
+
+    Returns:
+        requests.Response: The response object if successful.
+
+    Raises:
+        Exception: If all attempts fail. A request exception raised by the last attempt
+            is chained as the cause.
+    """
+    last_error: Optional[requests.exceptions.RequestException] = None
+    for attempt in range(1, n_trials + 1):
+        try:
+            response = requests.post(url, data=data, json=json, **kwargs)
+            if response.status_code == 200:
+                return response
+            # a non-200 answer is not an exception, so there is nothing to chain
+            last_error = None
+            print_fn(
+                f"Attempt {attempt}/{n_trials}: Query failed with status code "
+                f"{response.status_code}. Response: {response.text}"
+            )
+        except requests.exceptions.RequestException as e:
+            last_error = e
+            print_fn(f"Attempt {attempt}/{n_trials}: Request failed with exception: {e}")
+
+        if attempt < n_trials:
+            print_fn(f"Retrying in {n_retry_sleep} seconds...")
+            time.sleep(n_retry_sleep)
+
+    # chain the last request exception (if any) so the root cause shows up in the traceback
+    raise Exception(
+        f"Failed to send POST request to {url} after {n_trials} attempts."
+    ) from last_error