diff --git a/pdf_compressor/__init__.py b/pdf_compressor/__init__.py index 995deeb..ebd6c59 100644 --- a/pdf_compressor/__init__.py +++ b/pdf_compressor/__init__.py @@ -1,7 +1,7 @@ from importlib.metadata import PackageNotFoundError, version from pdf_compressor.ilovepdf import Compress, ILovePDF, Task -from pdf_compressor.main import DEFAULT_SUFFIX, main +from pdf_compressor.main import DEFAULT_SUFFIX, compress, main from pdf_compressor.utils import si_fmt try: diff --git a/pdf_compressor/main.py b/pdf_compressor/main.py index a589410..230be77 100644 --- a/pdf_compressor/main.py +++ b/pdf_compressor/main.py @@ -5,7 +5,7 @@ from argparse import ArgumentParser from glob import glob from importlib.metadata import version -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from pdf_compressor.ilovepdf import Compress, ILovePDF from pdf_compressor.utils import ROOT, del_or_keep_compressed, load_dotenv @@ -14,6 +14,12 @@ from collections.abc import Sequence DEFAULT_SUFFIX = "-compressed" +API_KEY_KEY = "ILOVEPDF_PUBLIC_KEY" +MISSING_API_KEY_ERR = KeyError( + "pdf-compressor needs an iLovePDF public key to access its API. Set one " + "with pdf-compressor --set-api-key project_public_7af905e... or as environment " + f"variable {API_KEY_KEY}" +) def main(argv: Sequence[str] | None = None) -> int: @@ -110,7 +116,7 @@ def main(argv: Sequence[str] | None = None) -> int: ) parser.add_argument( - "--write-stats", + "--write-stats-path", type=str, default="", help="File path to write a CSV, Excel or other pandas supported file format " @@ -123,24 +129,20 @@ def main(argv: Sequence[str] | None = None) -> int: ) args = parser.parse_args(argv) - if api_key := args.set_api_key: - if not api_key.startswith("project_public_"): + if new_key := args.set_api_key: + if not new_key.startswith("project_public_"): raise ValueError( - f"invalid API key, must start with 'project_public_', got {api_key=}" + f"invalid API key, must start with 'project_public_', got {new_key=}" ) with open(f"{ROOT}/.env", "w+", encoding="utf8") as file: - file.write(f"ILOVEPDF_PUBLIC_KEY={api_key}\n") + file.write(f"ILOVEPDF_PUBLIC_KEY={new_key}\n") return 0 load_dotenv() - - if not (api_key := os.environ["ILOVEPDF_PUBLIC_KEY"]): - raise ValueError( - "pdf-compressor needs an iLovePDF public key to access its API. Set one " - "with pdf-compressor --set-api-key project_public_7af905e..." - ) + if not (api_key := os.environ[API_KEY_KEY]): + raise MISSING_API_KEY_ERR if args.report_quota: remaining_files = ILovePDF(api_key).get_quota() @@ -149,14 +151,60 @@ def main(argv: Sequence[str] | None = None) -> int: return 0 - if not (args.inplace or args.suffix): + return compress(**vars(args)) + + +def compress( + filenames: Sequence[str], + inplace: bool = False, + suffix: str = DEFAULT_SUFFIX, + compression_level: str = "recommended", + min_size_reduction: int | None = None, + debug: bool = False, + verbose: bool = False, + on_no_files: str = "ignore", + on_bad_files: str = "error", + write_stats_path: str = "", + **kwargs: Any, # noqa: ARG001 +) -> int: + """Compress PDFs using iLovePDF's API. + + Args: + filenames (list[str]): List of PDF files to compress. + inplace (bool): Whether to compress PDFs in place. + suffix (str): String to append to the filename of compressed PDFs. + compression_level (str): How hard to squeeze the file size. + min_size_reduction (int): How much compressed files need to be smaller than + originals (in percent) for them to be kept. + debug (bool): When true, iLovePDF won't process the request but will output the + parameters received by the server. + verbose (bool): When true, progress will be reported while tasks are running. + on_no_files (str): What to do when no input PDFs received. + on_bad_files (str): How to behave when receiving input files that don't appear + to be PDFs. + write_stats_path (str): File path to write a CSV, Excel or other pandas + supported file format with original vs compressed file sizes and actions + taken on each input file + **kwargs: Additional keywords are ignored. + + Returns: + int: 0 if successful, else error code. + """ + if min_size_reduction is None: + min_size_reduction = 10 if inplace else 0 + + load_dotenv() + if not (api_key := os.environ[API_KEY_KEY]): + raise MISSING_API_KEY_ERR + + if not (inplace or suffix): raise ValueError( "Files must either be compressed in-place (--inplace) or you must specify a" " non-empty suffix to append to the name of compressed files." ) # use set() to ensure no duplicate files - files: list[str] = sorted({f.replace("\\", "/").strip() for f in args.filenames}) + files: list[str] = sorted({fn.replace("\\", "/").strip() for fn in filenames}) # for each directory received glob for all PDFs in it for filepath in files: if os.path.isdir(filepath): @@ -166,30 +214,30 @@ def main(argv: Sequence[str] | None = None) -> int: pdfs = [f for f in files if re.match(r".*\.pdf[ax]?\s*$", f.lower())] not_pdfs = {*files} - {*pdfs} - if args.on_bad_files == "error" and len(not_pdfs) > 0: + if on_bad_files == "error" and len(not_pdfs) > 0: raise ValueError( f"Input files must be PDFs, got {len(not_pdfs):,} files with unexpected " f"extension: {', '.join(not_pdfs)}" ) - if args.on_bad_files == "warn" and len(not_pdfs) > 0: + if on_bad_files == "warn" and len(not_pdfs) > 0: print( f"Warning: Got {len(not_pdfs):,} input files without '.pdf' " f"extension: {', '.join(not_pdfs)}" ) - if args.verbose: + if verbose: if len(pdfs) > 0: print(f"PDFs to be compressed with iLovePDF: {len(pdfs):,}") else: print("Nothing to do: received no input PDF files.") if len(pdfs) == 0: - if args.on_no_files == "error": + if on_no_files == "error": raise ValueError("No input files provided") return 0 - task = Compress(api_key, compression_level=args.compression_level, debug=args.debug) - task.verbose = args.verbose + task = Compress(api_key, compression_level=compression_level, debug=debug) + task.verbose = verbose for pdf in pdfs: task.add_file(pdf) @@ -200,14 +248,14 @@ def main(argv: Sequence[str] | None = None) -> int: task.delete_current_task() - min_size_red = args.min_size_reduction or (10 if args.inplace else 0) + min_size_red = min_size_reduction or (10 if inplace else 0) - if not args.debug: + if not debug: stats = del_or_keep_compressed( - pdfs, downloaded_file, args.inplace, args.suffix, min_size_red, args.verbose + pdfs, downloaded_file, inplace, suffix, min_size_red, verbose ) - if args.write_stats: + if write_stats_path: try: import pandas as pd except ImportError: @@ -216,16 +264,16 @@ def main(argv: Sequence[str] | None = None) -> int: df_stats = pd.DataFrame(stats).T df_stats.index.name = "file" - stats_path = args.write_stats.strip().lower() - - if ".csv" in stats_path: - df_stats.to_csv(args.write_stats, float_format="%.4f") - elif ".xlsx" in stats_path or ".xls" in stats_path: - df_stats.to_excel(args.write_stats, float_format="%.4f") - elif ".json" in stats_path: - df_stats.to_json(args.write_stats) - elif ".html" in stats_path: - df_stats.to_html(args.write_stats, float_format="%.4f") + stats_path_lower = write_stats_path.strip().lower() + + if ".csv" in stats_path_lower: + df_stats.to_csv(write_stats_path, float_format="%.4f") + elif ".xlsx" in stats_path_lower or ".xls" in stats_path_lower: + df_stats.to_excel(write_stats_path, float_format="%.4f") + elif ".json" in stats_path_lower: + df_stats.to_json(write_stats_path) + elif ".html" in stats_path_lower: + df_stats.to_html(write_stats_path, float_format="%.4f") return 0 diff --git a/pdf_compressor/utils.py b/pdf_compressor/utils.py index 52b5042..85ef32b 100644 --- a/pdf_compressor/utils.py +++ b/pdf_compressor/utils.py @@ -13,9 +13,7 @@ ROOT = dirname(dirname(abspath(__file__))) -def si_fmt( - val: float, binary: bool = True, fmt_spec: str = ".1f", sep: str = "" -) -> str: +def si_fmt(val: float, binary: bool = True, fmt: str = ".1f", sep: str = "") -> str: """Convert large numbers into human readable format using SI prefixes in binary (1024) or metric (1000) mode. @@ -25,7 +23,7 @@ def si_fmt( val (int | float): Some numerical value to format. binary (bool, optional): If True, scaling factor is 2^10 = 1024 else 1000. Defaults to True. - fmt_spec (str): f-string format specifier. Configure precision and left/right + fmt (str): f-string format specifier. Configure precision and left/right padding in returned string. Defaults to ".1f". Can be used to ensure leading or trailing whitespace for shorter numbers. Ex.1: ">10.2f" has 2 decimal places and is at least 10 characters long with leading spaces if necessary. @@ -52,7 +50,7 @@ def si_fmt( break val *= factor - return f"{val:{fmt_spec}}{sep}{_scale}" + return f"{val:{fmt}}{sep}{_scale}" def load_dotenv(filepath: str | None = None) -> None: @@ -64,16 +62,14 @@ def load_dotenv(filepath: str | None = None) -> None: if filepath is None: filepath = os.path.join(f"{ROOT}", ".env") - if not isfile(filepath) or getsize(filepath) == 0: - return + if isfile(filepath): + with open(filepath, encoding="utf8") as dotenv: + for line in dotenv: + if line.startswith("#"): + continue - with open(filepath, encoding="utf8") as dotenv: - for line in dotenv: - if line.startswith("#"): - continue - - key, val = line.replace("\n", "").split("=") - os.environ[key] = val + key, val = line.replace("\n", "").split("=") + os.environ[key] = val def del_or_keep_compressed( @@ -151,7 +147,12 @@ def del_or_keep_compressed( print("Old file moved to trash.") orig_file_name = os.path.split(orig_path)[1] - os.rename(orig_path, f"{expanduser('~')}/.Trash/{orig_file_name}") + trash_path = f"{expanduser('~')}/.Trash/{orig_file_name}" + if isfile(trash_path): # if file with same name already in trash, + # delete it to avoid PermissionError: [Errno 1] Operation not + # permitted + os.remove(trash_path) + os.rename(orig_path, trash_path) else: print("Old file deleted.") diff --git a/pyproject.toml b/pyproject.toml index b0651d3..3c253b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ Package = "https://pypi.org/project/pdf-compressor" [project.optional-dependencies] test = ["pytest", "pytest-cov"] -stats = ["pandas"] # needed for --write-stats option +stats = ["pandas"] # needed for --write-stats-path option [project.scripts] pdf-compressor = "pdf_compressor:main" diff --git a/tests/test_main.py b/tests/test_main.py index cbd48d7..5414bc6 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -11,6 +11,7 @@ from pytest import CaptureFixture from pdf_compressor import DEFAULT_SUFFIX, main +from pdf_compressor.main import API_KEY_KEY from pdf_compressor.utils import load_dotenv if TYPE_CHECKING: @@ -32,7 +33,9 @@ def test_main_batch_compress(tmp_path: Path, capsys: CaptureFixture[str]) -> Non # add input_path twice to test how we handle duplicate input files stats_path = f"{tmp_path}/stats.csv" - ret_code = main([input_path, input_path, input_path_2, "--write-stats", stats_path]) + ret_code = main( + [input_path, input_path, input_path_2, "--write-stats-path", stats_path] + ) assert ret_code == 0, f"expected main() exit code to be 0, got {ret_code}" # check stats file was written and has expected content @@ -104,7 +107,7 @@ def test_main_set_api_key() -> None: """Test CLI setting iLovePDF public API key.""" load_dotenv() - api_key = os.environ["ILOVEPDF_PUBLIC_KEY"] # save API key to reset it later + api_key = os.environ[API_KEY_KEY] # save API key to reset it later with pytest.raises(ValueError, match="invalid API key"): main(["--set-api-key", "foo"]) @@ -113,7 +116,7 @@ def test_main_set_api_key() -> None: load_dotenv() - assert os.environ["ILOVEPDF_PUBLIC_KEY"] == "project_public_foobar" + assert os.environ[API_KEY_KEY] == "project_public_foobar" main(["--set-api-key", api_key]) # restore previous value diff --git a/tests/test_utils.py b/tests/test_utils.py index 9dcc48c..ef6d791 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -6,8 +6,8 @@ def test_si_fmt() -> None: assert si_fmt(123456) == "120.6K" - assert si_fmt(12345678, fmt_spec=">6.2f", sep=" ") == " 11.77 M" + assert si_fmt(12345678, fmt=">6.2f", sep=" ") == " 11.77 M" - assert si_fmt(0.00123, fmt_spec=".3g", binary=False) == "1.23m" + assert si_fmt(0.00123, fmt=".3g", binary=False) == "1.23m" - assert si_fmt(0.00000123, fmt_spec="5.1f", sep=" ") == " 1.3 μ" + assert si_fmt(0.00000123, fmt="5.1f", sep=" ") == " 1.3 μ"