Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split main() into compress() and leaner main() function #23

Merged
merged 5 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pdf_compressor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from importlib.metadata import PackageNotFoundError, version

from pdf_compressor.ilovepdf import Compress, ILovePDF, Task
from pdf_compressor.main import DEFAULT_SUFFIX, main
from pdf_compressor.main import DEFAULT_SUFFIX, compress, main
from pdf_compressor.utils import si_fmt

try:
Expand Down
116 changes: 82 additions & 34 deletions pdf_compressor/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from argparse import ArgumentParser
from glob import glob
from importlib.metadata import version
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any

from pdf_compressor.ilovepdf import Compress, ILovePDF
from pdf_compressor.utils import ROOT, del_or_keep_compressed, load_dotenv
Expand All @@ -14,6 +14,12 @@
from collections.abc import Sequence

# Default value of compress()'s `suffix` parameter, i.e. the string appended to the
# file names of compressed PDFs when not compressing in place.
DEFAULT_SUFFIX = "-compressed"
# Name of the environment variable the iLovePDF public API key is read from.
API_KEY_KEY = "ILOVEPDF_PUBLIC_KEY"
# Exception instance raised by main() and compress() when the value stored under
# API_KEY_KEY is empty.
# NOTE(review): both call sites look the key up with os.environ[API_KEY_KEY], which
# raises a plain KeyError (without this message) when the variable is not set at
# all -- confirm whether os.environ.get() was intended there.
MISSING_API_KEY_ERR = KeyError(
"pdf-compressor needs an iLovePDF public key to access its API. Set one "
"with pdf-compressor --set-api-key project_public_7af905e... or as environment "
f"variable {API_KEY_KEY}"
)


def main(argv: Sequence[str] | None = None) -> int:
Expand Down Expand Up @@ -110,7 +116,7 @@ def main(argv: Sequence[str] | None = None) -> int:
)

parser.add_argument(
"--write-stats",
"--write-stats-path",
type=str,
default="",
help="File path to write a CSV, Excel or other pandas supported file format "
Expand All @@ -123,24 +129,20 @@ def main(argv: Sequence[str] | None = None) -> int:
)
args = parser.parse_args(argv)

if api_key := args.set_api_key:
if not api_key.startswith("project_public_"):
if new_key := args.set_api_key:
if not new_key.startswith("project_public_"):
raise ValueError(
f"invalid API key, must start with 'project_public_', got {api_key=}"
f"invalid API key, must start with 'project_public_', got {new_key=}"
)

with open(f"{ROOT}/.env", "w+", encoding="utf8") as file:
file.write(f"ILOVEPDF_PUBLIC_KEY={api_key}\n")
file.write(f"ILOVEPDF_PUBLIC_KEY={new_key}\n")

return 0

load_dotenv()

if not (api_key := os.environ["ILOVEPDF_PUBLIC_KEY"]):
raise ValueError(
"pdf-compressor needs an iLovePDF public key to access its API. Set one "
"with pdf-compressor --set-api-key project_public_7af905e..."
)
if not (api_key := os.environ[API_KEY_KEY]):
raise MISSING_API_KEY_ERR

if args.report_quota:
remaining_files = ILovePDF(api_key).get_quota()
Expand All @@ -149,14 +151,60 @@ def main(argv: Sequence[str] | None = None) -> int:

return 0

if not (args.inplace or args.suffix):
return compress(**vars(args))


def compress(
filenames: Sequence[str],
inplace: bool = False,
suffix: str = DEFAULT_SUFFIX,
compression_level: str = "recommended",
min_size_reduction: int | None = None,
debug: bool = False,
verbose: bool = False,
on_no_files: str = "ignore",
on_bad_files: str = "error",
write_stats_path: str = "",
**kwargs: Any, # noqa: ARG001
) -> int:
"""Compress PDFs using iLovePDF's API.

Args:
filenames (list[str]): List of PDF files to compress.
inplace (bool): Whether to compress PDFs in place.
suffix (str): String to append to the filename of compressed PDFs.
compression_level (str): How hard to squeeze the file size.
min_size_reduction (int): How much compressed files need to be smaller than
originals (in percent) for them to be kept.
debug (bool): When true, iLovePDF won't process the request but will output the
parameters received by the server.
verbose (bool): When true, progress will be reported while tasks are running.
on_no_files (str): What to do when no input PDFs received.
on_bad_files (str): How to behave when receiving input files that don't appear
to be PDFs.
write_stats_path (str): File path to write a CSV, Excel or other pandas
supported file format with original vs compressed file sizes and actions
taken on each input file
**kwargs: Additional keywords are ignored.

Returns:
int: 0 if successful, else error code.
"""
if min_size_reduction is None:
min_size_reduction = 10 if inplace else 0

load_dotenv()
if not (api_key := os.environ[API_KEY_KEY]):
raise MISSING_API_KEY_ERR

if not (inplace or suffix):
raise ValueError(
"Files must either be compressed in-place (--inplace) or you must specify a"
" non-empty suffix to append to the name of compressed files."
)

# use set() to ensure no duplicate files
files: list[str] = sorted({f.replace("\\", "/").strip() for f in args.filenames})
files: list[str] = sorted({fn.replace("\\", "/").strip() for fn in filenames})
# for each directory received glob for all PDFs in it
for filepath in files:
if os.path.isdir(filepath):
Expand All @@ -166,30 +214,30 @@ def main(argv: Sequence[str] | None = None) -> int:
pdfs = [f for f in files if re.match(r".*\.pdf[ax]?\s*$", f.lower())]
not_pdfs = {*files} - {*pdfs}

if args.on_bad_files == "error" and len(not_pdfs) > 0:
if on_bad_files == "error" and len(not_pdfs) > 0:
raise ValueError(
f"Input files must be PDFs, got {len(not_pdfs):,} files with unexpected "
f"extension: {', '.join(not_pdfs)}"
)
if args.on_bad_files == "warn" and len(not_pdfs) > 0:
if on_bad_files == "warn" and len(not_pdfs) > 0:
print(
f"Warning: Got {len(not_pdfs):,} input files without '.pdf' "
f"extension: {', '.join(not_pdfs)}"
)

if args.verbose:
if verbose:
if len(pdfs) > 0:
print(f"PDFs to be compressed with iLovePDF: {len(pdfs):,}")
else:
print("Nothing to do: received no input PDF files.")

if len(pdfs) == 0:
if args.on_no_files == "error":
if on_no_files == "error":
raise ValueError("No input files provided")
return 0

task = Compress(api_key, compression_level=args.compression_level, debug=args.debug)
task.verbose = args.verbose
task = Compress(api_key, compression_level=compression_level, debug=debug)
task.verbose = verbose

for pdf in pdfs:
task.add_file(pdf)
Expand All @@ -200,14 +248,14 @@ def main(argv: Sequence[str] | None = None) -> int:

task.delete_current_task()

min_size_red = args.min_size_reduction or (10 if args.inplace else 0)
min_size_red = min_size_reduction or (10 if inplace else 0)

if not args.debug:
if not debug:
stats = del_or_keep_compressed(
pdfs, downloaded_file, args.inplace, args.suffix, min_size_red, args.verbose
pdfs, downloaded_file, inplace, suffix, min_size_red, verbose
)

if args.write_stats:
if write_stats_path:
try:
import pandas as pd
except ImportError:
Expand All @@ -216,16 +264,16 @@ def main(argv: Sequence[str] | None = None) -> int:

df_stats = pd.DataFrame(stats).T
df_stats.index.name = "file"
stats_path = args.write_stats.strip().lower()

if ".csv" in stats_path:
df_stats.to_csv(args.write_stats, float_format="%.4f")
elif ".xlsx" in stats_path or ".xls" in stats_path:
df_stats.to_excel(args.write_stats, float_format="%.4f")
elif ".json" in stats_path:
df_stats.to_json(args.write_stats)
elif ".html" in stats_path:
df_stats.to_html(args.write_stats, float_format="%.4f")
stats_path_lower = write_stats_path.strip().lower()

if ".csv" in stats_path_lower:
df_stats.to_csv(write_stats_path, float_format="%.4f")
elif ".xlsx" in stats_path_lower or ".xls" in stats_path_lower:
df_stats.to_excel(write_stats_path, float_format="%.4f")
elif ".json" in stats_path_lower:
df_stats.to_json(write_stats_path)
elif ".html" in stats_path_lower:
df_stats.to_html(write_stats_path, float_format="%.4f")

return 0

Expand Down
31 changes: 16 additions & 15 deletions pdf_compressor/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
ROOT = dirname(dirname(abspath(__file__)))


def si_fmt(
val: float, binary: bool = True, fmt_spec: str = ".1f", sep: str = ""
) -> str:
def si_fmt(val: float, binary: bool = True, fmt: str = ".1f", sep: str = "") -> str:
"""Convert large numbers into human readable format using SI prefixes in binary
(1024) or metric (1000) mode.

Expand All @@ -25,7 +23,7 @@ def si_fmt(
val (int | float): Some numerical value to format.
binary (bool, optional): If True, scaling factor is 2^10 = 1024 else 1000.
Defaults to True.
fmt_spec (str): f-string format specifier. Configure precision and left/right
fmt (str): f-string format specifier. Configure precision and left/right
padding in returned string. Defaults to ".1f". Can be used to ensure leading
or trailing whitespace for shorter numbers. Ex.1: ">10.2f" has 2 decimal
places and is at least 10 characters long with leading spaces if necessary.
Expand All @@ -52,7 +50,7 @@ def si_fmt(
break
val *= factor

return f"{val:{fmt_spec}}{sep}{_scale}"
return f"{val:{fmt}}{sep}{_scale}"


def load_dotenv(filepath: str | None = None) -> None:
Expand All @@ -64,16 +62,14 @@ def load_dotenv(filepath: str | None = None) -> None:
if filepath is None:
filepath = os.path.join(f"{ROOT}", ".env")

if not isfile(filepath) or getsize(filepath) == 0:
return
if isfile(filepath):
with open(filepath, encoding="utf8") as dotenv:
for line in dotenv:
if line.startswith("#"):
continue

with open(filepath, encoding="utf8") as dotenv:
for line in dotenv:
if line.startswith("#"):
continue

key, val = line.replace("\n", "").split("=")
os.environ[key] = val
key, val = line.replace("\n", "").split("=")
os.environ[key] = val


def del_or_keep_compressed(
Expand Down Expand Up @@ -151,7 +147,12 @@ def del_or_keep_compressed(
print("Old file moved to trash.")
orig_file_name = os.path.split(orig_path)[1]

os.rename(orig_path, f"{expanduser('~')}/.Trash/{orig_file_name}")
trash_path = f"{expanduser('~')}/.Trash/{orig_file_name}"
if isfile(trash_path): # if file with same name already in trash,
# delete it to avoid PermissionError: [Errno 1] Operation not
# permitted
os.remove(trash_path)
os.rename(orig_path, trash_path)
else:
print("Old file deleted.")

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Package = "https://pypi.org/project/pdf-compressor"

[project.optional-dependencies]
test = ["pytest", "pytest-cov"]
stats = ["pandas"] # needed for --write-stats option
stats = ["pandas"] # needed for --write-stats-path option

[project.scripts]
pdf-compressor = "pdf_compressor:main"
Expand Down
9 changes: 6 additions & 3 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from pytest import CaptureFixture

from pdf_compressor import DEFAULT_SUFFIX, main
from pdf_compressor.main import API_KEY_KEY
from pdf_compressor.utils import load_dotenv

if TYPE_CHECKING:
Expand All @@ -32,7 +33,9 @@ def test_main_batch_compress(tmp_path: Path, capsys: CaptureFixture[str]) -> Non

# add input_path twice to test how we handle duplicate input files
stats_path = f"{tmp_path}/stats.csv"
ret_code = main([input_path, input_path, input_path_2, "--write-stats", stats_path])
ret_code = main(
[input_path, input_path, input_path_2, "--write-stats-path", stats_path]
)
assert ret_code == 0, f"expected main() exit code to be 0, got {ret_code}"

# check stats file was written and has expected content
Expand Down Expand Up @@ -104,7 +107,7 @@ def test_main_set_api_key() -> None:
"""Test CLI setting iLovePDF public API key."""
load_dotenv()

api_key = os.environ["ILOVEPDF_PUBLIC_KEY"] # save API key to reset it later
api_key = os.environ[API_KEY_KEY] # save API key to reset it later

with pytest.raises(ValueError, match="invalid API key"):
main(["--set-api-key", "foo"])
Expand All @@ -113,7 +116,7 @@ def test_main_set_api_key() -> None:

load_dotenv()

assert os.environ["ILOVEPDF_PUBLIC_KEY"] == "project_public_foobar"
assert os.environ[API_KEY_KEY] == "project_public_foobar"

main(["--set-api-key", api_key]) # restore previous value

Expand Down
6 changes: 3 additions & 3 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
def test_si_fmt() -> None:
assert si_fmt(123456) == "120.6K"

assert si_fmt(12345678, fmt_spec=">6.2f", sep=" ") == " 11.77 M"
assert si_fmt(12345678, fmt=">6.2f", sep=" ") == " 11.77 M"

assert si_fmt(0.00123, fmt_spec=".3g", binary=False) == "1.23m"
assert si_fmt(0.00123, fmt=".3g", binary=False) == "1.23m"

assert si_fmt(0.00000123, fmt_spec="5.1f", sep=" ") == " 1.3 μ"
assert si_fmt(0.00000123, fmt="5.1f", sep=" ") == " 1.3 μ"