freedomofpress · deeplow · Nov 14, 2022 · Sep 20, 2022 · Nov 1, 2022 · Sep 20, 2022
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,8 @@
 - Feature: Re-add Fedora 37 support
 - Feature: Add Debian Bookworm (12) support
 - Reinstate Ubuntu Focal support ([issue #206](https://github.com/freedomofpress/dangerzone/issues/206))
+- Feature: support multiple input documents in the CLI version
+- Bug fix: Failed execution no longer produces empty "safe" documents ([issue #214](https://github.com/freedomofpress/dangerzone/issues/214))
 
 ## Dangerzone 0.3.2
 - Bug fix: some non-ascii characters like “ would prevent Dangerzone from working  ([issue #144](https://github.com/freedomofpress/dangerzone/issues/144))

diff --git a/dangerzone/args.py b/dangerzone/args.py
@@ -1,4 +1,6 @@
-from typing import Optional
+import functools
+import os
+from typing import List, Optional, Tuple
 
 import click
 
@@ -17,6 +19,18 @@ def _validate_input_filename(
     return filename
 
 
+@errors.handle_document_errors
+def _validate_input_filenames(
+    ctx: click.Context, param: List[str], value: Tuple[str]
+) -> List[str]:
+    normalized_filenames = []
+    for filename in value:
+        filename = Document.normalize_filename(filename)
+        Document.validate_input_filename(filename)
+        normalized_filenames.append(filename)
+    return normalized_filenames
+
+
 @errors.handle_document_errors
 def _validate_output_filename(
     ctx: click.Context, param: str, value: Optional[str]
@@ -42,7 +56,53 @@ def validate_input_filename(
     return _validate_input_filename(ctx, param, value)
 
 
+def validate_input_filenames(
+    ctx: click.Context, param: List[str], value: Tuple[str]
+) -> List[str]:
+    return _validate_input_filenames(ctx, param, value)
+
+
 def validate_output_filename(
     ctx: click.Context, param: str, value: Optional[str]
 ) -> Optional[str]:
     return _validate_output_filename(ctx, param, value)
+
+
+def check_suspicious_options(args: List[str]) -> None:
+    options = set([arg for arg in args if arg.startswith("-")])
+    try:
+        files = set(os.listdir())
+    except Exception:
+        # If we can't list files in the current working directory, this means that
+        # we're probably in an unlinked directory. Dangerzone should still work in
+        # this case, so we should return here.
+        return
+
+    intersection = options & files
+    if intersection:
+        filenames_str = ", ".join(intersection)
+        msg = (
+            f"Security: Detected CLI options that are also present as files in the"
+            f" current working directory: {filenames_str}"
+        )
+        click.echo(msg)
+        exit(1)
+
+
+def override_parser_and_check_suspicious_options(click_main: click.Command) -> None:
+    """Override the argument parsing logic of Click.
+
+    Click does not allow us to have access to the raw arguments that it receives (either
+    from sys.argv or from its testing module). To circumvent this, we can override its
+    `Command.parse_args()` method, which is public, so overriding it should be safe.
+
+    We can use it to check for any suspicious options prior to arg parsing.
+    """
+    orig_parse_fn = click_main.parse_args
+
+    @functools.wraps(orig_parse_fn)
+    def custom_parse_fn(ctx: click.Context, args: List[str]) -> List[str]:
+        check_suspicious_options(args)
+        return orig_parse_fn(ctx, args)
+
+    click_main.parse_args = custom_parse_fn  # type: ignore [assignment]
diff --git a/dangerzone/cli.py b/dangerzone/cli.py
@@ -1,18 +1,17 @@
-import json
 import logging
-import os
 import sys
-from typing import Optional
+from typing import Any, Callable, List, Optional, TypeVar
 
 import click
 from colorama import Back, Fore, Style
 
 from . import args, container, errors
-from .container import convert
-from .document import SAFE_EXTENSION, Document
+from .document import SAFE_EXTENSION
 from .logic import DangerzoneCore
 from .util import get_version
 
+F = TypeVar("F", bound=Callable[..., Any])
+
 
 def print_header(s: str) -> None:
     click.echo("")
@@ -26,23 +25,29 @@ def print_header(s: str) -> None:
     help=f"Default is filename ending with {SAFE_EXTENSION}",
 )
 @click.option("--ocr-lang", help="Language to OCR, defaults to none")
-@click.argument("filename", required=True, callback=args.validate_input_filename)
+@click.argument(
+    "filenames",
+    required=True,
+    nargs=-1,
+    type=click.UNPROCESSED,
+    callback=args.validate_input_filenames,
+)
 @errors.handle_document_errors
 def cli_main(
-    output_filename: Optional[str], ocr_lang: Optional[str], filename: str
+    output_filename: Optional[str], ocr_lang: Optional[str], filenames: List[str]
 ) -> None:
     setup_logging()
     dangerzone = DangerzoneCore()
 
     display_banner()
-
-    document = Document(filename)
-
-    # Set PDF output filename
-    if output_filename:
-        document.output_filename = output_filename
+    if len(filenames) == 1 and output_filename:
+        dangerzone.add_document(filenames[0], output_filename)
+    elif len(filenames) > 1 and output_filename:
+        click.echo("--output-filename can only be used with one input file.")
+        exit(1)
     else:
-        document.set_default_output_filename()
+        for filename in filenames:
+            dangerzone.add_document(filename)
 
     # Validate OCR language
     if ocr_lang:
@@ -63,30 +68,24 @@ def cli_main(
     # Convert the document
     print_header("Converting document to safe PDF")
 
-    def stdout_callback(line: str) -> None:
-        try:
-            status = json.loads(line)
-            s = Style.BRIGHT + Fore.CYAN + f"{status['percentage']}% "
-            if status["error"]:
-                s += Style.RESET_ALL + Fore.RED + status["text"]
-            else:
-                s += Style.RESET_ALL + status["text"]
-            click.echo(s)
-        except:
-            click.echo(f"Invalid JSON returned from container: {line}")
+    dangerzone.convert_documents(ocr_lang)
+    documents_safe = dangerzone.get_safe_documents()
+    documents_failed = dangerzone.get_failed_documents()
 
-    if convert(
-        document.input_filename,
-        document.output_filename,
-        ocr_lang,
-        stdout_callback,
-    ):
-        print_header("Safe PDF created successfully")
-        click.echo(document.output_filename)
-        exit(0)
-    else:
-        print_header("Failed to convert document")
+    if documents_safe != []:
+        print_header("Safe PDF(s) created successfully")
+        for document in documents_safe:
+            click.echo(document.output_filename)
+    if documents_failed != []:
+        print_header("Failed to convert document(s)")
+        for document in documents_failed:
+            click.echo(document.input_filename)
         exit(1)
+    else:
+        exit(0)
+
+
+args.override_parser_and_check_suspicious_options(cli_main)
 
 
 def setup_logging() -> None:

diff --git a/dangerzone/container.py b/dangerzone/container.py
@@ -1,15 +1,18 @@
 import gzip
+import json
 import logging
 import os
 import pipes
 import platform
 import shutil
 import subprocess
 import tempfile
-from typing import Callable, List, Optional
+from typing import Callable, List, Optional, Tuple
 
 import appdirs
+from colorama import Fore, Style
 
+from .document import Document
 from .util import get_resource_path, get_subprocess_startupinfo
 
 container_name = "dangerzone.rocks/dangerzone"
@@ -127,7 +130,34 @@ def is_container_installed() -> bool:
     return installed
 
 
-def exec(args: List[str], stdout_callback: Callable[[str], None] = None) -> int:
+def parse_progress(document: Document, line: str) -> Tuple[bool, str, int]:
+    """
+    Parses a line returned by the container.
+    """
+    try:
+        status = json.loads(line)
+    except:
+        error_message = f"Invalid JSON returned from container:\n\n\t {line}"
+        log.error(error_message)
+        return (True, error_message, -1)
+
+    s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
+    s += Fore.CYAN + f"{status['percentage']}% "
+    if status["error"]:
+        s += Style.RESET_ALL + Fore.RED + status["text"]
+        log.error(s)
+    else:
+        s += Style.RESET_ALL + status["text"]
+        log.info(s)
+
+    return (status["error"], status["text"], status["percentage"])
+
+
+def exec(
+    document: Document,
+    args: List[str],
+    stdout_callback: Optional[Callable] = None,
+) -> int:
     args_str = " ".join(pipes.quote(s) for s in args)
     log.info("> " + args_str)
 
@@ -140,18 +170,25 @@ def exec(args: List[str], stdout_callback: Callable[[str], None] = None) -> int:
         universal_newlines=True,
         startupinfo=startupinfo,
     ) as p:
-        if stdout_callback and p.stdout is not None:
+        if p.stdout is not None:
             for line in p.stdout:
-                stdout_callback(line)
+                (error, text, percentage) = parse_progress(document, line)
+                if error:
+                    document.mark_as_failed()
+                if percentage == 100.0:
+                    document.mark_as_safe()
+                if stdout_callback:
+                    stdout_callback(error, text, percentage)
 
         p.communicate()
         return p.returncode
 
 
 def exec_container(
+    document: Document,
     command: List[str],
     extra_args: List[str] = [],
-    stdout_callback: Callable[[str], None] = None,
+    stdout_callback: Optional[Callable] = None,
 ) -> int:
     container_runtime = get_runtime()
 
@@ -181,16 +218,16 @@ def exec_container(
     )
 
     args = [container_runtime] + args
-    return exec(args, stdout_callback)
+    return exec(document, args, stdout_callback)
 
 
 def convert(
-    input_filename: str,
-    output_filename: str,
+    document: Document,
     ocr_lang: Optional[str],
-    stdout_callback: Callable[[str], None],
+    stdout_callback: Optional[Callable] = None,
 ) -> bool:
     success = False
+    document.mark_as_converting()
 
     if ocr_lang:
         ocr = "1"
@@ -210,11 +247,11 @@ def convert(
     command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "document-to-pixels"]
     extra_args = [
         "-v",
-        f"{input_filename}:/tmp/input_file",
+        f"{document.input_filename}:/tmp/input_file",
         "-v",
         f"{pixel_dir}:/dangerzone",
     ]
-    ret = exec_container(command, extra_args, stdout_callback)
+    ret = exec_container(document, command, extra_args, stdout_callback)
     if ret != 0:
         log.error("documents-to-pixels failed")
     else:
@@ -232,18 +269,18 @@ def convert(
             "-e",
             f"OCR_LANGUAGE={ocr_lang}",
         ]
-        ret = exec_container(command, extra_args, stdout_callback)
+        ret = exec_container(document, command, extra_args, stdout_callback)
         if ret != 0:
             log.error("pixels-to-pdf failed")
         else:
             # Move the final file to the right place
-            if os.path.exists(output_filename):
-                os.remove(output_filename)
+            if os.path.exists(document.output_filename):
+                os.remove(document.output_filename)
 
             container_output_filename = os.path.join(
                 safe_dir, "safe-output-compressed.pdf"
             )
-            shutil.move(container_output_filename, output_filename)
+            shutil.move(container_output_filename, document.output_filename)
 
             # We did it
             success = True
@@ -254,6 +291,27 @@ def convert(
     return success
 
 
+def get_max_parallel_conversions() -> int:
+    n_cpu = 1
+    if platform.system() == "Linux":
+        # On Linux, containers run natively
+        cpu_count = os.cpu_count()
+        if cpu_count is not None:
+            n_cpu = cpu_count
+
+    elif get_runtime_name() == "docker":
+        # On Windows and macOS, containers run in a VM,
+        # so we obtain the CPU count for the VM
+        n_cpu_str = subprocess.check_output(
+            [get_runtime(), "info", "--format", "{{.NCPU}}"],
+            text=True,
+            startupinfo=get_subprocess_startupinfo(),
+        )
+        n_cpu = int(n_cpu_str.strip())
+
+    return 2 * n_cpu + 1
+
+
 # From global_common:
 
 # def validate_convert_to_pixel_output(self, common, output):