From 133bac09d797309d1b1e92ddf7d69b82da64f276 Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Mon, 25 Mar 2024 17:08:58 +0100 Subject: [PATCH] fix: handle arbitrary newline terminators --- src/picklescan/scanner.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/picklescan/scanner.py b/src/picklescan/scanner.py index 8412093..d4d7ad6 100644 --- a/src/picklescan/scanner.py +++ b/src/picklescan/scanner.py @@ -168,7 +168,6 @@ def _http_get(url) -> bytes: def _list_globals(data: IO[bytes], multiple_pickles=True) -> Set[Tuple[str, str]]: - globals = set() memo = {} @@ -179,7 +178,17 @@ def _list_globals(data: IO[bytes], multiple_pickles=True) -> Set[Tuple[str, str] try: ops = list(pickletools.genops(data)) except Exception as e: - raise GenOpsError(str(e)) + # XXX: pickle will happily load files that contain arbitrarily placed new lines whereas pickletools errors in such cases. + # below is code to circumvent or skip these newlines while succeeding at parsing the opcodes. + err = str(e) + if "opcode b'\\n' unknown" not in err: + raise GenOpsError(err) + else: + pos = int(err.split(",")[0].replace("at position ", "")) + data.seek(-(pos + 1), 1) + ops = list(pickletools.genops(data.read(pos))) + data.seek(1, 1) + last_byte = data.read(1) data.seek(-1, 1) @@ -288,7 +297,6 @@ def scan_zip_bytes(data: IO[bytes], file_id) -> ScanResult: def scan_numpy(data: IO[bytes], file_id) -> ScanResult: - # Delay import to avoid dependency on NumPy import numpy as np