Skip to content

Commit

Permalink
fix: handle broken pickled files better (#33)
Browse files Browse the repository at this point in the history
  • Loading branch information
McPatate authored Jan 29, 2025
1 parent 670efbb commit efb2533
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 11 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ pytest-cov==3.0.0
requests==2.31.0
aiohttp==3.9.1
black==22.8.0
numpy==1.24.0
numpy>1.24.0
19 changes: 13 additions & 6 deletions src/picklescan/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,16 +184,15 @@ def _list_globals(data: IO[bytes], multiple_pickles=True) -> Set[Tuple[str, str]
memo = {}
# Scan the data for pickle buffers, stopping when parsing fails or stops making progress
last_byte = b"dummy"
parsing_pkl_error = None
while last_byte != b"":
# List opcodes
ops = []
try:
ops = list(pickletools.genops(data))
for op in pickletools.genops(data):
ops.append(op)
except Exception as e:
# XXX: given we can have multiple pickles in a file, we may have already successfully extracted globals from a valid pickle.
# Thus return the already found globals in the error & let the caller decide what to do.
globals_opt = globals if len(globals) > 0 else None
raise GenOpsError(str(e), globals_opt)

parsing_pkl_error = str(e)
last_byte = data.read(1)
data.seek(-1, 1)

Expand Down Expand Up @@ -244,6 +243,14 @@ def _list_globals(data: IO[bytes], multiple_pickles=True) -> Set[Tuple[str, str]
if not multiple_pickles:
break

if parsing_pkl_error is not None:
# XXX: given we can have multiple pickles in a file, we may have already successfully extracted globals from a valid pickle.
# Thus return the already found globals in the error & let the caller decide what to do.
# Additionally, we return the error at the end of the loop to scan imports in partially broken files,
# which can unpickle and be dangerous regardless of being valid pickle.
globals_opt = globals if len(globals) > 0 else None
raise GenOpsError(parsing_pkl_error, globals_opt)

return globals


Expand Down
4 changes: 4 additions & 0 deletions tests/data/broken_model.pkl
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
cbuiltins
exec
(X>
f = open('my_file.txt', 'a'); f.write('Malicious'); f.close()tRX.
14 changes: 10 additions & 4 deletions tests/test_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,12 @@ def initialize_pickle_files():
),
)

# Broken model
initialize_data_file(
f"{_root_path}/data/broken_model.pkl",
b"cbuiltins\nexec\n(X>\nf = open('my_file.txt', 'a'); f.write('Malicious'); f.close()tRX.",
)

# Code which created malicious12.pkl using pickleassem (see https://github.com/gousaiyang/pickleassem)
#
# p = PickleAssembler(proto=4)
Expand Down Expand Up @@ -629,7 +635,6 @@ def test_scan_file_path():


def test_scan_file_path_npz():

compare_scan_results(
scan_file_path(f"{_root_path}/data2/object_arrays.npz"),
ScanResult(
Expand Down Expand Up @@ -726,10 +731,11 @@ def test_scan_directory_path():
Global("bdb", "Bdb", SafetyLevel.Dangerous),
Global("bdb", "Bdb", SafetyLevel.Dangerous),
Global("bdb", "Bdb.run", SafetyLevel.Dangerous),
Global("builtins", "exec", SafetyLevel.Dangerous),
],
scanned_files=30,
issues_count=30,
infected_files=25,
scanned_files=31,
issues_count=31,
infected_files=26,
scan_err=True,
)
compare_scan_results(scan_directory_path(f"{_root_path}/data/"), sr)
Expand Down

0 comments on commit efb2533

Please sign in to comment.