From d6a026381e09e8b0d3b3da7e5f6d184efbce754d Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 22 Sep 2020 12:08:35 +0200 Subject: [PATCH] Instance-level dispatch table --- cloudpickle/cloudpickle_fast.py | 47 ++++++++++++++------ tests/cloudpickle_test.py | 76 +++++++++++++++++++++++++++++++-- 2 files changed, 105 insertions(+), 18 deletions(-) diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py index fa8da0f63..6581325ba 100644 --- a/cloudpickle/cloudpickle_fast.py +++ b/cloudpickle/cloudpickle_fast.py @@ -496,9 +496,6 @@ class CloudPickler(Pickler): _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce - - dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table) - # function reducers are defined as instance methods of CloudPickler # objects, as they rely on a CloudPickler attribute (globals_ref) def _dynamic_function_reduce(self, func): @@ -572,17 +569,6 @@ def dump(self, obj): raise if pickle.HIGHEST_PROTOCOL >= 5: - # `CloudPickler.dispatch` is only left for backward compatibility - note - # that when using protocol 5, `CloudPickler.dispatch` is not an - # extension of `Pickler.dispatch` dictionary, because CloudPickler - # subclasses the C-implemented Pickler, which does not expose a - # `dispatch` attribute. Earlier versions of the protocol 5 CloudPickler - # used `CloudPickler.dispatch` as a class-level attribute storing all - # reducers implemented by cloudpickle, but the attribute name was not a - # great choice given the meaning of `Cloudpickler.dispatch` when - # `CloudPickler` extends the pure-python pickler. - dispatch = dispatch_table - # Implementation of the reducer_override callback, in order to # efficiently serialize dynamic functions and classes by subclassing # the C-implemented Pickler. @@ -604,6 +590,30 @@ def __init__(self, file, protocol=None, buffer_callback=None): self.globals_ref = {} self.proto = int(protocol) + # Instance-level public dispatch_table should not mutate the + # private class-level _dispatch_table while still reflecting. + # Note that it is important to lazy init the dispatch_table + # public attribute to make it possible to re-assign it later + # and get the C implementation of Pickler take the + # re-assignment into account. + self.dispatch_table = ChainMap( + {}, # to register instance-level custom reducers + self._dispatch_table, # cloudpickle specific reducers + copyreg.dispatch_table, # base Python types + ) + + # `CloudPickler.dispatch` is only left for backward compatibility - + # note that when using protocol 5, `CloudPickler.dispatch` is not + # an extension of `Pickler.dispatch` dictionary, because + # CloudPickler subclasses the C-implemented Pickler, which does not + # expose a `dispatch` attribute. Earlier versions of the protocol + # 5 CloudPickler used `CloudPickler.dispatch` as a class-level + # attribute storing all reducers implemented by cloudpickle, but + # the attribute name was not a great choice given the meaning of + # `Cloudpickler.dispatch` when `CloudPickler` extends the + # pure-python pickler. + self.dispatch = self.dispatch_table + def reducer_override(self, obj): """Type-agnostic reducing callback for function and classes. @@ -673,6 +683,15 @@ def __init__(self, file, protocol=None): self.globals_ref = {} assert hasattr(self, 'proto') + # Isolated the instance dispatch table to make it possible to + # customize the reducers of a give Pickler instance without + # impacting the class-level picklers. + self.dispatch = self.dispatch.copy() + + # Compatibility alias to get the same attributes irrespective of + # the version of Python. + self.dispatch_table = self.dispatch + def _save_reduce_pickle5(self, func, args, state=None, listitems=None, dictitems=None, state_setter=None, obj=None): save = self.save diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index a456b6372..541e38d17 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1,4 +1,5 @@ from __future__ import division +from pickle import HIGHEST_PROTOCOL import _collections_abc import abc @@ -1330,12 +1331,17 @@ def f(): # serializable. from cloudpickle import CloudPickler from cloudpickle import cloudpickle_fast as cp_fast - CloudPickler.dispatch_table[type(py.builtin)] = cp_fast._module_reduce + py_builtin_type = type(py.builtin) + assert py_builtin_type not in CloudPickler._dispatch_table + try: + CloudPickler._dispatch_table[py_builtin_type] = cp_fast._module_reduce - g = cloudpickle.loads(cloudpickle.dumps(f, protocol=self.protocol)) + g = cloudpickle.loads(cloudpickle.dumps(f, protocol=self.protocol)) - result = g() - self.assertEqual(1, result) + result = g() + self.assertEqual(1, result) + finally: + del CloudPickler._dispatch_table[py_builtin_type] def test_function_module_name(self): func = lambda x: x @@ -2283,6 +2289,68 @@ def reduce_myclass(x): finally: copyreg.dispatch_table.pop(MyClass) + def test_side_effect_free_pickler_instance(self): + class A: + pass + + iob = io.BytesIO() + p = cloudpickle.CloudPickler(iob, protocol=self.protocol) + p.dispatch_table[A] = lambda obj: (int, (42,)) + p.dump(A()) + assert pickle.loads(iob.getvalue()) == 42 + + # Check there is no side-effect on other CloudPickler instances + iob = io.BytesIO() + p = cloudpickle.CloudPickler(iob, protocol=self.protocol) + p.dump(A()) + assert isinstance(pickle.loads(iob.getvalue()), A) + + def test_pickler_dispatch_table_reassign(self): + # Check that re-assigning the dispatch table attribute on an instance + # works as expected. This would not waork on Python 3.8 if + # dispatch_table was a class attribute of CloudPickler because of + # implementation details in initialization step of the C implementation + # of the Pickler class in the standard library. + class A: + pass + + iob = io.BytesIO() + p = cloudpickle.CloudPickler(iob, protocol=self.protocol) + p.dispatch_table = p.dispatch_table.copy() + p.dispatch_table[A] = lambda obj: (int, (42,)) + p.dump(A()) + assert pickle.loads(iob.getvalue()) == 42 + + def test_pickler_subclass_with_custom_dispatch_table(self): + class PicklerSubclass(cloudpickle.CloudPickler): + def __init__(self, buffer, custom_reducers, + protocol=HIGHEST_PROTOCOL): + # initialize the instance -level dispatch_table attribute + super().__init__(buffer, protocol=protocol) + self.dispatch_table.update(custom_reducers) + + class A: + pass + + class B: + pass + + custom_reducers = { + A: lambda obj: (int, (42,)) + } + + iob = io.BytesIO() + p = PicklerSubclass(iob, custom_reducers, protocol=self.protocol) + p.dump((A(), B())) + a, b = pickle.loads(iob.getvalue()) + assert a == 42 + assert isinstance(b, B) # pickling locally defined classes works + + # Check there is no side-effect on other CloudPickler instances + iob = io.BytesIO() + p = cloudpickle.CloudPickler(iob, protocol=self.protocol) + p.dump(A()) + assert isinstance(pickle.loads(iob.getvalue()), A) class Protocol2CloudPickleTest(CloudPickleTest):