diff --git a/pymemcache/codecs.py b/pymemcache/codecs.py
new file mode 100644
index 00000000..cb46d3b6
--- /dev/null
+++ b/pymemcache/codecs.py
@@ -0,0 +1,171 @@
+# Copyright 2012 Pinterest.com
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import abc
+import logging
+from io import BytesIO
+import six
+from six.moves import cPickle as pickle
+
+log = logging.getLogger(__name__)
+
+try:
+    long_type = long  # noqa
+except NameError:
+    long_type = None
+
+
+@six.add_metaclass(abc.ABCMeta)
+class ICodec(object):
+    """
+    Interface for serializers.
+    """
+
+    @abc.abstractmethod
+    def serialize(self, key, value):
+        """
+        Serialize a python object.
+
+        :param str|unicode key: Key
+        :param object value: Value
+        :return tuple[str, int]: tuple(value, flags)
+        """
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def deserialize(self, key, value, flags):
+        """
+        Deserialize a value into a python object.
+
+        :param str|unicode key: Key
+        :param str|unicode value: Value
+        :param int flags: Bitflag containing flags used to specify how to
+            deserialize this object.
+        :return object: Deserialized python object.
+        """
+        raise NotImplementedError()
+
+
+class Serde(ICodec):
+    """
+    Serialization handler.
+
+    Meant to be compatible with `python-memcached`.
+    """
+
+    FLAG_BYTES = 0
+    FLAG_PICKLE = 1 << 0
+    FLAG_INTEGER = 1 << 1
+    FLAG_LONG = 1 << 2
+    # unused, to maintain compatibility with python-memcached
+    FLAG_COMPRESSED = 1 << 3
+    FLAG_TEXT = 1 << 4
+
+    pickle_version = 0
+
+    def __init__(self, pickle_version=0):
+        """
+        Init
+
+        :param int pickle_version: Pickle protocol version.
+            Use `-1` to use the highest supported at runtime.
+            Deserialization is not affected by this parameter.
+
+        A forewarning with `0` (the default): If somewhere in your value lies
+        a slotted object, ie defines `__slots__`, even if you do not include
+        it in your pickleable state via `__getstate__`, python will raise:
+        ```
+        TypeError: a class that defines __slots__ without defining
+        __getstate__ cannot be pickled
+        ```
+        """
+        if pickle_version is not None:
+            self.pickle_version = pickle_version
+
+    def serialize(self, key, value):
+        """
+        Serialize a python object.
+
+        :param str|unicode key: Key
+        :param object value: Value
+        :return tuple[str, int]: tuple(value, flags)
+        """
+        flags = 0
+        value_type = type(value)
+
+        # Check against exact types so that subclasses of native types will be
+        # restored as their native type
+        if value_type is bytes:
+            pass
+
+        elif value_type is six.text_type:
+            flags |= self.FLAG_TEXT
+            value = value.encode('utf8')
+
+        elif value_type is int:
+            flags |= self.FLAG_INTEGER
+            value = "%d" % value
+
+        elif six.PY2 and value_type is long_type:
+            flags |= self.FLAG_LONG
+            value = "%d" % value
+
+        else:
+            flags |= self.FLAG_PICKLE
+
+            output = BytesIO()
+            pickler = pickle.Pickler(output, self.pickle_version)
+            pickler.dump(value)
+            value = output.getvalue()
+
+        return value, flags
+
+    def deserialize(self, key, value, flags):
+        """
+        Deserialize a value into a python object.
+
+        :param str|unicode key: Key
+        :param str|unicode value: Value
+        :param int flags: Bitflag containing flags used to specify how to
+            deserialize this object.
+        :return object: Deserialized python object.
+        """
+        if flags == 0:
+            return value
+
+        elif flags & self.FLAG_TEXT:
+            return value.decode('utf8')
+
+        elif flags & self.FLAG_INTEGER:
+            return int(value)
+
+        elif flags & self.FLAG_LONG:
+            if six.PY3:
+                return int(value)
+            else:
+                return long_type(value)
+
+        elif flags & self.FLAG_PICKLE:
+            try:
+                buf = BytesIO(value)
+                unpickler = pickle.Unpickler(buf)
+                return unpickler.load()
+            except Exception as exc:
+                # This includes exc as a string for troubleshooting as well as
+                # providing a trace.
+                log.exception('Could not depickle value (len=%d): %s',
+                              len(value), exc)
+                return None
+
+        return value
diff --git a/pymemcache/serde.py b/pymemcache/serde.py
index 23cbd7b9..c8d4e9d8 100644
--- a/pymemcache/serde.py
+++ b/pymemcache/serde.py
@@ -1,98 +1,21 @@
-# Copyright 2012 Pinterest.com
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+"""
+Backwards compatibility with the older serialization api previously
+provided by this module.
+"""
+from pymemcache import codecs
 
-import logging
-from io import BytesIO
-import six
-from six.moves import cPickle as pickle
+# Flag names that callers could previously import from this module.
+FLAG_BYTES = codecs.Serde.FLAG_BYTES
+FLAG_PICKLE = codecs.Serde.FLAG_PICKLE
+FLAG_INTEGER = codecs.Serde.FLAG_INTEGER
+FLAG_LONG = codecs.Serde.FLAG_LONG
+FLAG_COMPRESSED = codecs.Serde.FLAG_COMPRESSED
+FLAG_TEXT = codecs.Serde.FLAG_TEXT
+
+# This module has always pickled with the highest protocol available (-1).
+PICKLE_VERSION = -1
 
-try:
-    long_type = long  # noqa
-except NameError:
-    long_type = None
-
-
-FLAG_BYTES = 0
-FLAG_PICKLE = 1 << 0
-FLAG_INTEGER = 1 << 1
-FLAG_LONG = 1 << 2
-FLAG_COMPRESSED = 1 << 3  # unused, to main compatibility with python-memcached
-FLAG_TEXT = 1 << 4
-
-# Pickle protocol version (-1 for highest available to runtime)
-# Warning with `0`: If somewhere in your value lies a slotted object,
-# ie defines `__slots__`, even if you do not include it in your pickleable
-# state via `__getstate__`, python will complain with something like:
-#   TypeError: a class that defines __slots__ without defining __getstate__
-#   cannot be pickled
-PICKLE_VERSION = -1
-
-
-def python_memcache_serializer(key, value):
-    flags = 0
-    value_type = type(value)
-
-    # Check against exact types so that subclasses of native types will be
-    # restored as their native type
-    if value_type is bytes:
-        pass
-
-    elif value_type is six.text_type:
-        flags |= FLAG_TEXT
-        value = value.encode('utf8')
-
-    elif value_type is int:
-        flags |= FLAG_INTEGER
-        value = "%d" % value
-
-    elif six.PY2 and value_type is long_type:
-        flags |= FLAG_LONG
-        value = "%d" % value
-
-    else:
-        flags |= FLAG_PICKLE
-        output = BytesIO()
-        pickler = pickle.Pickler(output, PICKLE_VERSION)
-        pickler.dump(value)
-        value = output.getvalue()
-
-    return value, flags
-
-
-def python_memcache_deserializer(key, value, flags):
-    if flags == 0:
-        return value
-
-    elif flags & FLAG_TEXT:
-        return value.decode('utf8')
-
-    elif flags & FLAG_INTEGER:
-        return int(value)
-
-    elif flags & FLAG_LONG:
-        if six.PY3:
-            return int(value)
-        else:
-            return long_type(value)
-
-    elif flags & FLAG_PICKLE:
-        try:
-            buf = BytesIO(value)
-            unpickler = pickle.Unpickler(buf)
-            return unpickler.load()
-        except Exception:
-            logging.info('Pickle error', exc_info=True)
-            return None
-
-    return value
+_SERDE = codecs.Serde(pickle_version=PICKLE_VERSION)
+
+python_memcache_serializer = _SERDE.serialize
+python_memcache_deserializer = _SERDE.deserialize
diff --git a/pymemcache/test/test_serde.py b/pymemcache/test/test_codecs.py
similarity index 50%
rename from pymemcache/test/test_serde.py
rename to pymemcache/test/test_codecs.py
index 04c8e079..2411a173 100644
--- a/pymemcache/test/test_serde.py
+++ b/pymemcache/test/test_codecs.py
@@ -1,11 +1,11 @@
 # -*- coding: utf-8 -*-
 from unittest import TestCase
 
-from pymemcache.serde import (python_memcache_serializer,
-                              python_memcache_deserializer, FLAG_BYTES,
-                              FLAG_PICKLE, FLAG_INTEGER, FLAG_LONG, FLAG_TEXT)
 import pytest
 import six
+from six.moves import cPickle as pickle
+
+from pymemcache import codecs
 
 
 class CustomInt(int):
@@ -20,9 +20,12 @@ class CustomInt(int):
 
 @pytest.mark.unit()
 class TestSerde(TestCase):
+    Serde = codecs.Serde
+
+    def check(self, value, expected_flags, pickle_version=None):
+        s = self.Serde(pickle_version=pickle_version)
 
-    def check(self, value, expected_flags):
-        serialized, flags = python_memcache_serializer(b'key', value)
+        serialized, flags = s.serialize(b'key', value)
         assert flags == expected_flags
 
         # pymemcache stores values as byte strings, so we immediately the value
@@ -30,32 +33,40 @@ def check(self, value, expected_flags):
         if not isinstance(serialized, six.binary_type):
             serialized = six.text_type(serialized).encode('ascii')
 
-        deserialized = python_memcache_deserializer(b'key', serialized, flags)
+        deserialized = s.deserialize(b'key', serialized, flags)
         assert deserialized == value
 
     def test_bytes(self):
-        self.check(b'value', FLAG_BYTES)
-        self.check(b'\xc2\xa3 $ \xe2\x82\xac', FLAG_BYTES)  # £ $ €
+        self.check(b'value', self.Serde.FLAG_BYTES)
+        self.check(b'\xc2\xa3 $ \xe2\x82\xac', self.Serde.FLAG_BYTES)  # £ $ €
 
     def test_unicode(self):
-        self.check(u'value', FLAG_TEXT)
-        self.check(u'£ $ €', FLAG_TEXT)
+        self.check(u'value', self.Serde.FLAG_TEXT)
+        self.check(u'£ $ €', self.Serde.FLAG_TEXT)
 
     def test_int(self):
-        self.check(1, FLAG_INTEGER)
+        self.check(1, self.Serde.FLAG_INTEGER)
 
     def test_long(self):
         # long only exists with Python 2, so we're just testing for another
         # integer with Python 3
         if six.PY2:
-            expected_flags = FLAG_LONG
+            expected_flags = self.Serde.FLAG_LONG
         else:
-            expected_flags = FLAG_INTEGER
+            expected_flags = self.Serde.FLAG_INTEGER
         self.check(123123123123123123123, expected_flags)
 
     def test_pickleable(self):
-        self.check({'a': 'dict'}, FLAG_PICKLE)
+        self.check({'a': 'dict'}, self.Serde.FLAG_PICKLE)
 
     def test_subtype(self):
         # Subclass of a native type will be restored as the same type
-        self.check(CustomInt(123123), FLAG_PICKLE)
+        self.check(CustomInt(123123), self.Serde.FLAG_PICKLE)
+
+    def test_pickle_version(self):
+        for pickle_version in range(-1, pickle.HIGHEST_PROTOCOL):
+            self.check(
+                dict(whoa='nelly', humans=u'amazing', answer=42),
+                self.Serde.FLAG_PICKLE,
+                pickle_version=pickle_version,
+            )