Skip to content

Commit

Permalink
LRU raises (#48)
Browse files Browse the repository at this point in the history
  • Loading branch information
ncclementi authored Feb 16, 2022
1 parent 52449dd commit e1886a4
Show file tree
Hide file tree
Showing 6 changed files with 237 additions and 51 deletions.
42 changes: 23 additions & 19 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,28 @@
#!/usr/bin/env python

import os

from setuptools import setup

setup(name='zict',
version='2.0.0',
description='Mutable mapping tools',
url='http://zict.readthedocs.io/en/latest/',
maintainer='Matthew Rocklin',
maintainer_email='[email protected]',
license='BSD',
keywords='mutable mapping,dict,dask',
packages=['zict'],
install_requires=open('requirements.txt').read().strip().split('\n'),
long_description=(open('README.rst').read() if os.path.exists('README.rst')
else ''),
classifiers=[
"Programming Language :: Python",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
],
zip_safe=False)
setup(
name="zict",
version="2.1.0.dev1",
description="Mutable mapping tools",
url="http://zict.readthedocs.io/en/latest/",
maintainer="Matthew Rocklin",
maintainer_email="[email protected]",
license="BSD",
keywords="mutable mapping,dict,dask",
packages=["zict"],
install_requires=open("requirements.txt").read().strip().split("\n"),
long_description=(
open("README.rst").read() if os.path.exists("README.rst") else ""
),
classifiers=[
"Programming Language :: Python",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
],
zip_safe=False,
)
8 changes: 4 additions & 4 deletions zict/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from .zip import Zip
from .buffer import Buffer
from .file import File
from .func import Func
from .lmdb import LMDB
from .lru import LRU
from .buffer import Buffer
from .sieve import Sieve
from .lmdb import LMDB
from .zip import Zip

__version__ = "2.0.0"
__version__ = "2.1.0.dev1"
28 changes: 16 additions & 12 deletions zict/buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class Buffer(ZictBase):
This creates a MutableMapping by combining two MutableMappings, one that
feeds into the other when it overflows, based on an LRU mechanism. When
the first evicts elements these get placed into the second. When an item
the first evicts elements these get placed into the second. When an item
is retrieved from the second it is placed back into the first.
Parameters
Expand All @@ -19,6 +19,8 @@ class Buffer(ZictBase):
fast_to_slow_callbacks: list of callables
These functions run every time data moves from the fast to the slow
mapping. They take two arguments, a key and a value
If an exception occurs during one of the fast_to_slow_callbacks (e.g. a callback
tried storing to disk and raised a disk full error), the key will remain in the LRU.
slow_to_fast_callbacks: list of callables
These functions run every time data moves from the slow to the fast
mapping.
Expand Down Expand Up @@ -58,8 +60,13 @@ def __init__(

def fast_to_slow(self, key, value):
self.slow[key] = value
for cb in self.fast_to_slow_callbacks:
cb(key, value)
try:
for cb in self.fast_to_slow_callbacks:
cb(key, value)
# LRU catches the exception, re-raises it, and makes sure keys are not lost and stay in fast.
except Exception:
del self.slow[key]
raise

def slow_to_fast(self, key):
value = self.slow[key]
Expand All @@ -80,15 +87,12 @@ def __getitem__(self, key):
raise KeyError(key)

def __setitem__(self, key, value):
# Avoid useless movement for heavy values
if self.weight(key, value) <= self.n:
if key in self.slow:
del self.slow[key]
self.fast[key] = value
else:
if key in self.fast:
del self.fast[key]
self.slow[key] = value
if key in self.slow:
del self.slow[key]
# This may trigger an eviction from fast to slow of older keys.
# If the weight is individually greater than n, then key/value will be stored
# into self.slow instead (see LRU.__setitem__).
self.fast[key] = value

def __delitem__(self, key):
if key in self.fast:
Expand Down
41 changes: 30 additions & 11 deletions zict/lru.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def do_nothing(k, v):


class LRU(ZictBase):
"""Evict Least Recently Used Elements
"""Evict Least Recently Used Elements.
Parameters
----------
Expand All @@ -18,6 +18,8 @@ class LRU(ZictBase):
Dictionary in which to hold elements
on_evict: list of callables
Function:: k, v -> action to call on key value pairs prior to eviction
If an exception occurs during an on_evict callback (e.g. a callback tried
storing to disk and raised a disk full error), the key will remain in the LRU.
weight: callable
Function:: k, v -> number to determine the size of keeping the item in
the mapping. Defaults to ``(k, v) -> 1``
Expand Down Expand Up @@ -55,19 +57,29 @@ def __setitem__(self, key, value):

weight = self.weight(key, value)

if weight <= self.n:
def set_():
self.d[key] = value
self.i += 1
self.heap[key] = self.i

self.weights[key] = weight
self.total_weight += weight
else:
for cb in self.on_evict:
cb(key, value)
# Evicting the last key/value pair is guaranteed to fail, so don't try.
# This is because it is always the last one inserted by virtue of this
# being an LRU, which in turn means we reached this point because
# weight > self.n and a callback raised an exception (e.g. disk full).
while self.total_weight > self.n and len(self.d) > 1:
self.evict()

while self.total_weight > self.n:
self.evict()
if weight <= self.n:
set_()
else:
try:
for cb in self.on_evict:
cb(key, value)
except Exception:
# e.g. if a callback tried storing to disk and raised a disk full error
set_()
raise

def evict(self):
"""Evict least recently used key
Expand All @@ -82,11 +94,18 @@ def evict(self):
w: weight
"""
k, priority = self.heap.popitem()
v = self.d.pop(k)
try:
for cb in self.on_evict:
cb(k, v)
except Exception:
# e.g. if a callback tried storing to disk and raised a disk full error
self.heap[k] = priority
self.d[k] = v
raise

weight = self.weights.pop(k)
self.total_weight -= weight
v = self.d.pop(k)
for cb in self.on_evict:
cb(k, v)
return k, v, weight

def __delitem__(self, key):
Expand Down
69 changes: 64 additions & 5 deletions zict/tests/test_buffer.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from zict import Buffer
import pytest

import zict

from . import utils_test


def test_simple():
a = dict()
b = dict()
buff = Buffer(a, b, n=10, weight=lambda k, v: v)
buff = zict.Buffer(a, b, n=10, weight=lambda k, v: v)

buff["x"] = 1
buff["y"] = 2
Expand Down Expand Up @@ -65,7 +68,7 @@ def test_setitem_avoid_fast_slow_duplicate():

a = dict()
b = dict()
buff = Buffer(a, b, n=10, weight=lambda k, v: v)
buff = zict.Buffer(a, b, n=10, weight=lambda k, v: v)
for first, second in [(1, 12), (12, 1)]:
buff["a"] = first
assert buff["a"] == first
Expand All @@ -89,7 +92,7 @@ def test_mapping():
"""
a = {}
b = {}
buff = Buffer(a, b, n=2)
buff = zict.Buffer(a, b, n=2)
utils_test.check_mapping(buff)
utils_test.check_closing(buff)

Expand All @@ -107,7 +110,7 @@ def s2f_cb(k, v):

a = dict()
b = dict()
buff = Buffer(
buff = zict.Buffer(
a,
b,
n=10,
Expand Down Expand Up @@ -136,3 +139,59 @@ def s2f_cb(k, v):
buff["x"]
assert f2s == ["x", "y"]
assert s2f == ["x"]


def test_callbacks_exception_catch():
    """Check Buffer state when a fast-to-slow callback raises.

    The fast-to-slow callback raises ``MyError`` for any value above 10.
    Inserting such a value must propagate the exception, leave the offending
    key in the fast mapping, and still move the older keys to the slow mapping.
    """

    class MyError(Exception):
        pass

    moved_to_slow = []
    moved_to_fast = []

    def on_fast_to_slow(k, v):
        # Simulate a failing spill (e.g. disk full) for heavy values.
        if v > 10:
            raise MyError()
        moved_to_slow.append(k)

    def on_slow_to_fast(k, v):
        moved_to_fast.append(k)

    fast = {}
    slow = {}
    buff = zict.Buffer(
        fast,
        slow,
        n=10,
        weight=lambda k, v: v,
        fast_to_slow_callbacks=on_fast_to_slow,
        slow_to_fast_callbacks=on_slow_to_fast,
    )

    buff["x"] = 1
    buff["y"] = 2

    assert buff["x"] == 1
    assert buff["y"] == 2
    assert moved_to_slow == []
    assert moved_to_fast == []
    # Both keys fit, so they live in fast (memory) and slow stays empty.
    assert fast == {"x": 1, "y": 2}
    assert slow == {}

    # Add a key whose weight is < n but pushes the total weight above n;
    # this moves x out of fast.
    buff["z"] = 8

    assert moved_to_slow == ["x"]
    assert moved_to_fast == []
    assert fast == {"y": 2, "z": 8}
    assert slow == {"x": 1}

    # Add a key whose weight is > n (total weight is again > n). Everything
    # else moves to slow, while w stays in fast because its callback raised.
    with pytest.raises(MyError):
        buff["w"] = 11

    assert moved_to_slow == ["x", "y", "z"]
    assert moved_to_fast == []
    assert fast == {"w": 11}
    assert slow == {"x": 1, "y": 2, "z": 8}
Loading

0 comments on commit e1886a4

Please sign in to comment.