Skip to content

Commit

Permalink
Trac #24135: Clean up in coerce_dict
Browse files Browse the repository at this point in the history
1. Deprecate various arguments which should have been deprecated in
#15367.

2. Use a tuple instead of a list to throw away. This is very slightly
faster.

3. Use safe memory functions from cysignals instead of `PyMem`
functions.

4. Split `__init__` into `__cinit__` and `__init__`.

5. Rename the `iteritems()` method to `items()`.

6. Introduce a new inline function `valid(ptr)` to replace the very
common `ptr != NULL and ptr != dummy`.

7. Change type of `key_id` from `void*` to `PyObject*`. This avoids a
lot of casts.

8. Use a custom class with `@cython.freelist` instead of a `PyCapsule`
to wrap a `PyObject*`.

9. In `tp_clear`, only delete references to elements contained in the
dict. Move deallocation of the data structure to `__dealloc__`.

10. Use random 31-bit multipliers (instead of `13` and `503`) in the
hash function for `TripleDict`. This might give better mixing for large
sizes (in any case, it can't hurt).

11. Rename `dummy` -> `deleted_key` to make it more clear what it means.
Also, there was no reason that this was of type `bytes`. Now it is
simply created by `object()`.

12. Change the logic of the `lookup()` methods a bit to make them easier
to understand.

13. Generic code cleanup: PEP 8, `is` instead of `==` for pointers, ...

'''Timings''':

All the changes above lead to a modest speed-up:

''`MonoDict` lookup'':
{{{
sage: from sage.structure.coerce_dict import MonoDict; D = MonoDict()
sage: L = [Integer(x) for x in range(1000)]
sage: for k in L: D[k] = k
sage: timeit('[D[k] for k in L]', repeat=20, number=20000)
}}}

Before: {{{20000 loops, best of 20: 72.7 µs per loop}}}

After: {{{20000 loops, best of 20: 64.5 µs per loop}}}

''`TripleDict` lookup'':
{{{
sage: from sage.structure.coerce_dict import TripleDict; D =
TripleDict()
sage: L = [(None, None, Integer(x)) for x in range(1000)]
sage: for k in L: D[k] = None
sage: timeit('[D[k] for k in L]', repeat=20, number=20000)
}}}

Before: {{{20000 loops, best of 20: 97.3 µs per loop}}}

After: {{{20000 loops, best of 20: 87.3 µs per loop}}}

URL: https://trac.sagemath.org/24135
Reported by: jdemeyer
Ticket author(s): Jeroen Demeyer
Reviewer(s): Travis Scrimshaw
  • Loading branch information
Release Manager authored and vbraun committed Dec 10, 2017
2 parents 3b336bc + 7c83e05 commit 954f976
Show file tree
Hide file tree
Showing 8 changed files with 724 additions and 634 deletions.
2 changes: 1 addition & 1 deletion src/sage/categories/homset.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@
# trac ticket #14159

from sage.structure.coerce_dict import TripleDict
_cache = TripleDict(53, weak_values=True)
_cache = TripleDict(weak_values=True)

def Hom(X, Y, category=None, check=True):
"""
Expand Down
42 changes: 22 additions & 20 deletions src/sage/structure/coerce.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -406,33 +406,28 @@ cdef class CoercionModel_cache_maps(CoercionModel):
- Robert Bradshaw
"""

def __init__(self, lookup_dict_size=127, lookup_dict_threshold=.75):
def __init__(self, *args, **kwds):
"""
INPUT:
- ``lookup_dict_size`` - initial size of the coercion hashtables
- ``lookup_dict_threshold`` - maximal density of the coercion
hashtables before forcing a re-hash
EXAMPLES::
sage: from sage.structure.coerce import CoercionModel_cache_maps
sage: cm = CoercionModel_cache_maps(4, .95)
sage: cm = CoercionModel_cache_maps()
sage: K = NumberField(x^2-2, 'a')
sage: A = cm.get_action(ZZ, K, operator.mul)
sage: f, g = cm.coercion_maps(QQ, int)
sage: f, g = cm.coercion_maps(ZZ, int)
.. NOTE::
TESTS::
In practice 4 would be a really bad number to choose, but
it makes the hashing deterministic.
sage: cm = CoercionModel_cache_maps(4, .95)
doctest:...: DeprecationWarning: the 'lookup_dict_size' argument is deprecated
See http://trac.sagemath.org/24135 for details.
doctest:...: DeprecationWarning: the 'lookup_dict_threshold' argument is deprecated
See http://trac.sagemath.org/24135 for details.
"""
self.reset_cache(lookup_dict_size, lookup_dict_threshold)
self.reset_cache(*args, **kwds)

def reset_cache(self, lookup_dict_size=127, lookup_dict_threshold=.75):
def reset_cache(self, lookup_dict_size=None, lookup_dict_threshold=None):
"""
Clear the coercion cache.
Expand All @@ -450,14 +445,20 @@ cdef class CoercionModel_cache_maps(CoercionModel):
sage: cm.get_cache()
({}, {})
"""
if lookup_dict_size is not None:
from sage.misc.superseded import deprecation
deprecation(24135, "the 'lookup_dict_size' argument is deprecated")
if lookup_dict_threshold is not None:
from sage.misc.superseded import deprecation
deprecation(24135, "the 'lookup_dict_threshold' argument is deprecated")
# This MUST be a mapping of tuples, where each
# tuple contains at least two elements that are either
# None or of type Map.
self._coercion_maps = TripleDict(lookup_dict_size, threshold=lookup_dict_threshold)
self._coercion_maps = TripleDict()
# This MUST be a mapping to actions.
self._action_maps = TripleDict(lookup_dict_size, threshold=lookup_dict_threshold)
self._action_maps = TripleDict()
# This is a mapping from Parents to Parents, storing the result of division in the given parent.
self._division_parents = TripleDict(lookup_dict_size, threshold=lookup_dict_threshold)
self._division_parents = TripleDict()

def get_cache(self):
"""
Expand Down Expand Up @@ -521,8 +522,9 @@ cdef class CoercionModel_cache_maps(CoercionModel):
sage: act(1/5, x+10)
1/5*x + 2
"""
return dict([((S, R), mors) for (S, R, op), mors in self._coercion_maps.iteritems()]), \
dict(self._action_maps.iteritems())
d1 = {(S, R): mors for (S, R, op), mors in self._coercion_maps.items()}
d2 = self._action_maps.copy()
return d1, d2

def record_exceptions(self, bint value=True):
r"""
Expand Down
42 changes: 29 additions & 13 deletions src/sage/structure/coerce_dict.pxd
Original file line number Diff line number Diff line change
@@ -1,34 +1,50 @@
cimport cython
from cpython cimport PyObject

cdef struct mono_cell

cdef struct mono_cell:
PyObject* key_id
PyObject* key_weakref
PyObject* value


@cython.final
cdef class MonoDict:
cdef __weakref__
cdef size_t mask
cdef size_t used
cdef size_t fill
cdef size_t mask # size - 1 with 'size' the length of the table array
cdef size_t used # number of valid entries
cdef size_t fill # number of non-NULL entries (including deleted entries)
cdef mono_cell* table
cdef bint weak_values
cdef eraser
cdef mono_cell* lookup(self,PyObject* key)
cdef get(self, object k)
cdef set(self, object k, value)

cdef mono_cell* lookup(self, PyObject* key)
cdef get(self, k)
cdef int set(self, k, value) except -1
cdef int resize(self) except -1

cdef struct triple_cell

cdef struct triple_cell:
PyObject* key_id1
PyObject* key_id2
PyObject* key_id3
PyObject* key_weakref1
PyObject* key_weakref2
PyObject* key_weakref3
PyObject* value


@cython.final
cdef class TripleDict:
cdef __weakref__
cdef size_t mask
cdef size_t used
cdef size_t fill
cdef size_t mask # size - 1 with 'size' the length of the table array
cdef size_t used # number of valid entries
cdef size_t fill # number of non-NULL entries (including deleted entries)
cdef triple_cell* table
cdef bint weak_values
cdef eraser

cdef triple_cell* lookup(self, PyObject* key1, PyObject* key2, PyObject* key3)
cdef get(self, object k1, object k2, object k3)
cdef set(self, object k1, object k2, object k3, value)
cdef get(self, k1, k2, k3)
cdef int set(self, k1, k2, k3, value) except -1
cdef int resize(self) except -1
Loading

0 comments on commit 954f976

Please sign in to comment.