Add a lot more validation and fix a couple of lingering bugs
markshannon committed Aug 12, 2023
1 parent af24cd7 commit 97f762a
Showing 1 changed file with 76 additions and 28 deletions.
Modules/gcmodule.c: 76 additions & 28 deletions
@@ -345,6 +345,8 @@ gc_list_merge(PyGC_Head *from, PyGC_Head *to)
         PyGC_Head *from_tail = GC_PREV(from);
         assert(from_head != from);
         assert(from_tail != from);
+        assert(gc_list_is_empty(to) ||
+               gc_old_space(to_tail) == gc_old_space(from_tail));
 
         _PyGCHead_SET_NEXT(to_tail, from_head);
         _PyGCHead_SET_PREV(from_head, to_tail);
@@ -413,8 +415,8 @@ enum flagstates {collecting_clear_unreachable_clear,
 static void
 validate_list(PyGC_Head *head, enum flagstates flags)
 {
-    assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0);
-    assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
+    assert((head->_gc_prev & ~_PyGC_PREV_MASK) == 0);
+    assert((head->_gc_next & ~_PyGC_PREV_MASK) == 0);
     uintptr_t prev_value = 0, next_value = 0;
     switch (flags) {
     case collecting_clear_unreachable_clear:
@@ -488,9 +490,37 @@ validate_list_is_aging(PyGC_Head *head)
     assert(prev == GC_PREV(head));
 }
 
+static void
+validate_consistent_old_space(PyGC_Head *head)
+{
+    PyGC_Head *prev = head;
+    PyGC_Head *gc = GC_NEXT(head);
+    if (gc == head) {
+        return;
+    }
+    uintptr_t old_space = gc_old_space(gc);
+    while (gc != head) {
+        PyGC_Head *truenext = GC_NEXT(gc);
+        assert(truenext != NULL);
+        assert(gc_old_space(gc) == old_space);
+        prev = gc;
+        gc = truenext;
+    }
+    assert(prev == GC_PREV(head));
+}
+
+static void
+validate_list_header(PyGC_Head *head)
+{
+    assert(GC_PREV(head) == (PyGC_Head *)head->_gc_prev);
+    assert(GC_NEXT(head) == (PyGC_Head *)head->_gc_next);
+}
+
 #else
 #define validate_old(g) do{}while(0)
+#define validate_list_header(g) do{}while(0)
 #define validate_list_is_aging(l) do{}while(0)
+#define validate_consistent_old_space(l) do{}while(0)
 #endif
 
 /*** end of list stuff ***/
@@ -618,7 +648,7 @@ visit_reachable(PyObject *op, PyGC_Head *reachable)
                          prev->_gc_next & NEXT_MASK_UNREACHABLE);
         _PyObject_ASSERT(FROM_GC(next),
                          next->_gc_next & NEXT_MASK_UNREACHABLE);
-        prev->_gc_next = gc->_gc_next;  // copy NEXT_MASK_UNREACHABLE
+        prev->_gc_next = gc->_gc_next;  // copy flag bits
         gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
         _PyGCHead_SET_PREV(next, prev);
 
@@ -670,6 +700,9 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
      * or to the right have been scanned yet.
      */
 
+    validate_consistent_old_space(young);
+    /* Record which old space we are in, and set NEXT_MASK_UNREACHABLE bit for convenience */
+    uintptr_t flags = NEXT_MASK_UNREACHABLE | (gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1);
     while (gc != young) {
         if (gc_get_refs(gc)) {
             /* gc is definitely reachable from outside the
@@ -715,15 +748,16 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
             // But this may pollute the unreachable list head's 'next' pointer
             // too. That's semantically senseless but expedient here - the
             // damage is repaired when this function ends.
-            last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc);
+            last->_gc_next = flags | (uintptr_t)gc;
             _PyGCHead_SET_PREV(gc, last);
-            gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable);
+            gc->_gc_next = flags | (uintptr_t)unreachable;
             unreachable->_gc_prev = (uintptr_t)gc;
         }
         gc = _PyGCHead_NEXT(prev);
     }
     // young->_gc_prev must be last element remained in the list.
     young->_gc_prev = (uintptr_t)prev;
+    young->_gc_next &= _PyGC_PREV_MASK;
     // don't let the pollution of the list head's next pointer leak
     unreachable->_gc_next &= _PyGC_PREV_MASK;
 }
@@ -782,8 +816,8 @@ move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
         PyObject *op = FROM_GC(gc);
 
         _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE);
-        gc->_gc_next &= _PyGC_PREV_MASK;
-        next = (PyGC_Head*)gc->_gc_next;
+        next = GC_NEXT(gc);
+        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
 
         if (has_legacy_finalizer(op)) {
             gc_clear_collecting(gc);
@@ -802,8 +836,8 @@
     assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
     for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
         _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE);
-        gc->_gc_next &= _PyGC_PREV_MASK;
-        next = (PyGC_Head*)gc->_gc_next;
+        next = GC_NEXT(gc);
+        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
     }
     validate_list(unreachable, collecting_set_unreachable_clear);
 }
@@ -1181,8 +1215,11 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
      * refcount greater than 0 when all the references within the
      * set are taken into account).
      */
+    validate_list_header(base);
     update_refs(base);  // gc_prev is used for gc_refs
+    validate_list_header(base);
     subtract_refs(base);
+    validate_list_header(base);
 
     /* Leave everything reachable from outside base in base, and move
      * everything else (in base) to unreachable.
@@ -1219,7 +1256,6 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
      * the reachable objects instead. But this is a one-time cost, probably not
      * worth complicating the code to speed just a little.
      */
-    gc_list_init(unreachable);
     move_unreachable(base, unreachable);  // gc_prev is pointer again
     validate_list(base, collecting_clear_unreachable_clear);
     validate_list(unreachable, collecting_set_unreachable_set);
@@ -1244,13 +1280,16 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
 {
     // Remove the PREV_MASK_COLLECTING from unreachable
     // to prepare it for a new call to 'deduce_unreachable'
+    validate_list_header(unreachable);
     gc_list_clear_collecting(unreachable);
 
     // After the call to deduce_unreachable, the 'still_unreachable' set will
     // have the PREV_MARK_COLLECTING set, but the objects are going to be
     // removed so we can skip the expense of clearing the flag.
     PyGC_Head* resurrected = unreachable;
+    validate_list_header(resurrected);
     deduce_unreachable(resurrected, still_unreachable);
+    validate_list_header(resurrected);
     clear_unreachable_mask(still_unreachable);
 
     // Move the resurrected objects to the old generation for future collection.
@@ -1437,7 +1476,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
     PyGC_Head increment;
     gc_list_init(&increment);
     Py_ssize_t work_to_do = -gcstate->incremental_gc_progress;
-    Py_ssize_t region_size = 0;
     validate_old(gcstate);
     if (gc_list_is_empty(oldest)) {
         if (gc_list_is_empty(aging)) {
@@ -1453,6 +1491,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
         gcstate->aging_space = flip_old_space(gcstate->aging_space);
     }
     validate_old(gcstate);
+    Py_ssize_t region_size = 0;
     while (region_size < work_to_do) {
         if (gc_list_is_empty(oldest)) {
             gcstate->incremental_gc_progress = 0;
@@ -1465,17 +1504,12 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
     }
     validate_old(gcstate);
     validate_list_is_aging(&increment);
-#ifdef Py_STATS
-    {
-        Py_ssize_t count = 0;
-        PyGC_Head *gc;
-        for (gc = GC_NEXT(&increment); gc != &increment; gc = GC_NEXT(gc)) {
-            count++;
-        }
-        GC_STAT_ADD(NUM_GENERATIONS, objects_queued, count);
-    }
-#endif
-    gc_collect_region(tstate, &increment, aging, 0, stats);
+    GC_STAT_ADD(1, objects_queued, region_size);
+    PyGC_Head survivors;
+    gc_list_init(&survivors);
+    gc_collect_region(tstate, &increment, &survivors, 0, stats);
+    validate_list_is_aging(&survivors);
+    gc_list_merge(&survivors, aging);
     validate_old(gcstate);
     assert(gc_list_is_empty(&increment));
     gcstate->incremental_gc_progress += region_size;
Expand Down Expand Up @@ -1525,8 +1559,6 @@ gc_collect_region(PyThreadState *tstate,
int untrack,
struct gc_collection_stats *stats)
{
Py_ssize_t m = 0; /* # objects collected */
Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
PyGC_Head unreachable; /* non-problematic unreachable trash */
PyGC_Head finalizers; /* objects with, & reachable from, __del__ */
PyGC_Head *gc; /* initialize to prevent a compiler warning */
@@ -1537,17 +1569,23 @@
 
     validate_list(from, collecting_clear_unreachable_clear);
     validate_list(to, collecting_clear_unreachable_clear);
+    validate_consistent_old_space(from);
+    validate_consistent_old_space(to);
 
+    gc_list_init(&unreachable);
     deduce_unreachable(from, &unreachable);
+    validate_consistent_old_space(from);
     if (untrack & UNTRACK_TUPLES) {
         untrack_tuples(from);
     }
     if (untrack & UNTRACK_DICTS) {
         untrack_dicts(from);
     }
+    validate_consistent_old_space(to);
     if (from != to) {
         gc_list_merge(from, to);
     }
+    validate_consistent_old_space(to);
     validate_old(gcstate);
     /* Move reachable objects to next generation. */
 
@@ -1558,11 +1596,14 @@
     // NEXT_MASK_UNREACHABLE is cleared here.
     // After move_legacy_finalizers(), unreachable is normal list.
     move_legacy_finalizers(&unreachable, &finalizers);
+    validate_consistent_old_space(&unreachable);
+    validate_consistent_old_space(&finalizers);
     /* finalizers contains the unreachable objects with a legacy finalizer;
      * unreachable objects reachable *from* those are also uncollectable,
      * and we move those into the finalizers list too.
      */
     move_legacy_finalizer_reachable(&finalizers);
+    validate_consistent_old_space(&finalizers);
 
     validate_list(&finalizers, collecting_clear_unreachable_clear);
     validate_list(&unreachable, collecting_set_unreachable_clear);
@@ -1575,44 +1616,51 @@
     }
 
     /* Clear weakrefs and invoke callbacks as necessary. */
-    m += handle_weakrefs(&unreachable, to);
+    stats->collected += handle_weakrefs(&unreachable, to);
+    validate_consistent_old_space(to);
 
     validate_list(to, collecting_clear_unreachable_clear);
     validate_list(&unreachable, collecting_set_unreachable_clear);
 
     /* Call tp_finalize on objects which have one. */
     finalize_garbage(tstate, &unreachable);
+    validate_consistent_old_space(&unreachable);
 
     /* Handle any objects that may have resurrected after the call
      * to 'finalize_garbage' and continue the collection with the
      * objects that are still unreachable */
     PyGC_Head final_unreachable;
     gc_list_init(&final_unreachable);
+    validate_consistent_old_space(to);
     handle_resurrected_objects(&unreachable, &final_unreachable, to);
+    validate_consistent_old_space(to);
 
     /* Call tp_clear on objects in the final_unreachable set. This will cause
      * the reference cycles to be broken. It may also cause some objects
      * in finalizers to be freed.
      */
-    m += gc_list_size(&final_unreachable);
+    stats->collected += gc_list_size(&final_unreachable);
     delete_garbage(tstate, gcstate, &final_unreachable, to);
+    validate_consistent_old_space(to);
 
     /* Collect statistics on uncollectable objects found and print
      * debugging information. */
+    Py_ssize_t n = 0;
     for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
         n++;
         if (gcstate->debug & DEBUG_UNCOLLECTABLE)
             debug_cycle("uncollectable", FROM_GC(gc));
     }
 
+    stats->uncollectable = n;
     /* Append instances in the uncollectable set to a Python
      * reachable list of garbage. The programmer has to deal with
      * this if they insist on creating this type of structure.
      */
     handle_legacy_finalizers(tstate, gcstate, &finalizers, to);
     validate_list(to, collecting_clear_unreachable_clear);
+    validate_consistent_old_space(to);
 
-    stats->collected = m;
     stats->uncollectable = n;
     validate_old(gcstate);
 }
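
Note on the pattern above: _gc_next doubles as a pointer and a small flag field (NEXT_MASK_UNREACHABLE plus an old-space bit), so every relink must carry the flag bits along; that is what the new 'flags' variable in move_unreachable() is for. Below is a minimal standalone sketch of the technique; the names (node, NODE_FLAG_*, node_set_next) are illustrative, not CPython's real identifiers.

/* Flag bits stored in the low bits of an aligned pointer, as gcmodule.c
 * does with _gc_next.  All names here are illustrative assumptions. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef struct node { uintptr_t next; } node;  /* pointer | flag bits */

#define NODE_FLAG_UNREACHABLE ((uintptr_t)1)  /* cf. NEXT_MASK_UNREACHABLE */
#define NODE_FLAG_OLD_SPACE_1 ((uintptr_t)2)  /* cf. _PyGC_NEXT_MASK_OLD_SPACE_1 */
#define NODE_MASK_FLAGS (NODE_FLAG_UNREACHABLE | NODE_FLAG_OLD_SPACE_1)

static node *node_next(node *n) {
    /* Strip the flag bits to recover the true pointer. */
    return (node *)(n->next & ~NODE_MASK_FLAGS);
}

static void node_set_next(node *n, node *target, uintptr_t flags) {
    /* Word alignment guarantees the low bits of the address are zero. */
    assert(((uintptr_t)target & NODE_MASK_FLAGS) == 0);
    n->next = (uintptr_t)target | flags;
}

int main(void) {
    node a, b;
    node_set_next(&a, &b, NODE_FLAG_UNREACHABLE | NODE_FLAG_OLD_SPACE_1);
    /* A relink must re-apply the saved flags, or the old-space bit is
     * silently dropped; move_unreachable() now captures 'flags' once and
     * ORs it into every new link for exactly this reason. */
    uintptr_t flags = a.next & NODE_MASK_FLAGS;
    node_set_next(&a, node_next(&a), flags);
    printf("flags preserved: %s\n",
           (a.next & NODE_MASK_FLAGS) == flags ? "yes" : "no");
    return 0;
}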

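The #else block in the first hunks stubs each validator out as a no-op when the debug checks are compiled out (the opening #ifdef sits above the lines shown in this diff). A small sketch of that compile-away pattern, assuming a hypothetical MY_GC_DEBUG flag and check_list() helper:

#include <assert.h>

#ifdef MY_GC_DEBUG
static void check_list(int len) {
    assert(len >= 0);
}
#else
/* do{}while(0) gives the macro statement semantics, so the trailing
 * semicolon parses correctly even as the sole body of an if/else. */
#define check_list(len) do{}while(0)
#endif

int probe(int len) {
    if (len > 0)
        check_list(len);  /* vanishes unless MY_GC_DEBUG is defined */
    else
        len = -len;
    return len;
}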
