Skip to content

Commit

Permalink
Clean up sys.modules when the loader is GC'ed
Browse files Browse the repository at this point in the history
This is done in two stages, first, the module instance in `sys.modules` is set to `None`.
Then at a second stage, when creating a loader with the same namespace, it's actually removed from `sys.modules`.

The reason for this two stage cleanup procedure is because this function might get called during the GC collection cycle and trigger https://bugs.python.org/issue40327

We seem to specially trigger this during the CI test runs with `coverage.py` tracking
the code usage:

    Traceback (most recent call last):
      File "/urs/lib64/python3.6/site-packages/coverage/multiproc.py", line 37, in _bootstrap
        cov.start()
      File "/urs/lib64/python3.6/site-packages/coverage/control.py", line 527, in start
        self._init_for_start()
      File "/urs/lib64/python3.6/site-packages/coverage/control.py", line 455, in _init_for_start
        concurrency=concurrency,
      File "/urs/lib64/python3.6/site-packages/coverage/collector.py", line 111, in __init__
        self.origin = short_stack()
      File "/urs/lib64/python3.6/site-packages/coverage/debug.py", line 157, in short_stack
        stack = inspect.stack()[limit:skip:-1]
      File "/usr/lib64/python3.6/inspect.py", line 1501, in stack
        return getouterframes(sys._getframe(1), context)
      File "/usr/lib64/python3.6/inspect.py", line 1478, in getouterframes
        frameinfo = (frame,) + getframeinfo(frame, context)
      File "/usr/lib64/python3.6/inspect.py", line 1452, in getframeinfo
        lines, lnum = findsource(frame)
      File "/usr/lib64/python3.6/inspect.py", line 780, in findsource
        module = getmodule(object, file)
      File "/usr/lib64/python3.6/inspect.py", line 732, in getmodule
        for modname, module in list(sys.modules.items()):
    RuntimeError: dictionary changed size during iteration
  • Loading branch information
s0undt3ch committed Oct 7, 2020
1 parent c41bdc5 commit a6921d6
Show file tree
Hide file tree
Showing 2 changed files with 153 additions and 4 deletions.
67 changes: 63 additions & 4 deletions salt/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import time
import traceback
import types
import weakref
from collections.abc import MutableMapping
from zipimport import zipimporter

Expand Down Expand Up @@ -1129,6 +1130,51 @@ def _mod_type(module_path):
return "ext"


def _cleanup_module_namespace(loaded_base_name, delete_from_sys_modules=False):
"""
Clean module namespace.
If ``delete_from_sys_modules`` is ``False``, then the module instance in ``sys.modules``
will only be set to ``None``, when ``True``, it's actually ``del``elted.
The reason for this two stage cleanup procedure is because this function might
get called during the GC collection cycle and trigger https://bugs.python.org/issue40327
We seem to specially trigger this during the CI test runs with ``coverage.py`` tracking
the code usage:
Traceback (most recent call last):
File "/urs/lib64/python3.6/site-packages/coverage/multiproc.py", line 37, in _bootstrap
cov.start()
File "/urs/lib64/python3.6/site-packages/coverage/control.py", line 527, in start
self._init_for_start()
File "/urs/lib64/python3.6/site-packages/coverage/control.py", line 455, in _init_for_start
concurrency=concurrency,
File "/urs/lib64/python3.6/site-packages/coverage/collector.py", line 111, in __init__
self.origin = short_stack()
File "/urs/lib64/python3.6/site-packages/coverage/debug.py", line 157, in short_stack
stack = inspect.stack()[limit:skip:-1]
File "/usr/lib64/python3.6/inspect.py", line 1501, in stack
return getouterframes(sys._getframe(1), context)
File "/usr/lib64/python3.6/inspect.py", line 1478, in getouterframes
frameinfo = (frame,) + getframeinfo(frame, context)
File "/usr/lib64/python3.6/inspect.py", line 1452, in getframeinfo
lines, lnum = findsource(frame)
File "/usr/lib64/python3.6/inspect.py", line 780, in findsource
module = getmodule(object, file)
File "/usr/lib64/python3.6/inspect.py", line 732, in getmodule
for modname, module in list(sys.modules.items()):
RuntimeError: dictionary changed size during iteration
"""
for name in list(sys.modules):
if name.startswith(loaded_base_name):
if delete_from_sys_modules:
del sys.modules[name]
else:
mod = sys.modules[name]
sys.modules[name] = None
del mod


# TODO: move somewhere else?
class FilterDictWrapper(MutableMapping):
"""
Expand Down Expand Up @@ -1219,10 +1265,23 @@ def __init__(

self.module_dirs = module_dirs
self.tag = tag
self._gc_finalizer = None
if loaded_base_name:
self.loaded_base_name = loaded_base_name
else:
self.loaded_base_name = "{}_{}".format(LOADED_BASE_NAME, id(self))
# Remove any modules matching self.loaded_base_name that have been set to None previously
self.clean_modules()
# Make sure that, when this module is about to be GC'ed, we at least set any modules in
# sys.modules which match self.loaded_base_name to None to reduce memory usage over time.
# ATTENTION: Do not replace the '_cleanup_module_namespace' function on the call below with
# self.clean_modules as that WILL prevent this loader object from being garbage
# collected and the finalizer running.
self._gc_finalizer = weakref.finalize(
self, _cleanup_module_namespace, "{}".format(self.loaded_base_name)
)
# This finalizer does not need to run when the process is exiting
self._gc_finalizer.atexit = False
self.mod_type_check = mod_type_check or _mod_type

if "__context__" not in self.pack:
Expand Down Expand Up @@ -1282,10 +1341,10 @@ def clean_modules(self):
"""
Clean modules
"""
for name in list(sys.modules):
if name.startswith(self.loaded_base_name):
mod = sys.modules.pop(name)
del mod
if self._gc_finalizer is not None and self._gc_finalizer.alive:
# Prevent the weakref.finalizer instance from running, there's no point after the next call.
self._gc_finalizer.detach()
_cleanup_module_namespace(self.loaded_base_name, delete_from_sys_modules=True)

def __getitem__(self, item):
"""
Expand Down
90 changes: 90 additions & 0 deletions tests/unit/test_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import collections
import compileall
import copy
import gc
import imp
import inspect
import logging
Expand Down Expand Up @@ -1271,6 +1272,95 @@ def test_loader_globals(self):
assert func2.__globals__["__foo__"] == "bar2"


class LoaderCleanupTest(ModuleCase):
"""
Tests the loader cleanup procedures
"""

def setUp(self):
opts = salt.config.minion_config(None)
self.loader1 = salt.loader.LazyLoader(
salt.loader._module_dirs(copy.deepcopy(opts), "modules", "module"),
copy.deepcopy(opts),
pack={},
tag="module",
)

def tearDown(self):
del self.loader1

def test_loader_gc_cleanup(self):
loaded_base_name = self.loader1.loaded_base_name
for name in list(sys.modules):
if name.startswith(loaded_base_name):
break
else:
self.fail(
"Did not find any modules in sys.modules matching {!r}".format(
loaded_base_name
)
)

# Assert that the weakref.finalizer is alive
assert self.loader1._gc_finalizer.alive is True

gc.collect()
# Even after a gc.collect call, we should still have our module in sys.modules
for name in list(sys.modules):
if name.startswith(loaded_base_name):
if sys.modules[name] is None:
self.fail(
"Found at least one module in sys.modules matching {!r} prematurely set to None".format(
loaded_base_name
)
)
break
else:
self.fail(
"Did not find any modules in sys.modules matching {!r}".format(
loaded_base_name
)
)
# Should still be true because there's still at least one reference to self.loader1
assert self.loader1._gc_finalizer.alive is True

# Now we remove our refence to loader and trigger GC, thus triggering the loader weakref finalizer
self.loader1 = None
gc.collect()

for name in list(sys.modules):
if name.startswith(loaded_base_name):
if sys.modules[name] is not None:
self.fail(
"Found a real module reference in sys.modules matching {!r}.".format(
loaded_base_name,
)
)
break
else:
self.fail(
"Did not find any modules in sys.modules matching {!r}".format(
loaded_base_name
)
)

def test_loader_clean_modules(self):
loaded_base_name = self.loader1.loaded_base_name
self.loader1.clean_modules()

for name in list(sys.modules):
if name.startswith(loaded_base_name):
self.fail(
"Found a real module reference in sys.modules matching {!r}".format(
loaded_base_name
)
)
break

# Additionally, assert that the weakref.finalizer is now dead
assert self.loader1._gc_finalizer.alive is False


class LoaderGlobalsTest(ModuleCase):
"""
Test all of the globals that the loader is responsible for adding to modules
Expand Down

0 comments on commit a6921d6

Please sign in to comment.