From dbf311a021d80aecb38033a9f44fcf13be9e7a6f Mon Sep 17 00:00:00 2001 From: Jason Madden Date: Wed, 18 Sep 2024 13:52:19 -0500 Subject: [PATCH] Greater safety and fewer assumptions doing cross-thread cleanup. --- make-manylinux | 2 +- src/greenlet/CObjects.cpp | 2 +- src/greenlet/PyGreenlet.cpp | 2 +- src/greenlet/PyGreenlet.hpp | 2 +- src/greenlet/PyGreenletUnswitchable.cpp | 2 +- src/greenlet/PyModule.cpp | 2 +- src/greenlet/TGreenlet.cpp | 2 +- src/greenlet/TGreenletGlobals.cpp | 2 +- src/greenlet/TMainGreenlet.cpp | 2 +- ...nlet_thread_state.hpp => TThreadState.hpp} | 86 +-- src/greenlet/TThreadStateCreator.hpp | 499 +----------------- src/greenlet/TThreadStateDestroy.cpp | 212 +++++--- src/greenlet/TUserGreenlet.cpp | 2 +- src/greenlet/greenlet.cpp | 8 +- src/greenlet/greenlet_internal.hpp | 4 + 15 files changed, 158 insertions(+), 671 deletions(-) rename src/greenlet/{greenlet_thread_state.hpp => TThreadState.hpp} (87%) diff --git a/make-manylinux b/make-manylinux index 787a98b1..677e4898 100755 --- a/make-manylinux +++ b/make-manylinux @@ -45,7 +45,7 @@ if [ -d /greenlet -a -d /opt/python ]; then python -mpip install -U pip python -mpip install -U setuptools wheel - python -mpip wheel --wheel-dir ./dist . + python -mpip wheel -v --wheel-dir ./dist . python -mpip install -U .[test] python -m unittest discover -v greenlet.tests PATH="$OPATH" auditwheel repair dist/greenlet*.whl diff --git a/src/greenlet/CObjects.cpp b/src/greenlet/CObjects.cpp index a2bce441..c135995b 100644 --- a/src/greenlet/CObjects.cpp +++ b/src/greenlet/CObjects.cpp @@ -15,7 +15,7 @@ #include "greenlet_internal.hpp" #include "greenlet_refs.hpp" -#include "greenlet_thread_state.hpp" + #include "TThreadStateDestroy.cpp" #include "PyGreenlet.hpp" diff --git a/src/greenlet/PyGreenlet.cpp b/src/greenlet/PyGreenlet.cpp index 147a2312..29c0bba0 100644 --- a/src/greenlet/PyGreenlet.cpp +++ b/src/greenlet/PyGreenlet.cpp @@ -20,7 +20,7 @@ The Python slot functions for TGreenlet. #include "greenlet_refs.hpp" #include "greenlet_slp_switch.hpp" -#include "greenlet_thread_state.hpp" + #include "greenlet_thread_support.hpp" #include "TGreenlet.hpp" diff --git a/src/greenlet/PyGreenlet.hpp b/src/greenlet/PyGreenlet.hpp index d56f339f..df6cd805 100644 --- a/src/greenlet/PyGreenlet.hpp +++ b/src/greenlet/PyGreenlet.hpp @@ -5,7 +5,7 @@ #include "greenlet.h" #include "greenlet_compiler_compat.hpp" #include "greenlet_refs.hpp" -#include "greenlet_thread_state.hpp" + using greenlet::refs::OwnedGreenlet; using greenlet::refs::BorrowedGreenlet; diff --git a/src/greenlet/PyGreenletUnswitchable.cpp b/src/greenlet/PyGreenletUnswitchable.cpp index cf14b02a..1b768ee3 100644 --- a/src/greenlet/PyGreenletUnswitchable.cpp +++ b/src/greenlet/PyGreenletUnswitchable.cpp @@ -17,7 +17,7 @@ // as well. #include "greenlet_refs.hpp" #include "greenlet_slp_switch.hpp" -#include "greenlet_thread_state.hpp" + #include "greenlet_thread_support.hpp" #include "TGreenlet.hpp" diff --git a/src/greenlet/PyModule.cpp b/src/greenlet/PyModule.cpp index 6a236dfe..6adcb5c3 100644 --- a/src/greenlet/PyModule.cpp +++ b/src/greenlet/PyModule.cpp @@ -3,7 +3,7 @@ #include "greenlet_internal.hpp" -#include "greenlet_thread_state.hpp" + #include "TGreenletGlobals.cpp" #include "TMainGreenlet.cpp" #include "TThreadStateDestroy.cpp" diff --git a/src/greenlet/TGreenlet.cpp b/src/greenlet/TGreenlet.cpp index a9e56b6e..4698a178 100644 --- a/src/greenlet/TGreenlet.cpp +++ b/src/greenlet/TGreenlet.cpp @@ -13,7 +13,7 @@ #define TGREENLET_CPP #include "greenlet_internal.hpp" #include "TGreenlet.hpp" -#include "greenlet_thread_state.hpp" + #include "TGreenletGlobals.cpp" #include "TThreadStateDestroy.cpp" diff --git a/src/greenlet/TGreenletGlobals.cpp b/src/greenlet/TGreenletGlobals.cpp index c71c963b..0087d2ff 100644 --- a/src/greenlet/TGreenletGlobals.cpp +++ b/src/greenlet/TGreenletGlobals.cpp @@ -15,7 +15,7 @@ #include "greenlet_refs.hpp" #include "greenlet_exceptions.hpp" #include "greenlet_thread_support.hpp" -#include "greenlet_thread_state.hpp" +#include "greenlet_internal.hpp" namespace greenlet { diff --git a/src/greenlet/TMainGreenlet.cpp b/src/greenlet/TMainGreenlet.cpp index 25f33ddc..a2a9cfe4 100644 --- a/src/greenlet/TMainGreenlet.cpp +++ b/src/greenlet/TMainGreenlet.cpp @@ -13,7 +13,7 @@ #define T_MAIN_GREENLET_CPP #include "TGreenlet.hpp" -#include "greenlet_thread_state.hpp" + // Protected by the GIL. Incremented when we create a main greenlet, diff --git a/src/greenlet/greenlet_thread_state.hpp b/src/greenlet/TThreadState.hpp similarity index 87% rename from src/greenlet/greenlet_thread_state.hpp rename to src/greenlet/TThreadState.hpp index 39cc0d00..e4e6f6cb 100644 --- a/src/greenlet/greenlet_thread_state.hpp +++ b/src/greenlet/TThreadState.hpp @@ -214,14 +214,14 @@ class ThreadState { return 0; } - inline BorrowedMainGreenlet borrow_main_greenlet() const + inline BorrowedMainGreenlet borrow_main_greenlet() const noexcept { assert(this->main_greenlet); assert(this->main_greenlet.REFCNT() >= 2); return this->main_greenlet; }; - inline OwnedMainGreenlet get_main_greenlet() + inline OwnedMainGreenlet get_main_greenlet() const noexcept { return this->main_greenlet; } @@ -488,91 +488,9 @@ ImmortalString ThreadState::get_referrers_name(nullptr); PythonAllocator ThreadState::allocator; std::clock_t ThreadState::_clocks_used_doing_gc(0); -template -class ThreadStateCreator -{ -private: - // Initialized to 1, and, if still 1, created on access. - // Set to 0 on destruction. - ThreadState* _state; - G_NO_COPIES_OF_CLS(ThreadStateCreator); - - inline bool has_initialized_state() const - { - return this->_state != (ThreadState*)1; - } - - inline bool has_state() const - { - return this->has_initialized_state() && this->_state != nullptr; - } - -public: - // Only one of these, auto created per thread. - // Constructing the state constructs the MainGreenlet. - ThreadStateCreator() : - _state((ThreadState*)1) - { - } - - ~ThreadStateCreator() - { - ThreadState* tmp = this->_state; - this->_state = nullptr; - if (tmp && tmp != (ThreadState*)1) { - Destructor x(tmp); - } - } - - inline ThreadState& state() - { - // The main greenlet will own this pointer when it is created, - // which will be right after this. The plan is to give every - // greenlet a pointer to the main greenlet for the thread it - // runs in; if we are doing something cross-thread, we need to - // access the pointer from the main greenlet. Deleting the - // thread, and hence the thread-local storage, will delete the - // state pointer in the main greenlet. - if (!this->has_initialized_state()) { - // XXX: Assuming allocation never fails - this->_state = new ThreadState; - // For non-standard threading, we need to store an object - // in the Python thread state dictionary so that it can be - // DECREF'd when the thread ends (ideally; the dict could - // last longer) and clean this object up. - } - if (!this->_state) { - throw std::runtime_error("Accessing state after destruction."); - } - return *this->_state; - } - - operator ThreadState&() - { - return this->state(); - } - - operator ThreadState*() - { - return &this->state(); - } - - inline int tp_traverse(visitproc visit, void* arg) - { - if (this->has_state()) { - return this->_state->tp_traverse(visit, arg); - } - return 0; - } - -}; -// We can't use the PythonAllocator for this, because we push to it -// from the thread state destructor, which doesn't have the GIL, -// and Python's allocators can only be called with the GIL. -typedef std::vector cleanup_queue_t; }; // namespace greenlet diff --git a/src/greenlet/TThreadStateCreator.hpp b/src/greenlet/TThreadStateCreator.hpp index 39cc0d00..2ec7ab55 100644 --- a/src/greenlet/TThreadStateCreator.hpp +++ b/src/greenlet/TThreadStateCreator.hpp @@ -1,5 +1,5 @@ -#ifndef GREENLET_THREAD_STATE_HPP -#define GREENLET_THREAD_STATE_HPP +#ifndef GREENLET_THREAD_STATE_CREATOR_HPP +#define GREENLET_THREAD_STATE_CREATOR_HPP #include #include @@ -8,487 +8,14 @@ #include "greenlet_refs.hpp" #include "greenlet_thread_support.hpp" -using greenlet::refs::BorrowedObject; -using greenlet::refs::BorrowedGreenlet; -using greenlet::refs::BorrowedMainGreenlet; -using greenlet::refs::OwnedMainGreenlet; -using greenlet::refs::OwnedObject; -using greenlet::refs::OwnedGreenlet; -using greenlet::refs::OwnedList; -using greenlet::refs::PyErrFetchParam; -using greenlet::refs::PyArgParseParam; -using greenlet::refs::ImmortalString; -using greenlet::refs::CreatedModule; -using greenlet::refs::PyErrPieces; -using greenlet::refs::NewReference; +#include "TThreadState.hpp" namespace greenlet { -/** - * Thread-local state of greenlets. - * - * Each native thread will get exactly one of these objects, - * automatically accessed through the best available thread-local - * mechanism the compiler supports (``thread_local`` for C++11 - * compilers or ``__thread``/``declspec(thread)`` for older GCC/clang - * or MSVC, respectively.) - * - * Previously, we kept thread-local state mostly in a bunch of - * ``static volatile`` variables in the main greenlet file.. This had - * the problem of requiring extra checks, loops, and great care - * accessing these variables if we potentially invoked any Python code - * that could release the GIL, because the state could change out from - * under us. Making the variables thread-local solves this problem. - * - * When we detected that a greenlet API accessing the current greenlet - * was invoked from a different thread than the greenlet belonged to, - * we stored a reference to the greenlet in the Python thread - * dictionary for the thread the greenlet belonged to. This could lead - * to memory leaks if the thread then exited (because of a reference - * cycle, as greenlets referred to the thread dictionary, and deleting - * non-current greenlets leaked their frame plus perhaps arguments on - * the C stack). If a thread exited while still having running - * greenlet objects (perhaps that had just switched back to the main - * greenlet), and did not invoke one of the greenlet APIs *in that - * thread, immediately before it exited, without some other thread - * then being invoked*, such a leak was guaranteed. - * - * This can be partly solved by using compiler thread-local variables - * instead of the Python thread dictionary, thus avoiding a cycle. - * - * To fully solve this problem, we need a reliable way to know that a - * thread is done and we should clean up the main greenlet. On POSIX, - * we can use the destructor function of ``pthread_key_create``, but - * there's nothing similar on Windows; a C++11 thread local object - * reliably invokes its destructor when the thread it belongs to exits - * (non-C++11 compilers offer ``__thread`` or ``declspec(thread)`` to - * create thread-local variables, but they can't hold C++ objects that - * invoke destructors; the C++11 version is the most portable solution - * I found). When the thread exits, we can drop references and - * otherwise manipulate greenlets and frames that we know can no - * longer be switched to. For compilers that don't support C++11 - * thread locals, we have a solution that uses the python thread - * dictionary, though it may not collect everything as promptly as - * other compilers do, if some other library is using the thread - * dictionary and has a cycle or extra reference. - * - * There are two small wrinkles. The first is that when the thread - * exits, it is too late to actually invoke Python APIs: the Python - * thread state is gone, and the GIL is released. To solve *this* - * problem, our destructor uses ``Py_AddPendingCall`` to transfer the - * destruction work to the main thread. (This is not an issue for the - * dictionary solution.) - * - * The second is that once the thread exits, the thread local object - * is invalid and we can't even access a pointer to it, so we can't - * pass it to ``Py_AddPendingCall``. This is handled by actually using - * a second object that's thread local (ThreadStateCreator) and having - * it dynamically allocate this object so it can live until the - * pending call runs. - */ +typedef void (*ThreadStateDestructor)(ThreadState* const); -class ThreadState { -private: - // As of commit 08ad1dd7012b101db953f492e0021fb08634afad - // this class needed 56 bytes in o Py_DEBUG build - // on 64-bit macOS 11. - // Adding the vector takes us up to 80 bytes () - - /* Strong reference to the main greenlet */ - OwnedMainGreenlet main_greenlet; - - /* Strong reference to the current greenlet. */ - OwnedGreenlet current_greenlet; - - /* Strong reference to the trace function, if any. */ - OwnedObject tracefunc; - - typedef std::vector > deleteme_t; - /* A vector of raw PyGreenlet pointers representing things that need - deleted when this thread is running. The vector owns the - references, but you need to manually INCREF/DECREF as you use - them. We don't use a vector because we - make copy of this vector, and that would become O(n) as all the - refcounts are incremented in the copy. - */ - deleteme_t deleteme; - -#ifdef GREENLET_NEEDS_EXCEPTION_STATE_SAVED - void* exception_state; -#endif - - static std::clock_t _clocks_used_doing_gc; - static ImmortalString get_referrers_name; - static PythonAllocator allocator; - - G_NO_COPIES_OF_CLS(ThreadState); - - - // Allocates a main greenlet for the thread state. If this fails, - // exits the process. Called only during constructing a ThreadState. - MainGreenlet* alloc_main() - { - PyGreenlet* gmain; - - /* create the main greenlet for this thread */ - gmain = reinterpret_cast(PyType_GenericAlloc(&PyGreenlet_Type, 0)); - if (gmain == NULL) { - throw PyFatalError("alloc_main failed to alloc"); //exits the process - } - - MainGreenlet* const main = new MainGreenlet(gmain, this); - - assert(Py_REFCNT(gmain) == 1); - assert(gmain->pimpl == main); - return main; - } - - -public: - static void* operator new(size_t UNUSED(count)) - { - return ThreadState::allocator.allocate(1); - } - - static void operator delete(void* ptr) - { - return ThreadState::allocator.deallocate(static_cast(ptr), - 1); - } - - static void init() - { - ThreadState::get_referrers_name = "get_referrers"; - ThreadState::_clocks_used_doing_gc = 0; - } - - ThreadState() - { - -#ifdef GREENLET_NEEDS_EXCEPTION_STATE_SAVED - this->exception_state = slp_get_exception_state(); -#endif - - // XXX: Potentially dangerous, exposing a not fully - // constructed object. - MainGreenlet* const main = this->alloc_main(); - this->main_greenlet = OwnedMainGreenlet::consuming( - main->self() - ); - assert(this->main_greenlet); - this->current_greenlet = main->self(); - // The main greenlet starts with 1 refs: The returned one. We - // then copied it to the current greenlet. - assert(this->main_greenlet.REFCNT() == 2); - } - - inline void restore_exception_state() - { -#ifdef GREENLET_NEEDS_EXCEPTION_STATE_SAVED - // It's probably important this be inlined and only call C - // functions to avoid adding an SEH frame. - slp_set_exception_state(this->exception_state); -#endif - } - - inline bool has_main_greenlet() const noexcept - { - return bool(this->main_greenlet); - } - - // Called from the ThreadStateCreator when we're in non-standard - // threading mode. In that case, there is an object in the Python - // thread state dictionary that points to us. The main greenlet - // also traverses into us, in which case it's crucial not to - // traverse back into the main greenlet. - int tp_traverse(visitproc visit, void* arg, bool traverse_main=true) - { - if (traverse_main) { - Py_VISIT(main_greenlet.borrow_o()); - } - if (traverse_main || current_greenlet != main_greenlet) { - Py_VISIT(current_greenlet.borrow_o()); - } - Py_VISIT(tracefunc.borrow()); - return 0; - } - - inline BorrowedMainGreenlet borrow_main_greenlet() const - { - assert(this->main_greenlet); - assert(this->main_greenlet.REFCNT() >= 2); - return this->main_greenlet; - }; - - inline OwnedMainGreenlet get_main_greenlet() - { - return this->main_greenlet; - } - - /** - * In addition to returning a new reference to the currunt - * greenlet, this performs any maintenance needed. - */ - inline OwnedGreenlet get_current() - { - /* green_dealloc() cannot delete greenlets from other threads, so - it stores them in the thread dict; delete them now. */ - this->clear_deleteme_list(); - //assert(this->current_greenlet->main_greenlet == this->main_greenlet); - //assert(this->main_greenlet->main_greenlet == this->main_greenlet); - return this->current_greenlet; - } - - /** - * As for non-const get_current(); - */ - inline BorrowedGreenlet borrow_current() - { - this->clear_deleteme_list(); - return this->current_greenlet; - } - - /** - * Does no maintenance. - */ - inline OwnedGreenlet get_current() const - { - return this->current_greenlet; - } - - template - inline bool is_current(const refs::PyObjectPointer& obj) const - { - return this->current_greenlet.borrow_o() == obj.borrow_o(); - } - - inline void set_current(const OwnedGreenlet& target) - { - this->current_greenlet = target; - } - -private: - /** - * Deref and remove the greenlets from the deleteme list. Must be - * holding the GIL. - * - * If *murder* is true, then we must be called from a different - * thread than the one that these greenlets were running in. - * In that case, if the greenlet was actually running, we destroy - * the frame reference and otherwise make it appear dead before - * proceeding; otherwise, we would try (and fail) to raise an - * exception in it and wind up right back in this list. - */ - inline void clear_deleteme_list(const bool murder=false) - { - if (!this->deleteme.empty()) { - // It's possible we could add items to this list while - // running Python code if there's a thread switch, so we - // need to defensively copy it before that can happen. - deleteme_t copy = this->deleteme; - this->deleteme.clear(); // in case things come back on the list - for(deleteme_t::iterator it = copy.begin(), end = copy.end(); - it != end; - ++it ) { - PyGreenlet* to_del = *it; - if (murder) { - // Force each greenlet to appear dead; we can't raise an - // exception into it anymore anyway. - to_del->pimpl->murder_in_place(); - } - - // The only reference to these greenlets should be in - // this list, decreffing them should let them be - // deleted again, triggering calls to green_dealloc() - // in the correct thread (if we're not murdering). - // This may run arbitrary Python code and switch - // threads or greenlets! - Py_DECREF(to_del); - if (PyErr_Occurred()) { - PyErr_WriteUnraisable(nullptr); - PyErr_Clear(); - } - } - } - } - -public: - - /** - * Returns a new reference, or a false object. - */ - inline OwnedObject get_tracefunc() const - { - return tracefunc; - }; - - - inline void set_tracefunc(BorrowedObject tracefunc) - { - assert(tracefunc); - if (tracefunc == BorrowedObject(Py_None)) { - this->tracefunc.CLEAR(); - } - else { - this->tracefunc = tracefunc; - } - } - - /** - * Given a reference to a greenlet that some other thread - * attempted to delete (has a refcount of 0) store it for later - * deletion when the thread this state belongs to is current. - */ - inline void delete_when_thread_running(PyGreenlet* to_del) - { - Py_INCREF(to_del); - this->deleteme.push_back(to_del); - } - - /** - * Set to std::clock_t(-1) to disable. - */ - inline static std::clock_t& clocks_used_doing_gc() - { - return ThreadState::_clocks_used_doing_gc; - } - - ~ThreadState() - { - if (!PyInterpreterState_Head()) { - // We shouldn't get here (our callers protect us) - // but if we do, all we can do is bail early. - return; - } - - // We should not have an "origin" greenlet; that only exists - // for the temporary time during a switch, which should not - // be in progress as the thread dies. - //assert(!this->switching_state.origin); - - this->tracefunc.CLEAR(); - - // Forcibly GC as much as we can. - this->clear_deleteme_list(true); - - // The pending call did this. - assert(this->main_greenlet->thread_state() == nullptr); - - // If the main greenlet is the current greenlet, - // then we "fell off the end" and the thread died. - // It's possible that there is some other greenlet that - // switched to us, leaving a reference to the main greenlet - // on the stack, somewhere uncollectible. Try to detect that. - if (this->current_greenlet == this->main_greenlet && this->current_greenlet) { - assert(this->current_greenlet->is_currently_running_in_some_thread()); - // Drop one reference we hold. - this->current_greenlet.CLEAR(); - assert(!this->current_greenlet); - // Only our reference to the main greenlet should be left, - // But hold onto the pointer in case we need to do extra cleanup. - PyGreenlet* old_main_greenlet = this->main_greenlet.borrow(); - Py_ssize_t cnt = this->main_greenlet.REFCNT(); - this->main_greenlet.CLEAR(); - if (ThreadState::_clocks_used_doing_gc != std::clock_t(-1) - && cnt == 2 && Py_REFCNT(old_main_greenlet) == 1) { - // Highly likely that the reference is somewhere on - // the stack, not reachable by GC. Verify. - // XXX: This is O(n) in the total number of objects. - // TODO: Add a way to disable this at runtime, and - // another way to report on it. - std::clock_t begin = std::clock(); - NewReference gc(PyImport_ImportModule("gc")); - if (gc) { - OwnedObject get_referrers = gc.PyRequireAttr(ThreadState::get_referrers_name); - OwnedList refs(get_referrers.PyCall(old_main_greenlet)); - if (refs && refs.empty()) { - assert(refs.REFCNT() == 1); - // We found nothing! So we left a dangling - // reference: Probably the last thing some - // other greenlet did was call - // 'getcurrent().parent.switch()' to switch - // back to us. Clean it up. This will be the - // case on CPython 3.7 and newer, as they use - // an internal calling conversion that avoids - // creating method objects and storing them on - // the stack. - Py_DECREF(old_main_greenlet); - } - else if (refs - && refs.size() == 1 - && PyCFunction_Check(refs.at(0)) - && Py_REFCNT(refs.at(0)) == 2) { - assert(refs.REFCNT() == 1); - // Ok, we found a C method that refers to the - // main greenlet, and its only referenced - // twice, once in the list we just created, - // once from...somewhere else. If we can't - // find where else, then this is a leak. - // This happens in older versions of CPython - // that create a bound method object somewhere - // on the stack that we'll never get back to. - if (PyCFunction_GetFunction(refs.at(0).borrow()) == (PyCFunction)green_switch) { - BorrowedObject function_w = refs.at(0); - refs.clear(); // destroy the reference - // from the list. - // back to one reference. Can *it* be - // found? - assert(function_w.REFCNT() == 1); - refs = get_referrers.PyCall(function_w); - if (refs && refs.empty()) { - // Nope, it can't be found so it won't - // ever be GC'd. Drop it. - Py_CLEAR(function_w); - } - } - } - std::clock_t end = std::clock(); - ThreadState::_clocks_used_doing_gc += (end - begin); - } - } - } - - // We need to make sure this greenlet appears to be dead, - // because otherwise deallocing it would fail to raise an - // exception in it (the thread is dead) and put it back in our - // deleteme list. - if (this->current_greenlet) { - this->current_greenlet->murder_in_place(); - this->current_greenlet.CLEAR(); - } - - if (this->main_greenlet) { - // Couldn't have been the main greenlet that was running - // when the thread exited (because we already cleared this - // pointer if it was). This shouldn't be possible? - - // If the main greenlet was current when the thread died (it - // should be, right?) then we cleared its self pointer above - // when we cleared the current greenlet's main greenlet pointer. - // assert(this->main_greenlet->main_greenlet == this->main_greenlet - // || !this->main_greenlet->main_greenlet); - // // self reference, probably gone - // this->main_greenlet->main_greenlet.CLEAR(); - - // This will actually go away when the ivar is destructed. - this->main_greenlet.CLEAR(); - } - - if (PyErr_Occurred()) { - PyErr_WriteUnraisable(NULL); - PyErr_Clear(); - } - - } - -}; - -ImmortalString ThreadState::get_referrers_name(nullptr); -PythonAllocator ThreadState::allocator; -std::clock_t ThreadState::_clocks_used_doing_gc(0); - -template +template class ThreadStateCreator { private: @@ -497,12 +24,12 @@ class ThreadStateCreator ThreadState* _state; G_NO_COPIES_OF_CLS(ThreadStateCreator); - inline bool has_initialized_state() const + inline bool has_initialized_state() const noexcept { return this->_state != (ThreadState*)1; } - inline bool has_state() const + inline bool has_state() const noexcept { return this->has_initialized_state() && this->_state != nullptr; } @@ -518,11 +45,11 @@ class ThreadStateCreator ~ThreadStateCreator() { - ThreadState* tmp = this->_state; - this->_state = nullptr; - if (tmp && tmp != (ThreadState*)1) { - Destructor x(tmp); + if (this->has_state()) { + Destructor(this->_state); } + + this->_state = nullptr; } inline ThreadState& state() @@ -569,10 +96,6 @@ class ThreadStateCreator }; -// We can't use the PythonAllocator for this, because we push to it -// from the thread state destructor, which doesn't have the GIL, -// and Python's allocators can only be called with the GIL. -typedef std::vector cleanup_queue_t; }; // namespace greenlet diff --git a/src/greenlet/TThreadStateDestroy.cpp b/src/greenlet/TThreadStateDestroy.cpp index e5a9661a..9c4f2e03 100644 --- a/src/greenlet/TThreadStateDestroy.cpp +++ b/src/greenlet/TThreadStateDestroy.cpp @@ -13,24 +13,141 @@ #define T_THREADSTATE_DESTROY #include "TGreenlet.hpp" -#include "greenlet_thread_state.hpp" + #include "greenlet_thread_support.hpp" #include "greenlet_cpython_add_pending.hpp" #include "TGreenletGlobals.cpp" +#include "TThreadState.hpp" +#include "TThreadStateCreator.hpp" namespace greenlet { -struct ThreadState_DestroyWithGIL +extern "C" { + +struct ThreadState_DestroyNoGIL { - ThreadState_DestroyWithGIL(ThreadState* state) + /** + This function uses the same lock that the PendingCallback does + */ + static void + MarkGreenletDeadAndQueueCleanup(ThreadState* const state) + { + // We are *NOT* holding the GIL. Our thread is in the middle + // of its death throes and the Python thread state is already + // gone so we can't use most Python APIs. One that is safe is + // ``Py_AddPendingCall``, unless the interpreter itself has + // been torn down. There is a limited number of calls that can + // be queued: 32 (NPENDINGCALLS) in CPython 3.10, so we + // coalesce these calls using our own queue. + + if (!MarkGreenletDeadIfNeeded(state)) { + // No state, or no greenlet + return; + } + + // XXX: Because we don't have the GIL, this is a race condition. + if (!PyInterpreterState_Head()) { + // We have to leak the thread state, if the + // interpreter has shut down when we're getting + // deallocated, we can't run the cleanup code that + // deleting it would imply. + return; + } + + AddToCleanupQueue(state); + + } + +private: + + // If the state has an allocated main greenlet: + // - mark the greenlet as dead by disassociating it from the state; + // - return 1 + // Otherwise, return 0. + static bool + MarkGreenletDeadIfNeeded(ThreadState* const state) { if (state && state->has_main_greenlet()) { - DestroyWithGIL(state); + // mark the thread as dead ASAP. + // this is racy! If we try to throw or switch to a + // greenlet from this thread from some other thread before + // we clear the state pointer, it won't realize the state + // is dead which can crash the process. + PyGreenlet* p(state->borrow_main_greenlet().borrow()); + assert(p->pimpl->thread_state() == state || p->pimpl->thread_state() == nullptr); + dynamic_cast(p->pimpl)->thread_state(nullptr); + return true; + } + return false; + } + + static void + AddToCleanupQueue(ThreadState* const state) + { + assert(state && state->has_main_greenlet()); + + // NOTE: Because we're not holding the GIL here, some other + // Python thread could run and call ``os.fork()``, which would + // be bad if that happened while we are holding the cleanup + // lock (it wouldn't function in the child process). + // Make a best effort to try to keep the duration we hold the + // lock short. + // TODO: On platforms that support it, use ``pthread_atfork`` to + // drop this lock. + LockGuard cleanup_lock(*mod_globs->thread_states_to_destroy_lock); + + mod_globs->queue_to_destroy(state); + if (mod_globs->thread_states_to_destroy.size() == 1) { + // We added the first item to the queue. We need to schedule + // the cleanup. + + // A size greater than 1 means that we have already added the pending call, + // and in fact, it may be executing now. + // If it is executing, our lock makes sure that it will see the item we just added + // to the queue on its next iteration (after we release the lock) + // + // A size of 1 means there is no pending call, OR the pending call is + // currently executing, has dropped the lock, and is deleting the last item + // from the queue; its next iteration will go ahead and delete the item we just added. + // And the pending call we schedule here will have no work to do. + int result = AddPendingCall( + PendingCallback_DestroyQueueWithGIL, + nullptr); + if (result < 0) { + // Hmm, what can we do here? + fprintf(stderr, + "greenlet: WARNING: failed in call to Py_AddPendingCall; " + "expect a memory leak.\n"); + } } } static int - DestroyWithGIL(ThreadState* state) + PendingCallback_DestroyQueueWithGIL(void* UNUSED(arg)) + { + // We're holding the GIL here, so no Python code should be able to + // run to call ``os.fork()``. + while (1) { + ThreadState* to_destroy; + { + LockGuard cleanup_lock(*mod_globs->thread_states_to_destroy_lock); + if (mod_globs->thread_states_to_destroy.empty()) { + break; + } + to_destroy = mod_globs->take_next_to_destroy(); + } + assert(to_destroy); + assert(to_destroy->has_main_greenlet()); + // Drop the lock while we do the actual deletion. + // This allows other calls to MarkGreenletDeadAndQueueCleanup + // to enter and add to our queue. + DestroyOneWithGIL(to_destroy); + } + return 0; + } + + static void + DestroyOneWithGIL(const ThreadState* const state) { // Holding the GIL. // Passed a non-shared pointer to the actual thread state. @@ -42,17 +159,11 @@ struct ThreadState_DestroyWithGIL // We do this here, rather than in a Python dealloc function // for the greenlet, in case there's still a reference out // there. - static_cast(main->pimpl)->thread_state(nullptr); + dynamic_cast(main->pimpl)->thread_state(nullptr); delete state; // Deleting this runs the destructor, DECREFs the main greenlet. - return 0; } -}; - - -struct ThreadState_DestroyNoGIL -{ // ensure this is actually defined. static_assert(GREENLET_BROKEN_PY_ADD_PENDING == 1 || GREENLET_BROKEN_PY_ADD_PENDING == 0, "GREENLET_BROKEN_PY_ADD_PENDING not defined correctly."); @@ -121,83 +232,10 @@ struct ThreadState_DestroyNoGIL } #endif - ThreadState_DestroyNoGIL(ThreadState* state) - { - // We are *NOT* holding the GIL. Our thread is in the middle - // of its death throes and the Python thread state is already - // gone so we can't use most Python APIs. One that is safe is - // ``Py_AddPendingCall``, unless the interpreter itself has - // been torn down. There is a limited number of calls that can - // be queued: 32 (NPENDINGCALLS) in CPython 3.10, so we - // coalesce these calls using our own queue. - if (state && state->has_main_greenlet()) { - // mark the thread as dead ASAP. - // this is racy! If we try to throw or switch to a - // greenlet from this thread from some other thread before - // we clear the state pointer, it won't realize the state - // is dead which can crash the process. - PyGreenlet* p = state->borrow_main_greenlet(); - assert(p->pimpl->thread_state() == state || p->pimpl->thread_state() == nullptr); - static_cast(p->pimpl)->thread_state(nullptr); - } - - // NOTE: Because we're not holding the GIL here, some other - // Python thread could run and call ``os.fork()``, which would - // be bad if that happened while we are holding the cleanup - // lock (it wouldn't function in the child process). - // Make a best effort to try to keep the duration we hold the - // lock short. - // TODO: On platforms that support it, use ``pthread_atfork`` to - // drop this lock. - LockGuard cleanup_lock(*mod_globs->thread_states_to_destroy_lock); - - if (state && state->has_main_greenlet()) { - // Because we don't have the GIL, this is a race condition. - if (!PyInterpreterState_Head()) { - // We have to leak the thread state, if the - // interpreter has shut down when we're getting - // deallocated, we can't run the cleanup code that - // deleting it would imply. - return; - } - mod_globs->queue_to_destroy(state); - if (mod_globs->thread_states_to_destroy.size() == 1) { - // We added the first item to the queue. We need to schedule - // the cleanup. - int result = ThreadState_DestroyNoGIL::AddPendingCall( - ThreadState_DestroyNoGIL::DestroyQueueWithGIL, - NULL); - if (result < 0) { - // Hmm, what can we do here? - fprintf(stderr, - "greenlet: WARNING: failed in call to Py_AddPendingCall; " - "expect a memory leak.\n"); - } - } - } - } - static int - DestroyQueueWithGIL(void* UNUSED(arg)) - { - // We're holding the GIL here, so no Python code should be able to - // run to call ``os.fork()``. - while (1) { - ThreadState* to_destroy; - { - LockGuard cleanup_lock(*mod_globs->thread_states_to_destroy_lock); - if (mod_globs->thread_states_to_destroy.empty()) { - break; - } - to_destroy = mod_globs->take_next_to_destroy(); - } - // Drop the lock while we do the actual deletion. - ThreadState_DestroyWithGIL::DestroyWithGIL(to_destroy); - } - return 0; - } +}; }; }; // namespace greenlet @@ -209,7 +247,7 @@ struct ThreadState_DestroyNoGIL // initial function call in each function that uses a thread local); // in contrast, static volatile variables are at some pre-computed // offset. -typedef greenlet::ThreadStateCreator ThreadStateCreator; +typedef greenlet::ThreadStateCreator ThreadStateCreator; static thread_local ThreadStateCreator g_thread_state_global; #define GET_THREAD_STATE() g_thread_state_global diff --git a/src/greenlet/TUserGreenlet.cpp b/src/greenlet/TUserGreenlet.cpp index 83db972f..73a81330 100644 --- a/src/greenlet/TUserGreenlet.cpp +++ b/src/greenlet/TUserGreenlet.cpp @@ -14,7 +14,7 @@ #include "greenlet_internal.hpp" #include "TGreenlet.hpp" -#include "greenlet_thread_state.hpp" + #include "TThreadStateDestroy.cpp" diff --git a/src/greenlet/greenlet.cpp b/src/greenlet/greenlet.cpp index 7a7b5095..e8d92a00 100644 --- a/src/greenlet/greenlet.cpp +++ b/src/greenlet/greenlet.cpp @@ -22,12 +22,12 @@ // as well. #include "greenlet_refs.hpp" #include "greenlet_slp_switch.hpp" -#include "greenlet_thread_state.hpp" + #include "greenlet_thread_support.hpp" #include "TGreenlet.hpp" #include "TGreenletGlobals.cpp" -#include "TThreadStateDestroy.cpp" + #include "TGreenlet.cpp" #include "TMainGreenlet.cpp" #include "TUserGreenlet.cpp" @@ -36,6 +36,10 @@ #include "TPythonState.cpp" #include "TStackState.cpp" +#include "TThreadState.hpp" +#include "TThreadStateCreator.hpp" +#include "TThreadStateDestroy.cpp" + #include "PyGreenlet.cpp" #include "PyGreenletUnswitchable.cpp" #include "CObjects.cpp" diff --git a/src/greenlet/greenlet_internal.hpp b/src/greenlet/greenlet_internal.hpp index 6d79cc2a..f2b15d5f 100644 --- a/src/greenlet/greenlet_internal.hpp +++ b/src/greenlet/greenlet_internal.hpp @@ -27,6 +27,10 @@ typedef struct _greenlet PyGreenlet; namespace greenlet { class ThreadState; + // We can't use the PythonAllocator for this, because we push to it + // from the thread state destructor, which doesn't have the GIL, + // and Python's allocators can only be called with the GIL. + typedef std::vector cleanup_queue_t; };