You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

544 lines
20 KiB
C++

#ifndef GREENLET_THREAD_STATE_HPP
#define GREENLET_THREAD_STATE_HPP
#include <ctime>
#include <stdexcept>
#include "greenlet_internal.hpp"
#include "greenlet_refs.hpp"
#include "greenlet_thread_support.hpp"
using greenlet::refs::BorrowedObject;
using greenlet::refs::BorrowedGreenlet;
using greenlet::refs::BorrowedMainGreenlet;
using greenlet::refs::OwnedMainGreenlet;
using greenlet::refs::OwnedObject;
using greenlet::refs::OwnedGreenlet;
using greenlet::refs::OwnedList;
using greenlet::refs::PyErrFetchParam;
using greenlet::refs::PyArgParseParam;
using greenlet::refs::ImmortalString;
using greenlet::refs::CreatedModule;
using greenlet::refs::PyErrPieces;
using greenlet::refs::NewReference;
namespace greenlet {
/**
* Thread-local state of greenlets.
*
* Each native thread will get exactly one of these objects,
* automatically accessed through the best available thread-local
* mechanism the compiler supports (``thread_local`` for C++11
* compilers or ``__thread``/``declspec(thread)`` for older GCC/clang
* or MSVC, respectively.)
*
* Previously, we kept thread-local state mostly in a bunch of
* ``static volatile`` variables in the main greenlet file.. This had
* the problem of requiring extra checks, loops, and great care
* accessing these variables if we potentially invoked any Python code
* that could release the GIL, because the state could change out from
* under us. Making the variables thread-local solves this problem.
*
* When we detected that a greenlet API accessing the current greenlet
* was invoked from a different thread than the greenlet belonged to,
* we stored a reference to the greenlet in the Python thread
* dictionary for the thread the greenlet belonged to. This could lead
* to memory leaks if the thread then exited (because of a reference
* cycle, as greenlets referred to the thread dictionary, and deleting
* non-current greenlets leaked their frame plus perhaps arguments on
* the C stack). If a thread exited while still having running
* greenlet objects (perhaps that had just switched back to the main
* greenlet), and did not invoke one of the greenlet APIs *in that
* thread, immediately before it exited, without some other thread
* then being invoked*, such a leak was guaranteed.
*
* This can be partly solved by using compiler thread-local variables
* instead of the Python thread dictionary, thus avoiding a cycle.
*
* To fully solve this problem, we need a reliable way to know that a
* thread is done and we should clean up the main greenlet. On POSIX,
* we can use the destructor function of ``pthread_key_create``, but
* there's nothing similar on Windows; a C++11 thread local object
* reliably invokes its destructor when the thread it belongs to exits
* (non-C++11 compilers offer ``__thread`` or ``declspec(thread)`` to
* create thread-local variables, but they can't hold C++ objects that
* invoke destructors; the C++11 version is the most portable solution
* I found). When the thread exits, we can drop references and
* otherwise manipulate greenlets and frames that we know can no
* longer be switched to. For compilers that don't support C++11
* thread locals, we have a solution that uses the python thread
* dictionary, though it may not collect everything as promptly as
* other compilers do, if some other library is using the thread
* dictionary and has a cycle or extra reference.
*
* There are two small wrinkles. The first is that when the thread
* exits, it is too late to actually invoke Python APIs: the Python
* thread state is gone, and the GIL is released. To solve *this*
* problem, our destructor uses ``Py_AddPendingCall`` to transfer the
* destruction work to the main thread. (This is not an issue for the
* dictionary solution.)
*
* The second is that once the thread exits, the thread local object
* is invalid and we can't even access a pointer to it, so we can't
* pass it to ``Py_AddPendingCall``. This is handled by actually using
* a second object that's thread local (ThreadStateCreator) and having
* it dynamically allocate this object so it can live until the
* pending call runs.
*/
class ThreadState {
private:
// As of commit 08ad1dd7012b101db953f492e0021fb08634afad
// this class needed 56 bytes in o Py_DEBUG build
// on 64-bit macOS 11.
// Adding the vector takes us up to 80 bytes ()
/* Strong reference to the main greenlet */
OwnedMainGreenlet main_greenlet;
/* Strong reference to the current greenlet. */
OwnedGreenlet current_greenlet;
/* Strong reference to the trace function, if any. */
OwnedObject tracefunc;
typedef std::vector<PyGreenlet*, PythonAllocator<PyGreenlet*> > deleteme_t;
/* A vector of raw PyGreenlet pointers representing things that need
deleted when this thread is running. The vector owns the
references, but you need to manually INCREF/DECREF as you use
them. We don't use a vector<refs::OwnedGreenlet> because we
make copy of this vector, and that would become O(n) as all the
refcounts are incremented in the copy.
*/
deleteme_t deleteme;
#ifdef GREENLET_NEEDS_EXCEPTION_STATE_SAVED
void* exception_state;
#endif
static std::clock_t _clocks_used_doing_gc;
static ImmortalString get_referrers_name;
static PythonAllocator<ThreadState> allocator;
G_NO_COPIES_OF_CLS(ThreadState);
public:
static void* operator new(size_t UNUSED(count))
{
return ThreadState::allocator.allocate(1);
}
static void operator delete(void* ptr)
{
return ThreadState::allocator.deallocate(static_cast<ThreadState*>(ptr),
1);
}
static void init()
{
ThreadState::get_referrers_name = "get_referrers";
ThreadState::_clocks_used_doing_gc = 0;
}
ThreadState()
: main_greenlet(OwnedMainGreenlet::consuming(green_create_main(this))),
current_greenlet(main_greenlet)
{
if (!this->main_greenlet) {
// We failed to create the main greenlet. That's bad.
throw PyFatalError("Failed to create main greenlet");
}
// The main greenlet starts with 1 refs: The returned one. We
// then copied it to the current greenlet.
assert(this->main_greenlet.REFCNT() == 2);
#ifdef GREENLET_NEEDS_EXCEPTION_STATE_SAVED
this->exception_state = slp_get_exception_state();
#endif
}
inline void restore_exception_state()
{
#ifdef GREENLET_NEEDS_EXCEPTION_STATE_SAVED
// It's probably important this be inlined and only call C
// functions to avoid adding an SEH frame.
slp_set_exception_state(this->exception_state);
#endif
}
inline bool has_main_greenlet()
{
return !!this->main_greenlet;
}
// Called from the ThreadStateCreator when we're in non-standard
// threading mode. In that case, there is an object in the Python
// thread state dictionary that points to us. The main greenlet
// also traverses into us, in which case it's crucial not to
// traverse back into the main greenlet.
int tp_traverse(visitproc visit, void* arg, bool traverse_main=true)
{
if (traverse_main) {
Py_VISIT(main_greenlet.borrow_o());
}
if (traverse_main || current_greenlet != main_greenlet) {
Py_VISIT(current_greenlet.borrow_o());
}
Py_VISIT(tracefunc.borrow());
return 0;
}
inline BorrowedMainGreenlet borrow_main_greenlet() const
{
assert(this->main_greenlet);
assert(this->main_greenlet.REFCNT() >= 2);
return this->main_greenlet;
};
inline OwnedMainGreenlet get_main_greenlet()
{
return this->main_greenlet;
}
/**
* In addition to returning a new reference to the currunt
* greenlet, this performs any maintenance needed.
*/
inline OwnedGreenlet get_current()
{
/* green_dealloc() cannot delete greenlets from other threads, so
it stores them in the thread dict; delete them now. */
this->clear_deleteme_list();
//assert(this->current_greenlet->main_greenlet == this->main_greenlet);
//assert(this->main_greenlet->main_greenlet == this->main_greenlet);
return this->current_greenlet;
}
/**
* As for non-const get_current();
*/
inline BorrowedGreenlet borrow_current()
{
this->clear_deleteme_list();
return this->current_greenlet;
}
/**
* Does no maintenance.
*/
inline OwnedGreenlet get_current() const
{
return this->current_greenlet;
}
template<typename T, refs::TypeChecker TC>
inline bool is_current(const refs::PyObjectPointer<T, TC>& obj) const
{
return this->current_greenlet.borrow_o() == obj.borrow_o();
}
inline void set_current(const OwnedGreenlet& target)
{
this->current_greenlet = target;
}
private:
/**
* Deref and remove the greenlets from the deleteme list. Must be
* holding the GIL.
*
* If *murder* is true, then we must be called from a different
* thread than the one that these greenlets were running in.
* In that case, if the greenlet was actually running, we destroy
* the frame reference and otherwise make it appear dead before
* proceeding; otherwise, we would try (and fail) to raise an
* exception in it and wind up right back in this list.
*/
inline void clear_deleteme_list(const bool murder=false)
{
if (!this->deleteme.empty()) {
// It's possible we could add items to this list while
// running Python code if there's a thread switch, so we
// need to defensively copy it before that can happen.
deleteme_t copy = this->deleteme;
this->deleteme.clear(); // in case things come back on the list
for(deleteme_t::iterator it = copy.begin(), end = copy.end();
it != end;
++it ) {
PyGreenlet* to_del = *it;
if (murder) {
// Force each greenlet to appear dead; we can't raise an
// exception into it anymore anyway.
to_del->pimpl->murder_in_place();
}
// The only reference to these greenlets should be in
// this list, decreffing them should let them be
// deleted again, triggering calls to green_dealloc()
// in the correct thread (if we're not murdering).
// This may run arbitrary Python code and switch
// threads or greenlets!
Py_DECREF(to_del);
if (PyErr_Occurred()) {
PyErr_WriteUnraisable(nullptr);
PyErr_Clear();
}
}
}
}
public:
/**
* Returns a new reference, or a false object.
*/
inline OwnedObject get_tracefunc() const
{
return tracefunc;
};
inline void set_tracefunc(BorrowedObject tracefunc)
{
assert(tracefunc);
if (tracefunc == BorrowedObject(Py_None)) {
this->tracefunc.CLEAR();
}
else {
this->tracefunc = tracefunc;
}
}
/**
* Given a reference to a greenlet that some other thread
* attempted to delete (has a refcount of 0) store it for later
* deletion when the thread this state belongs to is current.
*/
inline void delete_when_thread_running(PyGreenlet* to_del)
{
Py_INCREF(to_del);
this->deleteme.push_back(to_del);
}
/**
* Set to std::clock_t(-1) to disable.
*/
inline static std::clock_t& clocks_used_doing_gc()
{
return ThreadState::_clocks_used_doing_gc;
}
~ThreadState()
{
if (!PyInterpreterState_Head()) {
// We shouldn't get here (our callers protect us)
// but if we do, all we can do is bail early.
return;
}
// We should not have an "origin" greenlet; that only exists
// for the temporary time during a switch, which should not
// be in progress as the thread dies.
//assert(!this->switching_state.origin);
this->tracefunc.CLEAR();
// Forcibly GC as much as we can.
this->clear_deleteme_list(true);
// The pending call did this.
assert(this->main_greenlet->thread_state() == nullptr);
// If the main greenlet is the current greenlet,
// then we "fell off the end" and the thread died.
// It's possible that there is some other greenlet that
// switched to us, leaving a reference to the main greenlet
// on the stack, somewhere uncollectible. Try to detect that.
if (this->current_greenlet == this->main_greenlet && this->current_greenlet) {
assert(this->current_greenlet->is_currently_running_in_some_thread());
// Drop one reference we hold.
this->current_greenlet.CLEAR();
assert(!this->current_greenlet);
// Only our reference to the main greenlet should be left,
// But hold onto the pointer in case we need to do extra cleanup.
PyGreenlet* old_main_greenlet = this->main_greenlet.borrow();
Py_ssize_t cnt = this->main_greenlet.REFCNT();
this->main_greenlet.CLEAR();
if (ThreadState::_clocks_used_doing_gc != std::clock_t(-1)
&& cnt == 2 && Py_REFCNT(old_main_greenlet) == 1) {
// Highly likely that the reference is somewhere on
// the stack, not reachable by GC. Verify.
// XXX: This is O(n) in the total number of objects.
// TODO: Add a way to disable this at runtime, and
// another way to report on it.
std::clock_t begin = std::clock();
NewReference gc(PyImport_ImportModule("gc"));
if (gc) {
OwnedObject get_referrers = gc.PyRequireAttr(ThreadState::get_referrers_name);
OwnedList refs(get_referrers.PyCall(old_main_greenlet));
if (refs && refs.empty()) {
assert(refs.REFCNT() == 1);
// We found nothing! So we left a dangling
// reference: Probably the last thing some
// other greenlet did was call
// 'getcurrent().parent.switch()' to switch
// back to us. Clean it up. This will be the
// case on CPython 3.7 and newer, as they use
// an internal calling conversion that avoids
// creating method objects and storing them on
// the stack.
Py_DECREF(old_main_greenlet);
}
else if (refs
&& refs.size() == 1
&& PyCFunction_Check(refs.at(0))
&& Py_REFCNT(refs.at(0)) == 2) {
assert(refs.REFCNT() == 1);
// Ok, we found a C method that refers to the
// main greenlet, and its only referenced
// twice, once in the list we just created,
// once from...somewhere else. If we can't
// find where else, then this is a leak.
// This happens in older versions of CPython
// that create a bound method object somewhere
// on the stack that we'll never get back to.
if (PyCFunction_GetFunction(refs.at(0).borrow()) == (PyCFunction)green_switch) {
BorrowedObject function_w = refs.at(0);
refs.clear(); // destroy the reference
// from the list.
// back to one reference. Can *it* be
// found?
assert(function_w.REFCNT() == 1);
refs = get_referrers.PyCall(function_w);
if (refs && refs.empty()) {
// Nope, it can't be found so it won't
// ever be GC'd. Drop it.
Py_CLEAR(function_w);
}
}
}
std::clock_t end = std::clock();
ThreadState::_clocks_used_doing_gc += (end - begin);
}
}
}
// We need to make sure this greenlet appears to be dead,
// because otherwise deallocing it would fail to raise an
// exception in it (the thread is dead) and put it back in our
// deleteme list.
if (this->current_greenlet) {
this->current_greenlet->murder_in_place();
this->current_greenlet.CLEAR();
}
if (this->main_greenlet) {
// Couldn't have been the main greenlet that was running
// when the thread exited (because we already cleared this
// pointer if it was). This shouldn't be possible?
// If the main greenlet was current when the thread died (it
// should be, right?) then we cleared its self pointer above
// when we cleared the current greenlet's main greenlet pointer.
// assert(this->main_greenlet->main_greenlet == this->main_greenlet
// || !this->main_greenlet->main_greenlet);
// // self reference, probably gone
// this->main_greenlet->main_greenlet.CLEAR();
// This will actually go away when the ivar is destructed.
this->main_greenlet.CLEAR();
}
if (PyErr_Occurred()) {
PyErr_WriteUnraisable(NULL);
PyErr_Clear();
}
}
};
ImmortalString ThreadState::get_referrers_name(nullptr);
PythonAllocator<ThreadState> ThreadState::allocator;
std::clock_t ThreadState::_clocks_used_doing_gc(0);
template<typename Destructor>
class ThreadStateCreator
{
private:
// Initialized to 1, and, if still 1, created on access.
// Set to 0 on destruction.
ThreadState* _state;
G_NO_COPIES_OF_CLS(ThreadStateCreator);
public:
// Only one of these, auto created per thread
ThreadStateCreator() :
_state((ThreadState*)1)
{
}
~ThreadStateCreator()
{
ThreadState* tmp = this->_state;
this->_state = nullptr;
if (tmp && tmp != (ThreadState*)1) {
Destructor x(tmp);
}
}
inline ThreadState& state()
{
// The main greenlet will own this pointer when it is created,
// which will be right after this. The plan is to give every
// greenlet a pointer to the main greenlet for the thread it
// runs in; if we are doing something cross-thread, we need to
// access the pointer from the main greenlet. Deleting the
// thread, and hence the thread-local storage, will delete the
// state pointer in the main greenlet.
if (this->_state == (ThreadState*)1) {
// XXX: Assuming allocation never fails
this->_state = new ThreadState;
// For non-standard threading, we need to store an object
// in the Python thread state dictionary so that it can be
// DECREF'd when the thread ends (ideally; the dict could
// last longer) and clean this object up.
}
if (!this->_state) {
throw std::runtime_error("Accessing state after destruction.");
}
return *this->_state;
}
operator ThreadState&()
{
return this->state();
}
operator ThreadState*()
{
return &this->state();
}
inline int tp_traverse(visitproc visit, void* arg)
{
if (this->_state) {
return this->_state->tp_traverse(visit, arg);
}
return 0;
}
};
// We can't use the PythonAllocator for this, because we push to it
// from the thread state destructor, which doesn't have the GIL,
// and Python's allocators can only be called with the GIL.
typedef std::vector<ThreadState*> cleanup_queue_t;
}; // namespace greenlet
#endif