gh-137433: Fix deadlock with stop-the-world and daemon threads (gh-137735)

There was a deadlock originally seen by Memray when a daemon thread
enabled or disabled profiling while the interpreter was shutting down.
I think this could also happen with garbage collection, but I haven't
seen that in practice.

The daemon thread could be hung while trying to acquire the global rwmutex
that prevents overlapping global and per-interpreter stop-the-world events.
Since it already held the main interpreter's stop-the-world lock, it
also deadlocked the main thread, which was trying to perform interpreter
finalization.

Swap the order of lock acquisition to prevent this deadlock.
Additionally, refactor `_PyParkingLot_Park` so that the global buckets
hashtable is left in a clean state if the thread is hung in
`PyEval_AcquireThread`.
This commit is contained in:
Sam Gross
2025-09-16 09:21:58 +01:00
committed by GitHub
parent 4c0d7bc52a
commit 90fe3250f8
6 changed files with 55 additions and 31 deletions

View File

@@ -227,7 +227,7 @@ _PyRawMutex_LockSlow(_PyRawMutex *m)
// Wait for us to be woken up. Note that we still have to lock the
// mutex ourselves: it is NOT handed off to us.
_PySemaphore_Wait(&waiter.sema, -1, /*detach=*/0);
_PySemaphore_Wait(&waiter.sema, -1);
}
_PySemaphore_Destroy(&waiter.sema);

View File

@@ -91,8 +91,8 @@ _PySemaphore_Destroy(_PySemaphore *sema)
#endif
}
static int
_PySemaphore_PlatformWait(_PySemaphore *sema, PyTime_t timeout)
int
_PySemaphore_Wait(_PySemaphore *sema, PyTime_t timeout)
{
int res;
#if defined(MS_WINDOWS)
@@ -225,27 +225,6 @@ _PySemaphore_PlatformWait(_PySemaphore *sema, PyTime_t timeout)
return res;
}
int
_PySemaphore_Wait(_PySemaphore *sema, PyTime_t timeout, int detach)
{
PyThreadState *tstate = NULL;
if (detach) {
tstate = _PyThreadState_GET();
if (tstate && _PyThreadState_IsAttached(tstate)) {
// Only detach if we are attached
PyEval_ReleaseThread(tstate);
}
else {
tstate = NULL;
}
}
int res = _PySemaphore_PlatformWait(sema, timeout);
if (tstate) {
PyEval_AcquireThread(tstate);
}
return res;
}
void
_PySemaphore_Wakeup(_PySemaphore *sema)
{
@@ -342,7 +321,19 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size,
enqueue(bucket, addr, &wait);
_PyRawMutex_Unlock(&bucket->mutex);
int res = _PySemaphore_Wait(&wait.sema, timeout_ns, detach);
PyThreadState *tstate = NULL;
if (detach) {
tstate = _PyThreadState_GET();
if (tstate && _PyThreadState_IsAttached(tstate)) {
// Only detach if we are attached
PyEval_ReleaseThread(tstate);
}
else {
tstate = NULL;
}
}
int res = _PySemaphore_Wait(&wait.sema, timeout_ns);
if (res == Py_PARK_OK) {
goto done;
}
@@ -354,7 +345,7 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size,
// Another thread has started to unpark us. Wait until we process the
// wakeup signal.
do {
res = _PySemaphore_Wait(&wait.sema, -1, detach);
res = _PySemaphore_Wait(&wait.sema, -1);
} while (res != Py_PARK_OK);
goto done;
}
@@ -366,6 +357,9 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size,
done:
_PySemaphore_Destroy(&wait.sema);
if (tstate) {
PyEval_AcquireThread(tstate);
}
return res;
}

View File

@@ -2253,13 +2253,15 @@ stop_the_world(struct _stoptheworld_state *stw)
{
_PyRuntimeState *runtime = &_PyRuntime;
PyMutex_Lock(&stw->mutex);
// gh-137433: Acquire the rwmutex first to avoid deadlocks with daemon
// threads that may hang when blocked on lock acquisition.
if (stw->is_global) {
_PyRWMutex_Lock(&runtime->stoptheworld_mutex);
}
else {
_PyRWMutex_RLock(&runtime->stoptheworld_mutex);
}
PyMutex_Lock(&stw->mutex);
HEAD_LOCK(runtime);
stw->requested = 1;
@@ -2325,13 +2327,13 @@ start_the_world(struct _stoptheworld_state *stw)
}
stw->requester = NULL;
HEAD_UNLOCK(runtime);
PyMutex_Unlock(&stw->mutex);
if (stw->is_global) {
_PyRWMutex_Unlock(&runtime->stoptheworld_mutex);
}
else {
_PyRWMutex_RUnlock(&runtime->stoptheworld_mutex);
}
PyMutex_Unlock(&stw->mutex);
}
#endif // Py_GIL_DISABLED