gh-138709: Fix race condition in test_external_inspection (#139209)

Fix race condition in test_external_inspection thread status tests

The tests test_thread_status_detection and test_thread_status_gil_detection
had a race condition: the test could sample thread status in the window after
the sleeper thread sends its "ready" message but before it actually calls
time.sleep(). This caused intermittent test failures where the sleeper
thread would show as running (status=0) instead of idle (status=1 in CPU
mode, status=2 in GIL mode).

The fix moves the thread status collection inside the retry loop and
specifically waits for the expected thread states before proceeding with
assertions. The retry loop now keeps sampling (for at most 10 attempts,
sleeping 0.5 seconds between samples) until all of the following hold:
- The sleeper thread shows as idle (status=1 for CPU mode, status=2 for GIL mode)
- The busy thread shows as running (status=0)
- Both thread IDs are found in the status collection

This ensures the test waits for threads to settle into their expected states
before making assertions, eliminating the race condition.
This commit is contained in:
Pablo Galindo Salgado
2025-09-21 18:32:03 +01:00
committed by GitHub
parent 080faf2d47
commit 9df477c0ce

View File

@@ -1751,14 +1751,23 @@ class TestDetectionOfThreadStatus(unittest.TestCase):
break
attempts = 10
statuses = {}
try:
unwinder = RemoteUnwinder(p.pid, all_threads=True, mode=PROFILING_MODE_CPU,
skip_non_matching_threads=False)
for _ in range(attempts):
traces = unwinder.get_stack_trace()
# Check if any thread is running
if any(thread_info.status == 0 for interpreter_info in traces
for thread_info in interpreter_info.threads):
# Find threads and their statuses
statuses = {}
for interpreter_info in traces:
for thread_info in interpreter_info.threads:
statuses[thread_info.thread_id] = thread_info.status
# Check if sleeper thread is idle and busy thread is running
if (sleeper_tid in statuses and
busy_tid in statuses and
statuses[sleeper_tid] == 1 and
statuses[busy_tid] == 0):
break
time.sleep(0.5) # Give a bit of time to let threads settle
except PermissionError:
@@ -1766,13 +1775,6 @@ class TestDetectionOfThreadStatus(unittest.TestCase):
"Insufficient permissions to read the stack trace"
)
# Find threads and their statuses
statuses = {}
for interpreter_info in traces:
for thread_info in interpreter_info.threads:
statuses[thread_info.thread_id] = thread_info.status
self.assertIsNotNone(sleeper_tid, "Sleeper thread id not received")
self.assertIsNotNone(busy_tid, "Busy thread id not received")
self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads")
@@ -1861,14 +1863,23 @@ class TestDetectionOfThreadStatus(unittest.TestCase):
break
attempts = 10
statuses = {}
try:
unwinder = RemoteUnwinder(p.pid, all_threads=True, mode=PROFILING_MODE_GIL,
skip_non_matching_threads=False)
for _ in range(attempts):
traces = unwinder.get_stack_trace()
# Check if any thread is running
if any(thread_info.status == 0 for interpreter_info in traces
for thread_info in interpreter_info.threads):
# Find threads and their statuses
statuses = {}
for interpreter_info in traces:
for thread_info in interpreter_info.threads:
statuses[thread_info.thread_id] = thread_info.status
# Check if sleeper thread is idle (status 2 for GIL mode) and busy thread is running
if (sleeper_tid in statuses and
busy_tid in statuses and
statuses[sleeper_tid] == 2 and
statuses[busy_tid] == 0):
break
time.sleep(0.5) # Give a bit of time to let threads settle
except PermissionError:
@@ -1876,13 +1887,6 @@ class TestDetectionOfThreadStatus(unittest.TestCase):
"Insufficient permissions to read the stack trace"
)
# Find threads and their statuses
statuses = {}
for interpreter_info in traces:
for thread_info in interpreter_info.threads:
statuses[thread_info.thread_id] = thread_info.status
self.assertIsNotNone(sleeper_tid, "Sleeper thread id not received")
self.assertIsNotNone(busy_tid, "Busy thread id not received")
self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads")