From c28eb287a5ba08a8e8499a5a16ec935d1f93e05e Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 22 Apr 2026 14:38:46 -0400 Subject: [PATCH 1/2] gh-113956: Make intern_common thread-safe in free-threaded build Avoid racing with the owning thread's refcount operations when immortalizing an interned string: if we don't own it and its refcount isn't merged, intern a copy we own instead. Use atomic stores in _Py_SetImmortalUntracked so concurrent atomic reads are race-free. --- Lib/test/test_free_threading/test_str.py | 22 +++++++++- Objects/object.c | 6 +-- Objects/unicodeobject.c | 56 +++++++++++++++++++++--- 3 files changed, 73 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_free_threading/test_str.py b/Lib/test/test_free_threading/test_str.py index 9a1ce3620ac4b2..11e04009956db1 100644 --- a/Lib/test/test_free_threading/test_str.py +++ b/Lib/test/test_free_threading/test_str.py @@ -1,7 +1,9 @@ +import sys +import threading import unittest from itertools import cycle -from threading import Event, Thread +from threading import Barrier, Event, Thread from unittest import TestCase from test.support import threading_helper @@ -69,6 +71,24 @@ def reader_func(): for reader in readers: reader.join() + def test_intern_unowned_string(self): + # Test interning strings owned by various threads. + strings = [f"intern_race_owner_{i}" for i in range(50)] + + NUM_THREADS = 5 + b = Barrier(NUM_THREADS) + + def interner(): + tid = threading.get_ident() + for i in range(20): + strings.append(f"intern_{tid}_{i}") + b.wait() + for s in strings: + r = sys.intern(s) + self.assertTrue(sys._is_interned(r)) + + threading_helper.run_concurrently(interner, nthreads=NUM_THREADS) + def test_maketrans_dict_concurrent_modification(self): for _ in range(5): d = {2000: 'a'} diff --git a/Objects/object.c b/Objects/object.c index 3166254f6f640b..4fa20470601eb3 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2768,9 +2768,9 @@ _Py_SetImmortalUntracked(PyObject *op) return; } #ifdef Py_GIL_DISABLED - op->ob_tid = _Py_UNOWNED_TID; - op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL; - op->ob_ref_shared = 0; + _Py_atomic_store_uintptr_relaxed(&op->ob_tid, _Py_UNOWNED_TID); + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, _Py_IMMORTAL_REFCNT_LOCAL); + _Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, 0); _Py_atomic_or_uint8(&op->ob_gc_bits, _PyGC_BITS_DEFERRED); #elif SIZEOF_VOID_P > 4 op->ob_flags = _Py_IMMORTAL_FLAGS; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index d2569132998acc..9aee7120c811de 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -589,6 +589,14 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) { #define CHECK(expr) \ do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0) +#ifdef Py_GIL_DISABLED +# define CHECK_IF_GIL(expr) (void)(expr) +# define CHECK_IF_FT(expr) CHECK(expr) +#else +# define CHECK_IF_GIL(expr) CHECK(expr) +# define CHECK_IF_FT(expr) (void)(expr) +#endif + assert(op != NULL); CHECK(PyUnicode_Check(op)); @@ -669,11 +677,9 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) /* Check interning state */ #ifdef Py_DEBUG - // Note that we do not check `_Py_IsImmortal(op)`, since stable ABI - // extensions can make immortal strings mortal (but with a high enough - // refcount). - // The other way is extremely unlikely (worth a potential failed assertion - // in a debug build), so we do check `!_Py_IsImmortal(op)`. + // Note that we do not check `_Py_IsImmortal(op)` in the GIL-enabled build + // since stable ABI extensions can make immortal strings mortal (but with a + // high enough refcount). switch (PyUnicode_CHECK_INTERNED(op)) { case SSTATE_NOT_INTERNED: if (ascii->state.statically_allocated) { @@ -683,18 +689,20 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) // are static but use SSTATE_NOT_INTERNED } else { - CHECK(!_Py_IsImmortal(op)); + CHECK_IF_GIL(!_Py_IsImmortal(op)); } break; case SSTATE_INTERNED_MORTAL: CHECK(!ascii->state.statically_allocated); - CHECK(!_Py_IsImmortal(op)); + CHECK_IF_GIL(!_Py_IsImmortal(op)); break; case SSTATE_INTERNED_IMMORTAL: CHECK(!ascii->state.statically_allocated); + CHECK_IF_FT(_Py_IsImmortal(op)); break; case SSTATE_INTERNED_IMMORTAL_STATIC: CHECK(ascii->state.statically_allocated); + CHECK_IF_FT(_Py_IsImmortal(op)); break; default: Py_UNREACHABLE(); @@ -14208,6 +14216,18 @@ immortalize_interned(PyObject *s) FT_ATOMIC_STORE_UINT8(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL); } +#ifdef Py_GIL_DISABLED +static bool +can_immortalize_safely(PyObject *s) +{ + if (_Py_IsOwnedByCurrentThread(s) || _Py_IsImmortal(s)) { + return true; + } + Py_ssize_t shared = _Py_atomic_load_ssize(&s->ob_ref_shared); + return _Py_REF_IS_MERGED(shared); +} +#endif + static /* non-null */ PyObject* intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, bool immortalize) @@ -14236,11 +14256,16 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, // no, go on break; case SSTATE_INTERNED_MORTAL: +#ifndef Py_GIL_DISABLED // yes but we might need to make it immortal if (immortalize) { immortalize_interned(s); } return s; +#else + // not fully interned yet; fall through to the locking path + break; +#endif default: // all done return s; @@ -14305,6 +14330,23 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, Py_DECREF(r); } #endif + +#ifdef Py_GIL_DISABLED + // Immortalization writes to the refcount fields non-atomically. That + // races with Py_INCREF / Py_DECREF on the thread that owns `s`. If we + // don't own it (and its refcount hasn't been merged), intern a copy + // we own instead. + if (!can_immortalize_safely(s)) { + PyObject *copy = _PyUnicode_Copy(s); + if (copy == NULL) { + PyErr_Clear(); + return s; + } + Py_DECREF(s); + s = copy; + } +#endif + FT_MUTEX_LOCK(INTERN_MUTEX); PyObject *t; { From c11e525158dc9610a5fb5f975679038766e4aa3d Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 22 Apr 2026 14:55:30 -0400 Subject: [PATCH 2/2] Add NEWS entry --- .../2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst new file mode 100644 index 00000000000000..54c04bbc28d416 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst @@ -0,0 +1,4 @@ +Fix a data race in :func:`sys.intern` in the free-threaded build when +interning a string owned by another thread. An interned copy owned by the +current thread is used instead when it is not safe to immortalize the +original.