diff --git a/Lib/test/test_free_threading/test_str.py b/Lib/test/test_free_threading/test_str.py index 9a1ce3620ac4b2..11e04009956db1 100644 --- a/Lib/test/test_free_threading/test_str.py +++ b/Lib/test/test_free_threading/test_str.py @@ -1,7 +1,9 @@ +import sys +import threading import unittest from itertools import cycle -from threading import Event, Thread +from threading import Barrier, Event, Thread from unittest import TestCase from test.support import threading_helper @@ -69,6 +71,24 @@ def reader_func(): for reader in readers: reader.join() + def test_intern_unowned_string(self): + # Test interning strings owned by various threads. + strings = [f"intern_race_owner_{i}" for i in range(50)] + + NUM_THREADS = 5 + b = Barrier(NUM_THREADS) + + def interner(): + tid = threading.get_ident() + for i in range(20): + strings.append(f"intern_{tid}_{i}") + b.wait() + for s in strings: + r = sys.intern(s) + self.assertTrue(sys._is_interned(r)) + + threading_helper.run_concurrently(interner, nthreads=NUM_THREADS) + def test_maketrans_dict_concurrent_modification(self): for _ in range(5): d = {2000: 'a'} diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst new file mode 100644 index 00000000000000..54c04bbc28d416 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst @@ -0,0 +1,4 @@ +Fix a data race in :func:`sys.intern` in the free-threaded build when +interning a string owned by another thread. An interned copy owned by the +current thread is used instead when it is not safe to immortalize the +original. diff --git a/Objects/object.c b/Objects/object.c index 3166254f6f640b..4fa20470601eb3 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2768,9 +2768,9 @@ _Py_SetImmortalUntracked(PyObject *op) return; } #ifdef Py_GIL_DISABLED - op->ob_tid = _Py_UNOWNED_TID; - op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL; - op->ob_ref_shared = 0; + _Py_atomic_store_uintptr_relaxed(&op->ob_tid, _Py_UNOWNED_TID); + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, _Py_IMMORTAL_REFCNT_LOCAL); + _Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, 0); _Py_atomic_or_uint8(&op->ob_gc_bits, _PyGC_BITS_DEFERRED); #elif SIZEOF_VOID_P > 4 op->ob_flags = _Py_IMMORTAL_FLAGS; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index d2569132998acc..9aee7120c811de 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -589,6 +589,14 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) { #define CHECK(expr) \ do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0) +#ifdef Py_GIL_DISABLED +# define CHECK_IF_GIL(expr) (void)(expr) +# define CHECK_IF_FT(expr) CHECK(expr) +#else +# define CHECK_IF_GIL(expr) CHECK(expr) +# define CHECK_IF_FT(expr) (void)(expr) +#endif + assert(op != NULL); CHECK(PyUnicode_Check(op)); @@ -669,11 +677,9 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) /* Check interning state */ #ifdef Py_DEBUG - // Note that we do not check `_Py_IsImmortal(op)`, since stable ABI - // extensions can make immortal strings mortal (but with a high enough - // refcount). - // The other way is extremely unlikely (worth a potential failed assertion - // in a debug build), so we do check `!_Py_IsImmortal(op)`. + // Note that we do not check `_Py_IsImmortal(op)` in the GIL-enabled build + // since stable ABI extensions can make immortal strings mortal (but with a + // high enough refcount). switch (PyUnicode_CHECK_INTERNED(op)) { case SSTATE_NOT_INTERNED: if (ascii->state.statically_allocated) { @@ -683,18 +689,20 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) // are static but use SSTATE_NOT_INTERNED } else { - CHECK(!_Py_IsImmortal(op)); + CHECK_IF_GIL(!_Py_IsImmortal(op)); } break; case SSTATE_INTERNED_MORTAL: CHECK(!ascii->state.statically_allocated); - CHECK(!_Py_IsImmortal(op)); + CHECK_IF_GIL(!_Py_IsImmortal(op)); break; case SSTATE_INTERNED_IMMORTAL: CHECK(!ascii->state.statically_allocated); + CHECK_IF_FT(_Py_IsImmortal(op)); break; case SSTATE_INTERNED_IMMORTAL_STATIC: CHECK(ascii->state.statically_allocated); + CHECK_IF_FT(_Py_IsImmortal(op)); break; default: Py_UNREACHABLE(); @@ -14208,6 +14216,18 @@ immortalize_interned(PyObject *s) FT_ATOMIC_STORE_UINT8(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL); } +#ifdef Py_GIL_DISABLED +static bool +can_immortalize_safely(PyObject *s) +{ + if (_Py_IsOwnedByCurrentThread(s) || _Py_IsImmortal(s)) { + return true; + } + Py_ssize_t shared = _Py_atomic_load_ssize(&s->ob_ref_shared); + return _Py_REF_IS_MERGED(shared); +} +#endif + static /* non-null */ PyObject* intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, bool immortalize) @@ -14236,11 +14256,16 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, // no, go on break; case SSTATE_INTERNED_MORTAL: +#ifndef Py_GIL_DISABLED // yes but we might need to make it immortal if (immortalize) { immortalize_interned(s); } return s; +#else + // not fully interned yet; fall through to the locking path + break; +#endif default: // all done return s; @@ -14305,6 +14330,23 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, Py_DECREF(r); } #endif + +#ifdef Py_GIL_DISABLED + // Immortalization writes to the refcount fields non-atomically. That + // races with Py_INCREF / Py_DECREF on the thread that owns `s`. If we + // don't own it (and its refcount hasn't been merged), intern a copy + // we own instead. + if (!can_immortalize_safely(s)) { + PyObject *copy = _PyUnicode_Copy(s); + if (copy == NULL) { + PyErr_Clear(); + return s; + } + Py_DECREF(s); + s = copy; + } +#endif + FT_MUTEX_LOCK(INTERN_MUTEX); PyObject *t; {