diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index cb9300f072b9e7..a55b3addf26f33 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -740,43 +740,38 @@ stack manipulations such as ``dup``, ``drop``, ``swap``, ``over``, ``pick``, arguments. - :class:`defaultdict` objects support the following method in addition to the - standard :class:`dict` operations: + :class:`defaultdict` overrides the following method: - .. method:: __missing__(key, /) + .. method:: __getitem__(key, /) - If the :attr:`default_factory` attribute is ``None``, this raises a - :exc:`KeyError` exception with the *key* as argument. + Return ``self[key]``. If the item doesn't exist, it is automatically created. + The value is generated by calling either the :meth:`~object.__missing__` method + (if it exists) or the :attr:`default_factory` attribute (if it isn't ``None``). If + neither can be called, a :exc:`KeyError` is raised. - If :attr:`default_factory` is not ``None``, it is called without arguments - to provide a default value for the given *key*, this value is inserted in - the dictionary for the *key*, and returned. - - If calling :attr:`default_factory` raises an exception this exception is - propagated unchanged. - - This method is called by the :meth:`~object.__getitem__` method of the - :class:`dict` class when the requested key is not found; whatever it - returns or raises is then returned or raised by :meth:`~object.__getitem__`. - - Note that :meth:`__missing__` is *not* called for any operations besides - :meth:`~object.__getitem__`. This means that :meth:`~dict.get` will, like - normal dictionaries, return ``None`` as a default rather than using - :attr:`default_factory`. + When :term:`free threading` is enabled, the defaultdict is locked while the + key is being looked up and the default value is being generated. :class:`defaultdict` objects support the following instance variable: - .. 
attribute:: default_factory - This attribute is used by the :meth:`~defaultdict.__missing__` method; - it is initialized from the first argument to the constructor, if present, - or to ``None``, if absent. + This attribute is called by the :meth:`defaultdict.__getitem__` method + if the requested key isn't in the dictionary. It must be either a + callable that takes no arguments, or :const:`None`. + .. versionchanged:: 3.9 - Added merge (``|``) and update (``|=``) operators, specified in - :pep:`584`. + Added merge (``|``) and update (``|=``) operators, specified in + :pep:`584`. + + .. versionchanged:: 3.15 + The built-in ``defaultdict.__missing__`` method no longer exists. A + custom :meth:`~object.__missing__` method should no longer insert the + generated value into the dictionary, as this is done by the new + :meth:`__getitem__` method. defaultdict is now safe to use with + :term:`free threading`. :class:`defaultdict` Examples diff --git a/Doc/tools/removed-ids.txt b/Doc/tools/removed-ids.txt index 7bffbb8d86197d..f2ff5175960d77 100644 --- a/Doc/tools/removed-ids.txt +++ b/Doc/tools/removed-ids.txt @@ -3,3 +3,4 @@ # Remove from here in 3.16 c-api/allocation.html: deprecated-aliases c-api/file.html: deprecated-api +library/collections.html: collections.defaultdict.__missing__ diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 6c6e3b77e69fab..56a29c586f67f5 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -123,6 +123,8 @@ PyAPI_FUNC(Py_ssize_t) _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_ extern Py_ssize_t _Py_dict_lookup_threadsafe(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr); extern Py_ssize_t _Py_dict_lookup_threadsafe_stackref(PyDictObject *mp, PyObject *key, Py_hash_t hash, _PyStackRef *value_addr); +extern void _Py_dict_unhashable_type(PyObject *op, PyObject *key); + extern int _PyDict_GetMethodStackRef(PyDictObject *dict, PyObject *name, _PyStackRef 
*method); // Exported for external JIT support diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py index 32f4b7d2d6e08b..b3acfa48688c3c 100644 --- a/Lib/importlib/metadata/__init__.py +++ b/Lib/importlib/metadata/__init__.py @@ -22,6 +22,7 @@ import sys import textwrap import types +from collections import defaultdict from collections.abc import Iterable, Mapping from contextlib import suppress from importlib import import_module @@ -30,7 +31,7 @@ from typing import Any from . import _meta -from ._collections import FreezableDefaultDict, Pair +from ._collections import Pair from ._context import ExceptionTrap from ._functools import method_cache, noop, pass_none, passthrough from ._itertools import always_iterable, bucket, unique_everseen @@ -889,8 +890,8 @@ def __init__(self, path: FastPath): base = os.path.basename(path.root).lower() base_is_egg = base.endswith(".egg") - self.infos = FreezableDefaultDict(list) - self.eggs = FreezableDefaultDict(list) + self.infos = defaultdict(list) + self.eggs = defaultdict(list) for child in path.children(): low = child.lower() @@ -904,20 +905,17 @@ def __init__(self, path: FastPath): legacy_normalized = Prepared.legacy_normalize(name) self.eggs[legacy_normalized].append(path.joinpath(child)) - self.infos.freeze() - self.eggs.freeze() - def search(self, prepared: Prepared): """ Yield all infos and eggs matching the Prepared query. 
""" infos = ( - self.infos[prepared.normalized] + self.infos.get(prepared.normalized, ()) if prepared else itertools.chain.from_iterable(self.infos.values()) ) eggs = ( - self.eggs[prepared.legacy_normalized] + self.eggs.get(prepared.legacy_normalized, ()) if prepared else itertools.chain.from_iterable(self.eggs.values()) ) diff --git a/Lib/importlib/metadata/_collections.py b/Lib/importlib/metadata/_collections.py index fc5045d36be572..0049814eafd37d 100644 --- a/Lib/importlib/metadata/_collections.py +++ b/Lib/importlib/metadata/_collections.py @@ -1,30 +1,6 @@ -import collections import typing -# from jaraco.collections 3.3 -class FreezableDefaultDict(collections.defaultdict): - """ - Often it is desirable to prevent the mutation of - a default dict after its initial construction, such - as to prevent mutation during iteration. - - >>> dd = FreezableDefaultDict(list) - >>> dd[0].append('1') - >>> dd.freeze() - >>> dd[1] - [] - >>> len(dd) - 1 - """ - - def __missing__(self, key): - return getattr(self, '_frozen', super().__missing__)(key) - - def freeze(self): - self._frozen = lambda key: self.default_factory() - - class Pair(typing.NamedTuple): name: str value: str diff --git a/Lib/pydoc_data/topics.py b/Lib/pydoc_data/topics.py index 68f6771d4cedcf..599e934ed9838c 100644 --- a/Lib/pydoc_data/topics.py +++ b/Lib/pydoc_data/topics.py @@ -13626,8 +13626,7 @@ class dict(iterable, /, **kwargs) 1 The example above shows part of the implementation of - "collections.Counter". A different "__missing__()" method is - used by "collections.defaultdict". + "collections.Counter". 
d[key] = value diff --git a/Lib/test/test_defaultdict.py b/Lib/test/test_defaultdict.py index a193eb10f16d17..4021a4d12fb644 100644 --- a/Lib/test/test_defaultdict.py +++ b/Lib/test/test_defaultdict.py @@ -14,6 +14,7 @@ class TestDefaultDict(unittest.TestCase): def test_basic(self): d1 = defaultdict() self.assertEqual(d1.default_factory, None) + self.assertRaises(KeyError, d1.__getitem__, 42) d1.default_factory = list d1[12].append(42) self.assertEqual(d1, {12: [42]}) @@ -48,10 +49,15 @@ def test_basic(self): self.assertRaises(TypeError, defaultdict, 1) def test_missing(self): - d1 = defaultdict() - self.assertRaises(KeyError, d1.__missing__, 42) - d1.default_factory = list - self.assertEqual(d1.__missing__(42), []) + # Check that __missing__ is called when it exists + class A(defaultdict): + def __missing__(self, key): + return [] + d1 = A() + self.assertEqual(d1[1], []) + # Check that default_factory isn't called when __missing__ exists + d1.default_factory = dict + self.assertEqual(d1[2], []) def test_repr(self): d1 = defaultdict() @@ -186,7 +192,7 @@ def test_union(self): with self.assertRaises(TypeError): i |= None - def test_factory_conflict_with_set_value(self): + def test_reentering_getitem_method(self): key = "conflict_test" count = 0 @@ -201,7 +207,7 @@ def default_factory(): test_dict = defaultdict(default_factory) self.assertEqual(count, 0) - self.assertEqual(test_dict[key], 2) + self.assertEqual(test_dict[key], 1) self.assertEqual(count, 2) def test_repr_recursive_factory(self): diff --git a/Misc/NEWS.d/next/Library/2026-04-21-22-19-35.gh-issue-148242.unTO3m.rst b/Misc/NEWS.d/next/Library/2026-04-21-22-19-35.gh-issue-148242.unTO3m.rst new file mode 100644 index 00000000000000..c60a2197911806 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-21-22-19-35.gh-issue-148242.unTO3m.rst @@ -0,0 +1,4 @@ +The built-in ``defaultdict.__missing__`` method no longer exists. 
A custom :meth:`~object.__missing__` +method should no longer insert the generated value into the dictionary, as this is done by the new +:meth:`~collections.defaultdict.__getitem__` method. defaultdict is now safe to use with +:term:`free threading`. diff --git a/Modules/_collectionsmodule.c b/Modules/_collectionsmodule.c index 4ff05727ebc8ce..0eb0fde73ed1d7 100644 --- a/Modules/_collectionsmodule.c +++ b/Modules/_collectionsmodule.c @@ -2222,36 +2222,57 @@ typedef struct { static PyType_Spec defdict_spec; -PyDoc_STRVAR(defdict_missing_doc, -"__missing__(key) # Called by __getitem__ for missing key; pseudo-code:\n\ - if self.default_factory is None: raise KeyError((key,))\n\ - self[key] = value = self.default_factory()\n\ - return value\n\ +PyDoc_STRVAR(defdict_getitem_doc, +"__getitem__($self, key, /)\n--\n\n\ +Return self[key]. If the item doesn't exist, it is automatically created.\n\ +The value is generated by calling either the __missing__ method (if it exists)\n\ +or the default_factory attribute (if it isn't None). 
If neither can be called,\n\ +a KeyError is raised.\ "); static PyObject * -defdict_missing(PyObject *op, PyObject *key) +defdict_subscript(PyObject *op, PyObject *key) { - defdictobject *dd = defdictobject_CAST(op); - PyObject *factory = dd->default_factory; + PyDictObject *mp = (PyDictObject *)op; + Py_ssize_t ix; + Py_hash_t hash; PyObject *value; - if (factory == NULL || factory == Py_None) { - /* XXX Call dict.__missing__(key) */ - PyObject *tup; - tup = PyTuple_Pack(1, key); - if (!tup) return NULL; - PyErr_SetObject(PyExc_KeyError, tup); - Py_DECREF(tup); + + hash = _PyObject_HashFast(key); + if (hash == -1) { + _Py_dict_unhashable_type(op, key); return NULL; } - value = _PyObject_CallNoArgs(factory); - if (value == NULL) - return value; - PyObject *result = NULL; - (void)PyDict_SetDefaultRef(op, key, value, &result); - // 'result' is NULL, or a strong reference to 'value' or 'op[key]' - Py_DECREF(value); - return result; + Py_BEGIN_CRITICAL_SECTION(op); + ix = _Py_dict_lookup(mp, key, hash, &value); + if (value != NULL) { + Py_INCREF(value); + } else if (ix != DKIX_ERROR) { + /* Try to call self.__missing__(key) */ + PyObject *missing; + int ret = PyObject_GetOptionalAttr(op, &_Py_ID(__missing__), &missing); + if (ret == 1) { + value = PyObject_CallOneArg(missing, key); + Py_DECREF(missing); + } else if (ret == 0) { + /* Try to call self.default_factory() */ + PyObject *factory = defdictobject_CAST(op)->default_factory; + if (factory != NULL && factory != Py_None) { + value = _PyObject_CallNoArgs(factory); + } else { + _PyErr_SetKeyError(key); + } + } + /* Try to insert the new value in the dict */ + if (value != NULL) { + ret = _PyDict_SetItem_KnownHash_LockHeld(mp, Py_NewRef(key), + Py_NewRef(value), hash); + if (ret < 0) + Py_CLEAR(value); + } + } + Py_END_CRITICAL_SECTION(); + return value; } static inline PyObject* @@ -2331,8 +2352,8 @@ defdict_reduce(PyObject *op, PyObject *Py_UNUSED(dummy)) } static PyMethodDef defdict_methods[] = { - {"__missing__", 
defdict_missing, METH_O, - defdict_missing_doc}, + {"__getitem__", defdict_subscript, METH_O|METH_COEXIST, + defdict_getitem_doc}, {"copy", defdict_copy, METH_NOARGS, defdict_copy_doc}, {"__copy__", defdict_copy, METH_NOARGS, @@ -2347,7 +2368,7 @@ static PyMethodDef defdict_methods[] = { static PyMemberDef defdict_members[] = { {"default_factory", _Py_T_OBJECT, offsetof(defdictobject, default_factory), 0, - PyDoc_STR("Factory for default value called by __missing__().")}, + PyDoc_STR("Factory for default value, called by __getitem__().")}, {NULL} }; @@ -2511,6 +2532,7 @@ static PyType_Slot defdict_slots[] = { {Py_tp_init, defdict_init}, {Py_tp_alloc, PyType_GenericAlloc}, {Py_tp_free, PyObject_GC_Del}, + {Py_mp_subscript, defdict_subscript}, {0, NULL}, }; diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 09db93b2d31820..1ae4be4b234ac9 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2398,8 +2398,8 @@ PyDict_GetItem(PyObject *op, PyObject *key) "PyDict_GetItemRef() or PyDict_GetItemWithError()"); } -static void -dict_unhashable_type(PyObject *op, PyObject *key) +void +_Py_dict_unhashable_type(PyObject *op, PyObject *key) { PyObject *exc = PyErr_GetRaisedException(); assert(exc != NULL); @@ -2428,7 +2428,7 @@ _PyDict_LookupIndexAndValue(PyDictObject *mp, PyObject *key, PyObject **value) Py_hash_t hash = _PyObject_HashFast(key); if (hash == -1) { - dict_unhashable_type((PyObject*)mp, key); + _Py_dict_unhashable_type((PyObject*)mp, key); return -1; } @@ -2532,7 +2532,7 @@ PyDict_GetItemRef(PyObject *op, PyObject *key, PyObject **result) Py_hash_t hash = _PyObject_HashFast(key); if (hash == -1) { - dict_unhashable_type(op, key); + _Py_dict_unhashable_type(op, key); *result = NULL; return -1; } @@ -2548,7 +2548,7 @@ _PyDict_GetItemRef_Unicode_LockHeld(PyDictObject *op, PyObject *key, PyObject ** Py_hash_t hash = _PyObject_HashFast(key); if (hash == -1) { - dict_unhashable_type((PyObject*)op, key); + _Py_dict_unhashable_type((PyObject*)op, 
key); *result = NULL; return -1; } @@ -2586,7 +2586,7 @@ PyDict_GetItemWithError(PyObject *op, PyObject *key) } hash = _PyObject_HashFast(key); if (hash == -1) { - dict_unhashable_type(op, key); + _Py_dict_unhashable_type(op, key); return NULL; } @@ -2746,7 +2746,7 @@ setitem_take2_lock_held(PyDictObject *mp, PyObject *key, PyObject *value) { Py_hash_t hash = _PyObject_HashFast(key); if (hash == -1) { - dict_unhashable_type((PyObject*)mp, key); + _Py_dict_unhashable_type((PyObject*)mp, key); Py_DECREF(key); Py_DECREF(value); return -1; @@ -2924,7 +2924,7 @@ PyDict_DelItem(PyObject *op, PyObject *key) assert(key); Py_hash_t hash = _PyObject_HashFast(key); if (hash == -1) { - dict_unhashable_type(op, key); + _Py_dict_unhashable_type(op, key); return -1; } @@ -3266,7 +3266,7 @@ pop_lock_held(PyObject *op, PyObject *key, PyObject **result) Py_hash_t hash = _PyObject_HashFast(key); if (hash == -1) { - dict_unhashable_type(op, key); + _Py_dict_unhashable_type(op, key); if (result) { *result = NULL; } @@ -3679,7 +3679,7 @@ dict_subscript(PyObject *self, PyObject *key) hash = _PyObject_HashFast(key); if (hash == -1) { - dict_unhashable_type(self, key); + _Py_dict_unhashable_type(self, key); return NULL; } ix = _Py_dict_lookup_threadsafe(mp, key, hash, &value); @@ -4650,7 +4650,7 @@ dict_get_impl(PyDictObject *self, PyObject *key, PyObject *default_value) hash = _PyObject_HashFast(key); if (hash == -1) { - dict_unhashable_type((PyObject*)self, key); + _Py_dict_unhashable_type((PyObject*)self, key); return NULL; } ix = _Py_dict_lookup_threadsafe(self, key, hash, &val); @@ -4687,7 +4687,7 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu hash = _PyObject_HashFast(key); if (hash == -1) { - dict_unhashable_type(d, key); + _Py_dict_unhashable_type(d, key); if (result) { *result = NULL; } @@ -5128,7 +5128,7 @@ dict_contains(PyObject *op, PyObject *key) { Py_hash_t hash = _PyObject_HashFast(key); if (hash == -1) { - dict_unhashable_type(op, key); 
+ _Py_dict_unhashable_type(op, key); return -1; } @@ -7234,7 +7234,7 @@ _PyDict_SetItem_LockHeld(PyDictObject *dict, PyObject *name, PyObject *value) if (value == NULL) { Py_hash_t hash = _PyObject_HashFast(name); if (hash == -1) { - dict_unhashable_type((PyObject*)dict, name); + _Py_dict_unhashable_type((PyObject*)dict, name); return -1; } return _PyDict_DelItem_KnownHash_LockHeld((PyObject *)dict, name, hash); diff --git a/Python/ceval.c b/Python/ceval.c index 967d92f4ea6855..24deb3c428f3e9 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -435,8 +435,8 @@ _PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys) PyObject *values = NULL; // We use the two argument form of map.get(key, default) for two reasons: // - Atomically check for a key and get its value without error handling. - // - Don't cause key creation or resizing in dict subclasses like - // collections.defaultdict that define __missing__ (or similar). + // - Don't cause key creation or resizing in dict subclasses that define + // __missing__ (or similar). _PyCStackRef self, method; _PyThreadState_PushCStackRef(tstate, &self); _PyThreadState_PushCStackRef(tstate, &method);