Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.10] bpo-46006: Revert "bpo-40521: Per-interpreter interned strings (GH-20085)" (GH-30422) #30425

Merged
merged 2 commits into from
Jan 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 3 additions & 9 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,9 @@ struct _Py_unicode_state {
PyObject *latin1[256];
struct _Py_unicode_fs_codec fs_codec;

/* This dictionary holds all interned unicode strings. Note that references
to strings in this dictionary are *not* counted in the string's ob_refcnt.
When the interned string reaches a refcnt of 0 the string deallocation
function will delete the reference from this dictionary.

Another way to look at this is that to say that the actual reference
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
*/
PyObject *interned;
// Unused member kept for ABI backward compatibility with Python 3.10.0:
// see bpo-46006.
PyObject *unused_interned;

// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
struct _Py_unicode_ids ids;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Fix a regression when a type method like ``__init__()`` is modified in a
subinterpreter. Fix a regression in ``_PyUnicode_EqualToASCIIId()`` and type
``update_slot()``. Revert the change which made the Unicode dictionary of
interned strings compatible with subinterpreters: the internal interned
dictionary is shared again by all interpreters. Patch by Victor Stinner.
22 changes: 22 additions & 0 deletions Objects/typeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ typedef struct PySlot_Offset {
} PySlot_Offset;


/* bpo-40521: Interned strings are shared by all subinterpreters */
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
# define INTERN_NAME_STRINGS
#endif

/* alphabetical order */
_Py_IDENTIFIER(__abstractmethods__);
_Py_IDENTIFIER(__annotations__);
Expand Down Expand Up @@ -3988,6 +3993,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
if (name == NULL)
return -1;
}
#ifdef INTERN_NAME_STRINGS
if (!PyUnicode_CHECK_INTERNED(name)) {
PyUnicode_InternInPlace(&name);
if (!PyUnicode_CHECK_INTERNED(name)) {
Expand All @@ -3997,6 +4003,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
return -1;
}
}
#endif
}
else {
/* Will fail in _PyObject_GenericSetAttrWithDict. */
Expand Down Expand Up @@ -8344,10 +8351,17 @@ _PyTypes_InitSlotDefs(void)
for (slotdef *p = slotdefs; p->name; p++) {
/* Slots must be ordered by their offset in the PyHeapTypeObject. */
assert(!p[1].name || p->offset <= p[1].offset);
#ifdef INTERN_NAME_STRINGS
p->name_strobj = PyUnicode_InternFromString(p->name);
if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
return _PyStatus_NO_MEMORY();
}
#else
p->name_strobj = PyUnicode_FromString(p->name);
if (!p->name_strobj) {
return _PyStatus_NO_MEMORY();
}
#endif
}
slotdefs_initialized = 1;
return _PyStatus_OK();
Expand All @@ -8372,16 +8386,24 @@ update_slot(PyTypeObject *type, PyObject *name)
int offset;

assert(PyUnicode_CheckExact(name));
#ifdef INTERN_NAME_STRINGS
assert(PyUnicode_CHECK_INTERNED(name));
#endif

assert(slotdefs_initialized);
pp = ptrs;
for (p = slotdefs; p->name; p++) {
assert(PyUnicode_CheckExact(p->name_strobj));
assert(PyUnicode_CheckExact(name));
#ifdef INTERN_NAME_STRINGS
if (p->name_strobj == name) {
*pp++ = p;
}
#else
if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) {
*pp++ = p;
}
#endif
}
*pp = NULL;
for (pp = ptrs; *pp; pp++) {
Expand Down
63 changes: 46 additions & 17 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,22 @@ extern "C" {
# define OVERALLOCATE_FACTOR 4
#endif

/* bpo-40521: Interned strings are shared by all interpreters. */
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
# define INTERNED_STRINGS
#endif

/* This dictionary holds all interned unicode strings. Note that references
to strings in this dictionary are *not* counted in the string's ob_refcnt.
When the interned string reaches a refcnt of 0 the string deallocation
function will delete the reference from this dictionary.

Another way to look at this is that to say that the actual reference
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
*/
#ifdef INTERNED_STRINGS
static PyObject *interned = NULL;
#endif

static struct _Py_unicode_state*
get_unicode_state(void)
Expand Down Expand Up @@ -1936,20 +1952,21 @@ unicode_dealloc(PyObject *unicode)

case SSTATE_INTERNED_MORTAL:
{
struct _Py_unicode_state *state = get_unicode_state();
#ifdef INTERNED_STRINGS
/* Revive the dead object temporarily. PyDict_DelItem() removes two
references (key and value) which were ignored by
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
to prevent calling unicode_dealloc() again. Adjust refcnt after
PyDict_DelItem(). */
assert(Py_REFCNT(unicode) == 0);
Py_SET_REFCNT(unicode, 3);
if (PyDict_DelItem(state->interned, unicode) != 0) {
if (PyDict_DelItem(interned, unicode) != 0) {
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
NULL);
}
assert(Py_REFCNT(unicode) == 1);
Py_SET_REFCNT(unicode, 0);
#endif
break;
}

Expand Down Expand Up @@ -11600,11 +11617,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
if (PyUnicode_CHECK_INTERNED(left))
return 0;

#ifdef INTERNED_STRINGS
assert(_PyUnicode_HASH(right_uni) != -1);
Py_hash_t hash = _PyUnicode_HASH(left);
if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {
return 0;
}
#endif

return unicode_compare_eq(left, right_uni);
}
Expand Down Expand Up @@ -15833,21 +15852,21 @@ PyUnicode_InternInPlace(PyObject **p)
return;
}

#ifdef INTERNED_STRINGS
if (PyUnicode_READY(s) == -1) {
PyErr_Clear();
return;
}

struct _Py_unicode_state *state = get_unicode_state();
if (state->interned == NULL) {
state->interned = PyDict_New();
if (state->interned == NULL) {
if (interned == NULL) {
interned = PyDict_New();
if (interned == NULL) {
PyErr_Clear(); /* Don't leave an exception */
return;
}
}

PyObject *t = PyDict_SetDefault(state->interned, s, s);
PyObject *t = PyDict_SetDefault(interned, s, s);
if (t == NULL) {
PyErr_Clear();
return;
Expand All @@ -15864,9 +15883,13 @@ PyUnicode_InternInPlace(PyObject **p)
this. */
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
#else
// PyDict expects that interned strings have their hash
// (PyASCIIObject.hash) already computed.
(void)unicode_hash(s);
#endif
}


void
PyUnicode_InternImmortal(PyObject **p)
{
Expand Down Expand Up @@ -15900,25 +15923,29 @@ PyUnicode_InternFromString(const char *cp)
void
_PyUnicode_ClearInterned(PyInterpreterState *interp)
{
struct _Py_unicode_state *state = &interp->unicode;
if (state->interned == NULL) {
if (!_Py_IsMainInterpreter(interp)) {
// interned dict is shared by all interpreters
return;
}

if (interned == NULL) {
return;
}
assert(PyDict_CheckExact(state->interned));
assert(PyDict_CheckExact(interned));

/* Interned unicode strings are not forcibly deallocated; rather, we give
them their stolen references back, and then clear and DECREF the
interned dict. */

#ifdef INTERNED_STATS
fprintf(stderr, "releasing %zd interned strings\n",
PyDict_GET_SIZE(state->interned));
PyDict_GET_SIZE(interned));

Py_ssize_t immortal_size = 0, mortal_size = 0;
#endif
Py_ssize_t pos = 0;
PyObject *s, *ignored_value;
while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {
while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
assert(PyUnicode_IS_READY(s));

switch (PyUnicode_CHECK_INTERNED(s)) {
Expand Down Expand Up @@ -15949,8 +15976,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
mortal_size, immortal_size);
#endif

PyDict_Clear(state->interned);
Py_CLEAR(state->interned);
PyDict_Clear(interned);
Py_CLEAR(interned);
}


Expand Down Expand Up @@ -16322,8 +16349,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
{
struct _Py_unicode_state *state = &interp->unicode;

// _PyUnicode_ClearInterned() must be called before
assert(state->interned == NULL);
if (_Py_IsMainInterpreter(interp)) {
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
assert(interned == NULL);
}

_PyUnicode_FiniEncodings(&state->fs_codec);

Expand Down