From e08cfa9d3b51b15b5479580b7fedbfc017db284e Mon Sep 17 00:00:00 2001
From: Sam Gross <colesbury@gmail.com>
Date: Wed, 15 Jan 2025 21:49:14 +0000
Subject: [PATCH 1/9] Fix data race when using shared variables (free
 threading)

In the free threading build, there's a race between wrapper re-use and
wrapper deallocation. This can happen with a static variable accessed by
multiple threads.

Fixing this requires using some private CPython APIs: _Py_TryIncref and
_PyObject_SetMaybeWeakref. The implementations of these functions are
included until they're made available as public ("unstable") APIs.

Fixes #5489
---
 include/pybind11/detail/class.h            | 22 ++++++++++
 include/pybind11/detail/type_caster_base.h | 48 +++++++++++++++++++++-
 tests/test_thread.cpp                      |  6 +++
 tests/test_thread.py                       | 18 ++++++++
 4 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/include/pybind11/detail/class.h b/include/pybind11/detail/class.h
index 88e6d60a98..5cea7cc8a0 100644
--- a/include/pybind11/detail/class.h
+++ b/include/pybind11/detail/class.h
@@ -312,7 +312,29 @@ inline void traverse_offset_bases(void *valueptr,
     }
 }
 
+static inline void enable_try_inc_ref(PyObject *op) {
+#ifdef Py_GIL_DISABLED
+    // TODO: Replace with PyUnstable_Object_EnableTryIncRef when available.
+    // See https://github.com/python/cpython/issues/128844
+    if (_Py_IsImmortal(op)) {
+        return;
+    }
+    for (;;) {
+        Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared);
+        if ((shared & _Py_REF_SHARED_FLAG_MASK) != 0) {
+            // Nothing to do if it's in WEAKREFS, QUEUED, or MERGED states.
+            return;
+        }
+        if (_Py_atomic_compare_exchange_ssize(
+                &op->ob_ref_shared, &shared, shared | _Py_REF_MAYBE_WEAKREF)) {
+            return;
+        }
+    }
+#endif
+}
+
 inline bool register_instance_impl(void *ptr, instance *self) {
+    enable_try_inc_ref((PyObject *) self);
     with_instance_map(ptr, [&](instance_map &instances) { instances.emplace(ptr, self); });
     return true; // unused, but gives the same signature as the deregister func
 }
diff --git a/include/pybind11/detail/type_caster_base.h b/include/pybind11/detail/type_caster_base.h
index d5d86dc6c1..4570cab380 100644
--- a/include/pybind11/detail/type_caster_base.h
+++ b/include/pybind11/detail/type_caster_base.h
@@ -241,6 +241,49 @@ PYBIND11_NOINLINE handle get_type_handle(const std::type_info &tp, bool throw_if
     return handle(type_info ? ((PyObject *) type_info->type) : nullptr);
 }
 
+inline bool try_incref(PyObject *obj) {
+    // Tries to increment the reference count of an object if it's not zero.
+    // TODO: Use PyUnstable_TryIncref when available.
+    // See https://github.com/python/cpython/issues/128844
+#ifdef Py_GIL_DISABLED
+    // See https://github.com/python/cpython/blob/d05140f9f77d7dfc753dd1e5ac3a5962aaa03eff/Include/internal/pycore_object.h#L761
+    uint32_t local = _Py_atomic_load_uint32_relaxed(&obj->ob_ref_local);
+    local += 1;
+    if (local == 0) {
+        // immortal
+        return true;
+    }
+    if (_Py_IsOwnedByCurrentThread(obj)) {
+        _Py_atomic_store_uint32_relaxed(&obj->ob_ref_local, local);
+#ifdef Py_REF_DEBUG
+        _Py_INCREF_IncRefTotal();
+#endif
+        return true;
+    }
+    Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&obj->ob_ref_shared);
+    for (;;) {
+        // If the shared refcount is zero and the object is either merged
+        // or may not have weak references, then we cannot incref it.
+        if (shared == 0 || shared == _Py_REF_MERGED) {
+            return false;
+        }
+
+        if (_Py_atomic_compare_exchange_ssize(
+                &obj->ob_ref_shared,
+                &shared,
+                shared + (1 << _Py_REF_SHARED_SHIFT))) {
+#ifdef Py_REF_DEBUG
+            _Py_INCREF_IncRefTotal();
+#endif
+            return true;
+        }
+    }
+#else
+    assert(Py_REFCNT(obj) > 0);
+    Py_INCREF(obj);
+#endif
+}
+
 // Searches the inheritance graph for a registered Python instance, using all_type_info().
 PYBIND11_NOINLINE handle find_registered_python_instance(void *src,
                                                          const detail::type_info *tinfo) {
@@ -249,7 +292,10 @@ PYBIND11_NOINLINE handle find_registered_python_instance(void *src,
         for (auto it_i = it_instances.first; it_i != it_instances.second; ++it_i) {
             for (auto *instance_type : detail::all_type_info(Py_TYPE(it_i->second))) {
                 if (instance_type && same_type(*instance_type->cpptype, *tinfo->cpptype)) {
-                    return handle((PyObject *) it_i->second).inc_ref();
+                    PyObject *wrapper = (PyObject *) it_i->second;
+                    if (try_incref(wrapper)) {
+                        return handle(wrapper);
+                    }
                 }
             }
         }
diff --git a/tests/test_thread.cpp b/tests/test_thread.cpp
index e727109d79..c31af14a87 100644
--- a/tests/test_thread.cpp
+++ b/tests/test_thread.cpp
@@ -28,6 +28,9 @@ struct IntStruct {
     int value;
 };
 
+struct EmptyStruct {};
+static EmptyStruct SharedInstance;
+
 } // namespace
 
 TEST_SUBMODULE(thread, m) {
@@ -61,6 +64,9 @@ TEST_SUBMODULE(thread, m) {
         },
         py::call_guard<py::gil_scoped_release>());
 
+    py::class_<EmptyStruct>(m, "EmptyStruct")
+        .def_readonly_static("SharedInstance", &SharedInstance);
+
     // NOTE: std::string_view also uses loader_life_support to ensure that
     // the string contents remain alive, but that's a C++ 17 feature.
 }
diff --git a/tests/test_thread.py b/tests/test_thread.py
index f12020e41b..a31b1dc249 100644
--- a/tests/test_thread.py
+++ b/tests/test_thread.py
@@ -47,3 +47,21 @@ def test_implicit_conversion_no_gil():
         x.start()
     for x in [c, b, a]:
         x.join()
+
+
+@pytest.mark.skipif(sys.platform.startswith("emscripten"), reason="Requires threads")
+def test_bind_shared_instance():
+    nb_threads = 4
+    b = threading.Barrier(nb_threads)
+
+    def access_shared_instance():
+        b.wait()
+        for _ in range(1000):
+            x = m.EmptyStruct.SharedInstance
+            del x
+
+    threads = [threading.Thread(target=access_shared_instance) for _ in range(nb_threads)]
+    for thread in threads:
+        thread.start()
+    for thread in threads:
+        thread.join()

From bb215b53ba3b6f2cb50c4e1196b3e86e3d778839 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 15 Jan 2025 22:22:52 +0000
Subject: [PATCH 2/9] style: pre-commit fixes

---
 include/pybind11/detail/type_caster_base.h | 15 +++++++--------
 tests/test_thread.py                       |  4 +++-
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/include/pybind11/detail/type_caster_base.h b/include/pybind11/detail/type_caster_base.h
index 4570cab380..13aebca3d8 100644
--- a/include/pybind11/detail/type_caster_base.h
+++ b/include/pybind11/detail/type_caster_base.h
@@ -246,7 +246,8 @@ inline bool try_incref(PyObject *obj) {
     // TODO: Use PyUnstable_TryIncref when available.
     // See https://github.com/python/cpython/issues/128844
 #ifdef Py_GIL_DISABLED
-    // See https://github.com/python/cpython/blob/d05140f9f77d7dfc753dd1e5ac3a5962aaa03eff/Include/internal/pycore_object.h#L761
+    // See
+    // https://github.com/python/cpython/blob/d05140f9f77d7dfc753dd1e5ac3a5962aaa03eff/Include/internal/pycore_object.h#L761
     uint32_t local = _Py_atomic_load_uint32_relaxed(&obj->ob_ref_local);
     local += 1;
     if (local == 0) {
@@ -255,9 +256,9 @@ inline bool try_incref(PyObject *obj) {
     }
     if (_Py_IsOwnedByCurrentThread(obj)) {
         _Py_atomic_store_uint32_relaxed(&obj->ob_ref_local, local);
-#ifdef Py_REF_DEBUG
+#    ifdef Py_REF_DEBUG
         _Py_INCREF_IncRefTotal();
-#endif
+#    endif
         return true;
     }
     Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&obj->ob_ref_shared);
@@ -269,12 +270,10 @@ inline bool try_incref(PyObject *obj) {
         }
 
         if (_Py_atomic_compare_exchange_ssize(
-                &obj->ob_ref_shared,
-                &shared,
-                shared + (1 << _Py_REF_SHARED_SHIFT))) {
-#ifdef Py_REF_DEBUG
+                &obj->ob_ref_shared, &shared, shared + (1 << _Py_REF_SHARED_SHIFT))) {
+#    ifdef Py_REF_DEBUG
             _Py_INCREF_IncRefTotal();
-#endif
+#    endif
             return true;
         }
     }
diff --git a/tests/test_thread.py b/tests/test_thread.py
index a31b1dc249..fbb17eb72c 100644
--- a/tests/test_thread.py
+++ b/tests/test_thread.py
@@ -60,7 +60,9 @@ def access_shared_instance():
             x = m.EmptyStruct.SharedInstance
             del x
 
-    threads = [threading.Thread(target=access_shared_instance) for _ in range(nb_threads)]
+    threads = [
+        threading.Thread(target=access_shared_instance) for _ in range(nb_threads)
+    ]
     for thread in threads:
         thread.start()
     for thread in threads:

From 9f16d6b2a3a6adb021bac571ce78c229e520b12b Mon Sep 17 00:00:00 2001
From: Sam Gross <colesbury@gmail.com>
Date: Wed, 15 Jan 2025 22:25:33 +0000
Subject: [PATCH 3/9] Avoid unused parameter

---
 include/pybind11/detail/class.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/pybind11/detail/class.h b/include/pybind11/detail/class.h
index 5cea7cc8a0..1fe3039e6b 100644
--- a/include/pybind11/detail/class.h
+++ b/include/pybind11/detail/class.h
@@ -312,8 +312,8 @@ inline void traverse_offset_bases(void *valueptr,
     }
 }
 
-static inline void enable_try_inc_ref(PyObject *op) {
 #ifdef Py_GIL_DISABLED
+static inline void enable_try_inc_ref(PyObject *op) {
     // TODO: Replace with PyUnstable_Object_EnableTryIncRef when available.
     // See https://github.com/python/cpython/issues/128844
     if (_Py_IsImmortal(op)) {
@@ -330,11 +330,13 @@ static inline void enable_try_inc_ref(PyObject *op) {
             return;
         }
     }
-#endif
 }
+#endif
 
 inline bool register_instance_impl(void *ptr, instance *self) {
+#ifdef Py_GIL_DISABLED
     enable_try_inc_ref((PyObject *) self);
+#endif
     with_instance_map(ptr, [&](instance_map &instances) { instances.emplace(ptr, self); });
     return true; // unused, but gives the same signature as the deregister func
 }

From 72238e41683252f851acccf04d88d93ffca17a6a Mon Sep 17 00:00:00 2001
From: Sam Gross <colesbury@gmail.com>
Date: Wed, 15 Jan 2025 22:28:34 +0000
Subject: [PATCH 4/9] Add missing return for default build

---
 include/pybind11/detail/type_caster_base.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/pybind11/detail/type_caster_base.h b/include/pybind11/detail/type_caster_base.h
index 13aebca3d8..56745942fc 100644
--- a/include/pybind11/detail/type_caster_base.h
+++ b/include/pybind11/detail/type_caster_base.h
@@ -280,6 +280,7 @@ inline bool try_incref(PyObject *obj) {
 #else
     assert(Py_REFCNT(obj) > 0);
     Py_INCREF(obj);
+    return true;
 #endif
 }
 

From e9e28bcb48d02852c1b888cb5d9c69630ec57063 Mon Sep 17 00:00:00 2001
From: Sam Gross <colesbury@gmail.com>
Date: Thu, 16 Jan 2025 17:29:52 +0000
Subject: [PATCH 5/9] Changes from review

---
 include/pybind11/detail/class.h            | 10 +++++-----
 include/pybind11/detail/type_caster_base.h |  2 +-
 tests/test_thread.py                       |  3 +--
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/include/pybind11/detail/class.h b/include/pybind11/detail/class.h
index 1fe3039e6b..f501467ded 100644
--- a/include/pybind11/detail/class.h
+++ b/include/pybind11/detail/class.h
@@ -313,20 +313,20 @@ inline void traverse_offset_bases(void *valueptr,
 }
 
 #ifdef Py_GIL_DISABLED
-static inline void enable_try_inc_ref(PyObject *op) {
+inline void enable_try_inc_ref(PyObject *obj) {
     // TODO: Replace with PyUnstable_Object_EnableTryIncRef when available.
     // See https://github.com/python/cpython/issues/128844
-    if (_Py_IsImmortal(op)) {
+    if (_Py_IsImmortal(obj)) {
         return;
     }
     for (;;) {
-        Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared);
+        Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&obj->ob_ref_shared);
         if ((shared & _Py_REF_SHARED_FLAG_MASK) != 0) {
             // Nothing to do if it's in WEAKREFS, QUEUED, or MERGED states.
             return;
         }
         if (_Py_atomic_compare_exchange_ssize(
-                &op->ob_ref_shared, &shared, shared | _Py_REF_MAYBE_WEAKREF)) {
+                &obj->ob_ref_shared, &shared, shared | _Py_REF_MAYBE_WEAKREF)) {
             return;
         }
     }
@@ -335,7 +335,7 @@ static inline void enable_try_inc_ref(PyObject *op) {
 
 inline bool register_instance_impl(void *ptr, instance *self) {
 #ifdef Py_GIL_DISABLED
-    enable_try_inc_ref((PyObject *) self);
+    enable_try_inc_ref(reinterpret_cast<PyObject *>(self));
 #endif
     with_instance_map(ptr, [&](instance_map &instances) { instances.emplace(ptr, self); });
     return true; // unused, but gives the same signature as the deregister func
diff --git a/include/pybind11/detail/type_caster_base.h b/include/pybind11/detail/type_caster_base.h
index 56745942fc..e2eb825d17 100644
--- a/include/pybind11/detail/type_caster_base.h
+++ b/include/pybind11/detail/type_caster_base.h
@@ -292,7 +292,7 @@ PYBIND11_NOINLINE handle find_registered_python_instance(void *src,
         for (auto it_i = it_instances.first; it_i != it_instances.second; ++it_i) {
             for (auto *instance_type : detail::all_type_info(Py_TYPE(it_i->second))) {
                 if (instance_type && same_type(*instance_type->cpptype, *tinfo->cpptype)) {
-                    PyObject *wrapper = (PyObject *) it_i->second;
+                    auto *wrapper = reinterpret_cast<PyObject *>(it_i->second);
                     if (try_incref(wrapper)) {
                         return handle(wrapper);
                     }
diff --git a/tests/test_thread.py b/tests/test_thread.py
index fbb17eb72c..49fbae2cd7 100644
--- a/tests/test_thread.py
+++ b/tests/test_thread.py
@@ -57,8 +57,7 @@ def test_bind_shared_instance():
     def access_shared_instance():
         b.wait()
         for _ in range(1000):
-            x = m.EmptyStruct.SharedInstance
-            del x
+            m.EmptyStruct.SharedInstance
 
     threads = [
         threading.Thread(target=access_shared_instance) for _ in range(nb_threads)

From 178cea5d1e5e62681e74dece9d53f96239ed20e4 Mon Sep 17 00:00:00 2001
From: Sam Gross <colesbury@gmail.com>
Date: Thu, 16 Jan 2025 17:32:48 +0000
Subject: [PATCH 6/9] Assign result to local variable

---
 tests/test_thread.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/test_thread.py b/tests/test_thread.py
index 49fbae2cd7..bc85d5dc41 100644
--- a/tests/test_thread.py
+++ b/tests/test_thread.py
@@ -57,7 +57,9 @@ def test_bind_shared_instance():
     def access_shared_instance():
         b.wait()
         for _ in range(1000):
-            m.EmptyStruct.SharedInstance
+            # assign to local variable to make clang-tidy happy
+            x = m.EmptyStruct.SharedInstance
+            del x
 
     threads = [
         threading.Thread(target=access_shared_instance) for _ in range(nb_threads)

From fc33e36bda52ce0ca806b1dbf489135ff17d7c95 Mon Sep 17 00:00:00 2001
From: Sam Gross <colesbury@gmail.com>
Date: Thu, 16 Jan 2025 17:34:06 +0000
Subject: [PATCH 7/9] s/clang-tidy/ruff

---
 tests/test_thread.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_thread.py b/tests/test_thread.py
index bc85d5dc41..429c52adcb 100644
--- a/tests/test_thread.py
+++ b/tests/test_thread.py
@@ -57,7 +57,7 @@ def test_bind_shared_instance():
     def access_shared_instance():
         b.wait()
         for _ in range(1000):
-            # assign to local variable to make clang-tidy happy
+            # assign to local variable to make ruff happy
             x = m.EmptyStruct.SharedInstance
             del x
 

From c4c0b48b4d38a09a6ec46d66b077c9ae53715c98 Mon Sep 17 00:00:00 2001
From: Sam Gross <colesbury@gmail.com>
Date: Thu, 16 Jan 2025 18:04:25 +0000
Subject: [PATCH 8/9] clang-tidy: static is redundant

---
 tests/test_thread.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_thread.cpp b/tests/test_thread.cpp
index c31af14a87..eabf39afa1 100644
--- a/tests/test_thread.cpp
+++ b/tests/test_thread.cpp
@@ -29,7 +29,7 @@ struct IntStruct {
 };
 
 struct EmptyStruct {};
-static EmptyStruct SharedInstance;
+EmptyStruct SharedInstance;
 
 } // namespace
 

From d3f9249d57a7c4aece9382fd6182ceffe6ee7ec6 Mon Sep 17 00:00:00 2001
From: Sam Gross <colesbury@gmail.com>
Date: Thu, 16 Jan 2025 18:33:38 +0000
Subject: [PATCH 9/9] Use 'noqa: B018'

---
 tests/test_thread.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/test_thread.py b/tests/test_thread.py
index 429c52adcb..e9d7bafb2f 100644
--- a/tests/test_thread.py
+++ b/tests/test_thread.py
@@ -57,9 +57,7 @@ def test_bind_shared_instance():
     def access_shared_instance():
         b.wait()
         for _ in range(1000):
-            # assign to local variable to make ruff happy
-            x = m.EmptyStruct.SharedInstance
-            del x
+            m.EmptyStruct.SharedInstance  # noqa: B018
 
     threads = [
         threading.Thread(target=access_shared_instance) for _ in range(nb_threads)