diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index a1715720..1ddbe74c 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -5,4 +5,10 @@ aae30e864449442cf0b04e94f8a242b1b667de9a 16dc3153b3cb684ca72445ed058babc8f5d97f42 # chore(linting): lint all C++ files -58cd4b45777b046f03a63255c1d93e289e1cab5e \ No newline at end of file +58cd4b45777b046f03a63255c1d93e289e1cab5e + +# chore(linting): lint PyBytesProxyHandler.cc +d540ed6e0edfe9538dc726cf587dfb2cc76dde34 + +# chore(linting): lint PyObjectProxyHandler.cc +1d45ea98e42294cce16deec5454725d4de36f59f \ No newline at end of file diff --git a/.github/workflows/test-and-publish.yaml b/.github/workflows/test-and-publish.yaml index 1544ba71..ca4cc8fc 100644 --- a/.github/workflows/test-and-publish.yaml +++ b/.github/workflows/test-and-publish.yaml @@ -129,13 +129,6 @@ jobs: matrix: os: [ 'ubuntu-20.04', 'macos-12', 'macos-14', 'windows-2022', 'pi' ] python_version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ] - exclude: - # actions/setup-python: The version '3.8'/'3.9' with architecture 'arm64' was not found for macOS. - # see https://raw.githubusercontent.com/actions/python-versions/main/versions-manifest.json - - os: 'macos-14' - python_version: '3.8' - - os: 'macos-14' - python_version: '3.9' runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/update-mozcentral-version.yaml b/.github/workflows/update-mozcentral-version.yaml index 8f95895f..027720b8 100644 --- a/.github/workflows/update-mozcentral-version.yaml +++ b/.github/workflows/update-mozcentral-version.yaml @@ -2,7 +2,8 @@ name: 'Create pull requests to update mozilla-central version to the latest' on: schedule: - - cron: "00 14 * * 1" # run every Monday at 14:00 UTC (10:00 Eastern Daylight Time) + - cron: "00 14 */100,1-7 * 1" # run on the first Monday of each month at 14:00 UTC (10:00 Eastern Daylight Time) + # See https://blog.healthchecks.io/2022/09/schedule-cron-job-the-funky-way/ workflow_call: workflow_dispatch: # or you can run it manually @@ -33,8 +34,10 @@ jobs: add-paths: mozcentral.version commit-message: | chore(deps): upgrade SpiderMonkey to `${{ env.MOZCENTRAL_VERSION }}` + author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> branch: chore/upgrade-spidermonkey-to-${{ env.MOZCENTRAL_VERSION_SHORT }} title: Upgrade SpiderMonkey to mozilla-central commit `${{ env.MOZCENTRAL_VERSION }}` body: | Changeset: https://hg.mozilla.org/mozilla-central/rev/${{ env.MOZCENTRAL_VERSION }} labels: dependencies + assignees: Xmader diff --git a/include/JSStringProxy.hh b/include/JSStringProxy.hh index fb376cc0..ed696c19 100644 --- a/include/JSStringProxy.hh +++ b/include/JSStringProxy.hh @@ -15,6 +15,8 @@ #include +#include + /** * @brief The typedef for the backing store that will be used by JSStringProxy objects. All it contains is a pointer to the JSString * @@ -24,6 +26,8 @@ typedef struct { JS::PersistentRootedValue *jsString; } JSStringProxy; +extern std::unordered_set jsStringProxies; // a collection of all JSStringProxy objects, used during a GCCallback to ensure they continue to point to the correct char buffer + /** * @brief This struct is a bundle of methods used by the JSStringProxy type * @@ -37,7 +41,7 @@ public: */ static void JSStringProxy_dealloc(JSStringProxy *self); - /** + /** * @brief copy protocol method for both copy and deepcopy * * @param self - The JSObjectProxy @@ -49,13 +53,13 @@ public: // docs for methods, copied from cpython PyDoc_STRVAR(stringproxy_deepcopy__doc__, "__deepcopy__($self, memo, /)\n" -"--\n" -"\n"); + "--\n" + "\n"); PyDoc_STRVAR(stringproxy_copy__doc__, -"__copy__($self, /)\n" -"--\n" -"\n"); + "__copy__($self, /)\n" + "--\n" + "\n"); /** * @brief Struct for the other methods diff --git a/mozcentral.version b/mozcentral.version index 0efd2b63..9f4b8194 100644 --- a/mozcentral.version +++ b/mozcentral.version @@ -1 +1 @@ -8a28ad54f9f516c41ceddfa7ea32368fccf4a0eb \ No newline at end of file +a283127a5d0aa005c54d339e8ca27414b55f079b \ No newline at end of file diff --git a/src/JSStringProxy.cc b/src/JSStringProxy.cc index 90ae9d5a..2b68f6bb 100644 --- a/src/JSStringProxy.cc +++ b/src/JSStringProxy.cc @@ -12,12 +12,13 @@ #include "include/StrType.hh" - +std::unordered_set jsStringProxies; extern JSContext *GLOBAL_CX; void JSStringProxyMethodDefinitions::JSStringProxy_dealloc(JSStringProxy *self) { + jsStringProxies.erase(self); delete self->jsString; } diff --git a/src/PyBytesProxyHandler.cc b/src/PyBytesProxyHandler.cc index 753e67e5..14da16c9 100644 --- a/src/PyBytesProxyHandler.cc +++ b/src/PyBytesProxyHandler.cc @@ -26,30 +26,30 @@ static bool array_valueOf(JSContext *cx, unsigned argc, JS::Value *vp) { return false; } - JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot(proxy, OtherSlot); + JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot(proxy, OtherSlot); JS::RootedObject rootedArrayBuffer(cx, arrayBuffer->get()); auto byteLength = JS::GetArrayBufferByteLength(rootedArrayBuffer); - bool isSharedMemory; + bool isSharedMemory; JS::AutoCheckCannotGC autoNoGC(cx); uint8_t *data = JS::GetArrayBufferData(rootedArrayBuffer, &isSharedMemory, autoNoGC); - + size_t numberOfDigits = 0; for (size_t i = 0; i < byteLength; i++) { - numberOfDigits += data[i] < 10 ? 1 : data[i] < 100 ? 2 : 3; + numberOfDigits += data[i] < 10 ? 1 : data[i] < 100 ? 2 : 3; } const size_t STRING_LENGTH = byteLength + numberOfDigits; - JS::Latin1Char* buffer = (JS::Latin1Char *)malloc(sizeof(JS::Latin1Char) * STRING_LENGTH); - + JS::Latin1Char *buffer = (JS::Latin1Char *)malloc(sizeof(JS::Latin1Char) * STRING_LENGTH); + size_t charIndex = 0; - sprintf((char*)&buffer[charIndex], "%d", data[0]); + snprintf((char *)&buffer[charIndex], 4, "%d", data[0]); charIndex += data[0] < 10 ? 1 : data[0] < 100 ? 2 : 3; for (size_t dataIndex = 1; dataIndex < byteLength; dataIndex++) { buffer[charIndex] = ','; charIndex++; - sprintf((char*)&buffer[charIndex], "%d", data[dataIndex]); + snprintf((char *)&buffer[charIndex], 4, "%d", data[dataIndex]); charIndex += data[dataIndex] < 10 ? 1 : data[dataIndex] < 100 ? 2 : 3; } @@ -84,7 +84,7 @@ static bool iterator_next(JSContext *cx, unsigned argc, JS::Value *vp) { JS::RootedObject thisObj(cx); if (!args.computeThis(cx, &thisObj)) return false; - JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot(thisObj, BytesIteratorSlotIteratedObject); + JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot(thisObj, BytesIteratorSlotIteratedObject); JS::RootedObject rootedArrayBuffer(cx, arrayBuffer->get()); JS::RootedValue rootedNextIndex(cx, JS::GetReservedSlot(thisObj, BytesIteratorSlotNextIndex)); @@ -112,7 +112,7 @@ static bool iterator_next(JSContext *cx, unsigned argc, JS::Value *vp) { if (!JS_SetProperty(cx, result, "done", done)) return false; if (itemKind == ITEM_KIND_VALUE) { - bool isSharedMemory; + bool isSharedMemory; JS::AutoCheckCannotGC autoNoGC(cx); uint8_t *data = JS::GetArrayBufferData(rootedArrayBuffer, &isSharedMemory, autoNoGC); @@ -125,7 +125,7 @@ static bool iterator_next(JSContext *cx, unsigned argc, JS::Value *vp) { JS::RootedValue rootedNextIndex(cx, JS::Int32Value(nextIndex)); items[0].set(rootedNextIndex); - bool isSharedMemory; + bool isSharedMemory; JS::AutoCheckCannotGC autoNoGC(cx); uint8_t *data = JS::GetArrayBufferData(rootedArrayBuffer, &isSharedMemory, autoNoGC); @@ -216,8 +216,8 @@ static bool array_iterator_func(JSContext *cx, unsigned argc, JS::Value *vp, int if (!JS::Construct(cx, constructor_val, JS::HandleValueArray::empty(), &obj)) return false; if (!obj) return false; - JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot(proxy, OtherSlot); - + JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot(proxy, OtherSlot); + JS::SetReservedSlot(obj, BytesIteratorSlotIteratedObject, JS::PrivateValue(arrayBuffer)); JS::SetReservedSlot(obj, BytesIteratorSlotNextIndex, JS::Int32Value(0)); JS::SetReservedSlot(obj, BytesIteratorSlotItemKind, JS::Int32Value(itemKind)); @@ -253,13 +253,13 @@ bool PyBytesProxyHandler::set(JSContext *cx, JS::HandleObject proxy, JS::HandleI JS::HandleValue v, JS::HandleValue receiver, JS::ObjectOpResult &result) const { - // block all modifications - + // block all modifications + PyObject *self = JS::GetMaybePtrFromReservedSlot(proxy, PyObjectSlot); PyErr_Format(PyExc_TypeError, - "'%.100s' object has only read-only attributes", - Py_TYPE(self)->tp_name); + "'%.100s' object has only read-only attributes", + Py_TYPE(self)->tp_name); return result.failReadOnly(); } @@ -298,7 +298,7 @@ bool PyBytesProxyHandler::getOwnPropertyDescriptor( // "length" and "byteLength" properties have the same value if ((JS_StringEqualsLiteral(cx, idString, "length", &isProperty) && isProperty) || (JS_StringEqualsLiteral(cx, id.toString(), "byteLength", &isProperty) && isProperty)) { - JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot(proxy, OtherSlot); + JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot(proxy, OtherSlot); JS::RootedObject rootedArrayBuffer(cx, arrayBuffer->get()); @@ -314,7 +314,7 @@ bool PyBytesProxyHandler::getOwnPropertyDescriptor( // "buffer" property if (JS_StringEqualsLiteral(cx, idString, "buffer", &isProperty) && isProperty) { - JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot(proxy, OtherSlot); + JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot(proxy, OtherSlot); desc.set(mozilla::Some( JS::PropertyDescriptor::Data( @@ -392,10 +392,10 @@ bool PyBytesProxyHandler::getOwnPropertyDescriptor( // item Py_ssize_t index; if (idToIndex(cx, id, &index)) { - JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot(proxy, OtherSlot); + JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot(proxy, OtherSlot); JS::RootedObject rootedArrayBuffer(cx, arrayBuffer->get()); - bool isSharedMemory; + bool isSharedMemory; JS::AutoCheckCannotGC autoNoGC(cx); uint8_t *data = JS::GetArrayBufferData(rootedArrayBuffer, &isSharedMemory, autoNoGC); @@ -406,7 +406,7 @@ bool PyBytesProxyHandler::getOwnPropertyDescriptor( )); return true; - } + } PyObject *attrName = idToKey(cx, id); PyObject *self = JS::GetMaybePtrFromReservedSlot(proxy, PyObjectSlot); diff --git a/src/PyListProxyHandler.cc b/src/PyListProxyHandler.cc index 688cf82d..d6d9747b 100644 --- a/src/PyListProxyHandler.cc +++ b/src/PyListProxyHandler.cc @@ -2099,8 +2099,8 @@ void PyListProxyHandler::finalize(JS::GCContext *gcx, JSObject *proxy) const { // We cannot call Py_DECREF here when shutting down as the thread state is gone. // Then, when shutting down, there is only on reference left, and we don't need // to free the object since the entire process memory is being released. - PyObject *self = JS::GetMaybePtrFromReservedSlot(proxy, PyObjectSlot); - if (Py_REFCNT(self) > 1) { + if (!_Py_IsFinalizing()) { + PyObject *self = JS::GetMaybePtrFromReservedSlot(proxy, PyObjectSlot); Py_DECREF(self); } } diff --git a/src/PyObjectProxyHandler.cc b/src/PyObjectProxyHandler.cc index aae9ad94..63e77dd2 100644 --- a/src/PyObjectProxyHandler.cc +++ b/src/PyObjectProxyHandler.cc @@ -44,8 +44,9 @@ bool PyObjectProxyHandler::handleGetOwnPropertyDescriptor(JSContext *cx, JS::Han JS::MutableHandle> desc, PyObject *item) { // see if we're calling a function if (id.isString()) { - JS::RootedString idString(cx, id.toString()); - const char *methodName = JS_EncodeStringToUTF8(cx, idString).get(); + JS::UniqueChars idString = JS_EncodeStringToUTF8(cx, JS::RootedString(cx, id.toString())); + const char *methodName = idString.get(); + if (!strcmp(methodName, "toString") || !strcmp(methodName, "toLocaleString") || !strcmp(methodName, "valueOf")) { JS::RootedObject objectPrototype(cx); if (!JS_GetClassPrototype(cx, JSProto_Object, &objectPrototype)) { @@ -87,8 +88,8 @@ void PyObjectProxyHandler::finalize(JS::GCContext *gcx, JSObject *proxy) const { // We cannot call Py_DECREF here when shutting down as the thread state is gone. // Then, when shutting down, there is only on reference left, and we don't need // to free the object since the entire process memory is being released. - PyObject *self = JS::GetMaybePtrFromReservedSlot(proxy, PyObjectSlot); - if (Py_REFCNT(self) > 1) { + if (!_Py_IsFinalizing()) { + PyObject *self = JS::GetMaybePtrFromReservedSlot(proxy, PyObjectSlot); Py_DECREF(self); } } @@ -109,12 +110,12 @@ bool PyObjectProxyHandler::ownPropertyKeys(JSContext *cx, JS::HandleObject proxy } return handleOwnPropertyKeys(cx, nonDunderKeys, PyList_Size(nonDunderKeys), props); - } + } else { if (PyErr_Occurred()) { - PyErr_Clear(); + PyErr_Clear(); } - + return handleOwnPropertyKeys(cx, PyList_New(0), 0, props); } } diff --git a/src/StrType.cc b/src/StrType.cc index f01ce1ac..714bed7d 100644 --- a/src/StrType.cc +++ b/src/StrType.cc @@ -112,6 +112,7 @@ PyObject *StrType::proxifyString(JSContext *cx, JS::HandleValue strVal) { JS::RootedObject obj(cx); pyString->jsString = new JS::PersistentRootedValue(cx); pyString->jsString->setString((JSString *)lstr); + jsStringProxies.insert(pyString); // Initialize as legacy string (https://github.com/python/cpython/blob/v3.12.0b1/Include/cpython/unicodeobject.h#L78-L93) // see https://github.com/python/cpython/blob/v3.11.3/Objects/unicodeobject.c#L1230-L1245 diff --git a/src/jsTypeFactory.cc b/src/jsTypeFactory.cc index a5ccead4..60834188 100644 --- a/src/jsTypeFactory.cc +++ b/src/jsTypeFactory.cc @@ -49,17 +49,23 @@ static PyObjectProxyHandler pyObjectProxyHandler; static PyListProxyHandler pyListProxyHandler; static PyIterableProxyHandler pyIterableProxyHandler; -std::unordered_map ucs2ToPyObjectMap; // a map of char16_t (UCS-2) buffers to their corresponding PyObjects, used when finalizing JSExternalStrings -std::unordered_map latin1ToPyObjectMap; // a map of Latin-1 char buffers to their corresponding PyObjects, used when finalizing JSExternalStrings +std::unordered_map externalStringObjToRefCountMap;// a map of python string objects to the number of JSExternalStrings that depend on it, used when finalizing JSExternalStrings PyObject *PythonExternalString::getPyString(const char16_t *chars) { - return ucs2ToPyObjectMap[chars]; + for (auto it: externalStringObjToRefCountMap) { + if (PyUnicode_DATA(it.first) == (void *)chars) { // PyUnicode_<2/1>BYTE_DATA are just type casts of PyUnicode_DATA + return it.first; + } + } + + return NULL; // this shouldn't be reachable } PyObject *PythonExternalString::getPyString(const JS::Latin1Char *chars) { - return latin1ToPyObjectMap[chars]; + + return PythonExternalString::getPyString((const char16_t *)chars); } void PythonExternalString::finalize(char16_t *chars) const @@ -67,28 +73,40 @@ void PythonExternalString::finalize(char16_t *chars) const // We cannot call Py_DECREF here when shutting down as the thread state is gone. // Then, when shutting down, there is only on reference left, and we don't need // to free the object since the entire process memory is being released. - PyObject *object = ucs2ToPyObjectMap[chars]; - if (Py_REFCNT(object) > 1) { - Py_DECREF(object); + if (_Py_IsFinalizing()) { return; } + + for (auto it = externalStringObjToRefCountMap.cbegin(), next_it = it; it != externalStringObjToRefCountMap.cend(); it = next_it) { + next_it++; + if (PyUnicode_DATA(it->first) == (void *)chars) { + Py_DECREF(it->first); + externalStringObjToRefCountMap[it->first] = externalStringObjToRefCountMap[it->first] - 1; + + if (externalStringObjToRefCountMap[it->first] == 0) { + externalStringObjToRefCountMap.erase(it); + } + } } } void PythonExternalString::finalize(JS::Latin1Char *chars) const { - PyObject *object = latin1ToPyObjectMap[chars]; - if (Py_REFCNT(object) > 1) { - Py_DECREF(object); - } + PythonExternalString::finalize((char16_t *)chars); } size_t PythonExternalString::sizeOfBuffer(const char16_t *chars, mozilla::MallocSizeOf mallocSizeOf) const { - return PyUnicode_GetLength(ucs2ToPyObjectMap[chars]); + for (auto it: externalStringObjToRefCountMap) { + if (PyUnicode_DATA(it.first) == (void *)chars) { + return PyUnicode_GetLength(it.first); + } + } + + return 0; // // this shouldn't be reachable } size_t PythonExternalString::sizeOfBuffer(const JS::Latin1Char *chars, mozilla::MallocSizeOf mallocSizeOf) const { - return PyUnicode_GetLength(latin1ToPyObjectMap[chars]); + return PythonExternalString::sizeOfBuffer((const char16_t *)chars, mallocSizeOf); } PythonExternalString PythonExternalStringCallbacks = {}; @@ -151,13 +169,15 @@ JS::Value jsTypeFactory(JSContext *cx, PyObject *object) { break; } case (PyUnicode_2BYTE_KIND): { - ucs2ToPyObjectMap[(char16_t *)PyUnicode_2BYTE_DATA(object)] = object; + externalStringObjToRefCountMap[object] = externalStringObjToRefCountMap[object] + 1; + Py_INCREF(object); JSString *str = JS_NewExternalUCString(cx, (char16_t *)PyUnicode_2BYTE_DATA(object), PyUnicode_GET_LENGTH(object), &PythonExternalStringCallbacks); returnType.setString(str); break; } case (PyUnicode_1BYTE_KIND): { - latin1ToPyObjectMap[(JS::Latin1Char *)PyUnicode_1BYTE_DATA(object)] = object; + externalStringObjToRefCountMap[object] = externalStringObjToRefCountMap[object] + 1; + Py_INCREF(object); JSString *str = JS_NewExternalStringLatin1(cx, (JS::Latin1Char *)PyUnicode_1BYTE_DATA(object), PyUnicode_GET_LENGTH(object), &PythonExternalStringCallbacks); // JSExternalString can now be properly treated as either one-byte or two-byte strings when GCed // see https://hg.mozilla.org/releases/mozilla-esr128/file/tip/js/src/vm/StringType-inl.h#l785 @@ -165,7 +185,6 @@ JS::Value jsTypeFactory(JSContext *cx, PyObject *object) { break; } } - Py_INCREF(object); } else if (PyMethod_Check(object) || PyFunction_Check(object) || PyCFunction_Check(object)) { // can't determine number of arguments for PyCFunctions, so just assume potentially unbounded diff --git a/src/modules/pythonmonkey/pythonmonkey.cc b/src/modules/pythonmonkey/pythonmonkey.cc index e49b6f81..36a15d20 100644 --- a/src/modules/pythonmonkey/pythonmonkey.cc +++ b/src/modules/pythonmonkey/pythonmonkey.cc @@ -48,10 +48,39 @@ JS::PersistentRootedObject jsFunctionRegistry; -void finalizationRegistryGCCallback(JSContext *cx, JSGCStatus status, JS::GCReason reason, void *data) { +/** + * @brief During a GC, string buffers may have moved, so we need to re-point our JSStringProxies + * The char buffer pointer obtained by previous `JS::Get{Latin1,TwoByte}LinearStringChars` calls remains valid only as long as no GC occurs. + */ +void updateCharBufferPointers() { + if (_Py_IsFinalizing()) { + return; // do not move char pointers around if python is finalizing + } + + JS::AutoCheckCannotGC nogc; + for (const JSStringProxy *jsStringProxy: jsStringProxies) { + JSLinearString *str = JS_ASSERT_STRING_IS_LINEAR(jsStringProxy->jsString->toString()); + void *updatedCharBufPtr; // pointer to the moved char buffer after a GC + if (JS::LinearStringHasLatin1Chars(str)) { + updatedCharBufPtr = (void *)JS::GetLatin1LinearStringChars(nogc, str); + } else { // utf16 / ucs2 string + updatedCharBufPtr = (void *)JS::GetTwoByteLinearStringChars(nogc, str); + } + ((PyUnicodeObject *)(jsStringProxy))->data.any = updatedCharBufPtr; + } +} + +void pythonmonkeyGCCallback(JSContext *cx, JSGCStatus status, JS::GCReason reason, void *data) { if (status == JSGCStatus::JSGC_END) { JS::ClearKeptObjects(GLOBAL_CX); while (JOB_QUEUE->runFinalizationRegistryCallbacks(GLOBAL_CX)); + updateCharBufferPointers(); + } +} + +void nurseryCollectionCallback(JSContext *cx, JS::GCNurseryProgress progress, JS::GCReason reason, void *data) { + if (progress == JS::GCNurseryProgress::GC_NURSERY_COLLECTION_END) { + updateCharBufferPointers(); } } @@ -547,7 +576,10 @@ PyMODINIT_FUNC PyInit_pythonmonkey(void) return NULL; } - JS_SetGCCallback(GLOBAL_CX, finalizationRegistryGCCallback, NULL); + JS_SetGCParameter(GLOBAL_CX, JSGC_MAX_BYTES, (uint32_t)-1); + + JS_SetGCCallback(GLOBAL_CX, pythonmonkeyGCCallback, NULL); + JS::AddGCNurseryCollectionCallback(GLOBAL_CX, nurseryCollectionCallback, NULL); JS::RealmCreationOptions creationOptions = JS::RealmCreationOptions(); JS::RealmBehaviors behaviours = JS::RealmBehaviors();