Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

caleb/fix/stringdata #417

Merged
merged 12 commits into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .git-blame-ignore-revs
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,10 @@ aae30e864449442cf0b04e94f8a242b1b667de9a
16dc3153b3cb684ca72445ed058babc8f5d97f42

# chore(linting): lint all C++ files
58cd4b45777b046f03a63255c1d93e289e1cab5e
58cd4b45777b046f03a63255c1d93e289e1cab5e

# chore(linting): lint PyBytesProxyHandler.cc
d540ed6e0edfe9538dc726cf587dfb2cc76dde34

# chore(linting): lint PyObjectProxyHandler.cc
1d45ea98e42294cce16deec5454725d4de36f59f
16 changes: 10 additions & 6 deletions include/JSStringProxy.hh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

#include <Python.h>

#include <unordered_set>

/**
* @brief The typedef for the backing store that will be used by JSStringProxy objects. All it contains is a pointer to the JSString
*
Expand All @@ -24,6 +26,8 @@ typedef struct {
JS::PersistentRootedValue *jsString;
} JSStringProxy;

extern std::unordered_set<JSStringProxy *> jsStringProxies; // a collection of all JSStringProxy objects, used during a GCCallback to ensure they continue to point to the correct char buffer

/**
* @brief This struct is a bundle of methods used by the JSStringProxy type
*
Expand All @@ -37,7 +41,7 @@ public:
*/
static void JSStringProxy_dealloc(JSStringProxy *self);

/**
/**
* @brief copy protocol method for both copy and deepcopy
*
* @param self - The JSObjectProxy
Expand All @@ -49,13 +53,13 @@ public:
// docs for methods, copied from cpython
PyDoc_STRVAR(stringproxy_deepcopy__doc__,
"__deepcopy__($self, memo, /)\n"
"--\n"
"\n");
"--\n"
"\n");

PyDoc_STRVAR(stringproxy_copy__doc__,
"__copy__($self, /)\n"
"--\n"
"\n");
"__copy__($self, /)\n"
"--\n"
"\n");

/**
* @brief Struct for the other methods
Expand Down
3 changes: 2 additions & 1 deletion src/JSStringProxy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@

#include "include/StrType.hh"


std::unordered_set<JSStringProxy *> jsStringProxies;
extern JSContext *GLOBAL_CX;


void JSStringProxyMethodDefinitions::JSStringProxy_dealloc(JSStringProxy *self)
{
jsStringProxies.erase(self);
delete self->jsString;
}

Expand Down
44 changes: 22 additions & 22 deletions src/PyBytesProxyHandler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,30 +26,30 @@ static bool array_valueOf(JSContext *cx, unsigned argc, JS::Value *vp) {
return false;
}

JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
JS::RootedObject rootedArrayBuffer(cx, arrayBuffer->get());

auto byteLength = JS::GetArrayBufferByteLength(rootedArrayBuffer);

bool isSharedMemory;
bool isSharedMemory;
JS::AutoCheckCannotGC autoNoGC(cx);
uint8_t *data = JS::GetArrayBufferData(rootedArrayBuffer, &isSharedMemory, autoNoGC);

size_t numberOfDigits = 0;
for (size_t i = 0; i < byteLength; i++) {
numberOfDigits += data[i] < 10 ? 1 : data[i] < 100 ? 2 : 3;
numberOfDigits += data[i] < 10 ? 1 : data[i] < 100 ? 2 : 3;
}
const size_t STRING_LENGTH = byteLength + numberOfDigits;
JS::Latin1Char* buffer = (JS::Latin1Char *)malloc(sizeof(JS::Latin1Char) * STRING_LENGTH);
JS::Latin1Char *buffer = (JS::Latin1Char *)malloc(sizeof(JS::Latin1Char) * STRING_LENGTH);

size_t charIndex = 0;
sprintf((char*)&buffer[charIndex], "%d", data[0]);
snprintf((char *)&buffer[charIndex], 4, "%d", data[0]);
charIndex += data[0] < 10 ? 1 : data[0] < 100 ? 2 : 3;

for (size_t dataIndex = 1; dataIndex < byteLength; dataIndex++) {
buffer[charIndex] = ',';
charIndex++;
sprintf((char*)&buffer[charIndex], "%d", data[dataIndex]);
snprintf((char *)&buffer[charIndex], 4, "%d", data[dataIndex]);
charIndex += data[dataIndex] < 10 ? 1 : data[dataIndex] < 100 ? 2 : 3;
}

Expand Down Expand Up @@ -84,7 +84,7 @@ static bool iterator_next(JSContext *cx, unsigned argc, JS::Value *vp) {
JS::RootedObject thisObj(cx);
if (!args.computeThis(cx, &thisObj)) return false;

JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(thisObj, BytesIteratorSlotIteratedObject);
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(thisObj, BytesIteratorSlotIteratedObject);
JS::RootedObject rootedArrayBuffer(cx, arrayBuffer->get());

JS::RootedValue rootedNextIndex(cx, JS::GetReservedSlot(thisObj, BytesIteratorSlotNextIndex));
Expand Down Expand Up @@ -112,7 +112,7 @@ static bool iterator_next(JSContext *cx, unsigned argc, JS::Value *vp) {
if (!JS_SetProperty(cx, result, "done", done)) return false;

if (itemKind == ITEM_KIND_VALUE) {
bool isSharedMemory;
bool isSharedMemory;
JS::AutoCheckCannotGC autoNoGC(cx);
uint8_t *data = JS::GetArrayBufferData(rootedArrayBuffer, &isSharedMemory, autoNoGC);

Expand All @@ -125,7 +125,7 @@ static bool iterator_next(JSContext *cx, unsigned argc, JS::Value *vp) {
JS::RootedValue rootedNextIndex(cx, JS::Int32Value(nextIndex));
items[0].set(rootedNextIndex);

bool isSharedMemory;
bool isSharedMemory;
JS::AutoCheckCannotGC autoNoGC(cx);
uint8_t *data = JS::GetArrayBufferData(rootedArrayBuffer, &isSharedMemory, autoNoGC);

Expand Down Expand Up @@ -216,8 +216,8 @@ static bool array_iterator_func(JSContext *cx, unsigned argc, JS::Value *vp, int
if (!JS::Construct(cx, constructor_val, JS::HandleValueArray::empty(), &obj)) return false;
if (!obj) return false;

JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);

JS::SetReservedSlot(obj, BytesIteratorSlotIteratedObject, JS::PrivateValue(arrayBuffer));
JS::SetReservedSlot(obj, BytesIteratorSlotNextIndex, JS::Int32Value(0));
JS::SetReservedSlot(obj, BytesIteratorSlotItemKind, JS::Int32Value(itemKind));
Expand Down Expand Up @@ -253,13 +253,13 @@ bool PyBytesProxyHandler::set(JSContext *cx, JS::HandleObject proxy, JS::HandleI
JS::HandleValue v, JS::HandleValue receiver,
JS::ObjectOpResult &result) const {

// block all modifications
// block all modifications

PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);

PyErr_Format(PyExc_TypeError,
"'%.100s' object has only read-only attributes",
Py_TYPE(self)->tp_name);
"'%.100s' object has only read-only attributes",
Py_TYPE(self)->tp_name);

return result.failReadOnly();
}
Expand Down Expand Up @@ -298,7 +298,7 @@ bool PyBytesProxyHandler::getOwnPropertyDescriptor(

// "length" and "byteLength" properties have the same value
if ((JS_StringEqualsLiteral(cx, idString, "length", &isProperty) && isProperty) || (JS_StringEqualsLiteral(cx, id.toString(), "byteLength", &isProperty) && isProperty)) {
JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);

JS::RootedObject rootedArrayBuffer(cx, arrayBuffer->get());

Expand All @@ -314,7 +314,7 @@ bool PyBytesProxyHandler::getOwnPropertyDescriptor(

// "buffer" property
if (JS_StringEqualsLiteral(cx, idString, "buffer", &isProperty) && isProperty) {
JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);

desc.set(mozilla::Some(
JS::PropertyDescriptor::Data(
Expand Down Expand Up @@ -392,10 +392,10 @@ bool PyBytesProxyHandler::getOwnPropertyDescriptor(
// item
Py_ssize_t index;
if (idToIndex(cx, id, &index)) {
JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
JS::RootedObject rootedArrayBuffer(cx, arrayBuffer->get());

bool isSharedMemory;
bool isSharedMemory;
JS::AutoCheckCannotGC autoNoGC(cx);
uint8_t *data = JS::GetArrayBufferData(rootedArrayBuffer, &isSharedMemory, autoNoGC);

Expand All @@ -406,7 +406,7 @@ bool PyBytesProxyHandler::getOwnPropertyDescriptor(
));

return true;
}
}

PyObject *attrName = idToKey(cx, id);
PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);
Expand Down
4 changes: 2 additions & 2 deletions src/PyListProxyHandler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2099,8 +2099,8 @@ void PyListProxyHandler::finalize(JS::GCContext *gcx, JSObject *proxy) const {
// We cannot call Py_DECREF here when shutting down as the thread state is gone.
// Then, when shutting down, there is only on reference left, and we don't need
// to free the object since the entire process memory is being released.
PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);
if (Py_REFCNT(self) > 1) {
if (!_Py_IsFinalizing()) {
PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);
Py_DECREF(self);
}
}
Expand Down
10 changes: 5 additions & 5 deletions src/PyObjectProxyHandler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ void PyObjectProxyHandler::finalize(JS::GCContext *gcx, JSObject *proxy) const {
// We cannot call Py_DECREF here when shutting down as the thread state is gone.
// Then, when shutting down, there is only on reference left, and we don't need
// to free the object since the entire process memory is being released.
PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);
if (Py_REFCNT(self) > 1) {
if (!_Py_IsFinalizing()) {
PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);
Py_DECREF(self);
}
}
Expand All @@ -109,12 +109,12 @@ bool PyObjectProxyHandler::ownPropertyKeys(JSContext *cx, JS::HandleObject proxy
}

return handleOwnPropertyKeys(cx, nonDunderKeys, PyList_Size(nonDunderKeys), props);
}
}
else {
if (PyErr_Occurred()) {
PyErr_Clear();
PyErr_Clear();
}

return handleOwnPropertyKeys(cx, PyList_New(0), 0, props);
}
}
Expand Down
1 change: 1 addition & 0 deletions src/StrType.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ PyObject *StrType::proxifyString(JSContext *cx, JS::HandleValue strVal) {
JS::RootedObject obj(cx);
pyString->jsString = new JS::PersistentRootedValue(cx);
pyString->jsString->setString((JSString *)lstr);
jsStringProxies.insert(pyString);

// Initialize as legacy string (https://github.com/python/cpython/blob/v3.12.0b1/Include/cpython/unicodeobject.h#L78-L93)
// see https://github.com/python/cpython/blob/v3.11.3/Objects/unicodeobject.c#L1230-L1245
Expand Down
51 changes: 35 additions & 16 deletions src/jsTypeFactory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,46 +49,64 @@ static PyObjectProxyHandler pyObjectProxyHandler;
static PyListProxyHandler pyListProxyHandler;
static PyIterableProxyHandler pyIterableProxyHandler;

std::unordered_map<const char16_t *, PyObject *> ucs2ToPyObjectMap; // a map of char16_t (UCS-2) buffers to their corresponding PyObjects, used when finalizing JSExternalStrings
std::unordered_map<const JS::Latin1Char *, PyObject *> latin1ToPyObjectMap; // a map of Latin-1 char buffers to their corresponding PyObjects, used when finalizing JSExternalStrings
std::unordered_map<PyObject *, size_t> externalStringObjToRefCountMap;// a map of python string objects to the number of JSExternalStrings that depend on it, used when finalizing JSExternalStrings

PyObject *PythonExternalString::getPyString(const char16_t *chars)
{
return ucs2ToPyObjectMap[chars];
for (auto it: externalStringObjToRefCountMap) {
if (PyUnicode_DATA(it.first) == (void *)chars) { // PyUnicode_<2/1>BYTE_DATA are just type casts of PyUnicode_DATA
return it.first;
}
}

return NULL; // this shouldn't be reachable
}

PyObject *PythonExternalString::getPyString(const JS::Latin1Char *chars)
{
return latin1ToPyObjectMap[chars];

return PythonExternalString::getPyString((const char16_t *)chars);
}

void PythonExternalString::finalize(char16_t *chars) const
{
// We cannot call Py_DECREF here when shutting down as the thread state is gone.
// Then, when shutting down, there is only on reference left, and we don't need
// to free the object since the entire process memory is being released.
PyObject *object = ucs2ToPyObjectMap[chars];
if (Py_REFCNT(object) > 1) {
Py_DECREF(object);
if (_Py_IsFinalizing()) { return; }

for (auto it = externalStringObjToRefCountMap.cbegin(), next_it = it; it != externalStringObjToRefCountMap.cend(); it = next_it) {
next_it++;
if (PyUnicode_DATA(it->first) == (void *)chars) {
Py_DECREF(it->first);
externalStringObjToRefCountMap[it->first] = externalStringObjToRefCountMap[it->first] - 1;

if (externalStringObjToRefCountMap[it->first] == 0) {
externalStringObjToRefCountMap.erase(it);
}
}
}
}

void PythonExternalString::finalize(JS::Latin1Char *chars) const
{
PyObject *object = latin1ToPyObjectMap[chars];
if (Py_REFCNT(object) > 1) {
Py_DECREF(object);
}
PythonExternalString::finalize((char16_t *)chars);
}

size_t PythonExternalString::sizeOfBuffer(const char16_t *chars, mozilla::MallocSizeOf mallocSizeOf) const
{
return PyUnicode_GetLength(ucs2ToPyObjectMap[chars]);
for (auto it: externalStringObjToRefCountMap) {
if (PyUnicode_DATA(it.first) == (void *)chars) {
return PyUnicode_GetLength(it.first);
}
}

return 0; // // this shouldn't be reachable
}

size_t PythonExternalString::sizeOfBuffer(const JS::Latin1Char *chars, mozilla::MallocSizeOf mallocSizeOf) const
{
return PyUnicode_GetLength(latin1ToPyObjectMap[chars]);
return PythonExternalString::sizeOfBuffer((const char16_t *)chars, mallocSizeOf);
}

PythonExternalString PythonExternalStringCallbacks = {};
Expand Down Expand Up @@ -151,21 +169,22 @@ JS::Value jsTypeFactory(JSContext *cx, PyObject *object) {
break;
}
case (PyUnicode_2BYTE_KIND): {
ucs2ToPyObjectMap[(char16_t *)PyUnicode_2BYTE_DATA(object)] = object;
externalStringObjToRefCountMap[object] = externalStringObjToRefCountMap[object] + 1;
Py_INCREF(object);
JSString *str = JS_NewExternalUCString(cx, (char16_t *)PyUnicode_2BYTE_DATA(object), PyUnicode_GET_LENGTH(object), &PythonExternalStringCallbacks);
returnType.setString(str);
break;
}
case (PyUnicode_1BYTE_KIND): {
latin1ToPyObjectMap[(JS::Latin1Char *)PyUnicode_1BYTE_DATA(object)] = object;
externalStringObjToRefCountMap[object] = externalStringObjToRefCountMap[object] + 1;
Py_INCREF(object);
JSString *str = JS_NewExternalStringLatin1(cx, (JS::Latin1Char *)PyUnicode_1BYTE_DATA(object), PyUnicode_GET_LENGTH(object), &PythonExternalStringCallbacks);
// JSExternalString can now be properly treated as either one-byte or two-byte strings when GCed
// see https://hg.mozilla.org/releases/mozilla-esr128/file/tip/js/src/vm/StringType-inl.h#l785
returnType.setString(str);
break;
}
}
Py_INCREF(object);
}
else if (PyMethod_Check(object) || PyFunction_Check(object) || PyCFunction_Check(object)) {
// can't determine number of arguments for PyCFunctions, so just assume potentially unbounded
Expand Down
19 changes: 17 additions & 2 deletions src/modules/pythonmonkey/pythonmonkey.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,25 @@

JS::PersistentRootedObject jsFunctionRegistry;

void finalizationRegistryGCCallback(JSContext *cx, JSGCStatus status, JS::GCReason reason, void *data) {
void pythonmonkeyGCCallback(JSContext *cx, JSGCStatus status, JS::GCReason reason, void *data) {
if (status == JSGCStatus::JSGC_END) {
JS::ClearKeptObjects(GLOBAL_CX);
while (JOB_QUEUE->runFinalizationRegistryCallbacks(GLOBAL_CX));

if (_Py_IsFinalizing()) {
return; // do not move char pointers around if python is finalizing
}

JS::AutoCheckCannotGC nogc;
for (const JSStringProxy *jsStringProxy: jsStringProxies) { // char buffers may have moved, so we need to re-point our JSStringProxies
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In symmetry to this, would a Python str object move its char buffers, and how to notify the JS land?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you know if python string objects move their buffers? As far as I'm aware they never move

JSLinearString *str = (JSLinearString *)(jsStringProxy->jsString->toString()); // jsString is guaranteed to be linear
if (JS::LinearStringHasLatin1Chars(str)) {
(((PyUnicodeObject *)(jsStringProxy))->data.any) = (void *)JS::GetLatin1LinearStringChars(nogc, str);
}
else { // utf16 / ucs2 string
(((PyUnicodeObject *)(jsStringProxy))->data.any) = (void *)JS::GetTwoByteLinearStringChars(nogc, str);
}
}
}
}

Expand Down Expand Up @@ -547,7 +562,7 @@ PyMODINIT_FUNC PyInit_pythonmonkey(void)
return NULL;
}

JS_SetGCCallback(GLOBAL_CX, finalizationRegistryGCCallback, NULL);
JS_SetGCCallback(GLOBAL_CX, pythonmonkeyGCCallback, NULL);

JS::RealmCreationOptions creationOptions = JS::RealmCreationOptions();
JS::RealmBehaviors behaviours = JS::RealmBehaviors();
Expand Down
Loading