Skip to content

Commit

Permalink
Merge pull request #417 from Distributive-Network/caleb/fix/stringdata
Browse files Browse the repository at this point in the history
caleb/fix/stringdata
  • Loading branch information
Xmader authored Aug 28, 2024
2 parents cfdde8f + 1951efc commit a0b3602
Show file tree
Hide file tree
Showing 9 changed files with 101 additions and 55 deletions.
8 changes: 7 additions & 1 deletion .git-blame-ignore-revs
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,10 @@ aae30e864449442cf0b04e94f8a242b1b667de9a
16dc3153b3cb684ca72445ed058babc8f5d97f42

# chore(linting): lint all C++ files
58cd4b45777b046f03a63255c1d93e289e1cab5e
58cd4b45777b046f03a63255c1d93e289e1cab5e

# chore(linting): lint PyBytesProxyHandler.cc
d540ed6e0edfe9538dc726cf587dfb2cc76dde34

# chore(linting): lint PyObjectProxyHandler.cc
1d45ea98e42294cce16deec5454725d4de36f59f
16 changes: 10 additions & 6 deletions include/JSStringProxy.hh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

#include <Python.h>

#include <unordered_set>

/**
* @brief The typedef for the backing store that will be used by JSStringProxy objects. All it contains is a pointer to the JSString
*
Expand All @@ -24,6 +26,8 @@ typedef struct {
JS::PersistentRootedValue *jsString;
} JSStringProxy;

extern std::unordered_set<JSStringProxy *> jsStringProxies; // a collection of all JSStringProxy objects, used during a GCCallback to ensure they continue to point to the correct char buffer

/**
* @brief This struct is a bundle of methods used by the JSStringProxy type
*
Expand All @@ -37,7 +41,7 @@ public:
*/
static void JSStringProxy_dealloc(JSStringProxy *self);

/**
/**
* @brief copy protocol method for both copy and deepcopy
*
* @param self - The JSObjectProxy
Expand All @@ -49,13 +53,13 @@ public:
// docs for methods, copied from cpython
PyDoc_STRVAR(stringproxy_deepcopy__doc__,
"__deepcopy__($self, memo, /)\n"
"--\n"
"\n");
"--\n"
"\n");

PyDoc_STRVAR(stringproxy_copy__doc__,
"__copy__($self, /)\n"
"--\n"
"\n");
"__copy__($self, /)\n"
"--\n"
"\n");

/**
* @brief Struct for the other methods
Expand Down
3 changes: 2 additions & 1 deletion src/JSStringProxy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@

#include "include/StrType.hh"


std::unordered_set<JSStringProxy *> jsStringProxies;
extern JSContext *GLOBAL_CX;


void JSStringProxyMethodDefinitions::JSStringProxy_dealloc(JSStringProxy *self)
{
jsStringProxies.erase(self);
delete self->jsString;
}

Expand Down
44 changes: 22 additions & 22 deletions src/PyBytesProxyHandler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,30 +26,30 @@ static bool array_valueOf(JSContext *cx, unsigned argc, JS::Value *vp) {
return false;
}

JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
JS::RootedObject rootedArrayBuffer(cx, arrayBuffer->get());

auto byteLength = JS::GetArrayBufferByteLength(rootedArrayBuffer);

bool isSharedMemory;
bool isSharedMemory;
JS::AutoCheckCannotGC autoNoGC(cx);
uint8_t *data = JS::GetArrayBufferData(rootedArrayBuffer, &isSharedMemory, autoNoGC);

size_t numberOfDigits = 0;
for (size_t i = 0; i < byteLength; i++) {
numberOfDigits += data[i] < 10 ? 1 : data[i] < 100 ? 2 : 3;
numberOfDigits += data[i] < 10 ? 1 : data[i] < 100 ? 2 : 3;
}
const size_t STRING_LENGTH = byteLength + numberOfDigits;
JS::Latin1Char* buffer = (JS::Latin1Char *)malloc(sizeof(JS::Latin1Char) * STRING_LENGTH);
JS::Latin1Char *buffer = (JS::Latin1Char *)malloc(sizeof(JS::Latin1Char) * STRING_LENGTH);

size_t charIndex = 0;
sprintf((char*)&buffer[charIndex], "%d", data[0]);
snprintf((char *)&buffer[charIndex], 4, "%d", data[0]);
charIndex += data[0] < 10 ? 1 : data[0] < 100 ? 2 : 3;

for (size_t dataIndex = 1; dataIndex < byteLength; dataIndex++) {
buffer[charIndex] = ',';
charIndex++;
sprintf((char*)&buffer[charIndex], "%d", data[dataIndex]);
snprintf((char *)&buffer[charIndex], 4, "%d", data[dataIndex]);
charIndex += data[dataIndex] < 10 ? 1 : data[dataIndex] < 100 ? 2 : 3;
}

Expand Down Expand Up @@ -84,7 +84,7 @@ static bool iterator_next(JSContext *cx, unsigned argc, JS::Value *vp) {
JS::RootedObject thisObj(cx);
if (!args.computeThis(cx, &thisObj)) return false;

JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(thisObj, BytesIteratorSlotIteratedObject);
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(thisObj, BytesIteratorSlotIteratedObject);
JS::RootedObject rootedArrayBuffer(cx, arrayBuffer->get());

JS::RootedValue rootedNextIndex(cx, JS::GetReservedSlot(thisObj, BytesIteratorSlotNextIndex));
Expand Down Expand Up @@ -112,7 +112,7 @@ static bool iterator_next(JSContext *cx, unsigned argc, JS::Value *vp) {
if (!JS_SetProperty(cx, result, "done", done)) return false;

if (itemKind == ITEM_KIND_VALUE) {
bool isSharedMemory;
bool isSharedMemory;
JS::AutoCheckCannotGC autoNoGC(cx);
uint8_t *data = JS::GetArrayBufferData(rootedArrayBuffer, &isSharedMemory, autoNoGC);

Expand All @@ -125,7 +125,7 @@ static bool iterator_next(JSContext *cx, unsigned argc, JS::Value *vp) {
JS::RootedValue rootedNextIndex(cx, JS::Int32Value(nextIndex));
items[0].set(rootedNextIndex);

bool isSharedMemory;
bool isSharedMemory;
JS::AutoCheckCannotGC autoNoGC(cx);
uint8_t *data = JS::GetArrayBufferData(rootedArrayBuffer, &isSharedMemory, autoNoGC);

Expand Down Expand Up @@ -216,8 +216,8 @@ static bool array_iterator_func(JSContext *cx, unsigned argc, JS::Value *vp, int
if (!JS::Construct(cx, constructor_val, JS::HandleValueArray::empty(), &obj)) return false;
if (!obj) return false;

JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);

JS::SetReservedSlot(obj, BytesIteratorSlotIteratedObject, JS::PrivateValue(arrayBuffer));
JS::SetReservedSlot(obj, BytesIteratorSlotNextIndex, JS::Int32Value(0));
JS::SetReservedSlot(obj, BytesIteratorSlotItemKind, JS::Int32Value(itemKind));
Expand Down Expand Up @@ -253,13 +253,13 @@ bool PyBytesProxyHandler::set(JSContext *cx, JS::HandleObject proxy, JS::HandleI
JS::HandleValue v, JS::HandleValue receiver,
JS::ObjectOpResult &result) const {

// block all modifications
// block all modifications

PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);

PyErr_Format(PyExc_TypeError,
"'%.100s' object has only read-only attributes",
Py_TYPE(self)->tp_name);
"'%.100s' object has only read-only attributes",
Py_TYPE(self)->tp_name);

return result.failReadOnly();
}
Expand Down Expand Up @@ -298,7 +298,7 @@ bool PyBytesProxyHandler::getOwnPropertyDescriptor(

// "length" and "byteLength" properties have the same value
if ((JS_StringEqualsLiteral(cx, idString, "length", &isProperty) && isProperty) || (JS_StringEqualsLiteral(cx, id.toString(), "byteLength", &isProperty) && isProperty)) {
JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);

JS::RootedObject rootedArrayBuffer(cx, arrayBuffer->get());

Expand All @@ -314,7 +314,7 @@ bool PyBytesProxyHandler::getOwnPropertyDescriptor(

// "buffer" property
if (JS_StringEqualsLiteral(cx, idString, "buffer", &isProperty) && isProperty) {
JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);

desc.set(mozilla::Some(
JS::PropertyDescriptor::Data(
Expand Down Expand Up @@ -392,10 +392,10 @@ bool PyBytesProxyHandler::getOwnPropertyDescriptor(
// item
Py_ssize_t index;
if (idToIndex(cx, id, &index)) {
JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
JS::RootedObject rootedArrayBuffer(cx, arrayBuffer->get());

bool isSharedMemory;
bool isSharedMemory;
JS::AutoCheckCannotGC autoNoGC(cx);
uint8_t *data = JS::GetArrayBufferData(rootedArrayBuffer, &isSharedMemory, autoNoGC);

Expand All @@ -406,7 +406,7 @@ bool PyBytesProxyHandler::getOwnPropertyDescriptor(
));

return true;
}
}

PyObject *attrName = idToKey(cx, id);
PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);
Expand Down
4 changes: 2 additions & 2 deletions src/PyListProxyHandler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2099,8 +2099,8 @@ void PyListProxyHandler::finalize(JS::GCContext *gcx, JSObject *proxy) const {
// We cannot call Py_DECREF here when shutting down as the thread state is gone.
// Then, when shutting down, there is only on reference left, and we don't need
// to free the object since the entire process memory is being released.
PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);
if (Py_REFCNT(self) > 1) {
if (!_Py_IsFinalizing()) {
PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);
Py_DECREF(self);
}
}
Expand Down
10 changes: 5 additions & 5 deletions src/PyObjectProxyHandler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ void PyObjectProxyHandler::finalize(JS::GCContext *gcx, JSObject *proxy) const {
// We cannot call Py_DECREF here when shutting down as the thread state is gone.
// Then, when shutting down, there is only on reference left, and we don't need
// to free the object since the entire process memory is being released.
PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);
if (Py_REFCNT(self) > 1) {
if (!_Py_IsFinalizing()) {
PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);
Py_DECREF(self);
}
}
Expand All @@ -109,12 +109,12 @@ bool PyObjectProxyHandler::ownPropertyKeys(JSContext *cx, JS::HandleObject proxy
}

return handleOwnPropertyKeys(cx, nonDunderKeys, PyList_Size(nonDunderKeys), props);
}
}
else {
if (PyErr_Occurred()) {
PyErr_Clear();
PyErr_Clear();
}

return handleOwnPropertyKeys(cx, PyList_New(0), 0, props);
}
}
Expand Down
1 change: 1 addition & 0 deletions src/StrType.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ PyObject *StrType::proxifyString(JSContext *cx, JS::HandleValue strVal) {
JS::RootedObject obj(cx);
pyString->jsString = new JS::PersistentRootedValue(cx);
pyString->jsString->setString((JSString *)lstr);
jsStringProxies.insert(pyString);

// Initialize as legacy string (https://github.com/python/cpython/blob/v3.12.0b1/Include/cpython/unicodeobject.h#L78-L93)
// see https://github.com/python/cpython/blob/v3.11.3/Objects/unicodeobject.c#L1230-L1245
Expand Down
51 changes: 35 additions & 16 deletions src/jsTypeFactory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,46 +49,64 @@ static PyObjectProxyHandler pyObjectProxyHandler;
static PyListProxyHandler pyListProxyHandler;
static PyIterableProxyHandler pyIterableProxyHandler;

std::unordered_map<const char16_t *, PyObject *> ucs2ToPyObjectMap; // a map of char16_t (UCS-2) buffers to their corresponding PyObjects, used when finalizing JSExternalStrings
std::unordered_map<const JS::Latin1Char *, PyObject *> latin1ToPyObjectMap; // a map of Latin-1 char buffers to their corresponding PyObjects, used when finalizing JSExternalStrings
std::unordered_map<PyObject *, size_t> externalStringObjToRefCountMap;// a map of python string objects to the number of JSExternalStrings that depend on it, used when finalizing JSExternalStrings

PyObject *PythonExternalString::getPyString(const char16_t *chars)
{
return ucs2ToPyObjectMap[chars];
for (auto it: externalStringObjToRefCountMap) {
if (PyUnicode_DATA(it.first) == (void *)chars) { // PyUnicode_<2/1>BYTE_DATA are just type casts of PyUnicode_DATA
return it.first;
}
}

return NULL; // this shouldn't be reachable
}

PyObject *PythonExternalString::getPyString(const JS::Latin1Char *chars)
{
return latin1ToPyObjectMap[chars];

return PythonExternalString::getPyString((const char16_t *)chars);
}

void PythonExternalString::finalize(char16_t *chars) const
{
// We cannot call Py_DECREF here when shutting down as the thread state is gone.
// Then, when shutting down, there is only on reference left, and we don't need
// to free the object since the entire process memory is being released.
PyObject *object = ucs2ToPyObjectMap[chars];
if (Py_REFCNT(object) > 1) {
Py_DECREF(object);
if (_Py_IsFinalizing()) { return; }

for (auto it = externalStringObjToRefCountMap.cbegin(), next_it = it; it != externalStringObjToRefCountMap.cend(); it = next_it) {
next_it++;
if (PyUnicode_DATA(it->first) == (void *)chars) {
Py_DECREF(it->first);
externalStringObjToRefCountMap[it->first] = externalStringObjToRefCountMap[it->first] - 1;

if (externalStringObjToRefCountMap[it->first] == 0) {
externalStringObjToRefCountMap.erase(it);
}
}
}
}

void PythonExternalString::finalize(JS::Latin1Char *chars) const
{
PyObject *object = latin1ToPyObjectMap[chars];
if (Py_REFCNT(object) > 1) {
Py_DECREF(object);
}
PythonExternalString::finalize((char16_t *)chars);
}

size_t PythonExternalString::sizeOfBuffer(const char16_t *chars, mozilla::MallocSizeOf mallocSizeOf) const
{
return PyUnicode_GetLength(ucs2ToPyObjectMap[chars]);
for (auto it: externalStringObjToRefCountMap) {
if (PyUnicode_DATA(it.first) == (void *)chars) {
return PyUnicode_GetLength(it.first);
}
}

return 0; // // this shouldn't be reachable
}

size_t PythonExternalString::sizeOfBuffer(const JS::Latin1Char *chars, mozilla::MallocSizeOf mallocSizeOf) const
{
return PyUnicode_GetLength(latin1ToPyObjectMap[chars]);
return PythonExternalString::sizeOfBuffer((const char16_t *)chars, mallocSizeOf);
}

PythonExternalString PythonExternalStringCallbacks = {};
Expand Down Expand Up @@ -151,21 +169,22 @@ JS::Value jsTypeFactory(JSContext *cx, PyObject *object) {
break;
}
case (PyUnicode_2BYTE_KIND): {
ucs2ToPyObjectMap[(char16_t *)PyUnicode_2BYTE_DATA(object)] = object;
externalStringObjToRefCountMap[object] = externalStringObjToRefCountMap[object] + 1;
Py_INCREF(object);
JSString *str = JS_NewExternalUCString(cx, (char16_t *)PyUnicode_2BYTE_DATA(object), PyUnicode_GET_LENGTH(object), &PythonExternalStringCallbacks);
returnType.setString(str);
break;
}
case (PyUnicode_1BYTE_KIND): {
latin1ToPyObjectMap[(JS::Latin1Char *)PyUnicode_1BYTE_DATA(object)] = object;
externalStringObjToRefCountMap[object] = externalStringObjToRefCountMap[object] + 1;
Py_INCREF(object);
JSString *str = JS_NewExternalStringLatin1(cx, (JS::Latin1Char *)PyUnicode_1BYTE_DATA(object), PyUnicode_GET_LENGTH(object), &PythonExternalStringCallbacks);
// JSExternalString can now be properly treated as either one-byte or two-byte strings when GCed
// see https://hg.mozilla.org/releases/mozilla-esr128/file/tip/js/src/vm/StringType-inl.h#l785
returnType.setString(str);
break;
}
}
Py_INCREF(object);
}
else if (PyMethod_Check(object) || PyFunction_Check(object) || PyCFunction_Check(object)) {
// can't determine number of arguments for PyCFunctions, so just assume potentially unbounded
Expand Down
19 changes: 17 additions & 2 deletions src/modules/pythonmonkey/pythonmonkey.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,25 @@

JS::PersistentRootedObject jsFunctionRegistry;

void finalizationRegistryGCCallback(JSContext *cx, JSGCStatus status, JS::GCReason reason, void *data) {
void pythonmonkeyGCCallback(JSContext *cx, JSGCStatus status, JS::GCReason reason, void *data) {
if (status == JSGCStatus::JSGC_END) {
JS::ClearKeptObjects(GLOBAL_CX);
while (JOB_QUEUE->runFinalizationRegistryCallbacks(GLOBAL_CX));

if (_Py_IsFinalizing()) {
return; // do not move char pointers around if python is finalizing
}

JS::AutoCheckCannotGC nogc;
for (const JSStringProxy *jsStringProxy: jsStringProxies) { // char buffers may have moved, so we need to re-point our JSStringProxies
JSLinearString *str = (JSLinearString *)(jsStringProxy->jsString->toString()); // jsString is guaranteed to be linear
if (JS::LinearStringHasLatin1Chars(str)) {
(((PyUnicodeObject *)(jsStringProxy))->data.any) = (void *)JS::GetLatin1LinearStringChars(nogc, str);
}
else { // utf16 / ucs2 string
(((PyUnicodeObject *)(jsStringProxy))->data.any) = (void *)JS::GetTwoByteLinearStringChars(nogc, str);
}
}
}
}

Expand Down Expand Up @@ -547,7 +562,7 @@ PyMODINIT_FUNC PyInit_pythonmonkey(void)
return NULL;
}

JS_SetGCCallback(GLOBAL_CX, finalizationRegistryGCCallback, NULL);
JS_SetGCCallback(GLOBAL_CX, pythonmonkeyGCCallback, NULL);

JS::RealmCreationOptions creationOptions = JS::RealmCreationOptions();
JS::RealmBehaviors behaviours = JS::RealmBehaviors();
Expand Down

0 comments on commit a0b3602

Please sign in to comment.