Skip to content

Commit

Permalink
Fix memory leak due to ignored constructor flag
Browse files Browse the repository at this point in the history
Fixes #150

PyArray_NewFromDescr ignores the NPY_ARRAY_OWNDATA flag, so we
need to set it manually so that NumPy frees the memory.

Also switch memory allocation to use PyDataMem_NEW/FREE. This
should generally be the same as PyMem_Malloc, but it could end up
different in a situation where the C ext is not linked against
the same C stdlib. In that case, NumPy would not call the correct
de-allocator when freeing the memory it gains ownership over.
  • Loading branch information
ihnorton committed May 14, 2019
1 parent 8744efc commit dd88380
Showing 1 changed file with 37 additions and 21 deletions.
58 changes: 37 additions & 21 deletions tiledb/libtiledb.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@ from cpython.bytes cimport (PyBytes_GET_SIZE,
PyBytes_FromString,
PyBytes_FromStringAndSize)

from cpython.mem cimport (PyMem_Malloc,
PyMem_Realloc,
PyMem_Free)

from cpython.ref cimport (Py_INCREF, Py_DECREF, PyTypeObject)

from libc.stdio cimport (FILE, stdout)
Expand Down Expand Up @@ -52,6 +48,12 @@ cdef extern from "numpy/arrayobject.h":
object obj)
# Steals a reference to dtype, need to incref the dtype
object PyArray_Scalar(void* ptr, np.dtype descr, object itemsize)
void PyArray_ENABLEFLAGS(np.ndarray arr, int flags)
# Defensively use the NumPy allocator (which should be the same
# as malloc/PyMem_Malloc in almost all situations)
char* PyDataMem_NEW(size_t nbytes) nogil
void PyDataMem_FREE(void* data) nogil
char* PyDataMem_RENEW(void* data, size_t nbytes) nogil

import sys
from os.path import abspath
Expand Down Expand Up @@ -3840,7 +3842,6 @@ cdef class ReadQuery(object):
@property
def _offsets(self): return self._offsets


def __init__(self, Array array, np.ndarray subarray, list attr_names, tiledb_layout_t layout):
self._buffers = dict()
self._offsets = dict()
Expand All @@ -3856,6 +3857,7 @@ cdef class ReadQuery(object):
vector [uint64_t*] offsets_ptrs
void* subarray_ptr = NULL
np.npy_intp dims[1]
np.ndarray tmparray
bytes battr_name
Py_ssize_t nattr = len(attr_names)

Expand All @@ -3880,11 +3882,13 @@ cdef class ReadQuery(object):
tiledb_query_free(&query_ptr)
_raise_ctx_err(ctx_ptr, rc)

cdef uint64_t* buffer_sizes_ptr = <uint64_t*> PyMem_Malloc(nattr * sizeof(uint64_t))
# lifetime: free in finally clause
cdef uint64_t* buffer_sizes_ptr = <uint64_t*> PyDataMem_NEW(nattr * sizeof(uint64_t))
if buffer_sizes_ptr == NULL:
tiledb_query_free(&query_ptr)
raise MemoryError()
cdef uint64_t* offsets_sizes_ptr = <uint64_t*> PyMem_Malloc(nattr * sizeof(uint64_t))
# lifetime: free in finally clause
cdef uint64_t* offsets_sizes_ptr = <uint64_t*> PyDataMem_NEW(nattr * sizeof(uint64_t))
if offsets_sizes_ptr == NULL:
tiledb_query_free(&query_ptr)
raise MemoryError()
Expand All @@ -3911,8 +3915,11 @@ cdef class ReadQuery(object):

# allocate buffer to hold offsets for var-length attribute
# NOTE offsets_sizes is in BYTES
offsets_ptrs.push_back(<uint64_t*> PyMem_Malloc(<size_t>(offsets_sizes_ptr[i])))
#self._offsets[name] = np.empty(offsets_sizes_ptr[i], dtype=np.uint8)

# lifetime:
# - free on exception
# - otherwise, ownership transferred to NumPy
offsets_ptrs.push_back(<uint64_t*> PyDataMem_NEW(<size_t>(offsets_sizes_ptr[i])))
else:

rc = tiledb_array_max_buffer_size(ctx_ptr, array_ptr, battr_name,
Expand All @@ -3922,8 +3929,10 @@ cdef class ReadQuery(object):
_raise_ctx_err(ctx_ptr, rc)
offsets_ptrs.push_back(NULL)

buffer_ptrs.push_back(<void*> PyMem_Malloc(<size_t>(buffer_sizes_ptr[i])))
#self._buffers[name] = np.empty(buffer_sizes_ptr[i], dtype=np.uint8)
# lifetime:
# - free on exception
# - otherwise, ownership transferred to NumPy
buffer_ptrs.push_back(<void*> PyDataMem_NEW(<size_t>(buffer_sizes_ptr[i])))

# set the query buffers
for i in range(nattr):
Expand Down Expand Up @@ -3964,31 +3973,38 @@ cdef class ReadQuery(object):
if name != "coords" and schema.attr(name).isvar:
dims[0] = offsets_sizes_ptr[i]
Py_INCREF(dtype)
self._offsets[name] = \
tmparray = \
PyArray_NewFromDescr(
<PyTypeObject*> np.ndarray,
dtype, 1, dims, NULL,
PyMem_Realloc(offsets_ptrs[i], <size_t>(offsets_sizes_ptr[i])),
np.NPY_OWNDATA, <object> NULL)
PyDataMem_RENEW(offsets_ptrs[i], <size_t>(offsets_sizes_ptr[i])),
0, <object> NULL)
PyArray_ENABLEFLAGS(tmparray, np.NPY_ARRAY_OWNDATA)
self._offsets[name] = tmparray

dims[0] = buffer_sizes_ptr[i]
Py_INCREF(dtype)
self._buffers[name] = \
tmparray = \
PyArray_NewFromDescr(
<PyTypeObject*> np.ndarray,
dtype, 1, dims, NULL,
PyMem_Realloc(buffer_ptrs[i], <size_t>(buffer_sizes_ptr[i])),
np.NPY_OWNDATA, <object> NULL)
PyDataMem_RENEW(buffer_ptrs[i], <size_t>(buffer_sizes_ptr[i])),
0, <object> NULL)
PyArray_ENABLEFLAGS(tmparray, np.NPY_ARRAY_OWNDATA)
self._buffers[name] = tmparray

except:
# we only free the PyDataMem_NEW malloc'd buffers on exception
# otherwise NumPy manages them
for i in range(nattr):
if buffer_ptrs[i] != NULL:
PyMem_Free(buffer_ptrs[i])
PyDataMem_FREE(buffer_ptrs[i])
if offsets_ptrs[i] != NULL:
PyMem_Free(offsets_ptrs[i])
PyDataMem_FREE(offsets_ptrs[i])
raise
finally:
PyMem_Free(buffer_sizes_ptr)
PyMem_Free(offsets_sizes_ptr)
PyDataMem_FREE(buffer_sizes_ptr)
PyDataMem_FREE(offsets_sizes_ptr)
tiledb_query_free(&query_ptr)


Expand Down

0 comments on commit dd88380

Please sign in to comment.