Skip to content

Commit

Permalink
read packed arrays, 1d or nd, as numpy ndarray
Browse files Browse the repository at this point in the history
  • Loading branch information
fangq committed Apr 26, 2022
1 parent 2dddc48 commit e2a5e6a
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 11 deletions.
42 changes: 36 additions & 6 deletions bjdata/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,16 @@
TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_HIGH_PREC, TYPE_CHAR,
TYPE_UINT16, TYPE_UINT32, TYPE_UINT64, TYPE_FLOAT16,
TYPE_STRING, OBJECT_START, OBJECT_END, ARRAY_START, ARRAY_END, CONTAINER_TYPE, CONTAINER_COUNT)
from numpy import array as ndarray, dtype as npdtype
from numpy import array as ndarray, dtype as npdtype, frombuffer as buffer2numpy
from array import array as typedarray

# Set of the type / container-start markers enumerated below.
__TYPES = frozenset((
    TYPE_NULL, TYPE_BOOL_TRUE, TYPE_BOOL_FALSE,
    TYPE_INT8, TYPE_UINT8, TYPE_INT16, TYPE_INT32, TYPE_INT64,
    TYPE_FLOAT32, TYPE_FLOAT64,
    TYPE_UINT16, TYPE_UINT32, TYPE_UINT64, TYPE_FLOAT16,
    TYPE_HIGH_PREC, TYPE_CHAR, TYPE_STRING,
    ARRAY_START, OBJECT_START,
))
# The null / boolean markers (presumably markers with no payload bytes,
# going by the name -- confirm against the decoder).
__TYPES_NO_DATA = frozenset((TYPE_NULL, TYPE_BOOL_FALSE, TYPE_BOOL_TRUE))
# All signed and unsigned integer markers.
__TYPES_INT = frozenset((
    TYPE_INT8, TYPE_UINT8,
    TYPE_INT16, TYPE_INT32, TYPE_INT64,
    TYPE_UINT16, TYPE_UINT32, TYPE_UINT64,
))
# Integer markers plus the float and char markers.
__TYPES_FIXLEN = __TYPES_INT | frozenset((TYPE_FLOAT16, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_CHAR))

# Lookup tables mapping every packed single byte to its integer value.
# Index 0 is built with the '>' struct prefix, index 1 with '<'.
__SMALL_INTS_DECODED = [
    dict((pack('>b', value), value) for value in range(-128, 128)),
    dict((pack('<b', value), value) for value in range(-128, 128)),
]
__SMALL_UINTS_DECODED = [
    dict((pack('>B', value), value) for value in range(256)),
    dict((pack('<B', value), value) for value in range(256)),
]
Expand All @@ -49,14 +52,28 @@
__DTYPE_MAP = { TYPE_INT8: 'b',
TYPE_UINT8: 'B',
TYPE_INT16: 'h',
TYPE_UINT16: 'H',
TYPE_UINT16: 'H',
TYPE_INT32: 'i',
TYPE_UINT32: 'I',
TYPE_UINT32: 'I',
TYPE_INT64: 'q',
TYPE_UINT64: 'Q',
TYPE_FLOAT32: 'h',
TYPE_UINT64: 'Q',
TYPE_FLOAT16: 'h',
TYPE_FLOAT32: 'f',
TYPE_FLOAT64: 'd'}
TYPE_FLOAT64: 'd',
TYPE_CHAR: 'c'}

# Size in bytes of a single element for each marker in the table.
__DTYPELEN_MAP = dict((
    (TYPE_INT8, 1),
    (TYPE_UINT8, 1),
    (TYPE_INT16, 2),
    (TYPE_UINT16, 2),
    (TYPE_INT32, 4),
    (TYPE_UINT32, 4),
    (TYPE_INT64, 8),
    (TYPE_UINT64, 8),
    (TYPE_FLOAT16, 2),
    (TYPE_FLOAT32, 4),
    (TYPE_FLOAT64, 8),
    (TYPE_CHAR, 1),
))

class DecoderException(ValueError):
"""Raised when decoding of a UBJSON stream fails."""
Expand Down Expand Up @@ -331,6 +348,19 @@ def __decode_array(fp_read, no_bytes, object_hook, object_pairs_hook, intern_obj
raise DecoderException('Container bytes array too short')
return container

if type_ in __TYPES_FIXLEN and count>0:
container = fp_read(count*__DTYPELEN_MAP[type_])
if len(container) < count*__DTYPELEN_MAP[type_]:
raise DecoderException('Container bytes array too short')

#container=typedarray(__DTYPE_MAP[type_], container)
if len(dims)>0:
container=buffer2numpy(container, dtype=npdtype(__DTYPE_MAP[type_]))
container=container.reshape(dims)
else:
container=buffer2numpy(container, dtype=npdtype(__DTYPE_MAP[type_]))
return container

container = []
while count > 0 and (counting or marker != ARRAY_END):
if marker == TYPE_NOOP:
Expand Down
27 changes: 24 additions & 3 deletions src/decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
* limitations under the License.
*/

//#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION

#include <Python.h>
#include <bytesobject.h>
#include <numpy/arrayobject.h>
Expand Down Expand Up @@ -143,6 +145,7 @@ static PyObject* _decode_char(_bjdata_decoder_buffer_t *buffer);
static PyObject* _decode_string(_bjdata_decoder_buffer_t *buffer);
static _container_params_t _get_container_params(_bjdata_decoder_buffer_t *buffer, int in_mapping, unsigned int *ndim, long long **dims);
static int _is_no_data_type(char type);
static int _is_fixed_len_type(char type);
static int _get_type_info(char type, int *bytelen);
static PyObject* _no_data_type(char type);
static PyObject* _decode_array(_bjdata_decoder_buffer_t *buffer);
Expand Down Expand Up @@ -794,6 +797,12 @@ static int _is_no_data_type(char type) {
return ((TYPE_NULL == type) || (TYPE_BOOL_TRUE == type) || (TYPE_BOOL_FALSE == type));
}

// Returns 1 when `type` is one of the integer, float or char markers
// listed below, and 0 for any other marker.
static int _is_fixed_len_type(char type) {
    switch (type) {
        case TYPE_INT8:
        case TYPE_UINT8:
        case TYPE_INT16:
        case TYPE_UINT16:
        case TYPE_INT32:
        case TYPE_UINT32:
        case TYPE_INT64:
        case TYPE_UINT64:
        case TYPE_CHAR:
        case TYPE_FLOAT16:
        case TYPE_FLOAT32:
        case TYPE_FLOAT64:
            return 1;
        default:
            return 0;
    }
}

// Note: Does NOT reserve a new reference
static int _get_type_info(char type, int *bytelen) {
Expand Down Expand Up @@ -831,6 +840,9 @@ static int _get_type_info(char type, int *bytelen) {
case TYPE_UINT64:
*bytelen=8;
return PyArray_ULONGLONG;
case TYPE_CHAR:
*bytelen=1;
return PyArray_CHAR;
default:
*bytelen=0;
PyErr_SetString(PyExc_RuntimeError, "Internal error - _get_type_info");
Expand Down Expand Up @@ -865,15 +877,14 @@ static PyObject* _decode_array(_bjdata_decoder_buffer_t *buffer) {
goto bail;
}
marker = params.marker;

if (params.counting) {
// special case - byte array
if ((TYPE_UINT8 == params.type) && !buffer->prefs.no_bytes && ndims==0) {
BAIL_ON_NULL(list = PyBytes_FromStringAndSize(NULL, params.count));
READ_INTO_OR_BAIL(params.count, PyBytes_AS_STRING(list), "bytes array");
return list;
// special case - no data types
} else if (ndims) {
// special case - nd-array
} else if (ndims && params.type) {
unsigned int i;
int bytelen=0;
npy_intp *arraydim=calloc(sizeof(npy_intp),ndims);
Expand All @@ -897,6 +908,16 @@ static PyObject* _decode_array(_bjdata_decoder_buffer_t *buffer) {
Py_INCREF(value);
}
value = NULL;
} else if (_is_fixed_len_type(params.type) && params.count > 0) { // 1d packed array
int bytelen=0;
npy_intp *arraydim=calloc(sizeof(npy_intp),1);
int pytype=_get_type_info(params.type,&bytelen);
PyArrayObject *jdarray=NULL;
arraydim[0]=params.count;
BAIL_ON_NULL(jdarray = (PyArrayObject *) PyArray_SimpleNew(1, arraydim, pytype));
READ_INTO_OR_BAIL(bytelen*params.count, (char *)PyArray_DATA(jdarray), "1D packed array");
free(arraydim);
return PyArray_Return(jdarray);
// take advantage of faster creation/setting of list since count known
} else {
Py_ssize_t list_pos = 0; // position in list for far fast setting via PyList_SET_ITEM
Expand Down
5 changes: 3 additions & 2 deletions test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from bjdata.encoder import dump as bjdpuredump, dumpb as bjdpuredumpb
from bjdata.decoder import load as bjdpureload, loadb as bjdpureloadb
from numpy import array as ndarray, int8 as npint8
from array import array as typedarray

PY2 = version_info[0] < 3

Expand Down Expand Up @@ -263,7 +264,7 @@ def test_bytes(self):
for cast in (bytes, bytearray):
self.check_enc_dec(cast(b''))
self.check_enc_dec(cast(b'\x01' * 4))
self.assertEqual(self.bjdloadb(self.bjddumpb(cast(b'\x04' * 4)), no_bytes=True), [4] * 4)
self.assertEqual((self.bjdloadb(self.bjddumpb(cast(b'\x04' * 4)), no_bytes=True) == ndarray([4] * 4, npint8)).all(), True)
self.check_enc_dec(cast(b'largebinary' * 100))

def test_nd_array(self):
Expand All @@ -286,7 +287,7 @@ def test_array_fixed(self):
self.bjdloadb(ARRAY_START + CONTAINER_TYPE + bjd_type + CONTAINER_COUNT + TYPE_UINT8 + b'\x05'),
[py_obj] * 5
)
self.assertEqual(self.bjdloadb(raw_start + b'\x03' + (b'\x01' * 3)), [1, 1, 1])
self.assertEqual((self.bjdloadb(raw_start + b'\x03' + (b'\x01' * 3))==ndarray([1, 1, 1], dtype=npint8)).all(), True)

# invalid type
with self.assertRaises(DecoderException):
Expand Down

0 comments on commit e2a5e6a

Please sign in to comment.