From e2a5e6ace0acec23f117db15788ef84ef522ffbf Mon Sep 17 00:00:00 2001 From: Qianqian Fang Date: Tue, 26 Apr 2022 18:20:56 -0400 Subject: [PATCH] read packed arrays, 1d or nd, as numpy ndarray --- bjdata/decoder.py | 42 ++++++++++++++++++++++++++++++++++++------ src/decoder.c | 27 ++++++++++++++++++++++++--- test/test.py | 5 +++-- 3 files changed, 63 insertions(+), 11 deletions(-) diff --git a/bjdata/decoder.py b/bjdata/decoder.py index c599a73..800340d 100644 --- a/bjdata/decoder.py +++ b/bjdata/decoder.py @@ -26,13 +26,16 @@ TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_HIGH_PREC, TYPE_CHAR, TYPE_UINT16, TYPE_UINT32, TYPE_UINT64, TYPE_FLOAT16, TYPE_STRING, OBJECT_START, OBJECT_END, ARRAY_START, ARRAY_END, CONTAINER_TYPE, CONTAINER_COUNT) -from numpy import array as ndarray, dtype as npdtype +from numpy import array as ndarray, dtype as npdtype, frombuffer as buffer2numpy +from array import array as typedarray __TYPES = frozenset((TYPE_NULL, TYPE_BOOL_TRUE, TYPE_BOOL_FALSE, TYPE_INT8, TYPE_UINT8, TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_UINT16, TYPE_UINT32, TYPE_UINT64, TYPE_FLOAT16, TYPE_HIGH_PREC, TYPE_CHAR, TYPE_STRING, ARRAY_START, OBJECT_START)) __TYPES_NO_DATA = frozenset((TYPE_NULL, TYPE_BOOL_FALSE, TYPE_BOOL_TRUE)) __TYPES_INT = frozenset((TYPE_INT8, TYPE_UINT8, TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_UINT16, TYPE_UINT32, TYPE_UINT64)) +__TYPES_FIXLEN = frozenset((TYPE_INT8, TYPE_UINT8, TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_UINT16, TYPE_UINT32, TYPE_UINT64, + TYPE_FLOAT16, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_CHAR)) __SMALL_INTS_DECODED = [{pack('>b', i): i for i in range(-128, 128)}, {pack('B', i): i for i in range(256)}, {pack('0: + container = fp_read(count*__DTYPELEN_MAP[type_]) + if len(container) < count*__DTYPELEN_MAP[type_]: + raise DecoderException('Container bytes array too short') + + #container=typedarray(__DTYPE_MAP[type_], container) + if len(dims)>0: + container=buffer2numpy(container, dtype=npdtype(__DTYPE_MAP[type_])) + container=container.reshape(dims) + else: + container=buffer2numpy(container, dtype=npdtype(__DTYPE_MAP[type_])) + return container + container = [] while count > 0 and (counting or marker != ARRAY_END): if marker == TYPE_NOOP: diff --git a/src/decoder.c b/src/decoder.c index 376e2eb..440cd5f 100644 --- a/src/decoder.c +++ b/src/decoder.c @@ -15,6 +15,8 @@ * limitations under the License. */ +//#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + #include #include #include @@ -143,6 +145,7 @@ static PyObject* _decode_char(_bjdata_decoder_buffer_t *buffer); static PyObject* _decode_string(_bjdata_decoder_buffer_t *buffer); static _container_params_t _get_container_params(_bjdata_decoder_buffer_t *buffer, int in_mapping, unsigned int *ndim, long long **dims); static int _is_no_data_type(char type); +static int _is_fixed_len_type(char type); static int _get_type_info(char type, int *bytelen); static PyObject* _no_data_type(char type); static PyObject* _decode_array(_bjdata_decoder_buffer_t *buffer); @@ -794,6 +797,12 @@ static int _is_no_data_type(char type) { return ((TYPE_NULL == type) || (TYPE_BOOL_TRUE == type) || (TYPE_BOOL_FALSE == type)); } +static int _is_fixed_len_type(char type) { + return ((TYPE_INT8 == type) || (TYPE_UINT8 == type) || (TYPE_INT16 == type) + || (TYPE_UINT16 == type) || (TYPE_INT32 == type) || (TYPE_UINT32 == type) + || (TYPE_INT64 == type) || (TYPE_UINT64 == type) || (TYPE_CHAR == type) + || (TYPE_FLOAT16 == type) || (TYPE_FLOAT32 == type) || (TYPE_FLOAT64 == type)); +} // Note: Does NOT reserve a new reference static int _get_type_info(char type, int *bytelen) { @@ -831,6 +840,9 @@ static int _get_type_info(char type, int *bytelen) { case TYPE_UINT64: *bytelen=8; return PyArray_ULONGLONG; + case TYPE_CHAR: + *bytelen=1; + return PyArray_CHAR; default: *bytelen=0; PyErr_SetString(PyExc_RuntimeError, "Internal error - _get_type_info"); @@ -865,15 +877,14 @@ static PyObject* _decode_array(_bjdata_decoder_buffer_t *buffer) { goto bail; } marker = params.marker; - if (params.counting) { // special case - byte array if ((TYPE_UINT8 == params.type) && !buffer->prefs.no_bytes && ndims==0) { BAIL_ON_NULL(list = PyBytes_FromStringAndSize(NULL, params.count)); READ_INTO_OR_BAIL(params.count, PyBytes_AS_STRING(list), "bytes array"); return list; - // special case - no data types - } else if (ndims) { + // special case - nd-array + } else if (ndims && params.type) { unsigned int i; int bytelen=0; npy_intp *arraydim=calloc(sizeof(npy_intp),ndims); @@ -897,6 +908,16 @@ static PyObject* _decode_array(_bjdata_decoder_buffer_t *buffer) { Py_INCREF(value); } value = NULL; + } else if (_is_fixed_len_type(params.type) && params.count > 0) { // 1d packed array + int bytelen=0; + npy_intp *arraydim=calloc(sizeof(npy_intp),1); + int pytype=_get_type_info(params.type,&bytelen); + PyArrayObject *jdarray=NULL; + arraydim[0]=params.count; + BAIL_ON_NULL(jdarray = (PyArrayObject *) PyArray_SimpleNew(1, arraydim, pytype)); + READ_INTO_OR_BAIL(bytelen*params.count, (char *)PyArray_DATA(jdarray), "1D packed array"); + free(arraydim); + return PyArray_Return(jdarray); // take advantage of faster creation/setting of list since count known } else { Py_ssize_t list_pos = 0; // position in list for far fast setting via PyList_SET_ITEM diff --git a/test/test.py b/test/test.py index c2423eb..a31461e 100644 --- a/test/test.py +++ b/test/test.py @@ -34,6 +34,7 @@ from bjdata.encoder import dump as bjdpuredump, dumpb as bjdpuredumpb from bjdata.decoder import load as bjdpureload, loadb as bjdpureloadb from numpy import array as ndarray, int8 as npint8 +from array import array as typedarray PY2 = version_info[0] < 3 @@ -263,7 +264,7 @@ def test_bytes(self): for cast in (bytes, bytearray): self.check_enc_dec(cast(b'')) self.check_enc_dec(cast(b'\x01' * 4)) - self.assertEqual(self.bjdloadb(self.bjddumpb(cast(b'\x04' * 4)), no_bytes=True), [4] * 4) + self.assertEqual((self.bjdloadb(self.bjddumpb(cast(b'\x04' * 4)), no_bytes=True) == ndarray([4] * 4, npint8)).all(), True) self.check_enc_dec(cast(b'largebinary' * 100)) def test_nd_array(self): @@ -286,7 +287,7 @@ def test_array_fixed(self): self.bjdloadb(ARRAY_START + CONTAINER_TYPE + bjd_type + CONTAINER_COUNT + TYPE_UINT8 + b'\x05'), [py_obj] * 5 ) - self.assertEqual(self.bjdloadb(raw_start + b'\x03' + (b'\x01' * 3)), [1, 1, 1]) + self.assertEqual((self.bjdloadb(raw_start + b'\x03' + (b'\x01' * 3))==ndarray([1, 1, 1], dtype=npint8)).all(), True) # invalid type with self.assertRaises(DecoderException):