From 8b1776198013e6b6c45fede63dc6a8250772cd64 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 13 Mar 2020 13:34:38 -0700 Subject: [PATCH 01/13] raising error for non 1-d data --- python/cudf/cudf/core/column/column.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index f7f75aa92a2..4fa86d1f974 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1175,6 +1175,9 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): elif isinstance(arbitrary, np.ndarray): # CUDF assumes values are always contiguous + if hasattr(arbitrary, "ndim") and arbitrary.ndim != 1: + raise ValueError("Data must be 1-dimensional") + if not arbitrary.flags["C_CONTIGUOUS"]: arbitrary = np.ascontiguousarray(arbitrary) From 43775bf99dd84767e15c6664fc6f319b008b4b17 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 13 Mar 2020 15:07:58 -0700 Subject: [PATCH 02/13] change ndim comparison to allow 0-dim data as well --- python/cudf/cudf/core/column/column.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 4fa86d1f974..d61d747f8e4 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1175,7 +1175,7 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): elif isinstance(arbitrary, np.ndarray): # CUDF assumes values are always contiguous - if hasattr(arbitrary, "ndim") and arbitrary.ndim != 1: + if arbitrary.ndim > 1: raise ValueError("Data must be 1-dimensional") if not arbitrary.flags["C_CONTIGUOUS"]: From f5777658bcf613d407f1e606d5bff259dfd81eb9 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 13 Mar 2020 15:14:02 -0700 Subject: [PATCH 03/13] tests for multi-dim data --- python/cudf/cudf/tests/test_column.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff 
--git a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/test_column.py index 1c9e36fc50e..981dcc7a340 100644 --- a/python/cudf/cudf/tests/test_column.py +++ b/python/cudf/cudf/tests/test_column.py @@ -63,3 +63,18 @@ def test_column_offset_and_size(pandas_input, offset, size): expect = pandas_input.iloc[slicer].reset_index(drop=True) assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data", + [ + np.array([[23, 68, 2, 38, 9, 83, 72, 6, 98, 30]]), + np.array([[1, 2], [7, 6]]), + ], +) +def test_column_series_multi_dim(data): + with pytest.raises(ValueError): + cudf.Series(data) + + with pytest.raises(ValueError): + cudf.core.column.as_column(data) From d71cdb996e56840417a4450a7b1ac2686840a931 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 13 Mar 2020 17:16:25 -0500 Subject: [PATCH 04/13] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c2d1d394bac..0afa190dfc2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -166,6 +166,7 @@ - PR #4316 Add Java and JNI bindings for substring expression - PR #4314 Add Java and JNI bindings for string contains - PR #4461 Port nvstrings Miscellaneous functions to cuDF Python/Cython +- PR #4506 Check for multi-dimensional data in column/Series creation ## Bug Fixes From 9af59371e01563a68b2a44541b26555ffbcb3d68 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Sat, 14 Mar 2020 18:11:53 -0700 Subject: [PATCH 05/13] refactor np.ndarray instance check to __array_interface__ --- python/cudf/cudf/core/column/column.py | 46 ++++++++++++++------------ 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index d61d747f8e4..a9d50ff0852 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1173,27 +1173,6 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): ) return col - elif 
isinstance(arbitrary, np.ndarray): - # CUDF assumes values are always contiguous - if arbitrary.ndim > 1: - raise ValueError("Data must be 1-dimensional") - - if not arbitrary.flags["C_CONTIGUOUS"]: - arbitrary = np.ascontiguousarray(arbitrary) - - if dtype is not None: - arbitrary = arbitrary.astype(dtype) - - if arbitrary.dtype.kind == "M": - data = datetime.DatetimeColumn.from_numpy(arbitrary) - - elif arbitrary.dtype.kind in ("O", "U"): - data = as_column( - pa.Array.from_pandas(arbitrary), dtype=arbitrary.dtype - ) - else: - data = as_column(cupy.asarray(arbitrary), nan_as_null=nan_as_null) - elif isinstance(arbitrary, pa.Array): if isinstance(arbitrary, pa.StringArray): pa_size, pa_offset, nbuf, obuf, sbuf = buffers_from_pyarrow( @@ -1360,6 +1339,31 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): elif np.issubdtype(data.dtype, np.datetime64): data = data.fillna(np.datetime64("NaT")) + elif hasattr(arbitrary, "__array_interface__"): + # CUDF assumes values are always contiguous + desc = arbitrary.__array_interface__ + shape = desc["shape"] + arb_dtype = np.dtype(desc["typestr"]) + # CUDF assumes values are always contiguous + if len(shape) > 1: + raise ValueError("Data must be 1-dimensional") + + if desc["strides"] is not None: + arbitrary = np.ascontiguousarray(arbitrary) + + if dtype is not None: + arbitrary = arbitrary.astype(dtype) + + if arb_dtype.kind == "M": + data = datetime.DatetimeColumn.from_numpy(arbitrary) + + elif arb_dtype.kind in ("O", "U"): + data = as_column( + pa.Array.from_pandas(arbitrary), dtype=arbitrary.dtype + ) + else: + data = as_column(cupy.asarray(arbitrary), nan_as_null=nan_as_null) + elif isinstance(arbitrary, memoryview): data = as_column( np.asarray(arbitrary), dtype=dtype, nan_as_null=nan_as_null From 518c44ce00900a399c56ee9407fa7f5d9374eb28 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 16 Mar 2020 09:55:39 -0700 Subject: [PATCH 06/13] create a np.array from arbitrary __array_interface__ --- 
python/cudf/cudf/core/column/column.py | 27 +++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index a9d50ff0852..a56bd7e9f51 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1348,6 +1348,7 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): if len(shape) > 1: raise ValueError("Data must be 1-dimensional") + arbitrary = _arr_from_array_interface_(array_interface=desc) if desc["strides"] is not None: arbitrary = np.ascontiguousarray(arbitrary) @@ -1356,7 +1357,6 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): if arb_dtype.kind == "M": data = datetime.DatetimeColumn.from_numpy(arbitrary) - elif arb_dtype.kind in ("O", "U"): data = as_column( pa.Array.from_pandas(arbitrary), dtype=arbitrary.dtype @@ -1414,6 +1414,31 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): return data +def _arr_from_array_interface_(array_interface=None, copy=False): + """Generates numpy array from memory address + https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.interface.html + + Parameters + ---------- + array_interface : dict + Contains `__array_interface__` which has pointer to + the memory address for array re-construction. + + copy : bool + Copy array. Default False + + read_only_flag : bool + Read only array. Default False. + """ + + class numpy_holder: + pass + + holder = numpy_holder() + holder.__array_interface__ = array_interface + return np.array(holder, copy=copy) + + def column_applymap(udf, column, out_dtype): """Apply a elemenwise function to transform the values in the Column. 
From e5a80222cf05d1601c459420e134710a475bf5ab Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 16 Mar 2020 12:04:47 -0500 Subject: [PATCH 07/13] Update column.py --- python/cudf/cudf/core/column/column.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index a56bd7e9f51..420b1a6afb8 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1348,7 +1348,7 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): if len(shape) > 1: raise ValueError("Data must be 1-dimensional") - arbitrary = _arr_from_array_interface_(array_interface=desc) + arbitrary = _arr_from_array_interface(array_interface=desc) if desc["strides"] is not None: arbitrary = np.ascontiguousarray(arbitrary) @@ -1414,7 +1414,7 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): return data -def _arr_from_array_interface_(array_interface=None, copy=False): +def _arr_from_array_interface(array_interface=None, copy=False): """Generates numpy array from memory address https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.interface.html @@ -1427,8 +1427,6 @@ def _arr_from_array_interface_(array_interface=None, copy=False): copy : bool Copy array. Default False - read_only_flag : bool - Read only array. Default False. 
""" class numpy_holder: From 01716dad37f9ab3981dbb97474a02af7e3020348 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 16 Mar 2020 10:30:19 -0700 Subject: [PATCH 08/13] use np.asarray to convert contiguous inputs --- python/cudf/cudf/core/column/column.py | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 420b1a6afb8..3ab25756333 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1348,9 +1348,10 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): if len(shape) > 1: raise ValueError("Data must be 1-dimensional") - arbitrary = _arr_from_array_interface(array_interface=desc) if desc["strides"] is not None: arbitrary = np.ascontiguousarray(arbitrary) + else: + arbitrary = np.asarray(arbitrary) if dtype is not None: arbitrary = arbitrary.astype(dtype) @@ -1414,29 +1415,6 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): return data -def _arr_from_array_interface(array_interface=None, copy=False): - """Generates numpy array from memory address - https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.interface.html - - Parameters - ---------- - array_interface : dict - Contains `__array_interface__` which has pointer to - the memory address for array re-construction. - - copy : bool - Copy array. Default False - - """ - - class numpy_holder: - pass - - holder = numpy_holder() - holder.__array_interface__ = array_interface - return np.array(holder, copy=copy) - - def column_applymap(udf, column, out_dtype): """Apply a elemenwise function to transform the values in the Column. 
From 2b8632bf48b904e60daa4207f89092a4edfc7644 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 16 Mar 2020 12:35:15 -0500 Subject: [PATCH 09/13] Update python/cudf/cudf/core/column/column.py Co-Authored-By: Keith Kraus --- python/cudf/cudf/core/column/column.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 3ab25756333..b7d19758acd 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1348,10 +1348,9 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): if len(shape) > 1: raise ValueError("Data must be 1-dimensional") - if desc["strides"] is not None: + arbitrary = np.asarray(arbitrary) + if not arbitrary.flags["C_CONTIGUOUS"]: arbitrary = np.ascontiguousarray(arbitrary) - else: - arbitrary = np.asarray(arbitrary) if dtype is not None: arbitrary = arbitrary.astype(dtype) From 6b9fbd998e4a1c56f438da5d52b924387e8af8fb Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 16 Mar 2020 11:30:19 -0700 Subject: [PATCH 10/13] removing DatetimeColumn.from_numpy call and instead letting as_column handle it --- python/cudf/cudf/core/column/column.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 3ab25756333..988ecbef579 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1357,7 +1357,7 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): arbitrary = arbitrary.astype(dtype) if arb_dtype.kind == "M": - data = datetime.DatetimeColumn.from_numpy(arbitrary) + data = as_column(pa.array(arbitrary), nan_as_null=nan_as_null) elif arb_dtype.kind in ("O", "U"): data = as_column( pa.Array.from_pandas(arbitrary), dtype=arbitrary.dtype From 36362e72a4372588f1ad8b1c3e4ccf2053d20ffc Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 
16 Mar 2020 16:30:55 -0700 Subject: [PATCH 11/13] refactor replacing nulls in DatetimeColumn --- python/cudf/cudf/core/column/column.py | 14 +++----------- python/cudf/cudf/core/column/datetime.py | 16 ++-------------- python/cudf/cudf/utils/utils.py | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 25 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 4eb174ba3b7..6ffcdc241cf 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1160,17 +1160,7 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): col = col.set_mask(mask) elif np.issubdtype(col.dtype, np.datetime64): if nan_as_null or (mask is None and nan_as_null is None): - null = column_empty_like(col, masked=True, newsize=1) - col = libcudfxx.replace.replace( - col, - as_column( - Buffer( - np.array([np.datetime64("NaT")], dtype=col.dtype) - ), - dtype=col.dtype, - ), - null, - ) + col = utils.time_col_replace_nulls(col) return col elif isinstance(arbitrary, pa.Array): @@ -1357,6 +1347,8 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): if arb_dtype.kind == "M": data = as_column(pa.array(arbitrary), nan_as_null=nan_as_null) + if np.issubdtype(data.dtype, np.datetime64): + data = utils.time_col_replace_nulls(data) elif arb_dtype.kind in ("O", "U"): data = as_column( pa.Array.from_pandas(arbitrary), dtype=arbitrary.dtype diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 78e125a93cd..e25b580f4bc 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -4,7 +4,6 @@ import pandas as pd import pyarrow as pa -import cudf import cudf._lib as libcudf import cudf._libxx as libcudfxx from cudf.core.buffer import Buffer @@ -78,19 +77,8 @@ def from_numpy(cls, array): mask = None if np.any(np.isnat(array)): - null = cudf.core.column.column_empty_like( - array, masked=True, 
newsize=1 - ) - col = libcudfxx.replace.replace( - as_column(Buffer(array), dtype=array.dtype), - as_column( - Buffer( - np.array([np.datetime64("NaT")], dtype=array.dtype) - ), - dtype=array.dtype, - ), - null, - ) + col = as_column(Buffer(array), dtype=array.dtype) + col = utils.time_col_replace_nulls(col) mask = col.mask return cls(data=Buffer(array), mask=mask, dtype=array.dtype) diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index c0ad5869507..4d0f5734325 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -414,3 +414,19 @@ def to_nested_dict(d): Convert the given dictionary with tuple keys to a NestedOrderedDict. """ return NestedOrderedDict(d) + + +def time_col_replace_nulls(input_col): + from cudf.core.column import column_empty_like, as_column + import cudf._libxx.replace as replace + + null = column_empty_like(input_col, masked=True, newsize=1) + out_col = replace.replace( + input_col, + as_column( + Buffer(np.array([np.datetime64("NaT")], dtype=input_col.dtype)), + dtype=input_col.dtype, + ), + null, + ) + return out_col From e317a5c058b09e172ba966f24007e768ec1bac7c Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 17 Mar 2020 07:55:21 -0700 Subject: [PATCH 12/13] remove DatetimeColumn.from_numpy and instead redirect same logic to column.as_column --- python/cudf/cudf/core/column/column.py | 28 +++++++++++++++++++-- python/cudf/cudf/core/column/datetime.py | 32 +----------------------- python/cudf/cudf/core/index.py | 8 +++--- python/dask_cudf/dask_cudf/backends.py | 2 +- 4 files changed, 31 insertions(+), 39 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 6ffcdc241cf..3b9f917ce8c 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1346,9 +1346,33 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): arbitrary = arbitrary.astype(dtype) if arb_dtype.kind 
== "M": - data = as_column(pa.array(arbitrary), nan_as_null=nan_as_null) - if np.issubdtype(data.dtype, np.datetime64): + import datetime as dt + + time_unit, _ = np.datetime_data(arbitrary.dtype) + cast_dtype = time_unit in ("D", "W", "M", "Y") or ( + len(arbitrary) > 0 + and ( + isinstance(arbitrary[0], str) + or isinstance(arbitrary[0], dt.datetime) + ) + ) + + if cast_dtype: + arbitrary = arbitrary.astype(np.dtype("datetime64[s]")) + assert arbitrary.dtype.itemsize == 8 + + buffer = Buffer(arbitrary) + mask = None + if np.any(np.isnat(arbitrary)) or nan_as_null: + data = as_column( + buffer, dtype=arbitrary.dtype, nan_as_null=nan_as_null + ) data = utils.time_col_replace_nulls(data) + mask = data.mask + + data = datetime.DatetimeColumn( + data=buffer, mask=mask, dtype=arbitrary.dtype + ) elif arb_dtype.kind in ("O", "U"): data = as_column( pa.Array.from_pandas(arbitrary), dtype=arbitrary.dtype diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index e25b580f4bc..6811fd8b700 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -7,7 +7,7 @@ import cudf._lib as libcudf import cudf._libxx as libcudfxx from cudf.core.buffer import Buffer -from cudf.core.column import as_column, column +from cudf.core.column import column from cudf.utils import utils from cudf.utils.dtypes import is_scalar, np_to_pa_dtype @@ -53,36 +53,6 @@ def __contains__(self, item): return False return item.astype("int_") in self.as_numerical - @classmethod - def from_numpy(cls, array): - cast_dtype = array.dtype.type == np.int64 - if array.dtype.kind == "M": - time_unit, _ = np.datetime_data(array.dtype) - cast_dtype = time_unit in ("D", "W", "M", "Y") or ( - len(array) > 0 - and ( - isinstance(array[0], str) - or isinstance(array[0], dt.datetime) - ) - ) - elif not cast_dtype: - raise ValueError( - ("Cannot infer datetime dtype " + "from np.array dtype `%s`") - % (array.dtype) - ) - - if cast_dtype: 
- array = array.astype(np.dtype("datetime64[s]")) - assert array.dtype.itemsize == 8 - - mask = None - if np.any(np.isnat(array)): - col = as_column(Buffer(array), dtype=array.dtype) - col = utils.time_col_replace_nulls(col) - mask = col.mask - - return cls(data=Buffer(array), mask=mask, dtype=array.dtype) - @property def time_unit(self): return self._time_unit diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index ec3fb4336ef..872ef0bea57 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -848,13 +848,11 @@ def __init__(self, values, **kwargs): # and then just dispatch upstream kwargs = _setdefault_name(values, kwargs) if isinstance(values, np.ndarray) and values.dtype.kind == "M": - values = DatetimeColumn.from_numpy(values) + values = column.as_column(values) elif isinstance(values, pd.DatetimeIndex): - values = DatetimeColumn.from_numpy(values.values) + values = column.as_column(values.values) elif isinstance(values, (list, tuple)): - values = DatetimeColumn.from_numpy( - np.array(values, dtype=" Date: Tue, 17 Mar 2020 08:25:19 -0700 Subject: [PATCH 13/13] remove redundant assertion, np.any(np.isnat()) check and object check --- python/cudf/cudf/core/column/column.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 3b9f917ce8c..bf84b83a5dc 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1346,24 +1346,16 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): arbitrary = arbitrary.astype(dtype) if arb_dtype.kind == "M": - import datetime as dt time_unit, _ = np.datetime_data(arbitrary.dtype) - cast_dtype = time_unit in ("D", "W", "M", "Y") or ( - len(arbitrary) > 0 - and ( - isinstance(arbitrary[0], str) - or isinstance(arbitrary[0], dt.datetime) - ) - ) + cast_dtype = time_unit in ("D", "W", "M", "Y") if cast_dtype: 
arbitrary = arbitrary.astype(np.dtype("datetime64[s]")) - assert arbitrary.dtype.itemsize == 8 buffer = Buffer(arbitrary) mask = None - if np.any(np.isnat(arbitrary)) or nan_as_null: + if nan_as_null: data = as_column( buffer, dtype=arbitrary.dtype, nan_as_null=nan_as_null )