Skip to content

Commit

Permalink
refactor tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed Feb 28, 2024
1 parent 6e0870f commit 5e6c3d5
Showing 1 changed file with 75 additions and 132 deletions.
207 changes: 75 additions & 132 deletions python/pyarrow/tests/test_cffi.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,11 +181,10 @@ def test_export_import_field():
pa.Field._import_from_c(ptr_schema)


def check_export_import_array(array_type, exporter, importer):
    """Round-trip a ``list<int32>`` array through a C data struct.

    The array is exported and re-imported twice: once with the type known
    up front, and once with the type exported through an ``ArrowSchema``
    alongside the data. Allocation counters are checked after each step.

    Callers are expected to carry the ``needs_cffi`` marker, since this
    helper uses ``ffi`` directly.

    Parameters
    ----------
    array_type : str
        Name of the C struct to allocate for the array; it is interpolated
        into ``struct {array_type}*``.
    exporter : callable
        Called as ``exporter(arr, ptr_array)`` and
        ``exporter(arr, ptr_array, ptr_schema)``.
    importer : callable
        Called as ``importer(ptr_array, type)`` and
        ``importer(ptr_array, ptr_schema)``.
    """
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))
    c_array = ffi.new(f"struct {array_type}*")
    ptr_array = int(ffi.cast("uintptr_t", c_array))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    old_allocated = pa.total_allocated_bytes()

    # Type is known up front
    typ = pa.list_(pa.int32())
    arr = pa.array([[1], [2, 42]], type=typ)
    py_value = arr.to_pylist()
    exporter(arr, ptr_array)
    assert pa.total_allocated_bytes() > old_allocated
    # Delete and recreate C++ object from exported pointer
    del arr
    arr_new = importer(ptr_array, typ)
    assert arr_new.to_pylist() == py_value
    assert arr_new.type == pa.list_(pa.int32())
    assert pa.total_allocated_bytes() > old_allocated
    del arr_new, typ
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_array_released:
        importer(ptr_array, pa.list_(pa.int32()))

    # Type is exported and imported at the same time
    arr = pa.array([[1], [2, 42]], type=pa.list_(pa.int32()))
    py_value = arr.to_pylist()
    exporter(arr, ptr_array, ptr_schema)
    # Delete and recreate C++ objects from exported pointers
    del arr
    arr_new = importer(ptr_array, ptr_schema)
    assert arr_new.to_pylist() == py_value
    assert arr_new.type == pa.list_(pa.int32())
    assert pa.total_allocated_bytes() > old_allocated
    del arr_new
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_schema_released:
        importer(ptr_array, ptr_schema)


@needs_cffi
def test_export_import_array():
    """Round-trip an array through the plain ``ArrowArray`` struct."""
    check_export_import_array(
        array_type="ArrowArray",
        exporter=pa.Array._export_to_c,
        importer=pa.Array._import_from_c,
    )


@needs_cffi
def test_export_import_device_array():
    """Round-trip an array through ``ArrowDeviceArray`` and check its fields.

    The shared round-trip check runs first with the device exporter and
    importer. A second export then inspects the raw struct members.
    """
    check_export_import_array(
        "ArrowDeviceArray",
        pa.Array._export_to_c_device,
        pa.Array._import_from_c_device,
    )

    # verify exported struct
    c_array = ffi.new("struct ArrowDeviceArray*")
    ptr_array = int(ffi.cast("uintptr_t", c_array))
    typ = pa.list_(pa.int32())
    arr = pa.array([[1], [2, 42]], type=typ)
    arr._export_to_c_device(ptr_array)

    assert c_array.device_type == 1  # ARROW_DEVICE_CPU 1
    assert c_array.device_id == -1
    assert c_array.array.length == 2

    # Hand the struct to a consumer so its release callback runs; an
    # exported struct that is never imported keeps the buffers alive.
    del arr
    pa.Array._import_from_c_device(ptr_array, typ)

def check_export_import_schema(schema_factory, expected_schema_factory=None):
Expand Down Expand Up @@ -289,10 +316,10 @@ def test_export_import_schema_float_pointer():
assert schema_new == make_schema()


def check_export_import_batch(array_type, exporter, importer, batch_factory):
    """Round-trip a record batch through a C data struct.

    Three scenarios are exercised: schema known up front, schema exported
    together with the data, and a non-struct schema that must be rejected
    on import. Every export goes through ``exporter`` so that the struct
    written always matches ``array_type``.

    Parameters
    ----------
    array_type : str
        Name of the C struct to allocate for the batch; it is interpolated
        into ``struct {array_type}*``.
    exporter : callable
        Called as ``exporter(batch, ptr_array)`` and
        ``exporter(batch, ptr_array, ptr_schema)``.
    importer : callable
        Called as ``importer(ptr_array, schema)`` and
        ``importer(ptr_array, ptr_schema)``.
    batch_factory : callable
        Zero-argument callable returning a fresh record batch.
    """
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))
    c_array = ffi.new(f"struct {array_type}*")
    ptr_array = int(ffi.cast("uintptr_t", c_array))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    old_allocated = pa.total_allocated_bytes()

    # Schema is known up front
    batch = batch_factory()
    schema = batch.schema
    py_value = batch.to_pydict()
    exporter(batch, ptr_array)
    assert pa.total_allocated_bytes() > old_allocated
    # Delete and recreate C++ object from exported pointer
    del batch
    batch_new = importer(ptr_array, schema)
    assert batch_new.to_pydict() == py_value
    assert batch_new.schema == schema
    assert pa.total_allocated_bytes() > old_allocated
    del batch_new, schema
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_array_released:
        importer(ptr_array, make_schema())

    # Type is exported and imported at the same time
    batch = batch_factory()
    py_value = batch.to_pydict()
    # Use the supplied exporter (not batch._export_to_c) so the struct
    # layout matches what `importer` expects.
    exporter(batch, ptr_array, ptr_schema)
    # Delete and recreate C++ objects from exported pointers
    del batch
    batch_new = importer(ptr_array, ptr_schema)
    assert batch_new.to_pydict() == py_value
    assert batch_new.schema == batch_factory().schema
    assert pa.total_allocated_bytes() > old_allocated
    del batch_new
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_schema_released:
        importer(ptr_array, ptr_schema)

    # Not a struct type
    pa.int32()._export_to_c(ptr_schema)
    exporter(batch_factory(), ptr_array)
    with pytest.raises(ValueError,
                       match="ArrowSchema describes non-struct type"):
        importer(ptr_array, ptr_schema)
    # Now released
    with assert_schema_released:
        importer(ptr_array, ptr_schema)


@needs_cffi
def test_export_import_batch():
    """Round-trip a record batch through the plain ``ArrowArray`` struct."""
    check_export_import_batch(
        "ArrowArray",
        pa.RecordBatch._export_to_c,
        pa.RecordBatch._import_from_c,
        make_batch,
    )


@needs_cffi
def test_export_import_batch_with_extension():
    """Round-trip an extension-typed batch through ``ArrowArray``.

    The extension type stays registered for the duration of the check.
    """
    with registered_extension_type(ParamExtType(1)):
        check_export_import_batch(
            "ArrowArray",
            pa.RecordBatch._export_to_c,
            pa.RecordBatch._import_from_c,
            make_extension_batch,
        )

@needs_cffi
def test_export_import_device_batch():
    """Round-trip a batch through ``ArrowDeviceArray`` and check its fields.

    The shared round-trip check runs first with the device exporter and
    importer. A second export then inspects the raw struct members.
    """
    check_export_import_batch(
        "ArrowDeviceArray",
        pa.RecordBatch._export_to_c_device,
        pa.RecordBatch._import_from_c_device,
        make_batch,
    )

    # verify exported struct
    c_array = ffi.new("struct ArrowDeviceArray*")
    ptr_array = int(ffi.cast("uintptr_t", c_array))
    batch = make_batch()
    schema = batch.schema
    batch._export_to_c_device(ptr_array)
    assert c_array.device_type == 1  # ARROW_DEVICE_CPU 1
    assert c_array.device_id == -1
    assert c_array.array.length == 2

    # Hand the struct to a consumer so its release callback runs; an
    # exported struct that is never imported keeps the buffers alive.
    del batch
    pa.RecordBatch._import_from_c_device(ptr_array, schema)


def _export_import_batch_reader(ptr_stream, reader_factory):
Expand Down Expand Up @@ -627,115 +682,3 @@ def test_roundtrip_chunked_array_capsule_requested_schema():
requested_capsule = requested_type.__arrow_c_schema__()
with pytest.raises(NotImplementedError):
chunked.__arrow_c_stream__(requested_capsule)


@needs_cffi
def test_export_import_device_array():
    """Round-trip a ``list<int32>`` array through ``ArrowDeviceArray``.

    Every export and import uses the ``*_device`` API, so the struct that
    is written always matches the ``ArrowDeviceArray`` allocated here.
    """
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))
    c_array = ffi.new("struct ArrowDeviceArray*")
    ptr_array = int(ffi.cast("uintptr_t", c_array))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    old_allocated = pa.total_allocated_bytes()

    # Type is known up front
    typ = pa.list_(pa.int32())
    arr = pa.array([[1], [2, 42]], type=typ)
    py_value = arr.to_pylist()
    arr._export_to_c_device(ptr_array)
    assert pa.total_allocated_bytes() > old_allocated

    # verify exported struct
    assert c_array.device_type == 1  # ARROW_DEVICE_CPU 1
    assert c_array.device_id == -1
    assert c_array.array.length == 2

    # Delete and recreate C++ object from exported pointer
    del arr
    arr_new = pa.Array._import_from_c_device(ptr_array, typ)
    assert arr_new.to_pylist() == py_value
    assert arr_new.type == pa.list_(pa.int32())
    assert pa.total_allocated_bytes() > old_allocated
    del arr_new, typ
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_array_released:
        pa.Array._import_from_c_device(ptr_array, pa.list_(pa.int32()))

    # Type is exported and imported at the same time
    arr = pa.array([[1], [2, 42]], type=pa.list_(pa.int32()))
    py_value = arr.to_pylist()
    arr._export_to_c_device(ptr_array, ptr_schema)
    # Delete and recreate C++ objects from exported pointers
    del arr
    arr_new = pa.Array._import_from_c_device(ptr_array, ptr_schema)
    assert arr_new.to_pylist() == py_value
    assert arr_new.type == pa.list_(pa.int32())
    assert pa.total_allocated_bytes() > old_allocated
    del arr_new
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_schema_released:
        pa.Array._import_from_c_device(ptr_array, ptr_schema)


@needs_cffi
def test_export_import_device_batch():
    """Round-trip a record batch through ``ArrowDeviceArray``.

    Covers three cases: schema known up front, schema exported with the
    data, and a non-struct schema that import must reject.
    """
    schema_struct = ffi.new("struct ArrowSchema*")
    schema_addr = int(ffi.cast("uintptr_t", schema_struct))
    device_struct = ffi.new("struct ArrowDeviceArray*")
    array_addr = int(ffi.cast("uintptr_t", device_struct))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    baseline = pa.total_allocated_bytes()

    # Schema is known up front
    batch = make_batch()
    schema = batch.schema
    expected = batch.to_pydict()
    batch._export_to_c_device(array_addr)
    assert pa.total_allocated_bytes() > baseline

    # verify exported struct
    assert device_struct.device_type == 1  # ARROW_DEVICE_CPU 1
    assert device_struct.device_id == -1
    assert device_struct.array.length == 2

    # Delete and recreate C++ object from exported pointer
    del batch
    restored = pa.RecordBatch._import_from_c_device(array_addr, schema)
    assert restored.to_pydict() == expected
    assert restored.schema == schema
    assert pa.total_allocated_bytes() > baseline
    del restored, schema
    assert pa.total_allocated_bytes() == baseline
    # Now released
    with assert_array_released:
        pa.RecordBatch._import_from_c_device(array_addr, make_schema())

    # Type is exported and imported at the same time
    batch = make_batch()
    expected = batch.to_pydict()
    batch._export_to_c_device(array_addr, schema_addr)
    # Delete and recreate C++ objects from exported pointers
    del batch
    restored = pa.RecordBatch._import_from_c_device(array_addr, schema_addr)
    assert restored.to_pydict() == expected
    assert restored.schema == make_batch().schema
    assert pa.total_allocated_bytes() > baseline
    del restored
    assert pa.total_allocated_bytes() == baseline
    # Now released
    with assert_schema_released:
        pa.RecordBatch._import_from_c_device(array_addr, schema_addr)

    # Not a struct type
    pa.int32()._export_to_c(schema_addr)
    make_batch()._export_to_c_device(array_addr)
    with pytest.raises(ValueError,
                       match="ArrowSchema describes non-struct type"):
        pa.RecordBatch._import_from_c_device(array_addr, schema_addr)
    # Now released
    with assert_schema_released:
        pa.RecordBatch._import_from_c_device(array_addr, schema_addr)

0 comments on commit 5e6c3d5

Please sign in to comment.