From 5e6c3d50a0a6661895ca93d6ca76d1760957ef79 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 28 Feb 2024 10:15:21 +0100 Subject: [PATCH] refactor tests --- python/pyarrow/tests/test_cffi.py | 207 +++++++++++------------------- 1 file changed, 75 insertions(+), 132 deletions(-) diff --git a/python/pyarrow/tests/test_cffi.py b/python/pyarrow/tests/test_cffi.py index c1cd6c5a7c339..298ca342370d5 100644 --- a/python/pyarrow/tests/test_cffi.py +++ b/python/pyarrow/tests/test_cffi.py @@ -181,11 +181,10 @@ def test_export_import_field(): pa.Field._import_from_c(ptr_schema) -@needs_cffi -def test_export_import_array(): +def check_export_import_array(array_type, exporter, importer): c_schema = ffi.new("struct ArrowSchema*") ptr_schema = int(ffi.cast("uintptr_t", c_schema)) - c_array = ffi.new("struct ArrowArray*") + c_array = ffi.new(f"struct {array_type}*") ptr_array = int(ffi.cast("uintptr_t", c_array)) gc.collect() # Make sure no Arrow data dangles in a ref cycle @@ -195,11 +194,11 @@ def test_export_import_array(): typ = pa.list_(pa.int32()) arr = pa.array([[1], [2, 42]], type=typ) py_value = arr.to_pylist() - arr._export_to_c(ptr_array) + exporter(arr, ptr_array) assert pa.total_allocated_bytes() > old_allocated # Delete recreate C++ object from exported pointer del arr - arr_new = pa.Array._import_from_c(ptr_array, typ) + arr_new = importer(ptr_array, typ) assert arr_new.to_pylist() == py_value assert arr_new.type == pa.list_(pa.int32()) assert pa.total_allocated_bytes() > old_allocated @@ -207,15 +206,15 @@ def test_export_import_array(): assert pa.total_allocated_bytes() == old_allocated # Now released with assert_array_released: - pa.Array._import_from_c(ptr_array, pa.list_(pa.int32())) + importer(ptr_array, pa.list_(pa.int32())) # Type is exported and imported at the same time arr = pa.array([[1], [2, 42]], type=pa.list_(pa.int32())) py_value = arr.to_pylist() - arr._export_to_c(ptr_array, ptr_schema) + exporter(arr, ptr_array, ptr_schema) # Delete and recreate C++ objects from exported pointers del arr - arr_new = pa.Array._import_from_c(ptr_array, ptr_schema) + arr_new = importer(ptr_array, ptr_schema) assert arr_new.to_pylist() == py_value assert arr_new.type == pa.list_(pa.int32()) assert pa.total_allocated_bytes() > old_allocated @@ -223,7 +222,35 @@ def test_export_import_array(): assert pa.total_allocated_bytes() == old_allocated # Now released with assert_schema_released: - pa.Array._import_from_c(ptr_array, ptr_schema) + importer(ptr_array, ptr_schema) + + +@needs_cffi +def test_export_import_array(): + check_export_import_array( + "ArrowArray", + pa.Array._export_to_c, + pa.Array._import_from_c, + ) + + +@needs_cffi +def test_export_import_device_array(): + check_export_import_array( + "ArrowDeviceArray", + pa.Array._export_to_c_device, + pa.Array._import_from_c_device, + ) + + # verify exported struct + c_array = ffi.new("struct ArrowDeviceArray*") + ptr_array = int(ffi.cast("uintptr_t", c_array)) + arr = pa.array([[1], [2, 42]], type=pa.list_(pa.int32())) + arr._export_to_c_device(ptr_array) + + assert c_array.device_type == 1 # ARROW_DEVICE_CPU 1 + assert c_array.device_id == -1 + assert c_array.array.length == 2 def check_export_import_schema(schema_factory, expected_schema_factory=None): @@ -289,10 +316,10 @@ def test_export_import_schema_float_pointer(): assert schema_new == make_schema() -def check_export_import_batch(batch_factory): +def check_export_import_batch(array_type, exporter, importer, batch_factory): c_schema = ffi.new("struct ArrowSchema*") ptr_schema = int(ffi.cast("uintptr_t", c_schema)) - c_array = ffi.new("struct ArrowArray*") + c_array = ffi.new(f"struct {array_type}*") ptr_array = int(ffi.cast("uintptr_t", c_array)) gc.collect() # Make sure no Arrow data dangles in a ref cycle @@ -302,11 +329,11 @@ def check_export_import_batch(batch_factory): batch = batch_factory() schema = batch.schema py_value = batch.to_pydict() - batch._export_to_c(ptr_array) + exporter(batch, ptr_array) assert pa.total_allocated_bytes() > old_allocated # Delete and recreate C++ object from exported pointer del batch - batch_new = pa.RecordBatch._import_from_c(ptr_array, schema) + batch_new = importer(ptr_array, schema) assert batch_new.to_pydict() == py_value assert batch_new.schema == schema assert pa.total_allocated_bytes() > old_allocated @@ -314,7 +341,7 @@ def check_export_import_batch(batch_factory): assert pa.total_allocated_bytes() == old_allocated # Now released with assert_array_released: - pa.RecordBatch._import_from_c(ptr_array, make_schema()) + importer(ptr_array, make_schema()) # Type is exported and imported at the same time batch = batch_factory() @@ -322,7 +349,7 @@ def check_export_import_batch(batch_factory): batch._export_to_c(ptr_array, ptr_schema) # Delete and recreate C++ objects from exported pointers del batch - batch_new = pa.RecordBatch._import_from_c(ptr_array, ptr_schema) + batch_new = importer(ptr_array, ptr_schema) assert batch_new.to_pydict() == py_value assert batch_new.schema == batch_factory().schema assert pa.total_allocated_bytes() > old_allocated @@ -330,28 +357,56 @@ def check_export_import_batch(batch_factory): assert pa.total_allocated_bytes() == old_allocated # Now released with assert_schema_released: - pa.RecordBatch._import_from_c(ptr_array, ptr_schema) + importer(ptr_array, ptr_schema) # Not a struct type pa.int32()._export_to_c(ptr_schema) batch_factory()._export_to_c(ptr_array) with pytest.raises(ValueError, match="ArrowSchema describes non-struct type"): - pa.RecordBatch._import_from_c(ptr_array, ptr_schema) + importer(ptr_array, ptr_schema) # Now released with assert_schema_released: - pa.RecordBatch._import_from_c(ptr_array, ptr_schema) + importer(ptr_array, ptr_schema) @needs_cffi def test_export_import_batch(): - check_export_import_batch(make_batch) + check_export_import_batch( + "ArrowArray", + pa.RecordBatch._export_to_c, + pa.RecordBatch._import_from_c, + make_batch, + ) @needs_cffi def test_export_import_batch_with_extension(): with registered_extension_type(ParamExtType(1)): - check_export_import_batch(make_extension_batch) + check_export_import_batch( + "ArrowArray", + pa.RecordBatch._export_to_c, + pa.RecordBatch._import_from_c, + make_extension_batch, + ) + +@needs_cffi +def test_export_import_device_batch(): + check_export_import_batch( + "ArrowDeviceArray", + pa.RecordBatch._export_to_c_device, + pa.RecordBatch._import_from_c_device, + make_batch, + ) + + # verify exported struct + c_array = ffi.new("struct ArrowDeviceArray*") + ptr_array = int(ffi.cast("uintptr_t", c_array)) + batch = make_batch() + batch._export_to_c_device(ptr_array) + assert c_array.device_type == 1 # ARROW_DEVICE_CPU 1 + assert c_array.device_id == -1 + assert c_array.array.length == 2 def _export_import_batch_reader(ptr_stream, reader_factory): @@ -627,115 +682,3 @@ def test_roundtrip_chunked_array_capsule_requested_schema(): requested_capsule = requested_type.__arrow_c_schema__() with pytest.raises(NotImplementedError): chunked.__arrow_c_stream__(requested_capsule) - - -@needs_cffi -def test_export_import_device_array(): - c_schema = ffi.new("struct ArrowSchema*") - ptr_schema = int(ffi.cast("uintptr_t", c_schema)) - c_array = ffi.new("struct ArrowDeviceArray*") - ptr_array = int(ffi.cast("uintptr_t", c_array)) - - gc.collect() # Make sure no Arrow data dangles in a ref cycle - old_allocated = pa.total_allocated_bytes() - - # Type is known up front - typ = pa.list_(pa.int32()) - arr = pa.array([[1], [2, 42]], type=typ) - py_value = arr.to_pylist() - arr._export_to_c_device(ptr_array) - assert pa.total_allocated_bytes() > old_allocated - - # verify exported struct - assert c_array.device_type == 1 # ARROW_DEVICE_CPU 1 - assert c_array.device_id == -1 - assert c_array.array.length == 2 - - # Delete recreate C++ object from exported pointer - del arr - arr_new = pa.Array._import_from_c_device(ptr_array, typ) - assert arr_new.to_pylist() == py_value - assert arr_new.type == pa.list_(pa.int32()) - assert pa.total_allocated_bytes() > old_allocated - del arr_new, typ - assert pa.total_allocated_bytes() == old_allocated - # Now released - with assert_array_released: - pa.Array._import_from_c(ptr_array, pa.list_(pa.int32())) - - # Type is exported and imported at the same time - arr = pa.array([[1], [2, 42]], type=pa.list_(pa.int32())) - py_value = arr.to_pylist() - arr._export_to_c(ptr_array, ptr_schema) - # Delete and recreate C++ objects from exported pointers - del arr - arr_new = pa.Array._import_from_c(ptr_array, ptr_schema) - assert arr_new.to_pylist() == py_value - assert arr_new.type == pa.list_(pa.int32()) - assert pa.total_allocated_bytes() > old_allocated - del arr_new - assert pa.total_allocated_bytes() == old_allocated - # Now released - with assert_schema_released: - pa.Array._import_from_c(ptr_array, ptr_schema) - - -@needs_cffi -def test_export_import_device_batch(): - c_schema = ffi.new("struct ArrowSchema*") - ptr_schema = int(ffi.cast("uintptr_t", c_schema)) - c_array = ffi.new("struct ArrowDeviceArray*") - ptr_array = int(ffi.cast("uintptr_t", c_array)) - - gc.collect() # Make sure no Arrow data dangles in a ref cycle - old_allocated = pa.total_allocated_bytes() - - # Schema is known up front - batch = make_batch() - schema = batch.schema - py_value = batch.to_pydict() - batch._export_to_c_device(ptr_array) - assert pa.total_allocated_bytes() > old_allocated - - # verify exported struct - assert c_array.device_type == 1 # ARROW_DEVICE_CPU 1 - assert c_array.device_id == -1 - assert c_array.array.length == 2 - - # Delete and recreate C++ object from exported pointer - del batch - batch_new = pa.RecordBatch._import_from_c_device(ptr_array, schema) - assert batch_new.to_pydict() == py_value - assert batch_new.schema == schema - assert pa.total_allocated_bytes() > old_allocated - del batch_new, schema - assert pa.total_allocated_bytes() == old_allocated - # Now released - with assert_array_released: - pa.RecordBatch._import_from_c_device(ptr_array, make_schema()) - - # Type is exported and imported at the same time - batch = make_batch() - py_value = batch.to_pydict() - batch._export_to_c_device(ptr_array, ptr_schema) - # Delete and recreate C++ objects from exported pointers - del batch - batch_new = pa.RecordBatch._import_from_c_device(ptr_array, ptr_schema) - assert batch_new.to_pydict() == py_value - assert batch_new.schema == make_batch().schema - assert pa.total_allocated_bytes() > old_allocated - del batch_new - assert pa.total_allocated_bytes() == old_allocated - # Now released - with assert_schema_released: - pa.RecordBatch._import_from_c_device(ptr_array, ptr_schema) - - # Not a struct type - pa.int32()._export_to_c(ptr_schema) - make_batch()._export_to_c_device(ptr_array) - with pytest.raises(ValueError, - match="ArrowSchema describes non-struct type"): - pa.RecordBatch._import_from_c_device(ptr_array, ptr_schema) - # Now released - with assert_schema_released: - pa.RecordBatch._import_from_c_device(ptr_array, ptr_schema)