Skip to content

Commit

Permalink
Python bindings: add a ogr.Layer.GetArrowArrayStreamInterface() method
Browse files Browse the repository at this point in the history
  • Loading branch information
rouault committed Jan 10, 2024
1 parent 9aef014 commit e4f78be
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 2 deletions.
10 changes: 10 additions & 0 deletions autotest/ogr/ogr_mem.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,16 @@ def test_ogr_mem_arrow_stream_pycapsule_interface():
# "something" should rather by a PyCapsule with an ArrowSchema...
lyr.__arrow_c_stream__(requested_schema="something")

# Also test GetArrowArrayStreamInterface() to be able to specify options
stream = lyr.GetArrowArrayStreamInterface(
{"INCLUDE_FID": "NO"}
).__arrow_c_stream__()
assert stream
t = type(stream)
assert t.__module__ == "builtins"
assert t.__name__ == "PyCapsule"
del stream


###############################################################################

Expand Down
4 changes: 2 additions & 2 deletions swig/include/ogr.i
Original file line number Diff line number Diff line change
Expand Up @@ -1523,12 +1523,12 @@ public:

#ifdef SWIGPYTHON

PyObject* ExportArrowArrayStreamPyCapsule()
PyObject* ExportArrowArrayStreamPyCapsule(char** options = NULL)
{
struct ArrowArrayStream* stream =
(struct ArrowArrayStream*)CPLMalloc(sizeof(struct ArrowArrayStream));

const int success = OGR_L_GetArrowStream(self, stream, NULL);
const int success = OGR_L_GetArrowStream(self, stream, options);

PyObject* ret;
SWIG_PYTHON_THREAD_BEGIN_BLOCK;
Expand Down
55 changes: 55 additions & 0 deletions swig/include/python/ogr_python.i
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,9 @@ def ReleaseResultSet(self, sql_lyr):
Also note that only one active stream can be queried at a time for a
given layer.
To specify options how the ArrowStream should be generated, use
the GetArrowArrayStreamInterface(self, options) method
Parameters
----------
requested_schema : PyCapsule, default None
Expand All @@ -440,6 +443,58 @@ def ReleaseResultSet(self, sql_lyr):
return self.ExportArrowArrayStreamPyCapsule()


def GetArrowArrayStreamInterface(self, options = []):
"""
Return a proxy object that implements the __arrow_c_stream__() method,
but allows the user to pass options.
Parameters
----------
options : List of strings or dict with options such as INCLUDE_FID=NO, MAX_FEATURES_IN_BATCH=<number>, etc.
Returns
-------
a proxy object which implements the __arrow_c_stream__() method
"""

class ArrowArrayStreamInterface:
def __init__(self, lyr, options):
self.lyr = lyr
self.options = options

def __arrow_c_stream__(self, requested_schema=None):
"""
Export to a C ArrowArrayStream PyCapsule, according to
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html

Also note that only one active stream can be queried at a time for a
given layer.

To specify options how the ArrowStream should be generated, use
the GetArrowArrayStreamInterface(self, options) method

Parameters
----------
requested_schema : PyCapsule, default None
The schema to which the stream should be casted, passed as a
PyCapsule containing a C ArrowSchema representation of the
requested schema.
Currently, this is not supported and will raise a
NotImplementedError if the schema is not None

Returns
-------
PyCapsule
A capsule containing a C ArrowArrayStream struct.
"""
if requested_schema is not None:
raise NotImplementedError("requested_schema != None not implemented")

return self.lyr.ExportArrowArrayStreamPyCapsule(self.options)

return ArrowArrayStreamInterface(self, options)


def GetArrowStreamAsPyArrow(self, options = []):
""" Return an ArrowStream as PyArrow Schema and Array objects """

Expand Down

0 comments on commit e4f78be

Please sign in to comment.