diff --git a/autotest/ogr/ogr_mem.py b/autotest/ogr/ogr_mem.py index 077363a20bf8..2875a8fce028 100755 --- a/autotest/ogr/ogr_mem.py +++ b/autotest/ogr/ogr_mem.py @@ -746,6 +746,16 @@ def test_ogr_mem_arrow_stream_pycapsule_interface(): # "something" should rather by a PyCapsule with an ArrowSchema... lyr.__arrow_c_stream__(requested_schema="something") + # Also test GetArrowArrayStreamInterface() to be able to specify options + stream = lyr.GetArrowArrayStreamInterface( + {"INCLUDE_FID": "NO"} + ).__arrow_c_stream__() + assert stream + t = type(stream) + assert t.__module__ == "builtins" + assert t.__name__ == "PyCapsule" + del stream + ############################################################################### diff --git a/swig/include/ogr.i b/swig/include/ogr.i index 816097a12612..19b4de6e2667 100644 --- a/swig/include/ogr.i +++ b/swig/include/ogr.i @@ -1523,12 +1523,12 @@ public: #ifdef SWIGPYTHON - PyObject* ExportArrowArrayStreamPyCapsule() + PyObject* ExportArrowArrayStreamPyCapsule(char** options = NULL) { struct ArrowArrayStream* stream = (struct ArrowArrayStream*)CPLMalloc(sizeof(struct ArrowArrayStream)); - const int success = OGR_L_GetArrowStream(self, stream, NULL); + const int success = OGR_L_GetArrowStream(self, stream, options); PyObject* ret; SWIG_PYTHON_THREAD_BEGIN_BLOCK; diff --git a/swig/include/python/ogr_python.i b/swig/include/python/ogr_python.i index 773e391578de..bf27eaba9526 100644 --- a/swig/include/python/ogr_python.i +++ b/swig/include/python/ogr_python.i @@ -419,6 +419,9 @@ def ReleaseResultSet(self, sql_lyr): Also note that only one active stream can be queried at a time for a given layer. + To specify options how the ArrowStream should be generated, use + the GetArrowArrayStreamInterface(self, options) method + Parameters ---------- requested_schema : PyCapsule, default None @@ -440,6 +443,58 @@ def ReleaseResultSet(self, sql_lyr): return self.ExportArrowArrayStreamPyCapsule() + def GetArrowArrayStreamInterface(self, options = []): + """ + Return a proxy object that implements the __arrow_c_stream__() method, + but allows the user to pass options. + + Parameters + ---------- + options : List of strings or dict with options such as INCLUDE_FID=NO, MAX_FEATURES_IN_BATCH=, etc. + + Returns + ------- + a proxy object which implements the __arrow_c_stream__() method + """ + + class ArrowArrayStreamInterface: + def __init__(self, lyr, options): + self.lyr = lyr + self.options = options + + def __arrow_c_stream__(self, requested_schema=None): + """ + Export to a C ArrowArrayStream PyCapsule, according to + https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html + + Also note that only one active stream can be queried at a time for a + given layer. + + To specify options how the ArrowStream should be generated, use + the GetArrowArrayStreamInterface(self, options) method + + Parameters + ---------- + requested_schema : PyCapsule, default None + The schema to which the stream should be casted, passed as a + PyCapsule containing a C ArrowSchema representation of the + requested schema. + Currently, this is not supported and will raise a + NotImplementedError if the schema is not None + + Returns + ------- + PyCapsule + A capsule containing a C ArrowArrayStream struct. + """ + if requested_schema is not None: + raise NotImplementedError("requested_schema != None not implemented") + + return self.lyr.ExportArrowArrayStreamPyCapsule(self.options) + + return ArrowArrayStreamInterface(self, options) + + def GetArrowStreamAsPyArrow(self, options = []): """ Return an ArrowStream as PyArrow Schema and Array objects """