diff --git a/autotest/ogr/ogr_mem.py b/autotest/ogr/ogr_mem.py index e144ab0412e4..1e2acf703a66 100755 --- a/autotest/ogr/ogr_mem.py +++ b/autotest/ogr/ogr_mem.py @@ -709,6 +709,39 @@ def test_ogr_mem_alter_geom_field_defn(): assert lyr.GetSpatialRef() is None +############################################################################### +# Test ogr.Layer.__arrow_c_stream__() interface. +# Cf https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html + + +@gdaltest.enable_exceptions() +def test_ogr_mem_arrow_stream_pycapsule_interface(): + + ds = ogr.GetDriverByName("Memory").CreateDataSource("") + lyr = ds.CreateLayer("foo") + + stream = lyr.__arrow_c_stream__() + assert stream + t = type(stream) + assert t.__module__ == "builtins" + assert t.__name__ == "PyCapsule" + + with pytest.raises( + Exception, match="An arrow Arrow Stream is in progress on that layer" + ): + lyr.__arrow_c_stream__() + + del stream + + stream = lyr.__arrow_c_stream__() + assert stream + del stream + + with pytest.raises(Exception, match="requested_schema != None not implemented"): + # "something" should rather be a PyCapsule with an ArrowSchema... 
+ lyr.__arrow_c_stream__(requested_schema="something") + + ############################################################################### diff --git a/swig/include/ogr.i b/swig/include/ogr.i index bfefba922e34..bda071c04bd4 100644 --- a/swig/include/ogr.i +++ b/swig/include/ogr.i @@ -1143,6 +1143,22 @@ public: }; /* class ArrowArrayStream */ #endif +#ifdef SWIGPYTHON +// Implements __arrow_c_stream__ export interface: +// https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#create-a-pycapsule +%{ +static void ReleaseArrowArrayStreamPyCapsule(PyObject* capsule) { + struct ArrowArrayStream* stream = + (struct ArrowArrayStream*)PyCapsule_GetPointer(capsule, "arrow_array_stream"); + if (stream->release != NULL) { + stream->release(stream); + } + CPLFree(stream); +} +%} + +#endif + /************************************************************************/ /* OGRLayer */ /************************************************************************/ @@ -1507,6 +1523,30 @@ public: #ifdef SWIGPYTHON + PyObject* ExportArrowArrayStreamPyCapsule() + { + struct ArrowArrayStream* stream = + (struct ArrowArrayStream*)CPLMalloc(sizeof(struct ArrowArrayStream)); + + PyObject* ret; + SWIG_PYTHON_THREAD_BEGIN_BLOCK; + + if( OGR_L_GetArrowStream(self, stream, NULL) ) + { + ret = PyCapsule_New(stream, "arrow_array_stream", ReleaseArrowArrayStreamPyCapsule); + } + else + { + CPLFree(stream); + Py_INCREF(Py_None); + ret = Py_None; + } + + SWIG_PYTHON_THREAD_END_BLOCK; + + return ret; + } + %newobject GetArrowStream; ArrowArrayStream* GetArrowStream(char** options = NULL) { struct ArrowArrayStream* stream = (struct ArrowArrayStream* )malloc(sizeof(struct ArrowArrayStream)); diff --git a/swig/include/python/ogr_python.i b/swig/include/python/ogr_python.i index 2129e8baac3b..773e391578de 100644 --- a/swig/include/python/ogr_python.i +++ b/swig/include/python/ogr_python.i @@ -411,6 +411,35 @@ def ReleaseResultSet(self, sql_lyr): schema = property(schema) + def 
__arrow_c_stream__(self, requested_schema=None): + """ + Export to a C ArrowArrayStream PyCapsule, according to + https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html + + Also note that only one active stream can be queried at a time for a + given layer. + + Parameters + ---------- + requested_schema : PyCapsule, default None + The schema to which the stream should be cast, passed as a + PyCapsule containing a C ArrowSchema representation of the + requested schema. + Currently, this is not supported and will raise a + NotImplementedError if the schema is not None. + + Returns + ------- + PyCapsule + A capsule containing a C ArrowArrayStream struct. + """ + + if requested_schema is not None: + raise NotImplementedError("requested_schema != None not implemented") + + return self.ExportArrowArrayStreamPyCapsule() + + def GetArrowStreamAsPyArrow(self, options = []): """ Return an ArrowStream as PyArrow Schema and Array objects """