From 11d57a9532fed73f5659fda46f57faad25865d30 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Fri, 19 Aug 2022 12:12:59 +0200 Subject: [PATCH 01/10] Better repr for Buffer, MemoryPool, NativeFile and Codec --- python/pyarrow/io.pxi | 25 ++++++++++++++++++++++++- python/pyarrow/memory.pxi | 6 ++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index d1d3feb3c175e..514d3a1b4688b 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -121,6 +121,14 @@ cdef class NativeFile(_Weakrefable): def __exit__(self, exc_type, exc_value, tb): self.close() + def __repr__(self): + name = f"{self.__class__.__module__}.{self.__class__.__name__}" + return (f"{name}(" + f"own_file={self.own_file}, " + f"is_seekable={self.is_seekable}, " + f"is_writable={self.is_writable}, " + f"is_readable={self.is_readable})") + @property def mode(self): """ @@ -1053,6 +1061,13 @@ cdef class Buffer(_Weakrefable): def __len__(self): return self.size + def __repr__(self): + name = f"{self.__class__.__module__}.{self.__class__.__name__}" + return (f"{name}(" + f"size={self.size}, " + f"is_cpu={self.is_cpu}, " + f"is_mutable={self.is_mutable})") + @property def size(self): """ @@ -1964,7 +1979,9 @@ cdef class Codec(_Weakrefable): @property def compression_level(self): """Returns the compression level parameter of the codec""" - return frombytes(self.unwrap().compression_level()) + if self.name == 'snappy': + return None + return self.unwrap().compression_level() def compress(self, object buf, asbytes=False, memory_pool=None): """ @@ -2080,6 +2097,12 @@ cdef class Codec(_Weakrefable): return pybuf if asbytes else out_buf + def __repr__(self): + name = f"{self.__class__.__module__}.{self.__class__.__name__}" + return (f"{name}(" + f"name={self.name}, " + f"compression_level={self.compression_level})") + def compress(object buf, codec='lz4', asbytes=False, memory_pool=None): """ diff --git a/python/pyarrow/memory.pxi 
b/python/pyarrow/memory.pxi index 2258be78d5479..fb7acd33e1f98 100644 --- a/python/pyarrow/memory.pxi +++ b/python/pyarrow/memory.pxi @@ -76,6 +76,12 @@ cdef class MemoryPool(_Weakrefable): """ return frombytes(self.pool.backend_name()) + def __repr__(self): + name = f"{self.__class__.__module__}.{self.__class__.__name__}" + return (f"{name}(" + f"backend_name={self.backend_name}, " + f"bytes_allocated={self.bytes_allocated()}, " + f"max_memory={self.max_memory()})") cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool): if memory_pool is None: From f5e6f23e7f458e049260173dd9f2a0dbc3a5cc1c Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Mon, 22 Aug 2022 09:18:44 +0200 Subject: [PATCH 02/10] Add hex address to reprs --- python/pyarrow/io.pxi | 3 +++ python/pyarrow/memory.pxi | 1 + 2 files changed, 4 insertions(+) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 514d3a1b4688b..11d0e453cea23 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -124,6 +124,7 @@ cdef class NativeFile(_Weakrefable): def __repr__(self): name = f"{self.__class__.__module__}.{self.__class__.__name__}" return (f"{name}(" + f"{hex(id(self))}, " f"own_file={self.own_file}, " f"is_seekable={self.is_seekable}, " f"is_writable={self.is_writable}, " @@ -1064,6 +1065,7 @@ cdef class Buffer(_Weakrefable): def __repr__(self): name = f"{self.__class__.__module__}.{self.__class__.__name__}" return (f"{name}(" + f"{hex(id(self))}, " f"size={self.size}, " f"is_cpu={self.is_cpu}, " f"is_mutable={self.is_mutable})") @@ -2100,6 +2102,7 @@ cdef class Codec(_Weakrefable): def __repr__(self): name = f"{self.__class__.__module__}.{self.__class__.__name__}" return (f"{name}(" + f"{hex(id(self))}, " f"name={self.name}, " f"compression_level={self.compression_level})") diff --git a/python/pyarrow/memory.pxi b/python/pyarrow/memory.pxi index fb7acd33e1f98..2ee7440ae4714 100644 --- a/python/pyarrow/memory.pxi +++ b/python/pyarrow/memory.pxi @@ -79,6 +79,7 @@ cdef class 
MemoryPool(_Weakrefable): def __repr__(self): name = f"{self.__class__.__module__}.{self.__class__.__name__}" return (f"{name}(" + f"{hex(id(self))}, " f"backend_name={self.backend_name}, " f"bytes_allocated={self.bytes_allocated()}, " f"max_memory={self.max_memory()})") From 99257d32b29b5d15733bfd0dca9f0b5e8f1418f8 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Mon, 22 Aug 2022 14:14:31 +0200 Subject: [PATCH 03/10] Use hex(self.address) for Buffer --- python/pyarrow/io.pxi | 4 +--- python/pyarrow/memory.pxi | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 11d0e453cea23..02daca33d3bde 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -124,7 +124,6 @@ cdef class NativeFile(_Weakrefable): def __repr__(self): name = f"{self.__class__.__module__}.{self.__class__.__name__}" return (f"{name}(" - f"{hex(id(self))}, " f"own_file={self.own_file}, " f"is_seekable={self.is_seekable}, " f"is_writable={self.is_writable}, " @@ -1065,7 +1064,7 @@ cdef class Buffer(_Weakrefable): def __repr__(self): name = f"{self.__class__.__module__}.{self.__class__.__name__}" return (f"{name}(" - f"{hex(id(self))}, " + f"address={hex(self.address)}, " f"size={self.size}, " f"is_cpu={self.is_cpu}, " f"is_mutable={self.is_mutable})") @@ -2102,7 +2101,6 @@ cdef class Codec(_Weakrefable): def __repr__(self): name = f"{self.__class__.__module__}.{self.__class__.__name__}" return (f"{name}(" - f"{hex(id(self))}, " f"name={self.name}, " f"compression_level={self.compression_level})") diff --git a/python/pyarrow/memory.pxi b/python/pyarrow/memory.pxi index 2ee7440ae4714..fb7acd33e1f98 100644 --- a/python/pyarrow/memory.pxi +++ b/python/pyarrow/memory.pxi @@ -79,7 +79,6 @@ cdef class MemoryPool(_Weakrefable): def __repr__(self): name = f"{self.__class__.__module__}.{self.__class__.__name__}" return (f"{name}(" - f"{hex(id(self))}, " f"backend_name={self.backend_name}, " f"bytes_allocated={self.bytes_allocated()}, " 
f"max_memory={self.max_memory()})") From 76f924e2faa39462661ee6df8001b4f114ae6f78 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Mon, 22 Aug 2022 16:15:01 +0200 Subject: [PATCH 04/10] Fix cython doc tests --- python/pyarrow/table.pxi | 2 +- python/pyarrow/types.pxi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 5d84716fc9824..a4233a13f11bd 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -2013,7 +2013,7 @@ cdef class RecordBatch(_PandasConvertible): >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], ... names=["n_legs", "animals"]) >>> batch.serialize() - + pyarrow.lib.Buffer(address=..., size=..., is_cpu=True, is_mutable=True) """ cdef shared_ptr[CBuffer] buffer cdef CIpcWriteOptions options = CIpcWriteOptions.Defaults() diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 8407f95c984c3..cb53fb4e79226 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -2065,7 +2065,7 @@ cdef class Schema(_Weakrefable): Write schema to Buffer: >>> schema.serialize() - + pyarrow.lib.Buffer(address=..., size=..., is_cpu=True, is_mutable=True) """ cdef: shared_ptr[CBuffer] buffer From febd2efa417feba35b57ca41481b18c0992fd9c8 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Tue, 23 Aug 2022 11:01:02 +0200 Subject: [PATCH 05/10] Update reprs appearance --- python/pyarrow/io.pxi | 32 ++++++++++++++++---------------- python/pyarrow/memory.pxi | 10 +++++----- python/pyarrow/table.pxi | 2 +- python/pyarrow/types.pxi | 2 +- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 02daca33d3bde..b576fc8ece745 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -122,12 +122,12 @@ cdef class NativeFile(_Weakrefable): self.close() def __repr__(self): - name = f"{self.__class__.__module__}.{self.__class__.__name__}" - return (f"{name}(" - f"own_file={self.own_file}, " - 
f"is_seekable={self.is_seekable}, " - f"is_writable={self.is_writable}, " - f"is_readable={self.is_readable})") + name = f"pyarrow.{self.__class__.__name__}" + return (f"<{name} " + f"own_file={self.own_file} " + f"is_seekable={self.is_seekable} " + f"is_writable={self.is_writable} " + f"is_readable={self.is_readable}>") @property def mode(self): @@ -1062,12 +1062,12 @@ cdef class Buffer(_Weakrefable): return self.size def __repr__(self): - name = f"{self.__class__.__module__}.{self.__class__.__name__}" - return (f"{name}(" - f"address={hex(self.address)}, " - f"size={self.size}, " - f"is_cpu={self.is_cpu}, " - f"is_mutable={self.is_mutable})") + name = f"pyarrow.{self.__class__.__name__}" + return (f"<{name} " + f"address={hex(self.address)} " + f"size={self.size} " + f"is_cpu={self.is_cpu} " + f"is_mutable={self.is_mutable}>") @property def size(self): @@ -2099,10 +2099,10 @@ cdef class Codec(_Weakrefable): return pybuf if asbytes else out_buf def __repr__(self): - name = f"{self.__class__.__module__}.{self.__class__.__name__}" - return (f"{name}(" - f"name={self.name}, " - f"compression_level={self.compression_level})") + name = f"pyarrow.{self.__class__.__name__}" + return (f"<{name} " + f"name={self.name} " + f"compression_level={self.compression_level}>") def compress(object buf, codec='lz4', asbytes=False, memory_pool=None): diff --git a/python/pyarrow/memory.pxi b/python/pyarrow/memory.pxi index fb7acd33e1f98..8e95f2a02423c 100644 --- a/python/pyarrow/memory.pxi +++ b/python/pyarrow/memory.pxi @@ -77,11 +77,11 @@ cdef class MemoryPool(_Weakrefable): return frombytes(self.pool.backend_name()) def __repr__(self): - name = f"{self.__class__.__module__}.{self.__class__.__name__}" - return (f"{name}(" - f"backend_name={self.backend_name}, " - f"bytes_allocated={self.bytes_allocated()}, " - f"max_memory={self.max_memory()})") + name = f"pyarrow.{self.__class__.__name__}" + return (f"<{name} " + f"backend_name={self.backend_name} " + 
+ f"bytes_allocated={self.bytes_allocated()} " + f"max_memory={self.max_memory()}>") cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool): if memory_pool is None: diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index a4233a13f11bd..be3a7deaef7bc 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -2013,7 +2013,7 @@ cdef class RecordBatch(_PandasConvertible): >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], ... names=["n_legs", "animals"]) >>> batch.serialize() - pyarrow.lib.Buffer(address=..., size=..., is_cpu=True, is_mutable=True) + <pyarrow.Buffer address=0x... size=... is_cpu=True is_mutable=True> """ cdef shared_ptr[CBuffer] buffer cdef CIpcWriteOptions options = CIpcWriteOptions.Defaults() diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index cb53fb4e79226..d263e96c65197 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -2065,7 +2065,7 @@ cdef class Schema(_Weakrefable): Write schema to Buffer: >>> schema.serialize() - pyarrow.lib.Buffer(address=..., size=..., is_cpu=True, is_mutable=True) + <pyarrow.Buffer address=0x... size=... is_cpu=True is_mutable=True> """ cdef: shared_ptr[CBuffer] buffer From 6b49d22ada830b43c94260678a31eca8769de4b6 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Tue, 23 Aug 2022 11:07:00 +0200 Subject: [PATCH 06/10] Test Codec compression_level attr --- python/pyarrow/tests/test_io.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index ca49c5218e88b..a6488d70df53e 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -719,6 +719,12 @@ def test_compression_level(compression): if not Codec.is_available(compression): pytest.skip("{} support is not built".format(compression)) + codec = Codec(compression) + if codec.name == "snappy": + assert codec.compression_level is None + else: + assert isinstance(codec.compression_level, int) + # These codecs do not support a compression level no_level = ['snappy'] if compression in no_level: From 
eed7303948245599d498cbd3bfa2e157617346a5 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Tue, 23 Aug 2022 11:19:29 +0200 Subject: [PATCH 07/10] Doctest for Codec --- python/pyarrow/io.pxi | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index b576fc8ece745..a423e2e7fe700 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -1859,6 +1859,17 @@ cdef class Codec(_Weakrefable): ------ ValueError If invalid compression value is passed. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.Codec.is_available('gzip') + True + >>> codec = pa.Codec('gzip') + >>> codec.name + 'gzip' + >>> codec.compression_level + 9 """ def __init__(self, str compression not None, compression_level=None): From ae55f755a259b9079cb306fe224c98bcd93829b4 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Tue, 23 Aug 2022 11:26:13 +0200 Subject: [PATCH 08/10] Doctest for MemoryPool repr --- python/pyarrow/memory.pxi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/pyarrow/memory.pxi b/python/pyarrow/memory.pxi index 8e95f2a02423c..f68360a5142cc 100644 --- a/python/pyarrow/memory.pxi +++ b/python/pyarrow/memory.pxi @@ -124,6 +124,11 @@ cdef class ProxyMemoryPool(MemoryPool): def default_memory_pool(): """ Return the process-global memory pool. 
+ + Examples + -------- + >>> default_memory_pool() + <pyarrow.MemoryPool backend_name=... bytes_allocated=... max_memory=...> """ cdef: MemoryPool pool = MemoryPool.__new__(MemoryPool) From 889881e40cfd5886547c0e97c5f067c986a09508 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Tue, 23 Aug 2022 12:05:10 +0200 Subject: [PATCH 09/10] Add closed to NativeFile and Doctest --- python/pyarrow/io.pxi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index a423e2e7fe700..96d8f4c241240 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -124,6 +124,7 @@ cdef class NativeFile(_Weakrefable): def __repr__(self): name = f"pyarrow.{self.__class__.__name__}" return (f"<{name} " + f"closed={self.closed} " f"own_file={self.own_file} " f"is_seekable={self.is_seekable} " f"is_writable={self.is_writable} " @@ -774,6 +775,13 @@ cdef class PythonFile(NativeFile): As a downside, there is a non-zero redirection cost in translating Arrow stream calls to Python method calls. Furthermore, Python's Global Interpreter Lock may limit parallelism in some situations. + + Examples + -------- + >>> import io + >>> import pyarrow as pa + >>> pa.PythonFile(io.BytesIO()) + <pyarrow.PythonFile closed=False own_file=False is_seekable=False is_writable=True is_readable=False> """ cdef: object handle From 440d64766ceea938afbedbf6a9727aef8195e2d1 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Wed, 24 Aug 2022 10:02:41 +0200 Subject: [PATCH 10/10] Fix whitepacing in lints --- python/pyarrow/io.pxi | 2 +- python/pyarrow/memory.pxi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 96d8f4c241240..3dd60735c3cc8 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -775,7 +775,7 @@ cdef class PythonFile(NativeFile): As a downside, there is a non-zero redirection cost in translating Arrow stream calls to Python method calls. Furthermore, Python's Global Interpreter Lock may limit parallelism in some situations. 
- + Examples -------- >>> import io diff --git a/python/pyarrow/memory.pxi b/python/pyarrow/memory.pxi index f68360a5142cc..1ddcb01ccb6ab 100644 --- a/python/pyarrow/memory.pxi +++ b/python/pyarrow/memory.pxi @@ -124,7 +124,7 @@ cdef class ProxyMemoryPool(MemoryPool): def default_memory_pool(): """ Return the process-global memory pool. - + Examples -------- >>> default_memory_pool()