From 66e28a20b15a91ed5ba836a0fe2c366cc36812c8 Mon Sep 17 00:00:00 2001 From: Marcin Krystianc Date: Wed, 8 Jan 2025 17:23:45 +0100 Subject: [PATCH 1/9] arrow ~= 18.0 --- .github/workflows/python.yml | 2 +- python/pyproject.toml | 7 ++++--- python/requirements.txt | 3 ++- python/test/test_palletjack.py | 4 ++++ 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index bcda68a..0ce121b 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -136,7 +136,7 @@ jobs: run: python -m cibuildwheel --output-dir dist # to supply options, put them in 'env', like: env: - CIBW_REPAIR_WHEEL_COMMAND_LINUX: auditwheel repair --exclude libarrow.so.1700 --exclude libparquet.so.1700 -w {dest_dir} {wheel} + CIBW_REPAIR_WHEEL_COMMAND_LINUX: auditwheel repair --exclude libarrow.so.1801 --exclude libparquet.so.1801 -w {dest_dir} {wheel} CIBW_ENVIRONMENT: VCPKG_TARGET_TRIPLET="${{ steps.vcpkg-info.outputs.triplet }}" CIBW_BUILD_VERBOSITY: 1 # We use manylinux_2_28 for ABI compatibility with pyarrow diff --git a/python/pyproject.toml b/python/pyproject.toml index 2b2f11b..7eca6db 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -2,7 +2,8 @@ requires = [ "setuptools>=55.0", "Cython>=3", - "pyarrow~=17.0", + "numpy", + "pyarrow~=18.0", "thrift", ] @@ -10,7 +11,7 @@ build-backend = "setuptools.build_meta" [project] name = "palletjack" -version = "2.3.1" +version = "2.4.0" description = "Faster parquet metadata reading" readme = "README.md" requires-python = ">=3.9" @@ -20,7 +21,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "pyarrow~=17.0", + "pyarrow~=18.0", ] [tool.setuptools.packages.find] diff --git a/python/requirements.txt b/python/requirements.txt index 382afb7..984d0ca 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,3 +1,4 @@ setuptools>=55.0 Cython>=3 -pyarrow~=16.0 +pyarrow~=18.0 +numpy diff --git a/python/test/test_palletjack.py b/python/test/test_palletjack.py index 7ea1c04..e1c1a9d 100644 --- a/python/test/test_palletjack.py +++ b/python/test/test_palletjack.py @@ -33,6 +33,10 @@ class TestPalletJack(unittest.TestCase): def test_read_metadata_columns_rows(self): def validate_reading(parquet_path, index_path, row_groups, column_indices): + + # Passing empty list to the read_row_groups method is an invalid operation since Arrow 18.0 + if (len(row_groups) == 0): return + # Reading using the original metadata pr = pq.ParquetReader() pr.open(parquet_path) From ce4dd5e2e10a54b581c939f34b7dc922aeefd736 Mon Sep 17 00:00:00 2001 From: Marcin Krystianc Date: Wed, 8 Jan 2025 17:51:54 +0100 Subject: [PATCH 2/9] explicit numpy --- python/pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 7eca6db..04b421a 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -2,7 +2,7 @@ requires = [ "setuptools>=55.0", "Cython>=3", - "numpy", + "numpy>=1.16.6", "pyarrow~=18.0", "thrift", ] @@ -22,6 +22,7 @@ classifiers = [ ] dependencies = [ "pyarrow~=18.0", + "numpy>=1.16.6", ] [tool.setuptools.packages.find] From 47c0e0e5b73015199bd325f8a0f8846125b56611 Mon Sep 17 00:00:00 2001 From: Marcin Krystianc Date: Wed, 8 Jan 2025 18:03:10 +0100 Subject: [PATCH 3/9] is numpy necessary? --- python/pyproject.toml | 2 -- python/setup.py | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 04b421a..321c21b 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -2,7 +2,6 @@ requires = [ "setuptools>=55.0", "Cython>=3", - "numpy>=1.16.6", "pyarrow~=18.0", "thrift", ] @@ -22,7 +21,6 @@ classifiers = [ ] dependencies = [ "pyarrow~=18.0", - "numpy>=1.16.6", ] [tool.setuptools.packages.find] diff --git a/python/setup.py b/python/setup.py index 7cccf3c..f1f3918 100644 --- a/python/setup.py +++ b/python/setup.py @@ -7,7 +7,6 @@ from distutils.extension import Extension from Cython.Build import cythonize import pyarrow -import numpy # https://cython.readthedocs.io/en/latest/src/userguide/source_files_and_compilation.html#distributing-cython-modules def no_cythonize(extensions, **_ignore): @@ -26,7 +25,7 @@ def no_cythonize(extensions, **_ignore): return extensions vcpkg_installed = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'vcpkg_installed', os.getenv('VCPKG_TARGET_TRIPLET', '')) -include_dirs = [os.path.join(vcpkg_installed, 'include'), pyarrow.get_include(), numpy.get_include()] +include_dirs = [os.path.join(vcpkg_installed, 'include'), pyarrow.get_include()] library_dirs = [os.path.join(vcpkg_installed, 'lib')] + pyarrow.get_library_dirs() print ("VCPKG_ROOT=", vcpkg_installed) From bfb6caa3d01d1f941b343724be7f7348f8829a87 Mon Sep 17 00:00:00 2001 From: Marcin Krystianc Date: Wed, 8 Jan 2025 22:37:38 +0100 Subject: [PATCH 4/9] Revert "is numpy necessary?" This reverts commit 47c0e0e5b73015199bd325f8a0f8846125b56611. --- python/pyproject.toml | 2 ++ python/setup.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 321c21b..04b421a 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -2,6 +2,7 @@ requires = [ "setuptools>=55.0", "Cython>=3", + "numpy>=1.16.6", "pyarrow~=18.0", "thrift", ] @@ -21,6 +22,7 @@ classifiers = [ ] dependencies = [ "pyarrow~=18.0", + "numpy>=1.16.6", ] [tool.setuptools.packages.find] diff --git a/python/setup.py b/python/setup.py index f1f3918..7cccf3c 100644 --- a/python/setup.py +++ b/python/setup.py @@ -7,6 +7,7 @@ from distutils.extension import Extension from Cython.Build import cythonize import pyarrow +import numpy # https://cython.readthedocs.io/en/latest/src/userguide/source_files_and_compilation.html#distributing-cython-modules def no_cythonize(extensions, **_ignore): @@ -25,7 +26,7 @@ def no_cythonize(extensions, **_ignore): return extensions vcpkg_installed = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'vcpkg_installed', os.getenv('VCPKG_TARGET_TRIPLET', '')) -include_dirs = [os.path.join(vcpkg_installed, 'include'), pyarrow.get_include()] +include_dirs = [os.path.join(vcpkg_installed, 'include'), pyarrow.get_include(), numpy.get_include()] library_dirs = [os.path.join(vcpkg_installed, 'lib')] + pyarrow.get_library_dirs() print ("VCPKG_ROOT=", vcpkg_installed) From f793328f541a181980b23fa1795c46425895ca44 Mon Sep 17 00:00:00 2001 From: Marcin Krystianc Date: Wed, 8 Jan 2025 22:40:56 +0100 Subject: [PATCH 5/9] grammar --- python/test/test_palletjack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/test/test_palletjack.py b/python/test/test_palletjack.py index e1c1a9d..be42c32 100644 --- a/python/test/test_palletjack.py +++ b/python/test/test_palletjack.py @@ -34,7 +34,7 @@ def test_read_metadata_columns_rows(self): def validate_reading(parquet_path, index_path, row_groups, column_indices): - # Passing empty list to the read_row_groups method is an invalid operation since Arrow 18.0 + # Passing an empty list to the read_row_groups method is an invalid operation since Arrow 18.0. if (len(row_groups) == 0): return # Reading using the original metadata From 857cd72b46c1724e970ae0407e2d618a857bdedc Mon Sep 17 00:00:00 2001 From: Marcin Krystianc Date: Thu, 9 Jan 2025 08:50:35 +0100 Subject: [PATCH 6/9] try removing unnecessary dependencies --- python/pyproject.toml | 1 - python/requirements.txt | 3 --- 2 files changed, 4 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 04b421a..d4b0b9c 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -22,7 +22,6 @@ classifiers = [ ] dependencies = [ "pyarrow~=18.0", - "numpy>=1.16.6", ] [tool.setuptools.packages.find] diff --git a/python/requirements.txt b/python/requirements.txt index 984d0ca..24ce15a 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,4 +1 @@ -setuptools>=55.0 -Cython>=3 -pyarrow~=18.0 numpy From dda314cbcac50b016942cd563d3725fc7e219fc6 Mon Sep 17 00:00:00 2001 From: Marcin Krystianc Date: Thu, 9 Jan 2025 08:55:47 +0100 Subject: [PATCH 7/9] install requirements --- .github/workflows/python.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 0ce121b..b609bf3 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -181,6 +181,7 @@ jobs: - name: Test with pytest run: | + pip install -r requirements.txt # Keep in mind that if the local and remote versions are the same, the remote version will be installed pip install PalletJack --pre --find-links ./dist --break-system-packages --only-binary=:all: # So now ensure that the local version is installed From ac21376e7c8b6926a25cbe5eca261363a644fc78 Mon Sep 17 00:00:00 2001 From: Marcin Krystianc Date: Thu, 9 Jan 2025 09:10:57 +0100 Subject: [PATCH 8/9] python/requirements.txt --- .github/workflows/python.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index b609bf3..7669331 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -181,7 +181,7 @@ jobs: - name: Test with pytest run: | - pip install -r requirements.txt + pip install -r python/requirements.txt # Keep in mind that if the local and remote versions are the same, the remote version will be installed pip install PalletJack --pre --find-links ./dist --break-system-packages --only-binary=:all: # So now ensure that the local version is installed @@ -231,6 +231,7 @@ jobs: - name: Run benchmarks run: | + pip install -r python/requirements.txt # Keep in mind that if the local and remote versions are the same, the remote version will be installed pip install PalletJack --pre --find-links ./dist --break-system-packages --only-binary=:all: # So now ensure that the local version is installed From 8986e2718433b2307907c418c7f79ac70216fb19 Mon Sep 17 00:00:00 2001 From: Marcin Krystianc Date: Thu, 9 Jan 2025 09:32:26 +0100 Subject: [PATCH 9/9] use a correct version specifier --- python/pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index d4b0b9c..6c41e89 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "setuptools>=55.0", "Cython>=3", "numpy>=1.16.6", - "pyarrow~=18.0", + "pyarrow~=18.1.0", "thrift", ] @@ -21,7 +21,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "pyarrow~=18.0", + "pyarrow~=18.1.0", ] [tool.setuptools.packages.find]