From 51970e066e69ab01f9bdcc81219781ae07b9799b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 22 Dec 2023 02:06:50 +0100 Subject: [PATCH] GH-39006: [Python] Extract libparquet requirements out of libarrow_python.so to new libarrow_python_parquet_encryption.so (#39316) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change If I build pyarrow with everything and then I remove some of the Arrow C++ .so files in order to have a minimal build, I can't import pyarrow because it requires libarrow and libparquet. This is relevant in order to have a minimal build for Conda. Please see the related issue for more information. ### What changes are included in this PR? Move libarrow parquet encryption for pyarrow to its own shared object. ### Are these changes tested? I will run extensive CI with extra python archery tests. ### Are there any user-facing changes? No, and yes :) There will be a new .so in pyarrow, but it shouldn't be relevant in my opinion. 
* Closes: #39006 Lead-authored-by: Raúl Cumplido Co-authored-by: Antoine Pitrou Signed-off-by: Sutou Kouhei --- ci/scripts/python_test.sh | 2 + ci/scripts/python_wheel_unix_test.sh | 1 + ci/scripts/python_wheel_windows_test.bat | 1 + python/CMakeLists.txt | 38 ++++++++++--------- .../src/arrow/python/parquet_encryption.h | 33 +++++++++++++--- 5 files changed, 53 insertions(+), 22 deletions(-) diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh index 8d818346faa6e..341c2dd0577ef 100755 --- a/ci/scripts/python_test.sh +++ b/ci/scripts/python_test.sh @@ -45,6 +45,7 @@ export ARROW_DEBUG_MEMORY_POOL=trap : ${PYARROW_TEST_HDFS:=${ARROW_HDFS:-ON}} : ${PYARROW_TEST_ORC:=${ARROW_ORC:-ON}} : ${PYARROW_TEST_PARQUET:=${ARROW_PARQUET:-ON}} +: ${PYARROW_TEST_PARQUET_ENCRYPTION:=${PARQUET_REQUIRE_ENCRYPTION:-ON}} : ${PYARROW_TEST_S3:=${ARROW_S3:-ON}} export PYARROW_TEST_ACERO @@ -56,6 +57,7 @@ export PYARROW_TEST_GCS export PYARROW_TEST_HDFS export PYARROW_TEST_ORC export PYARROW_TEST_PARQUET +export PYARROW_TEST_PARQUET_ENCRYPTION export PYARROW_TEST_S3 # Testing PyArrow diff --git a/ci/scripts/python_wheel_unix_test.sh b/ci/scripts/python_wheel_unix_test.sh index a6cc3bb7b29b7..01250ff7ef40c 100755 --- a/ci/scripts/python_wheel_unix_test.sh +++ b/ci/scripts/python_wheel_unix_test.sh @@ -46,6 +46,7 @@ export PYARROW_TEST_HDFS=ON export PYARROW_TEST_ORC=ON export PYARROW_TEST_PANDAS=ON export PYARROW_TEST_PARQUET=ON +export PYARROW_TEST_PARQUET_ENCRYPTION=ON export PYARROW_TEST_SUBSTRAIT=${ARROW_SUBSTRAIT} export PYARROW_TEST_S3=${ARROW_S3} export PYARROW_TEST_TENSORFLOW=ON diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat index c73b0cfd1b9bd..b14bfddfb36d3 100755 --- a/ci/scripts/python_wheel_windows_test.bat +++ b/ci/scripts/python_wheel_windows_test.bat @@ -26,6 +26,7 @@ set PYARROW_TEST_GCS=ON set PYARROW_TEST_HDFS=ON set PYARROW_TEST_ORC=OFF set PYARROW_TEST_PARQUET=ON +set PYARROW_TEST_PARQUET_ENCRYPTION=ON set 
PYARROW_TEST_SUBSTRAIT=ON set PYARROW_TEST_S3=OFF set PYARROW_TEST_TENSORFLOW=ON diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 3f810d27271e5..2df1e67b9f4c7 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -332,22 +332,6 @@ if(PYARROW_BUILD_PARQUET OR PYARROW_BUILD_PARQUET_ENCRYPTION) find_package(Parquet REQUIRED) endif() -if(PYARROW_BUILD_PARQUET_ENCRYPTION) - if(PARQUET_REQUIRE_ENCRYPTION) - list(APPEND PYARROW_CPP_SRCS ${PYARROW_CPP_SOURCE_DIR}/parquet_encryption.cc) - if(ARROW_BUILD_SHARED) - list(APPEND PYARROW_CPP_LINK_LIBS Parquet::parquet_shared) - else() - list(APPEND PYARROW_CPP_LINK_LIBS Parquet::parquet_static) - endif() - message(STATUS "Parquet Encryption Enabled") - else() - message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON") - endif() -else() - message(STATUS "Parquet Encryption is NOT Enabled") -endif() - if(PYARROW_BUILD_HDFS) if(NOT ARROW_HDFS) message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON") @@ -391,6 +375,26 @@ install(TARGETS arrow_python LIBRARY DESTINATION . RUNTIME DESTINATION .) +set(PYARROW_CPP_ENCRYPTION_SRCS ${PYARROW_CPP_SOURCE_DIR}/parquet_encryption.cc) +if(NOT PYARROW_BUILD_PARQUET_ENCRYPTION) + message(STATUS "Parquet Encryption is NOT Enabled") +else() + if(PARQUET_REQUIRE_ENCRYPTION) + add_library(arrow_python_parquet_encryption SHARED ${PYARROW_CPP_ENCRYPTION_SRCS}) + target_link_libraries(arrow_python_parquet_encryption PUBLIC arrow_python + ${PARQUET_LINK_LIBS}) + target_compile_definitions(arrow_python_parquet_encryption + PRIVATE ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORTING) + install(TARGETS arrow_python_parquet_encryption + ARCHIVE DESTINATION . + LIBRARY DESTINATION . + RUNTIME DESTINATION .) 
+ message(STATUS "Parquet Encryption Enabled") + else() + message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON") + endif() +endif() + set(PYARROW_CPP_FLIGHT_SRCS ${PYARROW_CPP_SOURCE_DIR}/flight.cc) if(PYARROW_BUILD_FLIGHT) if(NOT ARROW_FLIGHT) @@ -814,6 +818,6 @@ endif() if(PYARROW_BUILD_PARQUET) target_link_libraries(_parquet PRIVATE ${PARQUET_LINK_LIBS}) if(PYARROW_BUILD_PARQUET_ENCRYPTION) - target_link_libraries(_parquet_encryption PRIVATE ${PARQUET_LINK_LIBS}) + target_link_libraries(_parquet_encryption PRIVATE arrow_python_parquet_encryption) endif() endif() diff --git a/python/pyarrow/src/arrow/python/parquet_encryption.h b/python/pyarrow/src/arrow/python/parquet_encryption.h index 23ee478348ecd..a1aaa30e260f5 100644 --- a/python/pyarrow/src/arrow/python/parquet_encryption.h +++ b/python/pyarrow/src/arrow/python/parquet_encryption.h @@ -26,6 +26,27 @@ #include "parquet/encryption/kms_client.h" #include "parquet/encryption/kms_client_factory.h" +#if defined(_WIN32) || defined(__CYGWIN__) // Windows +#if defined(_MSC_VER) +#pragma warning(disable : 4251) +#else +#pragma GCC diagnostic ignored "-Wattributes" +#endif + +#ifdef ARROW_PYTHON_STATIC +#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT +#elif defined(ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORTING) +#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __declspec(dllexport) +#else +#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __declspec(dllimport) +#endif + +#else // Not Windows +#ifndef ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT +#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __attribute__((visibility("default"))) +#endif +#endif // Non-Windows + namespace arrow { namespace py { namespace parquet { @@ -33,7 +54,7 @@ namespace encryption { /// \brief A table of function pointers for calling from C++ into /// Python. 
-class ARROW_PYTHON_EXPORT PyKmsClientVtable { +class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientVtable { public: std::function @@ -44,7 +65,8 @@ class ARROW_PYTHON_EXPORT PyKmsClientVtable { }; /// \brief A helper for KmsClient implementation in Python. -class ARROW_PYTHON_EXPORT PyKmsClient : public ::parquet::encryption::KmsClient { +class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClient + : public ::parquet::encryption::KmsClient { public: PyKmsClient(PyObject* handler, PyKmsClientVtable vtable); ~PyKmsClient() override; @@ -62,7 +84,7 @@ class ARROW_PYTHON_EXPORT PyKmsClient : public ::parquet::encryption::KmsClient /// \brief A table of function pointers for calling from C++ into /// Python. -class ARROW_PYTHON_EXPORT PyKmsClientFactoryVtable { +class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientFactoryVtable { public: std::function> SafeGetFileEncryptionProperties(