Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug/update libcudf to handle arrow12 changes #13794

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 25 additions & 3 deletions cpp/cmake/thirdparty/get_arrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -162,13 +162,14 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB

rapids_cpm_find(
Arrow ${VERSION}
GLOBAL_TARGETS arrow_shared parquet_shared arrow_dataset_shared arrow_static parquet_static
arrow_dataset_static
GLOBAL_TARGETS arrow_shared parquet_shared arrow_acero_shared arrow_dataset_shared arrow_static
parquet_static arrow_acero_static arrow_dataset_static
CPM_ARGS
GIT_REPOSITORY https://github.com/apache/arrow.git
GIT_TAG apache-arrow-${VERSION}
GIT_SHALLOW TRUE SOURCE_SUBDIR cpp
OPTIONS "CMAKE_VERBOSE_MAKEFILE ON"
"ARROW_ACERO ON"
"ARROW_IPC ON"
"ARROW_DATASET ON"
"ARROW_WITH_BACKTRACE ON"
Expand Down Expand Up @@ -221,7 +222,8 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB
# Set this to enable `find_package(Parquet)`
set(Parquet_DIR "${Arrow_DIR}")
endif()
# Set this to enable `find_package(ArrowDataset)`
# Set this to enable `find_package(ArrowDataset)`. This will call find_package(ArrowAcero) for
# us
set(ArrowDataset_DIR "${Arrow_DIR}")
find_package(ArrowDataset REQUIRED QUIET)
endif()
Expand Down Expand Up @@ -314,6 +316,26 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB

if(ENABLE_PARQUET)

# Snippet of CMake code handed to the ArrowAcero export below via FINAL_CODE_BLOCK.
# For each of the shared and static flavours: when the namespaced target
# cudf::arrow_acero_<flavour> exists but the un-namespaced arrow_acero_<flavour> target does not,
# create the un-namespaced name as an ALIAS of the namespaced one.
set(arrow_acero_code_string
[=[
if (TARGET cudf::arrow_acero_shared AND (NOT TARGET arrow_acero_shared))
add_library(arrow_acero_shared ALIAS cudf::arrow_acero_shared)
endif()
if (TARGET cudf::arrow_acero_static AND (NOT TARGET arrow_acero_static))
add_library(arrow_acero_static ALIAS cudf::arrow_acero_static)
endif()
]=]
)

# Export the ArrowAcero package for the build tree (BUILD), using the `arrow_acero_targets` export
# set and the `cudf::` namespace. FINAL_CODE_BLOCK takes the *name* of the variable defined above,
# not its contents.
# NOTE(review): per the rapids-cmake documentation the referenced code is appended to the
# generated config file -- confirm against the rapids-cmake version in use.
rapids_export(
BUILD ArrowAcero
VERSION ${VERSION}
EXPORT_SET arrow_acero_targets
GLOBAL_TARGETS arrow_acero_shared arrow_acero_static
NAMESPACE cudf::
FINAL_CODE_BLOCK arrow_acero_code_string
)

set(arrow_dataset_code_string
[=[
if (TARGET cudf::arrow_dataset_shared AND (NOT TARGET arrow_dataset_shared))
Expand Down
14 changes: 13 additions & 1 deletion cpp/tests/io/arrow_io_source_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,19 @@ TEST_F(ArrowIOTest, S3FileSystem)
ASSERT_EQ(1, tbl.tbl->num_columns()); // Only single column specified in reader_options
ASSERT_EQ(244, tbl.tbl->num_rows()); // known number of rows from the S3 file
}
CUDF_EXPECTS(arrow::fs::EnsureS3Finalized().ok(), "Failed to finalize s3 filesystem");
if (!s3_unsupported) {
  // Verify that we are using Arrow with S3, and call finalize
  // https://github.com/apache/arrow/issues/36974
  // This needs to be in a separate conditional to ensure we call
  // finalize after all arrow_io_source instances have been deleted.
  //
  // The finalizer is resolved at runtime through its mangled name rather than
  // being called directly, so a missing symbol is tolerated (presumably the
  // case when Arrow was built without S3 support -- confirm against the Arrow
  // build options in use).
  void* whole_app = dlopen(nullptr, RTLD_LAZY);
  // Only touch the handle when dlopen succeeded; passing a null handle to
  // dlclose is not valid.
  if (whole_app != nullptr) {
    auto* close_s3_func = reinterpret_cast<decltype(arrow::fs::EnsureS3Finalized)*>(
      dlsym(whole_app, "_ZN5arrow2fs17EnsureS3FinalizedEv"));

    // Record the outcome first so the handle is released even when the
    // finalizer reports a failure and CUDF_EXPECTS throws.
    bool finalized = true;
    if (close_s3_func != nullptr) { finalized = close_s3_func().ok(); }
    dlclose(whole_app);
    CUDF_EXPECTS(finalized, "Failed to finalize s3 filesystem");
  }
}
robertmaynard marked this conversation as resolved.
Show resolved Hide resolved
}

CUDF_TEST_PROGRAM_MAIN()