diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index fd23e0cf217e6..b2f7fbc2cb4a4 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -409,12 +409,10 @@ jobs: ARROW_WITH_SNAPPY: ON ARROW_WITH_ZLIB: ON ARROW_WITH_ZSTD: ON - # Don't use preinstalled Boost by empty BOOST_ROOT and - # -DBoost_NO_BOOST_CMAKE=ON + # Don't use preinstalled Boost by empty BOOST_ROOT BOOST_ROOT: "" ARROW_CMAKE_ARGS: >- -DARROW_PACKAGE_PREFIX=/${{ matrix.msystem_lower}} - -DBoost_NO_BOOST_CMAKE=ON -DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON # We can't use unity build because we don't have enough memory on # GitHub Actions. diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index e4d650e74a8ad..4b74b8d7fc84d 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -406,7 +406,10 @@ jobs: -source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json" - name: Build C++ vcpkg dependencies run: | - vcpkg\vcpkg.exe install --triplet $env:VCPKG_TRIPLET --x-manifest-root cpp --x-install-root build\cpp\vcpkg_installed + vcpkg\vcpkg.exe install ` + --triplet $env:VCPKG_TRIPLET ` + --x-manifest-root cpp ` + --x-install-root build\cpp\vcpkg_installed - name: Build C++ shell: cmd run: | diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 63e2c036c9a6f..b31037a973279 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -259,7 +259,7 @@ macro(resolve_dependency DEPENDENCY_NAME) IS_RUNTIME_DEPENDENCY REQUIRED_VERSION USE_CONFIG) - set(multi_value_args COMPONENTS PC_PACKAGE_NAMES) + set(multi_value_args COMPONENTS OPTIONAL_COMPONENTS PC_PACKAGE_NAMES) cmake_parse_arguments(ARG "${options}" "${one_value_args}" @@ -287,6 +287,9 @@ macro(resolve_dependency DEPENDENCY_NAME) if(ARG_COMPONENTS) list(APPEND FIND_PACKAGE_ARGUMENTS COMPONENTS ${ARG_COMPONENTS}) endif() + if(ARG_OPTIONAL_COMPONENTS) + list(APPEND FIND_PACKAGE_ARGUMENTS OPTIONAL_COMPONENTS ${ARG_OPTIONAL_COMPONENTS}) + endif() if(${DEPENDENCY_NAME}_SOURCE STREQUAL "AUTO") find_package(${FIND_PACKAGE_ARGUMENTS}) set(COMPATIBLE ${${PACKAGE_NAME}_FOUND}) @@ -1289,15 +1292,19 @@ if(ARROW_USE_BOOST) set(Boost_USE_STATIC_LIBS ON) endif() if(ARROW_BOOST_REQUIRE_LIBRARY) - set(ARROW_BOOST_COMPONENTS system filesystem) + set(ARROW_BOOST_COMPONENTS filesystem system) + set(ARROW_BOOST_OPTIONAL_COMPONENTS process) else() set(ARROW_BOOST_COMPONENTS) + set(ARROW_BOOST_OPTIONAL_COMPONENTS) endif() resolve_dependency(Boost REQUIRED_VERSION ${ARROW_BOOST_REQUIRED_VERSION} COMPONENTS ${ARROW_BOOST_COMPONENTS} + OPTIONAL_COMPONENTS + ${ARROW_BOOST_OPTIONAL_COMPONENTS} IS_RUNTIME_DEPENDENCY # libarrow.so doesn't depend on libboost*. FALSE) @@ -1316,14 +1323,35 @@ if(ARROW_USE_BOOST) endif() endforeach() - if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - # boost/process/detail/windows/handle_workaround.hpp doesn't work - # without BOOST_USE_WINDOWS_H with MinGW because MinGW doesn't - # provide __kernel_entry without winternl.h. - # - # See also: - # https://github.com/boostorg/process/blob/develop/include/boost/process/detail/windows/handle_workaround.hpp - target_compile_definitions(Boost::headers INTERFACE "BOOST_USE_WINDOWS_H=1") + if(TARGET Boost::process) + # Boost >= 1.86 + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V1") + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V2") + else() + # Boost < 1.86 + add_library(Boost::process INTERFACE IMPORTED) + if(TARGET Boost::filesystem) + target_link_libraries(Boost::process INTERFACE Boost::filesystem) + endif() + if(TARGET Boost::system) + target_link_libraries(Boost::process INTERFACE Boost::system) + endif() + if(TARGET Boost::headers) + target_link_libraries(Boost::process INTERFACE Boost::headers) + endif() + if(Boost_VERSION VERSION_GREATER_EQUAL 1.80) + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V2") + # Boost < 1.86 has a bug that + # boost::process::v2::process_environment::on_setup() isn't + # defined. We need to build Boost Process source to define it. + # + # See also: + # https://github.com/boostorg/process/issues/312 + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_NEED_SOURCE") + if(WIN32) + target_link_libraries(Boost::process INTERFACE bcrypt ntdll) + endif() + endif() endif() message(STATUS "Boost include dir: ${Boost_INCLUDE_DIRS}") diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 65343df1291ba..01ac813f4713b 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -644,9 +644,13 @@ else() endif() set(ARROW_TESTING_SHARED_LINK_LIBS arrow_shared ${ARROW_GTEST_GTEST}) -set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON) -set(ARROW_TESTING_STATIC_LINK_LIBS arrow::flatbuffers RapidJSON arrow_static - ${ARROW_GTEST_GTEST}) +set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON Boost::process) +set(ARROW_TESTING_STATIC_LINK_LIBS + arrow::flatbuffers + RapidJSON + Boost::process + arrow_static + ${ARROW_GTEST_GTEST}) set(ARROW_TESTING_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared) set(ARROW_TESTING_STATIC_INSTALL_INTERFACE_LIBS Arrow::arrow_static) # that depend on gtest @@ -667,9 +671,10 @@ set(ARROW_TESTING_SRCS io/test_common.cc ipc/test_common.cc testing/fixed_width_test_util.cc + testing/generator.cc testing/gtest_util.cc + testing/process.cc testing/random.cc - testing/generator.cc testing/util.cc) # diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt index dec4bb6e3d465..7afdf566f2fb5 100644 --- a/cpp/src/arrow/filesystem/CMakeLists.txt +++ b/cpp/src/arrow/filesystem/CMakeLists.txt @@ -47,9 +47,7 @@ if(ARROW_GCS) EXTRA_LABELS filesystem EXTRA_LINK_LIBS - google-cloud-cpp::storage - Boost::filesystem - Boost::system) + google-cloud-cpp::storage) endif() if(ARROW_AZURE) @@ -57,9 +55,7 @@ if(ARROW_AZURE) EXTRA_LABELS filesystem EXTRA_LINK_LIBS - ${AZURE_SDK_LINK_LIBRARIES} - Boost::filesystem - Boost::system) + ${AZURE_SDK_LINK_LIBRARIES}) endif() if(ARROW_S3) @@ -75,11 +71,7 @@ if(ARROW_S3) else() list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS arrow_static) endif() - list(APPEND - ARROW_S3_TEST_EXTRA_LINK_LIBS - ${AWSSDK_LINK_LIBRARIES} - Boost::filesystem - Boost::system) + list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS ${AWSSDK_LINK_LIBRARIES}) add_arrow_test(s3fs_test SOURCES s3fs_test.cc @@ -122,9 +114,7 @@ if(ARROW_S3) s3_test_util.cc STATIC_LINK_LIBS ${AWSSDK_LINK_LIBRARIES} - ${ARROW_BENCHMARK_LINK_LIBS} - Boost::filesystem - Boost::system) + ${ARROW_BENCHMARK_LINK_LIBS}) if(ARROW_TEST_LINKAGE STREQUAL "static") target_link_libraries(arrow-filesystem-s3fs-benchmark PRIVATE parquet_static) else() diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 9d437d1f83aac..a8dc923476752 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -15,24 +15,6 @@ // specific language governing permissions and limitations // under the License. -#include // Missing include in boost/process - -// This boost/asio/io_context.hpp include is needless for no MinGW -// build. -// -// This is for including boost/asio/detail/socket_types.hpp before any -// "#include ". boost/asio/detail/socket_types.hpp doesn't -// work if windows.h is already included. boost/process.h -> -// boost/process/args.hpp -> boost/process/detail/basic_cmd.hpp -// includes windows.h. boost/process/args.hpp is included before -// boost/process/async.h that includes -// boost/asio/detail/socket_types.hpp implicitly is included. -#include -// We need BOOST_USE_WINDOWS_H definition with MinGW when we use -// boost/process.hpp. See BOOST_USE_WINDOWS_H=1 in -// cpp/cmake_modules/ThirdpartyToolchain.cmake for details. -#include - #include "arrow/filesystem/azurefs.h" #include "arrow/filesystem/azurefs_internal.h" @@ -53,6 +35,7 @@ #include "arrow/result.h" #include "arrow/status.h" #include "arrow/testing/gtest_util.h" +#include "arrow/testing/process.h" #include "arrow/testing/util.h" #include "arrow/util/future.h" #include "arrow/util/io_util.h" @@ -67,7 +50,6 @@ namespace arrow { using internal::TemporaryDir; namespace fs { using internal::ConcatAbstractPath; -namespace bp = boost::process; using ::testing::IsEmpty; using ::testing::Not; @@ -174,42 +156,32 @@ class AzuriteEnv : public AzureEnvImpl { private: std::unique_ptr temp_dir_; arrow::internal::PlatformFilename debug_log_path_; - bp::child server_process_; + std::unique_ptr server_process_; using AzureEnvImpl::AzureEnvImpl; public: static const AzureBackend kBackend = AzureBackend::kAzurite; - ~AzuriteEnv() override { - server_process_.terminate(); - server_process_.wait(); - } + ~AzuriteEnv() = default; static Result> Make() { auto self = std::unique_ptr( new AzuriteEnv("devstoreaccount1", "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/" "K1SZFPTOtr/KBHBeksoGMGw==")); - auto exe_path = bp::search_path("azurite"); - if (exe_path.empty()) { - return Status::Invalid("Could not find Azurite emulator."); - } + self->server_process_ = std::make_unique(); + ARROW_RETURN_NOT_OK(self->server_process_->SetExecutable("azurite")); ARROW_ASSIGN_OR_RAISE(self->temp_dir_, TemporaryDir::Make("azurefs-test-")); ARROW_ASSIGN_OR_RAISE(self->debug_log_path_, self->temp_dir_->path().Join("debug.log")); - auto server_process = bp::child( - boost::this_process::environment(), exe_path, "--silent", "--location", - self->temp_dir_->path().ToString(), "--debug", self->debug_log_path_.ToString(), - // For old Azurite. We can't install the latest Azurite with - // old Node.js on old Ubuntu. - "--skipApiVersionCheck"); - if (!server_process.valid() || !server_process.running()) { - server_process.terminate(); - server_process.wait(); - return Status::Invalid("Could not start Azurite emulator."); - } - self->server_process_ = std::move(server_process); + self->server_process_->SetArgs({"--silent", "--location", + self->temp_dir_->path().ToString(), "--debug", + self->debug_log_path_.ToString(), + // For old Azurite. We can't install the latest + // Azurite with old Node.js on old Ubuntu. + "--skipApiVersionCheck"}); + ARROW_RETURN_NOT_OK(self->server_process_->Execute()); return self; } diff --git a/cpp/src/arrow/filesystem/gcsfs_test.cc b/cpp/src/arrow/filesystem/gcsfs_test.cc index 2098cf4d7f319..d4d5edf4b8993 100644 --- a/cpp/src/arrow/filesystem/gcsfs_test.cc +++ b/cpp/src/arrow/filesystem/gcsfs_test.cc @@ -15,26 +15,6 @@ // specific language governing permissions and limitations // under the License. -#include // Missing include in boost/process - -#define BOOST_NO_CXX98_FUNCTION_BASE // ARROW-17805 -// This boost/asio/io_context.hpp include is needless for no MinGW -// build. -// -// This is for including boost/asio/detail/socket_types.hpp before any -// "#include ". boost/asio/detail/socket_types.hpp doesn't -// work if windows.h is already included. boost/process.h -> -// boost/process/args.hpp -> boost/process/detail/basic_cmd.hpp -// includes windows.h. boost/process/args.hpp is included before -// boost/process/async.h that includes -// boost/asio/detail/socket_types.hpp implicitly is included. -#include -// We need BOOST_USE_WINDOWS_H definition with MinGW when we use -// boost/process.hpp. See BOOST_USE_WINDOWS_H=1 in -// cpp/cmake_modules/ThirdpartyToolchain.cmake for details. -#include -#include - #include "arrow/filesystem/gcsfs.h" #include @@ -45,16 +25,15 @@ #include #include -#include #include #include -#include #include "arrow/filesystem/gcsfs_internal.h" #include "arrow/filesystem/path_util.h" #include "arrow/filesystem/test_util.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" +#include "arrow/testing/process.h" #include "arrow/testing/util.h" #include "arrow/util/future.h" #include "arrow/util/key_value_metadata.h" @@ -64,7 +43,6 @@ namespace fs { namespace { -namespace bp = boost::process; namespace gc = google::cloud; namespace gcs = google::cloud::storage; @@ -89,70 +67,62 @@ class GcsTestbench : public ::testing::Environment { public: GcsTestbench() { port_ = std::to_string(GetListenPort()); - std::vector names{"python3", "python"}; - // If the build script or application developer provides a value in the PYTHON - // environment variable, then just use that. - if (const auto* env = std::getenv("PYTHON")) { - names = {env}; - } auto error = std::string("Could not start GCS emulator 'storage-testbench'"); + auto server_process = std::make_unique(); + auto status = server_process->SetExecutable("storage-testbench"); + if (!status.ok()) { + error += ": " + status.ToString(); + error_ = std::move(error); + return; + } - auto testbench_is_running = [](bp::child& process, bp::ipstream& output) { - // Wait for message: "* Restarting with" - std::string line; + server_process->SetArgs({"--port", port_}); + server_process->IgnoreStderr(); + status = server_process->Execute(); + if (!status.ok()) { + error += ": " + status.ToString(); + error_ = std::move(error); + return; + } + + auto testbench_is_running = [&server_process, this]() { + auto ready_timeout = std::chrono::seconds(10); std::chrono::time_point end = - std::chrono::steady_clock::now() + std::chrono::seconds(10); - while (process.valid() && process.running() && - std::chrono::steady_clock::now() < end) { - if (output.peek() && std::getline(output, line)) { - std::cerr << line << std::endl; - if (line.find("* Restarting with") != std::string::npos) return true; - } else { - std::this_thread::sleep_for(std::chrono::milliseconds(20)); + std::chrono::steady_clock::now() + ready_timeout; + while (server_process->IsRunning() && std::chrono::steady_clock::now() < end) { + auto client = gcs::Client( + google::cloud::Options{} + .set("http://127.0.0.1:" + port_) + .set(gc::MakeInsecureCredentials()) + .set( + gcs::LimitedTimeRetryPolicy(ready_timeout).clone())); + auto metadata = client.GetBucketMetadata("nonexistent"); + if (metadata.status().code() == google::cloud::StatusCode::kNotFound) { + return true; } } return false; }; - auto exe_path = bp::search_path("storage-testbench"); - if (!exe_path.empty()) { - bp::ipstream output; - server_process_ = - bp::child(exe_path, "--port", port_, group_, bp::std_err > output); - if (!testbench_is_running(server_process_, output)) { - error += " (failed to start)"; - server_process_.terminate(); - server_process_.wait(); - } - } else { - error += " (exe not found)"; - } - if (!server_process_.valid()) { + if (!testbench_is_running()) { + error += " (failed to listen)"; error_ = std::move(error); + return; } + + server_process_ = std::move(server_process); } - bool running() { return server_process_.running(); } + bool running() { return server_process_ && server_process_->IsRunning(); } - ~GcsTestbench() override { - // Brutal shutdown, kill the full process group because the GCS testbench may launch - // additional children. - try { - group_.terminate(); - } catch (bp::process_error&) { - } - if (server_process_.valid()) { - server_process_.wait(); - } - } + ~GcsTestbench() = default; const std::string& port() const { return port_; } const std::string& error() const { return error_; } private: std::string port_; - bp::child server_process_; - bp::group group_; + std::unique_ptr server_process_; std::string error_; }; diff --git a/cpp/src/arrow/filesystem/s3_test_util.cc b/cpp/src/arrow/filesystem/s3_test_util.cc index eb29a677dae9e..003afa68f1e35 100644 --- a/cpp/src/arrow/filesystem/s3_test_util.cc +++ b/cpp/src/arrow/filesystem/s3_test_util.cc @@ -15,33 +15,13 @@ // specific language governing permissions and limitations // under the License. -#include // Missing include in boost/process - #ifndef _WIN32 #include #endif -// This boost/asio/io_context.hpp include is needless for no MinGW -// build. -// -// This is for including boost/asio/detail/socket_types.hpp before any -// "#include ". boost/asio/detail/socket_types.hpp doesn't -// work if windows.h is already included. boost/process.h -> -// boost/process/args.hpp -> boost/process/detail/basic_cmd.hpp -// includes windows.h. boost/process/args.hpp is included before -// boost/process/async.h that includes -// boost/asio/detail/socket_types.hpp implicitly is included. -#ifdef __MINGW32__ -#include -#endif -#define BOOST_NO_CXX98_FUNCTION_BASE // ARROW-17805 -// We need BOOST_USE_WINDOWS_H definition with MinGW when we use -// boost/process.hpp. See BOOST_USE_WINDOWS_H=1 in -// cpp/cmake_modules/ThirdpartyToolchain.cmake for details. -#include - #include "arrow/filesystem/s3_test_util.h" #include "arrow/filesystem/s3fs.h" +#include "arrow/testing/process.h" #include "arrow/testing/util.h" #include "arrow/util/async_generator.h" #include "arrow/util/future.h" @@ -53,8 +33,6 @@ namespace fs { using ::arrow::internal::TemporaryDir; -namespace bp = boost::process; - namespace { const char* kMinioExecutableName = "minio"; @@ -75,7 +53,7 @@ struct MinioTestServer::Impl { std::string connect_string_; std::string access_key_ = kMinioAccessKey; std::string secret_key_ = kMinioSecretKey; - std::shared_ptr<::boost::process::child> server_process_; + std::unique_ptr server_process_; }; MinioTestServer::MinioTestServer() : impl_(new Impl) {} @@ -105,44 +83,23 @@ Status MinioTestServer::Start() { ARROW_ASSIGN_OR_RAISE(impl_->temp_dir_, TemporaryDir::Make("s3fs-test-")); - // Get a copy of the current environment. - // (NOTE: using "auto" would return a native_environment that mutates - // the current environment) - bp::environment env = boost::this_process::environment(); - env["MINIO_ACCESS_KEY"] = kMinioAccessKey; - env["MINIO_SECRET_KEY"] = kMinioSecretKey; + impl_->server_process_ = std::make_unique(); + impl_->server_process_->SetEnv("MINIO_ACCESS_KEY", kMinioAccessKey); + impl_->server_process_->SetEnv("MINIO_SECRET_KEY", kMinioSecretKey); // Disable the embedded console (one less listening address to care about) - env["MINIO_BROWSER"] = "off"; - + impl_->server_process_->SetEnv("MINIO_BROWSER", "off"); impl_->connect_string_ = GenerateConnectString(); - auto exe_path = bp::search_path(kMinioExecutableName); - if (exe_path.empty()) { - return Status::IOError("Failed to find minio executable ('", kMinioExecutableName, - "') in PATH"); - } - - try { - // NOTE: --quiet makes startup faster by suppressing remote version check - impl_->server_process_ = std::make_shared( - env, exe_path, "server", "--quiet", "--compat", "--address", - impl_->connect_string_, impl_->temp_dir_->path().ToString()); - } catch (const std::exception& e) { - return Status::IOError("Failed to launch Minio server: ", e.what()); - } + ARROW_RETURN_NOT_OK(impl_->server_process_->SetExecutable(kMinioExecutableName)); + // NOTE: --quiet makes startup faster by suppressing remote version check + impl_->server_process_->SetArgs({"server", "--quiet", "--compat", "--address", + impl_->connect_string_, + impl_->temp_dir_->path().ToString()}); + ARROW_RETURN_NOT_OK(impl_->server_process_->Execute()); return Status::OK(); } Status MinioTestServer::Stop() { - if (impl_->server_process_ && impl_->server_process_->valid()) { - // Brutal shutdown - impl_->server_process_->terminate(); - impl_->server_process_->wait(); -#ifndef _WIN32 - // Despite calling wait() above, boost::process fails to clear zombies - // so do it ourselves. - waitpid(impl_->server_process_->id(), nullptr, 0); -#endif - } + impl_->server_process_ = nullptr; return Status::OK(); } diff --git a/cpp/src/arrow/flight/CMakeLists.txt b/cpp/src/arrow/flight/CMakeLists.txt index 98f93705f6f56..65f07b6cc38d2 100644 --- a/cpp/src/arrow/flight/CMakeLists.txt +++ b/cpp/src/arrow/flight/CMakeLists.txt @@ -64,11 +64,6 @@ if(ARROW_BUILD_BENCHMARKS endif() endif() endif() -list(APPEND - ARROW_FLIGHT_TEST_INTERFACE_LIBS - Boost::headers - Boost::filesystem - Boost::system) list(APPEND ARROW_FLIGHT_TEST_LINK_LIBS gRPC::grpc++) # TODO(wesm): Protobuf shared vs static linking diff --git a/cpp/src/arrow/flight/flight_benchmark.cc b/cpp/src/arrow/flight/flight_benchmark.cc index 057ef15c3c7ae..661c47737f024 100644 --- a/cpp/src/arrow/flight/flight_benchmark.cc +++ b/cpp/src/arrow/flight/flight_benchmark.cc @@ -491,7 +491,7 @@ int main(int argc, char** argv) { if (FLAGS_cuda && FLAGS_test_put) { server_args.push_back("-cuda"); } - server->Start(server_args); + ABORT_NOT_OK(server->Start(server_args)); } std::cout << "Server host: " << FLAGS_server_host << std::endl << "Server port: " << FLAGS_server_port << std::endl; diff --git a/cpp/src/arrow/flight/flight_test.cc b/cpp/src/arrow/flight/flight_test.cc index 3d52bc3f5ae06..6425233dadec4 100644 --- a/cpp/src/arrow/flight/flight_test.cc +++ b/cpp/src/arrow/flight/flight_test.cc @@ -204,7 +204,7 @@ ARROW_FLIGHT_TEST_ASYNC_CLIENT(GrpcAsyncClientTest); TEST(TestFlight, ConnectUri) { TestServer server("flight-test-server"); - server.Start(); + ASSERT_OK(server.Start()); ASSERT_TRUE(server.IsRunning()); std::stringstream ss; @@ -230,7 +230,7 @@ TEST(TestFlight, InvalidUriScheme) { #ifndef _WIN32 TEST(TestFlight, ConnectUriUnix) { TestServer server("flight-test-server", "/tmp/flight-test.sock"); - server.Start(); + ASSERT_OK(server.Start()); ASSERT_TRUE(server.IsRunning()); std::stringstream ss; diff --git a/cpp/src/arrow/flight/test_util.cc b/cpp/src/arrow/flight/test_util.cc index 127827ff38cdd..aa10d9a7da822 100644 --- a/cpp/src/arrow/flight/test_util.cc +++ b/cpp/src/arrow/flight/test_util.cc @@ -17,11 +17,6 @@ #include "arrow/flight/test_util.h" -#ifdef __APPLE__ -#include -#include -#endif - #include #include #include @@ -31,18 +26,13 @@ #include "arrow/util/windows_compatibility.h" #include -#include -#define BOOST_NO_CXX98_FUNCTION_BASE // ARROW-17805 -// We need BOOST_USE_WINDOWS_H definition with MinGW when we use -// boost/process.hpp. See BOOST_USE_WINDOWS_H=1 in -// cpp/cmake_modules/ThirdpartyToolchain.cmake for details. -#include #include "arrow/array.h" #include "arrow/array/builder_primitive.h" #include "arrow/ipc/test_common.h" #include "arrow/testing/generator.h" #include "arrow/testing/gtest_util.h" +#include "arrow/testing/process.h" #include "arrow/testing/util.h" #include "arrow/util/logging.h" @@ -51,101 +41,27 @@ namespace arrow::flight { -namespace bp = boost::process; -namespace fs = boost::filesystem; - -namespace { - -Status ResolveCurrentExecutable(fs::path* out) { - // See https://stackoverflow.com/a/1024937/10194 for various - // platform-specific recipes. - - boost::system::error_code ec; - -#if defined(__linux__) - *out = fs::canonical("/proc/self/exe", ec); -#elif defined(__APPLE__) - char buf[PATH_MAX + 1]; - uint32_t bufsize = sizeof(buf); - if (_NSGetExecutablePath(buf, &bufsize) < 0) { - return Status::Invalid("Can't resolve current exe: path too large"); - } - *out = fs::canonical(buf, ec); -#elif defined(_WIN32) - char buf[MAX_PATH + 1]; - if (!GetModuleFileNameA(NULL, buf, sizeof(buf))) { - return Status::Invalid("Can't get executable file path"); - } - *out = fs::canonical(buf, ec); -#else - ARROW_UNUSED(ec); - return Status::NotImplemented("Not available on this system"); -#endif - if (ec) { - // XXX fold this into the Status class? - return Status::IOError("Can't resolve current exe: ", ec.message()); +Status TestServer::Start(const std::vector& extra_args) { + server_process_ = std::make_unique(); + ARROW_RETURN_NOT_OK(server_process_->SetExecutable(executable_name_)); + std::vector args = {}; + if (unix_sock_.empty()) { + args.push_back("-port"); + args.push_back(std::to_string(port_)); } else { - return Status::OK(); - } -} - -} // namespace - -void TestServer::Start(const std::vector& extra_args) { - namespace fs = boost::filesystem; - - std::string str_port = std::to_string(port_); - std::vector search_path = ::boost::this_process::path(); - // If possible, prepend current executable directory to search path, - // since it's likely that the test server executable is located in - // the same directory as the running unit test. - fs::path current_exe; - Status st = ResolveCurrentExecutable(¤t_exe); - if (st.ok()) { - search_path.insert(search_path.begin(), current_exe.parent_path()); - } else if (st.IsNotImplemented()) { - ARROW_CHECK(st.IsNotImplemented()) << st.ToString(); - } - - try { - if (unix_sock_.empty()) { - server_process_ = - std::make_shared(bp::search_path(executable_name_, search_path), - "-port", str_port, bp::args(extra_args)); - } else { - server_process_ = - std::make_shared(bp::search_path(executable_name_, search_path), - "-server_unix", unix_sock_, bp::args(extra_args)); - } - } catch (...) { - std::stringstream ss; - ss << "Failed to launch test server '" << executable_name_ << "', looked in "; - for (const auto& path : search_path) { - ss << path << " : "; - } - ARROW_LOG(FATAL) << ss.str(); - throw; + args.push_back("-server_unix"); + args.push_back(unix_sock_); } - std::cout << "Server running with pid " << server_process_->id() << std::endl; + args.insert(args.end(), extra_args.begin(), extra_args.end()); + server_process_->SetArgs(args); + ARROW_RETURN_NOT_OK(server_process_->Execute()); + std::cout << "Server running with pid " << server_process_->pid() << std::endl; + return Status::OK(); } -int TestServer::Stop() { - if (server_process_ && server_process_->valid()) { -#ifndef _WIN32 - kill(server_process_->id(), SIGTERM); -#else - // This would use SIGKILL on POSIX, which is more brutal than SIGTERM - server_process_->terminate(); -#endif - server_process_->wait(); - return server_process_->exit_code(); - } else { - // Presumably the server wasn't able to start - return -1; - } -} +void TestServer::Stop() { server_process_ = nullptr; } -bool TestServer::IsRunning() { return server_process_->running(); } +bool TestServer::IsRunning() { return server_process_->IsRunning(); } int TestServer::port() const { return port_; } diff --git a/cpp/src/arrow/flight/test_util.h b/cpp/src/arrow/flight/test_util.h index 15ba6145ecd2b..946caebcc2b5a 100644 --- a/cpp/src/arrow/flight/test_util.h +++ b/cpp/src/arrow/flight/test_util.h @@ -29,6 +29,7 @@ #include "arrow/status.h" #include "arrow/testing/gtest_util.h" +#include "arrow/testing/process.h" #include "arrow/testing/util.h" #include "arrow/flight/client.h" @@ -36,14 +37,6 @@ #include "arrow/flight/types.h" #include "arrow/flight/visibility.h" -namespace boost { -namespace process { - -class child; - -} // namespace process -} // namespace boost - namespace arrow { namespace flight { @@ -76,10 +69,10 @@ class ARROW_FLIGHT_EXPORT TestServer { TestServer(const std::string& executable_name, const std::string& unix_sock) : executable_name_(executable_name), unix_sock_(unix_sock) {} - void Start(const std::vector& extra_args); - void Start() { Start({}); } + Status Start(const std::vector& extra_args); + Status Start() { return Start({}); } - int Stop(); + void Stop(); bool IsRunning(); @@ -90,7 +83,7 @@ class ARROW_FLIGHT_EXPORT TestServer { std::string executable_name_; int port_; std::string unix_sock_; - std::shared_ptr<::boost::process::child> server_process_; + std::unique_ptr server_process_; }; // Helper to initialize a server and matching client with callbacks to diff --git a/cpp/src/arrow/testing/process.cc b/cpp/src/arrow/testing/process.cc new file mode 100644 index 0000000000000..32da81f14630e --- /dev/null +++ b/cpp/src/arrow/testing/process.cc @@ -0,0 +1,298 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/testing/process.h" +#include "arrow/result.h" + +// This boost/asio/io_context.hpp include is needless for no MinGW +// build. +// +// This is for including boost/asio/detail/socket_types.hpp before any +// "#include ". boost/asio/detail/socket_types.hpp doesn't +// work if windows.h is already included. +#include + +#ifdef BOOST_PROCESS_HAVE_V2 +// We can't use v2 API on Windows because v2 API doesn't support +// process group [1] and GCS testbench uses multiple processes [2]. +// +// [1] https://github.com/boostorg/process/issues/259 +// [2] https://github.com/googleapis/storage-testbench/issues/669 +#ifndef _WIN32 +#define BOOST_PROCESS_USE_V2 +#endif +#endif + +#ifdef BOOST_PROCESS_USE_V2 +#ifdef BOOST_PROCESS_NEED_SOURCE +// Workaround for https://github.com/boostorg/process/issues/312 +#define BOOST_PROCESS_V2_SEPARATE_COMPILATION +#ifdef __APPLE__ +#include +#endif +#include +#include +#else +#include +#endif +#include +#else +// We need BOOST_USE_WINDOWS_H definition with MinGW when we use +// boost/process.hpp. boost/process/detail/windows/handle_workaround.hpp +// doesn't work without BOOST_USE_WINDOWS_H with MinGW because MinGW +// doesn't provide __kernel_entry without winternl.h. +// +// See also: +// https://github.com/boostorg/process/blob/develop/include/boost/process/detail/windows/handle_workaround.hpp +#ifdef __MINGW32__ +#define BOOST_USE_WINDOWS_H = 1 +#endif +#ifdef BOOST_PROCESS_HAVE_V1 +#include +#else +#include +#endif +#endif + +#ifdef __APPLE__ +#include +#include +#endif + +#include +#include +#include +#include + +#ifdef BOOST_PROCESS_USE_V2 +namespace asio = BOOST_PROCESS_V2_ASIO_NAMESPACE; +namespace process = BOOST_PROCESS_V2_NAMESPACE; +namespace filesystem = process::filesystem; +#elif defined(BOOST_PROCESS_HAVE_V1) +namespace process = boost::process::v1; +namespace filesystem = boost::process::v1::filesystem; +#else +namespace process = boost::process; +namespace filesystem = boost::filesystem; +#endif + +namespace arrow::util { + +class Process::Impl { + public: + Impl() { + // Get a copy of the current environment. +#ifdef BOOST_PROCESS_USE_V2 + for (const auto& kv : process::environment::current()) { + env_[kv.key()] = process::environment::value(kv.value()); + } +#else + env_ = process::environment(boost::this_process::environment()); +#endif + } + + ~Impl() { +#ifdef BOOST_PROCESS_USE_V2 + // V2 doesn't provide process group support yet: + // https://github.com/boostorg/process/issues/259 + // + // So we try graceful shutdown (SIGTERM + waitpid()) before + // immediate shutdown (SIGKILL). This assumes that the target + // executable such as "python3 -m testbench" terminates all related + // processes by graceful shutdown. + boost::system::error_code error_code; + if (process_ && process_->running(error_code)) { + process_->request_exit(error_code); + if (!error_code) { + auto timeout = std::chrono::seconds(3); + std::chrono::time_point end = + std::chrono::steady_clock::now() + timeout; + while (process_->running(error_code) && std::chrono::steady_clock::now() < end) { + std::this_thread::sleep_for(std::chrono::milliseconds(20)); + } + } + } +#else + process_group_ = nullptr; +#endif + process_ = nullptr; + } + + Status SetExecutable(const std::string& name) { +#ifdef BOOST_PROCESS_USE_V2 + executable_ = process::environment::find_executable(name); +#else + executable_ = process::search_path(name); +#endif + if (executable_.empty()) { + // Search the current executable directory as fallback. + ARROW_ASSIGN_OR_RAISE(auto current_exe, ResolveCurrentExecutable()); +#ifdef BOOST_PROCESS_USE_V2 + std::unordered_map env; + for (const auto& kv : process::environment::current()) { + env[kv.key()] = process::environment::value(kv.value()); + } + env["PATH"] = process::environment::value(current_exe.parent_path()); + executable_ = process::environment::find_executable(name, env); +#else + executable_ = process::search_path(name, {current_exe.parent_path()}); +#endif + } + if (executable_.empty()) { + return Status::IOError("Failed to find '", name, "' in PATH"); + } + return Status::OK(); + } + + void SetArgs(const std::vector& args) { args_ = args; } + + void SetEnv(const std::string& name, const std::string& value) { +#ifdef BOOST_PROCESS_USE_V2 + env_[name] = process::environment::value(value); +#else + env_[name] = value; +#endif + } + + void IgnoreStderr() { keep_stderr_ = false; } + + Status Execute() { + try { +#ifdef BOOST_PROCESS_USE_V2 + return ExecuteV2(); +#else + return ExecuteV1(); +#endif + } catch (const std::exception& e) { + return Status::IOError("Failed to launch '", executable_, "': ", e.what()); + } + } + + bool IsRunning() { +#ifdef BOOST_PROCESS_USE_V2 + boost::system::error_code error_code; + return process_ && process_->running(error_code); +#else + return process_ && process_->running(); +#endif + } + + uint64_t pid() { + if (!process_) { + return 0; + } + return process_->id(); + } + + private: + filesystem::path executable_; + std::vector args_; + bool keep_stderr_ = true; +#ifdef BOOST_PROCESS_USE_V2 + std::unordered_map env_; + std::unique_ptr process_; + asio::io_context ctx_; + // boost/process/v2/ doesn't support process group yet: + // https://github.com/boostorg/process/issues/259 +#else + process::environment env_; + std::unique_ptr process_; + std::unique_ptr process_group_; +#endif + + Result ResolveCurrentExecutable() { + // See https://stackoverflow.com/a/1024937/10194 for various + // platform-specific recipes. + + filesystem::path path; + boost::system::error_code error_code; + +#if defined(__linux__) + path = filesystem::canonical("/proc/self/exe", error_code); +#elif defined(__APPLE__) + char buf[PATH_MAX + 1]; + uint32_t bufsize = sizeof(buf); + if (_NSGetExecutablePath(buf, &bufsize) < 0) { + return Status::Invalid("Can't resolve current exe: path too large"); + } + path = filesystem::canonical(buf, error_code); +#elif defined(_WIN32) + char buf[MAX_PATH + 1]; + if (!GetModuleFileNameA(NULL, buf, sizeof(buf))) { + return Status::Invalid("Can't get executable file path"); + } + path = filesystem::canonical(buf, error_code); +#else + ARROW_UNUSED(error_code); + return Status::NotImplemented("Not available on this system"); +#endif + if (error_code) { + // XXX fold this into the Status class? + return Status::IOError("Can't resolve current exe: ", error_code.message()); + } else { + return path; + } + } + +#ifdef BOOST_PROCESS_USE_V2 + Status ExecuteV2() { + process::process_environment env(env_); + // We can't use std::make_unique. + process_ = std::unique_ptr( + new process::process(ctx_, executable_, args_, env, + keep_stderr_ ? process::process_stdio{{}, {}, {}} + : process::process_stdio{{}, {}, nullptr})); + return Status::OK(); + } +#else + Status ExecuteV1() { + process_group_ = std::make_unique(); + if (keep_stderr_) { + process_ = std::make_unique(executable_, process::args(args_), env_, + *process_group_); + } else { + process_ = std::make_unique(executable_, process::args(args_), env_, + *process_group_, + process::std_err > process::null); + } + return Status::OK(); + } +#endif +}; + +Process::Process() : impl_(new Impl()) {} + +Process::~Process() {} + +Status Process::SetExecutable(const std::string& path) { + return impl_->SetExecutable(path); +} + +void Process::SetArgs(const std::vector& args) { impl_->SetArgs(args); } + +void Process::SetEnv(const std::string& key, const std::string& value) { + impl_->SetEnv(key, value); +} + +void Process::IgnoreStderr() { impl_->IgnoreStderr(); } + +Status Process::Execute() { return impl_->Execute(); } + +bool Process::IsRunning() { return impl_->IsRunning(); } + +uint64_t Process::pid() { return impl_->pid(); } +} // namespace arrow::util diff --git a/cpp/src/arrow/testing/process.h b/cpp/src/arrow/testing/process.h new file mode 100644 index 0000000000000..d4d2ae124f427 --- /dev/null +++ b/cpp/src/arrow/testing/process.h @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "arrow/status.h" +#include "arrow/testing/visibility.h" + +namespace arrow::util { + +class ARROW_TESTING_EXPORT Process { + public: + Process(); + ~Process(); + + Status SetExecutable(const std::string& path); + void SetArgs(const std::vector& args); + void SetEnv(const std::string& name, const std::string& value); + void IgnoreStderr(); + Status Execute(); + bool IsRunning(); + uint64_t pid(); + + private: + class Impl; + std::unique_ptr impl_; +}; +} // namespace arrow::util diff --git a/cpp/src/gandiva/precompiled/CMakeLists.txt b/cpp/src/gandiva/precompiled/CMakeLists.txt index c092ff4fd011f..c2bc7fc02797e 100644 --- a/cpp/src/gandiva/precompiled/CMakeLists.txt +++ b/cpp/src/gandiva/precompiled/CMakeLists.txt @@ -53,8 +53,8 @@ add_custom_target(precompiled ALL DEPENDS ${GANDIVA_PRECOMPILED_BC_PATH} ${GANDIVA_PRECOMPILED_CC_PATH}) # testing -if(ARROW_BUILD_TESTS) - add_executable(gandiva-precompiled-test +add_gandiva_test(precompiled-test + SOURCES ../context_helper.cc bitmap_test.cc bitmap.cc @@ -75,16 +75,12 @@ if(ARROW_BUILD_TESTS) decimal_ops_test.cc decimal_ops.cc ../decimal_type_util.cc - ../decimal_xlarge.cc) - target_include_directories(gandiva-precompiled-test PRIVATE ${CMAKE_SOURCE_DIR}/src) - target_link_libraries(gandiva-precompiled-test PRIVATE ${ARROW_TEST_LINK_LIBS} - Boost::headers) - target_compile_definitions(gandiva-precompiled-test PRIVATE GANDIVA_UNIT_TEST=1 - ARROW_STATIC GANDIVA_STATIC) - set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/gandiva-precompiled-test") - add_test(gandiva-precompiled-test ${TEST_PATH}) - set_property(TEST gandiva-precompiled-test - APPEND - PROPERTY LABELS "unittest;gandiva-tests") - add_dependencies(gandiva-tests gandiva-precompiled-test) -endif() + ../decimal_xlarge.cc + EXTRA_INCLUDES + ${CMAKE_SOURCE_DIR}/src + EXTRA_LINK_LIBS + Boost::headers + DEFINITIONS + GANDIVA_UNIT_TEST=1 + ARROW_STATIC + GANDIVA_STATIC) diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json index 6f825b55cfd94..103e678ebb4ac 100644 --- a/cpp/vcpkg.json +++ b/cpp/vcpkg.json @@ -15,11 +15,11 @@ ] }, "benchmark", + "boost-crc", "boost-filesystem", "boost-multiprecision", "boost-process", "boost-system", - "boost-crc", "brotli", "bzip2", "c-ares",