Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into test-pyarrow-device-interface
Browse files (browse the repository at this point in the history)
  • Loading branch information
jorisvandenbossche committed Feb 14, 2024
2 parents 864a52c + 91bf1c9 commit 7f78d83
Show file tree
Hide file tree
Showing 97 changed files with 3,818 additions and 598 deletions.
2 changes: 1 addition & 1 deletion .env
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ ULIMIT_CORE=-1
ALMALINUX=8
ALPINE_LINUX=3.16
DEBIAN=11
FEDORA=38
FEDORA=39
UBUNTU=20.04

# Default versions for various dependencies
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/comment_bot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
ARROW_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
CROSSBOW_GITHUB_TOKEN: ${{ secrets.CROSSBOW_GITHUB_TOKEN }}
run: |
archery trigger-bot \
archery --debug trigger-bot \
--event-name ${{ github.event_name }} \
--event-payload ${{ github.event_path }}
Expand Down
57 changes: 57 additions & 0 deletions .github/workflows/r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,63 @@ env:
DOCKER_VOLUME_PREFIX: ".docker/"

jobs:
ubuntu-minimum-cpp-version:
name: Check minimum supported Arrow C++ Version (${{ matrix.cpp_version }})
runs-on: ubuntu-latest
strategy:
matrix:
include:
- cpp_version: "13.0.0"
steps:
- name: Checkout Arrow
uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
with:
path: src
submodules: recursive

- name: Install Arrow C++ (${{ matrix.cpp_version }})
run: |
sudo apt update
sudo apt install -y -V ca-certificates lsb-release wget
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
sudo apt update
# We have to list all packages to avoid version conflicts.
sudo apt install -y -V libarrow-dev=${{ matrix.cpp_version }}-1 \
libarrow-acero-dev=${{ matrix.cpp_version }}-1 \
libparquet-dev=${{ matrix.cpp_version }}-1 \
libarrow-dataset-dev=${{ matrix.cpp_version }}-1
- name: Install checkbashisms
  # checkbashisms is shipped in the devscripts package. Pass -y, like the other
  # apt invocations in this job, so the step never waits on a confirmation
  # prompt regardless of how apt is configured on the runner.
  run: |
    sudo apt-get install -y devscripts
- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true
install-r: false

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
needs: check
working-directory: src/r

- uses: r-lib/actions/check-r-package@v2
with:
working-directory: src/r
env:
LIBARROW_BINARY: "false"
LIBARROW_BUILD: "false"
ARROW_R_VERBOSE_TEST: "true"
ARROW_R_ALLOW_CPP_VERSION_MISMATCH: "true"

- name: Show install output
if: always()
run: find src/r/check -name '00install.out*' -exec cat '{}' \; || true
shell: bash


ubuntu:
name: AMD64 Ubuntu ${{ matrix.ubuntu }} R ${{ matrix.r }} Force-Tests ${{ matrix.force-tests }}
runs-on: ubuntu-latest
Expand Down
5 changes: 4 additions & 1 deletion ci/appveyor-cpp-build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ git submodule update --init || exit /B
set ARROW_TEST_DATA=%CD%\testing\data
set PARQUET_TEST_DATA=%CD%\cpp\submodules\parquet-testing\data

set ARROW_DEBUG_MEMORY_POOL=trap
@rem Enable memory debug checks if the env is not set already
IF "%ARROW_DEBUG_MEMORY_POOL%"=="" (
set ARROW_DEBUG_MEMORY_POOL=trap
)

set CMAKE_BUILD_PARALLEL_LEVEL=%NUMBER_OF_PROCESSORS%
set CTEST_PARALLEL_LEVEL=%NUMBER_OF_PROCESSORS%
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# under the License.

ARG arch
FROM ${arch}/fedora:38
FROM ${arch}/fedora:39
ARG arch

# install dependencies
Expand Down Expand Up @@ -76,6 +76,8 @@ RUN /arrow/ci/scripts/install_gcs_testbench.sh default
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

# PYARROW_TEST_GANDIVA=OFF: GH-39695: We need to make LLVM symbols visible in
# Python process explicitly if we use LLVM 17 or later.
ENV absl_SOURCE=BUNDLED \
ARROW_ACERO=ON \
ARROW_BUILD_TESTS=ON \
Expand Down Expand Up @@ -103,4 +105,5 @@ ENV absl_SOURCE=BUNDLED \
google_cloud_cpp_storage_SOURCE=BUNDLED \
PARQUET_BUILD_EXAMPLES=ON \
PARQUET_BUILD_EXECUTABLES=ON \
PATH=/usr/lib/ccache/:$PATH
PATH=/usr/lib/ccache/:$PATH \
PYARROW_TEST_GANDIVA=OFF
2 changes: 1 addition & 1 deletion ci/docker/linux-apt-docs.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ RUN apt-get update -y && \

ENV JAVA_HOME=/usr/lib/jvm/java-${jdk}-openjdk-amd64

ARG maven=3.5.4
ARG maven=3.6.3
COPY ci/scripts/util_download_apache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/util_download_apache.sh \
"maven/maven-3/${maven}/binaries/apache-maven-${maven}-bin.tar.gz" /opt
Expand Down
1 change: 1 addition & 0 deletions ci/docker/python-wheel-manylinux.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ RUN vcpkg install \
--clean-after-build \
--x-install-root=${VCPKG_ROOT}/installed \
--x-manifest-root=/arrow/ci/vcpkg \
--x-feature=azure \
--x-feature=flight \
--x-feature=gcs \
--x-feature=json \
Expand Down
6 changes: 4 additions & 2 deletions ci/scripts/c_glib_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@ export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig
export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0

# Enable memory debug checks.
export ARROW_DEBUG_MEMORY_POOL=trap
# Enable memory debug checks if the env is not set already
if [ -z "${ARROW_DEBUG_MEMORY_POOL}" ]; then
export ARROW_DEBUG_MEMORY_POOL=trap
fi

pushd ${source_dir}

Expand Down
1 change: 1 addition & 0 deletions ci/scripts/cpp_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ if [ "${GITHUB_ACTIONS:-false}" = "true" ]; then
fi

if [ "${ARROW_ENABLE_THREADING:-ON}" = "OFF" ]; then
ARROW_AZURE=OFF
ARROW_FLIGHT=OFF
ARROW_FLIGHT_SQL=OFF
ARROW_GCS=OFF
Expand Down
6 changes: 4 additions & 2 deletions ci/scripts/cpp_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,10 @@ export LD_LIBRARY_PATH=${ARROW_HOME}/${CMAKE_INSTALL_LIBDIR:-lib}:${LD_LIBRARY_P
# to retrieve metadata. Disable this so that S3FileSystem tests run faster.
export AWS_EC2_METADATA_DISABLED=TRUE

# Enable memory debug checks.
export ARROW_DEBUG_MEMORY_POOL=trap
# Enable memory debug checks if the env is not set already
if [ -z "${ARROW_DEBUG_MEMORY_POOL}" ]; then
export ARROW_DEBUG_MEMORY_POOL=trap
fi

ctest_options=()
case "$(uname)" in
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/python_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug}

export PYARROW_WITH_ACERO=${ARROW_ACERO:-OFF}
export PYARROW_WITH_AZURE=${ARROW_AZURE:-OFF}
export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF}
export PYARROW_WITH_DATASET=${ARROW_DATASET:-ON}
export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF}
Expand Down
7 changes: 5 additions & 2 deletions ci/scripts/python_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,14 @@ export ARROW_GDB_SCRIPT=${arrow_dir}/cpp/gdb_arrow.py
# Enable some checks inside Python itself
export PYTHONDEVMODE=1

# Enable memory debug checks.
export ARROW_DEBUG_MEMORY_POOL=trap
# Enable memory debug checks if the env is not set already
if [ -z "${ARROW_DEBUG_MEMORY_POOL}" ]; then
export ARROW_DEBUG_MEMORY_POOL=trap
fi

# By default, force-test all optional components
: ${PYARROW_TEST_ACERO:=${ARROW_ACERO:-ON}}
: ${PYARROW_TEST_AZURE:=${ARROW_AZURE:-ON}}
: ${PYARROW_TEST_CUDA:=${ARROW_CUDA:-ON}}
: ${PYARROW_TEST_DATASET:=${ARROW_DATASET:-ON}}
: ${PYARROW_TEST_FLIGHT:=${ARROW_FLIGHT:-ON}}
Expand Down
3 changes: 3 additions & 0 deletions ci/scripts/python_wheel_macos_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ pip install "delocate>=0.10.3"

echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ==="
: ${ARROW_ACERO:=ON}
: ${ARROW_AZURE:=ON}
: ${ARROW_DATASET:=ON}
: ${ARROW_FLIGHT:=ON}
: ${ARROW_GANDIVA:=OFF}
Expand Down Expand Up @@ -95,6 +96,7 @@ pushd ${build_dir}/build

cmake \
-DARROW_ACERO=${ARROW_ACERO} \
-DARROW_AZURE=${ARROW_AZURE} \
-DARROW_BUILD_SHARED=ON \
-DARROW_BUILD_STATIC=OFF \
-DARROW_BUILD_TESTS=OFF \
Expand Down Expand Up @@ -148,6 +150,7 @@ export PYARROW_BUNDLE_ARROW_CPP=1
export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR}
export PYARROW_INSTALL_TESTS=1
export PYARROW_WITH_ACERO=${ARROW_ACERO}
export PYARROW_WITH_AZURE=${ARROW_AZURE}
export PYARROW_WITH_DATASET=${ARROW_DATASET}
export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT}
export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA}
Expand Down
3 changes: 3 additions & 0 deletions ci/scripts/python_wheel_manylinux_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ rm -rf /arrow/python/pyarrow/*.so.*

echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ==="
: ${ARROW_ACERO:=ON}
: ${ARROW_AZURE:=ON}
: ${ARROW_DATASET:=ON}
: ${ARROW_FLIGHT:=ON}
: ${ARROW_GANDIVA:=OFF}
Expand Down Expand Up @@ -87,6 +88,7 @@ pushd /tmp/arrow-build

cmake \
-DARROW_ACERO=${ARROW_ACERO} \
-DARROW_AZURE=${ARROW_AZURE} \
-DARROW_BUILD_SHARED=ON \
-DARROW_BUILD_STATIC=OFF \
-DARROW_BUILD_TESTS=OFF \
Expand Down Expand Up @@ -141,6 +143,7 @@ export PYARROW_BUNDLE_ARROW_CPP=1
export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR}
export PYARROW_INSTALL_TESTS=1
export PYARROW_WITH_ACERO=${ARROW_ACERO}
export PYARROW_WITH_AZURE=${ARROW_AZURE}
export PYARROW_WITH_DATASET=${ARROW_DATASET}
export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT}
export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA}
Expand Down
6 changes: 4 additions & 2 deletions ci/scripts/python_wheel_unix_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,17 @@ fi

source_dir=${1}

: ${ARROW_AZURE:=ON}
: ${ARROW_FLIGHT:=ON}
: ${ARROW_SUBSTRAIT:=ON}
: ${ARROW_S3:=ON}
: ${ARROW_GCS:=ON}
: ${ARROW_S3:=ON}
: ${ARROW_SUBSTRAIT:=ON}
: ${CHECK_IMPORTS:=ON}
: ${CHECK_UNITTESTS:=ON}
: ${INSTALL_PYARROW:=ON}

export PYARROW_TEST_ACERO=ON
export PYARROW_TEST_AZURE=${ARROW_AZURE}
export PYARROW_TEST_CYTHON=OFF
export PYARROW_TEST_DATASET=ON
export PYARROW_TEST_FLIGHT=${ARROW_FLIGHT}
Expand Down
6 changes: 4 additions & 2 deletions ci/scripts/r_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,10 @@ export _R_CHECK_STOP_ON_INVALID_NUMERIC_VERSION_INPUTS_=TRUE
# to retrieve metadata. Disable this so that S3FileSystem tests run faster.
export AWS_EC2_METADATA_DISABLED=TRUE

# Enable memory debug checks.
export ARROW_DEBUG_MEMORY_POOL=trap
# Enable memory debug checks if the env is not set already
if [ -z "${ARROW_DEBUG_MEMORY_POOL}" ]; then
export ARROW_DEBUG_MEMORY_POOL=trap
fi

# Hack so that texlive2020 doesn't pollute the home dir
export TEXMFCONFIG=/tmp/texmf-config
Expand Down
6 changes: 4 additions & 2 deletions ci/scripts/ruby_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig
export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0

# Enable memory debug checks.
export ARROW_DEBUG_MEMORY_POOL=trap
# Enable memory debug checks if the env is not set already
if [ -z "${ARROW_DEBUG_MEMORY_POOL}" ]; then
export ARROW_DEBUG_MEMORY_POOL=trap
fi

rake -f ${source_dir}/Rakefile BUILD_DIR=${build_dir} USE_BUNDLER=yes
10 changes: 10 additions & 0 deletions ci/vcpkg/vcpkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,16 @@
}
]
},
"azure": {
"description": "Azure blob storage support",
"dependencies": [
"azure-core-cpp",
"azure-identity-cpp",
"azure-storage-blobs-cpp",
"azure-storage-common-cpp",
"azure-storage-files-datalake-cpp"
]
},
"orc": {
"description": "ORC support",
"dependencies": [
Expand Down
14 changes: 9 additions & 5 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -774,8 +774,7 @@ if(ARROW_ORC)
list(APPEND ARROW_SHARED_LINK_LIBS orc::orc ${ARROW_PROTOBUF_LIBPROTOBUF})
list(APPEND ARROW_STATIC_LINK_LIBS orc::orc ${ARROW_PROTOBUF_LIBPROTOBUF})
if(ORC_SOURCE STREQUAL "SYSTEM")
list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS orc::orc
${ARROW_PROTOBUF_LIBPROTOBUF})
list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS orc::orc)
endif()
endif()

Expand Down Expand Up @@ -824,9 +823,6 @@ if(ARROW_WITH_OPENTELEMETRY)
opentelemetry-cpp::ostream_span_exporter
opentelemetry-cpp::otlp_http_exporter)
endif()
if(Protobuf_SOURCE STREQUAL "SYSTEM")
list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_PROTOBUF_LIBPROTOBUF})
endif()
list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS CURL::libcurl)
endif()

Expand Down Expand Up @@ -861,6 +857,14 @@ if(ARROW_USE_XSIMD)
list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_XSIMD})
endif()

# This should be done after if(ARROW_ORC) and if(ARROW_WITH_OPENTELEMETRY)
# because they depend on Protobuf.
if(ARROW_WITH_PROTOBUF)
if(Protobuf_SOURCE STREQUAL "SYSTEM")
list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_PROTOBUF_LIBPROTOBUF})
endif()
endif()

add_custom_target(arrow_dependencies)
add_custom_target(arrow_benchmark_dependencies)
add_custom_target(arrow_test_dependencies)
Expand Down
23 changes: 23 additions & 0 deletions cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -905,6 +905,29 @@ TEST_F(TestArray, TestAppendArraySlice) {
}
}

// Regression test for GH-39976: exercises the out-of-line data size
// calculation performed by BinaryViewBuilder::AppendArraySlice.
TEST_F(TestArray, TestBinaryViewAppendArraySlice) {
  // Build the reference array: a null followed by a single string value.
  BinaryViewBuilder source_builder(pool_);
  ASSERT_OK(source_builder.AppendNull());
  ASSERT_OK(source_builder.Append("long string; not inlined"));
  ASSERT_EQ(2, source_builder.length());
  ASSERT_OK_AND_ASSIGN(auto expected, source_builder.Finish());
  ASSERT_OK(expected->ValidateFull());

  ArraySpan source_span;
  source_span.SetMembers(*expected->data());

  // Rebuild the array one element at a time through AppendArraySlice, checking
  // the builder length after each single-element slice.
  BinaryViewBuilder copy_builder(pool_);
  ASSERT_OK(copy_builder.AppendArraySlice(source_span, /*offset=*/0, /*length=*/1));
  ASSERT_EQ(1, copy_builder.length());
  ASSERT_OK(copy_builder.AppendArraySlice(source_span, /*offset=*/1, /*length=*/1));
  ASSERT_EQ(2, copy_builder.length());
  ASSERT_OK_AND_ASSIGN(auto actual, copy_builder.Finish());
  ASSERT_OK(actual->ValidateFull());

  // The slice-by-slice copy must match the reference array.
  AssertArraysEqual(*expected, *actual);
}

TEST_F(TestArray, ValidateBuffersPrimitive) {
auto empty_buffer = std::make_shared<Buffer>("");
auto null_buffer = Buffer::FromString("\xff");
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/builder_binary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ Status BinaryViewBuilder::AppendArraySlice(const ArraySpan& array, int64_t offse

int64_t out_of_line_total = 0, i = 0;
VisitNullBitmapInline(
array.buffers[0].data, array.offset, array.length, array.null_count,
array.buffers[0].data, array.offset + offset, length, array.null_count,
[&] {
if (!values[i].is_inline()) {
out_of_line_total += static_cast<int64_t>(values[i].size());
Expand Down
Loading

0 comments on commit 7f78d83

Please sign in to comment.