diff --git a/.github/actions/setup-builder/action.yaml b/.github/actions/setup-builder/action.yaml deleted file mode 100644 index 13a3008b7..000000000 --- a/.github/actions/setup-builder/action.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Prepare Rust Builder -description: 'Prepare Rust Build Environment' -inputs: - rust-version: - description: 'version of rust to install (e.g. stable)' - required: true - default: 'stable' -runs: - using: "composite" - steps: - - name: Install Build Dependencies - shell: bash - run: | - apt-get update - apt-get install -y protobuf-compiler - - name: Setup Rust toolchain - shell: bash - run: | - echo "Installing ${{ inputs.rust-version }}" - rustup toolchain install ${{ inputs.rust-version }} - rustup default ${{ inputs.rust-version }} - rustup component add rustfmt diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 01ef4a7f6..f457b3dc5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -97,9 +97,15 @@ jobs: name: python-wheel-license path: . - run: cat LICENSE.txt + - name: Install Protoc + uses: arduino/setup-protoc@v1 + with: + version: '3.x' - name: Build wheels uses: PyO3/maturin-action@v1 with: + env: + RUST_BACKTRACE: 1 rust-toolchain: nightly target: x86_64 manylinux: auto diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 5ca3cd44b..0f4681ac5 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -53,18 +53,10 @@ jobs: toolchain: ${{ matrix.toolchain }} override: true - - name: Install protobuf compiler - shell: bash - run: | - mkdir -p $HOME/d/protoc - cd $HOME/d/protoc - export PROTO_ZIP="protoc-21.4-linux-x86_64.zip" - curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP - unzip $PROTO_ZIP - export PATH=$PATH:$HOME/d/protoc/bin - export PROTOC=$HOME/d/protoc/bin - sudo chown -R $(whoami) $HOME/d/protoc - protoc --version + - name: Install Protoc + uses: arduino/setup-protoc@v1 + with: + version: '3.x' - name: Setup Python uses: actions/setup-python@v4 @@ -112,22 +104,11 @@ jobs: flake8 --exclude venv --ignore=E501,W503 black --line-length 79 --diff --check . - - name: Build wheels - uses: PyO3/maturin-action@v1 - with: - command: build - args: --release --out dist - - name: Run tests + env: + RUST_BACKTRACE: 1 run: | git submodule update --init - export PATH=$PATH:$HOME/d/protoc/bin - export PROTOC=$HOME/d/protoc/bin - sudo chown -R $(whoami) $HOME/d/protoc - ls -l $HOME/d/protoc/ - ls -l $HOME/d/protoc/bin - pip install datafusion-python --no-index --find-links dist --force-reinstall - pip install pytest - cargo clean - maturin develop - RUST_BACKTRACE=1 pytest -v . + source venv/bin/activate + pip install -e . -vv + pytest -v . diff --git a/Cargo.lock b/Cargo.lock index 0ffe60ce0..d2a10ca0a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -677,8 +677,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "16.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=5238e8c97f998b4d2cb9fab85fb182f325a1a7fb#5238e8c97f998b4d2cb9fab85fb182f325a1a7fb" +version = "17.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6d90cae91414aaeda37ae8022a23ef1124ca8efc08ac7d7770274249f7cf148" dependencies = [ "ahash", "apache-avro", @@ -725,8 +726,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "16.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=5238e8c97f998b4d2cb9fab85fb182f325a1a7fb#5238e8c97f998b4d2cb9fab85fb182f325a1a7fb" +version = "17.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b21c4b8e8b7815e86d79d25da16854fee6d4d1b386572e802a248b7d43188e86" dependencies = [ "apache-avro", "arrow", @@ -740,8 +742,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "16.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=5238e8c97f998b4d2cb9fab85fb182f325a1a7fb#5238e8c97f998b4d2cb9fab85fb182f325a1a7fb" +version = "17.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db8c07b051fbaf01657a3eb910a76b042ecfed0350a40412f70cf6b949bd5328" dependencies = [ "ahash", "arrow", @@ -752,8 +755,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "16.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=5238e8c97f998b4d2cb9fab85fb182f325a1a7fb#5238e8c97f998b4d2cb9fab85fb182f325a1a7fb" +version = "17.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2ce4d34a808cd2e4c4864cdc759dd1bd22dcac2b8af38aa570e30fd54577c4d" dependencies = [ "arrow", "async-trait", @@ -768,8 +772,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "16.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=5238e8c97f998b4d2cb9fab85fb182f325a1a7fb#5238e8c97f998b4d2cb9fab85fb182f325a1a7fb" +version = "17.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a38afa11a09505c24bd7e595039d7914ec39329ba490209413ef2d37895c8220" dependencies = [ "ahash", "arrow", @@ -819,8 +824,9 @@ dependencies = [ [[package]] name = "datafusion-row" -version = "16.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=5238e8c97f998b4d2cb9fab85fb182f325a1a7fb#5238e8c97f998b4d2cb9fab85fb182f325a1a7fb" +version = "17.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9172411b25ff4aa97f8e99884898595a581636d93cc96c12f96dbe3bf51cd7e5" dependencies = [ "arrow", "datafusion-common", @@ -830,8 +836,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "16.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=5238e8c97f998b4d2cb9fab85fb182f325a1a7fb#5238e8c97f998b4d2cb9fab85fb182f325a1a7fb" +version = "17.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fbe5e61563ced2f6992a60afea568ff3de69e32940bbf07db06fc5c9d8cd866" dependencies = [ "arrow-schema", "datafusion-common", @@ -842,11 +849,13 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "16.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=5238e8c97f998b4d2cb9fab85fb182f325a1a7fb#5238e8c97f998b4d2cb9fab85fb182f325a1a7fb" +version = "17.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e5af8bc23708f6d9d1721947c8486c96153ce671269522d7d917bb428d2fa73" dependencies = [ "async-recursion", "datafusion", + "itertools", "prost 0.11.6", "prost-build 0.9.0", "prost-types 0.11.6", diff --git a/Cargo.toml b/Cargo.toml index f93931475..cbf6c5acc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,11 +34,11 @@ default = ["mimalloc"] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "~0.17.3", features = ["extension-module", "abi3", "abi3-py37"] } -datafusion = { git = "https://github.com/apache/arrow-datafusion", rev = "5238e8c97f998b4d2cb9fab85fb182f325a1a7fb", features = ["pyarrow", "avro"] } -datafusion-expr = { git = "https://github.com/apache/arrow-datafusion", rev = "5238e8c97f998b4d2cb9fab85fb182f325a1a7fb" } -datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion", rev = "5238e8c97f998b4d2cb9fab85fb182f325a1a7fb" } -datafusion-common = { git = "https://github.com/apache/arrow-datafusion", rev = "5238e8c97f998b4d2cb9fab85fb182f325a1a7fb", features = ["pyarrow"] } -datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion", rev = "5238e8c97f998b4d2cb9fab85fb182f325a1a7fb" } +datafusion = { version = "17.0.0", features = ["pyarrow", "avro"] } +datafusion-expr = "17.0.0" +datafusion-optimizer = "17.0.0" +datafusion-common = { version = "17.0.0", features = ["pyarrow"] } +datafusion-substrait = "17.0.0" uuid = { version = "1.2", features = ["v4"] } mimalloc = { version = "*", optional = true, default-features = false } async-trait = "0.1" diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py index eb69a9b8f..a50532c52 100644 --- a/datafusion/tests/test_dataframe.py +++ b/datafusion/tests/test_dataframe.py @@ -52,11 +52,12 @@ def struct_df(): return ctx.create_dataframe([[batch]]) + @pytest.fixture def aggregate_df(): ctx = SessionContext() - ctx.register_csv('test', 'testing/data/csv/aggregate_test_100.csv') - return ctx.sql('select c1, sum(c2) from test group by c1') + ctx.register_csv("test", "testing/data/csv/aggregate_test_100.csv") + return ctx.sql("select c1, sum(c2) from test group by c1") def test_select(df): @@ -271,10 +272,11 @@ def test_logical_plan(aggregate_df): assert expected == plan.display() - expected = \ - "Projection: test.c1, SUM(test.c2)\n" \ - " Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n" \ + expected = ( + "Projection: test.c1, SUM(test.c2)\n" + " Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n" " TableScan: test" + ) assert expected == plan.display_indent() @@ -286,10 +288,11 @@ def test_optimized_logical_plan(aggregate_df): assert expected == plan.display() - expected = \ - "Projection: test.c1, SUM(test.c2)\n" \ - " Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n" \ + expected = ( + "Projection: test.c1, SUM(test.c2)\n" + " Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n" " TableScan: test projection=[c1, c2]" + ) assert expected == plan.display_indent() @@ -297,14 +300,17 @@ def test_optimized_logical_plan(aggregate_df): def test_execution_plan(aggregate_df): plan = aggregate_df.execution_plan() - expected = "ProjectionExec: expr=[c1@0 as c1, SUM(test.c2)@1 as SUM(test.c2)]\n" + expected = ( + "ProjectionExec: expr=[c1@0 as c1, SUM(test.c2)@1 as SUM(test.c2)]\n" + ) assert expected == plan.display() - expected = \ - "ProjectionExec: expr=[c1@0 as c1, SUM(test.c2)@1 as SUM(test.c2)]\n" \ - " Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n" \ + expected = ( + "ProjectionExec: expr=[c1@0 as c1, SUM(test.c2)@1 as SUM(test.c2)]\n" + " Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n" " TableScan: test projection=[c1, c2]" + ) indent = plan.display_indent() @@ -317,7 +323,6 @@ def test_execution_plan(aggregate_df): assert "CsvExec:" in indent - def test_repartition(df): df.repartition(2) diff --git a/datafusion/tests/test_substrait.py b/datafusion/tests/test_substrait.py index 7f31c5a99..4c6c8a365 100644 --- a/datafusion/tests/test_substrait.py +++ b/datafusion/tests/test_substrait.py @@ -16,9 +16,8 @@ # under the License. import pyarrow as pa -import pyarrow.dataset as ds -from datafusion import column, literal, SessionContext +from datafusion import SessionContext from datafusion import substrait as ss import pytest @@ -39,8 +38,14 @@ def test_substrait_serialization(ctx): assert ctx.tables() == {"t"} # For now just make sure the method calls blow up - substrait_plan = ss.substrait.serde.serialize_to_plan("SELECT * FROM t", ctx) - substrait_bytes = ss.substrait.serde.serialize_bytes("SELECT * FROM t", ctx) + substrait_plan = ss.substrait.serde.serialize_to_plan( + "SELECT * FROM t", ctx + ) + substrait_bytes = ss.substrait.serde.serialize_bytes( + "SELECT * FROM t", ctx + ) substrait_plan = ss.substrait.serde.deserialize_bytes(substrait_bytes) - df_logical_plan = ss.substrait.consumer.from_substrait_plan(ctx, substrait_plan) + df_logical_plan = ss.substrait.consumer.from_substrait_plan( + ctx, substrait_plan + ) substrait_plan = ss.substrait.producer.to_substrait_plan(df_logical_plan) diff --git a/pyproject.toml b/pyproject.toml index 4617613cd..6d8a9d213 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ classifier = [ "Programming Language :: Rust", ] dependencies = [ - "pyarrow>=1", + "pyarrow>=6.0.1", ] [project.urls]