From 763aa23881b9abcdac9587361730042e9b215aef Mon Sep 17 00:00:00 2001 From: fsdvh <105630300+fsdvh@users.noreply.github.com> Date: Mon, 23 Jan 2023 10:59:56 +0200 Subject: [PATCH] Upstream rebase (#29) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * configure_me_codegen retroactively reserved on our `bind_host` parame… (#520) * configure_me_codegen retroactively reserved on our `bind_host` parameter name * Add label and pray * Add more labels why not * Prepare 0.10.0 Release (#522) * bump version * CHANGELOG * Ballista gets a docker image!!! (#521) * Ballista gets a docker image!!! * Enable flight sql * Allow executing startup script * Allow executing executables * Clippy * Remove capture group (#527) * fix python build in CI (#528) * fix python build in CI * save progress * use same min rust version in all crates * fix * use image from pyo3 * use newer image from pyo3 * do not require protoc * wheels now generated * rat - exclude generated file * Update docs for simplified instructions (#532) * Update docs for simplified instructions * Fix whoopsie * Update docs/source/user-guide/flightsql.md Co-authored-by: Andy Grove Co-authored-by: Andy Grove * remove --locked (#533) * Bump actions/labeler from 4.0.2 to 4.1.0 (#525) * Provide a memory StateBackendClient (#523) * Rename StateBackend::Standalone to StateBackend:Sled * Copy utility files from sled crate since they cannot be used directly * Provide a memory StateBackendClient * Fix dashmap deadlock issue * Fix for the comments Co-authored-by: yangzhong * only build docker images on rc tags (#535) * docs: fix style in the Helm readme (#551) * Fix Helm chart's image format (#550) * Update datafusion requirement from 14.0.0 to 15.0.0 (#552) * Update datafusion requirement from 14.0.0 to 15.0.0 * Fix UT * Fix python * Fix python * Fix Python Co-authored-by: yangzhong * Make it concurrently to launch tasks to executors (#557) * Make it concurrently to launch tasks to executors * Refine for comments Co-authored-by: yangzhong * fix(ui): fix last seen (#562) * Support Alibaba Cloud OSS with ObjectStore (#567) * Fix cargo clippy (#571) Co-authored-by: yangzhong * Super minor spelling error (#573) * Update env_logger requirement from 0.9 to 0.10 (#539) Updates the requirements on [env_logger](https://github.com/rust-cli/env_logger) to permit the latest version. - [Release notes](https://github.com/rust-cli/env_logger/releases) - [Changelog](https://github.com/rust-cli/env_logger/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-cli/env_logger/compare/v0.9.0...v0.10.0) --- updated-dependencies: - dependency-name: env_logger dependency-type: direct:production ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Update graphviz-rust requirement from 0.4.0 to 0.5.0 (#574) Updates the requirements on [graphviz-rust](https://github.com/besok/graphviz-rust) to permit the latest version. - [Release notes](https://github.com/besok/graphviz-rust/releases) - [Changelog](https://github.com/besok/graphviz-rust/blob/master/CHANGELOG.md) - [Commits](https://github.com/besok/graphviz-rust/commits) --- updated-dependencies: - dependency-name: graphviz-rust dependency-type: direct:production ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * updated readme to contain correct versions of dependencies. (#580) * Fix benchmark image link (#596) * Add support for Azure (#599) * Remove outdated script and use evergreen version of rust (#597) * Remove outdated script and use evergreen version of rust * Use debian protobuf * Customize session builder * Add setter for executor slots policy * Construct Executor with functions * Add queued and completed timestamps to successful job status * Add public methods to SchedulerServer * Public method for getting execution graph * Public method for stage metrics * Use node-level local limit (#20) * Use node-level local limit * serialize limit in shuffle writer * Revert "Merge pull request #19 from coralogix/sc-5792" This reverts commit 08140ef3f1ebe573ce9f0d2f3253422546c8a31a, reversing changes made to a7f13844e44328203f803e5073cfff69f3981f4e. * add log * make sure we don't forget limit for shuffle writer * update accum correctly and try to break early * Check local limit accumulator before polling for more data * fix build Co-authored-by: Martins Purins * Add ClusterState trait * Expose active job count * Remove println * Resubmit jobs when no resources available for scheduling * Make parse_physical_expr public * Reduce log spam * Fix job submitted metric by ignoring resubmissions * Record when job is queued in scheduler metrics (#28) * Record when job is queueud in scheduler metrics * add additional buckets for exec times * fmt * clippy * tomlfmt Signed-off-by: dependabot[bot] Co-authored-by: Brent Gardner Co-authored-by: Andy Grove Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: yahoNanJing <90197956+yahoNanJing@users.noreply.github.com> Co-authored-by: yangzhong Co-authored-by: Xin Hao Co-authored-by: Duyet Le <5009534+duyet@users.noreply.github.com> Co-authored-by: r.4ntix Co-authored-by: Jeremy Dyer Co-authored-by: Sai Krishna Reddy Lakkam <86965352+saikrishna1-bidgely@users.noreply.github.com> Co-authored-by: Aidan Kovacic <95648995+aidankovacic-8451@users.noreply.github.com> Co-authored-by: Dan Harris Co-authored-by: Dan Harris <1327726+thinkharderdev@users.noreply.github.com> Co-authored-by: Martins Purins Co-authored-by: Dan Harris --- .github/workflows/dev_pr.yml | 2 +- .github/workflows/python_build.yml | 14 +- .github/workflows/rust.yml | 20 +- README.md | 2 +- ballista-cli/Cargo.toml | 12 +- ballista/CHANGELOG.md | 78 + ballista/client/Cargo.toml | 17 +- ballista/client/README.md | 4 +- ballista/client/src/context.rs | 19 +- ballista/core/Cargo.toml | 18 +- ballista/core/build.rs | 35 +- ballista/core/proto/ballista.proto | 1 + ballista/core/proto/datafusion.proto | 25 +- .../src/execution_plans/shuffle_reader.rs | 2 +- ballista/core/src/plugin/mod.rs | 2 +- ballista/core/src/serde/generated/.gitignore | 4 - ballista/core/src/serde/generated/ballista.rs | 2867 +++++++++++++++++ ballista/core/src/serde/mod.rs | 4 +- .../src/serde/physical_plan/from_proto.rs | 2 + ballista/core/src/serde/physical_plan/mod.rs | 8 +- .../core/src/serde/physical_plan/to_proto.rs | 6 +- ballista/core/src/utils.rs | 27 +- ballista/executor/Cargo.toml | 12 +- ballista/executor/src/executor_server.rs | 8 +- ballista/scheduler/Cargo.toml | 16 +- ballista/scheduler/scheduler_config_spec.toml | 4 +- ballista/scheduler/src/display.rs | 2 +- ballista/scheduler/src/flight_sql.rs | 53 +- ballista/scheduler/src/main.rs | 63 +- .../scheduler/src/scheduler_server/grpc.rs | 14 +- .../scheduler/src/scheduler_server/mod.rs | 4 +- ballista/scheduler/src/standalone.rs | 7 +- ballista/scheduler/src/state/backend/etcd.rs | 2 +- .../scheduler/src/state/backend/memory.rs | 411 +++ ballista/scheduler/src/state/backend/mod.rs | 15 +- .../state/backend/{standalone.rs => sled.rs} | 18 +- .../scheduler/src/state/backend/utils/mod.rs | 21 + .../src/state/backend/utils/oneshot.rs | 179 + .../src/state/backend/utils/subscriber.rs | 248 ++ .../scheduler/src/state/execution_graph.rs | 20 +- .../state/execution_graph/execution_stage.rs | 6 +- .../src/state/execution_graph_dot.rs | 129 +- .../scheduler/src/state/executor_manager.rs | 10 +- ballista/scheduler/src/state/mod.rs | 87 +- ballista/scheduler/src/test_utils.rs | 6 +- .../scheduler/ui/src/components/DataTable.tsx | 3 +- benchmarks/Cargo.toml | 14 +- benchmarks/src/bin/tpch.rs | 10 +- dev/build-ballista-docker.sh | 4 +- ...-rust.sh => build-ballista-executables.sh} | 4 + dev/build-ui.sh | 23 - dev/docker/ballista-builder.Dockerfile | 2 +- dev/docker/ballista-standalone.Dockerfile | 3 + dev/docker/standalone-entrypoint.sh | 0 dev/release/README.md | 13 +- dev/release/rat_exclude_files.txt | 1 + docs/source/user-guide/flightsql.md | 44 +- docs/source/user-guide/python.md | 2 +- examples/Cargo.toml | 8 +- helm/README.md | 2 + helm/ballista/templates/executor.yaml | 2 +- helm/ballista/templates/scheduler.yaml | 2 +- helm/ballista/values.yaml | 2 +- python/Cargo.toml | 6 +- python/src/context.rs | 16 +- python/src/dataset.rs | 4 +- python/src/datatype.rs | 39 + python/src/lib.rs | 2 + python/src/udaf.rs | 4 + 69 files changed, 4393 insertions(+), 321 deletions(-) delete mode 100644 ballista/core/src/serde/generated/.gitignore create mode 100644 ballista/core/src/serde/generated/ballista.rs create mode 100644 ballista/scheduler/src/state/backend/memory.rs rename ballista/scheduler/src/state/backend/{standalone.rs => sled.rs} (96%) create mode 100644 ballista/scheduler/src/state/backend/utils/mod.rs create mode 100644 ballista/scheduler/src/state/backend/utils/oneshot.rs create mode 100644 ballista/scheduler/src/state/backend/utils/subscriber.rs rename dev/{build-ballista-rust.sh => build-ballista-executables.sh} (79%) delete mode 100755 dev/build-ui.sh mode change 100644 => 100755 dev/docker/standalone-entrypoint.sh create mode 100644 python/src/datatype.rs diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml index e6da4e94a..5bc9822d2 100644 --- a/.github/workflows/dev_pr.yml +++ b/.github/workflows/dev_pr.yml @@ -36,7 +36,7 @@ jobs: github.event_name == 'pull_request_target' && (github.event.action == 'opened' || github.event.action == 'synchronize') - uses: actions/labeler@v4.0.2 + uses: actions/labeler@4.1.0 with: repo-token: ${{ secrets.GITHUB_TOKEN }} configuration-path: .github/workflows/dev_pr/labeler.yml diff --git a/.github/workflows/python_build.yml b/.github/workflows/python_build.yml index 88e829d2f..9cac3d2b0 100644 --- a/.github/workflows/python_build.yml +++ b/.github/workflows/python_build.yml @@ -94,16 +94,6 @@ jobs: steps: - uses: actions/checkout@v3 - run: rm LICENSE.txt - - name: Install protobuf compiler - shell: bash - run: | - mkdir -p $HOME/d/protoc - cd $HOME/d/protoc - export PROTO_ZIP="protoc-21.4-linux-x86_64.zip" - curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP - unzip $PROTO_ZIP - export PATH=$PATH:$HOME/d/protoc/bin - protoc --version - name: Download LICENSE.txt uses: actions/download-artifact@v3 with: @@ -112,11 +102,11 @@ jobs: - run: cat LICENSE.txt - name: Build wheels run: | - export PATH=$PATH:$HOME/d/protoc/bin export RUSTFLAGS='-C target-cpu=skylake' + rm ../ballista/core/proto/* docker run --rm -v $(pwd)/..:/io \ --workdir /io/python \ - konstin2/maturin:v0.11.2 \ + ghcr.io/pyo3/maturin:v0.13.7 \ build --release --manylinux 2010 - name: Archive wheels uses: actions/upload-artifact@v3 diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 3f595114d..52e46d6e8 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -117,12 +117,7 @@ jobs: - name: Install protobuf compiler shell: bash run: | - mkdir -p $HOME/d/protoc - cd $HOME/d/protoc - export PROTO_ZIP="protoc-21.4-linux-x86_64.zip" - curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP - unzip $PROTO_ZIP - export PATH=$PATH:$HOME/d/protoc/bin + apt-get -qq update && apt-get -y -qq install protobuf-compiler protoc --version - name: Cache Cargo uses: actions/cache@v3 @@ -145,7 +140,7 @@ jobs: export PATH=$PATH:$HOME/d/protoc/bin export ARROW_TEST_DATA=$(pwd)/testing/data export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data - cargo test --features flight-sql + cargo test cd examples cargo run --example standalone_sql --features=ballista/standalone env: @@ -304,14 +299,15 @@ jobs: - name: Build and push Docker image run: | echo "github user is $DOCKER_USER" - export DOCKER_TAG="$(git describe --exact-match --tags $(git log -n1 --pretty='%h') || echo '0.10.0-test')" - if [[ $DOCKER_TAG =~ ^[0-9\.]+$ ]] + docker build -t arrow-ballista-standalone:latest -f dev/docker/ballista-standalone.Dockerfile . + export DOCKER_TAG="$(git describe --exact-match --tags $(git log -n1 --pretty='%h') || echo '')" + if [[ $DOCKER_TAG =~ ^[0-9\.]+-rc[0-9]+$ ]] then echo "publishing docker tag $DOCKER_TAG" + docker tag arrow-ballista-standalone:latest ghcr.io/apache/arrow-ballista-standalone:$DOCKER_TAG + docker login ghcr.io -u $DOCKER_USER -p "$DOCKER_PASS" + docker push ghcr.io/apache/arrow-ballista-standalone:$DOCKER_TAG fi - docker login ghcr.io -u $DOCKER_USER -p "$DOCKER_PASS" - docker build -t ghcr.io/apache/arrow-ballista-standalone:$DOCKER_TAG -f dev/docker/ballista-standalone.Dockerfile . - docker push ghcr.io/apache/arrow-ballista-standalone:$DOCKER_TAG env: DOCKER_USER: ${{ github.actor }} DOCKER_PASS: ${{ secrets.GITHUB_TOKEN }} diff --git a/README.md b/README.md index bcd7c7341..6e07d8ea3 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ queries at scale factor 10 (10 GB) on a single node with a single executor and 2 The tracking issue for improving these results is [#339](https://github.com/apache/arrow-ballista/issues/339). -![benchmarks](./docs/developer/images/ballista-benchmarks.png) +![benchmarks](https://sqlbenchmarks.io/sqlbench-h/results/env/workstation/sf10/distributed/sqlbench-h-workstation-10-distributed-perquery.png) # Getting Started diff --git a/ballista-cli/Cargo.toml b/ballista-cli/Cargo.toml index 18b3ddb82..8c295efca 100644 --- a/ballista-cli/Cargo.toml +++ b/ballista-cli/Cargo.toml @@ -18,25 +18,25 @@ [package] name = "ballista-cli" description = "Command Line Client for Ballista distributed query engine." -version = "0.9.0" +version = "0.10.0" authors = ["Apache Arrow "] edition = "2021" keywords = ["ballista", "cli"] license = "Apache-2.0" homepage = "https://github.com/apache/arrow-ballista" repository = "https://github.com/apache/arrow-ballista" -rust-version = "1.59" +rust-version = "1.63" readme = "README.md" [dependencies] -ballista = { path = "../ballista/client", version = "0.9.0", features = [ +ballista = { path = "../ballista/client", version = "0.10.0", features = [ "standalone", ] } clap = { version = "3", features = ["derive", "cargo"] } -datafusion = "14.0.0" -datafusion-cli = "14.0.0" +datafusion = "15.0.0" +datafusion-cli = "15.0.0" dirs = "4.0.0" -env_logger = "0.9" +env_logger = "0.10" mimalloc = { version = "0.1", default-features = false } num_cpus = "1.13.0" rustyline = "10.0" diff --git a/ballista/CHANGELOG.md b/ballista/CHANGELOG.md index 11933a417..99aa17455 100644 --- a/ballista/CHANGELOG.md +++ b/ballista/CHANGELOG.md @@ -19,6 +19,84 @@ # Changelog +## [0.10.0](https://github.com/apache/arrow-ballista/tree/0.10.0) (2022-11-18) + +[Full Changelog](https://github.com/apache/arrow-ballista/compare/0.9.0...0.10.0) + +**Implemented enhancements:** + +- Add user guide section on prometheus metrics [\#507](https://github.com/apache/arrow-ballista/issues/507) +- Don't throw error when job path not exist in remove\_job\_data [\#502](https://github.com/apache/arrow-ballista/issues/502) +- Fix clippy warning [\#494](https://github.com/apache/arrow-ballista/issues/494) +- Use job\_data\_clean\_up\_interval\_seconds == 0 to indicate executor\_cleanup\_enable [\#488](https://github.com/apache/arrow-ballista/issues/488) +- Add a config for tracing log rolling policy for both scheduler and executor [\#486](https://github.com/apache/arrow-ballista/issues/486) +- Set up repo where we can push benchmark results [\#473](https://github.com/apache/arrow-ballista/issues/473) +- Make the delayed time interval for cleanup job data in both scheduler and executor configurable [\#469](https://github.com/apache/arrow-ballista/issues/469) +- Add some validation for the remove\_job\_data grpc service [\#467](https://github.com/apache/arrow-ballista/issues/467) +- Add ability to build docker images using `release-lto` profile [\#463](https://github.com/apache/arrow-ballista/issues/463) +- Suggest users download \(rather than build\) the FlightSQL JDBC Driver [\#460](https://github.com/apache/arrow-ballista/issues/460) +- Clean up legacy job shuffle data [\#459](https://github.com/apache/arrow-ballista/issues/459) +- Add grpc service for the scheduler to make it able to be triggered by client explicitly [\#458](https://github.com/apache/arrow-ballista/issues/458) +- Replace Mutex\ by using DashMap [\#448](https://github.com/apache/arrow-ballista/issues/448) +- Refine log level [\#446](https://github.com/apache/arrow-ballista/issues/446) +- Upgrade to DataFusion 14.0.0 [\#445](https://github.com/apache/arrow-ballista/issues/445) +- Add a feature for hdfs3 [\#419](https://github.com/apache/arrow-ballista/issues/419) +- Add optional flag which advertises host for Arrow Flight SQL [\#418](https://github.com/apache/arrow-ballista/issues/418) +- Partitioning reasoning in DataFusion and Ballista [\#284](https://github.com/apache/arrow-ballista/issues/284) +- Stop wasting time in CI on MIRI runs [\#283](https://github.com/apache/arrow-ballista/issues/283) +- Publish Docker images as part of each release [\#236](https://github.com/apache/arrow-ballista/issues/236) +- Cleanup job/stage status from TaskManager and clean up shuffle data after a period after JobFinished [\#185](https://github.com/apache/arrow-ballista/issues/185) + +**Fixed bugs:** + +- build broken: configure\_me\_codegen retroactively reserved `bind_host` [\#519](https://github.com/apache/arrow-ballista/issues/519) +- Return empty results for SQLs with order by [\#451](https://github.com/apache/arrow-ballista/issues/451) +- ballista scheduler is not taken inline parameters into account [\#443](https://github.com/apache/arrow-ballista/issues/443) +- \[FlightSQL\] Cannot connect with Tableau Desktop [\#428](https://github.com/apache/arrow-ballista/issues/428) +- Benchmark q15 fails [\#372](https://github.com/apache/arrow-ballista/issues/372) +- Incorrect documentation for building Ballista on Linux when using docker-compose [\#362](https://github.com/apache/arrow-ballista/issues/362) +- Scheduler silently replaces `ParquetExec` with `EmptyExec` if data path is not correctly mounted in container [\#353](https://github.com/apache/arrow-ballista/issues/353) +- SQL with order by limit returns nothing [\#334](https://github.com/apache/arrow-ballista/issues/334) + +**Documentation updates:** + +- README updates [\#433](https://github.com/apache/arrow-ballista/pull/433) ([andygrove](https://github.com/andygrove)) + +**Merged pull requests:** + +- configure\_me\_codegen retroactively reserved on our `bind_host` parame… [\#520](https://github.com/apache/arrow-ballista/pull/520) ([avantgardnerio](https://github.com/avantgardnerio)) +- Bump actions/cache from 2 to 3 [\#517](https://github.com/apache/arrow-ballista/pull/517) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update graphviz-rust requirement from 0.3.0 to 0.4.0 [\#515](https://github.com/apache/arrow-ballista/pull/515) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add Prometheus metrics endpoint [\#511](https://github.com/apache/arrow-ballista/pull/511) ([thinkharderdev](https://github.com/thinkharderdev)) +- Enable tests that work since upgrading to DataFusion 14 [\#510](https://github.com/apache/arrow-ballista/pull/510) ([andygrove](https://github.com/andygrove)) +- Update hashbrown requirement from 0.12 to 0.13 [\#506](https://github.com/apache/arrow-ballista/pull/506) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Don't throw error when job shuffle data path not exist in executor [\#503](https://github.com/apache/arrow-ballista/pull/503) ([yahoNanJing](https://github.com/yahoNanJing)) +- Upgrade to DataFusion 14.0.0 and Arrow 26.0.0 [\#499](https://github.com/apache/arrow-ballista/pull/499) ([andygrove](https://github.com/andygrove)) +- Fix clippy warning [\#495](https://github.com/apache/arrow-ballista/pull/495) ([yahoNanJing](https://github.com/yahoNanJing)) +- Stop wasting time in CI on MIRI runs [\#491](https://github.com/apache/arrow-ballista/pull/491) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Remove executor config executor\_cleanup\_enable and make the configuation name for executor cleanup more intuitive [\#489](https://github.com/apache/arrow-ballista/pull/489) ([yahoNanJing](https://github.com/yahoNanJing)) +- Add a config for tracing log rolling policy for both scheduler and executor [\#487](https://github.com/apache/arrow-ballista/pull/487) ([yahoNanJing](https://github.com/yahoNanJing)) +- Add grpc service of cleaning up job shuffle data for the scheduler to make it able to be triggered by client explicitly [\#485](https://github.com/apache/arrow-ballista/pull/485) ([yahoNanJing](https://github.com/yahoNanJing)) +- \[Minor\] Bump DataFusion [\#480](https://github.com/apache/arrow-ballista/pull/480) ([Dandandan](https://github.com/Dandandan)) +- Remove benchmark results from README [\#478](https://github.com/apache/arrow-ballista/pull/478) ([andygrove](https://github.com/andygrove)) +- Update `flightsql.md` to provide correct instruction [\#476](https://github.com/apache/arrow-ballista/pull/476) ([iajoiner](https://github.com/iajoiner)) +- Add support for Tableau [\#475](https://github.com/apache/arrow-ballista/pull/475) ([avantgardnerio](https://github.com/avantgardnerio)) +- Add SchedulerConfig for the scheduler configurations, like event\_loop\_buffer\_size, finished\_job\_data\_clean\_up\_interval\_seconds, finished\_job\_state\_clean\_up\_interval\_seconds [\#472](https://github.com/apache/arrow-ballista/pull/472) ([yahoNanJing](https://github.com/yahoNanJing)) +- Bump DataFusion [\#471](https://github.com/apache/arrow-ballista/pull/471) ([Dandandan](https://github.com/Dandandan)) +- Add some validation for remove\_job\_data in the executor server [\#468](https://github.com/apache/arrow-ballista/pull/468) ([yahoNanJing](https://github.com/yahoNanJing)) +- Update documentation to reflect the release of the FlightSQL JDBC Driver [\#461](https://github.com/apache/arrow-ballista/pull/461) ([avantgardnerio](https://github.com/avantgardnerio)) +- Bump DataFusion version [\#453](https://github.com/apache/arrow-ballista/pull/453) ([andygrove](https://github.com/andygrove)) +- Add shuffle for SortPreservingMergeExec physical operator [\#452](https://github.com/apache/arrow-ballista/pull/452) ([yahoNanJing](https://github.com/yahoNanJing)) +- Replace Mutex\ by using DashMap [\#449](https://github.com/apache/arrow-ballista/pull/449) ([yahoNanJing](https://github.com/yahoNanJing)) +- Refine log level for trial info and periodically invoked places [\#447](https://github.com/apache/arrow-ballista/pull/447) ([yahoNanJing](https://github.com/yahoNanJing)) +- MINOR: Add `set -e` to scripts, fix a typo [\#444](https://github.com/apache/arrow-ballista/pull/444) ([andygrove](https://github.com/andygrove)) +- Add optional flag which advertises host for Arrow Flight SQL \#418 [\#442](https://github.com/apache/arrow-ballista/pull/442) ([DaltonModlin](https://github.com/DaltonModlin)) +- Reorder joins after resolving stage inputs [\#441](https://github.com/apache/arrow-ballista/pull/441) ([Dandandan](https://github.com/Dandandan)) +- Add a feature for hdfs3 [\#439](https://github.com/apache/arrow-ballista/pull/439) ([yahoNanJing](https://github.com/yahoNanJing)) +- Add Spark benchmarks [\#438](https://github.com/apache/arrow-ballista/pull/438) ([andygrove](https://github.com/andygrove)) +- scheduler now verifies that `file://` ListingTable URLs are accessible [\#414](https://github.com/apache/arrow-ballista/pull/414) ([andygrove](https://github.com/andygrove)) + + ## [0.9.0](https://github.com/apache/arrow-ballista/tree/0.9.0) (2022-10-22) [Full Changelog](https://github.com/apache/arrow-ballista/compare/0.8.0...0.9.0) diff --git a/ballista/client/Cargo.toml b/ballista/client/Cargo.toml index fd358a8f3..48beaa98e 100644 --- a/ballista/client/Cargo.toml +++ b/ballista/client/Cargo.toml @@ -19,28 +19,29 @@ name = "ballista" description = "Ballista Distributed Compute" license = "Apache-2.0" -version = "0.9.0" +version = "0.10.0" homepage = "https://github.com/apache/arrow-ballista" repository = "https://github.com/apache/arrow-ballista" readme = "README.md" authors = ["Apache Arrow "] edition = "2021" -rust-version = "1.59" +rust-version = "1.63" [dependencies] -ballista-core = { path = "../core", version = "0.9.0" } -ballista-executor = { path = "../executor", version = "0.9.0", optional = true } -ballista-scheduler = { path = "../scheduler", version = "0.9.0", optional = true } -datafusion = "14.0.0" -datafusion-proto = "14.0.0" +ballista-core = { path = "../core", version = "0.10.0" } +ballista-executor = { path = "../executor", version = "0.10.0", optional = true } +ballista-scheduler = { path = "../scheduler", version = "0.10.0", optional = true } +datafusion = "15.0.0" +datafusion-proto = "15.0.0" futures = "0.3" log = "0.4" parking_lot = "0.12" -sqlparser = "0.26" +sqlparser = "0.27" tempfile = "3" tokio = "1.0" [features] +azure = ["ballista-core/azure"] default = [] hdfs = ["ballista-core/hdfs"] hdfs3 = ["ballista-core/hdfs3"] diff --git a/ballista/client/README.md b/ballista/client/README.md index 1c6a15ce0..fd3eaa7fc 100644 --- a/ballista/client/README.md +++ b/ballista/client/README.md @@ -84,8 +84,8 @@ To build a simple ballista example, add the following dependencies to your `Carg ```toml [dependencies] -ballista = "0.8" -datafusion = "12.0.0" +ballista = "0.10" +datafusion = "14.0.0" tokio = "1.0" ``` diff --git a/ballista/client/src/context.rs b/ballista/client/src/context.rs index 9c1b9e67d..51a6c3bc2 100644 --- a/ballista/client/src/context.rs +++ b/ballista/client/src/context.rs @@ -17,6 +17,7 @@ //! Distributed execution context. +use datafusion::arrow::datatypes::SchemaRef; use log::info; use parking_lot::Mutex; use sqlparser::ast::Statement; @@ -375,6 +376,16 @@ impl BallistaContext { .. }) => { let table_exists = ctx.table_exist(name.as_str())?; + let schema: SchemaRef = Arc::new(schema.as_ref().to_owned().into()); + let table_partition_cols = table_partition_cols + .iter() + .map(|col| { + schema + .field_with_name(col) + .map(|f| (f.name().to_owned(), f.data_type().to_owned())) + .map_err(DataFusionError::ArrowError) + }) + .collect::>>()?; match (if_not_exists, table_exists) { (_, false) => match file_type.to_lowercase().as_str() { @@ -383,9 +394,8 @@ impl BallistaContext { .has_header(*has_header) .delimiter(*delimiter as u8) .table_partition_cols(table_partition_cols.to_vec()); - let csv_schema = schema.as_ref().to_owned().into(); if !schema.fields().is_empty() { - options = options.schema(&csv_schema); + options = options.schema(&schema); } self.register_csv(name, location, options).await?; Ok(Arc::new(DataFrame::new(ctx.state.clone(), &plan))) @@ -395,7 +405,7 @@ impl BallistaContext { name, location, ParquetReadOptions::default() - .table_partition_cols(table_partition_cols.to_vec()), + .table_partition_cols(table_partition_cols), ) .await?; Ok(Arc::new(DataFrame::new(ctx.state.clone(), &plan))) @@ -405,7 +415,7 @@ impl BallistaContext { name, location, AvroReadOptions::default() - .table_partition_cols(table_partition_cols.to_vec()), + .table_partition_cols(table_partition_cols), ) .await?; Ok(Arc::new(DataFrame::new(ctx.state.clone(), &plan))) @@ -582,6 +592,7 @@ mod tests { table_partition_cols: x.table_partition_cols.clone(), collect_stat: x.collect_stat, target_partitions: x.target_partitions, + file_sort_order: None, }; let table_paths = listing_table diff --git a/ballista/core/Cargo.toml b/ballista/core/Cargo.toml index fa290d897..792bd5601 100644 --- a/ballista/core/Cargo.toml +++ b/ballista/core/Cargo.toml @@ -19,7 +19,7 @@ name = "ballista-core" description = "Ballista Distributed Compute" license = "Apache-2.0" -version = "0.9.0" +version = "0.10.0" homepage = "https://github.com/apache/arrow-ballista" repository = "https://github.com/apache/arrow-ballista" readme = "README.md" @@ -27,10 +27,14 @@ authors = ["Apache Arrow "] edition = "2018" build = "build.rs" +# Exclude proto files so crates.io consumers don't need protoc +exclude = ["*.proto"] + [package.metadata.docs.rs] rustc-args = ["--cfg", "docsrs"] [features] +azure = ["object_store/azure"] # Used for testing ONLY: causes all values to hash to the same value (test for collisions) force_hash_collisions = ["datafusion/force_hash_collisions"] # Used to enable hdfs to be registered in the ObjectStoreRegistry by default @@ -42,22 +46,22 @@ simd = ["datafusion/simd"] [dependencies] ahash = { version = "0.8", default-features = false } -arrow-flight = { version = "26.0.0", features = ["flight-sql-experimental"] } +arrow-flight = { version = "28.0.0", features = ["flight-sql-experimental"] } async-trait = "0.1.41" chrono = { version = "0.4", default-features = false } clap = { version = "3", features = ["derive", "cargo"] } -datafusion = "14.0.0" +datafusion = "15.0.0" datafusion-objectstore-hdfs = { version = "0.1.1", default-features = false, optional = true } -datafusion-proto = "14.0.0" +datafusion-proto = "15.0.0" futures = "0.3" hashbrown = "0.13" +itertools = "0.10" lazy_static = "1.4.0" -itertools = "0.10" libloading = "0.7.3" log = "0.4" lru = "0.8.1" -object_store = "0.5.0" +object_store = "0.5.2" once_cell = "1.9.0" parking_lot = "0.12" @@ -66,7 +70,7 @@ prost = "0.11" prost-types = "0.11" rand = "0.8" serde = { version = "1", features = ["derive"] } -sqlparser = "0.25" +sqlparser = "0.27" sys-info = "0.9.0" tokio = "1.0" tokio-stream = { version = "0.1", features = ["net"] } diff --git a/ballista/core/build.rs b/ballista/core/build.rs index ab5d050d0..0fdbdc38f 100644 --- a/ballista/core/build.rs +++ b/ballista/core/build.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +use std::path::Path; + fn main() -> Result<(), String> { use std::io::Write; @@ -23,14 +25,8 @@ fn main() -> Result<(), String> { // for use in docker build where file changes can be wonky println!("cargo:rerun-if-env-changed=FORCE_REBUILD"); - println!("cargo:rerun-if-changed=proto/ballista.proto"); let version = rustc_version::version().unwrap(); println!("cargo:rustc-env=RUSTC_VERSION={}", version); - println!("cargo:rerun-if-changed=proto/datafusion.proto"); - tonic_build::configure() - .extern_path(".datafusion", "::datafusion_proto::protobuf") - .compile(&["proto/ballista.proto"], &["proto"]) - .map_err(|e| format!("protobuf compilation failed: {}", e))?; // TODO: undo when resolved: https://github.com/intellij-rust/intellij-rust/issues/9402 #[cfg(feature = "docsrs")] @@ -38,14 +34,25 @@ fn main() -> Result<(), String> { #[cfg(not(feature = "docsrs"))] let path = "src/serde/generated/ballista.rs"; - let code = std::fs::read_to_string(out.join("ballista.protobuf.rs")).unwrap(); - let mut file = std::fs::OpenOptions::new() - .write(true) - .truncate(true) - .create(true) - .open(path) - .unwrap(); - file.write_all(code.as_str().as_ref()).unwrap(); + // We don't include the proto files in releases so that downstreams + // do not need to have PROTOC included + if Path::new("proto/datafusion.proto").exists() { + println!("cargo:rerun-if-changed=proto/datafusion.proto"); + println!("cargo:rerun-if-changed=proto/ballista.proto"); + tonic_build::configure() + .extern_path(".datafusion", "::datafusion_proto::protobuf") + .compile(&["proto/ballista.proto"], &["proto"]) + .map_err(|e| format!("protobuf compilation failed: {}", e))?; + let generated_source_path = out.join("ballista.protobuf.rs"); + let code = std::fs::read_to_string(generated_source_path).unwrap(); + let mut file = std::fs::OpenOptions::new() + .write(true) + .truncate(true) + .create(true) + .open(path) + .unwrap(); + file.write_all(code.as_str().as_ref()).unwrap(); + } Ok(()) } diff --git a/ballista/core/proto/ballista.proto b/ballista/core/proto/ballista.proto index ea1ad3c5f..507456878 100644 --- a/ballista/core/proto/ballista.proto +++ b/ballista/core/proto/ballista.proto @@ -267,6 +267,7 @@ message AvroScanExecNode { enum PartitionMode { COLLECT_LEFT = 0; PARTITIONED = 1; + AUTO = 2; } message HashJoinExecNode { diff --git a/ballista/core/proto/datafusion.proto b/ballista/core/proto/datafusion.proto index c8ea70de7..4152802db 100644 --- a/ballista/core/proto/datafusion.proto +++ b/ballista/core/proto/datafusion.proto @@ -109,6 +109,7 @@ message ListingTableScanNode { ParquetFormat parquet = 11; AvroFormat avro = 12; } + repeated datafusion.LogicalExprNode file_sort_order = 13; } message ViewTableScanNode { @@ -176,6 +177,7 @@ message CreateExternalTableNode { string delimiter = 8; string definition = 9; string file_compression_type = 10; + map options = 11; } message CreateCatalogSchemaNode { @@ -409,8 +411,10 @@ message AliasNode { } message BinaryExprNode { - LogicalExprNode l = 1; - LogicalExprNode r = 2; + // Represents the operands from the left inner most expression + // to the right outer most expression where each of them are chained + // with the operator 'op'. + repeated LogicalExprNode operands = 1; string op = 3; } @@ -739,6 +743,20 @@ message ScalarListValue{ repeated ScalarValue values = 2; } +message ScalarTime32Value { + oneof value { + int32 time32_second_value = 1; + int32 time32_millisecond_value = 2; + }; +} + +message ScalarTime64Value { + oneof value { + int64 time64_microsecond_value = 1; + int64 time64_nanosecond_value = 2; + }; +} + message ScalarTimestampValue { oneof value { int64 time_microsecond_value = 1; @@ -797,6 +815,7 @@ message ScalarValue{ double float64_value = 13; // Literal Date32 value always has a unit of day int32 date_32_value = 14; + ScalarTime32Value time32_value = 15; ScalarListValue list_value = 17; //WAS: ScalarType null_list_value = 18; @@ -808,7 +827,7 @@ message ScalarValue{ ScalarDictionaryValue dictionary_value = 27; bytes binary_value = 28; bytes large_binary_value = 29; - int64 time64_value = 30; + ScalarTime64Value time64_value = 30; IntervalMonthDayNanoValue interval_month_day_nano = 31; StructValue struct_value = 32; ScalarFixedSizeBinary fixed_size_binary_value = 34; diff --git a/ballista/core/src/execution_plans/shuffle_reader.rs b/ballista/core/src/execution_plans/shuffle_reader.rs index 5411978fc..97e24fd62 100644 --- a/ballista/core/src/execution_plans/shuffle_reader.rs +++ b/ballista/core/src/execution_plans/shuffle_reader.rs @@ -364,7 +364,7 @@ async fn fetch_partition_remote( // TODO for shuffle client connections, we should avoid creating new connections again and again. // And we should also avoid to keep alive too many connections for long time. let host = metadata.host.as_str(); - let port = metadata.port as u16; + let port = metadata.port; let mut ballista_client = BallistaClient::try_new(host, port) .await diff --git a/ballista/core/src/plugin/mod.rs b/ballista/core/src/plugin/mod.rs index 3579c5467..d18817136 100644 --- a/ballista/core/src/plugin/mod.rs +++ b/ballista/core/src/plugin/mod.rs @@ -50,7 +50,7 @@ impl PluginEnum { /// new a struct which impl the PluginRegistrar trait pub fn init_plugin_manager(&self) -> Box { match self { - PluginEnum::UDF => Box::new(UDFPluginManager::default()), + PluginEnum::UDF => Box::::default(), } } } diff --git a/ballista/core/src/serde/generated/.gitignore b/ballista/core/src/serde/generated/.gitignore deleted file mode 100644 index 42eb8bcd5..000000000 --- a/ballista/core/src/serde/generated/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -* - -!.gitignore -!mod.rs diff --git a/ballista/core/src/serde/generated/ballista.rs b/ballista/core/src/serde/generated/ballista.rs new file mode 100644 index 000000000..9a00a2ec7 --- /dev/null +++ b/ballista/core/src/serde/generated/ballista.rs @@ -0,0 +1,2867 @@ +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Statistics { + #[prost(int64, tag = "1")] + pub num_rows: i64, + #[prost(int64, tag = "2")] + pub total_byte_size: i64, + #[prost(message, repeated, tag = "3")] + pub column_stats: ::prost::alloc::vec::Vec, + #[prost(bool, tag = "4")] + pub is_exact: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FileRange { + #[prost(int64, tag = "1")] + pub start: i64, + #[prost(int64, tag = "2")] + pub end: i64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PartitionedFile { + #[prost(string, tag = "1")] + pub path: ::prost::alloc::string::String, + #[prost(uint64, tag = "2")] + pub size: u64, + #[prost(uint64, tag = "3")] + pub last_modified_ns: u64, + #[prost(message, repeated, tag = "4")] + pub partition_values: ::prost::alloc::vec::Vec< + ::datafusion_proto::protobuf::ScalarValue, + >, + #[prost(message, optional, tag = "5")] + pub range: ::core::option::Option, +} +/// PhysicalPlanNode is a nested type +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalPlanNode { + #[prost( + oneof = "physical_plan_node::PhysicalPlanType", + tags = "1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24" + )] + pub physical_plan_type: ::core::option::Option, +} +/// Nested message and enum types in `PhysicalPlanNode`. +pub mod physical_plan_node { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum PhysicalPlanType { + #[prost(message, tag = "1")] + ParquetScan(super::ParquetScanExecNode), + #[prost(message, tag = "2")] + CsvScan(super::CsvScanExecNode), + #[prost(message, tag = "3")] + Empty(super::EmptyExecNode), + #[prost(message, tag = "4")] + Projection(::prost::alloc::boxed::Box), + #[prost(message, tag = "6")] + GlobalLimit(::prost::alloc::boxed::Box), + #[prost(message, tag = "7")] + LocalLimit(::prost::alloc::boxed::Box), + #[prost(message, tag = "8")] + Aggregate(::prost::alloc::boxed::Box), + #[prost(message, tag = "9")] + HashJoin(::prost::alloc::boxed::Box), + #[prost(message, tag = "10")] + ShuffleReader(super::ShuffleReaderExecNode), + #[prost(message, tag = "11")] + Sort(::prost::alloc::boxed::Box), + #[prost(message, tag = "12")] + CoalesceBatches(::prost::alloc::boxed::Box), + #[prost(message, tag = "13")] + Filter(::prost::alloc::boxed::Box), + #[prost(message, tag = "14")] + Merge(::prost::alloc::boxed::Box), + #[prost(message, tag = "15")] + Unresolved(super::UnresolvedShuffleExecNode), + #[prost(message, tag = "16")] + Repartition(::prost::alloc::boxed::Box), + #[prost(message, tag = "17")] + Window(::prost::alloc::boxed::Box), + #[prost(message, tag = "18")] + ShuffleWriter(::prost::alloc::boxed::Box), + #[prost(message, tag = "19")] + CrossJoin(::prost::alloc::boxed::Box), + #[prost(message, tag = "20")] + AvroScan(super::AvroScanExecNode), + #[prost(message, tag = "21")] + Extension(super::PhysicalExtensionNode), + #[prost(message, tag = "22")] + Union(super::UnionExecNode), + #[prost(message, tag = "23")] + Explain(super::ExplainExecNode), + #[prost(message, tag = "24")] + SortPreservingMerge( + ::prost::alloc::boxed::Box, + ), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalExtensionNode { + #[prost(bytes = "vec", tag = "1")] + pub node: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "2")] + pub inputs: ::prost::alloc::vec::Vec, +} +/// physical expressions +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalExprNode { + #[prost( + oneof = "physical_expr_node::ExprType", + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17" + )] + pub expr_type: ::core::option::Option, +} +/// Nested message and enum types in `PhysicalExprNode`. +pub mod physical_expr_node { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum ExprType { + /// column references + #[prost(message, tag = "1")] + Column(super::PhysicalColumn), + #[prost(message, tag = "2")] + Literal(::datafusion_proto::protobuf::ScalarValue), + /// binary expressions + #[prost(message, tag = "3")] + BinaryExpr(::prost::alloc::boxed::Box), + /// aggregate expressions + #[prost(message, tag = "4")] + AggregateExpr(super::PhysicalAggregateExprNode), + /// null checks + #[prost(message, tag = "5")] + IsNullExpr(::prost::alloc::boxed::Box), + #[prost(message, tag = "6")] + IsNotNullExpr(::prost::alloc::boxed::Box), + #[prost(message, tag = "7")] + NotExpr(::prost::alloc::boxed::Box), + #[prost(message, tag = "8")] + Case(::prost::alloc::boxed::Box), + #[prost(message, tag = "9")] + Cast(::prost::alloc::boxed::Box), + #[prost(message, tag = "10")] + Sort(::prost::alloc::boxed::Box), + #[prost(message, tag = "11")] + Negative(::prost::alloc::boxed::Box), + #[prost(message, tag = "12")] + InList(::prost::alloc::boxed::Box), + #[prost(message, tag = "13")] + ScalarFunction(super::PhysicalScalarFunctionNode), + #[prost(message, tag = "14")] + TryCast(::prost::alloc::boxed::Box), + /// window expressions + #[prost(message, tag = "15")] + WindowExpr(::prost::alloc::boxed::Box), + #[prost(message, tag = "16")] + ScalarUdf(super::PhysicalScalarUdfNode), + #[prost(message, tag = "17")] + DateTimeIntervalExpr( + ::prost::alloc::boxed::Box, + ), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalScalarUdfNode { + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + #[prost(message, repeated, tag = "2")] + pub args: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "4")] + pub return_type: ::core::option::Option<::datafusion_proto::protobuf::ArrowType>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalAggregateExprNode { + #[prost(enumeration = "::datafusion_proto::protobuf::AggregateFunction", tag = "1")] + pub aggr_function: i32, + #[prost(message, repeated, tag = "2")] + pub expr: ::prost::alloc::vec::Vec, + #[prost(bool, tag = "3")] + pub distinct: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalWindowExprNode { + #[prost(message, optional, boxed, tag = "4")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(oneof = "physical_window_expr_node::WindowFunction", tags = "1, 2")] + pub window_function: ::core::option::Option< + physical_window_expr_node::WindowFunction, + >, +} +/// Nested message and enum types in `PhysicalWindowExprNode`. +pub mod physical_window_expr_node { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum WindowFunction { + #[prost( + enumeration = "::datafusion_proto::protobuf::AggregateFunction", + tag = "1" + )] + AggrFunction(i32), + /// udaf = 3 + #[prost( + enumeration = "::datafusion_proto::protobuf::BuiltInWindowFunction", + tag = "2" + )] + BuiltInFunction(i32), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalIsNull { + #[prost(message, optional, boxed, tag = "1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalIsNotNull { + #[prost(message, optional, boxed, tag = "1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalNot { + #[prost(message, optional, boxed, tag = "1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalAliasNode { + #[prost(message, optional, tag = "1")] + pub expr: ::core::option::Option, + #[prost(string, tag = "2")] + pub alias: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalBinaryExprNode { + #[prost(message, optional, boxed, tag = "1")] + pub l: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag = "2")] + pub r: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(string, tag = "3")] + pub op: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalDateTimeIntervalExprNode { + #[prost(message, optional, boxed, tag = "1")] + pub l: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag = "2")] + pub r: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(string, tag = "3")] + pub op: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalSortExprNode { + #[prost(message, optional, boxed, tag = "1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(bool, tag = "2")] + pub asc: bool, + #[prost(bool, tag = "3")] + pub nulls_first: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalWhenThen { + #[prost(message, optional, tag = "1")] + pub when_expr: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub then_expr: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalInListNode { + #[prost(message, optional, boxed, tag = "1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag = "2")] + pub list: ::prost::alloc::vec::Vec, + #[prost(bool, tag = "3")] + pub negated: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalCaseNode { + #[prost(message, optional, boxed, tag = "1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag = "2")] + pub when_then_expr: ::prost::alloc::vec::Vec, + #[prost(message, optional, boxed, tag = "3")] + pub else_expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalScalarFunctionNode { + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + #[prost(enumeration = "::datafusion_proto::protobuf::ScalarFunction", tag = "2")] + pub fun: i32, + #[prost(message, repeated, tag = "3")] + pub args: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "4")] + pub return_type: ::core::option::Option<::datafusion_proto::protobuf::ArrowType>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalTryCastNode { + #[prost(message, optional, boxed, tag = "1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag = "2")] + pub arrow_type: ::core::option::Option<::datafusion_proto::protobuf::ArrowType>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalCastNode { + #[prost(message, optional, boxed, tag = "1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag = "2")] + pub arrow_type: ::core::option::Option<::datafusion_proto::protobuf::ArrowType>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalNegativeNode { + #[prost(message, optional, boxed, tag = "1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct UnresolvedShuffleExecNode { + #[prost(uint32, tag = "1")] + pub stage_id: u32, + #[prost(message, optional, tag = "2")] + pub schema: ::core::option::Option<::datafusion_proto::protobuf::Schema>, + #[prost(uint32, tag = "3")] + pub input_partition_count: u32, + #[prost(uint32, tag = "4")] + pub output_partition_count: u32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FilterExecNode { + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag = "2")] + pub expr: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FileGroup { + #[prost(message, repeated, tag = "1")] + pub files: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScanLimit { + /// wrap into a message to make it optional + #[prost(uint32, tag = "1")] + pub limit: u32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FileScanExecConf { + #[prost(message, repeated, tag = "1")] + pub file_groups: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "2")] + pub schema: ::core::option::Option<::datafusion_proto::protobuf::Schema>, + #[prost(uint32, repeated, tag = "4")] + pub projection: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "5")] + pub limit: ::core::option::Option, + #[prost(message, optional, tag = "6")] + pub statistics: ::core::option::Option, + #[prost(string, repeated, tag = "7")] + pub table_partition_cols: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(string, tag = "8")] + pub object_store_url: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ParquetScanExecNode { + #[prost(message, optional, tag = "1")] + pub base_conf: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub pruning_predicate: ::core::option::Option< + ::datafusion_proto::protobuf::LogicalExprNode, + >, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CsvScanExecNode { + #[prost(message, optional, tag = "1")] + pub base_conf: ::core::option::Option, + #[prost(bool, tag = "2")] + pub has_header: bool, + #[prost(string, tag = "3")] + pub delimiter: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AvroScanExecNode { + #[prost(message, optional, tag = "1")] + pub base_conf: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct HashJoinExecNode { + #[prost(message, optional, boxed, tag = "1")] + pub left: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag = "2")] + pub right: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag = "3")] + pub on: ::prost::alloc::vec::Vec, + #[prost(enumeration = "::datafusion_proto::protobuf::JoinType", tag = "4")] + pub join_type: i32, + #[prost(enumeration = "PartitionMode", tag = "6")] + pub partition_mode: i32, + #[prost(bool, tag = "7")] + pub null_equals_null: bool, + #[prost(message, optional, tag = "8")] + pub filter: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct UnionExecNode { + #[prost(message, repeated, tag = "1")] + pub inputs: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExplainExecNode { + #[prost(message, optional, tag = "1")] + pub schema: ::core::option::Option<::datafusion_proto::protobuf::Schema>, + #[prost(message, repeated, tag = "2")] + pub stringified_plans: ::prost::alloc::vec::Vec< + ::datafusion_proto::protobuf::StringifiedPlan, + >, + #[prost(bool, tag = "3")] + pub verbose: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CrossJoinExecNode { + #[prost(message, optional, boxed, tag = "1")] + pub left: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag = "2")] + pub right: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalColumn { + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + #[prost(uint32, tag = "2")] + pub index: u32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct JoinOn { + #[prost(message, optional, tag = "1")] + pub left: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub right: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct EmptyExecNode { + #[prost(bool, tag = "1")] + pub produce_one_row: bool, + #[prost(message, optional, tag = "2")] + pub schema: ::core::option::Option<::datafusion_proto::protobuf::Schema>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ProjectionExecNode { + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag = "2")] + pub expr: ::prost::alloc::vec::Vec, + #[prost(string, repeated, tag = "3")] + pub expr_name: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WindowAggExecNode { + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag = "2")] + pub window_expr: ::prost::alloc::vec::Vec, + #[prost(string, repeated, tag = "3")] + pub window_expr_name: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(message, optional, tag = "4")] + pub input_schema: ::core::option::Option<::datafusion_proto::protobuf::Schema>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AggregateExecNode { + #[prost(message, repeated, tag = "1")] + pub group_expr: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "2")] + pub aggr_expr: ::prost::alloc::vec::Vec, + #[prost(enumeration = "AggregateMode", tag = "3")] + pub mode: i32, + #[prost(message, optional, boxed, tag = "4")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(string, repeated, tag = "5")] + pub group_expr_name: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(string, repeated, tag = "6")] + pub aggr_expr_name: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// we need the input schema to the partial aggregate to pass to the final aggregate + #[prost(message, optional, tag = "7")] + pub input_schema: ::core::option::Option<::datafusion_proto::protobuf::Schema>, + #[prost(message, repeated, tag = "8")] + pub null_expr: ::prost::alloc::vec::Vec, + #[prost(bool, repeated, tag = "9")] + pub groups: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ShuffleWriterExecNode { + /// TODO it seems redundant to provide job and stage id here since we also have them + /// in the TaskDefinition that wraps this plan + #[prost(string, tag = "1")] + pub job_id: ::prost::alloc::string::String, + #[prost(uint32, tag = "2")] + pub stage_id: u32, + #[prost(message, optional, boxed, tag = "3")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag = "4")] + pub output_partitioning: ::core::option::Option, + #[prost(oneof = "shuffle_writer_exec_node::OptionalLimit", tags = "6")] + pub optional_limit: ::core::option::Option, +} +/// Nested message and enum types in `ShuffleWriterExecNode`. +pub mod shuffle_writer_exec_node { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum OptionalLimit { + #[prost(uint64, tag = "6")] + Limit(u64), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ShuffleReaderExecNode { + #[prost(message, repeated, tag = "1")] + pub partition: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "2")] + pub schema: ::core::option::Option<::datafusion_proto::protobuf::Schema>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ShuffleReaderPartition { + /// each partition of a shuffle read can read data from multiple locations + #[prost(message, repeated, tag = "1")] + pub location: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GlobalLimitExecNode { + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// The number of rows to skip before fetch + #[prost(uint32, tag = "2")] + pub skip: u32, + /// Maximum number of rows to fetch; negative means no limit + #[prost(int64, tag = "3")] + pub fetch: i64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LocalLimitExecNode { + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(uint32, tag = "2")] + pub fetch: u32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SortExecNode { + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag = "2")] + pub expr: ::prost::alloc::vec::Vec, + /// Maximum number of highest/lowest rows to fetch; negative means no limit + #[prost(int64, tag = "3")] + pub fetch: i64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SortPreservingMergeExecNode { + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag = "2")] + pub expr: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CoalesceBatchesExecNode { + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(uint32, tag = "2")] + pub target_batch_size: u32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CoalescePartitionsExecNode { + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalHashRepartition { + #[prost(message, repeated, tag = "1")] + pub hash_expr: ::prost::alloc::vec::Vec, + #[prost(uint64, tag = "2")] + pub partition_count: u64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RepartitionExecNode { + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(oneof = "repartition_exec_node::PartitionMethod", tags = "2, 3, 4")] + pub partition_method: ::core::option::Option, +} +/// Nested message and enum types in `RepartitionExecNode`. +pub mod repartition_exec_node { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum PartitionMethod { + #[prost(uint64, tag = "2")] + RoundRobin(u64), + #[prost(message, tag = "3")] + Hash(super::PhysicalHashRepartition), + #[prost(uint64, tag = "4")] + Unknown(u64), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct JoinFilter { + #[prost(message, optional, tag = "1")] + pub expression: ::core::option::Option, + #[prost(message, repeated, tag = "2")] + pub column_indices: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "3")] + pub schema: ::core::option::Option<::datafusion_proto::protobuf::Schema>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ColumnIndex { + #[prost(uint32, tag = "1")] + pub index: u32, + #[prost(enumeration = "JoinSide", tag = "2")] + pub side: i32, +} +/// ///////////////////////////////////////////////////////////////////////////////////////////////// +/// Ballista Scheduling +/// ///////////////////////////////////////////////////////////////////////////////////////////////// +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutionGraph { + #[prost(string, tag = "1")] + pub job_id: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub session_id: ::prost::alloc::string::String, + #[prost(message, optional, tag = "3")] + pub status: ::core::option::Option, + #[prost(message, repeated, tag = "4")] + pub stages: ::prost::alloc::vec::Vec, + #[prost(uint64, tag = "5")] + pub output_partitions: u64, + #[prost(message, repeated, tag = "6")] + pub output_locations: ::prost::alloc::vec::Vec, + #[prost(string, tag = "7")] + pub scheduler_id: ::prost::alloc::string::String, + #[prost(uint32, tag = "8")] + pub task_id_gen: u32, + #[prost(message, repeated, tag = "9")] + pub failed_attempts: ::prost::alloc::vec::Vec, + #[prost(string, tag = "10")] + pub job_name: ::prost::alloc::string::String, + #[prost(uint64, tag = "11")] + pub start_time: u64, + #[prost(uint64, tag = "12")] + pub end_time: u64, + #[prost(uint64, tag = "13")] + pub queued_at: u64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StageAttempts { + #[prost(uint32, tag = "1")] + pub stage_id: u32, + #[prost(uint32, repeated, tag = "2")] + pub stage_attempt_num: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutionGraphStage { + #[prost(oneof = "execution_graph_stage::StageType", tags = "1, 2, 3, 4")] + pub stage_type: ::core::option::Option, +} +/// Nested message and enum types in `ExecutionGraphStage`. +pub mod execution_graph_stage { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum StageType { + #[prost(message, tag = "1")] + UnresolvedStage(super::UnResolvedStage), + #[prost(message, tag = "2")] + ResolvedStage(super::ResolvedStage), + #[prost(message, tag = "3")] + SuccessfulStage(super::SuccessfulStage), + #[prost(message, tag = "4")] + FailedStage(super::FailedStage), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct UnResolvedStage { + #[prost(uint32, tag = "1")] + pub stage_id: u32, + #[prost(message, optional, tag = "2")] + pub output_partitioning: ::core::option::Option, + #[prost(uint32, repeated, tag = "3")] + pub output_links: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "4")] + pub inputs: ::prost::alloc::vec::Vec, + #[prost(bytes = "vec", tag = "5")] + pub plan: ::prost::alloc::vec::Vec, + #[prost(uint32, tag = "6")] + pub stage_attempt_num: u32, + #[prost(string, repeated, tag = "7")] + pub last_attempt_failure_reasons: ::prost::alloc::vec::Vec< + ::prost::alloc::string::String, + >, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ResolvedStage { + #[prost(uint32, tag = "1")] + pub stage_id: u32, + #[prost(uint32, tag = "2")] + pub partitions: u32, + #[prost(message, optional, tag = "3")] + pub output_partitioning: ::core::option::Option, + #[prost(uint32, repeated, tag = "4")] + pub output_links: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "5")] + pub inputs: ::prost::alloc::vec::Vec, + #[prost(bytes = "vec", tag = "6")] + pub plan: ::prost::alloc::vec::Vec, + #[prost(uint32, tag = "7")] + pub stage_attempt_num: u32, + #[prost(string, repeated, tag = "8")] + pub last_attempt_failure_reasons: ::prost::alloc::vec::Vec< + ::prost::alloc::string::String, + >, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SuccessfulStage { + #[prost(uint32, tag = "1")] + pub stage_id: u32, + #[prost(uint32, tag = "2")] + pub partitions: u32, + #[prost(message, optional, tag = "3")] + pub output_partitioning: ::core::option::Option, + #[prost(uint32, repeated, tag = "4")] + pub output_links: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "5")] + pub inputs: ::prost::alloc::vec::Vec, + #[prost(bytes = "vec", tag = "6")] + pub plan: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "7")] + pub task_infos: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "8")] + pub stage_metrics: ::prost::alloc::vec::Vec, + #[prost(uint32, tag = "9")] + pub stage_attempt_num: u32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FailedStage { + #[prost(uint32, tag = "1")] + pub stage_id: u32, + #[prost(uint32, tag = "2")] + pub partitions: u32, + #[prost(message, optional, tag = "3")] + pub output_partitioning: ::core::option::Option, + #[prost(uint32, repeated, tag = "4")] + pub output_links: ::prost::alloc::vec::Vec, + #[prost(bytes = "vec", tag = "5")] + pub plan: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "6")] + pub task_infos: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "7")] + pub stage_metrics: ::prost::alloc::vec::Vec, + #[prost(string, tag = "8")] + pub error_message: ::prost::alloc::string::String, + #[prost(uint32, tag = "9")] + pub stage_attempt_num: u32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct TaskInfo { + #[prost(uint32, tag = "1")] + pub task_id: u32, + #[prost(uint32, tag = "2")] + pub partition_id: u32, + /// Scheduler schedule time + #[prost(uint64, tag = "3")] + pub scheduled_time: u64, + /// Scheduler launch time + #[prost(uint64, tag = "4")] + pub launch_time: u64, + /// The time the Executor start to run the task + #[prost(uint64, tag = "5")] + pub start_exec_time: u64, + /// The time the Executor finish the task + #[prost(uint64, tag = "6")] + pub end_exec_time: u64, + /// Scheduler side finish time + #[prost(uint64, tag = "7")] + pub finish_time: u64, + #[prost(oneof = "task_info::Status", tags = "8, 9, 10")] + pub status: ::core::option::Option, +} +/// Nested message and enum types in `TaskInfo`. +pub mod task_info { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Status { + #[prost(message, tag = "8")] + Running(super::RunningTask), + #[prost(message, tag = "9")] + Failed(super::FailedTask), + #[prost(message, tag = "10")] + Successful(super::SuccessfulTask), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GraphStageInput { + #[prost(uint32, tag = "1")] + pub stage_id: u32, + #[prost(message, repeated, tag = "2")] + pub partition_locations: ::prost::alloc::vec::Vec, + #[prost(bool, tag = "3")] + pub complete: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct TaskInputPartitions { + #[prost(uint32, tag = "1")] + pub partition: u32, + #[prost(message, repeated, tag = "2")] + pub partition_location: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct KeyValuePair { + #[prost(string, tag = "1")] + pub key: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub value: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Action { + /// configuration settings + #[prost(message, repeated, tag = "100")] + pub settings: ::prost::alloc::vec::Vec, + #[prost(oneof = "action::ActionType", tags = "3")] + pub action_type: ::core::option::Option, +} +/// Nested message and enum types in `Action`. +pub mod action { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum ActionType { + /// Fetch a partition from an executor + #[prost(message, tag = "3")] + FetchPartition(super::FetchPartition), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutePartition { + #[prost(string, tag = "1")] + pub job_id: ::prost::alloc::string::String, + #[prost(uint32, tag = "2")] + pub stage_id: u32, + #[prost(uint32, repeated, tag = "3")] + pub partition_id: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "4")] + pub plan: ::core::option::Option, + /// The task could need to read partitions from other executors + #[prost(message, repeated, tag = "5")] + pub partition_location: ::prost::alloc::vec::Vec, + /// Output partition for shuffle writer + #[prost(message, optional, tag = "6")] + pub output_partitioning: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FetchPartition { + #[prost(string, tag = "1")] + pub job_id: ::prost::alloc::string::String, + #[prost(uint32, tag = "2")] + pub stage_id: u32, + #[prost(uint32, tag = "3")] + pub partition_id: u32, + #[prost(string, tag = "4")] + pub path: ::prost::alloc::string::String, + #[prost(string, tag = "5")] + pub host: ::prost::alloc::string::String, + #[prost(uint32, tag = "6")] + pub port: u32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PartitionLocation { + /// partition_id of the map stage who produces the shuffle. + #[prost(uint32, tag = "1")] + pub map_partition_id: u32, + /// partition_id of the shuffle, a composition of(job_id + map_stage_id + partition_id). + #[prost(message, optional, tag = "2")] + pub partition_id: ::core::option::Option, + #[prost(message, optional, tag = "3")] + pub executor_meta: ::core::option::Option, + #[prost(message, optional, tag = "4")] + pub partition_stats: ::core::option::Option, + #[prost(string, tag = "5")] + pub path: ::prost::alloc::string::String, +} +/// Unique identifier for a materialized partition of data +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PartitionId { + #[prost(string, tag = "1")] + pub job_id: ::prost::alloc::string::String, + #[prost(uint32, tag = "2")] + pub stage_id: u32, + #[prost(uint32, tag = "4")] + pub partition_id: u32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct TaskId { + #[prost(uint32, tag = "1")] + pub task_id: u32, + #[prost(uint32, tag = "2")] + pub task_attempt_num: u32, + #[prost(uint32, tag = "3")] + pub partition_id: u32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PartitionStats { + #[prost(int64, tag = "1")] + pub num_rows: i64, + #[prost(int64, tag = "2")] + pub num_batches: i64, + #[prost(int64, tag = "3")] + pub num_bytes: i64, + #[prost(message, repeated, tag = "4")] + pub column_stats: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ColumnStats { + #[prost(message, optional, tag = "1")] + pub min_value: ::core::option::Option<::datafusion_proto::protobuf::ScalarValue>, + #[prost(message, optional, tag = "2")] + pub max_value: ::core::option::Option<::datafusion_proto::protobuf::ScalarValue>, + #[prost(uint32, tag = "3")] + pub null_count: u32, + #[prost(uint32, tag = "4")] + pub distinct_count: u32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct OperatorMetricsSet { + #[prost(message, repeated, tag = "1")] + pub metrics: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct NamedCount { + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + #[prost(uint64, tag = "2")] + pub value: u64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct NamedGauge { + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + #[prost(uint64, tag = "2")] + pub value: u64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct NamedTime { + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + #[prost(uint64, tag = "2")] + pub value: u64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct OperatorMetric { + #[prost(oneof = "operator_metric::Metric", tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10")] + pub metric: ::core::option::Option, +} +/// Nested message and enum types in `OperatorMetric`. +pub mod operator_metric { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Metric { + #[prost(uint64, tag = "1")] + OutputRows(u64), + #[prost(uint64, tag = "2")] + ElapseTime(u64), + #[prost(uint64, tag = "3")] + SpillCount(u64), + #[prost(uint64, tag = "4")] + SpilledBytes(u64), + #[prost(uint64, tag = "5")] + CurrentMemoryUsage(u64), + #[prost(message, tag = "6")] + Count(super::NamedCount), + #[prost(message, tag = "7")] + Gauge(super::NamedGauge), + #[prost(message, tag = "8")] + Time(super::NamedTime), + #[prost(int64, tag = "9")] + StartTimestamp(i64), + #[prost(int64, tag = "10")] + EndTimestamp(i64), + } +} +/// Used by scheduler +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutorMetadata { + #[prost(string, tag = "1")] + pub id: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub host: ::prost::alloc::string::String, + #[prost(uint32, tag = "3")] + pub port: u32, + #[prost(uint32, tag = "4")] + pub grpc_port: u32, + #[prost(message, optional, tag = "5")] + pub specification: ::core::option::Option, +} +/// Used by grpc +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutorRegistration { + #[prost(string, tag = "1")] + pub id: ::prost::alloc::string::String, + #[prost(uint32, tag = "3")] + pub port: u32, + #[prost(uint32, tag = "4")] + pub grpc_port: u32, + #[prost(message, optional, tag = "5")] + pub specification: ::core::option::Option, + /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) + /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) + #[prost(oneof = "executor_registration::OptionalHost", tags = "2")] + pub optional_host: ::core::option::Option, +} +/// Nested message and enum types in `ExecutorRegistration`. +pub mod executor_registration { + /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) + /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum OptionalHost { + #[prost(string, tag = "2")] + Host(::prost::alloc::string::String), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutorHeartbeat { + #[prost(string, tag = "1")] + pub executor_id: ::prost::alloc::string::String, + /// Unix epoch-based timestamp in seconds + #[prost(uint64, tag = "2")] + pub timestamp: u64, + #[prost(message, repeated, tag = "3")] + pub metrics: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "4")] + pub status: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutorMetric { + /// TODO add more metrics + #[prost(oneof = "executor_metric::Metric", tags = "1")] + pub metric: ::core::option::Option, +} +/// Nested message and enum types in `ExecutorMetric`. +pub mod executor_metric { + /// TODO add more metrics + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Metric { + #[prost(uint64, tag = "1")] + AvailableMemory(u64), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutorStatus { + #[prost(oneof = "executor_status::Status", tags = "1, 2, 3")] + pub status: ::core::option::Option, +} +/// Nested message and enum types in `ExecutorStatus`. +pub mod executor_status { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Status { + #[prost(string, tag = "1")] + Active(::prost::alloc::string::String), + #[prost(string, tag = "2")] + Dead(::prost::alloc::string::String), + #[prost(string, tag = "3")] + Unknown(::prost::alloc::string::String), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutorSpecification { + #[prost(message, repeated, tag = "1")] + pub resources: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutorResource { + /// TODO add more resources + #[prost(oneof = "executor_resource::Resource", tags = "1")] + pub resource: ::core::option::Option, +} +/// Nested message and enum types in `ExecutorResource`. +pub mod executor_resource { + /// TODO add more resources + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Resource { + #[prost(uint32, tag = "1")] + TaskSlots(u32), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutorData { + #[prost(string, tag = "1")] + pub executor_id: ::prost::alloc::string::String, + #[prost(message, repeated, tag = "2")] + pub resources: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutorResourcePair { + #[prost(message, optional, tag = "1")] + pub total: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub available: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RunningTask { + #[prost(string, tag = "1")] + pub executor_id: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FailedTask { + #[prost(string, tag = "1")] + pub error: ::prost::alloc::string::String, + #[prost(bool, tag = "2")] + pub retryable: bool, + /// Whether this task failure should be counted to the maximum number of times the task is allowed to retry + #[prost(bool, tag = "3")] + pub count_to_failures: bool, + #[prost(oneof = "failed_task::FailedReason", tags = "4, 5, 6, 7, 8, 9")] + pub failed_reason: ::core::option::Option, +} +/// Nested message and enum types in `FailedTask`. +pub mod failed_task { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum FailedReason { + #[prost(message, tag = "4")] + ExecutionError(super::ExecutionError), + #[prost(message, tag = "5")] + FetchPartitionError(super::FetchPartitionError), + #[prost(message, tag = "6")] + IoError(super::IoError), + #[prost(message, tag = "7")] + ExecutorLost(super::ExecutorLost), + /// A successful task's result is lost due to executor lost + #[prost(message, tag = "8")] + ResultLost(super::ResultLost), + #[prost(message, tag = "9")] + TaskKilled(super::TaskKilled), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SuccessfulTask { + #[prost(string, tag = "1")] + pub executor_id: ::prost::alloc::string::String, + /// TODO tasks are currently always shuffle writes but this will not always be the case + /// so we might want to think about some refactoring of the task definitions + #[prost(message, repeated, tag = "2")] + pub partitions: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutionError {} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FetchPartitionError { + #[prost(string, tag = "1")] + pub executor_id: ::prost::alloc::string::String, + #[prost(uint32, tag = "2")] + pub map_stage_id: u32, + #[prost(uint32, tag = "3")] + pub map_partition_id: u32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IoError {} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutorLost {} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ResultLost {} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct TaskKilled {} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ShuffleWritePartition { + #[prost(uint64, tag = "1")] + pub partition_id: u64, + #[prost(string, tag = "2")] + pub path: ::prost::alloc::string::String, + #[prost(uint64, tag = "3")] + pub num_batches: u64, + #[prost(uint64, tag = "4")] + pub num_rows: u64, + #[prost(uint64, tag = "5")] + pub num_bytes: u64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct TaskStatus { + #[prost(uint32, tag = "1")] + pub task_id: u32, + #[prost(string, tag = "2")] + pub job_id: ::prost::alloc::string::String, + #[prost(uint32, tag = "3")] + pub stage_id: u32, + #[prost(uint32, tag = "4")] + pub stage_attempt_num: u32, + #[prost(uint32, tag = "5")] + pub partition_id: u32, + #[prost(uint64, tag = "6")] + pub launch_time: u64, + #[prost(uint64, tag = "7")] + pub start_exec_time: u64, + #[prost(uint64, tag = "8")] + pub end_exec_time: u64, + #[prost(message, repeated, tag = "12")] + pub metrics: ::prost::alloc::vec::Vec, + #[prost(oneof = "task_status::Status", tags = "9, 10, 11")] + pub status: ::core::option::Option, +} +/// Nested message and enum types in `TaskStatus`. +pub mod task_status { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Status { + #[prost(message, tag = "9")] + Running(super::RunningTask), + #[prost(message, tag = "10")] + Failed(super::FailedTask), + #[prost(message, tag = "11")] + Successful(super::SuccessfulTask), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PollWorkParams { + #[prost(message, optional, tag = "1")] + pub metadata: ::core::option::Option, + #[prost(uint32, tag = "2")] + pub num_free_slots: u32, + /// All tasks must be reported until they reach the failed or completed state + #[prost(message, repeated, tag = "3")] + pub task_status: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct TaskDefinition { + #[prost(uint32, tag = "1")] + pub task_id: u32, + #[prost(uint32, tag = "2")] + pub task_attempt_num: u32, + #[prost(string, tag = "3")] + pub job_id: ::prost::alloc::string::String, + #[prost(uint32, tag = "4")] + pub stage_id: u32, + #[prost(uint32, tag = "5")] + pub stage_attempt_num: u32, + #[prost(uint32, tag = "6")] + pub partition_id: u32, + #[prost(bytes = "vec", tag = "7")] + pub plan: ::prost::alloc::vec::Vec, + /// Output partition for shuffle writer + #[prost(message, optional, tag = "8")] + pub output_partitioning: ::core::option::Option, + #[prost(string, tag = "9")] + pub session_id: ::prost::alloc::string::String, + #[prost(uint64, tag = "10")] + pub launch_time: u64, + #[prost(message, repeated, tag = "11")] + pub props: ::prost::alloc::vec::Vec, +} +/// A set of tasks in the same stage +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct MultiTaskDefinition { + #[prost(message, repeated, tag = "1")] + pub task_ids: ::prost::alloc::vec::Vec, + #[prost(string, tag = "2")] + pub job_id: ::prost::alloc::string::String, + #[prost(uint32, tag = "3")] + pub stage_id: u32, + #[prost(uint32, tag = "4")] + pub stage_attempt_num: u32, + #[prost(bytes = "vec", tag = "5")] + pub plan: ::prost::alloc::vec::Vec, + /// Output partition for shuffle writer + #[prost(message, optional, tag = "6")] + pub output_partitioning: ::core::option::Option, + #[prost(string, tag = "7")] + pub session_id: ::prost::alloc::string::String, + #[prost(uint64, tag = "8")] + pub launch_time: u64, + #[prost(message, repeated, tag = "9")] + pub props: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SessionSettings { + #[prost(message, repeated, tag = "1")] + pub configs: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct JobSessionConfig { + #[prost(string, tag = "1")] + pub session_id: ::prost::alloc::string::String, + #[prost(message, repeated, tag = "2")] + pub configs: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PollWorkResult { + #[prost(message, repeated, tag = "1")] + pub tasks: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RegisterExecutorParams { + #[prost(message, optional, tag = "1")] + pub metadata: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RegisterExecutorResult { + #[prost(bool, tag = "1")] + pub success: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct HeartBeatParams { + #[prost(string, tag = "1")] + pub executor_id: ::prost::alloc::string::String, + #[prost(message, repeated, tag = "2")] + pub metrics: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "3")] + pub status: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct HeartBeatResult { + /// TODO it's from Spark for BlockManager + #[prost(bool, tag = "1")] + pub reregister: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StopExecutorParams { + #[prost(string, tag = "1")] + pub executor_id: ::prost::alloc::string::String, + /// stop reason + #[prost(string, tag = "2")] + pub reason: ::prost::alloc::string::String, + /// force to stop the executor immediately + #[prost(bool, tag = "3")] + pub force: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StopExecutorResult {} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutorStoppedParams { + #[prost(string, tag = "1")] + pub executor_id: ::prost::alloc::string::String, + /// stop reason + #[prost(string, tag = "2")] + pub reason: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutorStoppedResult {} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct UpdateTaskStatusParams { + #[prost(string, tag = "1")] + pub executor_id: ::prost::alloc::string::String, + /// All tasks must be reported until they reach the failed or completed state + #[prost(message, repeated, tag = "2")] + pub task_status: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct UpdateTaskStatusResult { + #[prost(bool, tag = "1")] + pub success: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecuteQueryParams { + #[prost(message, repeated, tag = "4")] + pub settings: ::prost::alloc::vec::Vec, + #[prost(oneof = "execute_query_params::Query", tags = "1, 2")] + pub query: ::core::option::Option, + #[prost(oneof = "execute_query_params::OptionalSessionId", tags = "3")] + pub optional_session_id: ::core::option::Option< + execute_query_params::OptionalSessionId, + >, +} +/// Nested message and enum types in `ExecuteQueryParams`. +pub mod execute_query_params { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Query { + #[prost(bytes, tag = "1")] + LogicalPlan(::prost::alloc::vec::Vec), + #[prost(string, tag = "2")] + Sql(::prost::alloc::string::String), + } + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum OptionalSessionId { + #[prost(string, tag = "3")] + SessionId(::prost::alloc::string::String), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecuteSqlParams { + #[prost(string, tag = "1")] + pub sql: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecuteQueryResult { + #[prost(string, tag = "1")] + pub job_id: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub session_id: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GetJobStatusParams { + #[prost(string, tag = "1")] + pub job_id: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SuccessfulJob { + #[prost(message, repeated, tag = "1")] + pub partition_location: ::prost::alloc::vec::Vec, + #[prost(uint64, tag = "2")] + pub queued_at: u64, + #[prost(uint64, tag = "3")] + pub completed_at: u64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct QueuedJob {} +/// TODO: add progress report +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RunningJob {} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FailedJob { + #[prost(string, tag = "1")] + pub error: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct JobStatus { + #[prost(oneof = "job_status::Status", tags = "1, 2, 3, 4")] + pub status: ::core::option::Option, +} +/// Nested message and enum types in `JobStatus`. +pub mod job_status { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Status { + #[prost(message, tag = "1")] + Queued(super::QueuedJob), + #[prost(message, tag = "2")] + Running(super::RunningJob), + #[prost(message, tag = "3")] + Failed(super::FailedJob), + #[prost(message, tag = "4")] + Successful(super::SuccessfulJob), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GetJobStatusResult { + #[prost(message, optional, tag = "1")] + pub status: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GetFileMetadataParams { + #[prost(string, tag = "1")] + pub path: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub file_type: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GetFileMetadataResult { + #[prost(message, optional, tag = "1")] + pub schema: ::core::option::Option<::datafusion_proto::protobuf::Schema>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FilePartitionMetadata { + #[prost(string, repeated, tag = "1")] + pub filename: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CancelJobParams { + #[prost(string, tag = "1")] + pub job_id: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CancelJobResult { + #[prost(bool, tag = "1")] + pub cancelled: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CleanJobDataParams { + #[prost(string, tag = "1")] + pub job_id: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CleanJobDataResult {} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LaunchTaskParams { + /// Allow to launch a task set to an executor at once + #[prost(message, repeated, tag = "1")] + pub tasks: ::prost::alloc::vec::Vec, + #[prost(string, tag = "2")] + pub scheduler_id: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LaunchMultiTaskParams { + /// Allow to launch a task set to an executor at once + #[prost(message, repeated, tag = "1")] + pub multi_tasks: ::prost::alloc::vec::Vec, + #[prost(string, tag = "2")] + pub scheduler_id: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LaunchTaskResult { + /// TODO when part of the task set are scheduled successfully + #[prost(bool, tag = "1")] + pub success: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LaunchMultiTaskResult { + /// TODO when part of the task set are scheduled successfully + #[prost(bool, tag = "1")] + pub success: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CancelTasksParams { + #[prost(message, repeated, tag = "1")] + pub task_infos: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CancelTasksResult { + #[prost(bool, tag = "1")] + pub cancelled: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RemoveJobDataParams { + #[prost(string, tag = "1")] + pub job_id: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RemoveJobDataResult {} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RunningTaskInfo { + #[prost(uint32, tag = "1")] + pub task_id: u32, + #[prost(string, tag = "2")] + pub job_id: ::prost::alloc::string::String, + #[prost(uint32, tag = "3")] + pub stage_id: u32, + #[prost(uint32, tag = "4")] + pub partition_id: u32, +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum PartitionMode { + CollectLeft = 0, + Partitioned = 1, + Auto = 2, +} +impl PartitionMode { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + PartitionMode::CollectLeft => "COLLECT_LEFT", + PartitionMode::Partitioned => "PARTITIONED", + PartitionMode::Auto => "AUTO", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum AggregateMode { + Partial = 0, + Final = 1, + FinalPartitioned = 2, +} +impl AggregateMode { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + AggregateMode::Partial => "PARTIAL", + AggregateMode::Final => "FINAL", + AggregateMode::FinalPartitioned => "FINAL_PARTITIONED", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum JoinSide { + LeftSide = 0, + RightSide = 1, +} +impl JoinSide { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + JoinSide::LeftSide => "LEFT_SIDE", + JoinSide::RightSide => "RIGHT_SIDE", + } + } +} +/// Generated client implementations. +pub mod scheduler_grpc_client { + #![allow(unused_variables, dead_code, missing_docs, clippy::let_unit_value)] + use tonic::codegen::*; + use tonic::codegen::http::Uri; + #[derive(Debug, Clone)] + pub struct SchedulerGrpcClient { + inner: tonic::client::Grpc, + } + impl SchedulerGrpcClient { + /// Attempt to create a new client by connecting to a given endpoint. + pub async fn connect(dst: D) -> Result + where + D: std::convert::TryInto, + D::Error: Into, + { + let conn = tonic::transport::Endpoint::new(dst)?.connect().await?; + Ok(Self::new(conn)) + } + } + impl SchedulerGrpcClient + where + T: tonic::client::GrpcService, + T::Error: Into, + T::ResponseBody: Body + Send + 'static, + ::Error: Into + Send, + { + pub fn new(inner: T) -> Self { + let inner = tonic::client::Grpc::new(inner); + Self { inner } + } + pub fn with_origin(inner: T, origin: Uri) -> Self { + let inner = tonic::client::Grpc::with_origin(inner, origin); + Self { inner } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> SchedulerGrpcClient> + where + F: tonic::service::Interceptor, + T::ResponseBody: Default, + T: tonic::codegen::Service< + http::Request, + Response = http::Response< + >::ResponseBody, + >, + >, + , + >>::Error: Into + Send + Sync, + { + SchedulerGrpcClient::new(InterceptedService::new(inner, interceptor)) + } + /// Compress requests with the given encoding. + /// + /// This requires the server to support it otherwise it might respond with an + /// error. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.send_compressed(encoding); + self + } + /// Enable decompressing responses. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.accept_compressed(encoding); + self + } + /// Executors must poll the scheduler for heartbeat and to receive tasks + pub async fn poll_work( + &mut self, + request: impl tonic::IntoRequest, + ) -> Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/ballista.protobuf.SchedulerGrpc/PollWork", + ); + self.inner.unary(request.into_request(), path, codec).await + } + pub async fn register_executor( + &mut self, + request: impl tonic::IntoRequest, + ) -> Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/ballista.protobuf.SchedulerGrpc/RegisterExecutor", + ); + self.inner.unary(request.into_request(), path, codec).await + } + /// Push-based task scheduler will only leverage this interface + /// rather than the PollWork interface to report executor states + pub async fn heart_beat_from_executor( + &mut self, + request: impl tonic::IntoRequest, + ) -> Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/ballista.protobuf.SchedulerGrpc/HeartBeatFromExecutor", + ); + self.inner.unary(request.into_request(), path, codec).await + } + pub async fn update_task_status( + &mut self, + request: impl tonic::IntoRequest, + ) -> Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/ballista.protobuf.SchedulerGrpc/UpdateTaskStatus", + ); + self.inner.unary(request.into_request(), path, codec).await + } + pub async fn get_file_metadata( + &mut self, + request: impl tonic::IntoRequest, + ) -> Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/ballista.protobuf.SchedulerGrpc/GetFileMetadata", + ); + self.inner.unary(request.into_request(), path, codec).await + } + pub async fn execute_query( + &mut self, + request: impl tonic::IntoRequest, + ) -> Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/ballista.protobuf.SchedulerGrpc/ExecuteQuery", + ); + self.inner.unary(request.into_request(), path, codec).await + } + pub async fn get_job_status( + &mut self, + request: impl tonic::IntoRequest, + ) -> Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/ballista.protobuf.SchedulerGrpc/GetJobStatus", + ); + self.inner.unary(request.into_request(), path, codec).await + } + /// Used by Executor to tell Scheduler it is stopped. + pub async fn executor_stopped( + &mut self, + request: impl tonic::IntoRequest, + ) -> Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/ballista.protobuf.SchedulerGrpc/ExecutorStopped", + ); + self.inner.unary(request.into_request(), path, codec).await + } + pub async fn cancel_job( + &mut self, + request: impl tonic::IntoRequest, + ) -> Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/ballista.protobuf.SchedulerGrpc/CancelJob", + ); + self.inner.unary(request.into_request(), path, codec).await + } + pub async fn clean_job_data( + &mut self, + request: impl tonic::IntoRequest, + ) -> Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/ballista.protobuf.SchedulerGrpc/CleanJobData", + ); + self.inner.unary(request.into_request(), path, codec).await + } + } +} +/// Generated client implementations. +pub mod executor_grpc_client { + #![allow(unused_variables, dead_code, missing_docs, clippy::let_unit_value)] + use tonic::codegen::*; + use tonic::codegen::http::Uri; + #[derive(Debug, Clone)] + pub struct ExecutorGrpcClient { + inner: tonic::client::Grpc, + } + impl ExecutorGrpcClient { + /// Attempt to create a new client by connecting to a given endpoint. + pub async fn connect(dst: D) -> Result + where + D: std::convert::TryInto, + D::Error: Into, + { + let conn = tonic::transport::Endpoint::new(dst)?.connect().await?; + Ok(Self::new(conn)) + } + } + impl ExecutorGrpcClient + where + T: tonic::client::GrpcService, + T::Error: Into, + T::ResponseBody: Body + Send + 'static, + ::Error: Into + Send, + { + pub fn new(inner: T) -> Self { + let inner = tonic::client::Grpc::new(inner); + Self { inner } + } + pub fn with_origin(inner: T, origin: Uri) -> Self { + let inner = tonic::client::Grpc::with_origin(inner, origin); + Self { inner } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> ExecutorGrpcClient> + where + F: tonic::service::Interceptor, + T::ResponseBody: Default, + T: tonic::codegen::Service< + http::Request, + Response = http::Response< + >::ResponseBody, + >, + >, + , + >>::Error: Into + Send + Sync, + { + ExecutorGrpcClient::new(InterceptedService::new(inner, interceptor)) + } + /// Compress requests with the given encoding. + /// + /// This requires the server to support it otherwise it might respond with an + /// error. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.send_compressed(encoding); + self + } + /// Enable decompressing responses. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.accept_compressed(encoding); + self + } + pub async fn launch_task( + &mut self, + request: impl tonic::IntoRequest, + ) -> Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/ballista.protobuf.ExecutorGrpc/LaunchTask", + ); + self.inner.unary(request.into_request(), path, codec).await + } + pub async fn launch_multi_task( + &mut self, + request: impl tonic::IntoRequest, + ) -> Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/ballista.protobuf.ExecutorGrpc/LaunchMultiTask", + ); + self.inner.unary(request.into_request(), path, codec).await + } + pub async fn stop_executor( + &mut self, + request: impl tonic::IntoRequest, + ) -> Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/ballista.protobuf.ExecutorGrpc/StopExecutor", + ); + self.inner.unary(request.into_request(), path, codec).await + } + pub async fn cancel_tasks( + &mut self, + request: impl tonic::IntoRequest, + ) -> Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/ballista.protobuf.ExecutorGrpc/CancelTasks", + ); + self.inner.unary(request.into_request(), path, codec).await + } + pub async fn remove_job_data( + &mut self, + request: impl tonic::IntoRequest, + ) -> Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/ballista.protobuf.ExecutorGrpc/RemoveJobData", + ); + self.inner.unary(request.into_request(), path, codec).await + } + } +} +/// Generated server implementations. +pub mod scheduler_grpc_server { + #![allow(unused_variables, dead_code, missing_docs, clippy::let_unit_value)] + use tonic::codegen::*; + ///Generated trait containing gRPC methods that should be implemented for use with SchedulerGrpcServer. + #[async_trait] + pub trait SchedulerGrpc: Send + Sync + 'static { + /// Executors must poll the scheduler for heartbeat and to receive tasks + async fn poll_work( + &self, + request: tonic::Request, + ) -> Result, tonic::Status>; + async fn register_executor( + &self, + request: tonic::Request, + ) -> Result, tonic::Status>; + /// Push-based task scheduler will only leverage this interface + /// rather than the PollWork interface to report executor states + async fn heart_beat_from_executor( + &self, + request: tonic::Request, + ) -> Result, tonic::Status>; + async fn update_task_status( + &self, + request: tonic::Request, + ) -> Result, tonic::Status>; + async fn get_file_metadata( + &self, + request: tonic::Request, + ) -> Result, tonic::Status>; + async fn execute_query( + &self, + request: tonic::Request, + ) -> Result, tonic::Status>; + async fn get_job_status( + &self, + request: tonic::Request, + ) -> Result, tonic::Status>; + /// Used by Executor to tell Scheduler it is stopped. + async fn executor_stopped( + &self, + request: tonic::Request, + ) -> Result, tonic::Status>; + async fn cancel_job( + &self, + request: tonic::Request, + ) -> Result, tonic::Status>; + async fn clean_job_data( + &self, + request: tonic::Request, + ) -> Result, tonic::Status>; + } + #[derive(Debug)] + pub struct SchedulerGrpcServer { + inner: _Inner, + accept_compression_encodings: EnabledCompressionEncodings, + send_compression_encodings: EnabledCompressionEncodings, + } + struct _Inner(Arc); + impl SchedulerGrpcServer { + pub fn new(inner: T) -> Self { + Self::from_arc(Arc::new(inner)) + } + pub fn from_arc(inner: Arc) -> Self { + let inner = _Inner(inner); + Self { + inner, + accept_compression_encodings: Default::default(), + send_compression_encodings: Default::default(), + } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> InterceptedService + where + F: tonic::service::Interceptor, + { + InterceptedService::new(Self::new(inner), interceptor) + } + /// Enable decompressing requests with the given encoding. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.accept_compression_encodings.enable(encoding); + self + } + /// Compress responses with the given encoding, if the client supports it. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.send_compression_encodings.enable(encoding); + self + } + } + impl tonic::codegen::Service> for SchedulerGrpcServer + where + T: SchedulerGrpc, + B: Body + Send + 'static, + B::Error: Into + Send + 'static, + { + type Response = http::Response; + type Error = std::convert::Infallible; + type Future = BoxFuture; + fn poll_ready( + &mut self, + _cx: &mut Context<'_>, + ) -> Poll> { + Poll::Ready(Ok(())) + } + fn call(&mut self, req: http::Request) -> Self::Future { + let inner = self.inner.clone(); + match req.uri().path() { + "/ballista.protobuf.SchedulerGrpc/PollWork" => { + #[allow(non_camel_case_types)] + struct PollWorkSvc(pub Arc); + impl< + T: SchedulerGrpc, + > tonic::server::UnaryService + for PollWorkSvc { + type Response = super::PollWorkResult; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = self.0.clone(); + let fut = async move { (*inner).poll_work(request).await }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let inner = self.inner.clone(); + let fut = async move { + let inner = inner.0; + let method = PollWorkSvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/ballista.protobuf.SchedulerGrpc/RegisterExecutor" => { + #[allow(non_camel_case_types)] + struct RegisterExecutorSvc(pub Arc); + impl< + T: SchedulerGrpc, + > tonic::server::UnaryService + for RegisterExecutorSvc { + type Response = super::RegisterExecutorResult; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = self.0.clone(); + let fut = async move { + (*inner).register_executor(request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let inner = self.inner.clone(); + let fut = async move { + let inner = inner.0; + let method = RegisterExecutorSvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/ballista.protobuf.SchedulerGrpc/HeartBeatFromExecutor" => { + #[allow(non_camel_case_types)] + struct HeartBeatFromExecutorSvc(pub Arc); + impl< + T: SchedulerGrpc, + > tonic::server::UnaryService + for HeartBeatFromExecutorSvc { + type Response = super::HeartBeatResult; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = self.0.clone(); + let fut = async move { + (*inner).heart_beat_from_executor(request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let inner = self.inner.clone(); + let fut = async move { + let inner = inner.0; + let method = HeartBeatFromExecutorSvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/ballista.protobuf.SchedulerGrpc/UpdateTaskStatus" => { + #[allow(non_camel_case_types)] + struct UpdateTaskStatusSvc(pub Arc); + impl< + T: SchedulerGrpc, + > tonic::server::UnaryService + for UpdateTaskStatusSvc { + type Response = super::UpdateTaskStatusResult; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = self.0.clone(); + let fut = async move { + (*inner).update_task_status(request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let inner = self.inner.clone(); + let fut = async move { + let inner = inner.0; + let method = UpdateTaskStatusSvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/ballista.protobuf.SchedulerGrpc/GetFileMetadata" => { + #[allow(non_camel_case_types)] + struct GetFileMetadataSvc(pub Arc); + impl< + T: SchedulerGrpc, + > tonic::server::UnaryService + for GetFileMetadataSvc { + type Response = super::GetFileMetadataResult; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = self.0.clone(); + let fut = async move { + (*inner).get_file_metadata(request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let inner = self.inner.clone(); + let fut = async move { + let inner = inner.0; + let method = GetFileMetadataSvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/ballista.protobuf.SchedulerGrpc/ExecuteQuery" => { + #[allow(non_camel_case_types)] + struct ExecuteQuerySvc(pub Arc); + impl< + T: SchedulerGrpc, + > tonic::server::UnaryService + for ExecuteQuerySvc { + type Response = super::ExecuteQueryResult; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = self.0.clone(); + let fut = async move { + (*inner).execute_query(request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let inner = self.inner.clone(); + let fut = async move { + let inner = inner.0; + let method = ExecuteQuerySvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/ballista.protobuf.SchedulerGrpc/GetJobStatus" => { + #[allow(non_camel_case_types)] + struct GetJobStatusSvc(pub Arc); + impl< + T: SchedulerGrpc, + > tonic::server::UnaryService + for GetJobStatusSvc { + type Response = super::GetJobStatusResult; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = self.0.clone(); + let fut = async move { + (*inner).get_job_status(request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let inner = self.inner.clone(); + let fut = async move { + let inner = inner.0; + let method = GetJobStatusSvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/ballista.protobuf.SchedulerGrpc/ExecutorStopped" => { + #[allow(non_camel_case_types)] + struct ExecutorStoppedSvc(pub Arc); + impl< + T: SchedulerGrpc, + > tonic::server::UnaryService + for ExecutorStoppedSvc { + type Response = super::ExecutorStoppedResult; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = self.0.clone(); + let fut = async move { + (*inner).executor_stopped(request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let inner = self.inner.clone(); + let fut = async move { + let inner = inner.0; + let method = ExecutorStoppedSvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/ballista.protobuf.SchedulerGrpc/CancelJob" => { + #[allow(non_camel_case_types)] + struct CancelJobSvc(pub Arc); + impl< + T: SchedulerGrpc, + > tonic::server::UnaryService + for CancelJobSvc { + type Response = super::CancelJobResult; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = self.0.clone(); + let fut = async move { (*inner).cancel_job(request).await }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let inner = self.inner.clone(); + let fut = async move { + let inner = inner.0; + let method = CancelJobSvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/ballista.protobuf.SchedulerGrpc/CleanJobData" => { + #[allow(non_camel_case_types)] + struct CleanJobDataSvc(pub Arc); + impl< + T: SchedulerGrpc, + > tonic::server::UnaryService + for CleanJobDataSvc { + type Response = super::CleanJobDataResult; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = self.0.clone(); + let fut = async move { + (*inner).clean_job_data(request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let inner = self.inner.clone(); + let fut = async move { + let inner = inner.0; + let method = CleanJobDataSvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + _ => { + Box::pin(async move { + Ok( + http::Response::builder() + .status(200) + .header("grpc-status", "12") + .header("content-type", "application/grpc") + .body(empty_body()) + .unwrap(), + ) + }) + } + } + } + } + impl Clone for SchedulerGrpcServer { + fn clone(&self) -> Self { + let inner = self.inner.clone(); + Self { + inner, + accept_compression_encodings: self.accept_compression_encodings, + send_compression_encodings: self.send_compression_encodings, + } + } + } + impl Clone for _Inner { + fn clone(&self) -> Self { + Self(self.0.clone()) + } + } + impl std::fmt::Debug for _Inner { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.0) + } + } + impl tonic::server::NamedService for SchedulerGrpcServer { + const NAME: &'static str = "ballista.protobuf.SchedulerGrpc"; + } +} +/// Generated server implementations. +pub mod executor_grpc_server { + #![allow(unused_variables, dead_code, missing_docs, clippy::let_unit_value)] + use tonic::codegen::*; + ///Generated trait containing gRPC methods that should be implemented for use with ExecutorGrpcServer. + #[async_trait] + pub trait ExecutorGrpc: Send + Sync + 'static { + async fn launch_task( + &self, + request: tonic::Request, + ) -> Result, tonic::Status>; + async fn launch_multi_task( + &self, + request: tonic::Request, + ) -> Result, tonic::Status>; + async fn stop_executor( + &self, + request: tonic::Request, + ) -> Result, tonic::Status>; + async fn cancel_tasks( + &self, + request: tonic::Request, + ) -> Result, tonic::Status>; + async fn remove_job_data( + &self, + request: tonic::Request, + ) -> Result, tonic::Status>; + } + #[derive(Debug)] + pub struct ExecutorGrpcServer { + inner: _Inner, + accept_compression_encodings: EnabledCompressionEncodings, + send_compression_encodings: EnabledCompressionEncodings, + } + struct _Inner(Arc); + impl ExecutorGrpcServer { + pub fn new(inner: T) -> Self { + Self::from_arc(Arc::new(inner)) + } + pub fn from_arc(inner: Arc) -> Self { + let inner = _Inner(inner); + Self { + inner, + accept_compression_encodings: Default::default(), + send_compression_encodings: Default::default(), + } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> InterceptedService + where + F: tonic::service::Interceptor, + { + InterceptedService::new(Self::new(inner), interceptor) + } + /// Enable decompressing requests with the given encoding. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.accept_compression_encodings.enable(encoding); + self + } + /// Compress responses with the given encoding, if the client supports it. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.send_compression_encodings.enable(encoding); + self + } + } + impl tonic::codegen::Service> for ExecutorGrpcServer + where + T: ExecutorGrpc, + B: Body + Send + 'static, + B::Error: Into + Send + 'static, + { + type Response = http::Response; + type Error = std::convert::Infallible; + type Future = BoxFuture; + fn poll_ready( + &mut self, + _cx: &mut Context<'_>, + ) -> Poll> { + Poll::Ready(Ok(())) + } + fn call(&mut self, req: http::Request) -> Self::Future { + let inner = self.inner.clone(); + match req.uri().path() { + "/ballista.protobuf.ExecutorGrpc/LaunchTask" => { + #[allow(non_camel_case_types)] + struct LaunchTaskSvc(pub Arc); + impl< + T: ExecutorGrpc, + > tonic::server::UnaryService + for LaunchTaskSvc { + type Response = super::LaunchTaskResult; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = self.0.clone(); + let fut = async move { (*inner).launch_task(request).await }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let inner = self.inner.clone(); + let fut = async move { + let inner = inner.0; + let method = LaunchTaskSvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/ballista.protobuf.ExecutorGrpc/LaunchMultiTask" => { + #[allow(non_camel_case_types)] + struct LaunchMultiTaskSvc(pub Arc); + impl< + T: ExecutorGrpc, + > tonic::server::UnaryService + for LaunchMultiTaskSvc { + type Response = super::LaunchMultiTaskResult; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = self.0.clone(); + let fut = async move { + (*inner).launch_multi_task(request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let inner = self.inner.clone(); + let fut = async move { + let inner = inner.0; + let method = LaunchMultiTaskSvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/ballista.protobuf.ExecutorGrpc/StopExecutor" => { + #[allow(non_camel_case_types)] + struct StopExecutorSvc(pub Arc); + impl< + T: ExecutorGrpc, + > tonic::server::UnaryService + for StopExecutorSvc { + type Response = super::StopExecutorResult; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = self.0.clone(); + let fut = async move { + (*inner).stop_executor(request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let inner = self.inner.clone(); + let fut = async move { + let inner = inner.0; + let method = StopExecutorSvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/ballista.protobuf.ExecutorGrpc/CancelTasks" => { + #[allow(non_camel_case_types)] + struct CancelTasksSvc(pub Arc); + impl< + T: ExecutorGrpc, + > tonic::server::UnaryService + for CancelTasksSvc { + type Response = super::CancelTasksResult; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = self.0.clone(); + let fut = async move { + (*inner).cancel_tasks(request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let inner = self.inner.clone(); + let fut = async move { + let inner = inner.0; + let method = CancelTasksSvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/ballista.protobuf.ExecutorGrpc/RemoveJobData" => { + #[allow(non_camel_case_types)] + struct RemoveJobDataSvc(pub Arc); + impl< + T: ExecutorGrpc, + > tonic::server::UnaryService + for RemoveJobDataSvc { + type Response = super::RemoveJobDataResult; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = self.0.clone(); + let fut = async move { + (*inner).remove_job_data(request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let inner = self.inner.clone(); + let fut = async move { + let inner = inner.0; + let method = RemoveJobDataSvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + _ => { + Box::pin(async move { + Ok( + http::Response::builder() + .status(200) + .header("grpc-status", "12") + .header("content-type", "application/grpc") + .body(empty_body()) + .unwrap(), + ) + }) + } + } + } + } + impl Clone for ExecutorGrpcServer { + fn clone(&self) -> Self { + let inner = self.inner.clone(); + Self { + inner, + accept_compression_encodings: self.accept_compression_encodings, + send_compression_encodings: self.send_compression_encodings, + } + } + } + impl Clone for _Inner { + fn clone(&self) -> Self { + Self(self.0.clone()) + } + } + impl std::fmt::Debug for _Inner { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.0) + } + } + impl tonic::server::NamedService for ExecutorGrpcServer { + const NAME: &'static str = "ballista.protobuf.ExecutorGrpc"; + } +} diff --git a/ballista/core/src/serde/mod.rs b/ballista/core/src/serde/mod.rs index 040438228..b1b2ab386 100644 --- a/ballista/core/src/serde/mod.rs +++ b/ballista/core/src/serde/mod.rs @@ -392,8 +392,8 @@ mod tests { None } - fn required_child_distribution(&self) -> Distribution { - Distribution::SinglePartition + fn required_input_distribution(&self) -> Vec { + vec![Distribution::SinglePartition] } fn children(&self) -> Vec> { diff --git a/ballista/core/src/serde/physical_plan/from_proto.rs b/ballista/core/src/serde/physical_plan/from_proto.rs index 43c341f03..41eaa226c 100644 --- a/ballista/core/src/serde/physical_plan/from_proto.rs +++ b/ballista/core/src/serde/physical_plan/from_proto.rs @@ -411,6 +411,8 @@ impl TryInto for &protobuf::FileScanExecConf { projection, limit: self.limit.as_ref().map(|sl| sl.limit as usize), table_partition_cols: vec![], + // TODO add ordering info to the ballista proto file + output_ordering: None, }) } } diff --git a/ballista/core/src/serde/physical_plan/mod.rs b/ballista/core/src/serde/physical_plan/mod.rs index d07104e84..65eeadaa9 100644 --- a/ballista/core/src/serde/physical_plan/mod.rs +++ b/ballista/core/src/serde/physical_plan/mod.rs @@ -331,6 +331,8 @@ impl AsExecutionPlan for PhysicalPlanNode { physical_window_expr, input, Arc::new((&input_schema).try_into()?), + vec![], + None, )?)) } PhysicalPlanType::Aggregate(hash_agg) => { @@ -528,6 +530,7 @@ impl AsExecutionPlan for PhysicalPlanNode { let partition_mode = match partition_mode { protobuf::PartitionMode::CollectLeft => PartitionMode::CollectLeft, protobuf::PartitionMode::Partitioned => PartitionMode::Partitioned, + protobuf::PartitionMode::Auto => PartitionMode::Auto, }; Ok(Arc::new(HashJoinExec::try_new( left, @@ -800,7 +803,7 @@ impl AsExecutionPlan for PhysicalPlanNode { input: Some(Box::new(input)), skip: limit.skip() as u32, fetch: match limit.fetch() { - Some(n) => *n as i64, + Some(n) => n as i64, _ => -1, // no limit }, }, @@ -875,6 +878,7 @@ impl AsExecutionPlan for PhysicalPlanNode { let partition_mode = match exec.partition_mode() { PartitionMode::CollectLeft => protobuf::PartitionMode::CollectLeft, PartitionMode::Partitioned => protobuf::PartitionMode::Partitioned, + PartitionMode::Auto => protobuf::PartitionMode::Auto, }; Ok(protobuf::PhysicalPlanNode { @@ -1285,6 +1289,7 @@ fn decode_scan_config( projection, limit: proto.limit.as_ref().map(|sl| sl.limit as usize), table_partition_cols: vec![], + output_ordering: None, }) } @@ -1612,6 +1617,7 @@ mod roundtrip_tests { projection: None, limit: None, table_partition_cols: vec![], + output_ordering: None, }; let predicate = datafusion::prelude::col("col").eq(datafusion::prelude::lit("1")); diff --git a/ballista/core/src/serde/physical_plan/to_proto.rs b/ballista/core/src/serde/physical_plan/to_proto.rs index fbf8e5e72..c303ba453 100644 --- a/ballista/core/src/serde/physical_plan/to_proto.rs +++ b/ballista/core/src/serde/physical_plan/to_proto.rs @@ -457,7 +457,11 @@ impl TryFrom<&FileScanConfig> for protobuf::FileScanExecConf { .map(|n| *n as u32) .collect(), schema: Some(conf.file_schema.as_ref().try_into()?), - table_partition_cols: conf.table_partition_cols.to_vec(), + table_partition_cols: conf + .table_partition_cols + .iter() + .map(|col| col.0.to_owned()) + .collect(), object_store_url: conf.object_store_url.to_string(), }) } diff --git a/ballista/core/src/utils.rs b/ballista/core/src/utils.rs index 66f2f75e1..bc2fa8020 100644 --- a/ballista/core/src/utils.rs +++ b/ballista/core/src/utils.rs @@ -53,6 +53,8 @@ use log::error; use log::info; #[cfg(feature = "s3")] use object_store::aws::AmazonS3Builder; +#[cfg(feature = "azure")] +use object_store::azure::MicrosoftAzureBuilder; use object_store::ObjectStore; use std::io::{BufWriter, Write}; use std::marker::PhantomData; @@ -100,13 +102,36 @@ impl ObjectStoreProvider for FeatureBasedObjectStoreProvider { #[cfg(feature = "s3")] { - if url.to_string().starts_with("s3://") { + if url.as_str().starts_with("s3://") { if let Some(bucket_name) = url.host_str() { let store = AmazonS3Builder::from_env() .with_bucket_name(bucket_name) .build()?; return Ok(Arc::new(store)); } + // Support Alibaba Cloud OSS + // Use S3 compatibility mode to access Alibaba Cloud OSS + // The `AWS_ENDPOINT` should have bucket name included + } else if url.as_str().starts_with("oss://") { + if let Some(bucket_name) = url.host_str() { + let store = AmazonS3Builder::from_env() + .with_virtual_hosted_style_request(true) + .with_bucket_name(bucket_name) + .build()?; + return Ok(Arc::new(store)); + } + } + } + + #[cfg(feature = "azure")] + { + if url.to_string().starts_with("azure://") { + if let Some(bucket_name) = url.host_str() { + let store = MicrosoftAzureBuilder::from_env() + .with_container_name(bucket_name) + .build()?; + return Ok(Arc::new(store)); + } } } diff --git a/ballista/executor/Cargo.toml b/ballista/executor/Cargo.toml index 0f0837ccd..d43a95b2d 100644 --- a/ballista/executor/Cargo.toml +++ b/ballista/executor/Cargo.toml @@ -19,7 +19,7 @@ name = "ballista-executor" description = "Ballista Distributed Compute - Executor" license = "Apache-2.0" -version = "0.9.0" +version = "0.10.0" homepage = "https://github.com/apache/arrow-ballista" repository = "https://github.com/apache/arrow-ballista" readme = "README.md" @@ -35,15 +35,15 @@ default = ["mimalloc"] [dependencies] anyhow = "1" -arrow = { version = "26.0.0" } -arrow-flight = { version = "26.0.0" } +arrow = { version = "28.0.0" } +arrow-flight = { version = "28.0.0" } async-trait = "0.1.41" -ballista-core = { path = "../core", version = "0.9.0" } +ballista-core = { path = "../core", version = "0.10.0" } chrono = { version = "0.4", default-features = false } configure_me = "0.4.0" dashmap = "5.4.0" -datafusion = "14.0.0" -datafusion-proto = "14.0.0" +datafusion = "15.0.0" +datafusion-proto = "15.0.0" futures = "0.3" hyper = "0.14.4" log = "0.4" diff --git a/ballista/executor/src/executor_server.rs b/ballista/executor/src/executor_server.rs index da7fb74f6..de4696d29 100644 --- a/ballista/executor/src/executor_server.rs +++ b/ballista/executor/src/executor_server.rs @@ -342,12 +342,12 @@ impl ExecutorServer ExecutorServer ExecutionPlanVisitor for IndentVisitor<'a, 'b> { plan.fmt_as(self.t, self.f)?; if let Some(metrics) = self.metrics.get(self.metric_index) { let metrics = metrics - .aggregate_by_partition() + .aggregate_by_name() .sorted_for_display() .timestamps_removed(); write!(self.f, ", metrics=[{}]", metrics)?; diff --git a/ballista/scheduler/src/flight_sql.rs b/ballista/scheduler/src/flight_sql.rs index a3c706eff..a5a613c44 100644 --- a/ballista/scheduler/src/flight_sql.rs +++ b/ballista/scheduler/src/flight_sql.rs @@ -89,6 +89,7 @@ impl FlightSqlServiceImpl { } } + #[allow(deprecated)] fn tables(&self, ctx: Arc) -> Result { let schema = Arc::new(Schema::new(vec![ Field::new("catalog_name", DataType::Utf8, true), @@ -96,7 +97,7 @@ impl FlightSqlServiceImpl { Field::new("table_name", DataType::Utf8, false), Field::new("table_type", DataType::Utf8, false), ])); - let tables = ctx.tables()?; + let tables = ctx.tables()?; // resolved in #501 let names: Vec<_> = tables.iter().map(|it| Some(it.as_str())).collect(); let types: Vec<_> = names.iter().map(|_| Some("TABLE")).collect(); let cats: Vec<_> = names.iter().map(|_| None).collect(); @@ -104,7 +105,7 @@ impl FlightSqlServiceImpl { let rb = RecordBatch::try_new( schema, [cats, schemas, names, types] - .into_iter() + .iter() .map(|i| Arc::new(StringArray::from(i.clone())) as ArrayRef) .collect::>(), )?; @@ -120,7 +121,7 @@ impl FlightSqlServiceImpl { RecordBatch::try_new( schema, [TABLE_TYPES] - .into_iter() + .iter() .map(|i| Arc::new(StringArray::from(i.to_vec())) as ArrayRef) .collect::>(), ) @@ -141,7 +142,7 @@ impl FlightSqlServiceImpl { Status::internal(format!("Failed to create SessionContext: {:?}", e)) })?; let handle = Uuid::new_v4(); - self.contexts.insert(handle.clone(), ctx); + self.contexts.insert(handle, ctx); Ok(handle) } @@ -149,14 +150,14 @@ impl FlightSqlServiceImpl { let auth = req .metadata() .get("authorization") - .ok_or(Status::internal("No authorization header!"))?; + .ok_or_else(|| Status::internal("No authorization header!"))?; let str = auth .to_str() .map_err(|e| Status::internal(format!("Error parsing header: {}", e)))?; let authorization = str.to_string(); let bearer = "Bearer "; if !authorization.starts_with(bearer) { - Err(Status::internal(format!("Invalid auth header!")))?; + Err(Status::internal("Invalid auth header!"))?; } let auth = authorization[bearer.len()..].to_string(); @@ -249,17 +250,17 @@ impl FlightSqlServiceImpl { .advertise_flight_sql_endpoint { Some(endpoint) => { - let advertise_endpoint_vec: Vec<&str> = endpoint.split(":").collect(); + let advertise_endpoint_vec: Vec<&str> = endpoint.split(':').collect(); match advertise_endpoint_vec.as_slice() { [host_ip, port] => { - (String::from(*host_ip), FromStr::from_str(*port).expect("Failed to parse port from advertise-endpoint.")) + (String::from(*host_ip), FromStr::from_str(port).expect("Failed to parse port from advertise-endpoint.")) } _ => { Err(Status::internal("advertise-endpoint flag has incorrect format. Expected IP:Port".to_string()))? } } } - None => (exec_host.clone(), exec_port.clone()), + None => (exec_host.clone(), exec_port), }; let fetch = if let Some(ref id) = loc.partition_id { @@ -442,21 +443,19 @@ impl FlightSqlServiceImpl { Response> + Send>>>, Status, > { - let (tx, rx): ( - Sender>, - Receiver>, - ) = channel(2); + type FlightResult = Result; + let (tx, rx): (Sender, Receiver) = channel(2); let options = IpcWriteOptions::default(); let schema = SchemaAsIpc::new(rb.schema().as_ref(), &options).into(); tx.send(Ok(schema)) .await - .map_err(|e| Status::internal("Error sending schema".to_string()))?; - let (dict, flight) = flight_data_from_arrow_batch(&rb, &options); + .map_err(|_| Status::internal("Error sending schema".to_string()))?; + let (dict, flight) = flight_data_from_arrow_batch(rb, &options); let flights = dict.into_iter().chain(std::iter::once(flight)); - for flight in flights.into_iter() { + for flight in flights { tx.send(Ok(flight)) .await - .map_err(|e| Status::internal("Error sending flight".to_string()))?; + .map_err(|_| Status::internal("Error sending flight".to_string()))?; } let resp = Response::new(Box::pin(ReceiverStream::new(rx)) as Pin> + Send + 'static>>); @@ -468,7 +467,7 @@ impl FlightSqlServiceImpl { data: &RecordBatch, name: &str, ) -> Result, Status> { - let num_bytes = batch_byte_size(&data) as i64; + let num_bytes = batch_byte_size(data) as i64; let schema = data.schema(); let num_rows = data.num_rows() as i64; @@ -499,7 +498,7 @@ impl FlightSqlService for FlightSqlServiceImpl { let authorization = request .metadata() .get("authorization") - .ok_or(Status::invalid_argument("authorization field not present"))? + .ok_or_else(|| Status::invalid_argument("authorization field not present"))? .to_str() .map_err(|_| Status::invalid_argument("authorization not parsable"))?; if !authorization.starts_with(basic) { @@ -513,11 +512,9 @@ impl FlightSqlService for FlightSqlServiceImpl { .map_err(|_| Status::invalid_argument("authorization not parsable"))?; let str = String::from_utf8(bytes) .map_err(|_| Status::invalid_argument("authorization not parsable"))?; - let parts: Vec<_> = str.split(":").collect(); + let parts: Vec<_> = str.split(':').collect(); if parts.len() != 2 { - Err(Status::invalid_argument(format!( - "Invalid authorization header" - )))?; + Err(Status::invalid_argument("Invalid authorization header"))?; } let user = parts[0]; let pass = parts[1]; @@ -533,7 +530,7 @@ impl FlightSqlService for FlightSqlServiceImpl { }; let result = Ok(result); let output = futures::stream::iter(vec![result]); - let str = format!("Bearer {}", token.to_string()); + let str = format!("Bearer {}", token); let mut resp: Response> + Send>>> = Response::new(Box::pin(output)); let md = MetadataValue::try_from(str) @@ -559,7 +556,7 @@ impl FlightSqlService for FlightSqlServiceImpl { let action: protobuf::Action = message .unpack() .map_err(|e| Status::internal(format!("{:?}", e)))? - .ok_or(Status::internal("Expected an Action but got None!"))?; + .ok_or_else(|| Status::internal("Expected an Action but got None!"))?; let fp = match &action.action_type { Some(FetchPartition(fp)) => fp.clone(), None => Err(Status::internal("Expected an ActionType but got None!"))?, @@ -569,7 +566,7 @@ impl FlightSqlService for FlightSqlServiceImpl { match fp.job_id.as_str() { "get_flight_info_table_types" => { debug!("Responding with table types"); - let rb = FlightSqlServiceImpl::table_types().map_err(|e| { + let rb = FlightSqlServiceImpl::table_types().map_err(|_| { Status::internal("Error getting table types".to_string()) })?; let resp = Self::record_batch_to_resp(&rb).await?; @@ -579,7 +576,7 @@ impl FlightSqlService for FlightSqlServiceImpl { debug!("Responding with tables"); let rb = self .tables(ctx) - .map_err(|e| Status::internal("Error getting tables".to_string()))?; + .map_err(|_| Status::internal("Error getting tables".to_string()))?; let resp = Self::record_batch_to_resp(&rb).await?; return Ok(resp); } @@ -675,7 +672,7 @@ impl FlightSqlService for FlightSqlServiceImpl { async fn get_flight_info_table_types( &self, _query: CommandGetTableTypes, - request: Request, + _request: Request, ) -> Result, Status> { debug!("get_flight_info_table_types"); let data = FlightSqlServiceImpl::table_types() diff --git a/ballista/scheduler/src/main.rs b/ballista/scheduler/src/main.rs index b8946eab1..ee6244a72 100644 --- a/ballista/scheduler/src/main.rs +++ b/ballista/scheduler/src/main.rs @@ -21,6 +21,7 @@ use anyhow::{Context, Result}; #[cfg(feature = "flight-sql")] use arrow_flight::flight_service_server::FlightServiceServer; use ballista_scheduler::scheduler_server::externalscaler::external_scaler_server::ExternalScalerServer; +use ballista_scheduler::state::backend::memory::MemoryBackendClient; use futures::future::{self, Either, TryFutureExt}; use hyper::{server::conn::AddrStream, service::make_service_fn, Server}; use std::convert::Infallible; @@ -37,7 +38,7 @@ use ballista_scheduler::api::{get_routes, EitherBody, Error}; #[cfg(feature = "etcd")] use ballista_scheduler::state::backend::etcd::EtcdClient; #[cfg(feature = "sled")] -use ballista_scheduler::state::backend::standalone::StandaloneClient; +use ballista_scheduler::state::backend::sled::SledClient; use datafusion_proto::protobuf::LogicalPlanNode; use ballista_scheduler::scheduler_server::SchedulerServer; @@ -155,11 +156,9 @@ async fn start_server( async fn init_state_backend( opt: &Config, ) -> Result<(Arc, Arc)> { - let config_backend: (Arc, Arc) = match opt.config_backend { - #[cfg(not(any(feature = "sled", feature = "etcd")))] - _ => std::compile_error!( - "To build the scheduler enable at least one config backend feature (`etcd` or `sled`)" - ), + let config_backend: (Arc, Arc) = match opt + .config_backend + { #[cfg(feature = "etcd")] StateBackend::Etcd => { let etcd = etcd_client::Client::connect(&[opt.etcd_urls.clone()], None) @@ -176,27 +175,31 @@ async fn init_state_backend( ) } #[cfg(feature = "sled")] - StateBackend::Standalone => { + StateBackend::Sled => { let backend = if opt.sled_dir.is_empty() { Arc::new( - StandaloneClient::try_new_temporary() + SledClient::try_new_temporary() .context("Could not create standalone config backend")?, ) } else { println!("{}", opt.sled_dir); Arc::new( - StandaloneClient::try_new(opt.sled_dir.clone()) + SledClient::try_new(opt.sled_dir.clone()) .context("Could not create standalone config backend")?, ) }; (backend.clone(), backend) } #[cfg(not(feature = "sled"))] - StateBackend::Standalone => { + StateBackend::Sled => { unimplemented!( "build the scheduler with the `sled` feature to use the standalone config backend" ) } + StateBackend::Memory => { + let backend = Arc::new(MemoryBackendClient::new()); + (backend.clone(), backend) + } }; Ok(config_backend) @@ -214,7 +217,7 @@ async fn main() -> Result<()> { std::process::exit(0); } - let (config_backend, cluster_backend) = init_state_backend(&opt).await?; + let (_config_backend, cluster_backend) = init_state_backend(&opt).await?; let special_mod_log_level = opt.log_level_setting; let namespace = opt.namespace; @@ -266,6 +269,44 @@ async fn main() -> Result<()> { let addr = format!("{}:{}", bind_host, port); let addr = addr.parse()?; + let config_backend: Arc = match opt.config_backend { + #[cfg(feature = "etcd")] + StateBackend::Etcd => { + let etcd = etcd_client::Client::connect(&[opt.etcd_urls], None) + .await + .context("Could not connect to etcd")?; + Arc::new(EtcdClient::new(namespace.clone(), etcd)) + } + #[cfg(not(feature = "etcd"))] + StateBackend::Etcd => { + unimplemented!( + "build the scheduler with the `etcd` feature to use the etcd config backend" + ) + } + #[cfg(feature = "sled")] + StateBackend::Sled => { + if opt.sled_dir.is_empty() { + Arc::new( + SledClient::try_new_temporary() + .context("Could not create sled config backend")?, + ) + } else { + println!("{}", opt.sled_dir); + Arc::new( + SledClient::try_new(opt.sled_dir) + .context("Could not create sled config backend")?, + ) + } + } + #[cfg(not(feature = "sled"))] + StateBackend::Sled => { + unimplemented!( + "build the scheduler with the `sled` feature to use the sled config backend" + ) + } + StateBackend::Memory => Arc::new(MemoryBackendClient::new()), + }; + let config = SchedulerConfig { scheduling_policy: opt.scheduler_policy, event_loop_buffer_size: opt.event_loop_buffer_size, diff --git a/ballista/scheduler/src/scheduler_server/grpc.rs b/ballista/scheduler/src/scheduler_server/grpc.rs index 572290526..ad99ac7da 100644 --- a/ballista/scheduler/src/scheduler_server/grpc.rs +++ b/ballista/scheduler/src/scheduler_server/grpc.rs @@ -44,6 +44,7 @@ use std::ops::Deref; use std::sync::Arc; use crate::scheduler_server::event::QueryStageSchedulerEvent; +use datafusion::prelude::SessionConfig; use std::time::{SystemTime, UNIX_EPOCH}; use tonic::{Request, Response, Status}; @@ -292,9 +293,10 @@ impl SchedulerGrpc // TODO shouldn't this take a ListingOption object as input? let GetFileMetadataParams { path, file_type } = request.into_inner(); - + // Here, we use the default config, since we don't know the session id + let config = SessionConfig::default().config_options(); let file_format: Arc = match file_type.as_str() { - "parquet" => Ok(Arc::new(ParquetFormat::default())), + "parquet" => Ok(Arc::new(ParquetFormat::new(config))), // TODO implement for CSV _ => Err(tonic::Status::unimplemented( "get_file_metadata unsupported file type", @@ -587,13 +589,13 @@ mod test { use ballista_core::utils::default_session_builder; use crate::state::executor_manager::DEFAULT_EXECUTOR_TIMEOUT_SECONDS; - use crate::state::{backend::standalone::StandaloneClient, SchedulerState}; + use crate::state::{backend::sled::SledClient, SchedulerState}; use super::{SchedulerGrpc, SchedulerServer}; #[tokio::test] async fn test_poll_work() -> Result<(), BallistaError> { - let state_storage = Arc::new(StandaloneClient::try_new_temporary()?); + let state_storage = Arc::new(SledClient::try_new_temporary()?); let mut scheduler: SchedulerServer = SchedulerServer::new( "localhost:50050".to_owned(), @@ -683,7 +685,7 @@ mod test { #[tokio::test] async fn test_stop_executor() -> Result<(), BallistaError> { - let state_storage = Arc::new(StandaloneClient::try_new_temporary()?); + let state_storage = Arc::new(SledClient::try_new_temporary()?); let mut scheduler: SchedulerServer = SchedulerServer::new( "localhost:50050".to_owned(), @@ -765,7 +767,7 @@ mod test { #[tokio::test] #[ignore] async fn test_expired_executor() -> Result<(), BallistaError> { - let state_storage = Arc::new(StandaloneClient::try_new_temporary()?); + let state_storage = Arc::new(SledClient::try_new_temporary()?); let mut scheduler: SchedulerServer = SchedulerServer::new( "localhost:50050".to_owned(), diff --git a/ballista/scheduler/src/scheduler_server/mod.rs b/ballista/scheduler/src/scheduler_server/mod.rs index 252dab1ad..7b20cf116 100644 --- a/ballista/scheduler/src/scheduler_server/mod.rs +++ b/ballista/scheduler/src/scheduler_server/mod.rs @@ -387,7 +387,7 @@ mod test { use ballista_core::serde::BallistaCodec; use crate::scheduler_server::{timestamp_millis, SchedulerServer}; - use crate::state::backend::standalone::StandaloneClient; + use crate::state::backend::sled::SledClient; use crate::test_utils::{ assert_completed_event, assert_failed_event, assert_no_submitted_event, @@ -656,7 +656,7 @@ mod test { async fn test_scheduler( scheduling_policy: TaskSchedulingPolicy, ) -> Result> { - let state_storage = Arc::new(StandaloneClient::try_new_temporary()?); + let state_storage = Arc::new(SledClient::try_new_temporary()?); let mut scheduler: SchedulerServer = SchedulerServer::new( "localhost:50050".to_owned(), diff --git a/ballista/scheduler/src/standalone.rs b/ballista/scheduler/src/standalone.rs index ef457abda..eb90e176a 100644 --- a/ballista/scheduler/src/standalone.rs +++ b/ballista/scheduler/src/standalone.rs @@ -17,9 +17,8 @@ use crate::config::SchedulerConfig; use crate::metrics::default_metrics_collector; -use crate::{ - scheduler_server::SchedulerServer, state::backend::standalone::StandaloneClient, -}; +use crate::scheduler_server::SchedulerServer; +use crate::state::backend::sled::SledClient; use ballista_core::serde::protobuf::PhysicalPlanNode; use ballista_core::serde::BallistaCodec; use ballista_core::utils::create_grpc_server; @@ -33,7 +32,7 @@ use std::{net::SocketAddr, sync::Arc}; use tokio::net::TcpListener; pub async fn new_standalone_scheduler() -> Result { - let client = Arc::new(StandaloneClient::try_new_temporary()?); + let client = Arc::new(SledClient::try_new_temporary()?); let metrics_collector = default_metrics_collector()?; diff --git a/ballista/scheduler/src/state/backend/etcd.rs b/ballista/scheduler/src/state/backend/etcd.rs index dc31f77b4..aa2a071ce 100644 --- a/ballista/scheduler/src/state/backend/etcd.rs +++ b/ballista/scheduler/src/state/backend/etcd.rs @@ -35,7 +35,7 @@ use crate::state::backend::{ ClusterState, Keyspace, Lock, Operation, StateBackendClient, Watch, WatchEvent, }; -/// A [`StateBackendClient`] implementation that uses etcd to save cluster configuration. +/// A [`StateBackendClient`] implementation that uses etcd to save cluster state. #[derive(Clone)] pub struct EtcdClient { namespace: String, diff --git a/ballista/scheduler/src/state/backend/memory.rs b/ballista/scheduler/src/state/backend/memory.rs new file mode 100644 index 000000000..6cbedb843 --- /dev/null +++ b/ballista/scheduler/src/state/backend/memory.rs @@ -0,0 +1,411 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::state::backend::utils::subscriber::{Subscriber, Subscribers}; +use crate::state::backend::{ + Keyspace, Lock, Operation, StateBackendClient, Watch, WatchEvent, +}; +use ballista_core::error::Result; +use dashmap::DashMap; +use futures::{FutureExt, Stream}; +use log::warn; +use std::collections::{BTreeMap, HashSet}; +use std::sync::Arc; +use tokio::sync::Mutex; + +type KeySpaceState = BTreeMap>; +type KeyLock = Arc>; + +/// A [`StateBackendClient`] implementation that uses in memory map to save cluster state. +#[derive(Clone, Default)] +pub struct MemoryBackendClient { + /// The key is the KeySpace. For every KeySpace, there will be a tree map which is better for prefix filtering + states: DashMap, + /// The key is the full key formatted like "/KeySpace/key". It's a flatted map + locks: DashMap, + subscribers: Arc, +} + +impl MemoryBackendClient { + pub fn new() -> Self { + Self::default() + } + + fn get_space_key(keyspace: &Keyspace) -> String { + format!("/{:?}", keyspace) + } + + fn get_flat_key(keyspace: &Keyspace, key: &str) -> String { + format!("/{:?}/{}", keyspace, key) + } +} + +#[tonic::async_trait] +impl StateBackendClient for MemoryBackendClient { + async fn get(&self, keyspace: Keyspace, key: &str) -> Result> { + let space_key = Self::get_space_key(&keyspace); + Ok(self + .states + .get(&space_key) + .map(|space_state| space_state.value().get(key).cloned().unwrap_or_default()) + .unwrap_or_default()) + } + + async fn get_from_prefix( + &self, + keyspace: Keyspace, + prefix: &str, + ) -> Result)>> { + let space_key = Self::get_space_key(&keyspace); + Ok(self + .states + .get(&space_key) + .map(|space_state| { + space_state + .value() + .range(prefix.to_owned()..) + .take_while(|(k, _)| k.starts_with(prefix)) + .map(|e| (format!("{}/{}", space_key, e.0), e.1.clone())) + .collect() + }) + .unwrap_or_default()) + } + + async fn scan( + &self, + keyspace: Keyspace, + limit: Option, + ) -> Result)>> { + let space_key = Self::get_space_key(&keyspace); + Ok(self + .states + .get(&space_key) + .map(|space_state| { + if let Some(limit) = limit { + space_state + .value() + .iter() + .take(limit) + .map(|e| (format!("{}/{}", space_key, e.0), e.1.clone())) + .collect::)>>() + } else { + space_state + .value() + .iter() + .map(|e| (format!("{}/{}", space_key, e.0), e.1.clone())) + .collect::)>>() + } + }) + .unwrap_or_default()) + } + + async fn scan_keys(&self, keyspace: Keyspace) -> Result> { + let space_key = Self::get_space_key(&keyspace); + Ok(self + .states + .get(&space_key) + .map(|space_state| { + space_state + .value() + .iter() + .map(|e| format!("{}/{}", space_key, e.0)) + .collect::>() + }) + .unwrap_or_default()) + } + + async fn put(&self, keyspace: Keyspace, key: String, value: Vec) -> Result<()> { + let space_key = Self::get_space_key(&keyspace); + if !self.states.contains_key(&space_key) { + self.states.insert(space_key.clone(), BTreeMap::default()); + } + self.states + .get_mut(&space_key) + .unwrap() + .value_mut() + .insert(key.clone(), value.clone()); + + // Notify subscribers + let full_key = format!("{}/{}", space_key, key); + if let Some(res) = self.subscribers.reserve(&full_key) { + let event = WatchEvent::Put(full_key, value); + res.complete(&event); + } + + Ok(()) + } + + /// Currently the locks should be acquired before invoking this method. + /// Later need to be refined by acquiring all of the related locks inside this method + async fn apply_txn(&self, ops: Vec<(Operation, Keyspace, String)>) -> Result<()> { + for (op, keyspace, key) in ops.into_iter() { + match op { + Operation::Delete => { + self.delete(keyspace, &key).await?; + } + Operation::Put(value) => { + self.put(keyspace, key, value).await?; + } + }; + } + + Ok(()) + } + + /// Currently it's not used. Later will refine the caller side by leveraging this method + async fn mv( + &self, + from_keyspace: Keyspace, + to_keyspace: Keyspace, + key: &str, + ) -> Result<()> { + let from_space_key = Self::get_space_key(&from_keyspace); + + let ops = if let Some(from_space_state) = self.states.get(&from_space_key) { + if let Some(state) = from_space_state.value().get(key) { + Some(vec![ + (Operation::Delete, from_keyspace, key.to_owned()), + (Operation::Put(state.clone()), to_keyspace, key.to_owned()), + ]) + } else { + // TODO should this return an error? + warn!( + "Cannot move value at {}/{}, does not exist", + from_space_key, key + ); + None + } + } else { + // TODO should this return an error? + warn!( + "Cannot move value at {}/{}, does not exist", + from_space_key, key + ); + None + }; + + if let Some(ops) = ops { + self.apply_txn(ops).await?; + } + + Ok(()) + } + + async fn lock(&self, keyspace: Keyspace, key: &str) -> Result> { + let flat_key = Self::get_flat_key(&keyspace, key); + let lock = self + .locks + .entry(flat_key) + .or_insert_with(|| Arc::new(Mutex::new(()))); + Ok(Box::new(lock.value().clone().lock_owned().await)) + } + + async fn watch(&self, keyspace: Keyspace, prefix: String) -> Result> { + let prefix = format!("/{:?}/{}", keyspace, prefix); + + Ok(Box::new(MemoryWatch { + subscriber: self.subscribers.register(prefix.as_bytes()), + })) + } + + async fn delete(&self, keyspace: Keyspace, key: &str) -> Result<()> { + let space_key = Self::get_space_key(&keyspace); + if let Some(mut space_state) = self.states.get_mut(&space_key) { + if space_state.value_mut().remove(key).is_some() { + // Notify subscribers + let full_key = format!("{}/{}", space_key, key); + if let Some(res) = self.subscribers.reserve(&full_key) { + let event = WatchEvent::Delete(full_key); + res.complete(&event); + } + } + } + + Ok(()) + } +} + +struct MemoryWatch { + subscriber: Subscriber, +} + +#[tonic::async_trait] +impl Watch for MemoryWatch { + async fn cancel(&mut self) -> Result<()> { + Ok(()) + } +} + +impl Stream for MemoryWatch { + type Item = WatchEvent; + + fn poll_next( + self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + self.get_mut().subscriber.poll_unpin(cx) + } + + fn size_hint(&self) -> (usize, Option) { + self.subscriber.size_hint() + } +} + +#[cfg(test)] +mod tests { + use super::{StateBackendClient, Watch, WatchEvent}; + + use crate::state::backend::memory::MemoryBackendClient; + use crate::state::backend::{Keyspace, Operation}; + use crate::state::with_locks; + use futures::StreamExt; + use std::result::Result; + + #[tokio::test] + async fn put_read() -> Result<(), Box> { + let client = MemoryBackendClient::new(); + let key = "key"; + let value = "value".as_bytes(); + client + .put(Keyspace::Slots, key.to_owned(), value.to_vec()) + .await?; + assert_eq!(client.get(Keyspace::Slots, key).await?, value); + Ok(()) + } + + #[tokio::test] + async fn put_move() -> Result<(), Box> { + let client = MemoryBackendClient::new(); + let key = "key"; + let value = "value".as_bytes(); + client + .put(Keyspace::ActiveJobs, key.to_owned(), value.to_vec()) + .await?; + client + .mv(Keyspace::ActiveJobs, Keyspace::FailedJobs, key) + .await?; + assert_eq!(client.get(Keyspace::FailedJobs, key).await?, value); + Ok(()) + } + + #[tokio::test] + async fn multiple_operation() -> Result<(), Box> { + let client = MemoryBackendClient::new(); + let key = "key".to_string(); + let value = "value".as_bytes().to_vec(); + let locks = client + .acquire_locks(vec![(Keyspace::ActiveJobs, ""), (Keyspace::Slots, "")]) + .await?; + + let _r: ballista_core::error::Result<()> = with_locks(locks, async { + let txn_ops = vec![ + (Operation::Put(value.clone()), Keyspace::Slots, key.clone()), + ( + Operation::Put(value.clone()), + Keyspace::ActiveJobs, + key.clone(), + ), + ]; + client.apply_txn(txn_ops).await?; + Ok(()) + }) + .await; + + assert_eq!(client.get(Keyspace::Slots, key.as_str()).await?, value); + assert_eq!(client.get(Keyspace::ActiveJobs, key.as_str()).await?, value); + Ok(()) + } + + #[tokio::test] + async fn read_empty() -> Result<(), Box> { + let client = MemoryBackendClient::new(); + let key = "key"; + let empty: &[u8] = &[]; + assert_eq!(client.get(Keyspace::Slots, key).await?, empty); + Ok(()) + } + + #[tokio::test] + async fn read_prefix() -> Result<(), Box> { + let client = MemoryBackendClient::new(); + let key = "key"; + let value = "value".as_bytes(); + client + .put(Keyspace::Slots, format!("{}/1", key), value.to_vec()) + .await?; + client + .put(Keyspace::Slots, format!("{}/2", key), value.to_vec()) + .await?; + assert_eq!( + client.get_from_prefix(Keyspace::Slots, key).await?, + vec![ + ("/Slots/key/1".to_owned(), value.to_vec()), + ("/Slots/key/2".to_owned(), value.to_vec()) + ] + ); + Ok(()) + } + + #[tokio::test] + async fn read_watch() -> Result<(), Box> { + let client = MemoryBackendClient::new(); + let key = "key"; + let value = "value".as_bytes(); + let mut watch_keyspace: Box = + client.watch(Keyspace::Slots, "".to_owned()).await?; + let mut watch_key: Box = + client.watch(Keyspace::Slots, key.to_owned()).await?; + client + .put(Keyspace::Slots, key.to_owned(), value.to_vec()) + .await?; + assert_eq!( + watch_keyspace.next().await, + Some(WatchEvent::Put( + format!("/{:?}/{}", Keyspace::Slots, key.to_owned()), + value.to_owned() + )) + ); + assert_eq!( + watch_key.next().await, + Some(WatchEvent::Put( + format!("/{:?}/{}", Keyspace::Slots, key.to_owned()), + value.to_owned() + )) + ); + let value2 = "value2".as_bytes(); + client + .put(Keyspace::Slots, key.to_owned(), value2.to_vec()) + .await?; + assert_eq!( + watch_keyspace.next().await, + Some(WatchEvent::Put( + format!("/{:?}/{}", Keyspace::Slots, key.to_owned()), + value2.to_owned() + )) + ); + assert_eq!( + watch_key.next().await, + Some(WatchEvent::Put( + format!("/{:?}/{}", Keyspace::Slots, key.to_owned()), + value2.to_owned() + )) + ); + watch_keyspace.cancel().await?; + watch_key.cancel().await?; + Ok(()) + } +} diff --git a/ballista/scheduler/src/state/backend/mod.rs b/ballista/scheduler/src/state/backend/mod.rs index 7524d918c..f02c647fc 100644 --- a/ballista/scheduler/src/state/backend/mod.rs +++ b/ballista/scheduler/src/state/backend/mod.rs @@ -34,15 +34,18 @@ use tokio::sync::OwnedMutexGuard; #[cfg(feature = "etcd")] pub mod etcd; +pub mod memory; #[cfg(feature = "sled")] -pub mod standalone; +pub mod sled; +mod utils; // an enum used to configure the backend // needs to be visible to code generated by configure_me #[derive(Debug, Clone, ArgEnum, serde::Deserialize)] pub enum StateBackend { Etcd, - Standalone, + Memory, + Sled, } impl std::str::FromStr for StateBackend { @@ -640,7 +643,7 @@ pub trait Watch: Stream + Send + Unpin { async fn cancel(&mut self) -> Result<()>; } -#[derive(Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq)] pub enum WatchEvent { /// Contains the inserted or updated key and the new value Put(String, Vec), @@ -661,7 +664,7 @@ impl Lock for OwnedMutexGuard { #[cfg(test)] mod tests { - use crate::state::backend::standalone::StandaloneClient; + use crate::state::backend::sled::SledClient; use crate::state::backend::ClusterState; use ballista_core::error::Result; @@ -675,7 +678,7 @@ mod tests { #[tokio::test] async fn test_heartbeat_stream() -> Result<()> { - let sled = StandaloneClient::try_new_temporary()?; + let sled = SledClient::try_new_temporary()?; let cluster_state: Arc = Arc::new(sled); @@ -707,7 +710,7 @@ mod tests { #[tokio::test] async fn test_heartbeats() -> Result<()> { - let sled = StandaloneClient::try_new_temporary()?; + let sled = SledClient::try_new_temporary()?; let cluster_state: Arc = Arc::new(sled); diff --git a/ballista/scheduler/src/state/backend/standalone.rs b/ballista/scheduler/src/state/backend/sled.rs similarity index 96% rename from ballista/scheduler/src/state/backend/standalone.rs rename to ballista/scheduler/src/state/backend/sled.rs index de3cd5953..455d46019 100644 --- a/ballista/scheduler/src/state/backend/standalone.rs +++ b/ballista/scheduler/src/state/backend/sled.rs @@ -29,15 +29,15 @@ use crate::state::backend::{ ClusterState, Keyspace, Lock, Operation, StateBackendClient, Watch, WatchEvent, }; -/// A [`StateBackendClient`] implementation that uses file-based storage to save cluster configuration. +/// A [`StateBackendClient`] implementation that uses file-based storage to save cluster state. #[derive(Clone)] -pub struct StandaloneClient { +pub struct SledClient { db: sled::Db, locks: Arc>>>>, } -impl StandaloneClient { - /// Creates a StandaloneClient that saves data to the specified file. +impl SledClient { + /// Creates a SledClient that saves data to the specified file. pub fn try_new>(path: P) -> Result { Ok(Self { db: sled::open(path).map_err(sled_to_ballista_error)?, @@ -45,7 +45,7 @@ impl StandaloneClient { }) } - /// Creates a StandaloneClient that saves data to a temp file. + /// Creates a SledClient that saves data to a temp file. pub fn try_new_temporary() -> Result { Ok(Self { db: sled::Config::new() @@ -69,7 +69,7 @@ fn sled_to_ballista_error(e: sled::Error) -> BallistaError { } #[tonic::async_trait] -impl StateBackendClient for StandaloneClient { +impl StateBackendClient for SledClient { async fn get(&self, keyspace: Keyspace, key: &str) -> Result> { let key = format!("/{:?}/{}", keyspace, key); Ok(self @@ -286,15 +286,15 @@ impl Stream for SledWatch { #[cfg(test)] mod tests { - use super::{StandaloneClient, StateBackendClient, Watch, WatchEvent}; + use super::{SledClient, StateBackendClient, Watch, WatchEvent}; use crate::state::backend::{Keyspace, Operation}; use crate::state::with_locks; use futures::StreamExt; use std::result::Result; - fn create_instance() -> Result> { - Ok(StandaloneClient::try_new_temporary()?) + fn create_instance() -> Result> { + Ok(SledClient::try_new_temporary()?) } #[tokio::test] diff --git a/ballista/scheduler/src/state/backend/utils/mod.rs b/ballista/scheduler/src/state/backend/utils/mod.rs new file mode 100644 index 000000000..de95dd6e0 --- /dev/null +++ b/ballista/scheduler/src/state/backend/utils/mod.rs @@ -0,0 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#[allow(dead_code)] +mod oneshot; +#[allow(dead_code)] +pub(crate) mod subscriber; diff --git a/ballista/scheduler/src/state/backend/utils/oneshot.rs b/ballista/scheduler/src/state/backend/utils/oneshot.rs new file mode 100644 index 000000000..a0d146996 --- /dev/null +++ b/ballista/scheduler/src/state/backend/utils/oneshot.rs @@ -0,0 +1,179 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! It's mainly a modified version of sled::oneshot + +use std::{ + future::Future, + pin::Pin, + sync::Arc, + task::{Context, Poll, Waker}, + time::{Duration, Instant}, +}; + +use parking_lot::{Condvar, Mutex}; + +#[derive(Debug)] +struct OneShotState { + filled: bool, + fused: bool, + item: Option, + waker: Option, +} + +impl Default for OneShotState { + fn default() -> OneShotState { + OneShotState { + filled: false, + fused: false, + item: None, + waker: None, + } + } +} + +/// A Future value which may or may not be filled +#[derive(Debug)] +pub struct OneShot { + mu: Arc>>, + cv: Arc, +} + +/// The completer side of the Future +pub struct OneShotFiller { + mu: Arc>>, + cv: Arc, +} + +impl OneShot { + /// Create a new `OneShotFiller` and the `OneShot` + /// that will be filled by its completion. + pub fn pair() -> (OneShotFiller, Self) { + let mu = Arc::new(Mutex::new(OneShotState::default())); + let cv = Arc::new(Condvar::new()); + let future = Self { + mu: mu.clone(), + cv: cv.clone(), + }; + let filler = OneShotFiller { mu, cv }; + + (filler, future) + } + + /// Block on the `OneShot`'s completion + /// or dropping of the `OneShotFiller` + pub fn wait(self) -> Option { + let mut inner = self.mu.lock(); + while !inner.filled { + self.cv.wait(&mut inner); + } + inner.item.take() + } + + /// Block on the `OneShot`'s completion + /// or dropping of the `OneShotFiller`, + /// returning an error if not filled + /// before a given timeout or if the + /// system shuts down before then. + /// + /// Upon a successful receive, the + /// oneshot should be dropped, as it + /// will never yield that value again. + pub fn wait_timeout( + &mut self, + mut timeout: Duration, + ) -> Result { + let mut inner = self.mu.lock(); + while !inner.filled { + let start = Instant::now(); + let res = self.cv.wait_for(&mut inner, timeout); + if res.timed_out() { + return Err(std::sync::mpsc::RecvTimeoutError::Disconnected); + } + timeout = if let Some(timeout) = timeout.checked_sub(start.elapsed()) { + timeout + } else { + Duration::from_nanos(0) + }; + } + if let Some(item) = inner.item.take() { + Ok(item) + } else { + Err(std::sync::mpsc::RecvTimeoutError::Disconnected) + } + } +} + +impl Future for OneShot { + type Output = Option; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let mut state = self.mu.lock(); + if state.fused { + return Poll::Pending; + } + if state.filled { + state.fused = true; + Poll::Ready(state.item.take()) + } else { + state.waker = Some(cx.waker().clone()); + Poll::Pending + } + } +} + +impl OneShotFiller { + /// Complete the `OneShot` + pub fn fill(self, inner: T) { + let mut state = self.mu.lock(); + + if let Some(waker) = state.waker.take() { + waker.wake(); + } + + state.filled = true; + state.item = Some(inner); + + // having held the mutex makes this linearized + // with the notify below. + drop(state); + + let _notified = self.cv.notify_all(); + } +} + +impl Drop for OneShotFiller { + fn drop(&mut self) { + let mut state = self.mu.lock(); + + if state.filled { + return; + } + + if let Some(waker) = state.waker.take() { + waker.wake(); + } + + state.filled = true; + + // having held the mutex makes this linearized + // with the notify below. + drop(state); + + let _notified = self.cv.notify_all(); + } +} diff --git a/ballista/scheduler/src/state/backend/utils/subscriber.rs b/ballista/scheduler/src/state/backend/utils/subscriber.rs new file mode 100644 index 000000000..dd74b6642 --- /dev/null +++ b/ballista/scheduler/src/state/backend/utils/subscriber.rs @@ -0,0 +1,248 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! It's mainly a modified version of sled::subscriber + +use crate::state::backend::utils::oneshot::{OneShot, OneShotFiller}; +use crate::state::backend::WatchEvent; + +use parking_lot::RwLock; +use std::collections::{BTreeMap, HashMap}; +use std::future::Future; +use std::pin::Pin; +use std::sync::atomic::Ordering::Relaxed; +use std::sync::atomic::{AtomicBool, AtomicUsize}; +use std::sync::mpsc::{sync_channel, Receiver, SyncSender, TryRecvError}; +use std::sync::Arc; +use std::task::{Context, Poll, Waker}; +use std::time::{Duration, Instant}; + +static ID_GEN: AtomicUsize = AtomicUsize::new(0); + +type Senders = HashMap, SyncSender>>)>; + +/// Aynchronous, non-blocking subscriber: +/// +/// `Subscription` implements `Future>`. +/// +/// `while let Some(event) = (&mut subscriber).await { /* use it */ }` +pub struct Subscriber { + id: usize, + rx: Receiver>>, + existing: Option>>, + home: Arc>, +} + +impl Drop for Subscriber { + fn drop(&mut self) { + let mut w_senders = self.home.write(); + w_senders.remove(&self.id); + } +} + +impl Subscriber { + /// Attempts to wait for a value on this `Subscriber`, returning + /// an error if no event arrives within the provided `Duration` + /// or if the backing `Db` shuts down. + pub fn next_timeout( + &mut self, + mut timeout: Duration, + ) -> std::result::Result { + loop { + let start = Instant::now(); + let mut future_rx = if let Some(future_rx) = self.existing.take() { + future_rx + } else { + self.rx.recv_timeout(timeout)? + }; + timeout = if let Some(timeout) = timeout.checked_sub(start.elapsed()) { + timeout + } else { + Duration::from_nanos(0) + }; + + let start = Instant::now(); + match future_rx.wait_timeout(timeout) { + Ok(Some(event)) => return Ok(event), + Ok(None) => (), + Err(timeout_error) => { + self.existing = Some(future_rx); + return Err(timeout_error); + } + } + timeout = if let Some(timeout) = timeout.checked_sub(start.elapsed()) { + timeout + } else { + Duration::from_nanos(0) + }; + } + } +} + +impl Future for Subscriber { + type Output = Option; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + loop { + let mut future_rx = if let Some(future_rx) = self.existing.take() { + future_rx + } else { + match self.rx.try_recv() { + Ok(future_rx) => future_rx, + Err(TryRecvError::Empty) => break, + Err(TryRecvError::Disconnected) => return Poll::Ready(None), + } + }; + + match Future::poll(Pin::new(&mut future_rx), cx) { + Poll::Ready(Some(event)) => return Poll::Ready(event), + Poll::Ready(None) => continue, + Poll::Pending => { + self.existing = Some(future_rx); + return Poll::Pending; + } + } + } + let mut home = self.home.write(); + let entry = home.get_mut(&self.id).unwrap(); + entry.0 = Some(cx.waker().clone()); + Poll::Pending + } +} + +impl Iterator for Subscriber { + type Item = WatchEvent; + + fn next(&mut self) -> Option { + loop { + let future_rx = self.rx.recv().ok()?; + match future_rx.wait() { + Some(Some(event)) => return Some(event), + Some(None) => return None, + None => continue, + } + } + } +} + +#[derive(Debug, Default)] +pub(crate) struct Subscribers { + watched: RwLock, Arc>>>, + ever_used: AtomicBool, +} + +impl Drop for Subscribers { + fn drop(&mut self) { + let watched = self.watched.read(); + + for senders in watched.values() { + let senders = std::mem::take(&mut *senders.write()); + for (_, (waker, sender)) in senders { + drop(sender); + if let Some(waker) = waker { + waker.wake(); + } + } + } + } +} + +impl Subscribers { + pub(crate) fn register(&self, prefix: &[u8]) -> Subscriber { + self.ever_used.store(true, Relaxed); + let r_mu = { + let r_mu = self.watched.read(); + if r_mu.contains_key(prefix) { + r_mu + } else { + drop(r_mu); + let mut w_mu = self.watched.write(); + if !w_mu.contains_key(prefix) { + let old = w_mu.insert( + prefix.to_vec(), + Arc::new(RwLock::new(HashMap::default())), + ); + assert!(old.is_none()); + } + drop(w_mu); + self.watched.read() + } + }; + + let (tx, rx) = sync_channel(1024); + + let arc_senders = &r_mu[prefix]; + let mut w_senders = arc_senders.write(); + + let id = ID_GEN.fetch_add(1, Relaxed); + + w_senders.insert(id, (None, tx)); + + Subscriber { + id, + rx, + existing: None, + home: arc_senders.clone(), + } + } + + pub(crate) fn reserve>(&self, key: R) -> Option { + if !self.ever_used.load(Relaxed) { + return None; + } + + let r_mu = self.watched.read(); + let prefixes = r_mu.iter().filter(|(k, _)| key.as_ref().starts_with(k)); + + let mut subscribers = vec![]; + + for (_, subs_rwl) in prefixes { + let subs = subs_rwl.read(); + + for (_id, (waker, sender)) in subs.iter() { + let (tx, rx) = OneShot::pair(); + if sender.send(rx).is_err() { + continue; + } + subscribers.push((waker.clone(), tx)); + } + } + + if subscribers.is_empty() { + None + } else { + Some(ReservedBroadcast { subscribers }) + } + } +} + +pub(crate) struct ReservedBroadcast { + subscribers: Vec<(Option, OneShotFiller>)>, +} + +impl ReservedBroadcast { + pub fn complete(self, event: &WatchEvent) { + let iter = self.subscribers.into_iter(); + + for (waker, tx) in iter { + tx.fill(Some(event.clone())); + if let Some(waker) = waker { + waker.wake(); + } + } + } +} diff --git a/ballista/scheduler/src/state/execution_graph.rs b/ballista/scheduler/src/state/execution_graph.rs index 54fcaf0ed..dc516efd8 100644 --- a/ballista/scheduler/src/state/execution_graph.rs +++ b/ballista/scheduler/src/state/execution_graph.rs @@ -307,7 +307,6 @@ impl ExecutionGraph { let mut locations = vec![]; for task_status in stage_task_statuses.into_iter() { { - let stage_id = stage_id as usize; let task_stage_attempt_num = task_status.stage_attempt_num as usize; if task_stage_attempt_num < running_stage.stage_attempt_num { @@ -492,7 +491,6 @@ impl ExecutionGraph { ); } else if let ExecutionStage::UnResolved(unsolved_stage) = stage { for task_status in stage_task_statuses.into_iter() { - let stage_id = stage_id as usize; let task_stage_attempt_num = task_status.stage_attempt_num as usize; let partition_id = task_status.clone().partition_id as usize; @@ -826,8 +824,8 @@ impl ExecutionGraph { /// Total number of tasks in this plan that are ready for scheduling pub fn available_tasks(&self) -> usize { self.stages - .iter() - .map(|(_, stage)| { + .values() + .map(|stage| { if let ExecutionStage::Running(stage) = stage { stage.available_tasks() } else { @@ -1425,8 +1423,8 @@ impl Debug for ExecutionGraph { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let stages = self .stages - .iter() - .map(|(_, stage)| format!("{:?}", stage)) + .values() + .map(|stage| format!("{:?}", stage)) .collect::>() .join(""); write!(f, "ExecutionGraph[job_id={}, session_id={}, available_tasks={}, is_successful={}]\n{}", @@ -1704,13 +1702,15 @@ mod test { let executor2 = mock_executor("executor-id2".to_string()); let mut join_graph = test_join_plan(4).await; - assert_eq!(join_graph.stage_count(), 5); + // With the improvement of https://github.com/apache/arrow-datafusion/pull/4122, + // unnecessary RepartitionExec can be removed + assert_eq!(join_graph.stage_count(), 4); assert_eq!(join_graph.available_tasks(), 0); // Call revive to move the two leaf Resolved stages to Running join_graph.revive(); - assert_eq!(join_graph.stage_count(), 5); + assert_eq!(join_graph.stage_count(), 4); assert_eq!(join_graph.available_tasks(), 2); // Complete the first stage @@ -1755,13 +1755,13 @@ mod test { let executor2 = mock_executor("executor-id2".to_string()); let mut join_graph = test_join_plan(4).await; - assert_eq!(join_graph.stage_count(), 5); + assert_eq!(join_graph.stage_count(), 4); assert_eq!(join_graph.available_tasks(), 0); // Call revive to move the two leaf Resolved stages to Running join_graph.revive(); - assert_eq!(join_graph.stage_count(), 5); + assert_eq!(join_graph.stage_count(), 4); assert_eq!(join_graph.available_tasks(), 2); // Complete the first stage diff --git a/ballista/scheduler/src/state/execution_graph/execution_stage.rs b/ballista/scheduler/src/state/execution_graph/execution_stage.rs index 23bcafcf8..87d06346f 100644 --- a/ballista/scheduler/src/state/execution_graph/execution_stage.rs +++ b/ballista/scheduler/src/state/execution_graph/execution_stage.rs @@ -22,7 +22,7 @@ use std::iter::FromIterator; use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; -use datafusion::physical_optimizer::hash_build_probe_order::HashBuildProbeOrder; +use datafusion::physical_optimizer::join_selection::JoinSelection; use datafusion::physical_optimizer::PhysicalOptimizerRule; use datafusion::physical_plan::display::DisplayableExecutionPlan; use datafusion::physical_plan::metrics::{MetricValue, MetricsSet}; @@ -378,7 +378,7 @@ impl UnresolvedStage { )?; // Optimize join order based on new resolved statistics - let optimize_join = HashBuildProbeOrder::new(); + let optimize_join = JoinSelection::new(); let plan = optimize_join.optimize(plan, &SessionConfig::new())?; Ok(ResolvedStage::new( @@ -844,7 +844,7 @@ impl RunningStage { let new_metric = Arc::new(Metric::new(metric_value, Some(partition))); first.push(new_metric); } - first.aggregate_by_partition() + first.aggregate_by_name() } pub(super) fn task_failure_number(&self, partition_id: usize) -> usize { diff --git a/ballista/scheduler/src/state/execution_graph_dot.rs b/ballista/scheduler/src/state/execution_graph_dot.rs index 708b5077f..6e65612fc 100644 --- a/ballista/scheduler/src/state/execution_graph_dot.rs +++ b/ballista/scheduler/src/state/execution_graph_dot.rs @@ -473,10 +473,10 @@ filter_expr="] subgraph cluster4 { label = "Stage 5 [Unresolved]"; stage_5_0 [shape=box, label="ShuffleWriter [48 partitions]"] - stage_5_0_0 [shape=box, label="Projection: a@0, a@1, a@2"] + stage_5_0_0 [shape=box, label="Projection: a@0, b@1, a@2, b@3, a@4, b@5"] stage_5_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"] stage_5_0_0_0_0 [shape=box, label="HashJoin -join_expr=a@1 = a@0 +join_expr=b@3 = b@1 filter_expr="] stage_5_0_0_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"] stage_5_0_0_0_0_0_0 [shape=box, label="UnresolvedShuffleExec [stage_id=3]"] @@ -528,7 +528,132 @@ filter_expr="] Ok(()) } + #[tokio::test] + async fn dot_optimized() -> Result<()> { + let graph = test_graph_optimized().await?; + let dot = ExecutionGraphDot::generate(Arc::new(graph)) + .map_err(|e| BallistaError::Internal(format!("{:?}", e)))?; + + let expected = r#"digraph G { + subgraph cluster0 { + label = "Stage 1 [Resolved]"; + stage_1_0 [shape=box, label="ShuffleWriter [0 partitions]"] + stage_1_0_0 [shape=box, label="MemoryExec"] + stage_1_0_0 -> stage_1_0 + } + subgraph cluster1 { + label = "Stage 2 [Resolved]"; + stage_2_0 [shape=box, label="ShuffleWriter [0 partitions]"] + stage_2_0_0 [shape=box, label="MemoryExec"] + stage_2_0_0 -> stage_2_0 + } + subgraph cluster2 { + label = "Stage 3 [Resolved]"; + stage_3_0 [shape=box, label="ShuffleWriter [0 partitions]"] + stage_3_0_0 [shape=box, label="MemoryExec"] + stage_3_0_0 -> stage_3_0 + } + subgraph cluster3 { + label = "Stage 4 [Unresolved]"; + stage_4_0 [shape=box, label="ShuffleWriter [48 partitions]"] + stage_4_0_0 [shape=box, label="Projection: a@0, a@1, a@2"] + stage_4_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"] + stage_4_0_0_0_0 [shape=box, label="HashJoin +join_expr=a@1 = a@0 +filter_expr="] + stage_4_0_0_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"] + stage_4_0_0_0_0_0_0 [shape=box, label="HashJoin +join_expr=a@0 = a@0 +filter_expr="] + stage_4_0_0_0_0_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"] + stage_4_0_0_0_0_0_0_0_0 [shape=box, label="UnresolvedShuffleExec [stage_id=1]"] + stage_4_0_0_0_0_0_0_0_0 -> stage_4_0_0_0_0_0_0_0 + stage_4_0_0_0_0_0_0_0 -> stage_4_0_0_0_0_0_0 + stage_4_0_0_0_0_0_0_1 [shape=box, label="CoalesceBatches [batchSize=4096]"] + stage_4_0_0_0_0_0_0_1_0 [shape=box, label="UnresolvedShuffleExec [stage_id=2]"] + stage_4_0_0_0_0_0_0_1_0 -> stage_4_0_0_0_0_0_0_1 + stage_4_0_0_0_0_0_0_1 -> stage_4_0_0_0_0_0_0 + stage_4_0_0_0_0_0_0 -> stage_4_0_0_0_0_0 + stage_4_0_0_0_0_0 -> stage_4_0_0_0_0 + stage_4_0_0_0_0_1 [shape=box, label="CoalesceBatches [batchSize=4096]"] + stage_4_0_0_0_0_1_0 [shape=box, label="UnresolvedShuffleExec [stage_id=3]"] + stage_4_0_0_0_0_1_0 -> stage_4_0_0_0_0_1 + stage_4_0_0_0_0_1 -> stage_4_0_0_0_0 + stage_4_0_0_0_0 -> stage_4_0_0_0 + stage_4_0_0_0 -> stage_4_0_0 + stage_4_0_0 -> stage_4_0 + } + stage_1_0 -> stage_4_0_0_0_0_0_0_0_0 + stage_2_0 -> stage_4_0_0_0_0_0_0_1_0 + stage_3_0 -> stage_4_0_0_0_0_1_0 +} +"#; + assert_eq!(expected, &dot); + Ok(()) + } + + #[tokio::test] + async fn query_stage_optimized() -> Result<()> { + let graph = test_graph_optimized().await?; + let dot = ExecutionGraphDot::generate_for_query_stage(Arc::new(graph), 4) + .map_err(|e| BallistaError::Internal(format!("{:?}", e)))?; + + let expected = r#"digraph G { + stage_4_0 [shape=box, label="ShuffleWriter [48 partitions]"] + stage_4_0_0 [shape=box, label="Projection: a@0, a@1, a@2"] + stage_4_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"] + stage_4_0_0_0_0 [shape=box, label="HashJoin +join_expr=a@1 = a@0 +filter_expr="] + stage_4_0_0_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"] + stage_4_0_0_0_0_0_0 [shape=box, label="HashJoin +join_expr=a@0 = a@0 +filter_expr="] + stage_4_0_0_0_0_0_0_0 [shape=box, label="CoalesceBatches [batchSize=4096]"] + stage_4_0_0_0_0_0_0_0_0 [shape=box, label="UnresolvedShuffleExec [stage_id=1]"] + stage_4_0_0_0_0_0_0_0_0 -> stage_4_0_0_0_0_0_0_0 + stage_4_0_0_0_0_0_0_0 -> stage_4_0_0_0_0_0_0 + stage_4_0_0_0_0_0_0_1 [shape=box, label="CoalesceBatches [batchSize=4096]"] + stage_4_0_0_0_0_0_0_1_0 [shape=box, label="UnresolvedShuffleExec [stage_id=2]"] + stage_4_0_0_0_0_0_0_1_0 -> stage_4_0_0_0_0_0_0_1 + stage_4_0_0_0_0_0_0_1 -> stage_4_0_0_0_0_0_0 + stage_4_0_0_0_0_0_0 -> stage_4_0_0_0_0_0 + stage_4_0_0_0_0_0 -> stage_4_0_0_0_0 + stage_4_0_0_0_0_1 [shape=box, label="CoalesceBatches [batchSize=4096]"] + stage_4_0_0_0_0_1_0 [shape=box, label="UnresolvedShuffleExec [stage_id=3]"] + stage_4_0_0_0_0_1_0 -> stage_4_0_0_0_0_1 + stage_4_0_0_0_0_1 -> stage_4_0_0_0_0 + stage_4_0_0_0_0 -> stage_4_0_0_0 + stage_4_0_0_0 -> stage_4_0_0 + stage_4_0_0 -> stage_4_0 +} +"#; + assert_eq!(expected, &dot); + Ok(()) + } + async fn test_graph() -> Result { + let ctx = + SessionContext::with_config(SessionConfig::new().with_target_partitions(48)); + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::UInt32, false), + Field::new("b", DataType::UInt32, false), + ])); + let table = Arc::new(MemTable::try_new(schema.clone(), vec![])?); + ctx.register_table("foo", table.clone())?; + ctx.register_table("bar", table.clone())?; + ctx.register_table("baz", table)?; + let df = ctx + .sql("SELECT * FROM foo JOIN bar ON foo.a = bar.a JOIN baz on bar.b = baz.b") + .await?; + let plan = df.to_logical_plan()?; + let plan = ctx.create_physical_plan(&plan).await?; + ExecutionGraph::new("scheduler_id", "job_id", "job_name", "session_id", plan, 0) + } + + // With the improvement of https://github.com/apache/arrow-datafusion/pull/4122, + // Redundant RepartitionExec can be removed so that the stage number will be reduced + async fn test_graph_optimized() -> Result { let ctx = SessionContext::with_config(SessionConfig::new().with_target_partitions(48)); let schema = diff --git a/ballista/scheduler/src/state/executor_manager.rs b/ballista/scheduler/src/state/executor_manager.rs index ad5235948..89ec3f05f 100644 --- a/ballista/scheduler/src/state/executor_manager.rs +++ b/ballista/scheduler/src/state/executor_manager.rs @@ -648,7 +648,7 @@ impl ExecutorManager { #[cfg(test)] mod test { use crate::config::SlotsPolicy; - use crate::state::backend::standalone::StandaloneClient; + use crate::state::backend::sled::SledClient; use crate::state::executor_manager::{ExecutorManager, ExecutorReservation}; use ballista_core::error::Result; use ballista_core::serde::scheduler::{ @@ -666,7 +666,7 @@ mod test { } async fn test_reserve_and_cancel_inner(slots_policy: SlotsPolicy) -> Result<()> { - let state_storage = Arc::new(StandaloneClient::try_new_temporary()?); + let state_storage = Arc::new(SledClient::try_new_temporary()?); let executor_manager = ExecutorManager::new(state_storage, slots_policy); @@ -714,7 +714,7 @@ mod test { } async fn test_reserve_partial_inner(slots_policy: SlotsPolicy) -> Result<()> { - let state_storage = Arc::new(StandaloneClient::try_new_temporary()?); + let state_storage = Arc::new(SledClient::try_new_temporary()?); let executor_manager = ExecutorManager::new(state_storage, slots_policy); @@ -769,7 +769,7 @@ mod test { let executors = test_executors(10, 4); - let state_storage = Arc::new(StandaloneClient::try_new_temporary()?); + let state_storage = Arc::new(SledClient::try_new_temporary()?); let executor_manager = ExecutorManager::new(state_storage, slots_policy); @@ -814,7 +814,7 @@ mod test { } async fn test_register_reserve_inner(slots_policy: SlotsPolicy) -> Result<()> { - let state_storage = Arc::new(StandaloneClient::try_new_temporary()?); + let state_storage = Arc::new(SledClient::try_new_temporary()?); let executor_manager = ExecutorManager::new(state_storage, slots_policy); diff --git a/ballista/scheduler/src/state/mod.rs b/ballista/scheduler/src/state/mod.rs index baed7fbec..b692b36e6 100644 --- a/ballista/scheduler/src/state/mod.rs +++ b/ballista/scheduler/src/state/mod.rs @@ -240,47 +240,66 @@ impl SchedulerState> = tasks.into_values().collect(); // Total number of tasks to be launched for one executor let n_tasks: usize = tasks.iter().map(|stage_tasks| stage_tasks.len()).sum(); - match self - .executor_manager - .get_executor_metadata(&executor_id) - .await - { - Ok(executor) => { - if let Err(e) = self - .task_manager - .launch_multi_task( - &executor, - tasks, - &self.executor_manager, - ) - .await - { - error!("Failed to launch new task: {:?}", e); - for _i in 0..n_tasks { - unassigned_reservations.push( - ExecutorReservation::new_free( - executor_id.clone(), - ), - ); + let task_manager = self.task_manager.clone(); + let executor_manager = self.executor_manager.clone(); + let join_handle = tokio::spawn(async move { + let success = match executor_manager + .get_executor_metadata(&executor_id) + .await + { + Ok(executor) => { + if let Err(e) = task_manager + .launch_multi_task( + &executor, + tasks, + &executor_manager, + ) + .await + { + error!("Failed to launch new task: {:?}", e); + false + } else { + true } } - } - Err(e) => { - error!("Failed to launch new task, could not get executor metadata: {:?}", e); - for _i in 0..n_tasks { - unassigned_reservations.push( - ExecutorReservation::new_free(executor_id.clone()), - ); + Err(e) => { + error!("Failed to launch new task, could not get executor metadata: {:?}", e); + false } + }; + if success { + vec![] + } else { + vec![ + ExecutorReservation::new_free(executor_id.clone(),); + n_tasks + ] } - } + }); + join_handles.push(join_handle); } + + let unassigned_executor_reservations = + futures::future::join_all(join_handles) + .await + .into_iter() + .collect::>, + tokio::task::JoinError, + >>()?; + unassigned_reservations.append( + &mut unassigned_executor_reservations + .into_iter() + .flatten() + .collect::>(), + ); (unassigned_reservations, pending_tasks) } Err(e) => { @@ -432,7 +451,7 @@ pub async fn with_locks>( #[cfg(test)] mod test { - use crate::state::backend::standalone::StandaloneClient; + use crate::state::backend::sled::SledClient; use crate::state::SchedulerState; use ballista_core::config::{BallistaConfig, BALLISTA_DEFAULT_SHUFFLE_PARTITIONS}; use ballista_core::error::Result; @@ -458,7 +477,7 @@ mod test { // We should free any reservations which are not assigned #[tokio::test] async fn test_offer_free_reservations() -> Result<()> { - let state_storage = Arc::new(StandaloneClient::try_new_temporary()?); + let state_storage = Arc::new(SledClient::try_new_temporary()?); let state: Arc> = Arc::new(SchedulerState::new_with_default_scheduler_name( state_storage.clone(), @@ -495,7 +514,7 @@ mod test { let config = BallistaConfig::builder() .set(BALLISTA_DEFAULT_SHUFFLE_PARTITIONS, "4") .build()?; - let state_storage = Arc::new(StandaloneClient::try_new_temporary()?); + let state_storage = Arc::new(SledClient::try_new_temporary()?); let state: Arc> = Arc::new(SchedulerState::with_task_launcher( state_storage.clone(), @@ -581,7 +600,7 @@ mod test { let config = BallistaConfig::builder() .set(BALLISTA_DEFAULT_SHUFFLE_PARTITIONS, "4") .build()?; - let state_storage = Arc::new(StandaloneClient::try_new_temporary()?); + let state_storage = Arc::new(SledClient::try_new_temporary()?); let state: Arc> = Arc::new(SchedulerState::with_task_launcher( state_storage.clone(), diff --git a/ballista/scheduler/src/test_utils.rs b/ballista/scheduler/src/test_utils.rs index 3cb3212e7..6e8d26396 100644 --- a/ballista/scheduler/src/test_utils.rs +++ b/ballista/scheduler/src/test_utils.rs @@ -27,7 +27,7 @@ use async_trait::async_trait; use crate::config::SchedulerConfig; use crate::metrics::SchedulerMetricsCollector; use crate::scheduler_server::{timestamp_millis, SchedulerServer}; -use crate::state::backend::standalone::StandaloneClient; +use crate::state::backend::sled::SledClient; use crate::state::executor_manager::ExecutorManager; use crate::state::task_manager::TaskLauncher; @@ -81,7 +81,7 @@ impl TableProvider for ExplodingTableProvider { async fn scan( &self, _ctx: &SessionState, - _projection: &Option>, + _projection: Option<&Vec>, _filters: &[Expr], _limit: Option, ) -> datafusion::common::Result> { @@ -380,7 +380,7 @@ impl SchedulerTest { task_slots_per_executor: usize, runner: Option>, ) -> Result { - let state_storage = Arc::new(StandaloneClient::try_new_temporary()?); + let state_storage = Arc::new(SledClient::try_new_temporary()?); let ballista_config = BallistaConfig::builder() .set( diff --git a/ballista/scheduler/ui/src/components/DataTable.tsx b/ballista/scheduler/ui/src/components/DataTable.tsx index 70e17c02d..be8edf864 100644 --- a/ballista/scheduler/ui/src/components/DataTable.tsx +++ b/ballista/scheduler/ui/src/components/DataTable.tsx @@ -62,10 +62,9 @@ interface DataTableProps { } export const ElapsedCell: (props: any) => React.ReactNode = (props: any) => { - const time = new Date(new Date().getTime() - props.value); return ( "] homepage = "https://github.com/apache/arrow-ballista" repository = "https://github.com/apache/arrow-ballista" license = "Apache-2.0" publish = false -rust-version = "1.59" +rust-version = "1.63" [features] default = ["mimalloc"] @@ -33,10 +33,10 @@ simd = ["datafusion/simd"] snmalloc = ["snmalloc-rs"] [dependencies] -ballista = { path = "../ballista/client", version = "0.9.0" } -datafusion = "14.0.0" -datafusion-proto = "14.0.0" -env_logger = "0.9" +ballista = { path = "../ballista/client", version = "0.10.0" } +datafusion = "15.0.0" +datafusion-proto = "15.0.0" +env_logger = "0.10" futures = "0.3" mimalloc = { version = "0.1", optional = true, default-features = false } num_cpus = "1.13.0" @@ -48,4 +48,4 @@ structopt = { version = "0.3", default-features = false } tokio = { version = "^1.0", features = ["macros", "rt", "rt-multi-thread", "parking_lot"] } [dev-dependencies] -ballista-core = { path = "../ballista/core", version = "0.9.0" } +ballista-core = { path = "../ballista/core", version = "0.10.0" } diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs index fd18a48fc..aabfea6ff 100644 --- a/benchmarks/src/bin/tpch.rs +++ b/benchmarks/src/bin/tpch.rs @@ -330,7 +330,7 @@ async fn benchmark_datafusion(opt: DataFusionBenchmarkOpt) -> Result Result<()> { .unwrap(); } let elapsed = start.elapsed().as_secs_f64() * 1000.0; - millis.push(elapsed as f64); + millis.push(elapsed); let row_count = batches.iter().map(|b| b.num_rows()).sum(); println!( "Query {} iteration {} took {:.1} ms and returned {} rows", @@ -556,7 +556,7 @@ fn get_query_sql_by_path(query: usize, mut sql_path: String) -> Result { } if query > 0 && query < 23 { let filename = format!("{}/q{}.sql", sql_path, query); - Ok(fs::read_to_string(&filename).expect("failed to read query")) + Ok(fs::read_to_string(filename).expect("failed to read query")) } else { Err(DataFusionError::Plan( "invalid query. Expected value between 1 and 22".to_owned(), @@ -816,7 +816,8 @@ async fn get_table( } "parquet" => { let path = format!("{}/{}", path, table); - let format = ParquetFormat::default().with_enable_pruning(true); + let format = ParquetFormat::new(ctx.config_options()) + .with_enable_pruning(Some(true)); (Arc::new(format), path, DEFAULT_PARQUET_EXTENSION) } @@ -832,6 +833,7 @@ async fn get_table( target_partitions, collect_stat: true, table_partition_cols: vec![], + file_sort_order: None, }; let url = ListingTableUrl::parse(path)?; diff --git a/dev/build-ballista-docker.sh b/dev/build-ballista-docker.sh index cead5e845..71f37fc82 100755 --- a/dev/build-ballista-docker.sh +++ b/dev/build-ballista-docker.sh @@ -21,9 +21,7 @@ set -e RELEASE_FLAG=${RELEASE_FLAG:=release} -docker build -t ballista-builder --build-arg EXT_UID="$(id -u)" -f dev/docker/ballista-builder.Dockerfile . - -docker run -v $(pwd):/home/builder/workspace --env RELEASE_FLAG=$RELEASE_FLAG ballista-builder +./dev/build-ballista-executables.sh docker-compose build diff --git a/dev/build-ballista-rust.sh b/dev/build-ballista-executables.sh similarity index 79% rename from dev/build-ballista-rust.sh rename to dev/build-ballista-executables.sh index 16e5bf8c1..4396e14dc 100755 --- a/dev/build-ballista-rust.sh +++ b/dev/build-ballista-executables.sh @@ -21,6 +21,10 @@ set -e RELEASE_FLAG=${RELEASE_FLAG:=release} +# TODO: it would be very nice if we could make CI work the exact same way so the build logic isn't duplicated + +# build a docker container in which to run the build - this is to make life easier for Windows & Mac users docker build -t ballista-builder --build-arg EXT_UID="$(id -u)" -f dev/docker/ballista-builder.Dockerfile . +# run cargo & yarn builds inside the builder container docker run -v $(pwd):/home/builder/workspace --env RELEASE_FLAG=$RELEASE_FLAG ballista-builder diff --git a/dev/build-ui.sh b/dev/build-ui.sh deleted file mode 100755 index c7b0f2d9a..000000000 --- a/dev/build-ui.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e - -. ./dev/build-set-env.sh -docker build -t ballista-scheduler-ui:$BALLISTA_VERSION -f dev/docker/ballista-scheduler-ui.Dockerfile ballista/scheduler/ui diff --git a/dev/docker/ballista-builder.Dockerfile b/dev/docker/ballista-builder.Dockerfile index e210e5260..8ae2c22c2 100644 --- a/dev/docker/ballista-builder.Dockerfile +++ b/dev/docker/ballista-builder.Dockerfile @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -FROM rust:1.63.0-buster +FROM rust:1-buster ARG EXT_UID diff --git a/dev/docker/ballista-standalone.Dockerfile b/dev/docker/ballista-standalone.Dockerfile index ade413d4b..c1027f8d5 100644 --- a/dev/docker/ballista-standalone.Dockerfile +++ b/dev/docker/ballista-standalone.Dockerfile @@ -33,6 +33,9 @@ RUN apt-get -qq update && apt-get install -qq -y nginx netcat wget COPY target/$RELEASE_FLAG/ballista-scheduler /root/ballista-scheduler COPY target/$RELEASE_FLAG/ballista-executor /root/ballista-executor +RUN chmod a+x /root/ballista-scheduler && \ + chmod a+x /root/ballista-executor + # populate some sample data for ListingSchemaProvider RUN mkdir -p /data && \ wget -q https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2022-01.parquet -P /data/ diff --git a/dev/docker/standalone-entrypoint.sh b/dev/docker/standalone-entrypoint.sh old mode 100644 new mode 100755 diff --git a/dev/release/README.md b/dev/release/README.md index aa3af8c02..b9154ebe8 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -290,17 +290,14 @@ dot -Tsvg dev/release/crate-deps.dot > dev/release/crate-deps.svg ### Publish Docker Images -We do not yet publish Docker images to the official Apache DockerHub account but there is an issue open for this -([#236](https://github.com/apache/arrow-ballista/issues/236)). +We release the Docker image that was voted on rather than build a new image. We do this by re-tagging the image. -To build Docker images: - -```shell -RELEASE_FLAG=release-lto ./dev/build-ballista-docker.sh +```bash +$ docker pull ghcr.io/apache/arrow-ballista-standalone:0.10.0-rc3 +$ docker tag ghcr.io/apache/arrow-ballista-standalone:0.10.0-rc3 ghcr.io/apache/arrow-ballista-standalone:0.10.0 +$ docker push ghcr.io/apache/arrow-ballista-standalone:0.10.0 ``` -The Docker image is tagged as `apache/arrow-ballista:0.8.0`. - ### Call the vote Call the vote on the Arrow dev list by replying to the RC voting thread. The diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index ea6411703..7bd9d6813 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -53,3 +53,4 @@ Cargo.lock .history parquet-testing/* *rat.txt +ballista/core/src/serde/generated/ballista.rs \ No newline at end of file diff --git a/docs/source/user-guide/flightsql.md b/docs/source/user-guide/flightsql.md index 6a99aa74c..cb420e3de 100644 --- a/docs/source/user-guide/flightsql.md +++ b/docs/source/user-guide/flightsql.md @@ -24,12 +24,11 @@ One of the easiest ways to start with Ballista is to plug it into your existing Getting started involves these main steps: 1. [Installing prerequisites](#prereq) -2. Build the [Ballista rust code](#rust) -3. Build and run the [Ballista docker containers](#docker) -4. Build the [Arrow Flight SQL JDBC Driver](#jdbc) -5. [Install the driver](#tool) into your favorite JDBC tool -6. Run a ["hello, world!"](#hello) query -7. Register a table and run more complicated queries +2. Run the [Ballista docker container](#docker) +3. Download the [Arrow Flight SQL JDBC Driver](#jdbc) +4. [Install the driver](#tool) into your favorite JDBC tool +5. Run a ["hello, world!"](#hello) query +6. Register a table and run more complicated queries ## Prerequisites @@ -37,41 +36,25 @@ Getting started involves these main steps: ```shell sudo apt-get update -sudo apt-get install -y docker.io docker-compose +sudo apt-get install -y docker.io ``` ### MacOS ```shell -brew install docker docker-compose +brew install docker ``` ### Windows ```shell -choco install docker-desktop docker-compose +choco install docker-desktop ``` -## Building Ballista - -To build in docker (non-linux systems): - -```shell -git clone https://github.com/apache/arrow-ballista.git -dev/build-ballista-rust.sh -``` - -Or in linux-based systems with the correct dependencies installed, one can simply: - -```shell -cargo build --release --all --features flight-sql -``` - -## Run Docker Containers +## Run Docker Container ```shell -source dev/build-ballista-docker.sh -docker-compose up +docker run -p 50050:50050 --rm ghcr.io/apache/arrow-ballista-standalone:0.10.0 ``` ## Download the FlightSQL JDBC Driver @@ -103,17 +86,16 @@ select 'Hello from Arrow Ballista!' as greeting; In order to run queries against data, tables need to be "registered" with the current session (and re-registered upon each new connection). -To register a table, find a `.csv`, `.json`, or `.parquet` file for testing, and use the syntax below: +To register the built-in demo table, use the syntax below: ```sql -create external table customer stored as CSV with header row - location '/path/to/customer.csv'; +create external table taxi stored as parquet location '/data/yellow_tripdata_2022-01.parquet'; ``` Once the table has been registered, all the normal SQL queries can be performed: ```sql -select * from customer; +select * from taxi limit 10; ``` 🎉 Happy querying! 🎉 diff --git a/docs/source/user-guide/python.md b/docs/source/user-guide/python.md index 96ff815ab..80ce8aa5d 100644 --- a/docs/source/user-guide/python.md +++ b/docs/source/user-guide/python.md @@ -69,7 +69,7 @@ The `sql` method creates a `DataFrame`. The query is executed when an action suc ### Collecting Query Results -The `collect` method executres the query and returns the results in +The `collect` method executes the query and returns the results in [PyArrow](https://arrow.apache.org/docs/python/index.html) record batches. ```text diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 93cf8fd83..06aa536d6 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "ballista-examples" description = "Ballista usage examples" -version = "0.9.0" +version = "0.10.0" homepage = "https://github.com/apache/arrow-ballista" repository = "https://github.com/apache/arrow-ballista" authors = ["Apache Arrow "] @@ -26,7 +26,7 @@ license = "Apache-2.0" keywords = [ "arrow", "distributed", "query", "sql" ] edition = "2021" publish = false -rust-version = "1.59" +rust-version = "1.63" [[example]] name = "standalone_sql" @@ -34,8 +34,8 @@ path = "examples/standalone-sql.rs" required-features = ["ballista/standalone"] [dependencies] -ballista = { path = "../ballista/client", version = "0.9.0" } -datafusion = "14.0.0" +ballista = { path = "../ballista/client", version = "0.10.0" } +datafusion = "15.0.0" futures = "0.3" num_cpus = "1.13.0" prost = "0.11" diff --git a/helm/README.md b/helm/README.md index 7be9832b7..c24749d5e 100644 --- a/helm/README.md +++ b/helm/README.md @@ -57,7 +57,9 @@ helm install ballista . Run the following command to redirect localhost port 8080 to port 80 in the scheduler container and then view the scheduler UI at http://localhost:8080. +```shell kubectl port-forward ballista-scheduler-0 8080:80 +``` ## Connect diff --git a/helm/ballista/templates/executor.yaml b/helm/ballista/templates/executor.yaml index bf1259713..4408d175c 100644 --- a/helm/ballista/templates/executor.yaml +++ b/helm/ballista/templates/executor.yaml @@ -50,7 +50,7 @@ spec: - name: {{ .Chart.Name }}-executor securityContext: {{- toYaml .Values.securityContext | nindent 12 }} - image: "{{ .Values.image.repository }}{{ .Values.image.executor }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + image: "{{ .Values.image.repository }}/{{ .Values.image.executor }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.image.pullPolicy }} command: ["/root/ballista-executor", "--scheduler-host=ballista-scheduler"] env: diff --git a/helm/ballista/templates/scheduler.yaml b/helm/ballista/templates/scheduler.yaml index 854f3472a..fc44c2f20 100644 --- a/helm/ballista/templates/scheduler.yaml +++ b/helm/ballista/templates/scheduler.yaml @@ -50,7 +50,7 @@ spec: - name: {{ .Chart.Name }}-scheduler securityContext: {{- toYaml .Values.securityContext | nindent 12 }} - image: "{{ .Values.image.repository }}{{ .Values.image.scheduler }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + image: "{{ .Values.image.repository }}/{{ .Values.image.scheduler }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.image.pullPolicy }} env: - name: AWS_DEFAULT_REGION diff --git a/helm/ballista/values.yaml b/helm/ballista/values.yaml index bbf316271..1bae89341 100644 --- a/helm/ballista/values.yaml +++ b/helm/ballista/values.yaml @@ -20,7 +20,7 @@ image: repository: "" scheduler: ballista-scheduler executor: ballista-executor - pullPolicy: Never + pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. tag: "latest" diff --git a/python/Cargo.toml b/python/Cargo.toml index d06aa3c61..5879d7443 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "ballista-python" -version = "0.9.0" +version = "0.10.0" homepage = "https://github.com/apache/arrow-ballista" repository = "https://github.com/apache/arrow-ballista" authors = ["Apache Arrow "] @@ -35,8 +35,8 @@ default = ["mimalloc"] [dependencies] async-trait = "0.1" -ballista = { path = "../ballista/client", version = "0.9.0" } -datafusion = { version = "14.0.0", features = ["pyarrow"] } +ballista = { path = "../ballista/client", version = "0.10.0" } +datafusion = { version = "15.0.0", features = ["pyarrow"] } futures = "0.3" mimalloc = { version = "*", optional = true, default-features = false } pyo3 = { version = "~0.17.1", features = ["extension-module", "abi3", "abi3-py37"] } diff --git a/python/src/context.rs b/python/src/context.rs index 38a3cb9b9..26c5661ad 100644 --- a/python/src/context.rs +++ b/python/src/context.rs @@ -23,7 +23,7 @@ use uuid::Uuid; use pyo3::exceptions::{PyKeyError, PyValueError}; use pyo3::prelude::*; -use datafusion::arrow::datatypes::Schema; +use datafusion::arrow::datatypes::{DataType, Schema}; use datafusion::arrow::pyarrow::PyArrowType; use datafusion::arrow::record_batch::RecordBatch; use datafusion::datasource::datasource::TableProvider; @@ -34,6 +34,7 @@ use datafusion::prelude::{CsvReadOptions, ParquetReadOptions}; use crate::catalog::{PyCatalog, PyTable}; use crate::dataframe::PyDataFrame; use crate::dataset::Dataset; +use crate::datatype::PyDataType; use crate::errors::DataFusionError; use crate::udf::PyScalarUDF; use crate::utils::wait_for_future; @@ -159,13 +160,13 @@ impl PySessionContext { &mut self, name: &str, path: &str, - table_partition_cols: Vec, + table_partition_cols: Vec<(String, PyDataType)>, parquet_pruning: bool, file_extension: &str, py: Python, ) -> PyResult<()> { let mut options = ParquetReadOptions::default() - .table_partition_cols(table_partition_cols) + .table_partition_cols(convert_table_partition_cols(table_partition_cols)) .parquet_pruning(parquet_pruning); options.file_extension = file_extension; let result = self.ctx.register_parquet(name, path, options); @@ -255,3 +256,12 @@ impl PySessionContext { Ok(PyDataFrame::new(self.ctx.read_empty()?)) } } + +fn convert_table_partition_cols( + table_partition_cols: Vec<(String, PyDataType)>, +) -> Vec<(String, DataType)> { + table_partition_cols + .iter() + .map(|(name, t)| (name.clone(), t.data_type.clone())) + .collect() +} \ No newline at end of file diff --git a/python/src/dataset.rs b/python/src/dataset.rs index d34d974fc..f0b2b10e9 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -98,7 +98,7 @@ impl TableProvider for Dataset { async fn scan( &self, _ctx: &SessionState, - projection: &Option>, + projection: Option<&Vec>, filters: &[Expr], // limit can be used to reduce the amount scanned // from the datasource as a performance optimization. @@ -111,7 +111,7 @@ impl TableProvider for Dataset { DatasetExec::new( py, self.dataset.as_ref(py), - projection.clone(), + projection.cloned(), filters, ) .map_err(|err| DataFusionError::External(Box::new(err)))?, diff --git a/python/src/datatype.rs b/python/src/datatype.rs new file mode 100644 index 000000000..07f6cd230 --- /dev/null +++ b/python/src/datatype.rs @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// Copied from https://github.com/apache/arrow-datafusion-python/pull/103 + +use datafusion::arrow::datatypes::DataType; +use pyo3::pyclass; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[pyclass(name = "PyDataType", module = "datafusion", subclass)] +pub struct PyDataType { + pub(crate) data_type: DataType, +} + +impl From for DataType { + fn from(data_type: PyDataType) -> DataType { + data_type.data_type + } +} + +impl From for PyDataType { + fn from(data_type: DataType) -> PyDataType { + PyDataType { data_type } + } +} \ No newline at end of file diff --git a/python/src/lib.rs b/python/src/lib.rs index 01158d55e..106978a7c 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -29,6 +29,8 @@ mod context; mod dataframe; mod dataset; mod dataset_exec; +#[allow(clippy::borrow_deref_ref)] +mod datatype; pub mod errors; #[allow(clippy::borrow_deref_ref)] mod expression; diff --git a/python/src/udaf.rs b/python/src/udaf.rs index f29734764..42c388ed8 100644 --- a/python/src/udaf.rs +++ b/python/src/udaf.rs @@ -95,6 +95,10 @@ impl Accumulator for RustAccumulator { Ok(()) }) } + + fn size(&self) -> usize { + std::mem::size_of_val(self) + } } pub fn to_rust_accumulator(accum: PyObject) -> AccumulatorFunctionImplementation {