From afeee3265bd8047aa4ae1387459b0a388f990799 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 21 Apr 2022 12:49:50 -0400 Subject: [PATCH] Datafusion aggregate (#471) * Add basic predicate-pushdown optimization (#433) * basic predicate-pushdown support * remove explict Dispatch class * use _Frame.fillna * cleanup comments * test coverage * improve test coverage * add xfail test for dt accessor in predicate and fix test_show.py * fix some naming issues * add config and use assert_eq * add logging events when predicate-pushdown bails * move bail logic earlier in function * address easier code review comments * typo fix * fix creation_info access bug * convert any expression to DNF * csv test coverage * include IN coverage * improve test rigor * address code review * skip parquet tests when deps are not installed * fix bug * add pyarrow dep to cluster workers * roll back test skipping changes Co-authored-by: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> * Add workflow to keep datafusion dev branch up to date (#440) * Condition for BinaryExpr, filter, input_ref, rexcall, and rexliteral * Updates for test_filter * more of test_filter.py working with the exception of some date pytests * Updates to dates and parsing dates like postgresql does * Update gpuCI `RAPIDS_VER` to `22.06` (#434) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * Bump black to 22.3.0 (#443) * Check for ucx-py nightlies when updating gpuCI (#441) * Simplify gpuCI updating workflow * Add check for cuML nightly version * Refactored to adjust for better type management * Refactor schema and statements * update types * fix syntax issues and renamed function name calls * Add handling for newer `prompt_toolkit` versions in cmd tests (#447) * Add handling for newer prompt-toolkit version * Place compatibility code in _compat * Fix version for gha-find-replace (#446) * Improved error handling and code clean up * move pieces of logical.rs to seperated files to ensure code readability * left join working * Update versions of Java dependencies (#445) * Update versions for java dependencies with cves * Rerun tests * Update jackson databind version (#449) * Update versions for java dependencies with cves * Rerun tests * update jackson-databind dependency * Disable SQL server functionality (#448) * Disable SQL server functionality * Update docs/source/server.rst Co-authored-by: Ayush Dattagupta * Disable server at lowest possible level * Skip all server tests * Add tests to ensure server is disabled * Fix CVE fix test Co-authored-by: Ayush Dattagupta * Update dask pinnings for release (#450) * Add Java source code to source distribution (#451) * Bump `httpclient` dependency (#453) * Revert "Disable SQL server functionality (#448)" This reverts commit 37a3a61fb13b0c56fcc10bf8ef01f4885a58dae8. * Bump httpclient version * Unpin Dask/distributed versions (#452) * Unpin dask/distributed post release * Remove dask/distributed version ceiling * Add jsonschema to ci testing (#454) * Add jsonschema to ci env * Fix typo in config schema * Switch tests from `pd.testing.assert_frame_equal` to `dd.assert_eq` (#365) * Start moving tests to dd.assert_eq * Use assert_eq in datetime filter test * Resolve most resulting test failures * Resolve remaining test failures * Convert over tests * Convert more tests * Consolidate select limit cpu/gpu test * Remove remaining assert_series_equal * Remove explicit cudf imports from many tests * Resolve rex test failures * Remove some additional compute calls * Consolidate sorting tests with getfixturevalue * Fix failed join test * Remove breakpoint * Use custom assert_eq function for tests * Resolve test failures / seg faults * Remove unnecessary testing utils * Resolve local test failures * Generalize RAND test * Avoid closing client if using independent cluster * Fix failures on Windows * Resolve black failures * Make random test variables more clear * First basic working checkpoint for group by * Set max pin on antlr4-python-runtime (#456) * Set max pin on antlr4-python-runtime due to incompatibilities with fugue_sql * update comment on antlr max pin version * Updates to style * stage pre-commit changes for upstream merge * Fix black failures * Updates to Rust formatting * Fix rust lint and clippy * Remove jar building step which is no longer needed * Remove Java from github workflows matrix * Removes jar and Java references from test.yml * Update Release workflow to remove references to Java * Update rust.yml to remove references from linux-build-lib * Add pre-commit.sh file to provide pre-commit support for Rust in a convenient script * Removed overlooked jdk references * cargo clippy auto fixes * Address all Rust clippy warnings * Include setuptools-rust in conda build recipie * Include setuptools-rust in conda build recipie, in host and run * Adjustments for conda build, committing for others to help with error and see it occurring in CI * Include sql.yaml in package files * Include pyarrow in run section of conda build to ensure tests pass * include setuptools-rust in host and run of conda since removing caused errors * to_string() method had been removed in rust and not removed here, caused conda run_test.py to fail when this line was hit * Replace commented out tests with pytest.skip and bump version of pyarrow to 7.0.0 * Fix setup.py syntax issue introduced on last commit by find/replace * Rename Datafusion -> DataFusion and Apache DataFusion -> Arrow DataFusion * Fix docs build environment * Include Rust compiler in docs environment * Bump Rust compiler version to 1.59 * Ok, well readthedocs didn't like that * Store libdask_planner.so and retrieve it between github workflows * Cache the Rust library binary * Remove Cargo.lock from git * Remove unused datafusion-expr crate * Build datafusion at each test step instead of caching binaries * Remove maven and jar cache steps from test-upstream.yaml * Removed dangling 'build' workflow step reference * Lowered PyArrow version to 6.0.1 since cudf has a hard requirement on that version for the version of cudf we are using * Add Rust build step to test in dask cluster * Install setuptools-rust for pip to use for bare requirements import * Include pyarrow 6.0.1 via conda as a bare minimum dependency * Remove cudf dependency for python 3.9 which is causing build issues on windows * Address documentation from review * Install Rust as readthedocs post_create_environment step * Run rust install non-interactively * Run rust install non-interactively * Rust isn't available in PyPi so remove that dependency * Append ~/.cargo/bin to the PATH * Print out some environment information for debugging * Print out some environment information for debugging * More - Increase verbosity * More - Increase verbosity * More - Increase verbosity * Switch RTD over to use Conda instead of Pip since having issues with Rust and pip * Try to use mamba for building docs environment * Partial review suggestion address, checking CI still works * Skip mistakenly enabled tests * Use DataFusion master branch, and fix syntax issues related to the version bump * More updates after bumping DataFusion version to master * Use actions-rs in github workflows debug flag for setup.py * Remove setuptools-rust from conda * Use re-exported Rust types for BuiltinScalarFunction * Move python imports to TYPE_CHECKING section where applicable * Address review concerns and remove pre-commit.sh file * Pin to a specific github rev for DataFusion Co-authored-by: Richard (Rick) Zamora Co-authored-by: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Ayush Dattagupta --- .github/docker-compose.yaml | 2 + .github/workflows/release.yml | 7 +- .github/workflows/test-upstream.yml | 54 +- .github/workflows/test.yml | 100 +- .github/workflows/update-gpuci.yml | 59 +- .gitignore | 1 + .pre-commit-config.yaml | 13 +- .readthedocs.yaml | 8 +- MANIFEST.in | 1 + README.md | 13 +- .../environment-3.10-dev.yaml | 9 +- .../environment-3.8-dev.yaml | 9 +- .../environment-3.9-dev.yaml | 21 +- continuous_integration/gpuci/axis.yaml | 2 +- continuous_integration/recipe/meta.yaml | 5 +- dask_planner/.github/workflows/CI.yml | 66 - dask_planner/Cargo.lock | 1593 ----------------- dask_planner/Cargo.toml | 12 +- dask_planner/README.md | 0 dask_planner/dask_planner/python/__init__.py | 95 - dask_planner/src/catalog.rs | 107 -- dask_planner/src/errors.rs | 41 - dask_planner/src/expression.rs | 493 ++++- dask_planner/src/functions.rs | 322 ---- dask_planner/src/lib.rs | 27 +- dask_planner/src/sql.rs | 915 +--------- dask_planner/src/sql/column.rs | 35 + dask_planner/src/sql/function.rs | 8 + dask_planner/src/sql/logical.rs | 150 ++ dask_planner/src/sql/logical/aggregate.rs | 78 + dask_planner/src/sql/logical/filter.rs | 30 + dask_planner/src/sql/logical/join.rs | 76 + dask_planner/src/sql/logical/projection.rs | 124 ++ dask_planner/src/sql/schema.rs | 32 + dask_planner/src/sql/statement.rs | 35 + dask_planner/src/sql/table.rs | 183 ++ dask_planner/src/sql/types.rs | 107 ++ dask_sql/_compat.py | 6 + dask_sql/cmd.py | 9 +- dask_sql/context.py | 10 +- dask_sql/datacontainer.py | 4 - dask_sql/input_utils/convert.py | 6 +- dask_sql/mappings.py | 9 +- dask_sql/physical/rel/base.py | 12 +- dask_sql/physical/rel/convert.py | 5 +- dask_sql/physical/rel/custom/alter.py | 9 +- dask_sql/physical/rel/custom/analyze.py | 5 +- dask_sql/physical/rel/custom/predict.py | 14 +- dask_sql/physical/rel/logical/aggregate.py | 90 +- dask_sql/physical/rel/logical/filter.py | 22 +- dask_sql/physical/rel/logical/join.py | 409 +++-- dask_sql/physical/rel/logical/project.py | 22 +- dask_sql/physical/rel/logical/sort.py | 31 +- dask_sql/physical/rel/logical/table_scan.py | 10 +- dask_sql/physical/rel/logical/window.py | 43 +- dask_sql/physical/rex/base.py | 8 +- dask_sql/physical/rex/convert.py | 20 +- dask_sql/physical/rex/core/call.py | 95 +- dask_sql/physical/rex/core/input_ref.py | 11 +- dask_sql/physical/rex/core/literal.py | 72 +- dask_sql/physical/utils/filter.py | 379 ++++ dask_sql/server/app.py | 4 +- dask_sql/sql-schema.yaml | 5 + dask_sql/sql.yaml | 2 + dask_sql/utils.py | 11 +- docker/conda.txt | 7 +- docker/main.dockerfile | 6 +- docs/environment.yml | 5 +- docs/requirements-docs.txt | 4 +- docs/source/installation.rst | 18 +- setup.py | 40 +- test.py | 29 - tests/integration/fixtures.py | 55 +- tests/integration/test_analyze.py | 18 +- tests/integration/test_cmd.py | 22 +- tests/integration/test_compatibility.py | 39 +- tests/integration/test_complex.py | 9 +- tests/integration/test_create.py | 97 +- tests/integration/test_distributeby.py | 16 +- tests/integration/test_filter.py | 334 ++-- tests/integration/test_fugue.py | 34 +- tests/integration/test_function.py | 85 +- tests/integration/test_groupby.py | 298 +-- tests/integration/test_hive.py | 16 +- tests/integration/test_intake.py | 9 +- tests/integration/test_jdbc.py | 8 + tests/integration/test_join.py | 185 +- tests/integration/test_model.py | 37 +- tests/integration/test_over.py | 85 +- tests/integration/test_postgres.py | 8 + tests/integration/test_rex.py | 155 +- tests/integration/test_sample.py | 15 +- tests/integration/test_schema.py | 20 +- tests/integration/test_select.py | 91 +- tests/integration/test_server.py | 13 +- tests/integration/test_show.py | 90 +- tests/integration/test_sort.py | 407 ++--- tests/integration/test_sqlite.py | 7 + tests/integration/test_union.py | 24 +- tests/unit/test_call.py | 72 +- tests/unit/test_context.py | 50 +- tests/unit/test_utils.py | 8 +- tests/utils.py | 13 + 103 files changed, 3703 insertions(+), 4882 deletions(-) delete mode 100644 dask_planner/.github/workflows/CI.yml delete mode 100644 dask_planner/Cargo.lock create mode 100644 dask_planner/README.md delete mode 100644 dask_planner/dask_planner/python/__init__.py delete mode 100644 dask_planner/src/catalog.rs delete mode 100644 dask_planner/src/errors.rs delete mode 100644 dask_planner/src/functions.rs create mode 100644 dask_planner/src/sql/column.rs create mode 100644 dask_planner/src/sql/function.rs create mode 100644 dask_planner/src/sql/logical.rs create mode 100644 dask_planner/src/sql/logical/aggregate.rs create mode 100644 dask_planner/src/sql/logical/filter.rs create mode 100644 dask_planner/src/sql/logical/join.rs create mode 100644 dask_planner/src/sql/logical/projection.rs create mode 100644 dask_planner/src/sql/schema.rs create mode 100644 dask_planner/src/sql/statement.rs create mode 100644 dask_planner/src/sql/table.rs create mode 100644 dask_planner/src/sql/types.rs create mode 100644 dask_sql/physical/utils/filter.py mode change 100755 => 100644 setup.py delete mode 100644 test.py create mode 100644 tests/utils.py diff --git a/.github/docker-compose.yaml b/.github/docker-compose.yaml index cfb7eb43f..01cd9e3ec 100644 --- a/.github/docker-compose.yaml +++ b/.github/docker-compose.yaml @@ -11,5 +11,7 @@ services: container_name: dask-worker image: daskdev/dask:latest command: dask-worker dask-scheduler:8786 + environment: + EXTRA_CONDA_PACKAGES: "pyarrow>=4.0.0" # required for parquet IO volumes: - /tmp:/tmp diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 623bd5dae..7d46b8e13 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -16,11 +16,6 @@ jobs: if: github.repository == 'dask-contrib/dask-sql' steps: - uses: actions/checkout@v2 - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }} - name: Set up Python uses: conda-incubator/setup-miniconda@v2 with: @@ -29,7 +24,7 @@ jobs: python-version: "3.8" channel-priority: strict activate-environment: dask-sql - environment-file: continuous_integration/environment-3.8-jdk11-dev.yaml + environment-file: continuous_integration/environment-3.8-dev.yaml - name: Install dependencies run: | pip install setuptools wheel twine diff --git a/.github/workflows/test-upstream.yml b/.github/workflows/test-upstream.yml index a515395a0..b9fc415b9 100644 --- a/.github/workflows/test-upstream.yml +++ b/.github/workflows/test-upstream.yml @@ -5,64 +5,23 @@ on: workflow_dispatch: # allows you to trigger the workflow run manually jobs: - build: - # This build step should be similar to the deploy build, to make sure we actually test - # the future deployable - name: Build the jar on ubuntu - runs-on: ubuntu-latest - if: github.repository == 'dask-contrib/dask-sql' - defaults: - run: - shell: bash -l {0} - steps: - - uses: actions/checkout@v2 - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }} - - name: Set up Python - uses: conda-incubator/setup-miniconda@v2 - with: - miniforge-variant: Mambaforge - use-mamba: true - python-version: "3.8" - channel-priority: strict - activate-environment: dask-sql - environment-file: continuous_integration/environment-3.8-jdk11-dev.yaml - - name: Install dependencies and build the jar - run: | - python setup.py build_ext - - name: Upload the jar - uses: actions/upload-artifact@v1 - with: - name: jar - path: dask_sql/jar/DaskSQL.jar - test-dev: - name: "Test upstream dev (${{ matrix.os }}, java: ${{ matrix.java }}, python: ${{ matrix.python }})" - needs: build + name: "Test upstream dev (${{ matrix.os }}, python: ${{ matrix.python }})" runs-on: ${{ matrix.os }} env: - CONDA_FILE: continuous_integration/environment-${{ matrix.python }}-jdk${{ matrix.java }}-dev.yaml + CONDA_FILE: continuous_integration/environment-${{ matrix.python }}-dev.yaml defaults: run: shell: bash -l {0} strategy: fail-fast: false matrix: - java: [8, 11] os: [ubuntu-latest, windows-latest] python: ["3.8", "3.9", "3.10"] steps: - uses: actions/checkout@v2 with: fetch-depth: 0 # Fetch all history for all branches and tags. - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-v1-jdk${{ matrix.java }}-${{ hashFiles('**/pom.xml') }} - name: Set up Python uses: conda-incubator/setup-miniconda@v2 with: @@ -72,21 +31,12 @@ jobs: channel-priority: strict activate-environment: dask-sql environment-file: ${{ env.CONDA_FILE }} - - name: Download the pre-build jar - uses: actions/download-artifact@v1 - with: - name: jar - path: dask_sql/jar/ - name: Install hive testing dependencies for Linux if: matrix.os == 'ubuntu-latest' run: | mamba install -c conda-forge sasl>=0.3.1 docker pull bde2020/hive:2.3.2-postgresql-metastore docker pull bde2020/hive-metastore-postgresql:2.3.0 - - name: Set proper JAVA_HOME for Windows - if: matrix.os == 'windows-latest' - run: | - echo "JAVA_HOME=${{ env.CONDA }}\envs\dask-sql\Library" >> $GITHUB_ENV - name: Install upstream dev Dask / dask-ml run: | python -m pip install --no-deps git+https://github.com/dask/dask diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b590260f7..cc5b078bd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,7 +1,7 @@ --- # Test the main branch and every pull request by -# 1. building the jar on ubuntu -# 2. testing code (using the build jar) on ubuntu and windows, with different java versions +# 1. build dask_planner (Arrow DataFusion Rust bindings) on ubuntu +# 2. testing code (using the build DataFusion bindings) on ubuntu and windows name: Test Python package on: push: @@ -36,55 +36,20 @@ jobs: with: keyword: "[test-upstream]" - build: - # This build step should be similar to the deploy build, to make sure we actually test - # the future deployable - name: Build the jar on ubuntu - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }} - - name: Set up Python - uses: conda-incubator/setup-miniconda@v2 - with: - miniforge-variant: Mambaforge - use-mamba: true - python-version: "3.8" - channel-priority: strict - activate-environment: dask-sql - environment-file: continuous_integration/environment-3.8-jdk11-dev.yaml - - name: Build the jar - run: | - python setup.py build_ext - - name: Upload the jar - uses: actions/upload-artifact@v1 - with: - name: jar - path: dask_sql/jar/DaskSQL.jar - test: - name: "Test (${{ matrix.os }}, java: ${{ matrix.java }}, python: ${{ matrix.python }})" - needs: [detect-ci-trigger, build] + name: "Build & Test (${{ matrix.os }}, python: ${{ matrix.python }}, Rust: ${{ matrix.toolchain }})" + needs: [detect-ci-trigger] runs-on: ${{ matrix.os }} env: - CONDA_FILE: continuous_integration/environment-${{ matrix.python }}-jdk${{ matrix.java }}-dev.yaml + CONDA_FILE: continuous_integration/environment-${{ matrix.python }}-dev.yaml strategy: fail-fast: false matrix: - java: [8, 11] os: [ubuntu-latest, windows-latest] python: ["3.8", "3.9", "3.10"] + toolchain: [stable] steps: - uses: actions/checkout@v2 - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-v1-jdk${{ matrix.java }}-${{ hashFiles('**/pom.xml') }} - name: Set up Python uses: conda-incubator/setup-miniconda@v2 with: @@ -94,21 +59,21 @@ jobs: channel-priority: strict activate-environment: dask-sql environment-file: ${{ env.CONDA_FILE }} - - name: Download the pre-build jar - uses: actions/download-artifact@v1 + - name: Setup Rust Toolchain + uses: actions-rs/toolchain@v1 + id: rust-toolchain with: - name: jar - path: dask_sql/jar/ + toolchain: stable + override: true + - name: Build the Rust DataFusion bindings + run: | + python setup.py build install - name: Install hive testing dependencies for Linux if: matrix.os == 'ubuntu-latest' run: | mamba install -c conda-forge sasl>=0.3.1 docker pull bde2020/hive:2.3.2-postgresql-metastore docker pull bde2020/hive-metastore-postgresql:2.3.0 - - name: Set proper JAVA_HOME for Windows - if: matrix.os == 'windows-latest' - run: | - echo "JAVA_HOME=${{ env.CONDA }}\envs\dask-sql\Library" >> $GITHUB_ENV - name: Optionally install upstream dev Dask / dask-ml if: needs.detect-ci-trigger.outputs.triggered == 'true' run: | @@ -133,15 +98,10 @@ jobs: cluster: name: "Test in a dask cluster" - needs: [detect-ci-trigger, build] + needs: [detect-ci-trigger] runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }} - name: Set up Python uses: conda-incubator/setup-miniconda@v2 with: @@ -150,12 +110,16 @@ jobs: python-version: "3.8" channel-priority: strict activate-environment: dask-sql - environment-file: continuous_integration/environment-3.8-jdk11-dev.yaml - - name: Download the pre-build jar - uses: actions/download-artifact@v1 - with: - name: jar - path: dask_sql/jar/ + environment-file: continuous_integration/environment-3.8-dev.yaml + - name: Setup Rust Toolchain + uses: actions-rs/toolchain@v1 + id: rust-toolchain + with: + toolchain: stable + override: true + - name: Build the Rust DataFusion bindings + run: | + python setup.py build install - name: Install dependencies run: | mamba install python-blosc lz4 -c conda-forge @@ -184,15 +148,10 @@ jobs: import: name: "Test importing with bare requirements" - needs: [detect-ci-trigger, build] + needs: [detect-ci-trigger] runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }} - name: Set up Python uses: conda-incubator/setup-miniconda@v2 with: @@ -200,13 +159,10 @@ jobs: mamba-version: "*" channels: conda-forge,defaults channel-priority: strict - - name: Download the pre-build jar - uses: actions/download-artifact@v1 - with: - name: jar - path: dask_sql/jar/ - name: Install dependencies and nothing else run: | + conda install setuptools-rust + conda install pyarrow>=4.0.0 pip install -e . which python diff --git a/.github/workflows/update-gpuci.yml b/.github/workflows/update-gpuci.yml index e3c9c8469..1cf91c069 100644 --- a/.github/workflows/update-gpuci.yml +++ b/.github/workflows/update-gpuci.yml @@ -13,39 +13,70 @@ jobs: steps: - uses: actions/checkout@v2 + - name: Parse current axis YAML + uses: the-coding-turtle/ga-yaml-parser@v0.1.1 + with: + file: continuous_integration/gpuci/axis.yaml + - name: Get latest cuDF nightly version - id: latest_version + id: cudf_latest uses: jacobtomlinson/gha-anaconda-package-version@0.1.3 with: org: "rapidsai-nightly" package: "cudf" version_system: "CalVer" - - name: Strip git tags from versions + - name: Get latest cuML nightly version + id: cuml_latest + uses: jacobtomlinson/gha-anaconda-package-version@0.1.3 + with: + org: "rapidsai-nightly" + package: "cuml" + version_system: "CalVer" + + - name: Get latest UCX-Py nightly version + id: ucx_py_latest + uses: jacobtomlinson/gha-anaconda-package-version@0.1.3 + with: + org: "rapidsai-nightly" + package: "ucx-py" + version_system: "CalVer" + + - name: Get old RAPIDS / UCX-Py versions env: - FULL_RAPIDS_VER: ${{ steps.latest_version.outputs.version }} - run: echo "RAPIDS_VER=${FULL_RAPIDS_VER::-10}" >> $GITHUB_ENV + FULL_CUDF_VER: ${{ steps.cudf_latest.outputs.version }} + FULL_CUML_VER: ${{ steps.cuml_latest.outputs.version }} + FULL_UCX_PY_VER: ${{ steps.ucx_py_latest.outputs.version }} + run: | + echo RAPIDS_VER=$RAPIDS_VER_0 >> $GITHUB_ENV + echo UCX_PY_VER=$(curl -sL https://version.gpuci.io/rapids/$RAPIDS_VER_0) >> $GITHUB_ENV + echo NEW_CUDF_VER=${FULL_CUDF_VER::-10} >> $GITHUB_ENV + echo NEW_CUML_VER=${FULL_CUML_VER::-10} >> $GITHUB_ENV + echo NEW_UCX_PY_VER=${FULL_UCX_PY_VER::-10} >> $GITHUB_ENV - - name: Find and Replace Release - uses: jacobtomlinson/gha-find-replace@0.1.4 + - name: Update RAPIDS version + uses: jacobtomlinson/gha-find-replace@v2 with: include: 'continuous_integration\/gpuci\/axis\.yaml' - find: "RAPIDS_VER:\n- .*" - replace: |- - RAPIDS_VER: - - "${{ env.RAPIDS_VER }}" + find: "${{ env.RAPIDS_VER }}" + replace: "${{ env.NEW_CUDF_VER }}" + regex: false - name: Create Pull Request uses: peter-evans/create-pull-request@v3 + # make sure ucx-py nightlies are available and that cuDF/cuML nightly versions match up + if: | + env.UCX_PY_VER != env.NEW_UCX_PY_VER && + env.NEW_CUDF_VER == env.NEW_CUML_VER with: token: ${{ secrets.GITHUB_TOKEN }} draft: true - commit-message: "Update gpuCI `RAPIDS_VER` to `${{ env.RAPIDS_VER }}`" - title: "Update gpuCI `RAPIDS_VER` to `${{ env.RAPIDS_VER }}`" + commit-message: "Update gpuCI `RAPIDS_VER` to `${{ env.NEW_CUDF_VER }}`" + title: "Update gpuCI `RAPIDS_VER` to `${{ env.NEW_CUDF_VER }}`" team-reviewers: "dask/gpu" author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> branch: "upgrade-gpuci-rapids" body: | - A new cuDF nightly version has been detected. + New cuDF and ucx-py nightly versions have been detected. - Updated `axis.yaml` to use `${{ env.RAPIDS_VER }}`. + Updated `axis.yaml` to use `${{ env.NEW_CUDF_VER }}`. diff --git a/.gitignore b/.gitignore index 5bc336e03..950c92821 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,4 @@ dask_sql/jar dask-worker-space/ node_modules/ docs/source/_build/ +dask_planner/Cargo.lock diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e50a5f1d5..709938a81 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/psf/black - rev: 19.10b0 + rev: 22.3.0 hooks: - id: black language_version: python3 @@ -16,8 +16,17 @@ repos: args: - "--profile" - "black" + - repo: https://github.com/doublify/pre-commit-rust + rev: v1.0 + hooks: + - id: fmt + args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--'] + - id: cargo-check + args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--'] + - id: clippy + args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--'] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.2.0 + rev: v4.2.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 3b3682543..831ec6d50 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -3,15 +3,15 @@ version: 2 build: os: ubuntu-20.04 tools: - python: "3.8" - apt_packages: - - maven + python: "mambaforge-4.10" sphinx: configuration: docs/source/conf.py +conda: + environment: docs/environment.yml + python: install: - - requirements: docs/requirements-docs.txt - method: pip path: . diff --git a/MANIFEST.in b/MANIFEST.in index 2b6351550..d0108fedd 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,5 @@ recursive-include dask_sql *.yaml +recursive-include dask_planner * include versioneer.py include dask_sql/_version.py diff --git a/README.md b/README.md index 5b508651f..924ae1e41 100644 --- a/README.md +++ b/README.md @@ -101,19 +101,18 @@ If you want to have the newest (unreleased) `dask-sql` version or if you plan to Create a new conda environment and install the development environment: - conda env create -f continuous_integration/environment-3.9-jdk11-dev.yaml + conda env create -f continuous_integration/environment-3.9-dev.yaml It is not recommended to use `pip` instead of `conda` for the environment setup. -If you however need to, make sure to have Java (jdk >= 8) and maven installed and correctly setup before continuing. -Have a look into `environment-3.9-jdk11-dev.yaml` for the rest of the development environment. After that, you can install the package in development mode pip install -e ".[dev]" -To recompile the Java classes after changes have been made to the source contained in `planner/`, run +The Rust DataFusion bindings are built as part of the `pip install`. +If changes are made to the Rust source in `dask_planner/`, another build/install must be run to recompile the bindings: - python setup.py build_ext + python setup.py build install This repository uses [pre-commit](https://pre-commit.com/) hooks. To install them, call @@ -172,5 +171,5 @@ At the core, `dask-sql` does two things: - translate the SQL query using [Apache Calcite](https://calcite.apache.org/) into a relational algebra, which is specified as a tree of java objects - similar to many other SQL engines (Hive, Flink, ...) - convert this description of the query from java objects into dask API calls (and execute them) - returning a dask dataframe. -For the first step, Apache Calcite needs to know about the columns and types of the dask dataframes, therefore some java classes to store this information for dask dataframes are defined in `planner`. -After the translation to a relational algebra is done (using `RelationalAlgebraGenerator.getRelationalAlgebra`), the python methods defined in `dask_sql.physical` turn this into a physical dask execution plan by converting each piece of the relational algebra one-by-one. +For the first step, Arrow DataFusion needs to know about the columns and types of the dask dataframes, therefore some Rust code to store this information for dask dataframes are defined in `dask_planner`. +After the translation to a relational algebra is done (using `DaskSQLContext.logical_relational_algebra`), the python methods defined in `dask_sql.physical` turn this into a physical dask execution plan by converting each piece of the relational algebra one-by-one. diff --git a/continuous_integration/environment-3.10-dev.yaml b/continuous_integration/environment-3.10-dev.yaml index effdccbaf..51a7f6052 100644 --- a/continuous_integration/environment-3.10-dev.yaml +++ b/continuous_integration/environment-3.10-dev.yaml @@ -4,15 +4,16 @@ channels: - nodefaults dependencies: - adagio>=0.2.3 -- antlr4-python3-runtime>=4.9.2 -- black=19.10b0 +- antlr4-python3-runtime>=4.9.2, <4.10.0 # Remove max pin after qpd(fugue dependency) updates their conda recipe +- black=22.3.0 - ciso8601>=2.2.0 - dask-ml>=2022.1.22 -- dask>=2021.11.1 +- dask>=2022.3.0 - fastapi>=0.61.1 - fs>=2.4.11 - intake>=0.6.0 - isort=5.7.0 +- jsonschema>=4.4.0 - lightgbm>=3.2.1 - mlflow>=1.19.0 - mock>=4.0.3 @@ -22,7 +23,7 @@ dependencies: - pre-commit>=2.11.1 - prompt_toolkit>=3.0.8 - psycopg2>=2.9.1 -- pyarrow>=0.15.1 +- pyarrow>=4.0.0 - pygments>=2.7.1 - pyhive>=0.6.4 - pytest-cov>=2.10.1 diff --git a/continuous_integration/environment-3.8-dev.yaml b/continuous_integration/environment-3.8-dev.yaml index 0a57856cf..10132bff6 100644 --- a/continuous_integration/environment-3.8-dev.yaml +++ b/continuous_integration/environment-3.8-dev.yaml @@ -4,15 +4,16 @@ channels: - nodefaults dependencies: - adagio>=0.2.3 -- antlr4-python3-runtime>=4.9.2 -- black=19.10b0 +- antlr4-python3-runtime>=4.9.2, <4.10.0 # Remove max pin after qpd(fugue dependency) updates their conda recipe +- black=22.3.0 - ciso8601>=2.2.0 - dask-ml>=2022.1.22 -- dask>=2021.11.1 +- dask>=2022.3.0 - fastapi>=0.61.1 - fs>=2.4.11 - intake>=0.6.0 - isort=5.7.0 +- jsonschema>=4.4.0 - lightgbm>=3.2.1 - mlflow>=1.19.0 - mock>=4.0.3 @@ -22,7 +23,7 @@ dependencies: - pre-commit>=2.11.1 - prompt_toolkit>=3.0.8 - psycopg2>=2.9.1 -- pyarrow>=0.15.1 +- pyarrow>=4.0.0 - pygments>=2.7.1 - pyhive>=0.6.4 - pytest-cov>=2.10.1 diff --git a/continuous_integration/environment-3.9-dev.yaml b/continuous_integration/environment-3.9-dev.yaml index 0793150a5..571a265a7 100644 --- a/continuous_integration/environment-3.9-dev.yaml +++ b/continuous_integration/environment-3.9-dev.yaml @@ -6,15 +6,16 @@ channels: - nvidia dependencies: - adagio>=0.2.3 -- antlr4-python3-runtime>=4.9.2 -- black=19.10b0 +- antlr4-python3-runtime>=4.9.2, <4.10.0 # Remove max pin after qpd(fugue dependency) updates their conda recipe +- black=22.3.0 - ciso8601>=2.2.0 - dask-ml>=2022.1.22 -- dask>=2021.11.1 +- dask>=2022.3.0 - fastapi>=0.61.1 - fs>=2.4.11 - intake>=0.6.0 - isort=5.7.0 +- jsonschema>=4.4.0 - lightgbm>=3.2.1 - mlflow>=1.19.0 - mock>=4.0.3 @@ -24,7 +25,7 @@ dependencies: - pre-commit>=2.11.1 - prompt_toolkit>=3.0.8 - psycopg2>=2.9.1 -- pyarrow>=0.15.1 +- pyarrow>=4.0.0 - pygments>=2.7.1 - pyhive>=0.6.4 - pytest-cov>=2.10.1 @@ -40,17 +41,5 @@ dependencies: - maturin>=0.12.8 - setuptools-rust>=1.1.2 - rust>=1.59.0 -# Items below this are added only for testing and should be removed later -- cudf=22.04 -- cudatoolkit=11.5 -- dask-cuda -- dask-cudf -- ipykernel -- pyngrok -- plotly -- s3fs -- requests -- nbformat -- cuml - pip: - fugue[sql]>=0.5.3 diff --git a/continuous_integration/gpuci/axis.yaml b/continuous_integration/gpuci/axis.yaml index 922eee23c..41ddb56ec 100644 --- a/continuous_integration/gpuci/axis.yaml +++ b/continuous_integration/gpuci/axis.yaml @@ -8,6 +8,6 @@ LINUX_VER: - ubuntu18.04 RAPIDS_VER: -- "22.04" +- "22.06" excludes: diff --git a/continuous_integration/recipe/meta.yaml b/continuous_integration/recipe/meta.yaml index d6211bf1a..1bfeb19cb 100644 --- a/continuous_integration/recipe/meta.yaml +++ b/continuous_integration/recipe/meta.yaml @@ -23,12 +23,14 @@ build: requirements: build: - {{ compiler('rust') }} + - setuptools-rust>=1.1.2 host: - pip - python >=3.8 + - setuptools-rust>=1.1.2 run: - python - - dask >=2021.11.1 + - dask >=2022.3.0 - pandas >=1.0.0 - fastapi >=0.61.1 - uvicorn >=0.11.3 @@ -37,6 +39,7 @@ requirements: - pygments - nest-asyncio - tabulate + - pyarrow>=4.0.0 test: imports: diff --git a/dask_planner/.github/workflows/CI.yml b/dask_planner/.github/workflows/CI.yml deleted file mode 100644 index 02573724e..000000000 --- a/dask_planner/.github/workflows/CI.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: CI - -on: - push: - pull_request: - -jobs: - linux: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: messense/maturin-action@v1 - with: - manylinux: auto - command: build - args: --release -o dist - - name: Upload wheels - uses: actions/upload-artifact@v2 - with: - name: wheels - path: dist - - windows: - runs-on: windows-latest - steps: - - uses: actions/checkout@v2 - - uses: messense/maturin-action@v1 - with: - command: build - args: --release --no-sdist -o dist - - name: Upload wheels - uses: actions/upload-artifact@v2 - with: - name: wheels - path: dist - - macos: - runs-on: macos-latest - steps: - - uses: actions/checkout@v2 - - uses: messense/maturin-action@v1 - with: - command: build - args: --release --no-sdist -o dist --universal2 - - name: Upload wheels - uses: actions/upload-artifact@v2 - with: - name: wheels - path: dist - - release: - name: Release - runs-on: ubuntu-latest - if: "startsWith(github.ref, 'refs/tags/')" - needs: [ macos, windows, linux ] - steps: - - uses: actions/download-artifact@v2 - with: - name: wheels - - name: Publish to PyPI - uses: messense/maturin-action@v1 - env: - MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} - with: - command: upload - args: --skip-existing * diff --git a/dask_planner/Cargo.lock b/dask_planner/Cargo.lock deleted file mode 100644 index 6852589af..000000000 --- a/dask_planner/Cargo.lock +++ /dev/null @@ -1,1593 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "ahash" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" -dependencies = [ - "getrandom 0.2.5", - "once_cell", - "version_check", -] - -[[package]] -name = "aho-corasick" -version = "0.7.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" -dependencies = [ - "memchr", -] - -[[package]] -name = "alloc-no-stdlib" -version = "2.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35ef4730490ad1c4eae5c4325b2a95f521d023e5c885853ff7aca0a6a1631db3" - -[[package]] -name = "alloc-stdlib" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "697ed7edc0f1711de49ce108c541623a0af97c6c60b2f6e2b65229847ac843c2" -dependencies = [ - "alloc-no-stdlib", -] - -[[package]] -name = "arrayref" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" - -[[package]] -name = "arrayvec" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" - -[[package]] -name = "arrow" -version = "9.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9864ca2fdcd3d4883259495b4517879877c5991d9928cc9713794d8076d3e78b" -dependencies = [ - "bitflags", - "chrono", - "comfy-table", - "csv", - "flatbuffers", - "half", - "hex", - "indexmap", - "lazy_static", - "lexical-core", - "multiversion", - "num", - "pyo3", - "rand 0.8.5", - "regex", - "serde", - "serde_derive", - "serde_json", -] - -[[package]] -name = "async-trait" -version = "0.1.52" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "061a7acccaa286c011ddc30970520b98fa40e00c9d644633fb26b5fc63a265e3" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "base64" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "blake2" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9cf849ee05b2ee5fba5e36f97ff8ec2533916700fc0758d40d92136a42f3388" -dependencies = [ - "digest", -] - -[[package]] -name = "blake3" -version = "1.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a08e53fc5a564bb15bfe6fae56bd71522205f1f91893f9c0116edad6496c183f" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", - "digest", -] - -[[package]] -name = "block-buffer" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324" -dependencies = [ - "generic-array", -] - -[[package]] -name = "brotli" -version = "3.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f838e47a451d5a8fa552371f80024dd6ace9b7acdf25c4c3d0f9bc6816fb1c39" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor", -] - -[[package]] -name = "brotli-decompressor" -version = "2.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ad2d4653bf5ca36ae797b1f4bb4dbddb60ce49ca4aed8a2ce4829f60425b80" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", -] - -[[package]] -name = "bstr" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" -dependencies = [ - "lazy_static", - "memchr", - "regex-automata", - "serde", -] - -[[package]] -name = "byteorder" -version = "1.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" - -[[package]] -name = "cc" -version = "1.0.73" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" -dependencies = [ - "jobserver", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "chrono" -version = "0.4.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" -dependencies = [ - "libc", - "num-integer", - "num-traits", - "winapi", -] - -[[package]] -name = "comfy-table" -version = "5.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b103d85ca6e209388771bfb7aa6b68a7aeec4afbf6f0a0264bfbf50360e5212e" -dependencies = [ - "strum", - "strum_macros", - "unicode-width", -] - -[[package]] -name = "constant_time_eq" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" - -[[package]] -name = "cpufeatures" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95059428f66df56b63431fdb4e1947ed2190586af5c5a8a8b71122bdf5a7f469" -dependencies = [ - "libc", -] - -[[package]] -name = "crc32fast" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "crypto-common" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57952ca27b5e3606ff4dd79b0020231aaf9d6aa76dc05fd30137538c50bd3ce8" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "csv" -version = "1.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" -dependencies = [ - "bstr", - "csv-core", - "itoa 0.4.8", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" -dependencies = [ - "memchr", -] - -[[package]] -name = "dask_planner" -version = "0.1.0" -dependencies = [ - "datafusion", - "datafusion-common", - "datafusion-expr", - "mimalloc", - "parking_lot 0.12.0", - "pyo3", - "rand 0.7.3", - "sqlparser", - "tokio", - "uuid", -] - -[[package]] -name = "datafusion" -version = "7.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cf8e6735817bb021748d72cecc33e468d8775bf749470c52aa7f55ee5cdf9e" -dependencies = [ - "ahash", - "arrow", - "async-trait", - "blake2", - "blake3", - "chrono", - "datafusion-common", - "datafusion-expr", - "futures", - "hashbrown 0.12.0", - "lazy_static", - "log", - "md-5", - "num_cpus", - "ordered-float 2.10.0", - "parking_lot 0.12.0", - "parquet", - "paste 1.0.6", - "pin-project-lite", - "pyo3", - "rand 0.8.5", - "regex", - "sha2", - "smallvec", - "sqlparser", - "tempfile", - "tokio", - "tokio-stream", - "unicode-segmentation", -] - -[[package]] -name = "datafusion-common" -version = "7.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40d2e2a1a6508f9e66c6deb84ab655be0e9db177f617ec1904de458439981b03" -dependencies = [ - "arrow", - "ordered-float 2.10.0", - "parquet", - "pyo3", - "sqlparser", -] - -[[package]] -name = "datafusion-expr" -version = "7.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1061dc27f2e4843ddb0f93a8e208d1984260c18c8bbf5e67598be9a111259fe2" -dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "sqlparser", -] - -[[package]] -name = "digest" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - -[[package]] -name = "fastrand" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" -dependencies = [ - "instant", -] - -[[package]] -name = "flatbuffers" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef4c5738bcd7fad10315029c50026f83c9da5e4a21f8ed66826f43e0e2bde5f6" -dependencies = [ - "bitflags", - "smallvec", - "thiserror", -] - -[[package]] -name = "flate2" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f" -dependencies = [ - "cfg-if", - "crc32fast", - "libc", - "miniz_oxide", -] - -[[package]] -name = "futures" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" - -[[package]] -name = "futures-executor" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-io" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" - -[[package]] -name = "futures-macro" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "futures-sink" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" - -[[package]] -name = "futures-task" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" - -[[package]] -name = "futures-util" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - -[[package]] -name = "generic-array" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "getrandom" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.9.0+wasi-snapshot-preview1", -] - -[[package]] -name = "getrandom" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d39cd93900197114fa1fcb7ae84ca742095eed9442088988ae74fa744e930e77" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.10.2+wasi-snapshot-preview1", -] - -[[package]] -name = "half" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" - -[[package]] -name = "hashbrown" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" - -[[package]] -name = "hashbrown" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c21d40587b92fa6a6c6e3c1bdbf87d75511db5672f9c93175574b3a00df1758" -dependencies = [ - "ahash", -] - -[[package]] -name = "heck" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" -dependencies = [ - "unicode-segmentation", -] - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - -[[package]] -name = "indexmap" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282a6247722caba404c065016bbfa522806e51714c34f5dfc3e4a3a46fcb4223" -dependencies = [ - "autocfg", - "hashbrown 0.11.2", -] - -[[package]] -name = "indoc" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47741a8bc60fb26eb8d6e0238bbb26d8575ff623fdc97b1a2c00c050b9684ed8" -dependencies = [ - "indoc-impl", - "proc-macro-hack", -] - -[[package]] -name = "indoc-impl" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce046d161f000fffde5f432a0d034d0341dc152643b2598ed5bfce44c4f3a8f0" -dependencies = [ - "proc-macro-hack", - "proc-macro2", - "quote", - "syn", - "unindent", -] - -[[package]] -name = "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "integer-encoding" -version = "1.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48dc51180a9b377fd75814d0cc02199c20f8e99433d6762f650d39cdbbd3b56f" - -[[package]] -name = "itoa" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" - -[[package]] -name = "itoa" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" - -[[package]] -name = "jobserver" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" -dependencies = [ - "libc", -] - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "lexical-core" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a3926d8f156019890be4abe5fd3785e0cff1001e06f59c597641fd513a5a284" -dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", -] - -[[package]] -name = "lexical-parse-float" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4d066d004fa762d9da995ed21aa8845bb9f6e4265f540d716fb4b315197bf0e" -dependencies = [ - "lexical-parse-integer", - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-parse-integer" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2c92badda8cc0fc4f3d3cc1c30aaefafb830510c8781ce4e8669881f3ed53ac" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-util" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff669ccaae16ee33af90dc51125755efed17f1309626ba5c12052512b11e291" -dependencies = [ - "static_assertions", -] - -[[package]] -name = "lexical-write-float" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b5186948c7b297abaaa51560f2581dae625e5ce7dfc2d8fdc56345adb6dc576" -dependencies = [ - "lexical-util", - "lexical-write-integer", - "static_assertions", -] - -[[package]] -name = "lexical-write-integer" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ece956492e0e40fd95ef8658a34d53a3b8c2015762fdcaaff2167b28de1f56ef" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "libc" -version = "0.2.119" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bf2e165bb3457c8e098ea76f3e3bc9db55f87aa90d52d0e6be741470916aaa4" - -[[package]] -name = "libmimalloc-sys" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7705fc40f6ed493f73584abbb324e74f96b358ff60dfe5659a0f8fc12c590a69" -dependencies = [ - "cc", -] - -[[package]] -name = "lock_api" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88943dd7ef4a2e5a4bfa2753aaab3013e34ce2533d1996fb18ef591e315e2b3b" -dependencies = [ - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "lz4" -version = "1.23.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885" -dependencies = [ - "libc", - "lz4-sys", -] - -[[package]] -name = "lz4-sys" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17" -dependencies = [ - "cc", - "libc", -] - -[[package]] -name = "md-5" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "658646b21e0b72f7866c7038ab086d3d5e1cd6271f060fd37defb241949d0582" -dependencies = [ - "digest", -] - -[[package]] -name = "memchr" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" - -[[package]] -name = "mimalloc" -version = "0.1.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0dfa131390c2f6bdb3242f65ff271fcdaca5ff7b6c08f28398be7f2280e3926" -dependencies = [ - "libmimalloc-sys", -] - -[[package]] -name = "miniz_oxide" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" -dependencies = [ - "adler", - "autocfg", -] - -[[package]] -name = "multiversion" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "025c962a3dd3cc5e0e520aa9c612201d127dcdf28616974961a649dca64f5373" -dependencies = [ - "multiversion-macros", -] - -[[package]] -name = "multiversion-macros" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8a3e2bde382ebf960c1f3e79689fa5941625fe9bf694a1cb64af3e85faff3af" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "num" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - -[[package]] -name = "num-bigint" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-complex" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26873667bbbb7c5182d4a37c1add32cdf09f841af72da53318fdb81543c15085" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-integer" -version = "0.1.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" -dependencies = [ - "autocfg", - "num-traits", -] - -[[package]] -name = "num-iter" -version = "0.1.42" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2021c8337a54d21aca0d59a92577a029af9431cb59b909b03252b9c164fad59" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d41702bd167c2df5520b384281bc111a4b5efcf7fbc4c9c222c815b07e0a6a6a" -dependencies = [ - "autocfg", - "num-bigint", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" -dependencies = [ - "autocfg", -] - -[[package]] -name = "num_cpus" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "once_cell" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" - -[[package]] -name = "ordered-float" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7" -dependencies = [ - "num-traits", -] - -[[package]] -name = "ordered-float" -version = "2.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87" -dependencies = [ - "num-traits", -] - -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.5", -] - -[[package]] -name = "parking_lot" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58" -dependencies = [ - "lock_api", - "parking_lot_core 0.9.1", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" -dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall", - "smallvec", - "winapi", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28141e0cc4143da2443301914478dc976a61ffdb3f043058310c70df2fed8954" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-sys", -] - -[[package]] -name = "parquet" -version = "9.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1697d963e6319c19099adcf131a5440685053d4902890f9e4bb272cbd0dc6532" -dependencies = [ - "arrow", - "base64", - "brotli", - "byteorder", - "chrono", - "flate2", - "lz4", - "num", - "num-bigint", - "parquet-format", - "rand 0.8.5", - "snap", - "thrift", - "zstd", -] - -[[package]] -name = "parquet-format" -version = "4.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f0c06cdcd5460967c485f9c40a821746f5955ad81990533c7fae95dbd9bc0b5" -dependencies = [ - "thrift", -] - -[[package]] -name = "paste" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45ca20c77d80be666aef2b45486da86238fabe33e38306bd3118fe4af33fa880" -dependencies = [ - "paste-impl", - "proc-macro-hack", -] - -[[package]] -name = "paste" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0744126afe1a6dd7f394cb50a716dbe086cb06e255e53d8d0185d82828358fb5" - -[[package]] -name = "paste-impl" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d95a7db200b97ef370c8e6de0088252f7e0dfff7d047a28528e47456c0fc98b6" -dependencies = [ - "proc-macro-hack", -] - -[[package]] -name = "pin-project-lite" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e280fbe77cc62c91527259e9442153f4688736748d24660126286329742b4c6c" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "ppv-lite86" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" - -[[package]] -name = "proc-macro-hack" -version = "0.5.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" - -[[package]] -name = "proc-macro2" -version = "1.0.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029" -dependencies = [ - "unicode-xid", -] - -[[package]] -name = "pyo3" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cf01dbf1c05af0a14c7779ed6f3aa9deac9c3419606ac9de537a2d649005720" -dependencies = [ - "cfg-if", - "indoc", - "libc", - "parking_lot 0.11.2", - "paste 0.1.18", - "pyo3-build-config", - "pyo3-macros", - "unindent", -] - -[[package]] -name = "pyo3-build-config" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf9e4d128bfbddc898ad3409900080d8d5095c379632fbbfbb9c8cfb1fb852b" -dependencies = [ - "once_cell", -] - -[[package]] -name = "pyo3-macros" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67701eb32b1f9a9722b4bc54b548ff9d7ebfded011c12daece7b9063be1fd755" -dependencies = [ - "pyo3-macros-backend", - "quote", - "syn", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f44f09e825ee49a105f2c7b23ebee50886a9aee0746f4dd5a704138a64b0218a" -dependencies = [ - "proc-macro2", - "pyo3-build-config", - "quote", - "syn", -] - -[[package]] -name = "quote" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "864d3e96a899863136fc6e99f3d7cae289dafe43bf2c5ac19b70df7210c0a145" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" -dependencies = [ - "getrandom 0.1.16", - "libc", - "rand_chacha 0.2.2", - "rand_core 0.5.1", - "rand_hc", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.3", -] - -[[package]] -name = "rand_chacha" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" -dependencies = [ - "ppv-lite86", - "rand_core 0.5.1", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core 0.6.3", -] - -[[package]] -name = "rand_core" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" -dependencies = [ - "getrandom 0.1.16", -] - -[[package]] -name = "rand_core" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" -dependencies = [ - "getrandom 0.2.5", -] - -[[package]] -name = "rand_hc" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" -dependencies = [ - "rand_core 0.5.1", -] - -[[package]] -name = "redox_syscall" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8380fe0152551244f0747b1bf41737e0f8a74f97a14ccefd1148187271634f3c" -dependencies = [ - "bitflags", -] - -[[package]] -name = "regex" -version = "1.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" - -[[package]] -name = "regex-syntax" -version = "0.6.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" - -[[package]] -name = "remove_dir_all" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" -dependencies = [ - "winapi", -] - -[[package]] -name = "rustversion" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f" - -[[package]] -name = "ryu" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f" - -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - -[[package]] -name = "serde" -version = "1.0.136" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789" - -[[package]] -name = "serde_derive" -version = "1.0.136" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.79" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95" -dependencies = [ - "indexmap", - "itoa 1.0.1", - "ryu", - "serde", -] - -[[package]] -name = "sha2" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55deaec60f81eefe3cce0dc50bda92d6d8e88f2a27df7c5033b42afeb1ed2676" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "slab" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9def91fd1e018fe007022791f865d0ccc9b3a0d5001e01aabb8b40e46000afb5" - -[[package]] -name = "smallvec" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" - -[[package]] -name = "snap" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451" - -[[package]] -name = "sqlparser" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8f192f29f4aa49e57bebd0aa05858e0a1f32dd270af36efe49edb82cbfffab6" -dependencies = [ - "log", -] - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "strum" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb" - -[[package]] -name = "strum_macros" -version = "0.23.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn", -] - -[[package]] -name = "subtle" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" - -[[package]] -name = "syn" -version = "1.0.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a65b3f4ffa0092e9887669db0eae07941f023991ab58ea44da8fe8e2d511c6b" -dependencies = [ - "proc-macro2", - "quote", - "unicode-xid", -] - -[[package]] -name = "tempfile" -version = "3.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" -dependencies = [ - "cfg-if", - "fastrand", - "libc", - "redox_syscall", - "remove_dir_all", - "winapi", -] - -[[package]] -name = "thiserror" -version = "1.0.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "threadpool" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" -dependencies = [ - "num_cpus", -] - -[[package]] -name = "thrift" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c6d965454947cc7266d22716ebfd07b18d84ebaf35eec558586bbb2a8cb6b5b" -dependencies = [ - "byteorder", - "integer-encoding", - "log", - "ordered-float 1.1.1", - "threadpool", -] - -[[package]] -name = "tokio" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2af73ac49756f3f7c01172e34a23e5d0216f6c32333757c2c61feb2bbff5a5ee" -dependencies = [ - "num_cpus", - "parking_lot 0.12.0", - "pin-project-lite", - "tokio-macros", -] - -[[package]] -name = "tokio-macros" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b557f72f448c511a979e2564e55d74e6c4432fc96ff4f6241bc6bded342643b7" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "tokio-stream" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50145484efff8818b5ccd256697f36863f587da82cf8b409c53adf1e840798e3" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "typenum" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" - -[[package]] -name = "unicode-segmentation" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" - -[[package]] -name = "unicode-width" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" - -[[package]] -name = "unicode-xid" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" - -[[package]] -name = "unindent" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514672a55d7380da379785a4d70ca8386c8883ff7eaae877be4d2081cebe73d8" - -[[package]] -name = "uuid" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" -dependencies = [ - "getrandom 0.2.5", -] - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "wasi" -version = "0.9.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" - -[[package]] -name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-sys" -version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3df6e476185f92a12c072be4a189a0210dcdcf512a1891d6dff9edb874deadc6" -dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_msvc" -version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8e92753b1c443191654ec532f14c199742964a061be25d77d7a96f09db20bf5" - -[[package]] -name = "windows_i686_gnu" -version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a711c68811799e017b6038e0922cb27a5e2f43a2ddb609fe0b6f3eeda9de615" - -[[package]] -name = "windows_i686_msvc" -version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c11bb1a02615db74680b32a68e2d61f553cc24c4eb5b4ca10311740e44172" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c912b12f7454c6620635bbff3450962753834be2a594819bd5e945af18ec64bc" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "504a2476202769977a040c6364301a3f65d0cc9e3fb08600b2bda150a0488316" - -[[package]] -name = "zstd" -version = "0.10.0+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b1365becbe415f3f0fcd024e2f7b45bacfb5bdd055f0dc113571394114e7bdd" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" -version = "4.1.4+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f7cd17c9af1a4d6c24beb1cc54b17e2ef7b593dc92f19e9d9acad8b182bbaee" -dependencies = [ - "libc", - "zstd-sys", -] - -[[package]] -name = "zstd-sys" -version = "1.6.3+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc49afa5c8d634e75761feda8c592051e7eeb4683ba827211eb0d731d3402ea8" -dependencies = [ - "cc", - "libc", -] diff --git a/dask_planner/Cargo.toml b/dask_planner/Cargo.toml index 5cb01bdb2..0ed665bf6 100644 --- a/dask_planner/Cargo.toml +++ b/dask_planner/Cargo.toml @@ -2,23 +2,23 @@ name = "dask_planner" repository = "https://github.com/dask-contrib/dask-sql" version = "0.1.0" -description = "Bindings for Datafusion used by Dask-SQL" +description = "Bindings for DataFusion used by Dask-SQL" readme = "README.md" license = "Apache-2.0" edition = "2021" -rust-version = "1.57" +rust-version = "1.59" [dependencies] tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } rand = "0.7" -pyo3 = { version = "0.15", features = ["extension-module", "abi3", "abi3-py36"] } -datafusion = { version = "^7.0.0", features = ["pyarrow"] } -datafusion-expr = { version = "^7.0.0" } -datafusion-common = { version = "^7.0.0", features = ["pyarrow"] } +pyo3 = { version = "0.15", features = ["extension-module", "abi3", "abi3-py38"] } +datafusion = { git="https://github.com/apache/arrow-datafusion/", rev = "583b4ab8dfe6148a7387841d112dd50b1151f6fb" } +datafusion-expr = { git="https://github.com/apache/arrow-datafusion/", rev = "583b4ab8dfe6148a7387841d112dd50b1151f6fb" } uuid = { version = "0.8", features = ["v4"] } mimalloc = { version = "*", default-features = false } sqlparser = "0.14.0" parking_lot = "0.12" +async-trait = "0.1.41" [lib] diff --git a/dask_planner/README.md b/dask_planner/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/dask_planner/dask_planner/python/__init__.py b/dask_planner/dask_planner/python/__init__.py deleted file mode 100644 index bc6c513f5..000000000 --- a/dask_planner/dask_planner/python/__init__.py +++ /dev/null @@ -1,95 +0,0 @@ -# from dask_planner import * - -# from abc import ABCMeta, abstractmethod -# from typing import List - -# try: -# import importlib.metadata as importlib_metadata -# except ImportError: -# import importlib_metadata - -# import pyarrow as pa - -# from ._internal import AggregateUDF, DataFrame, ExecutionContext, Expression, ScalarUDF - -# __version__ = importlib_metadata.version(__name__) - - -# __all__ = [ -# "DataFrame", -# "ExecutionContext", -# "Expression", -# "AggregateUDF", -# "ScalarUDF", -# "column", -# "literal", -# ] - - -# class Accumulator(metaclass=ABCMeta): -# @abstractmethod -# def state(self) -> List[pa.Scalar]: -# pass - -# @abstractmethod -# def update(self, values: pa.Array) -> None: -# pass - -# @abstractmethod -# def merge(self, states: pa.Array) -> None: -# pass - -# @abstractmethod -# def evaluate(self) -> pa.Scalar: -# pass - - -# def column(value): -# return Expression.column(value) - - -# col = column - - -# def literal(value): -# if not isinstance(value, pa.Scalar): -# value = pa.scalar(value) -# return Expression.literal(value) - - -# lit = literal - - -# def udf(func, input_types, return_type, volatility, name=None): -# """ -# Create a new User Defined Function -# """ -# if not callable(func): -# raise TypeError("`func` argument must be callable") -# if name is None: -# name = func.__qualname__ -# return ScalarUDF( -# name=name, -# func=func, -# input_types=input_types, -# return_type=return_type, -# volatility=volatility, -# ) - - -# def udaf(accum, input_type, return_type, state_type, volatility, name=None): -# """ -# Create a new User Defined Aggregate Function -# """ -# if not issubclass(accum, Accumulator): -# raise TypeError("`accum` must implement the abstract base class Accumulator") -# if name is None: -# name = accum.__qualname__ -# return AggregateUDF( -# name=name, -# accumulator=accum, -# input_type=input_type, -# return_type=return_type, -# state_type=state_type, -# volatility=volatility, -# ) diff --git a/dask_planner/src/catalog.rs b/dask_planner/src/catalog.rs deleted file mode 100644 index ed3acbed1..000000000 --- a/dask_planner/src/catalog.rs +++ /dev/null @@ -1,107 +0,0 @@ - -use std::collections::HashSet; -use std::sync::Arc; - -use pyo3::exceptions::PyKeyError; -use pyo3::prelude::*; - -use datafusion::{ - arrow::pyarrow::PyArrowConvert, - catalog::{catalog::CatalogProvider, schema::SchemaProvider}, - datasource::{TableProvider, TableType}, -}; - -#[pyclass(name = "Catalog", module = "datafusion", subclass)] -pub(crate) struct PyCatalog { - catalog: Arc, -} - -#[pyclass(name = "Database", module = "datafusion", subclass)] -pub(crate) struct PyDatabase { - database: Arc, -} - -#[pyclass(name = "Table", module = "datafusion", subclass)] -pub(crate) struct PyTable { - table: Arc, -} - -impl PyCatalog { - pub fn new(catalog: Arc) -> Self { - Self { catalog } - } -} - -impl PyDatabase { - pub fn new(database: Arc) -> Self { - Self { database } - } -} - -impl PyTable { - pub fn new(table: Arc) -> Self { - Self { table } - } -} - -#[pymethods] -impl PyCatalog { - fn names(&self) -> Vec { - self.catalog.schema_names() - } - - #[args(name = "\"public\"")] - fn database(&self, name: &str) -> PyResult { - match self.catalog.schema(name) { - Some(database) => Ok(PyDatabase::new(database)), - None => Err(PyKeyError::new_err(format!( - "Database with name {} doesn't exist.", - name - ))), - } - } -} - -#[pymethods] -impl PyDatabase { - fn names(&self) -> HashSet { - self.database.table_names().into_iter().collect() - } - - fn table(&self, name: &str) -> PyResult { - match self.database.table(name) { - Some(table) => Ok(PyTable::new(table)), - None => Err(PyKeyError::new_err(format!( - "Table with name {} doesn't exist.", - name - ))), - } - } - - // register_table - // deregister_table -} - -#[pymethods] -impl PyTable { - /// Get a reference to the schema for this table - #[getter] - fn schema(&self, py: Python) -> PyResult { - self.table.schema().to_pyarrow(py) - } - - /// Get the type of this table for metadata/catalog purposes. - #[getter] - fn kind(&self) -> &str { - match self.table.table_type() { - TableType::Base => "physical", - TableType::View => "view", - TableType::Temporary => "temporary", - } - } - - // fn scan - // fn statistics - // fn has_exact_statistics - // fn supports_filter_pushdown -} diff --git a/dask_planner/src/errors.rs b/dask_planner/src/errors.rs deleted file mode 100644 index 18c475f11..000000000 --- a/dask_planner/src/errors.rs +++ /dev/null @@ -1,41 +0,0 @@ - -use core::fmt; - -use datafusion::arrow::error::ArrowError; -use datafusion::error::DataFusionError as InnerDataFusionError; -use pyo3::{exceptions::PyException, PyErr}; - -#[derive(Debug)] -pub enum DataFusionError { - ExecutionError(InnerDataFusionError), - ArrowError(ArrowError), - Common(String), -} - -impl fmt::Display for DataFusionError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - DataFusionError::ExecutionError(e) => write!(f, "DataFusion error: {:?}", e), - DataFusionError::ArrowError(e) => write!(f, "Arrow error: {:?}", e), - DataFusionError::Common(e) => write!(f, "{}", e), - } - } -} - -impl From for DataFusionError { - fn from(err: ArrowError) -> DataFusionError { - DataFusionError::ArrowError(err) - } -} - -impl From for DataFusionError { - fn from(err: InnerDataFusionError) -> DataFusionError { - DataFusionError::ExecutionError(err) - } -} - -impl From for PyErr { - fn from(err: DataFusionError) -> PyErr { - PyException::new_err(err.to_string()) - } -} diff --git a/dask_planner/src/expression.rs b/dask_planner/src/expression.rs index f72455b3b..da10e7230 100644 --- a/dask_planner/src/expression.rs +++ b/dask_planner/src/expression.rs @@ -1,3 +1,5 @@ +use crate::sql::logical; +use crate::sql::types::PyDataType; use pyo3::PyMappingProtocol; use pyo3::{basic::CompareOp, prelude::*, PyNumberProtocol, PyObjectProtocol}; @@ -8,6 +10,10 @@ use datafusion::logical_plan::{col, lit, Expr}; use datafusion::scalar::ScalarValue; +pub use datafusion::logical_plan::plan::LogicalPlan; + +use datafusion::logical_expr::BuiltinScalarFunction; + /// An PyExpr that can be used on a DataFrame #[pyclass(name = "Expression", module = "datafusion", subclass)] #[derive(Debug, Clone)] @@ -27,6 +33,24 @@ impl From for PyExpr { } } +#[pyclass(name = "ScalarValue", module = "datafusion", subclass)] +#[derive(Debug, Clone)] +pub struct PyScalarValue { + pub scalar_value: ScalarValue, +} + +impl From for ScalarValue { + fn from(pyscalar: PyScalarValue) -> ScalarValue { + pyscalar.scalar_value + } +} + +impl From for PyScalarValue { + fn from(scalar_value: ScalarValue) -> PyScalarValue { + PyScalarValue { scalar_value } + } +} + #[pyproto] impl PyNumberProtocol for PyExpr { fn __add__(lhs: PyExpr, rhs: PyExpr) -> PyResult { @@ -84,84 +108,232 @@ impl PyObjectProtocol for PyExpr { #[pymethods] impl PyExpr { #[staticmethod] - pub fn literal(value: ScalarValue) -> PyExpr { - lit(value).into() + pub fn literal(value: PyScalarValue) -> PyExpr { + lit(value.scalar_value).into() } - /// Examine the current/"self" PyExpr and return its "type" /// In this context a "type" is what Dask-SQL Python /// RexConverter plugin instance should be invoked to handle /// the Rex conversion pub fn get_expr_type(&self) -> String { - match &self.expr { - Expr::Alias(..) => String::from("Alias"), - Expr::Column(..) => String::from("Column"), + String::from(match &self.expr { + Expr::Alias(..) => "Alias", + Expr::Column(..) => "Column", Expr::ScalarVariable(..) => panic!("ScalarVariable!!!"), - Expr::Literal(..) => panic!("Literal!!!"), - Expr::BinaryExpr {..} => String::from("BinaryExpr"), + Expr::Literal(..) => "Literal", + Expr::BinaryExpr { .. } => "BinaryExpr", Expr::Not(..) => panic!("Not!!!"), Expr::IsNotNull(..) => panic!("IsNotNull!!!"), Expr::Negative(..) => panic!("Negative!!!"), - Expr::GetIndexedField{..} => panic!("GetIndexedField!!!"), + Expr::GetIndexedField { .. } => panic!("GetIndexedField!!!"), Expr::IsNull(..) => panic!("IsNull!!!"), - Expr::Between{..} => panic!("Between!!!"), - Expr::Case{..} => panic!("Case!!!"), - Expr::Cast{..} => panic!("Cast!!!"), - Expr::TryCast{..} => panic!("TryCast!!!"), - Expr::Sort{..} => panic!("Sort!!!"), - Expr::ScalarFunction{..} => panic!("ScalarFunction!!!"), - Expr::AggregateFunction{..} => panic!("AggregateFunction!!!"), - Expr::WindowFunction{..} => panic!("WindowFunction!!!"), - Expr::AggregateUDF{..} => panic!("AggregateUDF!!!"), - Expr::InList{..} => panic!("InList!!!"), + Expr::Between { .. } => panic!("Between!!!"), + Expr::Case { .. } => panic!("Case!!!"), + Expr::Cast { .. } => "Cast", + Expr::TryCast { .. } => panic!("TryCast!!!"), + Expr::Sort { .. } => panic!("Sort!!!"), + Expr::ScalarFunction { .. } => "ScalarFunction", + Expr::AggregateFunction { .. } => "AggregateFunction", + Expr::WindowFunction { .. } => panic!("WindowFunction!!!"), + Expr::AggregateUDF { .. } => panic!("AggregateUDF!!!"), + Expr::InList { .. } => panic!("InList!!!"), Expr::Wildcard => panic!("Wildcard!!!"), - _ => String::from("OTHER") - } + _ => "OTHER", + }) } - pub fn column_name(&self) -> String { + pub fn column_name(&self, mut plan: logical::PyLogicalPlan) -> String { match &self.expr { - Expr::Alias(exprs, name) => { - println!("Expressions: {:?}, Expression Name: {:?}", exprs, name); - panic!("Alias") - }, - Expr::Column(column) => { column.name.clone() }, - Expr::ScalarVariable(..) => panic!("ScalarVariable!!!"), - Expr::Literal(..) => panic!("Literal!!!"), - Expr::BinaryExpr {..} => panic!("BinaryExpr"), - Expr::Not(..) => panic!("Not!!!"), - Expr::IsNotNull(..) => panic!("IsNotNull!!!"), - Expr::Negative(..) => panic!("Negative!!!"), - Expr::GetIndexedField{..} => panic!("GetIndexedField!!!"), - Expr::IsNull(..) => panic!("IsNull!!!"), - Expr::Between{..} => panic!("Between!!!"), - Expr::Case{..} => panic!("Case!!!"), - Expr::Cast{..} => panic!("Cast!!!"), - Expr::TryCast{..} => panic!("TryCast!!!"), - Expr::Sort{..} => panic!("Sort!!!"), - Expr::ScalarFunction{..} => panic!("ScalarFunction!!!"), - Expr::AggregateFunction{..} => panic!("AggregateFunction!!!"), - Expr::WindowFunction{..} => panic!("WindowFunction!!!"), - Expr::AggregateUDF{..} => panic!("AggregateUDF!!!"), - Expr::InList{..} => panic!("InList!!!"), - Expr::Wildcard => panic!("Wildcard!!!"), - _ => panic!("Nothing found!!!") + Expr::Alias(expr, name) => { + println!("Alias encountered with name: {:?}", name); + + // Only certain LogicalPlan variants are valid in this nested Alias scenario so we + // extract the valid ones and error on the invalid ones + match expr.as_ref() { + Expr::Column(col) => { + // First we must iterate the current node before getting its input + match plan.current_node() { + LogicalPlan::Projection(proj) => match proj.input.as_ref() { + LogicalPlan::Aggregate(agg) => { + let mut exprs = agg.group_expr.clone(); + exprs.extend_from_slice(&agg.aggr_expr); + match &exprs[plan.get_index(col)] { + Expr::AggregateFunction { args, .. } => match &args[0] { + Expr::Column(col) => { + println!("AGGREGATE COLUMN IS {}", col.name); + col.name.clone() + } + _ => name.clone(), + }, + _ => name.clone(), + } + } + _ => name.clone(), + }, + _ => name.clone(), + } + } + _ => name.clone(), + } + } + Expr::Column(column) => column.name.clone(), + Expr::ScalarVariable(..) => unimplemented!("ScalarVariable!!!"), + Expr::Literal(..) => unimplemented!("Literal!!!"), + Expr::BinaryExpr { + left: _, + op: _, + right: _, + } => { + // /// TODO: Examine this more deeply about whether name comes from the left or right + // self.column_name(left) + unimplemented!("BinaryExpr HERE!!!") + } + Expr::Not(..) => unimplemented!("Not!!!"), + Expr::IsNotNull(..) => unimplemented!("IsNotNull!!!"), + Expr::Negative(..) => unimplemented!("Negative!!!"), + Expr::GetIndexedField { .. } => unimplemented!("GetIndexedField!!!"), + Expr::IsNull(..) => unimplemented!("IsNull!!!"), + Expr::Between { .. } => unimplemented!("Between!!!"), + Expr::Case { .. } => unimplemented!("Case!!!"), + Expr::Cast { .. } => unimplemented!("Cast!!!"), + Expr::TryCast { .. } => unimplemented!("TryCast!!!"), + Expr::Sort { .. } => unimplemented!("Sort!!!"), + Expr::ScalarFunction { .. } => unimplemented!("ScalarFunction!!!"), + Expr::AggregateFunction { .. } => unimplemented!("AggregateFunction!!!"), + Expr::WindowFunction { .. } => unimplemented!("WindowFunction!!!"), + Expr::AggregateUDF { .. } => unimplemented!("AggregateUDF!!!"), + Expr::InList { .. } => unimplemented!("InList!!!"), + Expr::Wildcard => unimplemented!("Wildcard!!!"), + _ => panic!("Nothing found!!!"), } } - /// Gets the operands for a BinaryExpr call - pub fn getOperands(&self) -> PyResult> { + #[pyo3(name = "getOperands")] + pub fn get_operands(&self) -> PyResult> { match &self.expr { - Expr::BinaryExpr {left, op, right} => { - let operands: Vec = Vec::new(); + Expr::BinaryExpr { left, op: _, right } => { + let mut operands: Vec = Vec::new(); + let left_desc: Expr = *left.clone(); + operands.push(left_desc.into()); + let right_desc: Expr = *right.clone(); + operands.push(right_desc.into()); Ok(operands) - }, - _ => Err(PyErr::new::("current_node is not of type Projection")) + } + Expr::ScalarFunction { fun: _, args } => { + let mut operands: Vec = Vec::new(); + for arg in args { + operands.push(arg.clone().into()); + } + Ok(operands) + } + Expr::Cast { expr, data_type: _ } => { + let mut operands: Vec = Vec::new(); + let ex: Expr = *expr.clone(); + operands.push(ex.into()); + Ok(operands) + } + _ => Err(PyErr::new::( + "unknown Expr type encountered", + )), } } + #[pyo3(name = "getOperatorName")] + pub fn get_operator_name(&self) -> PyResult { + match &self.expr { + Expr::BinaryExpr { + left: _, + op, + right: _, + } => Ok(format!("{}", op)), + Expr::ScalarFunction { fun, args: _ } => Ok(format!("{}", fun)), + Expr::Cast { + expr: _, + data_type: _, + } => Ok(String::from("cast")), + _ => Err(PyErr::new::( + "Catch all triggered ....", + )), + } + } + + /// Gets the ScalarValue represented by the Expression + #[pyo3(name = "getType")] + pub fn get_type(&self) -> PyResult { + match &self.expr { + Expr::ScalarVariable(..) => panic!("ScalarVariable!!!"), + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Boolean(_value) => Ok(String::from("Boolean")), + ScalarValue::Float32(_value) => Ok(String::from("Float32")), + ScalarValue::Float64(_value) => Ok(String::from("Float64")), + ScalarValue::Decimal128(_value, ..) => Ok(String::from("Decimal128")), + ScalarValue::Int8(_value) => Ok(String::from("Int8")), + ScalarValue::Int16(_value) => Ok(String::from("Int16")), + ScalarValue::Int32(_value) => Ok(String::from("Int32")), + ScalarValue::Int64(_value) => Ok(String::from("Int64")), + ScalarValue::UInt8(_value) => Ok(String::from("UInt8")), + ScalarValue::UInt16(_value) => Ok(String::from("UInt16")), + ScalarValue::UInt32(_value) => Ok(String::from("UInt32")), + ScalarValue::UInt64(_value) => Ok(String::from("UInt64")), + ScalarValue::Utf8(_value) => Ok(String::from("Utf8")), + ScalarValue::LargeUtf8(_value) => Ok(String::from("LargeUtf8")), + ScalarValue::Binary(_value) => Ok(String::from("Binary")), + ScalarValue::LargeBinary(_value) => Ok(String::from("LargeBinary")), + ScalarValue::Date32(_value) => Ok(String::from("Date32")), + ScalarValue::Date64(_value) => Ok(String::from("Date64")), + _ => { + panic!("CatchAll") + } + }, + Expr::ScalarFunction { fun, args: _ } => match fun { + BuiltinScalarFunction::Abs => Ok(String::from("Abs")), + BuiltinScalarFunction::DatePart => Ok(String::from("DatePart")), + _ => { + panic!("fire here for scalar function") + } + }, + Expr::Cast { expr: _, data_type } => match data_type { + DataType::Null => Ok(String::from("NULL")), + DataType::Boolean => Ok(String::from("BOOLEAN")), + DataType::Int8 => Ok(String::from("TINYINT")), + DataType::UInt8 => Ok(String::from("TINYINT")), + DataType::Int16 => Ok(String::from("SMALLINT")), + DataType::UInt16 => Ok(String::from("SMALLINT")), + DataType::Int32 => Ok(String::from("INTEGER")), + DataType::UInt32 => Ok(String::from("INTEGER")), + DataType::Int64 => Ok(String::from("BIGINT")), + DataType::UInt64 => Ok(String::from("BIGINT")), + DataType::Float32 => Ok(String::from("FLOAT")), + DataType::Float64 => Ok(String::from("DOUBLE")), + DataType::Timestamp { .. } => Ok(String::from("TIMESTAMP")), + DataType::Date32 => Ok(String::from("DATE")), + DataType::Date64 => Ok(String::from("DATE")), + DataType::Time32(..) => Ok(String::from("TIME32")), + DataType::Time64(..) => Ok(String::from("TIME64")), + DataType::Duration(..) => Ok(String::from("DURATION")), + DataType::Interval(..) => Ok(String::from("INTERVAL")), + DataType::Binary => Ok(String::from("BINARY")), + DataType::FixedSizeBinary(..) => Ok(String::from("FIXEDSIZEBINARY")), + DataType::LargeBinary => Ok(String::from("LARGEBINARY")), + DataType::Utf8 => Ok(String::from("VARCHAR")), + DataType::LargeUtf8 => Ok(String::from("BIGVARCHAR")), + DataType::List(..) => Ok(String::from("LIST")), + DataType::FixedSizeList(..) => Ok(String::from("FIXEDSIZELIST")), + DataType::LargeList(..) => Ok(String::from("LARGELIST")), + DataType::Struct(..) => Ok(String::from("STRUCT")), + DataType::Union(..) => Ok(String::from("UNION")), + DataType::Dictionary(..) => Ok(String::from("DICTIONARY")), + DataType::Decimal(..) => Ok(String::from("DECIMAL")), + DataType::Map(..) => Ok(String::from("MAP")), + _ => { + panic!("This is not yet implemented!!!") + } + }, + _ => panic!("OTHER"), + } + } #[staticmethod] pub fn column(value: &str) -> PyExpr { @@ -183,17 +355,228 @@ impl PyExpr { self.expr.clone().is_null().into() } - pub fn cast(&self, to: DataType) -> PyExpr { + pub fn cast(&self, to: PyDataType) -> PyExpr { // self.expr.cast_to() requires DFSchema to validate that the cast // is supported, omit that for now let expr = Expr::Cast { expr: Box::new(self.expr.clone()), - data_type: to, + data_type: to.data_type, }; expr.into() } + + /// TODO: I can't express how much I dislike explicity listing all of these methods out + /// but PyO3 makes it necessary since its annotations cannot be used in trait impl blocks + #[pyo3(name = "getFloat32Value")] + pub fn float_32_value(&mut self) -> f32 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Float32(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getFloat64Value")] + pub fn float_64_value(&mut self) -> f64 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Float64(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getInt8Value")] + pub fn int_8_value(&mut self) -> i8 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Int8(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getInt16Value")] + pub fn int_16_value(&mut self) -> i16 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Int16(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getInt32Value")] + pub fn int_32_value(&mut self) -> i32 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Int32(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getInt64Value")] + pub fn int_64_value(&mut self) -> i64 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Int64(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getUInt8Value")] + pub fn uint_8_value(&mut self) -> u8 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::UInt8(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getUInt16Value")] + pub fn uint_16_value(&mut self) -> u16 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::UInt16(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getUInt32Value")] + pub fn uint_32_value(&mut self) -> u32 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::UInt32(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getUInt64Value")] + pub fn uint_64_value(&mut self) -> u64 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::UInt64(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getBoolValue")] + pub fn bool_value(&mut self) -> bool { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Boolean(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getStringValue")] + pub fn string_value(&mut self) -> String { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Utf8(iv) => iv.clone().unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } } +// pub trait ObtainValue { +// fn getValue(&mut self) -> T; +// } + +// /// Expansion macro to get all typed values from a DataFusion Expr +// macro_rules! get_typed_value { +// ($t:ty, $func_name:ident) => { +// impl ObtainValue<$t> for PyExpr { +// #[inline] +// fn getValue(&mut self) -> $t +// { +// match &self.expr { +// Expr::Literal(scalar_value) => { +// match scalar_value { +// ScalarValue::$func_name(iv) => { +// iv.unwrap() +// }, +// _ => { +// panic!("getValue() - Unexpected value") +// } +// } +// }, +// _ => panic!("getValue() - Non literal value encountered") +// } +// } +// } +// } +// } + +// get_typed_value!(u8, UInt8); +// get_typed_value!(u16, UInt16); +// get_typed_value!(u32, UInt32); +// get_typed_value!(u64, UInt64); +// get_typed_value!(i8, Int8); +// get_typed_value!(i16, Int16); +// get_typed_value!(i32, Int32); +// get_typed_value!(i64, Int64); +// get_typed_value!(bool, Boolean); +// get_typed_value!(f32, Float32); +// get_typed_value!(f64, Float64); + +// get_typed_value!(for usize u8 u16 u32 u64 isize i8 i16 i32 i64 bool f32 f64); +// get_typed_value!(usize, Integer); +// get_typed_value!(isize, ); +// Decimal128(Option, usize, usize), +// Utf8(Option), +// LargeUtf8(Option), +// Binary(Option>), +// LargeBinary(Option>), +// List(Option, Global>>, Box), +// Date32(Option), +// Date64(Option), + #[pyproto] impl PyMappingProtocol for PyExpr { fn __getitem__(&self, key: &str) -> PyResult { diff --git a/dask_planner/src/functions.rs b/dask_planner/src/functions.rs deleted file mode 100644 index 0c8784dca..000000000 --- a/dask_planner/src/functions.rs +++ /dev/null @@ -1,322 +0,0 @@ - -use pyo3::{prelude::*, wrap_pyfunction}; - -use datafusion::logical_plan; - -use datafusion::physical_plan::{aggregates::AggregateFunction, functions::BuiltinScalarFunction}; - -use crate::errors; -use crate::expression::PyExpr; - -#[pyfunction] -fn array(value: Vec) -> PyExpr { - PyExpr { - expr: logical_plan::array(value.into_iter().map(|x| x.expr).collect::>()), - } -} - -#[pyfunction] -fn in_list(expr: PyExpr, value: Vec, negated: bool) -> PyExpr { - logical_plan::in_list( - expr.expr, - value.into_iter().map(|x| x.expr).collect::>(), - negated, - ) - .into() -} - -/// Current date and time -#[pyfunction] -fn now() -> PyExpr { - PyExpr { - // here lit(0) is a stub for conform to arity - expr: logical_plan::now(logical_plan::lit(0)), - } -} - -/// Returns a random value in the range 0.0 <= x < 1.0 -#[pyfunction] -fn random() -> PyExpr { - PyExpr { - expr: logical_plan::random(), - } -} - -/// Computes a binary hash of the given data. type is the algorithm to use. -/// Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, blake2b, and blake3. -#[pyfunction(value, method)] -fn digest(value: PyExpr, method: PyExpr) -> PyExpr { - PyExpr { - expr: logical_plan::digest(value.expr, method.expr), - } -} - -/// Concatenates the text representations of all the arguments. -/// NULL arguments are ignored. -#[pyfunction(args = "*")] -fn concat(args: Vec) -> PyResult { - let args = args.into_iter().map(|e| e.expr).collect::>(); - Ok(logical_plan::concat(&args).into()) -} - -/// Concatenates all but the first argument, with separators. -/// The first argument is used as the separator string, and should not be NULL. -/// Other NULL arguments are ignored. -#[pyfunction(sep, args = "*")] -fn concat_ws(sep: String, args: Vec) -> PyResult { - let args = args.into_iter().map(|e| e.expr).collect::>(); - Ok(logical_plan::concat_ws(sep, &args).into()) -} - -/// Creates a new Sort expression -#[pyfunction] -fn order_by(expr: PyExpr, asc: Option, nulls_first: Option) -> PyResult { - Ok(PyExpr { - expr: datafusion::logical_plan::Expr::Sort { - expr: Box::new(expr.expr), - asc: asc.unwrap_or(true), - nulls_first: nulls_first.unwrap_or(true), - }, - }) -} - -/// Creates a new Alias expression -#[pyfunction] -fn alias(expr: PyExpr, name: &str) -> PyResult { - Ok(PyExpr { - expr: datafusion::logical_plan::Expr::Alias(Box::new(expr.expr), String::from(name)), - }) -} - -/// Creates a new Window function expression -#[pyfunction] -fn window( - name: &str, - args: Vec, - partition_by: Option>, - order_by: Option>, -) -> PyResult { - use std::str::FromStr; - let fun = datafusion::physical_plan::window_functions::WindowFunction::from_str(name) - .map_err(|e| -> errors::DataFusionError { e.into() })?; - Ok(PyExpr { - expr: datafusion::logical_plan::Expr::WindowFunction { - fun, - args: args.into_iter().map(|x| x.expr).collect::>(), - partition_by: partition_by - .unwrap_or_default() - .into_iter() - .map(|x| x.expr) - .collect::>(), - order_by: order_by - .unwrap_or_default() - .into_iter() - .map(|x| x.expr) - .collect::>(), - window_frame: None, - }, - }) -} - -macro_rules! scalar_function { - ($NAME: ident, $FUNC: ident) => { - scalar_function!($NAME, $FUNC, stringify!($NAME)); - }; - ($NAME: ident, $FUNC: ident, $DOC: expr) => { - #[doc = $DOC] - #[pyfunction(args = "*")] - fn $NAME(args: Vec) -> PyExpr { - let expr = logical_plan::Expr::ScalarFunction { - fun: BuiltinScalarFunction::$FUNC, - args: args.into_iter().map(|e| e.into()).collect(), - }; - expr.into() - } - }; -} - -macro_rules! aggregate_function { - ($NAME: ident, $FUNC: ident) => { - aggregate_function!($NAME, $FUNC, stringify!($NAME)); - }; - ($NAME: ident, $FUNC: ident, $DOC: expr) => { - #[doc = $DOC] - #[pyfunction(args = "*", distinct = "false")] - fn $NAME(args: Vec, distinct: bool) -> PyExpr { - let expr = logical_plan::Expr::AggregateFunction { - fun: AggregateFunction::$FUNC, - args: args.into_iter().map(|e| e.into()).collect(), - distinct, - }; - expr.into() - } - }; -} - -scalar_function!(abs, Abs); -scalar_function!(acos, Acos); -scalar_function!(ascii, Ascii, "Returns the numeric code of the first character of the argument. In UTF8 encoding, returns the Unicode code point of the character. In other multibyte encodings, the argument must be an ASCII character."); -scalar_function!(asin, Asin); -scalar_function!(atan, Atan); -scalar_function!( - bit_length, - BitLength, - "Returns number of bits in the string (8 times the octet_length)." -); -scalar_function!(btrim, Btrim, "Removes the longest string containing only characters in characters (a space by default) from the start and end of string."); -scalar_function!(ceil, Ceil); -scalar_function!( - character_length, - CharacterLength, - "Returns number of characters in the string." -); -scalar_function!(chr, Chr, "Returns the character with the given code."); -scalar_function!(cos, Cos); -scalar_function!(exp, Exp); -scalar_function!(floor, Floor); -scalar_function!(initcap, InitCap, "Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters."); -scalar_function!(left, Left, "Returns first n characters in the string, or when n is negative, returns all but last |n| characters."); -scalar_function!(ln, Ln); -scalar_function!(log10, Log10); -scalar_function!(log2, Log2); -scalar_function!(lower, Lower, "Converts the string to all lower case"); -scalar_function!(lpad, Lpad, "Extends the string to length length by prepending the characters fill (a space by default). If the string is already longer than length then it is truncated (on the right)."); -scalar_function!(ltrim, Ltrim, "Removes the longest string containing only characters in characters (a space by default) from the start of string."); -scalar_function!( - md5, - MD5, - "Computes the MD5 hash of the argument, with the result written in hexadecimal." -); -scalar_function!(octet_length, OctetLength, "Returns number of bytes in the string. Since this version of the function accepts type character directly, it will not strip trailing spaces."); -scalar_function!(regexp_match, RegexpMatch); -scalar_function!( - regexp_replace, - RegexpReplace, - "Replaces substring(s) matching a POSIX regular expression" -); -scalar_function!( - repeat, - Repeat, - "Repeats string the specified number of times." -); -scalar_function!( - replace, - Replace, - "Replaces all occurrences in string of substring from with substring to." -); -scalar_function!( - reverse, - Reverse, - "Reverses the order of the characters in the string." -); -scalar_function!(right, Right, "Returns last n characters in the string, or when n is negative, returns all but first |n| characters."); -scalar_function!(round, Round); -scalar_function!(rpad, Rpad, "Extends the string to length length by appending the characters fill (a space by default). If the string is already longer than length then it is truncated."); -scalar_function!(rtrim, Rtrim, "Removes the longest string containing only characters in characters (a space by default) from the end of string."); -scalar_function!(sha224, SHA224); -scalar_function!(sha256, SHA256); -scalar_function!(sha384, SHA384); -scalar_function!(sha512, SHA512); -scalar_function!(signum, Signum); -scalar_function!(sin, Sin); -scalar_function!( - split_part, - SplitPart, - "Splits string at occurrences of delimiter and returns the n'th field (counting from one)." -); -scalar_function!(sqrt, Sqrt); -scalar_function!( - starts_with, - StartsWith, - "Returns true if string starts with prefix." -); -scalar_function!(strpos, Strpos, "Returns starting index of specified substring within string, or zero if it's not present. (Same as position(substring in string), but note the reversed argument order.)"); -scalar_function!(substr, Substr); -scalar_function!(tan, Tan); -scalar_function!( - to_hex, - ToHex, - "Converts the number to its equivalent hexadecimal representation." -); -scalar_function!(to_timestamp, ToTimestamp); -scalar_function!(translate, Translate, "Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted."); -scalar_function!(trim, Trim, "Removes the longest string containing only characters in characters (a space by default) from the start, end, or both ends (BOTH is the default) of string."); -scalar_function!(trunc, Trunc); -scalar_function!(upper, Upper, "Converts the string to all upper case."); - -aggregate_function!(avg, Avg); -aggregate_function!(count, Count); -aggregate_function!(max, Max); -aggregate_function!(min, Min); -aggregate_function!(sum, Sum); -aggregate_function!(approx_distinct, ApproxDistinct); - -pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { - m.add_wrapped(wrap_pyfunction!(abs))?; - m.add_wrapped(wrap_pyfunction!(acos))?; - m.add_wrapped(wrap_pyfunction!(approx_distinct))?; - m.add_wrapped(wrap_pyfunction!(alias))?; - m.add_wrapped(wrap_pyfunction!(array))?; - m.add_wrapped(wrap_pyfunction!(ascii))?; - m.add_wrapped(wrap_pyfunction!(asin))?; - m.add_wrapped(wrap_pyfunction!(atan))?; - m.add_wrapped(wrap_pyfunction!(avg))?; - m.add_wrapped(wrap_pyfunction!(bit_length))?; - m.add_wrapped(wrap_pyfunction!(btrim))?; - m.add_wrapped(wrap_pyfunction!(ceil))?; - m.add_wrapped(wrap_pyfunction!(character_length))?; - m.add_wrapped(wrap_pyfunction!(chr))?; - m.add_wrapped(wrap_pyfunction!(concat_ws))?; - m.add_wrapped(wrap_pyfunction!(concat))?; - m.add_wrapped(wrap_pyfunction!(cos))?; - m.add_wrapped(wrap_pyfunction!(count))?; - m.add_wrapped(wrap_pyfunction!(digest))?; - m.add_wrapped(wrap_pyfunction!(exp))?; - m.add_wrapped(wrap_pyfunction!(floor))?; - m.add_wrapped(wrap_pyfunction!(in_list))?; - m.add_wrapped(wrap_pyfunction!(initcap))?; - m.add_wrapped(wrap_pyfunction!(left))?; - m.add_wrapped(wrap_pyfunction!(ln))?; - m.add_wrapped(wrap_pyfunction!(log10))?; - m.add_wrapped(wrap_pyfunction!(log2))?; - m.add_wrapped(wrap_pyfunction!(lower))?; - m.add_wrapped(wrap_pyfunction!(lpad))?; - m.add_wrapped(wrap_pyfunction!(ltrim))?; - m.add_wrapped(wrap_pyfunction!(max))?; - m.add_wrapped(wrap_pyfunction!(md5))?; - m.add_wrapped(wrap_pyfunction!(min))?; - m.add_wrapped(wrap_pyfunction!(now))?; - m.add_wrapped(wrap_pyfunction!(octet_length))?; - m.add_wrapped(wrap_pyfunction!(order_by))?; - m.add_wrapped(wrap_pyfunction!(random))?; - m.add_wrapped(wrap_pyfunction!(regexp_match))?; - m.add_wrapped(wrap_pyfunction!(regexp_replace))?; - m.add_wrapped(wrap_pyfunction!(repeat))?; - m.add_wrapped(wrap_pyfunction!(replace))?; - m.add_wrapped(wrap_pyfunction!(reverse))?; - m.add_wrapped(wrap_pyfunction!(right))?; - m.add_wrapped(wrap_pyfunction!(round))?; - m.add_wrapped(wrap_pyfunction!(rpad))?; - m.add_wrapped(wrap_pyfunction!(rtrim))?; - m.add_wrapped(wrap_pyfunction!(sha224))?; - m.add_wrapped(wrap_pyfunction!(sha256))?; - m.add_wrapped(wrap_pyfunction!(sha384))?; - m.add_wrapped(wrap_pyfunction!(sha512))?; - m.add_wrapped(wrap_pyfunction!(signum))?; - m.add_wrapped(wrap_pyfunction!(sin))?; - m.add_wrapped(wrap_pyfunction!(split_part))?; - m.add_wrapped(wrap_pyfunction!(sqrt))?; - m.add_wrapped(wrap_pyfunction!(starts_with))?; - m.add_wrapped(wrap_pyfunction!(strpos))?; - m.add_wrapped(wrap_pyfunction!(substr))?; - m.add_wrapped(wrap_pyfunction!(sum))?; - m.add_wrapped(wrap_pyfunction!(tan))?; - m.add_wrapped(wrap_pyfunction!(to_hex))?; - m.add_wrapped(wrap_pyfunction!(to_timestamp))?; - m.add_wrapped(wrap_pyfunction!(translate))?; - m.add_wrapped(wrap_pyfunction!(trim))?; - m.add_wrapped(wrap_pyfunction!(trunc))?; - m.add_wrapped(wrap_pyfunction!(upper))?; - m.add_wrapped(wrap_pyfunction!(window))?; - Ok(()) -} diff --git a/dask_planner/src/lib.rs b/dask_planner/src/lib.rs index 1bc79a0e3..df9343151 100644 --- a/dask_planner/src/lib.rs +++ b/dask_planner/src/lib.rs @@ -1,10 +1,7 @@ use mimalloc::MiMalloc; use pyo3::prelude::*; -mod catalog; -mod errors; mod expression; -mod functions; mod sql; #[global_allocator] @@ -15,26 +12,18 @@ static GLOBAL: MiMalloc = MiMalloc; /// The higher-level public API is defined in pure python files under the /// dask_planner directory. #[pymodule] +#[pyo3(name = "rust")] fn rust(_py: Python, m: &PyModule) -> PyResult<()> { // Register the python classes - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; m.add_class::()?; - - // SQL specific classes m.add_class::()?; - m.add_class::()?; - - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/dask_planner/src/sql.rs b/dask_planner/src/sql.rs index 8509c612d..11d37df72 100644 --- a/dask_planner/src/sql.rs +++ b/dask_planner/src/sql.rs @@ -1,126 +1,26 @@ - -use std::collections::HashMap; - -use pyo3::prelude::*; - -use datafusion::sql::parser::{DFParser, Statement}; -use sqlparser::ast::{Query, Select}; - -use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; - +pub mod column; +pub mod function; +pub mod logical; +pub mod schema; +pub mod statement; +pub mod table; +pub mod types; + +use datafusion::arrow::datatypes::{Field, Schema}; use datafusion::catalog::TableReference; -use datafusion::datasource::TableProvider; -use datafusion::sql::planner::{SqlToRel}; -use datafusion::logical_plan::plan::{ - LogicalPlan, - Projection, - Filter, - Window, - Aggregate, - Sort, - Join, - CrossJoin, - Repartition, - Union, - TableScan, - EmptyRelation, - Limit, - CreateExternalTable, - CreateMemoryTable, - DropTable, - Values, - Explain, - Analyze, - Extension -}; +use datafusion::error::DataFusionError; +use datafusion::physical_plan::udaf::AggregateUDF; +use datafusion::physical_plan::udf::ScalarUDF; +use datafusion::sql::parser::DFParser; +use datafusion::sql::planner::{ContextProvider, SqlToRel}; +use datafusion_expr::ScalarFunctionImplementation; +use std::collections::HashMap; use std::sync::Arc; -use crate::expression::PyExpr; - - -#[pyclass(name = "LogicalPlanGenerator", module = "dask_planner", subclass)] -#[derive(Clone)] -pub struct LogicalPlanGenerator { - // Holds the ordered plan steps - #[pyo3(get)] - pub plan_steps: Vec, - pub projection: Option, - pub table_scan: Option, -} - -impl Default for LogicalPlanGenerator { - fn default() -> LogicalPlanGenerator { - LogicalPlanGenerator { - plan_steps: Vec::new(), - projection: None, - table_scan: None, - } - } -} - -#[pymethods] -impl LogicalPlanGenerator { - - pub fn get_named_projects(&self) -> Vec { - match &self.projection { - Some(proj) => { - let mut exprs:Vec = Vec::new(); - for expr in &proj.expr { - exprs.push(expr.clone().into()); - } - exprs - }, - None => panic!("There is no Projection node present in the Logical Plan!") - } - } -} - -impl datafusion::logical_plan::plan::PlanVisitor for LogicalPlanGenerator { - type Error = String; - - fn pre_visit( - &mut self, - _plan: &LogicalPlan, - ) -> std::result::Result { - Ok(true) - } - - /// By inserting in `post_visit` we effectively create a depth first traversal of the SQL parsed tree - fn post_visit( - &mut self, - plan: &LogicalPlan, - ) -> std::result::Result { - let s = match plan { - LogicalPlan::Projection(projection) => { self.projection = Some(projection.clone()); "Projection" }, - LogicalPlan::Filter { .. } => "Filter", - LogicalPlan::Window { .. } => "Window", - LogicalPlan::Aggregate { .. } => "Aggregate", - LogicalPlan::Sort { .. } => "Sort", - LogicalPlan::Join { .. } => "Join", - LogicalPlan::CrossJoin { .. } => "CrossJoin", - LogicalPlan::Repartition { .. } => "Repartition", - LogicalPlan::Union { .. } => "Union", - LogicalPlan::TableScan(table_scan) => { self.table_scan = Some(table_scan.clone()); "TableScan" }, - LogicalPlan::EmptyRelation { .. } => "EmptyRelation", - LogicalPlan::Limit { .. } => "Limit", - LogicalPlan::CreateExternalTable { .. } => "CreateExternalTable", - LogicalPlan::CreateMemoryTable { .. } => "CreateMemoryTable", - LogicalPlan::DropTable { .. } => "DropTable", - LogicalPlan::Values { .. } => "Values", - LogicalPlan::Explain { .. } => "Explain", - LogicalPlan::Analyze { .. } => "Analyze", - LogicalPlan::Extension { .. } => "Extension", - }; - - self.plan_steps.push(s.into()); - Ok(true) - } -} - - +use pyo3::prelude::*; -/// DaskSQLContext is main interface used for interacting with Datafusion to +/// DaskSQLContext is main interface used for interacting with DataFusion to /// parse SQL queries, build logical plans, and optimize logical plans. /// /// The following example demonstrates how to generate an optimized LogicalPlan @@ -140,20 +40,18 @@ impl datafusion::logical_plan::plan::PlanVisitor for LogicalPlanGenerator { /// # } /// ``` #[pyclass(name = "DaskSQLContext", module = "dask_planner", subclass)] -#[derive(Clone)] +#[derive(Debug, Clone)] pub struct DaskSQLContext { default_schema_name: String, - pub schemas: HashMap, + schemas: HashMap, } -impl datafusion::sql::planner::ContextProvider for DaskSQLContext { - fn get_table_provider( - &self, - name: TableReference, - ) -> Option> { - match self.schemas.get(&String::from(&self.default_schema_name)) { +impl ContextProvider for DaskSQLContext { + fn get_table_provider(&self, name: TableReference) -> Option> { + match self.schemas.get(&self.default_schema_name) { Some(schema) => { let mut resp = None; + let mut table_name: String = "".to_string(); for (_table_name, table) in &schema.tables { if table.name.eq(&name.table()) { // Build the Schema here @@ -161,744 +59,125 @@ impl datafusion::sql::planner::ContextProvider for DaskSQLContext { // Iterate through the DaskTable instance and create a Schema instance for (column_name, column_type) in &table.columns { - fields.push(Field::new(column_name, column_type.sqlType.clone(), false)); + fields.push(Field::new( + column_name, + column_type.sql_type.clone(), + false, + )); } resp = Some(Schema::new(fields)); + table_name = _table_name.clone(); } } - Some(Arc::new(datafusion::datasource::empty::EmptyTable::new( - Arc::new( - resp.unwrap() - ) + Some(Arc::new(table::DaskTableProvider::new( + Arc::new(resp.unwrap()), + table_name, ))) - }, - None => panic!("Schema with name {} not found", "table_name"), + } + None => { + DataFusionError::Execution(format!( + "Schema with name {} not found", + &self.default_schema_name + )); + None + } } } - fn get_function_meta(&self, name: &str) -> Option> { - println!("RUST: get_function_meta"); - let _f: datafusion::physical_plan::functions::ScalarFunctionImplementation = - Arc::new(|_| Err(datafusion::error::DataFusionError::NotImplemented("".to_string()))); - match name { - _ => None, - } + fn get_function_meta(&self, _name: &str) -> Option> { + let _f: ScalarFunctionImplementation = + Arc::new(|_| Err(DataFusionError::NotImplemented("".to_string()))); + None } - fn get_aggregate_meta(&self, _name: &str) -> Option> { - println!("RUST: get_aggregate_meta"); - unimplemented!() + fn get_aggregate_meta(&self, _name: &str) -> Option> { + unimplemented!("RUST: get_aggregate_meta is not yet implemented for DaskSQLContext"); } -} + fn get_variable_type(&self, _: &[String]) -> Option { + unimplemented!("RUST: get_variable_type is not yet implemented for DaskSQLContext") + } +} #[pymethods] impl DaskSQLContext { #[new] pub fn new(default_schema_name: String) -> Self { Self { - default_schema_name: default_schema_name, + default_schema_name, schemas: HashMap::new(), } } - pub fn register_schema(&mut self, schema_name:String, schema: DaskSchema) { + /// Register a Schema with the current DaskSQLContext + pub fn register_schema( + &mut self, + schema_name: String, + schema: schema::DaskSchema, + ) -> PyResult { self.schemas.insert(schema_name, schema); + Ok(true) } - pub fn register_table(&mut self, schema_name:String, table: DaskTable) { + /// Register a DaskTable instance under the specified schema in the current DaskSQLContext + pub fn register_table( + &mut self, + schema_name: String, + table: table::DaskTable, + ) -> PyResult { match self.schemas.get_mut(&schema_name) { - Some(schema) => schema.add_table(table), - None => println!("Schema: {} not found in DaskSQLContext", schema_name), + Some(schema) => { + schema.add_table(table); + Ok(true) + } + None => Err(PyErr::new::(format!( + "Schema: {} not found in DaskSQLContext", + schema_name + ))), } } /// Parses a SQL string into an AST presented as a Vec of Statements - pub fn parse_sql(&self, sql: &str) -> Vec { + pub fn parse_sql(&self, sql: &str) -> PyResult> { match DFParser::parse_sql(sql) { Ok(k) => { - let mut statements = Vec::new(); + let mut statements: Vec = Vec::new(); for statement in k { statements.push(statement.into()); } - statements - }, - Err(e) => panic!("{}", e.to_string()), + assert!( + statements.len() == 1, + "More than 1 expected statement was encounterd!" + ); + Ok(statements) + } + Err(e) => Err(PyErr::new::(format!( + "{}", + e + ))), } } /// Creates a non-optimized Relational Algebra LogicalPlan from an AST Statement - pub fn logical_relational_algebra(&self, statement: PyStatement) -> PyLogicalPlan { + pub fn logical_relational_algebra( + &self, + statement: statement::PyStatement, + ) -> PyResult { let planner = SqlToRel::new(self); - match planner.statement_to_plan(&statement.statement) { + match planner.statement_to_plan(statement.statement) { Ok(k) => { - println!("Full Logical Plan: {:?}", k); - PyLogicalPlan { + println!("\nLogicalPlan: {:?}\n\n", k); + Ok(logical::PyLogicalPlan { original_plan: k, current_node: None, - } - }, - Err(e) => panic!("{}", e.to_string()), - } - } -} - - -#[pyclass(name = "LogicalPlan", module = "dask_planner", subclass)] -#[derive(Debug, Clone)] -pub struct PyLogicalPlan { - /// The orginal LogicalPlan that was parsed by DataFusion from the input SQL - original_plan: LogicalPlan, - /// The original_plan is traversed. current_node stores the current node of this traversal - current_node: Option, -} - -impl From for LogicalPlan { - fn from(logical_plan: PyLogicalPlan) -> LogicalPlan { - logical_plan.original_plan - } -} - -impl From for PyLogicalPlan { - fn from(logical_plan: LogicalPlan) -> PyLogicalPlan { - PyLogicalPlan { - original_plan: logical_plan, - current_node: None, - } - } -} - - -/// Traverses the logical plan to locate the Table associated with the query -fn table_from_logical_plan(plan: &LogicalPlan) -> Option { - match plan { - datafusion::logical_plan::plan::LogicalPlan::Projection(projection) => { - println!("Projection Input: {:?}", &projection.input); - table_from_logical_plan(&projection.input) - }, - datafusion::logical_plan::plan::LogicalPlan::Filter(filter) => { - println!("Filter Input: {:?}", &filter.input); - table_from_logical_plan(&filter.input) - }, - datafusion::logical_plan::plan::LogicalPlan::Window(_window) => { - println!("Window"); - None - }, - datafusion::logical_plan::plan::LogicalPlan::Aggregate(_aggregate) => { - println!("Aggregate"); - None - }, - datafusion::logical_plan::plan::LogicalPlan::Sort(_sort) => { - println!("Sort"); - None - }, - datafusion::logical_plan::plan::LogicalPlan::Join(_join) => { - println!("Join"); - None - }, - datafusion::logical_plan::plan::LogicalPlan::CrossJoin(_crossJoin) => { - println!("CrossJoin"); - None - }, - datafusion::logical_plan::plan::LogicalPlan::TableScan(tableScan) => { - - // Get the TableProvider for this Table instance - let tbl_provider: Arc = tableScan.source.clone(); - let tbl_schema: SchemaRef = tbl_provider.schema(); - let fields = tbl_schema.fields(); - - let mut cols: Vec<(String, DaskRelDataType)> = Vec::new(); - for field in fields { - cols.push( - ( - String::from(field.name()), - DaskRelDataType { - name: String::from(field.name()), - sqlType: field.data_type().clone(), - } - ) - ); + }) } - - Some(DaskTable { - name: String::from(&tableScan.table_name), - statistics: DaskStatistics { row_count: 0.0 }, - columns: cols, - }) - }, - _ => { - panic!("Ok something went wrong here!!!") - } - } -} - -/// Unfortunately PyO3 forces us to do this as placing these methods in the #[pymethods] version -/// of `impl PyLogicalPlan` causes issues with types not properly being mapped to Python from Rust -impl PyLogicalPlan { - /// Getter method for the LogicalPlan, if current_node is None return original_plan. - fn current_node(&mut self) -> LogicalPlan { - match &self.current_node { - Some(current) => current.clone(), - None => { - self.current_node = Some(self.original_plan.clone()); - self.current_node.clone().unwrap() - }, - } - } -} - - -#[pymethods] -impl PyLogicalPlan { - - /// Projection: Gets the names of the fields that should be projected - fn get_named_projects(&mut self) -> PyResult> { - match self.current_node() { - LogicalPlan::Projection(projection) => { - let mut projs: Vec = Vec::new(); - for expr in projection.expr { - projs.push(expr.clone().into()); - } - Ok(projs) - }, - _ => Err(PyErr::new::("current_node is not of type Projection")), - } - } - - /// Gets the "input" for the current LogicalPlan - pub fn get_inputs(&mut self) -> PyResult> { - let mut py_inputs: Vec = Vec::new(); - for input in self.current_node().inputs() { - py_inputs.push(input.clone().into()); - } - Ok(py_inputs) - } - - /// Examines the current_node and get the fields associated with it - pub fn get_field_names(&mut self) -> PyResult> { - let mut field_names: Vec = Vec::new(); - for field in self.current_node().schema().fields() { - field_names.push(String::from(field.name())); - } - Ok(field_names) - } - - - /// If the LogicalPlan represents access to a Table that instance is returned - /// otherwise None is returned - pub fn table(&mut self) -> PyResult { - match table_from_logical_plan(&self.current_node()) { - Some(table) => Ok(table), - None => Err(PyErr::new::("Unable to compute DaskTable from Datafusion LogicalPlan")), - } - } - - - /// Gets the Relation "type" of the current node. Ex: Projection, TableScan, etc - pub fn get_current_node_type(&mut self) -> PyResult { - match self.current_node() { - LogicalPlan::Projection(_projection) => Ok(String::from("Projection")), - LogicalPlan::Filter(_filter) => Ok(String::from("Filter")), - LogicalPlan::Window(_window) => Ok(String::from("Window")), - LogicalPlan::Aggregate(_aggregate) => Ok(String::from("Aggregate")), - LogicalPlan::Sort(_sort) => Ok(String::from("Sort")), - LogicalPlan::Join(_join) => Ok(String::from("Join")), - LogicalPlan::CrossJoin(_cross_join) => Ok(String::from("CrossJoin")), - LogicalPlan::Repartition(_repartition) => Ok(String::from("Repartition")), - LogicalPlan::Union(_union) => Ok(String::from("Union")), - LogicalPlan::TableScan(_table_scan) => Ok(String::from("TableScan")), - LogicalPlan::EmptyRelation(_empty_relation) => Ok(String::from("EmptyRelation")), - LogicalPlan::Limit(_limit) => Ok(String::from("Limit")), - LogicalPlan::CreateExternalTable(_create_external_table) => Ok(String::from("CreateExternalTable")), - LogicalPlan::CreateMemoryTable(_create_memory_table) => Ok(String::from("CreateMemoryTable")), - LogicalPlan::DropTable(_drop_table) => Ok(String::from("DropTable")), - LogicalPlan::Values(_values) => Ok(String::from("Values")), - LogicalPlan::Explain(_explain) => Ok(String::from("Explain")), - LogicalPlan::Analyze(_analyze) => Ok(String::from("Analyze")), - LogicalPlan::Extension(_extension) => Ok(String::from("Extension")), - } - } - - - /// Explain plan for the full and original LogicalPlan - pub fn explain_original(&self) -> PyResult { - Ok(format!("{}", self.original_plan.display_indent())) - } - - - /// Explain plan from the current node onward - pub fn explain_current(&mut self) -> PyResult { - Ok(format!("{}", self.current_node().display_indent())) - } - - - // pub fn plan_generator(&self) -> LogicalPlanGenerator { - // // Actually gonna test out walking the plan here .... - // let mut visitor = LogicalPlanGenerator::default(); - // self.logical_plan.accept(&mut visitor); - // // visitor.plan_steps.clone() - // visitor - // } - - /// LogicalPlan::Filter: The PyExpr, predicate, that represents the filtering condition - pub fn getCondition(&mut self) -> PyResult { - match self.current_node() { - LogicalPlan::Filter(filter) => { - Ok(filter.predicate.clone().into()) - }, - _ => panic!("Something wrong here") - } - } -} - - -#[pyclass(name = "Statement", module = "dask_planner", subclass)] -#[derive(Debug, Clone)] -pub struct PyStatement { - pub statement: Statement, -} - -impl From for Statement { - fn from(statement: PyStatement) -> Statement { - statement.statement - } -} - -impl From for PyStatement { - fn from(statement: Statement) -> PyStatement { - PyStatement { statement } - } -} - - -impl PyStatement { - pub fn new(statement: Statement) -> Self { - Self { statement } - } -} - - -#[pymethods] -impl PyStatement { - - #[staticmethod] - pub fn table_name() -> String { - String::from("Got here!!!") - } -} - -#[pyclass(name = "Query", module = "dask_planner", subclass)] -#[derive(Debug, Clone)] -pub struct PyQuery { - pub(crate) query: Query, -} - -impl From for Query { - fn from(query: PyQuery) -> Query { - query.query - } -} - -impl From for PyQuery { - fn from(query: Query) -> PyQuery { - PyQuery { query } - } -} - -#[pyclass(name = "DaskSQLNode", module = "dask_planner", subclass)] -#[derive(Debug, Clone)] -pub struct DaskSQLNode { - pub(crate) statements: Vec, -} - - -#[pyclass(name = "Select", module = "dask_planner", subclass)] -#[derive(Debug, Clone)] -pub struct PySelect { - pub(crate) select: Select, -} - -impl From for Select { - fn from(select: PySelect) -> Select { - select.select - } -} - -impl From