diff --git a/.github/workflows/ballista-python.build.yml b/.github/workflows/ballista-python.build.yml new file mode 100644 index 000000000..e788ed3ae --- /dev/null +++ b/.github/workflows/ballista-python.build.yml @@ -0,0 +1,127 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: Python Release Build +on: + push: + tags: + - "*-rc*" + +jobs: + generate-license: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - name: Generate license file + run: python ./dev/create_license.py + - uses: actions/upload-artifact@v2 + with: + name: python-wheel-license + path: LICENSE.txt + + build-python-mac-win: + needs: [generate-license] + name: Mac/Win + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.10"] + os: [macos-latest, windows-latest] + steps: + - uses: actions/checkout@v2 + + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install maturin==0.12.16 + + - run: rm LICENSE.txt + - name: Download LICENSE.txt + uses: actions/download-artifact@v2 + with: + name: python-wheel-license + path: . + + - name: Build Python package + run: maturin build --release --strip --cargo-extra-args="--locked" + + - name: List Windows wheels + if: matrix.os == 'windows-latest' + run: dir target\wheels\ + + - name: List Mac wheels + if: matrix.os != 'windows-latest' + run: find target/wheels/ + + - name: Archive wheels + uses: actions/upload-artifact@v2 + with: + name: dist + path: target/wheels/* + + build-manylinux: + needs: [generate-license] + name: Manylinux + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - run: rm LICENSE.txt + - name: Download LICENSE.txt + uses: actions/download-artifact@v2 + with: + name: python-wheel-license + path: . 
+ - run: cat LICENSE.txt + - name: Build wheels + run: | + export RUSTFLAGS='-C target-cpu=skylake' + docker run --rm -v $(pwd):/io \ + --workdir /io \ + konstin2/maturin:v0.12.16 \ + build --release --manylinux 2010 --cargo-extra-args="--locked" + - name: Archive wheels + uses: actions/upload-artifact@v2 + with: + name: dist + path: target/wheels/* + + # NOTE: PyPI publish needs to be done manually for now after release passed the vote + # release: + # name: Publish in PyPI + # needs: [build-manylinux, build-python-mac-win] + # runs-on: ubuntu-latest + # steps: + # - uses: actions/download-artifact@v2 + # - name: Publish to PyPI + # uses: pypa/gh-action-pypi-publish@master + # with: + # user: __token__ + # password: ${{ secrets.pypi_password }} diff --git a/.github/workflows/ballista-python.test.yaml b/.github/workflows/ballista-python.test.yaml new file mode 100644 index 000000000..40872ef45 --- /dev/null +++ b/.github/workflows/ballista-python.test.yaml @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: Python test +on: + push: + branches: [main] + pull_request: + branches: [main] + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +jobs: + test-matrix: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: + - "3.10" + toolchain: + - "stable" + - "beta" + # we are not yet eager to walk on the bleeding edge + # - nightly + # for 3.7, build with stable only + include: + - python-version: "3.7" + toolchain: "stable" + steps: + - uses: actions/checkout@v2 + + - name: Setup Rust Toolchain + uses: actions-rs/toolchain@v1 + id: rust-toolchain + with: + toolchain: ${{ matrix.toolchain }} + override: true + + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Cache Cargo + uses: actions/cache@v2 + with: + path: ~/.cargo + key: cargo-cache-${{ steps.rust-toolchain.outputs.rustc_hash }}-${{ hashFiles('Cargo.lock') }} + + - name: Check Formatting + uses: actions-rs/cargo@v1 + if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }} + with: + command: fmt + args: -- --check + + - name: Run Clippy + uses: actions-rs/cargo@v1 + if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }} + with: + command: clippy + args: --all-targets --all-features -- -D clippy::all + + - name: Create Virtualenv (3.10) + if: ${{ matrix.python-version == '3.10' }} + run: | + python -m venv venv + source venv/bin/activate + pip install -r requirements-310.txt + + - name: Create Virtualenv (3.7) + if: ${{ matrix.python-version == '3.7' }} + run: | + python -m venv venv + source venv/bin/activate + pip install -r requirements-37.txt + + - name: Run Python Linters + if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }} + run: | + source venv/bin/activate + flake8 --exclude venv --ignore=E501 + black --line-length 79 --diff --check . 
+ + - name: Run tests + run: | + source venv/bin/activate + maturin develop --cargo-extra-args='--locked' + RUST_BACKTRACE=1 pytest -v . diff --git a/.github/workflows/python_build.yml b/.github/workflows/python_build.yml new file mode 100644 index 000000000..6e54d1296 --- /dev/null +++ b/.github/workflows/python_build.yml @@ -0,0 +1,131 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: Python Release Build +on: + push: + tags: + - "*-rc*" + +defaults: + run: + working-directory: ./python + +jobs: + generate-license: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - name: Generate license file + run: python ../dev/create_license.py + - uses: actions/upload-artifact@v2 + with: + name: python-wheel-license + path: python/LICENSE.txt + + build-python-mac-win: + needs: [generate-license] + name: Mac/Win + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.10"] + os: [macos-latest, windows-latest] + steps: + - uses: actions/checkout@v2 + + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - uses: actions-rs/toolchain@v1 + with: + toolchain: nightly-2021-10-23 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install maturin==0.11.5 + + - run: rm LICENSE.txt + - name: Download LICENSE.txt + uses: actions/download-artifact@v2 + with: + name: python-wheel-license + path: python + + - name: Build Python package + run: maturin build --release --no-sdist --strip + + - name: List Windows wheels + if: matrix.os == 'windows-latest' + run: dir target\wheels\ + + - name: List Mac wheels + if: matrix.os != 'windows-latest' + run: find target/wheels/ + + - name: Archive wheels + uses: actions/upload-artifact@v2 + with: + name: dist + path: python/target/wheels/* + + build-manylinux: + needs: [generate-license] + name: Manylinux + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - run: rm LICENSE.txt + - name: Download LICENSE.txt + uses: actions/download-artifact@v2 + with: + name: python-wheel-license + path: python + - run: cat LICENSE.txt + - name: Build wheels + run: | + export RUSTFLAGS='-C target-cpu=skylake' + docker run --rm -v $(pwd)/..:/io \ + --workdir /io/python \ + konstin2/maturin:v0.11.2 \ + build 
--release --manylinux 2010 + - name: Archive wheels + uses: actions/upload-artifact@v2 + with: + name: dist + path: python/target/wheels/* + + # NOTE: PyPI publish needs to be done manually for now after release passed the vote + # release: + # name: Publish in PyPI + # needs: [build-manylinux, build-python-mac-win] + # runs-on: ubuntu-latest + # steps: + # - uses: actions/download-artifact@v2 + # - name: Publish to PyPI + # uses: pypa/gh-action-pypi-publish@master + # with: + # user: __token__ + # password: ${{ secrets.pypi_password }} diff --git a/.github/workflows/python_test.yaml b/.github/workflows/python_test.yaml new file mode 100644 index 000000000..01a36af87 --- /dev/null +++ b/.github/workflows/python_test.yaml @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: Python test +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Setup Rust toolchain + run: | + rustup toolchain install nightly-2021-10-23 + rustup default nightly-2021-10-23 + rustup component add rustfmt + - name: Cache Cargo + uses: actions/cache@v2 + with: + path: /home/runner/.cargo + key: cargo-maturin-cache- + - name: Cache Rust dependencies + uses: actions/cache@v2 + with: + path: /home/runner/target + key: target-maturin-cache- + - uses: actions/setup-python@v2 + with: + python-version: "3.10" + - name: Create Virtualenv + run: | + python -m venv venv + source venv/bin/activate + pip install -r python/requirements.txt + - name: Run Linters + run: | + source venv/bin/activate + flake8 python --ignore=E501 + black --line-length 79 --diff --check python + - name: Run tests + run: | + source venv/bin/activate + cd python + maturin develop + RUST_BACKTRACE=1 pytest -v . + env: + CARGO_HOME: "/home/runner/.cargo" + CARGO_TARGET_DIR: "/home/runner/target" diff --git a/Cargo.toml b/Cargo.toml index 78076d482..9b9e79324 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,7 @@ members = [ "ballista/rust/scheduler", "examples", ] -exclude = ["ballista-cli"] +exclude = ["ballista-cli", "python"] # cargo build --profile release-lto [profile.release-lto] diff --git a/python/.cargo/config b/python/.cargo/config new file mode 100644 index 000000000..0b24f30cf --- /dev/null +++ b/python/.cargo/config @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[target.x86_64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] diff --git a/python/.dockerignore b/python/.dockerignore new file mode 100644 index 000000000..08c131c2e --- /dev/null +++ b/python/.dockerignore @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +target +venv diff --git a/python/.gitignore b/python/.gitignore new file mode 100644 index 000000000..586db7c4a --- /dev/null +++ b/python/.gitignore @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +/target +venv +.venv diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md new file mode 100644 index 000000000..a27ad0adb --- /dev/null +++ b/python/CHANGELOG.md @@ -0,0 +1,86 @@ + + +# Changelog + +## [Unreleased](https://github.com/datafusion-contrib/datafusion-python/tree/HEAD) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.1...HEAD) + +**Merged pull requests:** + +- use \_\_getitem\_\_ for df column selection [\#41](https://github.com/datafusion-contrib/datafusion-python/pull/41) ([Jimexist](https://github.com/Jimexist)) +- fix demo in readme [\#40](https://github.com/datafusion-contrib/datafusion-python/pull/40) ([Jimexist](https://github.com/Jimexist)) +- Implement select_columns [\#39](https://github.com/datafusion-contrib/datafusion-python/pull/39) ([andygrove](https://github.com/andygrove)) +- update readme and changelog [\#38](https://github.com/datafusion-contrib/datafusion-python/pull/38) ([Jimexist](https://github.com/Jimexist)) +- Add PyDataFrame.explain [\#36](https://github.com/datafusion-contrib/datafusion-python/pull/36) ([andygrove](https://github.com/andygrove)) +- Release 0.5.0 [\#34](https://github.com/datafusion-contrib/datafusion-python/pull/34) ([Jimexist](https://github.com/Jimexist)) +- disable nightly in workflow [\#33](https://github.com/datafusion-contrib/datafusion-python/pull/33) ([Jimexist](https://github.com/Jimexist)) 
+- update requirements to 37 and 310, update readme [\#32](https://github.com/datafusion-contrib/datafusion-python/pull/32) ([Jimexist](https://github.com/Jimexist)) +- Add custom global allocator [\#30](https://github.com/datafusion-contrib/datafusion-python/pull/30) ([matthewmturner](https://github.com/matthewmturner)) +- Remove pandas dependency [\#25](https://github.com/datafusion-contrib/datafusion-python/pull/25) ([matthewmturner](https://github.com/matthewmturner)) +- upgrade datafusion and pyo3 [\#20](https://github.com/datafusion-contrib/datafusion-python/pull/20) ([Jimexist](https://github.com/Jimexist)) +- update maturin 0.12+ [\#17](https://github.com/datafusion-contrib/datafusion-python/pull/17) ([Jimexist](https://github.com/Jimexist)) +- Update README.md [\#16](https://github.com/datafusion-contrib/datafusion-python/pull/16) ([Jimexist](https://github.com/Jimexist)) +- apply cargo clippy --fix [\#15](https://github.com/datafusion-contrib/datafusion-python/pull/15) ([Jimexist](https://github.com/Jimexist)) +- update test workflow to include rust clippy and check [\#14](https://github.com/datafusion-contrib/datafusion-python/pull/14) ([Jimexist](https://github.com/Jimexist)) +- use maturin 0.12.6 [\#13](https://github.com/datafusion-contrib/datafusion-python/pull/13) ([Jimexist](https://github.com/Jimexist)) +- apply cargo fmt [\#12](https://github.com/datafusion-contrib/datafusion-python/pull/12) ([Jimexist](https://github.com/Jimexist)) +- use stable not nightly [\#11](https://github.com/datafusion-contrib/datafusion-python/pull/11) ([Jimexist](https://github.com/Jimexist)) +- ci: test against more compilers, setup clippy and fix clippy lints [\#9](https://github.com/datafusion-contrib/datafusion-python/pull/9) ([cpcloud](https://github.com/cpcloud)) +- Fix use of importlib.metadata and unify requirements.txt [\#8](https://github.com/datafusion-contrib/datafusion-python/pull/8) ([cpcloud](https://github.com/cpcloud)) +- Ship the Cargo.lock file in 
the source distribution [\#7](https://github.com/datafusion-contrib/datafusion-python/pull/7) ([cpcloud](https://github.com/cpcloud)) +- add \_\_version\_\_ attribute to datafusion object [\#3](https://github.com/datafusion-contrib/datafusion-python/pull/3) ([tfeda](https://github.com/tfeda)) +- fix ci by fixing directories [\#2](https://github.com/datafusion-contrib/datafusion-python/pull/2) ([Jimexist](https://github.com/Jimexist)) +- setup workflow [\#1](https://github.com/datafusion-contrib/datafusion-python/pull/1) ([Jimexist](https://github.com/Jimexist)) + +## [0.5.1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.1) (2022-03-15) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.1-rc1...0.5.1) + +## [0.5.1-rc1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.1-rc1) (2022-03-15) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0...0.5.1-rc1) + +## [0.5.0](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0) (2022-03-10) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0-rc2...0.5.0) + +## [0.5.0-rc2](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0-rc2) (2022-03-10) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0-rc1...0.5.0-rc2) + +**Closed issues:** + +- Add support for Ballista [\#37](https://github.com/datafusion-contrib/datafusion-python/issues/37) +- Implement DataFrame.explain [\#35](https://github.com/datafusion-contrib/datafusion-python/issues/35) + +## [0.5.0-rc1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0-rc1) (2022-03-09) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/4c98b8e9c3c3f8e2e6a8f2d1ffcfefda344c4680...0.5.0-rc1) + +**Closed issues:** + +- Investigate exposing additional optimizations [\#28](https://github.com/datafusion-contrib/datafusion-python/issues/28) 
+- Use custom allocator in Python build [\#27](https://github.com/datafusion-contrib/datafusion-python/issues/27) +- Why is pandas a requirement? [\#24](https://github.com/datafusion-contrib/datafusion-python/issues/24) +- Unable to build [\#18](https://github.com/datafusion-contrib/datafusion-python/issues/18) +- Setup CI against multiple Python version [\#6](https://github.com/datafusion-contrib/datafusion-python/issues/6) + +\* _This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)_ diff --git a/python/Cargo.lock b/python/Cargo.lock new file mode 100644 index 000000000..e71932b3b --- /dev/null +++ b/python/Cargo.lock @@ -0,0 +1,1607 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom 0.2.6", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35ef4730490ad1c4eae5c4325b2a95f521d023e5c885853ff7aca0a6a1631db3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "697ed7edc0f1711de49ce108c541623a0af97c6c60b2f6e2b65229847ac843c2" +dependencies = [ + "alloc-no-stdlib", +] 
+ +[[package]] +name = "arrayref" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" + +[[package]] +name = "arrayvec" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" + +[[package]] +name = "arrow" +version = "13.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6bee230122beb516ead31935a61f683715f987c6f003eff44ad6986624105a" +dependencies = [ + "bitflags", + "chrono", + "comfy-table", + "csv", + "flatbuffers", + "half", + "hex", + "indexmap", + "lazy_static", + "lexical-core", + "multiversion", + "num", + "pyo3", + "rand 0.8.5", + "regex", + "serde", + "serde_derive", + "serde_json", +] + +[[package]] +name = "async-trait" +version = "0.1.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed6aa3524a2dfcf9fe180c51eae2b58738348d819517ceadf95789c51fff7600" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "base64" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "blake2" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9cf849ee05b2ee5fba5e36f97ff8ec2533916700fc0758d40d92136a42f3388" +dependencies = [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.3.1" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a08e53fc5a564bb15bfe6fae56bd71522205f1f91893f9c0116edad6496c183f" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "digest", +] + +[[package]] +name = "block-buffer" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324" +dependencies = [ + "generic-array", +] + +[[package]] +name = "brotli" +version = "3.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ad2d4653bf5ca36ae797b1f4bb4dbddb60ce49ca4aed8a2ce4829f60425b80" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "cc" +version = "1.0.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +dependencies = [ + "jobserver", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.19" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "winapi", +] + +[[package]] +name = "comfy-table" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b103d85ca6e209388771bfb7aa6b68a7aeec4afbf6f0a0264bfbf50360e5212e" +dependencies = [ + "strum", + "strum_macros", + "unicode-width", +] + +[[package]] +name = "constant_time_eq" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" + +[[package]] +name = "cpufeatures" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59a6001667ab124aebae2a495118e11d30984c3a653e99d86d58971708cf5e4b" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crypto-common" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57952ca27b5e3606ff4dd79b0020231aaf9d6aa76dc05fd30137538c50bd3ce8" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa 0.4.8", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "datafusion" +version = "8.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8174c458d0266ba442038160fad2c98f02924e6179d6d46175f600b69abb5bb7" +dependencies = [ + "ahash", + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-data-access", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-row", + "futures", + "hashbrown 0.12.1", + "lazy_static", + "log", + "num_cpus", + "ordered-float 3.0.0", + "parking_lot", + "parquet", + "paste", + "pin-project-lite", + "pyo3", + "rand 0.8.5", + "smallvec", + "sqlparser", + "tempfile", + "tokio", + "tokio-stream", + "uuid 1.0.0", +] + +[[package]] +name = "datafusion-common" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c687e12a58d1499b19ee899fa467d122c8cc9223570af6f3eb3c4d9d9be929b" +dependencies = [ + "arrow", + "ordered-float 3.0.0", + "parquet", + "pyo3", + "sqlparser", +] + +[[package]] +name = "datafusion-data-access" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cca9e1200ddd362d97f22d185e98995ebd3737d2b3d69a200a06a5099fa699a" +dependencies = [ + "async-trait", + "chrono", + "futures", + "glob", + "parking_lot", + "tempfile", + "tokio", +] + +[[package]] +name = "datafusion-expr" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d20909d70931b88605b6f121c0ed820cec1d5b802cb51b7b5759f0421be3add8" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "sqlparser", +] + +[[package]] +name = "datafusion-physical-expr" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad9083cb20b57216430d5b8e52341782d9520ce77e65c60803e330fd09bdfa6e" +dependencies = [ + "ahash", + "arrow", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-row", + "hashbrown 0.12.1", + "lazy_static", + "md-5", + "ordered-float 3.0.0", + "paste", + "rand 0.8.5", + "regex", + "sha2", + 
"unicode-segmentation", +] + +[[package]] +name = "datafusion-python" +version = "0.6.0" +dependencies = [ + "datafusion", + "datafusion-common", + "datafusion-expr", + "mimalloc", + "pyo3", + "rand 0.7.3", + "tokio", + "uuid 0.8.2", +] + +[[package]] +name = "datafusion-row" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f649a2021eefef44e0bef6782d053148b2fef74db7592fecfe87e042d52947a" +dependencies = [ + "arrow", + "datafusion-common", + "paste", + "rand 0.8.5", +] + +[[package]] +name = "digest" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "fastrand" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" +dependencies = [ + "instant", +] + +[[package]] +name = "flatbuffers" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b428b715fdbdd1c364b84573b5fdc0f84f8e423661b9f398735278bc7f2b6a" +dependencies = [ + "bitflags", + "smallvec", + "thiserror", +] + +[[package]] +name = "flate2" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39522e96686d38f4bc984b9198e3a0613264abaebaff2c5c918bfa6b6da09af" +dependencies = [ + "cfg-if", + "crc32fast", + "libc", + "miniz_oxide", +] + +[[package]] +name = "futures" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.21" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" + +[[package]] +name = "futures-executor" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" + +[[package]] +name = "futures-macro" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" + +[[package]] +name = "futures-task" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" + +[[package]] +name = "futures-util" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.5" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.10.2+wasi-snapshot-preview1", +] + +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" + +[[package]] +name = "hashbrown" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3" +dependencies = [ + "ahash", +] + +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "indexmap" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f647032dfaa1f8b6dc29bd3edb7bbef4861b8b8007ebb118d6db284fd59f6ee" +dependencies = [ + "autocfg", + "hashbrown 0.11.2", +] + +[[package]] +name = "indoc" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05a0bd019339e5d968b37855180087b7b9d512c5046fbd244cf8c95687927d6e" + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "integer-encoding" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48dc51180a9b377fd75814d0cc02199c20f8e99433d6762f650d39cdbbd3b56f" + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "itoa" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" + +[[package]] +name = "jobserver" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" +dependencies = [ + "libc", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + 
+[[package]] +name = "lexical-core" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92912c4af2e7d9075be3e5e3122c4d7263855fa6cce34fbece4dd08e5884624d" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f518eed87c3be6debe6d26b855c97358d8a11bf05acec137e5f53080f5ad2dd8" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afc852ec67c6538bbb2b9911116a385b24510e879a69ab516e6a151b15a79168" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c72a9d52c5c4e62fa2cdc2cb6c694a39ae1382d9c2a17a466f18e272a0930eb1" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a89ec1d062e481210c309b672f73a0567b7855f21e7d2fae636df44d12e97f9" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "094060bd2a7c2ff3a16d5304a6ae82727cb3cc9d1c70f813cc73f744c319337e" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" + +[[package]] +name = "libmimalloc-sys" +version = "0.1.25" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "11ca136052550448f55df7898c6dbe651c6b574fe38a0d9ea687a9f8088a2e2c" +dependencies = [ + "cc", +] + +[[package]] +name = "lock_api" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "lz4" +version = "1.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885" +dependencies = [ + "libc", + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "md-5" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "658646b21e0b72f7866c7038ab086d3d5e1cd6271f060fd37defb241949d0582" +dependencies = [ + "digest", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "mimalloc" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f64ad83c969af2e732e907564deb0d0ed393cec4af80776f77dd77a1a427698" +dependencies = [ + "libmimalloc-sys", +] + +[[package]] +name = "miniz_oxide" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2b29bd4bc3f33391105ebee3589c19197c4271e3e5a9ec9bfe8127eeff8f082" +dependencies = [ + 
"adler", +] + +[[package]] +name = "multiversion" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "025c962a3dd3cc5e0e520aa9c612201d127dcdf28616974961a649dca64f5373" +dependencies = [ + "multiversion-macros", +] + +[[package]] +name = "multiversion-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8a3e2bde382ebf960c1f3e79689fa5941625fe9bf694a1cb64af3e85faff3af" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "num" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fbc387afefefd5e9e39493299f3069e14a140dd34dc19b4c1c1a8fddb6a790" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d41702bd167c2df5520b384281bc111a4b5efcf7fbc4c9c222c815b07e0a6a6a" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" + +[[package]] +name = "ordered-float" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7" +dependencies = [ + "num-traits", +] + +[[package]] +name = "ordered-float" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96bcbab4bfea7a59c2c0fe47211a1ac4e3e96bea6eb446d704f310bc5c732ae2" +dependencies = [ + "num-traits", +] + +[[package]] +name = "parking_lot" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-sys", +] + +[[package]] +name = "parquet" +version = "13.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6c6d737baed48775e87a69aa262f1fa2f1d6bd074dedbe9cac244b9aabf2a0b4" +dependencies = [ + "arrow", + "base64", + "brotli", + "byteorder", + "chrono", + "flate2", + "lz4", + "num", + "num-bigint", + "parquet-format", + "rand 0.8.5", + "snap", + "thrift", + "zstd", +] + +[[package]] +name = "parquet-format" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f0c06cdcd5460967c485f9c40a821746f5955ad81990533c7fae95dbd9bc0b5" +dependencies = [ + "thrift", +] + +[[package]] +name = "paste" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc" + +[[package]] +name = "pin-project-lite" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "ppv-lite86" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" + +[[package]] +name = "proc-macro2" +version = "1.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.16.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e6302e85060011447471887705bb7838f14aba43fcb06957d823739a496b3dc" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "parking_lot", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.16.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b65b546c35d8a3b1b2f0ddbac7c6a569d759f357f2b9df884f5d6b719152c8" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.16.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c275a07127c1aca33031a563e384ffdd485aee34ef131116fcd58e3430d1742b" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.16.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "284fc4485bfbcc9850a6d661d627783f18d19c2ab55880b021671c4ba83e90f7" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.16.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53bda0f58f73f5c5429693c96ed57f7abdb38fdfc28ae06da4101a257adb7faf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.3", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.3", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom 0.2.6", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "redox_syscall" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + +[[package]] +name = "rustversion" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f" + +[[package]] +name = "ryu" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "serde" +version = "1.0.137" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" + +[[package]] +name = "serde_derive" +version = "1.0.137" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c" +dependencies = [ + "indexmap", + "itoa 1.0.2", + "ryu", + "serde", +] + +[[package]] +name = "sha2" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55deaec60f81eefe3cce0dc50bda92d6d8e88f2a27df7c5033b42afeb1ed2676" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "slab" +version = "0.4.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" + +[[package]] +name = "smallvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" + +[[package]] +name = "snap" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451" + +[[package]] +name = "sqlparser" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddc2739f3a9bfc68e2f7b7695589f6cb0181c88af73ceaee0c84215cd2a2ae28" +dependencies = [ + "log", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strum" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb" + +[[package]] +name = "strum_macros" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + +[[package]] +name = "subtle" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" + +[[package]] +name = "syn" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbaf6116ab8924f39d52792136fb74fd60a80194cf1b1c6ffa6453eef1c3f942" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7fa7e55043acb85fca6b3c01485a2eeb6b69c5d21002e273c79e465f43b7ac1" + +[[package]] +name = "tempfile" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +dependencies = [ + "cfg-if", + "fastrand", + "libc", + "redox_syscall", + "remove_dir_all", + "winapi", +] + +[[package]] +name = "thiserror" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "threadpool" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" +dependencies = [ + "num_cpus", +] + +[[package]] +name = "thrift" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6d965454947cc7266d22716ebfd07b18d84ebaf35eec558586bbb2a8cb6b5b" +dependencies = [ + "byteorder", + "integer-encoding", + "log", + "ordered-float 1.1.1", + "threadpool", +] + +[[package]] +name = "tokio" +version = "1.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4903bf0427cf68dddd5aa6a93220756f8be0c34fcfa9f5e6191e103e15a31395" +dependencies = [ + "num_cpus", + "once_cell", + "parking_lot", + "pin-project-lite", + "tokio-macros", +] + +[[package]] +name = "tokio-macros" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b557f72f448c511a979e2564e55d74e6c4432fc96ff4f6241bc6bded342643b7" 
+dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-stream" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50145484efff8818b5ccd256697f36863f587da82cf8b409c53adf1e840798e3" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "typenum" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" + +[[package]] +name = "unicode-ident" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" + +[[package]] +name = "unicode-segmentation" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" + +[[package]] +name = "unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + +[[package]] +name = "unindent" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52fee519a3e570f7df377a06a1a7775cdbfb7aa460be7e08de2b1f0e69973a44" + +[[package]] +name = "uuid" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" +dependencies = [ + "getrandom 0.2.6", +] + +[[package]] +name = "uuid" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cfcd319456c4d6ea10087ed423473267e1a071f3bc0aa89f80d60997843c6f0" +dependencies = [ + "getrandom 0.2.6", +] + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +dependencies = [ + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" + +[[package]] +name = "windows_i686_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" + +[[package]] 
+name = "windows_i686_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" + +[[package]] +name = "zstd" +version = "0.11.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "5.0.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.1+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" +dependencies = [ + "cc", + "libc", +] diff --git a/python/Cargo.toml b/python/Cargo.toml new file mode 100644 index 000000000..0c3a42c3d --- /dev/null +++ b/python/Cargo.toml @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "ballista-python" +version = "0.6.0" +homepage = "https://github.com/apache/arrow" +repository = "https://github.com/apache/arrow" +authors = ["Apache Arrow "] +description = "Build and run queries against data" +readme = "README.md" +license = "Apache-2.0" +edition = "2021" +rust-version = "1.57" + +[package.metadata.maturin] +name = "ballista._internal" + +[dependencies] +ballista = { git = "https://github.com/apache/arrow-ballista", rev = "9e78b8ecd55cc3e9d46387c87098c4e6625294eb" } +datafusion = { git = "https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710", features = ["pyarrow"] } +datafusion-common = { git = "https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710" } +datafusion-expr = { git = "https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710" } + +mimalloc = { version = "*", default-features = false } +pyo3 = { version = "~0.16.5", features = ["extension-module", "abi3", "abi3-py37"] } +rand = "0.7" +tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] } +uuid = { version = "0.8", features = ["v4"] } + +[lib] +crate-type = ["cdylib", "rlib"] +name = "ballista_python" + +[profile.release] +codegen-units = 1 +lto = true diff --git a/python/LICENSE.txt b/python/LICENSE.txt new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/python/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR 
USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/python/README.md b/python/README.md index b3a2a3061..a9d7b873e 100644 --- a/python/README.md +++ b/python/README.md @@ -17,6 +17,89 @@ under the License. --> -# DataFusion in Python +# Ballista in Python -This directory is now moved to its [dedicated repository](https://github.com/datafusion-contrib/datafusion-python). +[![Python test](https://github.com/datafusion-contrib/datafusion-python/actions/workflows/test.yaml/badge.svg)](https://github.com/datafusion-contrib/datafusion-python/actions/workflows/test.yaml) + +This is a Python library that binds to [Apache Arrow](https://arrow.apache.org/) in-memory query engine [DataFusion](https://github.com/apache/arrow-datafusion). 
+ +Like PySpark, it allows you to build a plan through SQL or a DataFrame API against in-memory data, Parquet or CSV files, run it in a multi-threaded environment, and obtain the result back in Python. + +The major advantage of this library over other execution engines is that this library achieves zero-copy between Python and its execution engine: there is no cost in using UDFs, UDAFs, and collecting the results to Python apart from having to lock the GIL when running those operations. + +Its query engine, DataFusion, is written in [Rust](https://www.rust-lang.org/), which provides strong guarantees about thread safety and memory safety. + +Technically, zero-copy is achieved via the [Arrow C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html). + +## How to use it + +Simple usage: + +```python +import ballista +ctx = ballista.BallistaContext(host="host.docker.internal") +ctx.register_parquet("table", "/data") +df = ctx.sql("SELECT * FROM table LIMIT 10") +df.show() +``` + +## How to install (from pip) + +```bash +pip install ballista +# or +python -m pip install ballista +``` + +You can verify the installation by running: + +```python +>>> import ballista +>>> ballista.__version__ +'0.6.0' +``` + +## How to develop + +This assumes that you have Rust and Cargo installed. We use the workflow recommended by [pyo3](https://github.com/PyO3/pyo3) and [maturin](https://github.com/PyO3/maturin). 
+ +Bootstrap: + +```bash +# fetch this repo +git clone git@github.com:apache/arrow-ballista.git +# prepare development environment (used to build wheel / install in development) +python3 -m venv venv +# activate the venv +source venv/bin/activate +# update pip itself if necessary +python -m pip install -U pip +# install dependencies (for Python 3.8+) +python -m pip install -r requirements-310.txt +``` + +Whenever Rust code changes (your changes or via `git pull`): + +```bash +# make sure you activate the venv using "source venv/bin/activate" first +maturin develop +python -m pytest +``` + +## How to update dependencies + +To change test dependencies, edit `requirements.in` and run: + +```bash +# install pip-tools (this only needs to be done once), also consider running in venv +python -m pip install pip-tools +python -m piptools compile --generate-hashes -o requirements-310.txt +``` + +To update dependencies, run with `-U`: + +```bash +python -m piptools compile -U --generate-hashes -o requirements-310.txt +``` + +More details are available in the [pip-tools documentation](https://github.com/jazzband/pip-tools). diff --git a/python/ballista/__init__.py b/python/ballista/__init__.py new file mode 100644 index 000000000..96108f9c6 --- /dev/null +++ b/python/ballista/__init__.py @@ -0,0 +1,120 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
# See the License for the
# specific language governing permissions and limitations
# under the License.

"""Top-level API of the ``ballista`` Python package.

Re-exports the native ``DataFrame``, ``BallistaContext`` and ``Expression``
classes from ``ballista._internal`` and provides small helpers for building
expressions (``column``/``col``, ``literal``/``lit``) and user-defined
functions (``udf``, ``udaf``).
"""

from abc import ABCMeta, abstractmethod
from typing import List

try:
    import importlib.metadata as importlib_metadata
except ImportError:
    # Fallback to the backport package when the stdlib module is unavailable.
    import importlib_metadata


import datafusion

import pyarrow as pa

from ._internal import (
    DataFrame,
    BallistaContext,
    Expression,
)


__version__ = importlib_metadata.version(__name__)

# `__all__` advertises these two names, so they must exist in this namespace;
# otherwise `from ballista import *` fails with AttributeError.  They are the
# same classes that `udf` / `udaf` below instantiate.
AggregateUDF = datafusion.AggregateUDF
ScalarUDF = datafusion.ScalarUDF


__all__ = [
    "Accumulator",
    "AggregateUDF",
    "BallistaContext",
    "DataFrame",
    "Expression",
    "ScalarUDF",
    "col",
    "column",
    "lit",
    "literal",
    "udaf",
    "udf",
]


class Accumulator(metaclass=ABCMeta):
    """Abstract interface that classes passed to :func:`udaf` must implement."""

    @abstractmethod
    def state(self) -> List[pa.Scalar]:
        """Return the accumulator state as a list of pyarrow scalars."""
        pass

    @abstractmethod
    def update(self, values: pa.Array) -> None:
        """Consume a pyarrow array of input values."""
        pass

    @abstractmethod
    def merge(self, states: pa.Array) -> None:
        """Consume a pyarrow array of states."""
        pass

    @abstractmethod
    def evaluate(self) -> pa.Scalar:
        """Return the result as a pyarrow scalar."""
        pass


def column(value):
    """Build a column expression; delegates to ``Expression.column(value)``."""
    return Expression.column(value)


col = column


def literal(value):
    """Build a literal expression.

    ``value`` is wrapped with ``pa.scalar`` unless it already is a
    ``pa.Scalar``, then handed to ``Expression.literal``.
    """
    if not isinstance(value, pa.Scalar):
        value = pa.scalar(value)
    return Expression.literal(value)


lit = literal


def udf(func, input_types, return_type, volatility, name=None):
    """
    Create a new User Defined Function

    ``func`` must be callable, otherwise ``TypeError`` is raised.  When
    ``name`` is omitted, ``func.__qualname__`` is used.  All arguments are
    forwarded by keyword to ``datafusion.ScalarUDF``.
    """
    if not callable(func):
        raise TypeError("`func` argument must be callable")
    if name is None:
        name = func.__qualname__
    return datafusion.ScalarUDF(
        name=name,
        func=func,
        input_types=input_types,
        return_type=return_type,
        volatility=volatility,
    )


def udaf(accum, input_type, return_type, state_type, volatility, name=None):
    """
    Create a new User Defined Aggregate Function

    ``accum`` must be a subclass of :class:`Accumulator` (the class defined in
    this module), otherwise ``TypeError`` is raised.  When ``name`` is
    omitted, ``accum.__qualname__`` is used.  All arguments are forwarded by
    keyword to ``datafusion.AggregateUDF``.
    """
    # NOTE(review): the check is against this module's Accumulator, not one
    # that datafusion may define itself -- confirm that is intended.
    if not issubclass(accum, Accumulator):
        raise TypeError(
            "`accum` must implement the abstract base class Accumulator"
        )
    if name is None:
        name = accum.__qualname__
    return datafusion.AggregateUDF(
        name=name,
        accumulator=accum,
        input_type=input_type,
        return_type=return_type,
        state_type=state_type,
        volatility=volatility,
    )
# diff --git a/python/ballista/functions.py b/python/ballista/functions.py
# new file mode 100644
# index 000000000..782ecba22
# --- /dev/null
# +++ b/python/ballista/functions.py
# @@ -0,0 +1,23 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.


from ._internal import functions


# Module-level attribute hook: a name that is not defined in this module is
# looked up on `ballista._internal.functions`, so `ballista.functions.<name>`
# resolves to whatever that object exposes under the same name.
def __getattr__(name):
    return getattr(functions, name)


# diff --git a/python/ballista/tests/__init__.py b/python/ballista/tests/__init__.py
# new file mode 100644
# index 000000000..13a83393a
# --- /dev/null
# +++ b/python/ballista/tests/__init__.py
# @@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.
# See the License for the
# specific language governing permissions and limitations
# under the License.
# diff --git a/python/ballista/tests/conftest.py b/python/ballista/tests/conftest.py
# new file mode 100644
# index 000000000..93662a082
# --- /dev/null
# +++ b/python/ballista/tests/conftest.py
# @@ -0,0 +1,33 @@
import pytest
from datafusion import SessionContext
import pyarrow as pa

# `pa.csv` only exists once the submodule has been imported; import it here
# so the `database` fixture does not depend on another module doing so first.
import pyarrow.csv


@pytest.fixture
def ctx():
    """Provide a fresh ``SessionContext`` for each test."""
    return SessionContext()


@pytest.fixture
def database(ctx, tmp_path):
    """Write a small CSV file and register it on ``ctx`` under three names.

    The file has columns ``int``, ``str`` and ``float`` with four rows.  It is
    registered as ``csv`` (path object), ``csv1`` (path as ``str``) and
    ``csv2`` (path object plus explicit reader options).  The fixture itself
    returns nothing; tests observe the tables through ``ctx``.
    """
    path = tmp_path / "test.csv"

    table = pa.Table.from_arrays(
        [
            [1, 2, 3, 4],
            ["a", "b", "c", "d"],
            [1.1, 2.2, 3.3, 4.4],
        ],
        names=["int", "str", "float"],
    )
    pa.csv.write_csv(table, path)

    ctx.register_csv("csv", path)
    ctx.register_csv("csv1", str(path))
    ctx.register_csv(
        "csv2",
        path,
        has_header=True,
        delimiter=",",
        schema_infer_max_records=10,
    )


# diff --git a/python/ballista/tests/generic.py b/python/ballista/tests/generic.py
# new file mode 100644
# index 000000000..1f984a40a
# --- /dev/null
# +++ b/python/ballista/tests/generic.py
# @@ -0,0 +1,87 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Shared data generators used by the test modules."""

import datetime

import numpy as np
import pyarrow as pa
import pyarrow.csv

# used to write parquet files
import pyarrow.parquet as pq


def data():
    """Return a pyarrow array of 100 seeded normal samples.

    The first 50 samples are drawn around 0 and the last 50 around 50, both
    with standard deviation 0.01; the NumPy seed is fixed to 1.
    """
    np.random.seed(1)
    values = np.concatenate(
        [
            np.random.normal(0, 0.01, size=50),
            np.random.normal(50, 0.01, size=50),
        ]
    )
    return pa.array(values)


def data_with_nans():
    """Return a NumPy array of 50 seeded normal samples with NaNs mixed in.

    A random 0/1 mask (seed 0) selects the positions that are overwritten
    with NaN.  Note that the result is a NumPy array, not a pyarrow array.
    """
    np.random.seed(0)
    data = np.random.normal(0, 0.01, size=50)
    mask = np.random.randint(0, 2, size=50)
    # `np.NaN` was removed in NumPy 2.0; `np.nan` works on every version.
    data[mask == 0] = np.nan
    return data


def data_datetime(f):
    """Return a three-element timestamp array with unit ``f``.

    The values are "now", one day earlier and one day later; the mask passed
    to ``pa.array`` flags the second element.
    """
    data = [
        datetime.datetime.now(),
        datetime.datetime.now() - datetime.timedelta(days=1),
        datetime.datetime.now() + datetime.timedelta(days=1),
    ]
    return pa.array(
        data, type=pa.timestamp(f), mask=np.array([False, True, False])
    )


def data_date32():
    """Return a three-element ``date32`` array; the mask flags the second."""
    data = [
        datetime.date(2000, 1, 1),
        datetime.date(1980, 1, 1),
        datetime.date(2030, 1, 1),
    ]
    return pa.array(
        data, type=pa.date32(), mask=np.array([False, True, False])
    )


def data_timedelta(f):
    """Return a three-element duration array with unit ``f``.

    The mask passed to ``pa.array`` flags the second element.
    """
    data = [
        datetime.timedelta(days=100),
        datetime.timedelta(days=1),
        datetime.timedelta(seconds=1),
    ]
    return pa.array(
        data, type=pa.duration(f), mask=np.array([False, True, False])
    )


def data_binary_other():
    """Return the NumPy array ``[1, 0, 0]`` with dtype ``u4``."""
    return np.array([1, 0, 0], dtype="u4")


def write_parquet(path, data):
    """Write ``data`` as single column ``a`` to a Parquet file at ``path``.

    Returns ``path`` converted to ``str``.
    """
    table = pa.Table.from_arrays([data], names=["a"])
    pq.write_table(table, path)
    return str(path)


# diff --git a/python/ballista/tests/test_aggregation.py b/python/ballista/tests/test_aggregation.py
# new file mode 100644
# index 000000000..bba1ed41d
# --- /dev/null
# +++ b/python/ballista/tests/test_aggregation.py
# @@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import pyarrow as pa
import pytest

from datafusion import SessionContext, column
from datafusion import functions as f


@pytest.fixture
def df():
    """DataFrame with integer columns ``a`` = [1, 2, 3] and ``b`` = [4, 4, 6]."""
    session = SessionContext()

    # one RecordBatch, wrapped as a single partition
    arrays = [pa.array([1, 2, 3]), pa.array([4, 4, 6])]
    record_batch = pa.RecordBatch.from_arrays(arrays, names=["a", "b"])
    return session.create_dataframe([[record_batch]])


def test_built_in_aggregation(df):
    """max/min/count over ``a`` and approx_distinct over ``b``, no grouping."""
    a = column("a")
    b = column("b")
    aggregates = [f.max(a), f.min(a), f.count(a), f.approx_distinct(b)]

    batch = df.aggregate([], aggregates).collect()[0]

    assert batch.column(0) == pa.array([3])
    assert batch.column(1) == pa.array([1])
    assert batch.column(2) == pa.array([3], type=pa.uint64())
    assert batch.column(3) == pa.array([2], type=pa.uint64())


# diff --git a/python/ballista/tests/test_catalog.py b/python/ballista/tests/test_catalog.py
# new file mode 100644
# index 000000000..a9bdf72b3
# --- /dev/null
# +++ b/python/ballista/tests/test_catalog.py
# @@ -0,0 +1,40 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import pyarrow as pa
import pytest


def test_basic(ctx, database):
    """Walk catalog -> database -> table for the CSV tables on ``ctx``."""
    # an unknown catalog name is reported as KeyError
    with pytest.raises(KeyError):
        ctx.catalog("non-existent")

    default_catalog = ctx.catalog()
    assert default_catalog.names() == ["public"]

    expected_schema = pa.schema(
        [
            pa.field("int", pa.int64(), nullable=False),
            pa.field("str", pa.string(), nullable=False),
            pa.field("float", pa.float64(), nullable=False),
        ]
    )

    # the database is looked up both by explicit name and by default
    candidates = [default_catalog.database("public"), default_catalog.database()]
    for db in candidates:
        assert db.names() == {"csv1", "csv", "csv2"}

        csv_table = db.table("csv")
        assert csv_table.kind == "physical"
        assert csv_table.schema == expected_schema


# diff --git a/python/ballista/tests/test_context.py b/python/ballista/tests/test_context.py
# new file mode 100644
# index 000000000..4d4a38c9d
# --- /dev/null
# +++ b/python/ballista/tests/test_context.py
# @@ -0,0 +1,74 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.
# See the License for the
# specific language governing permissions and limitations
# under the License.

import pyarrow as pa


def test_register_record_batches(ctx):
    """A registered record batch is queryable through SQL under its name."""
    columns = [pa.array([1, 2, 3]), pa.array([4, 5, 6])]
    record_batch = pa.RecordBatch.from_arrays(columns, names=["a", "b"])

    # register the batch as an in-memory table called "t"
    ctx.register_record_batches("t", [[record_batch]])

    assert ctx.tables() == {"t"}

    first = ctx.sql("SELECT a+b, a-b FROM t").collect()[0]

    assert first.column(0) == pa.array([5, 7, 9])
    assert first.column(1) == pa.array([-3, -3, -3])


def test_create_dataframe_registers_unique_table_name(ctx):
    """``create_dataframe`` registers exactly one table named "c" + 32 hex."""
    columns = [pa.array([1, 2, 3]), pa.array([4, 5, 6])]
    record_batch = pa.RecordBatch.from_arrays(columns, names=["a", "b"])

    df = ctx.create_dataframe([[record_batch]])
    table_names = list(ctx.tables())

    assert df
    assert len(table_names) == 1

    generated = table_names[0]
    assert len(generated) == 33
    assert generated.startswith("c")
    # everything after the leading "c" must be a lowercase hexadecimal digit
    assert all(ch in "0123456789abcdef" for ch in generated[1:])


def test_register_table(ctx, database):
    """Registering an existing table under a new name adds that name."""
    public = ctx.catalog().database("public")
    assert public.names() == {"csv", "csv1", "csv2"}

    source_table = public.table("csv")
    ctx.register_table("csv3", source_table)

    assert public.names() == {"csv", "csv1", "csv2", "csv3"}


def test_deregister_table(ctx, database):
    """Deregistering a table removes its name from the database."""
    public = ctx.catalog().database("public")
    assert public.names() == {"csv", "csv1", "csv2"}

    ctx.deregister_table("csv")

    assert public.names() == {"csv1", "csv2"}


# diff --git a/python/ballista/tests/test_dataframe.py b/python/ballista/tests/test_dataframe.py
# new file mode 100644
# index 000000000..43c260ae2
# --- /dev/null
# +++ b/python/ballista/tests/test_dataframe.py
# @@ -0,0 +1,199 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pyarrow as pa +import pytest + +from datafusion import functions as f +from datafusion import DataFrame, SessionContext, column, literal, udf + + +@pytest.fixture +def df(): + ctx = SessionContext() + + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + + return ctx.create_dataframe([[batch]]) + + +@pytest.fixture +def struct_df(): + ctx = SessionContext() + + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([{"c": 1}, {"c": 2}, {"c": 3}]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + + return ctx.create_dataframe([[batch]]) + + +def test_select(df): + df = df.select( + column("a") + column("b"), + column("a") - column("b"), + ) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array([5, 7, 9]) + assert result.column(1) == pa.array([-3, -3, -3]) + + +def test_select_columns(df): + df = df.select_columns("b", "a") + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array([4, 5, 6]) + assert result.column(1) == pa.array([1, 2, 3]) + + +def test_filter(df): + df = df.select( + column("a") + 
column("b"), + column("a") - column("b"), + ).filter(column("a") > literal(2)) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array([9]) + assert result.column(1) == pa.array([-3]) + + +def test_sort(df): + df = df.sort(column("b").sort(ascending=False)) + + table = pa.Table.from_batches(df.collect()) + expected = {"a": [3, 2, 1], "b": [6, 5, 4]} + + assert table.to_pydict() == expected + + +def test_limit(df): + df = df.limit(1) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert len(result.column(0)) == 1 + assert len(result.column(1)) == 1 + + +def test_udf(df): + # is_null is a pa function over arrays + is_null = udf( + lambda x: x.is_null(), + [pa.int64()], + pa.bool_(), + volatility="immutable", + ) + + df = df.select(is_null(column("a"))) + result = df.collect()[0].column(0) + + assert result == pa.array([False, False, False]) + + +def test_join(): + ctx = SessionContext() + + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + df = ctx.create_dataframe([[batch]]) + + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2]), pa.array([8, 10])], + names=["a", "c"], + ) + df1 = ctx.create_dataframe([[batch]]) + + df = df.join(df1, join_keys=(["a"], ["a"]), how="inner") + df = df.sort(column("a").sort(ascending=True)) + table = pa.Table.from_batches(df.collect()) + + expected = {"a": [1, 2], "c": [8, 10], "b": [4, 5]} + assert table.to_pydict() == expected + + +def test_window_lead(df): + df = df.select( + column("a"), + f.alias( + f.window( + "lead", [column("b")], order_by=[f.order_by(column("b"))] + ), + "a_next", + ), + ) + + table = pa.Table.from_batches(df.collect()) + + expected = {"a": [1, 2, 3], "a_next": [5, 6, None]} + assert table.to_pydict() == expected + + +def test_get_dataframe(tmp_path): + ctx = SessionContext() + + path = tmp_path / "test.csv" + table = pa.Table.from_arrays( + [ + [1, 
def test_struct_select(struct_df):
    """Combine the struct field ``a["c"]`` with column ``b`` using ``+`` and ``-``."""
    total = column("a")["c"] + column("b")
    difference = column("a")["c"] - column("b")

    # Only the first collected batch is inspected.
    first_batch = struct_df.select(total, difference).collect()[0]

    expected_columns = ([5, 7, 9], [-3, -3, -3])
    for position, values in enumerate(expected_columns):
        assert first_batch.column(position) == pa.array(values)
def test_math_functions():
    """Compare the scalar math functions against their NumPy counterparts.

    A single float column is pushed through ``abs``, the trigonometric
    functions, ``exp`` and the logarithms, and each output column is compared
    with the matching NumPy result.  ``random()`` cannot be compared to a
    fixed value, so only its range is checked.
    """
    # Single source of truth for both the input column and the expectations.
    values = np.array([0.1, -0.7, 0.55])

    ctx = SessionContext()
    # create a RecordBatch and a new DataFrame from it
    batch = pa.RecordBatch.from_arrays(
        [pa.array(values.tolist())], names=["value"]
    )
    df = ctx.create_dataframe([[batch]])

    col_v = column("value")
    # The logarithms are evaluated on value + 1 so every input is positive.
    shifted = col_v + literal(pa.scalar(1))
    df = df.select(
        f.abs(col_v),
        f.sin(col_v),
        f.cos(col_v),
        f.tan(col_v),
        f.asin(col_v),
        f.acos(col_v),
        f.exp(col_v),
        f.ln(shifted),
        f.log2(shifted),
        f.log10(shifted),
        f.random(),
    )
    batches = df.collect()
    assert len(batches) == 1
    result = batches[0]

    # Expected values, in the same order as the selected expressions above.
    expected_columns = [
        ("abs", np.abs(values)),
        ("sin", np.sin(values)),
        ("cos", np.cos(values)),
        ("tan", np.tan(values)),
        ("asin", np.arcsin(values)),
        ("acos", np.arccos(values)),
        ("exp", np.exp(values)),
        ("ln", np.log(values + 1.0)),
        ("log2", np.log2(values + 1.0)),
        ("log10", np.log10(values + 1.0)),
    ]
    for index, (label, expected) in enumerate(expected_columns):
        np.testing.assert_array_almost_equal(
            result.column(index), expected, err_msg=f"mismatch for {label}"
        )

    # random() yields one value per row; the check used to cover only the
    # upper bound, so a negative value would have gone unnoticed.
    random_values = result.column(10).to_numpy()
    assert random_values.shape == values.shape
    assert np.all(random_values >= 0.0)
    np.testing.assert_array_less(random_values, np.ones_like(values))
result.column(2) == pa.array( + [ + b( + "3615F80C9D293ED7402687F94B22D58E" + "529B8CC7916F8FAC7FDDF7FBD5AF4CF7" + "77D3D795A7A00A16BF7E7F3FB9561EE9" + "BAAE480DA9FE7A18769E71886B03F315" + ), + b( + "8EA77393A42AB8FA92500FB077A9509C" + "C32BC95E72712EFA116EDAF2EDFAE34F" + "BB682EFDD6C5DD13C117E08BD4AAEF71" + "291D8AACE2F890273081D0677C16DF0F" + ), + b( + "3831A6A6155E509DEE59A7F451EB3532" + "4D8F8F2DF6E3708894740F98FDEE2388" + "9F4DE5ADB0C5010DFB555CDA77C8AB5D" + "C902094C52DE3278F35A75EBC25F093A" + ), + ] + ) + assert result.column(3) == pa.array( + [ + b( + "F73A5FBF881F89B814871F46E26AD3FA" + "37CB2921C5E8561618639015B3CCBB71" + ), + b( + "B792A0383FB9E7A189EC150686579532" + "854E44B71AC394831DAED169BA85CCC5" + ), + b( + "27988A0E51812297C77A433F63523334" + "6AEE29A829DCF4F46E0F58F402C6CFCB" + ), + ] + ) + assert result.column(4) == pa.array( + [ + b( + "FBC2B0516EE8744D293B980779178A35" + "08850FDCFE965985782C39601B65794F" + ), + b( + "BF73D18575A736E4037D45F9E316085B" + "86C19BE6363DE6AA789E13DEAACC1C4E" + ), + b( + "C8D11B9F7237E4034ADBCD2005735F9B" + "C4C597C75AD89F4492BEC8F77D15F7EB" + ), + ] + ) diff --git a/python/ballista/tests/test_imports.py b/python/ballista/tests/test_imports.py new file mode 100644 index 000000000..9522bc31a --- /dev/null +++ b/python/ballista/tests/test_imports.py @@ -0,0 +1,69 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
def test_import_from_functions_submodule():
    """Names can be imported from ``ballista.functions``; unknown names fail.

    The imported objects must be the very same objects that are reachable
    through the ``functions`` attribute of the package, and importing a name
    that does not exist must raise ``ImportError``.
    """
    import re

    from ballista.functions import abs, sin  # noqa

    assert functions.abs is abs
    assert functions.sin is sin

    # ``match`` is interpreted as a regular expression, so the literal message
    # is escaped; otherwise the '.' in the module path matches any character.
    msg = re.escape("cannot import name 'foobar' from 'ballista.functions'")
    with pytest.raises(ImportError, match=msg):
        from ballista.functions import foobar  # noqa
def test_indexing(df):
    """Subscripting the DataFrame with each supported key form returns a value."""
    # ``df["a", "b"]`` and ``df[("a", "b")]`` hand the same tuple to
    # ``__getitem__``; both are kept so the number of lookups is unchanged.
    keys = [
        "a",
        ("a", "b"),
        ("a", "b"),
        ["a"],
    ]
    for key in keys:
        selected = df[key]
        assert selected is not None
def test_register_csv(ctx, tmp_path):
    """Register one CSV file under several names and option combinations.

    Covers registration from a ``Path`` and from a ``str``, explicit reader
    options, an explicit schema, and rejection of a multi-character
    delimiter.
    """
    # ``pyarrow.csv`` is a submodule; import it explicitly instead of relying
    # on some other module having loaded it as a side effect.
    import pyarrow.csv as pa_csv

    path = tmp_path / "test.csv"

    table = pa.Table.from_arrays(
        [
            [1, 2, 3, 4],
            ["a", "b", "c", "d"],
            [1.1, 2.2, 3.3, 4.4],
        ],
        names=["int", "str", "float"],
    )
    pa_csv.write_csv(table, path)

    ctx.register_csv("csv", path)
    ctx.register_csv("csv1", str(path))
    ctx.register_csv(
        "csv2",
        path,
        has_header=True,
        delimiter=",",
        schema_infer_max_records=10,
    )
    alternative_schema = pa.schema(
        [
            ("some_int", pa.int16()),
            ("some_bytes", pa.string()),
            ("some_floats", pa.float32()),
        ]
    )
    ctx.register_csv("csv3", path, schema=alternative_schema)

    assert ctx.tables() == {"csv", "csv1", "csv2", "csv3"}

    # A distinct loop name avoids shadowing the pyarrow ``table`` built above.
    for table_name in ["csv", "csv1", "csv2"]:
        result = ctx.sql(
            f"SELECT COUNT(int) AS cnt FROM {table_name}"
        ).collect()
        result = pa.Table.from_batches(result)
        assert result.to_pydict() == {"cnt": [4]}

    # The explicitly supplied schema must be the one reported by the result.
    result = ctx.sql("SELECT * FROM csv3").collect()
    result = pa.Table.from_batches(result)
    assert result.schema == alternative_schema

    with pytest.raises(
        ValueError, match="Delimiter must be a single character"
    ):
        ctx.register_csv("csv4", path, delimiter="wrong")
def test_execute(ctx, tmp_path):
    """Run COUNT, WHERE, GROUP BY and ORDER BY queries on a parquet table."""
    data = [1, 1, 2, 2, 3, 11, 12]

    # single column, "a"
    path = helpers.write_parquet(tmp_path / "a.parquet", pa.array(data))
    ctx.register_parquet("t", path)

    assert ctx.tables() == {"t"}

    # count
    result = ctx.sql("SELECT COUNT(a) AS cnt FROM t").collect()

    expected = pa.array([7], pa.uint64())
    expected = [pa.RecordBatch.from_arrays([expected], ["cnt"])]
    assert result == expected

    # where
    expected = pa.array([2], pa.uint64())
    expected = [pa.RecordBatch.from_arrays([expected], ["cnt"])]
    result = ctx.sql("SELECT COUNT(a) AS cnt FROM t WHERE a > 10").collect()
    assert result == expected

    # group by
    results = ctx.sql(
        "SELECT CAST(a as int) AS a, COUNT(a) AS cnt FROM t GROUP BY a"
    ).collect()

    # group by returns batches; gather them and sort by key because the
    # order of the groups is not fixed by the query
    result_keys = []
    result_values = []
    for result in results:
        pydict = result.to_pydict()
        result_keys.extend(pydict["a"])
        result_values.extend(pydict["cnt"])

    result_keys, result_values = (
        list(t) for t in zip(*sorted(zip(result_keys, result_values)))
    )

    assert result_keys == [1, 2, 3, 11, 12]
    assert result_values == [2, 2, 1, 1, 1]

    # order by: the two largest values of ``data``, largest first.
    # The previous check compared an expected array with itself (and used
    # values unrelated to ``data``), so it could never fail.
    result = ctx.sql(
        "SELECT a, CAST(a AS int) AS a_int FROM t ORDER BY a DESC LIMIT 2"
    ).collect()
    ordered = pa.Table.from_batches(result).to_pydict()
    assert ordered == {"a": [12, 11], "a_int": [12, 11]}
@pytest.mark.parametrize(
    ("fn", "input_types", "output_type", "input_values", "expected_values"),
    [
        (
            lambda x: x,
            [pa.float64()],
            pa.float64(),
            [-1.2, None, 1.2],
            [-1.2, None, 1.2],
        ),
        (
            lambda x: x.is_null(),
            [pa.float64()],
            pa.bool_(),
            [-1.2, None, 1.2],
            [False, True, False],
        ),
    ],
)
def test_udf(
    ctx, tmp_path, fn, input_types, output_type, input_values, expected_values
):
    """Register a Python scalar UDF and call it from SQL over a parquet table."""
    # Persist the inputs so the query reads them back through table "t".
    source_column = pa.array(input_values)
    parquet_path = helpers.write_parquet(tmp_path / "a.parquet", source_column)
    ctx.register_parquet("t", parquet_path)

    # The UDF is registered as "func", the name the query below refers to.
    registered = udf(
        fn, input_types, output_type, name="func", volatility="immutable"
    )
    ctx.register_udf(registered)

    collected = ctx.sql("SELECT func(a) AS tt FROM t").collect()
    first_column = collected[0].column(0)

    assert first_column == pa.array(expected_values)
class Summarize(Accumulator):
    """
    User-defined accumulator that keeps a running sum of its inputs.

    The running total is stored as a pyarrow scalar that starts at ``0.0``.
    """

    def __init__(self):
        self._sum = pa.scalar(0.0)

    def state(self) -> List[pa.Scalar]:
        """Return the running total wrapped in a single-element list."""
        return [self._sum]

    def _accumulate(self, values: pa.Array) -> None:
        """Add the sum of ``values`` to the running total.

        ``pc.sum`` yields a null scalar when there is nothing to sum, and
        ``as_py()`` then returns ``None``.  Such batches are skipped instead
        of raising ``TypeError`` on ``float + None``.
        """
        batch_total = pc.sum(values).as_py()
        if batch_total is None:
            return
        # pyarrow scalars cannot be added directly, so go through Python.
        self._sum = pa.scalar(self._sum.as_py() + batch_total)

    def update(self, values: pa.Array) -> None:
        """Fold a batch of input values into the running total."""
        self._accumulate(values)

    def merge(self, states: pa.Array) -> None:
        """Fold partial totals from other accumulators into this one."""
        self._accumulate(states)

    def evaluate(self) -> pa.Scalar:
        """Return the running total."""
        return self._sum
Summarize, + pa.float64(), + pa.float64(), + [pa.float64()], + volatility="immutable", + ) + + df = df.aggregate([column("b")], [summarize(column("a"))]) + + batches = df.collect() + + arrays = [batch.column(1) for batch in batches] + joined = pa.concat_arrays(arrays) + assert joined == pa.array([1.0 + 2.0, 3.0]) diff --git a/python/dev/create_license.py b/python/dev/create_license.py new file mode 100644 index 000000000..2a67cb8fd --- /dev/null +++ b/python/dev/create_license.py @@ -0,0 +1,252 @@ +#!/usr/bin/python +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This file is a mirror of https://github.com/apache/arrow-datafusion/blob/master/dev/create_license.py + +import json +import subprocess + +subprocess.check_output(["cargo", "install", "cargo-license"]) +data = subprocess.check_output( + [ + "cargo", + "license", + "--avoid-build-deps", + "--avoid-dev-deps", + "--do-not-bundle", + "--json", + ] +) +data = json.loads(data) + +result = """ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" +result += "\n------------------\n\n" +result += "This software is built and contains the following software:\n\n" +result += "(automatically generated via [cargo-license](https://crates.io/crates/cargo-license))\n\n" +for item in data: + license = item["license"] + name = item["name"] + version = item["version"] + repository = item["repository"] + result += "------------------\n\n" + result += f"### {name} {version}\n* source: [{repository}]({repository})\n* license: {license}\n\n" + +with open("LICENSE.txt", "w") as f: + f.write(result) diff --git a/python/pyproject.toml b/python/pyproject.toml new file mode 100644 index 000000000..2d9d30e78 --- /dev/null +++ b/python/pyproject.toml @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[build-system] +requires = ["maturin>=0.11,<0.13"] +build-backend = "maturin" + +[project] +name = "ballista" +description = "Build and run queries against data" +readme = "README.md" +license = {file = "LICENSE.txt"} +requires-python = ">=3.6" +keywords = ["ballista", "datafusion", "dataframe", "rust", "query-engine"] +classifiers = [ + "Development Status :: 2 - Pre-Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "License :: OSI Approved", + "Operating System :: MacOS", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python", + "Programming Language :: Rust", +] +dependencies = [ + "datafusion>=0.5.2", + "pyarrow>=1", +] + +[project.urls] +documentation = "https://arrow.apache.org/ballista/python" +repository = "https://github.com/apache/arrow-ballista" + +[tool.isort] +profile = "black" + +[tool.maturin] +sdist-include = ["Cargo.lock"] diff --git a/python/requirements-310.txt b/python/requirements-310.txt new file mode 100644 index 000000000..30a2291c4 ---
/dev/null +++ b/python/requirements-310.txt @@ -0,0 +1,213 @@ +# +# This file is autogenerated by pip-compile with python 3.10 +# To update, run: +# +# pip-compile --generate-hashes +# +attrs==21.4.0 \ + --hash=sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4 \ + --hash=sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd + # via pytest +black==22.3.0 \ + --hash=sha256:06f9d8846f2340dfac80ceb20200ea5d1b3f181dd0556b47af4e8e0b24fa0a6b \ + --hash=sha256:10dbe6e6d2988049b4655b2b739f98785a884d4d6b85bc35133a8fb9a2233176 \ + --hash=sha256:2497f9c2386572e28921fa8bec7be3e51de6801f7459dffd6e62492531c47e09 \ + --hash=sha256:30d78ba6bf080eeaf0b7b875d924b15cd46fec5fd044ddfbad38c8ea9171043a \ + --hash=sha256:328efc0cc70ccb23429d6be184a15ce613f676bdfc85e5fe8ea2a9354b4e9015 \ + --hash=sha256:35020b8886c022ced9282b51b5a875b6d1ab0c387b31a065b84db7c33085ca79 \ + --hash=sha256:5795a0375eb87bfe902e80e0c8cfaedf8af4d49694d69161e5bd3206c18618bb \ + --hash=sha256:5891ef8abc06576985de8fa88e95ab70641de6c1fca97e2a15820a9b69e51b20 \ + --hash=sha256:637a4014c63fbf42a692d22b55d8ad6968a946b4a6ebc385c5505d9625b6a464 \ + --hash=sha256:67c8301ec94e3bcc8906740fe071391bce40a862b7be0b86fb5382beefecd968 \ + --hash=sha256:6d2fc92002d44746d3e7db7cf9313cf4452f43e9ea77a2c939defce3b10b5c82 \ + --hash=sha256:6ee227b696ca60dd1c507be80a6bc849a5a6ab57ac7352aad1ffec9e8b805f21 \ + --hash=sha256:863714200ada56cbc366dc9ae5291ceb936573155f8bf8e9de92aef51f3ad0f0 \ + --hash=sha256:9b542ced1ec0ceeff5b37d69838106a6348e60db7b8fdd245294dc1d26136265 \ + --hash=sha256:a6342964b43a99dbc72f72812bf88cad8f0217ae9acb47c0d4f141a6416d2d7b \ + --hash=sha256:ad4efa5fad66b903b4a5f96d91461d90b9507a812b3c5de657d544215bb7877a \ + --hash=sha256:bc58025940a896d7e5356952228b68f793cf5fcb342be703c3a2669a1488cb72 \ + --hash=sha256:cc1e1de68c8e5444e8f94c3670bb48a2beef0e91dddfd4fcc29595ebd90bb9ce \ + --hash=sha256:cee3e11161dde1b2a33a904b850b0899e0424cc331b7295f2a9698e79f9a69a0 \ + 
--hash=sha256:e3556168e2e5c49629f7b0f377070240bd5511e45e25a4497bb0073d9dda776a \ + --hash=sha256:e8477ec6bbfe0312c128e74644ac8a02ca06bcdb8982d4ee06f209be28cdf163 \ + --hash=sha256:ee8f1f7228cce7dffc2b464f07ce769f478968bfb3dd1254a4c2eeed84928aad \ + --hash=sha256:fd57160949179ec517d32ac2ac898b5f20d68ed1a9c977346efbac9c2f1e779d + # via -r requirements.in +click==8.1.3 \ + --hash=sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e \ + --hash=sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48 + # via black +flake8==4.0.1 \ + --hash=sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d \ + --hash=sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d + # via -r requirements.in +iniconfig==1.1.1 \ + --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ + --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 + # via pytest +isort==5.10.1 \ + --hash=sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7 \ + --hash=sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951 + # via -r requirements.in +maturin==0.12.16 \ + --hash=sha256:29a635699db1f4981b891a4ee51ddcae8c410136ed40103232aea3b5f62e8504 \ + --hash=sha256:4b5fe1de8b8e7ba5a9f52002b24a2d8148a23d1260c7bd59291c319ccc5b31f1 \ + --hash=sha256:54ecff17c64cf5c5dc59ff22745517ea56b791995e70008d1dcd1623ce609f78 \ + --hash=sha256:641c8ed8452cb8a288baf953be78d03e27e60189a64f00cc7bcc1731d158e8f6 \ + --hash=sha256:70a042197fdcb726c911146a1c875f65f596de122a01eeb58af10faf3bd3a2c5 \ + --hash=sha256:781abebb255061b5eda0413ecbac22b88a7ab50ecaee607fe5d8e3c55ab48e52 \ + --hash=sha256:83a8f9378c320e981412f8d367e181af22f145d489a7da0a0c3aea86cf23f048 \ + --hash=sha256:8aeb62a328bf4d9439758b59ccf5360a5f3127bbe58bedbcb6c64e888de3eb36 \ + --hash=sha256:8d11e801216f4c91b2ba9da4bad615ffc3638f80a7ba973245a0154dcfdbee64 \ + 
--hash=sha256:917f77382cdff55d2f290d0f58b7e6f4a7aaa74b58e2b61e4c67b37786d8a965 \ + --hash=sha256:97756ad5ff113478de237b029add91def0a40af0dc5e120c25e1595addd9c151 \ + --hash=sha256:9ce67a844d63d1ba8cdcf903ee2e6e0b21c0b0461b97c8737751d74002ded4c4 \ + --hash=sha256:a026515e39fd48ee5318de57ddc6841a5fbcd5169b3860fb9ac9ea9521cc6027 \ + --hash=sha256:bc4da52ef0c7e975396e7e6fb90da8858c518b4dccb810ceabec9db7ecedde57 \ + --hash=sha256:d63f60dd5dddb165f824b2d8e593dcb31300d832eb6cbc6288dd484e29dfbd89 \ + --hash=sha256:e5e4e3bfcf209ea1a6d20cade2de1ea716e17ea491a7a8b3fee0e45a10aa1e98 \ + --hash=sha256:e7e3fa53c5207c05d4148ecbc0ce7463b7757989dadebcd8ab3a61c67b874157 + # via -r requirements.in +mccabe==0.6.1 \ + --hash=sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42 \ + --hash=sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f + # via flake8 +mypy==0.950 \ + --hash=sha256:0112752a6ff07230f9ec2f71b0d3d4e088a910fdce454fdb6553e83ed0eced7d \ + --hash=sha256:0384d9f3af49837baa92f559d3fa673e6d2652a16550a9ee07fc08c736f5e6f8 \ + --hash=sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de \ + --hash=sha256:1fdeb0a0f64f2a874a4c1f5271f06e40e1e9779bf55f9567f149466fc7a55038 \ + --hash=sha256:4c653e4846f287051599ed8f4b3c044b80e540e88feec76b11044ddc5612ffed \ + --hash=sha256:563514c7dc504698fb66bb1cf897657a173a496406f1866afae73ab5b3cdb334 \ + --hash=sha256:5b231afd6a6e951381b9ef09a1223b1feabe13625388db48a8690f8daa9b71ff \ + --hash=sha256:5ce6a09042b6da16d773d2110e44f169683d8cc8687e79ec6d1181a72cb028d2 \ + --hash=sha256:5e7647df0f8fc947388e6251d728189cfadb3b1e558407f93254e35abc026e22 \ + --hash=sha256:6003de687c13196e8a1243a5e4bcce617d79b88f83ee6625437e335d89dfebe2 \ + --hash=sha256:61504b9a5ae166ba5ecfed9e93357fd51aa693d3d434b582a925338a2ff57fd2 \ + --hash=sha256:77423570c04aca807508a492037abbd72b12a1fb25a385847d191cd50b2c9605 \ + --hash=sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb \ + 
--hash=sha256:a952b8bc0ae278fc6316e6384f67bb9a396eb30aced6ad034d3a76120ebcc519 \ + --hash=sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0 \ + --hash=sha256:ca75ecf2783395ca3016a5e455cb322ba26b6d33b4b413fcdedfc632e67941dc \ + --hash=sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b \ + --hash=sha256:dd4d670eee9610bf61c25c940e9ade2d0ed05eb44227275cce88701fee014b1f \ + --hash=sha256:e19736af56947addedce4674c0971e5dceef1b5ec7d667fe86bcd2b07f8f9075 \ + --hash=sha256:eaea21d150fb26d7b4856766e7addcf929119dd19fc832b22e71d942835201ef \ + --hash=sha256:eaff8156016487c1af5ffa5304c3e3fd183edcb412f3e9c72db349faf3f6e0eb \ + --hash=sha256:ee0a36edd332ed2c5208565ae6e3a7afc0eabb53f5327e281f2ef03a6bc7687a \ + --hash=sha256:ef7beb2a3582eb7a9f37beaf38a28acfd801988cde688760aea9e6cc4832b10b + # via -r requirements.in +mypy-extensions==0.4.3 \ + --hash=sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d \ + --hash=sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8 + # via + # black + # mypy +numpy==1.22.3 \ + --hash=sha256:07a8c89a04997625236c5ecb7afe35a02af3896c8aa01890a849913a2309c676 \ + --hash=sha256:08d9b008d0156c70dc392bb3ab3abb6e7a711383c3247b410b39962263576cd4 \ + --hash=sha256:201b4d0552831f7250a08d3b38de0d989d6f6e4658b709a02a73c524ccc6ffce \ + --hash=sha256:2c10a93606e0b4b95c9b04b77dc349b398fdfbda382d2a39ba5a822f669a0123 \ + --hash=sha256:3ca688e1b9b95d80250bca34b11a05e389b1420d00e87a0d12dc45f131f704a1 \ + --hash=sha256:48a3aecd3b997bf452a2dedb11f4e79bc5bfd21a1d4cc760e703c31d57c84b3e \ + --hash=sha256:568dfd16224abddafb1cbcce2ff14f522abe037268514dd7e42c6776a1c3f8e5 \ + --hash=sha256:5bfb1bb598e8229c2d5d48db1860bcf4311337864ea3efdbe1171fb0c5da515d \ + --hash=sha256:639b54cdf6aa4f82fe37ebf70401bbb74b8508fddcf4797f9fe59615b8c5813a \ + --hash=sha256:8251ed96f38b47b4295b1ae51631de7ffa8260b5b087808ef09a39a9d66c97ab \ + --hash=sha256:92bfa69cfbdf7dfc3040978ad09a48091143cffb778ec3b03fa170c494118d75 \ 
+ --hash=sha256:97098b95aa4e418529099c26558eeb8486e66bd1e53a6b606d684d0c3616b168 \ + --hash=sha256:a3bae1a2ed00e90b3ba5f7bd0a7c7999b55d609e0c54ceb2b076a25e345fa9f4 \ + --hash=sha256:c34ea7e9d13a70bf2ab64a2532fe149a9aced424cd05a2c4ba662fd989e3e45f \ + --hash=sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18 \ + --hash=sha256:e7927a589df200c5e23c57970bafbd0cd322459aa7b1ff73b7c2e84d6e3eae62 \ + --hash=sha256:f8c1f39caad2c896bc0018f699882b345b2a63708008be29b1f355ebf6f933fe \ + --hash=sha256:f950f8845b480cffe522913d35567e29dd381b0dc7e4ce6a4a9f9156417d2430 \ + --hash=sha256:fade0d4f4d292b6f39951b6836d7a3c7ef5b2347f3c420cd9820a1d90d794802 \ + --hash=sha256:fdf3c08bce27132395d3c3ba1503cac12e17282358cb4bddc25cc46b0aca07aa + # via + # -r requirements.in + # pyarrow +packaging==21.3 \ + --hash=sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb \ + --hash=sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522 + # via pytest +pathspec==0.9.0 \ + --hash=sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a \ + --hash=sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1 + # via black +platformdirs==2.5.2 \ + --hash=sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788 \ + --hash=sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19 + # via black +pluggy==1.0.0 \ + --hash=sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159 \ + --hash=sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 + # via pytest +py==1.11.0 \ + --hash=sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719 \ + --hash=sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 + # via pytest +pyarrow==8.0.0 \ + --hash=sha256:03a10daad957970e914920b793f6a49416699e791f4c827927fd4e4d892a5d16 \ + --hash=sha256:15511ce2f50343f3fd5e9f7c30e4d004da9134e9597e93e9c96c3985928cbe82 \ + 
--hash=sha256:1dd482ccb07c96188947ad94d7536ab696afde23ad172df8e18944ec79f55055 \ + --hash=sha256:25a5f7c7f36df520b0b7363ba9f51c3070799d4b05d587c60c0adaba57763479 \ + --hash=sha256:3bd201af6e01f475f02be88cf1f6ee9856ab98c11d8bbb6f58347c58cd07be00 \ + --hash=sha256:3fee786259d986f8c046100ced54d63b0c8c9f7cdb7d1bbe07dc69e0f928141c \ + --hash=sha256:42b7982301a9ccd06e1dd4fabd2e8e5df74b93ce4c6b87b81eb9e2d86dc79871 \ + --hash=sha256:4a18a211ed888f1ac0b0ebcb99e2d9a3e913a481120ee9b1fe33d3fedb945d4e \ + --hash=sha256:51e58778fcb8829fca37fbfaea7f208d5ce7ea89ea133dd13d8ce745278ee6f0 \ + --hash=sha256:541e7845ce5f27a861eb5b88ee165d931943347eec17b9ff1e308663531c9647 \ + --hash=sha256:65c7f4cc2be195e3db09296d31a654bb6d8786deebcab00f0e2455fd109d7456 \ + --hash=sha256:69b043a3fce064ebd9fbae6abc30e885680296e5bd5e6f7353e6a87966cf2ad7 \ + --hash=sha256:6ea2c54e6b5ecd64e8299d2abb40770fe83a718f5ddc3825ddd5cd28e352cce1 \ + --hash=sha256:78a6ac39cd793582998dac88ab5c1c1dd1e6503df6672f064f33a21937ec1d8d \ + --hash=sha256:81b87b782a1366279411f7b235deab07c8c016e13f9af9f7c7b0ee564fedcc8f \ + --hash=sha256:8392b9a1e837230090fe916415ed4c3433b2ddb1a798e3f6438303c70fbabcfc \ + --hash=sha256:863be6bad6c53797129610930794a3e797cb7d41c0a30e6794a2ac0e42ce41b8 \ + --hash=sha256:8cd86e04a899bef43e25184f4b934584861d787cf7519851a8c031803d45c6d8 \ + --hash=sha256:95c7822eb37663e073da9892f3499fe28e84f3464711a3e555e0c5463fd53a19 \ + --hash=sha256:98c13b2e28a91b0fbf24b483df54a8d7814c074c2623ecef40dce1fa52f6539b \ + --hash=sha256:ba2b7aa7efb59156b87987a06f5241932914e4d5bbb74a465306b00a6c808849 \ + --hash=sha256:c9c97c8e288847e091dfbcdf8ce51160e638346f51919a9e74fe038b2e8aee62 \ + --hash=sha256:cb06cacc19f3b426681f2f6803cc06ff481e7fe5b3a533b406bc5b2138843d4f \ + --hash=sha256:ce64bc1da3109ef5ab9e4c60316945a7239c798098a631358e9ab39f6e5529e9 \ + --hash=sha256:d5ef4372559b191cafe7db8932801eee252bfc35e983304e7d60b6954576a071 \ + --hash=sha256:d6f1e1040413651819074ef5b500835c6c42e6c446532a1ddef8bc5054e8dba5 \ + 
--hash=sha256:deb400df8f19a90b662babceb6dd12daddda6bb357c216e558b207c0770c7654 \ + --hash=sha256:ea132067ec712d1b1116a841db1c95861508862b21eddbcafefbce8e4b96b867 \ + --hash=sha256:ece333706a94c1221ced8b299042f85fd88b5db802d71be70024433ddf3aecab \ + --hash=sha256:edad25522ad509e534400d6ab98cf1872d30c31bc5e947712bfd57def7af15bb + # via -r requirements.in +pycodestyle==2.8.0 \ + --hash=sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20 \ + --hash=sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f + # via flake8 +pyflakes==2.4.0 \ + --hash=sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c \ + --hash=sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e + # via flake8 +pyparsing==3.0.9 \ + --hash=sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb \ + --hash=sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc + # via packaging +pytest==7.1.2 \ + --hash=sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c \ + --hash=sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45 + # via -r requirements.in +toml==0.10.2 \ + --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ + --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f + # via -r requirements.in +tomli==2.0.1 \ + --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ + --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f + # via + # black + # maturin + # mypy + # pytest +typing-extensions==4.2.0 \ + --hash=sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708 \ + --hash=sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376 + # via mypy diff --git a/python/requirements-37.txt b/python/requirements-37.txt new file mode 100644 index 000000000..b1776eed7 --- /dev/null +++ b/python/requirements-37.txt @@ -0,0 +1,268 @@ +# +# This 
file is autogenerated by pip-compile with python 3.7 +# To update, run: +# +# pip-compile --generate-hashes +# +attrs==21.4.0 \ + --hash=sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4 \ + --hash=sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd + # via pytest +black==22.3.0 \ + --hash=sha256:06f9d8846f2340dfac80ceb20200ea5d1b3f181dd0556b47af4e8e0b24fa0a6b \ + --hash=sha256:10dbe6e6d2988049b4655b2b739f98785a884d4d6b85bc35133a8fb9a2233176 \ + --hash=sha256:2497f9c2386572e28921fa8bec7be3e51de6801f7459dffd6e62492531c47e09 \ + --hash=sha256:30d78ba6bf080eeaf0b7b875d924b15cd46fec5fd044ddfbad38c8ea9171043a \ + --hash=sha256:328efc0cc70ccb23429d6be184a15ce613f676bdfc85e5fe8ea2a9354b4e9015 \ + --hash=sha256:35020b8886c022ced9282b51b5a875b6d1ab0c387b31a065b84db7c33085ca79 \ + --hash=sha256:5795a0375eb87bfe902e80e0c8cfaedf8af4d49694d69161e5bd3206c18618bb \ + --hash=sha256:5891ef8abc06576985de8fa88e95ab70641de6c1fca97e2a15820a9b69e51b20 \ + --hash=sha256:637a4014c63fbf42a692d22b55d8ad6968a946b4a6ebc385c5505d9625b6a464 \ + --hash=sha256:67c8301ec94e3bcc8906740fe071391bce40a862b7be0b86fb5382beefecd968 \ + --hash=sha256:6d2fc92002d44746d3e7db7cf9313cf4452f43e9ea77a2c939defce3b10b5c82 \ + --hash=sha256:6ee227b696ca60dd1c507be80a6bc849a5a6ab57ac7352aad1ffec9e8b805f21 \ + --hash=sha256:863714200ada56cbc366dc9ae5291ceb936573155f8bf8e9de92aef51f3ad0f0 \ + --hash=sha256:9b542ced1ec0ceeff5b37d69838106a6348e60db7b8fdd245294dc1d26136265 \ + --hash=sha256:a6342964b43a99dbc72f72812bf88cad8f0217ae9acb47c0d4f141a6416d2d7b \ + --hash=sha256:ad4efa5fad66b903b4a5f96d91461d90b9507a812b3c5de657d544215bb7877a \ + --hash=sha256:bc58025940a896d7e5356952228b68f793cf5fcb342be703c3a2669a1488cb72 \ + --hash=sha256:cc1e1de68c8e5444e8f94c3670bb48a2beef0e91dddfd4fcc29595ebd90bb9ce \ + --hash=sha256:cee3e11161dde1b2a33a904b850b0899e0424cc331b7295f2a9698e79f9a69a0 \ + --hash=sha256:e3556168e2e5c49629f7b0f377070240bd5511e45e25a4497bb0073d9dda776a \ + 
--hash=sha256:e8477ec6bbfe0312c128e74644ac8a02ca06bcdb8982d4ee06f209be28cdf163 \ + --hash=sha256:ee8f1f7228cce7dffc2b464f07ce769f478968bfb3dd1254a4c2eeed84928aad \ + --hash=sha256:fd57160949179ec517d32ac2ac898b5f20d68ed1a9c977346efbac9c2f1e779d + # via -r requirements.in +click==8.1.3 \ + --hash=sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e \ + --hash=sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48 + # via black +flake8==4.0.1 \ + --hash=sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d \ + --hash=sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d + # via -r requirements.in +importlib-metadata==4.2.0 ; python_version < "3.8" \ + --hash=sha256:057e92c15bc8d9e8109738a48db0ccb31b4d9d5cfbee5a8670879a30be66304b \ + --hash=sha256:b7e52a1f8dec14a75ea73e0891f3060099ca1d8e6a462a4dff11c3e119ea1b31 + # via + # -r requirements.in + # click + # flake8 + # pluggy + # pytest +iniconfig==1.1.1 \ + --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ + --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 + # via pytest +isort==5.10.1 \ + --hash=sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7 \ + --hash=sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951 + # via -r requirements.in +maturin==0.12.16 \ + --hash=sha256:29a635699db1f4981b891a4ee51ddcae8c410136ed40103232aea3b5f62e8504 \ + --hash=sha256:4b5fe1de8b8e7ba5a9f52002b24a2d8148a23d1260c7bd59291c319ccc5b31f1 \ + --hash=sha256:54ecff17c64cf5c5dc59ff22745517ea56b791995e70008d1dcd1623ce609f78 \ + --hash=sha256:641c8ed8452cb8a288baf953be78d03e27e60189a64f00cc7bcc1731d158e8f6 \ + --hash=sha256:70a042197fdcb726c911146a1c875f65f596de122a01eeb58af10faf3bd3a2c5 \ + --hash=sha256:781abebb255061b5eda0413ecbac22b88a7ab50ecaee607fe5d8e3c55ab48e52 \ + --hash=sha256:83a8f9378c320e981412f8d367e181af22f145d489a7da0a0c3aea86cf23f048 \ + 
--hash=sha256:8aeb62a328bf4d9439758b59ccf5360a5f3127bbe58bedbcb6c64e888de3eb36 \ + --hash=sha256:8d11e801216f4c91b2ba9da4bad615ffc3638f80a7ba973245a0154dcfdbee64 \ + --hash=sha256:917f77382cdff55d2f290d0f58b7e6f4a7aaa74b58e2b61e4c67b37786d8a965 \ + --hash=sha256:97756ad5ff113478de237b029add91def0a40af0dc5e120c25e1595addd9c151 \ + --hash=sha256:9ce67a844d63d1ba8cdcf903ee2e6e0b21c0b0461b97c8737751d74002ded4c4 \ + --hash=sha256:a026515e39fd48ee5318de57ddc6841a5fbcd5169b3860fb9ac9ea9521cc6027 \ + --hash=sha256:bc4da52ef0c7e975396e7e6fb90da8858c518b4dccb810ceabec9db7ecedde57 \ + --hash=sha256:d63f60dd5dddb165f824b2d8e593dcb31300d832eb6cbc6288dd484e29dfbd89 \ + --hash=sha256:e5e4e3bfcf209ea1a6d20cade2de1ea716e17ea491a7a8b3fee0e45a10aa1e98 \ + --hash=sha256:e7e3fa53c5207c05d4148ecbc0ce7463b7757989dadebcd8ab3a61c67b874157 + # via -r requirements.in +mccabe==0.6.1 \ + --hash=sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42 \ + --hash=sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f + # via flake8 +mypy==0.950 \ + --hash=sha256:0112752a6ff07230f9ec2f71b0d3d4e088a910fdce454fdb6553e83ed0eced7d \ + --hash=sha256:0384d9f3af49837baa92f559d3fa673e6d2652a16550a9ee07fc08c736f5e6f8 \ + --hash=sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de \ + --hash=sha256:1fdeb0a0f64f2a874a4c1f5271f06e40e1e9779bf55f9567f149466fc7a55038 \ + --hash=sha256:4c653e4846f287051599ed8f4b3c044b80e540e88feec76b11044ddc5612ffed \ + --hash=sha256:563514c7dc504698fb66bb1cf897657a173a496406f1866afae73ab5b3cdb334 \ + --hash=sha256:5b231afd6a6e951381b9ef09a1223b1feabe13625388db48a8690f8daa9b71ff \ + --hash=sha256:5ce6a09042b6da16d773d2110e44f169683d8cc8687e79ec6d1181a72cb028d2 \ + --hash=sha256:5e7647df0f8fc947388e6251d728189cfadb3b1e558407f93254e35abc026e22 \ + --hash=sha256:6003de687c13196e8a1243a5e4bcce617d79b88f83ee6625437e335d89dfebe2 \ + --hash=sha256:61504b9a5ae166ba5ecfed9e93357fd51aa693d3d434b582a925338a2ff57fd2 \ + 
--hash=sha256:77423570c04aca807508a492037abbd72b12a1fb25a385847d191cd50b2c9605 \ + --hash=sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb \ + --hash=sha256:a952b8bc0ae278fc6316e6384f67bb9a396eb30aced6ad034d3a76120ebcc519 \ + --hash=sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0 \ + --hash=sha256:ca75ecf2783395ca3016a5e455cb322ba26b6d33b4b413fcdedfc632e67941dc \ + --hash=sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b \ + --hash=sha256:dd4d670eee9610bf61c25c940e9ade2d0ed05eb44227275cce88701fee014b1f \ + --hash=sha256:e19736af56947addedce4674c0971e5dceef1b5ec7d667fe86bcd2b07f8f9075 \ + --hash=sha256:eaea21d150fb26d7b4856766e7addcf929119dd19fc832b22e71d942835201ef \ + --hash=sha256:eaff8156016487c1af5ffa5304c3e3fd183edcb412f3e9c72db349faf3f6e0eb \ + --hash=sha256:ee0a36edd332ed2c5208565ae6e3a7afc0eabb53f5327e281f2ef03a6bc7687a \ + --hash=sha256:ef7beb2a3582eb7a9f37beaf38a28acfd801988cde688760aea9e6cc4832b10b + # via -r requirements.in +mypy-extensions==0.4.3 \ + --hash=sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d \ + --hash=sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8 + # via + # black + # mypy +numpy==1.21.6 \ + --hash=sha256:1dbe1c91269f880e364526649a52eff93ac30035507ae980d2fed33aaee633ac \ + --hash=sha256:357768c2e4451ac241465157a3e929b265dfac85d9214074985b1786244f2ef3 \ + --hash=sha256:3820724272f9913b597ccd13a467cc492a0da6b05df26ea09e78b171a0bb9da6 \ + --hash=sha256:4391bd07606be175aafd267ef9bea87cf1b8210c787666ce82073b05f202add1 \ + --hash=sha256:4aa48afdce4660b0076a00d80afa54e8a97cd49f457d68a4342d188a09451c1a \ + --hash=sha256:58459d3bad03343ac4b1b42ed14d571b8743dc80ccbf27444f266729df1d6f5b \ + --hash=sha256:5c3c8def4230e1b959671eb959083661b4a0d2e9af93ee339c7dada6759a9470 \ + --hash=sha256:5f30427731561ce75d7048ac254dbe47a2ba576229250fb60f0fb74db96501a1 \ + --hash=sha256:643843bcc1c50526b3a71cd2ee561cf0d8773f062c8cbaf9ffac9fdf573f83ab \ 
+ --hash=sha256:67c261d6c0a9981820c3a149d255a76918278a6b03b6a036800359aba1256d46 \ + --hash=sha256:67f21981ba2f9d7ba9ade60c9e8cbaa8cf8e9ae51673934480e45cf55e953673 \ + --hash=sha256:6aaf96c7f8cebc220cdfc03f1d5a31952f027dda050e5a703a0d1c396075e3e7 \ + --hash=sha256:7c4068a8c44014b2d55f3c3f574c376b2494ca9cc73d2f1bd692382b6dffe3db \ + --hash=sha256:7c7e5fa88d9ff656e067876e4736379cc962d185d5cd808014a8a928d529ef4e \ + --hash=sha256:7f5ae4f304257569ef3b948810816bc87c9146e8c446053539947eedeaa32786 \ + --hash=sha256:82691fda7c3f77c90e62da69ae60b5ac08e87e775b09813559f8901a88266552 \ + --hash=sha256:8737609c3bbdd48e380d463134a35ffad3b22dc56295eff6f79fd85bd0eeeb25 \ + --hash=sha256:9f411b2c3f3d76bba0865b35a425157c5dcf54937f82bbeb3d3c180789dd66a6 \ + --hash=sha256:a6be4cb0ef3b8c9250c19cc122267263093eee7edd4e3fa75395dfda8c17a8e2 \ + --hash=sha256:bcb238c9c96c00d3085b264e5c1a1207672577b93fa666c3b14a45240b14123a \ + --hash=sha256:bf2ec4b75d0e9356edea834d1de42b31fe11f726a81dfb2c2112bc1eaa508fcf \ + --hash=sha256:d136337ae3cc69aa5e447e78d8e1514be8c3ec9b54264e680cf0b4bd9011574f \ + --hash=sha256:d4bf4d43077db55589ffc9009c0ba0a94fa4908b9586d6ccce2e0b164c86303c \ + --hash=sha256:d6a96eef20f639e6a97d23e57dd0c1b1069a7b4fd7027482a4c5c451cd7732f4 \ + --hash=sha256:d9caa9d5e682102453d96a0ee10c7241b72859b01a941a397fd965f23b3e016b \ + --hash=sha256:dd1c8f6bd65d07d3810b90d02eba7997e32abbdf1277a481d698969e921a3be0 \ + --hash=sha256:e31f0bb5928b793169b87e3d1e070f2342b22d5245c755e2b81caa29756246c3 \ + --hash=sha256:ecb55251139706669fdec2ff073c98ef8e9a84473e51e716211b41aa0f18e656 \ + --hash=sha256:ee5ec40fdd06d62fe5d4084bef4fd50fd4bb6bfd2bf519365f569dc470163ab0 \ + --hash=sha256:f17e562de9edf691a42ddb1eb4a5541c20dd3f9e65b09ded2beb0799c0cf29bb \ + --hash=sha256:fdffbfb6832cd0b300995a2b08b8f6fa9f6e856d562800fea9182316d99c4e8e + # via + # -r requirements.in + # pyarrow +packaging==21.3 \ + --hash=sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb \ + 
--hash=sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522 + # via pytest +pathspec==0.9.0 \ + --hash=sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a \ + --hash=sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1 + # via black +platformdirs==2.5.2 \ + --hash=sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788 \ + --hash=sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19 + # via black +pluggy==1.0.0 \ + --hash=sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159 \ + --hash=sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 + # via pytest +py==1.11.0 \ + --hash=sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719 \ + --hash=sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 + # via pytest +pyarrow==8.0.0 \ + --hash=sha256:03a10daad957970e914920b793f6a49416699e791f4c827927fd4e4d892a5d16 \ + --hash=sha256:15511ce2f50343f3fd5e9f7c30e4d004da9134e9597e93e9c96c3985928cbe82 \ + --hash=sha256:1dd482ccb07c96188947ad94d7536ab696afde23ad172df8e18944ec79f55055 \ + --hash=sha256:25a5f7c7f36df520b0b7363ba9f51c3070799d4b05d587c60c0adaba57763479 \ + --hash=sha256:3bd201af6e01f475f02be88cf1f6ee9856ab98c11d8bbb6f58347c58cd07be00 \ + --hash=sha256:3fee786259d986f8c046100ced54d63b0c8c9f7cdb7d1bbe07dc69e0f928141c \ + --hash=sha256:42b7982301a9ccd06e1dd4fabd2e8e5df74b93ce4c6b87b81eb9e2d86dc79871 \ + --hash=sha256:4a18a211ed888f1ac0b0ebcb99e2d9a3e913a481120ee9b1fe33d3fedb945d4e \ + --hash=sha256:51e58778fcb8829fca37fbfaea7f208d5ce7ea89ea133dd13d8ce745278ee6f0 \ + --hash=sha256:541e7845ce5f27a861eb5b88ee165d931943347eec17b9ff1e308663531c9647 \ + --hash=sha256:65c7f4cc2be195e3db09296d31a654bb6d8786deebcab00f0e2455fd109d7456 \ + --hash=sha256:69b043a3fce064ebd9fbae6abc30e885680296e5bd5e6f7353e6a87966cf2ad7 \ + --hash=sha256:6ea2c54e6b5ecd64e8299d2abb40770fe83a718f5ddc3825ddd5cd28e352cce1 \ + 
--hash=sha256:78a6ac39cd793582998dac88ab5c1c1dd1e6503df6672f064f33a21937ec1d8d \ + --hash=sha256:81b87b782a1366279411f7b235deab07c8c016e13f9af9f7c7b0ee564fedcc8f \ + --hash=sha256:8392b9a1e837230090fe916415ed4c3433b2ddb1a798e3f6438303c70fbabcfc \ + --hash=sha256:863be6bad6c53797129610930794a3e797cb7d41c0a30e6794a2ac0e42ce41b8 \ + --hash=sha256:8cd86e04a899bef43e25184f4b934584861d787cf7519851a8c031803d45c6d8 \ + --hash=sha256:95c7822eb37663e073da9892f3499fe28e84f3464711a3e555e0c5463fd53a19 \ + --hash=sha256:98c13b2e28a91b0fbf24b483df54a8d7814c074c2623ecef40dce1fa52f6539b \ + --hash=sha256:ba2b7aa7efb59156b87987a06f5241932914e4d5bbb74a465306b00a6c808849 \ + --hash=sha256:c9c97c8e288847e091dfbcdf8ce51160e638346f51919a9e74fe038b2e8aee62 \ + --hash=sha256:cb06cacc19f3b426681f2f6803cc06ff481e7fe5b3a533b406bc5b2138843d4f \ + --hash=sha256:ce64bc1da3109ef5ab9e4c60316945a7239c798098a631358e9ab39f6e5529e9 \ + --hash=sha256:d5ef4372559b191cafe7db8932801eee252bfc35e983304e7d60b6954576a071 \ + --hash=sha256:d6f1e1040413651819074ef5b500835c6c42e6c446532a1ddef8bc5054e8dba5 \ + --hash=sha256:deb400df8f19a90b662babceb6dd12daddda6bb357c216e558b207c0770c7654 \ + --hash=sha256:ea132067ec712d1b1116a841db1c95861508862b21eddbcafefbce8e4b96b867 \ + --hash=sha256:ece333706a94c1221ced8b299042f85fd88b5db802d71be70024433ddf3aecab \ + --hash=sha256:edad25522ad509e534400d6ab98cf1872d30c31bc5e947712bfd57def7af15bb + # via -r requirements.in +pycodestyle==2.8.0 \ + --hash=sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20 \ + --hash=sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f + # via flake8 +pyflakes==2.4.0 \ + --hash=sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c \ + --hash=sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e + # via flake8 +pyparsing==3.0.9 \ + --hash=sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb \ + 
--hash=sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc + # via packaging +pytest==7.1.2 \ + --hash=sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c \ + --hash=sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45 + # via -r requirements.in +toml==0.10.2 \ + --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ + --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f + # via -r requirements.in +tomli==2.0.1 \ + --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ + --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f + # via + # black + # maturin + # mypy + # pytest +typed-ast==1.5.3 \ + --hash=sha256:20d5118e494478ef2d3a2702d964dae830aedd7b4d3b626d003eea526be18718 \ + --hash=sha256:27e46cdd01d6c3a0dd8f728b6a938a6751f7bd324817501c15fb056307f918c6 \ + --hash=sha256:27f25232e2dd0edfe1f019d6bfaaf11e86e657d9bdb7b0956db95f560cceb2b3 \ + --hash=sha256:3042bfc9ca118712c9809201f55355479cfcdc17449f9f8db5e744e9625c6805 \ + --hash=sha256:37e5349d1d5de2f4763d534ccb26809d1c24b180a477659a12c4bde9dd677d74 \ + --hash=sha256:4fff9fdcce59dc61ec1b317bdb319f8f4e6b69ebbe61193ae0a60c5f9333dc49 \ + --hash=sha256:542cd732351ba8235f20faa0fc7398946fe1a57f2cdb289e5497e1e7f48cfedb \ + --hash=sha256:5dc2c11ae59003d4a26dda637222d9ae924387f96acae9492df663843aefad55 \ + --hash=sha256:8831479695eadc8b5ffed06fdfb3e424adc37962a75925668deeb503f446c0a3 \ + --hash=sha256:8cdf91b0c466a6c43f36c1964772918a2c04cfa83df8001ff32a89e357f8eb06 \ + --hash=sha256:8e0b8528838ffd426fea8d18bde4c73bcb4167218998cc8b9ee0a0f2bfe678a6 \ + --hash=sha256:8ef1d96ad05a291f5c36895d86d1375c0ee70595b90f6bb5f5fdbee749b146db \ + --hash=sha256:9ad3b48cf2b487be140072fb86feff36801487d4abb7382bb1929aaac80638ea \ + --hash=sha256:9cc9e1457e1feb06b075c8ef8aeb046a28ec351b1958b42c7c31c989c841403a \ + 
--hash=sha256:9e237e74fd321a55c90eee9bc5d44be976979ad38a29bbd734148295c1ce7617 \ + --hash=sha256:c9f1a27592fac87daa4e3f16538713d705599b0a27dfe25518b80b6b017f0a6d \ + --hash=sha256:d64dabc6336ddc10373922a146fa2256043b3b43e61f28961caec2a5207c56d5 \ + --hash=sha256:e20d196815eeffb3d76b75223e8ffed124e65ee62097e4e73afb5fec6b993e7a \ + --hash=sha256:e34f9b9e61333ecb0f7d79c21c28aa5cd63bec15cb7e1310d7d3da6ce886bc9b \ + --hash=sha256:ed44e81517364cb5ba367e4f68fca01fba42a7a4690d40c07886586ac267d9b9 \ + --hash=sha256:ee852185964744987609b40aee1d2eb81502ae63ee8eef614558f96a56c1902d \ + --hash=sha256:f60d9de0d087454c91b3999a296d0c4558c1666771e3460621875021bf899af9 \ + --hash=sha256:f818c5b81966d4728fec14caa338e30a70dfc3da577984d38f97816c4b3071ec \ + --hash=sha256:fd5df1313915dbd70eaaa88c19030b441742e8b05e6103c631c83b75e0435ccc + # via + # black + # mypy +typing-extensions==4.2.0 \ + --hash=sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708 \ + --hash=sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376 + # via + # black + # importlib-metadata + # mypy +zipp==3.8.0 \ + --hash=sha256:56bf8aadb83c24db6c4b577e13de374ccfb67da2078beba1d037c17980bf43ad \ + --hash=sha256:c4f6e5bbf48e74f7a38e7cc5b0480ff42b0ae5178957d564d18932525d5cf099 + # via importlib-metadata diff --git a/python/requirements.in b/python/requirements.in new file mode 100644 index 000000000..7ee6a48dc --- /dev/null +++ b/python/requirements.in @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +black +flake8 +isort +maturin +mypy +numpy +pyarrow +pytest +toml +importlib_metadata; python_version < "3.8" diff --git a/python/requirements.txt b/python/requirements.txt new file mode 100644 index 000000000..358578ecb --- /dev/null +++ b/python/requirements.txt @@ -0,0 +1,282 @@ +# +# This file is autogenerated by pip-compile with python 3.10 +# To update, run: +# +# pip-compile --generate-hashes +# +attrs==21.2.0 \ + --hash=sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1 \ + --hash=sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb + # via pytest +black==21.9b0 \ + --hash=sha256:380f1b5da05e5a1429225676655dddb96f5ae8c75bdf91e53d798871b902a115 \ + --hash=sha256:7de4cfc7eb6b710de325712d40125689101d21d25283eed7e9998722cf10eb91 + # via -r requirements.in +click==8.0.3 \ + --hash=sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3 \ + --hash=sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b + # via black +flake8==4.0.1 \ + --hash=sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d \ + --hash=sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d + # via -r requirements.in +iniconfig==1.1.1 \ + --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ + --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 + # via pytest +isort==5.9.3 \ + --hash=sha256:9c2ea1e62d871267b78307fe511c0838ba0da28698c5732d54e2790bf3ba9899 \ + 
--hash=sha256:e17d6e2b81095c9db0a03a8025a957f334d6ea30b26f9ec70805411e5c7c81f2 + # via -r requirements.in +maturin==0.11.5 \ + --hash=sha256:07074778b063a439fdfd5501bd1d1823a216ec5b657d3ecde78fd7f2c4782422 \ + --hash=sha256:1ce666c386ff9c3c2b5d7d3ca4b1f9f675c38d7540ffbda0d5d5bc7d6ddde49a \ + --hash=sha256:20f9c30701c9932ed8026ceaf896fc77ecc76cebd6a182668dbc10ed597f8789 \ + --hash=sha256:3354d030b88c938a33bf407a6c0f79ccdd2cce3e1e3e4a2d0c92dc2e063adc6e \ + --hash=sha256:4191b0b7362b3025096faf126ff15cb682fbff324ac4a6ca18d55bb16e2b759b \ + --hash=sha256:70381be1585cb9fa5c02b83af80ae661aaad959e8aa0fddcfe195b004054bd69 \ + --hash=sha256:7bf96e7586bfdb5b0fadc6d662534b8a41123b33dff084fa383a81ded0ce5334 \ + --hash=sha256:ab2b3ccf66f5e0f9c3904d215835337b1bd305e79e3bf53b65bbc80a5755e01b \ + --hash=sha256:b0ac45879a7d624b47d72b093ae3370270894c19779f42aad7568a92951c5d47 \ + --hash=sha256:c2ded8b4ef9210d627bb966bc67661b7db259535f6062afe1ce5605406b50f3f \ + --hash=sha256:d78f24561a5e02f7d119b348b26e5772ad5698a43ca49e8facb9ce77cf273714 + # via -r requirements.in +mccabe==0.6.1 \ + --hash=sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42 \ + --hash=sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f + # via flake8 +mypy==0.910 \ + --hash=sha256:088cd9c7904b4ad80bec811053272986611b84221835e079be5bcad029e79dd9 \ + --hash=sha256:0aadfb2d3935988ec3815952e44058a3100499f5be5b28c34ac9d79f002a4a9a \ + --hash=sha256:119bed3832d961f3a880787bf621634ba042cb8dc850a7429f643508eeac97b9 \ + --hash=sha256:1a85e280d4d217150ce8cb1a6dddffd14e753a4e0c3cf90baabb32cefa41b59e \ + --hash=sha256:3c4b8ca36877fc75339253721f69603a9c7fdb5d4d5a95a1a1b899d8b86a4de2 \ + --hash=sha256:3e382b29f8e0ccf19a2df2b29a167591245df90c0b5a2542249873b5c1d78212 \ + --hash=sha256:42c266ced41b65ed40a282c575705325fa7991af370036d3f134518336636f5b \ + --hash=sha256:53fd2eb27a8ee2892614370896956af2ff61254c275aaee4c230ae771cadd885 \ + 
--hash=sha256:704098302473cb31a218f1775a873b376b30b4c18229421e9e9dc8916fd16150 \ + --hash=sha256:7df1ead20c81371ccd6091fa3e2878559b5c4d4caadaf1a484cf88d93ca06703 \ + --hash=sha256:866c41f28cee548475f146aa4d39a51cf3b6a84246969f3759cb3e9c742fc072 \ + --hash=sha256:a155d80ea6cee511a3694b108c4494a39f42de11ee4e61e72bc424c490e46457 \ + --hash=sha256:adaeee09bfde366d2c13fe6093a7df5df83c9a2ba98638c7d76b010694db760e \ + --hash=sha256:b6fb13123aeef4a3abbcfd7e71773ff3ff1526a7d3dc538f3929a49b42be03f0 \ + --hash=sha256:b94e4b785e304a04ea0828759172a15add27088520dc7e49ceade7834275bedb \ + --hash=sha256:c0df2d30ed496a08de5daed2a9ea807d07c21ae0ab23acf541ab88c24b26ab97 \ + --hash=sha256:c6c2602dffb74867498f86e6129fd52a2770c48b7cd3ece77ada4fa38f94eba8 \ + --hash=sha256:ceb6e0a6e27fb364fb3853389607cf7eb3a126ad335790fa1e14ed02fba50811 \ + --hash=sha256:d9dd839eb0dc1bbe866a288ba3c1afc33a202015d2ad83b31e875b5905a079b6 \ + --hash=sha256:e4dab234478e3bd3ce83bac4193b2ecd9cf94e720ddd95ce69840273bf44f6de \ + --hash=sha256:ec4e0cd079db280b6bdabdc807047ff3e199f334050db5cbb91ba3e959a67504 \ + --hash=sha256:ecd2c3fe726758037234c93df7e98deb257fd15c24c9180dacf1ef829da5f921 \ + --hash=sha256:ef565033fa5a958e62796867b1df10c40263ea9ded87164d67572834e57a174d + # via -r requirements.in +mypy-extensions==0.4.3 \ + --hash=sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d \ + --hash=sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8 + # via + # black + # mypy +numpy==1.21.3 \ + --hash=sha256:043e83bfc274649c82a6f09836943e4a4aebe5e33656271c7dbf9621dd58b8ec \ + --hash=sha256:160ccc1bed3a8371bf0d760971f09bfe80a3e18646620e9ded0ad159d9749baa \ + --hash=sha256:188031f833bbb623637e66006cf75e933e00e7231f67e2b45cf8189612bb5dc3 \ + --hash=sha256:28f15209fb535dd4c504a7762d3bc440779b0e37d50ed810ced209e5cea60d96 \ + --hash=sha256:29fb3dcd0468b7715f8ce2c0c2d9bbbaf5ae686334951343a41bd8d155c6ea27 \ + --hash=sha256:2a6ee9620061b2a722749b391c0d80a0e2ae97290f1b32e28d5a362e21941ee4 \ 
+ --hash=sha256:300321e3985c968e3ae7fbda187237b225f3ffe6528395a5b7a5407f73cf093e \ + --hash=sha256:32437f0b275c1d09d9c3add782516413e98cd7c09e6baf4715cbce781fc29912 \ + --hash=sha256:3c09418a14471c7ae69ba682e2428cae5b4420a766659605566c0fa6987f6b7e \ + --hash=sha256:49c6249260890e05b8111ebfc391ed58b3cb4b33e63197b2ec7f776e45330721 \ + --hash=sha256:4cc9b512e9fb590797474f58b7f6d1f1b654b3a94f4fa8558b48ca8b3cfc97cf \ + --hash=sha256:508b0b513fa1266875524ba8a9ecc27b02ad771fe1704a16314dc1a816a68737 \ + --hash=sha256:50cd26b0cf6664cb3b3dd161ba0a09c9c1343db064e7c69f9f8b551f5104d654 \ + --hash=sha256:5c4193f70f8069550a1788bd0cd3268ab7d3a2b70583dfe3b2e7f421e9aace06 \ + --hash=sha256:5dfe9d6a4c39b8b6edd7990091fea4f852888e41919d0e6722fe78dd421db0eb \ + --hash=sha256:63571bb7897a584ca3249c86dd01c10bcb5fe4296e3568b2e9c1a55356b6410e \ + --hash=sha256:75621882d2230ab77fb6a03d4cbccd2038511491076e7964ef87306623aa5272 \ + --hash=sha256:75eb7cadc8da49302f5b659d40ba4f6d94d5045fbd9569c9d058e77b0514c9e4 \ + --hash=sha256:88a5d6b268e9ad18f3533e184744acdaa2e913b13148160b1152300c949bbb5f \ + --hash=sha256:8a10968963640e75cc0193e1847616ab4c718e83b6938ae74dea44953950f6b7 \ + --hash=sha256:90bec6a86b348b4559b6482e2b684db4a9a7eed1fa054b86115a48d58fbbf62a \ + --hash=sha256:98339aa9911853f131de11010f6dd94c8cec254d3d1f7261528c3b3e3219f139 \ + --hash=sha256:a99a6b067e5190ac6d12005a4d85aa6227c5606fa93211f86b1dafb16233e57d \ + --hash=sha256:bffa2eee3b87376cc6b31eee36d05349571c236d1de1175b804b348dc0941e3f \ + --hash=sha256:c6c2d535a7beb1f8790aaa98fd089ceab2e3dd7ca48aca0af7dc60e6ef93ffe1 \ + --hash=sha256:cc14e7519fab2a4ed87d31f99c31a3796e4e1fe63a86ebdd1c5a1ea78ebd5896 \ + --hash=sha256:dd0482f3fc547f1b1b5d6a8b8e08f63fdc250c58ce688dedd8851e6e26cff0f3 \ + --hash=sha256:dde972a1e11bb7b702ed0e447953e7617723760f420decb97305e66fb4afc54f \ + --hash=sha256:e54af82d68ef8255535a6cdb353f55d6b8cf418a83e2be3569243787a4f4866f \ + --hash=sha256:e606e6316911471c8d9b4618e082635cfe98876007556e89ce03d52ff5e8fcf0 \ + 
--hash=sha256:f41b018f126aac18583956c54544db437f25c7ee4794bcb23eb38bef8e5e192a \ + --hash=sha256:f8f4625536926a155b80ad2bbff44f8cc59e9f2ad14cdda7acf4c135b4dc8ff2 \ + --hash=sha256:fe52dbe47d9deb69b05084abd4b0df7abb39a3c51957c09f635520abd49b29dd + # via + # -r requirements.in + # pandas + # pyarrow +packaging==21.0 \ + --hash=sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7 \ + --hash=sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14 + # via pytest +pandas==1.3.4 \ + --hash=sha256:003ba92db58b71a5f8add604a17a059f3068ef4e8c0c365b088468d0d64935fd \ + --hash=sha256:10e10a2527db79af6e830c3d5842a4d60383b162885270f8cffc15abca4ba4a9 \ + --hash=sha256:22808afb8f96e2269dcc5b846decacb2f526dd0b47baebc63d913bf847317c8f \ + --hash=sha256:2d1dc09c0013d8faa7474574d61b575f9af6257ab95c93dcf33a14fd8d2c1bab \ + --hash=sha256:35c77609acd2e4d517da41bae0c11c70d31c87aae8dd1aabd2670906c6d2c143 \ + --hash=sha256:372d72a3d8a5f2dbaf566a5fa5fa7f230842ac80f29a931fb4b071502cf86b9a \ + --hash=sha256:42493f8ae67918bf129869abea8204df899902287a7f5eaf596c8e54e0ac7ff4 \ + --hash=sha256:5298a733e5bfbb761181fd4672c36d0c627320eb999c59c65156c6a90c7e1b4f \ + --hash=sha256:5ba0aac1397e1d7b654fccf263a4798a9e84ef749866060d19e577e927d66e1b \ + --hash=sha256:a2aa18d3f0b7d538e21932f637fbfe8518d085238b429e4790a35e1e44a96ffc \ + --hash=sha256:a388960f979665b447f0847626e40f99af8cf191bce9dc571d716433130cb3a7 \ + --hash=sha256:a51528192755f7429c5bcc9e80832c517340317c861318fea9cea081b57c9afd \ + --hash=sha256:b528e126c13816a4374e56b7b18bfe91f7a7f6576d1aadba5dee6a87a7f479ae \ + --hash=sha256:c1aa4de4919358c5ef119f6377bc5964b3a7023c23e845d9db7d9016fa0c5b1c \ + --hash=sha256:c2646458e1dce44df9f71a01dc65f7e8fa4307f29e5c0f2f92c97f47a5bf22f5 \ + --hash=sha256:d47750cf07dee6b55d8423471be70d627314277976ff2edd1381f02d52dbadf9 \ + --hash=sha256:d99d2350adb7b6c3f7f8f0e5dfb7d34ff8dd4bc0a53e62c445b7e43e163fce63 \ + 
--hash=sha256:dd324f8ee05925ee85de0ea3f0d66e1362e8c80799eb4eb04927d32335a3e44a \ + --hash=sha256:eaca36a80acaacb8183930e2e5ad7f71539a66805d6204ea88736570b2876a7b \ + --hash=sha256:f567e972dce3bbc3a8076e0b675273b4a9e8576ac629149cf8286ee13c259ae5 \ + --hash=sha256:fe48e4925455c964db914b958f6e7032d285848b7538a5e1b19aeb26ffaea3ec + # via -r requirements.in +pathspec==0.9.0 \ + --hash=sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a \ + --hash=sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1 + # via black +platformdirs==2.4.0 \ + --hash=sha256:367a5e80b3d04d2428ffa76d33f124cf11e8fff2acdaa9b43d545f5c7d661ef2 \ + --hash=sha256:8868bbe3c3c80d42f20156f22e7131d2fb321f5bc86a2a345375c6481a67021d + # via black +pluggy==1.0.0 \ + --hash=sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159 \ + --hash=sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 + # via pytest +py==1.10.0 \ + --hash=sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3 \ + --hash=sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a + # via pytest +pyarrow==6.0.0 \ + --hash=sha256:004185e0babc6f3c3fba6ba4f106e406a0113d0f82bb9ad9a8571a1978c45d04 \ + --hash=sha256:0204e80777ab8f4e9abd3a765a8ec07ed1e3c4630bacda50d2ce212ef0f3826f \ + --hash=sha256:072c1a0fca4509eefd7d018b78542fb7e5c63aaf5698f1c0a6e45628ae17ba44 \ + --hash=sha256:15dc0d673d3f865ca63c877bd7a2eced70b0a08969fb733a28247134b8a1f18b \ + --hash=sha256:1c38263ea438a1666b13372e7565450cfeec32dbcd1c2595749476a58465eaec \ + --hash=sha256:281ce5fa03621d786a9beb514abb09846db7f0221b50eabf543caa24037eaacd \ + --hash=sha256:2d2c681659396c745e4f1988d5dd41dcc3ad557bb8d4a8c2e44030edafc08a91 \ + --hash=sha256:376c4b5f248ae63df21fe15c194e9013753164be2d38f4b3fb8bde63ac5a1958 \ + --hash=sha256:465f87fa0be0b2928b2beeba22b5813a0203fb05d90fd8563eea48e08ecc030e \ + --hash=sha256:477c746ef42c039348a288584800e299456c80c5691401bb9b19aa9c02a427b7 \ + 
--hash=sha256:5144bd9db2920c7cb566c96462d62443cc239104f94771d110f74393f2fb42a2 \ + --hash=sha256:5408fa8d623e66a0445f3fb0e4027fd219bf99bfb57422d543d7b7876e2c5b55 \ + --hash=sha256:5be62679201c441356d3f2a739895dcc8d4d299f2a6eabcd2163bfb6a898abba \ + --hash=sha256:5c666bc6a1cebf01206e2dc1ab05f25f39f35d3a499e0ef5cd635225e07306ca \ + --hash=sha256:6163d82cca7541774b00503c295fe86a1722820eddb958b57f091bb6f5b0a6db \ + --hash=sha256:6a1d9a2f4ee812ed0bd4182cabef99ea914ac297274f0de086f2488093d284ef \ + --hash=sha256:7a683f71b848eb6310b4ec48c0def55dac839e9994c1ac874c9b2d3d5625def1 \ + --hash=sha256:82fe80309e01acf29e3943a1f6d3c98ec109fe1d356bc1ac37d639bcaadcf684 \ + --hash=sha256:8c23f8cdecd3d9e49f9b0f9a651ae5549d1d32fd4901fb1bdc2d327edfba844f \ + --hash=sha256:8d41dfb09ba9236cca6245f33088eb42f3c54023da281139241e0f9f3b4b754e \ + --hash=sha256:a19e58dfb04e451cd8b7bdec3ac8848373b95dfc53492c9a69789aa9074a3c1b \ + --hash=sha256:a50d2f77b86af38ceabf45617208b9105d20e7a5eebc584e7c8c0acededd82ce \ + --hash=sha256:a5bed4f948c032c40597302e9bdfa65f62295240306976ecbe43a54924c6f94f \ + --hash=sha256:ac941a147d14993987cc8b605b721735a34b3e54d167302501fb4db1ad7382c7 \ + --hash=sha256:b86d175262db1eb46afdceb36d459409eb6f8e532d3dec162f8bf572c7f57623 \ + --hash=sha256:bf3400780c4d3c9cb43b1e8a1aaf2e1b7199a0572d0a645529d2784e4d0d8497 \ + --hash=sha256:c7a6e7e0bf8779e9c3428ced85507541f3da9a0675e2f4781d4eb2c7042cbf81 \ + --hash=sha256:cc1d4a70efd583befe92d4ea6f74ed2e0aa31ccdde767cd5cae8e77c65a1c2d4 \ + --hash=sha256:d046dc78a9337baa6415be915c5a16222505233e238a1017f368243c89817eea \ + --hash=sha256:da7860688c33ca88ac05f1a487d32d96d9caa091412496c35f3d1d832145675a \ + --hash=sha256:ddf2e6e3b321adaaf716f2d5af8e92d205a9671e0cb7c0779710a567fd1dd580 \ + --hash=sha256:e81508239a71943759cee272ce625ae208092dd36ef2c6713fccee30bbcf52bb \ + --hash=sha256:ea64a48a85c631eb2a0ea13ccdec5143c85b5897836b16331ee4289d27a57247 \ + --hash=sha256:ed0be080cf595ea15ff1c9ff4097bbf1fcc4b50847d98c0a3c0412fbc6ede7e9 \ + 
--hash=sha256:fb701ec4a94b92102606d4e88f0b8eba34f09a5ad8e014eaa4af76f42b7f62ae \ + --hash=sha256:fbda7595f24a639bcef3419ecfac17216efacb09f7b0f1b4c4c97f900d65ca0e + # via -r requirements.in +pycodestyle==2.8.0 \ + --hash=sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20 \ + --hash=sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f + # via flake8 +pyflakes==2.4.0 \ + --hash=sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c \ + --hash=sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e + # via flake8 +pyparsing==3.0.3 \ + --hash=sha256:9e3511118010f112a4b4b435ae50e1eaa610cda191acb9e421d60cf5fde83455 \ + --hash=sha256:f8d3fe9fc404576c5164f0f0c4e382c96b85265e023c409c43d48f65da9d60d0 + # via packaging +pytest==6.2.5 \ + --hash=sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89 \ + --hash=sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134 + # via -r requirements.in +python-dateutil==2.8.2 \ + --hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \ + --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 + # via pandas +pytz==2021.3 \ + --hash=sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c \ + --hash=sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326 + # via pandas +regex==2021.10.23 \ + --hash=sha256:0c186691a7995ef1db61205e00545bf161fb7b59cdb8c1201c89b333141c438a \ + --hash=sha256:0dcc0e71118be8c69252c207630faf13ca5e1b8583d57012aae191e7d6d28b84 \ + --hash=sha256:0f7552429dd39f70057ac5d0e897e5bfe211629652399a21671e53f2a9693a4e \ + --hash=sha256:129472cd06062fb13e7b4670a102951a3e655e9b91634432cfbdb7810af9d710 \ + --hash=sha256:13ec99df95003f56edcd307db44f06fbeb708c4ccdcf940478067dd62353181e \ + --hash=sha256:1f2b59c28afc53973d22e7bc18428721ee8ca6079becf1b36571c42627321c65 \ + --hash=sha256:2b20f544cbbeffe171911f6ce90388ad36fe3fad26b7c7a35d4762817e9ea69c \ 
+ --hash=sha256:2fb698037c35109d3c2e30f2beb499e5ebae6e4bb8ff2e60c50b9a805a716f79 \ + --hash=sha256:34d870f9f27f2161709054d73646fc9aca49480617a65533fc2b4611c518e455 \ + --hash=sha256:391703a2abf8013d95bae39145d26b4e21531ab82e22f26cd3a181ee2644c234 \ + --hash=sha256:450dc27483548214314640c89a0f275dbc557968ed088da40bde7ef8fb52829e \ + --hash=sha256:45b65d6a275a478ac2cbd7fdbf7cc93c1982d613de4574b56fd6972ceadb8395 \ + --hash=sha256:5095a411c8479e715784a0c9236568ae72509450ee2226b649083730f3fadfc6 \ + --hash=sha256:530fc2bbb3dc1ebb17f70f7b234f90a1dd43b1b489ea38cea7be95fb21cdb5c7 \ + --hash=sha256:56f0c81c44638dfd0e2367df1a331b4ddf2e771366c4b9c5d9a473de75e3e1c7 \ + --hash=sha256:5e9c9e0ce92f27cef79e28e877c6b6988c48b16942258f3bc55d39b5f911df4f \ + --hash=sha256:6d7722136c6ed75caf84e1788df36397efdc5dbadab95e59c2bba82d4d808a4c \ + --hash=sha256:74d071dbe4b53c602edd87a7476ab23015a991374ddb228d941929ad7c8c922e \ + --hash=sha256:7b568809dca44cb75c8ebb260844ea98252c8c88396f9d203f5094e50a70355f \ + --hash=sha256:80bb5d2e92b2258188e7dcae5b188c7bf868eafdf800ea6edd0fbfc029984a88 \ + --hash=sha256:8d1cdcda6bd16268316d5db1038965acf948f2a6f43acc2e0b1641ceab443623 \ + --hash=sha256:9f665677e46c5a4d288ece12fdedf4f4204a422bb28ff05f0e6b08b7447796d1 \ + --hash=sha256:a30513828180264294953cecd942202dfda64e85195ae36c265daf4052af0464 \ + --hash=sha256:a7a986c45d1099a5de766a15de7bee3840b1e0e1a344430926af08e5297cf666 \ + --hash=sha256:a940ca7e7189d23da2bfbb38973832813eab6bd83f3bf89a977668c2f813deae \ + --hash=sha256:ab7c5684ff3538b67df3f93d66bd3369b749087871ae3786e70ef39e601345b0 \ + --hash=sha256:be04739a27be55631069b348dda0c81d8ea9822b5da10b8019b789e42d1fe452 \ + --hash=sha256:c0938ddd60cc04e8f1faf7a14a166ac939aac703745bfcd8e8f20322a7373019 \ + --hash=sha256:cb46b542133999580ffb691baf67410306833ee1e4f58ed06b6a7aaf4e046952 \ + --hash=sha256:d134757a37d8640f3c0abb41f5e68b7cf66c644f54ef1cb0573b7ea1c63e1509 \ + --hash=sha256:de557502c3bec8e634246588a94e82f1ee1b9dfcfdc453267c4fb652ff531570 \ + 
--hash=sha256:ded0c4a3eee56b57fcb2315e40812b173cafe79d2f992d50015f4387445737fa \ + --hash=sha256:e1dae12321b31059a1a72aaa0e6ba30156fe7e633355e445451e4021b8e122b6 \ + --hash=sha256:eb672217f7bd640411cfc69756ce721d00ae600814708d35c930930f18e8029f \ + --hash=sha256:ee684f139c91e69fe09b8e83d18b4d63bf87d9440c1eb2eeb52ee851883b1b29 \ + --hash=sha256:f3f9a91d3cc5e5b0ddf1043c0ae5fa4852f18a1c0050318baf5fc7930ecc1f9c + # via black +six==1.16.0 \ + --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ + --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 + # via python-dateutil +toml==0.10.2 \ + --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ + --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f + # via + # -r requirements.in + # maturin + # mypy + # pytest +tomli==1.2.2 \ + --hash=sha256:c6ce0015eb38820eaf32b5db832dbc26deb3dd427bd5f6556cf0acac2c214fee \ + --hash=sha256:f04066f68f5554911363063a30b108d2b5a5b1a010aa8b6132af78489fe3aade + # via black +typing-extensions==3.10.0.2 \ + --hash=sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e \ + --hash=sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7 \ + --hash=sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34 + # via + # black + # mypy diff --git a/python/rust-toolchain b/python/rust-toolchain new file mode 100644 index 000000000..12b27c03a --- /dev/null +++ b/python/rust-toolchain @@ -0,0 +1 @@ +nightly-2021-10-23 diff --git a/python/src/ballista_context.rs b/python/src/ballista_context.rs new file mode 100644 index 000000000..e3bf21923 --- /dev/null +++ b/python/src/ballista_context.rs @@ -0,0 +1,125 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use std::path::PathBuf; + +use crate::errors::BallistaError; +use crate::utils::wait_for_future; + +use crate::dataframe::PyDataFrame; +use ballista::prelude::*; +use datafusion::arrow::datatypes::Schema; +use datafusion::prelude::{AvroReadOptions, CsvReadOptions, ParquetReadOptions}; + +/// `PyBallistaContext` is able to plan and execute DataFusion plans. +/// It has a powerful optimizer, a physical planner for local execution, and a +/// multi-threaded execution engine to perform the execution. +#[pyclass(name = "BallistaContext", module = "ballista", subclass, unsendable)] +pub(crate) struct PyBallistaContext { + ctx: BallistaContext, +} + +#[pymethods] +impl PyBallistaContext { + #[new] + #[args(port = "50050")] + fn new(py: Python, host: &str, port: u16) -> PyResult { + let config = BallistaConfig::builder() + .set("ballista.shuffle.partitions", "4") + .build() + .map_err(BallistaError::from)?; + + let result = BallistaContext::remote(host, port, &config); + let ctx = wait_for_future(py, result).map_err(BallistaError::from)?; + + Ok(PyBallistaContext { ctx }) + } + + /// Returns a PyDataFrame whose plan corresponds to the SQL statement. 
+ fn sql(&mut self, query: &str, py: Python) -> PyResult { + let ctx = &self.ctx; + + let result = ctx.sql(query); + let df = wait_for_future(py, result).map_err(BallistaError::from)?; + Ok(PyDataFrame::new(df)) + } + + #[allow(clippy::too_many_arguments)] + #[args( + schema = "None", + has_header = "true", + delimiter = "\",\"", + schema_infer_max_records = "1000", + file_extension = "\".csv\"" + )] + fn register_csv( + &mut self, + name: &str, + path: PathBuf, + schema: Option, + has_header: bool, + delimiter: &str, + schema_infer_max_records: usize, + file_extension: &str, + py: Python, + ) -> PyResult<()> { + let ctx = &self.ctx; + + let path = path + .to_str() + .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; + + let delimiter = delimiter.as_bytes(); + if delimiter.len() != 1 { + return Err(PyValueError::new_err( + "Delimiter must be a single character", + )); + } + + let mut options = CsvReadOptions::new() + .has_header(has_header) + .delimiter(delimiter[0]) + .schema_infer_max_records(schema_infer_max_records) + .file_extension(file_extension); + options.schema = schema.as_ref(); + + let result = ctx.register_csv(name, path, options); + wait_for_future(py, result).map_err(BallistaError::from)?; + + Ok(()) + } + + fn register_avro(&mut self, name: &str, path: &str, py: Python) -> PyResult<()> { + let ctx = &self.ctx; + + let result = ctx.register_avro(name, path, AvroReadOptions::default()); + wait_for_future(py, result).map_err(BallistaError::from)?; + + Ok(()) + } + + fn register_parquet(&mut self, name: &str, path: &str, py: Python) -> PyResult<()> { + let ctx = &self.ctx; + + let result = ctx.register_parquet(name, path, ParquetReadOptions::default()); + wait_for_future(py, result).map_err(BallistaError::from)?; + + Ok(()) + } +} diff --git a/python/src/catalog.rs b/python/src/catalog.rs new file mode 100644 index 000000000..f93c795ec --- /dev/null +++ b/python/src/catalog.rs @@ -0,0 +1,123 @@ +// Licensed to the Apache 
Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::collections::HashSet; +use std::sync::Arc; + +use pyo3::exceptions::PyKeyError; +use pyo3::prelude::*; + +use datafusion::{ + arrow::pyarrow::PyArrowConvert, + catalog::{catalog::CatalogProvider, schema::SchemaProvider}, + datasource::{TableProvider, TableType}, +}; + +#[pyclass(name = "Catalog", module = "datafusion", subclass)] +pub(crate) struct PyCatalog { + catalog: Arc, +} + +#[pyclass(name = "Database", module = "datafusion", subclass)] +pub(crate) struct PyDatabase { + database: Arc, +} + +#[pyclass(name = "Table", module = "datafusion", subclass)] +pub(crate) struct PyTable { + table: Arc, +} + +impl PyCatalog { + pub fn new(catalog: Arc) -> Self { + Self { catalog } + } +} + +impl PyDatabase { + pub fn new(database: Arc) -> Self { + Self { database } + } +} + +impl PyTable { + pub fn new(table: Arc) -> Self { + Self { table } + } +} + +#[pymethods] +impl PyCatalog { + fn names(&self) -> Vec { + self.catalog.schema_names() + } + + #[args(name = "\"public\"")] + fn database(&self, name: &str) -> PyResult { + match self.catalog.schema(name) { + Some(database) => Ok(PyDatabase::new(database)), + None => Err(PyKeyError::new_err(format!( + "Database with name {} doesn't exist.", + 
name + ))), + } + } +} + +#[pymethods] +impl PyDatabase { + fn names(&self) -> HashSet { + self.database.table_names().into_iter().collect() + } + + fn table(&self, name: &str) -> PyResult { + match self.database.table(name) { + Some(table) => Ok(PyTable::new(table)), + None => Err(PyKeyError::new_err(format!( + "Table with name {} doesn't exist.", + name + ))), + } + } + + // register_table + // deregister_table +} + +#[pymethods] +impl PyTable { + /// Get a reference to the schema for this table + #[getter] + fn schema(&self, py: Python) -> PyResult { + self.table.schema().to_pyarrow(py) + } + + /// Get the type of this table for metadata/catalog purposes. + #[getter] + fn kind(&self) -> &str { + match self.table.table_type() { + TableType::Base => "physical", + TableType::View => "view", + TableType::Temporary => "temporary", + } + } + + // fn scan + // fn statistics + // fn has_exact_statistics + // fn supports_filter_pushdown +} diff --git a/python/src/context.rs b/python/src/context.rs new file mode 100644 index 000000000..7f386bac3 --- /dev/null +++ b/python/src/context.rs @@ -0,0 +1,173 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::path::PathBuf; +use std::{collections::HashSet, sync::Arc}; + +use uuid::Uuid; + +use pyo3::exceptions::{PyKeyError, PyValueError}; +use pyo3::prelude::*; + +use datafusion::arrow::datatypes::Schema; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::datasource::MemTable; +use datafusion::execution::context::ExecutionContext; +use datafusion::prelude::CsvReadOptions; + +use crate::catalog::PyCatalog; +use crate::dataframe::PyDataFrame; +use crate::errors::DataFusionError; +use crate::udf::PyScalarUDF; +use crate::utils::wait_for_future; + +/// `PyExecutionContext` is able to plan and execute DataFusion plans. +/// It has a powerful optimizer, a physical planner for local execution, and a +/// multi-threaded execution engine to perform the execution. +#[pyclass(name = "ExecutionContext", module = "datafusion", subclass, unsendable)] +pub(crate) struct PyExecutionContext { + ctx: ExecutionContext, +} + +#[pymethods] +impl PyExecutionContext { + // TODO(kszucs): should expose the configuration options as keyword arguments + #[new] + fn new() -> Self { + PyExecutionContext { + ctx: ExecutionContext::new(), + } + } + + /// Returns a PyDataFrame whose plan corresponds to the SQL statement. 
+ fn sql(&mut self, query: &str, py: Python) -> PyResult { + let result = self.ctx.sql(query); + let df = wait_for_future(py, result).map_err(DataFusionError::from)?; + Ok(PyDataFrame::new(df)) + } + + fn create_dataframe( + &mut self, + partitions: Vec>, + ) -> PyResult { + let table = MemTable::try_new(partitions[0][0].schema(), partitions) + .map_err(DataFusionError::from)?; + + // generate a random (unique) name for this table + // table name cannot start with numeric digit + let name = "c".to_owned() + + &Uuid::new_v4() + .to_simple() + .encode_lower(&mut Uuid::encode_buffer()); + + self.ctx + .register_table(&*name, Arc::new(table)) + .map_err(DataFusionError::from)?; + let table = self.ctx.table(&*name).map_err(DataFusionError::from)?; + + let df = PyDataFrame::new(table); + Ok(df) + } + + fn register_record_batches( + &mut self, + name: &str, + partitions: Vec>, + ) -> PyResult<()> { + let schema = partitions[0][0].schema(); + let table = MemTable::try_new(schema, partitions)?; + self.ctx + .register_table(name, Arc::new(table)) + .map_err(DataFusionError::from)?; + Ok(()) + } + + fn register_parquet(&mut self, name: &str, path: &str, py: Python) -> PyResult<()> { + let result = self.ctx.register_parquet(name, path); + wait_for_future(py, result).map_err(DataFusionError::from)?; + Ok(()) + } + + #[args( + schema = "None", + has_header = "true", + delimiter = "\",\"", + schema_infer_max_records = "1000", + file_extension = "\".csv\"" + )] + fn register_csv( + &mut self, + name: &str, + path: PathBuf, + schema: Option, + has_header: bool, + delimiter: &str, + schema_infer_max_records: usize, + file_extension: &str, + py: Python, + ) -> PyResult<()> { + let path = path + .to_str() + .ok_or(PyValueError::new_err("Unable to convert path to a string"))?; + let delimiter = delimiter.as_bytes(); + if delimiter.len() != 1 { + return Err(PyValueError::new_err( + "Delimiter must be a single character", + )); + } + + let mut options = CsvReadOptions::new() + 
.has_header(has_header) + .delimiter(delimiter[0]) + .schema_infer_max_records(schema_infer_max_records) + .file_extension(file_extension); + options.schema = schema.as_ref(); + + let result = self.ctx.register_csv(name, path, options); + wait_for_future(py, result).map_err(DataFusionError::from)?; + + Ok(()) + } + + fn register_udf(&mut self, udf: PyScalarUDF) -> PyResult<()> { + self.ctx.register_udf(udf.function); + Ok(()) + } + + #[args(name = "\"datafusion\"")] + fn catalog(&self, name: &str) -> PyResult { + match self.ctx.catalog(name) { + Some(catalog) => Ok(PyCatalog::new(catalog)), + None => Err(PyKeyError::new_err(format!( + "Catalog with name {} doesn't exist.", + &name + ))), + } + } + + fn tables(&self) -> HashSet { + self.ctx.tables().unwrap() + } + + fn table(&self, name: &str) -> PyResult { + Ok(PyDataFrame::new(self.ctx.table(name)?)) + } + + fn empty_table(&self) -> PyResult { + Ok(PyDataFrame::new(self.ctx.read_empty()?)) + } +} diff --git a/python/src/dataframe.rs b/python/src/dataframe.rs new file mode 100644 index 000000000..cff1733fb --- /dev/null +++ b/python/src/dataframe.rs @@ -0,0 +1,163 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::errors::DataFusionError; +use crate::expression::PyExpr; +use crate::utils::wait_for_future; +use datafusion::arrow::datatypes::Schema; +use datafusion::arrow::pyarrow::PyArrowConvert; +use datafusion::arrow::util::pretty; +use datafusion::dataframe::DataFrame; +use datafusion::logical_plan::JoinType; +use pyo3::exceptions::PyTypeError; +use pyo3::prelude::*; +use pyo3::types::PyTuple; +use std::sync::Arc; + +/// A PyDataFrame is a representation of a logical plan and an API to compose statements. +/// Use it to build a plan and `.collect()` to execute the plan and collect the result. +/// The actual execution of a plan runs natively on Rust and Arrow on a multi-threaded environment. +#[pyclass(name = "DataFrame", module = "ballista", subclass)] +#[derive(Clone)] +pub(crate) struct PyDataFrame { + df: Arc, +} + +impl PyDataFrame { + /// creates a new PyDataFrame + pub fn new(df: Arc) -> Self { + Self { df } + } +} + +#[pymethods] +impl PyDataFrame { + fn __getitem__(&self, key: PyObject) -> PyResult { + Python::with_gil(|py| { + if let Ok(key) = key.extract::<&str>(py) { + self.select_columns(vec![key]) + } else if let Ok(tuple) = key.extract::<&PyTuple>(py) { + let keys = tuple + .iter() + .map(|item| item.extract::<&str>()) + .collect::>>()?; + self.select_columns(keys) + } else if let Ok(keys) = key.extract::>(py) { + self.select_columns(keys) + } else { + let message = "DataFrame can only be indexed by string index or indices"; + Err(PyTypeError::new_err(message)) + } + }) + } + + /// Returns the schema from the logical plan + fn schema(&self) -> Schema { + self.df.schema().into() + } + + #[args(args = "*")] + fn select_columns(&self, args: Vec<&str>) -> PyResult { + let df = self.df.select_columns(&args)?; + Ok(Self::new(df)) + } + + #[args(args = "*")] + fn select(&self, args: Vec) -> PyResult { + let expr = args.into_iter().map(|e| e.into()).collect(); + let df = self.df.select(expr)?; + Ok(Self::new(df)) + } + + fn filter(&self, predicate: 
PyExpr) -> PyResult { + let df = self.df.filter(predicate.into())?; + Ok(Self::new(df)) + } + + fn aggregate(&self, group_by: Vec, aggs: Vec) -> PyResult { + let group_by = group_by.into_iter().map(|e| e.into()).collect(); + let aggs = aggs.into_iter().map(|e| e.into()).collect(); + let df = self.df.aggregate(group_by, aggs)?; + Ok(Self::new(df)) + } + + #[args(exprs = "*")] + fn sort(&self, exprs: Vec) -> PyResult { + let exprs = exprs.into_iter().map(|e| e.into()).collect(); + let df = self.df.sort(exprs)?; + Ok(Self::new(df)) + } + + fn limit(&self, count: usize) -> PyResult { + let df = self.df.limit(count)?; + Ok(Self::new(df)) + } + + /// Executes the plan, returning a list of `RecordBatch`es. + /// Unless some order is specified in the plan, there is no + /// guarantee of the order of the result. + fn collect(&self, py: Python) -> PyResult> { + let batches = wait_for_future(py, self.df.collect())?; + // cannot use PyResult> return type due to + // https://github.com/PyO3/pyo3/issues/1813 + batches.into_iter().map(|rb| rb.to_pyarrow(py)).collect() + } + + /// Print the result, 20 lines by default + #[args(num = "20")] + fn show(&self, py: Python, num: usize) -> PyResult<()> { + let df = self.df.limit(num)?; + let batches = wait_for_future(py, df.collect())?; + Ok(pretty::print_batches(&batches)?) 
+ } + + fn join( + &self, + right: PyDataFrame, + join_keys: (Vec<&str>, Vec<&str>), + how: &str, + ) -> PyResult { + let join_type = match how { + "inner" => JoinType::Inner, + "left" => JoinType::Left, + "right" => JoinType::Right, + "full" => JoinType::Full, + "semi" => JoinType::Semi, + "anti" => JoinType::Anti, + how => { + return Err(DataFusionError::Common(format!( + "The join type {} does not exist or is not implemented", + how + )) + .into()) + } + }; + + let df = self + .df + .join(right.df, join_type, &join_keys.0, &join_keys.1, None)?; + Ok(Self::new(df)) + } + + /// Print the query plan + #[args(verbose = false, analyze = false)] + fn explain(&self, py: Python, verbose: bool, analyze: bool) -> PyResult<()> { + let df = self.df.explain(verbose, analyze)?; + let batches = wait_for_future(py, df.collect())?; + Ok(pretty::print_batches(&batches)?) + } +} diff --git a/python/src/errors.rs b/python/src/errors.rs new file mode 100644 index 000000000..7aecbed90 --- /dev/null +++ b/python/src/errors.rs @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use core::fmt; + +use ballista::prelude::BallistaError as InnerBallistaError; +use datafusion::arrow::error::ArrowError; +use datafusion::error::DataFusionError as InnerDataFusionError; +use pyo3::{exceptions::PyException, PyErr}; + +#[derive(Debug)] +pub enum DataFusionError { + ExecutionError(InnerDataFusionError), + ArrowError(ArrowError), + Common(String), +} + +impl fmt::Display for DataFusionError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + DataFusionError::ExecutionError(e) => write!(f, "DataFusion error: {:?}", e), + DataFusionError::ArrowError(e) => write!(f, "Arrow error: {:?}", e), + DataFusionError::Common(e) => write!(f, "{}", e), + } + } +} + +impl From for DataFusionError { + fn from(err: ArrowError) -> DataFusionError { + DataFusionError::ArrowError(err) + } +} + +impl From for DataFusionError { + fn from(err: InnerDataFusionError) -> DataFusionError { + DataFusionError::ExecutionError(err) + } +} + +impl From for PyErr { + fn from(err: DataFusionError) -> PyErr { + PyException::new_err(err.to_string()) + } +} + +impl From for BallistaError { + fn from(err: InnerDataFusionError) -> BallistaError { + BallistaError::DataFusionExecutionError(err) + } +} + +impl From for BallistaError { + fn from(err: InnerBallistaError) -> BallistaError { + BallistaError::ExecutionError(err) + } +} + +impl fmt::Display for BallistaError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + BallistaError::DataFusionExecutionError(e) => write!(f, "Datafusion error: {:?}", e), + BallistaError::ExecutionError(e) => write!(f, "Ballista error: {:?}", e), + BallistaError::ArrowError(e) => write!(f, "Arrow error: {:?}", e), + BallistaError::Common(e) => write!(f, "{}", e), + } + } +} + +#[derive(Debug)] +pub enum BallistaError { + DataFusionExecutionError(InnerDataFusionError), + ExecutionError(InnerBallistaError), + ArrowError(ArrowError), + Common(String), +} + +impl From for PyErr { + fn from(err: BallistaError) -> 
PyErr { + PyException::new_err(err.to_string()) + } +} diff --git a/python/src/expression.rs b/python/src/expression.rs new file mode 100644 index 000000000..b3275ccf0 --- /dev/null +++ b/python/src/expression.rs @@ -0,0 +1,137 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use pyo3::{basic::CompareOp, prelude::*}; +use std::convert::{From, Into}; + +use datafusion::arrow::datatypes::DataType; +use datafusion::logical_plan::{col, lit, Expr}; + +use datafusion::scalar::ScalarValue; + +/// An PyExpr that can be used on a DataFrame +#[pyclass(name = "Expression", module = "ballista", subclass)] +#[derive(Debug, Clone)] +pub(crate) struct PyExpr { + pub(crate) expr: Expr, +} + +impl From for Expr { + fn from(expr: PyExpr) -> Expr { + expr.expr + } +} + +impl From for PyExpr { + fn from(expr: Expr) -> PyExpr { + PyExpr { expr } + } +} + +#[pymethods] +impl PyExpr { + fn __richcmp__(&self, other: PyExpr, op: CompareOp) -> PyExpr { + let expr = match op { + CompareOp::Lt => self.expr.clone().lt(other.expr), + CompareOp::Le => self.expr.clone().lt_eq(other.expr), + CompareOp::Eq => self.expr.clone().eq(other.expr), + CompareOp::Ne => self.expr.clone().not_eq(other.expr), + CompareOp::Gt => self.expr.clone().gt(other.expr), + CompareOp::Ge => self.expr.clone().gt_eq(other.expr), + }; + expr.into() + } + + fn __str__(&self) -> PyResult { + Ok(format!("{}", self.expr)) + } + + fn __add__(&self, rhs: PyExpr) -> PyResult { + Ok((self.expr.clone() + rhs.expr).into()) + } + + fn __sub__(&self, rhs: PyExpr) -> PyResult { + Ok((self.expr.clone() - rhs.expr).into()) + } + + fn __truediv__(&self, rhs: PyExpr) -> PyResult { + Ok((self.expr.clone() / rhs.expr).into()) + } + + fn __mul__(&self, rhs: PyExpr) -> PyResult { + Ok((self.expr.clone() * rhs.expr).into()) + } + + fn __mod__(&self, rhs: PyExpr) -> PyResult { + Ok(self.expr.clone().modulus(rhs.expr).into()) + } + + fn __and__(&self, rhs: PyExpr) -> PyResult { + Ok(self.expr.clone().and(rhs.expr).into()) + } + + fn __or__(&self, rhs: PyExpr) -> PyResult { + Ok(self.expr.clone().or(rhs.expr).into()) + } + + fn __invert__(&self) -> PyResult { + Ok(self.expr.clone().not().into()) + } + + fn __getitem__(&self, key: &str) -> PyResult { + Ok(Expr::GetIndexedField { + expr: Box::new(self.expr.clone()), + 
key: ScalarValue::Utf8(Some(key.to_string())), + } + .into()) + } + + #[staticmethod] + pub fn literal(value: ScalarValue) -> PyExpr { + lit(value).into() + } + + #[staticmethod] + pub fn column(value: &str) -> PyExpr { + col(value).into() + } + + /// assign a name to the PyExpr + pub fn alias(&self, name: &str) -> PyExpr { + self.expr.clone().alias(name).into() + } + + /// Create a sort PyExpr from an existing PyExpr. + #[args(ascending = true, nulls_first = true)] + pub fn sort(&self, ascending: bool, nulls_first: bool) -> PyExpr { + self.expr.clone().sort(ascending, nulls_first).into() + } + + pub fn is_null(&self) -> PyExpr { + self.expr.clone().is_null().into() + } + + pub fn cast(&self, to: DataType) -> PyExpr { + // self.expr.cast_to() requires DFSchema to validate that the cast + // is supported, omit that for now + let expr = Expr::Cast { + expr: Box::new(self.expr.clone()), + data_type: to, + }; + expr.into() + } +} diff --git a/python/src/functions.rs b/python/src/functions.rs new file mode 100644 index 000000000..1a2c4ed73 --- /dev/null +++ b/python/src/functions.rs @@ -0,0 +1,338 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use pyo3::{prelude::*, wrap_pyfunction}; + +use datafusion::logical_plan; +use datafusion::physical_plan::aggregates::AggregateFunction; +use datafusion_expr::BuiltinScalarFunction; + +use crate::errors; +use crate::expression::PyExpr; + +#[pyfunction] +fn array(value: Vec) -> PyExpr { + PyExpr { + expr: logical_plan::array(value.into_iter().map(|x| x.expr).collect::>()), + } +} + +#[pyfunction] +fn in_list(expr: PyExpr, value: Vec, negated: bool) -> PyExpr { + logical_plan::in_list( + expr.expr, + value.into_iter().map(|x| x.expr).collect::>(), + negated, + ) + .into() +} + +/// Current date and time +#[pyfunction] +fn now() -> PyExpr { + PyExpr { + // here lit(0) is a stub for conform to arity + expr: logical_plan::now(logical_plan::lit(0)), + } +} + +/// Returns a random value in the range 0.0 <= x < 1.0 +#[pyfunction] +fn random() -> PyExpr { + PyExpr { + expr: logical_plan::random(), + } +} + +/// Computes a binary hash of the given data. type is the algorithm to use. +/// Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, blake2b, and blake3. +#[pyfunction(value, method)] +fn digest(value: PyExpr, method: PyExpr) -> PyExpr { + PyExpr { + expr: logical_plan::digest(value.expr, method.expr), + } +} + +/// Concatenates the text representations of all the arguments. +/// NULL arguments are ignored. +#[pyfunction(args = "*")] +fn concat(args: Vec) -> PyResult { + let args = args.into_iter().map(|e| e.expr).collect::>(); + Ok(logical_plan::concat(&args).into()) +} + +/// Concatenates all but the first argument, with separators. +/// The first argument is used as the separator string, and should not be NULL. +/// Other NULL arguments are ignored. 
+#[pyfunction(sep, args = "*")] +fn concat_ws(sep: String, args: Vec) -> PyResult { + let args = args.into_iter().map(|e| e.expr).collect::>(); + Ok(logical_plan::concat_ws(sep, &args).into()) +} + +/// Creates a new Sort expression +#[pyfunction] +fn order_by(expr: PyExpr, asc: Option, nulls_first: Option) -> PyResult { + Ok(PyExpr { + expr: datafusion::logical_plan::Expr::Sort { + expr: Box::new(expr.expr), + asc: asc.unwrap_or(true), + nulls_first: nulls_first.unwrap_or(true), + }, + }) +} + +/// Creates a new Alias expression +#[pyfunction] +fn alias(expr: PyExpr, name: &str) -> PyResult { + Ok(PyExpr { + expr: datafusion::logical_plan::Expr::Alias(Box::new(expr.expr), String::from(name)), + }) +} + +/// Creates a new Window function expression +#[pyfunction] +fn window( + name: &str, + args: Vec, + partition_by: Option>, + order_by: Option>, +) -> PyResult { + use std::str::FromStr; + let fun = datafusion_expr::window_function::WindowFunction::from_str(name) + .map_err(|e| -> errors::DataFusionError { e.into() })?; + Ok(PyExpr { + expr: datafusion::logical_plan::Expr::WindowFunction { + fun, + args: args.into_iter().map(|x| x.expr).collect::>(), + partition_by: partition_by + .unwrap_or_default() + .into_iter() + .map(|x| x.expr) + .collect::>(), + order_by: order_by + .unwrap_or_default() + .into_iter() + .map(|x| x.expr) + .collect::>(), + window_frame: None, + }, + }) +} + +macro_rules! scalar_function { + ($NAME: ident, $FUNC: ident) => { + scalar_function!($NAME, $FUNC, stringify!($NAME)); + }; + ($NAME: ident, $FUNC: ident, $DOC: expr) => { + #[doc = $DOC] + #[pyfunction(args = "*")] + fn $NAME(args: Vec) -> PyExpr { + let expr = logical_plan::Expr::ScalarFunction { + fun: BuiltinScalarFunction::$FUNC, + args: args.into_iter().map(|e| e.into()).collect(), + }; + expr.into() + } + }; +} + +macro_rules! 
aggregate_function { + ($NAME: ident, $FUNC: ident) => { + aggregate_function!($NAME, $FUNC, stringify!($NAME)); + }; + ($NAME: ident, $FUNC: ident, $DOC: expr) => { + #[doc = $DOC] + #[pyfunction(args = "*", distinct = "false")] + fn $NAME(args: Vec, distinct: bool) -> PyExpr { + let expr = logical_plan::Expr::AggregateFunction { + fun: AggregateFunction::$FUNC, + args: args.into_iter().map(|e| e.into()).collect(), + distinct, + }; + expr.into() + } + }; +} + +scalar_function!(abs, Abs); +scalar_function!(acos, Acos); +scalar_function!(ascii, Ascii, "Returns the numeric code of the first character of the argument. In UTF8 encoding, returns the Unicode code point of the character. In other multibyte encodings, the argument must be an ASCII character."); +scalar_function!(asin, Asin); +scalar_function!(atan, Atan); +scalar_function!( + bit_length, + BitLength, + "Returns number of bits in the string (8 times the octet_length)." +); +scalar_function!(btrim, Btrim, "Removes the longest string containing only characters in characters (a space by default) from the start and end of string."); +scalar_function!(ceil, Ceil); +scalar_function!( + character_length, + CharacterLength, + "Returns number of characters in the string." +); +scalar_function!(chr, Chr, "Returns the character with the given code."); +scalar_function!(cos, Cos); +scalar_function!(exp, Exp); +scalar_function!(floor, Floor); +scalar_function!(initcap, InitCap, "Converts the first letter of each word to upper case and the rest to lower case. 
Words are sequences of alphanumeric characters separated by non-alphanumeric characters."); +scalar_function!(left, Left, "Returns first n characters in the string, or when n is negative, returns all but last |n| characters."); +scalar_function!(ln, Ln); +scalar_function!(log10, Log10); +scalar_function!(log2, Log2); +scalar_function!(lower, Lower, "Converts the string to all lower case"); +scalar_function!(lpad, Lpad, "Extends the string to length length by prepending the characters fill (a space by default). If the string is already longer than length then it is truncated (on the right)."); +scalar_function!(ltrim, Ltrim, "Removes the longest string containing only characters in characters (a space by default) from the start of string."); +scalar_function!( + md5, + MD5, + "Computes the MD5 hash of the argument, with the result written in hexadecimal." +); +scalar_function!(octet_length, OctetLength, "Returns number of bytes in the string. Since this version of the function accepts type character directly, it will not strip trailing spaces."); +scalar_function!(regexp_match, RegexpMatch); +scalar_function!( + regexp_replace, + RegexpReplace, + "Replaces substring(s) matching a POSIX regular expression" +); +scalar_function!( + repeat, + Repeat, + "Repeats string the specified number of times." +); +scalar_function!( + replace, + Replace, + "Replaces all occurrences in string of substring from with substring to." +); +scalar_function!( + reverse, + Reverse, + "Reverses the order of the characters in the string." +); +scalar_function!(right, Right, "Returns last n characters in the string, or when n is negative, returns all but first |n| characters."); +scalar_function!(round, Round); +scalar_function!(rpad, Rpad, "Extends the string to length length by appending the characters fill (a space by default). 
If the string is already longer than length then it is truncated."); +scalar_function!(rtrim, Rtrim, "Removes the longest string containing only characters in characters (a space by default) from the end of string."); +scalar_function!(sha224, SHA224); +scalar_function!(sha256, SHA256); +scalar_function!(sha384, SHA384); +scalar_function!(sha512, SHA512); +scalar_function!(signum, Signum); +scalar_function!(sin, Sin); +scalar_function!( + split_part, + SplitPart, + "Splits string at occurrences of delimiter and returns the n'th field (counting from one)." +); +scalar_function!(sqrt, Sqrt); +scalar_function!( + starts_with, + StartsWith, + "Returns true if string starts with prefix." +); +scalar_function!(strpos, Strpos, "Returns starting index of specified substring within string, or zero if it's not present. (Same as position(substring in string), but note the reversed argument order.)"); +scalar_function!(substr, Substr); +scalar_function!(tan, Tan); +scalar_function!( + to_hex, + ToHex, + "Converts the number to its equivalent hexadecimal representation." +); +scalar_function!(to_timestamp, ToTimestamp); +scalar_function!(translate, Translate, "Replaces each character in string that matches a character in the from set with the corresponding character in the to set. 
If from is longer than to, occurrences of the extra characters in from are deleted."); +scalar_function!(trim, Trim, "Removes the longest string containing only characters in characters (a space by default) from the start, end, or both ends (BOTH is the default) of string."); +scalar_function!(trunc, Trunc); +scalar_function!(upper, Upper, "Converts the string to all upper case."); + +aggregate_function!(avg, Avg); +aggregate_function!(count, Count); +aggregate_function!(max, Max); +aggregate_function!(min, Min); +aggregate_function!(sum, Sum); +aggregate_function!(approx_distinct, ApproxDistinct); + +pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { + m.add_wrapped(wrap_pyfunction!(abs))?; + m.add_wrapped(wrap_pyfunction!(acos))?; + m.add_wrapped(wrap_pyfunction!(approx_distinct))?; + m.add_wrapped(wrap_pyfunction!(alias))?; + m.add_wrapped(wrap_pyfunction!(array))?; + m.add_wrapped(wrap_pyfunction!(ascii))?; + m.add_wrapped(wrap_pyfunction!(asin))?; + m.add_wrapped(wrap_pyfunction!(atan))?; + m.add_wrapped(wrap_pyfunction!(avg))?; + m.add_wrapped(wrap_pyfunction!(bit_length))?; + m.add_wrapped(wrap_pyfunction!(btrim))?; + m.add_wrapped(wrap_pyfunction!(ceil))?; + m.add_wrapped(wrap_pyfunction!(character_length))?; + m.add_wrapped(wrap_pyfunction!(chr))?; + m.add_wrapped(wrap_pyfunction!(concat_ws))?; + m.add_wrapped(wrap_pyfunction!(concat))?; + m.add_wrapped(wrap_pyfunction!(cos))?; + m.add_wrapped(wrap_pyfunction!(count))?; + m.add_wrapped(wrap_pyfunction!(digest))?; + m.add_wrapped(wrap_pyfunction!(exp))?; + m.add_wrapped(wrap_pyfunction!(floor))?; + m.add_wrapped(wrap_pyfunction!(in_list))?; + m.add_wrapped(wrap_pyfunction!(initcap))?; + m.add_wrapped(wrap_pyfunction!(left))?; + m.add_wrapped(wrap_pyfunction!(ln))?; + m.add_wrapped(wrap_pyfunction!(log10))?; + m.add_wrapped(wrap_pyfunction!(log2))?; + m.add_wrapped(wrap_pyfunction!(lower))?; + m.add_wrapped(wrap_pyfunction!(lpad))?; + m.add_wrapped(wrap_pyfunction!(ltrim))?; + 
m.add_wrapped(wrap_pyfunction!(max))?; + m.add_wrapped(wrap_pyfunction!(md5))?; + m.add_wrapped(wrap_pyfunction!(min))?; + m.add_wrapped(wrap_pyfunction!(now))?; + m.add_wrapped(wrap_pyfunction!(octet_length))?; + m.add_wrapped(wrap_pyfunction!(order_by))?; + m.add_wrapped(wrap_pyfunction!(random))?; + m.add_wrapped(wrap_pyfunction!(regexp_match))?; + m.add_wrapped(wrap_pyfunction!(regexp_replace))?; + m.add_wrapped(wrap_pyfunction!(repeat))?; + m.add_wrapped(wrap_pyfunction!(replace))?; + m.add_wrapped(wrap_pyfunction!(reverse))?; + m.add_wrapped(wrap_pyfunction!(right))?; + m.add_wrapped(wrap_pyfunction!(round))?; + m.add_wrapped(wrap_pyfunction!(rpad))?; + m.add_wrapped(wrap_pyfunction!(rtrim))?; + m.add_wrapped(wrap_pyfunction!(sha224))?; + m.add_wrapped(wrap_pyfunction!(sha256))?; + m.add_wrapped(wrap_pyfunction!(sha384))?; + m.add_wrapped(wrap_pyfunction!(sha512))?; + m.add_wrapped(wrap_pyfunction!(signum))?; + m.add_wrapped(wrap_pyfunction!(sin))?; + m.add_wrapped(wrap_pyfunction!(split_part))?; + m.add_wrapped(wrap_pyfunction!(sqrt))?; + m.add_wrapped(wrap_pyfunction!(starts_with))?; + m.add_wrapped(wrap_pyfunction!(strpos))?; + m.add_wrapped(wrap_pyfunction!(substr))?; + m.add_wrapped(wrap_pyfunction!(sum))?; + m.add_wrapped(wrap_pyfunction!(tan))?; + m.add_wrapped(wrap_pyfunction!(to_hex))?; + m.add_wrapped(wrap_pyfunction!(to_timestamp))?; + m.add_wrapped(wrap_pyfunction!(translate))?; + m.add_wrapped(wrap_pyfunction!(trim))?; + m.add_wrapped(wrap_pyfunction!(trunc))?; + m.add_wrapped(wrap_pyfunction!(upper))?; + m.add_wrapped(wrap_pyfunction!(window))?; + Ok(()) +} diff --git a/python/src/lib.rs b/python/src/lib.rs new file mode 100644 index 000000000..70400f3f9 --- /dev/null +++ b/python/src/lib.rs @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use mimalloc::MiMalloc; +use pyo3::prelude::*; + +mod ballista_context; +mod dataframe; +pub mod errors; +mod expression; +mod functions; +pub mod utils; + +#[global_allocator] +static GLOBAL: MiMalloc = MiMalloc; + +/// Low-level DataFusion internal package. +/// +/// The higher-level public API is defined in pure python files under the +/// datafusion directory. +#[pymodule] +fn _internal(py: Python, m: &PyModule) -> PyResult<()> { + // Register the python classes + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + // Register the functions as a submodule + let funcs = PyModule::new(py, "functions")?; + functions::init_module(funcs)?; + m.add_submodule(funcs)?; + + Ok(()) +} diff --git a/python/src/udaf.rs b/python/src/udaf.rs new file mode 100644 index 000000000..1de6e6320 --- /dev/null +++ b/python/src/udaf.rs @@ -0,0 +1,153 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use pyo3::{prelude::*, types::PyTuple}; + +use datafusion::arrow::array::ArrayRef; +use datafusion::arrow::datatypes::DataType; +use datafusion::arrow::pyarrow::PyArrowConvert; +use datafusion::error::{DataFusionError, Result}; +use datafusion::logical_plan; +use datafusion::physical_plan::aggregates::AccumulatorFunctionImplementation; +use datafusion::physical_plan::udaf::AggregateUDF; +use datafusion::physical_plan::Accumulator; +use datafusion::scalar::ScalarValue; + +use crate::expression::PyExpr; +use crate::utils::parse_volatility; + +#[derive(Debug)] +struct RustAccumulator { + accum: PyObject, +} + +impl RustAccumulator { + fn new(accum: PyObject) -> Self { + Self { accum } + } +} + +impl Accumulator for RustAccumulator { + fn state(&self) -> Result> { + Python::with_gil(|py| self.accum.as_ref(py).call_method0("state")?.extract()) + .map_err(|e| DataFusionError::Execution(format!("{}", e))) + } + + fn update(&mut self, _values: &[ScalarValue]) -> Result<()> { + // no need to implement as datafusion does not use it + todo!() + } + + fn merge(&mut self, _states: &[ScalarValue]) -> Result<()> { + // no need to implement as datafusion does not use it + todo!() + } + + fn evaluate(&self) -> Result { + Python::with_gil(|py| self.accum.as_ref(py).call_method0("evaluate")?.extract()) + .map_err(|e| DataFusionError::Execution(format!("{}", e))) + } + + fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + Python::with_gil(|py| { + // 1. 
cast args to Pyarrow array + let py_args = values + .iter() + .map(|arg| arg.data().to_owned().to_pyarrow(py).unwrap()) + .collect::>(); + let py_args = PyTuple::new(py, py_args); + + // 2. call function + self.accum + .as_ref(py) + .call_method1("update", py_args) + .map_err(|e| DataFusionError::Execution(format!("{}", e)))?; + + Ok(()) + }) + } + + fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { + Python::with_gil(|py| { + let state = &states[0]; + + // 1. cast states to Pyarrow array + let state = state + .to_pyarrow(py) + .map_err(|e| DataFusionError::Execution(format!("{}", e)))?; + + // 2. call merge + self.accum + .as_ref(py) + .call_method1("merge", (state,)) + .map_err(|e| DataFusionError::Execution(format!("{}", e)))?; + + Ok(()) + }) + } +} + +pub fn to_rust_accumulator(accum: PyObject) -> AccumulatorFunctionImplementation { + Arc::new(move || -> Result> { + let accum = Python::with_gil(|py| { + accum + .call0(py) + .map_err(|e| DataFusionError::Execution(format!("{}", e))) + })?; + Ok(Box::new(RustAccumulator::new(accum))) + }) +} + +/// Represents a AggregateUDF +#[pyclass(name = "AggregateUDF", module = "datafusion", subclass)] +#[derive(Debug, Clone)] +pub struct PyAggregateUDF { + pub(crate) function: AggregateUDF, +} + +#[pymethods] +impl PyAggregateUDF { + #[new(name, accumulator, input_type, return_type, state_type, volatility)] + fn new( + name: &str, + accumulator: PyObject, + input_type: DataType, + return_type: DataType, + state_type: Vec, + volatility: &str, + ) -> PyResult { + let function = logical_plan::create_udaf( + &name, + input_type, + Arc::new(return_type), + parse_volatility(volatility)?, + to_rust_accumulator(accumulator), + Arc::new(state_type), + ); + Ok(Self { function }) + } + + /// creates a new PyExpr with the call of the udf + #[call] + #[args(args = "*")] + fn __call__(&self, args: Vec) -> PyResult { + let args = args.iter().map(|e| e.expr.clone()).collect(); + Ok(self.function.call(args).into()) + } +} diff 
--git a/python/src/udf.rs b/python/src/udf.rs new file mode 100644 index 000000000..379c44987 --- /dev/null +++ b/python/src/udf.rs @@ -0,0 +1,98 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use pyo3::{prelude::*, types::PyTuple}; + +use datafusion::arrow::array::ArrayRef; +use datafusion::arrow::datatypes::DataType; +use datafusion::arrow::pyarrow::PyArrowConvert; +use datafusion::error::DataFusionError; +use datafusion::logical_plan; +use datafusion::physical_plan::functions::{ + make_scalar_function, ScalarFunctionImplementation, +}; +use datafusion::physical_plan::udf::ScalarUDF; + +use crate::expression::PyExpr; +use crate::utils::parse_volatility; + +/// Create a DataFusion's UDF implementation from a python function +/// that expects pyarrow arrays. This is more efficient as it performs +/// a zero-copy of the contents. +fn to_rust_function(func: PyObject) -> ScalarFunctionImplementation { + make_scalar_function( + move |args: &[ArrayRef]| -> Result { + Python::with_gil(|py| { + // 1. cast args to Pyarrow arrays + let py_args = args + .iter() + .map(|arg| arg.data().to_owned().to_pyarrow(py).unwrap()) + .collect::>(); + let py_args = PyTuple::new(py, py_args); + + // 2. 
call function + let value = func.as_ref(py).call(py_args, None); + let value = match value { + Ok(n) => Ok(n), + Err(error) => Err(DataFusionError::Execution(format!("{:?}", error))), + }?; + + // 3. cast to arrow::array::Array + let array = ArrayRef::from_pyarrow(value).unwrap(); + Ok(array) + }) + }, + ) +} + +/// Represents a PyScalarUDF +#[pyclass(name = "ScalarUDF", module = "datafusion", subclass)] +#[derive(Debug, Clone)] +pub struct PyScalarUDF { + pub(crate) function: ScalarUDF, +} + +#[pymethods] +impl PyScalarUDF { + #[new(name, func, input_types, return_type, volatility)] + fn new( + name: &str, + func: PyObject, + input_types: Vec, + return_type: DataType, + volatility: &str, + ) -> PyResult { + let function = logical_plan::create_udf( + name, + input_types, + Arc::new(return_type), + parse_volatility(volatility)?, + to_rust_function(func), + ); + Ok(Self { function }) + } + + /// creates a new PyExpr with the call of the udf + #[call] + #[args(args = "*")] + fn __call__(&self, args: Vec) -> PyResult { + let args = args.iter().map(|e| e.expr.clone()).collect(); + Ok(self.function.call(args).into()) + } +} diff --git a/python/src/utils.rs b/python/src/utils.rs new file mode 100644 index 000000000..795058bc9 --- /dev/null +++ b/python/src/utils.rs @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use pyo3::prelude::*; +use std::future::Future; +use tokio::runtime::Runtime; + +/// Utility to collect rust futures with GIL released +pub fn wait_for_future(py: Python, f: F) -> F::Output +where + F: Send, + F::Output: Send, +{ + let rt = Runtime::new().unwrap(); + py.allow_threads(|| rt.block_on(f)) +}